url_trimmer 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: d26f5d3528cba18f8d691a3cabd30320d4aca627
4
+ data.tar.gz: bdefacd3a2b56bde497cdcca064807c1a58728ba
5
+ SHA512:
6
+ metadata.gz: fdf00065dd4cad05963b8b35e1e0f9c7c38ae452c693411f841e0848e8108bf69390a135855718d7c45ac6938fe541e45cacf34a4a7909a7d8693a2737f051ee
7
+ data.tar.gz: 9b2f4c34f990a71e1ce250f3f5aa00e76034cd202ed7d1005719edace747aebce59490eae3e08418ad38d0b7bbedb3f5c9e8df8360e5eff68ae14303b3a143ca
data/.gitignore ADDED
@@ -0,0 +1,24 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
18
+ *.bundle
19
+ *.so
20
+ *.o
21
+ *.a
22
+ mkmf.log
23
+ *.swp
24
+ .rbenv-gemsets
data/.ruby-version ADDED
@@ -0,0 +1 @@
1
+ 2.1.2
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in url_trimmer.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2014 Cristian Rasch
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,21 @@
1
+ # URLTrimmer
2
+
3
+ Reads in plain text files with one URL per line and outputs a list of unique URLs by domain.
4
+
5
+ ## Installation
6
+
7
+ Install it yourself as:
8
+
9
+ $ gem install url_trimmer
10
+
11
+ ## Usage
12
+
13
+ url-trimmer urls1.txt urls2.txt
14
+
15
+ ## Contributing
16
+
17
+ 1. Fork it ( https://github.com/wecodeio/url_trimmer/fork )
18
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
19
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
20
+ 4. Push to the branch (`git push origin my-new-feature`)
21
+ 5. Create a new Pull Request
data/Rakefile ADDED
@@ -0,0 +1,9 @@
1
+ require "bundler/gem_tasks"
2
+ require "rake/testtask"
3
+
# Defines the test task: it picks up every file matching spec/**/*_spec.rb
# and has the task's verbose flag switched on.
Rake::TestTask.new do |test_task|
  test_task.verbose = true
  test_task.pattern = "spec/**/*_spec.rb"
end

# Running `rake` with no task name runs the test task.
task default: :test
data/bin/url-trimmer ADDED
@@ -0,0 +1,7 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require_relative "../lib/url_trimmer"
4
+
# Read every line from the files named on the command line (or from STDIN).
# Line terminators are stripped BEFORE filtering: lines returned by
# ARGF.readlines still end in "\n", so checking them for emptiness first
# would never drop a blank line.
urls = ARGF.readlines.map(&:chomp).reject(&:empty?)
unique_urls = URLTrimmer::Worker.uniq_by_domain(urls)
# One URL per line, always terminated by a trailing newline.
print "#{unique_urls.join("\n")}\n"
@@ -0,0 +1,3 @@
module URLTrimmer
  # Version string of the gem; frozen so it cannot be modified in place.
  VERSION = "0.0.1".freeze
end
@@ -0,0 +1,20 @@
1
+ require_relative "url_trimmer/version"
2
+ require "domain_name"
3
+
4
+ module URLTrimmer
5
+ class Worker
6
+ def self.uniq_by_domain(urls)
7
+ urls.map! do |url|
8
+ begin
9
+ url.downcase
10
+ rescue ArgumentError
11
+ url.encode("UTF-8", invalid: :replace, undef: :replace, replace: "").downcase
12
+ end
13
+ end
14
+ urls.map! { |url| url[%r(\Ahttps?://[^/]+), 0] }
15
+ urls.compact!
16
+ urls.uniq! { |url| DomainName(url).domain }
17
+ urls
18
+ end
19
+ end
20
+ end
@@ -0,0 +1,20 @@
1
+ require_relative "../spec_helper"
2
+ require_relative "../../lib/url_trimmer"
3
+
4
+ module URLTrimmer
5
+ describe Worker do
6
+ let(:urls) do
7
+ ["http://www.google.com.ar/blah1", "https://www.google.com.ar/blah2", "https://www.google.com.br/blah3",
8
+ "http://www.google.com/blah4" "https://plus.google.com/blah5"]
9
+ end
10
+
11
+ it "returns a list of unique URLs by domain" do
12
+ unique_urls = Worker.uniq_by_domain(urls)
13
+
14
+ unique_urls.size.must_equal 3
15
+ unique_urls.must_include("http://www.google.com.ar")
16
+ unique_urls.must_include("https://www.google.com.br")
17
+ unique_urls.must_include("http://www.google.com")
18
+ end
19
+ end
20
+ end
@@ -0,0 +1,2 @@
1
+ require "minitest/autorun"
2
+ require "minitest/pride"
@@ -0,0 +1,24 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'url_trimmer/version'
5
+
# Packaging metadata for the gem; the version is read from URLTrimmer::VERSION.
Gem::Specification.new do |gem|
  gem.name     = "url_trimmer"
  gem.version  = URLTrimmer::VERSION
  gem.authors  = ["Cristian Rasch"]
  gem.email    = ["cristian@box.cristianrasch.com.ar"]
  gem.summary  = "Reads in plain text files with one URL per line and outputs a list of unique URLs by domain"
  gem.homepage = "https://github.com/wecodeio/url_trimmer"
  gem.license  = "MIT"

  # The packaged file list is whatever git tracks (NUL-separated output).
  gem.files = `git ls-files -z`.split("\x0")
  # Executables are the base names of the packaged files under bin/.
  gem.executables = gem.files.grep(%r{^bin/}).map { |path| File.basename(path) }
  gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]

  gem.add_runtime_dependency "domain_name"

  gem.add_development_dependency "bundler", "~> 1.6"
  gem.add_development_dependency "rake"
end
metadata ADDED
@@ -0,0 +1,102 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: url_trimmer
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Cristian Rasch
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-06-28 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: domain_name
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: bundler
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '1.6'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '1.6'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ description:
56
+ email:
57
+ - cristian@box.cristianrasch.com.ar
58
+ executables:
59
+ - url-trimmer
60
+ extensions: []
61
+ extra_rdoc_files: []
62
+ files:
63
+ - ".gitignore"
64
+ - ".ruby-version"
65
+ - Gemfile
66
+ - LICENSE.txt
67
+ - README.md
68
+ - Rakefile
69
+ - bin/url-trimmer
70
+ - lib/url_trimmer.rb
71
+ - lib/url_trimmer/version.rb
72
+ - spec/lib/url_trimmer_spec.rb
73
+ - spec/spec_helper.rb
74
+ - url_trimmer.gemspec
75
+ homepage: https://github.com/wecodeio/url_trimmer
76
+ licenses:
77
+ - MIT
78
+ metadata: {}
79
+ post_install_message:
80
+ rdoc_options: []
81
+ require_paths:
82
+ - lib
83
+ required_ruby_version: !ruby/object:Gem::Requirement
84
+ requirements:
85
+ - - ">="
86
+ - !ruby/object:Gem::Version
87
+ version: '0'
88
+ required_rubygems_version: !ruby/object:Gem::Requirement
89
+ requirements:
90
+ - - ">="
91
+ - !ruby/object:Gem::Version
92
+ version: '0'
93
+ requirements: []
94
+ rubyforge_project:
95
+ rubygems_version: 2.2.2
96
+ signing_key:
97
+ specification_version: 4
98
+ summary: Reads in plain text files with one URL per line and outputs a list of unique
99
+ URLs by domain
100
+ test_files:
101
+ - spec/lib/url_trimmer_spec.rb
102
+ - spec/spec_helper.rb