url_trimmer 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: d26f5d3528cba18f8d691a3cabd30320d4aca627
4
+ data.tar.gz: bdefacd3a2b56bde497cdcca064807c1a58728ba
5
+ SHA512:
6
+ metadata.gz: fdf00065dd4cad05963b8b35e1e0f9c7c38ae452c693411f841e0848e8108bf69390a135855718d7c45ac6938fe541e45cacf34a4a7909a7d8693a2737f051ee
7
+ data.tar.gz: 9b2f4c34f990a71e1ce250f3f5aa00e76034cd202ed7d1005719edace747aebce59490eae3e08418ad38d0b7bbedb3f5c9e8df8360e5eff68ae14303b3a143ca
data/.gitignore ADDED
@@ -0,0 +1,24 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
18
+ *.bundle
19
+ *.so
20
+ *.o
21
+ *.a
22
+ mkmf.log
23
+ *.swp
24
+ .rbenv-gemsets
data/.ruby-version ADDED
@@ -0,0 +1 @@
1
+ 2.1.2
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
source "https://rubygems.org"

# Dependencies are not listed here; they are loaded from url_trimmer.gemspec.
gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2014 Cristian Rasch
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,21 @@
1
+ # URLTrimmer
2
+
3
+ Reads in plain text files with one URL per line and outputs a list of unique URLs by domain.
4
+
5
+ ## Installation
6
+
7
+ Install it yourself as:
8
+
9
+ $ gem install url_trimmer
10
+
11
+ ## Usage
12
+
13
+ $ url-trimmer urls1.txt urls2.txt
14
+
15
+ ## Contributing
16
+
17
+ 1. Fork it ( https://github.com/wecodeio/url_trimmer/fork )
18
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
19
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
20
+ 4. Push to the branch (`git push origin my-new-feature`)
21
+ 5. Create a new Pull Request
data/Rakefile ADDED
@@ -0,0 +1,9 @@
1
require "bundler/gem_tasks"
require "rake/testtask"

# Register a test task that loads every file matching spec/**/*_spec.rb and
# runs it with verbose output enabled.
Rake::TestTask.new do |test_task|
  test_task.verbose = true
  test_task.pattern = "spec/**/*_spec.rb"
end

# The default task depends on :test, so a bare `rake` runs the specs.
task :default => :test
data/bin/url-trimmer ADDED
@@ -0,0 +1,7 @@
1
#!/usr/bin/env ruby

# Command-line entry point: reads URLs (one per line) from the files named on
# the command line, or from standard input when none are given, and prints one
# URL per distinct domain.

require_relative "../lib/url_trimmer"

# Strip the line terminator BEFORE discarding blank lines. ARGF.readlines keeps
# the trailing "\n" on every line, so an emptiness check performed on the raw
# lines never matches a blank line.
urls = ARGF.readlines.map(&:chomp).reject(&:empty?)
unique_urls = URLTrimmer::Worker.uniq_by_domain(urls)

# Always terminate the output with a newline, even when the list is empty.
print "#{unique_urls.join("\n")}\n"
@@ -0,0 +1,3 @@
1
module URLTrimmer
  # Release number of the gem. Frozen so the constant's string cannot be
  # modified in place by code that reads it.
  VERSION = "0.0.1".freeze
end
@@ -0,0 +1,20 @@
1
+ require_relative "url_trimmer/version"
2
+ require "domain_name"
3
+
4
module URLTrimmer
  # Collapses a list of URLs down to one entry per domain.
  class Worker
    # Matches the leading "http://" or "https://" plus everything up to (but
    # not including) the first following slash. Input is lowercased before
    # matching, so the scheme is matched in lowercase only.
    ORIGIN_PATTERN = %r{\Ahttps?://[^/]+}

    # Returns a new array holding the first URL seen for each distinct domain,
    # trimmed to its scheme-and-host prefix and lowercased.
    #
    # Entries that do not start with http:// or https:// (including empty
    # strings) are dropped. Neither the given array nor its strings are
    # modified, so frozen input is accepted.
    #
    # @param urls [Array<String>] candidate URLs, one per element
    # @return [Array<String>] trimmed URLs, unique by `DomainName(...).domain`
    def self.uniq_by_domain(urls)
      urls
        # Remove invalid byte sequences up front: both the case conversion and
        # the regexp match can raise ArgumentError on a malformed string.
        .map { |url| url.scrub("").downcase[ORIGIN_PATTERN] }
        .compact
        # NOTE(review): the scheme-prefixed string is handed to DomainName
        # unchanged, exactly as before — confirm the gem is meant to receive
        # it in that form rather than a bare host name.
        .uniq { |origin| DomainName(origin).domain }
    end
  end
end
@@ -0,0 +1,20 @@
1
+ require_relative "../spec_helper"
2
+ require_relative "../../lib/url_trimmer"
3
+
4
module URLTrimmer
  describe Worker do
    # Five separate URLs. The comma between the last two entries is required:
    # without it Ruby joins the adjacent string literals into a single element
    # and the plus.google.com case is never exercised.
    let(:urls) do
      ["http://www.google.com.ar/blah1", "https://www.google.com.ar/blah2", "https://www.google.com.br/blah3",
       "http://www.google.com/blah4", "https://plus.google.com/blah5"]
    end

    it "returns a list of unique URLs by domain" do
      unique_urls = Worker.uniq_by_domain(urls)

      # One survivor is expected per domain, and it is the first URL listed
      # for that domain, trimmed to scheme and host.
      unique_urls.size.must_equal 3
      unique_urls.must_include("http://www.google.com.ar")
      unique_urls.must_include("https://www.google.com.br")
      unique_urls.must_include("http://www.google.com")
    end
  end
end
@@ -0,0 +1,2 @@
1
+ require "minitest/autorun"
2
+ require "minitest/pride"
@@ -0,0 +1,24 @@
1
# coding: utf-8
# Make lib/ loadable so the version constant can be required below.
lib_dir = File.expand_path('../lib', __FILE__)
unless $LOAD_PATH.include?(lib_dir)
  $LOAD_PATH.unshift(lib_dir)
end
require 'url_trimmer/version'

Gem::Specification.new do |gem|
  # Identity and authorship.
  gem.name          = "url_trimmer"
  gem.version       = URLTrimmer::VERSION
  gem.authors       = ["Cristian Rasch"]
  gem.email         = ["cristian@box.cristianrasch.com.ar"]
  gem.summary       = "Reads in plain text files with one URL per line and outputs a list of unique URLs by domain"
  gem.homepage      = "https://github.com/wecodeio/url_trimmer"
  gem.license       = "MIT"

  # Package contents come from the files git tracks; executables and test
  # files are then picked out of that list by path prefix.
  gem.files         = `git ls-files -z`.split("\x0")
  gem.executables   = gem.files.grep(%r{^bin/}) { |path| File.basename(path) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]

  # Needed at runtime.
  gem.add_runtime_dependency "domain_name"

  # Needed only for development.
  gem.add_development_dependency "bundler", "~> 1.6"
  gem.add_development_dependency "rake"
end
metadata ADDED
@@ -0,0 +1,102 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: url_trimmer
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Cristian Rasch
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-06-28 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: domain_name
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: bundler
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '1.6'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '1.6'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ description:
56
+ email:
57
+ - cristian@box.cristianrasch.com.ar
58
+ executables:
59
+ - url-trimmer
60
+ extensions: []
61
+ extra_rdoc_files: []
62
+ files:
63
+ - ".gitignore"
64
+ - ".ruby-version"
65
+ - Gemfile
66
+ - LICENSE.txt
67
+ - README.md
68
+ - Rakefile
69
+ - bin/url-trimmer
70
+ - lib/url_trimmer.rb
71
+ - lib/url_trimmer/version.rb
72
+ - spec/lib/url_trimmer_spec.rb
73
+ - spec/spec_helper.rb
74
+ - url_trimmer.gemspec
75
+ homepage: https://github.com/wecodeio/url_trimmer
76
+ licenses:
77
+ - MIT
78
+ metadata: {}
79
+ post_install_message:
80
+ rdoc_options: []
81
+ require_paths:
82
+ - lib
83
+ required_ruby_version: !ruby/object:Gem::Requirement
84
+ requirements:
85
+ - - ">="
86
+ - !ruby/object:Gem::Version
87
+ version: '0'
88
+ required_rubygems_version: !ruby/object:Gem::Requirement
89
+ requirements:
90
+ - - ">="
91
+ - !ruby/object:Gem::Version
92
+ version: '0'
93
+ requirements: []
94
+ rubyforge_project:
95
+ rubygems_version: 2.2.2
96
+ signing_key:
97
+ specification_version: 4
98
+ summary: Reads in plain text files with one URL per line and outputs a list of unique
99
+ URLs by domain
100
+ test_files:
101
+ - spec/lib/url_trimmer_spec.rb
102
+ - spec/spec_helper.rb