url_trimmer 0.0.2 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 208db8066a5128f36d5d2a23435569e278346eb4
4
- data.tar.gz: 353abe12c38767112e252f3d709fc78ce1a0fa2e
3
+ metadata.gz: 04dae9e3b37ea013c12031188024c628f8df916a
4
+ data.tar.gz: 32d6caa837aeb69f97ab97c2609b6b3a7ac65dc7
5
5
  SHA512:
6
- metadata.gz: 626f3ad8f74050eb434382f8f40cdc2830dfac10812044db58ba99d0e9a0662eed8e78f705c25d5a32ce2b1ee6e405e6accc37e5d36df59a38f7ec71291a668c
7
- data.tar.gz: b20cafc045eb1cf0cad0b8b2cb9e6fe60bb6c36de85efb51d0571959b9c41f008a1d49a044c1f087d30e7d35c5492a90646590de40c400b98f2a621c91f5a349
6
+ metadata.gz: b7a84541cf9f49aa591fc59ed6063faf3da67ba53cbb6d74ac97cac31b577829460c04a52e0459163096f3693b30825883e5dd4dd9aef585f89fbfb158668258
7
+ data.tar.gz: 1ba47225a09ddf0d16c233bc59e7680d70550b6f725a895eb4601f185569c4358862090b4b1087a94c070ad1f1a3ee85e005ca83d261f560908a2eb927a2061c
data/.gitignore CHANGED
@@ -22,3 +22,4 @@ tmp
22
22
  mkmf.log
23
23
  *.swp
24
24
  .rbenv-gemsets
25
+ *.komodoproject
data/README.md CHANGED
@@ -10,7 +10,17 @@ Install it yourself as:
10
10
 
11
11
  ## Usage
12
12
 
13
- url-trimmer urls1.txt urls2.txt
13
+ ### URL Trimmer
14
+
15
+ ```bash
16
+ url-trimmer urls1.txt urls2.txt .. urlsN.txt > unique-urls.txt
17
+ ```
18
+
19
+ ### URL Differ
20
+
21
+ ```bash
22
+ url-differ urls1.txt urls2.txt > urls2-unique.txt
23
+ ```
14
24
 
15
25
  ## Contributing
16
26
 
data/bin/url-differ ADDED
@@ -0,0 +1,9 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require_relative "../lib/url_trimmer/differ"
4
+
5
+ file1, file2 = ARGV[0], ARGV[1]
6
+ urls1 = File.readlines(file1).reject { |line| line.empty? }.map(&:chomp)
7
+ urls2 = File.readlines(file2).reject { |line| line.empty? }.map(&:chomp)
8
+ unique_urls = URLTrimmer::Differ.between(urls1, urls2)
9
+ print "#{unique_urls.join("\n")}\n"
data/lib/url_trimmer/differ.rb ADDED
@@ -0,0 +1,19 @@
1
+ require_relative "../url_trimmer"
2
+
3
+ module URLTrimmer
4
+ class Differ
5
+ def self.between(urls1, urls2)
6
+ unique_urls1 = Worker.uniq_by_domain(urls1)
7
+ unique_urls1.map! { |url| url[Worker::URL_REGEXP, 0] }
8
+ domains1 = unique_urls1.map { |url| DomainName(url).domain }
9
+
10
+ unique_urls2 = Worker.uniq_by_domain(urls2)
11
+ unique_urls2.reject! do |url|
12
+ u = url[Worker::URL_REGEXP, 0]
13
+ domains1.include?(DomainName(u).domain)
14
+ end
15
+ unique_urls2.sort!
16
+ unique_urls2
17
+ end
18
+ end
19
+ end
data/lib/url_trimmer/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module URLTrimmer
2
- VERSION = "0.0.2"
2
+ VERSION = "0.1.0"
3
3
  end
data/spec/lib/url_trimmer/differ_spec.rb ADDED
@@ -0,0 +1,21 @@
1
+ require File.expand_path("spec/spec_helper")
2
+ require File.expand_path("lib/url_trimmer/differ")
3
+
4
+ module URLTrimmer
5
+ describe Differ do
6
+ describe ".between(urls1, urls2)" do
7
+ let(:urls1) { %w(http://www.google.com/1 https://www.google.com.ar/2) }
8
+ let(:urls2) do
9
+ %w(https://www.google.com/3 http://www.google.com.ar/4 http://www.google.com.br/5 http://www.example.com/6)
10
+ end
11
+
12
+ it "removes URLs from urls2 already present in urls1" do
13
+ unique_urls = Differ.between(urls1, urls2)
14
+
15
+ unique_urls.size.must_equal 2
16
+ unique_urls.must_include("http://www.google.com.br/5")
17
+ unique_urls.must_include("http://www.example.com/6")
18
+ end
19
+ end
20
+ end
21
+ end
data/url_trimmer.gemspec CHANGED
@@ -21,4 +21,5 @@ Gem::Specification.new do |spec|
21
21
 
22
22
  spec.add_development_dependency "bundler", "~> 1.6"
23
23
  spec.add_development_dependency "rake"
24
+ spec.add_development_dependency "minitest", "~> 5.3.5"
24
25
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: url_trimmer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Cristian Rasch
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-06-30 00:00:00.000000000 Z
11
+ date: 2014-07-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: domain_name
@@ -52,10 +52,25 @@ dependencies:
52
52
  - - ">="
53
53
  - !ruby/object:Gem::Version
54
54
  version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: minitest
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: 5.3.5
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: 5.3.5
55
69
  description:
56
70
  email:
57
71
  - cristian@box.cristianrasch.com.ar
58
72
  executables:
73
+ - url-differ
59
74
  - url-trimmer
60
75
  extensions: []
61
76
  extra_rdoc_files: []
@@ -66,9 +81,12 @@ files:
66
81
  - LICENSE.txt
67
82
  - README.md
68
83
  - Rakefile
84
+ - bin/url-differ
69
85
  - bin/url-trimmer
70
86
  - lib/url_trimmer.rb
87
+ - lib/url_trimmer/differ.rb
71
88
  - lib/url_trimmer/version.rb
89
+ - spec/lib/url_trimmer/differ_spec.rb
72
90
  - spec/lib/url_trimmer_spec.rb
73
91
  - spec/spec_helper.rb
74
92
  - url_trimmer.gemspec
@@ -98,5 +116,6 @@ specification_version: 4
98
116
  summary: Reads in plain text files with one URL per line and outputs a list of unique
99
117
  URLs by domain
100
118
  test_files:
119
+ - spec/lib/url_trimmer/differ_spec.rb
101
120
  - spec/lib/url_trimmer_spec.rb
102
121
  - spec/spec_helper.rb