url_trimmer 0.0.2 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 208db8066a5128f36d5d2a23435569e278346eb4
4
- data.tar.gz: 353abe12c38767112e252f3d709fc78ce1a0fa2e
3
+ metadata.gz: 04dae9e3b37ea013c12031188024c628f8df916a
4
+ data.tar.gz: 32d6caa837aeb69f97ab97c2609b6b3a7ac65dc7
5
5
  SHA512:
6
- metadata.gz: 626f3ad8f74050eb434382f8f40cdc2830dfac10812044db58ba99d0e9a0662eed8e78f705c25d5a32ce2b1ee6e405e6accc37e5d36df59a38f7ec71291a668c
7
- data.tar.gz: b20cafc045eb1cf0cad0b8b2cb9e6fe60bb6c36de85efb51d0571959b9c41f008a1d49a044c1f087d30e7d35c5492a90646590de40c400b98f2a621c91f5a349
6
+ metadata.gz: b7a84541cf9f49aa591fc59ed6063faf3da67ba53cbb6d74ac97cac31b577829460c04a52e0459163096f3693b30825883e5dd4dd9aef585f89fbfb158668258
7
+ data.tar.gz: 1ba47225a09ddf0d16c233bc59e7680d70550b6f725a895eb4601f185569c4358862090b4b1087a94c070ad1f1a3ee85e005ca83d261f560908a2eb927a2061c
data/.gitignore CHANGED
@@ -22,3 +22,4 @@ tmp
22
22
  mkmf.log
23
23
  *.swp
24
24
  .rbenv-gemsets
25
+ *.komodoproject
data/README.md CHANGED
@@ -10,7 +10,17 @@ Install it yourself as:
10
10
 
11
11
  ## Usage
12
12
 
13
- url-trimmer urls1.txt urls2.txt
13
+ ### URL Trimmer
14
+
15
+ ```bash
16
+ url-trimmer urls1.txt urls2.txt .. urlsN.txt > unique-urls.txt
17
+ ```
18
+
19
+ ### URL Differ
20
+
21
+ ```bash
22
+ url-differ urls1.txt urls2.txt > urls2-unique.txt
23
+ ```
14
24
 
15
25
  ## Contributing
16
26
 
data/bin/url-differ ADDED
@@ -0,0 +1,9 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require_relative "../lib/url_trimmer/differ"
4
+
5
+ file1, file2 = ARGV[0], ARGV[1]
6
+ urls1 = File.readlines(file1).reject { |line| line.empty? }.map(&:chomp)
7
+ urls2 = File.readlines(file2).reject { |line| line.empty? }.map(&:chomp)
8
+ unique_urls = URLTrimmer::Differ.between(urls1, urls2)
9
+ print "#{unique_urls.join("\n")}\n"
@@ -0,0 +1,19 @@
1
+ require_relative "../url_trimmer"
2
+
3
+ module URLTrimmer
4
+ class Differ
5
+ def self.between(urls1, urls2)
6
+ unique_urls1 = Worker.uniq_by_domain(urls1)
7
+ unique_urls1.map! { |url| url[Worker::URL_REGEXP, 0] }
8
+ domains1 = unique_urls1.map { |url| DomainName(url).domain }
9
+
10
+ unique_urls2 = Worker.uniq_by_domain(urls2)
11
+ unique_urls2.reject! do |url|
12
+ u = url[Worker::URL_REGEXP, 0]
13
+ domains1.include?(DomainName(u).domain)
14
+ end
15
+ unique_urls2.sort!
16
+ unique_urls2
17
+ end
18
+ end
19
+ end
@@ -1,3 +1,3 @@
1
1
  module URLTrimmer
2
- VERSION = "0.0.2"
2
+ VERSION = "0.1.0"
3
3
  end
@@ -0,0 +1,21 @@
1
+ require File.expand_path("spec/spec_helper")
2
+ require File.expand_path("lib/url_trimmer/differ")
3
+
4
+ module URLTrimmer
5
+ describe Differ do
6
+ describe ".between(urls1, urls2)" do
7
+ let(:urls1) { %w(http://www.google.com/1 https://www.google.com.ar/2) }
8
+ let(:urls2) do
9
+ %w(https://www.google.com/3 http://www.google.com.ar/4 http://www.google.com.br/5 http://www.example.com/6)
10
+ end
11
+
12
+ it "removes URLs from urls2 already present in urls1" do
13
+ unique_urls = Differ.between(urls1, urls2)
14
+
15
+ unique_urls.size.must_equal 2
16
+ unique_urls.must_include("http://www.google.com.br/5")
17
+ unique_urls.must_include("http://www.example.com/6")
18
+ end
19
+ end
20
+ end
21
+ end
data/url_trimmer.gemspec CHANGED
@@ -21,4 +21,5 @@ Gem::Specification.new do |spec|
21
21
 
22
22
  spec.add_development_dependency "bundler", "~> 1.6"
23
23
  spec.add_development_dependency "rake"
24
+ spec.add_development_dependency "minitest", "~> 5.3.5"
24
25
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: url_trimmer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Cristian Rasch
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-06-30 00:00:00.000000000 Z
11
+ date: 2014-07-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: domain_name
@@ -52,10 +52,25 @@ dependencies:
52
52
  - - ">="
53
53
  - !ruby/object:Gem::Version
54
54
  version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: minitest
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: 5.3.5
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: 5.3.5
55
69
  description:
56
70
  email:
57
71
  - cristian@box.cristianrasch.com.ar
58
72
  executables:
73
+ - url-differ
59
74
  - url-trimmer
60
75
  extensions: []
61
76
  extra_rdoc_files: []
@@ -66,9 +81,12 @@ files:
66
81
  - LICENSE.txt
67
82
  - README.md
68
83
  - Rakefile
84
+ - bin/url-differ
69
85
  - bin/url-trimmer
70
86
  - lib/url_trimmer.rb
87
+ - lib/url_trimmer/differ.rb
71
88
  - lib/url_trimmer/version.rb
89
+ - spec/lib/url_trimmer/differ_spec.rb
72
90
  - spec/lib/url_trimmer_spec.rb
73
91
  - spec/spec_helper.rb
74
92
  - url_trimmer.gemspec
@@ -98,5 +116,6 @@ specification_version: 4
98
116
  summary: Reads in plain text files with one URL per line and outputs a list of unique
99
117
  URLs by domain
100
118
  test_files:
119
+ - spec/lib/url_trimmer/differ_spec.rb
101
120
  - spec/lib/url_trimmer_spec.rb
102
121
  - spec/spec_helper.rb