url_trimmer 0.0.2 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/README.md +11 -1
- data/bin/url-differ +9 -0
- data/lib/url_trimmer/differ.rb +19 -0
- data/lib/url_trimmer/version.rb +1 -1
- data/spec/lib/url_trimmer/differ_spec.rb +21 -0
- data/url_trimmer.gemspec +1 -0
- metadata +21 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 04dae9e3b37ea013c12031188024c628f8df916a
|
4
|
+
data.tar.gz: 32d6caa837aeb69f97ab97c2609b6b3a7ac65dc7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b7a84541cf9f49aa591fc59ed6063faf3da67ba53cbb6d74ac97cac31b577829460c04a52e0459163096f3693b30825883e5dd4dd9aef585f89fbfb158668258
|
7
|
+
data.tar.gz: 1ba47225a09ddf0d16c233bc59e7680d70550b6f725a895eb4601f185569c4358862090b4b1087a94c070ad1f1a3ee85e005ca83d261f560908a2eb927a2061c
|
data/.gitignore
CHANGED
data/README.md
CHANGED
@@ -10,7 +10,17 @@ Install it yourself as:
|
|
10
10
|
|
11
11
|
## Usage
|
12
12
|
|
13
|
-
|
13
|
+
### URL Trimmer
|
14
|
+
|
15
|
+
```bash
|
16
|
+
url-trimmer urls1.txt urls2.txt .. urlsN.txt > unique-urls.txt
|
17
|
+
```
|
18
|
+
|
19
|
+
### URL Differ
|
20
|
+
|
21
|
+
```bash
|
22
|
+
url-differ urls1.txt urls2.txt > urls2-unique.txt
|
23
|
+
```
|
14
24
|
|
15
25
|
## Contributing
|
16
26
|
|
data/bin/url-differ
ADDED
@@ -0,0 +1,9 @@
|
|
1
|
+
#!/usr/bin/env ruby

# Prints, one per line, the URLs from the second file that
# URLTrimmer::Differ.between reports as not covered by the first file.
#
# Usage: url-differ urls1.txt urls2.txt > urls2-unique.txt

require_relative "../lib/url_trimmer/differ"

# Reads +path+ and returns its lines without trailing newlines, skipping
# blank lines.
def read_urls(path)
  # Chomp before filtering: lines returned by File.readlines still carry their
  # "\n", so testing empty? first would never drop a blank line.
  File.readlines(path).map(&:chomp).reject(&:empty?)
end

file1, file2 = ARGV
# Fail with a readable message instead of a TypeError from File.readlines(nil).
abort "Usage: url-differ urls1.txt urls2.txt" unless file1 && file2

unique_urls = URLTrimmer::Differ.between(read_urls(file1), read_urls(file2))
print "#{unique_urls.join("\n")}\n"
|
data/lib/url_trimmer/differ.rb
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
require_relative "../url_trimmer"
|
2
|
+
|
3
|
+
require "set"

module URLTrimmer
  # Finds the URLs of one list whose domains are absent from another list.
  class Differ
    # Returns the URLs from +urls2+ whose domain does not occur among the
    # domains of +urls1+.
    #
    # Both lists are first reduced with Worker.uniq_by_domain. The result is a
    # new, sorted array; the arrays returned by Worker.uniq_by_domain are not
    # modified.
    #
    # urls1 - Array of URL strings whose domains should be excluded.
    # urls2 - Array of URL strings to filter.
    def self.between(urls1, urls2)
      # A Set keeps each membership check constant-time instead of rescanning
      # an Array once per candidate URL.
      known_domains = Worker.uniq_by_domain(urls1).map { |url| domain_of(url) }.to_set

      Worker.uniq_by_domain(urls2)
            .reject { |url| known_domains.include?(domain_of(url)) }
            .sort
    end

    # Extracts the part of +url+ matched by Worker::URL_REGEXP and returns the
    # domain DomainName reports for it.
    def self.domain_of(url)
      DomainName(url[Worker::URL_REGEXP, 0]).domain
    end
    private_class_method :domain_of
  end
end
|
data/lib/url_trimmer/version.rb
CHANGED
data/spec/lib/url_trimmer/differ_spec.rb
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
require File.expand_path("spec/spec_helper")
|
2
|
+
require File.expand_path("lib/url_trimmer/differ")
|
3
|
+
|
4
|
+
module URLTrimmer
  # Exercises Differ.between with two small URL lists.
  describe Differ do
    describe ".between(urls1, urls2)" do
      # First list: its entries determine which URLs get filtered out.
      let(:known_urls) do
        %w[http://www.google.com/1 https://www.google.com.ar/2]
      end

      # Second list: the list being filtered.
      let(:candidate_urls) do
        %w[
          https://www.google.com/3
          http://www.google.com.ar/4
          http://www.google.com.br/5
          http://www.example.com/6
        ]
      end

      it "removes URLs from urls2 already present in urls1" do
        remaining = Differ.between(known_urls, candidate_urls)

        # Only the google.com.br and example.com URLs are expected to survive.
        remaining.size.must_equal 2
        ["http://www.google.com.br/5", "http://www.example.com/6"].each do |url|
          remaining.must_include url
        end
      end
    end
  end
end
|
data/url_trimmer.gemspec
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: url_trimmer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.2
|
4
|
+
version: 0.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Cristian Rasch
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-07-01 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: domain_name
|
@@ -52,10 +52,25 @@ dependencies:
|
|
52
52
|
- - ">="
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: minitest
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: 5.3.5
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - "~>"
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: 5.3.5
|
55
69
|
description:
|
56
70
|
email:
|
57
71
|
- cristian@box.cristianrasch.com.ar
|
58
72
|
executables:
|
73
|
+
- url-differ
|
59
74
|
- url-trimmer
|
60
75
|
extensions: []
|
61
76
|
extra_rdoc_files: []
|
@@ -66,9 +81,12 @@ files:
|
|
66
81
|
- LICENSE.txt
|
67
82
|
- README.md
|
68
83
|
- Rakefile
|
84
|
+
- bin/url-differ
|
69
85
|
- bin/url-trimmer
|
70
86
|
- lib/url_trimmer.rb
|
87
|
+
- lib/url_trimmer/differ.rb
|
71
88
|
- lib/url_trimmer/version.rb
|
89
|
+
- spec/lib/url_trimmer/differ_spec.rb
|
72
90
|
- spec/lib/url_trimmer_spec.rb
|
73
91
|
- spec/spec_helper.rb
|
74
92
|
- url_trimmer.gemspec
|
@@ -98,5 +116,6 @@ specification_version: 4
|
|
98
116
|
summary: Reads in plain text files with one URL per line and outputs a list of unique
|
99
117
|
URLs by domain
|
100
118
|
test_files:
|
119
|
+
- spec/lib/url_trimmer/differ_spec.rb
|
101
120
|
- spec/lib/url_trimmer_spec.rb
|
102
121
|
- spec/spec_helper.rb
|