url_trimmer 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/url_trimmer.rb +5 -3
- data/lib/url_trimmer/version.rb +1 -1
- data/spec/lib/url_trimmer_spec.rb +4 -4
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 208db8066a5128f36d5d2a23435569e278346eb4
|
4
|
+
data.tar.gz: 353abe12c38767112e252f3d709fc78ce1a0fa2e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 626f3ad8f74050eb434382f8f40cdc2830dfac10812044db58ba99d0e9a0662eed8e78f705c25d5a32ce2b1ee6e405e6accc37e5d36df59a38f7ec71291a668c
|
7
|
+
data.tar.gz: b20cafc045eb1cf0cad0b8b2cb9e6fe60bb6c36de85efb51d0571959b9c41f008a1d49a044c1f087d30e7d35c5492a90646590de40c400b98f2a621c91f5a349
|
data/lib/url_trimmer.rb
CHANGED
@@ -3,6 +3,8 @@ require "domain_name"
|
|
3
3
|
|
4
4
|
module URLTrimmer
|
5
5
|
class Worker
|
6
|
+
URL_REGEXP = %r(\Ahttps?://([^/]+))
|
7
|
+
|
6
8
|
def self.uniq_by_domain(urls)
|
7
9
|
urls.map! do |url|
|
8
10
|
begin
|
@@ -11,9 +13,9 @@ module URLTrimmer
|
|
11
13
|
url.encode("UTF-8", invalid: :replace, undef: :replace, replace: "").downcase
|
12
14
|
end
|
13
15
|
end
|
14
|
-
urls.
|
15
|
-
urls.
|
16
|
-
urls.
|
16
|
+
urls.select! { |url| url =~ URL_REGEXP }
|
17
|
+
urls.uniq! { |url| DomainName(url[URL_REGEXP, 1]).domain }
|
18
|
+
urls.sort!
|
17
19
|
urls
|
18
20
|
end
|
19
21
|
end
|
data/lib/url_trimmer/version.rb
CHANGED
data/spec/lib/url_trimmer_spec.rb
CHANGED
@@ -5,16 +5,16 @@ module URLTrimmer
|
|
5
5
|
describe Worker do
|
6
6
|
let(:urls) do
|
7
7
|
["http://www.google.com.ar/blah1", "https://www.google.com.ar/blah2", "https://www.google.com.br/blah3",
|
8
|
-
"http://www.google.com/blah4" "https://plus.google.com/blah5"]
|
8
|
+
"http://www.google.com/blah4", "https://plus.google.com/blah5"]
|
9
9
|
end
|
10
10
|
|
11
11
|
it "returns a list of unique URLs by domain" do
|
12
12
|
unique_urls = Worker.uniq_by_domain(urls)
|
13
13
|
|
14
14
|
unique_urls.size.must_equal 3
|
15
|
-
unique_urls.must_include("http://www.google.com.ar")
|
16
|
-
unique_urls.must_include("https://www.google.com.br")
|
17
|
-
unique_urls.must_include("http://www.google.com")
|
15
|
+
unique_urls.must_include("http://www.google.com.ar/blah1")
|
16
|
+
unique_urls.must_include("https://www.google.com.br/blah3")
|
17
|
+
unique_urls.must_include("http://www.google.com/blah4")
|
18
18
|
end
|
19
19
|
end
|
20
20
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: url_trimmer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Cristian Rasch
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-06-
|
11
|
+
date: 2014-06-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: domain_name
|