rubyretriever 1.4.5 → 1.4.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/retriever/page.rb +7 -1
- data/lib/retriever/target.rb +2 -1
- data/lib/retriever/version.rb +1 -1
- data/spec/target_spec.rb +8 -0
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d6c43cec9b76bcc85b87dbe4c38591aca4e200a3
|
4
|
+
data.tar.gz: e9c88fcc10de3f42460c39400ddab2391ebd9ef9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0a2120f7f3836dcdb94eb3ed9a8f51823acc54a5311632e6e77e44188d0cfe84da40448e2cae80d744c251ef36491fe1434f2c7dcc79bf690f704ef3fd2893e2
|
7
|
+
data.tar.gz: f8f3672c903876edb086aeac102303f4ed217173957daf7cb8bb26ef5f0958554219e0405f27e808d43a5bb2cc2f05d50dad5b18f9d3a7ba532e2e4017e45dd5
|
data/lib/retriever/page.rb
CHANGED
@@ -52,10 +52,16 @@ module Retriever
|
|
52
52
|
# meant to be a loose filter to catch all reasonable HREF attributes.
|
53
53
|
link = match[0]
|
54
54
|
next if HASH_RE =~ link
|
55
|
-
Link.new(@t.scheme,
|
55
|
+
Link.new(@t.scheme, host_with_port, link, @url).path
|
56
56
|
end.compact.uniq
|
57
57
|
end
|
58
58
|
|
59
|
+
def host_with_port
|
60
|
+
return @t.host if @t.port.nil?
|
61
|
+
|
62
|
+
@t.host + ':' + @t.port.to_s
|
63
|
+
end
|
64
|
+
|
59
65
|
def parse_internal
|
60
66
|
links.select do |x|
|
61
67
|
@t.host == Addressable::URI.parse(Addressable::URI.encode(x)).host
|
data/lib/retriever/target.rb
CHANGED
@@ -6,7 +6,7 @@ module Retriever
|
|
6
6
|
class Target
|
7
7
|
HTTP_RE = Regexp.new(/^http/i).freeze
|
8
8
|
|
9
|
-
attr_reader :host, :target, :host_re, :source, :file_re, :scheme
|
9
|
+
attr_reader :host, :target, :host_re, :source, :file_re, :scheme, :port
|
10
10
|
|
11
11
|
def initialize(url, file_re = nil)
|
12
12
|
fail 'Bad URL' unless url.include?('.')
|
@@ -17,6 +17,7 @@ module Retriever
|
|
17
17
|
@host_re = Regexp.new(@host.sub('www.', ''))
|
18
18
|
@file_re ||= file_re
|
19
19
|
@scheme = target_uri.scheme
|
20
|
+
@port = target_uri.port
|
20
21
|
end
|
21
22
|
|
22
23
|
def source
|
data/lib/retriever/version.rb
CHANGED
data/spec/target_spec.rb
CHANGED
@@ -18,6 +18,14 @@ describe 'Target' do
|
|
18
18
|
expect(t.host_re).to eq(/cnet.com/)
|
19
19
|
end
|
20
20
|
|
21
|
+
it 'creates port var (no port specified)' do
|
22
|
+
expect(t.port).to be_nil
|
23
|
+
end
|
24
|
+
|
25
|
+
it 'creates port var (with port specified)' do
|
26
|
+
expect(Retriever::Target.new('http://www.cnet.com:3000/reviews/', /\.exe\z/).port).to be(3000)
|
27
|
+
end
|
28
|
+
|
21
29
|
it 'creates file_re var (when provided)' do
|
22
30
|
expect(t.file_re).to eq(/\.exe\z/)
|
23
31
|
end
|