rubyretriever 1.4.5 → 1.4.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/retriever/page.rb +7 -1
- data/lib/retriever/target.rb +2 -1
- data/lib/retriever/version.rb +1 -1
- data/spec/target_spec.rb +8 -0
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d6c43cec9b76bcc85b87dbe4c38591aca4e200a3
|
4
|
+
data.tar.gz: e9c88fcc10de3f42460c39400ddab2391ebd9ef9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0a2120f7f3836dcdb94eb3ed9a8f51823acc54a5311632e6e77e44188d0cfe84da40448e2cae80d744c251ef36491fe1434f2c7dcc79bf690f704ef3fd2893e2
|
7
|
+
data.tar.gz: f8f3672c903876edb086aeac102303f4ed217173957daf7cb8bb26ef5f0958554219e0405f27e808d43a5bb2cc2f05d50dad5b18f9d3a7ba532e2e4017e45dd5
|
data/lib/retriever/page.rb
CHANGED
@@ -52,10 +52,16 @@ module Retriever
|
|
52
52
|
# meant to be a loose filter to catch all reasonable HREF attributes.
|
53
53
|
link = match[0]
|
54
54
|
next if HASH_RE =~ link
|
55
|
-
Link.new(@t.scheme, @t.host, link, @url).path
|
55
|
+
Link.new(@t.scheme, host_with_port, link, @url).path
|
56
56
|
end.compact.uniq
|
57
57
|
end
|
58
58
|
|
59
|
+
def host_with_port
|
60
|
+
return @t.host if @t.port.nil?
|
61
|
+
|
62
|
+
@t.host + ':' + @t.port.to_s
|
63
|
+
end
|
64
|
+
|
59
65
|
def parse_internal
|
60
66
|
links.select do |x|
|
61
67
|
@t.host == Addressable::URI.parse(Addressable::URI.encode(x)).host
|
data/lib/retriever/target.rb
CHANGED
@@ -6,7 +6,7 @@ module Retriever
|
|
6
6
|
class Target
|
7
7
|
HTTP_RE = Regexp.new(/^http/i).freeze
|
8
8
|
|
9
|
-
attr_reader :host, :target, :host_re, :source, :file_re, :scheme
|
9
|
+
attr_reader :host, :target, :host_re, :source, :file_re, :scheme, :port
|
10
10
|
|
11
11
|
def initialize(url, file_re = nil)
|
12
12
|
fail 'Bad URL' unless url.include?('.')
|
@@ -17,6 +17,7 @@ module Retriever
|
|
17
17
|
@host_re = Regexp.new(@host.sub('www.', ''))
|
18
18
|
@file_re ||= file_re
|
19
19
|
@scheme = target_uri.scheme
|
20
|
+
@port = target_uri.port
|
20
21
|
end
|
21
22
|
|
22
23
|
def source
|
data/lib/retriever/version.rb
CHANGED
data/spec/target_spec.rb
CHANGED
@@ -18,6 +18,14 @@ describe 'Target' do
|
|
18
18
|
expect(t.host_re).to eq(/cnet.com/)
|
19
19
|
end
|
20
20
|
|
21
|
+
it 'creates port var (no port specified)' do
|
22
|
+
expect(t.port).to be_nil
|
23
|
+
end
|
24
|
+
|
25
|
+
it 'creates port var (with port specified)' do
|
26
|
+
expect(Retriever::Target.new('http://www.cnet.com:3000/reviews/', /\.exe\z/).port).to be(3000)
|
27
|
+
end
|
28
|
+
|
21
29
|
it 'creates file_re var (when provided)' do
|
22
30
|
expect(t.file_re).to eq(/\.exe\z/)
|
23
31
|
end
|