crawl 1.1.4 → 1.1.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/crawl/engine.rb +2 -2
- data/lib/crawl/page.rb +6 -12
- data/lib/crawl/version.rb +1 -1
- data/spec/page_spec.rb +2 -0
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: cd091ac6b2679c9dbe5336a73dfd7acab9e9ee65
|
4
|
+
data.tar.gz: 6457eced0c6e538809ad22fc5045615d37ce8a78
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5538098c9d5b5f6785524e1378564a7121443dca24c63eeb4ffb9f12721afbd9d09db08e3bbea22be560917468405bb519e1d09f24c683316e085d2ddf3e17a2
|
7
|
+
data.tar.gz: 3f53020b8432926125b338f5114bdcdeb35560b7060d6d123855f5d69671a8398710d0b9a6a3d1e9ae333ba6635d03570218ac0c2339804c0d1109063bc48b1b
|
data/lib/crawl/engine.rb
CHANGED
@@ -21,7 +21,7 @@ class Crawl::Engine
|
|
21
21
|
@authorization = Base64.encode64("#{options[:username]}:#{options[:password]}")
|
22
22
|
@register = Crawl::Register.new
|
23
23
|
|
24
|
-
start_pages = options[:start].to_a.map{|page| Page.new(@register, page, '
|
24
|
+
start_pages = options[:start].to_a.map{|page| Page.new(@register, page, '/')}
|
25
25
|
|
26
26
|
@register.add(start_pages)
|
27
27
|
end
|
@@ -109,4 +109,4 @@ private
|
|
109
109
|
raw_links.delete_if{|link| IGNORE.any?{|pattern| link =~ pattern}}
|
110
110
|
raw_links.map{ |url| Page.new(@register, url, page.url) }
|
111
111
|
end
|
112
|
-
end
|
112
|
+
end
|
data/lib/crawl/page.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
require 'uri'
|
2
|
+
|
1
3
|
class Page
|
2
4
|
include Comparable
|
3
5
|
|
@@ -14,27 +16,19 @@ class Page
|
|
14
16
|
end
|
15
17
|
|
16
18
|
def relative_url
|
17
|
-
|
18
|
-
url
|
19
|
-
else
|
20
|
-
"#{source_directory}/#{url}"
|
21
|
-
end
|
22
|
-
end
|
23
|
-
|
24
|
-
def source_directory
|
25
|
-
File.split(source).first.sub(/^\./, '').sub(/\/$/, '')
|
19
|
+
@relative_url ||= URI.join('http://example.com', source, url).path
|
26
20
|
end
|
27
21
|
|
28
22
|
def <=>(other)
|
29
|
-
|
23
|
+
relative_url <=> other.relative_url
|
30
24
|
end
|
31
25
|
|
32
26
|
def eql?(other)
|
33
|
-
|
27
|
+
relative_url.eql?(other.relative_url)
|
34
28
|
end
|
35
29
|
|
36
30
|
def hash
|
37
|
-
|
31
|
+
relative_url.hash
|
38
32
|
end
|
39
33
|
|
40
34
|
def success
|
data/lib/crawl/version.rb
CHANGED
data/spec/page_spec.rb
CHANGED
@@ -3,9 +3,11 @@ require './lib/crawl/page'
|
|
3
3
|
RSpec.describe Page do
|
4
4
|
describe "#relative_url" do
|
5
5
|
specify { expect(Page.new(:register, "/", "/").relative_url).to eq "/" }
|
6
|
+
specify { expect(Page.new(:register, "./", "/").relative_url).to eq "/" }
|
6
7
|
specify { expect(Page.new(:register, "page.html", "").relative_url).to eq "/page.html" }
|
7
8
|
specify { expect(Page.new(:register, "/interview", "/").relative_url).to eq "/interview" }
|
8
9
|
specify { expect(Page.new(:register, "overview.html", "/").relative_url).to eq "/overview.html" }
|
9
10
|
specify { expect(Page.new(:register, "post-5.html", "/posts/index.html").relative_url).to eq "/posts/post-5.html" }
|
11
|
+
specify { expect(Page.new(:register, "https://staging.alphasights.com/careers/meet-us", "/posts/foo").relative_url).to eq "/careers/meet-us" }
|
10
12
|
end
|
11
13
|
end
|