spidr 0.4.1 → 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/ChangeLog.md +69 -54
- data/Gemfile +9 -5
- data/LICENSE.txt +1 -1
- data/README.md +34 -26
- data/Rakefile +4 -15
- data/gemspec.yml +3 -2
- data/lib/spidr/agent.rb +101 -44
- data/lib/spidr/{actions → agent}/actions.rb +32 -12
- data/lib/spidr/{events.rb → agent/events.rb} +4 -8
- data/lib/spidr/{filters.rb → agent/filters.rb} +14 -16
- data/lib/spidr/{sanitizers.rb → agent/sanitizers.rb} +5 -7
- data/lib/spidr/auth_store.rb +2 -2
- data/lib/spidr/cookie_jar.rb +2 -2
- data/lib/spidr/extensions/uri.rb +28 -16
- data/lib/spidr/page.rb +7 -11
- data/lib/spidr/{body.rb → page/body.rb} +1 -1
- data/lib/spidr/{headers.rb → page/headers.rb} +1 -1
- data/lib/spidr/{links.rb → page/links.rb} +43 -7
- data/lib/spidr/session_cache.rb +2 -2
- data/lib/spidr/spidr.rb +32 -5
- data/lib/spidr/version.rb +1 -1
- data/spec/agent/actions_spec.rb +60 -0
- data/spec/agent/filters_spec.rb +62 -0
- data/spec/agent/sanitizers_spec.rb +62 -0
- data/spec/agent_spec.rb +13 -13
- data/spec/auth_store_spec.rb +17 -17
- data/spec/cookie_jar_spec.rb +26 -26
- data/spec/extensions/uri_spec.rb +19 -9
- data/spec/helpers/history.rb +5 -5
- data/spec/helpers/wsoc.rb +2 -2
- data/spec/page_examples.rb +4 -4
- data/spec/page_spec.rb +28 -25
- data/spec/rules_spec.rb +14 -14
- data/spec/session_cache.rb +7 -7
- data/spec/spidr_spec.rb +10 -10
- metadata +37 -51
- data/lib/spidr/actions.rb +0 -2
- data/lib/spidr/actions/exceptions.rb +0 -4
- data/lib/spidr/actions/exceptions/action.rb +0 -9
- data/lib/spidr/actions/exceptions/paused.rb +0 -11
- data/lib/spidr/actions/exceptions/skip_link.rb +0 -12
- data/lib/spidr/actions/exceptions/skip_page.rb +0 -12
- data/spec/actions_spec.rb +0 -59
- data/spec/filters_spec.rb +0 -61
- data/spec/sanitizers_spec.rb +0 -61
data/spec/extensions/uri_spec.rb
CHANGED
@@ -5,39 +5,49 @@ require 'spec_helper'
|
|
5
5
|
describe URI do
|
6
6
|
describe "expand_path" do
|
7
7
|
it "should preserve single directory paths" do
|
8
|
-
URI.expand_path('path').
|
8
|
+
expect(URI.expand_path('path')).to eq('path')
|
9
9
|
end
|
10
10
|
|
11
11
|
it "should preserve trailing '/'" do
|
12
|
-
URI.expand_path('test/path/').
|
12
|
+
expect(URI.expand_path('test/path/')).to eq('test/path/')
|
13
13
|
end
|
14
14
|
|
15
15
|
it "should remove multiple '/' characters" do
|
16
|
-
URI.expand_path('///test///path///').
|
16
|
+
expect(URI.expand_path('///test///path///')).to eq('/test/path/')
|
17
17
|
end
|
18
18
|
|
19
19
|
it "should remove '.' directories from the path" do
|
20
|
-
URI.expand_path('test/./path').
|
20
|
+
expect(URI.expand_path('test/./path')).to eq('test/path')
|
21
21
|
end
|
22
22
|
|
23
23
|
it "should handle '..' directories properly" do
|
24
|
-
URI.expand_path('test/../path').
|
24
|
+
expect(URI.expand_path('test/../path')).to eq('path')
|
25
25
|
end
|
26
26
|
|
27
27
|
it "should limit the number of '..' directories resolved" do
|
28
|
-
URI.expand_path('/test/../../../..').
|
28
|
+
expect(URI.expand_path('/test/../../../..')).to eq('/')
|
29
|
+
end
|
30
|
+
|
31
|
+
it "should preserve leading '/'" do
|
32
|
+
expect(URI.expand_path('/../../../foo')).to eq('/foo')
|
29
33
|
end
|
30
34
|
|
31
35
|
it "should preserve absolute paths" do
|
32
|
-
URI.expand_path('/test/path').
|
36
|
+
expect(URI.expand_path('/test/path')).to eq('/test/path')
|
33
37
|
end
|
34
38
|
|
35
39
|
it "should preserve the root path" do
|
36
|
-
URI.expand_path('/').
|
40
|
+
expect(URI.expand_path('/')).to eq('/')
|
37
41
|
end
|
38
42
|
|
39
43
|
it "should default empty paths to the root path" do
|
40
|
-
URI.expand_path('').
|
44
|
+
expect(URI.expand_path('')).to eq('/')
|
45
|
+
end
|
46
|
+
|
47
|
+
it "should default zero-sum paths to a '/'" do
|
48
|
+
expect(URI.expand_path('foo/..')).to eq('/')
|
49
|
+
expect(URI.expand_path('foo/../bar/..')).to eq('/')
|
50
|
+
expect(URI.expand_path('././././.')).to eq('/')
|
41
51
|
end
|
42
52
|
end
|
43
53
|
end
|
data/spec/helpers/history.rb
CHANGED
@@ -15,20 +15,20 @@ module Helpers
|
|
15
15
|
end
|
16
16
|
|
17
17
|
def should_visit_link(url)
|
18
|
-
visited_link?(url).
|
18
|
+
expect(visited_link?(url)).to eq(true)
|
19
19
|
end
|
20
20
|
|
21
21
|
def should_ignore_link(url)
|
22
|
-
visited_link?(url).
|
22
|
+
expect(visited_link?(url)).to eq(false)
|
23
23
|
end
|
24
24
|
|
25
25
|
def should_visit_once(url)
|
26
|
-
visited_once?(url).
|
26
|
+
expect(visited_once?(url)).to eq(true)
|
27
27
|
end
|
28
28
|
|
29
29
|
def should_fail_link(url)
|
30
|
-
visited_link?(url).
|
31
|
-
visit_failed?(url).
|
30
|
+
expect(visited_link?(url)).to eq(false)
|
31
|
+
expect(visit_failed?(url)).to eq(true)
|
32
32
|
end
|
33
33
|
end
|
34
34
|
end
|
data/spec/helpers/wsoc.rb
CHANGED
@@ -9,8 +9,8 @@ module Helpers
|
|
9
9
|
include History
|
10
10
|
|
11
11
|
SERVER_URL = URI::HTTP.build(
|
12
|
-
:
|
13
|
-
:
|
12
|
+
host: (ENV['HOST'] || ::WSOC::Config::DEFAULT_HOST),
|
13
|
+
port: (ENV['PORT'] || ::WSOC::Config::DEFAULT_PORT)
|
14
14
|
)
|
15
15
|
|
16
16
|
SPECS_URL = SERVER_URL.merge(::WSOC::Config::SPECS_PATHS[:json])
|
data/spec/page_examples.rb
CHANGED
@@ -4,18 +4,18 @@ require 'spec_helper'
|
|
4
4
|
|
5
5
|
shared_examples_for "Page" do
|
6
6
|
it "should have a status code" do
|
7
|
-
@page.code.
|
7
|
+
expect(@page.code).to be_integer
|
8
8
|
end
|
9
9
|
|
10
10
|
it "should have a body" do
|
11
|
-
@page.body.
|
11
|
+
expect(@page.body).not_to be_empty
|
12
12
|
end
|
13
13
|
|
14
14
|
it "should provide transparent access to the response headers" do
|
15
|
-
@page.content_type.
|
15
|
+
expect(@page.content_type).to eq(@page.response['Content-Type'])
|
16
16
|
end
|
17
17
|
|
18
18
|
it "should allow content-types" do
|
19
|
-
@page.content_types.
|
19
|
+
expect(@page.content_types).not_to be_empty
|
20
20
|
end
|
21
21
|
end
|
data/spec/page_spec.rb
CHANGED
@@ -13,84 +13,87 @@ describe Page do
|
|
13
13
|
it_should_behave_like "Page"
|
14
14
|
|
15
15
|
it "should be OK" do
|
16
|
-
@page.
|
16
|
+
expect(@page).to be_ok
|
17
17
|
end
|
18
18
|
|
19
19
|
it "should have a content-type" do
|
20
|
-
@page.content_type.
|
20
|
+
expect(@page.content_type).to include('text/html')
|
21
21
|
end
|
22
22
|
|
23
23
|
it "should be a html page" do
|
24
|
-
@page.
|
24
|
+
expect(@page).to be_html
|
25
25
|
end
|
26
26
|
|
27
27
|
it "should have provide a document" do
|
28
|
-
@page.doc.class.
|
28
|
+
expect(@page.doc.class).to eq(Nokogiri::HTML::Document)
|
29
29
|
end
|
30
30
|
|
31
31
|
it "should allow searching the document" do
|
32
|
-
@page.doc.search('//p').length.
|
33
|
-
@page.doc.at('//p[2]').inner_text.
|
32
|
+
expect(@page.doc.search('//p').length).to eq(2)
|
33
|
+
expect(@page.doc.at('//p[2]').inner_text).to eq('Ready! Set! Go!')
|
34
34
|
end
|
35
35
|
|
36
36
|
it "should have a title" do
|
37
|
-
@page.title.
|
37
|
+
expect(@page.title).to eq('Spidr :: Web-Spider Obstacle Course :: Start')
|
38
38
|
end
|
39
39
|
|
40
40
|
it "should have links" do
|
41
|
-
@page.links.
|
41
|
+
expect(@page.links).not_to be_empty
|
42
42
|
end
|
43
43
|
end
|
44
44
|
|
45
45
|
describe "txt" do
|
46
46
|
before(:all) do
|
47
|
-
@page = get_page('
|
47
|
+
@page = get_page('https://www.ruby-lang.org/en/about/license.txt')
|
48
48
|
end
|
49
49
|
|
50
50
|
it_should_behave_like "Page"
|
51
51
|
|
52
52
|
it "should be OK" do
|
53
|
-
@page.
|
53
|
+
expect(@page).to be_ok
|
54
54
|
end
|
55
55
|
|
56
56
|
it "should have a content-type" do
|
57
|
-
@page.content_type.
|
57
|
+
expect(@page.content_type).to include('text/plain')
|
58
58
|
end
|
59
59
|
|
60
60
|
it "should be a txt page" do
|
61
|
-
@page.
|
61
|
+
expect(@page).to be_txt
|
62
62
|
end
|
63
63
|
|
64
64
|
it "should not have provide a document" do
|
65
|
-
@page.doc.
|
65
|
+
expect(@page.doc).to be_nil
|
66
66
|
end
|
67
67
|
|
68
68
|
it "should not allow searching the document" do
|
69
|
-
@page.search('//p').
|
70
|
-
@page.at('//p').
|
69
|
+
expect(@page.search('//p')).to be_empty
|
70
|
+
expect(@page.at('//p')).to be_nil
|
71
71
|
end
|
72
72
|
|
73
73
|
it "should not have links" do
|
74
|
-
@page.links.
|
74
|
+
expect(@page.links).to be_empty
|
75
75
|
end
|
76
76
|
|
77
77
|
it "should not have a title" do
|
78
|
-
@page.title.
|
78
|
+
expect(@page.title).to be_nil
|
79
79
|
end
|
80
80
|
end
|
81
81
|
|
82
82
|
describe "redirects" do
|
83
83
|
before(:all) do
|
84
84
|
@page = get_page('http://spidr.rubyforge.org/course/start.html')
|
85
|
-
|
85
|
+
end
|
86
|
+
|
87
|
+
before do
|
88
|
+
allow(@page).to receive(:body).and_return('<meta HTTP-EQUIV="REFRESH" content="0; url=http://spidr.rubyforge.org/redirected">')
|
86
89
|
end
|
87
90
|
|
88
91
|
it "should provide access to page-level redirects" do
|
89
|
-
@page.redirects_to.
|
92
|
+
expect(@page.redirects_to).to eq(['http://spidr.rubyforge.org/redirected'])
|
90
93
|
end
|
91
94
|
|
92
95
|
it "should include meta refresh redirects in the list of links" do
|
93
|
-
@page.links.
|
96
|
+
expect(@page.links).to include('http://spidr.rubyforge.org/redirected')
|
94
97
|
end
|
95
98
|
end
|
96
99
|
|
@@ -102,23 +105,23 @@ describe Page do
|
|
102
105
|
it "should provide access to the raw Cookie" do
|
103
106
|
cookie = @page.cookie
|
104
107
|
|
105
|
-
cookie.
|
106
|
-
cookie.
|
108
|
+
expect(cookie).not_to be_nil
|
109
|
+
expect(cookie).not_to be_empty
|
107
110
|
end
|
108
111
|
|
109
112
|
it "should provide access to the Cookies" do
|
110
113
|
cookies = @page.cookies
|
111
114
|
|
112
|
-
cookies.
|
115
|
+
expect(cookies).not_to be_empty
|
113
116
|
end
|
114
117
|
|
115
118
|
it "should provide access to the key->value pairs within the Cookie" do
|
116
119
|
params = @page.cookie_params
|
117
120
|
|
118
|
-
params.
|
121
|
+
expect(params).not_to be_empty
|
119
122
|
|
120
123
|
params.each do |key,value|
|
121
|
-
key.
|
124
|
+
expect(key).not_to be_empty
|
122
125
|
end
|
123
126
|
end
|
124
127
|
end
|
data/spec/rules_spec.rb
CHANGED
@@ -6,40 +6,40 @@ describe Rules do
|
|
6
6
|
subject { Rules }
|
7
7
|
|
8
8
|
it "should accept data based on acceptance data" do
|
9
|
-
rules = subject.new(:
|
9
|
+
rules = subject.new(accept: [1])
|
10
10
|
|
11
|
-
rules.accept?(1).
|
11
|
+
expect(rules.accept?(1)).to eq(true)
|
12
12
|
end
|
13
13
|
|
14
14
|
it "should accept data based on acceptance regexps" do
|
15
|
-
rules = subject.new(:
|
15
|
+
rules = subject.new(accept: [/1/])
|
16
16
|
|
17
|
-
rules.accept?('1').
|
17
|
+
expect(rules.accept?('1')).to eq(true)
|
18
18
|
end
|
19
19
|
|
20
20
|
it "should match non-Strings using acceptance regexps" do
|
21
|
-
rules = subject.new(:
|
21
|
+
rules = subject.new(accept: [/1/])
|
22
22
|
|
23
|
-
rules.accept?(1).
|
23
|
+
expect(rules.accept?(1)).to eq(true)
|
24
24
|
end
|
25
25
|
|
26
26
|
it "should accept data using acceptance lambdas" do
|
27
|
-
rules = subject.new(:
|
27
|
+
rules = subject.new(accept: [lambda { |data| data > 2 }])
|
28
28
|
|
29
|
-
rules.accept?(3).
|
29
|
+
expect(rules.accept?(3)).to eq(true)
|
30
30
|
end
|
31
31
|
|
32
32
|
it "should reject data that does not match any acceptance patterns" do
|
33
|
-
rules = subject.new(:
|
33
|
+
rules = subject.new(accept: [1, 2, 3])
|
34
34
|
|
35
|
-
rules.accept?(2).
|
36
|
-
rules.accept?(4).
|
35
|
+
expect(rules.accept?(2)).to eq(true)
|
36
|
+
expect(rules.accept?(4)).to eq(false)
|
37
37
|
end
|
38
38
|
|
39
39
|
it "should accept data that does not match any rejection patterns" do
|
40
|
-
rules = subject.new(:
|
40
|
+
rules = subject.new(reject: [1, 2, 3])
|
41
41
|
|
42
|
-
rules.accept?(2).
|
43
|
-
rules.accept?(4).
|
42
|
+
expect(rules.accept?(2)).to eq(false)
|
43
|
+
expect(rules.accept?(4)).to eq(true)
|
44
44
|
end
|
45
45
|
end
|
data/spec/session_cache.rb
CHANGED
@@ -9,11 +9,11 @@ describe SessionCache do
|
|
9
9
|
end
|
10
10
|
|
11
11
|
it "should not have any active sessions" do
|
12
|
-
@sessions.
|
12
|
+
expect(@sessions).not_to be_active(URI('http://example.com/'))
|
13
13
|
end
|
14
14
|
|
15
15
|
it "should start new sessions on-demand" do
|
16
|
-
@sessions[URI('http://example.com/')].
|
16
|
+
expect(@sessions[URI('http://example.com/')]).not_to be_nil
|
17
17
|
end
|
18
18
|
|
19
19
|
after(:all) do
|
@@ -30,25 +30,25 @@ describe SessionCache do
|
|
30
30
|
end
|
31
31
|
|
32
32
|
it "should have active sessions" do
|
33
|
-
@sessions.
|
33
|
+
expect(@sessions).to be_active(@url)
|
34
34
|
end
|
35
35
|
|
36
36
|
it "should provide access to sessions" do
|
37
|
-
@sessions[@url].
|
37
|
+
expect(@sessions[@url]).not_to be_nil
|
38
38
|
end
|
39
39
|
|
40
40
|
it "should start new sessions on-demand" do
|
41
41
|
url2 = URI('http://www.w3c.org/')
|
42
42
|
|
43
|
-
@sessions[url2].
|
43
|
+
expect(@sessions[url2]).not_to be_nil
|
44
44
|
end
|
45
45
|
|
46
46
|
it "should be able to kill sessions" do
|
47
47
|
url2 = URI('http://www.w3c.org/')
|
48
48
|
|
49
|
-
@sessions[url2].
|
49
|
+
expect(@sessions[url2]).not_to be_nil
|
50
50
|
@sessions.kill!(url2)
|
51
|
-
@sessions.
|
51
|
+
expect(@sessions).not_to be_active(url2)
|
52
52
|
end
|
53
53
|
|
54
54
|
after(:all) do
|
data/spec/spidr_spec.rb
CHANGED
@@ -4,36 +4,36 @@ require 'spec_helper'
|
|
4
4
|
|
5
5
|
describe Spidr do
|
6
6
|
it "should have a VERSION constant" do
|
7
|
-
subject.const_defined?('VERSION').
|
7
|
+
expect(subject.const_defined?('VERSION')).to eq(true)
|
8
8
|
end
|
9
9
|
|
10
10
|
describe "proxy" do
|
11
11
|
after(:all) do
|
12
|
-
|
12
|
+
Spidr.disable_proxy!
|
13
13
|
end
|
14
14
|
|
15
15
|
it "should not have proxy settings by default" do
|
16
|
-
subject.proxy[:host].
|
16
|
+
expect(subject.proxy[:host]).to be_nil
|
17
17
|
end
|
18
18
|
|
19
19
|
it "should allow setting new proxy settings" do
|
20
|
-
subject.proxy = {:
|
20
|
+
subject.proxy = {host: 'example.com', port: 8010}
|
21
21
|
|
22
|
-
subject.proxy[:host].
|
23
|
-
subject.proxy[:port].
|
22
|
+
expect(subject.proxy[:host]).to eq('example.com')
|
23
|
+
expect(subject.proxy[:port]).to eq(8010)
|
24
24
|
end
|
25
25
|
|
26
26
|
it "should default the :port option of new proxy settings" do
|
27
|
-
subject.proxy = {:
|
27
|
+
subject.proxy = {host: 'example.com'}
|
28
28
|
|
29
|
-
subject.proxy[:host].
|
30
|
-
subject.proxy[:port].
|
29
|
+
expect(subject.proxy[:host]).to eq('example.com')
|
30
|
+
expect(subject.proxy[:port]).to eq(Spidr::COMMON_PROXY_PORT)
|
31
31
|
end
|
32
32
|
|
33
33
|
it "should allow disabling the proxy" do
|
34
34
|
subject.disable_proxy!
|
35
35
|
|
36
|
-
subject.proxy[:host].
|
36
|
+
expect(subject.proxy[:host]).to be_nil
|
37
37
|
end
|
38
38
|
end
|
39
39
|
end
|
metadata
CHANGED
@@ -1,49 +1,43 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: spidr
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
5
|
-
prerelease:
|
4
|
+
version: 0.5.0
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- Postmodern
|
9
8
|
autorequire:
|
10
9
|
bindir: bin
|
11
10
|
cert_chain: []
|
12
|
-
date:
|
11
|
+
date: 2016-01-04 00:00:00.000000000 Z
|
13
12
|
dependencies:
|
14
13
|
- !ruby/object:Gem::Dependency
|
15
14
|
name: nokogiri
|
16
|
-
requirement:
|
17
|
-
none: false
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
18
16
|
requirements:
|
19
|
-
- - ~>
|
17
|
+
- - "~>"
|
20
18
|
- !ruby/object:Gem::Version
|
21
19
|
version: '1.3'
|
22
20
|
type: :runtime
|
23
21
|
prerelease: false
|
24
|
-
version_requirements:
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.3'
|
25
27
|
- !ruby/object:Gem::Dependency
|
26
28
|
name: bundler
|
27
|
-
requirement:
|
28
|
-
none: false
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
29
30
|
requirements:
|
30
|
-
- - ~>
|
31
|
+
- - "~>"
|
31
32
|
- !ruby/object:Gem::Version
|
32
33
|
version: '1.0'
|
33
34
|
type: :development
|
34
35
|
prerelease: false
|
35
|
-
version_requirements:
|
36
|
-
- !ruby/object:Gem::Dependency
|
37
|
-
name: yard
|
38
|
-
requirement: &19473820 !ruby/object:Gem::Requirement
|
39
|
-
none: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
40
37
|
requirements:
|
41
|
-
- - ~>
|
38
|
+
- - "~>"
|
42
39
|
- !ruby/object:Gem::Version
|
43
|
-
version: '0
|
44
|
-
type: :development
|
45
|
-
prerelease: false
|
46
|
-
version_requirements: *19473820
|
40
|
+
version: '1.0'
|
47
41
|
description: Spidr is a versatile Ruby web spidering library that can spider a site,
|
48
42
|
multiple domains, certain links or infinitely. Spidr is designed to be fast and
|
49
43
|
easy to use.
|
@@ -55,9 +49,9 @@ extra_rdoc_files:
|
|
55
49
|
- LICENSE.txt
|
56
50
|
- README.md
|
57
51
|
files:
|
58
|
-
- .gitignore
|
59
|
-
- .rspec
|
60
|
-
- .yardopts
|
52
|
+
- ".gitignore"
|
53
|
+
- ".rspec"
|
54
|
+
- ".yardopts"
|
61
55
|
- ChangeLog.md
|
62
56
|
- Gemfile
|
63
57
|
- LICENSE.txt
|
@@ -65,81 +59,73 @@ files:
|
|
65
59
|
- Rakefile
|
66
60
|
- gemspec.yml
|
67
61
|
- lib/spidr.rb
|
68
|
-
- lib/spidr/actions.rb
|
69
|
-
- lib/spidr/actions/actions.rb
|
70
|
-
- lib/spidr/actions/exceptions.rb
|
71
|
-
- lib/spidr/actions/exceptions/action.rb
|
72
|
-
- lib/spidr/actions/exceptions/paused.rb
|
73
|
-
- lib/spidr/actions/exceptions/skip_link.rb
|
74
|
-
- lib/spidr/actions/exceptions/skip_page.rb
|
75
62
|
- lib/spidr/agent.rb
|
63
|
+
- lib/spidr/agent/actions.rb
|
64
|
+
- lib/spidr/agent/events.rb
|
65
|
+
- lib/spidr/agent/filters.rb
|
66
|
+
- lib/spidr/agent/sanitizers.rb
|
76
67
|
- lib/spidr/auth_credential.rb
|
77
68
|
- lib/spidr/auth_store.rb
|
78
|
-
- lib/spidr/body.rb
|
79
69
|
- lib/spidr/cookie_jar.rb
|
80
|
-
- lib/spidr/events.rb
|
81
70
|
- lib/spidr/extensions.rb
|
82
71
|
- lib/spidr/extensions/uri.rb
|
83
|
-
- lib/spidr/filters.rb
|
84
|
-
- lib/spidr/headers.rb
|
85
|
-
- lib/spidr/links.rb
|
86
72
|
- lib/spidr/page.rb
|
73
|
+
- lib/spidr/page/body.rb
|
74
|
+
- lib/spidr/page/headers.rb
|
75
|
+
- lib/spidr/page/links.rb
|
87
76
|
- lib/spidr/rules.rb
|
88
|
-
- lib/spidr/sanitizers.rb
|
89
77
|
- lib/spidr/session_cache.rb
|
90
78
|
- lib/spidr/spidr.rb
|
91
79
|
- lib/spidr/version.rb
|
92
|
-
- spec/actions_spec.rb
|
80
|
+
- spec/agent/actions_spec.rb
|
81
|
+
- spec/agent/filters_spec.rb
|
82
|
+
- spec/agent/sanitizers_spec.rb
|
93
83
|
- spec/agent_spec.rb
|
94
84
|
- spec/auth_store_spec.rb
|
95
85
|
- spec/cookie_jar_spec.rb
|
96
86
|
- spec/extensions/uri_spec.rb
|
97
|
-
- spec/filters_spec.rb
|
98
87
|
- spec/helpers/history.rb
|
99
88
|
- spec/helpers/page.rb
|
100
89
|
- spec/helpers/wsoc.rb
|
101
90
|
- spec/page_examples.rb
|
102
91
|
- spec/page_spec.rb
|
103
92
|
- spec/rules_spec.rb
|
104
|
-
- spec/sanitizers_spec.rb
|
105
93
|
- spec/session_cache.rb
|
106
94
|
- spec/spec_helper.rb
|
107
95
|
- spec/spidr_spec.rb
|
108
96
|
- spidr.gemspec
|
109
|
-
homepage:
|
97
|
+
homepage: https://github.com/postmodern/spidr#readme
|
110
98
|
licenses:
|
111
99
|
- MIT
|
100
|
+
metadata: {}
|
112
101
|
post_install_message:
|
113
102
|
rdoc_options: []
|
114
103
|
require_paths:
|
115
104
|
- lib
|
116
105
|
required_ruby_version: !ruby/object:Gem::Requirement
|
117
|
-
none: false
|
118
106
|
requirements:
|
119
|
-
- -
|
107
|
+
- - ">="
|
120
108
|
- !ruby/object:Gem::Version
|
121
|
-
version:
|
109
|
+
version: 1.9.1
|
122
110
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
123
|
-
none: false
|
124
111
|
requirements:
|
125
|
-
- -
|
112
|
+
- - ">="
|
126
113
|
- !ruby/object:Gem::Version
|
127
114
|
version: '0'
|
128
115
|
requirements: []
|
129
116
|
rubyforge_project:
|
130
|
-
rubygems_version:
|
117
|
+
rubygems_version: 2.4.7
|
131
118
|
signing_key:
|
132
|
-
specification_version:
|
119
|
+
specification_version: 4
|
133
120
|
summary: A versatile Ruby web spidering library
|
134
121
|
test_files:
|
135
|
-
- spec/actions_spec.rb
|
122
|
+
- spec/agent/actions_spec.rb
|
123
|
+
- spec/agent/filters_spec.rb
|
124
|
+
- spec/agent/sanitizers_spec.rb
|
136
125
|
- spec/agent_spec.rb
|
137
126
|
- spec/auth_store_spec.rb
|
138
127
|
- spec/cookie_jar_spec.rb
|
139
128
|
- spec/extensions/uri_spec.rb
|
140
|
-
- spec/filters_spec.rb
|
141
129
|
- spec/page_spec.rb
|
142
130
|
- spec/rules_spec.rb
|
143
|
-
- spec/sanitizers_spec.rb
|
144
131
|
- spec/spidr_spec.rb
|
145
|
-
has_rdoc:
|