spidr 0.4.1 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/ChangeLog.md +69 -54
- data/Gemfile +9 -5
- data/LICENSE.txt +1 -1
- data/README.md +34 -26
- data/Rakefile +4 -15
- data/gemspec.yml +3 -2
- data/lib/spidr/agent.rb +101 -44
- data/lib/spidr/{actions → agent}/actions.rb +32 -12
- data/lib/spidr/{events.rb → agent/events.rb} +4 -8
- data/lib/spidr/{filters.rb → agent/filters.rb} +14 -16
- data/lib/spidr/{sanitizers.rb → agent/sanitizers.rb} +5 -7
- data/lib/spidr/auth_store.rb +2 -2
- data/lib/spidr/cookie_jar.rb +2 -2
- data/lib/spidr/extensions/uri.rb +28 -16
- data/lib/spidr/page.rb +7 -11
- data/lib/spidr/{body.rb → page/body.rb} +1 -1
- data/lib/spidr/{headers.rb → page/headers.rb} +1 -1
- data/lib/spidr/{links.rb → page/links.rb} +43 -7
- data/lib/spidr/session_cache.rb +2 -2
- data/lib/spidr/spidr.rb +32 -5
- data/lib/spidr/version.rb +1 -1
- data/spec/agent/actions_spec.rb +60 -0
- data/spec/agent/filters_spec.rb +62 -0
- data/spec/agent/sanitizers_spec.rb +62 -0
- data/spec/agent_spec.rb +13 -13
- data/spec/auth_store_spec.rb +17 -17
- data/spec/cookie_jar_spec.rb +26 -26
- data/spec/extensions/uri_spec.rb +19 -9
- data/spec/helpers/history.rb +5 -5
- data/spec/helpers/wsoc.rb +2 -2
- data/spec/page_examples.rb +4 -4
- data/spec/page_spec.rb +28 -25
- data/spec/rules_spec.rb +14 -14
- data/spec/session_cache.rb +7 -7
- data/spec/spidr_spec.rb +10 -10
- metadata +37 -51
- data/lib/spidr/actions.rb +0 -2
- data/lib/spidr/actions/exceptions.rb +0 -4
- data/lib/spidr/actions/exceptions/action.rb +0 -9
- data/lib/spidr/actions/exceptions/paused.rb +0 -11
- data/lib/spidr/actions/exceptions/skip_link.rb +0 -12
- data/lib/spidr/actions/exceptions/skip_page.rb +0 -12
- data/spec/actions_spec.rb +0 -59
- data/spec/filters_spec.rb +0 -61
- data/spec/sanitizers_spec.rb +0 -61
data/spec/extensions/uri_spec.rb
CHANGED
@@ -5,39 +5,49 @@ require 'spec_helper'
|
|
5
5
|
describe URI do
|
6
6
|
describe "expand_path" do
|
7
7
|
it "should preserve single directory paths" do
|
8
|
-
URI.expand_path('path').
|
8
|
+
expect(URI.expand_path('path')).to eq('path')
|
9
9
|
end
|
10
10
|
|
11
11
|
it "should preserve trailing '/'" do
|
12
|
-
URI.expand_path('test/path/').
|
12
|
+
expect(URI.expand_path('test/path/')).to eq('test/path/')
|
13
13
|
end
|
14
14
|
|
15
15
|
it "should remove multiple '/' characters" do
|
16
|
-
URI.expand_path('///test///path///').
|
16
|
+
expect(URI.expand_path('///test///path///')).to eq('/test/path/')
|
17
17
|
end
|
18
18
|
|
19
19
|
it "should remove '.' directories from the path" do
|
20
|
-
URI.expand_path('test/./path').
|
20
|
+
expect(URI.expand_path('test/./path')).to eq('test/path')
|
21
21
|
end
|
22
22
|
|
23
23
|
it "should handle '..' directories properly" do
|
24
|
-
URI.expand_path('test/../path').
|
24
|
+
expect(URI.expand_path('test/../path')).to eq('path')
|
25
25
|
end
|
26
26
|
|
27
27
|
it "should limit the number of '..' directories resolved" do
|
28
|
-
URI.expand_path('/test/../../../..').
|
28
|
+
expect(URI.expand_path('/test/../../../..')).to eq('/')
|
29
|
+
end
|
30
|
+
|
31
|
+
it "should preserve leading '/'" do
|
32
|
+
expect(URI.expand_path('/../../../foo')).to eq('/foo')
|
29
33
|
end
|
30
34
|
|
31
35
|
it "should preserve absolute paths" do
|
32
|
-
URI.expand_path('/test/path').
|
36
|
+
expect(URI.expand_path('/test/path')).to eq('/test/path')
|
33
37
|
end
|
34
38
|
|
35
39
|
it "should preserve the root path" do
|
36
|
-
URI.expand_path('/').
|
40
|
+
expect(URI.expand_path('/')).to eq('/')
|
37
41
|
end
|
38
42
|
|
39
43
|
it "should default empty paths to the root path" do
|
40
|
-
URI.expand_path('').
|
44
|
+
expect(URI.expand_path('')).to eq('/')
|
45
|
+
end
|
46
|
+
|
47
|
+
it "should default zero-sum paths to a '/'" do
|
48
|
+
expect(URI.expand_path('foo/..')).to eq('/')
|
49
|
+
expect(URI.expand_path('foo/../bar/..')).to eq('/')
|
50
|
+
expect(URI.expand_path('././././.')).to eq('/')
|
41
51
|
end
|
42
52
|
end
|
43
53
|
end
|
data/spec/helpers/history.rb
CHANGED
@@ -15,20 +15,20 @@ module Helpers
|
|
15
15
|
end
|
16
16
|
|
17
17
|
def should_visit_link(url)
|
18
|
-
visited_link?(url).
|
18
|
+
expect(visited_link?(url)).to eq(true)
|
19
19
|
end
|
20
20
|
|
21
21
|
def should_ignore_link(url)
|
22
|
-
visited_link?(url).
|
22
|
+
expect(visited_link?(url)).to eq(false)
|
23
23
|
end
|
24
24
|
|
25
25
|
def should_visit_once(url)
|
26
|
-
visited_once?(url).
|
26
|
+
expect(visited_once?(url)).to eq(true)
|
27
27
|
end
|
28
28
|
|
29
29
|
def should_fail_link(url)
|
30
|
-
visited_link?(url).
|
31
|
-
visit_failed?(url).
|
30
|
+
expect(visited_link?(url)).to eq(false)
|
31
|
+
expect(visit_failed?(url)).to eq(true)
|
32
32
|
end
|
33
33
|
end
|
34
34
|
end
|
data/spec/helpers/wsoc.rb
CHANGED
@@ -9,8 +9,8 @@ module Helpers
|
|
9
9
|
include History
|
10
10
|
|
11
11
|
SERVER_URL = URI::HTTP.build(
|
12
|
-
:
|
13
|
-
:
|
12
|
+
host: (ENV['HOST'] || ::WSOC::Config::DEFAULT_HOST),
|
13
|
+
port: (ENV['PORT'] || ::WSOC::Config::DEFAULT_PORT)
|
14
14
|
)
|
15
15
|
|
16
16
|
SPECS_URL = SERVER_URL.merge(::WSOC::Config::SPECS_PATHS[:json])
|
data/spec/page_examples.rb
CHANGED
@@ -4,18 +4,18 @@ require 'spec_helper'
|
|
4
4
|
|
5
5
|
shared_examples_for "Page" do
|
6
6
|
it "should have a status code" do
|
7
|
-
@page.code.
|
7
|
+
expect(@page.code).to be_integer
|
8
8
|
end
|
9
9
|
|
10
10
|
it "should have a body" do
|
11
|
-
@page.body.
|
11
|
+
expect(@page.body).not_to be_empty
|
12
12
|
end
|
13
13
|
|
14
14
|
it "should provide transparent access to the response headers" do
|
15
|
-
@page.content_type.
|
15
|
+
expect(@page.content_type).to eq(@page.response['Content-Type'])
|
16
16
|
end
|
17
17
|
|
18
18
|
it "should allow content-types" do
|
19
|
-
@page.content_types.
|
19
|
+
expect(@page.content_types).not_to be_empty
|
20
20
|
end
|
21
21
|
end
|
data/spec/page_spec.rb
CHANGED
@@ -13,84 +13,87 @@ describe Page do
|
|
13
13
|
it_should_behave_like "Page"
|
14
14
|
|
15
15
|
it "should be OK" do
|
16
|
-
@page.
|
16
|
+
expect(@page).to be_ok
|
17
17
|
end
|
18
18
|
|
19
19
|
it "should have a content-type" do
|
20
|
-
@page.content_type.
|
20
|
+
expect(@page.content_type).to include('text/html')
|
21
21
|
end
|
22
22
|
|
23
23
|
it "should be a html page" do
|
24
|
-
@page.
|
24
|
+
expect(@page).to be_html
|
25
25
|
end
|
26
26
|
|
27
27
|
it "should have provide a document" do
|
28
|
-
@page.doc.class.
|
28
|
+
expect(@page.doc.class).to eq(Nokogiri::HTML::Document)
|
29
29
|
end
|
30
30
|
|
31
31
|
it "should allow searching the document" do
|
32
|
-
@page.doc.search('//p').length.
|
33
|
-
@page.doc.at('//p[2]').inner_text.
|
32
|
+
expect(@page.doc.search('//p').length).to eq(2)
|
33
|
+
expect(@page.doc.at('//p[2]').inner_text).to eq('Ready! Set! Go!')
|
34
34
|
end
|
35
35
|
|
36
36
|
it "should have a title" do
|
37
|
-
@page.title.
|
37
|
+
expect(@page.title).to eq('Spidr :: Web-Spider Obstacle Course :: Start')
|
38
38
|
end
|
39
39
|
|
40
40
|
it "should have links" do
|
41
|
-
@page.links.
|
41
|
+
expect(@page.links).not_to be_empty
|
42
42
|
end
|
43
43
|
end
|
44
44
|
|
45
45
|
describe "txt" do
|
46
46
|
before(:all) do
|
47
|
-
@page = get_page('
|
47
|
+
@page = get_page('https://www.ruby-lang.org/en/about/license.txt')
|
48
48
|
end
|
49
49
|
|
50
50
|
it_should_behave_like "Page"
|
51
51
|
|
52
52
|
it "should be OK" do
|
53
|
-
@page.
|
53
|
+
expect(@page).to be_ok
|
54
54
|
end
|
55
55
|
|
56
56
|
it "should have a content-type" do
|
57
|
-
@page.content_type.
|
57
|
+
expect(@page.content_type).to include('text/plain')
|
58
58
|
end
|
59
59
|
|
60
60
|
it "should be a txt page" do
|
61
|
-
@page.
|
61
|
+
expect(@page).to be_txt
|
62
62
|
end
|
63
63
|
|
64
64
|
it "should not have provide a document" do
|
65
|
-
@page.doc.
|
65
|
+
expect(@page.doc).to be_nil
|
66
66
|
end
|
67
67
|
|
68
68
|
it "should not allow searching the document" do
|
69
|
-
@page.search('//p').
|
70
|
-
@page.at('//p').
|
69
|
+
expect(@page.search('//p')).to be_empty
|
70
|
+
expect(@page.at('//p')).to be_nil
|
71
71
|
end
|
72
72
|
|
73
73
|
it "should not have links" do
|
74
|
-
@page.links.
|
74
|
+
expect(@page.links).to be_empty
|
75
75
|
end
|
76
76
|
|
77
77
|
it "should not have a title" do
|
78
|
-
@page.title.
|
78
|
+
expect(@page.title).to be_nil
|
79
79
|
end
|
80
80
|
end
|
81
81
|
|
82
82
|
describe "redirects" do
|
83
83
|
before(:all) do
|
84
84
|
@page = get_page('http://spidr.rubyforge.org/course/start.html')
|
85
|
-
|
85
|
+
end
|
86
|
+
|
87
|
+
before do
|
88
|
+
allow(@page).to receive(:body).and_return('<meta HTTP-EQUIV="REFRESH" content="0; url=http://spidr.rubyforge.org/redirected">')
|
86
89
|
end
|
87
90
|
|
88
91
|
it "should provide access to page-level redirects" do
|
89
|
-
@page.redirects_to.
|
92
|
+
expect(@page.redirects_to).to eq(['http://spidr.rubyforge.org/redirected'])
|
90
93
|
end
|
91
94
|
|
92
95
|
it "should include meta refresh redirects in the list of links" do
|
93
|
-
@page.links.
|
96
|
+
expect(@page.links).to include('http://spidr.rubyforge.org/redirected')
|
94
97
|
end
|
95
98
|
end
|
96
99
|
|
@@ -102,23 +105,23 @@ describe Page do
|
|
102
105
|
it "should provide access to the raw Cookie" do
|
103
106
|
cookie = @page.cookie
|
104
107
|
|
105
|
-
cookie.
|
106
|
-
cookie.
|
108
|
+
expect(cookie).not_to be_nil
|
109
|
+
expect(cookie).not_to be_empty
|
107
110
|
end
|
108
111
|
|
109
112
|
it "should provide access to the Cookies" do
|
110
113
|
cookies = @page.cookies
|
111
114
|
|
112
|
-
cookies.
|
115
|
+
expect(cookies).not_to be_empty
|
113
116
|
end
|
114
117
|
|
115
118
|
it "should provide access to the key->value pairs within the Cookie" do
|
116
119
|
params = @page.cookie_params
|
117
120
|
|
118
|
-
params.
|
121
|
+
expect(params).not_to be_empty
|
119
122
|
|
120
123
|
params.each do |key,value|
|
121
|
-
key.
|
124
|
+
expect(key).not_to be_empty
|
122
125
|
end
|
123
126
|
end
|
124
127
|
end
|
data/spec/rules_spec.rb
CHANGED
@@ -6,40 +6,40 @@ describe Rules do
|
|
6
6
|
subject { Rules }
|
7
7
|
|
8
8
|
it "should accept data based on acceptance data" do
|
9
|
-
rules = subject.new(:
|
9
|
+
rules = subject.new(accept: [1])
|
10
10
|
|
11
|
-
rules.accept?(1).
|
11
|
+
expect(rules.accept?(1)).to eq(true)
|
12
12
|
end
|
13
13
|
|
14
14
|
it "should accept data based on acceptance regexps" do
|
15
|
-
rules = subject.new(:
|
15
|
+
rules = subject.new(accept: [/1/])
|
16
16
|
|
17
|
-
rules.accept?('1').
|
17
|
+
expect(rules.accept?('1')).to eq(true)
|
18
18
|
end
|
19
19
|
|
20
20
|
it "should match non-Strings using acceptance regexps" do
|
21
|
-
rules = subject.new(:
|
21
|
+
rules = subject.new(accept: [/1/])
|
22
22
|
|
23
|
-
rules.accept?(1).
|
23
|
+
expect(rules.accept?(1)).to eq(true)
|
24
24
|
end
|
25
25
|
|
26
26
|
it "should accept data using acceptance lambdas" do
|
27
|
-
rules = subject.new(:
|
27
|
+
rules = subject.new(accept: [lambda { |data| data > 2 }])
|
28
28
|
|
29
|
-
rules.accept?(3).
|
29
|
+
expect(rules.accept?(3)).to eq(true)
|
30
30
|
end
|
31
31
|
|
32
32
|
it "should reject data that does not match any acceptance patterns" do
|
33
|
-
rules = subject.new(:
|
33
|
+
rules = subject.new(accept: [1, 2, 3])
|
34
34
|
|
35
|
-
rules.accept?(2).
|
36
|
-
rules.accept?(4).
|
35
|
+
expect(rules.accept?(2)).to eq(true)
|
36
|
+
expect(rules.accept?(4)).to eq(false)
|
37
37
|
end
|
38
38
|
|
39
39
|
it "should accept data that does not match any rejection patterns" do
|
40
|
-
rules = subject.new(:
|
40
|
+
rules = subject.new(reject: [1, 2, 3])
|
41
41
|
|
42
|
-
rules.accept?(2).
|
43
|
-
rules.accept?(4).
|
42
|
+
expect(rules.accept?(2)).to eq(false)
|
43
|
+
expect(rules.accept?(4)).to eq(true)
|
44
44
|
end
|
45
45
|
end
|
data/spec/session_cache.rb
CHANGED
@@ -9,11 +9,11 @@ describe SessionCache do
|
|
9
9
|
end
|
10
10
|
|
11
11
|
it "should not have any active sessions" do
|
12
|
-
@sessions.
|
12
|
+
expect(@sessions).not_to be_active(URI('http://example.com/'))
|
13
13
|
end
|
14
14
|
|
15
15
|
it "should start new sessions on-demand" do
|
16
|
-
@sessions[URI('http://example.com/')].
|
16
|
+
expect(@sessions[URI('http://example.com/')]).not_to be_nil
|
17
17
|
end
|
18
18
|
|
19
19
|
after(:all) do
|
@@ -30,25 +30,25 @@ describe SessionCache do
|
|
30
30
|
end
|
31
31
|
|
32
32
|
it "should have active sessions" do
|
33
|
-
@sessions.
|
33
|
+
expect(@sessions).to be_active(@url)
|
34
34
|
end
|
35
35
|
|
36
36
|
it "should provide access to sessions" do
|
37
|
-
@sessions[@url].
|
37
|
+
expect(@sessions[@url]).not_to be_nil
|
38
38
|
end
|
39
39
|
|
40
40
|
it "should start new sessions on-demand" do
|
41
41
|
url2 = URI('http://www.w3c.org/')
|
42
42
|
|
43
|
-
@sessions[url2].
|
43
|
+
expect(@sessions[url2]).not_to be_nil
|
44
44
|
end
|
45
45
|
|
46
46
|
it "should be able to kill sessions" do
|
47
47
|
url2 = URI('http://www.w3c.org/')
|
48
48
|
|
49
|
-
@sessions[url2].
|
49
|
+
expect(@sessions[url2]).not_to be_nil
|
50
50
|
@sessions.kill!(url2)
|
51
|
-
@sessions.
|
51
|
+
expect(@sessions).not_to be_active(url2)
|
52
52
|
end
|
53
53
|
|
54
54
|
after(:all) do
|
data/spec/spidr_spec.rb
CHANGED
@@ -4,36 +4,36 @@ require 'spec_helper'
|
|
4
4
|
|
5
5
|
describe Spidr do
|
6
6
|
it "should have a VERSION constant" do
|
7
|
-
subject.const_defined?('VERSION').
|
7
|
+
expect(subject.const_defined?('VERSION')).to eq(true)
|
8
8
|
end
|
9
9
|
|
10
10
|
describe "proxy" do
|
11
11
|
after(:all) do
|
12
|
-
|
12
|
+
Spidr.disable_proxy!
|
13
13
|
end
|
14
14
|
|
15
15
|
it "should not have proxy settings by default" do
|
16
|
-
subject.proxy[:host].
|
16
|
+
expect(subject.proxy[:host]).to be_nil
|
17
17
|
end
|
18
18
|
|
19
19
|
it "should allow setting new proxy settings" do
|
20
|
-
subject.proxy = {:
|
20
|
+
subject.proxy = {host: 'example.com', port: 8010}
|
21
21
|
|
22
|
-
subject.proxy[:host].
|
23
|
-
subject.proxy[:port].
|
22
|
+
expect(subject.proxy[:host]).to eq('example.com')
|
23
|
+
expect(subject.proxy[:port]).to eq(8010)
|
24
24
|
end
|
25
25
|
|
26
26
|
it "should default the :port option of new proxy settings" do
|
27
|
-
subject.proxy = {:
|
27
|
+
subject.proxy = {host: 'example.com'}
|
28
28
|
|
29
|
-
subject.proxy[:host].
|
30
|
-
subject.proxy[:port].
|
29
|
+
expect(subject.proxy[:host]).to eq('example.com')
|
30
|
+
expect(subject.proxy[:port]).to eq(Spidr::COMMON_PROXY_PORT)
|
31
31
|
end
|
32
32
|
|
33
33
|
it "should allow disabling the proxy" do
|
34
34
|
subject.disable_proxy!
|
35
35
|
|
36
|
-
subject.proxy[:host].
|
36
|
+
expect(subject.proxy[:host]).to be_nil
|
37
37
|
end
|
38
38
|
end
|
39
39
|
end
|
metadata
CHANGED
@@ -1,49 +1,43 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: spidr
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
5
|
-
prerelease:
|
4
|
+
version: 0.5.0
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- Postmodern
|
9
8
|
autorequire:
|
10
9
|
bindir: bin
|
11
10
|
cert_chain: []
|
12
|
-
date:
|
11
|
+
date: 2016-01-04 00:00:00.000000000 Z
|
13
12
|
dependencies:
|
14
13
|
- !ruby/object:Gem::Dependency
|
15
14
|
name: nokogiri
|
16
|
-
requirement:
|
17
|
-
none: false
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
18
16
|
requirements:
|
19
|
-
- - ~>
|
17
|
+
- - "~>"
|
20
18
|
- !ruby/object:Gem::Version
|
21
19
|
version: '1.3'
|
22
20
|
type: :runtime
|
23
21
|
prerelease: false
|
24
|
-
version_requirements:
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.3'
|
25
27
|
- !ruby/object:Gem::Dependency
|
26
28
|
name: bundler
|
27
|
-
requirement:
|
28
|
-
none: false
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
29
30
|
requirements:
|
30
|
-
- - ~>
|
31
|
+
- - "~>"
|
31
32
|
- !ruby/object:Gem::Version
|
32
33
|
version: '1.0'
|
33
34
|
type: :development
|
34
35
|
prerelease: false
|
35
|
-
version_requirements:
|
36
|
-
- !ruby/object:Gem::Dependency
|
37
|
-
name: yard
|
38
|
-
requirement: &19473820 !ruby/object:Gem::Requirement
|
39
|
-
none: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
40
37
|
requirements:
|
41
|
-
- - ~>
|
38
|
+
- - "~>"
|
42
39
|
- !ruby/object:Gem::Version
|
43
|
-
version: '0
|
44
|
-
type: :development
|
45
|
-
prerelease: false
|
46
|
-
version_requirements: *19473820
|
40
|
+
version: '1.0'
|
47
41
|
description: Spidr is a versatile Ruby web spidering library that can spider a site,
|
48
42
|
multiple domains, certain links or infinitely. Spidr is designed to be fast and
|
49
43
|
easy to use.
|
@@ -55,9 +49,9 @@ extra_rdoc_files:
|
|
55
49
|
- LICENSE.txt
|
56
50
|
- README.md
|
57
51
|
files:
|
58
|
-
- .gitignore
|
59
|
-
- .rspec
|
60
|
-
- .yardopts
|
52
|
+
- ".gitignore"
|
53
|
+
- ".rspec"
|
54
|
+
- ".yardopts"
|
61
55
|
- ChangeLog.md
|
62
56
|
- Gemfile
|
63
57
|
- LICENSE.txt
|
@@ -65,81 +59,73 @@ files:
|
|
65
59
|
- Rakefile
|
66
60
|
- gemspec.yml
|
67
61
|
- lib/spidr.rb
|
68
|
-
- lib/spidr/actions.rb
|
69
|
-
- lib/spidr/actions/actions.rb
|
70
|
-
- lib/spidr/actions/exceptions.rb
|
71
|
-
- lib/spidr/actions/exceptions/action.rb
|
72
|
-
- lib/spidr/actions/exceptions/paused.rb
|
73
|
-
- lib/spidr/actions/exceptions/skip_link.rb
|
74
|
-
- lib/spidr/actions/exceptions/skip_page.rb
|
75
62
|
- lib/spidr/agent.rb
|
63
|
+
- lib/spidr/agent/actions.rb
|
64
|
+
- lib/spidr/agent/events.rb
|
65
|
+
- lib/spidr/agent/filters.rb
|
66
|
+
- lib/spidr/agent/sanitizers.rb
|
76
67
|
- lib/spidr/auth_credential.rb
|
77
68
|
- lib/spidr/auth_store.rb
|
78
|
-
- lib/spidr/body.rb
|
79
69
|
- lib/spidr/cookie_jar.rb
|
80
|
-
- lib/spidr/events.rb
|
81
70
|
- lib/spidr/extensions.rb
|
82
71
|
- lib/spidr/extensions/uri.rb
|
83
|
-
- lib/spidr/filters.rb
|
84
|
-
- lib/spidr/headers.rb
|
85
|
-
- lib/spidr/links.rb
|
86
72
|
- lib/spidr/page.rb
|
73
|
+
- lib/spidr/page/body.rb
|
74
|
+
- lib/spidr/page/headers.rb
|
75
|
+
- lib/spidr/page/links.rb
|
87
76
|
- lib/spidr/rules.rb
|
88
|
-
- lib/spidr/sanitizers.rb
|
89
77
|
- lib/spidr/session_cache.rb
|
90
78
|
- lib/spidr/spidr.rb
|
91
79
|
- lib/spidr/version.rb
|
92
|
-
- spec/actions_spec.rb
|
80
|
+
- spec/agent/actions_spec.rb
|
81
|
+
- spec/agent/filters_spec.rb
|
82
|
+
- spec/agent/sanitizers_spec.rb
|
93
83
|
- spec/agent_spec.rb
|
94
84
|
- spec/auth_store_spec.rb
|
95
85
|
- spec/cookie_jar_spec.rb
|
96
86
|
- spec/extensions/uri_spec.rb
|
97
|
-
- spec/filters_spec.rb
|
98
87
|
- spec/helpers/history.rb
|
99
88
|
- spec/helpers/page.rb
|
100
89
|
- spec/helpers/wsoc.rb
|
101
90
|
- spec/page_examples.rb
|
102
91
|
- spec/page_spec.rb
|
103
92
|
- spec/rules_spec.rb
|
104
|
-
- spec/sanitizers_spec.rb
|
105
93
|
- spec/session_cache.rb
|
106
94
|
- spec/spec_helper.rb
|
107
95
|
- spec/spidr_spec.rb
|
108
96
|
- spidr.gemspec
|
109
|
-
homepage:
|
97
|
+
homepage: https://github.com/postmodern/spidr#readme
|
110
98
|
licenses:
|
111
99
|
- MIT
|
100
|
+
metadata: {}
|
112
101
|
post_install_message:
|
113
102
|
rdoc_options: []
|
114
103
|
require_paths:
|
115
104
|
- lib
|
116
105
|
required_ruby_version: !ruby/object:Gem::Requirement
|
117
|
-
none: false
|
118
106
|
requirements:
|
119
|
-
- -
|
107
|
+
- - ">="
|
120
108
|
- !ruby/object:Gem::Version
|
121
|
-
version:
|
109
|
+
version: 1.9.1
|
122
110
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
123
|
-
none: false
|
124
111
|
requirements:
|
125
|
-
- -
|
112
|
+
- - ">="
|
126
113
|
- !ruby/object:Gem::Version
|
127
114
|
version: '0'
|
128
115
|
requirements: []
|
129
116
|
rubyforge_project:
|
130
|
-
rubygems_version:
|
117
|
+
rubygems_version: 2.4.7
|
131
118
|
signing_key:
|
132
|
-
specification_version:
|
119
|
+
specification_version: 4
|
133
120
|
summary: A versatile Ruby web spidering library
|
134
121
|
test_files:
|
135
|
-
- spec/actions_spec.rb
|
122
|
+
- spec/agent/actions_spec.rb
|
123
|
+
- spec/agent/filters_spec.rb
|
124
|
+
- spec/agent/sanitizers_spec.rb
|
136
125
|
- spec/agent_spec.rb
|
137
126
|
- spec/auth_store_spec.rb
|
138
127
|
- spec/cookie_jar_spec.rb
|
139
128
|
- spec/extensions/uri_spec.rb
|
140
|
-
- spec/filters_spec.rb
|
141
129
|
- spec/page_spec.rb
|
142
130
|
- spec/rules_spec.rb
|
143
|
-
- spec/sanitizers_spec.rb
|
144
131
|
- spec/spidr_spec.rb
|
145
|
-
has_rdoc:
|