spidr_epg 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +15 -0
- data/.gitignore +10 -0
- data/.rspec +1 -0
- data/.yardopts +1 -0
- data/ChangeLog.md +291 -0
- data/ChangeLog.md~ +291 -0
- data/Gemfile +16 -0
- data/Gemfile.lock +49 -0
- data/Gemfile~ +16 -0
- data/LICENSE.txt +20 -0
- data/README.md +193 -0
- data/README.md~ +190 -0
- data/Rakefile +29 -0
- data/gemspec.yml +19 -0
- data/lib/spidr/actions/actions.rb +83 -0
- data/lib/spidr/actions/exceptions/action.rb +9 -0
- data/lib/spidr/actions/exceptions/paused.rb +11 -0
- data/lib/spidr/actions/exceptions/skip_link.rb +12 -0
- data/lib/spidr/actions/exceptions/skip_page.rb +12 -0
- data/lib/spidr/actions/exceptions.rb +4 -0
- data/lib/spidr/actions.rb +2 -0
- data/lib/spidr/agent.rb +866 -0
- data/lib/spidr/auth_credential.rb +28 -0
- data/lib/spidr/auth_store.rb +161 -0
- data/lib/spidr/body.rb +98 -0
- data/lib/spidr/cookie_jar.rb +202 -0
- data/lib/spidr/events.rb +537 -0
- data/lib/spidr/extensions/uri.rb +52 -0
- data/lib/spidr/extensions.rb +1 -0
- data/lib/spidr/filters.rb +539 -0
- data/lib/spidr/headers.rb +370 -0
- data/lib/spidr/links.rb +229 -0
- data/lib/spidr/page.rb +108 -0
- data/lib/spidr/rules.rb +79 -0
- data/lib/spidr/sanitizers.rb +56 -0
- data/lib/spidr/session_cache.rb +145 -0
- data/lib/spidr/spidr.rb +107 -0
- data/lib/spidr/version.rb +4 -0
- data/lib/spidr/version.rb~ +4 -0
- data/lib/spidr.rb +3 -0
- data/pkg/spidr-1.0.0.gem +0 -0
- data/spec/actions_spec.rb +59 -0
- data/spec/agent_spec.rb +81 -0
- data/spec/auth_store_spec.rb +85 -0
- data/spec/cookie_jar_spec.rb +144 -0
- data/spec/extensions/uri_spec.rb +43 -0
- data/spec/filters_spec.rb +61 -0
- data/spec/helpers/history.rb +34 -0
- data/spec/helpers/page.rb +8 -0
- data/spec/helpers/wsoc.rb +83 -0
- data/spec/page_examples.rb +21 -0
- data/spec/page_spec.rb +125 -0
- data/spec/rules_spec.rb +45 -0
- data/spec/sanitizers_spec.rb +61 -0
- data/spec/session_cache.rb +58 -0
- data/spec/spec_helper.rb +4 -0
- data/spec/spidr_spec.rb +39 -0
- data/spidr.gemspec +133 -0
- data/spidr.gemspec~ +131 -0
- metadata +158 -0
data/spec/spidr_spec.rb
ADDED
@@ -0,0 +1,39 @@
|
|
1
|
+
require 'spidr'
require 'spec_helper'

# Specs for the top-level Spidr module: the VERSION constant and the
# module-wide proxy settings (proxy, proxy= and disable_proxy!).
describe Spidr do
  it "should have a VERSION constant" do
    subject.const_defined?('VERSION').should == true
  end

  describe "proxy" do
    # The proxy settings live on the Spidr module itself, so they persist
    # between examples. Reset them after every example (rather than once in
    # an after(:all) hook, where `subject` is not meant to be used) so that
    # each example starts from the disabled state regardless of run order.
    after(:each) do
      subject.disable_proxy!
    end

    it "should not have proxy settings by default" do
      subject.proxy[:host].should be_nil
    end

    it "should allow setting new proxy settings" do
      subject.proxy = {:host => 'example.com', :port => 8010}

      subject.proxy[:host].should == 'example.com'
      subject.proxy[:port].should == 8010
    end

    it "should default the :port option of new proxy settings" do
      subject.proxy = {:host => 'example.com'}

      subject.proxy[:host].should == 'example.com'
      subject.proxy[:port].should == Spidr::COMMON_PROXY_PORT
    end

    it "should allow disabling the proxy" do
      # Enable a proxy first so that disabling it is actually observable and
      # does not depend on state left behind by another example.
      subject.proxy = {:host => 'example.com'}
      subject.disable_proxy!

      subject.proxy[:host].should be_nil
    end
  end
end
|
data/spidr.gemspec
ADDED
@@ -0,0 +1,133 @@
|
|
1
|
+
# encoding: utf-8

require 'yaml'

# Gem specification for spidr_epg.
#
# Most fields are read from gemspec.yml and merged over a set of computed
# defaults. gemspec.yml, the VCS checks and the file globs are all resolved
# relative to the current working directory, so this file is expected to be
# evaluated from the project root.
Gem::Specification.new do |gemspec|
  root    = File.dirname(__FILE__)
  lib_dir = File.join(root,'lib')

  # Candidate files for packaging: ask the VCS when a checkout is present,
  # otherwise fall back to globbing the working directory.
  files = if File.directory?('.git')
            `git ls-files`.split($/)
          elsif File.directory?('.hg')
            `hg manifest`.split($/)
          elsif File.directory?('.svn')
            `svn ls -R`.split($/).select { |path| File.file?(path) }
          else
            # Without a VCS manifest everything on disk matches the glob, so
            # leave out editor backups (*~) and previously built packages
            # under pkg/; neither belongs inside the gem.
            Dir['{**/}{.*,*}'].select { |path|
              File.file?(path) && path !~ /~\z/ && path !~ %r{\Apkg/}
            }
          end

  # Restricts an explicit list (Array) or a glob pattern (String) to the
  # candidate files above. Returns nil for any other kind of argument.
  filter_files = lambda { |paths|
    case paths
    when Array
      (files & paths)
    when String
      (files & Dir[paths])
    end
  }

  # Where to find the version when gemspec.yml does not provide one.
  version = {
    :file     => 'spidr/version',
    :constant => 'Spidr::VERSION'
  }

  # Values used for any key that gemspec.yml does not set.
  defaults = {
    'name'            => 'spidr_epg',
    'files'           => files,
    'executables'     => filter_files['bin/*'].map { |path| File.basename(path) },
    'test_files'      => filter_files['{test/{**/}*_test.rb,spec/{**/}*_spec.rb}'],
    'extra_doc_files' => filter_files['*.{txt,rdoc,md,markdown,tt,textile}'],
  }

  metadata = defaults.merge(YAML.load_file('gemspec.yml'))

  # The gem name is pinned here on purpose; a 'name' entry in gemspec.yml is
  # ignored.
  gemspec.name    = 'spidr_epg'
  gemspec.version = if metadata['version']
                      metadata['version']
                    else
                      $LOAD_PATH << lib_dir unless $LOAD_PATH.include?(lib_dir)

                      require version[:file]

                      # Resolve the constant path without eval.
                      version[:constant].split('::').inject(Object) { |scope,const_name|
                        scope.const_get(const_name)
                      }
                    end

  # summary and description fall back to each other when only one is given.
  gemspec.summary     = metadata.fetch('summary',metadata['description'])
  gemspec.description = metadata.fetch('description',metadata['summary'])

  case metadata['license']
  when Array
    gemspec.licenses = metadata['license']
  when String
    gemspec.license = metadata['license']
  end

  case metadata['authors']
  when Array
    gemspec.authors = metadata['authors']
  when String
    gemspec.author = metadata['authors']
  end

  gemspec.email    = metadata['email']
  gemspec.homepage = metadata['homepage']

  case metadata['require_paths']
  when Array
    gemspec.require_paths = metadata['require_paths']
  when String
    gemspec.require_path = metadata['require_paths']
  end

  gemspec.files = filter_files[metadata['files']]

  gemspec.executables = metadata['executables']
  gemspec.extensions  = metadata['extensions']

  # Compare as versions, not as strings: a lexical comparison would treat
  # e.g. '1.10.0' as older than '1.7'.
  if Gem::Version.new(Gem::VERSION) < Gem::Version.new('1.7')
    gemspec.default_executable = gemspec.executables.first
  end

  gemspec.test_files = filter_files[metadata['test_files']]

  unless gemspec.files.include?('.document')
    gemspec.extra_rdoc_files = metadata['extra_doc_files']
  end

  gemspec.post_install_message = metadata['post_install_message']
  gemspec.requirements         = metadata['requirements']

  if gemspec.respond_to?(:required_ruby_version=)
    gemspec.required_ruby_version = metadata['required_ruby_version']
  end

  if gemspec.respond_to?(:required_rubygems_version=)
    gemspec.required_rubygems_version = metadata['required_rubygems_version']
  end

  # Normalizes a version constraint given either as an Array or as a
  # comma-separated String into an Array of Strings.
  parse_versions = lambda { |versions|
    case versions
    when Array
      versions.map { |v| v.to_s }
    when String
      versions.split(/,\s*/)
    end
  }

  # Each optional section of gemspec.yml maps gem names to version
  # constraints and is registered through the matching add_* method.
  [
    ['dependencies',             :add_dependency],
    ['runtime_dependencies',     :add_runtime_dependency],
    ['development_dependencies', :add_development_dependency]
  ].each do |section,adder|
    next unless metadata[section]

    metadata[section].each do |name,versions|
      gemspec.send(adder,name,parse_versions[versions])
    end
  end
end
|
data/spidr.gemspec~
ADDED
@@ -0,0 +1,131 @@
|
|
1
|
+
# encoding: utf-8

require 'yaml'

# Gem specification driven by gemspec.yml.
#
# Fields are read from gemspec.yml and merged over a set of computed
# defaults. gemspec.yml, the VCS checks and the file globs are all resolved
# relative to the current working directory, so this file is expected to be
# evaluated from the project root.
Gem::Specification.new do |gemspec|
  root    = File.dirname(__FILE__)
  lib_dir = File.join(root,'lib')

  # Candidate files for packaging: ask the VCS when a checkout is present,
  # otherwise fall back to globbing the working directory.
  files = if File.directory?('.git')
            `git ls-files`.split($/)
          elsif File.directory?('.hg')
            `hg manifest`.split($/)
          elsif File.directory?('.svn')
            `svn ls -R`.split($/).select { |path| File.file?(path) }
          else
            # Without a VCS manifest everything on disk matches the glob, so
            # leave out editor backups (*~) and previously built packages
            # under pkg/; neither belongs inside the gem.
            Dir['{**/}{.*,*}'].select { |path|
              File.file?(path) && path !~ /~\z/ && path !~ %r{\Apkg/}
            }
          end

  # Restricts an explicit list (Array) or a glob pattern (String) to the
  # candidate files above. Returns nil for any other kind of argument.
  filter_files = lambda { |paths|
    case paths
    when Array
      (files & paths)
    when String
      (files & Dir[paths])
    end
  }

  # Where to find the version when gemspec.yml does not provide one.
  version = {
    :file     => 'spidr/version',
    :constant => 'Spidr::VERSION'
  }

  # Values used for any key that gemspec.yml does not set.
  defaults = {
    # Expand first: when this file is loaded by a relative path, root is '.'
    # and its basename would not be the project directory name.
    'name'            => File.basename(File.expand_path(root)),
    'files'           => files,
    'executables'     => filter_files['bin/*'].map { |path| File.basename(path) },
    'test_files'      => filter_files['{test/{**/}*_test.rb,spec/{**/}*_spec.rb}'],
    'extra_doc_files' => filter_files['*.{txt,rdoc,md,markdown,tt,textile}'],
  }

  metadata = defaults.merge(YAML.load_file('gemspec.yml'))

  # defaults is keyed by Strings, so the fallback must use 'name' as well.
  gemspec.name    = metadata.fetch('name',defaults['name'])
  gemspec.version = if metadata['version']
                      metadata['version']
                    else
                      $LOAD_PATH << lib_dir unless $LOAD_PATH.include?(lib_dir)

                      require version[:file]

                      # Resolve the constant path without eval.
                      version[:constant].split('::').inject(Object) { |scope,const_name|
                        scope.const_get(const_name)
                      }
                    end

  # summary and description fall back to each other when only one is given.
  gemspec.summary     = metadata.fetch('summary',metadata['description'])
  gemspec.description = metadata.fetch('description',metadata['summary'])

  case metadata['license']
  when Array
    gemspec.licenses = metadata['license']
  when String
    gemspec.license = metadata['license']
  end

  case metadata['authors']
  when Array
    gemspec.authors = metadata['authors']
  when String
    gemspec.author = metadata['authors']
  end

  gemspec.email    = metadata['email']
  gemspec.homepage = metadata['homepage']

  case metadata['require_paths']
  when Array
    gemspec.require_paths = metadata['require_paths']
  when String
    gemspec.require_path = metadata['require_paths']
  end

  gemspec.files = filter_files[metadata['files']]

  gemspec.executables = metadata['executables']
  gemspec.extensions  = metadata['extensions']

  # Compare as versions, not as strings: a lexical comparison would treat
  # e.g. '1.10.0' as older than '1.7'.
  if Gem::Version.new(Gem::VERSION) < Gem::Version.new('1.7')
    gemspec.default_executable = gemspec.executables.first
  end

  gemspec.test_files = filter_files[metadata['test_files']]

  unless gemspec.files.include?('.document')
    gemspec.extra_rdoc_files = metadata['extra_doc_files']
  end

  gemspec.post_install_message = metadata['post_install_message']
  gemspec.requirements         = metadata['requirements']

  if gemspec.respond_to?(:required_ruby_version=)
    gemspec.required_ruby_version = metadata['required_ruby_version']
  end

  if gemspec.respond_to?(:required_rubygems_version=)
    gemspec.required_rubygems_version = metadata['required_rubygems_version']
  end

  # Normalizes a version constraint given either as an Array or as a
  # comma-separated String into an Array of Strings.
  parse_versions = lambda { |versions|
    case versions
    when Array
      versions.map { |v| v.to_s }
    when String
      versions.split(/,\s*/)
    end
  }

  # Each optional section of gemspec.yml maps gem names to version
  # constraints and is registered through the matching add_* method.
  [
    ['dependencies',             :add_dependency],
    ['runtime_dependencies',     :add_runtime_dependency],
    ['development_dependencies', :add_development_dependency]
  ].each do |section,adder|
    next unless metadata[section]

    metadata[section].each do |name,versions|
      gemspec.send(adder,name,parse_versions[versions])
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,158 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: spidr_epg
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.0.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Postmodern
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2013-04-12 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: nokogiri
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ~>
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.3'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ~>
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.3'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: bundler
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ~>
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '1.0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ~>
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '1.0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: yard
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ~>
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0.7'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ~>
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0.7'
|
55
|
+
description: Spidr is a versatile Ruby web spidering library that can spider a site,
|
56
|
+
multiple domains, certain links or infinitely. Spidr is designed to be fast and
|
57
|
+
easy to use.
|
58
|
+
email: postmodern.mod3@gmail.com
|
59
|
+
executables: []
|
60
|
+
extensions: []
|
61
|
+
extra_rdoc_files:
|
62
|
+
- LICENSE.txt
|
63
|
+
- README.md
|
64
|
+
- ChangeLog.md
|
65
|
+
files:
|
66
|
+
- .rspec
|
67
|
+
- .gitignore
|
68
|
+
- .yardopts
|
69
|
+
- LICENSE.txt
|
70
|
+
- Rakefile
|
71
|
+
- Gemfile.lock
|
72
|
+
- ChangeLog.md~
|
73
|
+
- lib/spidr.rb
|
74
|
+
- lib/spidr/filters.rb
|
75
|
+
- lib/spidr/actions.rb
|
76
|
+
- lib/spidr/rules.rb
|
77
|
+
- lib/spidr/links.rb
|
78
|
+
- lib/spidr/body.rb
|
79
|
+
- lib/spidr/spidr.rb
|
80
|
+
- lib/spidr/session_cache.rb
|
81
|
+
- lib/spidr/extensions.rb
|
82
|
+
- lib/spidr/agent.rb
|
83
|
+
- lib/spidr/auth_store.rb
|
84
|
+
- lib/spidr/auth_credential.rb
|
85
|
+
- lib/spidr/page.rb
|
86
|
+
- lib/spidr/version.rb~
|
87
|
+
- lib/spidr/actions/actions.rb
|
88
|
+
- lib/spidr/actions/exceptions.rb
|
89
|
+
- lib/spidr/actions/exceptions/skip_link.rb
|
90
|
+
- lib/spidr/actions/exceptions/paused.rb
|
91
|
+
- lib/spidr/actions/exceptions/action.rb
|
92
|
+
- lib/spidr/actions/exceptions/skip_page.rb
|
93
|
+
- lib/spidr/headers.rb
|
94
|
+
- lib/spidr/version.rb
|
95
|
+
- lib/spidr/sanitizers.rb
|
96
|
+
- lib/spidr/cookie_jar.rb
|
97
|
+
- lib/spidr/extensions/uri.rb
|
98
|
+
- lib/spidr/events.rb
|
99
|
+
- spidr.gemspec~
|
100
|
+
- gemspec.yml
|
101
|
+
- Gemfile~
|
102
|
+
- spidr.gemspec
|
103
|
+
- Gemfile
|
104
|
+
- README.md~
|
105
|
+
- README.md
|
106
|
+
- pkg/spidr-1.0.0.gem
|
107
|
+
- ChangeLog.md
|
108
|
+
- spec/filters_spec.rb
|
109
|
+
- spec/agent_spec.rb
|
110
|
+
- spec/spec_helper.rb
|
111
|
+
- spec/cookie_jar_spec.rb
|
112
|
+
- spec/auth_store_spec.rb
|
113
|
+
- spec/spidr_spec.rb
|
114
|
+
- spec/session_cache.rb
|
115
|
+
- spec/page_spec.rb
|
116
|
+
- spec/page_examples.rb
|
117
|
+
- spec/actions_spec.rb
|
118
|
+
- spec/helpers/history.rb
|
119
|
+
- spec/helpers/wsoc.rb
|
120
|
+
- spec/helpers/page.rb
|
121
|
+
- spec/rules_spec.rb
|
122
|
+
- spec/sanitizers_spec.rb
|
123
|
+
- spec/extensions/uri_spec.rb
|
124
|
+
homepage: http://github.com/postmodern/spidr
|
125
|
+
licenses:
|
126
|
+
- MIT
|
127
|
+
metadata: {}
|
128
|
+
post_install_message:
|
129
|
+
rdoc_options: []
|
130
|
+
require_paths:
|
131
|
+
- lib
|
132
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
133
|
+
requirements:
|
134
|
+
- - ! '>='
|
135
|
+
- !ruby/object:Gem::Version
|
136
|
+
version: '0'
|
137
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
138
|
+
requirements:
|
139
|
+
- - ! '>='
|
140
|
+
- !ruby/object:Gem::Version
|
141
|
+
version: '0'
|
142
|
+
requirements: []
|
143
|
+
rubyforge_project:
|
144
|
+
rubygems_version: 2.0.0
|
145
|
+
signing_key:
|
146
|
+
specification_version: 4
|
147
|
+
summary: A versatile Ruby web spidering library
|
148
|
+
test_files:
|
149
|
+
- spec/filters_spec.rb
|
150
|
+
- spec/agent_spec.rb
|
151
|
+
- spec/cookie_jar_spec.rb
|
152
|
+
- spec/auth_store_spec.rb
|
153
|
+
- spec/spidr_spec.rb
|
154
|
+
- spec/page_spec.rb
|
155
|
+
- spec/actions_spec.rb
|
156
|
+
- spec/rules_spec.rb
|
157
|
+
- spec/sanitizers_spec.rb
|
158
|
+
- spec/extensions/uri_spec.rb
|