spidr_epg 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60):
  1. checksums.yaml +15 -0
  2. data/.gitignore +10 -0
  3. data/.rspec +1 -0
  4. data/.yardopts +1 -0
  5. data/ChangeLog.md +291 -0
  6. data/ChangeLog.md~ +291 -0
  7. data/Gemfile +16 -0
  8. data/Gemfile.lock +49 -0
  9. data/Gemfile~ +16 -0
  10. data/LICENSE.txt +20 -0
  11. data/README.md +193 -0
  12. data/README.md~ +190 -0
  13. data/Rakefile +29 -0
  14. data/gemspec.yml +19 -0
  15. data/lib/spidr/actions/actions.rb +83 -0
  16. data/lib/spidr/actions/exceptions/action.rb +9 -0
  17. data/lib/spidr/actions/exceptions/paused.rb +11 -0
  18. data/lib/spidr/actions/exceptions/skip_link.rb +12 -0
  19. data/lib/spidr/actions/exceptions/skip_page.rb +12 -0
  20. data/lib/spidr/actions/exceptions.rb +4 -0
  21. data/lib/spidr/actions.rb +2 -0
  22. data/lib/spidr/agent.rb +866 -0
  23. data/lib/spidr/auth_credential.rb +28 -0
  24. data/lib/spidr/auth_store.rb +161 -0
  25. data/lib/spidr/body.rb +98 -0
  26. data/lib/spidr/cookie_jar.rb +202 -0
  27. data/lib/spidr/events.rb +537 -0
  28. data/lib/spidr/extensions/uri.rb +52 -0
  29. data/lib/spidr/extensions.rb +1 -0
  30. data/lib/spidr/filters.rb +539 -0
  31. data/lib/spidr/headers.rb +370 -0
  32. data/lib/spidr/links.rb +229 -0
  33. data/lib/spidr/page.rb +108 -0
  34. data/lib/spidr/rules.rb +79 -0
  35. data/lib/spidr/sanitizers.rb +56 -0
  36. data/lib/spidr/session_cache.rb +145 -0
  37. data/lib/spidr/spidr.rb +107 -0
  38. data/lib/spidr/version.rb +4 -0
  39. data/lib/spidr/version.rb~ +4 -0
  40. data/lib/spidr.rb +3 -0
  41. data/pkg/spidr-1.0.0.gem +0 -0
  42. data/spec/actions_spec.rb +59 -0
  43. data/spec/agent_spec.rb +81 -0
  44. data/spec/auth_store_spec.rb +85 -0
  45. data/spec/cookie_jar_spec.rb +144 -0
  46. data/spec/extensions/uri_spec.rb +43 -0
  47. data/spec/filters_spec.rb +61 -0
  48. data/spec/helpers/history.rb +34 -0
  49. data/spec/helpers/page.rb +8 -0
  50. data/spec/helpers/wsoc.rb +83 -0
  51. data/spec/page_examples.rb +21 -0
  52. data/spec/page_spec.rb +125 -0
  53. data/spec/rules_spec.rb +45 -0
  54. data/spec/sanitizers_spec.rb +61 -0
  55. data/spec/session_cache.rb +58 -0
  56. data/spec/spec_helper.rb +4 -0
  57. data/spec/spidr_spec.rb +39 -0
  58. data/spidr.gemspec +133 -0
  59. data/spidr.gemspec~ +131 -0
  60. metadata +158 -0
@@ -0,0 +1,39 @@
require 'spidr'

require 'spec_helper'

# Specs for the top-level Spidr module: the VERSION constant and the
# module-wide proxy settings.
describe Spidr do
  it "should have a VERSION constant" do
    subject.const_defined?('VERSION').should == true
  end

  describe "proxy" do
    # The examples below change proxy settings on the Spidr module itself
    # (the implicit subject), so a value set by one example would still be
    # there when the next one runs. Resetting after *every* example keeps
    # each example independent of execution order; in particular the
    # "by default" example no longer has to run first to pass.
    after(:each) do
      subject.disable_proxy!
    end

    it "should not have proxy settings by default" do
      subject.proxy[:host].should be_nil
    end

    it "should allow setting new proxy settings" do
      subject.proxy = {:host => 'example.com', :port => 8010}

      subject.proxy[:host].should == 'example.com'
      subject.proxy[:port].should == 8010
    end

    it "should default the :port option of new proxy settings" do
      # Only :host is given; the port is expected to fall back to
      # Spidr::COMMON_PROXY_PORT.
      subject.proxy = {:host => 'example.com'}

      subject.proxy[:host].should == 'example.com'
      subject.proxy[:port].should == Spidr::COMMON_PROXY_PORT
    end

    it "should allow disabling the proxy" do
      subject.disable_proxy!

      subject.proxy[:host].should be_nil
    end
  end
end
data/spidr.gemspec ADDED
@@ -0,0 +1,133 @@
# encoding: utf-8

require 'yaml'

# Gem specification for the `spidr_epg` package.
#
# Most fields are read from `gemspec.yml` and merged over defaults that are
# computed from the files found in the project directory. The gem name is
# fixed to 'spidr_epg' in this file regardless of what `gemspec.yml` says.
Gem::Specification.new do |gemspec|
  # Absolute, so that it stays valid after the Dir.chdir below.
  root    = File.expand_path(File.dirname(__FILE__))
  lib_dir = File.join(root,'lib')

  # Every path used below (VCS probes, globs, gemspec.yml) is relative.
  # Evaluate them from the project root so the gemspec gives the same result
  # no matter which directory it is loaded from.
  Dir.chdir(root) do
    files = if File.directory?('.git')
              `git ls-files`.split($/)
            elsif File.directory?('.hg')
              `hg manifest`.split($/)
            elsif File.directory?('.svn')
              `svn ls -R`.split($/).select { |path| File.file?(path) }
            else
              # No VCS to tell tracked files from build/editor artifacts, so
              # skip editor backups (`*~`) and previously built gems under
              # `pkg/` instead of shipping them inside the package.
              Dir['{**/}{.*,*}'].select { |path|
                File.file?(path) &&
                  !path.end_with?('~') &&
                  !path.start_with?('pkg/')
              }
            end

    # Restricts a list of paths (Array) or a glob pattern (String) to the
    # known project files. Returns nil for any other input.
    filter_files = lambda { |paths|
      case paths
      when Array
        (files & paths)
      when String
        (files & Dir[paths])
      end
    }

    # Where to find the version when gemspec.yml does not provide one.
    version = {
      :file     => 'spidr/version',
      :constant => 'Spidr::VERSION'
    }

    defaults = {
      'name'            => 'spidr_epg',
      'files'           => files,
      'executables'     => filter_files['bin/*'].map { |path| File.basename(path) },
      'test_files'      => filter_files['{test/{**/}*_test.rb,spec/{**/}*_spec.rb}'],
      'extra_doc_files' => filter_files['*.{txt,rdoc,md,markdown,tt,textile}'],
    }

    # Values from gemspec.yml take precedence over the computed defaults.
    metadata = defaults.merge(YAML.load_file('gemspec.yml'))

    # Deliberately not taken from metadata: a 'name' in gemspec.yml must not
    # override the published package name.
    gemspec.name    = 'spidr_epg'
    gemspec.version = metadata['version'] || begin
      $LOAD_PATH << lib_dir unless $LOAD_PATH.include?(lib_dir)

      require version[:file]

      # Resolve 'Spidr::VERSION' by constant lookup rather than eval.
      version[:constant].split('::').inject(Object) { |scope,const|
        scope.const_get(const)
      }
    end

    # Summary and description fall back to one another.
    gemspec.summary     = metadata.fetch('summary',metadata['description'])
    gemspec.description = metadata.fetch('description',metadata['summary'])

    case metadata['license']
    when Array
      gemspec.licenses = metadata['license']
    when String
      gemspec.license = metadata['license']
    end

    case metadata['authors']
    when Array
      gemspec.authors = metadata['authors']
    when String
      gemspec.author = metadata['authors']
    end

    gemspec.email    = metadata['email']
    gemspec.homepage = metadata['homepage']

    case metadata['require_paths']
    when Array
      gemspec.require_paths = metadata['require_paths']
    when String
      gemspec.require_path = metadata['require_paths']
    end

    gemspec.files = filter_files[metadata['files']]

    gemspec.executables = metadata['executables']
    gemspec.extensions  = metadata['extensions']

    # Compare as versions, not as strings: a lexicographic comparison of
    # version strings is only correct by accident.
    if Gem::Version.new(Gem::VERSION) < Gem::Version.new('1.7')
      gemspec.default_executable = gemspec.executables.first
    end

    gemspec.test_files = filter_files[metadata['test_files']]

    unless gemspec.files.include?('.document')
      gemspec.extra_rdoc_files = metadata['extra_doc_files']
    end

    gemspec.post_install_message = metadata['post_install_message']
    gemspec.requirements         = metadata['requirements']

    if gemspec.respond_to?(:required_ruby_version=)
      gemspec.required_ruby_version = metadata['required_ruby_version']
    end

    if gemspec.respond_to?(:required_rubygems_version=)
      gemspec.required_rubygems_version = metadata['required_rubygems_version']
    end

    # Normalizes a dependency's version constraints to an Array of Strings.
    # Accepts an Array of constraints or a comma-separated String; returns
    # nil for anything else.
    parse_versions = lambda { |versions|
      case versions
      when Array
        versions.map { |v| v.to_s }
      when String
        versions.split(/,\s*/)
      end
    }

    if metadata['dependencies']
      metadata['dependencies'].each do |name,versions|
        gemspec.add_dependency(name,parse_versions[versions])
      end
    end

    if metadata['runtime_dependencies']
      metadata['runtime_dependencies'].each do |name,versions|
        gemspec.add_runtime_dependency(name,parse_versions[versions])
      end
    end

    if metadata['development_dependencies']
      metadata['development_dependencies'].each do |name,versions|
        gemspec.add_development_dependency(name,parse_versions[versions])
      end
    end
  end
end
data/spidr.gemspec~ ADDED
@@ -0,0 +1,131 @@
# encoding: utf-8

require 'yaml'

# Gem specification driven by `gemspec.yml`.
#
# Fields are read from `gemspec.yml` and merged over defaults that are
# computed from the files found in the project directory. The gem name
# defaults to the name of the directory containing this file.
Gem::Specification.new do |gemspec|
  # Absolute, so that it stays valid after the Dir.chdir below.
  root    = File.expand_path(File.dirname(__FILE__))
  lib_dir = File.join(root,'lib')

  # Every path used below (VCS probes, globs, gemspec.yml) is relative.
  # Evaluate them from the project root so the gemspec gives the same result
  # no matter which directory it is loaded from.
  Dir.chdir(root) do
    files = if File.directory?('.git')
              `git ls-files`.split($/)
            elsif File.directory?('.hg')
              `hg manifest`.split($/)
            elsif File.directory?('.svn')
              `svn ls -R`.split($/).select { |path| File.file?(path) }
            else
              # No VCS to tell tracked files from build/editor artifacts, so
              # skip editor backups (`*~`) and previously built gems under
              # `pkg/` instead of shipping them inside the package.
              Dir['{**/}{.*,*}'].select { |path|
                File.file?(path) &&
                  !path.end_with?('~') &&
                  !path.start_with?('pkg/')
              }
            end

    # Restricts a list of paths (Array) or a glob pattern (String) to the
    # known project files. Returns nil for any other input.
    filter_files = lambda { |paths|
      case paths
      when Array
        (files & paths)
      when String
        (files & Dir[paths])
      end
    }

    # Where to find the version when gemspec.yml does not provide one.
    version = {
      :file     => 'spidr/version',
      :constant => 'Spidr::VERSION'
    }

    defaults = {
      'name'            => File.basename(root),
      'files'           => files,
      'executables'     => filter_files['bin/*'].map { |path| File.basename(path) },
      'test_files'      => filter_files['{test/{**/}*_test.rb,spec/{**/}*_spec.rb}'],
      'extra_doc_files' => filter_files['*.{txt,rdoc,md,markdown,tt,textile}'],
    }

    # Values from gemspec.yml take precedence over the computed defaults.
    metadata = defaults.merge(YAML.load_file('gemspec.yml'))

    # `defaults` is keyed by Strings; looking the fallback up with a Symbol
    # key always produced nil.
    gemspec.name    = metadata.fetch('name',defaults['name'])
    gemspec.version = metadata['version'] || begin
      $LOAD_PATH << lib_dir unless $LOAD_PATH.include?(lib_dir)

      require version[:file]

      # Resolve 'Spidr::VERSION' by constant lookup rather than eval.
      version[:constant].split('::').inject(Object) { |scope,const|
        scope.const_get(const)
      }
    end

    # Summary and description fall back to one another.
    gemspec.summary     = metadata.fetch('summary',metadata['description'])
    gemspec.description = metadata.fetch('description',metadata['summary'])

    case metadata['license']
    when Array
      gemspec.licenses = metadata['license']
    when String
      gemspec.license = metadata['license']
    end

    case metadata['authors']
    when Array
      gemspec.authors = metadata['authors']
    when String
      gemspec.author = metadata['authors']
    end

    gemspec.email    = metadata['email']
    gemspec.homepage = metadata['homepage']

    case metadata['require_paths']
    when Array
      gemspec.require_paths = metadata['require_paths']
    when String
      gemspec.require_path = metadata['require_paths']
    end

    gemspec.files = filter_files[metadata['files']]

    gemspec.executables = metadata['executables']
    gemspec.extensions  = metadata['extensions']

    # Compare as versions, not as strings: a lexicographic comparison of
    # version strings is only correct by accident.
    if Gem::Version.new(Gem::VERSION) < Gem::Version.new('1.7')
      gemspec.default_executable = gemspec.executables.first
    end

    gemspec.test_files = filter_files[metadata['test_files']]

    unless gemspec.files.include?('.document')
      gemspec.extra_rdoc_files = metadata['extra_doc_files']
    end

    gemspec.post_install_message = metadata['post_install_message']
    gemspec.requirements         = metadata['requirements']

    if gemspec.respond_to?(:required_ruby_version=)
      gemspec.required_ruby_version = metadata['required_ruby_version']
    end

    if gemspec.respond_to?(:required_rubygems_version=)
      gemspec.required_rubygems_version = metadata['required_rubygems_version']
    end

    # Normalizes a dependency's version constraints to an Array of Strings.
    # Accepts an Array of constraints or a comma-separated String; returns
    # nil for anything else.
    parse_versions = lambda { |versions|
      case versions
      when Array
        versions.map { |v| v.to_s }
      when String
        versions.split(/,\s*/)
      end
    }

    if metadata['dependencies']
      metadata['dependencies'].each do |name,versions|
        gemspec.add_dependency(name,parse_versions[versions])
      end
    end

    if metadata['runtime_dependencies']
      metadata['runtime_dependencies'].each do |name,versions|
        gemspec.add_runtime_dependency(name,parse_versions[versions])
      end
    end

    if metadata['development_dependencies']
      metadata['development_dependencies'].each do |name,versions|
        gemspec.add_development_dependency(name,parse_versions[versions])
      end
    end
  end
end
metadata ADDED
@@ -0,0 +1,158 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: spidr_epg
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Postmodern
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2013-04-12 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: nokogiri
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ~>
18
+ - !ruby/object:Gem::Version
19
+ version: '1.3'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ~>
25
+ - !ruby/object:Gem::Version
26
+ version: '1.3'
27
+ - !ruby/object:Gem::Dependency
28
+ name: bundler
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ~>
32
+ - !ruby/object:Gem::Version
33
+ version: '1.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ~>
39
+ - !ruby/object:Gem::Version
40
+ version: '1.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: yard
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ~>
46
+ - !ruby/object:Gem::Version
47
+ version: '0.7'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ~>
53
+ - !ruby/object:Gem::Version
54
+ version: '0.7'
55
+ description: Spidr is a versatile Ruby web spidering library that can spider a site,
56
+ multiple domains, certain links or infinitely. Spidr is designed to be fast and
57
+ easy to use.
58
+ email: postmodern.mod3@gmail.com
59
+ executables: []
60
+ extensions: []
61
+ extra_rdoc_files:
62
+ - LICENSE.txt
63
+ - README.md
64
+ - ChangeLog.md
65
+ files:
66
+ - .rspec
67
+ - .gitignore
68
+ - .yardopts
69
+ - LICENSE.txt
70
+ - Rakefile
71
+ - Gemfile.lock
72
+ - ChangeLog.md~
73
+ - lib/spidr.rb
74
+ - lib/spidr/filters.rb
75
+ - lib/spidr/actions.rb
76
+ - lib/spidr/rules.rb
77
+ - lib/spidr/links.rb
78
+ - lib/spidr/body.rb
79
+ - lib/spidr/spidr.rb
80
+ - lib/spidr/session_cache.rb
81
+ - lib/spidr/extensions.rb
82
+ - lib/spidr/agent.rb
83
+ - lib/spidr/auth_store.rb
84
+ - lib/spidr/auth_credential.rb
85
+ - lib/spidr/page.rb
86
+ - lib/spidr/version.rb~
87
+ - lib/spidr/actions/actions.rb
88
+ - lib/spidr/actions/exceptions.rb
89
+ - lib/spidr/actions/exceptions/skip_link.rb
90
+ - lib/spidr/actions/exceptions/paused.rb
91
+ - lib/spidr/actions/exceptions/action.rb
92
+ - lib/spidr/actions/exceptions/skip_page.rb
93
+ - lib/spidr/headers.rb
94
+ - lib/spidr/version.rb
95
+ - lib/spidr/sanitizers.rb
96
+ - lib/spidr/cookie_jar.rb
97
+ - lib/spidr/extensions/uri.rb
98
+ - lib/spidr/events.rb
99
+ - spidr.gemspec~
100
+ - gemspec.yml
101
+ - Gemfile~
102
+ - spidr.gemspec
103
+ - Gemfile
104
+ - README.md~
105
+ - README.md
106
+ - pkg/spidr-1.0.0.gem
107
+ - ChangeLog.md
108
+ - spec/filters_spec.rb
109
+ - spec/agent_spec.rb
110
+ - spec/spec_helper.rb
111
+ - spec/cookie_jar_spec.rb
112
+ - spec/auth_store_spec.rb
113
+ - spec/spidr_spec.rb
114
+ - spec/session_cache.rb
115
+ - spec/page_spec.rb
116
+ - spec/page_examples.rb
117
+ - spec/actions_spec.rb
118
+ - spec/helpers/history.rb
119
+ - spec/helpers/wsoc.rb
120
+ - spec/helpers/page.rb
121
+ - spec/rules_spec.rb
122
+ - spec/sanitizers_spec.rb
123
+ - spec/extensions/uri_spec.rb
124
+ homepage: http://github.com/postmodern/spidr
125
+ licenses:
126
+ - MIT
127
+ metadata: {}
128
+ post_install_message:
129
+ rdoc_options: []
130
+ require_paths:
131
+ - lib
132
+ required_ruby_version: !ruby/object:Gem::Requirement
133
+ requirements:
134
+ - - ! '>='
135
+ - !ruby/object:Gem::Version
136
+ version: '0'
137
+ required_rubygems_version: !ruby/object:Gem::Requirement
138
+ requirements:
139
+ - - ! '>='
140
+ - !ruby/object:Gem::Version
141
+ version: '0'
142
+ requirements: []
143
+ rubyforge_project:
144
+ rubygems_version: 2.0.0
145
+ signing_key:
146
+ specification_version: 4
147
+ summary: A versatile Ruby web spidering library
148
+ test_files:
149
+ - spec/filters_spec.rb
150
+ - spec/agent_spec.rb
151
+ - spec/cookie_jar_spec.rb
152
+ - spec/auth_store_spec.rb
153
+ - spec/spidr_spec.rb
154
+ - spec/page_spec.rb
155
+ - spec/actions_spec.rb
156
+ - spec/rules_spec.rb
157
+ - spec/sanitizers_spec.rb
158
+ - spec/extensions/uri_spec.rb