spidr_epg 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (60)
  1. checksums.yaml +15 -0
  2. data/.gitignore +10 -0
  3. data/.rspec +1 -0
  4. data/.yardopts +1 -0
  5. data/ChangeLog.md +291 -0
  6. data/ChangeLog.md~ +291 -0
  7. data/Gemfile +16 -0
  8. data/Gemfile.lock +49 -0
  9. data/Gemfile~ +16 -0
  10. data/LICENSE.txt +20 -0
  11. data/README.md +193 -0
  12. data/README.md~ +190 -0
  13. data/Rakefile +29 -0
  14. data/gemspec.yml +19 -0
  15. data/lib/spidr/actions/actions.rb +83 -0
  16. data/lib/spidr/actions/exceptions/action.rb +9 -0
  17. data/lib/spidr/actions/exceptions/paused.rb +11 -0
  18. data/lib/spidr/actions/exceptions/skip_link.rb +12 -0
  19. data/lib/spidr/actions/exceptions/skip_page.rb +12 -0
  20. data/lib/spidr/actions/exceptions.rb +4 -0
  21. data/lib/spidr/actions.rb +2 -0
  22. data/lib/spidr/agent.rb +866 -0
  23. data/lib/spidr/auth_credential.rb +28 -0
  24. data/lib/spidr/auth_store.rb +161 -0
  25. data/lib/spidr/body.rb +98 -0
  26. data/lib/spidr/cookie_jar.rb +202 -0
  27. data/lib/spidr/events.rb +537 -0
  28. data/lib/spidr/extensions/uri.rb +52 -0
  29. data/lib/spidr/extensions.rb +1 -0
  30. data/lib/spidr/filters.rb +539 -0
  31. data/lib/spidr/headers.rb +370 -0
  32. data/lib/spidr/links.rb +229 -0
  33. data/lib/spidr/page.rb +108 -0
  34. data/lib/spidr/rules.rb +79 -0
  35. data/lib/spidr/sanitizers.rb +56 -0
  36. data/lib/spidr/session_cache.rb +145 -0
  37. data/lib/spidr/spidr.rb +107 -0
  38. data/lib/spidr/version.rb +4 -0
  39. data/lib/spidr/version.rb~ +4 -0
  40. data/lib/spidr.rb +3 -0
  41. data/pkg/spidr-1.0.0.gem +0 -0
  42. data/spec/actions_spec.rb +59 -0
  43. data/spec/agent_spec.rb +81 -0
  44. data/spec/auth_store_spec.rb +85 -0
  45. data/spec/cookie_jar_spec.rb +144 -0
  46. data/spec/extensions/uri_spec.rb +43 -0
  47. data/spec/filters_spec.rb +61 -0
  48. data/spec/helpers/history.rb +34 -0
  49. data/spec/helpers/page.rb +8 -0
  50. data/spec/helpers/wsoc.rb +83 -0
  51. data/spec/page_examples.rb +21 -0
  52. data/spec/page_spec.rb +125 -0
  53. data/spec/rules_spec.rb +45 -0
  54. data/spec/sanitizers_spec.rb +61 -0
  55. data/spec/session_cache.rb +58 -0
  56. data/spec/spec_helper.rb +4 -0
  57. data/spec/spidr_spec.rb +39 -0
  58. data/spidr.gemspec +133 -0
  59. data/spidr.gemspec~ +131 -0
  60. metadata +158 -0
@@ -0,0 +1,39 @@
require 'spidr'

require 'spec_helper'

# Specs for the top-level Spidr module: the VERSION constant and the
# module-wide proxy settings.
describe Spidr do
  it "should have a VERSION constant" do
    subject.const_defined?('VERSION').should == true
  end

  describe "proxy" do
    # The proxy settings are written through the module itself, so they
    # outlive a single example. Reset them after EVERY example (not only once
    # after the whole group) so that no example depends on the order in which
    # the others ran.
    after(:each) do
      subject.disable_proxy!
    end

    it "should not have proxy settings by default" do
      subject.proxy[:host].should be_nil
    end

    it "should allow setting new proxy settings" do
      subject.proxy = {:host => 'example.com', :port => 8010}

      subject.proxy[:host].should == 'example.com'
      subject.proxy[:port].should == 8010
    end

    it "should default the :port option of new proxy settings" do
      subject.proxy = {:host => 'example.com'}

      subject.proxy[:host].should == 'example.com'
      subject.proxy[:port].should == Spidr::COMMON_PROXY_PORT
    end

    it "should allow disabling the proxy" do
      subject.disable_proxy!

      subject.proxy[:host].should be_nil
    end
  end
end
data/spidr.gemspec ADDED
@@ -0,0 +1,133 @@
# encoding: utf-8

require 'yaml'

# Gem specification for the spidr_epg gem.
#
# Most fields are read from gemspec.yml (looked up in the current working
# directory); keys that file omits fall back to the `defaults` computed below.
Gem::Specification.new do |gemspec|
  root    = File.dirname(__FILE__)
  lib_dir = File.join(root,'lib')

  # Candidate files: whatever the VCS tracks or, outside a VCS checkout,
  # every regular file below the current directory.
  files = if File.directory?('.git')
            `git ls-files`.split($/)
          elsif File.directory?('.hg')
            `hg manifest`.split($/)
          elsif File.directory?('.svn')
            `svn ls -R`.split($/).select { |path| File.file?(path) }
          else
            # The bare glob also matches editor backups (*~) and previously
            # built packages under pkg/; neither belongs inside the gem.
            Dir['{**/}{.*,*}'].select { |path|
              File.file?(path) &&
                !path.end_with?('~') &&
                !path.start_with?('pkg/')
            }
          end

  # Restricts a list of paths (Array) or a glob pattern (String) to the
  # candidate files. Returns nil for any other argument.
  filter_files = lambda { |paths|
    case paths
    when Array  then (files & paths)
    when String then (files & Dir[paths])
    end
  }

  # Where to find the version when gemspec.yml does not provide one.
  version = {
    :file     => 'spidr/version',
    :constant => 'Spidr::VERSION'
  }

  defaults = {
    'name'            => 'spidr_epg',
    'files'           => files,
    'executables'     => filter_files['bin/*'].map { |path| File.basename(path) },
    'test_files'      => filter_files['{test/{**/}*_test.rb,spec/{**/}*_spec.rb}'],
    'extra_doc_files' => filter_files['*.{txt,rdoc,md,markdown,tt,textile}'],
  }

  # Keys present in gemspec.yml win over the defaults.
  metadata = defaults.merge(YAML.load_file('gemspec.yml'))

  # The name is fixed here rather than read from the metadata, so a 'name'
  # key in gemspec.yml cannot rename the gem.
  # NOTE(review): presumably gemspec.yml still carries the upstream name - confirm.
  gemspec.name = 'spidr_epg'

  gemspec.version = metadata['version'] || begin
    $LOAD_PATH << lib_dir unless $LOAD_PATH.include?(lib_dir)

    require version[:file]
    # Walk the constant path instead of eval'ing a string.
    version[:constant].split('::').inject(Object) { |scope,const|
      scope.const_get(const)
    }
  end

  # Summary and description stand in for each other when one is missing.
  gemspec.summary     = metadata.fetch('summary',metadata['description'])
  gemspec.description = metadata.fetch('description',metadata['summary'])

  case metadata['license']
  when Array  then gemspec.licenses = metadata['license']
  when String then gemspec.license  = metadata['license']
  end

  case metadata['authors']
  when Array  then gemspec.authors = metadata['authors']
  when String then gemspec.author  = metadata['authors']
  end

  gemspec.email    = metadata['email']
  gemspec.homepage = metadata['homepage']

  case metadata['require_paths']
  when Array  then gemspec.require_paths = metadata['require_paths']
  when String then gemspec.require_path  = metadata['require_paths']
  end

  gemspec.files = filter_files[metadata['files']]

  gemspec.executables = metadata['executables']
  gemspec.extensions  = metadata['extensions']

  # Compare as versions, not as strings: lexicographically '1.10' < '1.7'.
  if Gem::Version.new(Gem::VERSION) < Gem::Version.new('1.7')
    gemspec.default_executable = gemspec.executables.first
  end

  gemspec.test_files = filter_files[metadata['test_files']]

  # When a .document file is shipped, the extra doc files are not set here.
  unless gemspec.files.include?('.document')
    gemspec.extra_rdoc_files = metadata['extra_doc_files']
  end

  gemspec.post_install_message = metadata['post_install_message']
  gemspec.requirements         = metadata['requirements']

  if gemspec.respond_to?(:required_ruby_version=)
    gemspec.required_ruby_version = metadata['required_ruby_version']
  end

  if gemspec.respond_to?(:required_rubygems_version=)
    gemspec.required_rubygems_version = metadata['required_rubygems_version']
  end

  # Normalizes a version constraint given as an Array or as a
  # comma-separated String into an Array of Strings (nil otherwise).
  parse_versions = lambda { |versions|
    case versions
    when Array  then versions.map { |v| v.to_s }
    when String then versions.split(/,\s*/)
    end
  }

  if metadata['dependencies']
    metadata['dependencies'].each do |name,versions|
      gemspec.add_dependency(name,parse_versions[versions])
    end
  end

  if metadata['runtime_dependencies']
    metadata['runtime_dependencies'].each do |name,versions|
      gemspec.add_runtime_dependency(name,parse_versions[versions])
    end
  end

  if metadata['development_dependencies']
    metadata['development_dependencies'].each do |name,versions|
      gemspec.add_development_dependency(name,parse_versions[versions])
    end
  end
end
data/spidr.gemspec~ ADDED
@@ -0,0 +1,131 @@
# encoding: utf-8

require 'yaml'

# Gem specification driven by gemspec.yml.
#
# Most fields are read from gemspec.yml (looked up in the current working
# directory); keys that file omits fall back to the `defaults` computed below.
Gem::Specification.new do |gemspec|
  root    = File.dirname(__FILE__)
  lib_dir = File.join(root,'lib')

  # Candidate files: whatever the VCS tracks or, outside a VCS checkout,
  # every regular file below the current directory.
  files = if File.directory?('.git')
            `git ls-files`.split($/)
          elsif File.directory?('.hg')
            `hg manifest`.split($/)
          elsif File.directory?('.svn')
            `svn ls -R`.split($/).select { |path| File.file?(path) }
          else
            # The bare glob also matches editor backups (*~) and previously
            # built packages under pkg/; neither belongs inside the gem.
            Dir['{**/}{.*,*}'].select { |path|
              File.file?(path) &&
                !path.end_with?('~') &&
                !path.start_with?('pkg/')
            }
          end

  # Restricts a list of paths (Array) or a glob pattern (String) to the
  # candidate files. Returns nil for any other argument.
  filter_files = lambda { |paths|
    case paths
    when Array  then (files & paths)
    when String then (files & Dir[paths])
    end
  }

  # Where to find the version when gemspec.yml does not provide one.
  version = {
    :file     => 'spidr/version',
    :constant => 'Spidr::VERSION'
  }

  defaults = {
    'name'            => File.basename(root),
    'files'           => files,
    'executables'     => filter_files['bin/*'].map { |path| File.basename(path) },
    'test_files'      => filter_files['{test/{**/}*_test.rb,spec/{**/}*_spec.rb}'],
    'extra_doc_files' => filter_files['*.{txt,rdoc,md,markdown,tt,textile}'],
  }

  # Keys present in gemspec.yml win over the defaults.
  metadata = defaults.merge(YAML.load_file('gemspec.yml'))

  # `defaults` is keyed by Strings, so the fallback must use 'name' too
  # (a Symbol key would always yield nil).
  gemspec.name = metadata.fetch('name',defaults['name'])

  gemspec.version = metadata['version'] || begin
    $LOAD_PATH << lib_dir unless $LOAD_PATH.include?(lib_dir)

    require version[:file]
    # Walk the constant path instead of eval'ing a string.
    version[:constant].split('::').inject(Object) { |scope,const|
      scope.const_get(const)
    }
  end

  # Summary and description stand in for each other when one is missing.
  gemspec.summary     = metadata.fetch('summary',metadata['description'])
  gemspec.description = metadata.fetch('description',metadata['summary'])

  case metadata['license']
  when Array  then gemspec.licenses = metadata['license']
  when String then gemspec.license  = metadata['license']
  end

  case metadata['authors']
  when Array  then gemspec.authors = metadata['authors']
  when String then gemspec.author  = metadata['authors']
  end

  gemspec.email    = metadata['email']
  gemspec.homepage = metadata['homepage']

  case metadata['require_paths']
  when Array  then gemspec.require_paths = metadata['require_paths']
  when String then gemspec.require_path  = metadata['require_paths']
  end

  gemspec.files = filter_files[metadata['files']]

  gemspec.executables = metadata['executables']
  gemspec.extensions  = metadata['extensions']

  # Compare as versions, not as strings: lexicographically '1.10' < '1.7'.
  if Gem::Version.new(Gem::VERSION) < Gem::Version.new('1.7')
    gemspec.default_executable = gemspec.executables.first
  end

  gemspec.test_files = filter_files[metadata['test_files']]

  # When a .document file is shipped, the extra doc files are not set here.
  unless gemspec.files.include?('.document')
    gemspec.extra_rdoc_files = metadata['extra_doc_files']
  end

  gemspec.post_install_message = metadata['post_install_message']
  gemspec.requirements         = metadata['requirements']

  if gemspec.respond_to?(:required_ruby_version=)
    gemspec.required_ruby_version = metadata['required_ruby_version']
  end

  if gemspec.respond_to?(:required_rubygems_version=)
    gemspec.required_rubygems_version = metadata['required_rubygems_version']
  end

  # Normalizes a version constraint given as an Array or as a
  # comma-separated String into an Array of Strings (nil otherwise).
  parse_versions = lambda { |versions|
    case versions
    when Array  then versions.map { |v| v.to_s }
    when String then versions.split(/,\s*/)
    end
  }

  if metadata['dependencies']
    metadata['dependencies'].each do |name,versions|
      gemspec.add_dependency(name,parse_versions[versions])
    end
  end

  if metadata['runtime_dependencies']
    metadata['runtime_dependencies'].each do |name,versions|
      gemspec.add_runtime_dependency(name,parse_versions[versions])
    end
  end

  if metadata['development_dependencies']
    metadata['development_dependencies'].each do |name,versions|
      gemspec.add_development_dependency(name,parse_versions[versions])
    end
  end
end
metadata ADDED
@@ -0,0 +1,158 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: spidr_epg
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Postmodern
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2013-04-12 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: nokogiri
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ~>
18
+ - !ruby/object:Gem::Version
19
+ version: '1.3'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ~>
25
+ - !ruby/object:Gem::Version
26
+ version: '1.3'
27
+ - !ruby/object:Gem::Dependency
28
+ name: bundler
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ~>
32
+ - !ruby/object:Gem::Version
33
+ version: '1.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ~>
39
+ - !ruby/object:Gem::Version
40
+ version: '1.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: yard
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ~>
46
+ - !ruby/object:Gem::Version
47
+ version: '0.7'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ~>
53
+ - !ruby/object:Gem::Version
54
+ version: '0.7'
55
+ description: Spidr is a versatile Ruby web spidering library that can spider a site,
56
+ multiple domains, certain links or infinitely. Spidr is designed to be fast and
57
+ easy to use.
58
+ email: postmodern.mod3@gmail.com
59
+ executables: []
60
+ extensions: []
61
+ extra_rdoc_files:
62
+ - LICENSE.txt
63
+ - README.md
64
+ - ChangeLog.md
65
+ files:
66
+ - .rspec
67
+ - .gitignore
68
+ - .yardopts
69
+ - LICENSE.txt
70
+ - Rakefile
71
+ - Gemfile.lock
72
+ - ChangeLog.md~
73
+ - lib/spidr.rb
74
+ - lib/spidr/filters.rb
75
+ - lib/spidr/actions.rb
76
+ - lib/spidr/rules.rb
77
+ - lib/spidr/links.rb
78
+ - lib/spidr/body.rb
79
+ - lib/spidr/spidr.rb
80
+ - lib/spidr/session_cache.rb
81
+ - lib/spidr/extensions.rb
82
+ - lib/spidr/agent.rb
83
+ - lib/spidr/auth_store.rb
84
+ - lib/spidr/auth_credential.rb
85
+ - lib/spidr/page.rb
86
+ - lib/spidr/version.rb~
87
+ - lib/spidr/actions/actions.rb
88
+ - lib/spidr/actions/exceptions.rb
89
+ - lib/spidr/actions/exceptions/skip_link.rb
90
+ - lib/spidr/actions/exceptions/paused.rb
91
+ - lib/spidr/actions/exceptions/action.rb
92
+ - lib/spidr/actions/exceptions/skip_page.rb
93
+ - lib/spidr/headers.rb
94
+ - lib/spidr/version.rb
95
+ - lib/spidr/sanitizers.rb
96
+ - lib/spidr/cookie_jar.rb
97
+ - lib/spidr/extensions/uri.rb
98
+ - lib/spidr/events.rb
99
+ - spidr.gemspec~
100
+ - gemspec.yml
101
+ - Gemfile~
102
+ - spidr.gemspec
103
+ - Gemfile
104
+ - README.md~
105
+ - README.md
106
+ - pkg/spidr-1.0.0.gem
107
+ - ChangeLog.md
108
+ - spec/filters_spec.rb
109
+ - spec/agent_spec.rb
110
+ - spec/spec_helper.rb
111
+ - spec/cookie_jar_spec.rb
112
+ - spec/auth_store_spec.rb
113
+ - spec/spidr_spec.rb
114
+ - spec/session_cache.rb
115
+ - spec/page_spec.rb
116
+ - spec/page_examples.rb
117
+ - spec/actions_spec.rb
118
+ - spec/helpers/history.rb
119
+ - spec/helpers/wsoc.rb
120
+ - spec/helpers/page.rb
121
+ - spec/rules_spec.rb
122
+ - spec/sanitizers_spec.rb
123
+ - spec/extensions/uri_spec.rb
124
+ homepage: http://github.com/postmodern/spidr
125
+ licenses:
126
+ - MIT
127
+ metadata: {}
128
+ post_install_message:
129
+ rdoc_options: []
130
+ require_paths:
131
+ - lib
132
+ required_ruby_version: !ruby/object:Gem::Requirement
133
+ requirements:
134
+ - - ! '>='
135
+ - !ruby/object:Gem::Version
136
+ version: '0'
137
+ required_rubygems_version: !ruby/object:Gem::Requirement
138
+ requirements:
139
+ - - ! '>='
140
+ - !ruby/object:Gem::Version
141
+ version: '0'
142
+ requirements: []
143
+ rubyforge_project:
144
+ rubygems_version: 2.0.0
145
+ signing_key:
146
+ specification_version: 4
147
+ summary: A versatile Ruby web spidering library
148
+ test_files:
149
+ - spec/filters_spec.rb
150
+ - spec/agent_spec.rb
151
+ - spec/cookie_jar_spec.rb
152
+ - spec/auth_store_spec.rb
153
+ - spec/spidr_spec.rb
154
+ - spec/page_spec.rb
155
+ - spec/actions_spec.rb
156
+ - spec/rules_spec.rb
157
+ - spec/sanitizers_spec.rb
158
+ - spec/extensions/uri_spec.rb