ruby-crawler 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,50 @@
1
+ require 'rubygems'
2
+ require 'bundler'
3
+ Bundler.require :test
4
+
5
+ require 'simplecov'
6
# Start coverage tracking. This runs before `require 'crawler'` further down
# so that the library files are loaded while coverage is being recorded.
SimpleCov.start do
  # Keep the test files themselves out of the coverage report.
  #
  # The previous string filter "/test/*_test.rb" was written like a shell
  # glob, but SimpleCov does not glob-expand string filters, so it never
  # matched paths such as test/crawler/crawler_test.rb. A block filter
  # expresses the intended "any *_test.rb below test/" rule explicitly.
  add_filter do |source_file|
    source_file.filename =~ %r{/test/.*_test\.rb\z}
  end
end
9
+
10
+ require 'redis'
11
+ require 'minitest/autorun'
12
+ require 'webmock/minitest'
13
+ require 'crawler'
14
+
15
# Support files
#
# Require every Ruby file in the `support` directory that sits next to this
# helper. The glob result is sorted explicitly: Dir[] only returns entries in
# sorted order by default from Ruby 3.0 onwards, so without the sort the load
# order of the helpers could differ between platforms on older Rubies.
support_glob = File.join(File.expand_path(File.dirname(__FILE__)), 'support', '*.rb')
Dir[support_glob].sort.each do |file|
  require file
end
19
+
20
# Reopens the Minitest base test class so that every test case gets the same
# stubbed HTTP responses and starts from an empty Redis database.
#
# `Minitest::Test` is used instead of the legacy `MiniTest::Test` spelling:
# recent Minitest releases no longer define the `MiniTest` alias by default,
# and wherever the alias does exist both names refer to the same class.
class Minitest::Test
  include DomainHtml

  # Registers the WebMock stubs for the pages the tests request and then
  # empties the Redis database.
  #
  # The *_html bodies are presumably provided by DomainHtml
  # (test/support/domain_html.rb) - not visible from this file.
  def setup
    # Headers every stubbed request must carry in order to match.
    request_headers = {
      'Accept' => '*/*',
      'Accept-Encoding' => 'gzip;q=1.0,deflate;q=0.6,identity;q=0.3',
      'User-Agent' => 'Ruby'
    }

    # One row per stubbed page: [url, status, body, response headers].
    stubbed_pages = [
      ["https://www.digitalocean.com/", 200, root_html, {}],
      ["https://www.digitalocean.com/redirect", 301, root_html,
       { 'Location' => 'https://www.digitalocean.com' }],
      ["https://www.digitalocean.com/bad_request", 404, root_html, {}],
      ["https://www.digitalocean.com/pricing/", 200, pricing_html, {}],
      ["https://www.digitalocean.com/features/", 200, features_html, {}],
      ["https://www.digitalocean.com/company/careers/", 200, careers_html, {}]
    ]

    stubbed_pages.each do |url, status, body, response_headers|
      stub_request(:get, url).
        with(:headers => request_headers).
        to_return(:status => status, :body => body, :headers => response_headers)
    end

    # NOTE(review): Redis.new uses the client's default connection settings
    # and flushdb deletes every key in the selected database. Confirm that
    # this never points at a database holding real data; a dedicated test
    # database would be safer.
    Redis.new.flushdb
  end
end
metadata ADDED
@@ -0,0 +1,191 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: ruby-crawler
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Adam Ryan
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-05-20 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: nokogiri
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: addressable
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: redis
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: bundler
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '1.5'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '1.5'
69
+ - !ruby/object:Gem::Dependency
70
+ name: rake
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: minitest
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: webmock
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - ">="
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - ">="
109
+ - !ruby/object:Gem::Version
110
+ version: '0'
111
+ - !ruby/object:Gem::Dependency
112
+ name: simplecov
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - ">="
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
118
+ type: :development
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - ">="
123
+ - !ruby/object:Gem::Version
124
+ version: '0'
125
+ description: Simple ruby web crawler
126
+ email:
127
+ - adam.g.ryan@gmail.com
128
+ executables: []
129
+ extensions: []
130
+ extra_rdoc_files: []
131
+ files:
132
+ - ".DS_Store"
133
+ - ".gitignore"
134
+ - Gemfile
135
+ - LICENSE.txt
136
+ - README.md
137
+ - Rakefile
138
+ - crawler.gemspec
139
+ - lib/crawler.rb
140
+ - lib/crawler/document.rb
141
+ - lib/crawler/document_parser.rb
142
+ - lib/crawler/error.rb
143
+ - lib/crawler/formatting.rb
144
+ - lib/crawler/http.rb
145
+ - lib/crawler/index.rb
146
+ - lib/crawler/storage.rb
147
+ - lib/crawler/validations.rb
148
+ - lib/crawler/version.rb
149
+ - test/crawler/crawler_test.rb
150
+ - test/crawler/document_test.rb
151
+ - test/crawler/formatting_test.rb
152
+ - test/crawler/http_test.rb
153
+ - test/crawler/index_test.rb
154
+ - test/crawler/storage_test.rb
155
+ - test/crawler/validations_test.rb
156
+ - test/support/domain_html.rb
157
+ - test/test_helper.rb
158
+ homepage: ''
159
+ licenses:
160
+ - MIT
161
+ metadata: {}
162
+ post_install_message:
163
+ rdoc_options: []
164
+ require_paths:
165
+ - lib
166
+ required_ruby_version: !ruby/object:Gem::Requirement
167
+ requirements:
168
+ - - ">="
169
+ - !ruby/object:Gem::Version
170
+ version: '0'
171
+ required_rubygems_version: !ruby/object:Gem::Requirement
172
+ requirements:
173
+ - - ">="
174
+ - !ruby/object:Gem::Version
175
+ version: '0'
176
+ requirements: []
177
+ rubyforge_project:
178
+ rubygems_version: 2.2.0
179
+ signing_key:
180
+ specification_version: 4
181
+ summary: Simple ruby web crawler
182
+ test_files:
183
+ - test/crawler/crawler_test.rb
184
+ - test/crawler/document_test.rb
185
+ - test/crawler/formatting_test.rb
186
+ - test/crawler/http_test.rb
187
+ - test/crawler/index_test.rb
188
+ - test/crawler/storage_test.rb
189
+ - test/crawler/validations_test.rb
190
+ - test/support/domain_html.rb
191
+ - test/test_helper.rb