scraypa 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,18 @@
1
module Scraypa
  # Picks the visit implementation that matches the supplied configuration.
  class VisitFactory
    # Builds a visit object for the given parameters.
    #
    # When +params[:config]+ is present and its +use_capybara+ is truthy, the
    # config's +driver+ decides which Capybara-backed visit class is built;
    # in every other case a VisitRestClient is built.
    #
    # @param params [Hash] passed unchanged to the chosen class's constructor;
    #   only +:config+ is read here
    # @return [Object] a VisitCapybaraPoltergeist, VisitCapybaraHeadlessChromium
    #   or VisitRestClient instance
    # @raise [CapybaraDriverUnsupported] when Capybara is enabled and the driver
    #   is not :poltergeist, :poltergeist_billy or :headless_chromium
    def self.build(params={})
      config = params[:config]
      return VisitRestClient.new(params) unless config && config.use_capybara

      driver = config.driver
      case driver
      when :poltergeist, :poltergeist_billy
        VisitCapybaraPoltergeist.new(params)
      when :headless_chromium
        VisitCapybaraHeadlessChromium.new(params)
      else
        raise CapybaraDriverUnsupported,
              "Currently no support for capybara driver: #{driver}"
      end
    end
  end
end
@@ -0,0 +1,13 @@
1
module Scraypa
  # Base class for visit implementations: validates the constructor
  # parameters and declares the +execute+ hook subclasses must override.
  class VisitInterface
    # Checks that a Scraypa::Configuration was supplied under +:config+.
    #
    # @param params [Hash, nil] constructor parameters; +params[:config]+
    #   must be a Scraypa::Configuration
    # @raise [RuntimeError] when +params+ is nil/false or +params[:config]+
    #   is not a Scraypa::Configuration; the message names the class of the
    #   value that was received instead
    def initialize(params)
      config = params && params[:config]
      return if config.is_a?(Scraypa::Configuration)

      # The message previously interpolated an undefined local (`args`), so
      # this path raised NameError instead of the descriptive error below.
      raise "Scraypa::Configuration object required by Visit interface. " \
            "Got: #{config.class}"
    end

    # Performs the visit. Subclasses must override this method.
    #
    # @raise [NotImplementedError] always, in this base class
    def execute
      raise NotImplementedError, 'execute action not implemented.'
    end
  end
end
@@ -0,0 +1,39 @@
1
+ require 'rest-client'
2
+
3
module Scraypa
  # Visit implementation that issues the request through RestClient,
  # optionally inside the Tor proxy block and optionally adding a
  # User-Agent header obtained from the configured retriever.
  class VisitRestClient < VisitInterface
    # @param params [Hash] must contain +:config+ (validated by the
    #   superclass); may contain +:tor_proxy+ and +:user_agent_retriever+
    def initialize(params={})
      super(params)
      @config = params[:config]
      @tor_proxy = params[:tor_proxy]
      @user_agent_retriever = params[:user_agent_retriever]
    end

    # Executes the request described by +params+.
    #
    # The request goes through the Tor proxy only when the config's +tor+
    # flag is truthy and a +:tor_proxy+ object was supplied.
    #
    # @param params [Hash] arguments handed to RestClient::Request.execute
    # @return whatever RestClient::Request.execute returns
    def execute(params={})
      if @config.tor && @tor_proxy
        visit_get_response_through_tor(params)
      else
        visit_get_response(params)
      end
    end

    private

    # Runs the request inside the +@tor_proxy.proxy+ block and returns the
    # response.
    def visit_get_response_through_tor(params={})
      # Capture the response instead of using `return` inside the block, so
      # the proxy method is not unwound mid-call.
      # NOTE(review): assumes `proxy` yields exactly once - confirm against
      # the tor proxy implementation.
      response = nil
      @tor_proxy.proxy { response = visit_get_response(params) }
      response
    end

    # Issues the request directly via RestClient.
    def visit_get_response(params={})
      RestClient::Request.execute add_user_agent_to(params)
    end

    # Returns +params+ with the retriever's user agent added to +:headers+.
    #
    # Headers already supplied by the caller are kept; only +:user_agent+ is
    # set (and overrides a caller-supplied +:user_agent+ entry). The input
    # hash is not mutated. Without a retriever, +params+ is returned as is.
    def add_user_agent_to(params)
      return params unless @user_agent_retriever

      headers = (params[:headers] || {}).merge(
        user_agent: @user_agent_retriever.user_agent
      )
      params.merge(headers: headers)
    end
  end
end
@@ -0,0 +1,42 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'scraypa/version'
5
+
6
# Gem specification for scraypa: identity metadata, packaged files and
# dependency declarations.
Gem::Specification.new do |spec|
  spec.name     = "scraypa"
  spec.version  = Scraypa::VERSION
  spec.authors  = ["joshweir"]
  spec.email    = ["joshua.weir@gmail.com"]

  spec.summary  = "Web scraper with support for proxy, Tor and javascript."
  #spec.description = %q{TODO: Write a longer description or delete this line.}
  spec.homepage = "https://github.com/joshweir/scraypa"
  spec.license  = "MIT"

  # Package every git-tracked file except those under test/, spec/ or features/.
  tracked_files = `git ls-files -z`.split("\x0")
  spec.files = tracked_files.reject { |path| path.match(%r{^(test|spec|features)/}) }
  spec.bindir = "exe"
  spec.executables = spec.files.grep(%r{^exe/}).map { |path| File.basename(path) }
  spec.require_paths = ["lib"]

  # Development-only dependencies, declared in order; entries without a
  # second element carry no version constraint.
  [
    ["bundler", "~> 1.14"],
    ["rake", "~> 10.0"],
    ["rspec", "~> 3.0"],
    ["rspec-rails", "~> 3.5"],
    ["webmock"],
    ["puffing-billy"],
    ["gem-release"],
    ["rb-fsevent"],
    ["guard-rspec"]
  ].each { |dependency| spec.add_development_dependency(*dependency) }

  # Runtime dependencies, declared in order.
  [
    ["activesupport"],
    ["rest-client"],
    ["useragents", "0.1.4"],
    ["capybara", "~> 2.4.4"],
    ["chromedriver-helper"],
    ["tormanager"],
    ["selenium-webdriver"],
    ["poltergeist"],
    ["phantomjs"]
  ].each { |dependency| spec.add_dependency(*dependency) }
end
metadata ADDED
@@ -0,0 +1,322 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: scraypa
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.1
5
+ platform: ruby
6
+ authors:
7
+ - joshweir
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2017-09-25 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.14'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.14'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '3.0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '3.0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: rspec-rails
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '3.5'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '3.5'
69
+ - !ruby/object:Gem::Dependency
70
+ name: webmock
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: puffing-billy
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: gem-release
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - ">="
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - ">="
109
+ - !ruby/object:Gem::Version
110
+ version: '0'
111
+ - !ruby/object:Gem::Dependency
112
+ name: rb-fsevent
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - ">="
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
118
+ type: :development
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - ">="
123
+ - !ruby/object:Gem::Version
124
+ version: '0'
125
+ - !ruby/object:Gem::Dependency
126
+ name: guard-rspec
127
+ requirement: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - ">="
130
+ - !ruby/object:Gem::Version
131
+ version: '0'
132
+ type: :development
133
+ prerelease: false
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - ">="
137
+ - !ruby/object:Gem::Version
138
+ version: '0'
139
+ - !ruby/object:Gem::Dependency
140
+ name: activesupport
141
+ requirement: !ruby/object:Gem::Requirement
142
+ requirements:
143
+ - - ">="
144
+ - !ruby/object:Gem::Version
145
+ version: '0'
146
+ type: :runtime
147
+ prerelease: false
148
+ version_requirements: !ruby/object:Gem::Requirement
149
+ requirements:
150
+ - - ">="
151
+ - !ruby/object:Gem::Version
152
+ version: '0'
153
+ - !ruby/object:Gem::Dependency
154
+ name: rest-client
155
+ requirement: !ruby/object:Gem::Requirement
156
+ requirements:
157
+ - - ">="
158
+ - !ruby/object:Gem::Version
159
+ version: '0'
160
+ type: :runtime
161
+ prerelease: false
162
+ version_requirements: !ruby/object:Gem::Requirement
163
+ requirements:
164
+ - - ">="
165
+ - !ruby/object:Gem::Version
166
+ version: '0'
167
+ - !ruby/object:Gem::Dependency
168
+ name: useragents
169
+ requirement: !ruby/object:Gem::Requirement
170
+ requirements:
171
+ - - '='
172
+ - !ruby/object:Gem::Version
173
+ version: 0.1.4
174
+ type: :runtime
175
+ prerelease: false
176
+ version_requirements: !ruby/object:Gem::Requirement
177
+ requirements:
178
+ - - '='
179
+ - !ruby/object:Gem::Version
180
+ version: 0.1.4
181
+ - !ruby/object:Gem::Dependency
182
+ name: capybara
183
+ requirement: !ruby/object:Gem::Requirement
184
+ requirements:
185
+ - - "~>"
186
+ - !ruby/object:Gem::Version
187
+ version: 2.4.4
188
+ type: :runtime
189
+ prerelease: false
190
+ version_requirements: !ruby/object:Gem::Requirement
191
+ requirements:
192
+ - - "~>"
193
+ - !ruby/object:Gem::Version
194
+ version: 2.4.4
195
+ - !ruby/object:Gem::Dependency
196
+ name: chromedriver-helper
197
+ requirement: !ruby/object:Gem::Requirement
198
+ requirements:
199
+ - - ">="
200
+ - !ruby/object:Gem::Version
201
+ version: '0'
202
+ type: :runtime
203
+ prerelease: false
204
+ version_requirements: !ruby/object:Gem::Requirement
205
+ requirements:
206
+ - - ">="
207
+ - !ruby/object:Gem::Version
208
+ version: '0'
209
+ - !ruby/object:Gem::Dependency
210
+ name: tormanager
211
+ requirement: !ruby/object:Gem::Requirement
212
+ requirements:
213
+ - - ">="
214
+ - !ruby/object:Gem::Version
215
+ version: '0'
216
+ type: :runtime
217
+ prerelease: false
218
+ version_requirements: !ruby/object:Gem::Requirement
219
+ requirements:
220
+ - - ">="
221
+ - !ruby/object:Gem::Version
222
+ version: '0'
223
+ - !ruby/object:Gem::Dependency
224
+ name: selenium-webdriver
225
+ requirement: !ruby/object:Gem::Requirement
226
+ requirements:
227
+ - - ">="
228
+ - !ruby/object:Gem::Version
229
+ version: '0'
230
+ type: :runtime
231
+ prerelease: false
232
+ version_requirements: !ruby/object:Gem::Requirement
233
+ requirements:
234
+ - - ">="
235
+ - !ruby/object:Gem::Version
236
+ version: '0'
237
+ - !ruby/object:Gem::Dependency
238
+ name: poltergeist
239
+ requirement: !ruby/object:Gem::Requirement
240
+ requirements:
241
+ - - ">="
242
+ - !ruby/object:Gem::Version
243
+ version: '0'
244
+ type: :runtime
245
+ prerelease: false
246
+ version_requirements: !ruby/object:Gem::Requirement
247
+ requirements:
248
+ - - ">="
249
+ - !ruby/object:Gem::Version
250
+ version: '0'
251
+ - !ruby/object:Gem::Dependency
252
+ name: phantomjs
253
+ requirement: !ruby/object:Gem::Requirement
254
+ requirements:
255
+ - - ">="
256
+ - !ruby/object:Gem::Version
257
+ version: '0'
258
+ type: :runtime
259
+ prerelease: false
260
+ version_requirements: !ruby/object:Gem::Requirement
261
+ requirements:
262
+ - - ">="
263
+ - !ruby/object:Gem::Version
264
+ version: '0'
265
+ description:
266
+ email:
267
+ - joshua.weir@gmail.com
268
+ executables: []
269
+ extensions: []
270
+ extra_rdoc_files: []
271
+ files:
272
+ - ".gitignore"
273
+ - ".rspec"
274
+ - ".travis.yml"
275
+ - Gemfile
276
+ - Guardfile
277
+ - LICENSE.txt
278
+ - README.md
279
+ - Rakefile
280
+ - bin/console
281
+ - bin/setup
282
+ - lib/scraypa.rb
283
+ - lib/scraypa/configuration.rb
284
+ - lib/scraypa/driver_resetter.rb
285
+ - lib/scraypa/throttle.rb
286
+ - lib/scraypa/user_agent/user_agent_abstract.rb
287
+ - lib/scraypa/user_agent/user_agent_common_aliases_lists.rb
288
+ - lib/scraypa/user_agent/user_agent_factory.rb
289
+ - lib/scraypa/user_agent/user_agent_iterator.rb
290
+ - lib/scraypa/user_agent/user_agent_random.rb
291
+ - lib/scraypa/version.rb
292
+ - lib/scraypa/visit/visit_capabara_headless_chromium.rb
293
+ - lib/scraypa/visit/visit_capabara_poltergeist.rb
294
+ - lib/scraypa/visit/visit_factory.rb
295
+ - lib/scraypa/visit/visit_interface.rb
296
+ - lib/scraypa/visit/visit_rest_client.rb
297
+ - scraypa.gemspec
298
+ homepage: https://github.com/joshweir/scraypa
299
+ licenses:
300
+ - MIT
301
+ metadata: {}
302
+ post_install_message:
303
+ rdoc_options: []
304
+ require_paths:
305
+ - lib
306
+ required_ruby_version: !ruby/object:Gem::Requirement
307
+ requirements:
308
+ - - ">="
309
+ - !ruby/object:Gem::Version
310
+ version: '0'
311
+ required_rubygems_version: !ruby/object:Gem::Requirement
312
+ requirements:
313
+ - - ">="
314
+ - !ruby/object:Gem::Version
315
+ version: '0'
316
+ requirements: []
317
+ rubyforge_project:
318
+ rubygems_version: 2.5.1
319
+ signing_key:
320
+ specification_version: 4
321
+ summary: Web scraper with support for proxy, Tor and javascript.
322
+ test_files: []