scrapey 0.0.4 → 0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,10 +1,15 @@
1
1
  require 'scrapey'
2
2
  require 'scrapey/multi'
3
+ require 'pry'
3
4
 
4
5
  fields 'url', 'status'
5
6
 
6
- def scrape url, response, header
7
+ def on_success url, response, header
7
8
  save({'url' => url, 'status' => header.status})
8
9
  end
9
10
 
10
- multi_head ['http://www.yahoo.com/', 'http://www.google.com.', 'http://www.bing.com/', 'http://www.bing.com/404.html'], :threads => 4, :callback => :scrape
11
+ def on_error url, e
12
+ save({'url' => url, 'status' => e})
13
+ end
14
+
15
+ multi_head ['http://locahlost2/foo', 'http://www.google.com/', 'http://www.bing.com/', 'http://www.bing.com/404.html']
@@ -1,5 +1,5 @@
1
1
  module Scrapey
2
- VERSION = "0.0.4"
2
+ VERSION = "0.0.5"
3
3
  BASEDIR = File.expand_path(File.dirname($0)).gsub(/\/src$/,'')
4
4
  URL = "https://github.com/monkeysuffrage/scrapey"
5
5
  #ENV['SSL_FILE'] = "#{Gem.dir}/gems/scrapey-#{Scrapey::VERSION}/ssl/cacert.pem"
data/lib/scrapey/multi.rb CHANGED
@@ -2,9 +2,11 @@ require 'em-http-request'
2
2
 
3
3
  module Scrapey
4
4
  def multi_get_or_post method, all_urls, options = {}
5
- request_options = {:redirects => 10, :head => {"User-Agent" => "Scrapey v#{Scrapey::VERSION} - #{Scrapey::URL}"}.merge(options.delete(:head))}
5
+ head = options.delete(:head) || {}
6
+ request_options = {:redirects => 10, :head => {"User-Agent" => "Scrapey v#{Scrapey::VERSION} - #{Scrapey::URL}"}.merge(head)}
6
7
  threads = options[:threads] || 20
7
- callback = options[:callback] || :save_cache
8
+ on_success = options[:on_success] || :on_success
9
+ on_error = options[:on_error] || :on_error
8
10
  all_urls.reject!{|url| is_cached? url} if @use_cache
9
11
  @lock = Mutex.new
10
12
  all_urls.each_slice(threads) do |urls|
@@ -18,10 +20,18 @@ module Scrapey
18
20
  (0...multi.requests.length).each do |i|
19
21
  if multi.responses[:callback][i]
20
22
  @lock.synchronize do
21
- send callback, urls[i], multi.responses[:callback][i].response, multi.responses[:callback][i].response_header
23
+ if defined? on_success
24
+ send on_success, urls[i], multi.responses[:callback][i].response, multi.responses[:callback][i].response_header
25
+ else
26
+ raise "#{on_success} not defined!"
27
+ end
22
28
  end
23
29
  else
24
- puts "problem downloading #{urls[i]}!"
30
+ if defined? on_error
31
+ send on_error, urls[i], multi.requests[i].error
32
+ else
33
+ raise "#{on_error} not defined!"
34
+ end
25
35
  end
26
36
  end
27
37
  EventMachine.stop
data/lib/scrapey.rb CHANGED
@@ -13,7 +13,7 @@ include Scrapey
13
13
  # some defaults that I like
14
14
  @agent ||= Mechanize.new{|a| a.history.max_size = 10}
15
15
  @agent.user_agent = "Scrapey v#{Scrapey::VERSION} - #{Scrapey::URL}"
16
-
16
+ @agent.verify_mode = OpenSSL::SSL::VERIFY_NONE
17
17
  # default output file
18
18
  @output = 'output.csv'
19
19
 
data/output.csv ADDED
@@ -0,0 +1,3 @@
1
+ url,status
2
+ http://www.bing.com/,200
3
+ http://www.bing.com/404.html,404
data/ponsesq ADDED
@@ -0,0 +1,593 @@
1
+ => #<EventMachine::MultiRequest:0x2237178
2
+ @callbacks=[],
3
+ @deferred_args=[#<EventMachine::MultiRequest:0x2237178 ...>],
4
+ @deferred_status=:succeeded,
5
+ @deferred_timeout=nil,
6
+ @errbacks=nil,
7
+ @requests=
8
+ {0=>
9
+ #<EventMachine::HttpClient:0x21adc28
10
+ @callbacks=[],
11
+ @conn=
12
+ #<EventMachine::HttpConnection:0x21f3f30
13
+ @connopts=
14
+ #<HttpConnectionOptions:0x2231100
15
+ @connect_timeout=5,
16
+ @host="locahlost2",
17
+ @inactivity_timeout=10,
18
+ @port=80,
19
+ @proxy=nil,
20
+ @tls={}>,
21
+ @deferred=true,
22
+ @middleware=[],
23
+ @uri="http://locahlost2/foo">,
24
+ @content_charset=nil,
25
+ @content_decoder=nil,
26
+ @cookiejar=
27
+ #<EventMachine::HttpClient::CookieJar:0x21adc70
28
+ @jar=#<CookieJar::Jar:0x21adcb8 @domains={}>>,
29
+ @cookies=[],
30
+ @deferred_args=[#<EventMachine::HttpClient:0x21adc28 ...>],
31
+ @deferred_status=:failed,
32
+ @deferred_timeout=nil,
33
+ @errbacks=[],
34
+ @error="unable to resolve server address",
35
+ @headers=nil,
36
+ @req=
37
+ #<HttpClientOptions:0x21f42f0
38
+ @body=nil,
39
+ @decoding=true,
40
+ @file=nil,
41
+ @followed=0,
42
+ @headers=
43
+ {"User-Agent"=>
44
+ "Scrapey v0.0.5 - https://github.com/monkeysuffrage/scrapey"},
45
+ @host="locahlost2",
46
+ @keepalive=false,
47
+ @method="HEAD",
48
+ @pass_cookies=true,
49
+ @path=nil,
50
+ @port=80,
51
+ @query=nil,
52
+ @redirects=10,
53
+ @uri=#<Addressable::URI:0x10e696c URI:http://locahlost2:80/foo>>,
54
+ @response="",
55
+ @response_header={},
56
+ @state=:response_header,
57
+ @stream=nil>,
58
+ 1=>
59
+ #<EventMachine::HttpClient:0x20ed610
60
+ @callbacks=[],
61
+ @conn=
62
+ #<EventMachine::HttpConnection:0x2136d08
63
+ @clients=[],
64
+ @conn=
65
+ #<EventMachine::HttpStubConnection:0x20c2e60
66
+ @callbacks=[],
67
+ @deferred_args=[],
68
+ @deferred_status=:succeeded,
69
+ @deferred_timeout=nil,
70
+ @errbacks=nil,
71
+ @parent=#<EventMachine::HttpConnection:0x2136d08 ...>,
72
+ @signature=5>,
73
+ @connopts=
74
+ #<HttpConnectionOptions:0x215c7d8
75
+ @connect_timeout=5,
76
+ @host="www.google.com.",
77
+ @inactivity_timeout=10,
78
+ @port=80,
79
+ @proxy=nil,
80
+ @tls={}>,
81
+ @deferred=false,
82
+ @middleware=[],
83
+ @p=#<HTTP::Parser:0x20da578>,
84
+ @peer="\x02\x00\x00PJ}G^\x00\x00\x00\x00\x00\x00\x00\x00",
85
+ @pending=[],
86
+ @uri="http://www.google.com.">,
87
+ @content_charset=nil,
88
+ @content_decoder=nil,
89
+ @cookiejar=
90
+ #<EventMachine::HttpClient::CookieJar:0x20ed670
91
+ @jar=
92
+ #<CookieJar::Jar:0x20e5198
93
+ @domains=
94
+ {".google.com.ph"=>
95
+ {"/"=>
96
+ {"PREF"=>
97
+ PREF=ID=ec6b270e7fe890fa:FF=0:TM=1344992862:LM=1344992862:S=WlLM9juILblGoi5k,
98
+ "NID"=>
99
+ NID=62=5O_DMdySUEeIJXvveuCr1U8UEfYEhurh0X2Is-a5f0xhTw5CxEY9gELcAyCmwqt4MxVLDpvT2anCV79hhXHfd-QPi0zRY8bCiqh7BlH1B3w0wfE3eg0PTR_KbXUJBBFm}}}>>,
100
+ @cookies=[],
101
+ @deferred_args=[#<EventMachine::HttpClient:0x20ed610 ...>],
102
+ @deferred_status=:succeeded,
103
+ @deferred_timeout=nil,
104
+ @errbacks=
105
+ [#<Proc:0x20297e8@C:/Ruby193/lib/ruby/gems/1.9.1/gems/em-http-request-1.0.2/lib/em-http/multi.rb:42>],
106
+ @error=nil,
107
+ @headers=nil,
108
+ @req=
109
+ #<HttpClientOptions:0x2129ec8
110
+ @body=nil,
111
+ @decoding=true,
112
+ @file=nil,
113
+ @followed=1,
114
+ @headers=
115
+ {"User-Agent"=>
116
+ "Scrapey v0.0.5 - https://github.com/monkeysuffrage/scrapey"},
117
+ @host="www.google.com.ph",
118
+ @keepalive=false,
119
+ @method="HEAD",
120
+ @pass_cookies=true,
121
+ @path=nil,
122
+ @port=80,
123
+ @query=nil,
124
+ @redirects=10,
125
+ @uri=#<Addressable::URI:0xe683ec URI:http://www.google.com.ph:80/>>,
126
+ @response="",
127
+ @response_header=
128
+ {"DATE"=>"Wed, 15 Aug 2012 01:07:42 GMT",
129
+ "EXPIRES"=>"-1",
130
+ "CACHE_CONTROL"=>"private, max-age=0",
131
+ "CONTENT_TYPE"=>"text/html; charset=ISO-8859-1",
132
+ "SET_COOKIE"=>
133
+ ["PREF=ID=ec6b270e7fe890fa:FF=0:TM=1344992862:LM=1344992862:S=WlLM9juILblGoi5k; expires=Fri, 15-Aug-2014 01:07:42 GMT; path=/; domain=.google.com.ph",
134
+ "NID=62=5O_DMdySUEeIJXvveuCr1U8UEfYEhurh0X2Is-a5f0xhTw5CxEY9gELcAyCmwqt4MxVLDpvT2anCV79hhXHfd-QPi0zRY8bCiqh7BlH1B3w0wfE3eg0PTR_KbXUJBBFm; expires=Thu, 14-Feb-2013 01:07:42 GMT; path=/; domain=.google.com.ph; HttpOnly"],
135
+ "P3P"=>
136
+ "CP=\"This is not a P3P policy! See http://www.google.com/support/accounts/bin/answer.py?hl=en&answer=151657 for more info.\"",
137
+ "SERVER"=>"gws",
138
+ "X_XSS_PROTECTION"=>"1; mode=block",
139
+ "X_FRAME_OPTIONS"=>"SAMEORIGIN",
140
+ "CONNECTION"=>"close"},
141
+ @state=:finished,
142
+ @stream=nil>,
143
+ 2=>
144
+ #<EventMachine::HttpClient:0x08d8490
145
+ @callbacks=[],
146
+ @conn=
147
+ #<EventMachine::HttpConnection:0x08d91b0
148
+ @clients=[],
149
+ @conn=
150
+ #<EventMachine::HttpStubConnection:0x08d8148
151
+ @callbacks=[],
152
+ @deferred_args=[],
153
+ @deferred_status=:succeeded,
154
+ @deferred_timeout=nil,
155
+ @errbacks=nil,
156
+ @parent=#<EventMachine::HttpConnection:0x08d91b0 ...>,
157
+ @signature=3>,
158
+ @connopts=
159
+ #<HttpConnectionOptions:0x1fed3b0
160
+ @connect_timeout=5,
161
+ @host="www.bing.com",
162
+ @inactivity_timeout=10,
163
+ @port=80,
164
+ @proxy=nil,
165
+ @tls={}>,
166
+ @deferred=false,
167
+ @middleware=[],
168
+ @p=#<HTTP::Parser:0x08d8028>,
169
+ @peer="\x02\x00\x00P|j\xAE\xB2\x00\x00\x00\x00\x00\x00\x00\x00",
170
+ @pending=[],
171
+ @uri="http://www.bing.com/">,
172
+ @content_charset=nil,
173
+ @content_decoder=nil,
174
+ @cookiejar=
175
+ #<EventMachine::HttpClient::CookieJar:0x08d8340
176
+ @jar=
177
+ #<CookieJar::Jar:0x08d8328
178
+ @domains=
179
+ {".bing.com"=>
180
+ {"/"=>
181
+ {"_FS"=>_FS=NU=1,
182
+ "_SS"=>_SS=SID=847F099F99524E2F97F8236B4B203509,
183
+ "SRCHD"=>SRCHD=D=2430787&MS=2430787&AF=NOFORM,
184
+ "SRCHUSR"=>SRCHUSR=AUTOREDIR=0&GEOVAR=&DOB=20120815}},
185
+ "www.bing.com"=>
186
+ {"/"=>
187
+ {"SRCHUID"=>
188
+ SRCHUID=V=2&GUID=28C754BC00C346D19F70AD5235BC50B4}}}>>,
189
+ @cookies=[],
190
+ @deferred_args=[#<EventMachine::HttpClient:0x08d8490 ...>],
191
+ @deferred_status=:succeeded,
192
+ @deferred_timeout=nil,
193
+ @errbacks=[],
194
+ @error=nil,
195
+ @headers=nil,
196
+ @req=
197
+ #<HttpClientOptions:0x08d9030
198
+ @body=nil,
199
+ @decoding=true,
200
+ @file=nil,
201
+ @followed=0,
202
+ @headers=
203
+ {"User-Agent"=>
204
+ "Scrapey v0.0.5 - https://github.com/monkeysuffrage/scrapey"},
205
+ @host="www.bing.com",
206
+ @keepalive=false,
207
+ @method="HEAD",
208
+ @pass_cookies=true,
209
+ @path=nil,
210
+ @port=80,
211
+ @query=nil,
212
+ @redirects=10,
213
+ @uri=#<Addressable::URI:0x46c50c URI:http://www.bing.com:80/>>,
214
+ @response="",
215
+ @response_header=
216
+ {"CACHE_CONTROL"=>"private, max-age=0",
217
+ "CONTENT_TYPE"=>"text/html",
218
+ "P3P"=>"CP=\"NON UNI COM NAV STA LOC CURa DEVa PSAa PSDa OUR IND\"",
219
+ "DATE"=>"Wed, 15 Aug 2012 01:07:42 GMT",
220
+ "CONTENT_LENGTH"=>"1",
221
+ "CONNECTION"=>"close",
222
+ "SET_COOKIE"=>
223
+ ["_FS=NU=1; domain=.bing.com; path=/",
224
+ "_SS=SID=847F099F99524E2F97F8236B4B203509; domain=.bing.com; path=/",
225
+ "SRCHD=D=2430787&MS=2430787&AF=NOFORM; expires=Fri, 15-Aug-2014 01:07:42 GMT; domain=.bing.com; path=/",
226
+ "SRCHUID=V=2&GUID=28C754BC00C346D19F70AD5235BC50B4; expires=Fri, 15-Aug-2014 01:07:42 GMT; path=/",
227
+ "SRCHUSR=AUTOREDIR=0&GEOVAR=&DOB=20120815; expires=Fri, 15-Aug-2014 01:07:42 GMT; domain=.bing.com; path=/"]},
228
+ @state=:finished,
229
+ @stream=nil>,
230
+ 3=>
231
+ #<EventMachine::HttpClient:0x1cc2a98
232
+ @callbacks=[],
233
+ @conn=
234
+ #<EventMachine::HttpConnection:0x08d7698
235
+ @clients=[],
236
+ @conn=
237
+ #<EventMachine::HttpStubConnection:0x1cc22e8
238
+ @callbacks=[],
239
+ @deferred_args=[],
240
+ @deferred_status=:succeeded,
241
+ @deferred_timeout=nil,
242
+ @errbacks=nil,
243
+ @parent=#<EventMachine::HttpConnection:0x08d7698 ...>,
244
+ @signature=4>,
245
+ @connopts=
246
+ #<HttpConnectionOptions:0x08d7d70
247
+ @connect_timeout=5,
248
+ @host="www.bing.com",
249
+ @inactivity_timeout=10,
250
+ @port=80,
251
+ @proxy=nil,
252
+ @tls={}>,
253
+ @deferred=false,
254
+ @middleware=[],
255
+ @p=#<HTTP::Parser:0x1cc2150>,
256
+ @peer="\x02\x00\x00P|j\xAE\xB2\x00\x00\x00\x00\x00\x00\x00\x00",
257
+ @pending=[],
258
+ @uri="http://www.bing.com/404.html">,
259
+ @content_charset=nil,
260
+ @content_decoder=nil,
261
+ @cookiejar=
262
+ #<EventMachine::HttpClient::CookieJar:0x1cc27c8
263
+ @jar=#<CookieJar::Jar:0x1cc2660 @domains={}>>,
264
+ @cookies=[],
265
+ @deferred_args=[#<EventMachine::HttpClient:0x1cc2a98 ...>],
266
+ @deferred_status=:succeeded,
267
+ @deferred_timeout=nil,
268
+ @errbacks=[],
269
+ @error=nil,
270
+ @headers=nil,
271
+ @req=
272
+ #<HttpClientOptions:0x08d7650
273
+ @body=nil,
274
+ @decoding=true,
275
+ @file=nil,
276
+ @followed=0,
277
+ @headers=
278
+ {"User-Agent"=>
279
+ "Scrapey v0.0.5 - https://github.com/monkeysuffrage/scrapey"},
280
+ @host="www.bing.com",
281
+ @keepalive=false,
282
+ @method="HEAD",
283
+ @pass_cookies=true,
284
+ @path=nil,
285
+ @port=80,
286
+ @query=nil,
287
+ @redirects=10,
288
+ @uri=#<Addressable::URI:0xe61f18 URI:http://www.bing.com:80/404.html>>,
289
+ @response="",
290
+ @response_header=
291
+ {"CACHE_CONTROL"=>"no-cache",
292
+ "CONTENT_TYPE"=>"text/html",
293
+ "P3P"=>"CP=\"NON UNI COM NAV STA LOC CURa DEVa PSAa PSDa OUR IND\"",
294
+ "DATE"=>"Wed, 15 Aug 2012 01:07:42 GMT",
295
+ "CONTENT_LENGTH"=>"1",
296
+ "CONNECTION"=>"close"},
297
+ @state=:finished,
298
+ @stream=nil>},
299
+ @responses=
300
+ {:callback=>
301
+ {2=>
302
+ #<EventMachine::HttpClient:0x08d8490
303
+ @callbacks=[],
304
+ @conn=
305
+ #<EventMachine::HttpConnection:0x08d91b0
306
+ @clients=[],
307
+ @conn=
308
+ #<EventMachine::HttpStubConnection:0x08d8148
309
+ @callbacks=[],
310
+ @deferred_args=[],
311
+ @deferred_status=:succeeded,
312
+ @deferred_timeout=nil,
313
+ @errbacks=nil,
314
+ @parent=#<EventMachine::HttpConnection:0x08d91b0 ...>,
315
+ @signature=3>,
316
+ @connopts=
317
+ #<HttpConnectionOptions:0x1fed3b0
318
+ @connect_timeout=5,
319
+ @host="www.bing.com",
320
+ @inactivity_timeout=10,
321
+ @port=80,
322
+ @proxy=nil,
323
+ @tls={}>,
324
+ @deferred=false,
325
+ @middleware=[],
326
+ @p=#<HTTP::Parser:0x08d8028>,
327
+ @peer="\x02\x00\x00P|j\xAE\xB2\x00\x00\x00\x00\x00\x00\x00\x00",
328
+ @pending=[],
329
+ @uri="http://www.bing.com/">,
330
+ @content_charset=nil,
331
+ @content_decoder=nil,
332
+ @cookiejar=
333
+ #<EventMachine::HttpClient::CookieJar:0x08d8340
334
+ @jar=
335
+ #<CookieJar::Jar:0x08d8328
336
+ @domains=
337
+ {".bing.com"=>
338
+ {"/"=>
339
+ {"_FS"=>_FS=NU=1,
340
+ "_SS"=>_SS=SID=847F099F99524E2F97F8236B4B203509,
341
+ "SRCHD"=>SRCHD=D=2430787&MS=2430787&AF=NOFORM,
342
+ "SRCHUSR"=>SRCHUSR=AUTOREDIR=0&GEOVAR=&DOB=20120815}},
343
+ "www.bing.com"=>
344
+ {"/"=>
345
+ {"SRCHUID"=>
346
+ SRCHUID=V=2&GUID=28C754BC00C346D19F70AD5235BC50B4}}}>>,
347
+ @cookies=[],
348
+ @deferred_args=[#<EventMachine::HttpClient:0x08d8490 ...>],
349
+ @deferred_status=:succeeded,
350
+ @deferred_timeout=nil,
351
+ @errbacks=[],
352
+ @error=nil,
353
+ @headers=nil,
354
+ @req=
355
+ #<HttpClientOptions:0x08d9030
356
+ @body=nil,
357
+ @decoding=true,
358
+ @file=nil,
359
+ @followed=0,
360
+ @headers=
361
+ {"User-Agent"=>
362
+ "Scrapey v0.0.5 - https://github.com/monkeysuffrage/scrapey"},
363
+ @host="www.bing.com",
364
+ @keepalive=false,
365
+ @method="HEAD",
366
+ @pass_cookies=true,
367
+ @path=nil,
368
+ @port=80,
369
+ @query=nil,
370
+ @redirects=10,
371
+ @uri=#<Addressable::URI:0x46c50c URI:http://www.bing.com:80/>>,
372
+ @response="",
373
+ @response_header=
374
+ {"CACHE_CONTROL"=>"private, max-age=0",
375
+ "CONTENT_TYPE"=>"text/html",
376
+ "P3P"=>"CP=\"NON UNI COM NAV STA LOC CURa DEVa PSAa PSDa OUR IND\"",
377
+ "DATE"=>"Wed, 15 Aug 2012 01:07:42 GMT",
378
+ "CONTENT_LENGTH"=>"1",
379
+ "CONNECTION"=>"close",
380
+ "SET_COOKIE"=>
381
+ ["_FS=NU=1; domain=.bing.com; path=/",
382
+ "_SS=SID=847F099F99524E2F97F8236B4B203509; domain=.bing.com; path=/",
383
+ "SRCHD=D=2430787&MS=2430787&AF=NOFORM; expires=Fri, 15-Aug-2014 01:07:42 GMT; domain=.bing.com; path=/",
384
+ "SRCHUID=V=2&GUID=28C754BC00C346D19F70AD5235BC50B4; expires=Fri, 15-Aug-2014 01:07:42 GMT; path=/",
385
+ "SRCHUSR=AUTOREDIR=0&GEOVAR=&DOB=20120815; expires=Fri, 15-Aug-2014 01:07:42 GMT; domain=.bing.com; path=/"]},
386
+ @state=:finished,
387
+ @stream=nil>,
388
+ 3=>
389
+ #<EventMachine::HttpClient:0x1cc2a98
390
+ @callbacks=[],
391
+ @conn=
392
+ #<EventMachine::HttpConnection:0x08d7698
393
+ @clients=[],
394
+ @conn=
395
+ #<EventMachine::HttpStubConnection:0x1cc22e8
396
+ @callbacks=[],
397
+ @deferred_args=[],
398
+ @deferred_status=:succeeded,
399
+ @deferred_timeout=nil,
400
+ @errbacks=nil,
401
+ @parent=#<EventMachine::HttpConnection:0x08d7698 ...>,
402
+ @signature=4>,
403
+ @connopts=
404
+ #<HttpConnectionOptions:0x08d7d70
405
+ @connect_timeout=5,
406
+ @host="www.bing.com",
407
+ @inactivity_timeout=10,
408
+ @port=80,
409
+ @proxy=nil,
410
+ @tls={}>,
411
+ @deferred=false,
412
+ @middleware=[],
413
+ @p=#<HTTP::Parser:0x1cc2150>,
414
+ @peer="\x02\x00\x00P|j\xAE\xB2\x00\x00\x00\x00\x00\x00\x00\x00",
415
+ @pending=[],
416
+ @uri="http://www.bing.com/404.html">,
417
+ @content_charset=nil,
418
+ @content_decoder=nil,
419
+ @cookiejar=
420
+ #<EventMachine::HttpClient::CookieJar:0x1cc27c8
421
+ @jar=#<CookieJar::Jar:0x1cc2660 @domains={}>>,
422
+ @cookies=[],
423
+ @deferred_args=[#<EventMachine::HttpClient:0x1cc2a98 ...>],
424
+ @deferred_status=:succeeded,
425
+ @deferred_timeout=nil,
426
+ @errbacks=[],
427
+ @error=nil,
428
+ @headers=nil,
429
+ @req=
430
+ #<HttpClientOptions:0x08d7650
431
+ @body=nil,
432
+ @decoding=true,
433
+ @file=nil,
434
+ @followed=0,
435
+ @headers=
436
+ {"User-Agent"=>
437
+ "Scrapey v0.0.5 - https://github.com/monkeysuffrage/scrapey"},
438
+ @host="www.bing.com",
439
+ @keepalive=false,
440
+ @method="HEAD",
441
+ @pass_cookies=true,
442
+ @path=nil,
443
+ @port=80,
444
+ @query=nil,
445
+ @redirects=10,
446
+ @uri=
447
+ #<Addressable::URI:0xe61f18 URI:http://www.bing.com:80/404.html>>,
448
+ @response="",
449
+ @response_header=
450
+ {"CACHE_CONTROL"=>"no-cache",
451
+ "CONTENT_TYPE"=>"text/html",
452
+ "P3P"=>"CP=\"NON UNI COM NAV STA LOC CURa DEVa PSAa PSDa OUR IND\"",
453
+ "DATE"=>"Wed, 15 Aug 2012 01:07:42 GMT",
454
+ "CONTENT_LENGTH"=>"1",
455
+ "CONNECTION"=>"close"},
456
+ @state=:finished,
457
+ @stream=nil>,
458
+ 1=>
459
+ #<EventMachine::HttpClient:0x20ed610
460
+ @callbacks=[],
461
+ @conn=
462
+ #<EventMachine::HttpConnection:0x2136d08
463
+ @clients=[],
464
+ @conn=
465
+ #<EventMachine::HttpStubConnection:0x20c2e60
466
+ @callbacks=[],
467
+ @deferred_args=[],
468
+ @deferred_status=:succeeded,
469
+ @deferred_timeout=nil,
470
+ @errbacks=nil,
471
+ @parent=#<EventMachine::HttpConnection:0x2136d08 ...>,
472
+ @signature=5>,
473
+ @connopts=
474
+ #<HttpConnectionOptions:0x215c7d8
475
+ @connect_timeout=5,
476
+ @host="www.google.com.",
477
+ @inactivity_timeout=10,
478
+ @port=80,
479
+ @proxy=nil,
480
+ @tls={}>,
481
+ @deferred=false,
482
+ @middleware=[],
483
+ @p=#<HTTP::Parser:0x20da578>,
484
+ @peer="\x02\x00\x00PJ}G^\x00\x00\x00\x00\x00\x00\x00\x00",
485
+ @pending=[],
486
+ @uri="http://www.google.com.">,
487
+ @content_charset=nil,
488
+ @content_decoder=nil,
489
+ @cookiejar=
490
+ #<EventMachine::HttpClient::CookieJar:0x20ed670
491
+ @jar=
492
+ #<CookieJar::Jar:0x20e5198
493
+ @domains=
494
+ {".google.com.ph"=>
495
+ {"/"=>
496
+ {"PREF"=>
497
+ PREF=ID=ec6b270e7fe890fa:FF=0:TM=1344992862:LM=1344992862:S=WlLM9juILblGoi5k,
498
+ "NID"=>
499
+ NID=62=5O_DMdySUEeIJXvveuCr1U8UEfYEhurh0X2Is-a5f0xhTw5CxEY9gELcAyCmwqt4MxVLDpvT2anCV79hhXHfd-QPi0zRY8bCiqh7BlH1B3w0wfE3eg0PTR_KbXUJBBFm}}}>>,
500
+ @cookies=[],
501
+ @deferred_args=[#<EventMachine::HttpClient:0x20ed610 ...>],
502
+ @deferred_status=:succeeded,
503
+ @deferred_timeout=nil,
504
+ @errbacks=
505
+ [#<Proc:0x20297e8@C:/Ruby193/lib/ruby/gems/1.9.1/gems/em-http-request-1.0.2/lib/em-http/multi.rb:42>],
506
+ @error=nil,
507
+ @headers=nil,
508
+ @req=
509
+ #<HttpClientOptions:0x2129ec8
510
+ @body=nil,
511
+ @decoding=true,
512
+ @file=nil,
513
+ @followed=1,
514
+ @headers=
515
+ {"User-Agent"=>
516
+ "Scrapey v0.0.5 - https://github.com/monkeysuffrage/scrapey"},
517
+ @host="www.google.com.ph",
518
+ @keepalive=false,
519
+ @method="HEAD",
520
+ @pass_cookies=true,
521
+ @path=nil,
522
+ @port=80,
523
+ @query=nil,
524
+ @redirects=10,
525
+ @uri=#<Addressable::URI:0xe683ec URI:http://www.google.com.ph:80/>>,
526
+ @response="",
527
+ @response_header=
528
+ {"DATE"=>"Wed, 15 Aug 2012 01:07:42 GMT",
529
+ "EXPIRES"=>"-1",
530
+ "CACHE_CONTROL"=>"private, max-age=0",
531
+ "CONTENT_TYPE"=>"text/html; charset=ISO-8859-1",
532
+ "SET_COOKIE"=>
533
+ ["PREF=ID=ec6b270e7fe890fa:FF=0:TM=1344992862:LM=1344992862:S=WlLM9juILblGoi5k; expires=Fri, 15-Aug-2014 01:07:42 GMT; path=/; domain=.google.com.ph",
534
+ "NID=62=5O_DMdySUEeIJXvveuCr1U8UEfYEhurh0X2Is-a5f0xhTw5CxEY9gELcAyCmwqt4MxVLDpvT2anCV79hhXHfd-QPi0zRY8bCiqh7BlH1B3w0wfE3eg0PTR_KbXUJBBFm; expires=Thu, 14-Feb-2013 01:07:42 GMT; path=/; domain=.google.com.ph; HttpOnly"],
535
+ "P3P"=>
536
+ "CP=\"This is not a P3P policy! See http://www.google.com/support/accounts/bin/answer.py?hl=en&answer=151657 for more info.\"",
537
+ "SERVER"=>"gws",
538
+ "X_XSS_PROTECTION"=>"1; mode=block",
539
+ "X_FRAME_OPTIONS"=>"SAMEORIGIN",
540
+ "CONNECTION"=>"close"},
541
+ @state=:finished,
542
+ @stream=nil>},
543
+ :errback=>
544
+ {0=>
545
+ #<EventMachine::HttpClient:0x21adc28
546
+ @callbacks=[],
547
+ @conn=
548
+ #<EventMachine::HttpConnection:0x21f3f30
549
+ @connopts=
550
+ #<HttpConnectionOptions:0x2231100
551
+ @connect_timeout=5,
552
+ @host="locahlost2",
553
+ @inactivity_timeout=10,
554
+ @port=80,
555
+ @proxy=nil,
556
+ @tls={}>,
557
+ @deferred=true,
558
+ @middleware=[],
559
+ @uri="http://locahlost2/foo">,
560
+ @content_charset=nil,
561
+ @content_decoder=nil,
562
+ @cookiejar=
563
+ #<EventMachine::HttpClient::CookieJar:0x21adc70
564
+ @jar=#<CookieJar::Jar:0x21adcb8 @domains={}>>,
565
+ @cookies=[],
566
+ @deferred_args=[#<EventMachine::HttpClient:0x21adc28 ...>],
567
+ @deferred_status=:failed,
568
+ @deferred_timeout=nil,
569
+ @errbacks=[],
570
+ @error="unable to resolve server address",
571
+ @headers=nil,
572
+ @req=
573
+ #<HttpClientOptions:0x21f42f0
574
+ @body=nil,
575
+ @decoding=true,
576
+ @file=nil,
577
+ @followed=0,
578
+ @headers=
579
+ {"User-Agent"=>
580
+ "Scrapey v0.0.5 - https://github.com/monkeysuffrage/scrapey"},
581
+ @host="locahlost2",
582
+ @keepalive=false,
583
+ @method="HEAD",
584
+ @pass_cookies=true,
585
+ @path=nil,
586
+ @port=80,
587
+ @query=nil,
588
+ @redirects=10,
589
+ @uri=#<Addressable::URI:0x10e696c URI:http://locahlost2:80/foo>>,
590
+ @response="",
591
+ @response_header={},
592
+ @state=:response_header,
593
+ @stream=nil>}}>
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: scrapey
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.4
4
+ version: 0.0.5
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-08-08 00:00:00.000000000 Z
12
+ date: 2012-08-15 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: mechanize
@@ -70,6 +70,8 @@ files:
70
70
  - lib/scrapey/scrapey.rb
71
71
  - lib/scrapey/template.rb
72
72
  - lib/scrapey.rb
73
+ - output.csv
74
+ - ponsesq
73
75
  - scrapey.gemspec
74
76
  - template/config/config.yml
75
77
  - template/Gemfile