rhack 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. data/.gemtest +0 -0
  2. data/CURB-LICENSE +51 -0
  3. data/Gemfile +4 -0
  4. data/History.txt +4 -0
  5. data/LICENSE +51 -0
  6. data/License.txt +17 -0
  7. data/Manifest.txt +61 -0
  8. data/README.txt +12 -0
  9. data/Rakefile +34 -0
  10. data/ext/curb-original/curb.c +977 -0
  11. data/ext/curb-original/curb.h +52 -0
  12. data/ext/curb-original/curb_config.h +235 -0
  13. data/ext/curb-original/curb_easy.c +3455 -0
  14. data/ext/curb-original/curb_easy.h +90 -0
  15. data/ext/curb-original/curb_errors.c +647 -0
  16. data/ext/curb-original/curb_errors.h +129 -0
  17. data/ext/curb-original/curb_macros.h +159 -0
  18. data/ext/curb-original/curb_multi.c +704 -0
  19. data/ext/curb-original/curb_multi.h +26 -0
  20. data/ext/curb-original/curb_postfield.c +523 -0
  21. data/ext/curb-original/curb_postfield.h +40 -0
  22. data/ext/curb-original/curb_upload.c +80 -0
  23. data/ext/curb-original/curb_upload.h +30 -0
  24. data/ext/curb/Makefile +157 -0
  25. data/ext/curb/curb.c +977 -0
  26. data/ext/curb/curb.h +52 -0
  27. data/ext/curb/curb_config.h +235 -0
  28. data/ext/curb/curb_easy.c +3430 -0
  29. data/ext/curb/curb_easy.h +94 -0
  30. data/ext/curb/curb_errors.c +647 -0
  31. data/ext/curb/curb_errors.h +129 -0
  32. data/ext/curb/curb_macros.h +159 -0
  33. data/ext/curb/curb_multi.c +710 -0
  34. data/ext/curb/curb_multi.h +26 -0
  35. data/ext/curb/curb_postfield.c +523 -0
  36. data/ext/curb/curb_postfield.h +40 -0
  37. data/ext/curb/curb_upload.c +80 -0
  38. data/ext/curb/curb_upload.h +30 -0
  39. data/ext/curb/extconf.rb +399 -0
  40. data/lib/cache.rb +44 -0
  41. data/lib/curl-global.rb +151 -0
  42. data/lib/extensions/browser/env.js +697 -0
  43. data/lib/extensions/browser/jquery.js +7180 -0
  44. data/lib/extensions/browser/xmlsax.js +1564 -0
  45. data/lib/extensions/browser/xmlw3cdom_1.js +1444 -0
  46. data/lib/extensions/browser/xmlw3cdom_2.js +2744 -0
  47. data/lib/extensions/curb.rb +125 -0
  48. data/lib/extensions/declarative.rb +153 -0
  49. data/lib/extensions/johnson.rb +63 -0
  50. data/lib/frame.rb +766 -0
  51. data/lib/init.rb +36 -0
  52. data/lib/rhack.rb +16 -0
  53. data/lib/rhack.yml.template +19 -0
  54. data/lib/rhack/proxy/checker.rb +226 -0
  55. data/lib/rhack/proxy/list.rb +196 -0
  56. data/lib/rhack/services.rb +445 -0
  57. data/lib/rhack_in.rb +2 -0
  58. data/lib/scout.rb +591 -0
  59. data/lib/words.rb +37 -0
  60. data/test/test_frame.rb +107 -0
  61. data/test/test_rhack.rb +5 -0
  62. data/test/test_scout.rb +53 -0
  63. metadata +195 -0
@@ -0,0 +1,445 @@
1
+ # encoding: utf-8
2
+ require 'rhack'
3
+
4
+ module HTTPAccessKit
5
+
6
+ class Service
7
+ attr_accessor :f
8
+
9
+ def initialize(service, frame, *args)
10
+ @service = service
11
+ @f = frame || Frame(self.class::URI[service], *args)
12
+ end
13
+
14
+ def go(*args, &block)
15
+ __send__(@service, *args, &block) rescue(Curl.reload)
16
+ end
17
+
18
+ def inspect
19
+ "<##{self.class.self_name}:#{@service.to_s.camelize} service via #{@f.inspect}>"
20
+ end
21
+
22
+ end
23
+
24
+ class ServiceError < Exception; end
25
+
26
+ class Yandex < Service
27
+ __init__
28
+
29
+ unless defined? IGNORE_UPPERCASE
30
+ URI = {
31
+ :speller => "http://speller.yandex.net/services/spellservice.json/checkText",
32
+ :search => "http://www.yandex.ru/yandsearch?lr=213&%s",
33
+ :weather => "http://pogoda.yandex.ru/%d/details/"
34
+ }
35
+
36
+ IGNORE_UPPERCASE = 1
37
+ IGNORE_DIGITS = 2
38
+ IGNORE_URLS = 4
39
+ FIND_REPEAT_WORDS = 8
40
+ IGNORE_LATIN = 16
41
+ NO_SUGGEST = 32
42
+ FLAG_LATIN = 128
43
+ end
44
+
45
# Creates a Yandex service bound to +service+ (defaults to :search).
#
# A user agent is picked from UAS and given a " YB/4.2.0" token unless it
# already contains "YB". The token is appended to a *new* string: UAS.rand
# hands back an element of the shared UAS list, so an in-place `<<` would
# permanently alter that list for every later request in the process.
#
# The frame (or the arguments used to build one, see Service#initialize) is
# seeded with the "yandexuid" and "t" cookies and :eval => false.
#
# @param service [Symbol] key of Yandex::URI naming the method #go dispatches to
# @param frame   [Object, nil] existing frame to reuse; nil builds a new one
def initialize(service=:search, frame=nil)
  ua = UAS.rand
  # Non-destructive append: never mutate the shared UAS entry.
  ua += " YB/4.2.0" if !ua["YB"]
  super service, frame, nil, ua, :ck => {
    "yandexuid"=>"3644005621268702222",
    "t"=>"p"
  }, :eval => false
end
53
+
54
+ def search(text, opts={}, &block)
55
+ uri = URI.search % urlencode(opts.merge(:text=>text))
56
+ @f.run(uri, :proc_result => block) {|page| process page}
57
+ end
58
+
59
+ def process page
60
+ page.find('.p1/.cr').map {|n| [n.at('.cs').href, n.at('.cs').text.strip, (n.at('.kk') || n.at('.k7/div')).text.strip]} if page.html.b
61
+ end
62
+
63
+ def speller(text, opts=23)
64
+ text = text.split_to_lines(10000)
65
+ i = 0
66
+ @f.run({"text" => text[i], "options" => opts}, URI.speller, :json => true) {|pg|
67
+ yield pg.hash
68
+ text[i+=1] && @f.get({"text" => text[i], "options" => opts}, URI.speller, :json => true)
69
+ }
70
+ end
71
+
72
+ def fix_content(doc, opts={})
73
+ nodes = doc.root.text_nodes
74
+ speller(nodes*". ", opts) {|json|
75
+ fix = {}
76
+ json.each {|h| fix[h.word] = h.s[0] if h.s[0]}
77
+ nodes.each {|n|
78
+ fixed = false
79
+ text = n.text
80
+ fix.each {|k, v| fixed = true if text.gsub!(/\b#{k}\b/, v)}
81
+ n.text(text) if fixed
82
+ }
83
+ }
84
+ Curl.wait
85
+ end
86
+
87
+ def weather city=27612, day=nil, &block
88
+ if city.is String
89
+ city = CitiesCodes[city] if defined? CitiesCodes
90
+ raise ServiceError, "can't get weather info for #{city.inspect}:#{city.class}" if !city.is(Fixnum)
91
+ end
92
+ @f.get(URI.weather%city, :proc_result => block) {|pg|
93
+ ary = pg.find('//.b-forecast-details/tbody/tr{_["class"] =~ /t\d/}').map {|e|
94
+ "#{e.at('.date') ? e.at('.date').text+":\n" : ''} - #{e.at('.t').text} - #{e.at('.data').text} - #{e.at('.wind/img').alt} #{e.at('.wind').text} м/с"
95
+ }
96
+ ary = ary[0..11].div(4) + ary[12..-1].div(2)
97
+ day ? ary[day] : ary
98
+ }#.res
99
+ end
100
+
101
+ def self.weather(*args, &block) new(:weather).go *args, &block end
102
+ def self.search(*args, &block) new.go *args, &block end
103
+
104
+ end
105
+
106
+ class Google < Service
107
+ __init__
108
+ URI = {
109
+ :translate => "http://ajax.googleapis.com/ajax/services/language/translate?v=1.0&q=%s&langpair=%s%%7C%s",
110
+ :search => "http://ajax.googleapis.com/ajax/services/search/web?v=1.0&hl=ru&q=%s",
111
+ :detect => "http://ajax.googleapis.com/ajax/services/language/detect?v=1.0&q=%s"
112
+ }
113
+
114
+ Shortcuts = Hash[*%w{
115
+ v ru.wikipedia.org в ru.wikipedia.org вики en.wikipedia.org
116
+ w en.wikipedia.org ев en.wikipedia.org wiki en.wikipedia.org
117
+ lm lurkmore.ru лм lurkmore.ru
118
+ wa world-art.ru ва world-art.ru
119
+ ad anidb.info ад anidb.info
120
+ ed encyclopediadramatica.com ед encyclopediadramatica.com
121
+ }]
122
+
123
+ Langs = *%w{
124
+ af sq am ar hy az eu be bn bh bg my ca chr zh zh-CN zh-TW hr cs da dv nl en eo et tl fi fr gl ka de el gn gu iw hi hu is id iu it ja kn kk km ko ku ky lo lv lt mk ms ml mt mr mn ne no or ps fa pl pt-PT pa ro ru sa sr sd si sk sl es sw sv tg ta tl te th bo tr uk ur uz ug vi
125
+ }
126
+
127
+ def initialize(service=:search, frame=nil)
128
+ super service, frame, :json => true
129
+ end
130
+
131
+ def search(text, opts={}, &block)
132
+ text = "site:#{opts[:site]} #{text}" if opts[:site]
133
+ uri = URI.search % CGI.escape(text)
134
+ @f.run(uri, :proc_result => block) {|page|
135
+ if data = page.hash.responseData.b
136
+ data.results.map! {|res| [res.unescapedUrl, res.titleNoFormatting, res.content]}
137
+ end
138
+ }#.res
139
+ end
140
+
141
+ def detect(text, wait=!block_given?, &block)
142
+ text = text.is(String) ? text[0...600] : text[0]
143
+ uri = URI[:detect] % CGI.escape(text)
144
+ @f.run(uri, :proc_result => block, :wait => wait) {|page|
145
+ (data = page.hash.responseData.b) && data.language
146
+ }
147
+ end
148
+
149
+ def translate(text, to, from=nil, &block)
150
+ text = text.split_to_blocks(600, :syntax) if !text.is Array
151
+ if !from
152
+ if block_given?
153
+ return detect(text) {|from| yield translate(text, to, from)}
154
+ else
155
+ return translate(text, to, detect(text).res)
156
+ end
157
+ end
158
+ res = []
159
+ i = 0
160
+ text.each_with_index {|b, j|
161
+ @f.run(URI.translate%[CGI.escape(text[j]), from, to], :proc_result => block, :wait => false) {|page|
162
+ res[j] = (data = page.hash.responseData.b and data.translatedText)
163
+ (i += 1) == text.size ? res*"\n" : :skip
164
+ }
165
+ }
166
+ Curl.wait if !block_given?
167
+ res*"\n"
168
+ end
169
+
170
+ def self.search(*args, &block) new.search *args, &block end
171
+ def self.tr(*args, &block) new(:translate).translate *args, &block end
172
+
173
+ end
174
+
175
+ class Infoseek < Service
176
+ URI = {:tr => 'http://translation.infoseek.co.jp/'}
177
+
178
+ def initialize frame=nil
179
+ super :tr, frame, :eval => false
180
+ end
181
+
182
+ def get_token page
183
+ @token = page.at('input[name=token]').value
184
+ end
185
+
186
+ def tr(text, direction=:from_ja, &block)
187
+ if @token
188
+ selector = direction.in([:from_ja, :from_jp, :to_en]) ? 1 : 0
189
+ body = {'ac' => 'Text', 'lng' => 'en', 'original' => text, 'selector' => selector, 'token' => @token, 'submit' => ' 翻訳'}
190
+ @f.run(body, :proc_result => block) {|page|
191
+ get_token page
192
+ page.at('textarea[name=converted]').text
193
+ }#.res
194
+ else
195
+ @f.run(:save_result => !block) {|page|
196
+ get_token page
197
+ tr text, direction, &block
198
+ }#.res
199
+ end
200
+ end
201
+
202
+ def self.tr(*args, &block) new.tr *args, &block end
203
+
204
+ end
205
+
206
+ class Youtube < Service
207
+ URI = {:info => "http://www.youtube.com/get_video_info?video_id=%s"}
208
+ attr_reader :track
209
+
210
+ def initialize frame=nil
211
+ super :dl, frame, :eval => false
212
+ @f.ss.each {|s| s.timeout=600}
213
+ require 'open3'
214
+ require 'mp3info'
215
+ end
216
+
217
+ def dl(id, fd=nil, &block)
218
+ if block
219
+ info(id) {|lnk| __dl(lnk, fd, block)}
220
+ else __dl(info(id), fd)
221
+ end
222
+ end
223
+
224
# Downloads the video +id+ and converts it to an mp3 with ffmpeg, then writes
# ID3v2 artist/title tags taken from @track[1..2].
#
# The conversion is skipped when the destination file already exists.
# ffmpeg is started with an argument vector rather than an interpolated shell
# string: the default file name is built from remote video metadata
# (see __dl), so quoting it by hand would allow a stray "'" to break the
# command or inject shell syntax.
#
# @param id  [String] video id or watch URL, forwarded to #dl
# @param mp3 [String, nil] destination path; defaults to the downloaded file
#   name with its ".flv" extension replaced by ".mp3"
def dlmp3(id, mp3=nil)
  dl(id) {|flv|
    # The dot is escaped so only a real ".flv" extension is replaced.
    if !File.file?(df = mp3||flv.sub(/\.flv$/, '.mp3'))
      Open3.popen3('ffmpeg', '-i', flv, '-ab', '262144', '-ar', '44100', df) {|i,o,e|
        if $verbose
          # Echo ffmpeg's progress lines (those starting with "size=").
          # NOTE(review): gets2 is defined outside this file -- presumably a
          # line reader that also splits on "\r"; confirm.
          t = e.gets2 and t and t[/^size=/] and print t until e.eof?
          puts "\n#{t}"
        end
      }
    end
    Mp3Info.open(df, :encoding=>'utf-8') {|tagged|
      tagged.tag2.TPE1, tagged.tag2.TIT2 = @track[1..2]
    } }
end
238
+
239
+ def self.dl(id) new.dl(id) end
240
+ def self.dlmp3(id) new.dlmp3(id) end
241
+
242
+ private
243
+ def info(id, &block)
244
+ @f.run(URI.info%[id[/\/watch/] ?
245
+ id.parseuri.query.v :
246
+ File.basename(id).till(/[&?]/)],:hash=>true,:proc_result=>block){|p|
247
+ res = p.hash
248
+ @track = [res.author, res.creator, res.title]
249
+ CGI.unescape(res.fmt_url_map).split(/,\d+\|/)[0].after('|')
250
+ }#.res
251
+ end
252
+
253
+ def __dl(lnk,fd,block=nil)
254
+ @f.dl(lnk, fd||"files/youtube/#{@track*' - '}.flv", :auto, 5, &block)
255
+ end
256
+
257
+ end
258
+
259
+ class VK < Service
260
+ attr_reader :links, :open_links
261
+ URI = {
262
+ :people => "http://vkontakte.ru/gsearch.php?from=people&ajax=1",
263
+ :login => "http://vkontakte.ru/index.php",
264
+ :id => "http://vkontakte.ru%s"
265
+ }
266
+ DefaultParams = Hash[*%w[
267
+ c[city] 1
268
+ c[country] 1
269
+ c[noiphone] 1
270
+ c[photo] 1
271
+ c[section] people
272
+ c[sex] 1
273
+ c[status] 6
274
+ ]]
275
+ @@reloadable = ReloadablePage {
276
+ if !@title and !@hash
277
+ L << self
278
+ L << @doc
279
+ end
280
+ if @hash == false or @hash.nil? && (!@title or @title["Ошибка"])
281
+ L.info "@title caller.size", binding
282
+ sleep 2
283
+ end
284
+ }
285
+ def self.com; new end
286
+
287
+ class NotFoundError < Exception; end
288
+
289
+ def initialize frame=nil
290
+ super :people, frame, {:cp => true, :relvl => 5, :eval => false}, 5
291
+ @links = []
292
+ @open_links = []
293
+ login
294
+ end
295
+
296
+ def login params={'email'=>'fshm@bk.ru', 'pass'=>'Riddick2', 'expire'=>nil}
297
+ Curl.run
298
+ @f[0].cookies.clear
299
+ @f.get(URI[:login], :json=>nil) {|login_page|
300
+ login_page.submit('form', @f, params, :json=>nil) {|redirection|
301
+ redirection.submit('form', @f, {}, :json=>nil) {|logged|
302
+ @f.each {|s| s.cookies.replace @f[0].cookies}
303
+ }}}
304
+ end
305
+
306
+ def get_links h, pagenum, &block
307
+ @f.run(h.merge('offset' => pagenum*20), URI[:people], :proc_result=>block, :result=>@@reloadable, :json => true) {|page|
308
+ ls = Page(page.hash.rows).get_links('.image/a')
309
+ @links.concat ls
310
+ ls
311
+ }
312
+ end
313
+
314
+ def people(q, *args, &block)
315
+ age, opts = args.get_opts [17..23]
316
+ h = DefaultParams.merge('c[q]' => q)
317
+ h.merge! Hash[opts.map {|k,v| ["c[#{k}]", v]}]
318
+ h['c[age_from]'], h['c[age_to]'] = age.first, age.last
319
+
320
+ @f.run(h, URI[:people], :proc_result => block, :json => true) {|page|
321
+ # ответом может быть невнятное требование залогиниться
322
+ sum = page.hash.summary.sub(/<span.+>/, '')
323
+ puts sum
324
+ found = sum[/\d+/]
325
+ if !found
326
+ L.warn sum
327
+ else
328
+ @links.concat Page(page.hash.rows).get_links('.image/a')
329
+ max_page = [50, (found.to_f/20).ceil].min
330
+ (1...max_page).each {|_|
331
+ sleep 0.5
332
+ get_links h, _, &block
333
+ }
334
+ end
335
+ }
336
+ end
337
+
338
+ def get_people q, *opts
339
+ @links = []
340
+ @open_links = []
341
+ people q, *opts
342
+ get_pages q
343
+ end
344
+
345
+ def get_pages q=nil
346
+ @links.uniq.each {|id| get_page id, q; sleep 1.5}
347
+ end
348
+
349
+ def get_page id, q=nil
350
+ q = q ? q.ci.to_re : // unless q.is Regexp
351
+ id_num = id[/\d+/].to_i
352
+ @f.get(id, :result=>@@reloadable) {|p|
353
+ data = p.find('.profileTable//.dataWrap').to_a.b
354
+ if data
355
+ L.debug "!p.at('.basicInfo//.alertmsg') data.contents.join('')[/(\\d\\s*){6,}/] data.contents.join('')[q]", binding
356
+ end
357
+ if data = p.find('.profileTable//.dataWrap').b and
358
+ contents = data.to_a.contents.join.b and contents[q]
359
+ digits = contents[/(\d *){6,9}/]
360
+ bot = (digits and digits[/^\d{7}$/] and id_num.between 852e5, 893e5)
361
+ if !bot and !p.at('.basicInfo//.alertmsg') || digits
362
+ L << "added vk.com#{id}"
363
+ @open_links << id
364
+ elsif bot
365
+ L << "bot #{id_num} detected"
366
+ else tick!
367
+ end
368
+ else tick!
369
+ end
370
+ }
371
+ end
372
+
373
+ end
374
+
375
+ class Mamba < Service
376
+ attr_reader :links, :open_links
377
+ @@login, @@pass = %w{AnotherOneUser AyaHirano8}
378
+ URI = {
379
+ :people => "http://mamba.ru/?",
380
+ :login => "http://mamba.ru/tips/?tip=Login",
381
+ :id => "http://vk.com%s"
382
+ }
383
+ DefaultParams = Hash[*%w[
384
+ c[city] 1
385
+ c[country] 1
386
+ c[noiphone] 1
387
+ c[photo] 1
388
+ c[section] people
389
+ c[sex] 1
390
+ c[status] 6
391
+ ]]
392
+
393
+ def initialize frame=nil
394
+ super :people, frame, {:cp=>{
395
+ "PREV_LOGIN"=>"anotheroneuser", "LOGIN"=>"anotheroneuser", "UID"=>"494809761", "LEVEL"=>"Low", "bar"=>"AShwjUz54RmYnfClOdlMYZylGUU90PUxeFkwlGixrP2ARHDs3A0EbDDxQTEksEm4LPT8FfzpfdiMME1omFz0tVhA5QjcsCgckaSQfIDxI", "s"=>"MJt2J3U9Pnk7Qvpie13lN7rrqmahTrAk", "SECRET"=>"adqH47"},
396
+ :eval=>false, :timeout=>5, :retry=>['TimeoutError']
397
+ }, 5
398
+ @links = []
399
+ @open_links = []
400
+ end
401
+
402
+ def login
403
+ @f.run(URI[:login]) {|p|
404
+ p.submit('.ap-t-c//form', @f, 'login'=>@@login, 'password'=>@@pass, 'level'=>nil) {
405
+ @f.each {|s| s.cookies.replace @f[0].cookies}
406
+ }
407
+ }
408
+ Curl.wait
409
+ end
410
+
411
+ def people
412
+ # TODO
413
+ # ... or not TODO?
414
+ end
415
+ # seems like NOT... LOL
416
+
417
+ end
418
+
419
+ module Downloaders
420
+
421
+ def letitbit(path, &block)
422
+ link = ''
423
+ frame = Frame 'letitbit.net', {:cp => true, :eval => nil}, 1
424
+ frame.run(path, :wait => !block) {|page1|
425
+ page1.submit('#ifree_form', frame) {|page2|
426
+ page2.submit('[action=/download4.php]', frame) {|page3|
427
+ page3.submit('[action=/download3.php]', frame) {|page4|
428
+ t = Thread.new {
429
+ sleep 60
430
+ frame.run({}, '/ajax/download3.php',
431
+ :headers => {"Referer" => "http://letitbit.net/download3.php"}
432
+ ) {|res|
433
+ link << res.html
434
+ block[link] if block
435
+ }
436
+ }
437
+ t.join if !block
438
+ }}}}
439
+ link
440
+ end
441
+
442
+ module_function :letitbit
443
+ end
444
+
445
+ end
data/lib/rhack_in.rb ADDED
@@ -0,0 +1,2 @@
1
+ require 'rhack'
2
+ class Object; include RHACK end
data/lib/scout.rb ADDED
@@ -0,0 +1,591 @@
1
+ # encoding: utf-8
2
+ module Curl
3
+
4
+ def ITT
5
+ res = nil
6
+ HTTPAccessKit::Scout('file://').loadGet(__FILE__) {|c| res = yield}
7
+ loop {if res then break res else sleep 0.01 end}
8
+ end
9
+ module_function :ITT
10
+
11
+ class Response
12
+ __init__
13
+ attr_reader :header, :code, :body, :hash, :timestamp, :time, :req, :date, :error
14
+
15
+ def to_s
16
+ str = '<#'
17
+ if @error
18
+ str << "#{@error[0].self_name}: #{@error[1]}"
19
+ else
20
+ str << (@header[/\d{3}/] == @code.to_s ? @header : "#{@header[/\S+/]} #{@code}") if @header
21
+ if @hash.location
22
+ str << ' '+@req.url if $panic
23
+ str << ' -> '+@hash.location
24
+ end
25
+ str << " (#{@body ? @body.size.bytes : 'No'} Body)"
26
+ str << " [#{@timestamp}]" if @timestamp
27
+ end
28
+ str << '>'
29
+ end
30
+ alias :inspect :to_s
31
+
32
+ def initialize(easy)
33
+ @hash = {}
34
+ @timestamp = @date = @header = nil
35
+ if easy.base.error
36
+ @error = easy.base.error
37
+ else
38
+ if headers = easy.header_str || easy.base.headers
39
+ headers /= "\r\n"
40
+ @header = headers.shift
41
+ headers.each {|h|
42
+ h /= ': '
43
+ if h[0]
44
+ h[0].downcase!
45
+ if h[0] == 'set-cookie'
46
+ (@hash.cookies ||= []) << h[1]
47
+ else
48
+ @hash[h[0]] = h[1]
49
+ end
50
+ end
51
+ }
52
+ @timestamp = if @hash.date
53
+ begin
54
+ @date = @hash.date.to_time
55
+ rescue => e
56
+ (@date = Time.now).strftime("%H:%M:%S")
57
+ L < "Error #{e.class}:#{e.message} with @hash.date = #{@hash.date.inspect}"
58
+ end
59
+ @hash.date[/\d\d:\d\d:\d\d/]
60
+ else
61
+ (@date = Time.now).strftime("%H:%M:%S")
62
+ end
63
+ end
64
+ @code = easy.response_code
65
+ @body = easy.body_str
66
+ @time = easy.total_time
67
+ end
68
+
69
+ @req = {}
70
+ @req.url = easy.last_effective_url
71
+ @req.header = easy.headers
72
+ if range = easy.headers.Range and range[/(\d+)-(\d+)/]
73
+ @req.range = $1.to_i .. $2.to_i
74
+ end
75
+ if easy.base and @req.meth = easy.base.last_method and @req.meth == :post
76
+ @req.body = easy.post_body
77
+ @req.mp = easy.multipart_form_post?
78
+ end
79
+ end
80
+
81
# Class predicate for responses.
#
# A response always answers true for Curl::Response. A failed response
# (one with @error set) additionally answers true for Array, since #[] then
# indexes into the @error array. Every other class yields false.
#
# The previous version used `klass = Curl::Response` (assignment) in the
# error branch, which made a failed response answer truthy for any class.
#
# @param klass [Class] class to test against
# @return [Boolean]
def is(klass)
  if @error
    klass == Array || klass == Curl::Response
  else
    klass == Curl::Response
  end
end
88
+
89
+ def [](key_or_index)
90
+ @error ? @error[key_or_index] : @hash[key_or_index.downcase]
91
+ end
92
+
93
+ end
94
+
95
+ end
96
+
97
+ module HTTPAccessKit
98
+
99
+ class Cookie
100
+ __init__
101
+
102
# Builds a cookie from a raw Set-Cookie value or from explicit data.
#
# Call shapes:
# * (set_cookie_string, scout) -- parses "name=value; path=...; domain=..."
#   and stores the cookie in scout.cookies under its domain attribute, or
#   under scout.uri.host when the string carries no domain.
# * (name, value) -- args[1].to_s becomes the value.
# * ([name, cookie]) -- cookie is an Array [value, path, domain] or a Hash
#   answering value/path/domain.
#
# @path and @domain are regexps matched by #use; both default to the empty
# regexp, which matches everything.
#
# Fixes over the previous version:
# * name/value and attribute pairs are split on the first "=" only, so values
#   containing "=" (e.g. base64 padding) are no longer truncated;
# * the leading-dot test on the domain compares strings. `a[1].ord == ?.`
#   compares an Integer with a String on Ruby >= 1.9 and was always false.
def initialize(*args)
  if args[1].is Scout
    domain, str, scout = nil, *args
    ck = str / /;\s*/
    ck[1..-1].each {|par|
      a = par.split('=', 2)
      case a[0]
        when 'path'; @path = (a[1] == '/') ? // : /^#{Regexp.escape a[1]}/
        when 'domain'
          if a[1][0, 1] == '.'
            # ".example.com" covers the bare domain and any subdomain.
            domain = a[1][1..-1]
            @domain = /(^|\.)#{Regexp.escape(domain)}$/
          else
            domain = a[1]
            @domain = /^#{Regexp.escape(a[1])}$/
          end
      end
    }
    @name, @value = ck[0].split('=', 2)
    L.debug args if !domain
    (scout.cookies[domain || scout.uri.host] ||= {})[@name] = self
  else
    @name, cookie = args[0]
    case cookie
      when Array; @value, @path, @domain = cookie
      when Hash;  @value, @path, @domain = cookie.value, cookie.path, cookie.domain
      else        @value = args[1].to_s
    end
  end
  @path   ||= //
  @domain ||= //
  # Pre-rendered "name=value; " fragment appended by #use.
  @string = "#{@name}=#{@value}; "
end
135
+
136
+ def use(str, uri)
137
+ str << @string if uri.path[@path] and !uri.root || uri.host[@domain]
138
+ end
139
+
140
+ def to_s; @value end
141
+ def inspect; @value.inspect end
142
+
143
+ end
144
+
145
+ class Scout
146
+ __init__
147
+ attr_accessor :timeout, :raise_err, :retry
148
+ attr_accessor :path, :root, :sld, :proxy
149
+ attr_reader :uri
150
+ attr_reader :webproxy, :last_method, :proxystr, :headers, :body, :http, :error
151
+ attr_reader :cookies, :ua, :refforge, :cookieStore, :cookieProc
152
+
153
+ DefaultHeader = {
154
+ "Expect" => "",
155
+ "Keep-Alive" => "300",
156
+ "Accept-Charset" => "windows-1251,utf-8;q=0.7,*;q=0.7",
157
+ "Accept-Language" => "ru,en-us;q=0.7,en;q=0.3",
158
+ "Connection" => "keep-alive"
159
+ }
160
+
161
+ class ProxyError < ArgumentError
162
+ def initialize proxy
163
+ super "incorrect proxy: %s class %s, must be an Array
164
+ proxy format: ['127.0.0.1', '80'], [2130706433, 80], ['someproxy.com', :WebproxyModule]"%[proxy.inspect, proxy.class]
165
+ end
166
+ end
167
+ @@retry = RETRY
168
+
169
+ def initialize(*argv)
170
+ uri, proxy, @ua, @refforge, opts = argv.get_opts ['http://', nil, :rand, 1]
171
+ raise ProxyError, proxy if proxy and (!webproxy && !proxy.is(Array) or webproxy && !proxy.is(String))
172
+ 'http://' >> uri if uri !~ /^\w+:\/\//
173
+ if proxy
174
+ if proxy[1] and proxy[1].to_i == 0
175
+ @webproxy = eval("WebProxy::#{proxy[1]}")
176
+ @proxy = proxy[0].parse(:uri).root
177
+ else
178
+ proxy[0] = proxy[0].to_ip if proxy[0].is Integer
179
+ @proxy = proxy
180
+ end
181
+ end
182
+ @cookies = {}
183
+ @body = {}
184
+ @num = []
185
+ @cookieProc = opts[:cp] || opts[:ck]
186
+ @raise_err = opts[:raise] # no way to use @raise id, it makes any 'raise' call here fail
187
+ @engine = opts[:engine]
188
+ @timeout = opts[:timeout] || $CurlDefaultTimeout || 60
189
+ @post_proc = @get_proc = @head_proc = Proc::NULL
190
+ update uri
191
+ @retry = opts[:retry] || {}
192
+ @retry = {@uri.host => @retry} if @retry.is Array
193
+ end
194
+
195
+ def update(uri)
196
+ if !uri[/^\w+:\/\//]
197
+ '/' >> uri if uri[0,1] != '/'
198
+ @uri = uri.parse:uri
199
+ return
200
+ end
201
+ @uri = uri.parse:uri
202
+ return if @uri.root == @root
203
+ @root = @uri.root
204
+ @sld = @root[/[\w-]+\.[a-z]+$/]
205
+ @path = @uri.fullpath
206
+ if @http
207
+ @http.url = @webproxy ? @proxy : @root
208
+ else
209
+ @http = Curl::Easy(@webproxy ? @proxy : @root)
210
+ @http.base = self
211
+ end
212
+ if @proxy
213
+ @http.proxy_url = @proxy*':' if !@webproxy
214
+ @proxystr = @webproxy ? @proxy[0] : @http.proxy_url
215
+ else @proxystr = 'localhost'
216
+ end
217
+ if @cookieProc.is Hash
218
+ self.main_cks = @cookieProc
219
+ @cookieProc = true
220
+ end
221
+ self
222
+ end
223
+
224
+ def to_s
225
+ str = "<##{self.class.self_name} @ "
226
+ if @webproxy
227
+ str << "#{@proxy} ~ "
228
+ elsif @proxy
229
+ str << @proxy*':'+" ~ "
230
+ end
231
+ str << @root+'>'
232
+ end
233
+ alias :inspect :to_s
234
+
235
+ def update_res
236
+ @outdated = false
237
+ @res = @http.res
238
+ @headers = nil
239
+ @res
240
+ end
241
+
242
+ def res
243
+ if @res && !@outdated
244
+ @res
245
+ else update_res end
246
+ end
247
+
248
+ def req; res.req end
249
+
250
+ def dump
251
+ str = "IP: #{@proxystr}\nRequest: "
252
+ str << ({"Action"=>@root+@path} + @http.headers).dump+@body.dump+"Response: #{res}"
253
+ str << "\nReady" if @ready
254
+ str
255
+ end
256
+
257
+ def fix(path)
258
+ path = path.tr ' ', '+'
259
+ path = expand path if path =~ /^\./
260
+ if update(path) or @uri.root
261
+ path = @webproxy.encode(path) if @webproxy
262
+ else
263
+ path = @webproxy.encode(@root+path) if @webproxy
264
+ end
265
+ path
266
+ end
267
+
268
+ def expand(uri)
269
+ if !@webproxy || @http.last_effective_url
270
+ path = (@http.last_effective_url ? @http.last_effective_url.parse(:uri) : @uri).path
271
+ return uri.sub(/^(\.\.?\/)?/, File.split(uri =~ /^\.\./ ? File.split(path)[0] : path)[0])
272
+ end
273
+ uri
274
+ end
275
+
276
# Sets the request body on @http from +params+.
#
# Without +multipart+ the params are url-encoded into @http.post_body.
# With +multipart+ each pair becomes a Curl::PostField:
# * key "f:<field>" -- file field with a random "<hex>.jpg" name whose
#   content is the value followed by a random string;
# * key "p:<field>" -- file field named after the basename of the value,
#   with content read(value). NOTE(review): `read` is defined outside this
#   file; presumably it returns the content of the file at that path.
# * anything else   -- plain content field (key and value stringified).
# Proc values are called first and their result is used as the value.
#
# The "p:" branch previously referenced an undefined local `f`, which raised
# NameError; the path is held in `v`.
#
# @param params    [Hash] field name => value (or Proc producing the value)
# @param multipart [Object, nil] truthy to build a multipart form post
def mkBody(params, multipart=nil)
  if multipart
    @http.multipart_post_body = params.map {|k, v|
      v = v.call if v.is Proc
      if k =~ /^f:/
        Curl::PostField.file(k[2..-1], "application/octet-stream",
          "#{randstr(16, :hex)}.jpg", v+randstr )
      elsif k =~ /^p:/
        Curl::PostField.file(k[2..-1], "application/octet-stream",
          File.basename(v), read(v) )
      else
        Curl::PostField.content(k.to_s, v.to_s)
      end
    }
  else
    @http.post_body = params.urlencode
  end
end
294
+
295
+ def mkHeader(uri)
296
+ header = DefaultHeader.dup
297
+ if @cookieProc
298
+ cookies = ''
299
+ main_cks.each_value {|v| v.use cookies, @uri}
300
+ header['Cookie'] = cookies[0..-3]
301
+ end
302
+ if @refforge
303
+ ref = @uri.root ? uri : (@webproxy ? @http.host : @root)+uri
304
+ header['Referer'] = ref.match(/(.+)[^\/]*$/)[1]
305
+ end
306
+ header['User-Agent'] = @ua == :rand ? UAS.rand : @ua if @ua
307
+ header
308
+ end
309
+
310
# Extracts Set-Cookie values from +res+ and turns each into a Cookie
# registered on this scout.
#
# +res+ is either a raw header String (one header per line) or a
# Curl::Response, whose collected 'cookies' entry is used directly.
# Returns nil without doing anything when no cookies were found.
#
# The header name is compared with the non-destructive String#downcase:
# `downcase!` returns nil when the string is already lower case, so a
# "set-cookie: ..." line used to be skipped.
def ProcCookies(res)
  ck = []
  case res
    when String
      res.split(/\r?\n/).each {|h|
        hs = h/': '
        ck << hs[1] if hs[0] and hs[0].downcase == 'set-cookie'
      }
    when Curl::Response
      ck = res['cookies']
  end
  return if !ck.b
  ck.each {|c| Cookie(c, self)}
  # StoreCookies if @cookieStore
end
325
+
326
+ def cp_on() @cookieProc = true end
327
+ def cp_off() @cookieProc = false end
328
+
329
+ def main_cks() @cookies[@uri.host] ||= {} end
330
+ def main_cks=(cks)
331
+ @cookies[@uri.host] = @webproxy ?
332
+ @webproxy.ck_encode(@root, cks) :
333
+ cks.map2 {|k, v| Cookie(k, v)}
334
+ end
335
+
336
# Tells whether a failed request should be reloaded.
#
# Retry rules are read from the class-wide @@retry and the per-scout @retry,
# both mapping a host to a list of error class names. Rules whose host
# matches @root are selected; the answer is true only when every selected
# rule lists the class name of err[0]. With no matching rule the answer is
# false.
#
# A host present in only one of the two tables is now tolerated: the missing
# side is treated as an empty list instead of raising TypeError on
# `Array + nil`.
#
# @param err [Array] error pair whose first element is the error class
# @return [Boolean]
def retry?(err)
  # e.g. ['0chan.ru', 'www.nomer.org', 'nomer.org'].select_in('http://www.nomer.org')
  #   => ['www.nomer.org', 'nomer.org']
  exc = (@@retry.keys + @retry.keys).select_in @root
  return false if !exc.b
  # true unless some matching host does not list this error's class name
  !exc.find {|e| !err[0].self_name.in((@@retry[e] || []) + (@retry[e] || []))}
end
343
+
344
+ def loaded?
345
+ $Carier.reqs.include? @http
346
+ end
347
+
348
+ def load!
349
+ $log <= [$Carier, @http]
350
+ unless $Carier.add @http
351
+ $Carier.remove @http
352
+ $Carier.add @http
353
+ end
354
+ rescue RuntimeError => e
355
+ e.message << ". Failed to load allready loaded? easy handler: Bad file descriptor" unless Curl::Err::CurlError === e
356
+ raise e
357
+ end
358
+
359
+ def load(path=@path, headers={}, not_redir=1, relvl=10)
360
+ @http.path = path = fix(path)
361
+ @http.headers = mkHeader(path).merge!(headers)
362
+ if not_redir.b
363
+ @http.follow_location = false
364
+ else
365
+ @http.follow_location = true
366
+ @http.max_redirects = relvl
367
+ end
368
+ @http.timeout = @timeout
369
+
370
+ @http.on_complete {|c|
371
+ @error = nil
372
+ @outdated = true
373
+ ProcCookies c.res if @cookieProc
374
+ yield c if block_given?
375
+ }
376
+ @http.on_failure {|c, e|
377
+ @http.on_complete {}
378
+ @outdated = true
379
+ @error = e
380
+ if retry? e
381
+ L.debug "#{e[0]} -> reloading scout"
382
+ #load uri, headers, not_redir, relvl, &callback
383
+ load! # all params including post_body are still set
384
+ else
385
+ L.debug "#{e[0]} -> not reloading scout"
386
+ raise *e if @raise_err
387
+ end
388
+ } if !@http.on_failure
389
+
390
+ load!
391
+ end
392
+
393
+ def loadPost(*argv, &callback)
394
+ hash, multipart, uri, opts = argv.get_opts [@body, false, @path],
395
+ :headers => {}, :redir => false, :relvl => 2
396
+ mkBody hash, multipart.b
397
+ @last_method = :post
398
+ if block_given?
399
+ @post_proc = callback
400
+ elsif @http.callback != @post_proc
401
+ callback = @post_proc
402
+ end
403
+ load(uri, opts[:headers], !opts[:redir], opts[:relvl], &callback)
404
+ end
405
+
406
+ def loadGet(*argv, &callback)
407
+ uri, opts = argv.get_opts [@path],
408
+ :headers => {}, :redir => false, :relvl => 2
409
+ @http.get = true
410
+ @last_method = :get
411
+ if block_given?
412
+ @get_proc = callback
413
+ elsif @http.callback != @get_proc
414
+ callback = @get_proc
415
+ end
416
+ load(uri, opts[:headers], !opts[:redir], opts[:relvl], &callback)
417
+ end
418
+
419
+ def loadHead(*argv, &callback)
420
+ uri, emulate, headers = argv.get_opts [@path, :if_retry]
421
+ @http.head = true if emulate != :always
422
+ @last_method = :head
423
+ if block_given?
424
+ @head_proc = callback
425
+ elsif @http.callback != @head_proc
426
+ callback = @head_proc
427
+ end
428
+ emu = lambda {
429
+ @headers = ''
430
+ $log << @headers
431
+ @http.on_header {|h|
432
+ $log << @headers
433
+ @headers << h
434
+ h == "\r\n" ? 0 : h.size
435
+ }
436
+ @http.get = true
437
+ load(uri, headers) {|c| c.on_header; callback[c]}
438
+ }
439
+ if emulate != :always
440
+ load(uri, headers) {|c|
441
+ if !@error and c.res.code != 200 and emulate == :if_retry
442
+ $log << @headers
443
+ emu.call
444
+ else
445
+ callback[c]
446
+ end
447
+ }
448
+ else emu.call
449
+ end
450
+ end
451
+
452
+ end
453
+
454
+ class PickError < IndexError
455
+ def initialize
456
+ super "can't get scout from empty squad" end
457
+ end
458
+
459
+ class ScoutSquad < Array
460
+ __init__
461
+
462
+ def initialize(*args)
463
+ raise ArgumentError, "can't create empty squad" if (num = args.pop) < 1
464
+ proxies = nil
465
+ super []
466
+ if args[0].is Scout
467
+ s = args[0]
468
+ else
469
+ if !args[0].is String
470
+ args.unshift ''
471
+ if (opts = args[-1]).is Hash and (opts[:cp] || opts[:ck]).is Hash
472
+ L.warn "it's useless to setup cookies for untargeted squad!"
473
+ end
474
+ end
475
+ if args[1] and args[1][0].is Array
476
+ proxies = args[1]
477
+ args[1] = proxies.shift
478
+ end
479
+ self[0] = s = Scout(*args)
480
+ num -=1
481
+ end
482
+ num.times {|i|
483
+ self << Scout(s.root+s.path, (proxies ? proxies[i] : s.proxy), s.ua, s.refforge, :ck => s.main_cks, :raise => s.raise_err, :timeout => s.timeout, :retry => s.retry)
484
+ }
485
+ end
486
+
487
+ def update uri, forced=nil
488
+ each {|s| return L.warn "failed to update scout loaded? with url: #{s.http.url}" if s.loaded?} if !forced
489
+ each {|s| s.update uri}
490
+ end
491
+
492
+ def untargeted
493
+ first.root == 'http://'
494
+ end
495
+
496
+ def rand
497
+ raise PickError if !b
498
+ # to_a because reject returns object of this class
499
+ if scout = to_a.rand {|_|!_.loaded?}; scout
500
+ else # Curl should run here, otherwise `next'/`rand'-recursion will cause stack overflow
501
+ raise "Curl must run in order to use ScoutSquad#rand" if !Curl.status
502
+ Curl.wait
503
+ self.rand
504
+ end
505
+ end
506
+
507
+ def next
508
+ raise PickError if !b
509
+ if scout = find {|_|!_.loaded?}; scout
510
+ else # Curl should run here, otherwise `next'/`rand'-recursion will cause stack overflow
511
+ raise "Curl must run in order to use ScoutSquad#next" if !Curl.status
512
+ Curl.wait
513
+ self.next
514
+ end
515
+ end
516
+
517
+ def to_s
518
+ str = '<#ScoutSquad @ '
519
+ if b
520
+ if first.webproxy
521
+ str << "#{first.proxy} ~ "
522
+ elsif first.proxy
523
+ str << first.proxy*':'+" ~ "
524
+ end
525
+ str << "#{untargeted ? "no target" : first.root} "
526
+ end
527
+ str << "x#{size}>"
528
+ end
529
+ alias :inspect :to_s
530
+
531
+ end
532
+
533
+ end
534
+
535
+ ### Global scope shortcut methods ###
536
+
537
+ module RMTools
538
+
539
# One-shot GET helper built on a throw-away Scout.
#
# Options (long / short key):
#   :headers  / :h  -- extra request headers (default {})
#   :proxy    / :pr -- proxy for the scout (default $CurlGetProxy)
#   :ret_body / :b  -- truthy: return the body string (default);
#                      falsy: return the response object
#   :wait     / :w  -- block until the request completes
#                      (default: true when no block is given)
#   :e              -- used for the scout's raise_err when not already set
#   :t              -- used for the handle's timeout when not already set
# The whole opts hash is also forwarded to the Scout constructor.
#
# When a block is given it is called with the result buffer on completion.
#
# The headers are passed to loadGet under the :headers key. Passing the bare
# Hash made loadGet take it as its trailing options hash, so custom headers
# were never sent.
#
# @raise [ArgumentError] when +uri+ starts with "/"
# @return [String, Object] the body buffer or the response object
def Get(uri, opts={})
  raise ArgumentError, "Local uri passed to Get function" if uri[0,1] == '/'
  $log.debug "Protocol-less uri passed to Get function" if !uri[/^\w+:\/\//]
  headers = opts[:headers] || opts[:h] || {}
  proxy = opts[:proxy] || opts[:pr] || $CurlGetProxy
  ret_body = opts.fetch(:ret_body, opts.fetch(:b, 1)).b
  wait = opts.fetch(:wait, opts.fetch(:w, !block_given?)).b
  s = HTTPAccessKit::Scout(uri, proxy, opts)
  buf = ret_body ? '' : s.http.res
  s.raise_err ||= opts[:e]
  s.http.timeout ||= opts[:t]
  s.loadGet(:headers => headers) {|c|
    if ret_body
      buf << c.body_str
    else
      buf.load_from c.res
    end
    yield buf if block_given?
  }
  if wait
    # Use the running carrier thread when there is one, otherwise drive the
    # multi handle synchronously.
    ($CarierThread and $CarierThread.status) ? Curl.wait : $Carier.perform
  end
  buf
end
563
+ module_function :Get
564
+
565
+ end
566
+
567
+ module Enumerable
568
+
569
# Issues a Get for every URI in the collection (values for collections that
# respond to each_value, elements otherwise).
#
# @param on_count [Proc, nil] called once after the last response arrived;
#   receives the last buffer when its arity is positive
# @param default_domain [String, nil] prefix joined onto URIs that start
#   with "/" or lack an http(s) scheme
# @yield [buf, counter] each response buffer; the second argument (number of
#   responses completed so far) is passed only when on_count is given and
#   the block takes more than one parameter
#
# default_domain is now honoured whether or not on_count is given; it used
# to be silently ignored when on_count was nil.
def GetAll(on_count=nil, default_domain=nil, &callback)
  iterator = resto(:each_value) ? :each_value : :each
  qualify = lambda {|uri|
    if default_domain and (uri[0,1] == '/' or !uri[/^https?:/])
      File.join(default_domain, uri)
    else
      uri
    end
  }
  if on_count
    len = size
    counter = 0
    send(iterator) {|uri|
      Get(qualify[uri]) {|buf|
        callback.arity > 1 ?
          callback.call(buf, counter) :
          callback.call(buf)
        if (counter += 1) == len
          on_count.arity > 0 ?
            on_count.call(buf) :
            on_count.call
        end
      }
    }
  else
    send(iterator) {|uri| Get(qualify[uri], &callback) }
  end
end
590
+
591
+ end