rhack 0.2.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (63) hide show
  1. data/.gemtest +0 -0
  2. data/CURB-LICENSE +51 -0
  3. data/Gemfile +4 -0
  4. data/History.txt +4 -0
  5. data/LICENSE +51 -0
  6. data/License.txt +17 -0
  7. data/Manifest.txt +61 -0
  8. data/README.txt +12 -0
  9. data/Rakefile +34 -0
  10. data/ext/curb-original/curb.c +977 -0
  11. data/ext/curb-original/curb.h +52 -0
  12. data/ext/curb-original/curb_config.h +235 -0
  13. data/ext/curb-original/curb_easy.c +3455 -0
  14. data/ext/curb-original/curb_easy.h +90 -0
  15. data/ext/curb-original/curb_errors.c +647 -0
  16. data/ext/curb-original/curb_errors.h +129 -0
  17. data/ext/curb-original/curb_macros.h +159 -0
  18. data/ext/curb-original/curb_multi.c +704 -0
  19. data/ext/curb-original/curb_multi.h +26 -0
  20. data/ext/curb-original/curb_postfield.c +523 -0
  21. data/ext/curb-original/curb_postfield.h +40 -0
  22. data/ext/curb-original/curb_upload.c +80 -0
  23. data/ext/curb-original/curb_upload.h +30 -0
  24. data/ext/curb/Makefile +157 -0
  25. data/ext/curb/curb.c +977 -0
  26. data/ext/curb/curb.h +52 -0
  27. data/ext/curb/curb_config.h +235 -0
  28. data/ext/curb/curb_easy.c +3430 -0
  29. data/ext/curb/curb_easy.h +94 -0
  30. data/ext/curb/curb_errors.c +647 -0
  31. data/ext/curb/curb_errors.h +129 -0
  32. data/ext/curb/curb_macros.h +159 -0
  33. data/ext/curb/curb_multi.c +710 -0
  34. data/ext/curb/curb_multi.h +26 -0
  35. data/ext/curb/curb_postfield.c +523 -0
  36. data/ext/curb/curb_postfield.h +40 -0
  37. data/ext/curb/curb_upload.c +80 -0
  38. data/ext/curb/curb_upload.h +30 -0
  39. data/ext/curb/extconf.rb +399 -0
  40. data/lib/cache.rb +44 -0
  41. data/lib/curl-global.rb +151 -0
  42. data/lib/extensions/browser/env.js +697 -0
  43. data/lib/extensions/browser/jquery.js +7180 -0
  44. data/lib/extensions/browser/xmlsax.js +1564 -0
  45. data/lib/extensions/browser/xmlw3cdom_1.js +1444 -0
  46. data/lib/extensions/browser/xmlw3cdom_2.js +2744 -0
  47. data/lib/extensions/curb.rb +125 -0
  48. data/lib/extensions/declarative.rb +153 -0
  49. data/lib/extensions/johnson.rb +63 -0
  50. data/lib/frame.rb +766 -0
  51. data/lib/init.rb +36 -0
  52. data/lib/rhack.rb +16 -0
  53. data/lib/rhack.yml.template +19 -0
  54. data/lib/rhack/proxy/checker.rb +226 -0
  55. data/lib/rhack/proxy/list.rb +196 -0
  56. data/lib/rhack/services.rb +445 -0
  57. data/lib/rhack_in.rb +2 -0
  58. data/lib/scout.rb +591 -0
  59. data/lib/words.rb +37 -0
  60. data/test/test_frame.rb +107 -0
  61. data/test/test_rhack.rb +5 -0
  62. data/test/test_scout.rb +53 -0
  63. metadata +195 -0
@@ -0,0 +1,445 @@
1
+ # encoding: utf-8
2
+ require 'rhack'
3
+
4
+ module HTTPAccessKit
5
+
6
# Base class for the service wrappers defined in this file.
#
# A service object remembers which of its own methods is the "current"
# service (@service) and the frame (@f) that performs the requests.
class Service
  attr_accessor :f

  # service - name of the instance method that #go dispatches to; also the
  #           key looked up in the subclass's URI table.
  # frame   - an existing frame to reuse; when nil/false a new one is built
  #           via Frame(URI[service], *args).
  def initialize(service, frame, *args)
    @service = service
    @f = frame
    @f ||= Frame(self.class::URI[service], *args)
  end

  # Calls the method named by @service with the given arguments and block.
  # Any StandardError raised by the call is swallowed and the result of
  # Curl.reload is returned instead.
  def go(*args, &block)
    begin
      __send__(@service, *args, &block)
    rescue StandardError
      Curl.reload
    end
  end

  def inspect
    owner = self.class.self_name
    service_name = @service.to_s.camelize
    "<##{owner}:#{service_name} service via #{@f.inspect}>"
  end

end
23
+
24
+ class ServiceError < Exception; end
25
+
26
+ class Yandex < Service
27
+ __init__
28
+
29
+ unless defined? IGNORE_UPPERCASE
30
+ URI = {
31
+ :speller => "http://speller.yandex.net/services/spellservice.json/checkText",
32
+ :search => "http://www.yandex.ru/yandsearch?lr=213&%s",
33
+ :weather => "http://pogoda.yandex.ru/%d/details/"
34
+ }
35
+
36
+ IGNORE_UPPERCASE = 1
37
+ IGNORE_DIGITS = 2
38
+ IGNORE_URLS = 4
39
+ FIND_REPEAT_WORDS = 8
40
+ IGNORE_LATIN = 16
41
+ NO_SUGGEST = 32
42
+ FLAG_LATIN = 128
43
+ end
44
+
45
+ def initialize(service=:search, frame=nil)
46
+ ua = UAS.rand
47
+ ua << " YB/4.2.0" if !ua["YB"]
48
+ super service, frame, nil, ua, :ck => {
49
+ "yandexuid"=>"3644005621268702222",
50
+ "t"=>"p"
51
+ }, :eval => false
52
+ end
53
+
54
+ def search(text, opts={}, &block)
55
+ uri = URI.search % urlencode(opts.merge(:text=>text))
56
+ @f.run(uri, :proc_result => block) {|page| process page}
57
+ end
58
+
59
+ def process page
60
+ page.find('.p1/.cr').map {|n| [n.at('.cs').href, n.at('.cs').text.strip, (n.at('.kk') || n.at('.k7/div')).text.strip]} if page.html.b
61
+ end
62
+
63
+ def speller(text, opts=23)
64
+ text = text.split_to_lines(10000)
65
+ i = 0
66
+ @f.run({"text" => text[i], "options" => opts}, URI.speller, :json => true) {|pg|
67
+ yield pg.hash
68
+ text[i+=1] && @f.get({"text" => text[i], "options" => opts}, URI.speller, :json => true)
69
+ }
70
+ end
71
+
72
# Spell-checks the text nodes of +doc+ through #speller and rewrites each
# node in place, replacing every flagged word with its first suggestion.
#
# doc  - document object; must respond to root.text_nodes.
# opts - forwarded unchanged to #speller as its options argument.
#        NOTE(review): #speller defaults this to the integer 23 while the
#        default here is a Hash - confirm which one the service expects.
#
# Calls Curl.wait before returning.
def fix_content(doc, opts={})
  nodes = doc.root.text_nodes
  speller(nodes*". ", opts) {|json|
    fix = {}
    json.each {|h| fix[h.word] = h.s[0] if h.s[0]}
    # Build the patterns once per response. The word is escaped so that regexp
    # metacharacters in it (".", "+", "(" ...) are matched literally.
    patterns = fix.map {|word, suggestion| [/\b#{Regexp.escape(word)}\b/, suggestion]}
    nodes.each {|n|
      fixed = false
      text = n.text
      # Block form of gsub! inserts the suggestion verbatim; the string form
      # would interpret backslash sequences such as "\1" inside it.
      patterns.each {|re, suggestion| fixed = true if text.gsub!(re) { suggestion }}
      n.text(text) if fixed
    }
  }
  Curl.wait
end
86
+
87
# Fetches the detailed forecast page for a city and turns its table rows
# into text lines.
#
# city  - numeric city code, or a String looked up in CitiesCodes when that
#         constant is defined.
# day   - optional index into the grouped result; when nil the whole grouped
#         array is produced.
# block - passed to the frame as :proc_result.
#
# Raises ServiceError when a String city cannot be resolved to an Integer code.
def weather(city=27612, day=nil, &block)
  if city.is String
    city = CitiesCodes[city] if defined? CitiesCodes
    # Integer rather than Fixnum: Fixnum was removed from Ruby 3.2 and
    # Integer matches the same values on older versions.
    raise ServiceError, "can't get weather info for #{city.inspect}:#{city.class}" if !city.is(Integer)
  end
  @f.get(URI.weather%city, :proc_result => block) {|pg|
    ary = pg.find('//.b-forecast-details/tbody/tr{_["class"] =~ /t\d/}').map {|e|
      "#{e.at('.date') ? e.at('.date').text+":\n" : ''} - #{e.at('.t').text} - #{e.at('.data').text} - #{e.at('.wind/img').alt} #{e.at('.wind').text} м/с"
    }
    # The first 12 rows are grouped by 4, the remainder by 2. The slice is nil
    # when fewer than 12 rows were scraped, so fall back to an empty array.
    ary = ary[0..11].div(4) + (ary[12..-1] || []).div(2)
    day ? ary[day] : ary
  }#.res
end
100
+
101
+ def self.weather(*args, &block) new(:weather).go *args, &block end
102
+ def self.search(*args, &block) new.go *args, &block end
103
+
104
+ end
105
+
106
+ class Google < Service
107
+ __init__
108
+ URI = {
109
+ :translate => "http://ajax.googleapis.com/ajax/services/language/translate?v=1.0&q=%s&langpair=%s%%7C%s",
110
+ :search => "http://ajax.googleapis.com/ajax/services/search/web?v=1.0&hl=ru&q=%s",
111
+ :detect => "http://ajax.googleapis.com/ajax/services/language/detect?v=1.0&q=%s"
112
+ }
113
+
114
+ Shortcuts = Hash[*%w{
115
+ v ru.wikipedia.org в ru.wikipedia.org вики en.wikipedia.org
116
+ w en.wikipedia.org ев en.wikipedia.org wiki en.wikipedia.org
117
+ lm lurkmore.ru лм lurkmore.ru
118
+ wa world-art.ru ва world-art.ru
119
+ ad anidb.info ад anidb.info
120
+ ed encyclopediadramatica.com ед encyclopediadramatica.com
121
+ }]
122
+
123
+ Langs = *%w{
124
+ af sq am ar hy az eu be bn bh bg my ca chr zh zh-CN zh-TW hr cs da dv nl en eo et tl fi fr gl ka de el gn gu iw hi hu is id iu it ja kn kk km ko ku ky lo lv lt mk ms ml mt mr mn ne no or ps fa pl pt-PT pa ro ru sa sr sd si sk sl es sw sv tg ta tl te th bo tr uk ur uz ug vi
125
+ }
126
+
127
+ def initialize(service=:search, frame=nil)
128
+ super service, frame, :json => true
129
+ end
130
+
131
+ def search(text, opts={}, &block)
132
+ text = "site:#{opts[:site]} #{text}" if opts[:site]
133
+ uri = URI.search % CGI.escape(text)
134
+ @f.run(uri, :proc_result => block) {|page|
135
+ if data = page.hash.responseData.b
136
+ data.results.map! {|res| [res.unescapedUrl, res.titleNoFormatting, res.content]}
137
+ end
138
+ }#.res
139
+ end
140
+
141
+ def detect(text, wait=!block_given?, &block)
142
+ text = text.is(String) ? text[0...600] : text[0]
143
+ uri = URI[:detect] % CGI.escape(text)
144
+ @f.run(uri, :proc_result => block, :wait => wait) {|page|
145
+ (data = page.hash.responseData.b) && data.language
146
+ }
147
+ end
148
+
149
+ def translate(text, to, from=nil, &block)
150
+ text = text.split_to_blocks(600, :syntax) if !text.is Array
151
+ if !from
152
+ if block_given?
153
+ return detect(text) {|from| yield translate(text, to, from)}
154
+ else
155
+ return translate(text, to, detect(text).res)
156
+ end
157
+ end
158
+ res = []
159
+ i = 0
160
+ text.each_with_index {|b, j|
161
+ @f.run(URI.translate%[CGI.escape(text[j]), from, to], :proc_result => block, :wait => false) {|page|
162
+ res[j] = (data = page.hash.responseData.b and data.translatedText)
163
+ (i += 1) == text.size ? res*"\n" : :skip
164
+ }
165
+ }
166
+ Curl.wait if !block_given?
167
+ res*"\n"
168
+ end
169
+
170
+ def self.search(*args, &block) new.search *args, &block end
171
+ def self.tr(*args, &block) new(:translate).translate *args, &block end
172
+
173
+ end
174
+
175
+ class Infoseek < Service
176
+ URI = {:tr => 'http://translation.infoseek.co.jp/'}
177
+
178
+ def initialize frame=nil
179
+ super :tr, frame, :eval => false
180
+ end
181
+
182
+ def get_token page
183
+ @token = page.at('input[name=token]').value
184
+ end
185
+
186
+ def tr(text, direction=:from_ja, &block)
187
+ if @token
188
+ selector = direction.in([:from_ja, :from_jp, :to_en]) ? 1 : 0
189
+ body = {'ac' => 'Text', 'lng' => 'en', 'original' => text, 'selector' => selector, 'token' => @token, 'submit' => ' 翻訳'}
190
+ @f.run(body, :proc_result => block) {|page|
191
+ get_token page
192
+ page.at('textarea[name=converted]').text
193
+ }#.res
194
+ else
195
+ @f.run(:save_result => !block) {|page|
196
+ get_token page
197
+ tr text, direction, &block
198
+ }#.res
199
+ end
200
+ end
201
+
202
+ def self.tr(*args, &block) new.tr *args, &block end
203
+
204
+ end
205
+
206
+ class Youtube < Service
207
+ URI = {:info => "http://www.youtube.com/get_video_info?video_id=%s"}
208
+ attr_reader :track
209
+
210
+ def initialize frame=nil
211
+ super :dl, frame, :eval => false
212
+ @f.ss.each {|s| s.timeout=600}
213
+ require 'open3'
214
+ require 'mp3info'
215
+ end
216
+
217
+ def dl(id, fd=nil, &block)
218
+ if block
219
+ info(id) {|lnk| __dl(lnk, fd, block)}
220
+ else __dl(info(id), fd)
221
+ end
222
+ end
223
+
224
# Downloads the video +id+ via #dl, converts it to mp3 with ffmpeg (unless
# the target file already exists) and writes the TPE1/TIT2 tags from @track.
#
# id  - video id or URL, passed to #dl.
# mp3 - optional target path; defaults to the downloaded file's path with a
#       trailing ".flv" replaced by ".mp3".
def dlmp3(id, mp3=nil)
  dl(id) {|flv|
    df = mp3 || flv.sub(/\.flv\z/, '.mp3')
    if !File.file?(df)
      # Argument-vector form: no shell is involved, so quotes or other shell
      # metacharacters in the file names (built from remote track metadata)
      # cannot alter the command.
      Open3.popen3('ffmpeg', '-i', flv, '-ab', '262144', '-ar', '44100', df) {|i, o, e|
        i.close
        if $verbose
          t = e.gets2 and t and t[/^size=/] and print t until e.eof?
          puts "\n#{t}"
        else
          # Drain stderr until EOF so ffmpeg neither blocks on a full pipe
          # nor gets its pipe closed before it has finished.
          e.read
        end
      }
    end
    Mp3Info.open(df, :encoding=>'utf-8') {|tags|
      tags.tag2.TPE1, tags.tag2.TIT2 = @track[1..2]
    }
  }
end
238
+
239
+ def self.dl(id) new.dl(id) end
240
+ def self.dlmp3(id) new.dlmp3(id) end
241
+
242
+ private
243
+ def info(id, &block)
244
+ @f.run(URI.info%[id[/\/watch/] ?
245
+ id.parseuri.query.v :
246
+ File.basename(id).till(/[&?]/)],:hash=>true,:proc_result=>block){|p|
247
+ res = p.hash
248
+ @track = [res.author, res.creator, res.title]
249
+ CGI.unescape(res.fmt_url_map).split(/,\d+\|/)[0].after('|')
250
+ }#.res
251
+ end
252
+
253
+ def __dl(lnk,fd,block=nil)
254
+ @f.dl(lnk, fd||"files/youtube/#{@track*' - '}.flv", :auto, 5, &block)
255
+ end
256
+
257
+ end
258
+
259
+ class VK < Service
260
+ attr_reader :links, :open_links
261
+ URI = {
262
+ :people => "http://vkontakte.ru/gsearch.php?from=people&ajax=1",
263
+ :login => "http://vkontakte.ru/index.php",
264
+ :id => "http://vkontakte.ru%s"
265
+ }
266
+ DefaultParams = Hash[*%w[
267
+ c[city] 1
268
+ c[country] 1
269
+ c[noiphone] 1
270
+ c[photo] 1
271
+ c[section] people
272
+ c[sex] 1
273
+ c[status] 6
274
+ ]]
275
+ @@reloadable = ReloadablePage {
276
+ if !@title and !@hash
277
+ L << self
278
+ L << @doc
279
+ end
280
+ if @hash == false or @hash.nil? && (!@title or @title["Ошибка"])
281
+ L.info "@title caller.size", binding
282
+ sleep 2
283
+ end
284
+ }
285
+ def self.com; new end
286
+
287
+ class NotFoundError < Exception; end
288
+
289
+ def initialize frame=nil
290
+ super :people, frame, {:cp => true, :relvl => 5, :eval => false}, 5
291
+ @links = []
292
+ @open_links = []
293
+ login
294
+ end
295
+
296
+ def login params={'email'=>'fshm@bk.ru', 'pass'=>'Riddick2', 'expire'=>nil}
297
+ Curl.run
298
+ @f[0].cookies.clear
299
+ @f.get(URI[:login], :json=>nil) {|login_page|
300
+ login_page.submit('form', @f, params, :json=>nil) {|redirection|
301
+ redirection.submit('form', @f, {}, :json=>nil) {|logged|
302
+ @f.each {|s| s.cookies.replace @f[0].cookies}
303
+ }}}
304
+ end
305
+
306
+ def get_links h, pagenum, &block
307
+ @f.run(h.merge('offset' => pagenum*20), URI[:people], :proc_result=>block, :result=>@@reloadable, :json => true) {|page|
308
+ ls = Page(page.hash.rows).get_links('.image/a')
309
+ @links.concat ls
310
+ ls
311
+ }
312
+ end
313
+
314
+ def people(q, *args, &block)
315
+ age, opts = args.get_opts [17..23]
316
+ h = DefaultParams.merge('c[q]' => q)
317
+ h.merge! Hash[opts.map {|k,v| ["c[#{k}]", v]}]
318
+ h['c[age_from]'], h['c[age_to]'] = age.first, age.last
319
+
320
+ @f.run(h, URI[:people], :proc_result => block, :json => true) {|page|
321
+ # ответом может быть невнятное требование залогиниться
322
+ sum = page.hash.summary.sub(/<span.+>/, '')
323
+ puts sum
324
+ found = sum[/\d+/]
325
+ if !found
326
+ L.warn sum
327
+ else
328
+ @links.concat Page(page.hash.rows).get_links('.image/a')
329
+ max_page = [50, (found.to_f/20).ceil].min
330
+ (1...max_page).each {|_|
331
+ sleep 0.5
332
+ get_links h, _, &block
333
+ }
334
+ end
335
+ }
336
+ end
337
+
338
+ def get_people q, *opts
339
+ @links = []
340
+ @open_links = []
341
+ people q, *opts
342
+ get_pages q
343
+ end
344
+
345
+ def get_pages q=nil
346
+ @links.uniq.each {|id| get_page id, q; sleep 1.5}
347
+ end
348
+
349
+ def get_page id, q=nil
350
+ q = q ? q.ci.to_re : // unless q.is Regexp
351
+ id_num = id[/\d+/].to_i
352
+ @f.get(id, :result=>@@reloadable) {|p|
353
+ data = p.find('.profileTable//.dataWrap').to_a.b
354
+ if data
355
+ L.debug "!p.at('.basicInfo//.alertmsg') data.contents.join('')[/(\\d\\s*){6,}/] data.contents.join('')[q]", binding
356
+ end
357
+ if data = p.find('.profileTable//.dataWrap').b and
358
+ contents = data.to_a.contents.join.b and contents[q]
359
+ digits = contents[/(\d *){6,9}/]
360
+ bot = (digits and digits[/^\d{7}$/] and id_num.between 852e5, 893e5)
361
+ if !bot and !p.at('.basicInfo//.alertmsg') || digits
362
+ L << "added vk.com#{id}"
363
+ @open_links << id
364
+ elsif bot
365
+ L << "bot #{id_num} detected"
366
+ else tick!
367
+ end
368
+ else tick!
369
+ end
370
+ }
371
+ end
372
+
373
+ end
374
+
375
+ class Mamba < Service
376
+ attr_reader :links, :open_links
377
+ @@login, @@pass = %w{AnotherOneUser AyaHirano8}
378
+ URI = {
379
+ :people => "http://mamba.ru/?",
380
+ :login => "http://mamba.ru/tips/?tip=Login",
381
+ :id => "http://vk.com%s"
382
+ }
383
+ DefaultParams = Hash[*%w[
384
+ c[city] 1
385
+ c[country] 1
386
+ c[noiphone] 1
387
+ c[photo] 1
388
+ c[section] people
389
+ c[sex] 1
390
+ c[status] 6
391
+ ]]
392
+
393
+ def initialize frame=nil
394
+ super :people, frame, {:cp=>{
395
+ "PREV_LOGIN"=>"anotheroneuser", "LOGIN"=>"anotheroneuser", "UID"=>"494809761", "LEVEL"=>"Low", "bar"=>"AShwjUz54RmYnfClOdlMYZylGUU90PUxeFkwlGixrP2ARHDs3A0EbDDxQTEksEm4LPT8FfzpfdiMME1omFz0tVhA5QjcsCgckaSQfIDxI", "s"=>"MJt2J3U9Pnk7Qvpie13lN7rrqmahTrAk", "SECRET"=>"adqH47"},
396
+ :eval=>false, :timeout=>5, :retry=>['TimeoutError']
397
+ }, 5
398
+ @links = []
399
+ @open_links = []
400
+ end
401
+
402
+ def login
403
+ @f.run(URI[:login]) {|p|
404
+ p.submit('.ap-t-c//form', @f, 'login'=>@@login, 'password'=>@@pass, 'level'=>nil) {
405
+ @f.each {|s| s.cookies.replace @f[0].cookies}
406
+ }
407
+ }
408
+ Curl.wait
409
+ end
410
+
411
+ def people
412
+ # TODO
413
+ # ... or not TODO?
414
+ end
415
+ # seems like NOT... LOL
416
+
417
+ end
418
+
419
+ module Downloaders
420
+
421
+ def letitbit(path, &block)
422
+ link = ''
423
+ frame = Frame 'letitbit.net', {:cp => true, :eval => nil}, 1
424
+ frame.run(path, :wait => !block) {|page1|
425
+ page1.submit('#ifree_form', frame) {|page2|
426
+ page2.submit('[action=/download4.php]', frame) {|page3|
427
+ page3.submit('[action=/download3.php]', frame) {|page4|
428
+ t = Thread.new {
429
+ sleep 60
430
+ frame.run({}, '/ajax/download3.php',
431
+ :headers => {"Referer" => "http://letitbit.net/download3.php"}
432
+ ) {|res|
433
+ link << res.html
434
+ block[link] if block
435
+ }
436
+ }
437
+ t.join if !block
438
+ }}}}
439
+ link
440
+ end
441
+
442
+ module_function :letitbit
443
+ end
444
+
445
+ end
data/lib/rhack_in.rb ADDED
@@ -0,0 +1,2 @@
1
+ require 'rhack'
2
+ class Object; include RHACK end
data/lib/scout.rb ADDED
@@ -0,0 +1,591 @@
1
+ # encoding: utf-8
2
+ module Curl
3
+
4
+ def ITT
5
+ res = nil
6
+ HTTPAccessKit::Scout('file://').loadGet(__FILE__) {|c| res = yield}
7
+ loop {if res then break res else sleep 0.01 end}
8
+ end
9
+ module_function :ITT
10
+
11
+ class Response
12
+ __init__
13
+ attr_reader :header, :code, :body, :hash, :timestamp, :time, :req, :date, :error
14
+
15
+ def to_s
16
+ str = '<#'
17
+ if @error
18
+ str << "#{@error[0].self_name}: #{@error[1]}"
19
+ else
20
+ str << (@header[/\d{3}/] == @code.to_s ? @header : "#{@header[/\S+/]} #{@code}") if @header
21
+ if @hash.location
22
+ str << ' '+@req.url if $panic
23
+ str << ' -> '+@hash.location
24
+ end
25
+ str << " (#{@body ? @body.size.bytes : 'No'} Body)"
26
+ str << " [#{@timestamp}]" if @timestamp
27
+ end
28
+ str << '>'
29
+ end
30
+ alias :inspect :to_s
31
+
32
+ def initialize(easy)
33
+ @hash = {}
34
+ @timestamp = @date = @header = nil
35
+ if easy.base.error
36
+ @error = easy.base.error
37
+ else
38
+ if headers = easy.header_str || easy.base.headers
39
+ headers /= "\r\n"
40
+ @header = headers.shift
41
+ headers.each {|h|
42
+ h /= ': '
43
+ if h[0]
44
+ h[0].downcase!
45
+ if h[0] == 'set-cookie'
46
+ (@hash.cookies ||= []) << h[1]
47
+ else
48
+ @hash[h[0]] = h[1]
49
+ end
50
+ end
51
+ }
52
+ @timestamp = if @hash.date
53
+ begin
54
+ @date = @hash.date.to_time
55
+ rescue => e
56
+ (@date = Time.now).strftime("%H:%M:%S")
57
+ L < "Error #{e.class}:#{e.message} with @hash.date = #{@hash.date.inspect}"
58
+ end
59
+ @hash.date[/\d\d:\d\d:\d\d/]
60
+ else
61
+ (@date = Time.now).strftime("%H:%M:%S")
62
+ end
63
+ end
64
+ @code = easy.response_code
65
+ @body = easy.body_str
66
+ @time = easy.total_time
67
+ end
68
+
69
+ @req = {}
70
+ @req.url = easy.last_effective_url
71
+ @req.header = easy.headers
72
+ if range = easy.headers.Range and range[/(\d+)-(\d+)/]
73
+ @req.range = $1.to_i .. $2.to_i
74
+ end
75
+ if easy.base and @req.meth = easy.base.last_method and @req.meth == :post
76
+ @req.body = easy.post_body
77
+ @req.mp = easy.multipart_form_post?
78
+ end
79
+ end
80
+
81
# Class predicate for a response.
#
# A response that carries an error answers true for both Array and
# Curl::Response; a normal response answers true only for Curl::Response.
#
# The original used `klass = Curl::Response` (assignment) in the error
# branch, which made the predicate truthy for every class.
def is(klass)
  if @error
    klass == Array || klass == Curl::Response
  else
    klass == Curl::Response
  end
end
88
+
89
# Index into the response: when an error is stored, the key indexes @error
# as given; otherwise the key is downcased and looked up in the header hash.
def [](key_or_index)
  return @error[key_or_index] if @error
  @hash[key_or_index.downcase]
end
92
+
93
+ end
94
+
95
+ end
96
+
97
+ module HTTPAccessKit
98
+
99
+ class Cookie
100
+ __init__
101
+
102
# Builds a cookie in one of two ways.
#
# Cookie(header_value, scout) - parses a Set-Cookie header value, derives the
#   path/domain matchers from its attributes and registers the cookie in
#   scout.cookies under the cookie's domain (or scout.uri.host when the
#   header names none).
# Cookie(name, value)         - creates a cookie directly.
#   NOTE(review): this branch destructures args[0], so the Array/Hash forms
#   are only reached when args[0] itself is a [name, cookie] pair - confirm
#   against callers such as Scout#main_cks=.
#
# Fixes over the original header parsing:
# * the leading-dot test was `a[1].ord == ?.`, which compares an Integer with
#   a one-character String on Ruby 1.9+ and is never true;
# * name/value and attribute pairs are split on the first "=" only, so values
#   that contain "=" are kept whole;
# * attribute names are matched case-insensitively ("Path", "Domain");
# * attributes without a value (e.g. "HttpOnly") are skipped.
# NOTE(review): String#/ from the helper library is assumed to be a plain
# split - confirm.
def initialize(*args)
  if args[1].is Scout
    domain, str, scout = nil, *args
    ck = str.split(/;\s*/)
    (ck[1..-1] || []).each {|par|
      key, val = par.split('=', 2)
      next if !val
      case key.downcase
      when 'path'
        @path = (val == '/') ? // : /^#{Regexp.escape val}/
      when 'domain'
        if val.start_with?('.')
          # ".example.com" covers the domain itself and any subdomain
          domain = val[1..-1]
          @domain = /(^|\.)#{Regexp.escape(domain)}$/
        else
          domain = val
          @domain = /^#{Regexp.escape(val)}$/
        end
      end
    }
    @name, @value = ck[0].to_s.split('=', 2)
    L.debug args if !domain
    (scout.cookies[domain || scout.uri.host] ||= {})[@name] = self
  else
    @name, cookie = args[0]
    case cookie
    when Array; @value, @path, @domain = cookie
    when Hash; @value, @path, @domain = cookie.value, cookie.path, cookie.domain
    else @value = args[1].to_s
    end
  end
  # empty regexps match any path / any host
  @path ||= //
  @domain ||= //
  @string = "#{@name}=#{@value}; "
end
135
+
136
# Appends this cookie's "name=value; " string to +str+ when the cookie
# applies to +uri+: the uri path must match @path, and either the uri has no
# root or its host matches @domain. Returns nil when nothing is appended.
def use(str, uri)
  return unless uri.path[@path]
  return unless !uri.root || uri.host[@domain]
  str << @string
end
139
+
140
+ def to_s; @value end
141
+ def inspect; @value.inspect end
142
+
143
+ end
144
+
145
+ class Scout
146
+ __init__
147
+ attr_accessor :timeout, :raise_err, :retry
148
+ attr_accessor :path, :root, :sld, :proxy
149
+ attr_reader :uri
150
+ attr_reader :webproxy, :last_method, :proxystr, :headers, :body, :http, :error
151
+ attr_reader :cookies, :ua, :refforge, :cookieStore, :cookieProc
152
+
153
+ DefaultHeader = {
154
+ "Expect" => "",
155
+ "Keep-Alive" => "300",
156
+ "Accept-Charset" => "windows-1251,utf-8;q=0.7,*;q=0.7",
157
+ "Accept-Language" => "ru,en-us;q=0.7,en;q=0.3",
158
+ "Connection" => "keep-alive"
159
+ }
160
+
161
+ class ProxyError < ArgumentError
162
+ def initialize proxy
163
+ super "incorrect proxy: %s class %s, must be an Array
164
+ proxy format: ['127.0.0.1', '80'], [2130706433, 80], ['someproxy.com', :WebproxyModule]"%[proxy.inspect, proxy.class]
165
+ end
166
+ end
167
+ @@retry = RETRY
168
+
169
+ def initialize(*argv)
170
+ uri, proxy, @ua, @refforge, opts = argv.get_opts ['http://', nil, :rand, 1]
171
+ raise ProxyError, proxy if proxy and (!webproxy && !proxy.is(Array) or webproxy && !proxy.is(String))
172
+ 'http://' >> uri if uri !~ /^\w+:\/\//
173
+ if proxy
174
+ if proxy[1] and proxy[1].to_i == 0
175
+ @webproxy = eval("WebProxy::#{proxy[1]}")
176
+ @proxy = proxy[0].parse(:uri).root
177
+ else
178
+ proxy[0] = proxy[0].to_ip if proxy[0].is Integer
179
+ @proxy = proxy
180
+ end
181
+ end
182
+ @cookies = {}
183
+ @body = {}
184
+ @num = []
185
+ @cookieProc = opts[:cp] || opts[:ck]
186
+ @raise_err = opts[:raise] # no way to use @raise id, it makes any 'raise' call here fail
187
+ @engine = opts[:engine]
188
+ @timeout = opts[:timeout] || $CurlDefaultTimeout || 60
189
+ @post_proc = @get_proc = @head_proc = Proc::NULL
190
+ update uri
191
+ @retry = opts[:retry] || {}
192
+ @retry = {@uri.host => @retry} if @retry.is Array
193
+ end
194
+
195
+ def update(uri)
196
+ if !uri[/^\w+:\/\//]
197
+ '/' >> uri if uri[0,1] != '/'
198
+ @uri = uri.parse:uri
199
+ return
200
+ end
201
+ @uri = uri.parse:uri
202
+ return if @uri.root == @root
203
+ @root = @uri.root
204
+ @sld = @root[/[\w-]+\.[a-z]+$/]
205
+ @path = @uri.fullpath
206
+ if @http
207
+ @http.url = @webproxy ? @proxy : @root
208
+ else
209
+ @http = Curl::Easy(@webproxy ? @proxy : @root)
210
+ @http.base = self
211
+ end
212
+ if @proxy
213
+ @http.proxy_url = @proxy*':' if !@webproxy
214
+ @proxystr = @webproxy ? @proxy[0] : @http.proxy_url
215
+ else @proxystr = 'localhost'
216
+ end
217
+ if @cookieProc.is Hash
218
+ self.main_cks = @cookieProc
219
+ @cookieProc = true
220
+ end
221
+ self
222
+ end
223
+
224
+ def to_s
225
+ str = "<##{self.class.self_name} @ "
226
+ if @webproxy
227
+ str << "#{@proxy} ~ "
228
+ elsif @proxy
229
+ str << @proxy*':'+" ~ "
230
+ end
231
+ str << @root+'>'
232
+ end
233
+ alias :inspect :to_s
234
+
235
+ def update_res
236
+ @outdated = false
237
+ @res = @http.res
238
+ @headers = nil
239
+ @res
240
+ end
241
+
242
# Returns the cached response while it exists and is not flagged outdated;
# otherwise refreshes it through update_res.
def res
  return @res if @res && !@outdated
  update_res
end
247
+
248
+ def req; res.req end
249
+
250
+ def dump
251
+ str = "IP: #{@proxystr}\nRequest: "
252
+ str << ({"Action"=>@root+@path} + @http.headers).dump+@body.dump+"Response: #{res}"
253
+ str << "\nReady" if @ready
254
+ str
255
+ end
256
+
257
+ def fix(path)
258
+ path = path.tr ' ', '+'
259
+ path = expand path if path =~ /^\./
260
+ if update(path) or @uri.root
261
+ path = @webproxy.encode(path) if @webproxy
262
+ else
263
+ path = @webproxy.encode(@root+path) if @webproxy
264
+ end
265
+ path
266
+ end
267
+
268
+ def expand(uri)
269
+ if !@webproxy || @http.last_effective_url
270
+ path = (@http.last_effective_url ? @http.last_effective_url.parse(:uri) : @uri).path
271
+ return uri.sub(/^(\.\.?\/)?/, File.split(uri =~ /^\.\./ ? File.split(path)[0] : path)[0])
272
+ end
273
+ uri
274
+ end
275
+
276
# Sets the request body on @http from +params+.
#
# params    - key/value pairs. Proc values are called to obtain the value.
# multipart - when truthy a multipart body is built:
#             * "f:<field>" keys become a file field with a random
#               "<hex>.jpg" file name whose content is the value followed by
#               a random string;
#             * "p:<field>" keys treat the value as a path: the file field is
#               named after its basename and filled with read(value);
#             * any other key becomes a plain content field.
#             Otherwise the body is params.urlencode.
#
# The "p:" branch previously called File.basename(f) with an undefined local
# `f`, raising NameError; the path is the value itself.
def mkBody(params, multipart=nil)
  if multipart
    @http.multipart_post_body = params.map {|k, v|
      v = v.call if v.is Proc
      if k =~ /^f:/
        Curl::PostField.file(k[2..-1], "application/octet-stream",
          "#{randstr(16, :hex)}.jpg", v+randstr )
      elsif k =~ /^p:/
        Curl::PostField.file(k[2..-1], "application/octet-stream",
          File.basename(v), read(v) )
      else
        Curl::PostField.content(k.to_s, v.to_s)
      end
    }
  else
    @http.post_body = params.urlencode
  end
end
294
+
295
+ def mkHeader(uri)
296
+ header = DefaultHeader.dup
297
+ if @cookieProc
298
+ cookies = ''
299
+ main_cks.each_value {|v| v.use cookies, @uri}
300
+ header['Cookie'] = cookies[0..-3]
301
+ end
302
+ if @refforge
303
+ ref = @uri.root ? uri : (@webproxy ? @http.host : @root)+uri
304
+ header['Referer'] = ref.match(/(.+)[^\/]*$/)[1]
305
+ end
306
+ header['User-Agent'] = @ua == :rand ? UAS.rand : @ua if @ua
307
+ header
308
+ end
309
+
310
# Extracts Set-Cookie values from +res+ and registers each one through
# Cookie(value, self).
#
# res - either a raw header String (one header per line) or a
#       Curl::Response, whose 'cookies' entry is used.
#
# Returns nil when no cookies were found.
#
# Fixes over the original:
# * the header name was tested with `downcase! == 'set-cookie'`; downcase!
#   returns nil when the name is already lowercase, so "set-cookie:" headers
#   were skipped;
# * the line is split on the first ": " only, so a value that itself contains
#   ": " is kept whole.
def ProcCookies(res)
  ck = []
  case res
  when String
    res.split(/\r?\n/).each {|h|
      name, value = h.split(': ', 2)
      ck << value if name and name.downcase == 'set-cookie'
    }
  when Curl::Response
    ck = res['cookies']
  end
  return if !ck.b
  ck.each {|c| Cookie(c, self)}
  # StoreCookies if @cookieStore
end
325
+
326
+ def cp_on() @cookieProc = true end
327
+ def cp_off() @cookieProc = false end
328
+
329
+ def main_cks() @cookies[@uri.host] ||= {} end
330
+ def main_cks=(cks)
331
+ @cookies[@uri.host] = @webproxy ?
332
+ @webproxy.ck_encode(@root, cks) :
333
+ cks.map2 {|k, v| Cookie(k, v)}
334
+ end
335
+
336
# Decides whether a failed request should be retried.
#
# err - error pair; err[0] is the error class, whose self_name is compared
#       against the configured error names.
#
# Hosts come from the class-wide @@retry table and the per-scout @retry
# table. Returns false when none of those hosts is selected for @root;
# otherwise true only if every selected host lists the error name.
def retry?(err)
  # e.g. ['0chan.ru', 'www.nomer.org', 'nomer.org'].select_in('http://www.nomer.org')
  #      presumably yields ['www.nomer.org', 'nomer.org']
  exc = (@@retry.keys + @retry.keys).select_in @root
  return false if !exc.b
  # A host may be present in only one of the two tables, so both lookups
  # default to an empty list (the original raised TypeError on Array + nil).
  !exc.find {|e| !err[0].self_name.in((@@retry[e] || []) + (@retry[e] || []))}
end
343
+
344
+ def loaded?
345
+ $Carier.reqs.include? @http
346
+ end
347
+
348
+ def load!
349
+ $log <= [$Carier, @http]
350
+ unless $Carier.add @http
351
+ $Carier.remove @http
352
+ $Carier.add @http
353
+ end
354
+ rescue RuntimeError => e
355
+ e.message << ". Failed to load allready loaded? easy handler: Bad file descriptor" unless Curl::Err::CurlError === e
356
+ raise e
357
+ end
358
+
359
+ def load(path=@path, headers={}, not_redir=1, relvl=10)
360
+ @http.path = path = fix(path)
361
+ @http.headers = mkHeader(path).merge!(headers)
362
+ if not_redir.b
363
+ @http.follow_location = false
364
+ else
365
+ @http.follow_location = true
366
+ @http.max_redirects = relvl
367
+ end
368
+ @http.timeout = @timeout
369
+
370
+ @http.on_complete {|c|
371
+ @error = nil
372
+ @outdated = true
373
+ ProcCookies c.res if @cookieProc
374
+ yield c if block_given?
375
+ }
376
+ @http.on_failure {|c, e|
377
+ @http.on_complete {}
378
+ @outdated = true
379
+ @error = e
380
+ if retry? e
381
+ L.debug "#{e[0]} -> reloading scout"
382
+ #load uri, headers, not_redir, relvl, &callback
383
+ load! # all params including post_body are still set
384
+ else
385
+ L.debug "#{e[0]} -> not reloading scout"
386
+ raise *e if @raise_err
387
+ end
388
+ } if !@http.on_failure
389
+
390
+ load!
391
+ end
392
+
393
+ def loadPost(*argv, &callback)
394
+ hash, multipart, uri, opts = argv.get_opts [@body, false, @path],
395
+ :headers => {}, :redir => false, :relvl => 2
396
+ mkBody hash, multipart.b
397
+ @last_method = :post
398
+ if block_given?
399
+ @post_proc = callback
400
+ elsif @http.callback != @post_proc
401
+ callback = @post_proc
402
+ end
403
+ load(uri, opts[:headers], !opts[:redir], opts[:relvl], &callback)
404
+ end
405
+
406
+ def loadGet(*argv, &callback)
407
+ uri, opts = argv.get_opts [@path],
408
+ :headers => {}, :redir => false, :relvl => 2
409
+ @http.get = true
410
+ @last_method = :get
411
+ if block_given?
412
+ @get_proc = callback
413
+ elsif @http.callback != @get_proc
414
+ callback = @get_proc
415
+ end
416
+ load(uri, opts[:headers], !opts[:redir], opts[:relvl], &callback)
417
+ end
418
+
419
+ def loadHead(*argv, &callback)
420
+ uri, emulate, headers = argv.get_opts [@path, :if_retry]
421
+ @http.head = true if emulate != :always
422
+ @last_method = :head
423
+ if block_given?
424
+ @head_proc = callback
425
+ elsif @http.callback != @head_proc
426
+ callback = @head_proc
427
+ end
428
+ emu = lambda {
429
+ @headers = ''
430
+ $log << @headers
431
+ @http.on_header {|h|
432
+ $log << @headers
433
+ @headers << h
434
+ h == "\r\n" ? 0 : h.size
435
+ }
436
+ @http.get = true
437
+ load(uri, headers) {|c| c.on_header; callback[c]}
438
+ }
439
+ if emulate != :always
440
+ load(uri, headers) {|c|
441
+ if !@error and c.res.code != 200 and emulate == :if_retry
442
+ $log << @headers
443
+ emu.call
444
+ else
445
+ callback[c]
446
+ end
447
+ }
448
+ else emu.call
449
+ end
450
+ end
451
+
452
+ end
453
+
454
# Raised when a scout is requested from an empty squad.
class PickError < IndexError
  def initialize
    super("can't get scout from empty squad")
  end
end
458
+
459
+ class ScoutSquad < Array
460
+ __init__
461
+
462
+ def initialize(*args)
463
+ raise ArgumentError, "can't create empty squad" if (num = args.pop) < 1
464
+ proxies = nil
465
+ super []
466
+ if args[0].is Scout
467
+ s = args[0]
468
+ else
469
+ if !args[0].is String
470
+ args.unshift ''
471
+ if (opts = args[-1]).is Hash and (opts[:cp] || opts[:ck]).is Hash
472
+ L.warn "it's useless to setup cookies for untargeted squad!"
473
+ end
474
+ end
475
+ if args[1] and args[1][0].is Array
476
+ proxies = args[1]
477
+ args[1] = proxies.shift
478
+ end
479
+ self[0] = s = Scout(*args)
480
+ num -=1
481
+ end
482
+ num.times {|i|
483
+ self << Scout(s.root+s.path, (proxies ? proxies[i] : s.proxy), s.ua, s.refforge, :ck => s.main_cks, :raise => s.raise_err, :timeout => s.timeout, :retry => s.retry)
484
+ }
485
+ end
486
+
487
+ def update uri, forced=nil
488
+ each {|s| return L.warn "failed to update scout loaded? with url: #{s.http.url}" if s.loaded?} if !forced
489
+ each {|s| s.update uri}
490
+ end
491
+
492
+ def untargeted
493
+ first.root == 'http://'
494
+ end
495
+
496
+ def rand
497
+ raise PickError if !b
498
+ # to_a because reject returns object of this class
499
+ if scout = to_a.rand {|_|!_.loaded?}; scout
500
+ else # Curl should run here, otherwise `next'/`rand'-recursion will cause stack overflow
501
+ raise "Curl must run in order to use ScoutSquad#rand" if !Curl.status
502
+ Curl.wait
503
+ self.rand
504
+ end
505
+ end
506
+
507
+ def next
508
+ raise PickError if !b
509
+ if scout = find {|_|!_.loaded?}; scout
510
+ else # Curl should run here, otherwise `next'/`rand'-recursion will cause stack overflow
511
+ raise "Curl must run in order to use ScoutSquad#next" if !Curl.status
512
+ Curl.wait
513
+ self.next
514
+ end
515
+ end
516
+
517
+ def to_s
518
+ str = '<#ScoutSquad @ '
519
+ if b
520
+ if first.webproxy
521
+ str << "#{first.proxy} ~ "
522
+ elsif first.proxy
523
+ str << first.proxy*':'+" ~ "
524
+ end
525
+ str << "#{untargeted ? "no target" : first.root} "
526
+ end
527
+ str << "x#{size}>"
528
+ end
529
+ alias :inspect :to_s
530
+
531
+ end
532
+
533
+ end
534
+
535
+ ### Global scope shortcut methods ###
536
+
537
+ module RMTools
538
+
539
# One-shot GET helper.
#
# uri  - absolute uri; a uri starting with "/" is rejected, a uri without a
#        protocol is only logged.
# opts - :headers / :h   request headers (default {})
#        :proxy / :pr    proxy, falling back to $CurlGetProxy
#        :ret_body / :b  truthy (default) to collect the body string,
#                        otherwise the response object is used as the buffer
#        :wait / :w      whether to wait for completion (defaults to true
#                        when no block is given)
#        :e              sets the scout's raise_err when not already set
#        :t              sets the easy handle's timeout when not already set
#        The whole hash is also passed on to the Scout constructor.
#
# Yields the buffer to the block (if given) once the request completes.
# Returns the buffer.
#
# Raises ArgumentError for a local ("/...") uri.
def Get(uri, opts={})
  raise ArgumentError, "Local uri passed to Get function" if uri[0,1] == '/'
  $log.debug "Protocol-less uri passed to Get function" if !uri[/^\w+:\/\//]
  headers = opts[:headers] || opts[:h] || {}
  proxy = opts[:proxy] || opts[:pr] || $CurlGetProxy
  ret_body = opts.fetch(:ret_body, opts.fetch(:b, 1)).b
  wait = opts.fetch(:wait, opts.fetch(:w, !block_given?)).b
  s = HTTPAccessKit::Scout(uri, proxy, opts)
  buf = ret_body ? '' : s.http.res
  s.raise_err ||= opts[:e]
  s.http.timeout ||= opts[:t]
  # loadGet reads request headers from its :headers option. Passing the
  # headers hash bare (as the original did) makes it the options hash itself,
  # so the custom headers never reached the request.
  s.loadGet(:headers => headers) {|c|
    if ret_body
      buf << c.body_str
    else
      buf.load_from c.res
    end
    yield buf if block_given?
  }
  if wait
    ($CarierThread and $CarierThread.status) ? Curl.wait : $Carier.perform
  end
  buf
end
563
+ module_function :Get
564
+
565
+ end
566
+
567
+ module Enumerable
568
+
569
# Issues Get for every uri in the collection (values when the collection
# responds to each_value, elements otherwise).
#
# on_count       - optional callable invoked once after the last response has
#                  been handled; it receives the last buffer when its arity
#                  is positive.
# default_domain - optional prefix joined onto uris that start with "/" or
#                  lack an http(s) scheme.
# callback       - block invoked per response with the buffer, plus the
#                  running counter when on_count is given and the block's
#                  arity is greater than 1.
#
# Changes over the original: default_domain is honoured whether or not
# on_count is given (it used to be ignored without on_count), and a missing
# callback no longer raises NoMethodError in the on_count branch.
def GetAll(on_count=nil, default_domain=nil, &callback)
  iterator = resto(:each_value) ? :each_value : :each
  qualify = lambda {|uri|
    if default_domain and (uri[0,1] == '/' or !uri[/^https?:/])
      File.join(default_domain, uri)
    else
      uri
    end
  }
  if on_count
    len = size
    counter = 0
    send(iterator) {|uri|
      Get(qualify[uri]) {|buf|
        if callback
          callback.arity > 1 ?
            callback.call(buf, counter) :
            callback.call(buf)
        end
        if (counter += 1) == len
          on_count.arity > 0 ?
            on_count.call(buf) :
            on_count.call
        end
      }
    }
  else
    send(iterator) {|uri| Get(qualify[uri], &callback)}
  end
end
590
+
591
+ end