rhack 0.4.1 → 1.0.0.rc4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (53) hide show
  1. data/.gitignore +22 -0
  2. data/Gemfile +2 -5
  3. data/LICENSE +19 -15
  4. data/README.md +66 -26
  5. data/Rakefile +42 -31
  6. data/config/cacert.pem +3895 -0
  7. data/config/rhack.yml.template +40 -0
  8. data/ext/curb-original/curb_config.h +3 -0
  9. data/ext/curb-original/curb_easy.c +3 -54
  10. data/ext/curb-original/curb_multi.c +69 -140
  11. data/ext/curb/curb_multi.c +1 -1
  12. data/lib/rhack.rb +82 -12
  13. data/lib/rhack/cookie.rb +49 -0
  14. data/lib/rhack/curl.rb +6 -0
  15. data/lib/{extensions/curb.rb → rhack/curl/easy.rb} +26 -48
  16. data/lib/rhack/curl/global.rb +175 -0
  17. data/lib/rhack/curl/itt.rb +11 -0
  18. data/lib/rhack/curl/multi.rb +37 -0
  19. data/lib/rhack/curl/post_field.rb +20 -0
  20. data/lib/rhack/curl/response.rb +91 -0
  21. data/lib/rhack/dl.rb +308 -0
  22. data/lib/rhack/frame.rb +316 -0
  23. data/lib/{extensions → rhack/js}/browser/env.js +0 -0
  24. data/lib/{extensions → rhack/js}/browser/jquery.js +0 -0
  25. data/lib/{extensions → rhack/js}/browser/xmlsax.js +0 -0
  26. data/lib/{extensions → rhack/js}/browser/xmlw3cdom_1.js +0 -0
  27. data/lib/{extensions → rhack/js}/browser/xmlw3cdom_2.js +0 -0
  28. data/lib/rhack/js/johnson.rb +71 -0
  29. data/lib/rhack/page.rb +263 -0
  30. data/lib/rhack/proxy.rb +3 -0
  31. data/lib/rhack/proxy/checker.rb +1 -1
  32. data/lib/rhack/scout.rb +342 -0
  33. data/lib/rhack/scout_squad.rb +98 -0
  34. data/lib/rhack/services.rb +1 -464
  35. data/lib/rhack/services/base.rb +59 -0
  36. data/lib/rhack/services/examples.rb +423 -0
  37. data/lib/rhack/version.rb +3 -0
  38. data/lib/rhack_in.rb +3 -2
  39. data/rhack.gemspec +28 -0
  40. metadata +104 -85
  41. data/.gemtest +0 -0
  42. data/Gemfile.lock +0 -23
  43. data/Manifest.txt +0 -60
  44. data/ext/curb/Makefile +0 -217
  45. data/lib/cache.rb +0 -44
  46. data/lib/curl-global.rb +0 -164
  47. data/lib/extensions/declarative.rb +0 -153
  48. data/lib/extensions/johnson.rb +0 -63
  49. data/lib/frame.rb +0 -848
  50. data/lib/init.rb +0 -49
  51. data/lib/rhack.yml.template +0 -19
  52. data/lib/scout.rb +0 -589
  53. data/lib/words.rb +0 -25
@@ -0,0 +1,98 @@
1
+ module RHACK
2
+
3
+ class PickError < IndexError
4
+ def initialize
5
+ super "can't get scout from empty squad" end
6
+ end
7
+
8
+ class ScoutSquad < Array
9
+ __init__
10
+
11
+ def initialize(*args)
12
+ raise ArgumentError, "can't create empty squad" if (num = args.pop) < 1
13
+ proxies = nil
14
+ super []
15
+ if args[0].is Scout
16
+ s = args[0]
17
+ else
18
+ if !args[0].is String
19
+ args.unshift ''
20
+ if (opts = args[-1]).is Hash and (opts[:cp] || opts[:ck]).is Hash
21
+ L.warn "it's useless to setup cookies for untargeted squad!"
22
+ end
23
+ end
24
+ if args[1] and args[1][0].is Array
25
+ proxies = args[1]
26
+ args[1] = proxies.shift
27
+ end
28
+ self[0] = s = Scout(*args)
29
+ num -=1
30
+ end
31
+ num.times {|i|
32
+ self << Scout(s.root+s.path, (proxies ? proxies[i] : s.proxy), s.ua, s.refforge, :ck => s.main_cks, :raise => s.raise_err, :timeout => s.timeout, :retry => s.retry)
33
+ }
34
+ end
35
+
36
+ def update uri, forced=nil
37
+ each {|s| return L.warn "failed to update scout loaded? with url: #{s.http.url}" if s.loaded?} if !forced
38
+ each {|s| s.update uri}
39
+ end
40
+
41
+ def untargeted
42
+ first.root == 'http://'
43
+ end
44
+
45
+ def rand
46
+ raise PickError if !b
47
+ # to_a because reject returns object of this class
48
+ if scout = to_a.rand {|_|!_.loaded?}; scout
49
+ else # Curl should run here, otherwise `next'/`rand'-recursion will cause stack overflow
50
+ unless Curl.status
51
+ L.log "Curl must run in order to use ScoutSquad#rand; setting Carier Thread"
52
+ Curl.execute
53
+ #raise "Curl must run in order to use ScoutSquad#rand"
54
+ end
55
+ #Curl.wait
56
+ loop {
57
+ sleep 1
58
+ break if Curl.carier.reqs.size < size
59
+ }
60
+ self.rand
61
+ end
62
+ end
63
+
64
+ def next
65
+ raise PickError if !b
66
+ if scout = find {|_|!_.loaded?}; scout
67
+ else # Curl should run here, otherwise `next'/`rand'-recursion will cause stack overflow
68
+ unless Curl.status
69
+ L.log "Curl must run in order to use ScoutSquad#next; setting Carier Thread"
70
+ Curl.execute :unless_allready
71
+ #raise "Curl must run in order to use ScoutSquad#next"
72
+ end
73
+ #Curl.wait
74
+ loop {
75
+ sleep 1
76
+ break if Curl.carier.reqs.size < size
77
+ }
78
+ self.next
79
+ end
80
+ end
81
+
82
+ def to_s
83
+ str = '<#ScoutSquad @ '
84
+ if b
85
+ if first.webproxy
86
+ str << "#{first.proxy} ~ "
87
+ elsif first.proxy
88
+ str << first.proxy*':'+" ~ "
89
+ end
90
+ str << "#{untargeted ? "no target" : first.root} "
91
+ end
92
+ str << "x#{size}>"
93
+ end
94
+ alias :inspect :to_s
95
+
96
+ end
97
+
98
+ end
@@ -1,465 +1,2 @@
1
- # encoding: utf-8
2
1
  require 'rhack'
3
-
4
- # Вызовы сервисов всегда ждут и возвращают обработанный ответ, если вызвваны без блока.
5
- # В противном случае используется событийная модель и обработанный ответ передаётся в блок.
6
- module HTTPAccessKit
7
-
8
- class Service
9
- attr_accessor :f
10
-
11
- def initialize(service, frame, *args)
12
- @service = service
13
- # first argument should be a string so that frame won't be static
14
- @f = frame || Frame(self.class::URI[service] || self.class::URI[:login], *args)
15
- end
16
-
17
- # Usable only for sync requests
18
- def login(*)
19
- Curl.run
20
- @f[0].cookies.clear
21
- json, wait, @f.opts[:json], @f.opts[:wait] = @f.opts[:json], @f.opts[:wait], false, true
22
- yield @f.get(self.class::URI[:login])
23
- @f.get(self.class::URI[:home]) if self.class::URI[:home]
24
- @f.opts[:json], @f.opts[:wait] = json, wait
25
- @f.copy_cookies!
26
- end
27
-
28
- def go(*args, &block)
29
- __send__(@service, *args, &block)
30
- rescue
31
- L < $!
32
- Curl.reload
33
- end
34
-
35
- def scrape!(page)
36
- __send__(:"scrape_#{@service}", page)
37
- if url = next_url(page)
38
- @f.get(url) {|next_page| scrape!(next_page)}
39
- end
40
- end
41
-
42
- def inspect
43
- "<##{self.class.self_name}:#{@service.to_s.camelize} service via #{@f.inspect}>"
44
- end
45
-
46
- end
47
-
48
- class ServiceError < Exception; end
49
-
50
- class Yandex < Service
51
- __init__
52
-
53
- unless defined? IGNORE_UPPERCASE
54
- URI = {
55
- :speller => "http://speller.yandex.net/services/spellservice.json/checkText",
56
- :search => "http://www.yandex.ru/yandsearch?lr=213&%s",
57
- :weather => "http://pogoda.yandex.ru/%d/details/"
58
- }
59
-
60
- IGNORE_UPPERCASE = 1
61
- IGNORE_DIGITS = 2
62
- IGNORE_URLS = 4
63
- FIND_REPEAT_WORDS = 8
64
- IGNORE_LATIN = 16
65
- NO_SUGGEST = 32
66
- FLAG_LATIN = 128
67
- end
68
-
69
- def initialize(service=:search, frame=nil)
70
- ua = UAS.rand
71
- ua << " YB/4.2.0" if !ua["YB"]
72
- super service, frame, nil, ua, :ck => {
73
- "yandexuid"=>"3644005621268702222",
74
- "t"=>"p"
75
- }, :eval => false
76
- end
77
-
78
- def search(text, opts={}, &block)
79
- uri = URI.search % urlencode(opts.merge(:text=>text))
80
- @f.run(uri, :proc_result => block) {|page| process page}
81
- end
82
-
83
- def process page
84
- page.find('.p1/.cr').map {|n| [n.at('.cs').href, n.at('.cs').text.strip, (n.at('.kk') || n.at('.k7/div')).text.strip]} if page.html.b
85
- end
86
-
87
- def speller(text, opts=23)
88
- text = text.split_to_lines(10000)
89
- i = 0
90
- @f.run({"text" => text[i], "options" => opts}, URI.speller, :json => true) {|pg|
91
- yield pg.hash
92
- text[i+=1] && @f.get({"text" => text[i], "options" => opts}, URI.speller, :json => true)
93
- }
94
- end
95
-
96
- def fix_content(doc, opts={})
97
- nodes = doc.root.text_nodes
98
- speller(nodes*". ", opts) {|json|
99
- fix = {}
100
- json.each {|h| fix[h.word] = h.s[0] if h.s[0]}
101
- nodes.each {|n|
102
- fixed = false
103
- text = n.text
104
- fix.each {|k, v| fixed = true if text.gsub!(/\b#{k}\b/, v)}
105
- n.text(text) if fixed
106
- }
107
- }
108
- Curl.wait
109
- end
110
-
111
- def weather city=27612, day=nil, &block
112
- if city.is String
113
- city = CitiesCodes[city] if defined? CitiesCodes
114
- raise ServiceError, "can't get weather info for #{city.inspect}:#{city.class}" if !city.is(Fixnum)
115
- end
116
- @f.get(URI.weather%city, :proc_result => block) {|pg|
117
- ary = pg.find('//.b-forecast-details/tbody/tr{_["class"] =~ /t\d/}').map {|e|
118
- "#{e.at('.date') ? e.at('.date').text+":\n" : ''} - #{e.at('.t').text} - #{e.at('.data').text} - #{e.at('.wind/img').alt} #{e.at('.wind').text} м/с"
119
- }
120
- ary = ary[0..11].div(4) + ary[12..-1].div(2)
121
- day ? ary[day] : ary
122
- }#.res
123
- end
124
-
125
- def self.weather(*args, &block) new(:weather).go *args, &block end
126
- def self.search(*args, &block) new.go *args, &block end
127
-
128
- end
129
-
130
- class Google < Service
131
- __init__
132
- URI = {
133
- :translate => "http://ajax.googleapis.com/ajax/services/language/translate?v=1.0&q=%s&langpair=%s%%7C%s",
134
- :search => "http://ajax.googleapis.com/ajax/services/search/web?v=1.0&hl=ru&q=%s",
135
- :detect => "http://ajax.googleapis.com/ajax/services/language/detect?v=1.0&q=%s"
136
- }
137
-
138
- Shortcuts = Hash[*%w{
139
- v ru.wikipedia.org в ru.wikipedia.org вики en.wikipedia.org
140
- w en.wikipedia.org ев en.wikipedia.org wiki en.wikipedia.org
141
- lm lurkmore.ru лм lurkmore.ru
142
- wa world-art.ru ва world-art.ru
143
- ad anidb.info ад anidb.info
144
- ed encyclopediadramatica.com ед encyclopediadramatica.com
145
- }]
146
-
147
- Langs = *%w{
148
- af sq am ar hy az eu be bn bh bg my ca chr zh zh-CN zh-TW hr cs da dv nl en eo et tl fi fr gl ka de el gn gu iw hi hu is id iu it ja kn kk km ko ku ky lo lv lt mk ms ml mt mr mn ne no or ps fa pl pt-PT pa ro ru sa sr sd si sk sl es sw sv tg ta tl te th bo tr uk ur uz ug vi
149
- }
150
-
151
- def initialize(service=:search, frame=nil)
152
- super service, frame, :json => true
153
- end
154
-
155
- def search(text, opts={}, &block)
156
- text = "site:#{opts[:site]} #{text}" if opts[:site]
157
- uri = URI.search % CGI.escape(text)
158
- @f.run(uri, :proc_result => block) {|page|
159
- if data = page.hash.responseData.b
160
- data.results.map! {|res| [res.unescapedUrl, res.titleNoFormatting, res.content]}
161
- end
162
- }#.res
163
- end
164
-
165
- def detect(text, wait=!block_given?, &block)
166
- text = text.is(String) ? text[0...600] : text[0]
167
- uri = URI[:detect] % CGI.escape(text)
168
- @f.run(uri, :proc_result => block, :wait => wait) {|page|
169
- (data = page.hash.responseData.b) && data.language
170
- }
171
- end
172
-
173
- def translate(text, to, from=nil, &block)
174
- text = text.split_to_blocks(600, :syntax) if !text.is Array
175
- if !from
176
- if block_given?
177
- return detect(text) {|from| yield translate(text, to, from)}
178
- else
179
- return translate(text, to, detect(text).res)
180
- end
181
- end
182
- res = []
183
- i = 0
184
- text.each_with_index {|b, j|
185
- @f.run(URI.translate%[CGI.escape(text[j]), from, to], :proc_result => block, :wait => false) {|page|
186
- res[j] = (data = page.hash.responseData.b and data.translatedText)
187
- (i += 1) == text.size ? res*"\n" : :skip
188
- }
189
- }
190
- Curl.wait if !block_given?
191
- res*"\n"
192
- end
193
-
194
- def self.search(*args, &block) new.search *args, &block end
195
- def self.tr(*args, &block) new(:translate).translate *args, &block end
196
-
197
- end
198
-
199
- class Infoseek < Service
200
- URI = {:tr => 'http://translation.infoseek.co.jp/'}
201
-
202
- def initialize frame=nil
203
- super :tr, frame, :eval => false
204
- end
205
-
206
- def get_token page
207
- @token = page.at('input[name=token]').value
208
- end
209
-
210
- def tr(text, direction=:from_ja, &block)
211
- if @token
212
- selector = direction.in([:from_ja, :from_jp, :to_en]) ? 1 : 0
213
- body = {'ac' => 'Text', 'lng' => 'en', 'original' => text, 'selector' => selector, 'token' => @token, 'submit' => ' 翻訳'}
214
- @f.run(body, :proc_result => block) {|page|
215
- get_token page
216
- page.at('textarea[name=converted]').text
217
- }#.res
218
- else
219
- @f.run(:save_result => !block) {|page|
220
- get_token page
221
- tr text, direction, &block
222
- }#.res
223
- end
224
- end
225
-
226
- def self.tr(*args, &block) new.tr *args, &block end
227
-
228
- end
229
-
230
- class Youtube < Service
231
- URI = {:info => "http://www.youtube.com/get_video_info?video_id=%s"}
232
- attr_reader :track
233
-
234
- def initialize frame=nil
235
- super :dl, frame, :eval => false
236
- @f.ss.each {|s| s.timeout=600}
237
- require 'open3'
238
- require 'mp3info'
239
- end
240
-
241
- def dl(id, fd=nil, &block)
242
- if block
243
- info(id) {|lnk| __dl(lnk, fd, block)}
244
- else __dl(info(id), fd)
245
- end
246
- end
247
-
248
- def dlmp3(id, mp3=nil)
249
- dl(id) {|flv|
250
- if !File.file?(df = mp3||flv.sub(/.flv$/, '.mp3'))
251
- Open3.popen3("ffmpeg -i '#{flv}' -ab 262144 -ar 44100 '#{df}'") {|i,o,e|
252
- if $verbose
253
- t = e.gets2 and t and t[/^size=/] and print t until e.eof?
254
- puts "\n#{t}"
255
- end
256
- }
257
- end
258
- Mp3Info.open(df, :encoding=>'utf-8') {|mp3|
259
- mp3.tag2.TPE1, mp3.tag2.TIT2 = @track[1..2]
260
- } }
261
- end
262
-
263
- def self.dl(id) new.dl(id) end
264
- def self.dlmp3(id) new.dlmp3(id) end
265
-
266
- private
267
- def info(id, &block)
268
- @f.run(URI.info%[id[/\/watch/] ?
269
- id.parseuri.query.v :
270
- File.basename(id).till(/[&?]/)],:hash=>true,:proc_result=>block){|p|
271
- res = p.hash
272
- @track = [res.author, res.creator, res.title]
273
- CGI.unescape(res.fmt_url_map).split(/,\d+\|/)[0].after('|')
274
- }#.res
275
- end
276
-
277
- def __dl(lnk,fd,block=nil)
278
- @f.dl(lnk, fd||"files/youtube/#{@track*' - '}.flv", :auto, 5, &block)
279
- end
280
-
281
- end
282
-
283
- class VK < Service
284
- attr_reader :links, :open_links
285
- URI = {
286
- :people => "http://vkontakte.ru/gsearch.php?from=people&ajax=1",
287
- :login => "http://vkontakte.ru/index.php",
288
- :id => "http://vkontakte.ru%s"
289
- }
290
- DefaultParams = Hash[*%w[
291
- c[city] 1
292
- c[country] 1
293
- c[noiphone] 1
294
- c[photo] 1
295
- c[section] people
296
- c[sex] 1
297
- c[status] 6
298
- ]]
299
- @@reloadable = ReloadablePage {
300
- if !@title and !@hash
301
- L << self
302
- L << @doc
303
- end
304
- if @hash == false or @hash.nil? && (!@title or @title["Ошибка"])
305
- L.info "@title caller.size", binding
306
- sleep 2
307
- end
308
- }
309
- def self.com; new end
310
-
311
- class NotFoundError < Exception; end
312
-
313
- def initialize frame=nil
314
- super :people, frame, {:cp => true, :relvl => 5, :eval => false}, 5
315
- @links = []
316
- @open_links = []
317
- login
318
- end
319
-
320
- def login params={'email'=>'fshm@bk.ru', 'pass'=>'Riddick2', 'expire'=>nil}
321
- super {|login_page|
322
- login_page.submit('form', @f, params).submit('form', @f, {})
323
- }
324
- end
325
-
326
- def get_links h, pagenum, &block
327
- @f.run(h.merge('offset' => pagenum*20), URI[:people], :proc_result=>block, :result=>@@reloadable, :json => true) {|page|
328
- ls = Page(page.hash.rows).get_links('.image/a')
329
- @links.concat ls
330
- ls
331
- }
332
- end
333
-
334
- def people(q, *args, &block)
335
- age, opts = args.get_opts [17..23]
336
- h = DefaultParams.merge('c[q]' => q)
337
- h.merge! Hash[opts.map {|k,v| ["c[#{k}]", v]}]
338
- h['c[age_from]'], h['c[age_to]'] = age.first, age.last
339
-
340
- @f.run(h, URI[:people], :proc_result => block, :json => true) {|page|
341
- # ответом может быть невнятное требование залогиниться
342
- sum = page.hash.summary.sub(/<span.+>/, '')
343
- puts sum
344
- found = sum[/\d+/]
345
- if !found
346
- L.warn sum
347
- else
348
- @links.concat Page(page.hash.rows).get_links('.image/a')
349
- max_page = [50, (found.to_f/20).ceil].min
350
- (1...max_page).each {|_|
351
- sleep 0.5
352
- get_links h, _, &block
353
- }
354
- end
355
- }
356
- end
357
-
358
- def get_people q, *opts
359
- @links = []
360
- @open_links = []
361
- people q, *opts
362
- get_pages q
363
- end
364
-
365
- def get_pages q=nil
366
- @links.uniq.each {|id| get_page id, q; sleep 1.5}
367
- end
368
-
369
- def get_page id, q=nil
370
- q = q ? q.ci.to_re : // unless q.is Regexp
371
- id_num = id[/\d+/].to_i
372
- @f.get(id, :result=>@@reloadable) {|p|
373
- data = p.find('.profileTable//.dataWrap').to_a.b
374
- if data
375
- L.debug "!p.at('.basicInfo//.alertmsg') data.contents.join('')[/(\\d\\s*){6,}/] data.contents.join('')[q]", binding
376
- end
377
- if data = p.find('.profileTable//.dataWrap').b and
378
- contents = data.to_a.contents.join.b and contents[q]
379
- digits = contents[/(\d *){6,9}/]
380
- bot = (digits and digits[/^\d{7}$/] and id_num.between 852e5, 893e5)
381
- if !bot and !p.at('.basicInfo//.alertmsg') || digits
382
- L << "added vk.com#{id}"
383
- @open_links << id
384
- elsif bot
385
- L << "bot #{id_num} detected"
386
- else tick!
387
- end
388
- else tick!
389
- end
390
- }
391
- end
392
-
393
- end
394
-
395
- class Mamba < Service
396
- attr_reader :links, :open_links
397
- @@login, @@pass = %w{AnotherOneUser AyaHirano8}
398
- URI = {
399
- :people => "http://mamba.ru/?",
400
- :login => "http://mamba.ru/tips/?tip=Login",
401
- :id => "http://vk.com%s"
402
- }
403
- DefaultParams = Hash[*%w[
404
- c[city] 1
405
- c[country] 1
406
- c[noiphone] 1
407
- c[photo] 1
408
- c[section] people
409
- c[sex] 1
410
- c[status] 6
411
- ]]
412
-
413
- def initialize frame=nil
414
- super :people, frame, {:cp=>{
415
- "PREV_LOGIN"=>"anotheroneuser", "LOGIN"=>"anotheroneuser", "UID"=>"494809761", "LEVEL"=>"Low", "bar"=>"AShwjUz54RmYnfClOdlMYZylGUU90PUxeFkwlGixrP2ARHDs3A0EbDDxQTEksEm4LPT8FfzpfdiMME1omFz0tVhA5QjcsCgckaSQfIDxI", "s"=>"MJt2J3U9Pnk7Qvpie13lN7rrqmahTrAk", "SECRET"=>"adqH47"},
416
- :eval=>false, :timeout=>5, :retry=>['TimeoutError']
417
- }, 5
418
- @links = []
419
- @open_links = []
420
- end
421
-
422
- def login
423
- @f.run(URI[:login]) {|p|
424
- p.submit('.ap-t-c//form', @f, 'login'=>@@login, 'password'=>@@pass, 'level'=>nil) {
425
- @f.each {|s| s.cookies.replace @f[0].cookies}
426
- }
427
- }
428
- Curl.wait
429
- end
430
-
431
- def people
432
- # TODO
433
- # ... or not TODO?
434
- end
435
- # seems like NOT... LOL
436
-
437
- end
438
-
439
- module Downloaders
440
-
441
- def letitbit(path, &block)
442
- link = ''
443
- frame = Frame 'letitbit.net', {:cp => true, :eval => nil}, 1
444
- frame.run(path, :wait => !block) {|page1|
445
- page1.submit('#ifree_form', frame) {|page2|
446
- page2.submit('[action=/download4.php]', frame) {|page3|
447
- page3.submit('[action=/download3.php]', frame) {|page4|
448
- t = Thread.new {
449
- sleep 60
450
- frame.run({}, '/ajax/download3.php',
451
- :headers => {"Referer" => "http://letitbit.net/download3.php"}
452
- ) {|res|
453
- link << res.html
454
- block[link] if block
455
- }
456
- }
457
- t.join if !block
458
- }}}}
459
- link
460
- end
461
-
462
- module_function :letitbit
463
- end
464
-
465
- end
2
+ require 'rhack/services/base'