rhack 0.4.1 → 1.0.0.rc4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (53) hide show
  1. data/.gitignore +22 -0
  2. data/Gemfile +2 -5
  3. data/LICENSE +19 -15
  4. data/README.md +66 -26
  5. data/Rakefile +42 -31
  6. data/config/cacert.pem +3895 -0
  7. data/config/rhack.yml.template +40 -0
  8. data/ext/curb-original/curb_config.h +3 -0
  9. data/ext/curb-original/curb_easy.c +3 -54
  10. data/ext/curb-original/curb_multi.c +69 -140
  11. data/ext/curb/curb_multi.c +1 -1
  12. data/lib/rhack.rb +82 -12
  13. data/lib/rhack/cookie.rb +49 -0
  14. data/lib/rhack/curl.rb +6 -0
  15. data/lib/{extensions/curb.rb → rhack/curl/easy.rb} +26 -48
  16. data/lib/rhack/curl/global.rb +175 -0
  17. data/lib/rhack/curl/itt.rb +11 -0
  18. data/lib/rhack/curl/multi.rb +37 -0
  19. data/lib/rhack/curl/post_field.rb +20 -0
  20. data/lib/rhack/curl/response.rb +91 -0
  21. data/lib/rhack/dl.rb +308 -0
  22. data/lib/rhack/frame.rb +316 -0
  23. data/lib/{extensions → rhack/js}/browser/env.js +0 -0
  24. data/lib/{extensions → rhack/js}/browser/jquery.js +0 -0
  25. data/lib/{extensions → rhack/js}/browser/xmlsax.js +0 -0
  26. data/lib/{extensions → rhack/js}/browser/xmlw3cdom_1.js +0 -0
  27. data/lib/{extensions → rhack/js}/browser/xmlw3cdom_2.js +0 -0
  28. data/lib/rhack/js/johnson.rb +71 -0
  29. data/lib/rhack/page.rb +263 -0
  30. data/lib/rhack/proxy.rb +3 -0
  31. data/lib/rhack/proxy/checker.rb +1 -1
  32. data/lib/rhack/scout.rb +342 -0
  33. data/lib/rhack/scout_squad.rb +98 -0
  34. data/lib/rhack/services.rb +1 -464
  35. data/lib/rhack/services/base.rb +59 -0
  36. data/lib/rhack/services/examples.rb +423 -0
  37. data/lib/rhack/version.rb +3 -0
  38. data/lib/rhack_in.rb +3 -2
  39. data/rhack.gemspec +28 -0
  40. metadata +104 -85
  41. data/.gemtest +0 -0
  42. data/Gemfile.lock +0 -23
  43. data/Manifest.txt +0 -60
  44. data/ext/curb/Makefile +0 -217
  45. data/lib/cache.rb +0 -44
  46. data/lib/curl-global.rb +0 -164
  47. data/lib/extensions/declarative.rb +0 -153
  48. data/lib/extensions/johnson.rb +0 -63
  49. data/lib/frame.rb +0 -848
  50. data/lib/init.rb +0 -49
  51. data/lib/rhack.yml.template +0 -19
  52. data/lib/scout.rb +0 -589
  53. data/lib/words.rb +0 -25
@@ -0,0 +1,59 @@
1
+ # encoding: utf-8
2
+
3
class Class
  # Defines an instance method named after the constant +name+ that reads that
  # constant from the receiver's own class, so subclasses may override the
  # constant and instances still see their class's version.
  #
  # The generated method takes one optional argument:
  #   obj.NAME       # => the constant itself
  #   obj.NAME(key)  # => constant[key]
  #
  # Built with define_method rather than evaluating an interpolated string.
  # The definition is skipped if some other library has already provided
  # Class#alias_constant.
  #
  # name - Symbol or String naming the constant (e.g. :URI).
  unless method_defined?(:alias_constant)
    def alias_constant(name)
      define_method(name) do |key = nil|
        constant = self.class.const_get(name)
        key ? constant[key] : constant
      end
    end
  end
end
11
+
12
# Service calls always wait and return the processed response when they are
# called without a block. Otherwise an event-driven model is used and the
# processed response is passed to the block.
module RHACK

  # Base class for the concrete services. A subclass is expected to define a
  # URI constant (a Hash of endpoints) and one instance method per service name.
  class Service
    # The frame used to perform requests.
    attr_accessor :f
    # Generates #URI(key = nil), which reads the URI constant of the actual
    # (sub)class of the receiver.
    alias_constant :URI

    # service - name of the service; used as the method name dispatched by #go
    #           and as the key into URI.
    # frame   - an existing frame to reuse; when nil a new one is created with
    #           Frame(...) from URI(service), falling back to URI(:login).
    # args    - extra arguments forwarded to Frame(...).
    def initialize(service, frame=nil, *args)
      @service = service
      # first argument should be a string so that frame won't be static
      @f = frame || Frame(URI(service) || URI(:login), *args)
    end

    # Usable only for sync requests.
    #
    # Clears the cookies of the first scout, fetches URI(:login) with
    # :json => false and :wait => true, yields the resulting page to the block
    # (which is expected to submit the login form), optionally visits
    # URI(:home), and finally calls @f.copy_cookies!.
    #
    # The previous :json / :wait options are restored in an `ensure`, so a
    # failure inside the block or a request no longer leaves the frame stuck
    # with the temporary options.
    def login(*)
      Curl.run
      @f[0].cookies.clear
      json, wait = @f.opts[:json], @f.opts[:wait]
      @f.opts[:json], @f.opts[:wait] = false, true
      begin
        yield @f.get(URI(:login))
        @f.get(URI(:home)) if URI(:home)
      ensure
        @f.opts[:json], @f.opts[:wait] = json, wait
      end
      @f.copy_cookies!
    end

    # Dispatches to the instance method named after @service.
    # Any StandardError is handed to the logger L and followed by Curl.reload;
    # in that case the return value is whatever Curl.reload returns.
    def go(*args, &block)
      __send__(@service, *args, &block)
    rescue
      L < $!
      Curl.reload
    end

    # Calls scrape_<service>(page), then, while next_url(page) returns a URL,
    # fetches it and scrapes the following page the same way.
    # NOTE(review): next_url and the scrape_* methods are not defined in this
    # file; presumably subclasses provide them.
    def scrape!(page)
      __send__(:"scrape_#{@service}", page)
      if url = next_url(page)
        @f.get(url) {|next_page| scrape!(next_page)}
      end
    end

    def inspect
      "<##{self.class.self_name}:#{@service.to_s.camelize} service via #{@f.inspect}>"
    end

  end

  # Inherits directly from Exception, so the bare `rescue` in Service#go does
  # not catch it.
  class ServiceError < Exception; end

end
@@ -0,0 +1,423 @@
1
+ # encoding: utf-8
2
+ require 'rhack/services'
3
+
4
+ module RHACK
5
+
6
# Yandex services: web search, spell checker and weather forecast.
class Yandex < Service
  __init__

  # Guarded so that loading this file again does not redefine the constants.
  unless defined? IGNORE_UPPERCASE
    URI = {
      :speller => "http://speller.yandex.net/services/spellservice.json/checkText",
      :search => "http://www.yandex.ru/yandsearch?lr=213&%s",
      :weather => "http://pogoda.yandex.ru/%d/details/"
    }

    # Bit flags; presumably meant to be summed into the speller `options`
    # value (see the `opts` argument of #speller) -- TODO confirm.
    IGNORE_UPPERCASE = 1
    IGNORE_DIGITS = 2
    IGNORE_URLS = 4
    FIND_REPEAT_WORDS = 8
    IGNORE_LATIN = 16
    NO_SUGGEST = 32
    FLAG_LATIN = 128
  end

  # service - :search (default), :speller or :weather.
  # frame   - optional existing frame to reuse.
  def initialize(service=:search, frame=nil)
    ua = RHACK.useragents.rand
    # Build a new string instead of appending in place: the picked user agent
    # belongs to the shared RHACK.useragents list and must not be modified.
    ua += " YB/4.2.0" if !ua["YB"]
    super service, frame, nil, ua, :ck => {
      "yandexuid"=>"3644005621268702222",
      "t"=>"p"
    }, :eval => false
  end

  # Runs a web search for +text+; +opts+ are merged into the query string.
  # The fetched page is passed through #process.
  def search(text, opts={}, &block)
    uri = URI.search % urlencode(opts.merge(:text=>text))
    @f.run(uri, :proc_result => block) {|page| process page}
  end

  # Extracts [href, title, snippet] triples from a result page.
  # Returns nil when page.html.b is falsy.
  def process page
    page.find('.p1/.cr').map {|n| [n.at('.cs').href, n.at('.cs').text.strip, (n.at('.kk') || n.at('.k7/div')).text.strip]} if page.html.b
  end

  # Sends +text+, split by split_to_lines(10000), to the speller and yields
  # the parsed response of the first request. A block is required.
  # NOTE(review): requests for the following chunks are issued without a
  # block, so their responses do not appear to be yielded -- confirm intent.
  def speller(text, opts=23)
    text = text.split_to_lines(10000)
    i = 0
    @f.run({"text" => text[i], "options" => opts}, URI.speller, :json => true) {|pg|
      yield pg.hash
      text[i+=1] && @f.get({"text" => text[i], "options" => opts}, URI.speller, :json => true)
    }
  end

  # Spell-checks every text node of +doc+ and replaces misspelled words with
  # the first suggestion returned by the speller.
  # NOTE(review): the default `opts={}` is forwarded as the speller `options`
  # value, whereas #speller itself defaults to the integer 23 -- confirm.
  def fix_content(doc, opts={})
    nodes = doc.root.text_nodes
    speller(nodes*". ", opts) {|json|
      fix = {}
      json.each {|h| fix[h.word] = h.s[0] if h.s[0]}
      nodes.each {|n|
        fixed = false
        text = n.text
        # The word comes from a remote response: escape it so that regexp
        # metacharacters in it are matched literally.
        fix.each {|k, v| fixed = true if text.gsub!(/\b#{Regexp.escape(k)}\b/, v)}
        n.text(text) if fixed
      }
    }
    Curl.wait
  end

  # Fetches the detailed forecast for +city+.
  #
  # city - numeric code used in the forecast URL, or a String looked up in
  #        CitiesCodes when that constant is defined.
  # day  - optional index into the resulting array.
  #
  # Raises ServiceError when a String city cannot be resolved to an integer.
  def weather city=27612, day=nil, &block
    if city.is String
      city = CitiesCodes[city] if defined? CitiesCodes
      # Integer instead of Fixnum: Fixnum was removed in Ruby 3.2.
      raise ServiceError, "can't get weather info for #{city.inspect}:#{city.class}" if !city.is(Integer)
    end
    @f.get(URI.weather%city, :proc_result => block) {|pg|
      ary = pg.find('//.b-forecast-details/tbody/tr{_["class"] =~ /t\d/}').map {|e|
        "#{e.at('.date') ? e.at('.date').text+":\n" : ''} - #{e.at('.t').text} - #{e.at('.data').text} - #{e.at('.wind/img').alt} #{e.at('.wind').text} м/с"
      }
      # ary[12..-1] is nil when fewer than 12 rows were found.
      ary = ary[0..11].div(4) + (ary[12..-1] || []).div(2)
      day ? ary[day] : ary
    }#.res
  end

  def self.weather(*args, &block)
    new(:weather).go(*args, &block)
  end

  def self.search(*args, &block)
    new.go(*args, &block)
  end

end
85
+
86
# Google AJAX API services: web search, language detection and translation.
class Google < Service
  __init__
  URI = {
    :translate => "http://ajax.googleapis.com/ajax/services/language/translate?v=1.0&q=%s&langpair=%s%%7C%s",
    :search => "http://ajax.googleapis.com/ajax/services/search/web?v=1.0&hl=ru&q=%s",
    :detect => "http://ajax.googleapis.com/ajax/services/language/detect?v=1.0&q=%s"
  }

  # Short aliases mapped to site host names.
  Shortcuts = {
    'v'    => 'ru.wikipedia.org',
    'в'    => 'ru.wikipedia.org',
    'вики' => 'en.wikipedia.org',
    'w'    => 'en.wikipedia.org',
    'ев'   => 'en.wikipedia.org',
    'wiki' => 'en.wikipedia.org',
    'lm'   => 'lurkmore.ru',
    'лм'   => 'lurkmore.ru',
    'wa'   => 'world-art.ru',
    'ва'   => 'world-art.ru',
    'ad'   => 'anidb.info',
    'ад'   => 'anidb.info',
    'ed'   => 'encyclopediadramatica.com',
    'ед'   => 'encyclopediadramatica.com'
  }

  # Language codes.
  Langs = %w{
    af sq am ar hy az eu be bn bh bg my ca chr zh zh-CN zh-TW hr cs da dv nl en eo et tl fi fr gl ka de el gn gu iw hi hu is id iu it ja kn kk km ko ku ky lo lv lt mk ms ml mt mr mn ne no or ps fa pl pt-PT pa ro ru sa sr sd si sk sl es sw sv tg ta tl te th bo tr uk ur uz ug vi
  }

  # service - :search (default), :detect or :translate.
  # frame   - optional existing frame to reuse.
  def initialize(service=:search, frame=nil)
    super(service, frame, :json => true)
  end

  # Searches the web for +text+; opts[:site] restricts the search to one site.
  # The request block maps the results to [url, title, content] triples and
  # returns nil when the response carries no data.
  def search(text, opts={}, &block)
    query = opts[:site] ? "site:#{opts[:site]} #{text}" : text
    uri = URI.search % CGI.escape(query)
    @f.run(uri, :proc_result => block) do |page|
      data = page.hash.responseData.b
      data.results.map! {|res| [res.unescapedUrl, res.titleNoFormatting, res.content]} if data
    end#.res
  end

  # Detects the language of +text+ (the first 600 characters of a String, or
  # the first element of anything else).
  def detect(text, wait=!block_given?, &block)
    sample = text.is(String) ? text[0...600] : text[0]
    uri = URI[:detect] % CGI.escape(sample)
    @f.run(uri, :proc_result => block, :wait => wait) do |page|
      data = page.hash.responseData.b
      data && data.language
    end
  end

  # Translates +text+ into the language +to+. When +from+ is not given the
  # source language is detected first. Text that is not already an Array is
  # split into blocks of 600; every block is requested separately and the
  # pieces are joined with "\n".
  def translate(text, to, from=nil, &block)
    text = text.split_to_blocks(600, :syntax) unless text.is(Array)

    unless from
      if block_given?
        return detect(text) {|lang| yield translate(text, to, lang)}
      end
      return translate(text, to, detect(text).res)
    end

    res = []
    done = 0
    text.each_with_index do |chunk, idx|
      uri = URI.translate % [CGI.escape(chunk), from, to]
      @f.run(uri, :proc_result => block, :wait => false) do |page|
        data = page.hash.responseData.b
        res[idx] = data && data.translatedText
        done += 1
        # Only the response that completes the set produces the joined text.
        done == text.size ? res*"\n" : :skip
      end
    end
    Curl.wait unless block_given?
    res*"\n"
  end

  def self.search(*args, &block)
    new.search(*args, &block)
  end

  def self.tr(*args, &block)
    new(:translate).translate(*args, &block)
  end

end
154
+
155
# Translation through translation.infoseek.co.jp.
class Infoseek < Service
  URI = {:tr => 'http://translation.infoseek.co.jp/'}

  def initialize(frame=nil)
    super(:tr, frame, :eval => false)
  end

  # Remembers the value of the `token` form input found on +page+.
  def get_token(page)
    @token = page.at('input[name=token]').value
  end

  # Translates +text+. Directions :from_ja, :from_jp and :to_en select
  # selector 1, anything else selector 0.
  #
  # Without a stored token the start page is requested first to obtain one,
  # and the translation is then retried from inside that request's block.
  def tr(text, direction=:from_ja, &block)
    unless @token
      return @f.run(:save_result => !block) do |page|
        get_token(page)
        tr(text, direction, &block)
      end#.res
    end

    selector = direction.in([:from_ja, :from_jp, :to_en]) ? 1 : 0
    body = {
      'ac' => 'Text',
      'lng' => 'en',
      'original' => text,
      'selector' => selector,
      'token' => @token,
      'submit' => ' 翻訳'
    }
    @f.run(body, :proc_result => block) do |page|
      # Every response carries a fresh token for the next request.
      get_token(page)
      page.at('textarea[name=converted]').text
    end#.res
  end

  def self.tr(*args, &block)
    new.tr(*args, &block)
  end

end
185
+
186
# Downloads YouTube videos and optionally converts them to tagged mp3 files.
class Youtube < Service
  URI = {:info => "http://www.youtube.com/get_video_info?video_id=%s"}
  # [author, creator, title] of the last video whose info was fetched.
  attr_reader :track

  def initialize frame=nil
    super :dl, frame, :eval => false
    @f.ss.each {|s| s.timeout=600}
    # Loaded lazily so that these libraries are only needed when this service is used.
    require 'open3'
    require 'mp3info'
  end

  # Downloads the video +id+ (a watch URL or a video id) to +fd+, or to a
  # default path under files/youtube/ when +fd+ is nil. With a block the
  # download is started from the info callback and the block is forwarded
  # to the frame's download call.
  def dl(id, fd=nil, &block)
    if block
      info(id) {|lnk| __dl(lnk, fd, block)}
    else __dl(info(id), fd)
    end
  end

  # Downloads the video and converts it to mp3 with ffmpeg, unless the target
  # file already exists, then writes the TPE1/TIT2 tags from @track.
  #
  # mp3 - target file name; defaults to the .flv name with an .mp3 extension.
  def dlmp3(id, mp3=nil)
    dl(id) {|flv|
      # The dot is escaped so that only a real ".flv" extension is replaced.
      df = mp3 || flv.sub(/\.flv$/, '.mp3')
      if !File.file?(df)
        # The command is passed as an argument list rather than a shell
        # string: file names are built from video titles and may contain
        # quotes or other shell metacharacters.
        Open3.popen3('ffmpeg', '-i', flv, '-ab', '262144', '-ar', '44100', df) {|i,o,e|
          if $verbose
            t = nil
            until e.eof?
              t = e.gets2
              print t if t && t[/^size=/]
            end
            puts "\n#{t}"
          else
            # Drain stderr until ffmpeg exits, so the pipe is not closed
            # underneath it while it is still converting.
            e.read
          end
        }
      end
      Mp3Info.open(df, :encoding=>'utf-8') {|tags|
        tags.tag2.TPE1, tags.tag2.TIT2 = @track[1..2]
      }
    }
  end

  def self.dl(id)
    new.dl(id)
  end

  def self.dlmp3(id)
    new.dlmp3(id)
  end

private
  # Requests the video info, stores @track and produces the first download
  # URL found in fmt_url_map.
  def info(id, &block)
    video_id = id[/\/watch/] ? id.parseuri.query.v : File.basename(id).till(/[&?]/)
    @f.run(URI.info%[video_id], :hash=>true, :proc_result=>block) {|p|
      res = p.hash
      @track = [res.author, res.creator, res.title]
      CGI.unescape(res.fmt_url_map).split(/,\d+\|/)[0].after('|')
    }#.res
  end

  # Starts the actual download of +lnk+ through the frame.
  def __dl(lnk,fd,block=nil)
    @f.dl(lnk, fd||"files/youtube/#{@track*' - '}.flv", :auto, 5, &block)
  end

end
238
+
239
# People search on vkontakte.ru.
class VK < Service
  attr_reader :links, :open_links
  URI = {
    :people => "http://vkontakte.ru/gsearch.php?from=people&ajax=1",
    :login => "http://vkontakte.ru/index.php",
    :id => "http://vkontakte.ru%s"
  }
  # Default search form parameters.
  DefaultParams = {
    'c[city]'     => '1',
    'c[country]'  => '1',
    'c[noiphone]' => '1',
    'c[photo]'    => '1',
    'c[section]'  => 'people',
    'c[sex]'      => '1',
    'c[status]'   => '6'
  }
  # Passed as :result to requests. The block reads @title, @hash and @doc,
  # so it is presumably evaluated in the context of the loaded page.
  # The strings given to L together with `binding` name variables of this
  # scope and must be kept in sync with them.
  @@reloadable = ReloadablePage {
    unless @title || @hash
      L << self
      L << @doc
    end
    if @hash == false || (@hash.nil? && (!@title || @title["Ошибка"]))
      L.info "@title caller.size", binding
      sleep 2
    end
  }

  def self.com
    new
  end

  class NotFoundError < Exception; end

  # Creates the frame and logs in immediately.
  def initialize(frame=nil)
    super(:people, frame, {:cp => true, :relvl => 5, :eval => false}, 5)
    @links = []
    @open_links = []
    login
  end

  # Submits the login form found on the login page, then the form of the
  # page that follows.
  # SECURITY(review): account credentials are hard-coded as the default
  # argument; they should come from configuration instead.
  def login(params={'email'=>'fshm@bk.ru', 'pass'=>'Riddick2', 'expire'=>nil})
    super do |login_page|
      login_page.submit('form', @f, params).submit('form', @f, {})
    end
  end

  # Requests result page number +pagenum+ (20 rows per page) for the search
  # parameters +h+ and appends the profile links found there to @links.
  def get_links(h, pagenum, &block)
    query = h.merge('offset' => pagenum*20)
    @f.run(query, URI[:people], :proc_result=>block, :result=>@@reloadable, :json => true) do |page|
      found_links = Page(page.hash.rows).get_links('.image/a')
      @links.concat(found_links)
      found_links
    end
  end

  # Searches people by +q+. +args+ may carry an age range (default 17..23)
  # and extra search parameters, each of which is wrapped as c[<key>].
  # At most 50 result pages are walked.
  def people(q, *args, &block)
    age, opts = args.get_opts([17..23])
    h = DefaultParams.merge('c[q]' => q)
    h.merge!(Hash[opts.map {|k,v| ["c[#{k}]", v]}])
    h['c[age_from]'] = age.first
    h['c[age_to]'] = age.last

    @f.run(h, URI[:people], :proc_result => block, :json => true) do |page|
      # the response may be a vague demand to log in
      sum = page.hash.summary.sub(/<span.+>/, '')
      puts sum
      found = sum[/\d+/]
      if found
        @links.concat Page(page.hash.rows).get_links('.image/a')
        max_page = [50, (found.to_f/20).ceil].min
        (1...max_page).each do |pagenum|
          sleep 0.5
          get_links(h, pagenum, &block)
        end
      else
        L.warn sum
      end
    end
  end

  # Resets the collected links, runs the search and then visits every found
  # profile.
  def get_people(q, *opts)
    @links = []
    @open_links = []
    people(q, *opts)
    get_pages(q)
  end

  # Visits every unique collected link, pausing 1.5 seconds after each.
  def get_pages(q=nil)
    @links.uniq.each do |id|
      get_page(id, q)
      sleep 1.5
    end
  end

  # Loads the profile +id+ and records it in @open_links when the profile
  # data matches +q+ and the profile does not look like a bot.
  # Local names `p`, `data` and `q` are referenced by the L.debug string
  # evaluated against `binding`, so they must not be renamed.
  def get_page(id, q=nil)
    q = (q ? q.ci.to_re : //) unless q.is(Regexp)
    id_num = id[/\d+/].to_i
    @f.get(id, :result=>@@reloadable) {|p|
      data = p.find('.profileTable//.dataWrap').to_a.b
      if data
        L.debug "!p.at('.basicInfo//.alertmsg') data.contents.join('')[/(\\d\\s*){6,}/] data.contents.join('')[q]", binding
      end
      if (data = p.find('.profileTable//.dataWrap').b) &&
         (contents = data.to_a.contents.join.b) && contents[q]
        digits = contents[/(\d *){6,9}/]
        bot = digits && digits[/^\d{7}$/] && id_num.between(852e5, 893e5)
        if !bot && (!p.at('.basicInfo//.alertmsg') || digits)
          L << "added vk.com#{id}"
          @open_links << id
        elsif bot
          L << "bot #{id_num} detected"
        else
          tick!
        end
      else
        tick!
      end
    }
  end

end
350
+
351
# Unfinished service for mamba.ru: only the login is implemented.
# SECURITY(review): the account credentials and session cookies below are
# hard-coded; they should come from configuration instead.
class Mamba < Service
  attr_reader :links, :open_links
  @@login, @@pass = 'AnotherOneUser', 'AyaHirano8'
  URI = {
    :people => "http://mamba.ru/?",
    :login => "http://mamba.ru/tips/?tip=Login",
    :id => "http://vk.com%s"
  }
  DefaultParams = {
    'c[city]'     => '1',
    'c[country]'  => '1',
    'c[noiphone]' => '1',
    'c[photo]'    => '1',
    'c[section]'  => 'people',
    'c[sex]'      => '1',
    'c[status]'   => '6'
  }

  # Creates the frame with a preset cookie jar, a 5 second timeout and a
  # retry on 'TimeoutError'.
  def initialize(frame=nil)
    cookies = {
      "PREV_LOGIN"=>"anotheroneuser",
      "LOGIN"=>"anotheroneuser",
      "UID"=>"494809761",
      "LEVEL"=>"Low",
      "bar"=>"AShwjUz54RmYnfClOdlMYZylGUU90PUxeFkwlGixrP2ARHDs3A0EbDDxQTEksEm4LPT8FfzpfdiMME1omFz0tVhA5QjcsCgckaSQfIDxI",
      "s"=>"MJt2J3U9Pnk7Qvpie13lN7rrqmahTrAk",
      "SECRET"=>"adqH47"
    }
    frame_opts = {:cp=>cookies, :eval=>false, :timeout=>5, :retry=>['TimeoutError']}
    super(:people, frame, frame_opts, 5)
    @links = []
    @open_links = []
  end

  # Submits the login form, copies the cookies of the first scout to every
  # scout of the frame once the form has been submitted, and waits for the
  # requests to finish.
  def login
    @f.run(URI[:login]) do |page|
      page.submit('.ap-t-c//form', @f, 'login'=>@@login, 'password'=>@@pass, 'level'=>nil) do
        @f.each {|s| s.cookies.replace @f[0].cookies}
      end
    end
    Curl.wait
  end

  # TODO: not implemented; the original author noted this will probably
  # never be done.
  def people
  end

end
394
+
395
+
396
+
397
# Helpers that walk through the free-download pages of file hosting sites.
module Downloaders

  # Walks through the chain of free-download forms on letitbit.net starting
  # at +path+, waits 60 seconds in a separate thread and then requests
  # /ajax/download3.php, whose body is appended to the returned string.
  #
  # With a block the initial request is started with :wait => false and the
  # block is called with the link once it is known; without a block the
  # waiting thread is joined inside the last form callback.
  #
  # Returns the link String; it is filled in by the callbacks, so it may
  # still be empty at return time when a block was given.
  def letitbit(path, &block)
    link = ''
    frame = Frame('letitbit.net', {:cp => true, :eval => nil}, 1)
    frame.run(path, :wait => !block) do |landing|
      landing.submit('#ifree_form', frame) do |free_page|
        free_page.submit('[action=/download4.php]', frame) do |step4|
          step4.submit('[action=/download3.php]', frame) do |_step3|
            waiter = Thread.new do
              sleep 60
              frame.run({}, '/ajax/download3.php',
                :headers => {"Referer" => "http://letitbit.net/download3.php"}
              ) do |res|
                link << res.html
                block.call(link) if block
              end
            end
            waiter.join unless block
          end
        end
      end
    end
    link
  end

  module_function :letitbit
end
422
+
423
+ end