rhack 0.4.1 → 1.0.0.rc4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. data/.gitignore +22 -0
  2. data/Gemfile +2 -5
  3. data/LICENSE +19 -15
  4. data/README.md +66 -26
  5. data/Rakefile +42 -31
  6. data/config/cacert.pem +3895 -0
  7. data/config/rhack.yml.template +40 -0
  8. data/ext/curb-original/curb_config.h +3 -0
  9. data/ext/curb-original/curb_easy.c +3 -54
  10. data/ext/curb-original/curb_multi.c +69 -140
  11. data/ext/curb/curb_multi.c +1 -1
  12. data/lib/rhack.rb +82 -12
  13. data/lib/rhack/cookie.rb +49 -0
  14. data/lib/rhack/curl.rb +6 -0
  15. data/lib/{extensions/curb.rb → rhack/curl/easy.rb} +26 -48
  16. data/lib/rhack/curl/global.rb +175 -0
  17. data/lib/rhack/curl/itt.rb +11 -0
  18. data/lib/rhack/curl/multi.rb +37 -0
  19. data/lib/rhack/curl/post_field.rb +20 -0
  20. data/lib/rhack/curl/response.rb +91 -0
  21. data/lib/rhack/dl.rb +308 -0
  22. data/lib/rhack/frame.rb +316 -0
  23. data/lib/{extensions → rhack/js}/browser/env.js +0 -0
  24. data/lib/{extensions → rhack/js}/browser/jquery.js +0 -0
  25. data/lib/{extensions → rhack/js}/browser/xmlsax.js +0 -0
  26. data/lib/{extensions → rhack/js}/browser/xmlw3cdom_1.js +0 -0
  27. data/lib/{extensions → rhack/js}/browser/xmlw3cdom_2.js +0 -0
  28. data/lib/rhack/js/johnson.rb +71 -0
  29. data/lib/rhack/page.rb +263 -0
  30. data/lib/rhack/proxy.rb +3 -0
  31. data/lib/rhack/proxy/checker.rb +1 -1
  32. data/lib/rhack/scout.rb +342 -0
  33. data/lib/rhack/scout_squad.rb +98 -0
  34. data/lib/rhack/services.rb +1 -464
  35. data/lib/rhack/services/base.rb +59 -0
  36. data/lib/rhack/services/examples.rb +423 -0
  37. data/lib/rhack/version.rb +3 -0
  38. data/lib/rhack_in.rb +3 -2
  39. data/rhack.gemspec +28 -0
  40. metadata +104 -85
  41. data/.gemtest +0 -0
  42. data/Gemfile.lock +0 -23
  43. data/Manifest.txt +0 -60
  44. data/ext/curb/Makefile +0 -217
  45. data/lib/cache.rb +0 -44
  46. data/lib/curl-global.rb +0 -164
  47. data/lib/extensions/declarative.rb +0 -153
  48. data/lib/extensions/johnson.rb +0 -63
  49. data/lib/frame.rb +0 -848
  50. data/lib/init.rb +0 -49
  51. data/lib/rhack.yml.template +0 -19
  52. data/lib/scout.rb +0 -589
  53. data/lib/words.rb +0 -25
@@ -0,0 +1,59 @@
1
+ # encoding: utf-8
2
+
3
# Core extension: lets a class expose one of its constants through an
# instance-level reader that carries the constant's name.
class Class
  # Skipped when the method is already available (e.g. the file is loaded twice).
  unless defined?(alias_constant)
    # Defines an instance method called +name+ on the receiver.
    #
    # The generated method resolves the constant through <tt>self.class</tt>,
    # so a subclass that redefines the constant is honoured. Called without
    # an argument (or with a falsy one) it returns the constant itself;
    # called with a truthy +key+ it returns <tt>constant[key]</tt>.
    #
    #   class Site; URI = {:home => '/'}; alias_constant :URI; end
    #   Site.new.URI         # => {:home => '/'}
    #   Site.new.URI(:home)  # => '/'
    #
    # @param name [Symbol, String] name of the constant to expose
    def alias_constant(name)
      # File and line are passed along so that backtraces and
      # Method#source_location for the generated reader point at this
      # definition instead of an anonymous eval.
      class_eval <<-RUBY, __FILE__, __LINE__ + 1
        def #{name}(key = nil)
          key ? self.class::#{name}[key] : self.class::#{name}
        end
      RUBY
    end
  end
end
11
+
12
# Service calls always wait and return the processed response when they are
# invoked without a block. Otherwise the event-driven model is used and the
# processed response is passed to the block.
module RHACK

  # Base class for site-specific service wrappers. A service owns a frame
  # (+f+) used for every request and remembers the name of its default
  # action (+@service+). Subclasses are expected to define a +URI+ constant;
  # it is reachable from instances as URI() / URI(key) via alias_constant.
  class Service
    attr_accessor :f
    alias_constant :URI

    # @param service [Symbol] name of the default action; also used as a key
    #   into the subclass' URI table to pick the frame's location
    # @param frame an already built frame to reuse, or nil to build one
    # @param args extra arguments forwarded to Frame() when one is built
    def initialize(service, frame=nil, *args)
      @service = service
      # first argument should be a string so that frame won't be static
      @f = frame || Frame(URI(service) || URI(:login), *args)
    end

    # Usable only for sync requests.
    #
    # Clears the cookies of the frame's first scout, fetches URI(:login)
    # with :json switched off and :wait switched on, yields the resulting
    # page to the caller's block, optionally visits URI(:home), and finally
    # calls copy_cookies! on the frame. A block is required.
    def login(*)
      Curl.run
      @f[0].cookies.clear
      saved_json, saved_wait = @f.opts[:json], @f.opts[:wait]
      @f.opts[:json], @f.opts[:wait] = false, true
      begin
        yield @f.get(URI(:login))
        @f.get(URI(:home)) if URI(:home)
      ensure
        # Put the caller's options back even when the block or a request
        # raises; otherwise the frame would stay stuck in sync/non-json mode.
        @f.opts[:json], @f.opts[:wait] = saved_json, saved_wait
      end
      @f.copy_cookies!
    end

    # Invokes the default action (the method named by +@service+) with the
    # given arguments. A StandardError raised by it is handed to the logger
    # +L+ and followed by Curl.reload instead of propagating.
    def go(*args, &block)
      __send__(@service, *args, &block)
    rescue => error
      L < error
      Curl.reload
    end

    # Calls scrape_<service>(page), then follows next_url(page), when it
    # returns a URL, and scrapes the page fetched from it the same way.
    # Both scrape_<service> and next_url are expected from the subclass.
    def scrape!(page)
      __send__(:"scrape_#{@service}", page)
      if url = next_url(page)
        @f.get(url) {|next_page| scrape!(next_page)}
      end
    end

    def inspect
      "<##{self.class.self_name}:#{@service.to_s.camelize} service via #{@f.inspect}>"
    end

  end

  # Raised by services for request-level failures.
  # NOTE(review): inherits from Exception, not StandardError, so a bare
  # `rescue` (including the one in Service#go) does not catch it.
  class ServiceError < Exception; end

end
@@ -0,0 +1,423 @@
1
+ # encoding: utf-8
2
+ require 'rhack/services'
3
+
4
+ module RHACK
5
+
6
# Yandex wrapper: web search, the spell-checking JSON endpoint and the
# weather forecast pages.
class Yandex < Service
  __init__

  # Guarded so that loading the file again does not redefine the constants.
  unless defined? IGNORE_UPPERCASE
    URI = {
      :speller => "http://speller.yandex.net/services/spellservice.json/checkText",
      :search => "http://www.yandex.ru/yandsearch?lr=213&%s",
      :weather => "http://pogoda.yandex.ru/%d/details/"
    }

    # Bit flags for the speller's "options" parameter.
    IGNORE_UPPERCASE = 1
    IGNORE_DIGITS = 2
    IGNORE_URLS = 4
    FIND_REPEAT_WORDS = 8
    IGNORE_LATIN = 16
    NO_SUGGEST = 32
    FLAG_LATIN = 128
  end

  # @param service [Symbol] default action, :search unless given
  # @param frame an existing frame to reuse, or nil
  def initialize(service=:search, frame=nil)
    ua = RHACK.useragents.rand
    # Build a new string rather than appending with <<: presumably +rand+
    # hands back an element of the shared list, and << would alter that
    # entry for every later user of it.
    ua += " YB/4.2.0" if !ua["YB"]
    super service, frame, nil, ua, :ck => {
      "yandexuid"=>"3644005621268702222",
      "t"=>"p"
    }, :eval => false
  end

  # Runs a web search; +opts+ are merged with :text and url-encoded into the
  # query string. The fetched page goes through #process.
  def search(text, opts={}, &block)
    uri = URI.search % urlencode(opts.merge(:text=>text))
    @f.run(uri, :proc_result => block) {|page| process page}
  end

  # Turns a result page into an array of [href, title, snippet] triples.
  # Returns nil when page.html.b is falsy.
  def process(page)
    return unless page.html.b
    page.find('.p1/.cr').map {|n|
      snippet = n.at('.kk') || n.at('.k7/div')
      [n.at('.cs').href, n.at('.cs').text.strip, snippet.text.strip]
    }
  end

  # Posts +text+ to the speller in pieces produced by split_to_lines(10000)
  # and yields pg.hash of the response to the caller's block (required).
  # NOTE(review): the follow-up request for the next piece is issued through
  # @f.get without a block; whether its result ever reaches the caller
  # depends on Frame - confirm.
  def speller(text, opts=23)
    text = text.split_to_lines(10000)
    i = 0
    @f.run({"text" => text[i], "options" => opts}, URI.speller, :json => true) {|pg|
      yield pg.hash
      text[i+=1] && @f.get({"text" => text[i], "options" => opts}, URI.speller, :json => true)
    }
  end

  # Spell-checks the text nodes of +doc+ and rewrites the nodes in which a
  # misspelled word was replaced by the speller's first suggestion.
  # NOTE(review): +opts+ is forwarded as the speller's "options" value, yet
  # defaults to {} here while #speller itself defaults to 23 - confirm.
  def fix_content(doc, opts={})
    nodes = doc.root.text_nodes
    speller(nodes*". ", opts) {|json|
      fix = {}
      json.each {|h| fix[h.word] = h.s[0] if h.s[0]}
      nodes.each {|n|
        fixed = false
        text = n.text
        fix.each {|word, suggestion|
          # The word is escaped so regexp metacharacters in it are matched
          # literally, and the replacement is given as a block so that
          # backslashes in the suggestion are not read as back-references.
          fixed = true if text.gsub!(/\b#{Regexp.escape(word)}\b/) { suggestion }
        }
        n.text(text) if fixed
      }
    }
    Curl.wait
  end

  # Fetches the detailed forecast for +city+.
  #
  # @param city [Integer, String] numeric city code, or a name that is looked
  #   up in CitiesCodes when that constant is defined
  # @param day [Integer, nil] index into the grouped forecast; nil returns
  #   all groups
  # @raise [ServiceError] when a city name cannot be turned into a number
  def weather(city=27612, day=nil, &block)
    if city.is String
      city = CitiesCodes[city] if defined? CitiesCodes
      # Integer instead of Fixnum: Fixnum was removed from Ruby in 3.2.
      raise ServiceError, "can't get weather info for #{city.inspect}:#{city.class}" if !city.is(Integer)
    end
    @f.get(URI.weather%city, :proc_result => block) {|pg|
      ary = pg.find('//.b-forecast-details/tbody/tr{_["class"] =~ /t\d/}').map {|e|
        "#{e.at('.date') ? e.at('.date').text+":\n" : ''} - #{e.at('.t').text} - #{e.at('.data').text} - #{e.at('.wind/img').alt} #{e.at('.wind').text} м/с"
      }
      # first 12 rows are grouped by 4, the remaining ones by 2
      ary = ary[0..11].div(4) + ary[12..-1].div(2)
      day ? ary[day] : ary
    }
  end

  def self.weather(*args, &block)
    new(:weather).go(*args, &block)
  end

  def self.search(*args, &block)
    new.go(*args, &block)
  end

end
85
+
86
# Wrapper around the Google AJAX endpoints listed in URI: web search,
# language detection and translation. Responses are requested as JSON.
class Google < Service
  __init__
  URI = {
    :translate => "http://ajax.googleapis.com/ajax/services/language/translate?v=1.0&q=%s&langpair=%s%%7C%s",
    :search => "http://ajax.googleapis.com/ajax/services/search/web?v=1.0&hl=ru&q=%s",
    :detect => "http://ajax.googleapis.com/ajax/services/language/detect?v=1.0&q=%s"
  }

  # Short alias => site host.
  Shortcuts = Hash[*%w{
    v ru.wikipedia.org в ru.wikipedia.org вики en.wikipedia.org
    w en.wikipedia.org ев en.wikipedia.org wiki en.wikipedia.org
    lm lurkmore.ru лм lurkmore.ru
    wa world-art.ru ва world-art.ru
    ad anidb.info ад anidb.info
    ed encyclopediadramatica.com ед encyclopediadramatica.com
  }]

  # Language codes.
  Langs = *%w{
    af sq am ar hy az eu be bn bh bg my ca chr zh zh-CN zh-TW hr cs da dv nl en eo et tl fi fr gl ka de el gn gu iw hi hu is id iu it ja kn kk km ko ku ky lo lv lt mk ms ml mt mr mn ne no or ps fa pl pt-PT pa ro ru sa sr sd si sk sl es sw sv tg ta tl te th bo tr uk ur uz ug vi
  }

  def initialize(service=:search, frame=nil)
    super(service, frame, :json => true)
  end

  # Web search. With opts[:site] the query is restricted to that site.
  # The request block maps the results to [url, title, content] triples,
  # or gives nil when the response carries no data.
  def search(text, opts={}, &block)
    site = opts[:site]
    text = "site:#{site} #{text}" if site
    query_uri = URI.search % CGI.escape(text)
    @f.run(query_uri, :proc_result => block) do |page|
      data = page.hash.responseData.b
      data.results.map! { |res| [res.unescapedUrl, res.titleNoFormatting, res.content] } if data
    end
  end

  # Detects the language of +text+ (a String cut to its first 600
  # characters, or the first element of anything else). Waits for the
  # answer unless a block is given.
  def detect(text, wait=!block_given?, &block)
    sample = if text.is(String)
      text[0...600]
    else
      text[0]
    end
    query_uri = URI[:detect] % CGI.escape(sample)
    @f.run(query_uri, :proc_result => block, :wait => wait) do |page|
      (data = page.hash.responseData.b) && data.language
    end
  end

  # Translates +text+ into +to+. When +from+ is omitted the source language
  # is detected first. Text that is not already an Array is cut into blocks
  # with split_to_blocks(600, :syntax); each block is requested separately
  # and the translations are joined with newlines.
  def translate(text, to, from=nil, &block)
    text = text.split_to_blocks(600, :syntax) unless text.is Array
    unless from
      return detect(text) { |lang| yield translate(text, to, lang) } if block_given?
      return translate(text, to, detect(text).res)
    end

    res = []
    finished = 0
    text.each_with_index do |_piece, j|
      request_uri = URI.translate % [CGI.escape(text[j]), from, to]
      @f.run(request_uri, :proc_result => block, :wait => false) do |page|
        data = page.hash.responseData.b
        res[j] = data && data.translatedText
        finished += 1
        # only the last finished request hands over the joined text
        finished == text.size ? res * "\n" : :skip
      end
    end
    Curl.wait unless block_given?
    res * "\n"
  end

  def self.search(*args, &block)
    new.search(*args, &block)
  end

  def self.tr(*args, &block)
    new(:translate).translate(*args, &block)
  end

end
154
+
155
# Wrapper around the translation form at translation.infoseek.co.jp.
class Infoseek < Service
  URI = {:tr => 'http://translation.infoseek.co.jp/'}

  def initialize(frame=nil)
    super(:tr, frame, :eval => false)
  end

  # Remembers the value of the form's hidden "token" input.
  def get_token(page)
    @token = page.at('input[name=token]').value
  end

  # Submits +text+ to the form and reads the "converted" textarea of the
  # answer. While no token is known yet, the form page is requested first
  # and the call is repeated from inside that request's block.
  #
  # @param direction [Symbol] :from_ja, :from_jp and :to_en select form
  #   selector 1; anything else selects 0
  def tr(text, direction=:from_ja, &block)
    unless @token
      return @f.run(:save_result => !block) { |page|
        get_token page
        tr text, direction, &block
      }
    end

    selector = direction.in([:from_ja, :from_jp, :to_en]) ? 1 : 0
    body = {
      'ac' => 'Text',
      'lng' => 'en',
      'original' => text,
      'selector' => selector,
      'token' => @token,
      'submit' => ' 翻訳'
    }
    @f.run(body, :proc_result => block) { |page|
      # every answer carries the token for the next submission
      get_token page
      page.at('textarea[name=converted]').text
    }
  end

  def self.tr(*args, &block)
    new.tr(*args, &block)
  end

end
185
+
186
# Downloads YouTube videos and optionally converts them to tagged mp3 files
# with the external +ffmpeg+ program.
class Youtube < Service
  URI = {:info => "http://www.youtube.com/get_video_info?video_id=%s"}
  # [author, creator, title] of the last video looked up by #info.
  attr_reader :track

  def initialize(frame=nil)
    super :dl, frame, :eval => false
    @f.ss.each {|s| s.timeout=600}
    # loaded on first use of the service rather than at file load
    require 'open3'
    require 'mp3info'
  end

  # Downloads the video +id+ (a watch URL or a bare id) into +fd+, or into a
  # default path under files/youtube/ when +fd+ is nil. With a block the
  # lookup and the download are chained through blocks and the block is
  # forwarded to the download; without one the calls are made in sequence.
  def dl(id, fd=nil, &block)
    if block
      info(id) {|lnk| __dl(lnk, fd, block)}
    else
      __dl(info(id), fd)
    end
  end

  # Downloads the video and converts it to mp3 (unless the target file
  # already exists), then writes artist and title ID3v2 tags from +@track+.
  #
  # @param mp3 [String, nil] target file; defaults to the download's path
  #   with its .flv extension replaced by .mp3
  def dlmp3(id, mp3=nil)
    dl(id) {|flv|
      # literal ".flv" at the very end of the name only
      df = mp3 || flv.sub(/\.flv\z/, '.mp3')
      unless File.file?(df)
        # Arguments are passed as a list, bypassing the shell, so file names
        # containing quotes or other shell metacharacters cannot break (or
        # inject into) the command line.
        Open3.popen3('ffmpeg', '-i', flv, '-ab', '262144', '-ar', '44100', df) {|_stdin, _stdout, stderr|
          # stderr is always read to the end: leaving the block early closes
          # the pipe while ffmpeg is still writing its progress to it.
          last = nil
          until stderr.eof?
            last = stderr.gets2
            print last if $verbose && last && last[/^size=/]
          end
          puts "\n#{last}" if $verbose
        }
      end
      Mp3Info.open(df, :encoding=>'utf-8') {|tags|
        tags.tag2.TPE1, tags.tag2.TIT2 = @track[1..2]
      }
    }
  end

  def self.dl(id)
    new.dl(id)
  end

  def self.dlmp3(id)
    new.dlmp3(id)
  end

  private

  # Requests the video info for +id+, stores [author, creator, title] in
  # +@track+ and extracts a download link from the fmt_url_map field.
  def info(id, &block)
    video_id = id[/\/watch/] ? id.parseuri.query.v : File.basename(id).till(/[&?]/)
    @f.run(URI.info % [video_id], :hash => true, :proc_result => block) {|page|
      res = page.hash
      @track = [res.author, res.creator, res.title]
      CGI.unescape(res.fmt_url_map).split(/,\d+\|/)[0].after('|')
    }
  end

  # Hands the link to the frame's downloader; the default file name is
  # built from +@track+.
  def __dl(lnk, fd, block=nil)
    @f.dl(lnk, fd||"files/youtube/#{@track*' - '}.flv", :auto, 5, &block)
  end

end
238
+
239
# People search on vkontakte.ru: collects profile links for a query and
# then visits the profiles, keeping those whose data matches.
#
# NOTE(review): #login ships with hard-coded account credentials as its
# default argument - consider supplying them from configuration instead.
class VK < Service
  attr_reader :links, :open_links
  URI = {
    :people => "http://vkontakte.ru/gsearch.php?from=people&ajax=1",
    :login => "http://vkontakte.ru/index.php",
    :id => "http://vkontakte.ru%s"
  }
  DefaultParams = Hash[*%w[
    c[city] 1
    c[country] 1
    c[noiphone] 1
    c[photo] 1
    c[section] people
    c[sex] 1
    c[status] 6
  ]]
  # Page handler passed as :result to requests. The instance variables are
  # those of whatever object ReloadablePage evaluates the block against.
  # The names inside the logger strings are resolved through +binding+.
  @@reloadable = ReloadablePage {
    unless @title || @hash
      L << self
      L << @doc
    end
    if @hash == false || (@hash.nil? && (!@title || @title["Ошибка"]))
      L.info "@title caller.size", binding
      sleep 2
    end
  }

  def self.com
    new
  end

  class NotFoundError < Exception; end

  # Builds the frame and logs in straight away.
  def initialize(frame=nil)
    super :people, frame, {:cp => true, :relvl => 5, :eval => false}, 5
    @links = []
    @open_links = []
    login
  end

  # Submits the login form found on the login page with +params+, then
  # submits the form of the page that follows with no extra fields.
  def login(params={'email'=>'fshm@bk.ru', 'pass'=>'Riddick2', 'expire'=>nil})
    super do |login_page|
      login_page.submit('form', @f, params).submit('form', @f, {})
    end
  end

  # Requests result page number +pagenum+ (20 rows per page) for the search
  # parameters +h+, appends the profile links found to +@links+ and returns
  # them from the request block.
  def get_links(h, pagenum, &block)
    request = h.merge('offset' => pagenum*20)
    @f.run(request, URI[:people], :proc_result=>block, :result=>@@reloadable, :json => true) do |page|
      found_links = Page(page.hash.rows).get_links('.image/a')
      @links.concat found_links
      found_links
    end
  end

  # Searches people by +q+. Trailing arguments are split by get_opts into
  # an age range (17..23 unless given) and extra c[...] search options.
  # Links from the first page are stored, then up to 50 pages are requested.
  def people(q, *args, &block)
    age, opts = args.get_opts [17..23]
    h = DefaultParams.merge('c[q]' => q)
    h.merge! Hash[opts.map {|k,v| ["c[#{k}]", v]}]
    h['c[age_from]'], h['c[age_to]'] = age.first, age.last

    @f.run(h, URI[:people], :proc_result => block, :json => true) do |page|
      # the response may be a vague demand to log in
      sum = page.hash.summary.sub(/<span.+>/, '')
      puts sum
      found = sum[/\d+/]
      if found
        @links.concat Page(page.hash.rows).get_links('.image/a')
        last_page = [50, (found.to_f/20).ceil].min
        (1...last_page).each do |page_no|
          sleep 0.5
          get_links h, page_no, &block
        end
      else
        L.warn sum
      end
    end
  end

  # Fresh search: resets both link lists, runs #people, then #get_pages.
  def get_people(q, *opts)
    @links = []
    @open_links = []
    people q, *opts
    get_pages q
  end

  # Visits every distinct collected link, pausing between requests.
  def get_pages(q=nil)
    @links.uniq.each do |id|
      get_page id, q
      sleep 1.5
    end
  end

  # Visits the profile at +id+ and records it in +@open_links+ when its
  # data block matches +q+ and it does not look like a bot.
  #
  # The locals +p+, +data+ and +q+ keep their names on purpose: the debug
  # call below hands them to the logger by name together with +binding+.
  def get_page(id, q=nil)
    unless q.is Regexp
      q = q ? q.ci.to_re : //
    end
    id_num = id[/\d+/].to_i
    @f.get(id, :result=>@@reloadable) do |p|
      data = p.find('.profileTable//.dataWrap').to_a.b
      L.debug "!p.at('.basicInfo//.alertmsg') data.contents.join('')[/(\\d\\s*){6,}/] data.contents.join('')[q]", binding if data

      data = p.find('.profileTable//.dataWrap').b
      contents = data && data.to_a.contents.join.b
      if contents && contents[q]
        digits = contents[/(\d *){6,9}/]
        # a bare 7-digit number on an id inside this range is treated as a bot
        bot = (digits && digits[/^\d{7}$/] && id_num.between(852e5, 893e5))
        if !bot && (!p.at('.basicInfo//.alertmsg') || digits)
          L << "added vk.com#{id}"
          @open_links << id
        elsif bot
          L << "bot #{id_num} detected"
        else
          tick!
        end
      else
        tick!
      end
    end
  end

end
350
+
351
# Unfinished wrapper for mamba.ru: only construction and login exist,
# #people is an empty stub.
#
# NOTE(review): account credentials and session cookies are hard-coded
# below - consider supplying them from configuration instead.
class Mamba < Service
  attr_reader :links, :open_links
  @@login, @@pass = %w{AnotherOneUser AyaHirano8}
  URI = {
    :people => "http://mamba.ru/?",
    :login => "http://mamba.ru/tips/?tip=Login",
    :id => "http://vk.com%s"
  }
  DefaultParams = Hash[*%w[
    c[city] 1
    c[country] 1
    c[noiphone] 1
    c[photo] 1
    c[section] people
    c[sex] 1
    c[status] 6
  ]]

  # Builds the frame with a preset cookie set, a 5 second timeout and a
  # retry on 'TimeoutError'.
  def initialize(frame=nil)
    preset_cookies = {
      "PREV_LOGIN"=>"anotheroneuser",
      "LOGIN"=>"anotheroneuser",
      "UID"=>"494809761",
      "LEVEL"=>"Low",
      "bar"=>"AShwjUz54RmYnfClOdlMYZylGUU90PUxeFkwlGixrP2ARHDs3A0EbDDxQTEksEm4LPT8FfzpfdiMME1omFz0tVhA5QjcsCgckaSQfIDxI",
      "s"=>"MJt2J3U9Pnk7Qvpie13lN7rrqmahTrAk",
      "SECRET"=>"adqH47"
    }
    frame_opts = {:cp=>preset_cookies, :eval=>false, :timeout=>5, :retry=>['TimeoutError']}
    super(:people, frame, frame_opts, 5)
    @links = []
    @open_links = []
  end

  # Submits the login form, then copies the cookies of the frame's first
  # scout into every scout of the frame. Waits for the requests to finish.
  def login
    @f.run(URI[:login]) do |p|
      p.submit('.ap-t-c//form', @f, 'login'=>@@login, 'password'=>@@pass, 'level'=>nil) do
        @f.each { |s| s.cookies.replace @f[0].cookies }
      end
    end
    Curl.wait
  end

  # TODO: not implemented (and, per the original author's note, probably
  # never will be).
  def people
  end

end
394
+
395
+
396
+
397
# File-hosting helpers, callable as module functions.
module Downloaders

  # Walks through the free-download forms of letitbit.net for +path+ and,
  # after a 60 second pause, requests /ajax/download3.php for the link.
  #
  # Returns the +link+ string, which is filled in from the last response.
  # With a block the first request is made with :wait => false and the
  # block is called with the link once it arrives; without a block the
  # request waits and the helper thread is joined.
  def letitbit(path, &block)
    link = ''
    frame = Frame('letitbit.net', {:cp => true, :eval => nil}, 1)
    frame.run(path, :wait => !block) do |landing|
      landing.submit('#ifree_form', frame) do |step2|
        step2.submit('[action=/download4.php]', frame) do |step3|
          step3.submit('[action=/download3.php]', frame) do |_step4|
            waiter = Thread.new do
              sleep 60
              referer = {"Referer" => "http://letitbit.net/download3.php"}
              frame.run({}, '/ajax/download3.php', :headers => referer) do |res|
                link << res.html
                block[link] if block
              end
            end
            waiter.join unless block
          end
        end
      end
    end
    link
  end

  module_function :letitbit
end
422
+
423
+ end