rhack 0.4.1 → 1.0.0.rc4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. data/.gitignore +22 -0
  2. data/Gemfile +2 -5
  3. data/LICENSE +19 -15
  4. data/README.md +66 -26
  5. data/Rakefile +42 -31
  6. data/config/cacert.pem +3895 -0
  7. data/config/rhack.yml.template +40 -0
  8. data/ext/curb-original/curb_config.h +3 -0
  9. data/ext/curb-original/curb_easy.c +3 -54
  10. data/ext/curb-original/curb_multi.c +69 -140
  11. data/ext/curb/curb_multi.c +1 -1
  12. data/lib/rhack.rb +82 -12
  13. data/lib/rhack/cookie.rb +49 -0
  14. data/lib/rhack/curl.rb +6 -0
  15. data/lib/{extensions/curb.rb → rhack/curl/easy.rb} +26 -48
  16. data/lib/rhack/curl/global.rb +175 -0
  17. data/lib/rhack/curl/itt.rb +11 -0
  18. data/lib/rhack/curl/multi.rb +37 -0
  19. data/lib/rhack/curl/post_field.rb +20 -0
  20. data/lib/rhack/curl/response.rb +91 -0
  21. data/lib/rhack/dl.rb +308 -0
  22. data/lib/rhack/frame.rb +316 -0
  23. data/lib/{extensions → rhack/js}/browser/env.js +0 -0
  24. data/lib/{extensions → rhack/js}/browser/jquery.js +0 -0
  25. data/lib/{extensions → rhack/js}/browser/xmlsax.js +0 -0
  26. data/lib/{extensions → rhack/js}/browser/xmlw3cdom_1.js +0 -0
  27. data/lib/{extensions → rhack/js}/browser/xmlw3cdom_2.js +0 -0
  28. data/lib/rhack/js/johnson.rb +71 -0
  29. data/lib/rhack/page.rb +263 -0
  30. data/lib/rhack/proxy.rb +3 -0
  31. data/lib/rhack/proxy/checker.rb +1 -1
  32. data/lib/rhack/scout.rb +342 -0
  33. data/lib/rhack/scout_squad.rb +98 -0
  34. data/lib/rhack/services.rb +1 -464
  35. data/lib/rhack/services/base.rb +59 -0
  36. data/lib/rhack/services/examples.rb +423 -0
  37. data/lib/rhack/version.rb +3 -0
  38. data/lib/rhack_in.rb +3 -2
  39. data/rhack.gemspec +28 -0
  40. metadata +104 -85
  41. data/.gemtest +0 -0
  42. data/Gemfile.lock +0 -23
  43. data/Manifest.txt +0 -60
  44. data/ext/curb/Makefile +0 -217
  45. data/lib/cache.rb +0 -44
  46. data/lib/curl-global.rb +0 -164
  47. data/lib/extensions/declarative.rb +0 -153
  48. data/lib/extensions/johnson.rb +0 -63
  49. data/lib/frame.rb +0 -848
  50. data/lib/init.rb +0 -49
  51. data/lib/rhack.yml.template +0 -19
  52. data/lib/scout.rb +0 -589
  53. data/lib/words.rb +0 -25
@@ -0,0 +1,98 @@
1
+ module RHACK
2
+
3
+ class PickError < IndexError
4
+ def initialize
5
+ super "can't get scout from empty squad" end
6
+ end
7
+
8
+ class ScoutSquad < Array
9
+ __init__
10
+
11
+ def initialize(*args)
12
+ raise ArgumentError, "can't create empty squad" if (num = args.pop) < 1
13
+ proxies = nil
14
+ super []
15
+ if args[0].is Scout
16
+ s = args[0]
17
+ else
18
+ if !args[0].is String
19
+ args.unshift ''
20
+ if (opts = args[-1]).is Hash and (opts[:cp] || opts[:ck]).is Hash
21
+ L.warn "it's useless to setup cookies for untargeted squad!"
22
+ end
23
+ end
24
+ if args[1] and args[1][0].is Array
25
+ proxies = args[1]
26
+ args[1] = proxies.shift
27
+ end
28
+ self[0] = s = Scout(*args)
29
+ num -=1
30
+ end
31
+ num.times {|i|
32
+ self << Scout(s.root+s.path, (proxies ? proxies[i] : s.proxy), s.ua, s.refforge, :ck => s.main_cks, :raise => s.raise_err, :timeout => s.timeout, :retry => s.retry)
33
+ }
34
+ end
35
+
36
+ def update uri, forced=nil
37
+ each {|s| return L.warn "failed to update scout loaded? with url: #{s.http.url}" if s.loaded?} if !forced
38
+ each {|s| s.update uri}
39
+ end
40
+
41
+ def untargeted
42
+ first.root == 'http://'
43
+ end
44
+
45
+ def rand
46
+ raise PickError if !b
47
+ # to_a because reject returns object of this class
48
+ if scout = to_a.rand {|_|!_.loaded?}; scout
49
+ else # Curl should run here, otherwise `next'/`rand'-recursion will cause stack overflow
50
+ unless Curl.status
51
+ L.log "Curl must run in order to use ScoutSquad#rand; setting Carier Thread"
52
+ Curl.execute
53
+ #raise "Curl must run in order to use ScoutSquad#rand"
54
+ end
55
+ #Curl.wait
56
+ loop {
57
+ sleep 1
58
+ break if Curl.carier.reqs.size < size
59
+ }
60
+ self.rand
61
+ end
62
+ end
63
+
64
+ def next
65
+ raise PickError if !b
66
+ if scout = find {|_|!_.loaded?}; scout
67
+ else # Curl should run here, otherwise `next'/`rand'-recursion will cause stack overflow
68
+ unless Curl.status
69
+ L.log "Curl must run in order to use ScoutSquad#next; setting Carier Thread"
70
+ Curl.execute :unless_allready
71
+ #raise "Curl must run in order to use ScoutSquad#next"
72
+ end
73
+ #Curl.wait
74
+ loop {
75
+ sleep 1
76
+ break if Curl.carier.reqs.size < size
77
+ }
78
+ self.next
79
+ end
80
+ end
81
+
82
+ def to_s
83
+ str = '<#ScoutSquad @ '
84
+ if b
85
+ if first.webproxy
86
+ str << "#{first.proxy} ~ "
87
+ elsif first.proxy
88
+ str << first.proxy*':'+" ~ "
89
+ end
90
+ str << "#{untargeted ? "no target" : first.root} "
91
+ end
92
+ str << "x#{size}>"
93
+ end
94
+ alias :inspect :to_s
95
+
96
+ end
97
+
98
+ end
@@ -1,465 +1,2 @@
1
- # encoding: utf-8
2
1
  require 'rhack'
3
-
4
- # Вызовы сервисов всегда ждут и возвращают обработанный ответ, если вызвваны без блока.
5
- # В противном случае используется событийная модель и обработанный ответ передаётся в блок.
6
- module HTTPAccessKit
7
-
8
- class Service
9
- attr_accessor :f
10
-
11
- def initialize(service, frame, *args)
12
- @service = service
13
- # first argument should be a string so that frame won't be static
14
- @f = frame || Frame(self.class::URI[service] || self.class::URI[:login], *args)
15
- end
16
-
17
- # Usable only for sync requests
18
- def login(*)
19
- Curl.run
20
- @f[0].cookies.clear
21
- json, wait, @f.opts[:json], @f.opts[:wait] = @f.opts[:json], @f.opts[:wait], false, true
22
- yield @f.get(self.class::URI[:login])
23
- @f.get(self.class::URI[:home]) if self.class::URI[:home]
24
- @f.opts[:json], @f.opts[:wait] = json, wait
25
- @f.copy_cookies!
26
- end
27
-
28
- def go(*args, &block)
29
- __send__(@service, *args, &block)
30
- rescue
31
- L < $!
32
- Curl.reload
33
- end
34
-
35
- def scrape!(page)
36
- __send__(:"scrape_#{@service}", page)
37
- if url = next_url(page)
38
- @f.get(url) {|next_page| scrape!(next_page)}
39
- end
40
- end
41
-
42
- def inspect
43
- "<##{self.class.self_name}:#{@service.to_s.camelize} service via #{@f.inspect}>"
44
- end
45
-
46
- end
47
-
48
- class ServiceError < Exception; end
49
-
50
- class Yandex < Service
51
- __init__
52
-
53
- unless defined? IGNORE_UPPERCASE
54
- URI = {
55
- :speller => "http://speller.yandex.net/services/spellservice.json/checkText",
56
- :search => "http://www.yandex.ru/yandsearch?lr=213&%s",
57
- :weather => "http://pogoda.yandex.ru/%d/details/"
58
- }
59
-
60
- IGNORE_UPPERCASE = 1
61
- IGNORE_DIGITS = 2
62
- IGNORE_URLS = 4
63
- FIND_REPEAT_WORDS = 8
64
- IGNORE_LATIN = 16
65
- NO_SUGGEST = 32
66
- FLAG_LATIN = 128
67
- end
68
-
69
- def initialize(service=:search, frame=nil)
70
- ua = UAS.rand
71
- ua << " YB/4.2.0" if !ua["YB"]
72
- super service, frame, nil, ua, :ck => {
73
- "yandexuid"=>"3644005621268702222",
74
- "t"=>"p"
75
- }, :eval => false
76
- end
77
-
78
- def search(text, opts={}, &block)
79
- uri = URI.search % urlencode(opts.merge(:text=>text))
80
- @f.run(uri, :proc_result => block) {|page| process page}
81
- end
82
-
83
- def process page
84
- page.find('.p1/.cr').map {|n| [n.at('.cs').href, n.at('.cs').text.strip, (n.at('.kk') || n.at('.k7/div')).text.strip]} if page.html.b
85
- end
86
-
87
- def speller(text, opts=23)
88
- text = text.split_to_lines(10000)
89
- i = 0
90
- @f.run({"text" => text[i], "options" => opts}, URI.speller, :json => true) {|pg|
91
- yield pg.hash
92
- text[i+=1] && @f.get({"text" => text[i], "options" => opts}, URI.speller, :json => true)
93
- }
94
- end
95
-
96
- def fix_content(doc, opts={})
97
- nodes = doc.root.text_nodes
98
- speller(nodes*". ", opts) {|json|
99
- fix = {}
100
- json.each {|h| fix[h.word] = h.s[0] if h.s[0]}
101
- nodes.each {|n|
102
- fixed = false
103
- text = n.text
104
- fix.each {|k, v| fixed = true if text.gsub!(/\b#{k}\b/, v)}
105
- n.text(text) if fixed
106
- }
107
- }
108
- Curl.wait
109
- end
110
-
111
- def weather city=27612, day=nil, &block
112
- if city.is String
113
- city = CitiesCodes[city] if defined? CitiesCodes
114
- raise ServiceError, "can't get weather info for #{city.inspect}:#{city.class}" if !city.is(Fixnum)
115
- end
116
- @f.get(URI.weather%city, :proc_result => block) {|pg|
117
- ary = pg.find('//.b-forecast-details/tbody/tr{_["class"] =~ /t\d/}').map {|e|
118
- "#{e.at('.date') ? e.at('.date').text+":\n" : ''} - #{e.at('.t').text} - #{e.at('.data').text} - #{e.at('.wind/img').alt} #{e.at('.wind').text} м/с"
119
- }
120
- ary = ary[0..11].div(4) + ary[12..-1].div(2)
121
- day ? ary[day] : ary
122
- }#.res
123
- end
124
-
125
- def self.weather(*args, &block) new(:weather).go *args, &block end
126
- def self.search(*args, &block) new.go *args, &block end
127
-
128
- end
129
-
130
- class Google < Service
131
- __init__
132
- URI = {
133
- :translate => "http://ajax.googleapis.com/ajax/services/language/translate?v=1.0&q=%s&langpair=%s%%7C%s",
134
- :search => "http://ajax.googleapis.com/ajax/services/search/web?v=1.0&hl=ru&q=%s",
135
- :detect => "http://ajax.googleapis.com/ajax/services/language/detect?v=1.0&q=%s"
136
- }
137
-
138
- Shortcuts = Hash[*%w{
139
- v ru.wikipedia.org в ru.wikipedia.org вики en.wikipedia.org
140
- w en.wikipedia.org ев en.wikipedia.org wiki en.wikipedia.org
141
- lm lurkmore.ru лм lurkmore.ru
142
- wa world-art.ru ва world-art.ru
143
- ad anidb.info ад anidb.info
144
- ed encyclopediadramatica.com ед encyclopediadramatica.com
145
- }]
146
-
147
- Langs = *%w{
148
- af sq am ar hy az eu be bn bh bg my ca chr zh zh-CN zh-TW hr cs da dv nl en eo et tl fi fr gl ka de el gn gu iw hi hu is id iu it ja kn kk km ko ku ky lo lv lt mk ms ml mt mr mn ne no or ps fa pl pt-PT pa ro ru sa sr sd si sk sl es sw sv tg ta tl te th bo tr uk ur uz ug vi
149
- }
150
-
151
- def initialize(service=:search, frame=nil)
152
- super service, frame, :json => true
153
- end
154
-
155
- def search(text, opts={}, &block)
156
- text = "site:#{opts[:site]} #{text}" if opts[:site]
157
- uri = URI.search % CGI.escape(text)
158
- @f.run(uri, :proc_result => block) {|page|
159
- if data = page.hash.responseData.b
160
- data.results.map! {|res| [res.unescapedUrl, res.titleNoFormatting, res.content]}
161
- end
162
- }#.res
163
- end
164
-
165
- def detect(text, wait=!block_given?, &block)
166
- text = text.is(String) ? text[0...600] : text[0]
167
- uri = URI[:detect] % CGI.escape(text)
168
- @f.run(uri, :proc_result => block, :wait => wait) {|page|
169
- (data = page.hash.responseData.b) && data.language
170
- }
171
- end
172
-
173
- def translate(text, to, from=nil, &block)
174
- text = text.split_to_blocks(600, :syntax) if !text.is Array
175
- if !from
176
- if block_given?
177
- return detect(text) {|from| yield translate(text, to, from)}
178
- else
179
- return translate(text, to, detect(text).res)
180
- end
181
- end
182
- res = []
183
- i = 0
184
- text.each_with_index {|b, j|
185
- @f.run(URI.translate%[CGI.escape(text[j]), from, to], :proc_result => block, :wait => false) {|page|
186
- res[j] = (data = page.hash.responseData.b and data.translatedText)
187
- (i += 1) == text.size ? res*"\n" : :skip
188
- }
189
- }
190
- Curl.wait if !block_given?
191
- res*"\n"
192
- end
193
-
194
- def self.search(*args, &block) new.search *args, &block end
195
- def self.tr(*args, &block) new(:translate).translate *args, &block end
196
-
197
- end
198
-
199
- class Infoseek < Service
200
- URI = {:tr => 'http://translation.infoseek.co.jp/'}
201
-
202
- def initialize frame=nil
203
- super :tr, frame, :eval => false
204
- end
205
-
206
- def get_token page
207
- @token = page.at('input[name=token]').value
208
- end
209
-
210
- def tr(text, direction=:from_ja, &block)
211
- if @token
212
- selector = direction.in([:from_ja, :from_jp, :to_en]) ? 1 : 0
213
- body = {'ac' => 'Text', 'lng' => 'en', 'original' => text, 'selector' => selector, 'token' => @token, 'submit' => ' 翻訳'}
214
- @f.run(body, :proc_result => block) {|page|
215
- get_token page
216
- page.at('textarea[name=converted]').text
217
- }#.res
218
- else
219
- @f.run(:save_result => !block) {|page|
220
- get_token page
221
- tr text, direction, &block
222
- }#.res
223
- end
224
- end
225
-
226
- def self.tr(*args, &block) new.tr *args, &block end
227
-
228
- end
229
-
230
- class Youtube < Service
231
- URI = {:info => "http://www.youtube.com/get_video_info?video_id=%s"}
232
- attr_reader :track
233
-
234
- def initialize frame=nil
235
- super :dl, frame, :eval => false
236
- @f.ss.each {|s| s.timeout=600}
237
- require 'open3'
238
- require 'mp3info'
239
- end
240
-
241
- def dl(id, fd=nil, &block)
242
- if block
243
- info(id) {|lnk| __dl(lnk, fd, block)}
244
- else __dl(info(id), fd)
245
- end
246
- end
247
-
248
- def dlmp3(id, mp3=nil)
249
- dl(id) {|flv|
250
- if !File.file?(df = mp3||flv.sub(/.flv$/, '.mp3'))
251
- Open3.popen3("ffmpeg -i '#{flv}' -ab 262144 -ar 44100 '#{df}'") {|i,o,e|
252
- if $verbose
253
- t = e.gets2 and t and t[/^size=/] and print t until e.eof?
254
- puts "\n#{t}"
255
- end
256
- }
257
- end
258
- Mp3Info.open(df, :encoding=>'utf-8') {|mp3|
259
- mp3.tag2.TPE1, mp3.tag2.TIT2 = @track[1..2]
260
- } }
261
- end
262
-
263
- def self.dl(id) new.dl(id) end
264
- def self.dlmp3(id) new.dlmp3(id) end
265
-
266
- private
267
- def info(id, &block)
268
- @f.run(URI.info%[id[/\/watch/] ?
269
- id.parseuri.query.v :
270
- File.basename(id).till(/[&?]/)],:hash=>true,:proc_result=>block){|p|
271
- res = p.hash
272
- @track = [res.author, res.creator, res.title]
273
- CGI.unescape(res.fmt_url_map).split(/,\d+\|/)[0].after('|')
274
- }#.res
275
- end
276
-
277
- def __dl(lnk,fd,block=nil)
278
- @f.dl(lnk, fd||"files/youtube/#{@track*' - '}.flv", :auto, 5, &block)
279
- end
280
-
281
- end
282
-
283
- class VK < Service
284
- attr_reader :links, :open_links
285
- URI = {
286
- :people => "http://vkontakte.ru/gsearch.php?from=people&ajax=1",
287
- :login => "http://vkontakte.ru/index.php",
288
- :id => "http://vkontakte.ru%s"
289
- }
290
- DefaultParams = Hash[*%w[
291
- c[city] 1
292
- c[country] 1
293
- c[noiphone] 1
294
- c[photo] 1
295
- c[section] people
296
- c[sex] 1
297
- c[status] 6
298
- ]]
299
- @@reloadable = ReloadablePage {
300
- if !@title and !@hash
301
- L << self
302
- L << @doc
303
- end
304
- if @hash == false or @hash.nil? && (!@title or @title["Ошибка"])
305
- L.info "@title caller.size", binding
306
- sleep 2
307
- end
308
- }
309
- def self.com; new end
310
-
311
- class NotFoundError < Exception; end
312
-
313
- def initialize frame=nil
314
- super :people, frame, {:cp => true, :relvl => 5, :eval => false}, 5
315
- @links = []
316
- @open_links = []
317
- login
318
- end
319
-
320
- def login params={'email'=>'fshm@bk.ru', 'pass'=>'Riddick2', 'expire'=>nil}
321
- super {|login_page|
322
- login_page.submit('form', @f, params).submit('form', @f, {})
323
- }
324
- end
325
-
326
- def get_links h, pagenum, &block
327
- @f.run(h.merge('offset' => pagenum*20), URI[:people], :proc_result=>block, :result=>@@reloadable, :json => true) {|page|
328
- ls = Page(page.hash.rows).get_links('.image/a')
329
- @links.concat ls
330
- ls
331
- }
332
- end
333
-
334
- def people(q, *args, &block)
335
- age, opts = args.get_opts [17..23]
336
- h = DefaultParams.merge('c[q]' => q)
337
- h.merge! Hash[opts.map {|k,v| ["c[#{k}]", v]}]
338
- h['c[age_from]'], h['c[age_to]'] = age.first, age.last
339
-
340
- @f.run(h, URI[:people], :proc_result => block, :json => true) {|page|
341
- # ответом может быть невнятное требование залогиниться
342
- sum = page.hash.summary.sub(/<span.+>/, '')
343
- puts sum
344
- found = sum[/\d+/]
345
- if !found
346
- L.warn sum
347
- else
348
- @links.concat Page(page.hash.rows).get_links('.image/a')
349
- max_page = [50, (found.to_f/20).ceil].min
350
- (1...max_page).each {|_|
351
- sleep 0.5
352
- get_links h, _, &block
353
- }
354
- end
355
- }
356
- end
357
-
358
- def get_people q, *opts
359
- @links = []
360
- @open_links = []
361
- people q, *opts
362
- get_pages q
363
- end
364
-
365
- def get_pages q=nil
366
- @links.uniq.each {|id| get_page id, q; sleep 1.5}
367
- end
368
-
369
- def get_page id, q=nil
370
- q = q ? q.ci.to_re : // unless q.is Regexp
371
- id_num = id[/\d+/].to_i
372
- @f.get(id, :result=>@@reloadable) {|p|
373
- data = p.find('.profileTable//.dataWrap').to_a.b
374
- if data
375
- L.debug "!p.at('.basicInfo//.alertmsg') data.contents.join('')[/(\\d\\s*){6,}/] data.contents.join('')[q]", binding
376
- end
377
- if data = p.find('.profileTable//.dataWrap').b and
378
- contents = data.to_a.contents.join.b and contents[q]
379
- digits = contents[/(\d *){6,9}/]
380
- bot = (digits and digits[/^\d{7}$/] and id_num.between 852e5, 893e5)
381
- if !bot and !p.at('.basicInfo//.alertmsg') || digits
382
- L << "added vk.com#{id}"
383
- @open_links << id
384
- elsif bot
385
- L << "bot #{id_num} detected"
386
- else tick!
387
- end
388
- else tick!
389
- end
390
- }
391
- end
392
-
393
- end
394
-
395
- class Mamba < Service
396
- attr_reader :links, :open_links
397
- @@login, @@pass = %w{AnotherOneUser AyaHirano8}
398
- URI = {
399
- :people => "http://mamba.ru/?",
400
- :login => "http://mamba.ru/tips/?tip=Login",
401
- :id => "http://vk.com%s"
402
- }
403
- DefaultParams = Hash[*%w[
404
- c[city] 1
405
- c[country] 1
406
- c[noiphone] 1
407
- c[photo] 1
408
- c[section] people
409
- c[sex] 1
410
- c[status] 6
411
- ]]
412
-
413
- def initialize frame=nil
414
- super :people, frame, {:cp=>{
415
- "PREV_LOGIN"=>"anotheroneuser", "LOGIN"=>"anotheroneuser", "UID"=>"494809761", "LEVEL"=>"Low", "bar"=>"AShwjUz54RmYnfClOdlMYZylGUU90PUxeFkwlGixrP2ARHDs3A0EbDDxQTEksEm4LPT8FfzpfdiMME1omFz0tVhA5QjcsCgckaSQfIDxI", "s"=>"MJt2J3U9Pnk7Qvpie13lN7rrqmahTrAk", "SECRET"=>"adqH47"},
416
- :eval=>false, :timeout=>5, :retry=>['TimeoutError']
417
- }, 5
418
- @links = []
419
- @open_links = []
420
- end
421
-
422
- def login
423
- @f.run(URI[:login]) {|p|
424
- p.submit('.ap-t-c//form', @f, 'login'=>@@login, 'password'=>@@pass, 'level'=>nil) {
425
- @f.each {|s| s.cookies.replace @f[0].cookies}
426
- }
427
- }
428
- Curl.wait
429
- end
430
-
431
- def people
432
- # TODO
433
- # ... or not TODO?
434
- end
435
- # seems like NOT... LOL
436
-
437
- end
438
-
439
- module Downloaders
440
-
441
- def letitbit(path, &block)
442
- link = ''
443
- frame = Frame 'letitbit.net', {:cp => true, :eval => nil}, 1
444
- frame.run(path, :wait => !block) {|page1|
445
- page1.submit('#ifree_form', frame) {|page2|
446
- page2.submit('[action=/download4.php]', frame) {|page3|
447
- page3.submit('[action=/download3.php]', frame) {|page4|
448
- t = Thread.new {
449
- sleep 60
450
- frame.run({}, '/ajax/download3.php',
451
- :headers => {"Referer" => "http://letitbit.net/download3.php"}
452
- ) {|res|
453
- link << res.html
454
- block[link] if block
455
- }
456
- }
457
- t.join if !block
458
- }}}}
459
- link
460
- end
461
-
462
- module_function :letitbit
463
- end
464
-
465
- end
2
+ require 'rhack/services/base'