rhack 0.2.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (63) hide show
  1. data/.gemtest +0 -0
  2. data/CURB-LICENSE +51 -0
  3. data/Gemfile +4 -0
  4. data/History.txt +4 -0
  5. data/LICENSE +51 -0
  6. data/License.txt +17 -0
  7. data/Manifest.txt +61 -0
  8. data/README.txt +12 -0
  9. data/Rakefile +34 -0
  10. data/ext/curb-original/curb.c +977 -0
  11. data/ext/curb-original/curb.h +52 -0
  12. data/ext/curb-original/curb_config.h +235 -0
  13. data/ext/curb-original/curb_easy.c +3455 -0
  14. data/ext/curb-original/curb_easy.h +90 -0
  15. data/ext/curb-original/curb_errors.c +647 -0
  16. data/ext/curb-original/curb_errors.h +129 -0
  17. data/ext/curb-original/curb_macros.h +159 -0
  18. data/ext/curb-original/curb_multi.c +704 -0
  19. data/ext/curb-original/curb_multi.h +26 -0
  20. data/ext/curb-original/curb_postfield.c +523 -0
  21. data/ext/curb-original/curb_postfield.h +40 -0
  22. data/ext/curb-original/curb_upload.c +80 -0
  23. data/ext/curb-original/curb_upload.h +30 -0
  24. data/ext/curb/Makefile +157 -0
  25. data/ext/curb/curb.c +977 -0
  26. data/ext/curb/curb.h +52 -0
  27. data/ext/curb/curb_config.h +235 -0
  28. data/ext/curb/curb_easy.c +3430 -0
  29. data/ext/curb/curb_easy.h +94 -0
  30. data/ext/curb/curb_errors.c +647 -0
  31. data/ext/curb/curb_errors.h +129 -0
  32. data/ext/curb/curb_macros.h +159 -0
  33. data/ext/curb/curb_multi.c +710 -0
  34. data/ext/curb/curb_multi.h +26 -0
  35. data/ext/curb/curb_postfield.c +523 -0
  36. data/ext/curb/curb_postfield.h +40 -0
  37. data/ext/curb/curb_upload.c +80 -0
  38. data/ext/curb/curb_upload.h +30 -0
  39. data/ext/curb/extconf.rb +399 -0
  40. data/lib/cache.rb +44 -0
  41. data/lib/curl-global.rb +151 -0
  42. data/lib/extensions/browser/env.js +697 -0
  43. data/lib/extensions/browser/jquery.js +7180 -0
  44. data/lib/extensions/browser/xmlsax.js +1564 -0
  45. data/lib/extensions/browser/xmlw3cdom_1.js +1444 -0
  46. data/lib/extensions/browser/xmlw3cdom_2.js +2744 -0
  47. data/lib/extensions/curb.rb +125 -0
  48. data/lib/extensions/declarative.rb +153 -0
  49. data/lib/extensions/johnson.rb +63 -0
  50. data/lib/frame.rb +766 -0
  51. data/lib/init.rb +36 -0
  52. data/lib/rhack.rb +16 -0
  53. data/lib/rhack.yml.template +19 -0
  54. data/lib/rhack/proxy/checker.rb +226 -0
  55. data/lib/rhack/proxy/list.rb +196 -0
  56. data/lib/rhack/services.rb +445 -0
  57. data/lib/rhack_in.rb +2 -0
  58. data/lib/scout.rb +591 -0
  59. data/lib/words.rb +37 -0
  60. data/test/test_frame.rb +107 -0
  61. data/test/test_rhack.rb +5 -0
  62. data/test/test_scout.rb +53 -0
  63. metadata +195 -0
data/lib/init.rb ADDED
@@ -0,0 +1,36 @@
1
+ # encoding: utf-8
2
# Root namespace of the library (aliased as RHACK at the bottom). Evaluating
# the module body reads the configuration and builds the shared constants and
# globals: user-agent list, logger, DB connection, cache settings, retry map
# and the global Curl::Multi handle.
+ module HTTPAccessKit
3
+ include RMTools
4
# Parsed configuration, looked up as ./rhack.yml and $HOME/rhack.yml.
# Falls back to {} when YAML.load returns a falsy value.
# NOTE(review): `read` is an RMTools helper; presumably it returns the content
# of the first readable path, or nil -- confirm.
+ CONFIG = YAML.load(read(%W(rhack.yml #{File.join(ENV['HOME'], 'rhack.yml')})) || '') || {}
5
+
6
# User-agent strings: taken from the file named by CONFIG['ua file']
# (default $HOME/ua.txt) when that file exists, otherwise one built-in UA.
# NOTE(review): String#/ is not core Ruby; presumably RMTools' split -- confirm.
+ UAS = if File.file?(uas = CONFIG['ua file'] || File.join(ENV['HOME'], 'ua.txt'))
7
+ IO.read(uas)/"\n"
8
+ else ['Mozilla/5.0 (Windows; U; Windows NT 5.1; ru; rv:1.9.1.8) Gecko/20100202 MRA 5.6 (build 03278) Firefox/3.5.8 (.NET CLR 3.5.30729)'] end
9
+
10
# Shared logger, configured from the `logger` section of the config.
# NOTE(review): dot-access on a Hash (CONFIG.logger, cache.dir, ...) is not
# core Ruby; presumably provided by RMTools -- confirm.
+ L = RMLogger.new(CONFIG.logger || {})
11
+
12
# DB settings: the `db` config section, or the path $HOME/db.yml.
+ config = CONFIG.db || File.join(ENV['HOME'], 'db.yml')
13
+ begin
14
+ DB = ActiveRecord::Base.establish_connection_with config
15
# Only LoadError is handled; DB is then nil and rhack.rb skips loading the cache.
+ rescue LoadError
16
+ DB = nil
17
+ end
18
+
19
+ cache = CONFIG.cache || {}
20
+ CacheDir = cache.dir || File.join(File.dirname(__FILE__), 'cache')
21
+ CacheTable = (cache.table || :rhack_cache).to_sym
22
# NOTE(review): the `clean` config value is passed to eval, so whatever is
# written in rhack.yml is executed as Ruby code (the template uses "30.days").
+ CacheTTL = cache.clean ? eval(cache.clean).b : nil
23
+
24
# Per-host retry settings from the 'scout retry' config section.
+ RETRY = CONFIG['scout retry'] || {}
25
+
26
# Process-wide globals; existing values are kept if already set.
+ $uas ||= UAS
27
+ $Carier ||= Curl::Multi.new
28
+ $Carier.pipeline = true
29
+
30
# Re-includes and re-extends HTTPAccessKit in the classes yielded by each_child.
# NOTE(review): each_child / children / in are RMTools helpers; the exact set
# of classes visited cannot be determined from this file.
+ def self.update
31
+ each_child {|c| c.class_eval "include HTTPAccessKit; extend HTTPAccessKit" if !c.in c.children}
32
+ end
33
+ end
34
+
35
# Make the constants above reachable from inside the Curl namespace.
+ module Curl; include HTTPAccessKit end
36
# Short public alias for the namespace.
+ RHACK = HTTPAccessKit
data/lib/rhack.rb ADDED
@@ -0,0 +1,16 @@
1
+ # encoding: utf-8
2
# Library entry point: loads dependencies, the compiled curb extension, the
# extensions directory and then the library's own files, in this order.
# $KCODE is only assigned on interpreters older than 1.9.
+ $KCODE = 'UTF-8' if RUBY_VERSION < "1.9"
3
+ require 'rmtools_dev' unless defined? RMTools
4
+ here = File.expand_path File.dirname __FILE__
5
# The compiled extension is expected to sit next to this file.
+ require File.join(here, 'curb_core.so')
6
+ require 'active_record'
7
# NOTE(review): in Dir.glob a `**` that is not followed by `/` behaves like
# `*`, so this matches only files directly inside extensions/ -- confirm
# that recursion was not intended.
+ Dir.glob("#{here}/extensions/**.rb") { |f| require f }
8
+
9
+ require "#{here}/init" unless defined? RHACK
10
# Version is scraped from ../Rakefile; if the RHACK_VERSION line is missing,
# match returns nil and the [1] lookup raises NoMethodError.
+ RHACK::VERSION = IO.read(File.join here, '..', 'Rakefile').match(/RHACK_VERSION = '(.+?)'/)[1]
11
+
12
+ require "#{here}/curl-global"
13
+ require "#{here}/scout"
14
+ require "#{here}/frame"
15
+ require "#{here}/words"
16
# The cache layer is loaded only when a DB connection was established in init.
+ require "#{here}/cache" if RHACK::DB
@@ -0,0 +1,19 @@
1
+ ua file: /path/to/ua/list
2
+ cache:
3
+ dir: /path/to/cache/dir
4
+ table: hack_cache
5
+ clean: 30.days
6
+ logger:
7
+ :out: /path/to/rmlogger/logfile
8
+ scout retry:
9
+ example.com:
10
+ - TimeoutError
11
+ db:
12
+ reconnect: true
13
+ encoding: utf8
14
+ username: root
15
+ adapter: mysql
16
+ database: dbname
17
+ pool: 5
18
+ password:
19
+ socket: /var/run/mysqld/mysqld.sock
@@ -0,0 +1,226 @@
1
+ # encoding: utf-8
2
+ require 'rhack'
3
+ require File.join(File.dirname(__FILE__), 'list')
4
+
5
# Global list of host names that could not be resolved, seeded from
# tmp/unres_hosts (empty when that read returns a falsy value).
# ProxyList#fix_ips consults and appends to it.
# NOTE(review): String#/ is presumably RMTools' split -- confirm.
+ $unres_hosts = (RMTools.read('tmp/unres_hosts') or '')/"\n"
6
+
7
+ module RHACK
8
+ module Proxy
9
# Path of a proxy-list file; not referenced anywhere else in the visible source.
+ PROXYLISTFILE = 'log/proxylist.txt'
10
# Default check targets. :req is what gets requested, :expect maps response
# attributes to the values Checker#expected? compares them with.
# The '%s' in the URL is replaced with rand by Checker#check before each request.
+ DefaultGet = {:req => 'http://internet.yandex.ru/speed/?len=10&rnd=%s', :expect => {:body => "yandex! "}}
11
# Download target: the expected body is "yandex! " repeated 12500 times,
# i.e. 100000 characters, matching len=100000 in the URL.
+ DefaultDL = {:req => 'http://internet.yandex.ru/speed/?len=100000', :expect => {:body => "yandex! "*12500}}
12
# POST target: the :req array is splatted into Scout#loadPost by Checker#Post.
+ DefaultPost = {:req => [{'yandex!' => 'yandex!'}, false, 'http://internet.yandex.ru/speed/'], :expect => {:body => "hooray!"}}
13
+
14
# Runs a two-pass check over the proxy list +pl+ and returns the result of
# Checker#get_by_speed for the second pass.
#
# Pass 1: Checker#check with no arguments, then the checker is re-charged
# with the proxies whose ping is below opts[:ping] (default 0.5).
# Pass 2: Checker#check(opts) with opts[:div] set to opts[:dl_div] (default 5)
# and opts[:timeout] set to opts[:dl_timeout] (nil when not given).
#
# The opts hash passed by the caller is modified in place.
# If anything raises after the checker was built, the checker is stored in
# $pc for inspection and the error is re-raised.
def DOWANT(pl, opts={})
  opts[:div] = 50 unless opts[:div]
  opts[:timeout] = 5 unless opts[:timeout]
  checker = Checker(pl, opts)
  begin
    checker.check
    L.info "working proxies: #{checker.wpl.size} with average ping #{checker.wpl.values.avg}"
    ping_limit = opts[:ping] || 0.5
    checker.charge(checker.get_by_ping(ping_limit))
    opts[:div] = opts[:dl_div] || 5
    opts[:timeout] = opts[:dl_timeout]
    checker.check(opts)
    L.info "fast proxies: #{checker.fpl.size} with average speed #{checker.fpl.values.avg.bytes}/s"
    min_speed = opts[:speed] || 50000
    checker.get_by_speed(min_speed)
  rescue
    $pc = checker
    raise
  end
end
module_function :DOWANT
33
+
34
# Scout subclass used by Checker: one instance is created per proxy under test.
# It only adds bookkeeping attributes on top of Scout.
class Interceptor < Scout
  # :ready used to be listed twice; the duplicate only redefined the same accessor.
  attr_accessor :posted, :ready, :captcha, :engine, :num
end
37
+
38
+ class Checker
39
# NOTE(review): __init__ is an RMTools macro; presumably it defines the
# `Checker(...)` constructor function used by DOWANT -- confirm.
+ __init__
40
# target: check target; opts: options; pl: proxy list; ics: Interceptor scouts;
# wpl: proxy => ping; fpl: proxy => bytes per second.
+ attr_reader :target, :opts, :pl, :ics, :wpl, :fpl
41
+
42
# Builds a checker from (proxy list, target, options); missing positional
# arguments default to [] and DefaultGet via fetch_opts.
#
# When opts.page is set, the page is loaded first (only if it is given as a
# String) and its engine is copied into the options. Finally one scout per
# proxy is created through #charge.
def initialize(*argv)
  @pl, @target, @opts = argv.fetch_opts [[], DefaultGet]
  @wpl = {} # proxy => ping
  @fpl = {} # proxy => Bps
  @succeed = 0
  @failed = 0
  @printer = TempPrinter(self, "succeed: :succeed\nfailed : :failed")
  if @opts.page
    Curl.run
    if @opts.page.is String
      @page = IB::Page.new(@opts.page, :rt => true, :form => true)
    end
    Curl.wait
    @opts.engine = @page.engine
  end
  charge(@pl)
end
56
+
57
# Short summary: number of scouts, of working proxies and of fast proxies.
def inspect
  format("<#ProxyChecker @ics: %s @wpl: %s @fpl: %s>", @ics.size, @wpl.size, @fpl.size)
end
60
+
61
# Replaces @ics with one Interceptor per proxy in +pl+, all aimed at +target+.
#
# pl     - proxies to wrap (defaults to @pl)
# target - request target; when it is not a String, the String found inside
#          it by find_is is used
#
# Each scout gets a failure handler that resets its on_complete callback and
# stores the error on the curl handle's base. Returns self.
def charge(pl=@pl, target=@target.req)
  @ics = []
  GC.start
  on_fail = lambda do |curl, err|
    curl.on_complete {}
    curl.base.error = err
  end
  target = target.find_is(String) unless target.is String
  pl.each do |proxy|
    scout = Interceptor.new(target, proxy, $uas.rand, @opts)
    scout.http.on_failure(&on_fail)
    @ics << scout
  end
  self
end
76
+
77
# Runs the configured request through every scout, in batches, and records
# the results in @wpl (ping) or @fpl (download speed).
#
# Positional arguments (via fetch_opts): target (default @target) and query
# (default @ics: the scouts, or a proxy list to build scouts from).
# Options (merged over @opts):
#   :report - String evaluated with instance_eval and written to $log before each batch
#   :while  - Proc evaluated with instance_eval; a falsy result stops the current pass
#   :carier - object whose Post is used instead of this checker's
#   :post   - force POST
#   :dl     - measure download speed (ignored when posting)
#   :div    - batch size passed to Array#div (default 500)
# A block, if given, is passed to Get as the per-request callback.
#
# After the first pass, scouts whose result is a Curl::Err::TimeoutError are
# tried once more. Returns self.
def check(*argv, &callback)
  target, query, opts = argv.fetch_opts [@target, @ics], @opts
  report = opts[:report]
  cond = opts[:while]
  carier = opts[:carier]
  # NOTE(review): every other use treats target as an object with #req, so
  # target.is(Array) looks like it was meant to be target.req.is(Array) -- confirm.
  post = opts[:post] || carier || target.is(Array)
  dl = opts[:dl] unless post
  division = opts[:div] || 500
  # A non-Array req that names a method of the loaded page is resolved to a URL.
  if !target.req.is(Array) and @page.resto target.req
    post ||= target.req == :action
    target.req = @page.host + @page.send(target.req)
  end
  if !query[0].is Interceptor
    @opts.merge!(opts)
    # charge returns the checker itself, so the freshly built scouts must be
    # taken from #ics; assigning charge's result made query.div below fail.
    query = charge(query, target.req).ics
  end

  testrow = lambda {|d|
    $log << "\n#{report} = #{instance_eval(report).inspect}" if report
    throw :break if cond and !instance_eval(&cond)
    d.each {|s|
      if post
        (carier || self).Post(s, target)
      else
        # Fill the '%s' placeholder with a fresh random number per request.
        if '%s'.in target.req
          scoped_target = target.dup
          scoped_target.req %= rand
        else
          scoped_target = target
        end
        if dl
          DL(s, scoped_target)
        else
          callback ? Get(s, scoped_target, &callback) : Get(s, scoped_target)
        end
      end
    }
    Curl.wait
  }

  dl ? (@fpl = {}) : (@wpl = {})
  @succeed = @failed = 0
  Curl.execute
  catch(:break) {
    query.div(division).each {|d| testrow[d]}
  }
  # Second pass: only the scouts that timed out.
  catch(:break) {
    query.select {|i| i.res.is Array and i.res[0] == Curl::Err::TimeoutError}.div(division).each {|d| testrow[d]}
  }
  @printer.end!
  self
end
129
+
130
# True when every entry of target.expect matches the response +res+.
#
# Each key names a method of res; a Proc value is called with the attribute,
# any other value is compared with ===. A :code expectation of 200 is added
# to target.expect (in place) when none is present.
def expected?(res, target)
  target.expect[:code] ||= 200
  target.expect.all? do |attr, wanted|
    if wanted.is(Proc)
      wanted[res.__send__(attr)]
    else
      wanted === res.__send__(attr)
    end
  end
end
134
+
135
# Download probe: requests target.req through +scout+ and, on an expected
# response, stores the speed in @fpl under the curl handle's proxy_url.
#
# Speed is body size divided by (total_time minus the ping stored in @wpl for
# scout.proxystr), truncated to an integer. An Array result or an unexpected
# response counts as a failure.
def DL(scout, target)
  scout.loadGet(target.req) do |curl|
    result = curl.res
    $log.debug " #{curl.base} returned #{result}"
    if result.is(Array) || !expected?(result, target)
      @failed += 1
    else
      @fpl[curl.proxy_url] = (result.body.size/(curl.total_time - @wpl[scout.proxystr].to_f)).to_i
      @succeed += 1
    end
    @printer.p unless $panic
  end
end
148
+
149
# Ping probe: requests target.req through +scout+.
#
# With a block, the block is handed to loadGet as the completion callback and
# no bookkeeping is done here. Without one, an expected response stores the
# request's total_time in @wpl under the curl handle's proxy_url; an Array
# result or an unexpected response counts as a failure.
def Get(scout, target, &callback)
  return scout.loadGet(target.req, &callback) if callback

  # Pass the request itself, as the callback branch and DL do; the whole
  # target object used to be passed here.
  scout.loadGet(target.req) do |c|
    res = c.res
    $log.debug " #{c.base} returned #{res}"
    if !res.is(Array) && expected?(res, target)
      @wpl[c.proxy_url] = c.total_time
      @succeed += 1
    else
      @failed += 1
    end
    @printer.p unless $panic
  end
end
166
+
167
# POST probe: splats target.req into scout.loadPost and, on an expected
# response, stores the request's total_time in @wpl under the curl handle's
# proxy_url. An Array result or an unexpected response counts as a failure.
def Post(scout, target)
  scout.loadPost(*target.req) do |curl|
    result = curl.res
    $log.debug "#{curl.base} returned #{result}"
    # A disabled alternative used to follow this condition:
    # || (res.code == '303' and @opts.engine.is IB::Wakaba)
    if result.is(Array) || !expected?(result, target)
      @failed += 1
    else
      @wpl[curl.proxy_url] = curl.total_time
      @succeed += 1
    end
    @printer.p unless $panic
  end
end
180
+
181
# Deprecated alias of #to_pl; emits a deprecation notice on every call.
def to_a
  deprecation("use #to_pl instead.")
  to_pl
end
185
+
186
# Working proxies as a proxy list, ordered by ascending ping (ties by proxy
# key); each key is divided on ':' before conversion.
def to_pl
  by_ping = @wpl.sort_by {|proxy, ping| [ping, proxy]}
  by_ping.map {|proxy, _ping| proxy/':'}.to_pl
end
189
+
190
# The proxy with the lowest recorded ping, divided on ':'; nil when no
# working proxies are recorded.
def fastest
  best = @wpl.values.min
  return unless best
  @wpl.find {|_proxy, ping| ping == best}[0]/':'
end
194
+
195
# Proxy list of working proxies whose ping is below +limit+.
#
# limit  - a Numeric threshold, or a Range of thresholds. For a Range the
#          thresholds are tried in order until at least +minlen+ proxies
#          qualify; when none does, the result is empty. If the range cannot
#          be expanded (TypeError), its min and then its max are tried.
# minlen - minimum acceptable result size for the Range form (default 1)
#
# Any other kind of limit yields an empty list.
def get_by_ping(limit, minlen=1)
  newpl = []
  if Numeric === limit
    @wpl.each {|pr, lt| newpl << pr/':' if lt < limit}
  elsif Range === limit
    begin
      limit.to_a.each do |threshold|
        @wpl.each {|pr, lt| newpl << pr/':' if lt < threshold}
        break if newpl.size >= minlen
        newpl.clear
      end
    rescue TypeError
      @wpl.each {|pr, lt| newpl << pr/':' if lt < limit.min}
      unless newpl.size >= minlen
        newpl.clear
        @wpl.each {|pr, lt| newpl << pr/':' if lt < limit.max}
      end
    end
  end
  newpl.to_pl
end
216
+
217
# Proxy list of the proxies whose measured speed in @fpl is at least +min+.
def get_by_speed(min)
  fast = @fpl.select {|_proxy, speed| speed >= min}
  fast.map {|proxy, _speed| proxy/':'}.to_pl
end
222
+
223
+ end
224
+
225
+ end
226
+ end
@@ -0,0 +1,196 @@
1
+ # encoding: utf-8
2
module RMTools

  # Builds a proxy list from the text file +df+ (default 'proxy.txt').
  #
  # With :last, the entry of the 'log' directory whose name contains "pl" or
  # "proxy" and that sorts last by File.mtime is used instead.
  # From every line only the leading part up to the first '#' or '/' is kept
  # (lines starting with one of those are skipped); each kept entry is then
  # divided on ':'. When the file does not exist a message is printed and the
  # list created for +df+ is returned as is.
  def mkprlist(df='proxy.txt')
    if df == :last
      candidates = Dir.new('log').content.find_all {|e| e[/pl|proxy/]}
      df = candidates.sort_by {|e| File.mtime(e)}.last
    end
    pl = [].to_pl df
    unless File.file? df
      puts df+' is missed!'
      return pl
    end
    IO.readlines(df).each do |line|
      entry = line[%r{^(?![#/])[^#/]+}]
      pl << entry.chomp if entry
    end
    pl.map! {|e| e/':'}
    pl
  end

  module_function :mkprlist
end
23
+
24
class String

  # Appends the unique results of parseips on this string to +to+ and
  # returns the size of the collection after appending.
  def grabprlist(to)
    found = parseips.uniq
    merged = to.concat(found)
    merged.size
  end

end
31
+
32
class Array

  # Wraps this array in a ProxyList, optionally bound to +file+.
  def to_pl(file=nil)
    ProxyList(self, file)
  end

  # Fetches the content behind the entries of this array and collects the
  # addresses found there into +dest+.
  #
  # When $Carier is set, the responses are accumulated through GetAll and
  # parsed by the lambda passed to it; otherwise getURLs is used and the
  # joined result is parsed directly.
  def grabprlist(dest)
    return getURLs.join("\n").grabprlist(dest) unless $Carier
    text = ''
    GetAll( lambda {text.grabprlist(dest)} ) {|res| text << res+"\n"}
  end

end
48
+
49
+ class ProxyList < Array
50
# NOTE(review): __init__ is an RMTools macro; presumably it defines the
# `ProxyList(...)` constructor function used throughout this file -- confirm.
+ __init__
51
# Path the list is saved to / loaded from.
+ attr_accessor :file
52
# Identity of the list: hash of the sorted content, or a file base name.
+ attr_reader :name
53
# Keep Array's own - and + reachable; the overrides below wrap their results.
+ alias :minus :-
54
+ alias :plus :+
55
+
56
# Creates a list from +source+.
#
# source - a String (handed to #load: text to parse or a file to read) or an
#          Array of proxies
# file   - optional file name (String) for the Array form. When given, the
#          entries are normalized with #fix! and a non-empty list is written
#          to that file immediately.
#
# @name is the hash of the sorted content (the file's base name when that
# hash is 0 and a file is given). Without a file, @file points to a
# tmp/<hex name>.txt path.
#
# Raises ArgumentError for a non-String file and TypeError for any other
# kind of source.
def initialize(source=[], file=nil)
  if source.is String
    super([])
    load(source)
  elsif source.is Array
    raise ArgumentError, 'second arg must be a string' if file and !file.is String
    super(source)
    fix! if file
    @name = sort.hash
    if file
      @name = File.split(file)[1] if @name == 0
      @file = file
      rw(@file, map {|pair| pair*':'+"\n"}) unless empty?
    else
      @file = "tmp/#{'%0*x'%[12,@name]}.txt"
    end
  else
    raise TypeError, "can't create proxylist from #{source.class}"
  end
end
71
+
72
# Builds a new ProxyList from the current content and file, which
# recomputes the name.
def rehash
  ProxyList(self, @file)
end
75
+
76
# Two ProxyLists are equal when their names match; for any other object the
# comparison is delegated to that object's own ==.
def ==(pl)
  return pl == self unless pl.is ProxyList
  pl.name == @name
end
83
+
84
# Array difference (the aliased original -), wrapped in a new ProxyList.
def -(pl)
  ProxyList(minus(pl))
end
87
+
88
# Array concatenation (the aliased original +), wrapped in a new ProxyList.
def +(pl)
  ProxyList(plus(pl))
end
91
+
92
# Normalizes the list in place and returns self.
#
# Entries that are not Arrays or have no first element are dropped. For the
# rest, a dotted-quad address is rewritten in place with each octet converted
# through to_i (which removes leading zeros), and the port becomes an Integer
# when it converts to a positive number (otherwise the original second
# element is kept). Duplicates are removed last.
def fix!
  map! do |entry|
    next unless entry.is Array and ip = entry[0]
    port = entry[1].to_i
    # gsub! edits ip in place; its nil result on "no match" is irrelevant here
    ip.gsub!(/^(\d+)\.(\d+)\.(\d+)\.(\d+)$/) {|m| sprintf("%d.%d.%d.%d", *$~[1..4].map! {|d| d.to_i})}
    [ip, port > 0 ? port : entry[1]]
  end.compact!
  uniq! || self
end
101
+
102
# Non-destructive counterpart of #fix!: normalizes a copy and returns it.
# The previous body called fixprlist!, which is not defined in this class;
# the in-place normalizer is #fix!.
def fix
  dup.fix!
end
105
+
106
# Short description: the name (as text, or as a zero-padded hex number when
# it is a hash value) and the number of items.
def inspect
  if @name.is(String)
    sprintf("<#ProxyList: %s (%d items)>", @name, size)
  else
    sprintf("<#ProxyList:%#0*x (%d items)>", 12, @name, size)
  end
end
111
+
112
# Already a proxy list: returns the receiver itself instead of wrapping it
# again as Array#to_pl does.
+ def to_pl
113
+ self
114
+ end
115
+
116
# Truthy when +ip+ is a String holding a single dotted-quad address.
# The regexp anchors match per line, hence the extra check that the string
# contains no newline.
def _fixed(ip)
  ip.is(String) && ip[/^\d+\.\d+\.\d+\.\d+$/] && !ip["\n"]
end
119
+
120
# Addresses of the entries that hold a dotted-quad IP.
#
# The shape of the first entry decides how the list is read:
# - pairs ([ip, port]): entries with a positive port and a fixed IP
# - one-element arrays: entries with a fixed IP
# - anything else (plain strings, or an empty list): the fixed entries themselves
def ips
  head = self[0]
  if !head.is(Array)
    find_all {|i| _fixed i}
  elsif head.size > 1
    find_all {|i| i[1].to_i > 0 and _fixed i[0]}.firsts
  else
    find_all {|i| _fixed i[0]}.firsts
  end
end
127
+
128
# Entries whose second element converts to a positive integer (a port).
def standart
  find_all do |entry|
    port = entry[1].to_i
    port > 0
  end
end
131
+
132
# Entries whose second element does not convert to a positive integer,
# i.e. the complement of #standart.
def glypes
  reject do |entry|
    port = entry[1].to_i
    port > 0
  end
end
135
+
136
# True when the first element of every entry is a dotted-quad address.
def ips_fixed
  firsts.all? {|ip| _fixed ip}
end
140
+
141
# Replaces host names and integer addresses with dotted-quad IPs, in place.
#
# The indexes are split with Array#div(size/50) and each slice is processed
# in its own thread. Per entry:
# - an Integer address is converted with to_ip
# - entries without a positive port, or already holding a dotted quad, are skipped
# - an address embedded in the host name (dots or dashes) is extracted
# - a host already listed in $unres_hosts is removed from the list
# - anything else is resolved through IPSocket.getaddress; a host that fails
#   to resolve is appended to $unres_hosts and left unchanged
#
# Returns self with the removed entries compacted away.
def fix_ips
  ts = []
  (0...size).to_a.div(size/50).each {|d| ts << Thread.new {d.each {|i|
    host = self[i][0]

    # Integer rather than Fixnum: large addresses are Bignums on old 32-bit
    # interpreters, and Fixnum no longer exists on current Ruby.
    if host.is Integer
      self[i][0] = host.to_ip
      next
    elsif self[i][1].to_i == 0 or host[/^\d+\.\d+\.\d+\.\d+$/]
      next
    end

    if (ip = host[/\d{1,3}[\.\-]\d+[\.\-]\d+[\.\-]\d{1,3}/])
      self[i][0] = ip.gsub('-', '.')
    elsif $unres_hosts.has host
      # The lookup used to be done with ip, which is always nil in this
      # branch, so known-unresolvable hosts were resolved again every time.
      self[i] = nil
    else
      ip = IPSocket.getaddress(host) rescue($unres_hosts << host; nil)
      ip and (self[i][0] = ip) and tick!
    end
  }}}
  ts.joins && ts.clear
  compact! || self
end
165
+
166
# True when every entry is an Array with exactly two non-nil elements: a
# String host followed by a String or Integer port. An empty list is valid.
#
# Written with compact.size and Integer so it also runs on interpreters
# without Array#nitems (removed in Ruby 1.9) and without Fixnum (removed in 3.2).
def valid
  all? do |i|
    i.is_a?(Array) && i.compact.size == 2 && i[0].is_a?(String) &&
      (i[1].is_a?(String) || i[1].is_a?(Integer))
  end
end
170
+
171
# Writes the list to @file, one "host:port" line per Array entry; entries
# that are not Arrays are skipped.
#
# mark - optional label; when given, the content is preceded by a header line
#        with the current time and the label, and followed by a row of '#'.
#
# The lines are joined into one String before use: interpolating the Array
# itself into the header produced its inspect form on Ruby 1.9+.
def save(mark=nil)
  str = map {|i| i * ':' + "\n" if i.is Array}.compact.join
  str = "#"*10+" #{puttime} - #{mark}\n#{str}"+"#"*20 if mark
  rw @file, str
end
176
+
177
# Fills the list with the addresses found in +file+ and returns self.
#
# +file+ is treated as raw text when it contains a newline or "</";
# otherwise its content is read when it names an existing file, and failing
# that the string itself is parsed.
def load(file=@file)
  inline = "\n".in(file) || "</".in(file)
  text = (!inline && File.file?(file)) ? IO.read(file) : file
  text.grabprlist(self)
  self
end
187
+
188
# Forwards to the inherited find_all with the same block.
# NOTE(review): the reason for this pass-through is not visible here;
# presumably it pins the inherited behaviour against an override elsewhere -- confirm.
+ def find_all
189
+ super
190
+ end
191
+
192
# Forwards to the inherited reject with the same block.
# NOTE(review): the reason for this pass-through is not visible here;
# presumably it pins the inherited behaviour against an override elsewhere -- confirm.
+ def reject
193
+ super
194
+ end
195
+
196
+ end