rhack 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. data/.gemtest +0 -0
  2. data/CURB-LICENSE +51 -0
  3. data/Gemfile +4 -0
  4. data/History.txt +4 -0
  5. data/LICENSE +51 -0
  6. data/License.txt +17 -0
  7. data/Manifest.txt +61 -0
  8. data/README.txt +12 -0
  9. data/Rakefile +34 -0
  10. data/ext/curb-original/curb.c +977 -0
  11. data/ext/curb-original/curb.h +52 -0
  12. data/ext/curb-original/curb_config.h +235 -0
  13. data/ext/curb-original/curb_easy.c +3455 -0
  14. data/ext/curb-original/curb_easy.h +90 -0
  15. data/ext/curb-original/curb_errors.c +647 -0
  16. data/ext/curb-original/curb_errors.h +129 -0
  17. data/ext/curb-original/curb_macros.h +159 -0
  18. data/ext/curb-original/curb_multi.c +704 -0
  19. data/ext/curb-original/curb_multi.h +26 -0
  20. data/ext/curb-original/curb_postfield.c +523 -0
  21. data/ext/curb-original/curb_postfield.h +40 -0
  22. data/ext/curb-original/curb_upload.c +80 -0
  23. data/ext/curb-original/curb_upload.h +30 -0
  24. data/ext/curb/Makefile +157 -0
  25. data/ext/curb/curb.c +977 -0
  26. data/ext/curb/curb.h +52 -0
  27. data/ext/curb/curb_config.h +235 -0
  28. data/ext/curb/curb_easy.c +3430 -0
  29. data/ext/curb/curb_easy.h +94 -0
  30. data/ext/curb/curb_errors.c +647 -0
  31. data/ext/curb/curb_errors.h +129 -0
  32. data/ext/curb/curb_macros.h +159 -0
  33. data/ext/curb/curb_multi.c +710 -0
  34. data/ext/curb/curb_multi.h +26 -0
  35. data/ext/curb/curb_postfield.c +523 -0
  36. data/ext/curb/curb_postfield.h +40 -0
  37. data/ext/curb/curb_upload.c +80 -0
  38. data/ext/curb/curb_upload.h +30 -0
  39. data/ext/curb/extconf.rb +399 -0
  40. data/lib/cache.rb +44 -0
  41. data/lib/curl-global.rb +151 -0
  42. data/lib/extensions/browser/env.js +697 -0
  43. data/lib/extensions/browser/jquery.js +7180 -0
  44. data/lib/extensions/browser/xmlsax.js +1564 -0
  45. data/lib/extensions/browser/xmlw3cdom_1.js +1444 -0
  46. data/lib/extensions/browser/xmlw3cdom_2.js +2744 -0
  47. data/lib/extensions/curb.rb +125 -0
  48. data/lib/extensions/declarative.rb +153 -0
  49. data/lib/extensions/johnson.rb +63 -0
  50. data/lib/frame.rb +766 -0
  51. data/lib/init.rb +36 -0
  52. data/lib/rhack.rb +16 -0
  53. data/lib/rhack.yml.template +19 -0
  54. data/lib/rhack/proxy/checker.rb +226 -0
  55. data/lib/rhack/proxy/list.rb +196 -0
  56. data/lib/rhack/services.rb +445 -0
  57. data/lib/rhack_in.rb +2 -0
  58. data/lib/scout.rb +591 -0
  59. data/lib/words.rb +37 -0
  60. data/test/test_frame.rb +107 -0
  61. data/test/test_rhack.rb +5 -0
  62. data/test/test_scout.rb +53 -0
  63. metadata +195 -0
data/lib/init.rb ADDED
@@ -0,0 +1,36 @@
1
+ # encoding: utf-8
2
# Shared configuration namespace. Evaluating this module body defines the
# CONFIG hash, the user-agent list, the logger, the optional DB handle, the
# cache settings and the global Curl::Multi carrier.
module HTTPAccessKit
  include RMTools

  # Parsed YAML settings; an empty hash when nothing was read or parsed.
  # NOTE(review): `read` is an RMTools helper; it is presumably given the
  # candidate paths in lookup order (working directory, then $HOME) -- confirm.
  CONFIG = YAML.load(read(%W(rhack.yml #{File.join(ENV['HOME'], 'rhack.yml')})) || '') || {}

  # User-agent strings: one per line of the configured file (or ~/ua.txt),
  # otherwise a single built-in Firefox string.
  uas = CONFIG['ua file'] || File.join(ENV['HOME'], 'ua.txt')
  UAS =
    if File.file?(uas)
      IO.read(uas)/"\n"
    else
      ['Mozilla/5.0 (Windows; U; Windows NT 5.1; ru; rv:1.9.1.8) Gecko/20100202 MRA 5.6 (build 03278) Firefox/3.5.8 (.NET CLR 3.5.30729)']
    end

  L = RMLogger.new(CONFIG.logger || {})

  # DB is nil when establishing the connection raises LoadError.
  config = CONFIG.db || File.join(ENV['HOME'], 'db.yml')
  begin
    DB = ActiveRecord::Base.establish_connection_with(config)
  rescue LoadError
    DB = nil
  end

  cache = CONFIG.cache || {}
  CacheDir   = cache.dir || File.join(File.dirname(__FILE__), 'cache')
  CacheTable = (cache.table || :rhack_cache).to_sym
  # NOTE(review): the "clean" setting is passed to eval, so the config file
  # must be trusted; anything written there runs as Ruby code.
  CacheTTL   = cache.clean ? eval(cache.clean).b : nil

  RETRY = CONFIG['scout retry'] || {}

  # Process-wide defaults; values already assigned elsewhere are kept.
  $uas ||= UAS
  $Carier ||= Curl::Multi.new
  $Carier.pipeline = true

  # Re-includes and re-extends this module into each child yielded by
  # each_child, skipping a child that is found among its own children.
  def self.update
    each_child do |child|
      next if child.in(child.children)
      child.class_eval "include HTTPAccessKit; extend HTTPAccessKit"
    end
  end
end

module Curl
  include HTTPAccessKit
end
RHACK = HTTPAccessKit
data/lib/rhack.rb ADDED
@@ -0,0 +1,16 @@
1
# encoding: utf-8
# Library entry point: loads RMTools, the bundled curb extension,
# ActiveRecord and every extension script, then the library's own parts.
$KCODE = 'UTF-8' if RUBY_VERSION < "1.9"
require 'rmtools_dev' unless defined? RMTools
here = File.expand_path(File.dirname(__FILE__))
require File.join(here, 'curb_core.so')
require 'active_record'
Dir.glob("#{here}/extensions/**.rb") { |f| require f }

require "#{here}/init" unless defined? RHACK
# The version string is scraped from the Rakefile one directory above lib/.
rakefile = File.join(here, '..', 'Rakefile')
RHACK::VERSION = IO.read(rakefile).match(/RHACK_VERSION = '(.+?)'/)[1]

%w(curl-global scout frame words).each { |part| require "#{here}/#{part}" }
# The cache layer is only loaded when a DB connection was established.
require "#{here}/cache" if RHACK::DB
data/lib/rhack.yml.template ADDED
@@ -0,0 +1,19 @@
1
+ ua file: /path/to/ua/list
2
+ cache:
3
+ dir: /path/to/cache/dir
4
+ table: hack_cache
5
+ clean: 30.days
6
+ logger:
7
+ :out: /path/to/rmlogger/logfile
8
+ scout retry:
9
+ example.com:
10
+ - TimeoutError
11
+ db:
12
+ reconnect: true
13
+ encoding: utf8
14
+ username: root
15
+ adapter: mysql
16
+ database: dbname
17
+ pool: 5
18
+ password:
19
+ socket: /var/run/mysqld/mysqld.sock
data/lib/rhack/proxy/checker.rb ADDED
@@ -0,0 +1,226 @@
1
+ # encoding: utf-8
2
+ require 'rhack'
3
+ require File.join(File.dirname(__FILE__), 'list')
4
+
5
+ $unres_hosts = (RMTools.read('tmp/unres_hosts') or '')/"\n"
6
+
7
+ module RHACK
8
+ module Proxy
9
+ PROXYLISTFILE = 'log/proxylist.txt'
10
+ DefaultGet = {:req => 'http://internet.yandex.ru/speed/?len=10&rnd=%s', :expect => {:body => "yandex! "}}
11
+ DefaultDL = {:req => 'http://internet.yandex.ru/speed/?len=100000', :expect => {:body => "yandex! "*12500}}
12
+ DefaultPost = {:req => [{'yandex!' => 'yandex!'}, false, 'http://internet.yandex.ru/speed/'], :expect => {:body => "hooray!"}}
13
+
14
# Runs a two-pass proxy selection over +pl+ and returns the result of
# Checker#get_by_speed.
#
# pl   - proxy list handed to Checker.
# opts - options hash. Keys read here: :div, :timeout, :ping, :dl_div,
#        :dl_timeout, :speed. The hash is copied first, so the caller's
#        object is no longer rewritten between the two passes.
#
# Any error is re-raised after the checker has been stored in $pc, so the
# partially filled checker can be inspected afterwards.
#
# NOTE(review): nothing in this method sets opts[:dl] or switches the target
# to DefaultDL, yet the second pass reports on pc.fpl -- confirm the download
# pass is actually triggered by the caller's options.
def DOWANT(pl, opts={})
  opts = opts.dup
  opts[:div] ||= 50
  opts[:timeout] ||= 5
  pc = Checker(pl, opts)
  begin
    # First pass: keep the proxies that answer within the ping limit.
    pc.check
    L.info "working proxies: #{pc.wpl.size} with average ping #{pc.wpl.values.avg}"
    pc.charge pc.get_by_ping(opts[:ping] || 0.5)
    # Second pass: smaller batches; the timeout is whatever :dl_timeout holds
    # (nil when the caller did not supply one).
    opts[:div] = opts[:dl_div] || 5
    opts[:timeout] = opts[:dl_timeout]
    pc.check opts
    L.info "fast proxies: #{pc.fpl.size} with average speed #{pc.fpl.values.avg.bytes}/s"
    pc.get_by_speed(opts[:speed] || 50000)
  rescue
    $pc = pc
    raise
  end
end
module_function :DOWANT
33
+
34
# Scout subclass used by the proxy checker; it only adds bookkeeping
# accessors. (:ready was previously listed twice; the duplicate is removed.)
class Interceptor < Scout
  attr_accessor :posted, :ready, :captcha, :engine, :num
end
37
+
38
+ class Checker
39
+ __init__
40
+ attr_reader :target, :opts, :pl, :ics, :wpl, :fpl
41
+
42
# Builds a checker from (proxy list, target, options); missing positional
# arguments default to [] and DefaultGet through fetch_opts.
# When the options carry a :page, the page is loaded first and its engine
# is stored back into the options.
def initialize(*argv)
  @pl, @target, @opts = argv.fetch_opts [[], DefaultGet]
  @wpl     = {} # proxy => ping
  @fpl     = {} # proxy => Bps
  @failed  = 0
  @succeed = 0
  @printer = TempPrinter(self, "succeed: :succeed\nfailed : :failed")
  if @opts.page
    Curl.run
    if @opts.page.is(String)
      @page = IB::Page.new(@opts.page, :rt => true, :form => true)
    end
    Curl.wait
    @opts.engine = @page.engine
  end
  charge(@pl)
end
56
+
57
# Short summary: number of prepared scouts, pinged proxies and
# speed-tested proxies.
def inspect
  ics, wpl, fpl = @ics.size, @wpl.size, @fpl.size
  "<#ProxyChecker @ics: #{ics} @wpl: #{wpl} @fpl: #{fpl}>"
end
60
+
61
# Rebuilds @ics with one Interceptor per proxy in +pl+, all aimed at
# +target+ (when +target+ is not a String, its first String element is used).
# Returns self.
def charge(pl=@pl, target=@target.req)
  @ics = []
  GC.start
  # On failure: clear the completion handler and record the error on the
  # handle's owner.
  on_fail = lambda do |curl, err|
    curl.on_complete {}
    curl.base.error = err
  end
  url = target.is(String) ? target : target.find_is(String)
  pl.each do |proxy|
    scout = Interceptor.new(url, proxy, $uas.rand, @opts)
    scout.http.on_failure(&on_fail)
    @ics << scout
  end
  self
end
76
+
77
# Runs the target request through every scout in the query, in batches.
#
# Positional arguments (via fetch_opts): target (default @target), query
# (default @ics), then an options hash (default @opts). Options read here:
# :report, :while, :carier, :post, :dl, :div.
# A block, when given, is passed on to Get as the completion callback.
# Returns self.
def check(*argv, &callback)
  target, query, opts = argv.fetch_opts [@target, @ics], @opts
  report   = opts[:report]
  cond     = opts[:while]
  carier   = opts[:carier]
  post     = opts[:post] || carier || target.is(Array)
  dl       = opts[:dl] unless post
  division = opts[:div] || 500

  # A non-Array request that @page responds to is resolved through the page.
  if !target.req.is(Array) and @page.resto target.req
    post ||= target.req == :action
    target.req = @page.host + @page.send(target.req)
  end
  # A query that does not hold scouts yet is turned into scouts first.
  if !query[0].is Interceptor
    @opts.merge!(opts)
    query = charge(query, target.req)
  end

  # Fires one batch of scouts, then waits for the batch to finish.
  testrow = lambda do |batch|
    $log << "\n#{report} = #{instance_eval(report).inspect}" if report
    throw :break if cond and !instance_eval(&cond)
    batch.each do |scout|
      if post
        (carier || self).Post(scout, target)
        next
      end
      scoped_target = target
      # A '%s' in the request is filled with a random number per scout.
      if '%s'.in target.req
        scoped_target = target.dup
        scoped_target.req %= rand
      end
      if dl
        DL(scout, scoped_target)
      elsif callback
        Get(scout, scoped_target, &callback)
      else
        Get(scout, scoped_target)
      end
    end
    Curl.wait
  end

  if dl
    @fpl = {}
  else
    @wpl = {}
  end
  @succeed = @failed = 0
  Curl.execute
  catch(:break) do
    query.div(division).each { |batch| testrow.call(batch) }
  end
  # Second round: only the scouts whose result is a timeout error.
  catch(:break) do
    timed_out = query.select { |i| i.res.is Array and i.res[0] == Curl::Err::TimeoutError }
    timed_out.div(division).each { |batch| testrow.call(batch) }
  end
  @printer.end!
  self
end
129
+
130
# True when +res+ satisfies every expectation in target.expect.
# Each key names a method called on +res+; a Proc value is called with the
# result, any other value is compared with ===.
# target.expect[:code] is set to 200 when it is not given.
def expected?(res, target)
  target.expect[:code] ||= 200
  target.expect.all? do |attr, wanted|
    actual = res.__send__(attr)
    wanted.is(Proc) ? wanted[actual] : wanted === actual
  end
end
134
+
135
# Download probe: requests target.req through +scout+ and, on an expected
# response, stores the measured bytes per second in @fpl.
# The elapsed time is the transfer time minus the ping recorded in @wpl.
def DL(scout, target)
  scout.loadGet(target.req) do |curl|
    res = curl.res
    $log.debug " #{curl.base} returned #{res}"
    if res.is(Array) || !expected?(res, target)
      @failed += 1
    else
      elapsed = curl.total_time - @wpl[scout.proxystr].to_f
      @fpl[curl.proxy_url] = (res.body.size / elapsed).to_i
      @succeed += 1
    end
    @printer.p unless $panic
  end
end
148
+
149
# Ping probe: requests target.req through +scout+.
#
# With a block, the block is the completion callback and no bookkeeping is
# done here. Without one, an expected response stores the transfer time in
# @wpl under the proxy URL and the success/failure counters are updated.
#
# The default branch used to hand the whole +target+ object to loadGet while
# the callback branch and DL pass target.req; all of them now pass the
# request itself.
def Get(scout, target, &callback)
  if callback
    scout.loadGet(target.req, &callback)
  else
    scout.loadGet(target.req) {|c|
      res = c.res
      $log.debug " #{c.base} returned #{res}"
      if !res.is(Array) and expected?(res, target)
        @wpl[c.proxy_url] = c.total_time
        @succeed += 1
      else
        @failed += 1
      end
      @printer.p if !$panic
    }
  end
end
166
+
167
# POST probe: sends target.req (splatted) through +scout+ and, on an
# expected response, stores the transfer time in @wpl.
def Post(scout, target)
  scout.loadPost(*target.req) do |curl|
    res = curl.res
    $log.debug "#{curl.base} returned #{res}"
    # Disabled in the original: || (res.code == '303' and @opts.engine.is IB::Wakaba)
    if res.is(Array) || !expected?(res, target)
      @failed += 1
    else
      @wpl[curl.proxy_url] = curl.total_time
      @succeed += 1
    end
    @printer.p unless $panic
  end
end
180
+
181
# Deprecated alias: emits a deprecation notice, then returns #to_pl.
def to_a
  deprecation("use #to_pl instead.")
  to_pl
end
185
+
186
# Pinged proxies ordered by ping (ties by proxy string), each split on ':',
# converted with Array#to_pl.
def to_pl
  by_ping = @wpl.sort_by { |proxy, ping| [ping, proxy] }
  by_ping.map { |proxy, _ping| proxy / ':' }.to_pl
end
189
+
190
# The proxy with the lowest recorded ping, split on ':'; nil when no
# proxy has been pinged.
def fastest
  lowest = @wpl.values.sort.first
  return nil unless lowest
  proxy, = @wpl.find { |_proxy, ping| ping == lowest }
  proxy / ':'
end
194
+
195
# Proxies whose ping is below +limit+, as a proxy list.
#
# limit  - a Numeric bound, or a Range. An enumerable Range is walked
#          value by value until at least +minlen+ proxies qualify. A Range
#          that cannot be enumerated (TypeError) is tried at its minimum
#          and, if that yields too few, at its maximum.
# minlen - minimum number of proxies wanted for the Range forms.
def get_by_ping(limit, minlen=1)
  newpl = []
  # Appends every proxy with a ping under the given bound.
  under = lambda do |bound|
    @wpl.each { |pr, lt| newpl << (pr / ':') if lt < bound }
  end
  case limit
  when Numeric
    under.call(limit)
  when Range
    begin
      limit.to_a.each do |step|
        under.call(step)
        break if newpl.size >= minlen
        newpl.clear
      end
    rescue TypeError
      under.call(limit.min)
      if newpl.size < minlen
        newpl.clear
        under.call(limit.max)
      end
    end
  end
  newpl.to_pl
end
216
+
217
# Proxies whose measured speed is at least +min+, as a proxy list.
def get_by_speed(min)
  fast = @fpl.map { |pr, sp| (pr / ':') if sp >= min }
  fast.compact.to_pl
end
222
+
223
+ end
224
+
225
+ end
226
+ end
data/lib/rhack/proxy/list.rb ADDED
@@ -0,0 +1,196 @@
1
+ # encoding: utf-8
2
module RMTools

  # Builds a proxy list from the text file +df+.
  #
  # df - path of the list file, or :last to pick the most recently modified
  #      entry of the 'log' directory whose name matches /pl|proxy/.
  #
  # Every line contributes its leading part up to the first '#' or '/';
  # lines starting with either character are skipped. Entries are split
  # on ':'. When the file does not exist a notice is printed and the list
  # is returned empty.
  def mkprlist(df='proxy.txt')
    if df == :last
      candidates = Dir.new('log').content.find_all { |e| e[/pl|proxy/] }
      df = candidates.sort_by { |e| File.mtime(e) }.last
    end
    pl = [].to_pl(df)
    if File.file?(df)
      IO.readlines(df).each do |line|
        entry = line[%r{^(?![#/])[^#/]+}]
        pl << entry.chomp if entry
      end
      pl.map! { |e| e / ':' }
    else
      puts df+' is missed!'
    end
    pl
  end

  module_function :mkprlist
end
23
+
24
class String

  # Appends the unique results of #parseips on this string to +to+ and
  # returns the resulting size of +to+.
  def grabprlist(to)
    found = parseips.uniq
    to.concat(found).size
  end

end
31
+
32
class Array

  # Wraps this array in a ProxyList, optionally bound to +file+.
  def to_pl(file=nil)
    ProxyList(self, file)
  end

  # Fetches the URLs held in this array and collects the proxies found in
  # the responses into +dest+. With a global carrier ($Carier) the fetch goes
  # through GetAll and the text is parsed by the lambda handed to it;
  # otherwise getURLs is used and the joined result is parsed directly.
  def grabprlist(dest)
    return getURLs.join("\n").grabprlist(dest) unless $Carier
    text = ''
    GetAll(lambda { text.grabprlist(dest) }) { |res| text << (res+"\n") }
  end

end
48
+
49
+ class ProxyList < Array
50
+ __init__
51
+ attr_accessor :file
52
+ attr_reader :name
53
+ alias :minus :-
54
+ alias :plus :+
55
+
56
# Creates a proxy list.
#
# source - a String (handed to #load) or an Array of entries.
# file   - optional String path the list is bound to; when given, the
#          entries are normalised with #fix! and a non-empty list is
#          written to that file.
#
# Raises ArgumentError when +file+ is given but is not a String, and
# TypeError when +source+ is neither a String nor an Array.
def initialize(source=[], file=nil)
  if source.is(String)
    super([])
    load(source)
    return
  end
  unless source.is(Array)
    raise TypeError, "can't create proxylist from #{source.class}"
  end
  raise ArgumentError, 'second arg must be a string' if file && !file.is(String)

  super(source)
  fix! if file
  # The name is the hash of the sorted entries; the file's base name is
  # used instead only when that hash is 0.
  @name = sort.hash
  @name = File.split(file)[1] if file && @name == 0
  @file = file || "tmp/#{'%0*x'%[12,@name]}.txt"
  rw(@file, map { |i| i*':'+"\n" }) if file && !empty?
end
71
+
72
# Builds a new ProxyList from the current entries and file, which
# recomputes the list's name.
def rehash
  ProxyList(self, @file)
end
75
+
76
# Two ProxyLists are equal when their names match; for any other object
# the comparison is delegated to that object (pl == self).
def ==(pl)
  pl.is(ProxyList) ? pl.name == @name : pl == self
end
83
+
84
# Difference computed by the aliased `minus`, wrapped back into a ProxyList.
def -(pl)
  ProxyList(minus(pl))
end
87
+
88
# Concatenation computed by the aliased `plus`, wrapped back into a ProxyList.
def +(pl)
  ProxyList(plus(pl))
end
91
+
92
# Normalises the list in place and returns self.
#
# Entries that are not Arrays, or whose first element is nil/false, are
# dropped. Dotted-quad addresses get their octets rewritten as plain
# integers (leading zeros removed); a port that converts to a positive
# integer is stored as an Integer, any other port is kept as is.
# Duplicate entries are removed.
def fix!
  map! do |entry|
    next unless entry.is(Array) and (ip = entry[0])
    port = entry[1].to_i
    tidy = ip.gsub!(/^(\d+)\.(\d+)\.(\d+)\.(\d+)$/) { $~.captures.map { |d| d.to_i }.join('.') }
    [tidy || ip, port > 0 ? port : entry[1]]
  end
  compact!
  uniq! || self
end
101
+
102
# Non-destructive counterpart of #fix!: returns a normalised copy and
# leaves the receiver's own entry array untouched.
# (This previously called `fixprlist!`, which is not defined in this file.)
def fix
  dup.fix!
end
105
+
106
# Summary string: the name as text when it is a String, otherwise as a
# zero-padded hexadecimal number, followed by the item count.
def inspect
  if @name.is(String)
    sprintf("<#ProxyList: %s (%d items)>", @name, size)
  else
    sprintf("<#ProxyList:%#0*x (%d items)>", 12, @name, size)
  end
end
111
+
112
# Already a proxy list: returns the receiver itself (no copy is made).
def to_pl
  return self
end
115
+
116
# Truthy when +ip+ is a String that matches a dotted-quad line and
# contains no newline character.
def _fixed(ip)
  ip.is(String) && ip[/^\d+\.\d+\.\d+\.\d+$/] && !ip["\n"]
end
119
+
120
# Addresses of the entries that pass #_fixed.
# The shape of the first entry decides how the list is read: plain values,
# one-element arrays, or [address, port] pairs (where the port must also
# convert to a positive integer).
def ips
  head = self[0]
  if !head.is(Array)
    find_all { |i| _fixed i }
  elsif head.size > 1
    find_all { |i| i[1].to_i > 0 and _fixed i[0] }.firsts
  else
    find_all { |i| _fixed i[0] }.firsts
  end
end
127
+
128
# Entries whose second element converts to a positive integer.
def standart
  find_all do |entry|
    entry[1].to_i > 0
  end
end
131
+
132
# Entries whose second element does not convert to a positive integer.
def glypes
  reject do |entry|
    entry[1].to_i > 0
  end
end
135
+
136
# True when the first element of every entry passes #_fixed.
def ips_fixed
  firsts.all? { |ip| _fixed ip }
end
140
+
141
# Rewrites host entries as IP addresses in place, working on index batches
# in separate threads, then drops the entries that were set to nil.
#
# Per entry:
# * an integer address is converted with to_ip;
# * entries without a positive port, or already in dotted-quad form, are
#   left alone;
# * an address embedded in the host name (dots or dashes) is extracted;
# * a host already recorded in $unres_hosts is removed from the list;
# * anything else is resolved with IPSocket.getaddress, and a failed
#   lookup is recorded in $unres_hosts.
#
# The $unres_hosts lookup used to test a variable that is always nil at
# that point, so known-unresolvable hosts were looked up again every time;
# it now tests the host name.
def fix_ips
  ts = []
  (0...size).to_a.div(size/50).each {|d| ts << Thread.new {d.each {|i|
    host = self[i][0]

    if host.is Fixnum
      self[i][0] = host.to_ip
      next
    elsif self[i][1].to_i == 0 or host[/^\d+\.\d+\.\d+\.\d+$/]
      next
    end

    if (ip = host[/\d{1,3}[\.\-]\d+[\.\-]\d+[\.\-]\d{1,3}/])
      self[i][0] = ip.gsub('-', '.')
    elsif $unres_hosts.has host
      self[i] = nil
    else
      ip = begin
        IPSocket.getaddress(host)
      rescue
        $unres_hosts << host
        nil
      end
      ip and (self[i][0] = ip) and tick!
    end

  }}}
  ts.joins && ts.clear
  compact! || self
end
165
+
166
# True when every entry is an Array with two non-nil items: a String
# first, then a String or Fixnum.
def valid
  all? do |i|
    i.is(Array) && i.nitems == 2 && i[0].is(String) &&
      (i[1].is(String) || i[1].is(Fixnum))
  end
end
170
+
171
# Writes the list to @file, one "host:port" line per Array entry.
#
# mark - optional label. When given, the lines are wrapped in a header
#        ("##########", timestamp, label) and a footer of twenty '#'.
#
# The lines are joined before being placed in the marked text; the Array
# used to be interpolated directly, which produces its inspect form rather
# than the lines themselves on Ruby 1.9 and later. Without a mark the
# Array of lines is handed to rw unchanged.
def save(mark=nil)
  lines = map { |i| (i * ':' + "\n") if i.is(Array) }
  if mark
    lines = "#"*10+" #{puttime} - #{mark}\n#{lines.join}"+"#"*20
  end
  rw @file, lines
end
176
+
177
# Fills the list from +file+ and returns self.
#
# A string containing a newline or "</" is parsed as raw text. Otherwise it
# is treated as a path: an existing file is read and its content parsed,
# and anything else is parsed as text as well.
def load(file=@file)
  inline = ("\n".in(file) or "</".in(file))
  text = (!inline && File.file?(file)) ? IO.read(file) : file
  text.grabprlist(self)
  self
end
187
+
188
# Forwards to the inherited implementation, passing the block through.
def find_all(&block)
  super(&block)
end
191
+
192
# Forwards to the inherited implementation, passing the block through.
def reject(&block)
  super(&block)
end
195
+
196
+ end