rhack 0.4.1 → 1.0.0.rc4
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +22 -0
- data/Gemfile +2 -5
- data/LICENSE +19 -15
- data/README.md +66 -26
- data/Rakefile +42 -31
- data/config/cacert.pem +3895 -0
- data/config/rhack.yml.template +40 -0
- data/ext/curb-original/curb_config.h +3 -0
- data/ext/curb-original/curb_easy.c +3 -54
- data/ext/curb-original/curb_multi.c +69 -140
- data/ext/curb/curb_multi.c +1 -1
- data/lib/rhack.rb +82 -12
- data/lib/rhack/cookie.rb +49 -0
- data/lib/rhack/curl.rb +6 -0
- data/lib/{extensions/curb.rb → rhack/curl/easy.rb} +26 -48
- data/lib/rhack/curl/global.rb +175 -0
- data/lib/rhack/curl/itt.rb +11 -0
- data/lib/rhack/curl/multi.rb +37 -0
- data/lib/rhack/curl/post_field.rb +20 -0
- data/lib/rhack/curl/response.rb +91 -0
- data/lib/rhack/dl.rb +308 -0
- data/lib/rhack/frame.rb +316 -0
- data/lib/{extensions → rhack/js}/browser/env.js +0 -0
- data/lib/{extensions → rhack/js}/browser/jquery.js +0 -0
- data/lib/{extensions → rhack/js}/browser/xmlsax.js +0 -0
- data/lib/{extensions → rhack/js}/browser/xmlw3cdom_1.js +0 -0
- data/lib/{extensions → rhack/js}/browser/xmlw3cdom_2.js +0 -0
- data/lib/rhack/js/johnson.rb +71 -0
- data/lib/rhack/page.rb +263 -0
- data/lib/rhack/proxy.rb +3 -0
- data/lib/rhack/proxy/checker.rb +1 -1
- data/lib/rhack/scout.rb +342 -0
- data/lib/rhack/scout_squad.rb +98 -0
- data/lib/rhack/services.rb +1 -464
- data/lib/rhack/services/base.rb +59 -0
- data/lib/rhack/services/examples.rb +423 -0
- data/lib/rhack/version.rb +3 -0
- data/lib/rhack_in.rb +3 -2
- data/rhack.gemspec +28 -0
- metadata +104 -85
- data/.gemtest +0 -0
- data/Gemfile.lock +0 -23
- data/Manifest.txt +0 -60
- data/ext/curb/Makefile +0 -217
- data/lib/cache.rb +0 -44
- data/lib/curl-global.rb +0 -164
- data/lib/extensions/declarative.rb +0 -153
- data/lib/extensions/johnson.rb +0 -63
- data/lib/frame.rb +0 -848
- data/lib/init.rb +0 -49
- data/lib/rhack.yml.template +0 -19
- data/lib/scout.rb +0 -589
- data/lib/words.rb +0 -25
data/lib/init.rb
DELETED
@@ -1,49 +0,0 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
module HTTPAccessKit
|
3
|
-
include RMTools
|
4
|
-
extend RMTools
|
5
|
-
|
6
|
-
Dir.chdir ENV['APP_ROOT'] if ENV['APP_ROOT']
|
7
|
-
|
8
|
-
CONFIG = if defined? Rails
|
9
|
-
YAML.load(read('config/rhack.yml') || '') || {}
|
10
|
-
else
|
11
|
-
YAML.load(read(%W(config/rhack.yml rhack.yml #{File.join(ENV['HOME'], 'rhack.yml')})) || '') || {}
|
12
|
-
end
|
13
|
-
|
14
|
-
UAS = if File.file?(uas = CONFIG['ua file'] || File.join(ENV['HOME'], 'ua.txt'))
|
15
|
-
IO.read(uas)/"\n"
|
16
|
-
else ['Mozilla/5.0 (Windows NT 6.1; WOW64; rv:14.0) Gecko/20100101 Firefox/14.0.1'] end
|
17
|
-
|
18
|
-
L = RMLogger.new(CONFIG.logger || {})
|
19
|
-
|
20
|
-
db_config = if defined? Rails
|
21
|
-
YAML.load(read('config/database.yml'))[ENV["RAILS_ENV"]]
|
22
|
-
else
|
23
|
-
CONFIG.db || File.join(ENV['HOME'], 'db.yml')
|
24
|
-
end
|
25
|
-
begin
|
26
|
-
DB = ActiveRecord::Base.establish_connection_with db_config
|
27
|
-
rescue LoadError
|
28
|
-
DB = nil
|
29
|
-
end
|
30
|
-
if DB and !(CONFIG.cache and CONFIG.cache.enabled == false)
|
31
|
-
cache = CONFIG.cache || {}
|
32
|
-
CacheDir = cache.dir || File.join(File.dirname(__FILE__), 'cache')
|
33
|
-
CacheTable = (cache.table || :rhack_cache).to_sym
|
34
|
-
CacheTTL = cache.clean ? eval(cache.clean).b : nil
|
35
|
-
end
|
36
|
-
|
37
|
-
RETRY = CONFIG['scout retry'] || {}
|
38
|
-
|
39
|
-
$uas ||= UAS
|
40
|
-
$Carier ||= Curl::Multi.new
|
41
|
-
$Carier.pipeline = true
|
42
|
-
|
43
|
-
def self.update
|
44
|
-
each_child {|c| c.class_eval "include HTTPAccessKit; extend HTTPAccessKit" if !c.in c.children}
|
45
|
-
end
|
46
|
-
end
|
47
|
-
|
48
|
-
module Curl; extend HTTPAccessKit end
|
49
|
-
RHACK = HTTPAccessKit
|
data/lib/rhack.yml.template
DELETED
@@ -1,19 +0,0 @@
|
|
1
|
-
#ua file: db/useragents.txt
|
2
|
-
#logger:
|
3
|
-
# :out: log/rhack.log
|
4
|
-
#scout retry: {host => [Curl::Error subclass, ], }
|
5
|
-
# example.com:
|
6
|
-
# - TimeoutError
|
7
|
-
#db: # defaults file @ to RAILS_PATH/config/RAILS_ENV.yml
|
8
|
-
# reconnect: true
|
9
|
-
# encoding: utf8
|
10
|
-
# username: root
|
11
|
-
# adapter: mysql
|
12
|
-
# database: dbname
|
13
|
-
# pool: 5
|
14
|
-
# password:
|
15
|
-
# socket: /var/run/mysqld/mysqld.sock
|
16
|
-
#cache: # deprecated
|
17
|
-
# dir: /path/to/cache/dir
|
18
|
-
# table: hack_cache
|
19
|
-
# clean: 30.days
|
data/lib/scout.rb
DELETED
@@ -1,589 +0,0 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
module Curl
|
3
|
-
|
4
|
-
# Runs the given block inside the completion callback of a 'file://' Scout
# request for this source file, blocks the caller until that callback has
# run, and returns the block's value.
#
# Completion is tracked with a dedicated flag rather than by testing the
# block's result, so a block that returns nil or false no longer makes the
# caller poll forever. The flag is also set when the block raises, so the
# caller is released (with a nil result) instead of hanging.
#
# NOTE(review): if nothing ever drives the scout's callbacks, this still
# waits indefinitely, exactly as before.
def ITT
  res = nil
  done = false
  HTTPAccessKit::Scout('file://').loadGet(__FILE__) do |_c|
    begin
      res = yield
    ensure
      done = true
    end
  end
  sleep 0.01 until done
  res
end
|
9
|
-
module_function :ITT
|
10
|
-
|
11
|
-
class Response
|
12
|
-
__init__
|
13
|
-
attr_reader :header, :code, :body, :hash, :timestamp, :time, :req, :date, :error
|
14
|
-
|
15
|
-
def to_s
|
16
|
-
str = '<#'
|
17
|
-
if @error
|
18
|
-
str << "#{@error[0].self_name}: #{@error[1]}"
|
19
|
-
else
|
20
|
-
str << (@header[/\d{3}/] == @code.to_s ? @header : "#{@header[/\S+/]} #{@code}") if @header
|
21
|
-
if @hash.location
|
22
|
-
str << ' '+@req.url if $panic
|
23
|
-
str << ' -> '+@hash.location
|
24
|
-
end
|
25
|
-
str << " (#{@body ? @body.size.bytes : 'No'} Body)"
|
26
|
-
str << " [#{@timestamp}]" if @timestamp
|
27
|
-
end
|
28
|
-
str << '>'
|
29
|
-
end
|
30
|
-
alias :inspect :to_s
|
31
|
-
|
32
|
-
def initialize(easy)
|
33
|
-
@hash = {}
|
34
|
-
@timestamp = @date = @header = nil
|
35
|
-
if easy.base.error
|
36
|
-
@error = easy.base.error
|
37
|
-
else
|
38
|
-
if headers = easy.header_str || easy.base.headers
|
39
|
-
headers /= "\r\n"
|
40
|
-
@header = headers.shift
|
41
|
-
headers.each {|h|
|
42
|
-
h /= ': '
|
43
|
-
if h[0]
|
44
|
-
h[0].downcase!
|
45
|
-
if h[0] == 'set-cookie'
|
46
|
-
(@hash.cookies ||= []) << h[1]
|
47
|
-
else
|
48
|
-
@hash[h[0]] = h[1]
|
49
|
-
end
|
50
|
-
end
|
51
|
-
}
|
52
|
-
@timestamp = if @hash.date
|
53
|
-
begin
|
54
|
-
@date = @hash.date.to_time
|
55
|
-
rescue => e
|
56
|
-
(@date = Time.now).strftime("%H:%M:%S")
|
57
|
-
L < "Error #{e.class}:#{e.message} with @hash.date = #{@hash.date.inspect}"
|
58
|
-
end
|
59
|
-
@hash.date[/\d\d:\d\d:\d\d/]
|
60
|
-
else
|
61
|
-
(@date = Time.now).strftime("%H:%M:%S")
|
62
|
-
end
|
63
|
-
end
|
64
|
-
@code = easy.response_code
|
65
|
-
@body = easy.body_str
|
66
|
-
@time = easy.total_time
|
67
|
-
end
|
68
|
-
|
69
|
-
@req = {}
|
70
|
-
@req.url = easy.last_effective_url
|
71
|
-
@req.headers = easy.headers
|
72
|
-
if range = easy.headers.Range and range[/(\d+)-(\d+)/]
|
73
|
-
@req.range = $1.to_i .. $2.to_i
|
74
|
-
end
|
75
|
-
if easy.base and @req.meth = easy.base.last_method and @req.meth == :post
|
76
|
-
@req.body = easy.post_body
|
77
|
-
@req.mp = easy.multipart_form_post?
|
78
|
-
end
|
79
|
-
end
|
80
|
-
|
81
|
-
# Class test for a response object.
#
# With an error stored in @error (which other methods of this class index
# like an array: @error[0], @error[1]) the response answers true for both
# Array and Curl::Response; without an error only for Curl::Response.
#
# The error branch used to contain `klass = Curl::Response` (an assignment),
# which made the method truthy for every class passed in.
def is(klass)
  if @error
    klass == Array || klass == Curl::Response
  else
    klass == Curl::Response
  end
end
|
88
|
-
|
89
|
-
def [](key_or_index)
|
90
|
-
@error ? @error[key_or_index] : @hash[key_or_index.downcase]
|
91
|
-
end
|
92
|
-
|
93
|
-
alias :headers :hash
|
94
|
-
end
|
95
|
-
|
96
|
-
end
|
97
|
-
|
98
|
-
module HTTPAccessKit
|
99
|
-
|
100
|
-
class Cookie
|
101
|
-
__init__
|
102
|
-
|
103
|
-
# Builds a cookie in one of two ways.
#
# * (set_cookie_string, scout) -- parses a Set-Cookie header value. The first
#   ';'-separated segment gives name and value; the 'path', 'domain' and
#   'expires' attributes are turned into @path / @domain regexps and an
#   @expires time. The cookie registers itself in
#   scout.cookies[scout.uri.host] under its name.
# * anything else -- args[0] is destructured into name and cookie data; the
#   data may be an Array [value, path, domain] or a Hash with those entries,
#   otherwise args[1].to_s is used as the value.
#
# @path and @domain default to the empty regexp (matches everything), and
# @string caches the "name=value; " fragment.
def initialize(*args)
  if args[1].is Scout
    str, scout = *args
    ck = str / /;\s*/
    ck[1..-1].each {|par|
      a = par / '='
      # Skip empty segments and value-less attributes such as "Secure".
      next unless a[0] && a[1]
      case a[0].downcase
        when 'path'; @path = (a[1] == '/') ? // : /^#{Regexp.escape a[1]}/
        # Strip only a leading dot; the previous /^./ removed the first
        # character of any value, so "example.com" could never match.
        when 'domain'; @domain = /(^|\.)#{Regexp.escape a[1].sub(/^\./, '')}$/
        when 'expires'; @expires = a[1].to_time
      end
    }
    @name, @value = ck[0].split('=', 2)
    #@value.gsub!(/^['"]|['"]$/, '')
    #L.debug args if !@domain
    (scout.cookies[scout.uri.host] ||= {})[@name] = self
  else
    @name, cookie = args[0]
    case cookie
      when Array; @value, @path, @domain = cookie
      when Hash; @value, @path, @domain = cookie.value, cookie.path, cookie.domain
      else @value = args[1].to_s
    end
  end
  @path ||= //
  @domain ||= //
  @string = "#{@name}=#{@value}; "
end
|
131
|
-
|
132
|
-
def use(str, uri)
|
133
|
-
if !@expires or @expires > Time.now
|
134
|
-
str << @string if uri.path[@path] and !uri.root || uri.host[@domain]
|
135
|
-
else
|
136
|
-
:expired
|
137
|
-
end
|
138
|
-
end
|
139
|
-
|
140
|
-
def to_s; @value end
|
141
|
-
def inspect; @value.inspect end
|
142
|
-
|
143
|
-
end
|
144
|
-
|
145
|
-
class Scout
|
146
|
-
__init__
|
147
|
-
attr_accessor :timeout, :raise_err, :retry
|
148
|
-
attr_accessor :path, :root, :sld, :proxy
|
149
|
-
attr_reader :uri
|
150
|
-
attr_reader :webproxy, :last_method, :proxystr, :headers, :body, :http, :error
|
151
|
-
attr_reader :cookies, :ua, :refforge, :cookieStore, :cookieProc
|
152
|
-
|
153
|
-
DefaultHeader = {
|
154
|
-
"Expect" => "",
|
155
|
-
"Keep-Alive" => "300",
|
156
|
-
"Accept-Charset" => "windows-1251,utf-8;q=0.7,*;q=0.7",
|
157
|
-
"Accept-Language" => "ru,en-us;q=0.7,en;q=0.3",
|
158
|
-
"Connection" => "keep-alive"
|
159
|
-
}
|
160
|
-
|
161
|
-
class ProxyError < ArgumentError
|
162
|
-
def initialize proxy
|
163
|
-
super "incorrect proxy: %s class %s, must be an Array
|
164
|
-
proxy format: ['127.0.0.1', '80'], [2130706433, 80], ['someproxy.com', :WebproxyModule]"%[proxy.inspect, proxy.class]
|
165
|
-
end
|
166
|
-
end
|
167
|
-
@@retry = RETRY
|
168
|
-
|
169
|
-
def initialize(*argv)
|
170
|
-
uri, proxy, @ua, @refforge, opts = argv.get_opts ['http://', nil, :rand, 1]
|
171
|
-
raise ProxyError, proxy if proxy and (!webproxy && !proxy.is(Array) or webproxy && !proxy.is(String))
|
172
|
-
'http://' >> uri if uri !~ /^\w+:\/\//
|
173
|
-
if proxy
|
174
|
-
if proxy[1] and proxy[1].to_i == 0
|
175
|
-
@webproxy = eval("WebProxy::#{proxy[1]}")
|
176
|
-
@proxy = proxy[0].parse(:uri).root
|
177
|
-
else
|
178
|
-
proxy[0] = proxy[0].to_ip if proxy[0].is Integer
|
179
|
-
@proxy = proxy
|
180
|
-
end
|
181
|
-
end
|
182
|
-
@cookies = {}
|
183
|
-
@body = {}
|
184
|
-
@num = []
|
185
|
-
@cookieProc = opts[:cp] || opts[:ck]
|
186
|
-
@raise_err = opts[:raise] # no way to use @raise id, it makes any 'raise' call here fail
|
187
|
-
@engine = opts[:engine]
|
188
|
-
@timeout = opts[:timeout] || $CurlDefaultTimeout || 60
|
189
|
-
@post_proc = @get_proc = @head_proc = Proc::NULL
|
190
|
-
update uri
|
191
|
-
@retry = opts[:retry] || {}
|
192
|
-
@retry = {@uri.host => @retry} if @retry.is Array
|
193
|
-
end
|
194
|
-
|
195
|
-
def update(uri)
|
196
|
-
if !uri[/^\w+:\/\//]
|
197
|
-
'/' >> uri if uri[0,1] != '/'
|
198
|
-
@uri = uri.parse:uri
|
199
|
-
return
|
200
|
-
end
|
201
|
-
@uri = uri.parse:uri
|
202
|
-
return if @uri.root == @root
|
203
|
-
@root = @uri.root
|
204
|
-
@sld = @root[/[\w-]+\.[a-z]+$/]
|
205
|
-
@path = @uri.fullpath
|
206
|
-
if @http
|
207
|
-
@http.url = @webproxy ? @proxy : @root
|
208
|
-
else
|
209
|
-
@http = Curl::Easy(@webproxy ? @proxy : @root)
|
210
|
-
@http.base = self
|
211
|
-
end
|
212
|
-
if @proxy
|
213
|
-
@http.proxy_url = @proxy*':' if !@webproxy
|
214
|
-
@proxystr = @webproxy ? @proxy[0] : @http.proxy_url
|
215
|
-
else @proxystr = 'localhost'
|
216
|
-
end
|
217
|
-
if @cookieProc.is Hash
|
218
|
-
self.main_cks = @cookieProc
|
219
|
-
@cookieProc = true
|
220
|
-
end
|
221
|
-
self
|
222
|
-
end
|
223
|
-
|
224
|
-
def to_s
|
225
|
-
str = "<##{self.class.self_name} @ "
|
226
|
-
if @webproxy
|
227
|
-
str << "#{@proxy} ~ "
|
228
|
-
elsif @proxy
|
229
|
-
str << @proxy*':'+" ~ "
|
230
|
-
end
|
231
|
-
str << @root+'>'
|
232
|
-
end
|
233
|
-
alias :inspect :to_s
|
234
|
-
|
235
|
-
def update_res
|
236
|
-
@outdated = false
|
237
|
-
@res = @http.res
|
238
|
-
@headers = nil
|
239
|
-
@res
|
240
|
-
end
|
241
|
-
|
242
|
-
def res
|
243
|
-
if @res && !@outdated
|
244
|
-
@res
|
245
|
-
else update_res end
|
246
|
-
end
|
247
|
-
|
248
|
-
def req; res.req end
|
249
|
-
|
250
|
-
def dump
|
251
|
-
str = "IP: #{@proxystr}\nRequest: "
|
252
|
-
str << ({"Action"=>@root+@path} + @http.headers).dump+@body.dump+"Response: #{res}"
|
253
|
-
str << "\nReady" if @ready
|
254
|
-
str
|
255
|
-
end
|
256
|
-
|
257
|
-
def fix(path)
|
258
|
-
path = path.tr ' ', '+'
|
259
|
-
path = expand path if path =~ /^\./
|
260
|
-
if update(path) or @uri.root
|
261
|
-
path = @webproxy.encode(path) if @webproxy
|
262
|
-
else
|
263
|
-
path = @webproxy.encode(@root+path) if @webproxy
|
264
|
-
end
|
265
|
-
path
|
266
|
-
end
|
267
|
-
|
268
|
-
def expand(uri)
|
269
|
-
if !@webproxy || @http.last_effective_url
|
270
|
-
path = (@http.last_effective_url ? @http.last_effective_url.parse(:uri) : @uri).path
|
271
|
-
return uri.sub(/^(\.\.?\/)?/, File.split(uri =~ /^\.\./ ? File.split(path)[0] : path)[0])
|
272
|
-
end
|
273
|
-
uri
|
274
|
-
end
|
275
|
-
|
276
|
-
# Sets the request body on @http from a key/value collection.
#
# params    - pairs of field name and value; a Proc value is called first
#             and its result is used.
# multipart - when truthy a multipart body is built, otherwise the body is
#             params.urlencode.
#
# Multipart field-name prefixes:
#   "f:name" - file field with a random "<hex>.jpg" file name whose content
#              is the value followed by a random string;
#   "p:name" - file field whose value is a path: the file name is the path's
#              basename and the content is read(path);
#   other    - plain content field (key and value stringified).
#
# The "p:" branch used to call File.basename(f) with `f` undefined, raising
# NameError on every path upload; it now uses the path value itself.
def mkBody(params, multipart=nil)
  if multipart
    @http.multipart_post_body = params.map {|k, v|
      v = v.call if v.is Proc
      if k =~ /^f:/
        Curl::PostField.file(k[2..-1], "application/octet-stream",
          "#{randstr(16, :hex)}.jpg", v+randstr )
      elsif k =~ /^p:/
        Curl::PostField.file(k[2..-1], "application/octet-stream",
          File.basename(v), read(v) )
      else
        Curl::PostField.content(k.to_s, v.to_s)
      end
    }
  else
    @http.post_body = params.urlencode
  end
end
|
294
|
-
|
295
|
-
def mkHeader(uri)
|
296
|
-
header = DefaultHeader.dup
|
297
|
-
if @cookieProc
|
298
|
-
cookies = ''
|
299
|
-
main_cks.each {|k, v| main_cks.delete k if v.use(cookies, @uri) == :expired}
|
300
|
-
header['Cookie'] = cookies[0..-3]
|
301
|
-
end
|
302
|
-
if @refforge
|
303
|
-
ref = @uri.root ? uri : (@webproxy ? @http.host : @root)+uri
|
304
|
-
header['Referer'] = ref.match(/(.+)[^\/]*$/)[1]
|
305
|
-
end
|
306
|
-
header['User-Agent'] = @ua == :rand ? UAS.rand : @ua if @ua
|
307
|
-
header
|
308
|
-
end
|
309
|
-
|
310
|
-
# Collects Set-Cookie values from a response and builds a Cookie for each
# one, passing this object as the owner.
#
# res - either a raw header String (lines separated by "\n" or "\r\n") or a
#       Curl::Response, from which res['cookies'] is taken. Any other input
#       yields no cookies.
#
# Returns nil when there are no cookies to process.
#
# The header name is compared with the non-destructive `downcase`: the
# previous `downcase!` returns nil when the name is already lower-case, so
# "set-cookie: ..." lines were silently ignored. Header lines without a
# value are skipped instead of producing a nil cookie string.
def ProcCookies(res)
  ck = []
  case res
  when String
    res.split(/\r?\n/).each {|h|
      hs = h/': '
      ck << hs[1] if hs[0] and hs[1] and hs[0].downcase == 'set-cookie'
    }
  when Curl::Response
    ck = res['cookies']
  end
  return if !ck.b
  ck.each {|c| Cookie(c, self)}
  # StoreCookies if @cookieStore
end
|
325
|
-
|
326
|
-
def cp_on() @cookieProc = true end
|
327
|
-
def cp_off() @cookieProc = false end
|
328
|
-
|
329
|
-
def main_cks() @cookies[@uri.host] ||= {} end
|
330
|
-
def main_cks=(cks)
|
331
|
-
@cookies[@uri.host] = @webproxy ?
|
332
|
-
@webproxy.ck_encode(@root, cks) :
|
333
|
-
cks.map2 {|k, v| Cookie(k, v)}
|
334
|
-
end
|
335
|
-
|
336
|
-
# Decides whether a failed request should be reloaded.
#
# err - the failure as passed to the on_failure handler; err[0] is the error
#       class, whose short name is looked up in the retry tables.
#
# Host keys from the class-level table (@@retry) and the per-scout table
# (@retry) are filtered against @root, e.g.
#   ['0chan.ru', 'www.nomer.org', 'nomer.org'].select_in('http://www.nomer.org')
#     # => ['www.nomer.org', 'nomer.org']
# Returns false when no key applies; otherwise returns the result of `no?`
# over the applicable keys.
# NOTE(review): `no?` makes this true when the error name is listed for none
# of the matching hosts -- confirm this polarity is intended.
#
# Either table may lack an entry for a key that exists only in the other,
# so both lookups fall back to an empty list (previously `+ nil` raised
# TypeError for hosts configured only in @@retry).
def retry?(err)
  exc = (@@retry.keys + @retry.keys).select_in @root
  return false if !exc.b
  exc.no? {|e| err[0].self_name.in((@@retry[e] || []) + (@retry[e] || []))}
end
|
343
|
-
|
344
|
-
def loaded?
|
345
|
-
$Carier.reqs.include? @http
|
346
|
-
end
|
347
|
-
|
348
|
-
def load!
|
349
|
-
unless $Carier.add @http
|
350
|
-
$Carier.remove @http
|
351
|
-
$Carier.add @http
|
352
|
-
end
|
353
|
-
rescue RuntimeError => e
|
354
|
-
e.message << ". Failed to load allready loaded? easy handler: Bad file descriptor" unless Curl::Err::CurlError === e
|
355
|
-
raise e
|
356
|
-
end
|
357
|
-
|
358
|
-
def load(path=@path, headers={}, not_redir=1, relvl=10, &callback)
|
359
|
-
@http.path = path = fix(path)
|
360
|
-
@http.headers = mkHeader(path).merge!(headers)
|
361
|
-
@http.timeout = @timeout
|
362
|
-
|
363
|
-
@http.on_complete {|c|
|
364
|
-
@error = nil
|
365
|
-
@outdated = true
|
366
|
-
ProcCookies c.res if @cookieProc
|
367
|
-
# We cannot just cancel on_complete in on_redirect block
|
368
|
-
# because loadGet will immediately reset on_complete back
|
369
|
-
if c.res.code.in(300..399) and !not_redir.b and (relvl -= 1) > -1 and loc = c.res.hash.location
|
370
|
-
loadGet(loc, headers: headers, relvl: relvl, redir: true, &callback)
|
371
|
-
elsif block_given?
|
372
|
-
yield c
|
373
|
-
end
|
374
|
-
}
|
375
|
-
@http.on_failure {|c, e|
|
376
|
-
@http.on_complete &Proc::NULL
|
377
|
-
@outdated = true
|
378
|
-
@error = e
|
379
|
-
if retry? e
|
380
|
-
L.debug "#{e[0]} -> reloading scout"
|
381
|
-
#load uri, headers, not_redir, relvl, &callback
|
382
|
-
load! # all params including post_body are still set
|
383
|
-
else
|
384
|
-
L.debug "#{e[0]} -> not reloading scout"
|
385
|
-
raise *e if @raise_err
|
386
|
-
end
|
387
|
-
} if !@http.on_failure
|
388
|
-
|
389
|
-
load!
|
390
|
-
end
|
391
|
-
|
392
|
-
def loadPost(*argv, &callback)
|
393
|
-
hash, multipart, uri, opts = argv.get_opts [@body, false, @path],
|
394
|
-
:headers => {}, :redir => false, :relvl => 2
|
395
|
-
mkBody hash, multipart.b
|
396
|
-
@last_method = :post
|
397
|
-
if block_given?
|
398
|
-
@post_proc = callback
|
399
|
-
else#if @http.callback != @post_proc
|
400
|
-
callback = @post_proc
|
401
|
-
end
|
402
|
-
load(uri, opts[:headers], !opts[:redir], opts[:relvl], &callback)
|
403
|
-
end
|
404
|
-
|
405
|
-
def loadGet(*argv, &callback)
|
406
|
-
uri, opts = argv.get_opts [@path],
|
407
|
-
:headers => {}, :redir => false, :relvl => 2
|
408
|
-
@http.get = true
|
409
|
-
@last_method = :get
|
410
|
-
if block_given?
|
411
|
-
@get_proc = callback
|
412
|
-
else#if @http.callback != @get_proc
|
413
|
-
callback = @get_proc
|
414
|
-
end
|
415
|
-
load(uri, opts[:headers], !opts[:redir], opts[:relvl], &callback)
|
416
|
-
end
|
417
|
-
|
418
|
-
def loadHead(*argv, &callback)
|
419
|
-
uri, emulate, headers = argv.get_opts [@path, :if_retry]
|
420
|
-
@http.head = true if emulate != :always
|
421
|
-
@last_method = :head
|
422
|
-
if block_given?
|
423
|
-
@head_proc = callback
|
424
|
-
else#if @http.callback != @head_proc
|
425
|
-
callback = @head_proc
|
426
|
-
end
|
427
|
-
emu = lambda {
|
428
|
-
@headers = ''
|
429
|
-
@http.on_header {|h|
|
430
|
-
@headers << h
|
431
|
-
h == "\r\n" ? 0 : h.size
|
432
|
-
}
|
433
|
-
@http.get = true
|
434
|
-
load(uri, headers) {|c| c.on_header; callback[c]}
|
435
|
-
}
|
436
|
-
if emulate != :always
|
437
|
-
load(uri, headers) {|c|
|
438
|
-
if !@error and c.res.code != 200 and emulate == :if_retry
|
439
|
-
emu.call
|
440
|
-
else
|
441
|
-
callback[c]
|
442
|
-
end
|
443
|
-
}
|
444
|
-
else emu.call
|
445
|
-
end
|
446
|
-
end
|
447
|
-
|
448
|
-
end
|
449
|
-
|
450
|
-
class PickError < IndexError
|
451
|
-
def initialize
|
452
|
-
super "can't get scout from empty squad" end
|
453
|
-
end
|
454
|
-
|
455
|
-
class ScoutSquad < Array
|
456
|
-
__init__
|
457
|
-
|
458
|
-
def initialize(*args)
|
459
|
-
raise ArgumentError, "can't create empty squad" if (num = args.pop) < 1
|
460
|
-
proxies = nil
|
461
|
-
super []
|
462
|
-
if args[0].is Scout
|
463
|
-
s = args[0]
|
464
|
-
else
|
465
|
-
if !args[0].is String
|
466
|
-
args.unshift ''
|
467
|
-
if (opts = args[-1]).is Hash and (opts[:cp] || opts[:ck]).is Hash
|
468
|
-
L.warn "it's useless to setup cookies for untargeted squad!"
|
469
|
-
end
|
470
|
-
end
|
471
|
-
if args[1] and args[1][0].is Array
|
472
|
-
proxies = args[1]
|
473
|
-
args[1] = proxies.shift
|
474
|
-
end
|
475
|
-
self[0] = s = Scout(*args)
|
476
|
-
num -=1
|
477
|
-
end
|
478
|
-
num.times {|i|
|
479
|
-
self << Scout(s.root+s.path, (proxies ? proxies[i] : s.proxy), s.ua, s.refforge, :ck => s.main_cks, :raise => s.raise_err, :timeout => s.timeout, :retry => s.retry)
|
480
|
-
}
|
481
|
-
end
|
482
|
-
|
483
|
-
def update uri, forced=nil
|
484
|
-
each {|s| return L.warn "failed to update scout loaded? with url: #{s.http.url}" if s.loaded?} if !forced
|
485
|
-
each {|s| s.update uri}
|
486
|
-
end
|
487
|
-
|
488
|
-
def untargeted
|
489
|
-
first.root == 'http://'
|
490
|
-
end
|
491
|
-
|
492
|
-
def rand
|
493
|
-
raise PickError if !b
|
494
|
-
# to_a because reject returns object of this class
|
495
|
-
if scout = to_a.rand {|_|!_.loaded?}; scout
|
496
|
-
else # Curl should run here, otherwise `next'/`rand'-recursion will cause stack overflow
|
497
|
-
raise "Curl must run in order to use ScoutSquad#rand" if !Curl.status
|
498
|
-
#Curl.wait
|
499
|
-
loop {sleep 1; break if $Carier.reqs.size < size}
|
500
|
-
self.rand
|
501
|
-
end
|
502
|
-
end
|
503
|
-
|
504
|
-
def next
|
505
|
-
raise PickError if !b
|
506
|
-
if scout = find {|_|!_.loaded?}; scout
|
507
|
-
else # Curl should run here, otherwise `next'/`rand'-recursion will cause stack overflow
|
508
|
-
raise "Curl must run in order to use ScoutSquad#next" if !Curl.status
|
509
|
-
#Curl.wait
|
510
|
-
loop {sleep 1; break if $Carier.reqs.size < size}
|
511
|
-
self.next
|
512
|
-
end
|
513
|
-
end
|
514
|
-
|
515
|
-
def to_s
|
516
|
-
str = '<#ScoutSquad @ '
|
517
|
-
if b
|
518
|
-
if first.webproxy
|
519
|
-
str << "#{first.proxy} ~ "
|
520
|
-
elsif first.proxy
|
521
|
-
str << first.proxy*':'+" ~ "
|
522
|
-
end
|
523
|
-
str << "#{untargeted ? "no target" : first.root} "
|
524
|
-
end
|
525
|
-
str << "x#{size}>"
|
526
|
-
end
|
527
|
-
alias :inspect :to_s
|
528
|
-
|
529
|
-
end
|
530
|
-
|
531
|
-
end
|
532
|
-
|
533
|
-
### Global scope shortcut methods ###
|
534
|
-
|
535
|
-
module RMTools
|
536
|
-
|
537
|
-
# One-shot GET helper built on a temporary Scout.
#
# uri  - absolute URI; a local path (leading '/') raises ArgumentError and a
#        protocol-less URI is only logged at debug level.
# opts - options, also handed to the Scout constructor:
#          :headers / :h   extra request headers (default {})
#          :proxy / :pr    proxy (default $CurlGetProxy)
#          :ret_body / :b  truthy (default): the result is the body string;
#                          falsy: the result is the response object
#          :wait / :w      wait for completion (default: true unless a block
#                          is given)
#          :e              value for the scout's raise_err if not yet set
#          :t              value for the handle's timeout if not yet set
#
# Yields the result buffer to the block (if given) once the request
# completes, and returns that buffer.
#
# Headers are now passed to loadGet as its :headers option. They used to be
# passed as a bare trailing Hash, which loadGet reads as its options hash,
# so custom headers were apparently never sent.
# NOTE(review): that diagnosis relies on how rmtools' get_opts treats a
# trailing Hash -- confirm against rmtools.
def Get(uri, opts={})
  raise ArgumentError, "Local uri passed to Get function" if uri[0,1] == '/'
  $log.debug "Protocol-less uri passed to Get function" if !uri[/^\w+:\/\//]
  headers = opts[:headers] || opts[:h] || {}
  proxy = opts[:proxy] || opts[:pr] || $CurlGetProxy
  ret_body = opts.fetch(:ret_body, opts.fetch(:b, 1)).b
  wait = opts.fetch(:wait, opts.fetch(:w, !block_given?)).b
  s = HTTPAccessKit::Scout(uri, proxy, opts)
  buf = ret_body ? '' : s.http.res
  s.raise_err ||= opts[:e]
  s.http.timeout ||= opts[:t]
  s.loadGet(:headers => headers) {|c|
    if ret_body
      buf << c.body_str
    else
      buf.load_from c.res
    end
    yield buf if block_given?
  }
  if wait
    # Use the running carrier thread when there is one, otherwise drive the
    # multi handle synchronously.
    ($CarierThread and $CarierThread.status) ? Curl.wait : $Carier.perform
  end
  buf
end
|
561
|
-
module_function :Get
|
562
|
-
|
563
|
-
end
|
564
|
-
|
565
|
-
module Enumerable
|
566
|
-
|
567
|
-
def GetAll(on_count=nil, default_domain=nil, &callback)
|
568
|
-
if on_count
|
569
|
-
len = size
|
570
|
-
counter = 0
|
571
|
-
send(resto(:each_value) ? :each_value : :each) {|uri|
|
572
|
-
uri = File.join(default_domain, uri) if default_domain and (uri[0,1] == '/' or !uri[/^https?:/])
|
573
|
-
Get(uri) {|buf|
|
574
|
-
callback.arity > 1 ?
|
575
|
-
callback.call(buf, counter) :
|
576
|
-
callback.call(buf)
|
577
|
-
if (counter += 1) == len
|
578
|
-
on_count.arity > 0 ?
|
579
|
-
on_count.call(buf) :
|
580
|
-
on_count.call
|
581
|
-
end
|
582
|
-
}
|
583
|
-
}
|
584
|
-
else send(resto(:each_value) ? :each_value : :each) {|uri|
|
585
|
-
Get(uri, &callback) }
|
586
|
-
end
|
587
|
-
end
|
588
|
-
|
589
|
-
end
|