rhack 0.4.1 → 1.0.0.rc4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +22 -0
- data/Gemfile +2 -5
- data/LICENSE +19 -15
- data/README.md +66 -26
- data/Rakefile +42 -31
- data/config/cacert.pem +3895 -0
- data/config/rhack.yml.template +40 -0
- data/ext/curb-original/curb_config.h +3 -0
- data/ext/curb-original/curb_easy.c +3 -54
- data/ext/curb-original/curb_multi.c +69 -140
- data/ext/curb/curb_multi.c +1 -1
- data/lib/rhack.rb +82 -12
- data/lib/rhack/cookie.rb +49 -0
- data/lib/rhack/curl.rb +6 -0
- data/lib/{extensions/curb.rb → rhack/curl/easy.rb} +26 -48
- data/lib/rhack/curl/global.rb +175 -0
- data/lib/rhack/curl/itt.rb +11 -0
- data/lib/rhack/curl/multi.rb +37 -0
- data/lib/rhack/curl/post_field.rb +20 -0
- data/lib/rhack/curl/response.rb +91 -0
- data/lib/rhack/dl.rb +308 -0
- data/lib/rhack/frame.rb +316 -0
- data/lib/{extensions → rhack/js}/browser/env.js +0 -0
- data/lib/{extensions → rhack/js}/browser/jquery.js +0 -0
- data/lib/{extensions → rhack/js}/browser/xmlsax.js +0 -0
- data/lib/{extensions → rhack/js}/browser/xmlw3cdom_1.js +0 -0
- data/lib/{extensions → rhack/js}/browser/xmlw3cdom_2.js +0 -0
- data/lib/rhack/js/johnson.rb +71 -0
- data/lib/rhack/page.rb +263 -0
- data/lib/rhack/proxy.rb +3 -0
- data/lib/rhack/proxy/checker.rb +1 -1
- data/lib/rhack/scout.rb +342 -0
- data/lib/rhack/scout_squad.rb +98 -0
- data/lib/rhack/services.rb +1 -464
- data/lib/rhack/services/base.rb +59 -0
- data/lib/rhack/services/examples.rb +423 -0
- data/lib/rhack/version.rb +3 -0
- data/lib/rhack_in.rb +3 -2
- data/rhack.gemspec +28 -0
- metadata +104 -85
- data/.gemtest +0 -0
- data/Gemfile.lock +0 -23
- data/Manifest.txt +0 -60
- data/ext/curb/Makefile +0 -217
- data/lib/cache.rb +0 -44
- data/lib/curl-global.rb +0 -164
- data/lib/extensions/declarative.rb +0 -153
- data/lib/extensions/johnson.rb +0 -63
- data/lib/frame.rb +0 -848
- data/lib/init.rb +0 -49
- data/lib/rhack.yml.template +0 -19
- data/lib/scout.rb +0 -589
- data/lib/words.rb +0 -25
data/lib/init.rb
DELETED
@@ -1,49 +0,0 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
module HTTPAccessKit
|
3
|
-
include RMTools
|
4
|
-
extend RMTools
|
5
|
-
|
6
|
-
Dir.chdir ENV['APP_ROOT'] if ENV['APP_ROOT']
|
7
|
-
|
8
|
-
CONFIG = if defined? Rails
|
9
|
-
YAML.load(read('config/rhack.yml') || '') || {}
|
10
|
-
else
|
11
|
-
YAML.load(read(%W(config/rhack.yml rhack.yml #{File.join(ENV['HOME'], 'rhack.yml')})) || '') || {}
|
12
|
-
end
|
13
|
-
|
14
|
-
UAS = if File.file?(uas = CONFIG['ua file'] || File.join(ENV['HOME'], 'ua.txt'))
|
15
|
-
IO.read(uas)/"\n"
|
16
|
-
else ['Mozilla/5.0 (Windows NT 6.1; WOW64; rv:14.0) Gecko/20100101 Firefox/14.0.1'] end
|
17
|
-
|
18
|
-
L = RMLogger.new(CONFIG.logger || {})
|
19
|
-
|
20
|
-
db_config = if defined? Rails
|
21
|
-
YAML.load(read('config/database.yml'))[ENV["RAILS_ENV"]]
|
22
|
-
else
|
23
|
-
CONFIG.db || File.join(ENV['HOME'], 'db.yml')
|
24
|
-
end
|
25
|
-
begin
|
26
|
-
DB = ActiveRecord::Base.establish_connection_with db_config
|
27
|
-
rescue LoadError
|
28
|
-
DB = nil
|
29
|
-
end
|
30
|
-
if DB and !(CONFIG.cache and CONFIG.cache.enabled == false)
|
31
|
-
cache = CONFIG.cache || {}
|
32
|
-
CacheDir = cache.dir || File.join(File.dirname(__FILE__), 'cache')
|
33
|
-
CacheTable = (cache.table || :rhack_cache).to_sym
|
34
|
-
CacheTTL = cache.clean ? eval(cache.clean).b : nil
|
35
|
-
end
|
36
|
-
|
37
|
-
RETRY = CONFIG['scout retry'] || {}
|
38
|
-
|
39
|
-
$uas ||= UAS
|
40
|
-
$Carier ||= Curl::Multi.new
|
41
|
-
$Carier.pipeline = true
|
42
|
-
|
43
|
-
def self.update
|
44
|
-
each_child {|c| c.class_eval "include HTTPAccessKit; extend HTTPAccessKit" if !c.in c.children}
|
45
|
-
end
|
46
|
-
end
|
47
|
-
|
48
|
-
module Curl; extend HTTPAccessKit end
|
49
|
-
RHACK = HTTPAccessKit
|
data/lib/rhack.yml.template
DELETED
@@ -1,19 +0,0 @@
|
|
1
|
-
#ua file: db/useragents.txt
|
2
|
-
#logger:
|
3
|
-
# :out: log/rhack.log
|
4
|
-
#scout retry: {host => [Curl::Error subclass, ], }
|
5
|
-
# example.com:
|
6
|
-
# - TimeoutError
|
7
|
-
#db: # defaults file @ to RAILS_PATH/config/RAILS_ENV.yml
|
8
|
-
# reconnect: true
|
9
|
-
# encoding: utf8
|
10
|
-
# username: root
|
11
|
-
# adapter: mysql
|
12
|
-
# database: dbname
|
13
|
-
# pool: 5
|
14
|
-
# password:
|
15
|
-
# socket: /var/run/mysqld/mysqld.sock
|
16
|
-
#cache: # deprecated
|
17
|
-
# dir: /path/to/cache/dir
|
18
|
-
# table: hack_cache
|
19
|
-
# clean: 30.days
|
data/lib/scout.rb
DELETED
@@ -1,589 +0,0 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
module Curl
|
3
|
-
|
4
|
-
def ITT
|
5
|
-
res = nil
|
6
|
-
HTTPAccessKit::Scout('file://').loadGet(__FILE__) {|c| res = yield}
|
7
|
-
loop {if res then break res else sleep 0.01 end}
|
8
|
-
end
|
9
|
-
module_function :ITT
|
10
|
-
|
11
|
-
class Response
|
12
|
-
__init__
|
13
|
-
attr_reader :header, :code, :body, :hash, :timestamp, :time, :req, :date, :error
|
14
|
-
|
15
|
-
def to_s
|
16
|
-
str = '<#'
|
17
|
-
if @error
|
18
|
-
str << "#{@error[0].self_name}: #{@error[1]}"
|
19
|
-
else
|
20
|
-
str << (@header[/\d{3}/] == @code.to_s ? @header : "#{@header[/\S+/]} #{@code}") if @header
|
21
|
-
if @hash.location
|
22
|
-
str << ' '+@req.url if $panic
|
23
|
-
str << ' -> '+@hash.location
|
24
|
-
end
|
25
|
-
str << " (#{@body ? @body.size.bytes : 'No'} Body)"
|
26
|
-
str << " [#{@timestamp}]" if @timestamp
|
27
|
-
end
|
28
|
-
str << '>'
|
29
|
-
end
|
30
|
-
alias :inspect :to_s
|
31
|
-
|
32
|
-
def initialize(easy)
|
33
|
-
@hash = {}
|
34
|
-
@timestamp = @date = @header = nil
|
35
|
-
if easy.base.error
|
36
|
-
@error = easy.base.error
|
37
|
-
else
|
38
|
-
if headers = easy.header_str || easy.base.headers
|
39
|
-
headers /= "\r\n"
|
40
|
-
@header = headers.shift
|
41
|
-
headers.each {|h|
|
42
|
-
h /= ': '
|
43
|
-
if h[0]
|
44
|
-
h[0].downcase!
|
45
|
-
if h[0] == 'set-cookie'
|
46
|
-
(@hash.cookies ||= []) << h[1]
|
47
|
-
else
|
48
|
-
@hash[h[0]] = h[1]
|
49
|
-
end
|
50
|
-
end
|
51
|
-
}
|
52
|
-
@timestamp = if @hash.date
|
53
|
-
begin
|
54
|
-
@date = @hash.date.to_time
|
55
|
-
rescue => e
|
56
|
-
(@date = Time.now).strftime("%H:%M:%S")
|
57
|
-
L < "Error #{e.class}:#{e.message} with @hash.date = #{@hash.date.inspect}"
|
58
|
-
end
|
59
|
-
@hash.date[/\d\d:\d\d:\d\d/]
|
60
|
-
else
|
61
|
-
(@date = Time.now).strftime("%H:%M:%S")
|
62
|
-
end
|
63
|
-
end
|
64
|
-
@code = easy.response_code
|
65
|
-
@body = easy.body_str
|
66
|
-
@time = easy.total_time
|
67
|
-
end
|
68
|
-
|
69
|
-
@req = {}
|
70
|
-
@req.url = easy.last_effective_url
|
71
|
-
@req.headers = easy.headers
|
72
|
-
if range = easy.headers.Range and range[/(\d+)-(\d+)/]
|
73
|
-
@req.range = $1.to_i .. $2.to_i
|
74
|
-
end
|
75
|
-
if easy.base and @req.meth = easy.base.last_method and @req.meth == :post
|
76
|
-
@req.body = easy.post_body
|
77
|
-
@req.mp = easy.multipart_form_post?
|
78
|
-
end
|
79
|
-
end
|
80
|
-
|
81
|
-
def is(klass)
|
82
|
-
if @error
|
83
|
-
klass == Array || klass = Curl::Response
|
84
|
-
else
|
85
|
-
klass == Curl::Response
|
86
|
-
end
|
87
|
-
end
|
88
|
-
|
89
|
-
def [](key_or_index)
|
90
|
-
@error ? @error[key_or_index] : @hash[key_or_index.downcase]
|
91
|
-
end
|
92
|
-
|
93
|
-
alias :headers :hash
|
94
|
-
end
|
95
|
-
|
96
|
-
end
|
97
|
-
|
98
|
-
module HTTPAccessKit
|
99
|
-
|
100
|
-
class Cookie
|
101
|
-
__init__
|
102
|
-
|
103
|
-
def initialize(*args)
|
104
|
-
if args[1].is Scout
|
105
|
-
str, scout = *args
|
106
|
-
ck = str//;\s*/
|
107
|
-
ck[1..-1].each {|par|
|
108
|
-
a = par/'='
|
109
|
-
case a[0].downcase
|
110
|
-
when 'path'; @path = (a[1] == '/') ? // : /^#{Regexp.escape a[1]}/
|
111
|
-
when 'domain'; @domain = /(^|\.)#{Regexp.escape a[1].sub(/^./, '')}$/
|
112
|
-
when 'expires'; @expires = a[1].to_time
|
113
|
-
end
|
114
|
-
}
|
115
|
-
@name, @value = ck[0].split('=', 2)
|
116
|
-
#@value.gsub!(/^['"]|['"]$/, '')
|
117
|
-
#L.debug args if !@domain
|
118
|
-
(scout.cookies[scout.uri.host] ||= {})[@name] = self
|
119
|
-
else
|
120
|
-
@name, cookie = args[0]
|
121
|
-
case cookie
|
122
|
-
when Array; @value, @path, @domain = cookie
|
123
|
-
when Hash; @value, @path, @domain = cookie.value, cookie.path, cookie.domain
|
124
|
-
else @value = args[1].to_s
|
125
|
-
end
|
126
|
-
end
|
127
|
-
@path ||= //
|
128
|
-
@domain ||= //
|
129
|
-
@string = "#{@name}=#{@value}; "
|
130
|
-
end
|
131
|
-
|
132
|
-
def use(str, uri)
|
133
|
-
if !@expires or @expires > Time.now
|
134
|
-
str << @string if uri.path[@path] and !uri.root || uri.host[@domain]
|
135
|
-
else
|
136
|
-
:expired
|
137
|
-
end
|
138
|
-
end
|
139
|
-
|
140
|
-
def to_s; @value end
|
141
|
-
def inspect; @value.inspect end
|
142
|
-
|
143
|
-
end
|
144
|
-
|
145
|
-
class Scout
|
146
|
-
__init__
|
147
|
-
attr_accessor :timeout, :raise_err, :retry
|
148
|
-
attr_accessor :path, :root, :sld, :proxy
|
149
|
-
attr_reader :uri
|
150
|
-
attr_reader :webproxy, :last_method, :proxystr, :headers, :body, :http, :error
|
151
|
-
attr_reader :cookies, :ua, :refforge, :cookieStore, :cookieProc
|
152
|
-
|
153
|
-
DefaultHeader = {
|
154
|
-
"Expect" => "",
|
155
|
-
"Keep-Alive" => "300",
|
156
|
-
"Accept-Charset" => "windows-1251,utf-8;q=0.7,*;q=0.7",
|
157
|
-
"Accept-Language" => "ru,en-us;q=0.7,en;q=0.3",
|
158
|
-
"Connection" => "keep-alive"
|
159
|
-
}
|
160
|
-
|
161
|
-
class ProxyError < ArgumentError
|
162
|
-
def initialize proxy
|
163
|
-
super "incorrect proxy: %s class %s, must be an Array
|
164
|
-
proxy format: ['127.0.0.1', '80'], [2130706433, 80], ['someproxy.com', :WebproxyModule]"%[proxy.inspect, proxy.class]
|
165
|
-
end
|
166
|
-
end
|
167
|
-
@@retry = RETRY
|
168
|
-
|
169
|
-
def initialize(*argv)
|
170
|
-
uri, proxy, @ua, @refforge, opts = argv.get_opts ['http://', nil, :rand, 1]
|
171
|
-
raise ProxyError, proxy if proxy and (!webproxy && !proxy.is(Array) or webproxy && !proxy.is(String))
|
172
|
-
'http://' >> uri if uri !~ /^\w+:\/\//
|
173
|
-
if proxy
|
174
|
-
if proxy[1] and proxy[1].to_i == 0
|
175
|
-
@webproxy = eval("WebProxy::#{proxy[1]}")
|
176
|
-
@proxy = proxy[0].parse(:uri).root
|
177
|
-
else
|
178
|
-
proxy[0] = proxy[0].to_ip if proxy[0].is Integer
|
179
|
-
@proxy = proxy
|
180
|
-
end
|
181
|
-
end
|
182
|
-
@cookies = {}
|
183
|
-
@body = {}
|
184
|
-
@num = []
|
185
|
-
@cookieProc = opts[:cp] || opts[:ck]
|
186
|
-
@raise_err = opts[:raise] # no way to use @raise id, it makes any 'raise' call here fail
|
187
|
-
@engine = opts[:engine]
|
188
|
-
@timeout = opts[:timeout] || $CurlDefaultTimeout || 60
|
189
|
-
@post_proc = @get_proc = @head_proc = Proc::NULL
|
190
|
-
update uri
|
191
|
-
@retry = opts[:retry] || {}
|
192
|
-
@retry = {@uri.host => @retry} if @retry.is Array
|
193
|
-
end
|
194
|
-
|
195
|
-
def update(uri)
|
196
|
-
if !uri[/^\w+:\/\//]
|
197
|
-
'/' >> uri if uri[0,1] != '/'
|
198
|
-
@uri = uri.parse:uri
|
199
|
-
return
|
200
|
-
end
|
201
|
-
@uri = uri.parse:uri
|
202
|
-
return if @uri.root == @root
|
203
|
-
@root = @uri.root
|
204
|
-
@sld = @root[/[\w-]+\.[a-z]+$/]
|
205
|
-
@path = @uri.fullpath
|
206
|
-
if @http
|
207
|
-
@http.url = @webproxy ? @proxy : @root
|
208
|
-
else
|
209
|
-
@http = Curl::Easy(@webproxy ? @proxy : @root)
|
210
|
-
@http.base = self
|
211
|
-
end
|
212
|
-
if @proxy
|
213
|
-
@http.proxy_url = @proxy*':' if !@webproxy
|
214
|
-
@proxystr = @webproxy ? @proxy[0] : @http.proxy_url
|
215
|
-
else @proxystr = 'localhost'
|
216
|
-
end
|
217
|
-
if @cookieProc.is Hash
|
218
|
-
self.main_cks = @cookieProc
|
219
|
-
@cookieProc = true
|
220
|
-
end
|
221
|
-
self
|
222
|
-
end
|
223
|
-
|
224
|
-
def to_s
|
225
|
-
str = "<##{self.class.self_name} @ "
|
226
|
-
if @webproxy
|
227
|
-
str << "#{@proxy} ~ "
|
228
|
-
elsif @proxy
|
229
|
-
str << @proxy*':'+" ~ "
|
230
|
-
end
|
231
|
-
str << @root+'>'
|
232
|
-
end
|
233
|
-
alias :inspect :to_s
|
234
|
-
|
235
|
-
def update_res
|
236
|
-
@outdated = false
|
237
|
-
@res = @http.res
|
238
|
-
@headers = nil
|
239
|
-
@res
|
240
|
-
end
|
241
|
-
|
242
|
-
def res
|
243
|
-
if @res && !@outdated
|
244
|
-
@res
|
245
|
-
else update_res end
|
246
|
-
end
|
247
|
-
|
248
|
-
def req; res.req end
|
249
|
-
|
250
|
-
def dump
|
251
|
-
str = "IP: #{@proxystr}\nRequest: "
|
252
|
-
str << ({"Action"=>@root+@path} + @http.headers).dump+@body.dump+"Response: #{res}"
|
253
|
-
str << "\nReady" if @ready
|
254
|
-
str
|
255
|
-
end
|
256
|
-
|
257
|
-
def fix(path)
|
258
|
-
path = path.tr ' ', '+'
|
259
|
-
path = expand path if path =~ /^\./
|
260
|
-
if update(path) or @uri.root
|
261
|
-
path = @webproxy.encode(path) if @webproxy
|
262
|
-
else
|
263
|
-
path = @webproxy.encode(@root+path) if @webproxy
|
264
|
-
end
|
265
|
-
path
|
266
|
-
end
|
267
|
-
|
268
|
-
def expand(uri)
|
269
|
-
if !@webproxy || @http.last_effective_url
|
270
|
-
path = (@http.last_effective_url ? @http.last_effective_url.parse(:uri) : @uri).path
|
271
|
-
return uri.sub(/^(\.\.?\/)?/, File.split(uri =~ /^\.\./ ? File.split(path)[0] : path)[0])
|
272
|
-
end
|
273
|
-
uri
|
274
|
-
end
|
275
|
-
|
276
|
-
def mkBody(params, multipart=nil)
|
277
|
-
if multipart
|
278
|
-
@http.multipart_post_body = params.map {|k, v|
|
279
|
-
v = v.call if v.is Proc
|
280
|
-
if k =~ /^f:/
|
281
|
-
Curl::PostField.file(k[2..-1], "application/octet-stream",
|
282
|
-
"#{randstr(16, :hex)}.jpg", v+randstr )
|
283
|
-
elsif k =~ /^p:/
|
284
|
-
Curl::PostField.file(k[2..-1], "application/octet-stream",
|
285
|
-
File.basename(f), read(v) )
|
286
|
-
else
|
287
|
-
Curl::PostField.content(k.to_s, v.to_s)
|
288
|
-
end
|
289
|
-
}
|
290
|
-
else
|
291
|
-
@http.post_body = params.urlencode
|
292
|
-
end
|
293
|
-
end
|
294
|
-
|
295
|
-
def mkHeader(uri)
|
296
|
-
header = DefaultHeader.dup
|
297
|
-
if @cookieProc
|
298
|
-
cookies = ''
|
299
|
-
main_cks.each {|k, v| main_cks.delete k if v.use(cookies, @uri) == :expired}
|
300
|
-
header['Cookie'] = cookies[0..-3]
|
301
|
-
end
|
302
|
-
if @refforge
|
303
|
-
ref = @uri.root ? uri : (@webproxy ? @http.host : @root)+uri
|
304
|
-
header['Referer'] = ref.match(/(.+)[^\/]*$/)[1]
|
305
|
-
end
|
306
|
-
header['User-Agent'] = @ua == :rand ? UAS.rand : @ua if @ua
|
307
|
-
header
|
308
|
-
end
|
309
|
-
|
310
|
-
def ProcCookies(res)
|
311
|
-
ck = []
|
312
|
-
case res
|
313
|
-
when String
|
314
|
-
res.split(/\r?\n/).each {|h|
|
315
|
-
hs = h/': '
|
316
|
-
ck << hs[1] if hs[0] and hs[0].downcase! == 'set-cookie'
|
317
|
-
}
|
318
|
-
when Curl::Response
|
319
|
-
ck = res['cookies']
|
320
|
-
end
|
321
|
-
return if !ck.b
|
322
|
-
ck.each {|c| Cookie(c, self)}
|
323
|
-
# StoreCookies if @cookieStore
|
324
|
-
end
|
325
|
-
|
326
|
-
def cp_on() @cookieProc = true end
|
327
|
-
def cp_off() @cookieProc = false end
|
328
|
-
|
329
|
-
def main_cks() @cookies[@uri.host] ||= {} end
|
330
|
-
def main_cks=(cks)
|
331
|
-
@cookies[@uri.host] = @webproxy ?
|
332
|
-
@webproxy.ck_encode(@root, cks) :
|
333
|
-
cks.map2 {|k, v| Cookie(k, v)}
|
334
|
-
end
|
335
|
-
|
336
|
-
def retry?(err)
|
337
|
-
# exc = ['0chan.ru', '2-ch.ru', 'www.nomer.org', 'nomer.org'].select_in('http://www.nomer.org') = ['www.nomer.org', 'nomer.org']
|
338
|
-
exc = (@@retry.keys + @retry.keys).select_in @root
|
339
|
-
return false if !exc.b
|
340
|
-
# ['www.nomer.org', 'nomer.org'].every {|www| 'TimeoutError'.in({'nomer.org' => 'TimeoutError'}[www])} ?
|
341
|
-
exc.no? {|e| err[0].self_name.in((@@retry[e] || []) + @retry[e])}
|
342
|
-
end
|
343
|
-
|
344
|
-
def loaded?
|
345
|
-
$Carier.reqs.include? @http
|
346
|
-
end
|
347
|
-
|
348
|
-
def load!
|
349
|
-
unless $Carier.add @http
|
350
|
-
$Carier.remove @http
|
351
|
-
$Carier.add @http
|
352
|
-
end
|
353
|
-
rescue RuntimeError => e
|
354
|
-
e.message << ". Failed to load allready loaded? easy handler: Bad file descriptor" unless Curl::Err::CurlError === e
|
355
|
-
raise e
|
356
|
-
end
|
357
|
-
|
358
|
-
def load(path=@path, headers={}, not_redir=1, relvl=10, &callback)
|
359
|
-
@http.path = path = fix(path)
|
360
|
-
@http.headers = mkHeader(path).merge!(headers)
|
361
|
-
@http.timeout = @timeout
|
362
|
-
|
363
|
-
@http.on_complete {|c|
|
364
|
-
@error = nil
|
365
|
-
@outdated = true
|
366
|
-
ProcCookies c.res if @cookieProc
|
367
|
-
# We cannot just cancel on_complete in on_redirect block
|
368
|
-
# because loadGet will immediately reset on_complete back
|
369
|
-
if c.res.code.in(300..399) and !not_redir.b and (relvl -= 1) > -1 and loc = c.res.hash.location
|
370
|
-
loadGet(loc, headers: headers, relvl: relvl, redir: true, &callback)
|
371
|
-
elsif block_given?
|
372
|
-
yield c
|
373
|
-
end
|
374
|
-
}
|
375
|
-
@http.on_failure {|c, e|
|
376
|
-
@http.on_complete &Proc::NULL
|
377
|
-
@outdated = true
|
378
|
-
@error = e
|
379
|
-
if retry? e
|
380
|
-
L.debug "#{e[0]} -> reloading scout"
|
381
|
-
#load uri, headers, not_redir, relvl, &callback
|
382
|
-
load! # all params including post_body are still set
|
383
|
-
else
|
384
|
-
L.debug "#{e[0]} -> not reloading scout"
|
385
|
-
raise *e if @raise_err
|
386
|
-
end
|
387
|
-
} if !@http.on_failure
|
388
|
-
|
389
|
-
load!
|
390
|
-
end
|
391
|
-
|
392
|
-
def loadPost(*argv, &callback)
|
393
|
-
hash, multipart, uri, opts = argv.get_opts [@body, false, @path],
|
394
|
-
:headers => {}, :redir => false, :relvl => 2
|
395
|
-
mkBody hash, multipart.b
|
396
|
-
@last_method = :post
|
397
|
-
if block_given?
|
398
|
-
@post_proc = callback
|
399
|
-
else#if @http.callback != @post_proc
|
400
|
-
callback = @post_proc
|
401
|
-
end
|
402
|
-
load(uri, opts[:headers], !opts[:redir], opts[:relvl], &callback)
|
403
|
-
end
|
404
|
-
|
405
|
-
def loadGet(*argv, &callback)
|
406
|
-
uri, opts = argv.get_opts [@path],
|
407
|
-
:headers => {}, :redir => false, :relvl => 2
|
408
|
-
@http.get = true
|
409
|
-
@last_method = :get
|
410
|
-
if block_given?
|
411
|
-
@get_proc = callback
|
412
|
-
else#if @http.callback != @get_proc
|
413
|
-
callback = @get_proc
|
414
|
-
end
|
415
|
-
load(uri, opts[:headers], !opts[:redir], opts[:relvl], &callback)
|
416
|
-
end
|
417
|
-
|
418
|
-
def loadHead(*argv, &callback)
|
419
|
-
uri, emulate, headers = argv.get_opts [@path, :if_retry]
|
420
|
-
@http.head = true if emulate != :always
|
421
|
-
@last_method = :head
|
422
|
-
if block_given?
|
423
|
-
@head_proc = callback
|
424
|
-
else#if @http.callback != @head_proc
|
425
|
-
callback = @head_proc
|
426
|
-
end
|
427
|
-
emu = lambda {
|
428
|
-
@headers = ''
|
429
|
-
@http.on_header {|h|
|
430
|
-
@headers << h
|
431
|
-
h == "\r\n" ? 0 : h.size
|
432
|
-
}
|
433
|
-
@http.get = true
|
434
|
-
load(uri, headers) {|c| c.on_header; callback[c]}
|
435
|
-
}
|
436
|
-
if emulate != :always
|
437
|
-
load(uri, headers) {|c|
|
438
|
-
if !@error and c.res.code != 200 and emulate == :if_retry
|
439
|
-
emu.call
|
440
|
-
else
|
441
|
-
callback[c]
|
442
|
-
end
|
443
|
-
}
|
444
|
-
else emu.call
|
445
|
-
end
|
446
|
-
end
|
447
|
-
|
448
|
-
end
|
449
|
-
|
450
|
-
class PickError < IndexError
|
451
|
-
def initialize
|
452
|
-
super "can't get scout from empty squad" end
|
453
|
-
end
|
454
|
-
|
455
|
-
class ScoutSquad < Array
|
456
|
-
__init__
|
457
|
-
|
458
|
-
def initialize(*args)
|
459
|
-
raise ArgumentError, "can't create empty squad" if (num = args.pop) < 1
|
460
|
-
proxies = nil
|
461
|
-
super []
|
462
|
-
if args[0].is Scout
|
463
|
-
s = args[0]
|
464
|
-
else
|
465
|
-
if !args[0].is String
|
466
|
-
args.unshift ''
|
467
|
-
if (opts = args[-1]).is Hash and (opts[:cp] || opts[:ck]).is Hash
|
468
|
-
L.warn "it's useless to setup cookies for untargeted squad!"
|
469
|
-
end
|
470
|
-
end
|
471
|
-
if args[1] and args[1][0].is Array
|
472
|
-
proxies = args[1]
|
473
|
-
args[1] = proxies.shift
|
474
|
-
end
|
475
|
-
self[0] = s = Scout(*args)
|
476
|
-
num -=1
|
477
|
-
end
|
478
|
-
num.times {|i|
|
479
|
-
self << Scout(s.root+s.path, (proxies ? proxies[i] : s.proxy), s.ua, s.refforge, :ck => s.main_cks, :raise => s.raise_err, :timeout => s.timeout, :retry => s.retry)
|
480
|
-
}
|
481
|
-
end
|
482
|
-
|
483
|
-
def update uri, forced=nil
|
484
|
-
each {|s| return L.warn "failed to update scout loaded? with url: #{s.http.url}" if s.loaded?} if !forced
|
485
|
-
each {|s| s.update uri}
|
486
|
-
end
|
487
|
-
|
488
|
-
def untargeted
|
489
|
-
first.root == 'http://'
|
490
|
-
end
|
491
|
-
|
492
|
-
def rand
|
493
|
-
raise PickError if !b
|
494
|
-
# to_a because reject returns object of this class
|
495
|
-
if scout = to_a.rand {|_|!_.loaded?}; scout
|
496
|
-
else # Curl should run here, otherwise `next'/`rand'-recursion will cause stack overflow
|
497
|
-
raise "Curl must run in order to use ScoutSquad#rand" if !Curl.status
|
498
|
-
#Curl.wait
|
499
|
-
loop {sleep 1; break if $Carier.reqs.size < size}
|
500
|
-
self.rand
|
501
|
-
end
|
502
|
-
end
|
503
|
-
|
504
|
-
def next
|
505
|
-
raise PickError if !b
|
506
|
-
if scout = find {|_|!_.loaded?}; scout
|
507
|
-
else # Curl should run here, otherwise `next'/`rand'-recursion will cause stack overflow
|
508
|
-
raise "Curl must run in order to use ScoutSquad#next" if !Curl.status
|
509
|
-
#Curl.wait
|
510
|
-
loop {sleep 1; break if $Carier.reqs.size < size}
|
511
|
-
self.next
|
512
|
-
end
|
513
|
-
end
|
514
|
-
|
515
|
-
def to_s
|
516
|
-
str = '<#ScoutSquad @ '
|
517
|
-
if b
|
518
|
-
if first.webproxy
|
519
|
-
str << "#{first.proxy} ~ "
|
520
|
-
elsif first.proxy
|
521
|
-
str << first.proxy*':'+" ~ "
|
522
|
-
end
|
523
|
-
str << "#{untargeted ? "no target" : first.root} "
|
524
|
-
end
|
525
|
-
str << "x#{size}>"
|
526
|
-
end
|
527
|
-
alias :inspect :to_s
|
528
|
-
|
529
|
-
end
|
530
|
-
|
531
|
-
end
|
532
|
-
|
533
|
-
### Global scope shortcut methods ###
|
534
|
-
|
535
|
-
module RMTools
|
536
|
-
|
537
|
-
def Get(uri, opts={})
|
538
|
-
raise ArgumentError, "Local uri passed to Get function" if uri[0,1] == '/'
|
539
|
-
$log.debug "Protocol-less uri passed to Get function" if !uri[/^\w+:\/\//]
|
540
|
-
headers = opts[:headers] || opts[:h] || {}
|
541
|
-
proxy = opts[:proxy] || opts[:pr] || $CurlGetProxy
|
542
|
-
ret_body = opts.fetch(:ret_body, opts.fetch(:b, 1)).b
|
543
|
-
wait = opts.fetch(:wait, opts.fetch(:w, !block_given?)).b
|
544
|
-
s = HTTPAccessKit::Scout(uri, proxy, opts)
|
545
|
-
buf = ret_body ? '' : s.http.res
|
546
|
-
s.raise_err ||= opts[:e]
|
547
|
-
s.http.timeout ||= opts[:t]
|
548
|
-
s.loadGet(headers) {|c|
|
549
|
-
if ret_body
|
550
|
-
buf << c.body_str
|
551
|
-
else
|
552
|
-
buf.load_from c.res
|
553
|
-
end
|
554
|
-
yield buf if block_given?
|
555
|
-
}
|
556
|
-
if wait
|
557
|
-
($CarierThread and $CarierThread.status) ? Curl.wait : $Carier.perform
|
558
|
-
end
|
559
|
-
buf
|
560
|
-
end
|
561
|
-
module_function :Get
|
562
|
-
|
563
|
-
end
|
564
|
-
|
565
|
-
module Enumerable
|
566
|
-
|
567
|
-
def GetAll(on_count=nil, default_domain=nil, &callback)
|
568
|
-
if on_count
|
569
|
-
len = size
|
570
|
-
counter = 0
|
571
|
-
send(resto(:each_value) ? :each_value : :each) {|uri|
|
572
|
-
uri = File.join(default_domain, uri) if default_domain and (uri[0,1] == '/' or !uri[/^https?:/])
|
573
|
-
Get(uri) {|buf|
|
574
|
-
callback.arity > 1 ?
|
575
|
-
callback.call(buf, counter) :
|
576
|
-
callback.call(buf)
|
577
|
-
if (counter += 1) == len
|
578
|
-
on_count.arity > 0 ?
|
579
|
-
on_count.call(buf) :
|
580
|
-
on_count.call
|
581
|
-
end
|
582
|
-
}
|
583
|
-
}
|
584
|
-
else send(resto(:each_value) ? :each_value : :each) {|uri|
|
585
|
-
Get(uri, &callback) }
|
586
|
-
end
|
587
|
-
end
|
588
|
-
|
589
|
-
end
|