rhack 1.3.1 → 1.3.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/rhack/clients/base.rb +35 -11
- data/lib/rhack/frame.rb +46 -11
- data/lib/rhack/scout.rb +36 -23
- data/lib/rhack/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 40e4929e52203b0121c9ca2eac2f289f656ff543
|
4
|
+
data.tar.gz: abb0fe53f2f8e43836473fdc8d7ae517fcd63602
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 23e4c2b43ee95212d0c8069ed7e2c731dc1878ec4bd4f7b8984bb0dfea55f1cd496ad161c098d2a0551a49d995a6cf4faca8b0041fc891d9f3fc3b42bfce5243
|
7
|
+
data.tar.gz: b6e3fce288abf2aaa51cf7910b1b99c90e0c673f13bf6541e8a047b496d83fe6d90b5d11379f8d8e6610a9ad94c6d8834d58c9b0c8cfa2fd171d43852791d987
|
data/lib/rhack/clients/base.rb
CHANGED
@@ -11,6 +11,7 @@ module RHACK
|
|
11
11
|
class_attribute :frame_defaults, :instance_writer => false
|
12
12
|
class_attribute :accounts, :instance_writer => false
|
13
13
|
class_attribute :routes, :instance_writer => false
|
14
|
+
class_attribute :rootpath, :instance_writer => false
|
14
15
|
|
15
16
|
self.frame_defaults = {}
|
16
17
|
self.accounts = {}
|
@@ -25,33 +26,50 @@ module RHACK
|
|
25
26
|
}
|
26
27
|
end
|
27
28
|
|
29
|
+
def method_missing(method, *args, &block)
|
30
|
+
if personal_instance_methods.include? method
|
31
|
+
return new.__send__(method, *args, &block)
|
32
|
+
end
|
33
|
+
super
|
34
|
+
end
|
35
|
+
|
28
36
|
private
|
29
37
|
|
38
|
+
def root(value=nil)
|
39
|
+
if value
|
40
|
+
value = 'http://' + value if value !~ /^\w+:/
|
41
|
+
self.rootpath = value
|
42
|
+
else
|
43
|
+
self.rootpath
|
44
|
+
end
|
45
|
+
end
|
46
|
+
alias :host :root
|
47
|
+
|
30
48
|
# Set routes map
|
31
49
|
def map(dict)
|
32
50
|
# URI is deprecated # backward compatibility
|
33
51
|
if defined? URI and URI.is Hash
|
34
52
|
URI.merge! dict.map_hash {|k, v| [k.to_sym, v.freeze]}
|
35
53
|
end
|
36
|
-
routes
|
54
|
+
self.routes += dict.map_hash {|k, v| [k.to_sym, v.freeze]}
|
37
55
|
end
|
38
56
|
|
39
57
|
# Set default Frame options
|
40
58
|
def frame(dict)
|
41
|
-
frame_defaults
|
59
|
+
self.frame_defaults += dict
|
42
60
|
end
|
43
61
|
|
44
62
|
# Set usable accounts
|
45
63
|
# @ dict : {symbol => {symbol => string, ...}}
|
46
64
|
def accounts(dict)
|
47
|
-
accounts
|
65
|
+
self.accounts += dict
|
48
66
|
end
|
49
67
|
|
50
68
|
end
|
51
69
|
|
52
70
|
def initialize(*args)
|
53
|
-
service, opts = args.get_opts [:api]
|
54
|
-
@service = service
|
71
|
+
service, opts = args.get_opts [routes.include?(:api) ? :api : nil]
|
72
|
+
@service = service # Deprecated. Use different classes to implement different services.
|
55
73
|
# first argument should be a string so that frame won't be static
|
56
74
|
if opts.is_a?(Frame)
|
57
75
|
@f = opts
|
@@ -60,9 +78,13 @@ module RHACK
|
|
60
78
|
if self.class.const_defined? :Result
|
61
79
|
opts[:result] = self.class::Result
|
62
80
|
end
|
63
|
-
@f = Frame(route(service) || route(:login), opts)
|
81
|
+
@f = Frame(rootpath || route(service) || route(:login), opts)
|
64
82
|
end
|
65
83
|
end
|
84
|
+
|
85
|
+
def inspect
|
86
|
+
"<##{self.class.name}#{":#{@service.to_s.camelize} service" if @service} via #{@f.inspect}>"
|
87
|
+
end
|
66
88
|
|
67
89
|
|
68
90
|
# Usable only for sync requests
|
@@ -91,15 +113,17 @@ module RHACK
|
|
91
113
|
@f.get(url) {|next_page| scrape!(next_page)}
|
92
114
|
end
|
93
115
|
end
|
94
|
-
|
95
|
-
def inspect
|
96
|
-
"<##{self.class.self_name}:#{@service.to_s.camelize} service via #{@f.inspect}>"
|
97
|
-
end
|
116
|
+
|
98
117
|
|
99
118
|
# shortcuts to class variables #
|
100
119
|
|
101
120
|
def route(name)
|
102
|
-
routes[name]
|
121
|
+
if url = routes[name]
|
122
|
+
if url !~ /^\w+:/
|
123
|
+
url = File.join rootpath, url
|
124
|
+
end
|
125
|
+
url
|
126
|
+
end
|
103
127
|
end
|
104
128
|
alias :url :route
|
105
129
|
# URI is deprecated # backward compatibility
|
data/lib/rhack/frame.rb
CHANGED
@@ -90,17 +90,36 @@ module RHACK
|
|
90
90
|
|
91
91
|
def inspect
|
92
92
|
sssize = @ss.size
|
93
|
-
"<#Frame @ #{@ss.untargeted ? 'no target' : @loc.root}: #{sssize} #{sssize == 1 ? 'scout' : 'scouts'}#{', static'+(' => '+@static.protocol if @static.is(Hash)) if @static}, cookies #{@ss[0].
|
93
|
+
"<#Frame @ #{@ss.untargeted ? 'no target' : @loc.root}: #{sssize} #{sssize == 1 ? 'scout' : 'scouts'}#{', static'+(' => '+@static.protocol if @static.is(Hash)) if @static}, cookies #{@ss[0].cookie_enabled ? 'on' : 'off'}>"
|
94
94
|
end
|
95
95
|
|
96
96
|
# All opts going in one hash.
|
97
97
|
# Opts for Frame:
|
98
|
-
# :wait, :
|
99
|
-
#
|
98
|
+
# :wait, :sync, :thread_safe, :raw, :proc_result, :save_result, :zip, :result, :stream
|
99
|
+
# ... processed and passed to Scout:
|
100
|
+
# :xhr, :content_type, :auth
|
101
|
+
# Opts passed to result:
|
100
102
|
# :xml, :html, :json, :hash, :eval, :load_scripts
|
101
|
-
# Opts
|
103
|
+
# Opts passed to Scout:
|
102
104
|
# :headers, :redir, :relvl
|
103
|
-
#
|
105
|
+
#
|
106
|
+
# @ :result : враппер результата исполнения; по умолчанию Page, для Client — если определён — Result; при асинхронном вызове будет возвращён незамедлительно
|
107
|
+
# @ &callback : в него будет передан инстанс result, а его результат будет записан в result#res (по умолчанию это ссылка на себя)
|
108
|
+
# @ :thread_safe : не использовать луп исполнения Curl::Multi#perform, а вызывать #perform прямо в этом треде; если установлен, то невозможно прерывание исполнения клавиатурой (продолжит работать, выполняя колбеки, в фоне), и невозможно задавать больше параллельных реквестов, чем разрешено параллельных соединений (просто застрянет)
|
109
|
+
# @ :sync : остановить (Thread#kill) perform-loop после исполнения всех запросов; подразумевает wait=true; при вызове одиночного реквеста подразумевает thread_safe=true
|
110
|
+
# @ :wait : ждать исполнения всех реквестов
|
111
|
+
# @ :save_result: возвращает #res для каждого инстанса result вместо самого инстанса; если не задан :proc_result, то подразумевает wait=true
|
112
|
+
# @ :proc_result: Proc, в который будет передан result#res, если задан также &callback; служит для создания вложенных блоков для клиентов; если =nil, то подразумевает wait=true
|
113
|
+
# @ :raw : сохраняем *только* тело ответа, без хедеров, без отладочной инфы в #res
|
114
|
+
# @ :raw + :sync : подразумевает save_result=true
|
115
|
+
# @ :xhr, :content_type, :auth : формируют хедеры X-Requested-With, Content-Type, Authorization для передачи в Scout
|
116
|
+
# @ :xhr : boolean
|
117
|
+
# @ :content_type : symbol<extension> | raw string
|
118
|
+
# @ :auth : "<username>:<password>"
|
119
|
+
#
|
120
|
+
# @ :zip, :stream и все опции для result : deprecated
|
121
|
+
#
|
122
|
+
# TODO: Семантически разделить синхронное и асинхронное выполнение запросов (не важно, серии или отдельных), с учётом, что асинхронность по сути своей перегружена и требуется, например, в очередях сообщений, но не в синхронных контроллерах Rails
|
104
123
|
def exec *args, &callback
|
105
124
|
many, order, orders, with_opts = interpret_request *args
|
106
125
|
L.log({:many => many, :order => order, :orders => orders, :with_opts => with_opts})
|
@@ -113,8 +132,15 @@ module RHACK
|
|
113
132
|
# unless we have been explicitly told otherwise
|
114
133
|
Johnson::Runtime.set_browser_for_curl with_opts
|
115
134
|
|
116
|
-
if many
|
117
|
-
|
135
|
+
if many
|
136
|
+
result = exec_many orders, with_opts, &callback
|
137
|
+
else
|
138
|
+
result = exec_one order, with_opts, &callback
|
139
|
+
end
|
140
|
+
if with_opts[:sync]
|
141
|
+
Curl.stop
|
142
|
+
end
|
143
|
+
result
|
118
144
|
end
|
119
145
|
alias :get :exec
|
120
146
|
alias :run :get
|
@@ -218,7 +244,13 @@ module RHACK
|
|
218
244
|
|
219
245
|
opts[:eval] = false if opts[:json] or opts[:hash] or opts[:raw]
|
220
246
|
opts[:load_scripts] = self if opts[:load_scripts]
|
221
|
-
opts[:
|
247
|
+
opts[:save_result] = true if opts[:wait] and opts[:raw]
|
248
|
+
|
249
|
+
if orders
|
250
|
+
opts[:thread_safe] = false if @ss.size < orders.size
|
251
|
+
else
|
252
|
+
opts[:thread_safe] = true if opts[:sync]
|
253
|
+
end
|
222
254
|
|
223
255
|
(opts[:headers] ||= {})['X-Requested-With'] = 'XMLHttpRequest' if opts[:xhr]
|
224
256
|
if opts[:content_type]
|
@@ -233,6 +265,9 @@ module RHACK
|
|
233
265
|
(opts[:headers] ||= {})['Content-Type'] = opts[:content_type]
|
234
266
|
end
|
235
267
|
end
|
268
|
+
if opts[:auth]
|
269
|
+
(opts[:headers] ||= {})['Authorization'] = "Basic #{Base64.encode64(opts[:auth])}".chop
|
270
|
+
end
|
236
271
|
|
237
272
|
[many, order, orders, opts]
|
238
273
|
end
|
@@ -331,7 +366,7 @@ module RHACK
|
|
331
366
|
# if no spare scouts can be found, squad simply waits for first callbacks to complete
|
332
367
|
s = @ss.next
|
333
368
|
s.http.on_failure {|curl, error|
|
334
|
-
|
369
|
+
s.process_failure(*error) {
|
335
370
|
# curl itself has decided not to retry a request
|
336
371
|
if opts[:raw]
|
337
372
|
page.res = s.error
|
@@ -339,14 +374,14 @@ module RHACK
|
|
339
374
|
run_callbacks! page, opts, &callback
|
340
375
|
# nothing to do here if process returns nil or false
|
341
376
|
end
|
342
|
-
|
377
|
+
}
|
343
378
|
}
|
344
379
|
s.send(*(order << opts)) {|curl|
|
345
380
|
# there is a problem with storing html on disk
|
346
381
|
if order[0] == :loadGet and @write_to
|
347
382
|
# sometimes (about 2% for 100-threads-dling) when this string is calling
|
348
383
|
# no matter what +curl.res.body+ has contained here
|
349
|
-
RMTools.rw @write_to+'/'+order[-2].sub(
|
384
|
+
RMTools.rw @write_to+'/'+order[-2].sub(/^\w+:\/\//, ''), curl.res.body.xml_to_utf
|
350
385
|
end
|
351
386
|
if opts[:raw]
|
352
387
|
page.res = block_given? ? yield(curl) : curl.body_str
|
data/lib/rhack/scout.rb
CHANGED
@@ -7,7 +7,7 @@ module RHACK
|
|
7
7
|
attr_accessor :path, :root, :sld, :proxy
|
8
8
|
attr_reader :uri
|
9
9
|
attr_reader :webproxy, :last_method, :proxystr, :headers, :body, :http, :error
|
10
|
-
attr_reader :cookies, :ua, :refforge, :
|
10
|
+
attr_reader :cookies, :ua, :refforge, :cookies_enabled
|
11
11
|
|
12
12
|
DefaultHeader = {
|
13
13
|
"Expect" => "",
|
@@ -40,7 +40,7 @@ module RHACK
|
|
40
40
|
@cookies = {}
|
41
41
|
@body = {}
|
42
42
|
@num = []
|
43
|
-
@
|
43
|
+
@cookies_enabled = opts[:cp] || opts[:ck]
|
44
44
|
@raise_err = opts[:raise] # no way to use @raise id, it makes any 'raise' call here fail
|
45
45
|
@engine = opts[:engine]
|
46
46
|
@timeout = opts[:timeout] || @@timeout || 60
|
@@ -81,9 +81,9 @@ module RHACK
|
|
81
81
|
@proxystr = @webproxy ? @proxy[0] : @http.proxy_url
|
82
82
|
else @proxystr = 'localhost'
|
83
83
|
end
|
84
|
-
if @
|
85
|
-
self.main_cks = @
|
86
|
-
@
|
84
|
+
if @cookies_enabled.is Hash
|
85
|
+
self.main_cks = @cookies_enabled
|
86
|
+
@cookies_enabled = true
|
87
87
|
end
|
88
88
|
self
|
89
89
|
end
|
@@ -169,7 +169,7 @@ module RHACK
|
|
169
169
|
|
170
170
|
def mkHeader(uri)
|
171
171
|
header = DefaultHeader.dup
|
172
|
-
if @
|
172
|
+
if @cookies_enabled
|
173
173
|
cookies = ''
|
174
174
|
main_cks.each {|k, v| main_cks.delete k if v.use(cookies, @uri) == :expired}
|
175
175
|
header['Cookie'] = cookies[0..-3]
|
@@ -182,7 +182,7 @@ module RHACK
|
|
182
182
|
header
|
183
183
|
end
|
184
184
|
|
185
|
-
def
|
185
|
+
def process_cookies(res)
|
186
186
|
ck = []
|
187
187
|
case res
|
188
188
|
when String
|
@@ -195,11 +195,10 @@ module RHACK
|
|
195
195
|
end
|
196
196
|
return if !ck.b
|
197
197
|
ck.each {|c| Cookie(c, self)}
|
198
|
-
# StoreCookies if @cookieStore
|
199
198
|
end
|
200
199
|
|
201
|
-
def cp_on() @
|
202
|
-
def cp_off() @
|
200
|
+
def cp_on() @cookies_enabled = true end
|
201
|
+
def cp_off() @cookies_enabled = false end
|
203
202
|
|
204
203
|
def main_cks() @cookies[@uri.host] ||= {} end
|
205
204
|
def main_cks=(cks)
|
@@ -231,13 +230,18 @@ module RHACK
|
|
231
230
|
Curl.carier.reqs.include? @http
|
232
231
|
end
|
233
232
|
|
233
|
+
# A scout must not be reused until not only the response has arrived,
|
234
|
+
# but its callback has been processed, too.
|
235
|
+
# Otherwise, #retry! may not work as expected:
|
236
|
+
# if a scout gets the callback as a block argument, then it may re-run not the original callback,
|
237
|
+
# but its copy with another scope.
|
234
238
|
def available?
|
235
|
-
!loaded?
|
239
|
+
!loaded? and !@busy
|
236
240
|
end
|
237
241
|
|
238
242
|
# - if curl should retry request based on Curl::Err class only
|
239
243
|
# => false
|
240
|
-
def process_failure(curl_err, message)
|
244
|
+
def process_failure(curl_err, message, &callback)
|
241
245
|
@error = curl_err.new message
|
242
246
|
#@error = [curl_err, message] # old
|
243
247
|
@http.outdate!
|
@@ -247,33 +251,35 @@ module RHACK
|
|
247
251
|
if retry? curl_err
|
248
252
|
L.debug "#{curl_err} -> reloading scout"
|
249
253
|
retry!
|
250
|
-
false
|
251
254
|
else
|
252
255
|
L.debug "#{curl_err} -> not reloading scout"
|
253
256
|
raise @error if @raise_err
|
254
257
|
#raise *@error if @raise_err # old
|
255
|
-
|
258
|
+
yield if block_given?
|
259
|
+
# Now, we assume that data of this @http have been copied or will not be used anymore,
|
260
|
+
# thus the scout can be reused.
|
261
|
+
@busy = false
|
256
262
|
end
|
257
263
|
end
|
258
264
|
|
259
265
|
def load!
|
260
266
|
unless Curl.carier.add @http
|
267
|
+
L.warn "#{self}##{object_id}: Failed to add Curl::Easy##{@http.object_id} to Curl::Multi##{Curl.carier.object_id}. Trying to remove it and re-add."
|
261
268
|
Curl.carier.remove @http
|
262
269
|
Curl.carier.add @http
|
263
270
|
end
|
264
271
|
rescue RuntimeError => e
|
265
272
|
e.message << ". Failed to load allready loaded? easy handler: Bad file descriptor" unless Curl::Err::CurlError === e
|
266
|
-
L.warn "#{e.inspect}: #{e.message}"
|
273
|
+
L.warn "#{self}##{object_id}: #{e.inspect}: #{e.message}"
|
267
274
|
if loaded?
|
268
275
|
Curl.carier.remove @http
|
269
276
|
end
|
270
277
|
sleep 1
|
271
278
|
load!
|
272
|
-
#e.message << ". Failed to load allready loaded? easy handler: Bad file descriptor" unless Curl::Err::CurlError === e
|
273
|
-
#raise e
|
274
279
|
end
|
275
280
|
|
276
281
|
def load(path=@path, headers={}, not_redir=1, relvl=10, &callback)
|
282
|
+
@busy = true
|
277
283
|
# cache preprocessed data for one time so we can do #retry
|
278
284
|
@__path = path
|
279
285
|
@__headers = headers
|
@@ -286,21 +292,28 @@ module RHACK
|
|
286
292
|
@http.timeout = @timeout
|
287
293
|
|
288
294
|
@http.on_complete {|curl| # = @http
|
289
|
-
#
|
295
|
+
# @http has already been removed by the time a request has completed,
|
296
|
+
# but this callback may occur anywhere in a serial queue of curl callbacks.
|
290
297
|
@error = nil
|
291
298
|
# While not outdated, Curl::Response here may contain pointers on freed
|
292
299
|
# memory, thus throwing exception on #to_s and #inspect
|
293
300
|
@http.outdate!
|
294
301
|
res = @http.res
|
295
|
-
|
296
|
-
# We cannot just cancel on_complete in on_redirect block
|
297
|
-
# because loadGet will immediately reset on_complete back
|
302
|
+
process_cookies res if @cookies_enabled
|
303
|
+
# We cannot just cancel on_complete in on_redirect block,
|
304
|
+
# because loadGet should (and will) immediately reset on_complete back.
|
298
305
|
if res.code.in(300..399) and !not_redir.b and (relvl -= 1) > -1 and loc = res.hash.location
|
299
306
|
loadGet(loc, headers: headers, relvl: relvl, redir: true, &callback)
|
300
|
-
|
301
|
-
yield @http
|
307
|
+
else
|
308
|
+
yield @http if block_given?
|
309
|
+
# Now, we assume that data of this @http have been copied or will not be used anymore,
|
310
|
+
# thus the scout can be reused.
|
311
|
+
@busy = false
|
312
|
+
@http.on_failure &Proc::NULL
|
302
313
|
end
|
303
314
|
}
|
315
|
+
# Curl::Err::* (TCP/IP level) exception callback.
|
316
|
+
# May be set out there.
|
304
317
|
@http.on_failure {|curl, error|
|
305
318
|
process_failure(*error)
|
306
319
|
} unless @http.on_failure
|
data/lib/rhack/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rhack
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.3.
|
4
|
+
version: 1.3.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sergey Baev
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-11-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rmtools
|