rhack 1.3.1 → 1.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/rhack/clients/base.rb +35 -11
- data/lib/rhack/frame.rb +46 -11
- data/lib/rhack/scout.rb +36 -23
- data/lib/rhack/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 40e4929e52203b0121c9ca2eac2f289f656ff543
|
4
|
+
data.tar.gz: abb0fe53f2f8e43836473fdc8d7ae517fcd63602
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 23e4c2b43ee95212d0c8069ed7e2c731dc1878ec4bd4f7b8984bb0dfea55f1cd496ad161c098d2a0551a49d995a6cf4faca8b0041fc891d9f3fc3b42bfce5243
|
7
|
+
data.tar.gz: b6e3fce288abf2aaa51cf7910b1b99c90e0c673f13bf6541e8a047b496d83fe6d90b5d11379f8d8e6610a9ad94c6d8834d58c9b0c8cfa2fd171d43852791d987
|
data/lib/rhack/clients/base.rb
CHANGED
@@ -11,6 +11,7 @@ module RHACK
|
|
11
11
|
class_attribute :frame_defaults, :instance_writer => false
|
12
12
|
class_attribute :accounts, :instance_writer => false
|
13
13
|
class_attribute :routes, :instance_writer => false
|
14
|
+
class_attribute :rootpath, :instance_writer => false
|
14
15
|
|
15
16
|
self.frame_defaults = {}
|
16
17
|
self.accounts = {}
|
@@ -25,33 +26,50 @@ module RHACK
|
|
25
26
|
}
|
26
27
|
end
|
27
28
|
|
29
|
+
def method_missing(method, *args, &block)
|
30
|
+
if personal_instance_methods.include? method
|
31
|
+
return new.__send__(method, *args, &block)
|
32
|
+
end
|
33
|
+
super
|
34
|
+
end
|
35
|
+
|
28
36
|
private
|
29
37
|
|
38
|
+
def root(value=nil)
|
39
|
+
if value
|
40
|
+
value = 'http://' + value if value !~ /^\w+:/
|
41
|
+
self.rootpath = value
|
42
|
+
else
|
43
|
+
self.rootpath
|
44
|
+
end
|
45
|
+
end
|
46
|
+
alias :host :root
|
47
|
+
|
30
48
|
# Set routes map
|
31
49
|
def map(dict)
|
32
50
|
# URI is deprecated # backward compatibility
|
33
51
|
if defined? URI and URI.is Hash
|
34
52
|
URI.merge! dict.map_hash {|k, v| [k.to_sym, v.freeze]}
|
35
53
|
end
|
36
|
-
routes
|
54
|
+
self.routes += dict.map_hash {|k, v| [k.to_sym, v.freeze]}
|
37
55
|
end
|
38
56
|
|
39
57
|
# Set default Frame options
|
40
58
|
def frame(dict)
|
41
|
-
frame_defaults
|
59
|
+
self.frame_defaults += dict
|
42
60
|
end
|
43
61
|
|
44
62
|
# Set usable accounts
|
45
63
|
# @ dict : {symbol => {symbol => string, ...}}
|
46
64
|
def accounts(dict)
|
47
|
-
accounts
|
65
|
+
self.accounts += dict
|
48
66
|
end
|
49
67
|
|
50
68
|
end
|
51
69
|
|
52
70
|
def initialize(*args)
|
53
|
-
service, opts = args.get_opts [:api]
|
54
|
-
@service = service
|
71
|
+
service, opts = args.get_opts [routes.include?(:api) ? :api : nil]
|
72
|
+
@service = service # Deprecated. Use different classes to implement different services.
|
55
73
|
# first argument should be a string so that frame won't be static
|
56
74
|
if opts.is_a?(Frame)
|
57
75
|
@f = opts
|
@@ -60,9 +78,13 @@ module RHACK
|
|
60
78
|
if self.class.const_defined? :Result
|
61
79
|
opts[:result] = self.class::Result
|
62
80
|
end
|
63
|
-
@f = Frame(route(service) || route(:login), opts)
|
81
|
+
@f = Frame(rootpath || route(service) || route(:login), opts)
|
64
82
|
end
|
65
83
|
end
|
84
|
+
|
85
|
+
def inspect
|
86
|
+
"<##{self.class.name}#{":#{@service.to_s.camelize} service" if @service} via #{@f.inspect}>"
|
87
|
+
end
|
66
88
|
|
67
89
|
|
68
90
|
# Usable only for sync requests
|
@@ -91,15 +113,17 @@ module RHACK
|
|
91
113
|
@f.get(url) {|next_page| scrape!(next_page)}
|
92
114
|
end
|
93
115
|
end
|
94
|
-
|
95
|
-
def inspect
|
96
|
-
"<##{self.class.self_name}:#{@service.to_s.camelize} service via #{@f.inspect}>"
|
97
|
-
end
|
116
|
+
|
98
117
|
|
99
118
|
# shortcuts to class variables #
|
100
119
|
|
101
120
|
def route(name)
|
102
|
-
routes[name]
|
121
|
+
if url = routes[name]
|
122
|
+
if url !~ /^\w+:/
|
123
|
+
url = File.join rootpath, url
|
124
|
+
end
|
125
|
+
url
|
126
|
+
end
|
103
127
|
end
|
104
128
|
alias :url :route
|
105
129
|
# URI is deprecated # backward compatibility
|
data/lib/rhack/frame.rb
CHANGED
@@ -90,17 +90,36 @@ module RHACK
|
|
90
90
|
|
91
91
|
def inspect
|
92
92
|
sssize = @ss.size
|
93
|
-
"<#Frame @ #{@ss.untargeted ? 'no target' : @loc.root}: #{sssize} #{sssize == 1 ? 'scout' : 'scouts'}#{', static'+(' => '+@static.protocol if @static.is(Hash)) if @static}, cookies #{@ss[0].
|
93
|
+
"<#Frame @ #{@ss.untargeted ? 'no target' : @loc.root}: #{sssize} #{sssize == 1 ? 'scout' : 'scouts'}#{', static'+(' => '+@static.protocol if @static.is(Hash)) if @static}, cookies #{@ss[0].cookie_enabled ? 'on' : 'off'}>"
|
94
94
|
end
|
95
95
|
|
96
96
|
# All opts going in one hash.
|
97
97
|
# Opts for Frame:
|
98
|
-
# :wait, :
|
99
|
-
#
|
98
|
+
# :wait, :sync, :thread_safe, :raw, :proc_result, :save_result, :zip, :result, :stream
|
99
|
+
# ... processed and passed to Scout:
|
100
|
+
# :xhr, :content_type, :auth
|
101
|
+
# Opts passed to result:
|
100
102
|
# :xml, :html, :json, :hash, :eval, :load_scripts
|
101
|
-
# Opts
|
103
|
+
# Opts passed to Scout:
|
102
104
|
# :headers, :redir, :relvl
|
103
|
-
#
|
105
|
+
#
|
106
|
+
# @ :result : враппер результата исполнения; по умолчанию Page, для Client — если определён — Result; при асинхронном вызове будет возвращён незамедлительно
|
107
|
+
# @ &callback : в него будет передан инстанс result, а его результат будет записан в result#res (по умолчанию это ссылка на себя)
|
108
|
+
# @ :thread_safe : не использовать луп исполнения Curl::Multi#perform, а вызывать #perform прямо в этом треде; если установлен, то невозможно прерывание исполнения клавиатурой (продолжит работать, выполняя колбеки, в фоне), и невозможно задавать больше параллельных реквестов, чем разрешено параллельных соединений (просто застрянет)
|
109
|
+
# @ :sync : остановить (Thread#kill) perform-loop после исполнения всех запросов; подразумевает wait=true; при вызове одиночного реквеста подразумевает thread_safe=true
|
110
|
+
# @ :wait : ждать исполнения всех реквестов
|
111
|
+
# @ :save_result: возвращает #res для каждого инстанса result вместо самого инстанса; если не задан :proc_result, то подразумевает wait=true
|
112
|
+
# @ :proc_result: Proc, в который будет передан result#res, если задан также &callback; служит для создания вложенных блоков для клиентов; если =nil, то подразумевает wait=true
|
113
|
+
# @ :raw : сохраняем *только* тело ответа, без хедеров, без отладочной инфы в #res
|
114
|
+
# @ :raw + :sync : подразумевает save_result=true
|
115
|
+
# @ :xhr, :content_type, :auth : формируют хедеры X-Requested-With, Content-Type, Authorization для передачи в Scout
|
116
|
+
# @ :xhr : boolean
|
117
|
+
# @ :content_type : symbol<extension> | raw string
|
118
|
+
# @ :auth : "<username>:<password>"
|
119
|
+
#
|
120
|
+
# @ :zip, :stream и все опции для result : deprecated
|
121
|
+
#
|
122
|
+
# TODO: Семантически разделить синхронное и асинхронное выполнение запросов (не важно, серии или отдельных), с учётом, что асинхронность по сути своей перегружена и требуется, например, в очередях сообщений, но не в синхронных контроллерах Rails
|
104
123
|
def exec *args, &callback
|
105
124
|
many, order, orders, with_opts = interpret_request *args
|
106
125
|
L.log({:many => many, :order => order, :orders => orders, :with_opts => with_opts})
|
@@ -113,8 +132,15 @@ module RHACK
|
|
113
132
|
# unless we are explicitly told otherwise
|
114
133
|
Johnson::Runtime.set_browser_for_curl with_opts
|
115
134
|
|
116
|
-
if many
|
117
|
-
|
135
|
+
if many
|
136
|
+
result = exec_many orders, with_opts, &callback
|
137
|
+
else
|
138
|
+
result = exec_one order, with_opts, &callback
|
139
|
+
end
|
140
|
+
if with_opts[:sync]
|
141
|
+
Curl.stop
|
142
|
+
end
|
143
|
+
result
|
118
144
|
end
|
119
145
|
alias :get :exec
|
120
146
|
alias :run :get
|
@@ -218,7 +244,13 @@ module RHACK
|
|
218
244
|
|
219
245
|
opts[:eval] = false if opts[:json] or opts[:hash] or opts[:raw]
|
220
246
|
opts[:load_scripts] = self if opts[:load_scripts]
|
221
|
-
opts[:
|
247
|
+
opts[:save_result] = true if opts[:wait] and opts[:raw]
|
248
|
+
|
249
|
+
if orders
|
250
|
+
opts[:thread_safe] = false if @ss.size < orders.size
|
251
|
+
else
|
252
|
+
opts[:thread_safe] = true if opts[:sync]
|
253
|
+
end
|
222
254
|
|
223
255
|
(opts[:headers] ||= {})['X-Requested-With'] = 'XMLHttpRequest' if opts[:xhr]
|
224
256
|
if opts[:content_type]
|
@@ -233,6 +265,9 @@ module RHACK
|
|
233
265
|
(opts[:headers] ||= {})['Content-Type'] = opts[:content_type]
|
234
266
|
end
|
235
267
|
end
|
268
|
+
if opts[:auth]
|
269
|
+
(opts[:headers] ||= {})['Authorization'] = "Basic #{Base64.encode64(opts[:auth])}".chop
|
270
|
+
end
|
236
271
|
|
237
272
|
[many, order, orders, opts]
|
238
273
|
end
|
@@ -331,7 +366,7 @@ module RHACK
|
|
331
366
|
# if no spare scouts can be found, squad simply waits for first callbacks to complete
|
332
367
|
s = @ss.next
|
333
368
|
s.http.on_failure {|curl, error|
|
334
|
-
|
369
|
+
s.process_failure(*error) {
|
335
370
|
# curl itself has decided not to retry a request
|
336
371
|
if opts[:raw]
|
337
372
|
page.res = s.error
|
@@ -339,14 +374,14 @@ module RHACK
|
|
339
374
|
run_callbacks! page, opts, &callback
|
340
375
|
# nothing to do here if process returns nil or false
|
341
376
|
end
|
342
|
-
|
377
|
+
}
|
343
378
|
}
|
344
379
|
s.send(*(order << opts)) {|curl|
|
345
380
|
# there is a problem with storing html on disk
|
346
381
|
if order[0] == :loadGet and @write_to
|
347
382
|
# sometimes (about 2% for 100-threads-dling) when this string is calling
|
348
383
|
# no matter what +curl.res.body+ has contained here
|
349
|
-
RMTools.rw @write_to+'/'+order[-2].sub(
|
384
|
+
RMTools.rw @write_to+'/'+order[-2].sub(/^\w+:\/\//, ''), curl.res.body.xml_to_utf
|
350
385
|
end
|
351
386
|
if opts[:raw]
|
352
387
|
page.res = block_given? ? yield(curl) : curl.body_str
|
data/lib/rhack/scout.rb
CHANGED
@@ -7,7 +7,7 @@ module RHACK
|
|
7
7
|
attr_accessor :path, :root, :sld, :proxy
|
8
8
|
attr_reader :uri
|
9
9
|
attr_reader :webproxy, :last_method, :proxystr, :headers, :body, :http, :error
|
10
|
-
attr_reader :cookies, :ua, :refforge, :
|
10
|
+
attr_reader :cookies, :ua, :refforge, :cookies_enabled
|
11
11
|
|
12
12
|
DefaultHeader = {
|
13
13
|
"Expect" => "",
|
@@ -40,7 +40,7 @@ module RHACK
|
|
40
40
|
@cookies = {}
|
41
41
|
@body = {}
|
42
42
|
@num = []
|
43
|
-
@
|
43
|
+
@cookies_enabled = opts[:cp] || opts[:ck]
|
44
44
|
@raise_err = opts[:raise] # no way to use @raise id, it makes any 'raise' call here fail
|
45
45
|
@engine = opts[:engine]
|
46
46
|
@timeout = opts[:timeout] || @@timeout || 60
|
@@ -81,9 +81,9 @@ module RHACK
|
|
81
81
|
@proxystr = @webproxy ? @proxy[0] : @http.proxy_url
|
82
82
|
else @proxystr = 'localhost'
|
83
83
|
end
|
84
|
-
if @
|
85
|
-
self.main_cks = @
|
86
|
-
@
|
84
|
+
if @cookies_enabled.is Hash
|
85
|
+
self.main_cks = @cookies_enabled
|
86
|
+
@cookies_enabled = true
|
87
87
|
end
|
88
88
|
self
|
89
89
|
end
|
@@ -169,7 +169,7 @@ module RHACK
|
|
169
169
|
|
170
170
|
def mkHeader(uri)
|
171
171
|
header = DefaultHeader.dup
|
172
|
-
if @
|
172
|
+
if @cookies_enabled
|
173
173
|
cookies = ''
|
174
174
|
main_cks.each {|k, v| main_cks.delete k if v.use(cookies, @uri) == :expired}
|
175
175
|
header['Cookie'] = cookies[0..-3]
|
@@ -182,7 +182,7 @@ module RHACK
|
|
182
182
|
header
|
183
183
|
end
|
184
184
|
|
185
|
-
def
|
185
|
+
def process_cookies(res)
|
186
186
|
ck = []
|
187
187
|
case res
|
188
188
|
when String
|
@@ -195,11 +195,10 @@ module RHACK
|
|
195
195
|
end
|
196
196
|
return if !ck.b
|
197
197
|
ck.each {|c| Cookie(c, self)}
|
198
|
-
# StoreCookies if @cookieStore
|
199
198
|
end
|
200
199
|
|
201
|
-
def cp_on() @
|
202
|
-
def cp_off() @
|
200
|
+
def cp_on() @cookies_enabled = true end
|
201
|
+
def cp_off() @cookies_enabled = false end
|
203
202
|
|
204
203
|
def main_cks() @cookies[@uri.host] ||= {} end
|
205
204
|
def main_cks=(cks)
|
@@ -231,13 +230,18 @@ module RHACK
|
|
231
230
|
Curl.carier.reqs.include? @http
|
232
231
|
end
|
233
232
|
|
233
|
+
# Scout must not be reused until not only the response has arrived,
|
234
|
+
# but the callback has been processed, too.
|
235
|
+
# Otherwise, #retry! may not work as expected:
|
236
|
+
# if a scout gets callback as a block argument, then it may re-run not original callback,
|
237
|
+
# but its copy with another scope.
|
234
238
|
def available?
|
235
|
-
!loaded?
|
239
|
+
!loaded? and !@busy
|
236
240
|
end
|
237
241
|
|
238
242
|
# - if curl should retry request based on Curl::Err class only
|
239
243
|
# => false
|
240
|
-
def process_failure(curl_err, message)
|
244
|
+
def process_failure(curl_err, message, &callback)
|
241
245
|
@error = curl_err.new message
|
242
246
|
#@error = [curl_err, message] # old
|
243
247
|
@http.outdate!
|
@@ -247,33 +251,35 @@ module RHACK
|
|
247
251
|
if retry? curl_err
|
248
252
|
L.debug "#{curl_err} -> reloading scout"
|
249
253
|
retry!
|
250
|
-
false
|
251
254
|
else
|
252
255
|
L.debug "#{curl_err} -> not reloading scout"
|
253
256
|
raise @error if @raise_err
|
254
257
|
#raise *@error if @raise_err # old
|
255
|
-
|
258
|
+
yield if block_given?
|
259
|
+
# Now, we assume that data of this @http have been copied or will not be used anymore,
|
260
|
+
# thus the scout can be reused.
|
261
|
+
@busy = false
|
256
262
|
end
|
257
263
|
end
|
258
264
|
|
259
265
|
def load!
|
260
266
|
unless Curl.carier.add @http
|
267
|
+
L.warn "#{self}##{object_id}: Failed to add Curl::Easy##{@http.object_id} to Curl::Multi##{Curl.carier.object_id}. Trying to remove it and re-add."
|
261
268
|
Curl.carier.remove @http
|
262
269
|
Curl.сarier.add @http
|
263
270
|
end
|
264
271
|
rescue RuntimeError => e
|
265
272
|
e.message << ". Failed to load allready loaded? easy handler: Bad file descriptor" unless Curl::Err::CurlError === e
|
266
|
-
L.warn "#{e.inspect}: #{e.message}"
|
273
|
+
L.warn "#{self}##{object_id}: #{e.inspect}: #{e.message}"
|
267
274
|
if loaded?
|
268
275
|
Curl.carier.remove @http
|
269
276
|
end
|
270
277
|
sleep 1
|
271
278
|
load!
|
272
|
-
#e.message << ". Failed to load allready loaded? easy handler: Bad file descriptor" unless Curl::Err::CurlError === e
|
273
|
-
#raise e
|
274
279
|
end
|
275
280
|
|
276
281
|
def load(path=@path, headers={}, not_redir=1, relvl=10, &callback)
|
282
|
+
@busy = true
|
277
283
|
# cache preprocessed data for one time so we can do #retry
|
278
284
|
@__path = path
|
279
285
|
@__headers = headers
|
@@ -286,21 +292,28 @@ module RHACK
|
|
286
292
|
@http.timeout = @timeout
|
287
293
|
|
288
294
|
@http.on_complete {|curl| # = @http
|
289
|
-
#
|
295
|
+
# @http has already been removed by the time the request has completed,
|
296
|
+
# but this callback may occur anywhere in a serial queue of curl callbacks.
|
290
297
|
@error = nil
|
291
298
|
# While not outdated, Curl::Response here may contain pointers on freed
|
292
299
|
# memory, thus throwing exception on #to_s and #inspect
|
293
300
|
@http.outdate!
|
294
301
|
res = @http.res
|
295
|
-
|
296
|
-
# We cannot just cancel on_complete in on_redirect block
|
297
|
-
# because loadGet will immediately reset on_complete back
|
302
|
+
process_cookies res if @cookies_enabled
|
303
|
+
# We cannot just cancel on_complete in on_redirect block,
|
304
|
+
# because loadGet should (and will) immediately reset on_complete back.
|
298
305
|
if res.code.in(300..399) and !not_redir.b and (relvl -= 1) > -1 and loc = res.hash.location
|
299
306
|
loadGet(loc, headers: headers, relvl: relvl, redir: true, &callback)
|
300
|
-
|
301
|
-
yield @http
|
307
|
+
else
|
308
|
+
yield @http if block_given?
|
309
|
+
# Now, we assume that data of this @http have been copied or will not be used anymore,
|
310
|
+
# thus the scout can be reused.
|
311
|
+
@busy = false
|
312
|
+
@http.on_failure &Proc::NULL
|
302
313
|
end
|
303
314
|
}
|
315
|
+
# Curl::Err::* (TCP/IP level) exception callback.
|
316
|
+
# May be set out there.
|
304
317
|
@http.on_failure {|curl, error|
|
305
318
|
process_failure(*error)
|
306
319
|
} unless @http.on_failure
|
data/lib/rhack/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rhack
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.3.1
|
4
|
+
version: 1.3.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sergey Baev
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-11-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rmtools
|