rhack 1.3.1 → 1.3.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 8f5a539c0ca1c92416d50ffd4609d08a69d900fe
4
- data.tar.gz: fee2e42ae02b21af929a1b0b417360adc043562d
3
+ metadata.gz: 40e4929e52203b0121c9ca2eac2f289f656ff543
4
+ data.tar.gz: abb0fe53f2f8e43836473fdc8d7ae517fcd63602
5
5
  SHA512:
6
- metadata.gz: bfda297b6b465393b6749bac236e32bd967216dbad68c4383fc3bdd1e9cbc5602efcb57513ce9c01b9b6c6416ad312b10e150a802b717932e471594744e8765b
7
- data.tar.gz: e4e762cd0e9e858cd20e9bef50c0a9fb4d97b506361879b5efc49cad3d939ebb35b2e3245144fd2e9f01a7ddc033200b656d71a5e4ffa53172e84d468376d3c4
6
+ metadata.gz: 23e4c2b43ee95212d0c8069ed7e2c731dc1878ec4bd4f7b8984bb0dfea55f1cd496ad161c098d2a0551a49d995a6cf4faca8b0041fc891d9f3fc3b42bfce5243
7
+ data.tar.gz: b6e3fce288abf2aaa51cf7910b1b99c90e0c673f13bf6541e8a047b496d83fe6d90b5d11379f8d8e6610a9ad94c6d8834d58c9b0c8cfa2fd171d43852791d987
@@ -11,6 +11,7 @@ module RHACK
11
11
  class_attribute :frame_defaults, :instance_writer => false
12
12
  class_attribute :accounts, :instance_writer => false
13
13
  class_attribute :routes, :instance_writer => false
14
+ class_attribute :rootpath, :instance_writer => false
14
15
 
15
16
  self.frame_defaults = {}
16
17
  self.accounts = {}
@@ -25,33 +26,50 @@ module RHACK
25
26
  }
26
27
  end
27
28
 
29
+ def method_missing(method, *args, &block)
30
+ if personal_instance_methods.include? method
31
+ return new.__send__(method, *args, &block)
32
+ end
33
+ super
34
+ end
35
+
28
36
  private
29
37
 
38
+ def root(value=nil)
39
+ if value
40
+ value = 'http://' + value if value !~ /^\w+:/
41
+ self.rootpath = value
42
+ else
43
+ self.rootpath
44
+ end
45
+ end
46
+ alias :host :root
47
+
30
48
  # Set routes map
31
49
  def map(dict)
32
50
  # URI is deprecated # backward compatibility
33
51
  if defined? URI and URI.is Hash
34
52
  URI.merge! dict.map_hash {|k, v| [k.to_sym, v.freeze]}
35
53
  end
36
- routes.merge! dict.map_hash {|k, v| [k.to_sym, v.freeze]}
54
+ self.routes += dict.map_hash {|k, v| [k.to_sym, v.freeze]}
37
55
  end
38
56
 
39
57
  # Set default Frame options
40
58
  def frame(dict)
41
- frame_defaults.merge! dict
59
+ self.frame_defaults += dict
42
60
  end
43
61
 
44
62
  # Set usable accounts
45
63
  # @ dict : {symbol => {symbol => string, ...}}
46
64
  def accounts(dict)
47
- accounts.merge! dict
65
+ self.accounts += dict
48
66
  end
49
67
 
50
68
  end
51
69
 
52
70
  def initialize(*args)
53
- service, opts = args.get_opts [:api]
54
- @service = service
71
+ service, opts = args.get_opts [routes.include?(:api) ? :api : nil]
72
+ @service = service # Deprecated. Use different classes to implement different services.
55
73
  # first argument should be a string so that frame won't be static
56
74
  if opts.is_a?(Frame)
57
75
  @f = opts
@@ -60,9 +78,13 @@ module RHACK
60
78
  if self.class.const_defined? :Result
61
79
  opts[:result] = self.class::Result
62
80
  end
63
- @f = Frame(route(service) || route(:login), opts)
81
+ @f = Frame(rootpath || route(service) || route(:login), opts)
64
82
  end
65
83
  end
84
+
85
+ def inspect
86
+ "<##{self.class.name}#{":#{@service.to_s.camelize} service" if @service} via #{@f.inspect}>"
87
+ end
66
88
 
67
89
 
68
90
  # Usable only for sync requests
@@ -91,15 +113,17 @@ module RHACK
91
113
  @f.get(url) {|next_page| scrape!(next_page)}
92
114
  end
93
115
  end
94
-
95
- def inspect
96
- "<##{self.class.self_name}:#{@service.to_s.camelize} service via #{@f.inspect}>"
97
- end
116
+
98
117
 
99
118
  # shortcuts to class variables #
100
119
 
101
120
  def route(name)
102
- routes[name]
121
+ if url = routes[name]
122
+ if url !~ /^\w+:/
123
+ url = File.join rootpath, url
124
+ end
125
+ url
126
+ end
103
127
  end
104
128
  alias :url :route
105
129
  # URI is deprecated # backward compatibility
@@ -90,17 +90,36 @@ module RHACK
90
90
 
91
91
  def inspect
92
92
  sssize = @ss.size
93
- "<#Frame @ #{@ss.untargeted ? 'no target' : @loc.root}: #{sssize} #{sssize == 1 ? 'scout' : 'scouts'}#{', static'+(' => '+@static.protocol if @static.is(Hash)) if @static}, cookies #{@ss[0].cookieProc ? 'on' : 'off'}>"
93
+ "<#Frame @ #{@ss.untargeted ? 'no target' : @loc.root}: #{sssize} #{sssize == 1 ? 'scout' : 'scouts'}#{', static'+(' => '+@static.protocol if @static.is(Hash)) if @static}, cookies #{@ss[0].cookie_enabled ? 'on' : 'off'}>"
94
94
  end
95
95
 
96
96
  # All opts going in one hash.
97
97
  # Opts for Frame:
98
- # :wait, :proc_result, :save_result, :zip, :thread_safe, :result, :stream, :raw, :xhr, :content_type
99
- # Opts passed to Page:
98
+ # :wait, :sync, :thread_safe, :raw, :proc_result, :save_result, :zip, :result, :stream
99
+ # ... processed and passed to Scout:
100
+ # :xhr, :content_type, :auth
101
+ # Opts passed to result:
100
102
  # :xml, :html, :json, :hash, :eval, :load_scripts
101
- # Opts for Scout:
103
+ # Opts passed to Scout:
102
104
  # :headers, :redir, :relvl
103
- # TODO: describe options
105
+ #
106
+ # @ :result : враппер результата исполнения; по умолчанию Page, для Client — если определён — Result; при асинхронном вызове будет возвращён незамедлительно
107
+ # @ &callback : в него будет передан инстанс result, а его результат будет записан в result#res (по умолчанию это ссылка на себя)
108
+ # @ :thread_safe : не использовать луп исполнения Curl::Multi#perform, а вызывать #perform прямо в этом треде; если установлен, то невозможно прерывание исполнения клавиатурой (продолжит работать, выполняя колбеки, в фоне), и невозможно задавать больше параллельных реквестов, чем разрешено параллельных соединений (просто застрянет)
109
+ # @ :sync : остановить (Thread#kill) perform-loop после исполнения всех запросов; подразумевает wait=true; при вызове одиночного реквеста подразумевает thread_safe=true
110
+ # @ :wait : ждать исполнения всех реквестов
111
+ # @ :save_result: возвращает #res для каждого инстанса result вместо самого инстанса; если не задан :proc_result, то подразумевает wait=true
112
+ # @ :proc_result: Proc, в который будет передан result#res, если задан также &callback; служит для создания вложенных блоков для клиентов; если =nil, то подразумевает wait=true
113
+ # @ :raw : сохраняем *только* тело ответа, без хедеров, без отладочной инфы в #res
114
+ # @ :raw + :sync : подразумевает save_result=true
115
+ # @ :xhr, :content_type, :auth : формируют хедеры X-Requested-With, Content-Type, Authorization для передачи в Scout
116
+ # @ :xhr : boolean
117
+ # @ :content_type : symbol<extension> | raw string
118
+ # @ :auth : "<username>:<password>"
119
+ #
120
+ # @ :zip, :stream и все опции для result : deprecated
121
+ #
122
+ # TODO: Семантически разделить синхронное и асинхронное выполнение запросов (не важно, серии или отдельных), с учётом, что асинхронность по сути своей перегружена и требуется, например, в очередях сообщений, но не в синхронных контроллерах Rails
104
123
  def exec *args, &callback
105
124
  many, order, orders, with_opts = interpret_request *args
106
125
  L.log({:many => many, :order => order, :orders => orders, :with_opts => with_opts})
@@ -113,8 +132,15 @@ module RHACK
113
132
  # if we aren't said explicitly about the opposite
114
133
  Johnson::Runtime.set_browser_for_curl with_opts
115
134
 
116
- if many then exec_many orders, with_opts, &callback
117
- else exec_one order, with_opts, &callback end
135
+ if many
136
+ result = exec_many orders, with_opts, &callback
137
+ else
138
+ result = exec_one order, with_opts, &callback
139
+ end
140
+ if with_opts[:sync]
141
+ Curl.stop
142
+ end
143
+ result
118
144
  end
119
145
  alias :get :exec
120
146
  alias :run :get
@@ -218,7 +244,13 @@ module RHACK
218
244
 
219
245
  opts[:eval] = false if opts[:json] or opts[:hash] or opts[:raw]
220
246
  opts[:load_scripts] = self if opts[:load_scripts]
221
- opts[:stream] = true if opts[:raw]
247
+ opts[:save_result] = true if opts[:wait] and opts[:raw]
248
+
249
+ if orders
250
+ opts[:thread_safe] = false if @ss.size < orders.size
251
+ else
252
+ opts[:thread_safe] = true if opts[:sync]
253
+ end
222
254
 
223
255
  (opts[:headers] ||= {})['X-Requested-With'] = 'XMLHttpRequest' if opts[:xhr]
224
256
  if opts[:content_type]
@@ -233,6 +265,9 @@ module RHACK
233
265
  (opts[:headers] ||= {})['Content-Type'] = opts[:content_type]
234
266
  end
235
267
  end
268
+ if opts[:auth]
269
+ (opts[:headers] ||= {})['Authorization'] = "Basic #{Base64.encode64(opts[:auth])}".chop
270
+ end
236
271
 
237
272
  [many, order, orders, opts]
238
273
  end
@@ -331,7 +366,7 @@ module RHACK
331
366
  # if no spare scouts can be found, squad simply waits for first callbacks to complete
332
367
  s = @ss.next
333
368
  s.http.on_failure {|curl, error|
334
- if s.process_failure(*error)
369
+ s.process_failure(*error) {
335
370
  # curl itself has decided not to retry a request
336
371
  if opts[:raw]
337
372
  page.res = s.error
@@ -339,14 +374,14 @@ module RHACK
339
374
  run_callbacks! page, opts, &callback
340
375
  # nothing to do here if process returns nil or false
341
376
  end
342
- end
377
+ }
343
378
  }
344
379
  s.send(*(order << opts)) {|curl|
345
380
  # there is a problem with storing html on disk
346
381
  if order[0] == :loadGet and @write_to
347
382
  # sometimes (about 2% for 100-threads-dling) when this string is calling
348
383
  # no matter what +curl.res.body+ has contained here
349
- RMTools.rw @write_to+'/'+order[-2].sub(/^[a-z]+:\/\//, ''), curl.res.body.xml_to_utf
384
+ RMTools.rw @write_to+'/'+order[-2].sub(/^\w+:\/\//, ''), curl.res.body.xml_to_utf
350
385
  end
351
386
  if opts[:raw]
352
387
  page.res = block_given? ? yield(curl) : curl.body_str
@@ -7,7 +7,7 @@ module RHACK
7
7
  attr_accessor :path, :root, :sld, :proxy
8
8
  attr_reader :uri
9
9
  attr_reader :webproxy, :last_method, :proxystr, :headers, :body, :http, :error
10
- attr_reader :cookies, :ua, :refforge, :cookieStore, :cookieProc
10
+ attr_reader :cookies, :ua, :refforge, :cookies_enabled
11
11
 
12
12
  DefaultHeader = {
13
13
  "Expect" => "",
@@ -40,7 +40,7 @@ module RHACK
40
40
  @cookies = {}
41
41
  @body = {}
42
42
  @num = []
43
- @cookieProc = opts[:cp] || opts[:ck]
43
+ @cookies_enabled = opts[:cp] || opts[:ck]
44
44
  @raise_err = opts[:raise] # no way to use @raise id, it makes any 'raise' call here fail
45
45
  @engine = opts[:engine]
46
46
  @timeout = opts[:timeout] || @@timeout || 60
@@ -81,9 +81,9 @@ module RHACK
81
81
  @proxystr = @webproxy ? @proxy[0] : @http.proxy_url
82
82
  else @proxystr = 'localhost'
83
83
  end
84
- if @cookieProc.is Hash
85
- self.main_cks = @cookieProc
86
- @cookieProc = true
84
+ if @cookies_enabled.is Hash
85
+ self.main_cks = @cookies_enabled
86
+ @cookies_enabled = true
87
87
  end
88
88
  self
89
89
  end
@@ -169,7 +169,7 @@ module RHACK
169
169
 
170
170
  def mkHeader(uri)
171
171
  header = DefaultHeader.dup
172
- if @cookieProc
172
+ if @cookies_enabled
173
173
  cookies = ''
174
174
  main_cks.each {|k, v| main_cks.delete k if v.use(cookies, @uri) == :expired}
175
175
  header['Cookie'] = cookies[0..-3]
@@ -182,7 +182,7 @@ module RHACK
182
182
  header
183
183
  end
184
184
 
185
- def ProcCookies(res)
185
+ def process_cookies(res)
186
186
  ck = []
187
187
  case res
188
188
  when String
@@ -195,11 +195,10 @@ module RHACK
195
195
  end
196
196
  return if !ck.b
197
197
  ck.each {|c| Cookie(c, self)}
198
- # StoreCookies if @cookieStore
199
198
  end
200
199
 
201
- def cp_on() @cookieProc = true end
202
- def cp_off() @cookieProc = false end
200
+ def cp_on() @cookies_enabled = true end
201
+ def cp_off() @cookies_enabled = false end
203
202
 
204
203
  def main_cks() @cookies[@uri.host] ||= {} end
205
204
  def main_cks=(cks)
@@ -231,13 +230,18 @@ module RHACK
231
230
  Curl.carier.reqs.include? @http
232
231
  end
233
232
 
233
+ # A scout must not be reused until not only has the response arrived,
234
+ # but the callback has been processed, too.
235
+ # Otherwise, #retry! may not work as expected:
236
+ # if a scout gets a callback as a block argument, it may re-run not the original callback,
237
+ # but its copy with another scope.
234
238
  def available?
235
- !loaded?
239
+ !loaded? and !@busy
236
240
  end
237
241
 
238
242
  # - if curl should retry request based on Curl::Err class only
239
243
  # => false
240
- def process_failure(curl_err, message)
244
+ def process_failure(curl_err, message, &callback)
241
245
  @error = curl_err.new message
242
246
  #@error = [curl_err, message] # old
243
247
  @http.outdate!
@@ -247,33 +251,35 @@ module RHACK
247
251
  if retry? curl_err
248
252
  L.debug "#{curl_err} -> reloading scout"
249
253
  retry!
250
- false
251
254
  else
252
255
  L.debug "#{curl_err} -> not reloading scout"
253
256
  raise @error if @raise_err
254
257
  #raise *@error if @raise_err # old
255
- true
258
+ yield if block_given?
259
+ # Now, we assume that data of this @http have been copied or will not be used anymore,
260
+ # thus the scout can be reused.
261
+ @busy = false
256
262
  end
257
263
  end
258
264
 
259
265
  def load!
260
266
  unless Curl.carier.add @http
267
+ L.warn "#{self}##{object_id}: Failed to add Curl::Easy##{@http.object_id} to Curl::Multi##{Curl.carier.object_id}. Trying to remove it and re-add."
261
268
  Curl.carier.remove @http
262
269
  Curl.сarier.add @http
263
270
  end
264
271
  rescue RuntimeError => e
265
272
  e.message << ". Failed to load allready loaded? easy handler: Bad file descriptor" unless Curl::Err::CurlError === e
266
- L.warn "#{e.inspect}: #{e.message}"
273
+ L.warn "#{self}##{object_id}: #{e.inspect}: #{e.message}"
267
274
  if loaded?
268
275
  Curl.carier.remove @http
269
276
  end
270
277
  sleep 1
271
278
  load!
272
- #e.message << ". Failed to load allready loaded? easy handler: Bad file descriptor" unless Curl::Err::CurlError === e
273
- #raise e
274
279
  end
275
280
 
276
281
  def load(path=@path, headers={}, not_redir=1, relvl=10, &callback)
282
+ @busy = true
277
283
  # cache preprocessed data for one time so we can do #retry
278
284
  @__path = path
279
285
  @__headers = headers
@@ -286,21 +292,28 @@ module RHACK
286
292
  @http.timeout = @timeout
287
293
 
288
294
  @http.on_complete {|curl| # = @http
289
- # > Carier.requests--
295
+ # @http has already been removed when a request had complete,
296
+ # but this callback may occure wherever in a serial queue of curl callbacks.
290
297
  @error = nil
291
298
  # While not outdated, Curl::Response here may contain pointers on freed
292
299
  # memory, thus throwing exception on #to_s and #inspect
293
300
  @http.outdate!
294
301
  res = @http.res
295
- ProcCookies res if @cookieProc
296
- # We cannot just cancel on_complete in on_redirect block
297
- # because loadGet will immediately reset on_complete back
302
+ process_cookies res if @cookies_enabled
303
+ # We cannot just cancel on_complete in on_redirect block,
304
+ # because loadGet should (and will) immediately reset on_complete back.
298
305
  if res.code.in(300..399) and !not_redir.b and (relvl -= 1) > -1 and loc = res.hash.location
299
306
  loadGet(loc, headers: headers, relvl: relvl, redir: true, &callback)
300
- elsif block_given?
301
- yield @http
307
+ else
308
+ yield @http if block_given?
309
+ # Now, we assume that data of this @http have been copied or will not be used anymore,
310
+ # thus the scout can be reused.
311
+ @busy = false
312
+ @http.on_failure &Proc::NULL
302
313
  end
303
314
  }
315
+ # Curl::Err::* (TCP/IP level) exception callback.
316
+ # May already have been set elsewhere, outside this method.
304
317
  @http.on_failure {|curl, error|
305
318
  process_failure(*error)
306
319
  } unless @http.on_failure
@@ -1,3 +1,3 @@
1
1
  module RHACK
2
- VERSION = '1.3.1'
2
+ VERSION = '1.3.3'
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rhack
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.3.1
4
+ version: 1.3.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sergey Baev
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-10-27 00:00:00.000000000 Z
11
+ date: 2014-11-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rmtools