rhack 1.3.1 → 1.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 8f5a539c0ca1c92416d50ffd4609d08a69d900fe
4
- data.tar.gz: fee2e42ae02b21af929a1b0b417360adc043562d
3
+ metadata.gz: 40e4929e52203b0121c9ca2eac2f289f656ff543
4
+ data.tar.gz: abb0fe53f2f8e43836473fdc8d7ae517fcd63602
5
5
  SHA512:
6
- metadata.gz: bfda297b6b465393b6749bac236e32bd967216dbad68c4383fc3bdd1e9cbc5602efcb57513ce9c01b9b6c6416ad312b10e150a802b717932e471594744e8765b
7
- data.tar.gz: e4e762cd0e9e858cd20e9bef50c0a9fb4d97b506361879b5efc49cad3d939ebb35b2e3245144fd2e9f01a7ddc033200b656d71a5e4ffa53172e84d468376d3c4
6
+ metadata.gz: 23e4c2b43ee95212d0c8069ed7e2c731dc1878ec4bd4f7b8984bb0dfea55f1cd496ad161c098d2a0551a49d995a6cf4faca8b0041fc891d9f3fc3b42bfce5243
7
+ data.tar.gz: b6e3fce288abf2aaa51cf7910b1b99c90e0c673f13bf6541e8a047b496d83fe6d90b5d11379f8d8e6610a9ad94c6d8834d58c9b0c8cfa2fd171d43852791d987
@@ -11,6 +11,7 @@ module RHACK
11
11
  class_attribute :frame_defaults, :instance_writer => false
12
12
  class_attribute :accounts, :instance_writer => false
13
13
  class_attribute :routes, :instance_writer => false
14
+ class_attribute :rootpath, :instance_writer => false
14
15
 
15
16
  self.frame_defaults = {}
16
17
  self.accounts = {}
@@ -25,33 +26,50 @@ module RHACK
25
26
  }
26
27
  end
27
28
 
29
# Fallback for calls the receiver does not define itself: when `method` is listed
# in personal_instance_methods, a fresh object is built with `new` (no arguments)
# and the call, its arguments and its block are forwarded to it; anything else
# goes to `super`.
# NOTE(review): no respond_to_missing? counterpart is visible in this hunk, so
# respond_to? presumably does not report the forwarded methods -- confirm.
+ def method_missing(method, *args, &block)
30
+ if personal_instance_methods.include? method
31
+ return new.__send__(method, *args, &block)
32
+ end
33
+ super
34
+ end
35
+
28
36
  private
29
37
 
38
+ def root(value=nil)
39
+ if value
40
+ value = 'http://' + value if value !~ /^\w+:/
41
+ self.rootpath = value
42
+ else
43
+ self.rootpath
44
+ end
45
+ end
46
+ alias :host :root
47
+
30
48
  # Set routes map
31
49
  def map(dict)
32
50
  # URI is deprecated # backward compatibility
33
51
  if defined? URI and URI.is Hash
34
52
  URI.merge! dict.map_hash {|k, v| [k.to_sym, v.freeze]}
35
53
  end
36
- routes.merge! dict.map_hash {|k, v| [k.to_sym, v.freeze]}
54
+ self.routes += dict.map_hash {|k, v| [k.to_sym, v.freeze]}
37
55
  end
38
56
 
39
57
  # Set default Frame options
40
58
  def frame(dict)
41
- frame_defaults.merge! dict
59
+ self.frame_defaults += dict
42
60
  end
43
61
 
44
62
  # Set usable accounts
45
63
  # @ dict : {symbol => {symbol => string, ...}}
46
64
  def accounts(dict)
47
- accounts.merge! dict
65
+ self.accounts += dict
48
66
  end
49
67
 
50
68
  end
51
69
 
52
70
  def initialize(*args)
53
- service, opts = args.get_opts [:api]
54
- @service = service
71
+ service, opts = args.get_opts [routes.include?(:api) ? :api : nil]
72
+ @service = service # Deprecated. Use different classes to implement different services.
55
73
  # first argument should be a string so that frame won't be static
56
74
  if opts.is_a?(Frame)
57
75
  @f = opts
@@ -60,9 +78,13 @@ module RHACK
60
78
  if self.class.const_defined? :Result
61
79
  opts[:result] = self.class::Result
62
80
  end
63
- @f = Frame(route(service) || route(:login), opts)
81
+ @f = Frame(rootpath || route(service) || route(:login), opts)
64
82
  end
65
83
  end
84
+
85
+ def inspect
86
+ "<##{self.class.name}#{":#{@service.to_s.camelize} service" if @service} via #{@f.inspect}>"
87
+ end
66
88
 
67
89
 
68
90
  # Usable only for sync requests
@@ -91,15 +113,17 @@ module RHACK
91
113
  @f.get(url) {|next_page| scrape!(next_page)}
92
114
  end
93
115
  end
94
-
95
- def inspect
96
- "<##{self.class.self_name}:#{@service.to_s.camelize} service via #{@f.inspect}>"
97
- end
116
+
98
117
 
99
118
  # shortcuts to class variables #
100
119
 
101
120
  def route(name)
102
- routes[name]
121
+ if url = routes[name]
122
+ if url !~ /^\w+:/
123
+ url = File.join rootpath, url
124
+ end
125
+ url
126
+ end
103
127
  end
104
128
  alias :url :route
105
129
  # URI is deprecated # backward compatibility
@@ -90,17 +90,36 @@ module RHACK
90
90
 
91
91
  def inspect
92
92
  sssize = @ss.size
93
- "<#Frame @ #{@ss.untargeted ? 'no target' : @loc.root}: #{sssize} #{sssize == 1 ? 'scout' : 'scouts'}#{', static'+(' => '+@static.protocol if @static.is(Hash)) if @static}, cookies #{@ss[0].cookieProc ? 'on' : 'off'}>"
93
+ "<#Frame @ #{@ss.untargeted ? 'no target' : @loc.root}: #{sssize} #{sssize == 1 ? 'scout' : 'scouts'}#{', static'+(' => '+@static.protocol if @static.is(Hash)) if @static}, cookies #{@ss[0].cookie_enabled ? 'on' : 'off'}>"
94
94
  end
95
95
 
96
96
  # All opts going in one hash.
97
97
  # Opts for Frame:
98
- # :wait, :proc_result, :save_result, :zip, :thread_safe, :result, :stream, :raw, :xhr, :content_type
99
- # Opts passed to Page:
98
+ # :wait, :sync, :thread_safe, :raw, :proc_result, :save_result, :zip, :result, :stream
99
+ # ... processed and passed to Scout:
100
+ # :xhr, :content_type, :auth
101
+ # Opts passed to result:
100
102
  # :xml, :html, :json, :hash, :eval, :load_scripts
101
- # Opts for Scout:
103
+ # Opts passed to Scout:
102
104
  # :headers, :redir, :relvl
103
- # TODO: describe options
105
+ #
106
+ # @ :result : wrapper for the execution result; Page by default, or Result for a Client if it is defined; for an asynchronous call it is returned immediately
107
+ # @ &callback : receives the result instance, and its return value is stored in result#res (which by default is a reference to the result itself)
108
+ # @ :thread_safe : do not use the Curl::Multi#perform execution loop, but call #perform right in the current thread; when set, execution cannot be interrupted from the keyboard (it keeps running in the background, executing callbacks), and it is impossible to issue more parallel requests than the number of allowed parallel connections (it will simply hang)
109
+ # @ :sync : stop (Thread#kill) the perform loop after all requests have been executed; implies wait=true; for a single request implies thread_safe=true
110
+ # @ :wait : wait until all requests have been executed
111
+ # @ :save_result: return #res of each result instance instead of the instance itself; implies wait=true unless :proc_result is given
112
+ # @ :proc_result: a Proc that receives result#res if &callback is given as well; used to build nested blocks for clients; if =nil, implies wait=true
113
+ # @ :raw : store *only* the response body in #res, without headers or debug info
114
+ # @ :raw + :sync : implies save_result=true
115
+ # @ :xhr, :content_type, :auth : build the X-Requested-With, Content-Type and Authorization headers to be passed to Scout
116
+ # @ :xhr : boolean
117
+ # @ :content_type : symbol<extension> | raw string
118
+ # @ :auth : "<username>:<password>"
119
+ #
120
+ # @ :zip, :stream and all options for result : deprecated
121
+ #
122
+ # TODO: Semantically separate synchronous and asynchronous request execution (no matter whether of a series or of single requests), keeping in mind that asynchrony is inherently overloaded and is needed, for example, in message queues, but not in synchronous Rails controllers
104
123
  def exec *args, &callback
105
124
  many, order, orders, with_opts = interpret_request *args
106
125
  L.log({:many => many, :order => order, :orders => orders, :with_opts => with_opts})
@@ -113,8 +132,15 @@ module RHACK
113
132
  # if we aren't said explicitly about the opposite
114
133
  Johnson::Runtime.set_browser_for_curl with_opts
115
134
 
116
- if many then exec_many orders, with_opts, &callback
117
- else exec_one order, with_opts, &callback end
135
+ if many
136
+ result = exec_many orders, with_opts, &callback
137
+ else
138
+ result = exec_one order, with_opts, &callback
139
+ end
140
+ if with_opts[:sync]
141
+ Curl.stop
142
+ end
143
+ result
118
144
  end
119
145
  alias :get :exec
120
146
  alias :run :get
@@ -218,7 +244,13 @@ module RHACK
218
244
 
219
245
  opts[:eval] = false if opts[:json] or opts[:hash] or opts[:raw]
220
246
  opts[:load_scripts] = self if opts[:load_scripts]
221
- opts[:stream] = true if opts[:raw]
247
+ opts[:save_result] = true if opts[:wait] and opts[:raw]
248
+
249
+ if orders
250
+ opts[:thread_safe] = false if @ss.size < orders.size
251
+ else
252
+ opts[:thread_safe] = true if opts[:sync]
253
+ end
222
254
 
223
255
  (opts[:headers] ||= {})['X-Requested-With'] = 'XMLHttpRequest' if opts[:xhr]
224
256
  if opts[:content_type]
@@ -233,6 +265,9 @@ module RHACK
233
265
  (opts[:headers] ||= {})['Content-Type'] = opts[:content_type]
234
266
  end
235
267
  end
268
+ if opts[:auth]
269
+ (opts[:headers] ||= {})['Authorization'] = "Basic #{Base64.encode64(opts[:auth])}".chop
270
+ end
236
271
 
237
272
  [many, order, orders, opts]
238
273
  end
@@ -331,7 +366,7 @@ module RHACK
331
366
  # if no spare scouts can be found, squad simply waits for first callbacks to complete
332
367
  s = @ss.next
333
368
  s.http.on_failure {|curl, error|
334
- if s.process_failure(*error)
369
+ s.process_failure(*error) {
335
370
  # curl itself has decided not to retry a request
336
371
  if opts[:raw]
337
372
  page.res = s.error
@@ -339,14 +374,14 @@ module RHACK
339
374
  run_callbacks! page, opts, &callback
340
375
  # nothing to do here if process returns nil or false
341
376
  end
342
- end
377
+ }
343
378
  }
344
379
  s.send(*(order << opts)) {|curl|
345
380
  # there is a problem with storing html on disk
346
381
  if order[0] == :loadGet and @write_to
347
382
  # sometimes (about 2% for 100-threads-dling) when this string is calling
348
383
  # no matter what +curl.res.body+ has contained here
349
- RMTools.rw @write_to+'/'+order[-2].sub(/^[a-z]+:\/\//, ''), curl.res.body.xml_to_utf
384
+ RMTools.rw @write_to+'/'+order[-2].sub(/^\w+:\/\//, ''), curl.res.body.xml_to_utf
350
385
  end
351
386
  if opts[:raw]
352
387
  page.res = block_given? ? yield(curl) : curl.body_str
@@ -7,7 +7,7 @@ module RHACK
7
7
  attr_accessor :path, :root, :sld, :proxy
8
8
  attr_reader :uri
9
9
  attr_reader :webproxy, :last_method, :proxystr, :headers, :body, :http, :error
10
- attr_reader :cookies, :ua, :refforge, :cookieStore, :cookieProc
10
+ attr_reader :cookies, :ua, :refforge, :cookies_enabled
11
11
 
12
12
  DefaultHeader = {
13
13
  "Expect" => "",
@@ -40,7 +40,7 @@ module RHACK
40
40
  @cookies = {}
41
41
  @body = {}
42
42
  @num = []
43
- @cookieProc = opts[:cp] || opts[:ck]
43
+ @cookies_enabled = opts[:cp] || opts[:ck]
44
44
  @raise_err = opts[:raise] # no way to use @raise id, it makes any 'raise' call here fail
45
45
  @engine = opts[:engine]
46
46
  @timeout = opts[:timeout] || @@timeout || 60
@@ -81,9 +81,9 @@ module RHACK
81
81
  @proxystr = @webproxy ? @proxy[0] : @http.proxy_url
82
82
  else @proxystr = 'localhost'
83
83
  end
84
- if @cookieProc.is Hash
85
- self.main_cks = @cookieProc
86
- @cookieProc = true
84
+ if @cookies_enabled.is Hash
85
+ self.main_cks = @cookies_enabled
86
+ @cookies_enabled = true
87
87
  end
88
88
  self
89
89
  end
@@ -169,7 +169,7 @@ module RHACK
169
169
 
170
170
  def mkHeader(uri)
171
171
  header = DefaultHeader.dup
172
- if @cookieProc
172
+ if @cookies_enabled
173
173
  cookies = ''
174
174
  main_cks.each {|k, v| main_cks.delete k if v.use(cookies, @uri) == :expired}
175
175
  header['Cookie'] = cookies[0..-3]
@@ -182,7 +182,7 @@ module RHACK
182
182
  header
183
183
  end
184
184
 
185
- def ProcCookies(res)
185
+ def process_cookies(res)
186
186
  ck = []
187
187
  case res
188
188
  when String
@@ -195,11 +195,10 @@ module RHACK
195
195
  end
196
196
  return if !ck.b
197
197
  ck.each {|c| Cookie(c, self)}
198
- # StoreCookies if @cookieStore
199
198
  end
200
199
 
201
- def cp_on() @cookieProc = true end
202
- def cp_off() @cookieProc = false end
200
+ def cp_on() @cookies_enabled = true end
201
+ def cp_off() @cookies_enabled = false end
203
202
 
204
203
  def main_cks() @cookies[@uri.host] ||= {} end
205
204
  def main_cks=(cks)
@@ -231,13 +230,18 @@ module RHACK
231
230
  Curl.carier.reqs.include? @http
232
231
  end
233
232
 
233
+ # A scout must not be reused until the response has arrived
234
+ # and its callback has finished processing as well.
235
+ # Otherwise, #retry! may not work as expected:
236
+ # if a scout receives the callback as a block argument, it may re-run not the original callback,
237
+ # but a copy of it bound to another scope.
234
238
  def available?
235
- !loaded?
239
+ !loaded? and !@busy
236
240
  end
237
241
 
238
242
  # - if curl should retry request based on Curl::Err class only
239
243
  # => false
240
- def process_failure(curl_err, message)
244
+ def process_failure(curl_err, message, &callback)
241
245
  @error = curl_err.new message
242
246
  #@error = [curl_err, message] # old
243
247
  @http.outdate!
@@ -247,33 +251,35 @@ module RHACK
247
251
  if retry? curl_err
248
252
  L.debug "#{curl_err} -> reloading scout"
249
253
  retry!
250
- false
251
254
  else
252
255
  L.debug "#{curl_err} -> not reloading scout"
253
256
  raise @error if @raise_err
254
257
  #raise *@error if @raise_err # old
255
- true
258
+ yield if block_given?
259
+ # Now, we assume that data of this @http have been copied or will not be used anymore,
260
+ # thus the scout can be reused.
261
+ @busy = false
256
262
  end
257
263
  end
258
264
 
259
265
  def load!
260
266
  unless Curl.carier.add @http
267
+ L.warn "#{self}##{object_id}: Failed to add Curl::Easy##{@http.object_id} to Curl::Multi##{Curl.carier.object_id}. Trying to remove it and re-add."
261
268
  Curl.carier.remove @http
262
269
  Curl.сarier.add @http
263
270
  end
264
271
  rescue RuntimeError => e
265
272
  e.message << ". Failed to load allready loaded? easy handler: Bad file descriptor" unless Curl::Err::CurlError === e
266
- L.warn "#{e.inspect}: #{e.message}"
273
+ L.warn "#{self}##{object_id}: #{e.inspect}: #{e.message}"
267
274
  if loaded?
268
275
  Curl.carier.remove @http
269
276
  end
270
277
  sleep 1
271
278
  load!
272
- #e.message << ". Failed to load allready loaded? easy handler: Bad file descriptor" unless Curl::Err::CurlError === e
273
- #raise e
274
279
  end
275
280
 
276
281
  def load(path=@path, headers={}, not_redir=1, relvl=10, &callback)
282
+ @busy = true
277
283
  # cache preprocessed data for one time so we can do #retry
278
284
  @__path = path
279
285
  @__headers = headers
@@ -286,21 +292,28 @@ module RHACK
286
292
  @http.timeout = @timeout
287
293
 
288
294
  @http.on_complete {|curl| # = @http
289
- # > Carier.requests--
295
+ # @http has already been removed by the time the request completed,
296
+ # but this callback may occur anywhere in the serial queue of curl callbacks.
290
297
  @error = nil
291
298
  # While not outdated, Curl::Response here may contain pointers on freed
292
299
  # memory, thus throwing exception on #to_s and #inspect
293
300
  @http.outdate!
294
301
  res = @http.res
295
- ProcCookies res if @cookieProc
296
- # We cannot just cancel on_complete in on_redirect block
297
- # because loadGet will immediately reset on_complete back
302
+ process_cookies res if @cookies_enabled
303
+ # We cannot just cancel on_complete in on_redirect block,
304
+ # because loadGet should (and will) immediately reset on_complete back.
298
305
  if res.code.in(300..399) and !not_redir.b and (relvl -= 1) > -1 and loc = res.hash.location
299
306
  loadGet(loc, headers: headers, relvl: relvl, redir: true, &callback)
300
- elsif block_given?
301
- yield @http
307
+ else
308
+ yield @http if block_given?
309
+ # Now, we assume that data of this @http have been copied or will not be used anymore,
310
+ # thus the scout can be reused.
311
+ @busy = false
312
+ @http.on_failure &Proc::NULL
302
313
  end
303
314
  }
315
+ # Curl::Err::* (TCP/IP level) exception callback.
316
+ # It may already have been set elsewhere, in which case it is left as is.
304
317
  @http.on_failure {|curl, error|
305
318
  process_failure(*error)
306
319
  } unless @http.on_failure
@@ -1,3 +1,3 @@
1
1
  module RHACK
2
- VERSION = '1.3.1'
2
+ VERSION = '1.3.3'
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rhack
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.3.1
4
+ version: 1.3.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sergey Baev
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-10-27 00:00:00.000000000 Z
11
+ date: 2014-11-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rmtools