rhack 0.4.1 → 1.0.0.rc4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. data/.gitignore +22 -0
  2. data/Gemfile +2 -5
  3. data/LICENSE +19 -15
  4. data/README.md +66 -26
  5. data/Rakefile +42 -31
  6. data/config/cacert.pem +3895 -0
  7. data/config/rhack.yml.template +40 -0
  8. data/ext/curb-original/curb_config.h +3 -0
  9. data/ext/curb-original/curb_easy.c +3 -54
  10. data/ext/curb-original/curb_multi.c +69 -140
  11. data/ext/curb/curb_multi.c +1 -1
  12. data/lib/rhack.rb +82 -12
  13. data/lib/rhack/cookie.rb +49 -0
  14. data/lib/rhack/curl.rb +6 -0
  15. data/lib/{extensions/curb.rb → rhack/curl/easy.rb} +26 -48
  16. data/lib/rhack/curl/global.rb +175 -0
  17. data/lib/rhack/curl/itt.rb +11 -0
  18. data/lib/rhack/curl/multi.rb +37 -0
  19. data/lib/rhack/curl/post_field.rb +20 -0
  20. data/lib/rhack/curl/response.rb +91 -0
  21. data/lib/rhack/dl.rb +308 -0
  22. data/lib/rhack/frame.rb +316 -0
  23. data/lib/{extensions → rhack/js}/browser/env.js +0 -0
  24. data/lib/{extensions → rhack/js}/browser/jquery.js +0 -0
  25. data/lib/{extensions → rhack/js}/browser/xmlsax.js +0 -0
  26. data/lib/{extensions → rhack/js}/browser/xmlw3cdom_1.js +0 -0
  27. data/lib/{extensions → rhack/js}/browser/xmlw3cdom_2.js +0 -0
  28. data/lib/rhack/js/johnson.rb +71 -0
  29. data/lib/rhack/page.rb +263 -0
  30. data/lib/rhack/proxy.rb +3 -0
  31. data/lib/rhack/proxy/checker.rb +1 -1
  32. data/lib/rhack/scout.rb +342 -0
  33. data/lib/rhack/scout_squad.rb +98 -0
  34. data/lib/rhack/services.rb +1 -464
  35. data/lib/rhack/services/base.rb +59 -0
  36. data/lib/rhack/services/examples.rb +423 -0
  37. data/lib/rhack/version.rb +3 -0
  38. data/lib/rhack_in.rb +3 -2
  39. data/rhack.gemspec +28 -0
  40. metadata +104 -85
  41. data/.gemtest +0 -0
  42. data/Gemfile.lock +0 -23
  43. data/Manifest.txt +0 -60
  44. data/ext/curb/Makefile +0 -217
  45. data/lib/cache.rb +0 -44
  46. data/lib/curl-global.rb +0 -164
  47. data/lib/extensions/declarative.rb +0 -153
  48. data/lib/extensions/johnson.rb +0 -63
  49. data/lib/frame.rb +0 -848
  50. data/lib/init.rb +0 -49
  51. data/lib/rhack.yml.template +0 -19
  52. data/lib/scout.rb +0 -589
  53. data/lib/words.rb +0 -25
data/lib/rhack/dl.rb ADDED
@@ -0,0 +1,308 @@
1
+ # encoding: utf-8
2
+ require 'rhack'
3
+
4
+ module RHACK
5
+ class Frame
6
+
7
+ def get_cached(*links)
8
+ res = []
9
+ expire = links[-1] == :expire ? links.pop : false
10
+ links.parses(:uri).each_with_index {|url, i|
11
+ next if url.path[/ads|count|stats/]
12
+ file = Cache.load url, !expire
13
+ if file
14
+ if expire
15
+ @ss.next.loadGet(url.href, :headers=>{'If-Modified-Since'=>file.date}) {|c|
16
+ if c.res.code == 200
17
+ res << [i, (data = c.res.body)]
18
+ Cache.save url, data, false
19
+ else
20
+ res << [i, file.is(String) ? file : read(file.path)]
21
+ end
22
+ }
23
+ else
24
+ res << [i, file.is(String) ? file : read(file.path)]
25
+ end
26
+ else
27
+ @ss.next.loadGet(url.href) {|c|
28
+ if c.res.code == 200
29
+ res << [i, (data = c.res.body)]
30
+ Cache.save url, data, !expire
31
+ end
32
+ }
33
+ end
34
+ }
35
+ Curl.wait
36
+ links.size == 1 ? res[0][1] : res.sort!.lasts
37
+ end
38
+
39
+ def get_distr(uri, psize, threads, start=0, print_progress=$verbose)
40
+ raise ConfigError, "Insufficient Scouts in the Frame for distributed downloading" if @ss.size < 2
41
+ @print_progress, code, stop_download, @ss_reserve = print_progress, nil, false, []
42
+ (s = @ss.next).http.on_header {|h|
43
+ next h.size unless h[/Content-Length: (\d+)|HTTP\/1\.[01] (\d+)[^\r]+|^\s*$/]
44
+ if code = $2
45
+ if code != '200'
46
+ L << "#$& getting #{uri}; interrupting request."
47
+ s.http.on_header() # set default process
48
+ next 0
49
+ end
50
+ next h.size
51
+ end
52
+
53
+ s.http.on_header() # set default process
54
+ if !$1 # конец хедера, content-length отсутствует
55
+ L << "No Content-Length header; trying to load a whole #{uri} at once!"
56
+ s.loadGet {|c| yield c.res.body.size, 0, c.res.body}
57
+ next 0
58
+ end
59
+
60
+ len = $1.to_i - start
61
+ psize = configure_psize(len, psize, threads)
62
+ parts = (len/psize.to_f).ceil
63
+ setup_speedometer(uri, parts, len)
64
+ yield len, psize, :careful_dl if len > (@opts[:careful_dl] || 10.mb)
65
+
66
+ @ss_reserve = @ss[threads+1..-1]
67
+ @ss = @ss[0..threads]
68
+ (0...parts).each {|n|
69
+ break if stop_download
70
+
71
+ s = @ss.next
72
+ run_speedometer(s, len, n)
73
+ s.loadGet(uri, :headers => {
74
+ 'Range' => "bytes=#{start + n*psize}-#{start + (n+1)*psize - 1}"
75
+ }) {|c|
76
+ clear_speedometer(s)
77
+ if c.res.code/10 == 20
78
+ yield len, n*psize, c.res.body
79
+ else
80
+ L << "#{c.res} during get #{uri.inspect}; interrupting request."
81
+ stop_download = true
82
+ end
83
+ }
84
+ }
85
+ 0
86
+ }
87
+ s.raise_err = false
88
+ s.loadGet validate uri
89
+ ensure
90
+ @ss.concat @ss_reserve || []
91
+ end
92
+
93
+ def dl(uri, df=File.basename(uri.parse(:uri).path), psize=:auto, opts={})
94
+ dled = 0
95
+ lock = ''
96
+ callback = lambda {|len, pos, body|
97
+ if body != :careful_dl
98
+ begin
99
+ write(df, body, pos)
100
+ rescue => e
101
+ binding.start_interaction
102
+ raise
103
+ end
104
+ if (dled += body.size) == len
105
+ File.delete lock if File.file? lock
106
+ yield df if block_given?
107
+ end
108
+ else
109
+ lock = lock_file df, len, pos # filename, filesize, partsize
110
+ end
111
+ }
112
+ opts[:threads] ||= @ss.size-1
113
+ get_distr(uri, psize, opts[:threads], opts[:start].to_i, &callback)
114
+ Curl.wait unless block_given?
115
+ df
116
+ end
117
+
118
+ def simple_dl(uri, df=File.basename(uri.parse(:uri).path), opts={})
119
+ opts.reverse_merge! :psize => :auto, :threads => 1, :print_progress => $verbose
120
+ L << opts
121
+
122
+ @print_progress = opts[:print_progress]
123
+ unless len = opts[:len] || (map = read_mapfile(df) and map.len)
124
+ return @ss.next.loadHead(uri) {|c| $log << c
125
+ if len = c.res['Content-Length']
126
+ simple_dl(uri, df, opts.merge(:len => len.to_i))
127
+ else L.warn "Can't get file size, so it has no sence to download this way. Or maybe it's just an error. Check ObjectSpace.find(#{c.res.object_id}) out."
128
+ end
129
+ }
130
+ end
131
+
132
+ psize, parts = check_mapfile(df, opts)
133
+ return unless psize
134
+ L << [psize, parts]
135
+ setup_speedometer(uri, parts.size, len)
136
+
137
+ obtained uri do |uri|
138
+ if opts[:threads] == 1
139
+ start = opts[:start].to_i || (parts[0] && parts[0].begin) || 0
140
+ scout = opts[:scout] || @ss.next
141
+ $log << [uri, scout]
142
+ (loadget = lambda {|n|
143
+ run_speedometer(scout, len, n)
144
+ from = start + n*psize
145
+ to = start + (n+1)*psize - 1
146
+ scout.loadGet(uri, :headers => {'Range' => "bytes=#{from}-#{to}"}) {|c|
147
+ begin
148
+ $log << "writing #{df} from #{from}: #{c.res.body.inspect}"
149
+ write(df, c.res.body, from)
150
+ rescue => e
151
+ binding.start_interaction
152
+ raise
153
+ end
154
+ if write_mapfile(df, from, to)
155
+ clear_speedometer(scout)
156
+ L.warn "file completely dl'ed, but (n+1)*psize <= len: (#{n}+1)*#{psize} <= #{len}" if (n+1)*psize <= len
157
+ yield df if block_given?
158
+ elsif (n+1)*psize <= len
159
+ loadget[n+1]
160
+ end
161
+ }
162
+ })[0]
163
+ else
164
+ exec(uri, opts.merge(:raw => true, :ranges => parts)) {|c|
165
+ L << c.res
166
+ range = c.req.range
167
+ begin
168
+ write(df, c.res.body, range.begin)
169
+ rescue => e
170
+ binding.start_interaction
171
+ raise
172
+ end
173
+ if write_mapfile(df, range.begin, range.end)
174
+ @ss.each {|s| s.http.on_progress} if @print_progress
175
+ yield df if block_given?
176
+ end
177
+ }
178
+ end
179
+ end
180
+ end
181
+
182
+ def check_mapfile(df, opts={})
183
+ opts.reverse_merge! :psize => :auto, :threads => 1
184
+ map = read_mapfile df
185
+ if map
186
+ L << map
187
+ if map.rest.empty?
188
+ puts "#{df} is loaded"
189
+ $log << 'deleting mapfile'
190
+ File.delete df+'.map'
191
+ []
192
+ else
193
+ if opts[:len] and map.len != opts[:len]
194
+ raise "Incorrect file size for #{df}"
195
+ end
196
+ psize = configure_psize *opts.values_at(:len, :psize, :threads)
197
+ [psize, map.rest.div(psize)]
198
+ end
199
+ else
200
+ write_mapfile df, opts[:len]
201
+ psize = configure_psize *opts.values_at(:len, :psize, :threads)
202
+ $log << (0...opts[:len]).div(psize)
203
+ [psize, (0...opts[:len]).div(psize)]
204
+ end
205
+ end
206
+
207
+ def read_mapfile(df)
208
+ df += '.map'
209
+ text = read df
210
+ $log << "mapfile read: #{text}"
211
+ if text.b
212
+ text[/^(\d+)\0+(\d+)\0*\n/]
213
+ map = {}
214
+ $log << [$1,$2]
215
+ if $1 and $1 == $2
216
+ map.rest = []
217
+ else
218
+ map.len, *map.parts = text.chop/"\n"
219
+ map.len = map.len.to_i
220
+ map.parts.map! {|part| part /= '-'; part[0].to_i..part[1].to_i}
221
+ $log << map.parts
222
+ map.rest = (0...map.len) - XRange(*map.parts)
223
+ end
224
+ map
225
+ end
226
+ end
227
+
228
+ def write_mapfile(df, *args)
229
+ df += '.map'
230
+ map = ''
231
+ if args.size != 2
232
+ len = args.shift
233
+ map << len.to_s.ljust(22, "\0") << "\n" if File.file? df
234
+ end
235
+ if args.any?
236
+ read(df)[/^(\d+)\0+(\d+)\0*\n/]
237
+ $log << "mapfile read"
238
+ $log << [$1,$2]
239
+ dled = $2.to_i + args[1] - args[0] + 1
240
+ return true if dled == $1.to_i
241
+ map << "#{args[0]}..#{args[1]}\n"
242
+ $log << 'writing mapfile'
243
+ write(df, dled.to_s.ljust(11, "\0"), 11)
244
+ end
245
+ $log << [df, map]
246
+ $log << 'writing mapfile'
247
+ write df, map
248
+ nil
249
+ end
250
+
251
+ def configure_psize(len, psize, threads)
252
+ case psize
253
+ when Numeric; psize.to_i
254
+ when :auto; len > 100000 ? len/threads+1 : len
255
+ when :mb; 1.mb
256
+ else raise ArgumentError, "Incorrect value for part size #{psize}:#{psize.class}"
257
+ end
258
+ end
259
+
260
+
261
+
262
+ def setup_speedometer(uri, parts, len)
263
+ return unless @print_progress
264
+ @progress = Array.new(parts, 0)
265
+ @stop_print, @speed, @sum, *@speedometer = false, '', 0, Time.now, 0
266
+ @str = "Downloading #{uri.gsub '%', '%%'} (#{len.bytes}) in %03s streams, %07s/s:"
267
+ @bs = "\b\r"*(@newlines = (uri.unpack('U*').size+len.bytes.size+42)/(ENV['COLUMNS'] || 80).to_i)
268
+ Thread.new {
269
+ until @stop_print
270
+ sleep 0.2
271
+ now = Time.now
272
+ if now > @speedometer[0] and @sum > @speedometer[1]
273
+ @speed.replace(((@sum - @speedometer[1])/(now - @speedometer[0])).to_i.bytes)
274
+ @speedometer.replace [now, @sum]
275
+ end
276
+ end
277
+ }
278
+ end
279
+
280
+ def run_speedometer(scout, len, n)
281
+ return unless @print_progress
282
+ scout.http.on_progress {|dl_need, dl_now, *ul|
283
+ if !@stop_print
284
+ @progress[n] = dl_now
285
+ percents = (@sum = @progress.sum)*100/len
286
+ print @str%[@progress.select_b.size, @speed]+"\n%%[#{'@'*percents}#{' '*(100-percents)}]\r\b\r"+@bs
287
+ if percents == 100
288
+ puts "\v"*@newlines
289
+ @stop_print = true
290
+ end
291
+ end
292
+ true
293
+ }
294
+ end
295
+
296
+ def clear_speedometer(scout)
297
+ return unless @print_progress
298
+ scout.http.on_progress
299
+ end
300
+
301
+ end
302
+
303
+ def dl(uri, df=File.basename(uri.parse(:uri).path), threads=5, timeout=600, &block)
304
+ Curl.run
305
+ Frame({:timeout=>timeout}, threads).dl(uri, df, :auto, threads, &block)
306
+ end
307
+ module_function :dl
308
+ end
@@ -0,0 +1,316 @@
1
+ # encoding: utf-8
2
+ module RHACK
3
+
4
+ # Frame( ScoutSquad( Curl::Multi <- Scout( Curl API ), Scout, ... ) ) =>
5
+ # Curl -> Johnson::Runtime -> XML::Document => Page( XML::Document ), Page, ...
6
+
7
+ class ZippingError < ArgumentError
8
+ def initialize debug, str="invalid use of :zip option, url and body must be an arrays with the same size\n url: %s(%s), body: %s(%s)"
9
+ super str%debug end
10
+ end
11
+
12
+ class TargetError < ArgumentError
13
+ def initialize msg="only static frame can use local paths"
14
+ super end
15
+ end
16
+
17
+ class ConfigError < ArgumentError
18
+ def initialize msg
19
+ super end
20
+ end
21
+
22
+ class Frame
23
+ __init__
24
+ attr_reader :loc, :static, :ss, :opts, :use_cache, :write_to
25
+ @@cache = {}
26
+
27
+ def initialize *args
28
+ args << 10 unless args[-1].is Fixnum
29
+ args.insert -2, {} unless args[-2].is Hash
30
+ if scouts = args[-2][:scouts]
31
+ args[-1] = scouts
32
+ end
33
+ @opts = {:eval => Johnson::Enabled, :redir => true, :cp => true, :result => Page}.merge!(args[-2])
34
+ args[-2] = @opts
35
+ if args[0].is String
36
+ url = args[0]
37
+ 'http://' >> url if url !~ /^\w+:\/\//
38
+ update_loc url
39
+ else
40
+ @loc = {}
41
+ @static = false
42
+ end
43
+ @ss = ScoutSquad *args
44
+ Curl.run :unless_allready
45
+ end
46
+
47
+ def update_loc url
48
+ @loc = url.parse :uri
49
+ # be careful, if you set :static => false, frame will be unable to use "path" url
50
+ @static = @opts.fetch(:static, @loc)
51
+ end
52
+
53
+ def retarget to, forced=nil
54
+ to = 'http://' + to if to !~ /^\w+:/
55
+ @ss.update to, forced
56
+ update_loc to
57
+ end
58
+ alias :target= :retarget
59
+
60
+ def next() @ss.next end
61
+ def rand() @ss.rand end
62
+ def each(&block) @ss.each &block end
63
+ def [](i) @ss[i] end
64
+
65
+ def copy_cookies! i=0
66
+ @ss.each {|s| s.cookies.replace @ss[i].cookies}
67
+ end
68
+
69
+ def use_cache! opts={}
70
+ if opts == false
71
+ @use_cache = false
72
+ else
73
+ @@cache = opts[:pages].kinda(Hash) ? opts[:pages] : opts[:pages].map_hash {|p| [p.href, p]} if opts[:pages]
74
+ #@write_to = opts[:write_to] if :write_to.in opts
75
+ @use_cache = true
76
+ end
77
+ end
78
+
79
+ def drop_cache! use=nil
80
+ @@cache.clear
81
+ GC.start
82
+ @use_cache = use if use.in [true, false]
83
+ end
84
+
85
+ def inspect
86
+ sssize = @ss.size
87
+ "<#Frame @ #{@ss.untargeted ? 'no target' : @loc.root}: #{sssize} #{sssize == 1 ? 'scout' : 'scouts'}#{', static'+(' => '+@static.protocol if @static.is(Hash)) if @static}, cookies #{@ss[0].cookieProc ? 'on' : 'off'}>"
88
+ end
89
+
90
+ # opts are :eval, :json, :hash, :wait, :proc_result, :save_result, :load_scripts,
91
+ # :zip, :thread_safe, :result, :stream, :raw, :xhr + any opts for Scouts in one hash
92
+ def exec *args, &callback
93
+ many, order, orders, with_opts = interpret_request *args
94
+ L.log({:many => many, :order => order, :orders => orders, :with_opts => with_opts})
95
+
96
+ if !Johnson::Enabled and with_opts[:eval]
97
+ L < "failed to use option :eval because Johnson is disabled"
98
+ with_opts.delete :eval
99
+ end
100
+ # JS Runtime is not thread-safe and must be created in curl thread
101
+ # if we aren't said explicitly about the opposite
102
+ Johnson::Runtime.set_browser_for_curl with_opts
103
+
104
+ if many then exec_many orders, with_opts, &callback
105
+ else exec_one order, with_opts, &callback end
106
+ end
107
+ alias :get :exec
108
+ alias :run :get
109
+
110
+ def interpret_request(*args)
111
+ body, mp, url, opts = args.dup.get_opts [nil, false, nil], @opts
112
+ L.log [body, mp, url, opts]
113
+ zip = opts.delete :zip
114
+ verb = opts.delete :verb
115
+ many = order = orders = post = false
116
+ # Default options set is for POST
117
+ if mp.is String or mp.kinda Array and !(url.is String or url.kinda Array)
118
+ # if second arg is String, then that's url
119
+ url, mp, post = mp.dup, false, true
120
+ # L.debug "url #{url.inspect} has been passed as second argument instead of third"
121
+ # But if we have only one argument actually passed
122
+ # except for options hash, then believe it's GET
123
+ elsif body.is String or body.kinda [String]
124
+ L.debug "first parameter (#{body.inspect}) was implicitly taken as url#{' '+body.class if body.kinda Array}, but last paramter is of type #{url.class}, too" if url
125
+ url = body.dup
126
+ elsif !body
127
+ url = nil
128
+ else
129
+ url = url.dup if url
130
+ mp, post = !!mp, true
131
+ end
132
+
133
+ if post
134
+ put = verb == :put
135
+ validate_zip url, body if zip
136
+ if zip or url.kinda Array or body.kinda Array
137
+ many = true
138
+ unless put or body.kinda [Hash]
139
+ raise TypeError, "body of post request must be a hash array, params was
140
+ (#{args.inspect[1..-2]})"
141
+ end
142
+
143
+ if zip or url.kinda Array
144
+ validate_some url
145
+ orders = zip ? body.zip(url) : url.xprod(body, :inverse)
146
+ else
147
+ url = validate url
148
+ orders = body.xprod url
149
+ end
150
+ if put
151
+ orders.each {|o| o.unshift :loadPut}
152
+ else
153
+ orders.each {|o| o.unshift :loadPost and o.insert 2, mp}
154
+ end
155
+ else
156
+ unless put or body.is Hash
157
+ raise TypeError, "body of post request must be a hash, params was
158
+ (#{args.inspect[1..-2]})"
159
+ end
160
+
161
+ url = validate url
162
+ order = put ? [:loadPut, body, url] : [:loadPost, body, mp, url]
163
+ end
164
+ else
165
+ del = verb == :delete
166
+ if url.kinda Array
167
+ many = true
168
+ validate_some url
169
+ orders = [del ? :loadDelete : :loadGet].xprod url
170
+ else
171
+ url = validate url
172
+ order = [del ? :loadDelete : :loadGet, url]
173
+ end
174
+ end
175
+ if !order.b and !orders.b
176
+ raise ArgumentError, "failed to run blank request#{'s' if many}, params was
177
+ (#{args.inspect[1..-2]})"
178
+ end
179
+
180
+ opts[:wait] = opts[:sync] if :sync.in opts
181
+ opts[:wait] = true if !:wait.in(opts) and
182
+ :proc_result.in(opts) ? !opts[:proc_result] : opts[:save_result]
183
+ opts[:eval] = false if opts[:json] or opts[:hash] or opts[:raw]
184
+ opts[:load_scripts] = self if opts[:load_scripts]
185
+ opts[:stream] = true if opts[:raw]
186
+ (opts[:headers] ||= {})['X-Requested-With'] = 'XMLHttpRequest' if opts[:xhr]
187
+ [many, order, orders, opts]
188
+ end
189
+
190
+ private
191
+ def validate_zip(url, body)
192
+ if !(url.kinda Array and body.kinda Array)
193
+ raise ZippingError, [url.class, nil, body.class, nil]
194
+ elsif url.size != body.size
195
+ raise ZippingError, [url.class, url.size, body.class, body.size]
196
+ end
197
+ end
198
+
199
+ # :static option now can accept hash with :protocol key, in that case Frame can be relocated to the same domain on another protocol and default protocol would be the value of @static.protocol
200
+ # if @static option has a :host value as well then it works just like a default route
201
+ def validate(url)
202
+ if url
203
+ loc = url.parse:uri
204
+ if loc.root and loc.root != @loc.root
205
+ if @static
206
+ if @static.is Hash
207
+ if loc.host != @loc.host and !@static.host
208
+ raise TargetError, "unable to get #{url} by static frame [#{@static.protocol}://]#{@loc.host}, you should first update it with new target"
209
+ end
210
+ else
211
+ raise TargetError, "unable to get #{url} by static frame #{@loc.root}, you should first update it with new target"
212
+ end
213
+ end
214
+ @loc.root, @loc.host, @loc.protocol = loc.root, loc.host, loc.protocol
215
+ url
216
+ elsif !loc.root
217
+ if !@static
218
+ raise TargetError, "undefined root for query #{url}, use :static option as Hash to set default protocol and host, or as True to allow using previously used root"
219
+ elsif @static.is Hash
220
+ # targeting relatively to default values (from @static hash)
221
+ @loc.protocol = @static.protocol
222
+ @loc.host = @static.host if @static.host
223
+ @loc.root = @loc.protocol+'://'+@loc.host
224
+ end
225
+ if !@loc.host
226
+ raise TargetError, "undefined host for query #{url}, use :host parameter of :static option to set default host"
227
+ end
228
+ File.join @loc.root, url
229
+ else url
230
+ end
231
+ else
232
+ raise TargetError if !@static
233
+ @loc.href
234
+ end
235
+ end
236
+
237
+ def validate_some(urls)
238
+ urls.map! {|u| validate u}
239
+ end
240
+
241
+ def run_callbacks!(page, opts, &callback)
242
+ # if no callback must have run then page.res is equal to the page
243
+ # so we can get the page as result of a sync as well as an async request
244
+ page.res = page
245
+ if callback
246
+ yres = callback.call page
247
+ # if we don't want callback to affect page.res
248
+ # then we should not set :save_result
249
+ if yres != :skip
250
+ if opts[:proc_result].is Proc
251
+ # yres is intermediate result that we should proc
252
+ page.res = opts[:proc_result].call yres
253
+ elsif opts[:save_result] or :proc_result.in opts
254
+ # yres is total result that we should save
255
+ page.res = yres
256
+ end
257
+ # in both cases page.res is set to total result
258
+ # so we can return result from any depth as @res attribute of what we have on top
259
+ end
260
+ end
261
+ end
262
+
263
+ # TODO: found why/how IO on callbacks breaks +curl.res.body+ content and how to fix or how to avoid it
264
+ def exec_one(order, opts, &callback)
265
+ if @use_cache and order[0] == :loadGet and page = @@cache[order[1]]
266
+ run_callbacks! page, opts, &callback
267
+ res = opts[:wait] && (opts[:save_result] or :proc_result.in opts) ? page.res : page
268
+ return res
269
+ end
270
+ # must result in Page (default) or it's subclass
271
+ page = opts[:result].new
272
+ # if no spare scouts can be found, squad simply waits for first callbacks to complete
273
+ s = @ss.next
274
+ s.send(*(order << opts)) {|curl|
275
+ # there is a problem with storing html on disk
276
+ if order[0] == :loadGet and @write_to
277
+ # sometimes (about 2% for 100-threads-dling) when this string is calling
278
+ # no matter what +curl.res.body+ has contained here
279
+ RMTools.rw @write_to+'/'+order[-2].sub(/^[a-z]+:\/\//, ''), curl.res.body.xml_to_utf
280
+ end
281
+ if opts[:raw]
282
+ page.res = yield curl
283
+ # here +curl.res.body+ become empty
284
+ elsif page.process(curl, opts)
285
+ @@cache[page.href] = page if order[0] == :loadGet and @use_cache
286
+ run_callbacks! page, opts, &callback
287
+ end
288
+ }
289
+ if opts[:wait]
290
+ opts[:thread_safe] ? Curl.carier.perform : Curl.wait
291
+ (opts[:save_result] or :proc_result.in opts) ? page.res : page
292
+ else page
293
+ end
294
+ end
295
+
296
+ def exec_many(orders, with_opts, &callback)
297
+ w = with_opts.delete :wait
298
+ iterator = with_opts[:stream] ? :each : :map
299
+ if with_opts[:ranges]
300
+ if orders.size != with_opts[:ranges].size
301
+ raise ZippingError, [orders.size, with_opts[:ranges].size], "orders quantity (%s) is not equal ranges quantity (%s)"
302
+ end
303
+ pages = orders.zip(with_opts[:ranges]).send(iterator) {|order, range|
304
+ (with_opts[:headers] ||= {}).Range = "bytes=#{range.begin}-#{range.end}"
305
+ exec_one order, with_opts, &callback
306
+ }
307
+ else
308
+ pages = orders.send(iterator) {|order| exec_one order, with_opts, &callback }
309
+ end
310
+ with_opts[:thread_safe] ? Curl.carier.perform : Curl.wait if w
311
+ with_opts[:stream] || pages
312
+ end
313
+
314
+ end
315
+
316
+ end