rhack 0.4.1 → 1.0.0.rc4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (53) hide show
  1. data/.gitignore +22 -0
  2. data/Gemfile +2 -5
  3. data/LICENSE +19 -15
  4. data/README.md +66 -26
  5. data/Rakefile +42 -31
  6. data/config/cacert.pem +3895 -0
  7. data/config/rhack.yml.template +40 -0
  8. data/ext/curb-original/curb_config.h +3 -0
  9. data/ext/curb-original/curb_easy.c +3 -54
  10. data/ext/curb-original/curb_multi.c +69 -140
  11. data/ext/curb/curb_multi.c +1 -1
  12. data/lib/rhack.rb +82 -12
  13. data/lib/rhack/cookie.rb +49 -0
  14. data/lib/rhack/curl.rb +6 -0
  15. data/lib/{extensions/curb.rb → rhack/curl/easy.rb} +26 -48
  16. data/lib/rhack/curl/global.rb +175 -0
  17. data/lib/rhack/curl/itt.rb +11 -0
  18. data/lib/rhack/curl/multi.rb +37 -0
  19. data/lib/rhack/curl/post_field.rb +20 -0
  20. data/lib/rhack/curl/response.rb +91 -0
  21. data/lib/rhack/dl.rb +308 -0
  22. data/lib/rhack/frame.rb +316 -0
  23. data/lib/{extensions → rhack/js}/browser/env.js +0 -0
  24. data/lib/{extensions → rhack/js}/browser/jquery.js +0 -0
  25. data/lib/{extensions → rhack/js}/browser/xmlsax.js +0 -0
  26. data/lib/{extensions → rhack/js}/browser/xmlw3cdom_1.js +0 -0
  27. data/lib/{extensions → rhack/js}/browser/xmlw3cdom_2.js +0 -0
  28. data/lib/rhack/js/johnson.rb +71 -0
  29. data/lib/rhack/page.rb +263 -0
  30. data/lib/rhack/proxy.rb +3 -0
  31. data/lib/rhack/proxy/checker.rb +1 -1
  32. data/lib/rhack/scout.rb +342 -0
  33. data/lib/rhack/scout_squad.rb +98 -0
  34. data/lib/rhack/services.rb +1 -464
  35. data/lib/rhack/services/base.rb +59 -0
  36. data/lib/rhack/services/examples.rb +423 -0
  37. data/lib/rhack/version.rb +3 -0
  38. data/lib/rhack_in.rb +3 -2
  39. data/rhack.gemspec +28 -0
  40. metadata +104 -85
  41. data/.gemtest +0 -0
  42. data/Gemfile.lock +0 -23
  43. data/Manifest.txt +0 -60
  44. data/ext/curb/Makefile +0 -217
  45. data/lib/cache.rb +0 -44
  46. data/lib/curl-global.rb +0 -164
  47. data/lib/extensions/declarative.rb +0 -153
  48. data/lib/extensions/johnson.rb +0 -63
  49. data/lib/frame.rb +0 -848
  50. data/lib/init.rb +0 -49
  51. data/lib/rhack.yml.template +0 -19
  52. data/lib/scout.rb +0 -589
  53. data/lib/words.rb +0 -25
data/lib/rhack/dl.rb ADDED
@@ -0,0 +1,308 @@
1
+ # encoding: utf-8
2
+ require 'rhack'
3
+
4
+ module RHACK
5
+ class Frame
6
+
7
# Fetches one or more URLs through the scout squad with a read-through
# class-level Cache. Pass :expire as the trailing argument to revalidate
# cached entries via If-Modified-Since instead of trusting them blindly.
# Returns a single body String when one link is given, otherwise the
# bodies ordered as the links were passed.
# NOTE(review): if every link matches the ads|count|stats skip filter,
# +res+ stays empty and res[0][1] raises NoMethodError — confirm callers
# never pass only such links.
def get_cached(*links)
  res = []
  # trailing :expire symbol switches on cache revalidation
  expire = links[-1] == :expire ? links.pop : false
  links.parses(:uri).each_with_index {|url, i|
    # skip tracking/advertisement endpoints entirely
    next if url.path[/ads|count|stats/]
    file = Cache.load url, !expire
    if file
      if expire
        # cached copy exists but must be revalidated against the server
        @ss.next.loadGet(url.href, :headers=>{'If-Modified-Since'=>file.date}) {|c|
          if c.res.code == 200
            # server has a newer version: replace the cached copy
            res << [i, (data = c.res.body)]
            Cache.save url, data, false
          else
            # 304 (or error): fall back to the cached payload
            res << [i, file.is(String) ? file : read(file.path)]
          end
        }
      else
        # trust the cached copy as-is
        res << [i, file.is(String) ? file : read(file.path)]
      end
    else
      # cache miss: download and store
      @ss.next.loadGet(url.href) {|c|
        if c.res.code == 200
          res << [i, (data = c.res.body)]
          Cache.save url, data, !expire
        end
      }
    end
  }
  # block until all queued scout requests have completed
  Curl.wait
  links.size == 1 ? res[0][1] : res.sort!.lasts
end
38
+
39
# Distributed (multi-scout) download of +uri+ in byte-range parts.
# Sends a probe request first and inspects its headers to learn the
# content length, then fans out Range requests of +psize+ bytes across
# +threads+ scouts. Yields (len, offset, body) chunks to the caller's
# block; yields (len, psize, :careful_dl) once up-front for large files
# so the caller can pre-allocate / lock (see #dl).
# +start+ lets the caller resume from a byte offset.
def get_distr(uri, psize, threads, start=0, print_progress=$verbose)
  raise ConfigError, "Insufficient Scouts in the Frame for distributed downloading" if @ss.size < 2
  @print_progress, code, stop_download, @ss_reserve = print_progress, nil, false, []
  # probe request: parse headers manually to catch status and length
  (s = @ss.next).http.on_header {|h|
    # keep consuming headers until status line, Content-Length or blank line
    next h.size unless h[/Content-Length: (\d+)|HTTP\/1\.[01] (\d+)[^\r]+|^\s*$/]
    if code = $2
      if code != '200'
        L << "#$& getting #{uri}; interrupting request."
        s.http.on_header() # set default process
        next 0 # returning 0 from on_header aborts the transfer
      end
      next h.size
    end

    s.http.on_header() # set default process
    if !$1 # end of header block and no Content-Length was seen
      L << "No Content-Length header; trying to load a whole #{uri} at once!"
      s.loadGet {|c| yield c.res.body.size, 0, c.res.body}
      next 0
    end

    len = $1.to_i - start
    psize = configure_psize(len, psize, threads)
    parts = (len/psize.to_f).ceil
    setup_speedometer(uri, parts, len)
    # give the caller a chance to prepare (lock file) for big downloads
    yield len, psize, :careful_dl if len > (@opts[:careful_dl] || 10.mb)

    # keep only +threads+ scouts for the fan-out, park the rest
    @ss_reserve = @ss[threads+1..-1]
    @ss = @ss[0..threads]
    (0...parts).each {|n|
      break if stop_download

      s = @ss.next
      run_speedometer(s, len, n)
      s.loadGet(uri, :headers => {
        'Range' => "bytes=#{start + n*psize}-#{start + (n+1)*psize - 1}"
      }) {|c|
        clear_speedometer(s)
        # 20x family (200/206) means the range was served
        if c.res.code/10 == 20
          yield len, n*psize, c.res.body
        else
          L << "#{c.res} during get #{uri.inspect}; interrupting request."
          stop_download = true
        end
      }
    }
    0 # abort the probe transfer itself; the ranged requests take over
  }
  s.raise_err = false
  s.loadGet validate uri
ensure
  # return parked scouts to the squad even on failure
  @ss.concat @ss_reserve || []
end
92
+
93
# High-level distributed download of +uri+ into file +df+ (defaults to
# the URL's basename). Wires a chunk-writing callback into #get_distr:
# each (len, pos, body) chunk is written at its offset; a :careful_dl
# sentinel creates a lock file first. Yields +df+ when complete if a
# block is given, otherwise blocks on Curl.wait. Returns +df+.
def dl(uri, df=File.basename(uri.parse(:uri).path), psize=:auto, opts={})
  dled = 0
  lock = ''
  callback = lambda {|len, pos, body|
    if body != :careful_dl
      begin
        write(df, body, pos)
      rescue => e
        # drop into an interactive session to debug the write failure
        binding.start_interaction
        raise
      end
      # all bytes received: remove the lock and notify the caller
      if (dled += body.size) == len
        File.delete lock if File.file? lock
        yield df if block_given?
      end
    else
      lock = lock_file df, len, pos # filename, filesize, partsize
    end
  }
  # keep one scout free for the header probe in get_distr
  opts[:threads] ||= @ss.size-1
  get_distr(uri, psize, opts[:threads], opts[:start].to_i, &callback)
  Curl.wait unless block_given?
  df
end
117
+
118
# Resumable download driven by a ".map" sidecar file that records which
# byte ranges are already on disk (see #read_mapfile / #write_mapfile).
# When the length is unknown, issues a HEAD first and recurses with
# :len filled in. With :threads => 1 parts are chained sequentially via
# a self-calling lambda; otherwise ranges fan out through #exec.
def simple_dl(uri, df=File.basename(uri.parse(:uri).path), opts={})
  opts.reverse_merge! :psize => :auto, :threads => 1, :print_progress => $verbose
  L << opts

  @print_progress = opts[:print_progress]
  unless len = opts[:len] || (map = read_mapfile(df) and map.len)
    # length unknown: HEAD the resource, then restart with :len set
    return @ss.next.loadHead(uri) {|c| $log << c
      if len = c.res['Content-Length']
        simple_dl(uri, df, opts.merge(:len => len.to_i))
      else L.warn "Can't get file size, so it has no sence to download this way. Or maybe it's just an error. Check ObjectSpace.find(#{c.res.object_id}) out."
      end
    }
  end

  psize, parts = check_mapfile(df, opts)
  return unless psize # file already complete (or mapfile invalid)
  L << [psize, parts]
  setup_speedometer(uri, parts.size, len)

  obtained uri do |uri|
    if opts[:threads] == 1
      # NOTE(review): `.to_i` never returns nil, so the `|| ...` fallbacks
      # here are dead code — :start => nil yields 0, not parts[0].begin.
      start = opts[:start].to_i || (parts[0] && parts[0].begin) || 0
      scout = opts[:scout] || @ss.next
      $log << [uri, scout]
      # sequential chain: each completed part schedules the next one
      (loadget = lambda {|n|
        run_speedometer(scout, len, n)
        from = start + n*psize
        to = start + (n+1)*psize - 1
        scout.loadGet(uri, :headers => {'Range' => "bytes=#{from}-#{to}"}) {|c|
          begin
            $log << "writing #{df} from #{from}: #{c.res.body.inspect}"
            write(df, c.res.body, from)
          rescue => e
            binding.start_interaction
            raise
          end
          # write_mapfile returns true once the whole file is accounted for
          if write_mapfile(df, from, to)
            clear_speedometer(scout)
            L.warn "file completely dl'ed, but (n+1)*psize <= len: (#{n}+1)*#{psize} <= #{len}" if (n+1)*psize <= len
            yield df if block_given?
          elsif (n+1)*psize <= len
            loadget[n+1]
          end
        }
      })[0]
    else
      # parallel path: one ranged request per missing part via #exec
      exec(uri, opts.merge(:raw => true, :ranges => parts)) {|c|
        L << c.res
        range = c.req.range
        begin
          write(df, c.res.body, range.begin)
        rescue => e
          binding.start_interaction
          raise
        end
        if write_mapfile(df, range.begin, range.end)
          # download finished: restore default progress handlers
          @ss.each {|s| s.http.on_progress} if @print_progress
          yield df if block_given?
        end
      }
    end
  end
end
181
+
182
# Inspects the ".map" sidecar of +df+ and returns [psize, parts] where
# +parts+ are the byte ranges still to be downloaded, split into
# psize-sized sub-ranges. Returns [] (so callers see psize == nil) when
# the file is already complete; creates a fresh mapfile when none exists.
# Raises when a cached mapfile disagrees with the expected :len.
def check_mapfile(df, opts={})
  opts.reverse_merge! :psize => :auto, :threads => 1
  map = read_mapfile df
  if map
    L << map
    if map.rest.empty?
      # nothing left to fetch: clean up the sidecar
      puts "#{df} is loaded"
      $log << 'deleting mapfile'
      File.delete df+'.map'
      []
    else
      if opts[:len] and map.len != opts[:len]
        raise "Incorrect file size for #{df}"
      end
      psize = configure_psize *opts.values_at(:len, :psize, :threads)
      [psize, map.rest.div(psize)]
    end
  else
    # no mapfile yet: start one and plan the whole 0...len range
    write_mapfile df, opts[:len]
    psize = configure_psize *opts.values_at(:len, :psize, :threads)
    $log << (0...opts[:len]).div(psize)
    [psize, (0...opts[:len]).div(psize)]
  end
end
206
+
207
# Parses the ".map" sidecar of +df+. The first line stores
# "<total>\0...<downloaded>\0...\n"; subsequent lines are "from-to"
# ranges already written. Returns a Hash (with attribute-style access
# from the project's core extensions) carrying .len, .parts and .rest —
# the ranges still missing — or nil when the mapfile is empty/absent.
# NOTE(review): in the fully-downloaded branch ($1 == $2) only .rest is
# set, so map.len is nil there — confirm callers (simple_dl) tolerate it.
def read_mapfile(df)
  df += '.map'
  text = read df
  $log << "mapfile read: #{text}"
  if text.b
    # populate $1 (total) / $2 (downloaded) from the header line
    text[/^(\d+)\0+(\d+)\0*\n/]
    map = {}
    $log << [$1,$2]
    if $1 and $1 == $2
      # everything downloaded: nothing remains
      map.rest = []
    else
      map.len, *map.parts = text.chop/"\n"
      map.len = map.len.to_i
      # "123-456" -> 123..456
      map.parts.map! {|part| part /= '-'; part[0].to_i..part[1].to_i}
      $log << map.parts
      # missing ranges = whole span minus already-downloaded parts
      map.rest = (0...map.len) - XRange(*map.parts)
    end
    map
  end
end
227
+
228
# Appends progress to the ".map" sidecar of +df+.
#   write_mapfile(df, len)        -> initialize header with total length
#   write_mapfile(df, from, to)   -> record one downloaded range
# Returns true when the recorded ranges now cover the whole file
# (callers use this as the "download complete" signal), nil otherwise.
# NOTE(review): the header is only written `if File.file? df` — i.e.
# when the mapfile ALREADY exists; looks inverted (a fresh mapfile gets
# no length header). Confirm against read_mapfile's expectations.
def write_mapfile(df, *args)
  df += '.map'
  map = ''
  if args.size != 2
    # single-argument form: (re)write the length header
    len = args.shift
    map << len.to_s.ljust(22, "\0") << "\n" if File.file? df
  end
  if args.any?
    # range form: update the downloaded-bytes counter in the header
    read(df)[/^(\d+)\0+(\d+)\0*\n/]
    $log << "mapfile read"
    $log << [$1,$2]
    dled = $2.to_i + args[1] - args[0] + 1
    return true if dled == $1.to_i
    map << "#{args[0]}..#{args[1]}\n"
    $log << 'writing mapfile'
    # patch the "downloaded" field in place at offset 11
    write(df, dled.to_s.ljust(11, "\0"), 11)
  end
  $log << [df, map]
  $log << 'writing mapfile'
  write df, map
  nil
end
250
+
251
# Resolves the requested part size into a concrete byte count.
#   Numeric -> truncated to an integer
#   :auto   -> split +len+ evenly across +threads+ (small files: one part)
#   :mb     -> one megabyte
# Raises ArgumentError for anything else.
def configure_psize(len, psize, threads)
  if psize.is_a? Numeric
    psize.to_i
  elsif psize == :auto
    len > 100000 ? len/threads+1 : len
  elsif psize == :mb
    1.mb
  else
    raise ArgumentError, "Incorrect value for part size #{psize}:#{psize.class}"
  end
end
259
+
260
+
261
+
262
# Prepares console progress reporting: per-part counters, the format
# string, and a background thread that recomputes the transfer speed
# every 200ms from the shared @sum/@speedometer state. No-op unless
# @print_progress is set. The thread exits when @stop_print flips true
# (set by run_speedometer at 100%).
def setup_speedometer(uri, parts, len)
  return unless @print_progress
  @progress = Array.new(parts, 0)
  # @speedometer holds [last_sample_time, bytes_at_last_sample]
  @stop_print, @speed, @sum, *@speedometer = false, '', 0, Time.now, 0
  # '%' must be doubled so the URL survives the later format call
  @str = "Downloading #{uri.gsub '%', '%%'} (#{len.bytes}) in %03s streams, %07s/s:"
  # enough backspace/CR pairs to rewind over the wrapped status line
  @bs = "\b\r"*(@newlines = (uri.unpack('U*').size+len.bytes.size+42)/(ENV['COLUMNS'] || 80).to_i)
  Thread.new {
    until @stop_print
      sleep 0.2
      now = Time.now
      if now > @speedometer[0] and @sum > @speedometer[1]
        # mutate in place so run_speedometer sees the fresh value via @speed
        @speed.replace(((@sum - @speedometer[1])/(now - @speedometer[0])).to_i.bytes)
        @speedometer.replace [now, @sum]
      end
    end
  }
end
279
+
280
# Installs a curb on_progress handler on +scout+ that records part +n+'s
# downloaded byte count and redraws the console progress bar. Flips
# @stop_print at 100%, which also terminates the speed thread started in
# setup_speedometer. No-op unless @print_progress is set.
def run_speedometer(scout, len, n)
  return unless @print_progress
  scout.http.on_progress {|dl_need, dl_now, *ul|
    if !@stop_print
      @progress[n] = dl_now
      percents = (@sum = @progress.sum)*100/len
      print @str%[@progress.select_b.size, @speed]+"\n%%[#{'@'*percents}#{' '*(100-percents)}]\r\b\r"+@bs
      if percents == 100
        # push the cursor past the status block, then stop reporting
        puts "\v"*@newlines
        @stop_print = true
      end
    end
    true # tell curb to continue the transfer
  }
end
295
+
296
# Detach the progress handler from +scout+, restoring the default curb
# on_progress behaviour. No-op unless progress printing is enabled.
def clear_speedometer(scout)
  scout.http.on_progress if @print_progress
end
300
+
301
+ end
302
+
303
# Module-level convenience wrapper: spin up a throw-away Frame with
# +threads+ scouts and download +uri+ into +df+ distributedly.
# BUG FIX: Frame#dl's signature is (uri, df, psize=:auto, opts={}) —
# the old call passed the Integer +threads+ as the 4th positional
# argument, so Frame#dl's `opts[:threads] ||= ...` blew up on an
# Integer. The thread count belongs inside the opts Hash.
def dl(uri, df=File.basename(uri.parse(:uri).path), threads=5, timeout=600, &block)
  Curl.run
  Frame({:timeout=>timeout}, threads).dl(uri, df, :auto, {:threads => threads}, &block)
end
module_function :dl
308
+ end
@@ -0,0 +1,316 @@
1
+ # encoding: utf-8
2
+ module RHACK
3
+
4
+ # Frame( ScoutSquad( Curl::Multi <- Scout( Curl API ), Scout, ... ) ) =>
5
+ # Curl -> Johnson::Runtime -> XML::Document => Page( XML::Document ), Page, ...
6
+
7
# Raised when the :zip option is used but url and body are not two
# arrays of equal size. +debug+ supplies the values interpolated into
# the +str+ format template.
class ZippingError < ArgumentError
  def initialize(debug, str="invalid use of :zip option, url and body must be an arrays with the same size\n url: %s(%s), body: %s(%s)")
    super(str % debug)
  end
end
11
+
12
# Raised when a frame without a (compatible) target is asked to resolve
# a local/relative path.
class TargetError < ArgumentError
  def initialize(msg="only static frame can use local paths")
    super
  end
end
16
+
17
# Raised on invalid Frame configuration (e.g. too few scouts for a
# distributed download). Message is mandatory.
class ConfigError < ArgumentError
  def initialize(msg)
    super(msg)
  end
end
21
+
22
+ class Frame
23
__init__  # NOTE(review): presumably a class macro from the project's core extensions — confirm what it generates
attr_reader :loc, :static, :ss, :opts, :use_cache, :write_to
# Page cache shared by ALL Frame instances (class variable — also shared
# with any subclasses).
@@cache = {}
26
+
27
# Frame(url = nil, opts = {}, scouts_count = 10) — flexible positional
# args: a trailing Integer is the scout count (default 10), a trailing
# Hash the options; :scouts in opts overrides the count. A String first
# argument targets the frame immediately (scheme-less URLs get http://).
# NOTE(review): uses the deprecated Fixnum constant — fails on Ruby 3.2+.
def initialize *args
  # normalize to [..., opts_hash, scout_count]
  args << 10 unless args[-1].is Fixnum
  args.insert -2, {} unless args[-2].is Hash
  if scouts = args[-2][:scouts]
    args[-1] = scouts
  end
  @opts = {:eval => Johnson::Enabled, :redir => true, :cp => true, :result => Page}.merge!(args[-2])
  args[-2] = @opts
  if args[0].is String
    url = args[0]
    # prepend scheme in place (String#>> from core extensions)
    'http://' >> url if url !~ /^\w+:\/\//
    update_loc url
  else
    # untargeted frame: no location, no static routing
    @loc = {}
    @static = false
  end
  @ss = ScoutSquad *args
  Curl.run :unless_allready
end
46
+
47
# Re-parses +url+ into the frame's current location hash and refreshes
# the :static setting (defaults to the parsed location itself when the
# option is absent).
def update_loc url
  @loc = url.parse :uri
  # be careful, if you set :static => false, frame will be unable to use "path" url
  @static = @opts.fetch(:static, @loc)
end
52
+
53
# Point the frame — and every scout in its squad — at a new target URL.
# A scheme-less +to+ is assumed to be plain http. +forced+ is passed
# through to ScoutSquad#update.
def retarget to, forced=nil
  to = 'http://' + to unless to =~ /^\w+:/
  @ss.update to, forced
  update_loc to
end
alias :target= :retarget
59
+
60
# Thin delegators exposing the scout squad's rotation API.
def next
  @ss.next
end

def rand
  @ss.rand
end

def each(&block)
  @ss.each(&block)
end

def [](i)
  @ss[i]
end
64
+
65
# Copy the cookie jar of scout +i+ into every scout of the squad, so all
# scouts share the same session state.
def copy_cookies! i=0
  source = @ss[i].cookies
  @ss.each {|scout| scout.cookies.replace source }
end
68
+
69
# Enables (or, with +false+, disables) the class-level page cache.
# opts[:pages] may seed the cache, either as a ready Hash or as a list
# of pages keyed by their href.
def use_cache! opts={}
  if opts == false
    @use_cache = false
  else
    @@cache = opts[:pages].kinda(Hash) ? opts[:pages] : opts[:pages].map_hash {|p| [p.href, p]} if opts[:pages]
    #@write_to = opts[:write_to] if :write_to.in opts
    @use_cache = true
  end
end
78
+
79
# Empties the shared page cache (forcing a GC pass to reclaim pages) and
# optionally toggles caching on/off when +use+ is exactly true or false.
def drop_cache! use=nil
  @@cache.clear
  GC.start
  @use_cache = use if use.in [true, false]
end
84
+
85
# Human-readable summary: target (or "no target"), scout count, static
# routing mode and whether cookies are enabled on the first scout.
def inspect
  sssize = @ss.size
  "<#Frame @ #{@ss.untargeted ? 'no target' : @loc.root}: #{sssize} #{sssize == 1 ? 'scout' : 'scouts'}#{', static'+(' => '+@static.protocol if @static.is(Hash)) if @static}, cookies #{@ss[0].cookieProc ? 'on' : 'off'}>"
end
89
+
90
# Main request entry point. Parses the flexible argument list via
# #interpret_request, then dispatches to #exec_one or #exec_many.
# opts are :eval, :json, :hash, :wait, :proc_result, :save_result, :load_scripts,
# :zip, :thread_safe, :result, :stream, :raw, :xhr + any opts for Scouts in one hash
def exec *args, &callback
  many, order, orders, with_opts = interpret_request *args
  L.log({:many => many, :order => order, :orders => orders, :with_opts => with_opts})

  # :eval needs the Johnson JS engine; drop it silently (with a log) otherwise
  if !Johnson::Enabled and with_opts[:eval]
    L < "failed to use option :eval because Johnson is disabled"
    with_opts.delete :eval
  end
  # JS Runtime is not thread-safe and must be created in curl thread
  # if we aren't said explicitly about the opposite
  Johnson::Runtime.set_browser_for_curl with_opts

  if many then exec_many orders, with_opts, &callback
  else exec_one order, with_opts, &callback end
end
alias :get :exec
alias :run :get
109
+
110
# Decodes exec's flexible positional arguments into a normalized request
# description. Accepted shapes include:
#   (url_or_urls, opts={})                      -> GET (or DELETE via :verb)
#   (body, url, opts={})                        -> POST (multipart=false)
#   (body, multipart, url, opts={})             -> POST/PUT (via :verb)
# where url/body may be arrays (optionally zipped pairwise with :zip) to
# produce batched requests.
# Returns [many, order, orders, opts]: +many+ says whether +orders+ (an
# array of scout method call tuples like [:loadGet, url]) or the single
# +order+ applies. Also normalizes :wait/:sync, :eval, :stream, :xhr etc.
def interpret_request(*args)
  body, mp, url, opts = args.dup.get_opts [nil, false, nil], @opts
  L.log [body, mp, url, opts]
  zip = opts.delete :zip
  verb = opts.delete :verb
  many = order = orders = post = false
  # Default options set is for POST
  if mp.is String or mp.kinda Array and !(url.is String or url.kinda Array)
    # if second arg is String, then that's url
    url, mp, post = mp.dup, false, true
    # L.debug "url #{url.inspect} has been passed as second argument instead of third"
    # But if we have only one argument actually passed
    # except for options hash, then believe it's GET
  elsif body.is String or body.kinda [String]
    L.debug "first parameter (#{body.inspect}) was implicitly taken as url#{' '+body.class if body.kinda Array}, but last paramter is of type #{url.class}, too" if url
    url = body.dup
  elsif !body
    url = nil
  else
    url = url.dup if url
    mp, post = !!mp, true
  end

  if post
    put = verb == :put
    validate_zip url, body if zip
    if zip or url.kinda Array or body.kinda Array
      many = true
      unless put or body.kinda [Hash]
        raise TypeError, "body of post request must be a hash array, params was
(#{args.inspect[1..-2]})"
      end

      if zip or url.kinda Array
        validate_some url
        # pairwise zip, or cartesian product of urls x bodies
        orders = zip ? body.zip(url) : url.xprod(body, :inverse)
      else
        url = validate url
        orders = body.xprod url
      end
      if put
        orders.each {|o| o.unshift :loadPut}
      else
        orders.each {|o| o.unshift :loadPost and o.insert 2, mp}
      end
    else
      unless put or body.is Hash
        raise TypeError, "body of post request must be a hash, params was
(#{args.inspect[1..-2]})"
      end

      url = validate url
      order = put ? [:loadPut, body, url] : [:loadPost, body, mp, url]
    end
  else
    del = verb == :delete
    if url.kinda Array
      many = true
      validate_some url
      orders = [del ? :loadDelete : :loadGet].xprod url
    else
      url = validate url
      order = [del ? :loadDelete : :loadGet, url]
    end
  end
  if !order.b and !orders.b
    raise ArgumentError, "failed to run blank request#{'s' if many}, params was
(#{args.inspect[1..-2]})"
  end

  # :sync is an alias for :wait; saving a result implies waiting
  opts[:wait] = opts[:sync] if :sync.in opts
  opts[:wait] = true if !:wait.in(opts) and
    :proc_result.in(opts) ? !opts[:proc_result] : opts[:save_result]
  # structured/raw results skip JS evaluation
  opts[:eval] = false if opts[:json] or opts[:hash] or opts[:raw]
  opts[:load_scripts] = self if opts[:load_scripts]
  opts[:stream] = true if opts[:raw]
  (opts[:headers] ||= {})['X-Requested-With'] = 'XMLHttpRequest' if opts[:xhr]
  [many, order, orders, opts]
end
189
+
190
+ private
191
# Guards the :zip option: url and body must both be arrays of the same
# size, otherwise a ZippingError describing the mismatch is raised.
def validate_zip(url, body)
  unless url.kinda Array and body.kinda Array
    raise ZippingError, [url.class, nil, body.class, nil]
  end
  if url.size != body.size
    raise ZippingError, [url.class, url.size, body.class, body.size]
  end
end
198
+
199
# :static option now can accept hash with :protocol key, in that case Frame can be relocated to the same domain on another protocol and default protocol would be the value of @static.protocol
# if @static option has a :host value as well then it works just like a default route
#
# Resolves +url+ against the frame's current location/static policy:
# absolute URLs may retarget the frame (or raise TargetError when the
# frame is static and the host differs), root-less paths are joined onto
# the current/static root, and a nil url falls back to the current href.
def validate(url)
  if url
    loc = url.parse:uri
    if loc.root and loc.root != @loc.root
      # absolute URL pointing somewhere else
      if @static
        if @static.is Hash
          if loc.host != @loc.host and !@static.host
            raise TargetError, "unable to get #{url} by static frame [#{@static.protocol}://]#{@loc.host}, you should first update it with new target"
          end
        else
          raise TargetError, "unable to get #{url} by static frame #{@loc.root}, you should first update it with new target"
        end
      end
      # retarget the frame to the new root
      @loc.root, @loc.host, @loc.protocol = loc.root, loc.host, loc.protocol
      url
    elsif !loc.root
      # relative path: need a root to join onto
      if !@static
        raise TargetError, "undefined root for query #{url}, use :static option as Hash to set default protocol and host, or as True to allow using previously used root"
      elsif @static.is Hash
        # targeting relatively to default values (from @static hash)
        @loc.protocol = @static.protocol
        @loc.host = @static.host if @static.host
        @loc.root = @loc.protocol+'://'+@loc.host
      end
      if !@loc.host
        raise TargetError, "undefined host for query #{url}, use :host parameter of :static option to set default host"
      end
      File.join @loc.root, url
    else url
    end
  else
    # no url at all: only valid for a static frame (reload current href)
    raise TargetError if !@static
    @loc.href
  end
end
236
+
237
# Validate a batch of URLs in place (see #validate); returns the same,
# now-normalized array.
def validate_some(urls)
  urls.map! { |address| validate address }
end
240
+
241
# Invokes the user callback on a finished +page+ and decides what ends
# up in page.res depending on :proc_result / :save_result. A callback
# returning :skip leaves page.res untouched (pointing at the page).
def run_callbacks!(page, opts, &callback)
  # if no callback must have run then page.res is equal to the page
  # so we can get the page as result of a sync as well as an async request
  page.res = page
  if callback
    yres = callback.call page
    # if we don't want callback to affect page.res
    # then we should not set :save_result
    if yres != :skip
      if opts[:proc_result].is Proc
        # yres is intermediate result that we should proc
        page.res = opts[:proc_result].call yres
      elsif opts[:save_result] or :proc_result.in opts
        # yres is total result that we should save
        page.res = yres
      end
      # in both cases page.res is set to total result
      # so we can return result from any depth as @res attribute of what we have on top
    end
  end
end
262
+
263
# TODO: found why/how IO on callbacks breaks +curl.res.body+ content and how to fix or how to avoid it
#
# Executes a single normalized request tuple (+order+, e.g.
# [:loadGet, url]) on the next free scout. Serves GETs from the shared
# cache when enabled. Returns the resulting page immediately for async
# calls, or (with :wait) blocks and returns the page — or page.res when
# :save_result/:proc_result is in play.
def exec_one(order, opts, &callback)
  # cache hit path: replay callbacks against the cached page
  if @use_cache and order[0] == :loadGet and page = @@cache[order[1]]
    run_callbacks! page, opts, &callback
    res = opts[:wait] && (opts[:save_result] or :proc_result.in opts) ? page.res : page
    return res
  end
  # must result in Page (default) or it's subclass
  page = opts[:result].new
  # if no spare scouts can be found, squad simply waits for first callbacks to complete
  s = @ss.next
  s.send(*(order << opts)) {|curl|
    # there is a problem with storing html on disk
    if order[0] == :loadGet and @write_to
      # sometimes (about 2% for 100-threads-dling) when this string is calling
      # no matter what +curl.res.body+ has contained here
      RMTools.rw @write_to+'/'+order[-2].sub(/^[a-z]+:\/\//, ''), curl.res.body.xml_to_utf
    end
    if opts[:raw]
      # :raw hands the bare curl handle to the caller's block
      page.res = yield curl
      # here +curl.res.body+ become empty
    elsif page.process(curl, opts)
      @@cache[page.href] = page if order[0] == :loadGet and @use_cache
      run_callbacks! page, opts, &callback
    end
  }
  if opts[:wait]
    # :thread_safe drives the multi handle directly instead of the global waiter
    opts[:thread_safe] ? Curl.carier.perform : Curl.wait
    (opts[:save_result] or :proc_result.in opts) ? page.res : page
  else page
  end
end
295
+
296
# Executes a batch of normalized request tuples, optionally pairing each
# order with a byte range from with_opts[:ranges] (used by simple_dl).
# Returns the collected pages unless :stream is set.
#
# BUG FIX: the old code used `raise ZippingError, debug_array, format_string`,
# which makes Kernel#raise treat the format string as the exception
# BACKTRACE; ZippingError#initialize then formatted its 4-slot default
# template with a 2-element array and itself died with
# "too few arguments". Construct the exception explicitly so the
# intended message is produced.
def exec_many(orders, with_opts, &callback)
  w = with_opts.delete :wait
  # :stream discards results as they come; otherwise collect pages
  iterator = with_opts[:stream] ? :each : :map
  if with_opts[:ranges]
    if orders.size != with_opts[:ranges].size
      raise ZippingError.new([orders.size, with_opts[:ranges].size],
        "orders quantity (%s) is not equal ranges quantity (%s)")
    end
    pages = orders.zip(with_opts[:ranges]).send(iterator) {|order, range|
      # each order gets its own Range header slice
      (with_opts[:headers] ||= {}).Range = "bytes=#{range.begin}-#{range.end}"
      exec_one order, with_opts, &callback
    }
  else
    pages = orders.send(iterator) {|order| exec_one order, with_opts, &callback }
  end
  with_opts[:thread_safe] ? Curl.carier.perform : Curl.wait if w
  with_opts[:stream] || pages
end
313
+
314
+ end
315
+
316
+ end