rhack 0.4.1 → 1.0.0.rc4
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +22 -0
- data/Gemfile +2 -5
- data/LICENSE +19 -15
- data/README.md +66 -26
- data/Rakefile +42 -31
- data/config/cacert.pem +3895 -0
- data/config/rhack.yml.template +40 -0
- data/ext/curb-original/curb_config.h +3 -0
- data/ext/curb-original/curb_easy.c +3 -54
- data/ext/curb-original/curb_multi.c +69 -140
- data/ext/curb/curb_multi.c +1 -1
- data/lib/rhack.rb +82 -12
- data/lib/rhack/cookie.rb +49 -0
- data/lib/rhack/curl.rb +6 -0
- data/lib/{extensions/curb.rb → rhack/curl/easy.rb} +26 -48
- data/lib/rhack/curl/global.rb +175 -0
- data/lib/rhack/curl/itt.rb +11 -0
- data/lib/rhack/curl/multi.rb +37 -0
- data/lib/rhack/curl/post_field.rb +20 -0
- data/lib/rhack/curl/response.rb +91 -0
- data/lib/rhack/dl.rb +308 -0
- data/lib/rhack/frame.rb +316 -0
- data/lib/{extensions → rhack/js}/browser/env.js +0 -0
- data/lib/{extensions → rhack/js}/browser/jquery.js +0 -0
- data/lib/{extensions → rhack/js}/browser/xmlsax.js +0 -0
- data/lib/{extensions → rhack/js}/browser/xmlw3cdom_1.js +0 -0
- data/lib/{extensions → rhack/js}/browser/xmlw3cdom_2.js +0 -0
- data/lib/rhack/js/johnson.rb +71 -0
- data/lib/rhack/page.rb +263 -0
- data/lib/rhack/proxy.rb +3 -0
- data/lib/rhack/proxy/checker.rb +1 -1
- data/lib/rhack/scout.rb +342 -0
- data/lib/rhack/scout_squad.rb +98 -0
- data/lib/rhack/services.rb +1 -464
- data/lib/rhack/services/base.rb +59 -0
- data/lib/rhack/services/examples.rb +423 -0
- data/lib/rhack/version.rb +3 -0
- data/lib/rhack_in.rb +3 -2
- data/rhack.gemspec +28 -0
- metadata +104 -85
- data/.gemtest +0 -0
- data/Gemfile.lock +0 -23
- data/Manifest.txt +0 -60
- data/ext/curb/Makefile +0 -217
- data/lib/cache.rb +0 -44
- data/lib/curl-global.rb +0 -164
- data/lib/extensions/declarative.rb +0 -153
- data/lib/extensions/johnson.rb +0 -63
- data/lib/frame.rb +0 -848
- data/lib/init.rb +0 -49
- data/lib/rhack.yml.template +0 -19
- data/lib/scout.rb +0 -589
- data/lib/words.rb +0 -25
data/lib/rhack/dl.rb
ADDED
@@ -0,0 +1,308 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require 'rhack'
|
3
|
+
|
4
|
+
module RHACK
|
5
|
+
class Frame
|
6
|
+
|
7
|
+
# Fetches one or more URLs through the Cache, downloading what is missing
# with the frame's scouts.
#
# links - URL strings. A trailing :expire symbol asks to revalidate cached
#         copies with an If-Modified-Since request instead of trusting them.
#
# URLs whose path matches /ads|count|stats/ are skipped. The method blocks
# on Curl.wait until every queued request has completed.
#
# Returns the body when a single link was given (nil if that link was
# skipped or did not answer 200), otherwise the bodies that were obtained,
# ordered like the links.
def get_cached(*links)
  res = []
  expire = links[-1] == :expire ? links.pop : false
  links.parses(:uri).each_with_index {|url, i|
    next if url.path[/ads|count|stats/]
    file = Cache.load url, !expire
    if file
      if expire
        # cached copy exists but must be revalidated
        @ss.next.loadGet(url.href, :headers=>{'If-Modified-Since'=>file.date}) {|c|
          if c.res.code == 200
            res << [i, (data = c.res.body)]
            Cache.save url, data, false
          else
            res << [i, file.is(String) ? file : read(file.path)]
          end
        }
      else
        res << [i, file.is(String) ? file : read(file.path)]
      end
    else
      @ss.next.loadGet(url.href) {|c|
        if c.res.code == 200
          res << [i, (data = c.res.body)]
          Cache.save url, data, !expire
        end
      }
    end
  }
  Curl.wait
  if links.size == 1
    # res is empty when the only link was skipped or failed to load
    res[0] && res[0][1]
  else
    res.sort!.lasts
  end
end
|
38
|
+
|
39
|
+
# Downloads +uri+ as a series of ranged GET requests spread over the scouts.
#
# uri            - URL to download.
# psize          - part size specification passed to configure_psize.
# threads        - highest scout index used for parts (@ss[0..threads]).
# start          - byte offset the download starts from.
# print_progress - truthy to draw the progress bar.
#
# A first request is sent only to read the response headers: a non-200
# status aborts it, a missing Content-Length falls back to loading the whole
# body in one request, otherwise the body is aborted and the ranged requests
# are queued from inside the header callback.
#
# Yields (len, offset, body) for every loaded part; for files bigger than
# @opts[:careful_dl] (10.mb by default) yields (len, psize, :careful_dl)
# once before the parts are requested.
#
# Raises ConfigError if the frame has fewer than two scouts.
#
# NOTE(review): the ensure clause runs when this method returns, which looks
# like it may be before the header callback has split @ss -- confirm that
# the reserve scouts are really given back.
def get_distr(uri, psize, threads, start=0, print_progress=$verbose)
  raise ConfigError, "Insufficient Scouts in the Frame for distributed downloading" if @ss.size < 2
  @print_progress, code, stop_download, @ss_reserve = print_progress, nil, false, []
  (s = @ss.next).http.on_header {|h|
    # HTTP field names are case-insensitive, so "content-length" must match too
    next h.size unless h[/(?i:Content-Length): (\d+)|HTTP\/1\.[01] (\d+)[^\r]+|^\s*$/]
    if code = $2
      if code != '200'
        L << "#$& getting #{uri}; interrupting request."
        s.http.on_header() # set default process
        next 0
      end
      next h.size
    end

    s.http.on_header() # set default process
    if !$1 # end of the headers, Content-Length is absent
      L << "No Content-Length header; trying to load a whole #{uri} at once!"
      s.loadGet {|c| yield c.res.body.size, 0, c.res.body}
      next 0
    end

    len = $1.to_i - start
    psize = configure_psize(len, psize, threads)
    parts = (len/psize.to_f).ceil
    setup_speedometer(uri, parts, len)
    yield len, psize, :careful_dl if len > (@opts[:careful_dl] || 10.mb)

    @ss_reserve = @ss[threads+1..-1]
    @ss = @ss[0..threads]
    (0...parts).each {|n|
      break if stop_download

      # a block-local variable: each completion callback must see its own
      # scout, not whichever scout was picked last
      scout = @ss.next
      run_speedometer(scout, len, n)
      scout.loadGet(uri, :headers => {
        'Range' => "bytes=#{start + n*psize}-#{start + (n+1)*psize - 1}"
      }) {|c|
        clear_speedometer(scout)
        if c.res.code/10 == 20
          yield len, n*psize, c.res.body
        else
          L << "#{c.res} during get #{uri.inspect}; interrupting request."
          stop_download = true
        end
      }
    }
    # returning 0 from the header callback interrupts the probing request
    0
  }
  s.raise_err = false
  s.loadGet validate uri
ensure
  @ss.concat @ss_reserve || []
end
|
92
|
+
|
93
|
+
# Downloads +uri+ into the file +df+ using parallel ranged requests
# (see get_distr).
#
# uri   - URL to download.
# df    - destination path; defaults to the basename of the URL path.
# psize - part size: a number of bytes, :auto or :mb (see configure_psize).
# opts  - Hash with :threads (defaults to the number of scouts minus one)
#         and :start (byte offset, default 0). A bare Integer is accepted
#         as a shorthand for :threads.
#
# When a block is given it is called with +df+ once all bytes are written
# and the method does not wait; otherwise it blocks on Curl.wait.
#
# Returns +df+.
def dl(uri, df=File.basename(uri.parse(:uri).path), psize=:auto, opts={})
  # Integer#[] would raise TypeError on a Symbol index
  opts = {:threads => opts} if opts.is_a?(Integer)
  # computed locally so the caller's hash is left untouched
  threads = opts[:threads] || @ss.size-1
  dled = 0
  lock = ''
  callback = lambda {|len, pos, body|
    if body != :careful_dl
      begin
        write(df, body, pos)
      rescue => e
        # +e+ is kept on purpose: it is visible from the interactive session
        binding.start_interaction
        raise
      end
      if (dled += body.size) == len
        File.delete lock if File.file? lock
        yield df if block_given?
      end
    else
      lock = lock_file df, len, pos # filename, filesize, partsize
    end
  }
  get_distr(uri, psize, threads, opts[:start].to_i, &callback)
  Curl.wait unless block_given?
  df
end
|
117
|
+
|
118
|
+
# Downloads +uri+ into +df+ part by part, tracking the progress in a
# "<df>.map" file so an interrupted download can be continued.
#
# uri  - URL to download.
# df   - destination path; defaults to the basename of the URL path.
# opts - :psize (default :auto), :threads (default 1), :print_progress
#        (default $verbose), :len (file size if known), :start (byte offset),
#        :scout (scout to use in single-thread mode).
#
# If the size is known neither from :len nor from the map file, a HEAD
# request is sent first and the method re-enters itself from its callback.
#
# Yields +df+ once write_mapfile reports the file as complete.
def simple_dl(uri, df=File.basename(uri.parse(:uri).path), opts={}, &block)
  opts.reverse_merge! :psize => :auto, :threads => 1, :print_progress => $verbose
  L << opts

  @print_progress = opts[:print_progress]
  unless len = opts[:len] || (map = read_mapfile(df) and map.len)
    return @ss.next.loadHead(uri) {|c| $log << c
      if len = c.res['Content-Length']
        # pass the block along, otherwise completion is never reported
        simple_dl(uri, df, opts.merge(:len => len.to_i), &block)
      else L.warn "Can't get file size, so it has no sence to download this way. Or maybe it's just an error. Check ObjectSpace.find(#{c.res.object_id}) out."
      end
    }
  end
  # the length may have come from the map file only; check_mapfile needs it
  opts[:len] ||= len

  psize, parts = check_mapfile(df, opts)
  return unless psize
  L << [psize, parts]
  setup_speedometer(uri, parts.size, len)

  obtained uri do |target|
    if opts[:threads] == 1
      # nil.to_i is 0, which is truthy, so the fallbacks need an explicit test
      start = if opts[:start]
        opts[:start].to_i
      else
        (parts[0] && parts[0].begin) || 0
      end
      scout = opts[:scout] || @ss.next
      $log << [target, scout]
      # sequential loader: each finished part requests the next one
      (loadget = lambda {|n|
        run_speedometer(scout, len, n)
        from = start + n*psize
        to = start + (n+1)*psize - 1
        scout.loadGet(target, :headers => {'Range' => "bytes=#{from}-#{to}"}) {|c|
          begin
            $log << "writing #{df} from #{from}: #{c.res.body.inspect}"
            write(df, c.res.body, from)
          rescue => e
            binding.start_interaction
            raise
          end
          if write_mapfile(df, from, to)
            clear_speedometer(scout)
            L.warn "file completely dl'ed, but (n+1)*psize <= len: (#{n}+1)*#{psize} <= #{len}" if (n+1)*psize <= len
            yield df if block_given?
          elsif (n+1)*psize <= len
            loadget[n+1]
          end
        }
      })[0]
    else
      exec(target, opts.merge(:raw => true, :ranges => parts)) {|c|
        L << c.res
        range = c.req.range
        begin
          write(df, c.res.body, range.begin)
        rescue => e
          binding.start_interaction
          raise
        end
        if write_mapfile(df, range.begin, range.end)
          @ss.each {|s| s.http.on_progress} if @print_progress
          yield df if block_given?
        end
      }
    end
  end
end
|
181
|
+
|
182
|
+
# Works out what is left to download for +df+ from its map file.
#
# df   - destination path (the map file is "<df>.map").
# opts - :len (file size), :psize (default :auto), :threads (default 1).
#        Missing defaults are merged into the passed hash.
#
# Returns [] when the map says the file is complete (the map file is then
# deleted), otherwise [psize, parts] where parts is the remaining byte
# ranges divided by psize. Creates the map file when there is none.
#
# Raises RuntimeError if :len disagrees with the length in the map file.
def check_mapfile(df, opts={})
  opts.reverse_merge! :psize => :auto, :threads => 1
  map = read_mapfile df
  if map
    L << map
    if map.rest.empty?
      puts "#{df} is loaded"
      $log << 'deleting mapfile'
      File.delete df+'.map'
      []
    else
      if opts[:len] and map.len != opts[:len]
        raise "Incorrect file size for #{df}"
      end
      # when resuming, the size may be known from the map file only
      psize = configure_psize(opts[:len] || map.len, opts[:psize], opts[:threads])
      [psize, map.rest.div(psize)]
    end
  else
    write_mapfile df, opts[:len]
    psize = configure_psize(*opts.values_at(:len, :psize, :threads))
    parts = (0...opts[:len]).div(psize)
    $log << parts
    [psize, parts]
  end
end
|
206
|
+
|
207
|
+
# Reads and parses "<df>.map".
#
# The first line holds the file length; if it also carries a second,
# equal number the download is taken as complete. The following lines are
# byte ranges that have already been loaded, written as "from..to" (the
# format write_mapfile emits) or "from-to".
#
# Returns nil when the map file is blank or absent, otherwise a Hash with
# :rest (ranges still to load) and, for an unfinished download, :len and
# :parts as well.
def read_mapfile(df)
  df += '.map'
  text = read df
  $log << "mapfile read: #{text}"
  if text.b
    text[/^(\d+)\0+(\d+)\0*\n/]
    map = {}
    $log << [$1,$2]
    if $1 and $1 == $2
      map.rest = []
    else
      map.len, *map.parts = text.chop/"\n"
      map.len = map.len.to_i
      map.parts.map! {|part|
        # accept both separators: the writer uses "..", older maps may use "-"
        bounds = part.split(/\.\.|-/)
        bounds[0].to_i..bounds[1].to_i
      }
      $log << map.parts
      map.rest = (0...map.len) - XRange(*map.parts)
    end
    map
  end
end
|
227
|
+
|
228
|
+
# Updates "<df>.map".
#
# Called as write_mapfile(df, len) it prepares the header line (only when
# the map file already exists); called as write_mapfile(df, from, to) it
# registers the loaded byte range and bumps the loaded-bytes counter stored
# at offset 11 of the header.
#
# Returns true when the registered range completes the file (nothing is
# written in that case), nil otherwise.
def write_mapfile(df, *args)
  mapfile = df + '.map'
  entry = ''
  unless args.size == 2
    len = args.shift
    entry << len.to_s.ljust(22, "\0") << "\n" if File.file? mapfile
  end
  if args.any?
    from, to = args[0], args[1]
    read(mapfile)[/^(\d+)\0+(\d+)\0*\n/]
    total, loaded = $1, $2
    $log << "mapfile read"
    $log << [total, loaded]
    dled = loaded.to_i + to - from + 1
    return true if dled == total.to_i
    entry << "#{from}..#{to}\n"
    $log << 'writing mapfile'
    write(mapfile, dled.to_s.ljust(11, "\0"), 11)
  end
  $log << [mapfile, entry]
  $log << 'writing mapfile'
  write mapfile, entry
  nil
end
|
250
|
+
|
251
|
+
# Resolves a part size specification to a number of bytes.
#
# len     - total number of bytes to load.
# psize   - a Numeric (truncated to an integer), :auto (the whole file when
#           it is at most 100000 bytes, otherwise len/threads+1) or :mb.
# threads - number of parallel streams, used for :auto.
#
# Raises ArgumentError for any other +psize+.
def configure_psize(len, psize, threads)
  return psize.to_i if Numeric === psize
  return (len > 100000 ? len/threads+1 : len) if :auto === psize
  return 1.mb if :mb === psize
  raise ArgumentError, "Incorrect value for part size #{psize}:#{psize.class}"
end
|
259
|
+
|
260
|
+
|
261
|
+
|
262
|
+
# Prepares the state of the progress bar and starts the thread that
# recalculates the download speed every 0.2 s until @stop_print is set.
# Does nothing unless @print_progress is set.
#
# uri   - URL shown in the status line ('%' is escaped for String#%).
# parts - number of parts; one progress slot is kept for each.
# len   - total number of bytes.
def setup_speedometer(uri, parts, len)
  return unless @print_progress
  @progress = Array.new(parts, 0)
  @stop_print = false
  @speed = ''
  @sum = 0
  # [time of the last measurement, bytes loaded at that time]
  @speedometer = [Time.now, 0]
  @str = "Downloading #{uri.gsub '%', '%%'} (#{len.bytes}) in %03s streams, %07s/s:"
  columns = (ENV['COLUMNS'] || 80).to_i
  # number of terminal lines the status line is expected to wrap over
  @newlines = (uri.unpack('U*').size+len.bytes.size+42)/columns
  @bs = "\b\r"*@newlines
  Thread.new do
    until @stop_print
      sleep 0.2
      now = Time.now
      if now > @speedometer[0] and @sum > @speedometer[1]
        @speed.replace(((@sum - @speedometer[1])/(now - @speedometer[0])).to_i.bytes)
        @speedometer.replace [now, @sum]
      end
    end
  end
end
|
279
|
+
|
280
|
+
# Installs a progress callback on +scout+ that records the bytes loaded for
# part +n+ and redraws the status line and the bar. Once 100% is reached
# further drawing is switched off. Does nothing unless @print_progress is set.
#
# scout - scout whose http handle gets the callback.
# len   - total number of bytes (100%).
# n     - index of the part in @progress.
def run_speedometer(scout, len, n)
  return unless @print_progress
  scout.http.on_progress do |dl_need, dl_now, *ul|
    unless @stop_print
      @progress[n] = dl_now
      @sum = @progress.sum
      percents = @sum*100/len
      status = @str%[@progress.select_b.size, @speed]
      bar = "\n%%[#{'@'*percents}#{' '*(100-percents)}]\r\b\r"
      print status+bar+@bs
      if percents == 100
        puts "\v"*@newlines
        @stop_print = true
      end
    end
    # the callback always answers true
    true
  end
end
|
295
|
+
|
296
|
+
# Calls +on_progress+ without a block on the scout's http handle (used here
# to drop the callback installed by run_speedometer); does nothing unless
# @print_progress is set.
def clear_speedometer(scout)
  scout.http.on_progress if @print_progress
end
|
300
|
+
|
301
|
+
end
|
302
|
+
|
303
|
+
# Convenience entry point: starts Curl, builds a temporary Frame with
# +threads+ scouts and downloads +uri+ into +df+ with it.
#
# uri     - URL to download.
# df      - destination path; defaults to the basename of the URL path.
# threads - number of scouts / parallel streams.
# timeout - request timeout handed to the Frame.
#
# The block, if any, is passed on to Frame#dl.
def dl(uri, df=File.basename(uri.parse(:uri).path), threads=5, timeout=600, &block)
  Curl.run
  # Frame#dl takes an options hash as its fourth argument, not a bare number
  Frame({:timeout=>timeout}, threads).dl(uri, df, :auto, {:threads => threads}, &block)
end
|
307
|
+
module_function :dl
|
308
|
+
end
|
data/lib/rhack/frame.rb
ADDED
@@ -0,0 +1,316 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
module RHACK
|
3
|
+
|
4
|
+
# Frame( ScoutSquad( Curl::Multi <- Scout( Curl API ), Scout, ... ) ) =>
|
5
|
+
# Curl -> Johnson::Runtime -> XML::Document => Page( XML::Document ), Page, ...
|
6
|
+
|
7
|
+
# Raised when arguments that have to be paired up do not match.
class ZippingError < ArgumentError
  # debug - Array of values substituted into the message template.
  # str   - format template of the message.
  def initialize(debug, str="invalid use of :zip option, url and body must be an arrays with the same size\n url: %s(%s), body: %s(%s)")
    message = str % debug
    super(message)
  end
end
|
11
|
+
|
12
|
+
# Raised when a request cannot be addressed with the frame's current target.
class TargetError < ArgumentError
  # msg - error message; defaults to the "local paths" explanation.
  def initialize(msg="only static frame can use local paths")
    super(msg)
  end
end
|
16
|
+
|
17
|
+
# Raised when the frame is not configured well enough for an operation.
class ConfigError < ArgumentError
  # msg - error message (required).
  def initialize(msg)
    super(msg)
  end
end
|
21
|
+
|
22
|
+
class Frame
|
23
|
+
__init__
|
24
|
+
attr_reader :loc, :static, :ss, :opts, :use_cache, :write_to
|
25
|
+
@@cache = {}
|
26
|
+
|
27
|
+
# Builds a frame around a squad of scouts.
#
# args - in order, all optional: a target URL String ("http://" is
#        prepended in place when the scheme is missing), an options Hash
#        and the number of scouts (default 10; the :scouts option wins).
#
# The options are merged over the frame defaults and the same hash is
# handed to ScoutSquad. Without a URL the frame starts untargeted and
# non-static. Starts Curl unless it is already running.
def initialize *args
  # Integer instead of Fixnum: the Fixnum constant no longer exists in current Rubies
  args << 10 unless args[-1].is Integer
  args.insert(-2, {}) unless args[-2].is Hash
  if scouts = args[-2][:scouts]
    args[-1] = scouts
  end
  @opts = {:eval => Johnson::Enabled, :redir => true, :cp => true, :result => Page}.merge!(args[-2])
  args[-2] = @opts
  if args[0].is String
    url = args[0]
    'http://' >> url if url !~ /^\w+:\/\//
    update_loc url
  else
    @loc = {}
    @static = false
  end
  @ss = ScoutSquad(*args)
  Curl.run :unless_allready
end
|
46
|
+
|
47
|
+
# Remembers +url+ (parsed) as the current location of the frame.
#
# @static is taken from the :static option when that key is present and
# falls back to the parsed location otherwise. Be careful: with
# :static => false the frame is unable to use "path" urls.
def update_loc(url)
  parsed = url.parse(:uri)
  @loc = parsed
  @static = @opts.fetch(:static) { parsed }
end
|
52
|
+
|
53
|
+
# Points the frame and its scouts at a new target.
#
# to     - new target; "http://" is prepended when it has no scheme.
# forced - passed through to ScoutSquad#update.
def retarget(to, forced=nil)
  unless to =~ /^\w+:/
    to = 'http://' + to
  end
  @ss.update(to, forced)
  update_loc(to)
end
alias :target= :retarget
|
59
|
+
|
60
|
+
# Delegators to the scout squad.

# Returns what ScoutSquad#next returns.
def next
  @ss.next
end

# Returns what ScoutSquad#rand returns.
def rand
  @ss.rand
end

# Iterates over the squad with the given block.
def each(&block)
  @ss.each(&block)
end

# Returns the squad member at index +i+.
def [](i)
  @ss[i]
end
|
64
|
+
|
65
|
+
# Replaces the cookies of every scout with those of the scout at index +i+.
def copy_cookies!(i=0)
  @ss.each do |scout|
    scout.cookies.replace(@ss[i].cookies)
  end
end
|
68
|
+
|
69
|
+
# Switches the page cache on, or off when called with +false+.
#
# opts - false, or a Hash whose optional :pages entry replaces the cache
#        content: either a Hash as is, or a collection of pages that is
#        indexed by their href.
#
# Returns the new value of @use_cache.
def use_cache!(opts={})
  return @use_cache = false if opts == false
  pages = opts[:pages]
  if pages
    @@cache = pages.kinda(Hash) ? pages : pages.map_hash {|page| [page.href, page]}
  end
  #@write_to = opts[:write_to] if :write_to.in opts
  @use_cache = true
end
|
78
|
+
|
79
|
+
# Empties the page cache and triggers a garbage collection.
#
# use - when it is exactly true or false, @use_cache is set to it;
#       any other value leaves @use_cache alone.
def drop_cache!(use=nil)
  @@cache.clear
  GC.start
  return unless use.in [true, false]
  @use_cache = use
end
|
84
|
+
|
85
|
+
# Short description of the frame: target, number of scouts, static mode
# and whether the first scout processes cookies.
def inspect
  sssize = @ss.size
  target = @ss.untargeted ? 'no target' : @loc.root
  # @static may be a Hash (with a protocol) or just a truthy flag; the flag
  # form has nothing to append after ", static"
  static = if @static
    @static.is(Hash) ? ", static => #{@static.protocol}" : ', static'
  end
  "<#Frame @ #{target}: #{sssize} #{sssize == 1 ? 'scout' : 'scouts'}#{static}, cookies #{@ss[0].cookieProc ? 'on' : 'off'}>"
end
|
89
|
+
|
90
|
+
# Runs one or many requests described by +args+ (see interpret_request).
#
# opts are :eval, :json, :hash, :wait, :proc_result, :save_result, :load_scripts,
# :zip, :thread_safe, :result, :stream, :raw, :xhr + any opts for Scouts in one hash
#
# The block is handed to exec_one / exec_many as the callback.
def exec(*args, &callback)
  many, order, orders, with_opts = interpret_request(*args)
  L.log({:many => many, :order => order, :orders => orders, :with_opts => with_opts})

  if !Johnson::Enabled && with_opts[:eval]
    L < "failed to use option :eval because Johnson is disabled"
    with_opts.delete :eval
  end
  # JS Runtime is not thread-safe and must be created in curl thread
  # unless we are explicitly told otherwise
  Johnson::Runtime.set_browser_for_curl with_opts

  return exec_many(orders, with_opts, &callback) if many
  exec_one(order, with_opts, &callback)
end
alias :get :exec
alias :run :get
|
109
|
+
|
110
|
+
# Turns the arguments of #exec into request orders.
#
# args - (body, multipart, url, opts) with everything optional; the
#        positional values are resolved by get_opts over the frame options.
#        A single String / Array of Strings is taken as the url(s) of a GET
#        request; a String or Array in the second position is taken as url
#        of a POST.
#
# Returns [many, order, orders, opts]: +order+ is set for a single request,
# +orders+ for several (many == true). An order is an Array of the scout
# method name (:loadGet, :loadDelete, :loadPost, :loadPut) and its arguments.
#
# Raises TypeError when a post body is not a Hash (an Array of Hashes for
# many requests), ArgumentError when there is nothing to request, and
# whatever validate / validate_zip raise.
def interpret_request(*args)
  body, mp, url, opts = args.dup.get_opts [nil, false, nil], @opts
  L.log [body, mp, url, opts]
  zip = opts.delete :zip
  verb = opts.delete :verb
  many = order = orders = post = false
  # Default options set is for POST
  if (mp.is(String) || mp.kinda(Array)) && !(url.is(String) || url.kinda(Array))
    # if second arg is String, then that's url
    url, mp, post = mp.dup, false, true
  # But if we have only one argument actually passed
  # except for options hash, then believe it's GET
  elsif body.is(String) || body.kinda([String])
    # to_s is required: String#+ does not accept a Class
    L.debug "first parameter (#{body.inspect}) was implicitly taken as url#{' '+body.class.to_s if body.kinda Array}, but last paramter is of type #{url.class}, too" if url
    url = body.dup
  elsif !body
    url = nil
  else
    url = url.dup if url
    mp, post = !!mp, true
  end

  if post
    put = verb == :put
    validate_zip url, body if zip
    if zip or url.kinda Array or body.kinda Array
      many = true
      unless put or body.kinda([Hash])
        raise TypeError, "body of post request must be a hash array, params was
(#{args.inspect[1..-2]})"
      end

      if zip or url.kinda Array
        validate_some url
        # either pair bodies with urls or send every body to every url
        orders = zip ? body.zip(url) : url.xprod(body, :inverse)
      else
        url = validate url
        orders = body.xprod url
      end
      if put
        orders.each {|o| o.unshift :loadPut}
      else
        orders.each {|o| o.unshift :loadPost and o.insert 2, mp}
      end
    else
      unless put or body.is Hash
        raise TypeError, "body of post request must be a hash, params was
(#{args.inspect[1..-2]})"
      end

      url = validate url
      order = put ? [:loadPut, body, url] : [:loadPost, body, mp, url]
    end
  else
    del = verb == :delete
    if url.kinda Array
      many = true
      validate_some url
      orders = [del ? :loadDelete : :loadGet].xprod url
    else
      url = validate url
      order = [del ? :loadDelete : :loadGet, url]
    end
  end
  if !order.b and !orders.b
    raise ArgumentError, "failed to run blank request#{'s' if many}, params was
(#{args.inspect[1..-2]})"
  end

  opts[:wait] = opts[:sync] if :sync.in opts
  # wait by default when a result is expected to be returned
  opts[:wait] = true if !:wait.in(opts) and
    :proc_result.in(opts) ? !opts[:proc_result] : opts[:save_result]
  opts[:eval] = false if opts[:json] or opts[:hash] or opts[:raw]
  opts[:load_scripts] = self if opts[:load_scripts]
  opts[:stream] = true if opts[:raw]
  (opts[:headers] ||= {})['X-Requested-With'] = 'XMLHttpRequest' if opts[:xhr]
  [many, order, orders, opts]
end
|
189
|
+
|
190
|
+
private
|
191
|
+
# Checks that +url+ and +body+ can be zipped together: both have to be
# Arrays of the same size.
#
# Raises ZippingError otherwise; returns nil when everything is fine.
def validate_zip(url, body)
  both_arrays = url.kinda(Array) && body.kinda(Array)
  unless both_arrays
    raise ZippingError, [url.class, nil, body.class, nil]
  end
  if url.size != body.size
    raise ZippingError, [url.class, url.size, body.class, body.size]
  end
end
|
198
|
+
|
199
|
+
# Checks +url+ against the current target of the frame and returns the url
# to request.
#
# The :static option can be a hash with a :protocol key; in that case the
# frame can be relocated to the same domain on another protocol, and the
# default protocol is the value of @static.protocol. If the hash has a
# :host value as well, it works just like a default route.
#
# url - absolute url, a path relative to the current root, or nil for the
#       current location (@loc.href).
#
# An absolute url with another root retargets @loc; a path is joined to the
# root (built from the @static defaults when @static is a Hash).
#
# Raises TargetError when the url cannot be reached from this frame.
def validate(url)
  if url
    loc = url.parse:uri
    if loc.root and loc.root != @loc.root
      if @static
        if @static.is Hash
          if loc.host != @loc.host and !@static.host
            raise TargetError, "unable to get #{url} by static frame [#{@static.protocol}://]#{@loc.host}, you should first update it with new target"
          end
        else
          raise TargetError, "unable to get #{url} by static frame #{@loc.root}, you should first update it with new target"
        end
      end
      @loc.root, @loc.host, @loc.protocol = loc.root, loc.host, loc.protocol
      url
    elsif !loc.root
      if !@static
        raise TargetError, "undefined root for query #{url}, use :static option as Hash to set default protocol and host, or as True to allow using previously used root"
      end
      if @static.is Hash
        # targeting relatively to default values (from @static hash)
        @loc.protocol = @static.protocol
        @loc.host = @static.host if @static.host
      end
      # the host has to be checked before it is used to build the root
      if !@loc.host
        raise TargetError, "undefined host for query #{url}, use :host parameter of :static option to set default host"
      end
      @loc.root = @loc.protocol+'://'+@loc.host if @static.is Hash
      File.join @loc.root, url
    else url
    end
  else
    raise TargetError if !@static
    @loc.href
  end
end
|
236
|
+
|
237
|
+
# Validates every url of +urls+ in place (see validate).
#
# Returns the same, modified Array.
def validate_some(urls)
  urls.map! do |address|
    validate(address)
  end
end
|
240
|
+
|
241
|
+
# Runs the user callback for a loaded page and stores the outcome in
# page.res.
#
# page     - the loaded page; page.res is first set to the page itself, so
#            the page is what one gets when no callback has to run.
# opts     - :proc_result (a Proc that post-processes what the callback
#            returned) and :save_result (keep the callback's value as is).
# callback - called with the page; returning :skip leaves page.res alone.
def run_callbacks!(page, opts, &callback)
  page.res = page
  return unless callback
  yres = callback.call(page)
  # :skip means the callback does not want to affect page.res
  return if yres == :skip
  if opts[:proc_result].is Proc
    # yres is an intermediate result that still has to be processed
    page.res = opts[:proc_result].call(yres)
  elsif opts[:save_result] or :proc_result.in opts
    # yres is the total result and is saved as is
    page.res = yres
  end
end
|
262
|
+
|
263
|
+
# TODO: find out why/how IO in callbacks breaks +curl.res.body+ content and how to fix or avoid it
#
# Executes a single request order.
#
# order    - Array of the scout method name and its arguments; +opts+ is
#            appended to it before sending.
# opts     - request options (see #exec).
# callback - user block; with :raw it gets the curl object, otherwise it is
#            run through run_callbacks! with the processed page.
#
# A cached page is served for :loadGet when the cache is in use. Returns
# page.res when :wait is set together with :save_result or a :proc_result
# key, otherwise the page.
def exec_one(order, opts, &callback)
  is_get = order[0] == :loadGet
  if @use_cache and is_get and page = @@cache[order[1]]
    run_callbacks! page, opts, &callback
    return page unless opts[:wait]
    return (opts[:save_result] or :proc_result.in opts) ? page.res : page
  end
  # must result in Page (default) or its subclass
  page = opts[:result].new
  # if no spare scouts can be found, squad simply waits for first callbacks to complete
  scout = @ss.next
  request = order << opts
  scout.send(*request) do |curl|
    # there is a problem with storing html on disk
    if is_get and @write_to
      # sometimes (about 2% for 100-threads-dling) when this line is called
      # it does not matter what +curl.res.body+ has contained here
      RMTools.rw @write_to+'/'+order[-2].sub(/^[a-z]+:\/\//, ''), curl.res.body.xml_to_utf
    end
    if opts[:raw]
      page.res = yield curl
    # here +curl.res.body+ becomes empty
    elsif page.process(curl, opts)
      @@cache[page.href] = page if is_get and @use_cache
      run_callbacks! page, opts, &callback
    end
  end
  return page unless opts[:wait]
  opts[:thread_safe] ? Curl.carier.perform : Curl.wait
  (opts[:save_result] or :proc_result.in opts) ? page.res : page
end
|
295
|
+
|
296
|
+
# Executes several request orders with the same options.
#
# orders    - Array of orders (see exec_one).
# with_opts - request options; :wait is removed from them and applied once
#             after all orders are queued. With :ranges (one byte range
#             per order) each request gets its own Range header.
# callback  - passed to exec_one for every order.
#
# Returns true-ish with_opts[:stream] when streaming, otherwise the Array of
# exec_one results.
#
# Raises ZippingError when the number of ranges differs from the number of
# orders.
#
# NOTE(review): every order shares the same with_opts (and :headers) hash --
# confirm that the scout reads the Range header before the next iteration
# overwrites it.
def exec_many(orders, with_opts, &callback)
  w = with_opts.delete :wait
  iterator = with_opts[:stream] ? :each : :map
  if with_opts[:ranges]
    if orders.size != with_opts[:ranges].size
      # the template must go to the constructor: the third argument of
      # Kernel#raise is a backtrace, not a message format
      raise ZippingError.new([orders.size, with_opts[:ranges].size], "orders quantity (%s) is not equal ranges quantity (%s)")
    end
    pages = orders.zip(with_opts[:ranges]).send(iterator) {|order, range|
      (with_opts[:headers] ||= {}).Range = "bytes=#{range.begin}-#{range.end}"
      exec_one order, with_opts, &callback
    }
  else
    pages = orders.send(iterator) {|order| exec_one order, with_opts, &callback }
  end
  with_opts[:thread_safe] ? Curl.carier.perform : Curl.wait if w
  with_opts[:stream] || pages
end
|
313
|
+
|
314
|
+
end
|
315
|
+
|
316
|
+
end
|