rhack 0.4.1 → 1.0.0.rc4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +22 -0
- data/Gemfile +2 -5
- data/LICENSE +19 -15
- data/README.md +66 -26
- data/Rakefile +42 -31
- data/config/cacert.pem +3895 -0
- data/config/rhack.yml.template +40 -0
- data/ext/curb-original/curb_config.h +3 -0
- data/ext/curb-original/curb_easy.c +3 -54
- data/ext/curb-original/curb_multi.c +69 -140
- data/ext/curb/curb_multi.c +1 -1
- data/lib/rhack.rb +82 -12
- data/lib/rhack/cookie.rb +49 -0
- data/lib/rhack/curl.rb +6 -0
- data/lib/{extensions/curb.rb → rhack/curl/easy.rb} +26 -48
- data/lib/rhack/curl/global.rb +175 -0
- data/lib/rhack/curl/itt.rb +11 -0
- data/lib/rhack/curl/multi.rb +37 -0
- data/lib/rhack/curl/post_field.rb +20 -0
- data/lib/rhack/curl/response.rb +91 -0
- data/lib/rhack/dl.rb +308 -0
- data/lib/rhack/frame.rb +316 -0
- data/lib/{extensions → rhack/js}/browser/env.js +0 -0
- data/lib/{extensions → rhack/js}/browser/jquery.js +0 -0
- data/lib/{extensions → rhack/js}/browser/xmlsax.js +0 -0
- data/lib/{extensions → rhack/js}/browser/xmlw3cdom_1.js +0 -0
- data/lib/{extensions → rhack/js}/browser/xmlw3cdom_2.js +0 -0
- data/lib/rhack/js/johnson.rb +71 -0
- data/lib/rhack/page.rb +263 -0
- data/lib/rhack/proxy.rb +3 -0
- data/lib/rhack/proxy/checker.rb +1 -1
- data/lib/rhack/scout.rb +342 -0
- data/lib/rhack/scout_squad.rb +98 -0
- data/lib/rhack/services.rb +1 -464
- data/lib/rhack/services/base.rb +59 -0
- data/lib/rhack/services/examples.rb +423 -0
- data/lib/rhack/version.rb +3 -0
- data/lib/rhack_in.rb +3 -2
- data/rhack.gemspec +28 -0
- metadata +104 -85
- data/.gemtest +0 -0
- data/Gemfile.lock +0 -23
- data/Manifest.txt +0 -60
- data/ext/curb/Makefile +0 -217
- data/lib/cache.rb +0 -44
- data/lib/curl-global.rb +0 -164
- data/lib/extensions/declarative.rb +0 -153
- data/lib/extensions/johnson.rb +0 -63
- data/lib/frame.rb +0 -848
- data/lib/init.rb +0 -49
- data/lib/rhack.yml.template +0 -19
- data/lib/scout.rb +0 -589
- data/lib/words.rb +0 -25
data/lib/rhack/dl.rb
ADDED
@@ -0,0 +1,308 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require 'rhack'
|
3
|
+
|
4
|
+
module RHACK
|
5
|
+
class Frame
|
6
|
+
|
7
|
+
# Fetches several +links+ at once, serving bodies from Cache when possible.
# Pass :expire as the last argument to revalidate cached entries with an
# If-Modified-Since request instead of trusting them unconditionally.
# Returns a single body when one link was given, otherwise an array of
# bodies ordered like +links+.
def get_cached(*links)
  res = []
  # a trailing :expire symbol switches on cache revalidation
  expire = links[-1] == :expire ? links.pop : false
  links.parses(:uri).each_with_index {|url, i|
    # ad/counter/stats urls are never fetched or cached
    next if url.path[/ads|count|stats/]
    file = Cache.load url, !expire
    if file
      if expire
        # cached copy exists but must be revalidated against the server
        @ss.next.loadGet(url.href, :headers=>{'If-Modified-Since'=>file.date}) {|c|
          if c.res.code == 200
            # server sent a fresher body: collect it and refresh the cache
            res << [i, (data = c.res.body)]
            Cache.save url, data, false
          else
            # not modified (or error): fall back to the cached copy
            res << [i, file.is(String) ? file : read(file.path)]
          end
        }
      else
        # cache is trusted as-is
        res << [i, file.is(String) ? file : read(file.path)]
      end
    else
      # nothing cached yet: download and store on success
      @ss.next.loadGet(url.href) {|c|
        if c.res.code == 200
          res << [i, (data = c.res.body)]
          Cache.save url, data, !expire
        end
      }
    end
  }
  # block until every scheduled request has completed
  Curl.wait
  # restore request order (res holds [index, body] pairs) before extracting bodies
  links.size == 1 ? res[0][1] : res.sort!.lasts
end
|
38
|
+
|
39
|
+
# Downloads +uri+ in parallel chunks spread over the squad's scouts.
# A probe request installs an on_header hook on the first scout; once
# Content-Length is known, ranged GETs are scheduled and the probe is
# aborted (on_header returning 0 aborts a curl transfer).
# Yields (total_len, offset, body) per finished chunk; yields
# (len, psize, :careful_dl) beforehand when the file exceeds
# @opts[:careful_dl] (default 10 MB).
# Requires at least two scouts in the squad.
def get_distr(uri, psize, threads, start=0, print_progress=$verbose)
  raise ConfigError, "Insufficient Scouts in the Frame for distributed downloading" if @ss.size < 2
  @print_progress, code, stop_download, @ss_reserve = print_progress, nil, false, []
  (s = @ss.next).http.on_header {|h|
    # pass headers through until Content-Length, the status line or the blank line
    next h.size unless h[/Content-Length: (\d+)|HTTP\/1\.[01] (\d+)[^\r]+|^\s*$/]
    if code = $2
      if code != '200'
        L << "#$& getting #{uri}; interrupting request."
        s.http.on_header() # set default process
        next 0 # abort: non-200 status
      end
      next h.size
    end

    s.http.on_header() # set default process
    if !$1 # end of header reached and no Content-Length was seen
      L << "No Content-Length header; trying to load a whole #{uri} at once!"
      s.loadGet {|c| yield c.res.body.size, 0, c.res.body}
      next 0
    end

    len = $1.to_i - start
    psize = configure_psize(len, psize, threads)
    parts = (len/psize.to_f).ceil
    setup_speedometer(uri, parts, len)
    yield len, psize, :careful_dl if len > (@opts[:careful_dl] || 10.mb)

    # keep +threads+ scouts for the chunk requests, park the rest until `ensure`
    @ss_reserve = @ss[threads+1..-1]
    @ss = @ss[0..threads]
    (0...parts).each {|n|
      break if stop_download

      s = @ss.next
      run_speedometer(s, len, n)
      s.loadGet(uri, :headers => {
        'Range' => "bytes=#{start + n*psize}-#{start + (n+1)*psize - 1}"
      }) {|c|
        clear_speedometer(s)
        # any 20x status counts as success for a range request
        if c.res.code/10 == 20
          yield len, n*psize, c.res.body
        else
          L << "#{c.res} during get #{uri.inspect}; interrupting request."
          stop_download = true
        end
      }
    }
    0 # abort the probe transfer itself: the chunks are scheduled now
  }
  s.raise_err = false
  s.loadGet validate uri
ensure
  # return the parked scouts to the squad whatever happened
  @ss.concat @ss_reserve || []
end
|
92
|
+
|
93
|
+
# Downloads +uri+ into file +df+ via get_distr, writing each chunk at its
# byte offset as it arrives. Yields +df+ on completion when a block is
# given; otherwise blocks on Curl.wait. Returns +df+.
def dl(uri, df=File.basename(uri.parse(:uri).path), psize=:auto, opts={})
  dled = 0
  lock = ''
  callback = lambda {|len, pos, body|
    if body != :careful_dl
      begin
        write(df, body, pos)
      rescue => e
        # drop into an interactive session for post-mortem, then re-raise
        binding.start_interaction
        raise
      end
      # all bytes written -> remove the lock file and notify the caller
      if (dled += body.size) == len
        File.delete lock if File.file? lock
        yield df if block_given?
      end
    else
      # the :careful_dl pseudo-chunk announces a big download: create a lock
      lock = lock_file df, len, pos # filename, filesize, partsize
    end
  }
  opts[:threads] ||= @ss.size-1
  get_distr(uri, psize, opts[:threads], opts[:start].to_i, &callback)
  Curl.wait unless block_given?
  df
end
|
117
|
+
|
118
|
+
# Resumable single- or multi-threaded download driven by a ".map" sidecar
# file that records which byte ranges are already on disk.
# When no length is known it first issues a HEAD request and re-enters
# itself with :len filled in from Content-Length.
def simple_dl(uri, df=File.basename(uri.parse(:uri).path), opts={})
  opts.reverse_merge! :psize => :auto, :threads => 1, :print_progress => $verbose
  L << opts

  @print_progress = opts[:print_progress]
  unless len = opts[:len] || (map = read_mapfile(df) and map.len)
    return @ss.next.loadHead(uri) {|c| $log << c
      if len = c.res['Content-Length']
        simple_dl(uri, df, opts.merge(:len => len.to_i))
      else L.warn "Can't get file size, so it has no sence to download this way. Or maybe it's just an error. Check ObjectSpace.find(#{c.res.object_id}) out."
      end
    }
  end

  psize, parts = check_mapfile(df, opts)
  # psize is nil when the mapfile reports the file is already complete
  return unless psize
  L << [psize, parts]
  setup_speedometer(uri, parts.size, len)

  obtained uri do |uri|
    if opts[:threads] == 1
      # NOTE(review): opts[:start].to_i can never be nil, so the
      # `|| (parts[0] && parts[0].begin) || 0` fallbacks are dead code;
      # resuming from the first missing range looks intended — confirm.
      start = opts[:start].to_i || (parts[0] && parts[0].begin) || 0
      scout = opts[:scout] || @ss.next
      $log << [uri, scout]
      # sequential chunk loop implemented as a self-invoking lambda
      (loadget = lambda {|n|
        run_speedometer(scout, len, n)
        from = start + n*psize
        to = start + (n+1)*psize - 1
        scout.loadGet(uri, :headers => {'Range' => "bytes=#{from}-#{to}"}) {|c|
          begin
            $log << "writing #{df} from #{from}: #{c.res.body.inspect}"
            write(df, c.res.body, from)
          rescue => e
            # drop into an interactive session for post-mortem, then re-raise
            binding.start_interaction
            raise
          end
          # write_mapfile returns true once the whole file is accounted for
          if write_mapfile(df, from, to)
            clear_speedometer(scout)
            L.warn "file completely dl'ed, but (n+1)*psize <= len: (#{n}+1)*#{psize} <= #{len}" if (n+1)*psize <= len
            yield df if block_given?
          elsif (n+1)*psize <= len
            loadget[n+1]
          end
        }
      })[0]
    else
      # parallel mode: delegate range scheduling to Frame#exec via :ranges
      exec(uri, opts.merge(:raw => true, :ranges => parts)) {|c|
        L << c.res
        range = c.req.range
        begin
          write(df, c.res.body, range.begin)
        rescue => e
          binding.start_interaction
          raise
        end
        if write_mapfile(df, range.begin, range.end)
          # download finished: reset every scout's progress hook
          @ss.each {|s| s.http.on_progress} if @print_progress
          yield df if block_given?
        end
      }
    end
  end
end
|
181
|
+
|
182
|
+
# Inspects the ".map" sidecar of +df+ and decides what is left to fetch.
# Returns [psize, remaining_ranges]; returns [] when the mapfile says the
# file is complete (so `psize, parts = check_mapfile(...)` leaves psize
# nil), deleting the sidecar. Bootstraps a fresh mapfile when none exists.
# Raises when opts[:len] contradicts the recorded length.
def check_mapfile(df, opts={})
  opts.reverse_merge! :psize => :auto, :threads => 1
  map = read_mapfile df
  if map
    L << map
    if map.rest.empty?
      # nothing left to download: clean up the sidecar
      puts "#{df} is loaded"
      $log << 'deleting mapfile'
      File.delete df+'.map'
      []
    else
      if opts[:len] and map.len != opts[:len]
        raise "Incorrect file size for #{df}"
      end
      psize = configure_psize *opts.values_at(:len, :psize, :threads)
      # split the missing ranges into psize-sized pieces
      [psize, map.rest.div(psize)]
    end
  else
    # no sidecar yet: create one and plan the whole file
    write_mapfile df, opts[:len]
    psize = configure_psize *opts.values_at(:len, :psize, :threads)
    $log << (0...opts[:len]).div(psize)
    [psize, (0...opts[:len]).div(psize)]
  end
end
|
206
|
+
|
207
|
+
# Parses the ".map" sidecar of +df+.
# Returns nil when the mapfile is absent or blank; otherwise a hash-like
# map with .len (total file size), .parts (ranges already written) and
# .rest (ranges still missing — empty when the header counters match,
# i.e. the file is complete).
def read_mapfile(df)
  df += '.map'
  text = read df
  $log << "mapfile read: #{text}"
  if text.b
    # header line: "<total>\0...<downloaded>\0...\n"
    text[/^(\d+)\0+(\d+)\0*\n/]
    map = {}
    $log << [$1,$2]
    if $1 and $1 == $2
      # downloaded counter equals total: nothing is missing
      map.rest = []
    else
      # remaining lines are "from-to" ranges of already-written bytes
      map.len, *map.parts = text.chop/"\n"
      map.len = map.len.to_i
      map.parts.map! {|part| part /= '-'; part[0].to_i..part[1].to_i}
      $log << map.parts
      # everything not covered by the recorded parts is still missing
      map.rest = (0...map.len) - XRange(*map.parts)
    end
    map
  end
end
|
227
|
+
|
228
|
+
# Creates or appends to the ".map" sidecar of +df+.
#   write_mapfile(df, len)      -> start a mapfile for a +len+-byte file
#   write_mapfile(df, from, to) -> record range from..to as downloaded;
#                                  returns true when the whole file is
#                                  accounted for, nil otherwise.
def write_mapfile(df, *args)
  df += '.map'
  map = ''
  if args.size != 2
    len = args.shift
    # NOTE(review): the length header is only emitted `if File.file? df`,
    # i.e. never for a brand-new mapfile — verify whether `unless` was
    # intended here.
    map << len.to_s.ljust(22, "\0") << "\n" if File.file? df
  end
  if args.any?
    # pull the "<total>\0...<downloaded>\0...\n" counters from the header
    read(df)[/^(\d+)\0+(\d+)\0*\n/]
    $log << "mapfile read"
    $log << [$1,$2]
    dled = $2.to_i + args[1] - args[0] + 1
    # completion: downloaded counter reaches the recorded total
    return true if dled == $1.to_i
    map << "#{args[0]}..#{args[1]}\n"
    $log << 'writing mapfile'
    # patch the downloaded counter in place (offset 11 inside the header)
    write(df, dled.to_s.ljust(11, "\0"), 11)
  end
  $log << [df, map]
  $log << 'writing mapfile'
  write df, map
  nil
end
|
250
|
+
|
251
|
+
# Resolves the requested part size into a concrete byte count.
# Accepts a Numeric (taken as-is, truncated to integer), :auto (split
# +len+ across +threads+ unless the file is small), or :mb (1 megabyte).
# Raises ArgumentError for anything else.
def configure_psize(len, psize, threads)
  if psize.is_a? Numeric
    psize.to_i
  elsif psize == :auto
    # small files are fetched in a single part
    len > 100000 ? len/threads + 1 : len
  elsif psize == :mb
    1.mb
  else
    raise ArgumentError, "Incorrect value for part size #{psize}:#{psize.class}"
  end
end
|
259
|
+
|
260
|
+
|
261
|
+
|
262
|
+
# Prepares console progress state (per-part counters, format string,
# cursor-rewind string) and starts a background thread that recomputes
# the average speed every 0.2s until @stop_print is set.
# No-op unless @print_progress is enabled.
def setup_speedometer(uri, parts, len)
  return unless @print_progress
  @progress = Array.new(parts, 0)
  # @speedometer holds [last_sample_time, last_sample_bytes]
  @stop_print, @speed, @sum, *@speedometer = false, '', 0, Time.now, 0
  # '%' must be doubled because @str is later used as a format string
  @str = "Downloading #{uri.gsub '%', '%%'} (#{len.bytes}) in %03s streams, %07s/s:"
  # enough "\b\r" to rewind the cursor over the wrapped status line
  @bs = "\b\r"*(@newlines = (uri.unpack('U*').size+len.bytes.size+42)/(ENV['COLUMNS'] || 80).to_i)
  Thread.new {
    until @stop_print
      sleep 0.2
      now = Time.now
      if now > @speedometer[0] and @sum > @speedometer[1]
        # @speed is mutated in place so the printing hook sees updates
        @speed.replace(((@sum - @speedometer[1])/(now - @speedometer[0])).to_i.bytes)
        @speedometer.replace [now, @sum]
      end
    end
  }
end
|
279
|
+
|
280
|
+
# Installs an on_progress hook on +scout+ that redraws the progress bar
# with chunk +n+'s byte count folded in; stops printing at 100%.
# The hook returns true so curl keeps the transfer alive.
# No-op unless @print_progress is enabled.
def run_speedometer(scout, len, n)
  return unless @print_progress
  scout.http.on_progress {|dl_need, dl_now, *ul|
    if !@stop_print
      @progress[n] = dl_now
      percents = (@sum = @progress.sum)*100/len
      print @str%[@progress.select_b.size, @speed]+"\n%%[#{'@'*percents}#{' '*(100-percents)}]\r\b\r"+@bs
      if percents == 100
        # scroll past the bar and stop the speed-sampling thread
        puts "\v"*@newlines
        @stop_print = true
      end
    end
    true
  }
end
|
295
|
+
|
296
|
+
# Restores the scout's default progress handler once its chunk is done.
# No-op unless @print_progress is enabled.
def clear_speedometer(scout)
  scout.http.on_progress if @print_progress
end
|
300
|
+
|
301
|
+
end
|
302
|
+
|
303
|
+
# Convenience one-shot download: spins up a throwaway Frame with
# +threads+ scouts and delegates to Frame#dl.
# FIX: Frame#dl's signature is (uri, df, psize, opts={}); the previous
# code passed the Integer +threads+ as the positional opts hash, which
# crashed inside Frame#dl on `opts[:threads] ||=`. The thread count is
# now passed inside the options hash.
def dl(uri, df=File.basename(uri.parse(:uri).path), threads=5, timeout=600, &block)
  Curl.run
  Frame({:timeout=>timeout}, threads).dl(uri, df, :auto, :threads => threads, &block)
end
module_function :dl
|
308
|
+
end
|
data/lib/rhack/frame.rb
ADDED
@@ -0,0 +1,316 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
module RHACK
|
3
|
+
|
4
|
+
# Frame( ScoutSquad( Curl::Multi <- Scout( Curl API ), Scout, ... ) ) =>
|
5
|
+
# Curl -> Johnson::Runtime -> XML::Document => Page( XML::Document ), Page, ...
|
6
|
+
|
7
|
+
# Raised when the :zip option is used with url/body collections whose
# classes or sizes do not line up.
# +debug+ supplies the four values spliced into the format string:
# url class, url size, body class, body size.
class ZippingError < ArgumentError
  def initialize(debug, str="invalid use of :zip option, url and body must be an arrays with the same size\n url: %s(%s), body: %s(%s)")
    super(str % debug)
  end
end
|
11
|
+
|
12
|
+
# Raised when a request would leave the frame's configured target
# (e.g. a path-only url on a non-static frame).
class TargetError < ArgumentError
  def initialize(msg="only static frame can use local paths")
    # bare `super` forwards +msg+ to ArgumentError
    super
  end
end
|
16
|
+
|
17
|
+
# Raised when the frame's configuration cannot support the requested
# operation (e.g. too few scouts for a distributed download).
class ConfigError < ArgumentError
  def initialize(msg)
    # bare `super` forwards +msg+ to ArgumentError
    super
  end
end
|
21
|
+
|
22
|
+
class Frame
|
23
|
+
__init__
|
24
|
+
attr_reader :loc, :static, :ss, :opts, :use_cache, :write_to
|
25
|
+
@@cache = {}
|
26
|
+
|
27
|
+
# Builds a Frame over a ScoutSquad.
# Flexible argument list: an optional target url String, an options Hash
# and a scout count (Fixnum, default 10, overridable via :scouts).
def initialize *args
  args << 10 unless args[-1].is Fixnum       # default squad size
  args.insert -2, {} unless args[-2].is Hash # default options hash
  if scouts = args[-2][:scouts]
    args[-1] = scouts
  end
  @opts = {:eval => Johnson::Enabled, :redir => true, :cp => true, :result => Page}.merge!(args[-2])
  args[-2] = @opts
  if args[0].is String
    url = args[0]
    # prepend a scheme in place when the url has none
    'http://' >> url if url !~ /^\w+:\/\//
    update_loc url
  else
    # untargeted frame: no location and no static root yet
    @loc = {}
    @static = false
  end
  @ss = ScoutSquad *args
  Curl.run :unless_allready
end
|
46
|
+
|
47
|
+
# Re-parses +url+ into @loc and refreshes the @static policy from @opts
# (defaulting to the parsed location itself).
def update_loc url
  @loc = url.parse :uri
  # be careful, if you set :static => false, frame will be unable to use "path" url
  @static = @opts.fetch(:static, @loc)
end
|
52
|
+
|
53
|
+
# Points the frame (and every scout in the squad) at a new root url,
# adding a scheme when missing. +forced+ is passed through to
# ScoutSquad#update.
def retarget to, forced=nil
  to = 'http://' + to if to !~ /^\w+:/
  @ss.update to, forced
  update_loc to
end
alias :target= :retarget
|
59
|
+
|
60
|
+
# Thin delegation helpers to the underlying ScoutSquad.
def next() @ss.next end              # next spare scout
def rand() @ss.rand end              # random scout
def each(&block) @ss.each &block end # iterate over scouts
def [](i) @ss[i] end                 # scout by index
|
64
|
+
|
65
|
+
# Replaces every scout's cookie jar with a copy of scout +i+'s cookies.
def copy_cookies! i=0
  @ss.each {|s| s.cookies.replace @ss[i].cookies}
end
|
68
|
+
|
69
|
+
# Enables the class-wide page cache (or disables it when called with
# +false+). opts[:pages] may pre-seed the cache with either a Hash or an
# array of pages (which is keyed by href).
def use_cache! opts={}
  if opts == false
    @use_cache = false
  else
    @@cache = opts[:pages].kinda(Hash) ? opts[:pages] : opts[:pages].map_hash {|p| [p.href, p]} if opts[:pages]
    #@write_to = opts[:write_to] if :write_to.in opts
    @use_cache = true
  end
end
|
78
|
+
|
79
|
+
# Empties the class-wide page cache and reclaims memory; optionally
# toggles caching when +use+ is explicitly true or false.
def drop_cache! use=nil
  @@cache.clear
  GC.start
  @use_cache = use if use.in [true, false]
end
|
84
|
+
|
85
|
+
# Human-readable summary: target root, scout count, static policy and
# whether cookie processing is active on the first scout.
def inspect
  sssize = @ss.size
  "<#Frame @ #{@ss.untargeted ? 'no target' : @loc.root}: #{sssize} #{sssize == 1 ? 'scout' : 'scouts'}#{', static'+(' => '+@static.protocol if @static.is(Hash)) if @static}, cookies #{@ss[0].cookieProc ? 'on' : 'off'}>"
end
|
89
|
+
|
90
|
+
# opts are :eval, :json, :hash, :wait, :proc_result, :save_result, :load_scripts,
|
91
|
+
# :zip, :thread_safe, :result, :stream, :raw, :xhr + any opts for Scouts in one hash
|
92
|
+
# Entry point for all requests: normalizes the argument list via
# interpret_request and dispatches to exec_one or exec_many.
# opts are :eval, :json, :hash, :wait, :proc_result, :save_result, :load_scripts,
# :zip, :thread_safe, :result, :stream, :raw, :xhr + any opts for Scouts in one hash
def exec *args, &callback
  many, order, orders, with_opts = interpret_request *args
  L.log({:many => many, :order => order, :orders => orders, :with_opts => with_opts})

  if !Johnson::Enabled and with_opts[:eval]
    L < "failed to use option :eval because Johnson is disabled"
    with_opts.delete :eval
  end
  # JS Runtime is not thread-safe and must be created in curl thread
  # if we aren't said explicitly about the opposite
  Johnson::Runtime.set_browser_for_curl with_opts

  if many then exec_many orders, with_opts, &callback
  else exec_one order, with_opts, &callback end
end
alias :get :exec
alias :run :get
|
109
|
+
|
110
|
+
# Normalizes exec's flexible argument list into [many, order, orders, opts]:
#   many   — true when several requests were produced
#   order  — a single [load_method, *args] array (when many is false)
#   orders — an array of such orders (when many is true)
#   opts   — the merged request options
# Argument heuristics: (body, multipart, url, opts) selects POST/PUT;
# a single String (or String array) selects GET/DELETE; Array arguments
# and the :zip option fan out into batches.
def interpret_request(*args)
  body, mp, url, opts = args.dup.get_opts [nil, false, nil], @opts
  L.log [body, mp, url, opts]
  zip = opts.delete :zip
  verb = opts.delete :verb
  many = order = orders = post = false
  # Default options set is for POST
  if mp.is String or mp.kinda Array and !(url.is String or url.kinda Array)
    # if second arg is String, then that's url
    url, mp, post = mp.dup, false, true
    # L.debug "url #{url.inspect} has been passed as second argument instead of third"
    # But if we have only one argument actually passed
    # except for options hash, then believe it's GET
  elsif body.is String or body.kinda [String]
    L.debug "first parameter (#{body.inspect}) was implicitly taken as url#{' '+body.class if body.kinda Array}, but last paramter is of type #{url.class}, too" if url
    url = body.dup
  elsif !body
    url = nil
  else
    url = url.dup if url
    mp, post = !!mp, true
  end

  if post
    put = verb == :put
    validate_zip url, body if zip
    if zip or url.kinda Array or body.kinda Array
      many = true
      unless put or body.kinda [Hash]
        raise TypeError, "body of post request must be a hash array, params was
  (#{args.inspect[1..-2]})"
      end

      # pair each body with its url (:zip) or cross-product them
      if zip or url.kinda Array
        validate_some url
        orders = zip ? body.zip(url) : url.xprod(body, :inverse)
      else
        url = validate url
        orders = body.xprod url
      end
      if put
        orders.each {|o| o.unshift :loadPut}
      else
        orders.each {|o| o.unshift :loadPost and o.insert 2, mp}
      end
    else
      unless put or body.is Hash
        raise TypeError, "body of post request must be a hash, params was
  (#{args.inspect[1..-2]})"
      end

      url = validate url
      order = put ? [:loadPut, body, url] : [:loadPost, body, mp, url]
    end
  else
    del = verb == :delete
    if url.kinda Array
      many = true
      validate_some url
      orders = [del ? :loadDelete : :loadGet].xprod url
    else
      url = validate url
      order = [del ? :loadDelete : :loadGet, url]
    end
  end
  if !order.b and !orders.b
    raise ArgumentError, "failed to run blank request#{'s' if many}, params was
  (#{args.inspect[1..-2]})"
  end

  # option normalization: :sync aliases :wait; waiting is implied when a
  # result has to be captured; :raw implies :stream and disables :eval
  opts[:wait] = opts[:sync] if :sync.in opts
  opts[:wait] = true if !:wait.in(opts) and
    :proc_result.in(opts) ? !opts[:proc_result] : opts[:save_result]
  opts[:eval] = false if opts[:json] or opts[:hash] or opts[:raw]
  opts[:load_scripts] = self if opts[:load_scripts]
  opts[:stream] = true if opts[:raw]
  (opts[:headers] ||= {})['X-Requested-With'] = 'XMLHttpRequest' if opts[:xhr]
  [many, order, orders, opts]
end
|
189
|
+
|
190
|
+
private
|
191
|
+
# Ensures the :zip option got two arrays of equal size; raises
# ZippingError describing the mismatch otherwise.
def validate_zip(url, body)
  unless url.kinda Array and body.kinda Array
    raise ZippingError, [url.class, nil, body.class, nil]
  end
  raise ZippingError, [url.class, url.size, body.class, body.size] if url.size != body.size
end
|
198
|
+
|
199
|
+
# :static option now can accept hash with :procotol key, in that case Frame can be relocated to the same domain on another protocol and default protocol would be the value of @static.protocol
|
200
|
+
# if @static option has a :host value as well then it works just like a default route
|
201
|
+
# Resolves +url+ against the frame's current location while enforcing the
# :static policy; raises TargetError for cross-root or rootless urls the
# policy forbids. Returns an absolute url string.
# :static option now can accept hash with :procotol key, in that case Frame can be relocated to the same domain on another protocol and default protocol would be the value of @static.protocol
# if @static option has a :host value as well then it works just like a default route
def validate(url)
  if url
    loc = url.parse:uri
    if loc.root and loc.root != @loc.root
      # absolute url pointing away from the current root
      if @static
        if @static.is Hash
          if loc.host != @loc.host and !@static.host
            raise TargetError, "unable to get #{url} by static frame [#{@static.protocol}://]#{@loc.host}, you should first update it with new target"
          end
        else
          raise TargetError, "unable to get #{url} by static frame #{@loc.root}, you should first update it with new target"
        end
      end
      # retarget the frame to the new root
      @loc.root, @loc.host, @loc.protocol = loc.root, loc.host, loc.protocol
      url
    elsif !loc.root
      # relative url: resolve against the static defaults
      if !@static
        raise TargetError, "undefined root for query #{url}, use :static option as Hash to set default protocol and host, or as True to allow using previously used root"
      elsif @static.is Hash
        # targeting relatively to default values (from @static hash)
        @loc.protocol = @static.protocol
        @loc.host = @static.host if @static.host
        @loc.root = @loc.protocol+'://'+@loc.host
      end
      if !@loc.host
        raise TargetError, "undefined host for query #{url}, use :host parameter of :static option to set default host"
      end
      File.join @loc.root, url
    else url
    end
  else
    # no url at all: fall back to the current location (static frames only)
    raise TargetError if !@static
    @loc.href
  end
end
|
236
|
+
|
237
|
+
# Validates every url of +urls+ in place (mutates and returns the array).
def validate_some(urls)
  urls.map! {|u| validate u}
end
|
240
|
+
|
241
|
+
# Invokes the user +callback+ on +page+ and stores the (possibly
# post-processed) result in page.res according to :proc_result /
# :save_result. A callback returning :skip leaves page.res untouched.
def run_callbacks!(page, opts, &callback)
  # if no callback must have run then page.res is equal to the page
  # so we can get the page as result of a sync as well as an async request
  page.res = page
  if callback
    yres = callback.call page
    # if we don't want callback to affect page.res
    # then we should not set :save_result
    if yres != :skip
      if opts[:proc_result].is Proc
        # yres is intermediate result that we should proc
        page.res = opts[:proc_result].call yres
      elsif opts[:save_result] or :proc_result.in opts
        # yres is total result that we should save
        page.res = yres
      end
      # in both cases page.res is set to total result
      # so we can return result from any depth as @res attribute of what we have on top
    end
  end
end
|
262
|
+
|
263
|
+
# TODO: found why/how IO on callbacks breaks +curl.res.body+ content and how to fix or how to avoid it
|
264
|
+
# Executes a single +order+ ([load_method, *args]) on a spare scout.
# Serves GETs from the class cache when @use_cache is on; otherwise
# builds a fresh opts[:result] page, processes the curl handle into it
# and runs callbacks. With :wait it blocks and returns the page (or its
# .res when a result was requested); without :wait it returns the page
# immediately.
# TODO: found why/how IO on callbacks breaks +curl.res.body+ content and how to fix or how to avoid it
def exec_one(order, opts, &callback)
  # cache hit path: re-run callbacks against the cached page
  if @use_cache and order[0] == :loadGet and page = @@cache[order[1]]
    run_callbacks! page, opts, &callback
    res = opts[:wait] && (opts[:save_result] or :proc_result.in opts) ? page.res : page
    return res
  end
  # must result in Page (default) or it's subclass
  page = opts[:result].new
  # if no spare scouts can be found, squad simply waits for first callbacks to complete
  s = @ss.next
  s.send(*(order << opts)) {|curl|
    # there is a problem with storing html on disk
    if order[0] == :loadGet and @write_to
      # sometimes (about 2% for 100-threads-dling) when this string is calling
      # no matter what +curl.res.body+ has contained here
      RMTools.rw @write_to+'/'+order[-2].sub(/^[a-z]+:\/\//, ''), curl.res.body.xml_to_utf
    end
    if opts[:raw]
      page.res = yield curl
    # here +curl.res.body+ become empty
    elsif page.process(curl, opts)
      @@cache[page.href] = page if order[0] == :loadGet and @use_cache
      run_callbacks! page, opts, &callback
    end
  }
  if opts[:wait]
    opts[:thread_safe] ? Curl.carier.perform : Curl.wait
    (opts[:save_result] or :proc_result.in opts) ? page.res : page
  else page
  end
end
|
295
|
+
|
296
|
+
# Executes a batch of +orders+ ([load_method, *args] arrays) over the
# squad. With :ranges each order gets a matching Range header (counts
# must line up). :stream skips collecting pages (each instead of map);
# :wait blocks until the whole batch is done.
# Returns the collected pages, or the :stream flag value when streaming.
def exec_many(orders, with_opts, &callback)
  w = with_opts.delete :wait
  iterator = with_opts[:stream] ? :each : :map
  if with_opts[:ranges]
    if orders.size != with_opts[:ranges].size
      # FIX: Kernel#raise treats a third argument as the backtrace, so the
      # custom format string was silently discarded before; hand it to the
      # exception constructor instead.
      raise ZippingError.new([orders.size, with_opts[:ranges].size], "orders quantity (%s) is not equal ranges quantity (%s)")
    end
    pages = orders.zip(with_opts[:ranges]).send(iterator) {|order, range|
      (with_opts[:headers] ||= {}).Range = "bytes=#{range.begin}-#{range.end}"
      exec_one order, with_opts, &callback
    }
  else
    pages = orders.send(iterator) {|order| exec_one order, with_opts, &callback }
  end
  with_opts[:thread_safe] ? Curl.carier.perform : Curl.wait if w
  with_opts[:stream] || pages
end
|
313
|
+
|
314
|
+
end
|
315
|
+
|
316
|
+
end
|