rhack 0.2.2
Sign up to get free protection for your applications and to get access to all the features.
- data/.gemtest +0 -0
- data/CURB-LICENSE +51 -0
- data/Gemfile +4 -0
- data/History.txt +4 -0
- data/LICENSE +51 -0
- data/License.txt +17 -0
- data/Manifest.txt +61 -0
- data/README.txt +12 -0
- data/Rakefile +34 -0
- data/ext/curb-original/curb.c +977 -0
- data/ext/curb-original/curb.h +52 -0
- data/ext/curb-original/curb_config.h +235 -0
- data/ext/curb-original/curb_easy.c +3455 -0
- data/ext/curb-original/curb_easy.h +90 -0
- data/ext/curb-original/curb_errors.c +647 -0
- data/ext/curb-original/curb_errors.h +129 -0
- data/ext/curb-original/curb_macros.h +159 -0
- data/ext/curb-original/curb_multi.c +704 -0
- data/ext/curb-original/curb_multi.h +26 -0
- data/ext/curb-original/curb_postfield.c +523 -0
- data/ext/curb-original/curb_postfield.h +40 -0
- data/ext/curb-original/curb_upload.c +80 -0
- data/ext/curb-original/curb_upload.h +30 -0
- data/ext/curb/Makefile +157 -0
- data/ext/curb/curb.c +977 -0
- data/ext/curb/curb.h +52 -0
- data/ext/curb/curb_config.h +235 -0
- data/ext/curb/curb_easy.c +3430 -0
- data/ext/curb/curb_easy.h +94 -0
- data/ext/curb/curb_errors.c +647 -0
- data/ext/curb/curb_errors.h +129 -0
- data/ext/curb/curb_macros.h +159 -0
- data/ext/curb/curb_multi.c +710 -0
- data/ext/curb/curb_multi.h +26 -0
- data/ext/curb/curb_postfield.c +523 -0
- data/ext/curb/curb_postfield.h +40 -0
- data/ext/curb/curb_upload.c +80 -0
- data/ext/curb/curb_upload.h +30 -0
- data/ext/curb/extconf.rb +399 -0
- data/lib/cache.rb +44 -0
- data/lib/curl-global.rb +151 -0
- data/lib/extensions/browser/env.js +697 -0
- data/lib/extensions/browser/jquery.js +7180 -0
- data/lib/extensions/browser/xmlsax.js +1564 -0
- data/lib/extensions/browser/xmlw3cdom_1.js +1444 -0
- data/lib/extensions/browser/xmlw3cdom_2.js +2744 -0
- data/lib/extensions/curb.rb +125 -0
- data/lib/extensions/declarative.rb +153 -0
- data/lib/extensions/johnson.rb +63 -0
- data/lib/frame.rb +766 -0
- data/lib/init.rb +36 -0
- data/lib/rhack.rb +16 -0
- data/lib/rhack.yml.template +19 -0
- data/lib/rhack/proxy/checker.rb +226 -0
- data/lib/rhack/proxy/list.rb +196 -0
- data/lib/rhack/services.rb +445 -0
- data/lib/rhack_in.rb +2 -0
- data/lib/scout.rb +591 -0
- data/lib/words.rb +37 -0
- data/test/test_frame.rb +107 -0
- data/test/test_rhack.rb +5 -0
- data/test/test_scout.rb +53 -0
- metadata +195 -0
@@ -0,0 +1,125 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
module Curl
|
3
|
+
|
4
|
+
class Easy
|
5
|
+
__init__
|
6
|
+
attr_accessor :base
|
7
|
+
|
8
|
+
def res
|
9
|
+
Response(self)
|
10
|
+
end
|
11
|
+
alias response res
|
12
|
+
|
13
|
+
def req
|
14
|
+
res.req
|
15
|
+
end
|
16
|
+
alias request req
|
17
|
+
|
18
|
+
def host
|
19
|
+
url.parse(:uri).root
|
20
|
+
end
|
21
|
+
|
22
|
+
def path=(href)
|
23
|
+
self.url = host+href.parse(:uri).fullpath
|
24
|
+
end
|
25
|
+
|
26
|
+
def retry!
|
27
|
+
@base.retry!
|
28
|
+
end
|
29
|
+
|
30
|
+
# curb changed getters interface, so i get some shortcuts from curb/lib/curl/easy.rb
|
31
|
+
def set(opt,val)
|
32
|
+
if opt.is_a?(Symbol)
|
33
|
+
setopt(sym2curl(opt), val)
|
34
|
+
else
|
35
|
+
setopt(opt.to_i, val)
|
36
|
+
end
|
37
|
+
end
|
38
|
+
|
39
|
+
def sym2curl(opt)
|
40
|
+
Curl.const_get("CURLOPT_#{opt.to_s.upcase}")
|
41
|
+
end
|
42
|
+
|
43
|
+
def interface=(value)
|
44
|
+
set :interface, value
|
45
|
+
end
|
46
|
+
|
47
|
+
def url=(u)
|
48
|
+
set :url, u
|
49
|
+
end
|
50
|
+
|
51
|
+
def proxy_url=(url)
|
52
|
+
set :proxy, url
|
53
|
+
end
|
54
|
+
|
55
|
+
def userpwd=(value)
|
56
|
+
set :userpwd, value
|
57
|
+
end
|
58
|
+
|
59
|
+
def proxypwd=(value)
|
60
|
+
set :proxyuserpwd, value
|
61
|
+
end
|
62
|
+
|
63
|
+
def follow_location=(onoff)
|
64
|
+
set :followlocation, onoff
|
65
|
+
end
|
66
|
+
|
67
|
+
def head=(onoff)
|
68
|
+
set :nobody, !!onoff
|
69
|
+
end
|
70
|
+
|
71
|
+
def get=(onoff)
|
72
|
+
set :httpget, !!onoff
|
73
|
+
end
|
74
|
+
|
75
|
+
end
|
76
|
+
|
77
|
+
class PostField
|
78
|
+
|
79
|
+
def to_s
|
80
|
+
raise "Cannot convert unnamed field to string" if !name
|
81
|
+
display_content = if (cp = content_proc)
|
82
|
+
cp.inspect
|
83
|
+
elsif (c = content)
|
84
|
+
"#{c[0...20].inspect}#{"… (#{c.size.bytes})" if c.size > 20}"
|
85
|
+
elsif (ln = local_name)
|
86
|
+
File.new(ln).inspect
|
87
|
+
end
|
88
|
+
"#{name}=#{display_content}"
|
89
|
+
end
|
90
|
+
|
91
|
+
end
|
92
|
+
|
93
|
+
class Multi
|
94
|
+
if method_defined? :requests
|
95
|
+
alias :reqs :requests
|
96
|
+
end
|
97
|
+
|
98
|
+
def reset
|
99
|
+
reqs.each {|k| remove k rescue()}
|
100
|
+
$Carier = Multi.new
|
101
|
+
$Carier.pipeline = true
|
102
|
+
# GC.start
|
103
|
+
end
|
104
|
+
|
105
|
+
def drop
|
106
|
+
while running > 0 do perform rescue() end
|
107
|
+
Curl.recall
|
108
|
+
end
|
109
|
+
|
110
|
+
def drop!
|
111
|
+
drop
|
112
|
+
reset if reqs.size + running > 0
|
113
|
+
end
|
114
|
+
|
115
|
+
def sheduled
|
116
|
+
0 < running and running <= reqs.size
|
117
|
+
end
|
118
|
+
|
119
|
+
def inspect
|
120
|
+
"<#Carier #{'unit'.x reqs.size}, #{running} executing>"
|
121
|
+
end
|
122
|
+
|
123
|
+
end
|
124
|
+
|
125
|
+
end
|
@@ -0,0 +1,153 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
module ActiveRecord
|
3
|
+
|
4
|
+
module ConnectionAdapters
|
5
|
+
AbstractAdapter
|
6
|
+
|
7
|
+
class VirtualTable < Table
|
8
|
+
|
9
|
+
def debug_str meth, called, exist, *args
|
10
|
+
"Table.#{meth}(#{args.inspects*', '}) was#{' NOT' if !called} called due to #{'in' if !exist}existance"
|
11
|
+
end
|
12
|
+
|
13
|
+
def column_exists *args
|
14
|
+
column_names = @base.columns(@table_name).names
|
15
|
+
options = args.extract_options!
|
16
|
+
names = args.dup
|
17
|
+
args << options
|
18
|
+
_or_ = (names[0] == :all) ? !names.shift : true
|
19
|
+
names.each {|name| return _or_ if name.to_s.in(column_names) == _or_}
|
20
|
+
!_or_
|
21
|
+
end
|
22
|
+
|
23
|
+
def index_exists *indexes
|
24
|
+
column_indexes = @base.indexes(@table_name).columnss.flatten
|
25
|
+
_or_ = (indexes[0] == :all) ? !indexes.shift : true
|
26
|
+
indexes.each {|index| return _or_ if index.to_s.in(column_indexes) == _or_}
|
27
|
+
!_or_
|
28
|
+
end
|
29
|
+
|
30
|
+
def initialize name, connection, map=nil
|
31
|
+
super name, connection
|
32
|
+
case map
|
33
|
+
when true; @map = []
|
34
|
+
when Array; @map = map
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
def map!
|
39
|
+
map_names = @map.firsts.to_ss
|
40
|
+
@base.columns(@table_name).names.each {|name|
|
41
|
+
name.in(map_names) ? @map.reject! {|_| _[0] == name} : remove(name)
|
42
|
+
}
|
43
|
+
@map.each {|col| column *col}
|
44
|
+
end
|
45
|
+
|
46
|
+
def column name, *args
|
47
|
+
to_be_called = !column_exists(name)
|
48
|
+
super if to_be_called
|
49
|
+
$log.debug {debug_str :column, to_be_called, !to_be_called, name, *args}
|
50
|
+
@map << [name, *args] if @map
|
51
|
+
end
|
52
|
+
|
53
|
+
# Defines one idempotent helper per column type (string, text, integer, ...).
# Each helper:
#   * forwards to the inherited Table helper only when column_exists reports
#     that none of the named columns is present yet,
#   * logs whether the inherited helper was invoked,
#   * when a column map is being collected (@map is set), appends the
#     requested columns to it.
%w{string text integer float decimal
   datetime timestamp time date binary boolean}.each {|column_type|
  define_method(column_type) {|*args|
    to_be_called = !column_exists(*args)
    # Arguments are forwarded explicitly: Ruby raises RuntimeError for a bare
    # `super` (implicit argument passing) inside a define_method body.
    super(*args) if to_be_called
    $log.debug {debug_str column_type, to_be_called, !to_be_called, *args}
    if @map
      # extract_options! strips a trailing options hash from args, so it must
      # run only after the arguments have been forwarded to super.
      options = args.extract_options!
      # NOTE(review): xprod presumably pairs every column name with the type
      # (and then with the options) -- confirm against the rmtools helper.
      args = args.xprod(column_type)
      args = args.xprod(options) if options
      @map.concat args
    end
  } }
|
66
|
+
|
67
|
+
def index name, *args
|
68
|
+
to_be_called = !index_exists(name)
|
69
|
+
super if to_be_called
|
70
|
+
$log.debug {debug_str :index, to_be_called, !to_be_called, name, *args}
|
71
|
+
end
|
72
|
+
|
73
|
+
def timestamps
|
74
|
+
to_be_called = !column_exists('created_at', 'updated_at')
|
75
|
+
super if to_be_called
|
76
|
+
$log.debug {debug_str :timestamps, to_be_called, !to_be_called}
|
77
|
+
@map.concat [[:created_at, :datetime], [:updated_at, :datetime]] if @map
|
78
|
+
end
|
79
|
+
|
80
|
+
def change *args
|
81
|
+
raise NotImplementedError, "don't use #change in declaration!"
|
82
|
+
end
|
83
|
+
|
84
|
+
def change_default *args
|
85
|
+
raise NotImplementedError, "don't use #change_default in declaration!"
|
86
|
+
end
|
87
|
+
|
88
|
+
def rename column_name, new_column_name
|
89
|
+
to_be_called = !column_exists(new_column_name)
|
90
|
+
super if to_be_called
|
91
|
+
$log.debug {debug_str :rename, to_be_called, !to_be_called, column_name, new_column_name}
|
92
|
+
end
|
93
|
+
|
94
|
+
def references *args
|
95
|
+
to_be_called = !column_exists(*args.map {|col| "#{col}_id"})
|
96
|
+
super if to_be_called
|
97
|
+
$log.debug {debug_str :references, to_be_called, !to_be_called, *args}
|
98
|
+
end
|
99
|
+
alias :belongs_to :references
|
100
|
+
|
101
|
+
def remove *args
|
102
|
+
to_be_called = column_exists :all, *args
|
103
|
+
super if to_be_called
|
104
|
+
$log.debug {debug_str :remove, to_be_called, to_be_called, *args}
|
105
|
+
end
|
106
|
+
|
107
|
+
def remove_references *args
|
108
|
+
to_be_called = column_exists(:all, *args.map {|col| "#{col}_id"})
|
109
|
+
super if to_be_called
|
110
|
+
$log.debug {debug_str :remove_references, to_be_called, to_be_called, *args}
|
111
|
+
end
|
112
|
+
alias :remove_belongs_to :remove_references
|
113
|
+
|
114
|
+
def remove_index options
|
115
|
+
indexes = options.is(Hash) ? options[:column] : options
|
116
|
+
raise ArgumentError, "can remove only default format named indexes in declaration!" if !indexes
|
117
|
+
to_be_called = index_exists :all, *indexes
|
118
|
+
super if to_be_called
|
119
|
+
$log.debug {debug_str :remove_index, to_be_called, to_be_called, options}
|
120
|
+
end
|
121
|
+
|
122
|
+
# Removes the created_at/updated_at pair, but only when BOTH columns exist.
#
# The :all flag makes column_exists return true only if every listed column
# is present, matching the checks in #remove and #remove_references; without
# it a table holding just one of the two columns would still trigger the
# inherited removal of both.
def remove_timestamps
  to_be_called = column_exists :all, 'created_at', 'updated_at'
  super if to_be_called
  $log.debug {debug_str :remove_timestamps, to_be_called, to_be_called}
end
|
127
|
+
|
128
|
+
end
|
129
|
+
|
130
|
+
end
|
131
|
+
|
132
|
+
class Base
|
133
|
+
|
134
|
+
def self.declare name, options={}, &block
|
135
|
+
self.table_name = name
|
136
|
+
if !table_exists? or options[:force]
|
137
|
+
$log < "with options[:force] the `#{table_name}` table will have been recreated each time you load the #{model_name} model" if options[:force]
|
138
|
+
self.primary_key = options[:primary_key] if options[:id] != false and options[:primary_key]
|
139
|
+
$log.debug "connection.create_table(#{name}, #{options.inspect}) {}"
|
140
|
+
connection.create_table(name, options, &block)
|
141
|
+
elsif options[:map]
|
142
|
+
table = ConnectionAdapters::VirtualTable.new(name, connection, options[:map])
|
143
|
+
yield table
|
144
|
+
table.map!
|
145
|
+
else yield ConnectionAdapters::VirtualTable.new(name, connection)
|
146
|
+
end
|
147
|
+
reset_column_information
|
148
|
+
end
|
149
|
+
|
150
|
+
end
|
151
|
+
|
152
|
+
end
|
153
|
+
|
@@ -0,0 +1,63 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
module Johnson
|
3
|
+
# Decide whether the Johnson JavaScript runtime can be used.
# Enabled is false when the gem cannot be loaded, or when an old Johnson
# (<= 2.0.0) is combined with Ruby 1.9 or newer.
begin
  require 'johnson'
rescue LoadError
  Enabled = false
else
  # Compare versions numerically via Gem::Version; plain String comparison is
  # lexicographic and would, for example, rank "10.0.0" below "2.0.0".
  old_johnson = Gem::Version.new(VERSION) <= Gem::Version.new("2.0.0")
  # ">=" keeps the original outcome for every real RUBY_VERSION ("1.9.x"):
  # Gem::Version treats "1.9.0" as equal to "1.9".
  modern_ruby = Gem::Version.new(RUBY_VERSION) >= Gem::Version.new("1.9")
  Enabled = !(old_johnson and modern_ruby)
end
|
13
|
+
### JavaScript interface DOM emulation ###
|
14
|
+
|
15
|
+
class Runtime
|
16
|
+
attr_accessor :thread_id
|
17
|
+
Runtime_is_set = lambda {|o| !o[:eval].b or ($JSRuntime and $JSRuntime.thread_id == $CarierThread.object_id)}
|
18
|
+
BROWSER_PATH = File.expand_path "#{File.dirname(__FILE__)}/browser"
|
19
|
+
|
20
|
+
# CarierThread breaks if Multi has no work && CarierThread
|
21
|
+
# is joined so it won't last forever.
|
22
|
+
#
|
23
|
+
# Johnson is not thread safe =>
|
24
|
+
# Runtime created in this thread will become unusable after
|
25
|
+
# CarierThread dies.
|
26
|
+
#
|
27
|
+
# So we don't use Curl.wait until Carier haven't got whole
|
28
|
+
# request for this Runtime.
|
29
|
+
def self.set_browser_for_curl(opts)
|
30
|
+
if !Runtime_is_set[opts]
|
31
|
+
if Curl.status
|
32
|
+
Curl.recall
|
33
|
+
$log.debug 'recalled'
|
34
|
+
end
|
35
|
+
if opts[:thread_safe].b
|
36
|
+
$JSRuntime = new_browser(opts[:jq])
|
37
|
+
$log.debug "#{$JSRuntime} initialized in #{Thread.current}\nmain: #{Thread.main}; carier: #{$CarierThread}"
|
38
|
+
else
|
39
|
+
$log.debug 'about to run carier'
|
40
|
+
Curl.execute {$JSRuntime = new_browser(opts[:jq])
|
41
|
+
$log.debug "#{$JSRuntime} initialized in #{Thread.current}\nmain: #{Thread.main}; carier: #{$CarierThread}"}
|
42
|
+
sleep 0.01 until Runtime_is_set[opts]
|
43
|
+
end
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
47
|
+
def self.new_browser(jq=false)
|
48
|
+
rt = new
|
49
|
+
%w{xmlw3cdom_1 xmlw3cdom_2 xmlsax env}.concat(jq ? ['jquery'] : []).each {|f|
|
50
|
+
path = "#{BROWSER_PATH}/#{f}.js"
|
51
|
+
rt.evaluate IO.read(path), path, 1
|
52
|
+
}
|
53
|
+
rt.document = ''
|
54
|
+
rt
|
55
|
+
end
|
56
|
+
|
57
|
+
def document=(html)
|
58
|
+
evaluate "var document = new DOMDocument(#{html.to_doc.to_xhtml.inspect})"
|
59
|
+
end
|
60
|
+
|
61
|
+
end
|
62
|
+
|
63
|
+
end
|
data/lib/frame.rb
ADDED
@@ -0,0 +1,766 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
module HTTPAccessKit
|
3
|
+
|
4
|
+
# Frame( ScoutSquad( Curl::Multi <- Scout( Curl API ), Scout, ... ) ) =>
|
5
|
+
# Curl -> Johnson::Runtime -> XML::Document => Page( XML::Document ), Page, ...
|
6
|
+
|
7
|
+
class ZippingError < ArgumentError
|
8
|
+
def initialize debug, str="invalid use of :zip option, uri and body must be an arrays with the same size\n uri: %s(%s), body: %s(%s)"
|
9
|
+
super str%debug end
|
10
|
+
end
|
11
|
+
|
12
|
+
class TargetError < ArgumentError
|
13
|
+
def initialize msg="only static frame can use local paths"
|
14
|
+
super end
|
15
|
+
end
|
16
|
+
|
17
|
+
class ConfigError < ArgumentError
|
18
|
+
def initialize msg
|
19
|
+
super end
|
20
|
+
end
|
21
|
+
|
22
|
+
class Frame
|
23
|
+
__init__
|
24
|
+
attr_reader :loc, :static, :ss, :opts
|
25
|
+
|
26
|
+
# Builds a Frame around a ScoutSquad.
#
# args - optional target uri String first, then an optional options Hash,
#        then an optional Integer as the last element. All args are passed
#        on to ScoutSquad unchanged.
#        NOTE(review): the trailing Integer is presumably the number of
#        scouts -- confirm against ScoutSquad.
#
# When a target String is given the frame remembers its parsed location and
# is static unless :static => false is passed; otherwise it starts with an
# empty location and is not static.
def initialize *args
  # Integer instead of Fixnum: Fixnum was deprecated in Ruby 2.4 and removed
  # in Ruby 3.2, while Integer matches on every Ruby version.
  args << 10 unless args[-1].is Integer
  @opts = {:eval => Johnson::Enabled, :redir => true, :cp => true, :result => Page}.merge!(args[-2].is(Hash) ? args[-2] : {})
  if args[0].is String
    uri = args[0]
    # NOTE(review): `>>` presumably prepends the scheme to uri in place
    # (rmtools String extension) -- confirm.
    'http://' >> uri if uri !~ /^\w+:\/\//
    @loc = uri.parse:uri
    # be careful, if you set :static => false, frame will be unable to use implicit url
    @static = @opts.fetch(:static, true).b
  else
    @loc = {}
    @static = false
  end
  @ss = ScoutSquad *args
  @pages = []
  # Start the Curl machinery unless Curl.status says it is already running.
  Curl.run unless Curl.status
end
|
43
|
+
|
44
|
+
# Points the frame (and its scout squad) at a new target.
#
# to     - target uri String; "http://" is prepended when it carries no
#          "scheme://" prefix.
# forced - passed through to ScoutSquad#update.
#
# The scheme test is the same one used in #initialize, so a bare
# "host:port/path" target is treated as scheme-less and gets the prefix.
def retarget to, forced=nil
  to = 'http://' + to if to !~ /^\w+:\/\//
  @ss.update to, forced
  @loc = to.parse:uri
end
|
49
|
+
|
50
|
+
def target=to
|
51
|
+
retarget to
|
52
|
+
end
|
53
|
+
|
54
|
+
def next() @ss.next end
|
55
|
+
def rand() @ss.rand end
|
56
|
+
def each(&block) @ss.each &block end
|
57
|
+
def [](i) @ss[i] end
|
58
|
+
|
59
|
+
def inspect
|
60
|
+
"<#Frame @ #{@ss.untargeted ? 'no target' : @loc.root}: #{'scout'.x @ss.size}#{', static' if @static}, cookies #{@ss[0].cookieProc ? 'on' : 'off'}>"
|
61
|
+
end
|
62
|
+
|
63
|
+
# opts are :eval, :json, :hash, :wait, :proc_result, :save_result, :load_scripts,
|
64
|
+
# :zip, :thread_safe, :result, :stream, :raw + any opts for Scouts in one hash
|
65
|
+
def get *args, &callback
|
66
|
+
many, order, orders, with_opts = interpret_request *args
|
67
|
+
L.log({:many => many, :order => order, :orders => orders, :with_opts => with_opts})
|
68
|
+
|
69
|
+
if !Johnson::Enabled and with_opts[:eval]
|
70
|
+
L < "failed to use option :eval because Johnson is disabled"
|
71
|
+
with_opts.delete :eval
|
72
|
+
end
|
73
|
+
# JS Runtime is not thread-safe and must be created in curl thread
|
74
|
+
# if we aren't said explicitly about the opposite
|
75
|
+
Johnson::Runtime.set_browser_for_curl with_opts
|
76
|
+
|
77
|
+
if many then exec_many orders, with_opts, &callback
|
78
|
+
else exec_one order, with_opts, &callback end
|
79
|
+
end
|
80
|
+
alias :exec :get
|
81
|
+
alias :run :get
|
82
|
+
|
83
|
+
def interpret_request(*args)
|
84
|
+
body, mp, uri, opts = args.dup.get_opts [nil, false, nil], @opts
|
85
|
+
L.log [body, mp, uri, opts]
|
86
|
+
zip = opts.delete :zip
|
87
|
+
many = order = orders = post = false
|
88
|
+
# Default options set is for POST
|
89
|
+
if mp.is String or mp.kinda Array and !(uri.is String or uri.kinda Array)
|
90
|
+
# if second arg is String, then that's uri
|
91
|
+
uri, mp, post = mp.dup, false, true
|
92
|
+
# L.debug "uri #{uri.inspect} has been passed as second argument instead of third"
|
93
|
+
# But if we have only one argument actually passed
|
94
|
+
# except for options hash, then believe it's GET
|
95
|
+
elsif body.is String or body.kinda [String]
|
96
|
+
L.debug "first parameter (#{body.inspect}) was implicitly taken as uri#{' '+body.class if body.kinda Array}, but last paramter is of type #{uri.class}, too" if uri
|
97
|
+
uri = body.dup
|
98
|
+
elsif !body then uri = nil
|
99
|
+
else
|
100
|
+
uri = uri.dup if uri
|
101
|
+
mp, post = !!mp, true
|
102
|
+
end
|
103
|
+
if post
|
104
|
+
unless body.is Hash or body.kinda [Hash]
|
105
|
+
raise TypeError, "body of post request must be a hash or hash array, params was
|
106
|
+
(#{args.inspect[1..-2]})"
|
107
|
+
end
|
108
|
+
validate_zip uri, body if zip
|
109
|
+
if zip or uri.kinda Array or body.kinda Array
|
110
|
+
many = true
|
111
|
+
if zip or uri.kinda Array
|
112
|
+
validate_some uri
|
113
|
+
orders = zip ? body.zip(uri) : uri.xprod(body, :inverse)
|
114
|
+
else
|
115
|
+
uri = validate uri
|
116
|
+
orders = body.xprod uri
|
117
|
+
end
|
118
|
+
orders.each {|o| o.unshift :loadPost and o.insert 2, mp}
|
119
|
+
else
|
120
|
+
uri = validate uri
|
121
|
+
order = [:loadPost, body, mp, uri]
|
122
|
+
end
|
123
|
+
else
|
124
|
+
if uri.kinda Array
|
125
|
+
many = true
|
126
|
+
validate_some uri
|
127
|
+
orders = [:loadGet].xprod uri
|
128
|
+
else
|
129
|
+
uri = validate uri
|
130
|
+
order = [:loadGet, uri]
|
131
|
+
end
|
132
|
+
end
|
133
|
+
if !order.b and !orders.b
|
134
|
+
raise ArgumentError, "failed to run blank request#{'s' if many}, params was
|
135
|
+
(#{args.inspect[1..-2]})"
|
136
|
+
else
|
137
|
+
opts[:wait] = opts[:sync] if :sync.in opts
|
138
|
+
opts[:wait] = true if !:wait.in(opts) and
|
139
|
+
:proc_result.in(opts) ? !opts[:proc_result] : opts[:save_result]
|
140
|
+
opts[:eval] = false if opts[:json] or opts[:hash] or opts[:raw]
|
141
|
+
opts[:load_scripts] = self if opts[:load_scripts]
|
142
|
+
opts[:stream] = true if opts[:raw]
|
143
|
+
[many, order, orders, opts]
|
144
|
+
end
|
145
|
+
end
|
146
|
+
|
147
|
+
# Loads the given links, serving them from Cache when possible.
#
# links - uri strings; a trailing :expire symbol switches on revalidation:
#         cached entries are re-requested with an If-Modified-Since header
#         and the cached copy is used unless the response code is 200.
#
# Links whose path matches /ads|count|stats/ are skipped.
#
# Returns the body for a single link, or nil when that link was skipped or
# produced no result. For several links returns the collected bodies ordered
# by link position.
# NOTE(review): `lasts` presumably maps each [index, body] pair to its body.
def get_cached(*links)
  res = []
  expire = links[-1] == :expire ? links.pop : false
  links.parses(:uri).each_with_index {|uri, i|
    next if uri.path[/ads|count|stats/]
    file = Cache.load uri, !expire
    if file
      if expire
        # Revalidate the cached copy against the server.
        @ss.next.loadGet(uri.href, :headers=>{'If-Modified-Since'=>file.date}) {|c|
          if c.res.code == 200
            res << [i, (data = c.res.body)]
            Cache.save uri, data, false
          else
            res << [i, file.is(String) ? file : read(file.path)]
          end
        }
      else
        res << [i, file.is(String) ? file : read(file.path)]
      end
    else
      # Nothing cached: fetch and store on success only.
      @ss.next.loadGet(uri.href) {|c|
        if c.res.code == 200
          res << [i, (data = c.res.body)]
          Cache.save uri, data, !expire
        end
      }
    end
  }
  Curl.wait
  if links.size == 1
    # res is empty when the only link was filtered out or did not return 200;
    # answer nil instead of raising NoMethodError on nil.
    res[0] && res[0][1]
  else
    res.sort!.lasts
  end
end
|
178
|
+
|
179
|
+
def get_distr(uri, psize, threads, start=0, print_progress=$verbose)
|
180
|
+
raise ConfigError, "Insufficient Scouts in the Frame for distributed downloading" if @ss.size < 2
|
181
|
+
@print_progress, code, stop_download, @ss_reserve = print_progress, nil, false, []
|
182
|
+
(s = @ss.next).http.on_header {|h|
|
183
|
+
next h.size unless h[/Content-Length: (\d+)|HTTP\/1\.[01] (\d+)[^\r]+|^\s*$/]
|
184
|
+
if code = $2
|
185
|
+
if code != '200'
|
186
|
+
L << "#$& getting #{uri}; interrupting request."
|
187
|
+
s.http.on_header() # set default process
|
188
|
+
next 0
|
189
|
+
end
|
190
|
+
next h.size
|
191
|
+
end
|
192
|
+
|
193
|
+
s.http.on_header() # set default process
|
194
|
+
if !$1 # конец хедера, content-length отсутствует
|
195
|
+
L << "No Content-Length header; trying to load a whole #{uri} at once!"
|
196
|
+
s.loadGet {|c| yield c.res.body.size, 0, c.res.body}
|
197
|
+
next 0
|
198
|
+
end
|
199
|
+
|
200
|
+
len = $1.to_i - start
|
201
|
+
psize = configure_psize(len, psize, threads)
|
202
|
+
parts = (len/psize.to_f).ceil
|
203
|
+
setup_speedometer(uri, parts, len)
|
204
|
+
yield len, psize, :careful_dl if len > (@opts[:careful_dl] || 10.mb)
|
205
|
+
|
206
|
+
@ss_reserve = @ss[threads+1..-1]
|
207
|
+
@ss = @ss[0..threads]
|
208
|
+
(0...parts).each {|n|
|
209
|
+
break if stop_download
|
210
|
+
|
211
|
+
s = @ss.next
|
212
|
+
run_speedometer(s, len, n)
|
213
|
+
s.loadGet(uri, :headers => {
|
214
|
+
'Range' => "bytes=#{start + n*psize}-#{start + (n+1)*psize - 1}"
|
215
|
+
}) {|c|
|
216
|
+
clear_speedometer(s)
|
217
|
+
if c.res.code/10 == 20
|
218
|
+
yield len, n*psize, c.res.body
|
219
|
+
else
|
220
|
+
L << "#{c.res} during get #{uri.inspect}; interrupting request."
|
221
|
+
stop_download = true
|
222
|
+
end
|
223
|
+
}
|
224
|
+
}
|
225
|
+
0
|
226
|
+
}
|
227
|
+
s.raise_err = false
|
228
|
+
s.loadGet validate uri
|
229
|
+
ensure
|
230
|
+
@ss.concat @ss_reserve || []
|
231
|
+
end
|
232
|
+
|
233
|
+
def dl(uri, df=File.basename(uri.parse(:uri).path), psize=:auto, opts={})
|
234
|
+
dled = 0
|
235
|
+
lock = ''
|
236
|
+
callback = lambda {|len, pos, body|
|
237
|
+
if body != :careful_dl
|
238
|
+
begin
|
239
|
+
write(df, body, pos)
|
240
|
+
rescue => e
|
241
|
+
binding.start_interaction
|
242
|
+
raise
|
243
|
+
end
|
244
|
+
if (dled += body.size) == len
|
245
|
+
File.delete lock if File.file? lock
|
246
|
+
yield df if block_given?
|
247
|
+
end
|
248
|
+
else
|
249
|
+
lock = lock_file df, len, pos # filename, filesize, partsize
|
250
|
+
end
|
251
|
+
}
|
252
|
+
opts[:threads] ||= @ss.size-1
|
253
|
+
get_distr(uri, psize, opts[:threads], opts[:start].to_i, &callback)
|
254
|
+
Curl.wait unless block_given?
|
255
|
+
df
|
256
|
+
end
|
257
|
+
|
258
|
+
def simple_dl(uri, df=File.basename(uri.parse(:uri).path), opts={})
|
259
|
+
opts.reverse_merge! :psize => :auto, :threads => 1, :print_progress => $verbose
|
260
|
+
L << opts
|
261
|
+
|
262
|
+
@print_progress = opts[:print_progress]
|
263
|
+
unless len = opts[:len] || (map = read_mapfile(df) and map.len)
|
264
|
+
return @ss.next.loadHead(uri) {|c| $log << c
|
265
|
+
if len = c.res['Content-Length']
|
266
|
+
simple_dl(uri, df, opts.merge(:len => len.to_i))
|
267
|
+
else L.warn "Can't get file size, so it has no sence to download this way. Or maybe it's just an error. Check ObjectSpace.find(#{c.res.object_id}) out."
|
268
|
+
end
|
269
|
+
}
|
270
|
+
end
|
271
|
+
|
272
|
+
psize, parts = check_mapfile(df, opts)
|
273
|
+
return unless psize
|
274
|
+
L << [psize, parts]
|
275
|
+
setup_speedometer(uri, parts.size, len)
|
276
|
+
|
277
|
+
obtained uri do |uri|
|
278
|
+
if opts[:threads] == 1
|
279
|
+
start = opts[:start].to_i || (parts[0] && parts[0].begin) || 0
|
280
|
+
scout = opts[:scout] || @ss.next
|
281
|
+
$log << [uri, scout]
|
282
|
+
(loadget = lambda {|n|
|
283
|
+
run_speedometer(scout, len, n)
|
284
|
+
from = start + n*psize
|
285
|
+
to = start + (n+1)*psize - 1
|
286
|
+
scout.loadGet(uri, :headers => {'Range' => "bytes=#{from}-#{to}"}) {|c|
|
287
|
+
begin
|
288
|
+
$log << "writing #{df} from #{from}: #{c.res.body.inspect}"
|
289
|
+
write(df, c.res.body, from)
|
290
|
+
rescue => e
|
291
|
+
binding.start_interaction
|
292
|
+
raise
|
293
|
+
end
|
294
|
+
if write_mapfile(df, from, to)
|
295
|
+
clear_speedometer(scout)
|
296
|
+
L.warn "file completely dl'ed, but (n+1)*psize <= len: (#{n}+1)*#{psize} <= #{len}" if (n+1)*psize <= len
|
297
|
+
yield df if block_given?
|
298
|
+
elsif (n+1)*psize <= len
|
299
|
+
loadget[n+1]
|
300
|
+
end
|
301
|
+
}
|
302
|
+
})[0]
|
303
|
+
else
|
304
|
+
exec(uri, opts.merge(:raw => true, :ranges => parts)) {|c|
|
305
|
+
L << c.res
|
306
|
+
range = c.req.range
|
307
|
+
begin
|
308
|
+
write(df, c.res.body, range.begin)
|
309
|
+
rescue => e
|
310
|
+
binding.start_interaction
|
311
|
+
raise
|
312
|
+
end
|
313
|
+
if write_mapfile(df, range.begin, range.end)
|
314
|
+
@ss.each {|s| s.http.on_progress} if @print_progress
|
315
|
+
yield df if block_given?
|
316
|
+
end
|
317
|
+
}
|
318
|
+
end
|
319
|
+
end
|
320
|
+
end
|
321
|
+
|
322
|
+
def check_mapfile(df, opts={})
|
323
|
+
opts.reverse_merge! :psize => :auto, :threads => 1
|
324
|
+
map = read_mapfile df
|
325
|
+
if map
|
326
|
+
L << map
|
327
|
+
if map.rest.empty?
|
328
|
+
puts "#{df} is loaded"
|
329
|
+
$log << 'deleting mapfile'
|
330
|
+
File.delete df+'.map'
|
331
|
+
[]
|
332
|
+
else
|
333
|
+
if opts[:len] and map.len != opts[:len]
|
334
|
+
raise "Incorrect file size for #{df}"
|
335
|
+
end
|
336
|
+
psize = configure_psize *opts.values_at(:len, :psize, :threads)
|
337
|
+
[psize, map.rest.div(psize)]
|
338
|
+
end
|
339
|
+
else
|
340
|
+
write_mapfile df, opts[:len]
|
341
|
+
psize = configure_psize *opts.values_at(:len, :psize, :threads)
|
342
|
+
$log << (0...opts[:len]).div(psize)
|
343
|
+
[psize, (0...opts[:len]).div(psize)]
|
344
|
+
end
|
345
|
+
end
|
346
|
+
|
347
|
+
def read_mapfile(df)
|
348
|
+
df += '.map'
|
349
|
+
text = read df
|
350
|
+
$log << "mapfile read: #{text}"
|
351
|
+
if text.b
|
352
|
+
text[/^(\d+)\0+(\d+)\0*\n/]
|
353
|
+
map = {}
|
354
|
+
$log << [$1,$2]
|
355
|
+
if $1 and $1 == $2
|
356
|
+
map.rest = []
|
357
|
+
else
|
358
|
+
map.len, *map.parts = text.chop/"\n"
|
359
|
+
map.len = map.len.to_i
|
360
|
+
map.parts.map! {|part| part /= '-'; part[0].to_i..part[1].to_i}
|
361
|
+
$log << map.parts
|
362
|
+
map.rest = (0...map.len) - XRange(*map.parts)
|
363
|
+
end
|
364
|
+
map
|
365
|
+
end
|
366
|
+
end
|
367
|
+
|
368
|
+
def write_mapfile(df, *args)
|
369
|
+
df += '.map'
|
370
|
+
map = ''
|
371
|
+
if args.size != 2
|
372
|
+
len = args.shift
|
373
|
+
map << len.to_s.ljust(22, "\0") << "\n" if File.file? df
|
374
|
+
end
|
375
|
+
if args.any?
|
376
|
+
read(df)[/^(\d+)\0+(\d+)\0*\n/]
|
377
|
+
$log << "mapfile read"
|
378
|
+
$log << [$1,$2]
|
379
|
+
dled = $2.to_i + args[1] - args[0] + 1
|
380
|
+
return true if dled == $1.to_i
|
381
|
+
map << "#{args[0]}..#{args[1]}\n"
|
382
|
+
$log << 'writing mapfile'
|
383
|
+
write(df, dled.to_s.ljust(11, "\0"), 11)
|
384
|
+
end
|
385
|
+
$log << [df, map]
|
386
|
+
$log << 'writing mapfile'
|
387
|
+
write df, map
|
388
|
+
nil
|
389
|
+
end
|
390
|
+
|
391
|
+
def configure_psize(len, psize, threads)
|
392
|
+
case psize
|
393
|
+
when Numeric; psize.to_i
|
394
|
+
when :auto; len > 100000 ? len/threads+1 : len
|
395
|
+
when :mb; 1.mb
|
396
|
+
else raise ArgumentError, "Incorrect value for part size #{psize}:#{psize.class}"
|
397
|
+
end
|
398
|
+
end
|
399
|
+
|
400
|
+
private
|
401
|
+
def validate_zip(uri, body)
|
402
|
+
if !(uri.kinda Array and body.kinda Array)
|
403
|
+
raise ZippingError, [uri.class, nil, body.class, nil]
|
404
|
+
elsif uri.size != body.size
|
405
|
+
raise ZippingError, [uri.class, uri.size, body.class, body.size]
|
406
|
+
end
|
407
|
+
end
|
408
|
+
|
409
|
+
def validate(uri)
|
410
|
+
if uri
|
411
|
+
loc = uri.parse:uri
|
412
|
+
if loc.root and loc.root != @loc.root
|
413
|
+
raise TargetError, "failed to get #{uri} by static frame #{@loc.host}, you should first update it with new target" if @static
|
414
|
+
@loc.root = loc.root
|
415
|
+
uri
|
416
|
+
elsif !loc.root
|
417
|
+
raise TargetError if !@static
|
418
|
+
File.join @loc.root, uri
|
419
|
+
else uri
|
420
|
+
end
|
421
|
+
else
|
422
|
+
raise TargetError if !@static
|
423
|
+
@loc.href
|
424
|
+
end
|
425
|
+
end
|
426
|
+
|
427
|
+
def validate_some(uris)
|
428
|
+
uris.map! {|u| validate u}
|
429
|
+
end
|
430
|
+
|
431
|
+
def exec_one(order, opts)
|
432
|
+
# must result in Page (default) or it's subclass
|
433
|
+
page = opts[:result].new
|
434
|
+
# if no spare scouts can be found, squad simply waits for all callbacks to complete
|
435
|
+
s = @ss.next
|
436
|
+
#s.raise_err = true# Зачем это тут? Можно добавлять :raise=>1 фрейму при запиле
|
437
|
+
s.send(*(order << opts)) {|curl|
|
438
|
+
if opts[:raw]
|
439
|
+
yield curl
|
440
|
+
elsif page.process(curl, opts) and block_given?
|
441
|
+
yres = yield page
|
442
|
+
if opts[:save_result] or :proc_result.in opts
|
443
|
+
page.res = yres
|
444
|
+
end
|
445
|
+
if opts[:proc_result].is Proc and yres != :skip
|
446
|
+
opts[:proc_result].call yres
|
447
|
+
end
|
448
|
+
end
|
449
|
+
}
|
450
|
+
if opts[:wait]
|
451
|
+
opts[:thread_safe] ? $Carier.perform : Curl.wait
|
452
|
+
# почему бы не уменьшить бойлерплейт в сервисах и не возвращать res сразу?
|
453
|
+
(opts[:save_result] or :proc_result.in opts) ? page.res : page
|
454
|
+
else page
|
455
|
+
end
|
456
|
+
end
|
457
|
+
|
458
|
+
def exec_many(orders, with_opts, &callback)
|
459
|
+
w = with_opts.delete :wait
|
460
|
+
iterator = with_opts[:stream] ? :each : :map
|
461
|
+
if with_opts[:ranges]
|
462
|
+
if orders.size != with_opts[:ranges].size
|
463
|
+
raise ZippingError, [orders.size, with_opts[:ranges].size], "orders quantity (%s) is not equal ranges quantity (%s)"
|
464
|
+
end
|
465
|
+
pages = orders.zip(with_opts[:ranges]).send(iterator) {|order, range|
|
466
|
+
(with_opts[:headers] ||= {}).Range = "bytes=#{range.begin}-#{range.end}"
|
467
|
+
exec_one order, with_opts, &callback
|
468
|
+
}
|
469
|
+
else
|
470
|
+
pages = orders.send(iterator) {|order| exec_one order, with_opts, &callback }
|
471
|
+
end
|
472
|
+
with_opts[:thread_safe] ? $Carier.perform : Curl.wait if w
|
473
|
+
with_opts[:stream] || pages
|
474
|
+
end
|
475
|
+
|
476
|
+
|
477
|
+
def setup_speedometer(uri, parts, len)
|
478
|
+
return unless @print_progress
|
479
|
+
@progress = Array.new(parts, 0)
|
480
|
+
@stop_print, @speed, @sum, *@speedometer = false, '', 0, Time.now, 0
|
481
|
+
@str = "Downloading #{uri.gsub '%', '%%'} (#{len.bytes}) in %03s streams, %07s/s:"
|
482
|
+
@bs = "\b\r"*(@newlines = (uri.unpack('U*').size+len.bytes.size+42)/(ENV['COLUMNS'] || 80).to_i)
|
483
|
+
Thread.new {
|
484
|
+
until @stop_print
|
485
|
+
sleep 0.2
|
486
|
+
now = Time.now
|
487
|
+
if now > @speedometer[0] and @sum > @speedometer[1]
|
488
|
+
@speed.replace(((@sum - @speedometer[1])/(now - @speedometer[0])).to_i.bytes)
|
489
|
+
@speedometer.replace [now, @sum]
|
490
|
+
end
|
491
|
+
end
|
492
|
+
}
|
493
|
+
end
|
494
|
+
|
495
|
+
def run_speedometer(scout, len, n)
|
496
|
+
return unless @print_progress
|
497
|
+
scout.http.on_progress {|dl_need, dl_now, *ul|
|
498
|
+
if !@stop_print
|
499
|
+
@progress[n] = dl_now
|
500
|
+
percents = (@sum = @progress.sum)*100/len
|
501
|
+
print @str%[@progress.select_b.size, @speed]+"\n%%[#{'@'*percents}#{' '*(100-percents)}]\r\b\r"+@bs
|
502
|
+
if percents == 100
|
503
|
+
puts "\v"*@newlines
|
504
|
+
@stop_print = true
|
505
|
+
end
|
506
|
+
end
|
507
|
+
true
|
508
|
+
}
|
509
|
+
end
|
510
|
+
|
511
|
+
def clear_speedometer(scout)
|
512
|
+
return unless @print_progress
|
513
|
+
scout.http.on_progress
|
514
|
+
end
|
515
|
+
|
516
|
+
end
|
517
|
+
|
518
|
+
# Convenience downloader: starts Curl, builds a temporary Frame and
# downloads +uri+ into the file +df+.
#
# uri     - source uri String.
# df      - destination file name; defaults to the basename of the uri path.
# threads - passed to the Frame constructor and used as the :threads option
#           of Frame#dl.
# timeout - passed to the Frame as its :timeout option.
# block   - forwarded to Frame#dl.
#
# Returns whatever Frame#dl returns.
def dl(uri, df=File.basename(uri.parse(:uri).path), threads=5, timeout=600, &block)
  Curl.run
  # Frame#dl takes an options Hash as its fourth argument; passing the bare
  # Integer would fail on `opts[:threads] ||= ...`.
  Frame({:timeout=>timeout}, threads).dl(uri, df, :auto, {:threads => threads}, &block)
end
|
522
|
+
module_function :dl
|
523
|
+
|
524
|
+
|
525
|
+
|
526
|
+
# A fetched page: raw body (@html), parsed document (@doc), location (@loc),
# an optional parsed payload (@hash) and a JavaScript runtime (@js).
# Relies on helpers defined elsewhere in the project (#is, #kinda, #b,
# #parse, #to_doc, #from_json, the L logger, ...).
class Page
  # for debug, just enable L#debug, don't write tons of chaotic log-lines
  __init__
  # res here is the result of page processing made in frame context
  attr_accessor :title, :res
  attr_reader :html, :loc, :hash, :doc, :js
  @@ignore = /google|_gat|tracker|adver/i

  # obj - either a body string, or a Curl::Easy / Scout to read a response from
  # loc - location as a Hash, or anything that responds to parse(:uri);
  #       when obj is a Curl::Easy / Scout it is passed to #process as options
  # js  - JavaScript runtime used by #eval_string
  def initialize(obj='', loc=Hash.new(''), js=$JSRuntime||Johnson::Runtime.new)
    loc = loc.parse(:uri) unless loc.is(Hash)
    @js = js
    if obj.is(Curl::Easy) || obj.kinda(Scout)
      # A Scout exposes its curl handle through #http; a bare Curl::Easy is the
      # handle itself. (Previously the not-yet-set #html reader was used here,
      # so #process received nil.)
      c = obj.kinda(Scout) ? obj.http : obj
      @html = ''
      # just (c, loc) would pass to #process opts variable that returns '' on any key
      process(c, loc.b || {})
    else
      @html = obj
      @loc = loc
    end
  end

  # Short human-readable summary: payload size and kind when a hash/json
  # parse was attempted, otherwise title and body size.
  def inspect
    if !@hash.nil?
      "<#FramePage (#{@hash ? @hash.inspect.size.bytes : 'failed to parse'}) #{@json ? 'json' : 'params hash'}>"
    else
      "<#FramePage #{@html.b ? "«#{@title}» (#{@html.size.bytes}" : '(empty'})#{' js enabled' if @js and @doc and @hash.nil?}>"
    end
  end

  # Fills the page from a finished curl handle. Subclasses may override it;
  # Frame doesn't mind about the value returned by #process.
  #
  # c    - handle responding to #last_effective_url and #res (#code, #body)
  # opts - :json  => parse the body as JSON into @hash
  #        :hash  => parse the body as a params hash into @hash
  #        :eval  => load external scripts and evaluate inline ones
  #        :load_scripts => frame used to fetch external scripts
  #
  # Only 200 responses are processed. Returns self.
  def process(c, opts={})
    @loc = c.last_effective_url.parse(:uri)
    L.debug "#{@loc.fullpath} -> #{c.res}"
    if c.res.code == 200
      body = c.res.body
      if opts[:json]
        @json = true
        @hash = begin; body.from_json
        rescue StandardError
          false
        end
        # a bare String is not an acceptable payload either: keep the body as
        # a document for inspection and mark the parse as failed
        if !@hash or @hash.is String
          L.debug "failed to get json from #{c.last_effective_url}, take a look at my @doc for info; my object_id is #{object_id}"
          @html = body; to_doc
          @hash = false
        end

      elsif opts[:hash]
        if body.inline
          @hash = body.to_hash
        else
          @hash = false
          L.debug "failed to get params hash from #{c.last_effective_url}, take a look at my @doc for info; my object_id is #{object_id}"
          @html = body; to_doc
        end

      else
        @html = body; to_doc
        if opts[:eval]
          load_scripts opts[:load_scripts]
          eval_js
        end
      end
    end
    self
  end

  # Sets up location globals in the JS runtime, then evaluates every inline
  # <script>; text collected in js[:write_output] is inserted after the script
  # node, which is then removed. Scripts with only a src are fetched through
  # +frame+ when one is given.
  def eval_js(frame=nil)
    eval_string "document.location = window.location = #{@loc.to_json};\n" \
                "document.URL = document.baseURI = document.documentURI = location.href;\n" \
                "document.domain = location.host;"
    find("script").each {|n|
      L.debug n.text.strip
      if text = n.text.strip.b
        js[:write_output] = ''
        eval_string text
        if res = js[:write_output].b then n.after res end
        n.remove!
      elsif frame and n.src
        eval_string frame.get_cached expand_link n.src
      end
    }
  end

  # Evaluates +str+ in the JS runtime (created on demand). Johnson errors are
  # logged rather than raised; at debug level the offending identifier is
  # highlighted in the logged source.
  def eval_string(str)
    @js ||= Johnson::Runtime.new
    L.debug "#{@js} evaluating in #{Thread.current}\nmain: #{Thread.main}; carier: #{$CarierThread}"
    begin
      @js.evaluate(str)
    rescue Johnson::Error => e
      L.warn e.message
      L.debug {
        if m = e.message.match(/(\w+) is undefined|([\w.]+) is not a function/)
          L.clr.hl! str, /\b#{m[1] || m[2]}\b/
        end
        "\n\t#{str}"
      }
    end
  end

  # Parses @html into @doc and derives @title: a missing title falls back to
  # the page href (and is added to <head> when there is one); a title longer
  # than 40 characters is cut at a word boundary after about 30 and gets '…'.
  # Returns @doc.
  def to_doc
    @doc = @html.to_doc :forceutf
    if !(@title = @doc.title.b)
      @title = @loc.href
      @doc.at('head').prepend XML::Node('title', @title) if @doc.at('head')
    else
      if @title.cyr? and UTF2ANSI[@title].size > 40
        @title = ANSI2UTF[UTF2ANSI[@title][/.{1,30}\S*/]]+'…'
      elsif @title.size > 40
        @title = @title[/.{1,30}\S*/]+'…'
      end
    end
    @doc
  end

  # All nodes matching +xp+ (the document is parsed on first use).
  def find(xp) (@doc || to_doc).find xp end

  # First node matching +xp+ (the document is parsed on first use).
  def at(xp) (@doc || to_doc).at xp end

  def url() @loc.href end
  alias :href :url

  # Expanded, de-duplicated src values of nodes matching +links+. A non-String
  # argument is treated as a ready list of links; a String that is not a
  # valid selector is treated as a single link.
  def get_srcs(links='img')
    begin
      links = find(links).map {|e| e.src} if links.is String
    rescue XML::Error
      links = [links]
    end
    links.map {|link| expand_link link}.uniq
  end

  # Expanded src of the first node matching +link+, or nil when nothing
  # matches. A String that is not a valid selector is treated as the link.
  def get_src(link='img')
    begin
      link = at(link) && at(link).src if link.is String
    rescue XML::Error; nil
    end
    expand_link link if link
  end

  # Expanded, de-duplicated hrefs of nodes matching +links+; when the matched
  # nodes yield nothing, anchors nested under them are tried instead.
  def get_links(links='a')
    begin
      links = find(links).map {|e| e.href}.b || find(links+'//a').map {|e| e.href} if links.is String
    rescue XML::Error
      links = [links]
    end
    links.map {|link| expand_link link}.uniq
  end

  # Expanded href of the first node matching +link+, falling back to the first
  # anchor nested under it. Returns nil when neither provides an href.
  def get_link(link='a')
    begin
      if link.is String
        node = at(link)
        target = node && node.href
        if node && !target
          # the nested anchor may be absent as well; do not call #href on nil
          nested = at(link+'//a')
          target = nested && nested.href
        end
        link = target
      end
    rescue XML::Error; nil
    end
    expand_link link if link
  end
  alias :get_hrefs :get_links
  alias :links :get_links
  alias :get_href :get_link
  alias :link :get_link

  # Resolves +link+ against the page location: absolute URLs are returned
  # as-is, '//host/..' gets the page protocol, '/path' gets the page root,
  # anything else is joined to the directory of the current path.
  def expand_link(link)
    case link
      when /^\w+:\/\// then link
      when /^\/\// then @loc.protocol+link
      when /^\// then @loc.root+link
      else File.join((@loc.path.b ? File.dirname(@loc.path) : @loc.root), link)
    end
  end

  # Builds request arguments for submitting a form.
  #
  # form - selector String, :self (form whose action is the current path),
  #        or an already found form node
  # hash - values merged over the form's own inputs
  # opts - passed through as the last element of the result
  #
  # Returns [hash, multipart?, action, opts] for POST forms and
  # [action_with_query, opts] otherwise.
  # Raises XML::Error when a selector matches nothing.
  def form(form='form', hash={}, opts={})
    form = "[action=#{@loc.path.inspect}]" if form == :self
    if form.is String
      form_node = at form
      raise XML::Error, "Can't find form by xpath `#{form}` on page #{inspect}" if !form_node
    else form_node = form
    end
    hash = form_node.inputs_all.merge!(hash)
    action = expand_link(form_node.action || @loc.path)
    # a form without a method attribute is submitted with GET
    if form_node['method'].to_s.downcase == 'post'
      [hash, form_node.enctype =~ /multipart/, action, opts]
    else
      action = "#{action}#{action['?'] ? '&' : '?'}#{hash.urlencode}" if hash.b
      [action, opts]
    end
  end

  # Submits +form+ through +frame+, sending the current page as Referer.
  # A static frame pointed elsewhere is retargeted for the request and
  # restored afterwards. Returns the result of frame.exec.
  def submit(form, frame, hash={}, opts={}, &callback)
    (opts[:header] ||= {}).Referer ||= @loc.href if @loc
    query = form(form, hash, opts)

    # query[2] is the action of a POST query, query[0] the url of a GET one
    curr_target, new_target = frame.loc.href, (query[2] || query[0])
    if need_retargeting = (frame.static && curr_target != new_target)
      frame.retarget new_target
    end
    page = frame.exec(*query, &callback)
    frame.retarget curr_target, :forced if need_retargeting
    page
  end

  # Fetches every external script of the page through +frame+ and evaluates
  # it. Does nothing when +frame+ is nil/false.
  def load_scripts(frame)
    frame && frame.get_cached(*get_srcs("script[src]")).each {|js| eval_string js}
  end

end
|
732
|
+
|
733
|
+
# Builds an anonymous Page subclass whose #process retries the request while
# the given condition holds. The condition block is instance_eval'ed on the
# curl handle passed to #process, e.g. to reprocess on a non-200 response:
#   page_class = ReloadablePage do
#     @res and @res.code != 200
#   end
#
# The generated #process(curl, opts={}) first runs Page#process, then
# returns nil after calling curl.retry! when the condition is truthy,
# or self otherwise.
def ReloadablePage(&reload_condition)
  rp = Class.new Page
  # opts is optional, matching Page#process(c, opts={}); a method defined from
  # a block with two plain parameters would reject the one-argument form
  rp.send :define_method, :process do |curl, *rest|
    opts = rest.first || {}
    super(curl, opts)
    if curl.instance_eval(&reload_condition)
      curl.retry!
      nil # in case of reload_condition.call super's callback will not proceed
    else
      self
    end
  end
  rp
end
|
749
|
+
|
750
|
+
end
|
751
|
+
|
752
|
+
|
753
|
+
|
754
|
+
|
755
|
+
|
756
|
+
|
757
|
+
|
758
|
+
|
759
|
+
|
760
|
+
|
761
|
+
|
762
|
+
|
763
|
+
|
764
|
+
|
765
|
+
|
766
|
+
|