rhack 0.4.1 → 1.0.0.rc4
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +22 -0
- data/Gemfile +2 -5
- data/LICENSE +19 -15
- data/README.md +66 -26
- data/Rakefile +42 -31
- data/config/cacert.pem +3895 -0
- data/config/rhack.yml.template +40 -0
- data/ext/curb-original/curb_config.h +3 -0
- data/ext/curb-original/curb_easy.c +3 -54
- data/ext/curb-original/curb_multi.c +69 -140
- data/ext/curb/curb_multi.c +1 -1
- data/lib/rhack.rb +82 -12
- data/lib/rhack/cookie.rb +49 -0
- data/lib/rhack/curl.rb +6 -0
- data/lib/{extensions/curb.rb → rhack/curl/easy.rb} +26 -48
- data/lib/rhack/curl/global.rb +175 -0
- data/lib/rhack/curl/itt.rb +11 -0
- data/lib/rhack/curl/multi.rb +37 -0
- data/lib/rhack/curl/post_field.rb +20 -0
- data/lib/rhack/curl/response.rb +91 -0
- data/lib/rhack/dl.rb +308 -0
- data/lib/rhack/frame.rb +316 -0
- data/lib/{extensions → rhack/js}/browser/env.js +0 -0
- data/lib/{extensions → rhack/js}/browser/jquery.js +0 -0
- data/lib/{extensions → rhack/js}/browser/xmlsax.js +0 -0
- data/lib/{extensions → rhack/js}/browser/xmlw3cdom_1.js +0 -0
- data/lib/{extensions → rhack/js}/browser/xmlw3cdom_2.js +0 -0
- data/lib/rhack/js/johnson.rb +71 -0
- data/lib/rhack/page.rb +263 -0
- data/lib/rhack/proxy.rb +3 -0
- data/lib/rhack/proxy/checker.rb +1 -1
- data/lib/rhack/scout.rb +342 -0
- data/lib/rhack/scout_squad.rb +98 -0
- data/lib/rhack/services.rb +1 -464
- data/lib/rhack/services/base.rb +59 -0
- data/lib/rhack/services/examples.rb +423 -0
- data/lib/rhack/version.rb +3 -0
- data/lib/rhack_in.rb +3 -2
- data/rhack.gemspec +28 -0
- metadata +104 -85
- data/.gemtest +0 -0
- data/Gemfile.lock +0 -23
- data/Manifest.txt +0 -60
- data/ext/curb/Makefile +0 -217
- data/lib/cache.rb +0 -44
- data/lib/curl-global.rb +0 -164
- data/lib/extensions/declarative.rb +0 -153
- data/lib/extensions/johnson.rb +0 -63
- data/lib/frame.rb +0 -848
- data/lib/init.rb +0 -49
- data/lib/rhack.yml.template +0 -19
- data/lib/scout.rb +0 -589
- data/lib/words.rb +0 -25
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
@@ -0,0 +1,71 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
module Johnson
|
3
|
+
# Try to load the johnson gem and record the outcome in Enabled:
# false when the gem cannot be required, or when VERSION <= "2.0.0" is
# combined with RUBY_VERSION > "1.9"; true otherwise.
begin
  require 'johnson'
rescue LoadError
  Enabled = false
else
  Enabled = !(VERSION <= "2.0.0" and RUBY_VERSION > "1.9")
end
|
13
|
+
### JavaScript interface DOM emulation ###
|
14
|
+
|
15
|
+
class Runtime
|
16
|
+
cattr_accessor :browser
|
17
|
+
attr_accessor :thread_id
|
18
|
+
BROWSER_PATH = File.expand_path "../browser", __FILE__
|
19
|
+
|
20
|
+
class << self
|
21
|
+
|
22
|
+
# True when no JS evaluation is requested (opts[:eval] is blank), or when
# the shared browser runtime exists and its thread_id equals the object_id
# of Curl.carier_thread.
def runtime_set?(opts)
  return true unless opts[:eval].b
  @@browser and @@browser.thread_id == Curl.carier_thread.object_id
end
|
25
|
+
|
26
|
+
# Make sure a browser runtime usable by the carier thread exists.
#
# Rationale kept from the original note: the carier thread stops once
# Multi has no work and the thread is joined, so it won't last forever.
# Johnson is not thread safe, so a Runtime created in that thread becomes
# unusable after the carier thread dies. Therefore Curl.wait is not used
# until the carier has received the whole request for this Runtime.
#
# Does nothing when runtime_set?(opts) already holds. Otherwise recalls
# Curl if Curl.status is set, then builds the browser either right here
# (opts[:thread_safe]) or inside Curl.execute, polling until it is set.
def set_browser_for_curl(opts)
  return if runtime_set? opts

  if Curl.status
    Curl.recall
    Curl.debug 'recalled'
  end

  if opts[:thread_safe].b
    @@browser = new_browser(opts[:jq])
    L.debug "#@@browser initialized in #{Thread.current}\nmain: #{Thread.main}; carier: #{Curl.carier_thread}"
  else
    L.debug 'about to run carier'
    Curl.execute do
      @@browser = new_browser(opts[:jq])
      L.debug "#@@browser initialized in #{Thread.current}\nmain: #{Thread.main}; carier: #{Curl.carier_thread}"
    end
    # busy-wait until the block above has produced the runtime
    sleep 0.01 until runtime_set? opts
  end
end
|
52
|
+
|
53
|
+
# Build a fresh runtime, evaluate the bundled DOM-emulation scripts from
# BROWSER_PATH in it (plus jquery when +jq+ is truthy), give it an empty
# document and return it.
def new_browser(jq=false)
  scripts = %w{xmlw3cdom_1 xmlw3cdom_2 xmlsax env}
  scripts << 'jquery' if jq
  runtime = new
  scripts.each do |name|
    path = "#{BROWSER_PATH}/#{name}.js"
    runtime.evaluate IO.read(path), path, 1
  end
  runtime.document = ''
  runtime
end
|
62
|
+
|
63
|
+
end
|
64
|
+
|
65
|
+
# Replace the JS-side `document` with a DOMDocument built from +html+
# (converted through to_doc.to_xhtml and embedded as a quoted literal).
def document=(html)
  xhtml = html.to_doc.to_xhtml
  evaluate "var document = new DOMDocument(#{xhtml.inspect})"
end
|
68
|
+
|
69
|
+
end
|
70
|
+
|
71
|
+
end
|
data/lib/rhack/page.rb
ADDED
@@ -0,0 +1,263 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
module RHACK
|
3
|
+
|
4
|
+
# Frame( ScoutSquad( Curl::Multi <- Scout( Curl API ), Scout, ... ) ) =>
|
5
|
+
# Curl -> Johnson::Runtime -> XML::Document => Page( XML::Document ), Page, ...
|
6
|
+
|
7
|
+
class Page
|
8
|
+
# for debug, just enable L#debug, don't write tons of chaotic log-lines
|
9
|
+
__init__
|
10
|
+
attr_writer :title
|
11
|
+
attr_reader :html, :loc, :hash, :doc, :js, :curl_res, :failed
|
12
|
+
# result of the page processing that was done in the frame context
|
13
|
+
attr_accessor :res
|
14
|
+
# for johnson
|
15
|
+
@@ignore = /google|_gat|tracker|adver/i
|
16
|
+
|
17
|
+
# obj - a Curl::Easy / Scout to read a finished response from, or raw html
# loc - location hash, or anything answering parse(:uri)
# js  - JS runtime to evaluate scripts in
def initialize(obj='', loc=Hash.new(''), js=Johnson::Runtime.browser||Johnson::Runtime.new)
  loc = loc.parse(:uri) unless loc.is Hash
  @js = js
  if obj.is(Curl::Easy) || obj.kinda(Scout)
    curl = obj.kinda(Scout) ? obj.http : obj
    @html = ''
    # passing loc itself would hand #process an opts hash that returns ''
    # for any key, hence the fallback to a plain {}
    process(curl, loc.b || {})
  else
    @html = obj
    @loc = loc
  end
end
|
30
|
+
|
31
|
+
# A page is empty when its payload is blank: the html when no hash was
# requested (@hash is nil), the hash otherwise.
def empty?
  payload = @hash.nil? ? @html : @hash
  !payload.b
end
|
34
|
+
|
35
|
+
# Short description: payload kind and size for json/params pages; the
# response header (failed pages) or the short title plus html size for
# html pages.
def inspect
  unless @hash.nil?
    size = @hash ? @hash.inspect.size.bytes : 'failed to parse'
    kind = @json ? 'json' : 'params hash'
    return "<#FramePage (#{size}) #{kind}>"
  end

  summary =
    if @html.b
      label = @failed ? @curl_res.header : '«'+title(false)+'»'
      "#{label} (#{@html.size.bytes}"
    else
      '(empty'
    end
  # evaluated after the title lookup on purpose: title may build @doc
  js_mark = (' js enabled' if @js and @doc and @hash.nil?)
  "<#FramePage #{summary})#{js_mark}>"
end
|
42
|
+
|
43
|
+
# Re-tag @html in place with +encoding+ (no transcoding) and return it.
def html!(encoding='UTF-8')
  @html.force_encoding encoding
end
|
46
|
+
|
47
|
+
# Fill the page from a finished curl handle +c+ and return self.
# #process may be overridden in Page subclasses; per the original note,
# Frame does not care about the value returned by #process.
#
# opts[:json] / opts[:hash] - parse the body into @hash instead of html
# opts[:eval]               - load external scripts and evaluate inline js
def process(c, opts={})
  @loc = c.last_effective_url.parse(:uri)
  @curl_res = c.res
  L.debug "#{@loc.fullpath} -> #{@curl_res}"

  if @curl_res.code != 200
    # failed html pages keep the raw body and remember the status code;
    # failed json/hash requests leave the page untouched
    unless opts[:json] or opts[:hash]
      @html = @curl_res.body
      @failed = @curl_res.code
    end
    return self
  end

  body = @curl_res.body
  if opts[:json]
    @json = true
    @hash = begin
      body.from_json
    rescue StandardError
      false
    end
    if !@hash or @hash.is String
      L.debug "failed to get json from #{c.last_effective_url}, take a look at my @doc for info; my object_id is #{object_id}"
      @html = body
      to_doc
      @hash = false
    end
  elsif opts[:hash]
    if body.inline
      @hash = body.to_params
    else
      @hash = false
      L.debug "failed to get params hash from #{c.last_effective_url}, take a look at my @doc for info; my object_id is #{object_id}"
      @html = body
      to_doc
    end
  else
    @html = body.xml_to_utf
    to_doc
    if opts[:eval]
      load_scripts opts[:load_scripts]
      eval_js
    end
  end
  self
end
|
90
|
+
|
91
|
+
# Point the JS document/window at @loc, then walk every <script> node:
# inline code is evaluated, whatever it left in js[:write_output] is
# inserted after the node, and the node is removed; scripts with only a
# src are fetched through frame.get_cached when a frame is given.
def eval_js(frame=nil)
  eval_string "document.location = window.location = #{@loc.to_json};
document.URL = document.baseURI = document.documentURI = location.href;
document.domain = location.host;"
  find("script").each do |node|
    L.debug node.text.strip
    if text = node.text.strip.b
      js[:write_output] = ''
      eval_string text
      written = js[:write_output].b
      node.after written if written
      node.remove!
    elsif frame and node.src
      eval_string frame.get_cached(expand_link(node.src))
    end
  end
end
|
107
|
+
|
108
|
+
# Evaluate +str+ in the page runtime (created lazily). A Johnson::Error is
# not propagated: its message is logged as a warning and the offending
# source is written to the debug log, with the undefined identifier
# highlighted when the message names one.
def eval_string(str)
  @js ||= Johnson::Runtime.new
  L.debug "#{@js} evaluating in #{Thread.current}\nmain: #{Thread.main}; carier: #{Curl.carier_thread}"
  begin
    @js.evaluate(str)
  rescue Johnson::Error => e
    L.warn e.message
    L.debug do
      culprit = e.message.match(/(\w+) is undefined|([\w.]+) is not a function/)
      L.clr.hl! str, /\b#{culprit[1] || culprit[2]}\b/ if culprit
      "\n\t#{str}"
    end
  end
end
|
123
|
+
|
124
|
+
# (Re)build @doc from @html via to_doc(:forceutf) and return it.
def to_doc
  @doc = @html.to_doc(:forceutf)
end
|
127
|
+
|
128
|
+
# Page title. With full=true returns (and caches in @title) the document
# title, falling back to the page href, which is then also written into
# <head> when one exists. With full=false returns a title shortened to
# roughly 40 characters. Non-html or failed pages answer with the href.
def title(full=true)
  return @loc.href unless @hash.nil? && !@failed && @html.b

  if full
    to_doc unless defined? @doc
    if @doc.title.b
      @title = @doc.title
    else
      @title = @loc.href
      head = @doc.at('head')
      head.prepend XML::Node('title', @title) if head
      @title
    end
  else
    title true unless defined? @title
    @short_title =
      if RUBY_VERSION < '1.9' and @title.cyr? and UTF2ANSI[@title].size > 40
        ANSI2UTF[UTF2ANSI[@title][/.{1,30}\S*/][0..38]]+'…'
      elsif @title.size > 40
        @title[/.{1,30}\S*/][0..38]+'…'
      else
        @title
      end
  end
end
|
153
|
+
|
154
|
+
# All nodes matching +xp+; the document is built on first use.
def find(xp)
  document = @doc || to_doc
  document.find xp
end
|
155
|
+
|
156
|
+
# First node matching +xp+; the document is built on first use.
def at(xp)
  document = @doc || to_doc
  document.at xp
end
|
157
|
+
|
158
|
+
# Full address of the page as stored in @loc.
def url
  @loc.href
end
|
159
|
+
alias :href :url
|
160
|
+
|
161
|
+
# Expanded, de-duplicated src values. +links+ is either a selector string
# or a ready list of links; a string that raises XML::Error as a selector
# is treated as a single link.
def get_srcs(links='img')
  begin
    if links.is String
      links = find(links).map {|node| node.src}
    end
  rescue XML::Error
    links = [links]
  end
  links.map {|link| expand_link link}.uniq
end
|
169
|
+
|
170
|
+
# Expanded src of the first node matching +link+ (nil when nothing is
# found). A string that raises XML::Error as a selector is expanded as a
# link itself.
def get_src(link='img')
  begin
    if link.is String
      node = at(link)
      link = node && node.src
    end
  rescue XML::Error
    nil
  end
  expand_link link if link
end
|
177
|
+
|
178
|
+
# Expanded, de-duplicated hrefs. For a selector string the hrefs of the
# matched nodes are used; when that list is blank, the hrefs of <a>
# descendants are used instead. A string that raises XML::Error as a
# selector is treated as a single link.
def get_links(links='a')
  begin
    if links.is String
      direct = find(links).map {|node| node.href}
      links = direct.b || find(links+'//a').map {|node| node.href}
    end
  rescue XML::Error
    links = [links]
  end
  links.map {|link| expand_link link}.uniq
end
|
186
|
+
|
187
|
+
# Expanded href of the first node matching +link+, falling back to the
# first <a> descendant when the node itself has no href. Returns nil when
# nothing usable is found. A string that raises XML::Error as a selector
# is expanded as a link itself.
#
# Fix: the nested lookup is nil-checked, so a matched node without an
# href and without any <a> inside yields nil instead of NoMethodError
# (the same outcome get_links gives for that case).
def get_link(link='a')
  begin
    if link.is String
      node = at(link)
      link = if node
        node.href || ((inner = at(link+'//a')) && inner.href)
      end
    end
  rescue XML::Error
    nil
  end
  expand_link link if link
end
|
194
|
+
alias :get_hrefs :get_links
|
195
|
+
alias :links :get_links
|
196
|
+
alias :get_href :get_link
|
197
|
+
alias :link :get_link
|
198
|
+
alias :srcs :get_srcs
|
199
|
+
alias :src :get_src
|
200
|
+
|
201
|
+
# Resolve +link+ against the page location:
#   scheme://...  -> returned as is
#   //host/...    -> prefixed with @loc.protocol
#   /path         -> prefixed with @loc.root
#   anything else -> joined to the directory of @loc.path (or to
#                    @loc.root when the path is blank)
def expand_link(link)
  case link
  when /^\w+:\/\//
    link
  when /^\/\//
    @loc.protocol+link
  when /^\//
    @loc.root+link
  else
    base = @loc.path.b ? File.dirname(@loc.path) : @loc.root
    File.join(base, link)
  end
end
|
209
|
+
|
210
|
+
# Build the argument list for submitting a form.
#
# form - selector string, :self (the form whose action is this page's
#        path) or an already located form node
# hash - values merged over the inputs found in the form
# opts - passed through untouched as the last element
#
# Returns [hash, multipart_flag, action, opts] for POST forms and
# [action_with_query, opts] otherwise.
# Raises XML::Error when a selector does not resolve to a <form> node.
#
# Fix: a form without a `method` attribute is treated as GET instead of
# raising NoMethodError on nil.downcase.
def form(form='form', hash={}, opts={})
  form = "[action=#{@loc.path.inspect}]" if form == :self
  if form.is String
    form_node = at form
    raise XML::Error, "Can't find form by xpath `#{form}` on page #{inspect}" if !form_node or form_node.name != 'form'
  else
    form_node = form
  end
  hash = form_node.inputs_all.merge!(hash)
  action = expand_link(form_node.action || @loc.path)
  http_method = (form_node['method'] || 'get').downcase
  if http_method == 'post'
    [hash, form_node.enctype =~ /multipart/, action, opts]
  else
    # append the fields to the query string, respecting an existing '?'
    action = "#{action}#{action['?'] ? '&' : '?'}#{hash.urlencode}" if hash.b
    [action, opts]
  end
end
|
226
|
+
|
227
|
+
# Submit +form+ through +frame+ and return whatever frame.exec returns.
# The Referer header defaults to this page's href. A static frame is
# temporarily retargeted when the form action differs from the frame
# location, and is forced back afterwards.
def submit(form, frame, hash={}, opts={}, &callback)
  (opts[:headers] ||= {}).Referer ||= @loc.href if @loc
  query = form(form, hash, opts)

  curr_target = frame.loc.href
  # the action is element 2 of a POST query and element 0 of a GET query
  new_target = query[2] || query[0]
  need_retargeting = frame.static && curr_target != new_target
  frame.retarget new_target if need_retargeting
  page = frame.exec(*query, &callback)
  frame.retarget curr_target, :forced if need_retargeting
  page
end
|
239
|
+
|
240
|
+
# Fetch every external script of the page through frame.get_cached and
# evaluate each one. Without a frame nothing happens and the falsy
# +frame+ is returned.
def load_scripts(frame)
  return frame unless frame
  frame.get_cached(*get_srcs("script[src]")).each {|source| eval_string source}
end
|
243
|
+
|
244
|
+
end
|
245
|
+
|
246
|
+
# Build a Page subclass that re-requests the page when +reload_condition+
# holds. The condition is instance_eval'ed on the curl handle after the
# normal Page#process has run, e.g. to reprocess on a non-200 response:
#   page_class = ReloadablePage do
#     @res and @res.code != 200
#   end
#
# The generated #process returns nil after asking curl to retry! (so that
# the caller's callback does not proceed) and self otherwise.
#
# Fix: +opts+ is now optional in the generated method. A method built by
# define_method checks arity strictly, so the previous |curl, opts| block
# raised ArgumentError for process(curl), although Page#process accepts a
# single argument and the body already expected a missing opts.
def ReloadablePage(&reload_condition)
  rp = Class.new Page
  rp.send :define_method, :process do |curl, opts = nil|
    super(curl, opts || {})
    if curl.instance_eval(&reload_condition)
      curl.retry!
      nil
    else
      self
    end
  end
  rp
end
|
262
|
+
|
263
|
+
end
|
data/lib/rhack/proxy.rb
ADDED
data/lib/rhack/proxy/checker.rb
CHANGED
@@ -67,7 +67,7 @@ module RHACK
|
|
67
67
|
}
|
68
68
|
target = target.find_is(String) if !target.is String
|
69
69
|
pl.each {|pr|
|
70
|
-
sc = Interceptor.new(target, pr,
|
70
|
+
sc = Interceptor.new(target, pr, RHACK.useragents.rand, @opts)
|
71
71
|
sc.http.on_failure(&fail_proc)
|
72
72
|
@ics << sc
|
73
73
|
}
|
data/lib/rhack/scout.rb
ADDED
@@ -0,0 +1,342 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
module RHACK
|
3
|
+
|
4
|
+
class Scout
|
5
|
+
__init__
|
6
|
+
attr_accessor :timeout, :raise_err, :retry
|
7
|
+
attr_accessor :path, :root, :sld, :proxy
|
8
|
+
attr_reader :uri
|
9
|
+
attr_reader :webproxy, :last_method, :proxystr, :headers, :body, :http, :error
|
10
|
+
attr_reader :cookies, :ua, :refforge, :cookieStore, :cookieProc
|
11
|
+
|
12
|
+
DefaultHeader = {
|
13
|
+
"Expect" => "",
|
14
|
+
"Keep-Alive" => "300",
|
15
|
+
"Accept-Charset" => "windows-1251,utf-8;q=0.7,*;q=0.7",
|
16
|
+
"Accept-Language" => "ru,en-us;q=0.7,en;q=0.3",
|
17
|
+
"Connection" => "keep-alive"
|
18
|
+
}
|
19
|
+
|
20
|
+
# Raised for a proxy argument of an unexpected shape; the message shows
# the offending value, its class and the accepted formats.
class ProxyError < ArgumentError
  def initialize(proxy)
    template = "incorrect proxy: %s class %s, must be an Array
proxy format: ['127.0.0.1', '80'], [2130706433, 80], ['someproxy.com', :WebproxyModule]"
    super(template % [proxy.inspect, proxy.class])
  end
end
|
26
|
+
|
27
|
+
# Positional arguments (defaults come from get_opts): uri, proxy,
# user agent, referer forging flag; followed by an options hash read for
# :cp / :ck, :raise, :engine, :timeout and :retry.
# Raises ProxyError for a proxy of an unexpected class.
def initialize(*argv)
  uri, proxy, @ua, @refforge, opts = argv.get_opts ['http://', nil, :rand, 1]
  raise ProxyError, proxy if proxy and (!webproxy && !proxy.is(Array) or webproxy && !proxy.is(String))
  'http://' >> uri if uri !~ /^\w+:\/\//

  if proxy
    if proxy[1] and proxy[1].to_i == 0
      # the second element names a web-proxy module rather than a port
      # NOTE(review): eval on a caller-supplied name; consider a constant
      # lookup if this value can ever come from untrusted input
      @webproxy = eval("WebProxy::#{proxy[1]}")
      @proxy = proxy[0].parse(:uri).root
    else
      proxy[0] = proxy[0].to_ip if proxy[0].is Integer
      @proxy = proxy
    end
  end

  @cookies = {}
  @body = {}
  @num = []
  @cookieProc = opts[:cp] || opts[:ck]
  # named @raise_err because a @raise ivar made every `raise` call here fail
  @raise_err = opts[:raise]
  @engine = opts[:engine]
  @timeout = opts[:timeout] || @@timeout || 60
  @post_proc = Proc::NULL
  @get_proc = Proc::NULL
  @head_proc = Proc::NULL
  @put_proc = Proc::NULL
  @delete_proc = Proc::NULL
  update uri

  @retry = opts[:retry] || {}
  # a bare list of errors applies to the host of the initial uri
  @retry = {@uri.host => @retry} if @retry.is Array

  @http.cacert = @@cacert
end
|
54
|
+
|
55
|
+
# Re-point the scout at +uri+.
# For a uri without a scheme only @uri is refreshed (a leading '/' is
# prepended to the passed string in place when missing) and nil is
# returned. For an absolute uri whose root equals the current one nil is
# returned as well. Otherwise root, sld, path, the curl handle, the proxy
# string and hash-style cookies are (re)initialised and self is returned.
def update(uri)
  unless uri[/^\w+:\/\//]
    '/' >> uri if uri[0,1] != '/'
    @uri = uri.parse(:uri)
    return
  end

  @uri = uri.parse(:uri)
  return if @uri.root == @root

  @root = @uri.root
  @sld = @root[/[\w-]+\.[a-z]+$/]
  @path = @uri.fullpath

  target = @webproxy ? @proxy : @root
  if @http
    @http.url = target
  else
    @http = Curl::Easy(target)
    @http.base = self
  end

  if !@proxy
    @proxystr = 'localhost'
  else
    @http.proxy_url = @proxy*':' unless @webproxy
    @proxystr = @webproxy ? @proxy[0] : @http.proxy_url
  end

  if @cookieProc.is Hash
    # cookies given as a hash are installed once, then the flag is kept
    self.main_cks = @cookieProc
    @cookieProc = true
  end
  self
end
|
83
|
+
|
84
|
+
# "<#Class @ [proxy ~ ]root>" description of the scout.
def to_s
  via =
    if @webproxy
      "#{@proxy} ~ "
    elsif @proxy
      @proxy*':'+" ~ "
    else
      ''
    end
  "<##{self.class.self_name} @ " << via << @root+'>'
end
|
93
|
+
alias :inspect :to_s
|
94
|
+
|
95
|
+
# Take the current response from the curl handle, clear the outdated flag
# and the cached headers, and return the response.
def update_res
  @outdated = false
  fresh = @http.res
  @headers = nil
  @res = fresh
end
|
101
|
+
|
102
|
+
# Cached response, refreshed through update_res when there is none yet or
# it has been marked outdated.
def res
  return @res if @res && !@outdated
  update_res
end
|
107
|
+
|
108
|
+
# Request object attached to the current response.
def req
  res.req
end
|
109
|
+
|
110
|
+
# Multi-line debugging summary: proxy string, request target and headers,
# body, response and, when @ready is set, a trailing "Ready".
def dump
  request = {"Action"=>@root+@path} + @http.headers
  str = "IP: #{@proxystr}\nRequest: "
  str << request.dump+@body.dump+"Response: #{res}"
  str << "\nReady" if @ready
  str
end
|
116
|
+
|
117
|
+
# Normalise a request path: spaces become '+', dot-relative paths are
# expanded, the scout is re-pointed through #update, and the result is
# encoded for the web proxy when one is in use (prefixed with @root when
# the path carries no root of its own).
def fix(path)
  path = path.tr ' ', '+'
  path = expand path if path =~ /^\./
  # #update runs even without a web proxy: it refreshes @uri and may
  # prepend '/' to path in place
  rooted = update(path) || @uri.root
  return path unless @webproxy
  @webproxy.encode(rooted ? path : @root+path)
end
|
127
|
+
|
128
|
+
# Expand a dot-relative +uri+ ("./x", "../x", ".x") against the directory
# of the last effective url (or of @uri when nothing was loaded yet).
# With a web proxy and no effective url yet, +uri+ is returned unchanged.
#
# Fix: the directory and the remainder are now joined with File.join.
# Previously the "./" or "../" prefix was replaced by the bare directory
# name, so "/a/b/page" + "./c" produced "/a/bc" instead of "/a/b/c"; only
# pages directly under "/" happened to work.
def expand(uri)
  return uri if @webproxy && !@http.last_effective_url

  last_url = @http.last_effective_url
  path = (last_url ? last_url.parse(:uri) : @uri).path
  dir = File.dirname(path)
  dir = File.dirname(dir) if uri =~ /^\.\./   # "../" climbs one level up
  File.join(dir, uri.sub(/^\.\.?\//, ''))
end
|
135
|
+
|
136
|
+
# Set the request body on the curl handle and remember it in @body.
#
# Without +multipart+ the params are urlencoded into post_body.
# With +multipart+ each pair becomes a Curl::PostField; Proc values are
# called first, and the key prefix selects the field kind:
#   "f:name" - file field with a random .jpg filename; the content is the
#              value with random bytes appended
#   "p:name" - file field read from the path given as the value
#   other    - plain content field
#
# Fix: the "p:" branch used File.basename(f) with an undefined +f+
# (NameError); the file path is the value +v+.
def mkBody(params, multipart=nil)
  if multipart
    @http.multipart_post_body = @body = params.map {|k, v|
      v = v.call if v.is Proc
      if k =~ /^f:/
        Curl::PostField.file(k[2..-1], "application/octet-stream",
          "#{randstr(16, :hex)}.jpg", v+randstr )
      elsif k =~ /^p:/
        Curl::PostField.file(k[2..-1], "application/octet-stream",
          File.basename(v), read(v) )
      else
        Curl::PostField.content(k.to_s, v.to_s)
      end
    }
  else
    @http.post_body = @body = params.urlencode
  end
end
|
154
|
+
|
155
|
+
# Build the request headers for +uri+ on top of DefaultHeader: the Cookie
# line (expired cookies are dropped from the jar on the way), a forged
# Referer when @refforge is set, and the User-Agent (a random one for
# :rand).
def mkHeader(uri)
  header = DefaultHeader.dup

  if @cookieProc
    cookies = ''
    main_cks.delete_if {|_, ck| ck.use(cookies, @uri) == :expired}
    # drop the last two characters of the accumulated string
    # (presumably a trailing separator written by Cookie#use; confirm)
    header['Cookie'] = cookies[0..-3]
  end

  if @refforge
    ref = @uri.root ? uri : (@webproxy ? @http.host : @root)+uri
    # NOTE(review): (.+) is greedy, so this capture equals the whole ref
    header['Referer'] = ref.match(/(.+)[^\/]*$/)[1]
  end

  if @ua
    header['User-Agent'] = @ua == :rand ? RHACK.useragents.rand : @ua
  end
  header
end
|
169
|
+
|
170
|
+
# Feed the cookies of a response into the scout.
# +res+ is either a raw header String (every Set-Cookie line is picked
# up) or a Curl::Response (its 'cookies' entry is used). Each cookie
# string is passed to Cookie(c, self). Returns nil when there are none.
#
# Fix: the header name is compared case-insensitively with #downcase.
# The previous #downcase! returns nil when the name is already lowercase,
# so "set-cookie: ..." lines were silently ignored.
def ProcCookies(res)
  ck = []
  case res
  when String
    res.split(/\r?\n/).each {|h|
      hs = h / ': '
      ck << hs[1] if hs[0] and hs[0].downcase == 'set-cookie'
    }
  when Curl::Response
    ck = res['cookies']
  end
  return if !ck.b
  ck.each {|c| Cookie(c, self)}
  # StoreCookies if @cookieStore
end
|
185
|
+
|
186
|
+
# Turn cookie processing on.
def cp_on
  @cookieProc = true
end
|
187
|
+
# Turn cookie processing off.
def cp_off
  @cookieProc = false
end
|
188
|
+
|
189
|
+
# Cookie jar of the current host, created empty on first access.
def main_cks
  @cookies[@uri.host] ||= {}
end
|
190
|
+
# Replace the cookie jar of the current host with +cks+, encoded by the
# web proxy when one is in use, otherwise converted pair by pair through
# Cookie(k, v).
def main_cks=(cks)
  @cookies[@uri.host] =
    if @webproxy
      @webproxy.ck_encode(@root, cks)
    else
      cks.map2 {|k, v| Cookie(k, v)}
    end
end
|
195
|
+
|
196
|
+
# Decide from the retry tables (class-wide @@retry and per-scout @retry,
# both keyed by host) how to treat the failure +err+, whose first element
# is the error class.
# Returns false when no configured host matches @root; otherwise the
# result of exc.no? over the matching hosts.
#
# Example of the host selection, from the original notes:
#   ['0chan.ru', '2-ch.ru', 'www.nomer.org', 'nomer.org'].select_in('http://www.nomer.org')
#   # => ['www.nomer.org', 'nomer.org']
#
# Fix: a host configured only in @@retry no longer raises TypeError; the
# per-scout list is nil-guarded the same way the class-wide one already
# was.
def retry?(err)
  exc = (@@retry.keys + @retry.keys).select_in @root
  return false if !exc.b
  exc.no? {|e| err[0].self_name.in((@@retry[e] || []) + (@retry[e] || []))}
end
|
203
|
+
|
204
|
+
# Whether the curl handle of this scout is among the carier's requests.
def loaded?
  Curl.carier.reqs.include?(@http)
end
|
207
|
+
|
208
|
+
# Hand the curl handle over to the carier. When the first add is refused
# (falsy result) the handle is removed and added again.
# A RuntimeError other than Curl::Err::CurlError gets an explanatory
# suffix appended to its message before being re-raised.
#
# Fix: the re-add line called `Curl.сarier` spelled with a Cyrillic "с",
# a method that does not exist, so the retry path died with NoMethodError
# (which the RuntimeError rescue does not cover).
def load!
  unless Curl.carier.add @http
    Curl.carier.remove @http
    Curl.carier.add @http
  end
rescue RuntimeError => e
  e.message << ". Failed to load allready loaded? easy handler: Bad file descriptor" unless Curl::Err::CurlError === e
  raise e
end
|
217
|
+
|
218
|
+
# Configure the curl handle for +path+ and queue it on the carier.
#
# headers   - merged over the generated ones
# not_redir - when blank, 3xx responses are followed through loadGet
# relvl     - how many redirects may still be followed
# callback  - receives the curl handle on completion
#
# The failure handler is installed only if the handle has none yet.
def load(path=@path, headers={}, not_redir=1, relvl=10, &callback)
  @http.path = path = fix(path)
  @http.headers = mkHeader(path).merge!(headers)
  @http.timeout = @timeout

  @http.on_complete do |c|
    @error = nil
    @outdated = true
    ProcCookies(c.res) if @cookieProc
    # We cannot just cancel on_complete in an on_redirect block
    # because loadGet will immediately reset on_complete back
    if c.res.code.in(300..399) and !not_redir.b and (relvl -= 1) > -1 and loc = c.res.hash.location
      loadGet(loc, headers: headers, relvl: relvl, redir: true, &callback)
    elsif block_given?
      yield c
    end
  end

  unless @http.on_failure
    @http.on_failure do |c, e|
      if e[0] == Curl::Err::CurlOK
        @error = e
        # TODO: somewhere in the sources on_failure gets called with code 0,
        # apparently because of an unrelated condition, although it should not
        L.log << "Got Curl::Err::CurlOK, response was: #{c.res}"
      else
        @http.on_complete(&Proc::NULL)
        @outdated = true
        if retry? e
          L.debug "#{e[0]} -> reloading scout"
          #load uri, headers, not_redir, relvl, &callback
          load! # all params including post_body are still set
        else
          L.debug "#{e[0]} -> not reloading scout"
          raise *e if @raise_err
        end
      end
    end
  end

  load!
end
|
256
|
+
|
257
|
+
# Queue a GET request. Arguments: [uri = @path] plus options :headers,
# :redir and :relvl. A given block becomes the remembered GET callback;
# without a block the remembered one is reused.
def loadGet(*argv, &callback)
  uri, opts = argv.get_opts([@path], :headers => {}, :redir => false, :relvl => 2)
  @http.get = true
  @last_method = :get
  @get_proc = callback if callback
  callback ||= @get_proc
  load(uri, opts[:headers], !opts[:redir], opts[:relvl], &callback)
end
|
269
|
+
|
270
|
+
# Queue a DELETE request. Arguments: [uri = @path] plus options :headers,
# :redir and :relvl. A given block becomes the remembered DELETE
# callback; without a block the remembered one is reused.
def loadDelete(*argv, &callback)
  uri, opts = argv.get_opts([@path], :headers => {}, :redir => false, :relvl => 2)
  @http.delete = true
  @last_method = :delete
  @delete_proc = callback if callback
  callback ||= @delete_proc
  load(uri, opts[:headers], !opts[:redir], opts[:relvl], &callback)
end
|
282
|
+
|
283
|
+
# Queue a POST request. Arguments: [params = @body, multipart = current
# handle setting, uri = @path] plus options :headers, :redir and :relvl.
# A given block becomes the remembered POST callback; without a block the
# remembered one is reused.
def loadPost(*argv, &callback)
  hash, multipart, uri, opts = argv.get_opts([@body, @http.multipart_form_post?, @path], :headers => {}, :redir => false, :relvl => 2)
  @http.delete = false
  mkBody hash, multipart.b
  @last_method = :post
  @post_proc = callback if callback
  callback ||= @post_proc
  load(uri, opts[:headers], !opts[:redir], opts[:relvl], &callback)
end
|
295
|
+
|
296
|
+
# Queue a PUT request. Arguments: [body_or_file = @body, uri = @path]
# plus options :headers, :redir and :relvl. A given block becomes the
# remembered PUT callback; without a block the remembered one is reused.
def loadPut(*argv, &callback)
  body_or_file, uri, opts = argv.get_opts([@body, @path], :headers => {}, :redir => false, :relvl => 2)
  @http.delete = false
  @http.put_data = @body = body_or_file
  @last_method = :put
  @put_proc = callback if callback
  callback ||= @put_proc
  load(uri, opts[:headers], !opts[:redir], opts[:relvl], &callback)
end
|
309
|
+
|
310
|
+
# Queue a HEAD request, optionally emulated with a GET that stops reading
# once the headers are complete.
#
# Arguments: [uri = @path, emulate = :if_retry] followed by headers.
#   emulate == :always   - go straight to the GET emulation
#   emulate == :if_retry - real HEAD first; emulate when it completed
#                          without @error but with a non-200 code
#   anything else        - real HEAD only
# A given block becomes the remembered HEAD callback; without a block the
# remembered one is reused.
def loadHead(*argv, &callback)
  uri, emulate, headers = argv.get_opts([@path, :if_retry])
  @http.head = true if emulate != :always
  @last_method = :head
  @head_proc = callback if callback
  callback ||= @head_proc

  emu = lambda do
    @headers = ''
    @http.on_header do |h|
      @headers << h
      # the header handler answers 0 on the blank line that ends the
      # headers and the chunk size otherwise
      h == "\r\n" ? 0 : h.size
    end
    @http.get = true
    load(uri, headers) {|c| c.on_header; callback[c]}
  end

  if emulate == :always
    emu.call
  else
    load(uri, headers) do |c|
      if !@error and c.res.code != 200 and emulate == :if_retry
        emu.call
      else
        callback[c]
      end
    end
  end
end
|
339
|
+
|
340
|
+
end
|
341
|
+
|
342
|
+
end
|