rhack 0.4.1 → 1.0.0.rc4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +22 -0
- data/Gemfile +2 -5
- data/LICENSE +19 -15
- data/README.md +66 -26
- data/Rakefile +42 -31
- data/config/cacert.pem +3895 -0
- data/config/rhack.yml.template +40 -0
- data/ext/curb-original/curb_config.h +3 -0
- data/ext/curb-original/curb_easy.c +3 -54
- data/ext/curb-original/curb_multi.c +69 -140
- data/ext/curb/curb_multi.c +1 -1
- data/lib/rhack.rb +82 -12
- data/lib/rhack/cookie.rb +49 -0
- data/lib/rhack/curl.rb +6 -0
- data/lib/{extensions/curb.rb → rhack/curl/easy.rb} +26 -48
- data/lib/rhack/curl/global.rb +175 -0
- data/lib/rhack/curl/itt.rb +11 -0
- data/lib/rhack/curl/multi.rb +37 -0
- data/lib/rhack/curl/post_field.rb +20 -0
- data/lib/rhack/curl/response.rb +91 -0
- data/lib/rhack/dl.rb +308 -0
- data/lib/rhack/frame.rb +316 -0
- data/lib/{extensions → rhack/js}/browser/env.js +0 -0
- data/lib/{extensions → rhack/js}/browser/jquery.js +0 -0
- data/lib/{extensions → rhack/js}/browser/xmlsax.js +0 -0
- data/lib/{extensions → rhack/js}/browser/xmlw3cdom_1.js +0 -0
- data/lib/{extensions → rhack/js}/browser/xmlw3cdom_2.js +0 -0
- data/lib/rhack/js/johnson.rb +71 -0
- data/lib/rhack/page.rb +263 -0
- data/lib/rhack/proxy.rb +3 -0
- data/lib/rhack/proxy/checker.rb +1 -1
- data/lib/rhack/scout.rb +342 -0
- data/lib/rhack/scout_squad.rb +98 -0
- data/lib/rhack/services.rb +1 -464
- data/lib/rhack/services/base.rb +59 -0
- data/lib/rhack/services/examples.rb +423 -0
- data/lib/rhack/version.rb +3 -0
- data/lib/rhack_in.rb +3 -2
- data/rhack.gemspec +28 -0
- metadata +104 -85
- data/.gemtest +0 -0
- data/Gemfile.lock +0 -23
- data/Manifest.txt +0 -60
- data/ext/curb/Makefile +0 -217
- data/lib/cache.rb +0 -44
- data/lib/curl-global.rb +0 -164
- data/lib/extensions/declarative.rb +0 -153
- data/lib/extensions/johnson.rb +0 -63
- data/lib/frame.rb +0 -848
- data/lib/init.rb +0 -49
- data/lib/rhack.yml.template +0 -19
- data/lib/scout.rb +0 -589
- data/lib/words.rb +0 -25
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
@@ -0,0 +1,71 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
module Johnson
|
3
|
+
begin
|
4
|
+
require 'johnson'
|
5
|
+
rescue LoadError
|
6
|
+
Enabled = false
|
7
|
+
else
|
8
|
+
if VERSION <= "2.0.0" and RUBY_VERSION > "1.9"
|
9
|
+
Enabled = false
|
10
|
+
else Enabled = true
|
11
|
+
end
|
12
|
+
end
|
13
|
+
### JavaScript interface DOM emulation ###
|
14
|
+
|
15
|
+
class Runtime
|
16
|
+
cattr_accessor :browser
|
17
|
+
attr_accessor :thread_id
|
18
|
+
BROWSER_PATH = File.expand_path "../browser", __FILE__
|
19
|
+
|
20
|
+
class << self
|
21
|
+
|
22
|
+
def runtime_set?(opts)
|
23
|
+
!opts[:eval].b or (@@browser and @@browser.thread_id == Curl.carier_thread.object_id)
|
24
|
+
end
|
25
|
+
|
26
|
+
# CarierThread breaks if Multi has no work && CarierThread
|
27
|
+
# is joined, so it won't last forever.
|
28
|
+
#
|
29
|
+
# Johnson is not thread safe =>
|
30
|
+
# Runtime created in this thread will become unusable after
|
31
|
+
# CarierThread dies.
|
32
|
+
#
|
33
|
+
# So we don't call Curl.wait until Carier has received the whole
|
34
|
+
# request for this Runtime.
|
35
|
+
def set_browser_for_curl(opts)
|
36
|
+
unless runtime_set? opts
|
37
|
+
if Curl.status
|
38
|
+
Curl.recall
|
39
|
+
Curl.debug 'recalled'
|
40
|
+
end
|
41
|
+
if opts[:thread_safe].b
|
42
|
+
@@browser = new_browser(opts[:jq])
|
43
|
+
L.debug "#@@browser initialized in #{Thread.current}\nmain: #{Thread.main}; carier: #{Curl.carier_thread}"
|
44
|
+
else
|
45
|
+
L.debug 'about to run carier'
|
46
|
+
Curl.execute {@@browser = new_browser(opts[:jq])
|
47
|
+
L.debug "#@@browser initialized in #{Thread.current}\nmain: #{Thread.main}; carier: #{Curl.carier_thread}"}
|
48
|
+
sleep 0.01 until runtime_set? opts
|
49
|
+
end
|
50
|
+
end
|
51
|
+
end
|
52
|
+
|
53
|
+
def new_browser(jq=false)
|
54
|
+
rt = new
|
55
|
+
%w{xmlw3cdom_1 xmlw3cdom_2 xmlsax env}.concat(jq ? ['jquery'] : []).each {|f|
|
56
|
+
path = "#{BROWSER_PATH}/#{f}.js"
|
57
|
+
rt.evaluate IO.read(path), path, 1
|
58
|
+
}
|
59
|
+
rt.document = ''
|
60
|
+
rt
|
61
|
+
end
|
62
|
+
|
63
|
+
end
|
64
|
+
|
65
|
+
def document=(html)
|
66
|
+
evaluate "var document = new DOMDocument(#{html.to_doc.to_xhtml.inspect})"
|
67
|
+
end
|
68
|
+
|
69
|
+
end
|
70
|
+
|
71
|
+
end
|
data/lib/rhack/page.rb
ADDED
@@ -0,0 +1,263 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
module RHACK
|
3
|
+
|
4
|
+
# Frame( ScoutSquad( Curl::Multi <- Scout( Curl API ), Scout, ... ) ) =>
|
5
|
+
# Curl -> Johnson::Runtime -> XML::Document => Page( XML::Document ), Page, ...
|
6
|
+
|
7
|
+
class Page
|
8
|
+
# for debug, just enable L#debug, don't write tons of chaotic log-lines
|
9
|
+
__init__
|
10
|
+
attr_writer :title
|
11
|
+
attr_reader :html, :loc, :hash, :doc, :js, :curl_res, :failed
|
12
|
+
# result of the page processing performed in the frame context
|
13
|
+
attr_accessor :res
|
14
|
+
# for johnson
|
15
|
+
@@ignore = /google|_gat|tracker|adver/i
|
16
|
+
|
17
|
+
def initialize(obj='', loc=Hash.new(''), js=Johnson::Runtime.browser||Johnson::Runtime.new)
|
18
|
+
loc = loc.parse:uri if !loc.is Hash
|
19
|
+
@js = js
|
20
|
+
if obj.is Curl::Easy or obj.kinda Scout
|
21
|
+
c = obj.kinda(Scout) ? obj.http : obj
|
22
|
+
@html = ''
|
23
|
+
# just (c, loc) would pass to #process opts variable that returns '' on any key
|
24
|
+
process(c, loc.b || {})
|
25
|
+
else
|
26
|
+
@html = obj
|
27
|
+
@loc = loc
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
31
|
+
def empty?
|
32
|
+
!(@hash.nil? ? @html : @hash).b
|
33
|
+
end
|
34
|
+
|
35
|
+
def inspect
|
36
|
+
if !@hash.nil?
|
37
|
+
"<#FramePage (#{@hash ? @hash.inspect.size.bytes : 'failed to parse'}) #{@json ? 'json' : 'params hash'}>"
|
38
|
+
else
|
39
|
+
"<#FramePage #{@html.b ? "#{@failed ? @curl_res.header : '«'+title(false)+'»'} (#{@html.size.bytes}" : '(empty'})#{' js enabled' if @js and @doc and @hash.nil?}>"
|
40
|
+
end
|
41
|
+
end
|
42
|
+
|
43
|
+
def html!(encoding='UTF-8')
|
44
|
+
@html.force_encoding(encoding)
|
45
|
+
end
|
46
|
+
|
47
|
+
# We can then alternate #process in Page subclasses
|
48
|
+
# Frame doesn't mind about value returned by #process
|
49
|
+
def process(c, opts={})
|
50
|
+
@loc = c.last_effective_url.parse:uri
|
51
|
+
@curl_res = c.res
|
52
|
+
L.debug "#{@loc.fullpath} -> #{@curl_res}"
|
53
|
+
if @curl_res.code == 200
|
54
|
+
body = @curl_res.body
|
55
|
+
if opts[:json]
|
56
|
+
@json = true
|
57
|
+
@hash = begin; body.from_json
|
58
|
+
rescue StandardError
|
59
|
+
false
|
60
|
+
end
|
61
|
+
if !@hash or @hash.is String
|
62
|
+
L.debug "failed to get json from #{c.last_effective_url}, take a look at my @doc for info; my object_id is #{object_id}"
|
63
|
+
@html = body; to_doc
|
64
|
+
@hash = false
|
65
|
+
end
|
66
|
+
|
67
|
+
elsif opts[:hash]
|
68
|
+
if body.inline
|
69
|
+
@hash = body.to_params
|
70
|
+
else
|
71
|
+
@hash = false
|
72
|
+
L.debug "failed to get params hash from #{c.last_effective_url}, take a look at my @doc for info; my object_id is #{object_id}"
|
73
|
+
@html = body; to_doc
|
74
|
+
end
|
75
|
+
|
76
|
+
else
|
77
|
+
@html = body.xml_to_utf
|
78
|
+
to_doc
|
79
|
+
if opts[:eval]
|
80
|
+
load_scripts opts[:load_scripts]
|
81
|
+
eval_js
|
82
|
+
end
|
83
|
+
end
|
84
|
+
elsif !(opts[:json] or opts[:hash])
|
85
|
+
@html = @curl_res.body
|
86
|
+
@failed = @curl_res.code
|
87
|
+
end
|
88
|
+
self
|
89
|
+
end
|
90
|
+
|
91
|
+
def eval_js(frame=nil)
|
92
|
+
eval_string "document.location = window.location = #{@loc.to_json};
|
93
|
+
document.URL = document.baseURI = document.documentURI = location.href;
|
94
|
+
document.domain = location.host;"
|
95
|
+
find("script").each {|n|
|
96
|
+
L.debug n.text.strip
|
97
|
+
if text = n.text.strip.b
|
98
|
+
js[:write_output] = ''
|
99
|
+
eval_string text
|
100
|
+
if res = js[:write_output].b then n.after res end
|
101
|
+
n.remove!
|
102
|
+
elsif frame and n.src
|
103
|
+
eval_string frame.get_cached expand_link n.src
|
104
|
+
end
|
105
|
+
}
|
106
|
+
end
|
107
|
+
|
108
|
+
def eval_string(str)
|
109
|
+
@js ||= Johnson::Runtime.new
|
110
|
+
L.debug "#{@js} evaluating in #{Thread.current}\nmain: #{Thread.main}; carier: #{Curl.carier_thread}"
|
111
|
+
begin
|
112
|
+
@js.evaluate(str)
|
113
|
+
rescue Johnson::Error => e
|
114
|
+
L.warn e.message
|
115
|
+
L.debug {
|
116
|
+
if m = e.message.match(/(\w+) is undefined|([\w.]+) is not a function/)
|
117
|
+
L.clr.hl! str, /\b#{m[1] || m[2]}\b/
|
118
|
+
end
|
119
|
+
"\n\t#{str}"
|
120
|
+
}
|
121
|
+
end
|
122
|
+
end
|
123
|
+
|
124
|
+
def to_doc
|
125
|
+
@doc = @html.to_doc :forceutf
|
126
|
+
end
|
127
|
+
|
128
|
+
def title(full=true)
|
129
|
+
if @hash.nil? and !@failed and @html.b
|
130
|
+
if full
|
131
|
+
to_doc unless defined? @doc
|
132
|
+
if @doc.title.b
|
133
|
+
@title = @doc.title
|
134
|
+
else
|
135
|
+
@title = @loc.href
|
136
|
+
@doc.at('head').prepend XML::Node('title', @title) if @doc.at('head')
|
137
|
+
@title
|
138
|
+
end
|
139
|
+
else
|
140
|
+
title true unless defined? @title
|
141
|
+
if RUBY_VERSION < '1.9' and @title.cyr? and UTF2ANSI[@title].size > 40
|
142
|
+
@short_title = ANSI2UTF[UTF2ANSI[@title][/.{1,30}\S*/][0..38]]+'…'
|
143
|
+
elsif @title.size > 40
|
144
|
+
@short_title = @title[/.{1,30}\S*/][0..38]+'…'
|
145
|
+
else
|
146
|
+
@short_title = @title
|
147
|
+
end
|
148
|
+
end
|
149
|
+
else
|
150
|
+
@loc.href
|
151
|
+
end
|
152
|
+
end
|
153
|
+
|
154
|
+
def find(xp) (@doc || to_doc).find xp end
|
155
|
+
|
156
|
+
def at(xp) (@doc || to_doc).at xp end
|
157
|
+
|
158
|
+
def url() @loc.href end
|
159
|
+
alias :href :url
|
160
|
+
|
161
|
+
def get_srcs(links='img')
|
162
|
+
begin
|
163
|
+
links = find(links).map {|e| e.src} if links.is String
|
164
|
+
rescue XML::Error
|
165
|
+
links = [links]
|
166
|
+
end
|
167
|
+
links.map {|link| expand_link link}.uniq
|
168
|
+
end
|
169
|
+
|
170
|
+
def get_src(link='img')
|
171
|
+
begin
|
172
|
+
link = at(link) && at(link).src if link.is String
|
173
|
+
rescue XML::Error; nil
|
174
|
+
end
|
175
|
+
expand_link link if link
|
176
|
+
end
|
177
|
+
|
178
|
+
def get_links(links='a')
|
179
|
+
begin
|
180
|
+
links = find(links).map {|e| e.href}.b || find(links+'//a').map {|e| e.href} if links.is String
|
181
|
+
rescue XML::Error
|
182
|
+
links = [links]
|
183
|
+
end
|
184
|
+
links.map {|link| expand_link link}.uniq
|
185
|
+
end
|
186
|
+
|
187
|
+
# Resolves a single link of the page to its expanded form.
#
# link - a selector String (default 'a') or an already known link value.
#        For a String the first matching node is looked up; its href is used,
#        or, when the node has no href, the href of the first <a> below it.
#        If the lookup raises XML::Error the String itself is treated as the link.
#
# Returns the result of #expand_link, or nil when no link could be found.
def get_link(link='a')
  begin
    if link.is String
      node = at(link)
      href = node && node.href
      if node && !href
        # fall back to a nested anchor; it may be missing as well
        nested = at(link+'//a')
        href = nested && nested.href
      end
      link = href
    end
  rescue XML::Error; nil
  end
  expand_link link if link
end
|
194
|
+
alias :get_hrefs :get_links
|
195
|
+
alias :links :get_links
|
196
|
+
alias :get_href :get_link
|
197
|
+
alias :link :get_link
|
198
|
+
alias :srcs :get_srcs
|
199
|
+
alias :src :get_src
|
200
|
+
|
201
|
+
def expand_link(link)
|
202
|
+
case link
|
203
|
+
when /^\w+:\/\// then link
|
204
|
+
when /^\/\// then @loc.protocol+link
|
205
|
+
when /^\// then @loc.root+link
|
206
|
+
else File.join((@loc.path.b ? File.dirname(@loc.path) : @loc.root), link)
|
207
|
+
end
|
208
|
+
end
|
209
|
+
|
210
|
+
# Builds the argument list for a request that submits a form of this page.
#
# form - selector String locating the form, :self to select the form whose
#        action equals the current @loc.path, or an already located form node.
# hash - values merged into the form's own inputs (form_node.inputs_all).
# opts - passed through untouched as the last element of the result.
#
# Returns [params, enctype =~ /multipart/, action, opts] when the form's
# method is "post" (case-insensitive) and [action_with_query, opts] otherwise.
# Raises XML::Error when a String selector does not resolve to a node named 'form'.
def form(form='form', hash={}, opts={})
  form = "[action=#{@loc.path.inspect}]" if form == :self
  if form.is String
    form_node = at form
    raise XML::Error, "Can't find form by xpath `#{form}` on page #{inspect}" if !form_node or form_node.name != 'form'
  else form_node = form
  end
  hash = form_node.inputs_all.merge!(hash)
  action = expand_link(form_node.action || @loc.path)
  # A form may have no method attribute at all; #to_s keeps the comparison
  # from raising on nil and lets such a form take the non-post branch.
  if form_node['method'].to_s.downcase == 'post'
    [hash, form_node.enctype =~ /multipart/, action, opts]
  else
    action = "#{action}#{action['?'] ? '&' : '?'}#{hash.urlencode}" if hash.b
    [action, opts]
  end
end
|
226
|
+
|
227
|
+
def submit(form, frame, hash={}, opts={}, &callback)
|
228
|
+
(opts[:headers] ||= {}).Referer ||= @loc.href if @loc
|
229
|
+
query = form(form, hash, opts)
|
230
|
+
|
231
|
+
curr_target, new_target = frame.loc.href, (query[2] || query[0])
|
232
|
+
if need_retargeting = (frame.static && curr_target != new_target)
|
233
|
+
frame.retarget new_target
|
234
|
+
end
|
235
|
+
page = frame.exec(*query, &callback)
|
236
|
+
frame.retarget curr_target, :forced if need_retargeting
|
237
|
+
page
|
238
|
+
end
|
239
|
+
|
240
|
+
# Evaluates every external script referenced by the page.
#
# frame - object used to fetch the script sources through #get_cached;
#         when it is nil or false nothing happens and that value is returned.
#
# Returns whatever #each returns on the collection of fetched sources.
def load_scripts(frame)
  return frame unless frame
  sources = frame.get_cached(*get_srcs("script[src]"))
  sources.each {|js| eval_string js}
end
|
243
|
+
|
244
|
+
end
|
245
|
+
|
246
|
+
# using reprocessing of page in case of non-200 response:
|
247
|
+
# page_class = ReloadablePage do
|
248
|
+
# @res and @res.code != 200
|
249
|
+
# end
|
250
|
+
def ReloadablePage(&reload_condition)
|
251
|
+
rp = Class.new Page
|
252
|
+
rp.send :define_method, :process do |curl, opts|
|
253
|
+
super(curl, opts || {})
|
254
|
+
if curl.instance_eval &reload_condition
|
255
|
+
curl.retry!
|
256
|
+
nil # in case of reload_condition.call super's callback will not proceed
|
257
|
+
else self
|
258
|
+
end
|
259
|
+
end
|
260
|
+
rp
|
261
|
+
end
|
262
|
+
|
263
|
+
end
|
data/lib/rhack/proxy.rb
ADDED
data/lib/rhack/proxy/checker.rb
CHANGED
@@ -67,7 +67,7 @@ module RHACK
|
|
67
67
|
}
|
68
68
|
target = target.find_is(String) if !target.is String
|
69
69
|
pl.each {|pr|
|
70
|
-
sc = Interceptor.new(target, pr,
|
70
|
+
sc = Interceptor.new(target, pr, RHACK.useragents.rand, @opts)
|
71
71
|
sc.http.on_failure(&fail_proc)
|
72
72
|
@ics << sc
|
73
73
|
}
|
data/lib/rhack/scout.rb
ADDED
@@ -0,0 +1,342 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
module RHACK
|
3
|
+
|
4
|
+
class Scout
|
5
|
+
__init__
|
6
|
+
attr_accessor :timeout, :raise_err, :retry
|
7
|
+
attr_accessor :path, :root, :sld, :proxy
|
8
|
+
attr_reader :uri
|
9
|
+
attr_reader :webproxy, :last_method, :proxystr, :headers, :body, :http, :error
|
10
|
+
attr_reader :cookies, :ua, :refforge, :cookieStore, :cookieProc
|
11
|
+
|
12
|
+
DefaultHeader = {
|
13
|
+
"Expect" => "",
|
14
|
+
"Keep-Alive" => "300",
|
15
|
+
"Accept-Charset" => "windows-1251,utf-8;q=0.7,*;q=0.7",
|
16
|
+
"Accept-Language" => "ru,en-us;q=0.7,en;q=0.3",
|
17
|
+
"Connection" => "keep-alive"
|
18
|
+
}
|
19
|
+
|
20
|
+
class ProxyError < ArgumentError
|
21
|
+
def initialize proxy
|
22
|
+
super "incorrect proxy: %s class %s, must be an Array
|
23
|
+
proxy format: ['127.0.0.1', '80'], [2130706433, 80], ['someproxy.com', :WebproxyModule]"%[proxy.inspect, proxy.class]
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
27
|
+
def initialize(*argv)
|
28
|
+
uri, proxy, @ua, @refforge, opts = argv.get_opts ['http://', nil, :rand, 1]
|
29
|
+
raise ProxyError, proxy if proxy and (!webproxy && !proxy.is(Array) or webproxy && !proxy.is(String))
|
30
|
+
'http://' >> uri if uri !~ /^\w+:\/\//
|
31
|
+
if proxy
|
32
|
+
if proxy[1] and proxy[1].to_i == 0
|
33
|
+
@webproxy = eval("WebProxy::#{proxy[1]}")
|
34
|
+
@proxy = proxy[0].parse(:uri).root
|
35
|
+
else
|
36
|
+
proxy[0] = proxy[0].to_ip if proxy[0].is Integer
|
37
|
+
@proxy = proxy
|
38
|
+
end
|
39
|
+
end
|
40
|
+
@cookies = {}
|
41
|
+
@body = {}
|
42
|
+
@num = []
|
43
|
+
@cookieProc = opts[:cp] || opts[:ck]
|
44
|
+
@raise_err = opts[:raise] # no way to use @raise id, it makes any 'raise' call here fail
|
45
|
+
@engine = opts[:engine]
|
46
|
+
@timeout = opts[:timeout] || @@timeout || 60
|
47
|
+
@post_proc = @get_proc = @head_proc = @put_proc = @delete_proc = Proc::NULL
|
48
|
+
update uri
|
49
|
+
@retry = opts[:retry] || {}
|
50
|
+
@retry = {@uri.host => @retry} if @retry.is Array
|
51
|
+
|
52
|
+
@http.cacert = @@cacert
|
53
|
+
end
|
54
|
+
|
55
|
+
def update(uri)
|
56
|
+
if !uri[/^\w+:\/\//]
|
57
|
+
'/' >> uri if uri[0,1] != '/'
|
58
|
+
@uri = uri.parse:uri
|
59
|
+
return
|
60
|
+
end
|
61
|
+
@uri = uri.parse:uri
|
62
|
+
return if @uri.root == @root
|
63
|
+
@root = @uri.root
|
64
|
+
@sld = @root[/[\w-]+\.[a-z]+$/]
|
65
|
+
@path = @uri.fullpath
|
66
|
+
if @http
|
67
|
+
@http.url = @webproxy ? @proxy : @root
|
68
|
+
else
|
69
|
+
@http = Curl::Easy(@webproxy ? @proxy : @root)
|
70
|
+
@http.base = self
|
71
|
+
end
|
72
|
+
if @proxy
|
73
|
+
@http.proxy_url = @proxy*':' if !@webproxy
|
74
|
+
@proxystr = @webproxy ? @proxy[0] : @http.proxy_url
|
75
|
+
else @proxystr = 'localhost'
|
76
|
+
end
|
77
|
+
if @cookieProc.is Hash
|
78
|
+
self.main_cks = @cookieProc
|
79
|
+
@cookieProc = true
|
80
|
+
end
|
81
|
+
self
|
82
|
+
end
|
83
|
+
|
84
|
+
def to_s
|
85
|
+
str = "<##{self.class.self_name} @ "
|
86
|
+
if @webproxy
|
87
|
+
str << "#{@proxy} ~ "
|
88
|
+
elsif @proxy
|
89
|
+
str << @proxy*':'+" ~ "
|
90
|
+
end
|
91
|
+
str << @root+'>'
|
92
|
+
end
|
93
|
+
alias :inspect :to_s
|
94
|
+
|
95
|
+
# Re-reads the response from the easy handle (@http.res), stores it in @res,
# clears the outdated flag and drops the collected @headers.
#
# Returns the fresh response.
def update_res
  @outdated = false
  (@res = @http.res).tap { @headers = nil }
end
|
101
|
+
|
102
|
+
# Returns the cached response while it is present and not marked outdated;
# otherwise refreshes it through #update_res.
def res
  return @res if @res && !@outdated
  update_res
end
|
107
|
+
|
108
|
+
def req; res.req end
|
109
|
+
|
110
|
+
def dump
|
111
|
+
str = "IP: #{@proxystr}\nRequest: "
|
112
|
+
str << ({"Action"=>@root+@path} + @http.headers).dump+@body.dump+"Response: #{res}"
|
113
|
+
str << "\nReady" if @ready
|
114
|
+
str
|
115
|
+
end
|
116
|
+
|
117
|
+
def fix(path)
|
118
|
+
path = path.tr ' ', '+'
|
119
|
+
path = expand path if path =~ /^\./
|
120
|
+
if update(path) or @uri.root
|
121
|
+
path = @webproxy.encode(path) if @webproxy
|
122
|
+
else
|
123
|
+
path = @webproxy.encode(@root+path) if @webproxy
|
124
|
+
end
|
125
|
+
path
|
126
|
+
end
|
127
|
+
|
128
|
+
# Expands a dot-relative path ("./x", "../x") against the directory of the
# last effective URL, or of @uri when nothing has been loaded yet.
#
# When a webproxy is in use and there is no last effective URL, the path is
# returned unchanged.
#
# uri - relative path String.
#
# Returns the expanded path String.
def expand(uri)
  if !@webproxy || @http.last_effective_url
    path = (@http.last_effective_url ? @http.last_effective_url.parse(:uri) : @uri).path
    # "../" climbs one directory above the current document's directory
    base = File.split(uri =~ /^\.\./ ? File.split(path)[0] : path)[0]
    # File.join supplies the separator between directory and the remainder,
    # which plain prefix substitution omitted for any directory but "/"
    return File.join(base, uri.sub(/^\.\.?\//, ''))
  end
  uri
end
|
135
|
+
|
136
|
+
# Builds the request body from +params+ and assigns it both to @body and to
# the easy handle (@http).
#
# params    - collection of field name => value pairs. A Proc value is called
#             and its result is used instead. In multipart mode key prefixes
#             select file fields:
#               "f:<name>" - value is the file content; it is sent under a
#                            "<randstr(16, :hex)>.jpg" filename with the
#                            result of randstr appended to the content;
#               "p:<name>" - value is a path; the field is named after its
#                            basename and filled with read(value)
#                            (presumably the file content - helper is defined elsewhere).
# multipart - truthy to build a multipart body, otherwise params.urlencode
#             is used as a plain post body.
#
# Returns the assigned body.
def mkBody(params, multipart=nil)
  if multipart
    @http.multipart_post_body = @body = params.map {|k, v|
      v = v.call if v.is Proc
      if k =~ /^f:/
        Curl::PostField.file(k[2..-1], "application/octet-stream",
          "#{randstr(16, :hex)}.jpg", v+randstr )
      elsif k =~ /^p:/
        # the path lives in the value; there is no other local to take it from
        Curl::PostField.file(k[2..-1], "application/octet-stream",
          File.basename(v), read(v) )
      else
        Curl::PostField.content(k.to_s, v.to_s)
      end
    }
  else
    @http.post_body = @body = params.urlencode
  end
end
|
154
|
+
|
155
|
+
def mkHeader(uri)
|
156
|
+
header = DefaultHeader.dup
|
157
|
+
if @cookieProc
|
158
|
+
cookies = ''
|
159
|
+
main_cks.each {|k, v| main_cks.delete k if v.use(cookies, @uri) == :expired}
|
160
|
+
header['Cookie'] = cookies[0..-3]
|
161
|
+
end
|
162
|
+
if @refforge
|
163
|
+
ref = @uri.root ? uri : (@webproxy ? @http.host : @root)+uri
|
164
|
+
header['Referer'] = ref.match(/(.+)[^\/]*$/)[1]
|
165
|
+
end
|
166
|
+
header['User-Agent'] = @ua == :rand ? RHACK.useragents.rand : @ua if @ua
|
167
|
+
header
|
168
|
+
end
|
169
|
+
|
170
|
+
# Extracts cookies from a response and registers each of them via Cookie().
#
# res - either a raw header String (one "Name: value" header per line) or a
#       Curl::Response, from which res['cookies'] is taken.
#
# Returns nil when no cookies were found, otherwise the cookie collection.
def ProcCookies(res)
  ck = []
  case res
  when String
    res.split(/\r?\n/).each {|h|
      # split on the first ": " only, so a cookie value containing ": " stays whole
      name, value = h.split(': ', 2)
      # non-bang downcase: downcase! returns nil for an already lowercase name
      ck << value if name and name.downcase == 'set-cookie'
    }
  when Curl::Response
    ck = res['cookies']
  end
  return if !ck.b
  ck.each {|c| Cookie(c, self)}
  # StoreCookies if @cookieStore
end
|
185
|
+
|
186
|
+
def cp_on() @cookieProc = true end
|
187
|
+
def cp_off() @cookieProc = false end
|
188
|
+
|
189
|
+
def main_cks() @cookies[@uri.host] ||= {} end
|
190
|
+
def main_cks=(cks)
|
191
|
+
@cookies[@uri.host] = @webproxy ?
|
192
|
+
@webproxy.ck_encode(@root, cks) :
|
193
|
+
cks.map2 {|k, v| Cookie(k, v)}
|
194
|
+
end
|
195
|
+
|
196
|
+
def retry?(err)
|
197
|
+
# exc = ['0chan.ru', '2-ch.ru', 'www.nomer.org', 'nomer.org'].select_in('http://www.nomer.org') = ['www.nomer.org', 'nomer.org']
|
198
|
+
exc = (@@retry.keys + @retry.keys).select_in @root
|
199
|
+
return false if !exc.b
|
200
|
+
# ['www.nomer.org', 'nomer.org'].every {|www| 'TimeoutError'.in({'nomer.org' => 'TimeoutError'}[www])} ?
|
201
|
+
exc.no? {|e| err[0].self_name.in((@@retry[e] || []) + @retry[e])}
|
202
|
+
end
|
203
|
+
|
204
|
+
def loaded?
|
205
|
+
Curl.carier.reqs.include? @http
|
206
|
+
end
|
207
|
+
|
208
|
+
# Puts this scout's easy handle (@http) onto the shared Curl carier.
#
# If the carier's #add returns a falsy value, the handle is removed and added
# once more.
#
# A RuntimeError raised on the way is re-raised; unless it is a
# Curl::Err::CurlError, a hint about an already loaded handle is appended to
# its message first.
def load!
  unless Curl.carier.add @http
    Curl.carier.remove @http
    # NOTE: the receiver must be spelled with a Latin "c"; a look-alike
    # Cyrillic "с" here makes the retry raise NoMethodError.
    Curl.carier.add @http
  end
rescue RuntimeError => e
  e.message << ". Failed to load allready loaded? easy handler: Bad file descriptor" unless Curl::Err::CurlError === e
  raise e
end
|
217
|
+
|
218
|
+
def load(path=@path, headers={}, not_redir=1, relvl=10, &callback)
|
219
|
+
@http.path = path = fix(path)
|
220
|
+
@http.headers = mkHeader(path).merge!(headers)
|
221
|
+
@http.timeout = @timeout
|
222
|
+
|
223
|
+
@http.on_complete {|c|
|
224
|
+
@error = nil
|
225
|
+
@outdated = true
|
226
|
+
ProcCookies c.res if @cookieProc
|
227
|
+
# We cannot just cancel on_complete in on_redirect block
|
228
|
+
# because loadGet will immediately reset on_complete back
|
229
|
+
if c.res.code.in(300..399) and !not_redir.b and (relvl -= 1) > -1 and loc = c.res.hash.location
|
230
|
+
loadGet(loc, headers: headers, relvl: relvl, redir: true, &callback)
|
231
|
+
elsif block_given?
|
232
|
+
yield c
|
233
|
+
end
|
234
|
+
}
|
235
|
+
@http.on_failure {|c, e|
|
236
|
+
if e[0] == Curl::Err::CurlOK
|
237
|
+
@error = e
|
238
|
+
# TODO: somewhere in the sources on_failure gets called with code 0, apparently because of an unrelated condition, though it should not be
|
239
|
+
L.log << "Got Curl::Err::CurlOK, response was: #{c.res}"
|
240
|
+
else
|
241
|
+
@http.on_complete &Proc::NULL
|
242
|
+
@outdated = true
|
243
|
+
if retry? e
|
244
|
+
L.debug "#{e[0]} -> reloading scout"
|
245
|
+
#load uri, headers, not_redir, relvl, &callback
|
246
|
+
load! # all params including post_body are still set
|
247
|
+
else
|
248
|
+
L.debug "#{e[0]} -> not reloading scout"
|
249
|
+
raise *e if @raise_err
|
250
|
+
end
|
251
|
+
end
|
252
|
+
} if !@http.on_failure
|
253
|
+
|
254
|
+
load!
|
255
|
+
end
|
256
|
+
|
257
|
+
def loadGet(*argv, &callback)
|
258
|
+
uri, opts = argv.get_opts [@path],
|
259
|
+
:headers => {}, :redir => false, :relvl => 2
|
260
|
+
@http.get = true
|
261
|
+
@last_method = :get
|
262
|
+
if block_given?
|
263
|
+
@get_proc = callback
|
264
|
+
else#if @http.callback != @get_proc
|
265
|
+
callback = @get_proc
|
266
|
+
end
|
267
|
+
load(uri, opts[:headers], !opts[:redir], opts[:relvl], &callback)
|
268
|
+
end
|
269
|
+
|
270
|
+
def loadDelete(*argv, &callback)
|
271
|
+
uri, opts = argv.get_opts [@path],
|
272
|
+
:headers => {}, :redir => false, :relvl => 2
|
273
|
+
@http.delete = true
|
274
|
+
@last_method = :delete
|
275
|
+
if block_given?
|
276
|
+
@delete_proc = callback
|
277
|
+
else#if @http.callback != @post_proc
|
278
|
+
callback = @delete_proc
|
279
|
+
end
|
280
|
+
load(uri, opts[:headers], !opts[:redir], opts[:relvl], &callback)
|
281
|
+
end
|
282
|
+
|
283
|
+
def loadPost(*argv, &callback)
|
284
|
+
hash, multipart, uri, opts = argv.get_opts [@body, @http.multipart_form_post?, @path], :headers => {}, :redir => false, :relvl => 2
|
285
|
+
@http.delete = false
|
286
|
+
mkBody hash, multipart.b
|
287
|
+
@last_method = :post
|
288
|
+
if block_given?
|
289
|
+
@post_proc = callback
|
290
|
+
else#if @http.callback != @post_proc
|
291
|
+
callback = @post_proc
|
292
|
+
end
|
293
|
+
load(uri, opts[:headers], !opts[:redir], opts[:relvl], &callback)
|
294
|
+
end
|
295
|
+
|
296
|
+
def loadPut(*argv, &callback)
|
297
|
+
body_or_file, uri, opts = argv.get_opts [@body, @path],
|
298
|
+
:headers => {}, :redir => false, :relvl => 2
|
299
|
+
@http.delete = false
|
300
|
+
@http.put_data = @body = body_or_file
|
301
|
+
@last_method = :put
|
302
|
+
if block_given?
|
303
|
+
@put_proc = callback
|
304
|
+
else#if @http.callback != @post_proc
|
305
|
+
callback = @put_proc
|
306
|
+
end
|
307
|
+
load(uri, opts[:headers], !opts[:redir], opts[:relvl], &callback)
|
308
|
+
end
|
309
|
+
|
310
|
+
def loadHead(*argv, &callback)
|
311
|
+
uri, emulate, headers = argv.get_opts [@path, :if_retry]
|
312
|
+
@http.head = true if emulate != :always
|
313
|
+
@last_method = :head
|
314
|
+
if block_given?
|
315
|
+
@head_proc = callback
|
316
|
+
else#if @http.callback != @head_proc
|
317
|
+
callback = @head_proc
|
318
|
+
end
|
319
|
+
emu = lambda {
|
320
|
+
@headers = ''
|
321
|
+
@http.on_header {|h|
|
322
|
+
@headers << h
|
323
|
+
h == "\r\n" ? 0 : h.size
|
324
|
+
}
|
325
|
+
@http.get = true
|
326
|
+
load(uri, headers) {|c| c.on_header; callback[c]}
|
327
|
+
}
|
328
|
+
if emulate != :always
|
329
|
+
load(uri, headers) {|c|
|
330
|
+
if !@error and c.res.code != 200 and emulate == :if_retry
|
331
|
+
emu.call
|
332
|
+
else
|
333
|
+
callback[c]
|
334
|
+
end
|
335
|
+
}
|
336
|
+
else emu.call
|
337
|
+
end
|
338
|
+
end
|
339
|
+
|
340
|
+
end
|
341
|
+
|
342
|
+
end
|