rhack 0.4.1 → 1.0.0.rc4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +22 -0
- data/Gemfile +2 -5
- data/LICENSE +19 -15
- data/README.md +66 -26
- data/Rakefile +42 -31
- data/config/cacert.pem +3895 -0
- data/config/rhack.yml.template +40 -0
- data/ext/curb-original/curb_config.h +3 -0
- data/ext/curb-original/curb_easy.c +3 -54
- data/ext/curb-original/curb_multi.c +69 -140
- data/ext/curb/curb_multi.c +1 -1
- data/lib/rhack.rb +82 -12
- data/lib/rhack/cookie.rb +49 -0
- data/lib/rhack/curl.rb +6 -0
- data/lib/{extensions/curb.rb → rhack/curl/easy.rb} +26 -48
- data/lib/rhack/curl/global.rb +175 -0
- data/lib/rhack/curl/itt.rb +11 -0
- data/lib/rhack/curl/multi.rb +37 -0
- data/lib/rhack/curl/post_field.rb +20 -0
- data/lib/rhack/curl/response.rb +91 -0
- data/lib/rhack/dl.rb +308 -0
- data/lib/rhack/frame.rb +316 -0
- data/lib/{extensions → rhack/js}/browser/env.js +0 -0
- data/lib/{extensions → rhack/js}/browser/jquery.js +0 -0
- data/lib/{extensions → rhack/js}/browser/xmlsax.js +0 -0
- data/lib/{extensions → rhack/js}/browser/xmlw3cdom_1.js +0 -0
- data/lib/{extensions → rhack/js}/browser/xmlw3cdom_2.js +0 -0
- data/lib/rhack/js/johnson.rb +71 -0
- data/lib/rhack/page.rb +263 -0
- data/lib/rhack/proxy.rb +3 -0
- data/lib/rhack/proxy/checker.rb +1 -1
- data/lib/rhack/scout.rb +342 -0
- data/lib/rhack/scout_squad.rb +98 -0
- data/lib/rhack/services.rb +1 -464
- data/lib/rhack/services/base.rb +59 -0
- data/lib/rhack/services/examples.rb +423 -0
- data/lib/rhack/version.rb +3 -0
- data/lib/rhack_in.rb +3 -2
- data/rhack.gemspec +28 -0
- metadata +104 -85
- data/.gemtest +0 -0
- data/Gemfile.lock +0 -23
- data/Manifest.txt +0 -60
- data/ext/curb/Makefile +0 -217
- data/lib/cache.rb +0 -44
- data/lib/curl-global.rb +0 -164
- data/lib/extensions/declarative.rb +0 -153
- data/lib/extensions/johnson.rb +0 -63
- data/lib/frame.rb +0 -848
- data/lib/init.rb +0 -49
- data/lib/rhack.yml.template +0 -19
- data/lib/scout.rb +0 -589
- data/lib/words.rb +0 -25
data/lib/rhack/cookie.rb
ADDED
@@ -0,0 +1,49 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
module RHACK
|
3
|
+
|
4
|
+
class Cookie
|
5
|
+
__init__
|
6
|
+
|
7
|
+
# Builds a cookie in one of two ways:
#
# * <tt>Cookie.new(set_cookie_string, scout)</tt> -- when the second argument
#   is a Scout, the first is parsed as a Set-Cookie value
#   ("name=value; path=...; domain=...; expires=...") and the new cookie is
#   registered in <tt>scout.cookies</tt> under the scout's current host.
# * <tt>Cookie.new([name, data])</tt> -- +data+ may be an Array
#   (value, path, domain), a Hash exposing value/path/domain, or anything
#   else, in which case the value is taken from <tt>args[1].to_s</tt>.
#
# @path and @domain end up as Regexps (an empty Regexp matches everything);
# @string caches the "name=value; " fragment appended by #use.
def initialize(*args)
  if args[1].is Scout
    str, scout = *args
    ck = str / /;\s*/
    ck[1..-1].each {|par|
      key, val = par / '='
      # Attributes without a value (Secure, HttpOnly, stray ";") carry nothing
      # we store, and would otherwise crash on nil below.
      next unless key && val
      case key.downcase
        when 'path';    @path = (val == '/') ? // : /^#{Regexp.escape val}/
        # Strip only a leading dot: the previous /^./ removed the first
        # character of dot-less domains ("example.com" -> "xample.com").
        when 'domain';  @domain = /(^|\.)#{Regexp.escape val.sub(/^\./, '')}$/
        when 'expires'; @expires = val.to_time
      end
    }
    @name, @value = ck[0].split('=', 2)
    #@value.gsub!(/^['"]|['"]$/, '')
    #L.debug args if !@domain
    (scout.cookies[scout.uri.host] ||= {})[@name] = self
  else
    @name, cookie = args[0]
    case cookie
      when Array; @value, @path, @domain = cookie
      when Hash;  @value, @path, @domain = cookie.value, cookie.path, cookie.domain
      else @value = args[1].to_s
    end
  end
  @path ||= //
  @domain ||= //
  @string = "#{@name}=#{@value}; "
end
|
35
|
+
|
36
|
+
# Appends this cookie's "name=value; " fragment to +str+ when it applies to
# +uri+.
#
# Returns :expired when @expires is set and is not in the future. Otherwise
# returns +str+ if the fragment was appended, or nil if it was not. The path
# must match @path; the host is checked against @domain only when
# <tt>uri.root</tt> is truthy.
def use(str, uri)
  return :expired if @expires && !(@expires > Time.now)
  return nil unless uri.path[@path]

  str << @string if !uri.root || uri.host[@domain]
end
|
43
|
+
|
44
|
+
def to_s; @value end
|
45
|
+
def inspect; @value.inspect end
|
46
|
+
|
47
|
+
end
|
48
|
+
|
49
|
+
end
|
data/lib/rhack/curl.rb
ADDED
@@ -44,18 +44,22 @@ module Curl
|
|
44
44
|
set :interface, value
|
45
45
|
end
|
46
46
|
|
47
|
+
# <host>:<port>
|
47
48
|
# Stores +u+ under the :url option through #set.
def url=(u)
  set(:url, u)
end
|
50
51
|
|
52
|
+
# <host>:<port>
|
51
53
|
# Stores +url+ under the :proxy option through #set.
def proxy_url=(url)
  set(:proxy, url)
end
|
54
56
|
|
57
|
+
# <username>:<password>
|
55
58
|
# Stores +value+ under the :userpwd option through #set.
def userpwd=(value)
  set(:userpwd, value)
end
|
58
61
|
|
62
|
+
# <username>:<password>
|
59
63
|
# Stores +value+ under the :proxyuserpwd option through #set.
def proxypwd=(value)
  set(:proxyuserpwd, value)
end
|
@@ -65,59 +69,33 @@ module Curl
|
|
65
69
|
end
|
66
70
|
|
67
71
|
# Switches HEAD mode on or off.
#
# Turning it on clears :httpget and :customrequest before setting :nobody;
# turning it off only resets :nobody.
def head=(onoff)
  return set(:nobody, false) unless onoff

  set(:httpget, false)
  set(:customrequest, nil)
  set(:nobody, true)
end
|
70
80
|
|
71
81
|
def get=(onoff)
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
def to_s
|
80
|
-
raise "Cannot convert unnamed field to string" if !name
|
81
|
-
display_content = if (cp = content_proc)
|
82
|
-
cp.inspect
|
83
|
-
elsif (c = content)
|
84
|
-
"#{c[0...20].inspect}#{"… (#{c.size.bytes})" if c.size > 20}"
|
85
|
-
elsif (ln = local_name)
|
86
|
-
File.new(ln).inspect
|
87
|
-
end
|
88
|
-
"#{name}=#{display_content}"
|
89
|
-
end
|
90
|
-
|
91
|
-
end
|
92
|
-
|
93
|
-
class Multi
|
94
|
-
if method_defined? :requests
|
95
|
-
alias :reqs :requests
|
96
|
-
end
|
97
|
-
|
98
|
-
def reset
|
99
|
-
reqs.each {|k| remove k rescue()}
|
100
|
-
$Carier = Multi.new
|
101
|
-
$Carier.pipeline = true
|
102
|
-
# GC.start
|
103
|
-
end
|
104
|
-
|
105
|
-
def drop
|
106
|
-
while running > 0 do perform rescue() end
|
107
|
-
Curl.recall
|
108
|
-
end
|
109
|
-
|
110
|
-
def drop!
|
111
|
-
drop
|
112
|
-
reset if reqs.size + running > 0
|
113
|
-
end
|
114
|
-
|
115
|
-
def sheduled
|
116
|
-
0 < running and running <= reqs.size
|
82
|
+
if onoff
|
83
|
+
set :nobody, false
|
84
|
+
set :customrequest, nil
|
85
|
+
set :httpget, true
|
86
|
+
else
|
87
|
+
set :httpget, false
|
88
|
+
end
|
117
89
|
end
|
118
90
|
|
119
|
-
def
|
120
|
-
|
91
|
+
# Switches DELETE mode on or off.
#
# Turning it on clears :nobody and :httpget, then sets :customrequest to
# 'DELETE'; turning it off only clears :customrequest.
def delete=(onoff)
  return set(:customrequest, nil) unless onoff

  set(:nobody, false)
  set(:httpget, false)
  set(:customrequest, 'DELETE')
end
|
122
100
|
|
123
101
|
end
|
@@ -0,0 +1,175 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
module Curl
|
3
|
+
class << Curl
|
4
|
+
|
5
|
+
# Starts the carrier thread unless one is already alive.
#
# If a thread exists and reports a truthy status, this only logs that fact.
# Otherwise it warns with whatever #status(false) returns (if truthy), spawns
# a new thread and sleeps for a millisecond.
#
# The thread first runs the optional block given to #execute, then loops on
# <tt>@@carier.perform(true)</tt> until that returns a falsy value or raises.
# Exceptions are never re-raised inside the thread; the caught error (or nil)
# becomes the thread's value, which #wait and #status read back.
#
# +unless_allready+ is accepted but not used by this body.
def execute(unless_allready=false)
  if @@carier_thread and (alive = @@carier_thread.status)
    return L.log("Carier Thread allready started and has status #{alive}")
  end

  leftover = status(false)
  L.warn leftover if leftover
  L.log(@@carier_thread ? "Resetting Carier thread" : "Setting Carier thread up")
  @@carier_thread = thread {
    failure = nil
    begin
      yield if block_given?
    rescue => failure
      nil
    end
    unless failure
      loop {
        begin
          # Original author's note: with the `true' (idle) argument, perform
          # returns falsy only when there is nothing left to perform while
          # the carrier thread is joined.
          break unless @@carier.perform true
          L.log "All requests have been performed; idling..."
        rescue => failure
          L.log "Catched #{failure.class.name} in Carier Thread"
          # Original author's note: raising here has crashed ruby when a
          # callback raised after the multi left idle mode, so errors are
          # kept and surfaced later by #wait instead.
          break
        end
      }
    end
    L.log "Nothing to perform; recalling..."
    failure
  }
  # Original author's note: until the calling thread has slept a bit, the new
  # thread reports status "run" whether it is idling or performing.
  sleep 0.001
end
|
45
|
+
alias :run :execute
|
46
|
+
|
47
|
+
# Waits for the carrier to finish its scheduled requests.
#
# * No live carrier thread: logs, calls #execute and then #wait again.
# * Called from a thread other than the carrier's: sleeps one second, then
#   (unless there is nothing scheduled) sets @@joined and joins the carrier
#   thread. Afterwards @@joined is cleared; if the thread's value is an
#   error it is logged, the carrier is recalled and the error re-raised in
#   the calling thread; otherwise #execute is called again.
# * Called from inside the carrier thread (e.g. from a callback): runs
#   <tt>@@carier.perform</tt> directly and only warns on RuntimeError.
#
# IRB::Abort (when IRB is loaded) is treated as a keyboard recall.
def wait
  if @@carier_thread and @@carier_thread.status
    within = Thread.current == @@carier_thread
    # Original author's note: `perform' cannot be given a timeout below one
    # second in the curl binding (the thread status would always be "run"),
    # so wait exactly one second here.
    sleep 1 unless within
    # Original author's note: if the thread does Kernel.sleep, Curl.wait is
    # skipped here.
    if !@@carier.sheduled and (@@carier_thread.status == 'sleep' or within && @@carier.reqs.empty?)
      L.log "No shedule to wait"
    else
      # object_id is an Integer, so interpolate it: 'thread ' + Integer
      # raised TypeError for any non-main, non-carrier caller.
      this_thread = within ? 'it\'s thread' : Thread.main == Thread.current ? 'main thread' : "thread #{Thread.current.object_id}"
      L.log "Waiting for Carier to complete in #{this_thread}"
      begin
        L.log { "Trying to change Curl.joined #@@joined -> true from #{this_thread}" }
        if within
          L.log "calling this from one of callbacks to wait for the rest to complete"
          begin
            @@carier.perform
          rescue RuntimeError => e
            L.warn [e, e.message]
            L.info "@@carier @@carier.sheduled @@carier_thread @@carier_thread.status", binding
            L.warn "Failed to run Multi#perform: nothing to perform"
          end
        else
          @@joined = true
          @@carier_thread.join
        end
      rescue (defined?(IRB) ? IRB::Abort : NilClass)
        recall!
        L.info "Carier Thread recalled by a keyboard"
      ensure
        L.log "trying to change Curl.joined #@@joined -> false from #{this_thread}"
        if !within
          @@joined = false
          # Original author's note: calling Curl#execute from several threads
          # may cause problems here when input is not controlled (e.g. in a
          # daemonized process); do not join the carrier thread from a
          # non-main thread.
          if @@carier_thread and e = @@carier_thread.value
            # Re-raise in the waiting thread the error the carrier thread
            # captured and returned as its value.
            L.log(([e.message]+RMTools.format_trace(e.backtrace))*"\n")
            recall!
            raise e
          end
          execute
        end
      end
    end
  else
    L < "No thread to wait. I guess I should create one"
    execute
    wait
  end
end
|
103
|
+
|
104
|
+
# Kills the carrier thread, if there is one, and sleeps for a second.
# The thread reference and the carrier itself are left in place; see
# #recall! for the variant that also resets them.
def recall
  L.debug caller
  return L.log("No thread to recall") unless @@carier_thread

  L.log "Recalling Carier thread"
  @@carier_thread.kill
  sleep 1
end
|
114
|
+
alias :stop :recall
|
115
|
+
|
116
|
+
# Kills the carrier thread, forgets it, and replaces the carrier through
# #reset_carier!. Only logs when there is no thread.
def recall!
  return L.log("No thread to recall!") unless @@carier_thread

  L.warn "Recalling thread and resetting Carier!!!"
  @@carier_thread.kill
  @@carier_thread = nil
  reset_carier!
end
|
126
|
+
alias :stop! :recall!
|
127
|
+
|
128
|
+
# Empties the current carrier with Multi#clear!, swaps in a fresh Multi and
# enables pipelining on it.
# NOTE(review): `carier' is an accessor defined outside this view; presumably
# it returns @@carier -- confirm.
def reset_carier!
  @@carier.clear!
  @@carier = Multi.new
  carier.pipeline = true
  #GC.start
end
|
134
|
+
|
135
|
+
# Soft restart: #recall the carrier thread, then #execute a new one.
# Returns whatever #execute returns.
def reset
  recall
  execute
end
|
139
|
+
alias :reload :reset
|
140
|
+
|
141
|
+
# Hard restart: #recall! (which also resets the carrier), then #execute.
# Returns whatever #execute returns.
def reset!
  recall!
  execute
end
|
145
|
+
alias :reload! :reset!
|
146
|
+
|
147
|
+
# Reports on the carrier thread.
#
# * No thread: logs and returns the logger's result.
# * Live thread: logs and returns its Thread#status.
# * Finished thread: reads Thread#value. If that raises, the carrier is
#   recalled and the error re-raised when +raise_error+ is truthy; otherwise
#   the error is logged and returned. If it does not raise, a note is logged.
def status(raise_error=true)
  return L.log("There is no Carier Thread atm") unless @@carier_thread

  if (state = @@carier_thread.status)
    L.log "Carier Thread responding with status #{state}"
    return state
  end

  begin
    @@carier_thread.value
  rescue => error
    L.warn "Carier Thread has raised an exception"
    unless raise_error
      L.log "Carier Thread has catched #{error.inspect}"
      return error
    end
    recall!
    raise error
  else
    L.log "Carier Thread has exited without an exception"
  end
end
|
172
|
+
alias :st :status
|
173
|
+
|
174
|
+
end\
|
175
|
+
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
module Curl
|
3
|
+
|
4
|
+
class Multi
|
5
|
+
if method_defined? :requests
|
6
|
+
alias :reqs :requests
|
7
|
+
end
|
8
|
+
|
9
|
+
# True when at least one request is running and the running count does not
# exceed the number of registered requests.
def sheduled
  active = running
  active > 0 && active <= reqs.size
end
|
12
|
+
|
13
|
+
def inspect
|
14
|
+
rsize = reqs.size
|
15
|
+
"<#Carier #{rsize} #{rsize == 1 ? 'unit' : 'units'}, #{running} executing>"
|
16
|
+
end
|
17
|
+
|
18
|
+
|
19
|
+
# Used for Curl.reset_carier!
|
20
|
+
# Tries to remove every registered request, ignoring any StandardError a
# single removal raises. Returns what <tt>reqs.each</tt> returns.
def clear!
  reqs.each do |req|
    begin
      remove(req)
    rescue StandardError
      nil
    end
  end
end
|
23
|
+
|
24
|
+
# Emergency debug methods, not used inside a framework
|
25
|
+
# Keeps calling #perform (swallowing StandardError) while anything is still
# running, then recalls the carrier thread via Curl.recall.
def drop
  while running > 0
    begin
      perform
    rescue StandardError
      nil
    end
  end
  Curl.recall
end
|
29
|
+
|
30
|
+
# Runs #drop, then resets the carrier through Curl.reset_carier! if any
# request is still registered or running.
def drop!
  drop
  leftover = reqs.size + running
  Curl.reset_carier! if leftover > 0
end
|
34
|
+
|
35
|
+
end
|
36
|
+
|
37
|
+
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
module Curl
|
3
|
+
|
4
|
+
class PostField
|
5
|
+
|
6
|
+
def to_s
|
7
|
+
raise "Cannot convert unnamed field to string" if !name
|
8
|
+
display_content = if (cp = content_proc)
|
9
|
+
cp.inspect
|
10
|
+
elsif (c = content)
|
11
|
+
"#{c[0...20].inspect}#{"… (#{c.size.bytes})" if c.size > 20}"
|
12
|
+
elsif (ln = local_name)
|
13
|
+
File.new(ln).inspect
|
14
|
+
end
|
15
|
+
"#{name}=#{display_content}"
|
16
|
+
end
|
17
|
+
|
18
|
+
end
|
19
|
+
|
20
|
+
end
|
@@ -0,0 +1,91 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
module Curl
|
3
|
+
|
4
|
+
class Response
|
5
|
+
__init__
|
6
|
+
attr_reader :header, :code, :body, :hash, :timestamp, :time, :req, :date, :error
|
7
|
+
|
8
|
+
def to_s
|
9
|
+
str = '<#'
|
10
|
+
if @error
|
11
|
+
str << "#{@error[0].self_name}: #{@error[1]}"
|
12
|
+
else
|
13
|
+
str << (@header[/\d{3}/] == @code.to_s ? @header : "#{@header[/\S+/]} #{@code}") if @header
|
14
|
+
if @hash.location
|
15
|
+
str << ' '+@req.url if $panic
|
16
|
+
str << ' -> '+@hash.location
|
17
|
+
end
|
18
|
+
str << " (#{@body ? @body.size.bytes : 'No'} Body)"
|
19
|
+
str << " [#{@timestamp}]" if @timestamp
|
20
|
+
end
|
21
|
+
str << '>'
|
22
|
+
end
|
23
|
+
alias :inspect :to_s
|
24
|
+
|
25
|
+
# Captures the outcome of a finished +easy+ handle.
#
# When <tt>easy.base.error</tt> is set only @error is recorded. Otherwise the
# raw header text is split into the status line (@header) and a hash of
# lower-cased header names (@hash; Set-Cookie values accumulate under
# +cookies+), @timestamp/@date are derived from the Date header or the
# current time, and code, body and total time are copied from the handle.
#
# In both cases @req describes the request: effective URL, headers, the
# numeric Range if one was sent, and for POST/PUT the body (plus the
# multipart flag for POST).
def initialize(easy)
  @hash = {}
  @timestamp = @date = @header = nil
  if easy.base.error
    @error = easy.base.error
  else
    raw = easy.header_str || easy.base.headers
    if raw
      lines = raw / "\r\n"
      @header = lines.shift
      lines.each do |line|
        # NOTE(review): this splits on every ': ', so a value that itself
        # contains ': ' appears to keep only its first piece -- confirm.
        pair = line / ': '
        next unless pair[0]
        pair[0].downcase!
        if pair[0] == 'set-cookie'
          (@hash.cookies ||= []) << pair[1]
        else
          @hash[pair[0]] = pair[1]
        end
      end
      if @hash.date
        begin
          @date = @hash.date.to_time
        rescue => e
          # Unparseable Date header: fall back to the local clock for @date.
          @date = Time.now
          L < "Error #{e.class}:#{e.message} with @hash.date = #{@hash.date.inspect}"
        end
        @timestamp = @hash.date[/\d\d:\d\d:\d\d/]
      else
        @timestamp = (@date = Time.now).strftime("%H:%M:%S")
      end
    end
    @code = easy.response_code
    @body = easy.body_str.dup
    @time = easy.total_time
  end

  @req = {}
  @req.url = easy.last_effective_url
  @req.headers = easy.headers
  range = easy.headers.Range
  if range and range[/(\d+)-(\d+)/]
    @req.range = $1.to_i .. $2.to_i
  end
  if easy.base and (@req.meth = easy.base.last_method) and @req.meth.in([:post, :put])
    @req.body = easy.post_body.dup
    @req.mp = easy.multipart_form_post? if @req.meth == :post
  end
end
|
75
|
+
|
76
|
+
# Type check used in place of the generic +is+ helper.
#
# A normal response matches only Curl::Response. An error response also
# matches Array (presumably because #[] then indexes into @error -- confirm
# against callers).
#
# The error branch used to read `klass = Curl::Response', an assignment that
# made every class match; it is now a comparison.
def is(klass)
  if @error
    klass == Array || klass == Curl::Response
  else
    klass == Curl::Response
  end
end
|
83
|
+
|
84
|
+
# Indexes into @error for an error response; otherwise looks up a header by
# its lower-cased name.
def [](key_or_index)
  return @error[key_or_index] if @error

  @hash[key_or_index.downcase]
end
|
87
|
+
|
88
|
+
alias :headers :hash
|
89
|
+
end
|
90
|
+
|
91
|
+
end
|