agentx 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +22 -0
- data/Gemfile +4 -0
- data/README +31 -0
- data/Rakefile +2 -0
- data/agentx.gemspec +30 -0
- data/bin/agentx +106 -0
- data/config.example +16 -0
- data/lib/agentx.rb +53 -0
- data/lib/agentx/cache.rb +124 -0
- data/lib/agentx/console.rb +31 -0
- data/lib/agentx/history.rb +49 -0
- data/lib/agentx/html.rb +82 -0
- data/lib/agentx/request.rb +288 -0
- data/lib/agentx/response.rb +261 -0
- data/lib/agentx/session.rb +94 -0
- data/lib/agentx/version.rb +5 -0
- data/lib/agentx/xml.rb +65 -0
- metadata +173 -0
data/lib/agentx/html.rb
ADDED
@@ -0,0 +1,82 @@
|
|
1
|
+
|
2
|
+
module AgentX
|
3
|
+
|
4
|
+
# Convenience wrapper around a Nokogiri HTML document or node.
#
# Every traversal method re-wraps the nodes it returns in HTML, so calls can
# be chained (`page.first('form').all('input')`). Traversal methods that find
# nothing return nil instead of a wrapper around nil.
class HTML
  # Non-breaking space (U+00A0). Written as an explicit escape so the value
  # does not depend on how an entity in this source file was encoded, and so
  # loading the class does not require running the HTML parser.
  NBSP = "\u00A0".freeze

  # html - either a String of markup (parsed with Nokogiri::HTML) or an
  #        already-built node/document, which is stored as-is.
  def initialize(html)
    @html = html.kind_of?(String) ? Nokogiri::HTML(html) : html
  end

  # Serialized markup of the wrapped node.
  def to_html
    @html.to_html
  end

  # First node matching the CSS selector, wrapped, or nil when none matches.
  def first(selector)
    wrap(@html.css(selector).first)
  end

  # All nodes matching the CSS selector, each wrapped.
  def all(selector)
    @html.css(selector).map { |node| HTML.new(node) }
  end

  # Wrapped parent node, or nil when the node has no parent.
  def parent
    wrap(@html.parent)
  end

  # Wrapped child nodes.
  def children
    @html.children.map { |node| HTML.new(node) }
  end

  # Wrapped next sibling, or nil when there is none.
  def next
    wrap(@html.next)
  end

  # Wrapped previous sibling, or nil when there is none.
  def previous
    wrap(@html.previous)
  end

  # Hash of attribute name => attribute value for the wrapped node.
  def attributes
    @html.attribute_nodes.each_with_object({}) do |attribute, result|
      result[attribute.name] = attribute.value
    end
  end

  # Value of a single attribute; attr may be a String or a Symbol.
  def [](attr)
    @html[attr.to_s]
  end

  # Collects name => value for every <input> below this node.
  #
  # Inputs without a name attribute are skipped (they would otherwise be
  # stored under a nil key). Entries in opts override the collected values;
  # their keys are converted to Strings.
  def form_to_hash(opts={})
    fields = {}

    all('input').each do |input|
      name = input['name']
      fields[name] = input['value'] if name
    end

    opts.each { |key, value| fields[key.to_s] = value }

    fields
  end

  def to_s
    to_html
  end

  def inspect
    to_html
  end

  # The underlying (unwrapped) node.
  def to_nokogiri
    @html
  end

  # Text content with non-breaking spaces replaced by regular spaces.
  def text
    @html.text.gsub(NBSP, ' ')
  end

  private

  # Wraps node in HTML, passing nil/false through untouched.
  def wrap(node)
    node && HTML.new(node)
  end
end
|
80
|
+
|
81
|
+
end
|
82
|
+
|
@@ -0,0 +1,288 @@
|
|
1
|
+
|
2
|
+
module AgentX
|
3
|
+
|
4
|
+
# A single HTTP request bound to a session.
#
# Before a verb is issued, #headers, #body, #params, #json, #html and #xml
# act as chainable builders (they return self). Calling #head, #get, #post or
# #put performs the request; after that the first three act as plain readers.
#
# NOTE: `attr_reader :method` shadows Object#method on instances; it holds
# the upper-cased HTTP verb once a request has been issued, nil before.
class Request
  # Symbol shortcuts for common request header names.
  HEADER_MAP = {
    accept:              'Accept',
    accept_charset:      'Accept-Charset',
    accept_encoding:     'Accept-Encoding',
    accept_language:     'Accept-Language',
    accept_datetime:     'Accept-Datetime',
    authorization:       'Authorization',
    cache_control:       'Cache-Control',
    connection:          'Connection',
    cookie:              'Cookie',
    content_length:      'Content-Length',
    date:                'Date',
    expect:              'Expect',
    from:                'From',
    host:                'Host',
    if_match:            'If-Match',
    if_modified_since:   'If-Modified-Since',
    if_none_match:       'If-None-Match',
    if_range:            'If-Range',
    if_unmodified_since: 'If-Unmodified-Since',
    max_forwards:        'Max-Forwards',
    origin:              'Origin',
    pragma:              'Pragma',
    proxy_authorization: 'Proxy-Authorization',
    range:               'Range',
    referer:             'Referer',
    referrer:            'Referer',
    te:                  'TE',
    user_agent:          'User-Agent',
    upgrade:             'Upgrade',
    via:                 'Via',
    warning:             'Warning'
  }.freeze

  attr_reader :url, :method

  # session - object providing headers, base_url, relative_base_url, jar,
  #           history and [] (see the calls below).
  # url     - absolute URL, absolute path ("/x") or relative path.
  # params  - initial query parameters.
  #
  # The params and session header hashes are copied so that building or
  # performing this request never mutates the caller's objects (per-request
  # headers such as Cookie or If-None-Match stay local to the request).
  def initialize(session, url, params={})
    @session = session
    @url     = url
    @params  = params.dup
    @headers = session.headers.dup
    @body    = {}
  end

  # Builder: merges the given headers and returns self.
  # Reader (after a verb was issued): returns the header hash.
  def headers(headers={})
    return @headers if @method

    headers.each { |name, value| set_header(name, value) }
    self
  end

  # Builder: merges body fields and returns self.
  # Reader (after a verb was issued): returns the body hash.
  def body(body=nil)
    return @body if @method

    @body.merge!(body) if body
    self
  end

  # Builder: merges query parameters and returns self.
  # Reader (after a verb was issued): returns the params hash.
  def params(params=nil)
    return @params if @method

    @params.merge!(params) if params
    self
  end

  # Performs a HEAD request, merging params first.
  def head(params={})
    params(params)
    http(:head)
  end

  # Performs a GET request, merging params first.
  def get(params={})
    params(params)
    http(:get)
  end

  # Performs a POST request, merging body first.
  def post(body={})
    body(body)
    http(:post)
  end

  # Performs a PUT request, merging body first.
  def put(body={})
    body(body)
    http(:put)
  end

  # Only GET and HEAD requests are looked up in / written to the cache.
  def cacheable?
    method == 'GET' || method == 'HEAD'
  end

  # Force the response to be parsed as JSON (normally the type is determined
  # from the response headers).
  def json
    @parse_type = :json
    self
  end

  def json?
    @parse_type == :json
  end

  # Force the response to be parsed as HTML.
  def html
    @parse_type = :html
    self
  end

  def html?
    @parse_type == :html
  end

  # Force the response to be parsed as XML.
  def xml
    @parse_type = :xml
    self
  end

  def xml?
    @parse_type == :xml
  end

  def inspect
    if @method
      "(Request #{@method.to_s.upcase} #{url})"
    else
      "(Request #{url})"
    end
  end

  def host
    uri.host
  end

  def scheme
    uri.scheme
  end

  def port
    uri.port
  end

  # The session's base URL when it has one; otherwise scheme://host derived
  # from this request, including the port only when it is not the default
  # port for the scheme.
  def base_url
    return @session.base_url if @session.base_url

    if uri.port != AgentX::Session::DEFAULT_PORT[uri.scheme]
      "#{uri.scheme}://#{uri.host}:#{uri.port}"
    else
      "#{uri.scheme}://#{uri.host}"
    end
  end

  # Absolute URL of the request (memoized): absolute paths are resolved
  # against the session's base URL, http(s) URLs are used unchanged, and
  # anything else is appended to the session's relative base URL.
  def full_url
    @full_url ||= case
                  when url.start_with?('/')
                    "#{@session.base_url}#{url}"
                  when url.start_with?('http://', 'https://')
                    url
                  else "#{@session.relative_base_url}#{url}"
                  end
  end

  # MD5 digest over URL, verb, body and params; identifies this request in
  # the cache.
  def cache_key
    key = [full_url, method, body, params]
    Digest::MD5.hexdigest(Oj.dump(key, mode: :compat))
  end

  # Recorded durations formatted as milliseconds, e.g. "12.34ms".
  def timings
    (@times || {}).each_with_object({}) do |(name, seconds), formatted|
      formatted[name] = "#{'%.2f' % (seconds * 1000)}ms"
    end
  end

  def request_time
    timings[:http_request]
  end

  private

  def set_header(k, v)
    @headers[header_name(k)] = v
  end

  # Resolves a header key to its wire name: known symbols come from
  # HEADER_MAP, other symbols are dasherized (:content_type becomes
  # "Content-Type"), and strings are used verbatim.
  def header_name(k)
    HEADER_MAP.fetch(k) do
      if k.kind_of?(Symbol)
        k.to_s.split('_').map { |part| part.capitalize }.join('-')
      else
        k.to_s
      end
    end
  end

  # Runs the request, records its duration and logs a summary line.
  def http(*args)
    result = time(:http_request) { untimed_http(*args) }

    AgentX.logger.info([
      request_time,
      @session.history.last.response.code,
      method,
      full_url].join(' '))

    result
  end

  def untimed_http(verb, params=@params, body=@body, headers=@headers)
    @method = verb.to_s.upcase

    # An empty cookie string is still truthy in Ruby; only send the header
    # when the jar actually has cookies for this URL.
    cookies = HTTP::Cookie.cookie_value(@session.jar.cookies(full_url))
    set_header(:cookie, cookies) if cookies && !cookies.empty?

    response = nil

    if cacheable? && (response = Cache.read(self))
      if response.fresh?
        AgentX.logger.debug("cache fresh")
      else
        AgentX.logger.debug("cache validate")
        response = validate(response)
      end
    end

    unless response
      AgentX.logger.debug("cache miss")
      response = response_from_easy
    end

    @session.history.add(self, response)
    response.cookies.each do |cookie|
      @session.jar.parse(cookie, full_url)
    end

    # NOTE(review): any response carrying a Location header is followed with
    # a GET, regardless of status code, and there is no redirect limit.
    if response.headers.location
      @session[response.headers.location].get
    else
      response.parse(@parse_type)
    end
  end

  # Revalidates a stale cached response using conditional request headers
  # built from its Last-Modified / ETag values.
  def validate(response)
    if response.headers.last_modified
      set_header(:if_modified_since, response.headers.last_modified)
    end

    if response.headers.etag
      set_header(:if_none_match, response.headers.etag)
    end

    response_from_easy(response)
  end

  # Performs the request through Ethon and builds a Response. The optional
  # response is the cached one being revalidated. Raises RuntimeError when
  # the transfer does not complete with :ok.
  def response_from_easy(response=nil)
    easy = Ethon::Easy.new

    AgentX.logger.info("easy: #{method} #{full_url}")

    easy.http_request(full_url, method,
      params: params, body: body, headers: headers)

    unless easy.perform == :ok
      raise "Error: #{easy.return_code}"
    end

    r = Response.from_easy(easy, response)

    Cache.write(self, r) if cacheable? && r.cacheable?

    r
  end

  def uri
    @uri ||= URI(full_url)
  end

  # Yields, stores the elapsed wall-clock seconds under name and returns the
  # block's result.
  def time(name)
    @times ||= {}
    started = Time.now
    result = yield
    @times[name] = Time.now - started
    result
  end
end
|
287
|
+
end
|
288
|
+
|
@@ -0,0 +1,261 @@
|
|
1
|
+
|
2
|
+
module AgentX
|
3
|
+
|
4
|
+
# An HTTP response: status code, raw body and parsed headers, plus the
# freshness/cacheability rules used by the request cache.
class Response
  # Status codes whose responses may be stored in the cache.
  CACHEABLE_CODES = [200, 203, 300, 301, 302, 404, 410].freeze

  attr_reader :code, :body, :headers

  # headers may be a raw header String, a Hash or a Headers instance.
  def initialize(code, body, headers)
    @code, @body, @headers = code, body, Headers.parse(headers)
  end

  # Builds a Response from a performed Ethon::Easy handle.
  #
  # When response (a previously cached Response) is given and the server
  # answered 304, the cached code and body are reused and the cached headers
  # are merged with the new ones (new values win).
  def self.from_easy(easy, response=nil)
    headers = Headers.parse(easy.response_headers)

    r = new(easy.response_code, easy.response_body, headers)

    if response && r.not_modified?
      r = new(response.code, response.body, response.headers.merge(r.headers))
    end

    r
  end

  # Raw Set-Cookie header values, always as an Array.
  def cookies
    Array(headers.set_cookie)
  end

  def ok?
    code == 200
  end

  def not_modified?
    code == 304
  end

  # Truthy while the response still has a positive time-to-live.
  def fresh?
    ttl = headers.ttl
    ttl && ttl > 0
  end

  # Time at which the response stops being fresh; the epoch when no
  # lifetime information is available.
  def expires_at
    ttl = headers.ttl
    ttl ? Time.now + ttl : Time.at(0)
  end

  # A response is cacheable when it is not marked no-store, has a cacheable
  # status code and carries a validator (ETag / Last-Modified) or is fresh.
  def cacheable?
    return false if headers.cache_control && headers.cache_control.no_store?
    return false unless CACHEABLE_CODES.include?(code)

    !! (headers.etag || headers.last_modified || fresh?)
  end

  def inspect
    "(Response #{code})"
  end

  # Plain-Hash representation; the inverse of Response.from_hash.
  def to_hash
    { 'code'    => code,
      'body'    => body,
      'headers' => headers.to_hash }
  end

  def self.from_hash(h)
    new(h['code'], h['body'], h['headers'])
  end

  # Parses the body according to the forced type (:json, :html, :xml) or,
  # failing that, the Content-Type header. Returns the raw body otherwise.
  def parse(type=nil)
    case
    when type == :json || headers.json? then Oj.load(body)
    when type == :html || headers.html? then HTML.new(body)
    when type == :xml  || headers.xml?  then XML.new(body)
    else body
    end
  end

  # Case-insensitive view over response headers with cache-related helpers.
  class Headers
    include Enumerable

    # hash - header name => value (String, or Array for repeated headers).
    #
    # The hash is copied, so the caller's object is never modified. A Date
    # header with the current time is added only when the response has no
    # Date header under any capitalization, so a lower-case "date" sent by
    # the server is not replaced.
    def initialize(hash={})
      @hash = hash.dup
      @normalized = {}
      @hash.each do |k, v|
        @normalized[k.to_s.downcase] = v
      end

      unless @normalized['date']
        now = Time.now.httpdate
        @hash['Date'] = now
        @normalized['date'] = now
      end
    end

    # Accepts a Hash, a Headers instance (returned as-is) or a raw header
    # String. Status lines are skipped; repeated headers are collected into
    # an Array.
    def self.parse(str)
      return new(str) if str.kind_of?(Hash)
      return str if str.kind_of?(Headers)

      hash = {}

      str.to_s.each_line do |line|
        next if line =~ /^HTTP\/\d/
        k, v = line.split(':', 2).map { |s| s.strip }

        next unless k && v

        if hash[k]
          hash[k] = Array(hash[k])
          hash[k] << v
        else
          hash[k] = v
        end
      end

      new(hash)
    end

    # New Headers with the given headers layered on top of these.
    def merge(headers)
      Headers.new(to_hash.merge(headers.to_hash))
    end

    def inspect
      "(Headers #{@normalized})"
    end

    # Yields (lower-cased name, value) pairs.
    def each(&block)
      @normalized.each(&block)
    end

    # Looks a header up by exact name first, then case-insensitively.
    def [](k)
      @hash[k] || @normalized[k.to_s.downcase]
    end

    def server
      @normalized['server']
    end

    def date
      Time.parse(@normalized['date'])
    end

    # Age in seconds: the Age header when present, otherwise the time
    # elapsed since the Date header.
    def age
      (@normalized['age'] || (Time.now - date)).to_i
    end

    # Parsed Expires header, or nil when absent. Unparseable values such as
    # "0" or "-1" are treated as already expired (the epoch) instead of
    # raising.
    def expires
      raw = @normalized['expires']
      return nil unless raw

      begin
        Time.parse(raw)
      rescue ArgumentError, TypeError
        Time.at(0)
      end
    end

    # Freshness lifetime in seconds: s-maxage, then max-age, then the
    # distance to Expires; nil when none of them is available.
    def max_age
      cc = cache_control
      from_cache_control = cc && (cc.shared_max_age || cc.max_age)
      return from_cache_control if from_cache_control

      expiry = expires
      expiry && (expiry - Time.now)
    end

    # Remaining lifetime in seconds (may be negative), nil when unknown.
    def ttl
      lifetime = max_age
      lifetime && (lifetime - age)
    end

    def last_modified
      @normalized['last-modified']
    end

    def etag
      @normalized['etag']
    end

    def content_type
      @normalized['content-type']
    end

    # Truthy when the content type mentions "json".
    def json?
      content_type.to_s.downcase['json']
    end

    # Truthy when the content type mentions "html".
    def html?
      content_type.to_s.downcase['html']
    end

    # Truthy when the content type mentions "xml".
    def xml?
      content_type.to_s.downcase['xml']
    end

    # Content-Length as an Integer, or nil when absent or not all digits.
    def content_length
      length = @normalized['content-length']
      length.to_i if length.kind_of?(String) && length =~ /\A\d+\z/
    end

    # Parsed Cache-Control header (memoized), or nil when absent.
    def cache_control
      if @normalized['cache-control']
        @cache_control ||= CacheControl.parse(@normalized['cache-control'])
      end
    end

    def set_cookie
      @normalized['set-cookie']
    end

    def location
      @normalized['location']
    end

    # Headers with their original capitalization.
    def to_hash
      @hash
    end

    # Parsed Cache-Control directives: lower-cased directive name => value
    # String, or true for directives without a value.
    class CacheControl
      attr_reader :directives

      def initialize(directives)
        @directives = directives
      end

      def public?
        @directives['public']
      end

      def private?
        @directives['private']
      end

      def no_store?
        @directives['no-store']
      end

      def no_cache?
        @directives['no-cache']
      end

      def must_revalidate?
        @directives['must-revalidate']
      end

      # max-age in seconds, or nil when absent or given without a value.
      def max_age
        seconds('max-age')
      end

      # s-maxage in seconds, or nil. The directive is spelled "s-maxage";
      # the "s-max-age" spelling is still honoured as a fallback.
      def shared_max_age
        seconds('s-maxage') || seconds('s-max-age')
      end

      def to_s
        directives.map { |k,v| v == true ? k : "#{k}=#{v}" }.join(', ')
      end

      def inspect
        "(CacheControl #{directives})"
      end

      # Parses a Cache-Control value (String, or Array of Strings for a
      # repeated header). Empty segments are ignored and only the first "="
      # separates a directive from its value.
      def self.parse(s)
        h = {}

        Array(s).join(',').gsub(' ', '').split(',').each do |p|
          k, v = p.split('=', 2)
          next if k.nil? || k.empty?

          h[k.downcase] = (v || true)
        end

        new(h)
      end

      private

      # Integer value of a delta-seconds directive; surrounding quotes are
      # tolerated. nil when the directive is absent or has no value.
      def seconds(name)
        value = @directives[name]
        return nil if value.nil? || value == true

        value.to_s.delete('"').to_i
      end
    end
  end
end
|
259
|
+
|
260
|
+
end
|
261
|
+
|