agentx 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +22 -0
- data/Gemfile +4 -0
- data/README +31 -0
- data/Rakefile +2 -0
- data/agentx.gemspec +30 -0
- data/bin/agentx +106 -0
- data/config.example +16 -0
- data/lib/agentx.rb +53 -0
- data/lib/agentx/cache.rb +124 -0
- data/lib/agentx/console.rb +31 -0
- data/lib/agentx/history.rb +49 -0
- data/lib/agentx/html.rb +82 -0
- data/lib/agentx/request.rb +288 -0
- data/lib/agentx/response.rb +261 -0
- data/lib/agentx/session.rb +94 -0
- data/lib/agentx/version.rb +5 -0
- data/lib/agentx/xml.rb +65 -0
- metadata +173 -0
data/lib/agentx/html.rb
ADDED
@@ -0,0 +1,82 @@
|
|
1
|
+
|
2
|
+
module AgentX

  # Convenience wrapper around a Nokogiri HTML document or node.
  #
  # Traversal methods return further HTML wrappers (or nil when there is no
  # such node), so calls can be chained without touching Nokogiri directly.
  class HTML
    # Non-breaking space (U+00A0), the character Nokogiri yields for `&nbsp;`.
    # Spelled as an escape so the constant does not depend on an invisible
    # character surviving in the source file.
    NBSP = "\u00A0".freeze

    # @param html [String, Object] raw markup (parsed with Nokogiri::HTML) or
    #   an already-parsed document/node, which is stored as given.
    def initialize(html)
      @html = html.kind_of?(String) ? Nokogiri::HTML(html) : html
    end

    # @return [String] markup of the wrapped node.
    def to_html
      @html.to_html
    end

    # @param selector [String] CSS selector.
    # @return [HTML, nil] first match, or nil when nothing matches.
    def first(selector)
      wrap(@html.css(selector).first)
    end

    # @param selector [String] CSS selector.
    # @return [Array<HTML>] every match, wrapped.
    def all(selector)
      @html.css(selector).map { |node| HTML.new(node) }
    end

    # @return [HTML, nil] the parent node, or nil when there is none.
    def parent
      wrap(@html.parent)
    end

    # @return [Array<HTML>] the child nodes, wrapped.
    def children
      @html.children.map { |node| HTML.new(node) }
    end

    # @return [HTML, nil] the next sibling, or nil when there is none.
    def next
      wrap(@html.next)
    end

    # @return [HTML, nil] the previous sibling, or nil when there is none.
    def previous
      wrap(@html.previous)
    end

    # @return [Hash{String => String}] attribute names mapped to values.
    def attributes
      @html.attribute_nodes.each_with_object({}) do |attr_node, result|
        result[attr_node.name] = attr_node.value
      end
    end

    # @param attr [String, Symbol] attribute name.
    # @return [String, nil] the attribute value.
    def [](attr)
      @html[attr.to_s]
    end

    # Collects the `input` elements below this node into a name => value hash.
    #
    # Inputs without a name are skipped: they would otherwise all collapse
    # onto a single nil key.
    #
    # @param opts [Hash] extra fields; keys are stringified and override
    #   values taken from the markup.
    # @return [Hash]
    def form_to_hash(opts={})
      fields = {}

      all('input').each do |input|
        name = input['name']
        next if name.nil? || name.empty?

        fields[name] = input['value']
      end

      opts.each { |key, value| fields[key.to_s] = value }
      fields
    end

    def to_s
      to_html
    end

    def inspect
      to_html
    end

    # @return [Object] the underlying Nokogiri document or node.
    def to_nokogiri
      @html
    end

    # @return [String] text content with non-breaking spaces turned into
    #   ordinary spaces.
    def text
      @html.text.gsub(NBSP, ' ')
    end

    private

    # Wraps a node, passing nil through so callers can test for "no node".
    def wrap(node)
      node && HTML.new(node)
    end
  end

end
|
82
|
+
|
@@ -0,0 +1,288 @@
|
|
1
|
+
|
2
|
+
module AgentX

  # A single HTTP request issued through a session.
  #
  # Until a verb method (#head, #get, #post, #put) has been called, #headers,
  # #body and #params act as chainable setters that return the request.
  # Afterwards they act as readers.
  class Request
    attr_reader :url, :method

    # Symbol shortcuts accepted by #headers, mapped to canonical header names.
    HEADER_MAP = {
      accept:              'Accept',
      accept_charset:      'Accept-Charset',
      accept_encoding:     'Accept-Encoding',
      accept_language:     'Accept-Language',
      accept_datetime:     'Accept-Datetime',
      authorization:       'Authorization',
      cache_control:       'Cache-Control',
      connection:          'Connection',
      cookie:              'Cookie',
      content_length:      'Content-Length',
      date:                'Date',
      expect:              'Expect',
      from:                'From',
      host:                'Host',
      if_match:            'If-Match',
      if_modified_since:   'If-Modified-Since',
      if_none_match:       'If-None-Match',
      if_range:            'If-Range',
      if_unmodified_since: 'If-Unmodified-Since',
      max_forwards:        'Max-Forwards',
      origin:              'Origin',
      pragma:              'Pragma',
      proxy_authorization: 'Proxy-Authorization',
      range:               'Range',
      referer:             'Referer',
      referrer:            'Referer',
      te:                  'TE',
      user_agent:          'User-Agent',
      upgrade:             'Upgrade',
      via:                 'Via',
      warning:             'Warning'
    }.freeze

    # @param session [Object] owning session; this class reads its #headers,
    #   #base_url, #relative_base_url, #jar, #history and #[].
    # @param url [String] absolute URL, host-relative path ("/x") or a path
    #   relative to the session's relative base URL.
    # @param params [Hash] initial query parameters.
    def initialize(session, url, params={})
      @session = session
      @url = url
      # Work on copies: the setters below mutate these hashes in place, and
      # per-request headers (cookies, conditional validators) must not end up
      # in the session's defaults or in the caller's hash.
      @params = params.dup
      @headers = session.headers.dup
      @body = {}
    end

    # Before the request is sent: merges +headers+ and returns self.
    # After it is sent: returns the header hash.
    def headers(headers={})
      return @headers if @method

      headers.each { |name, value| set_header(name, value) }
      self
    end

    # Before the request is sent: merges +body+ and returns self.
    # After it is sent: returns the body hash.
    def body(body=nil)
      return @body if @method

      @body.merge!(body) if body
      self
    end

    # Before the request is sent: merges +params+ and returns self.
    # After it is sent: returns the params hash.
    def params(params=nil)
      return @params if @method

      @params.merge!(params) if params
      self
    end

    def head(params={})
      self.params(params)
      http(:head)
    end

    def get(params={})
      self.params(params)
      http(:get)
    end

    def post(body={})
      self.body(body)
      http(:post)
    end

    def put(body={})
      self.body(body)
      http(:put)
    end

    # @return [Boolean] true for GET and HEAD requests.
    def cacheable?
      %w[GET HEAD].include?(method)
    end

    # Force the response to be parsed as JSON (normally the type is
    # determined from the response headers).
    def json
      @parse_type = :json

      self
    end

    def json?
      @parse_type == :json
    end

    # Force the response to be parsed as HTML.
    def html
      @parse_type = :html

      self
    end

    def html?
      @parse_type == :html
    end

    # Force the response to be parsed as XML.
    def xml
      @parse_type = :xml

      self
    end

    def xml?
      @parse_type == :xml
    end

    def inspect
      if @method
        "(Request #{@method.to_s.upcase} #{url})"
      else
        "(Request #{url})"
      end
    end

    def host
      uri.host
    end

    def scheme
      uri.scheme
    end

    def port
      uri.port
    end

    # @return [String] the session's base URL when it has one, otherwise
    #   scheme://host (plus the port when it is not the scheme's default).
    def base_url
      return @session.base_url if @session.base_url

      if uri.port != AgentX::Session::DEFAULT_PORT[uri.scheme]
        "#{uri.scheme}://#{uri.host}:#{uri.port}"
      else
        "#{uri.scheme}://#{uri.host}"
      end
    end

    # @return [String] the URL resolved against the session (memoized).
    def full_url
      @full_url ||= case
        when url.start_with?('/')
          "#{@session.base_url}#{url}"
        when url.start_with?('http://', 'https://')
          url
        else "#{@session.relative_base_url}#{url}"
      end
    end

    # @return [String] MD5 hex digest over URL, verb, body and params.
    def cache_key
      parts = [full_url, method, body, params]
      Digest::MD5.hexdigest(Oj.dump(parts, mode: :compat))
    end

    # @return [Hash] recorded timings formatted as strings like "12.34ms".
    def timings
      (@times || {}).each_with_object({}) do |(name, seconds), formatted|
        formatted[name] = "#{'%.2f' % (seconds * 1000)}ms"
      end
    end

    def request_time
      timings[:http_request]
    end

    private

    def set_header(k, v)
      @headers[HEADER_MAP[k] || k.to_s] = v
    end

    # Performs the request, records its duration and logs one summary line.
    def http(*args)
      result = nil
      time(:http_request) do
        result = untimed_http(*args)
      end
      AgentX.logger.info([
        request_time,
        @session.history.last.response.code,
        method,
        full_url].join(' '))
      result
    end

    # Sends the request (or serves it from the cache), records it in the
    # session history, stores response cookies and follows redirects.
    #
    # The optional arguments are kept for signature compatibility; the
    # request is built from the instance state.
    def untimed_http(verb, params=@params, body=@body, headers=@headers)
      @method = verb.to_s.upcase

      cookies = HTTP::Cookie.cookie_value(@session.jar.cookies(full_url))
      set_header(:cookie, cookies) if cookies

      response = nil

      if cacheable? && (response = Cache.read(self))
        if response.fresh?
          AgentX.logger.debug("cache fresh")
        else
          AgentX.logger.debug("cache validate")
          response = validate(response)
        end
      end

      unless response
        AgentX.logger.debug("cache miss")
        response = response_from_easy
      end

      @session.history.add(self, response)
      response.cookies.each do |cookie|
        @session.jar.parse(cookie, full_url)
      end

      location = response.headers.location

      # Location is only a redirect target on 3xx responses; e.g. a
      # 201 Created uses it to name the new resource and must not be followed.
      # NOTE(review): there is no cap on the number of hops, so a redirect
      # loop will recurse until the stack overflows.
      if location && redirect?(response)
        @session[location].get
      else
        response.parse(@parse_type)
      end
    end

    # True when the response status is in the 3xx range.
    def redirect?(response)
      response.code.to_i.between?(300, 399)
    end

    # Revalidates a stale cached response with a conditional request.
    def validate(response)
      if response.headers.last_modified
        set_header(:if_modified_since, response.headers.last_modified)
      end

      if response.headers.etag
        set_header(:if_none_match, response.headers.etag)
      end

      response_from_easy(response)
    end

    # Executes the request with Ethon and writes cacheable results to Cache.
    #
    # @param response [Response, nil] cached response being revalidated.
    # @raise [RuntimeError] when the transfer does not finish with :ok.
    def response_from_easy(response=nil)
      easy = Ethon::Easy.new

      AgentX.logger.info("easy: #{method} #{full_url}")

      easy.http_request(full_url, method,
        params: params, body: body, headers: headers)

      unless easy.perform == :ok
        raise "Error: #{easy.return_code}"
      end

      fetched = Response.from_easy(easy, response)

      Cache.write(self, fetched) if cacheable? && fetched.cacheable?

      fetched
    end

    def uri
      @uri ||= URI(full_url)
    end

    # Runs the block, storing its wall-clock duration (seconds) under +name+.
    def time(name)
      @times ||= {}
      started = Time.now
      result = yield
      @times[name] = Time.now - started
      result
    end

  end
end
|
288
|
+
|
@@ -0,0 +1,261 @@
|
|
1
|
+
|
2
|
+
module AgentX
|
3
|
+
|
4
|
+
class Response
|
5
|
+
attr_reader :code, :body, :headers
|
6
|
+
|
7
|
+
def initialize(code, body, headers)
|
8
|
+
@code, @body, @headers = code, body, Headers.parse(headers)
|
9
|
+
end
|
10
|
+
|
11
|
+
def self.from_easy(easy, response=nil)
|
12
|
+
headers = Headers.parse(easy.response_headers)
|
13
|
+
|
14
|
+
r = new(easy.response_code, easy.response_body, headers)
|
15
|
+
|
16
|
+
if response && r.not_modified?
|
17
|
+
r = new(response.code, response.body, response.headers.merge(r.headers))
|
18
|
+
end
|
19
|
+
|
20
|
+
r
|
21
|
+
end
|
22
|
+
|
23
|
+
def cookies
|
24
|
+
Array(headers.set_cookie || [])
|
25
|
+
end
|
26
|
+
|
27
|
+
def ok?
|
28
|
+
code == 200
|
29
|
+
end
|
30
|
+
|
31
|
+
def not_modified?
|
32
|
+
code == 304
|
33
|
+
end
|
34
|
+
|
35
|
+
def fresh?
|
36
|
+
headers.ttl && headers.ttl > 0
|
37
|
+
end
|
38
|
+
|
39
|
+
def expires_at
|
40
|
+
headers.ttl ? Time.now + headers.ttl : Time.at(0)
|
41
|
+
end
|
42
|
+
|
43
|
+
CACHEABLE_CODES = [200, 203, 300, 301, 302, 404, 410]
|
44
|
+
|
45
|
+
def cacheable?
|
46
|
+
return false if headers.cache_control && headers.cache_control.no_store?
|
47
|
+
return false unless CACHEABLE_CODES.include?(code)
|
48
|
+
|
49
|
+
!! (headers.etag || headers.last_modified || fresh?)
|
50
|
+
end
|
51
|
+
|
52
|
+
def inspect
|
53
|
+
"(Response #{code})"
|
54
|
+
end
|
55
|
+
|
56
|
+
def to_hash
|
57
|
+
{ 'code' => code,
|
58
|
+
'body' => body,
|
59
|
+
'headers' => headers.to_hash }
|
60
|
+
end
|
61
|
+
|
62
|
+
def self.from_hash(h)
|
63
|
+
new(h['code'], h['body'], h['headers'])
|
64
|
+
end
|
65
|
+
|
66
|
+
def parse(type=nil)
|
67
|
+
case
|
68
|
+
when type == :json || headers.json? then Oj.load(body)
|
69
|
+
when type == :html || headers.html? then HTML.new(body)
|
70
|
+
when type == :xml || headers.xml? then XML.new(body)
|
71
|
+
else body
|
72
|
+
end
|
73
|
+
end
|
74
|
+
|
75
|
+
class Headers
|
76
|
+
include Enumerable
|
77
|
+
|
78
|
+
def initialize(hash={})
|
79
|
+
@hash = hash
|
80
|
+
@hash['Date'] ||= Time.now.httpdate
|
81
|
+
@normalized = {}
|
82
|
+
@hash.each do |k,v|
|
83
|
+
@normalized[k.to_s.downcase] = v
|
84
|
+
end
|
85
|
+
end
|
86
|
+
|
87
|
+
def self.parse(str)
|
88
|
+
return new(str) if str.kind_of?(Hash)
|
89
|
+
return str if str.kind_of?(Headers)
|
90
|
+
|
91
|
+
hash = {}
|
92
|
+
|
93
|
+
str.lines.each do |line|
|
94
|
+
next if line =~ /^HTTP\/\d/
|
95
|
+
k, v = line.split(':', 2).map { |s| s.strip }
|
96
|
+
|
97
|
+
if k && v
|
98
|
+
if hash[k]
|
99
|
+
hash[k] = Array(hash[k])
|
100
|
+
hash[k] << v
|
101
|
+
else
|
102
|
+
hash[k] = v
|
103
|
+
end
|
104
|
+
end
|
105
|
+
end
|
106
|
+
|
107
|
+
new(hash)
|
108
|
+
end
|
109
|
+
|
110
|
+
def merge(headers)
|
111
|
+
Headers.new(to_hash.merge(headers.to_hash))
|
112
|
+
end
|
113
|
+
|
114
|
+
def inspect
|
115
|
+
"(Headers #{@normalized})"
|
116
|
+
end
|
117
|
+
|
118
|
+
def each(&block)
|
119
|
+
@normalized.each(&block)
|
120
|
+
end
|
121
|
+
|
122
|
+
def [](k)
|
123
|
+
@hash[k] || @normalized[k.to_s.downcase]
|
124
|
+
end
|
125
|
+
|
126
|
+
def server
|
127
|
+
@normalized['server']
|
128
|
+
end
|
129
|
+
|
130
|
+
def date
|
131
|
+
Time.parse(@normalized['date'])
|
132
|
+
end
|
133
|
+
|
134
|
+
def age
|
135
|
+
(@normalized['age'] || (Time.now - date)).to_i
|
136
|
+
end
|
137
|
+
|
138
|
+
def expires
|
139
|
+
if d = @normalized['expires']
|
140
|
+
Time.parse(d)
|
141
|
+
end
|
142
|
+
end
|
143
|
+
|
144
|
+
def max_age
|
145
|
+
(cache_control &&
|
146
|
+
(cache_control.shared_max_age || cache_control.max_age)) ||
|
147
|
+
(expires && (expires - Time.now))
|
148
|
+
end
|
149
|
+
|
150
|
+
def ttl
|
151
|
+
max_age && (max_age - age)
|
152
|
+
end
|
153
|
+
|
154
|
+
def last_modified
|
155
|
+
@normalized['last-modified']
|
156
|
+
end
|
157
|
+
|
158
|
+
def etag
|
159
|
+
@normalized['etag']
|
160
|
+
end
|
161
|
+
|
162
|
+
def content_type
|
163
|
+
@normalized['content-type']
|
164
|
+
end
|
165
|
+
|
166
|
+
def json?
|
167
|
+
content_type.to_s.downcase['json']
|
168
|
+
end
|
169
|
+
|
170
|
+
def html?
|
171
|
+
content_type.to_s.downcase['html']
|
172
|
+
end
|
173
|
+
|
174
|
+
def xml?
|
175
|
+
content_type.to_s.downcase['xml']
|
176
|
+
end
|
177
|
+
|
178
|
+
def content_length
|
179
|
+
if (length = @normalized['content-length']) && length =~ /^\d+$/
|
180
|
+
length.to_i
|
181
|
+
end
|
182
|
+
end
|
183
|
+
|
184
|
+
def cache_control
|
185
|
+
if @normalized['cache-control']
|
186
|
+
@cache_control ||= CacheControl.parse(@normalized['cache-control'])
|
187
|
+
end
|
188
|
+
end
|
189
|
+
|
190
|
+
def set_cookie
|
191
|
+
@normalized['set-cookie']
|
192
|
+
end
|
193
|
+
|
194
|
+
def location
|
195
|
+
@normalized['location']
|
196
|
+
end
|
197
|
+
|
198
|
+
def to_hash
|
199
|
+
@hash
|
200
|
+
end
|
201
|
+
|
202
|
+
class CacheControl
|
203
|
+
attr_reader :directives
|
204
|
+
|
205
|
+
def initialize(directives)
|
206
|
+
@directives = directives
|
207
|
+
end
|
208
|
+
|
209
|
+
def public?
|
210
|
+
@directives['public']
|
211
|
+
end
|
212
|
+
|
213
|
+
def private?
|
214
|
+
@directives['private']
|
215
|
+
end
|
216
|
+
|
217
|
+
def no_store?
|
218
|
+
@directives['no-store']
|
219
|
+
end
|
220
|
+
|
221
|
+
def no_cache?
|
222
|
+
@directives['no-cache']
|
223
|
+
end
|
224
|
+
|
225
|
+
def must_revalidate?
|
226
|
+
@directives['must-revalidate']
|
227
|
+
end
|
228
|
+
|
229
|
+
def max_age
|
230
|
+
@directives['max-age'] && @directives['max-age'].to_i
|
231
|
+
end
|
232
|
+
|
233
|
+
def shared_max_age
|
234
|
+
@directives['s-max-age'] && @directives['s-max-age'].to_i
|
235
|
+
end
|
236
|
+
|
237
|
+
def to_s
|
238
|
+
directives.map { |k,v| v == true ? k : "#{k}=#{v}" }.join(', ')
|
239
|
+
end
|
240
|
+
|
241
|
+
def inspect
|
242
|
+
"(CacheControl #{directives})"
|
243
|
+
end
|
244
|
+
|
245
|
+
def self.parse(s)
|
246
|
+
h = {}
|
247
|
+
|
248
|
+
Array(s).join(',').gsub(' ', '').split(',').each do |p|
|
249
|
+
k, v = p.split('=')
|
250
|
+
|
251
|
+
h[k.downcase] = (v || true)
|
252
|
+
end
|
253
|
+
|
254
|
+
new(h)
|
255
|
+
end
|
256
|
+
end
|
257
|
+
end
|
258
|
+
end
|
259
|
+
|
260
|
+
end
|
261
|
+
|