agentx 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,82 @@
1
+
2
module AgentX

  # Thin convenience wrapper around a parsed HTML document or node.
  #
  # Strings handed to the constructor are parsed with Nokogiri::HTML; any
  # other object is assumed to be an already-parsed node and is stored as is.
  # Traversal helpers return new HTML wrappers (or nil when there is nothing
  # to wrap) so calls can be chained.
  class HTML
    # A non-breaking space (U+00A0). Written as an escape sequence so the
    # constant cannot be altered by whitespace normalisation or entity
    # decoding of this source file.
    NBSP = "\u00A0".freeze

    # @param html [String, Object] markup to parse, or a node to wrap
    def initialize(html)
      @html = html.kind_of?(String) ? Nokogiri::HTML(html) : html
    end

    # @return [String] the wrapped node serialised as HTML
    def to_html
      @html.to_html
    end

    # @param selector [String] CSS selector
    # @return [HTML, nil] the first match, or nil when nothing matches
    def first(selector)
      wrap(@html.css(selector).first)
    end

    # @param selector [String] CSS selector
    # @return [Array<HTML>] every match, wrapped
    def all(selector)
      @html.css(selector).map { |node| HTML.new(node) }
    end

    # @return [HTML, nil] the parent node, or nil when there is none
    def parent
      wrap(@html.parent)
    end

    # @return [Array<HTML>] the direct children, wrapped
    def children
      @html.children.map { |node| HTML.new(node) }
    end

    # @return [HTML, nil] the following sibling, or nil when there is none
    def next
      wrap(@html.next)
    end

    # @return [HTML, nil] the preceding sibling, or nil when there is none
    def previous
      wrap(@html.previous)
    end

    # @return [Hash{String => String}] attribute names mapped to their values
    def attributes
      @html.attribute_nodes.each_with_object({}) do |attribute, result|
        result[attribute.name] = attribute.value
      end
    end

    # @param attr [#to_s] attribute name
    # @return [String, nil] the attribute's value
    def [](attr)
      @html[attr.to_s]
    end

    # Collects the name/value pairs of every <input> below this node.
    #
    # Inputs without a name are skipped so they do not end up under a nil
    # key. Entries in +opts+ are applied last (keys converted to strings) and
    # therefore override values read from the markup.
    #
    # @param opts [Hash] extra or overriding fields
    # @return [Hash{String => Object}]
    def form_to_hash(opts={})
      fields = {}

      all('input').each do |input|
        name = input['name']
        next if name.nil? || name.empty?

        fields[name] = input['value']
      end

      opts.each { |key, value| fields[key.to_s] = value }
      fields
    end

    def to_s
      to_html
    end

    def inspect
      to_html
    end

    # @return [Object] the underlying node exactly as stored
    def to_nokogiri
      @html
    end

    # @return [String] the node's text with non-breaking spaces replaced by
    #   ordinary spaces
    def text
      @html.text.gsub(NBSP, ' ')
    end

    private

    # Wraps +node+ unless it is nil, so "nothing there" stays falsy.
    def wrap(node)
      node.nil? ? nil : HTML.new(node)
    end
  end

end
82
+
@@ -0,0 +1,288 @@
1
+
2
module AgentX

  # One HTTP request issued on behalf of a session.
  #
  # A request is configured fluently (#headers, #params, #body, #json, ...)
  # and then executed with #head, #get, #post or #put. Until it has been
  # executed the configuration methods return self; afterwards the same
  # methods act as readers for the values that were sent.
  class Request
    # Symbol shortcuts accepted by #headers, mapped to canonical header names.
    HEADER_MAP = {
      accept:              'Accept',
      accept_charset:      'Accept-Charset',
      accept_encoding:     'Accept-Encoding',
      accept_language:     'Accept-Language',
      accept_datetime:     'Accept-Datetime',
      authorization:       'Authorization',
      cache_control:       'Cache-Control',
      connection:          'Connection',
      cookie:              'Cookie',
      content_length:      'Content-Length',
      date:                'Date',
      expect:              'Expect',
      from:                'From',
      host:                'Host',
      if_match:            'If-Match',
      if_modified_since:   'If-Modified-Since',
      if_none_match:       'If-None-Match',
      if_range:            'If-Range',
      if_unmodified_since: 'If-Unmodified-Since',
      max_forwards:        'Max-Forwards',
      origin:              'Origin',
      pragma:              'Pragma',
      proxy_authorization: 'Proxy-Authorization',
      range:               'Range',
      referer:             'Referer',
      referrer:            'Referer',
      te:                  'TE',
      user_agent:          'User-Agent',
      upgrade:             'Upgrade',
      via:                 'Via',
      warning:             'Warning'
    }.freeze

    attr_reader :url, :method

    # @param session [Object] owning session; must respond to #headers
    # @param url [String] absolute URL, absolute path or relative path
    # @param params [Hash] initial query parameters
    def initialize(session, url, params={})
      @session = session
      @url     = url
      # Work on copies: headers set on this request (cookies, validators,
      # user supplied values) must not leak back into the session, and the
      # caller's params hash must not be modified by later #params calls.
      @params  = (params || {}).dup
      @headers = (session.headers || {}).dup
      @body    = {}
    end

    # Before execution: sets the given headers and returns self.
    # After execution: returns the header hash that was sent.
    def headers(headers={})
      return @headers if @method

      headers.each { |name, value| set_header(name, value) }
      self
    end

    # Before execution: merges +body+ into the request body, returns self.
    # After execution: returns the body hash that was sent.
    def body(body=nil)
      return @body if @method

      @body.merge!(body) if body
      self
    end

    # Before execution: merges +params+ into the query, returns self.
    # After execution: returns the parameter hash that was sent.
    def params(params=nil)
      return @params if @method

      @params.merge!(params) if params
      self
    end

    # Executes the request as HEAD after merging +params+.
    def head(params={})
      self.params(params)
      http(:head)
    end

    # Executes the request as GET after merging +params+.
    def get(params={})
      self.params(params)
      http(:get)
    end

    # Executes the request as POST after merging +body+.
    def post(body={})
      self.body(body)
      http(:post)
    end

    # Executes the request as PUT after merging +body+.
    def put(body={})
      self.body(body)
      http(:put)
    end

    # @return [Boolean] true once the request has been executed as GET or HEAD
    def cacheable?
      %w[GET HEAD].include?(method)
    end

    # Force the response to be parsed as JSON (normally the type is
    # determined by the response headers).
    def json
      @parse_type = :json
      self
    end

    def json?
      @parse_type == :json
    end

    # Force the response to be parsed as HTML.
    def html
      @parse_type = :html
      self
    end

    def html?
      @parse_type == :html
    end

    # Force the response to be parsed as XML.
    def xml
      @parse_type = :xml
      self
    end

    def xml?
      @parse_type == :xml
    end

    def inspect
      @method ? "(Request #{@method.to_s.upcase} #{url})" : "(Request #{url})"
    end

    def host
      uri.host
    end

    def scheme
      uri.scheme
    end

    def port
      uri.port
    end

    # @return [String] the session's base URL when it has one, otherwise
    #   scheme://host of this request, with the port appended only when it
    #   differs from the scheme's default
    def base_url
      return @session.base_url if @session.base_url

      if uri.port != AgentX::Session::DEFAULT_PORT[uri.scheme]
        "#{uri.scheme}://#{uri.host}:#{uri.port}"
      else
        "#{uri.scheme}://#{uri.host}"
      end
    end

    # Resolves #url against the session: absolute paths are appended to the
    # session's base URL, http(s) URLs are used verbatim and anything else is
    # appended to the session's relative base URL. The result is memoized.
    def full_url
      @full_url ||=
        if url.start_with?('/')
          "#{@session.base_url}#{url}"
        elsif url.start_with?('http://', 'https://')
          url
        else
          "#{@session.relative_base_url}#{url}"
        end
    end

    # @return [String] MD5 hex digest over URL, method, body and params
    def cache_key
      parts = [full_url, method, body, params]
      Digest::MD5.hexdigest(Oj.dump(parts, mode: :compat))
    end

    # @return [Hash{Symbol => String}] recorded durations formatted as
    #   milliseconds with two decimals, e.g. "12.34ms"
    def timings
      (@times || {}).each_with_object({}) do |(name, seconds), formatted|
        formatted[name] = "#{'%.2f' % (seconds * 1000)}ms"
      end
    end

    def request_time
      timings[:http_request]
    end

    private

    # Stores a header, translating symbol shortcuts through HEADER_MAP.
    def set_header(k, v)
      @headers[HEADER_MAP[k] || k.to_s] = v
    end

    # Runs the request, records how long it took and logs one summary line
    # (duration, status of the last history entry, method, URL).
    def http(*args)
      result = time(:http_request) { untimed_http(*args) }

      summary = [
        request_time,
        @session.history.last.response.code,
        method,
        full_url
      ]
      AgentX.logger.info(summary.join(' '))

      result
    end

    # Performs the request: attaches cookies from the session jar, consults
    # the cache for GET/HEAD, falls back to the network, records the exchange
    # in the session history and stores any cookies the response set.
    #
    # Returns the parsed response, or the result of following a redirect.
    def untimed_http(verb, params=@params, body=@body, headers=@headers)
      @method = verb.to_s.upcase

      if cookies = HTTP::Cookie.cookie_value(@session.jar.cookies(full_url))
        set_header(:cookie, cookies)
      end

      response = nil

      if cacheable? && (response = Cache.read(self))
        if response.fresh?
          AgentX.logger.debug("cache fresh")
        else
          AgentX.logger.debug("cache validate")
          response = validate(response)
        end
      end

      unless response
        AgentX.logger.debug("cache miss")
        response = response_from_easy
      end

      @session.history.add(self, response)
      response.cookies.each do |cookie|
        @session.jar.parse(cookie, full_url)
      end

      # NOTE(review): a Location header is followed whatever the status code
      # is, and there is no limit on the number of hops — confirm intended.
      if response.headers.location
        @session[response.headers.location].get
      else
        response.parse(@parse_type)
      end
    end

    # Re-requests a stale cached response, sending its validators
    # (Last-Modified / ETag) as conditional request headers.
    def validate(response)
      if response.headers.last_modified
        set_header(:if_modified_since, response.headers.last_modified)
      end

      if response.headers.etag
        set_header(:if_none_match, response.headers.etag)
      end

      response_from_easy(response)
    end

    # Performs the request over the network via Ethon and writes the result
    # to the cache when both request and response are cacheable.
    #
    # @param response [Response, nil] cached response being validated, if any
    # @raise [RuntimeError] when the transfer does not complete with :ok
    def response_from_easy(response=nil)
      easy = Ethon::Easy.new

      AgentX.logger.info("easy: #{method} #{full_url}")

      easy.http_request(full_url, method,
        params: params, body: body, headers: headers)

      unless easy.perform == :ok
        raise "Error: #{easy.return_code}"
      end

      fetched = Response.from_easy(easy, response)

      Cache.write(self, fetched) if cacheable? && fetched.cacheable?

      fetched
    end

    def uri
      @uri ||= URI(full_url)
    end

    # Yields, stores the elapsed seconds under +name+ and returns the
    # block's value.
    def time(name)
      @times ||= {}
      started = Time.now
      result = yield
      @times[name] = Time.now - started
      result
    end

  end
end
288
+
@@ -0,0 +1,261 @@
1
+
2
+ module AgentX
3
+
4
+ class Response
5
+ attr_reader :code, :body, :headers
6
+
7
+ def initialize(code, body, headers)
8
+ @code, @body, @headers = code, body, Headers.parse(headers)
9
+ end
10
+
11
+ def self.from_easy(easy, response=nil)
12
+ headers = Headers.parse(easy.response_headers)
13
+
14
+ r = new(easy.response_code, easy.response_body, headers)
15
+
16
+ if response && r.not_modified?
17
+ r = new(response.code, response.body, response.headers.merge(r.headers))
18
+ end
19
+
20
+ r
21
+ end
22
+
23
+ def cookies
24
+ Array(headers.set_cookie || [])
25
+ end
26
+
27
+ def ok?
28
+ code == 200
29
+ end
30
+
31
+ def not_modified?
32
+ code == 304
33
+ end
34
+
35
+ def fresh?
36
+ headers.ttl && headers.ttl > 0
37
+ end
38
+
39
+ def expires_at
40
+ headers.ttl ? Time.now + headers.ttl : Time.at(0)
41
+ end
42
+
43
+ CACHEABLE_CODES = [200, 203, 300, 301, 302, 404, 410]
44
+
45
+ def cacheable?
46
+ return false if headers.cache_control && headers.cache_control.no_store?
47
+ return false unless CACHEABLE_CODES.include?(code)
48
+
49
+ !! (headers.etag || headers.last_modified || fresh?)
50
+ end
51
+
52
+ def inspect
53
+ "(Response #{code})"
54
+ end
55
+
56
+ def to_hash
57
+ { 'code' => code,
58
+ 'body' => body,
59
+ 'headers' => headers.to_hash }
60
+ end
61
+
62
+ def self.from_hash(h)
63
+ new(h['code'], h['body'], h['headers'])
64
+ end
65
+
66
+ def parse(type=nil)
67
+ case
68
+ when type == :json || headers.json? then Oj.load(body)
69
+ when type == :html || headers.html? then HTML.new(body)
70
+ when type == :xml || headers.xml? then XML.new(body)
71
+ else body
72
+ end
73
+ end
74
+
75
+ class Headers
76
+ include Enumerable
77
+
78
+ def initialize(hash={})
79
+ @hash = hash
80
+ @hash['Date'] ||= Time.now.httpdate
81
+ @normalized = {}
82
+ @hash.each do |k,v|
83
+ @normalized[k.to_s.downcase] = v
84
+ end
85
+ end
86
+
87
+ def self.parse(str)
88
+ return new(str) if str.kind_of?(Hash)
89
+ return str if str.kind_of?(Headers)
90
+
91
+ hash = {}
92
+
93
+ str.lines.each do |line|
94
+ next if line =~ /^HTTP\/\d/
95
+ k, v = line.split(':', 2).map { |s| s.strip }
96
+
97
+ if k && v
98
+ if hash[k]
99
+ hash[k] = Array(hash[k])
100
+ hash[k] << v
101
+ else
102
+ hash[k] = v
103
+ end
104
+ end
105
+ end
106
+
107
+ new(hash)
108
+ end
109
+
110
+ def merge(headers)
111
+ Headers.new(to_hash.merge(headers.to_hash))
112
+ end
113
+
114
+ def inspect
115
+ "(Headers #{@normalized})"
116
+ end
117
+
118
+ def each(&block)
119
+ @normalized.each(&block)
120
+ end
121
+
122
+ def [](k)
123
+ @hash[k] || @normalized[k.to_s.downcase]
124
+ end
125
+
126
+ def server
127
+ @normalized['server']
128
+ end
129
+
130
+ def date
131
+ Time.parse(@normalized['date'])
132
+ end
133
+
134
+ def age
135
+ (@normalized['age'] || (Time.now - date)).to_i
136
+ end
137
+
138
+ def expires
139
+ if d = @normalized['expires']
140
+ Time.parse(d)
141
+ end
142
+ end
143
+
144
+ def max_age
145
+ (cache_control &&
146
+ (cache_control.shared_max_age || cache_control.max_age)) ||
147
+ (expires && (expires - Time.now))
148
+ end
149
+
150
+ def ttl
151
+ max_age && (max_age - age)
152
+ end
153
+
154
+ def last_modified
155
+ @normalized['last-modified']
156
+ end
157
+
158
+ def etag
159
+ @normalized['etag']
160
+ end
161
+
162
+ def content_type
163
+ @normalized['content-type']
164
+ end
165
+
166
+ def json?
167
+ content_type.to_s.downcase['json']
168
+ end
169
+
170
+ def html?
171
+ content_type.to_s.downcase['html']
172
+ end
173
+
174
+ def xml?
175
+ content_type.to_s.downcase['xml']
176
+ end
177
+
178
+ def content_length
179
+ if (length = @normalized['content-length']) && length =~ /^\d+$/
180
+ length.to_i
181
+ end
182
+ end
183
+
184
+ def cache_control
185
+ if @normalized['cache-control']
186
+ @cache_control ||= CacheControl.parse(@normalized['cache-control'])
187
+ end
188
+ end
189
+
190
+ def set_cookie
191
+ @normalized['set-cookie']
192
+ end
193
+
194
+ def location
195
+ @normalized['location']
196
+ end
197
+
198
+ def to_hash
199
+ @hash
200
+ end
201
+
202
+ class CacheControl
203
+ attr_reader :directives
204
+
205
+ def initialize(directives)
206
+ @directives = directives
207
+ end
208
+
209
+ def public?
210
+ @directives['public']
211
+ end
212
+
213
+ def private?
214
+ @directives['private']
215
+ end
216
+
217
+ def no_store?
218
+ @directives['no-store']
219
+ end
220
+
221
+ def no_cache?
222
+ @directives['no-cache']
223
+ end
224
+
225
+ def must_revalidate?
226
+ @directives['must-revalidate']
227
+ end
228
+
229
+ def max_age
230
+ @directives['max-age'] && @directives['max-age'].to_i
231
+ end
232
+
233
+ def shared_max_age
234
+ @directives['s-max-age'] && @directives['s-max-age'].to_i
235
+ end
236
+
237
+ def to_s
238
+ directives.map { |k,v| v == true ? k : "#{k}=#{v}" }.join(', ')
239
+ end
240
+
241
+ def inspect
242
+ "(CacheControl #{directives})"
243
+ end
244
+
245
+ def self.parse(s)
246
+ h = {}
247
+
248
+ Array(s).join(',').gsub(' ', '').split(',').each do |p|
249
+ k, v = p.split('=')
250
+
251
+ h[k.downcase] = (v || true)
252
+ end
253
+
254
+ new(h)
255
+ end
256
+ end
257
+ end
258
+ end
259
+
260
+ end
261
+