agentx 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,82 @@
1
+
2
module AgentX

  # Thin convenience wrapper around a parsed HTML document or node.
  #
  # A String is parsed with Nokogiri::HTML; any other object is stored as-is
  # and is expected to respond to the node methods used below (css, parent,
  # children, next, previous, attribute_nodes, [], text, to_html).
  class HTML
    # Non-breaking space (U+00A0), the character the `&nbsp;` entity decodes
    # to. Spelled as an escape so the value cannot be lost to whitespace
    # normalisation and no parser run is needed at load time.
    NBSP = "\u00A0".freeze

    # html - a String of markup, or an already-parsed node/document.
    def initialize(html)
      @html = html.kind_of?(String) ? Nokogiri::HTML(html) : html
    end

    # Serialises the wrapped node back to markup.
    def to_html
      @html.to_html
    end

    # Returns the first node matching the CSS selector wrapped in HTML, or a
    # falsy value when nothing matches.
    def first(selector)
      wrap(@html.css(selector).first)
    end

    # Returns every node matching the CSS selector, each wrapped in HTML.
    def all(selector)
      @html.css(selector).map { |node| HTML.new(node) }
    end

    # Returns the wrapped parent node, or nil when there is none.
    def parent
      wrap(@html.parent)
    end

    # Returns the child nodes, each wrapped in HTML.
    def children
      @html.children.map { |node| HTML.new(node) }
    end

    # Returns the wrapped following sibling, or nil when there is none.
    def next
      wrap(@html.next)
    end

    # Returns the wrapped preceding sibling, or nil when there is none.
    def previous
      wrap(@html.previous)
    end

    # Returns a Hash of attribute name => attribute value for this node.
    def attributes
      @html.attribute_nodes.each_with_object({}) do |attr_node, result|
        result[attr_node.name] = attr_node.value
      end
    end

    # Reads a single attribute; attr may be a String or a Symbol.
    def [](attr)
      @html[attr.to_s]
    end

    # Collects the name/value pairs of all <input> elements below this node.
    #
    # opts - overrides/additions; keys are stringified and win over the
    #        values found in the markup.
    #
    # Inputs without a name attribute are skipped so that no nil key ends up
    # in the result.
    def form_to_hash(opts={})
      fields = {}

      all('input').each do |input|
        name = input['name']
        fields[name] = input['value'] if name
      end

      opts.each { |key, value| fields[key.to_s] = value }
      fields
    end

    def to_s
      to_html
    end

    def inspect
      to_html
    end

    # Returns the underlying node/document object.
    def to_nokogiri
      @html
    end

    # Returns the node's text with non-breaking spaces turned into plain
    # spaces.
    def text
      @html.text.gsub(NBSP, ' ')
    end

    private

    # Wraps a node in HTML, passing nil/false through unchanged so that
    # "no such node" never becomes a wrapper around nil.
    def wrap(node)
      node && HTML.new(node)
    end
  end

end
82
+
@@ -0,0 +1,288 @@
1
+
2
module AgentX

  # A single HTTP request issued on behalf of a session.
  #
  # Before a verb method (head/get/post/put) is called, `headers`, `params`
  # and `body` act as chainable setters; once the request has been sent they
  # return the accumulated values instead.
  class Request
    # Symbol shortcuts for well-known request header names.
    HEADER_MAP = {
      accept:              'Accept',
      accept_charset:      'Accept-Charset',
      accept_encoding:     'Accept-Encoding',
      accept_language:     'Accept-Language',
      accept_datetime:     'Accept-Datetime',
      authorization:       'Authorization',
      cache_control:       'Cache-Control',
      connection:          'Connection',
      cookie:              'Cookie',
      content_length:      'Content-Length',
      date:                'Date',
      expect:              'Expect',
      from:                'From',
      host:                'Host',
      if_match:            'If-Match',
      if_modified_since:   'If-Modified-Since',
      if_none_match:       'If-None-Match',
      if_range:            'If-Range',
      if_unmodified_since: 'If-Unmodified-Since',
      max_forwards:        'Max-Forwards',
      origin:              'Origin',
      pragma:              'Pragma',
      proxy_authorization: 'Proxy-Authorization',
      range:               'Range',
      referer:             'Referer',
      referrer:            'Referer',
      te:                  'TE',
      user_agent:          'User-Agent',
      upgrade:             'Upgrade',
      via:                 'Via',
      warning:             'Warning'
    }.freeze

    attr_reader :url, :method

    # session - the owning session; this class calls headers, base_url,
    #           relative_base_url, jar, history and [] on it.
    # url     - absolute URL, host-relative path ("/x") or relative path.
    # params  - initial query parameters.
    #
    # The session's header set and the caller's params are copied, because
    # this request adds to both (cookies, validators, merged params) and
    # those additions must not leak back into the session or the caller.
    def initialize(session, url, params={})
      @session = session
      @url     = url
      @params  = params.dup
      @headers = session.headers.dup
      @body    = {}
    end

    # Setter (returns self) before the request is sent, getter afterwards.
    # Symbol keys are translated to canonical header names.
    def headers(headers={})
      return @headers if @method

      headers.each { |name, value| set_header(name, value) }
      self
    end

    # Setter (returns self) before the request is sent, getter afterwards.
    def body(body=nil)
      return @body if @method

      @body.merge!(body) if body
      self
    end

    # Setter (returns self) before the request is sent, getter afterwards.
    def params(params=nil)
      return @params if @method

      @params.merge!(params) if params
      self
    end

    def head(params={})
      params(params)
      http(:head)
    end

    def get(params={})
      params(params)
      http(:get)
    end

    def post(body={})
      body(body)
      http(:post)
    end

    def put(body={})
      body(body)
      http(:put)
    end

    # Only GET and HEAD requests are looked up in / written to the cache.
    def cacheable?
      %w[GET HEAD].include?(method)
    end

    # Force the response to be parsed as JSON (normally the type is
    # determined by the response headers).
    def json
      @parse_type = :json
      self
    end

    def json?
      @parse_type == :json
    end

    # Force the response to be parsed as HTML.
    def html
      @parse_type = :html
      self
    end

    def html?
      @parse_type == :html
    end

    # Force the response to be parsed as XML.
    def xml
      @parse_type = :xml
      self
    end

    def xml?
      @parse_type == :xml
    end

    def inspect
      @method ? "(Request #{@method.to_s.upcase} #{url})" : "(Request #{url})"
    end

    def host
      uri.host
    end

    def scheme
      uri.scheme
    end

    def port
      uri.port
    end

    # The session's base URL when it has one; otherwise scheme://host, with
    # the port appended only when it is not the scheme's default.
    def base_url
      return @session.base_url if @session.base_url

      origin = "#{uri.scheme}://#{uri.host}"
      default_port = AgentX::Session::DEFAULT_PORT[uri.scheme]
      uri.port == default_port ? origin : "#{origin}:#{uri.port}"
    end

    # Resolves url against the session: host-relative paths are appended to
    # the session base URL, absolute http(s) URLs are used verbatim and
    # anything else is appended to the session's relative base URL.
    def full_url
      @full_url ||=
        if url.start_with?('/')
          "#{@session.base_url}#{url}"
        elsif url.start_with?('http://', 'https://')
          url
        else
          "#{@session.relative_base_url}#{url}"
        end
    end

    # Digest identifying this request (URL, verb, body and params).
    def cache_key
      parts = [full_url, method, body, params]
      Digest::MD5.hexdigest(Oj.dump(parts, mode: :compat))
    end

    # Recorded durations formatted as milliseconds, e.g. "12.34ms".
    def timings
      (@times || {}).each_with_object({}) do |(name, seconds), formatted|
        formatted[name] = "#{'%.2f' % (seconds * 1000)}ms"
      end
    end

    def request_time
      timings[:http_request]
    end

    private

    # Stores a header. Keys found in HEADER_MAP use the mapped name; other
    # Symbols are converted from snake_case to Header-Case (:content_type ->
    # "Content-Type"); any other key is used verbatim via to_s.
    def set_header(k, v)
      @headers[header_name(k)] = v
    end

    def header_name(key)
      return HEADER_MAP[key] if HEADER_MAP.key?(key)
      return key.to_s unless key.is_a?(Symbol)

      key.to_s.split('_').map(&:capitalize).join('-')
    end

    # Performs the request, records how long it took and logs one summary
    # line (duration, status of the latest history entry, verb, URL).
    def http(*args)
      result = time(:http_request) { untimed_http(*args) }

      summary = [
        request_time,
        @session.history.last.response.code,
        method,
        full_url
      ]
      AgentX.logger.info(summary.join(' '))

      result
    end

    def untimed_http(verb, params=@params, body=@body, headers=@headers)
      # Setting @method switches headers/params/body into getter mode.
      @method = verb.to_s.upcase

      if cookies = HTTP::Cookie.cookie_value(@session.jar.cookies(full_url))
        set_header(:cookie, cookies)
      end

      response = nil

      if cacheable? && (response = Cache.read(self))
        if response.fresh?
          AgentX.logger.debug("cache fresh")
        else
          AgentX.logger.debug("cache validate")
          response = validate(response)
        end
      end

      unless response
        AgentX.logger.debug("cache miss")
        response = response_from_easy
      end

      @session.history.add(self, response)
      response.cookies.each do |cookie|
        @session.jar.parse(cookie, full_url)
      end

      # A Location header is followed with a fresh GET through the session;
      # otherwise the body is parsed according to the forced/declared type.
      if response.headers.location
        @session[response.headers.location].get
      else
        response.parse(@parse_type)
      end
    end

    # Re-requests a stale cached response, sending its validators
    # (Last-Modified / ETag) as conditional request headers.
    def validate(response)
      if response.headers.last_modified
        set_header(:if_modified_since, response.headers.last_modified)
      end

      if response.headers.etag
        set_header(:if_none_match, response.headers.etag)
      end

      response_from_easy(response)
    end

    # Performs the transfer with Ethon and builds a Response, writing it to
    # the cache when both request and response are cacheable.
    #
    # response - previously cached response handed on to Response.from_easy.
    #
    # Raises RuntimeError when the transfer does not finish with :ok.
    def response_from_easy(response=nil)
      easy = Ethon::Easy.new

      AgentX.logger.info("easy: #{method} #{full_url}")

      easy.http_request(full_url, method,
        params: params, body: body, headers: headers)

      unless easy.perform == :ok
        raise "Error: #{easy.return_code}"
      end

      fetched = Response.from_easy(easy, response)

      Cache.write(self, fetched) if cacheable? && fetched.cacheable?

      fetched
    end

    def uri
      @uri ||= URI(full_url)
    end

    # Runs the block, records its duration in seconds under name and returns
    # the block's value. Uses the monotonic clock so wall-clock adjustments
    # cannot distort (or negate) the measurement.
    def time(name)
      @times ||= {}
      started = Process.clock_gettime(Process::CLOCK_MONOTONIC)
      result = yield
      @times[name] = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started
      result
    end

  end
end
288
+
@@ -0,0 +1,261 @@
1
+
2
+ module AgentX
3
+
4
+ class Response
5
+ attr_reader :code, :body, :headers
6
+
7
+ def initialize(code, body, headers)
8
+ @code, @body, @headers = code, body, Headers.parse(headers)
9
+ end
10
+
11
+ def self.from_easy(easy, response=nil)
12
+ headers = Headers.parse(easy.response_headers)
13
+
14
+ r = new(easy.response_code, easy.response_body, headers)
15
+
16
+ if response && r.not_modified?
17
+ r = new(response.code, response.body, response.headers.merge(r.headers))
18
+ end
19
+
20
+ r
21
+ end
22
+
23
+ def cookies
24
+ Array(headers.set_cookie || [])
25
+ end
26
+
27
+ def ok?
28
+ code == 200
29
+ end
30
+
31
+ def not_modified?
32
+ code == 304
33
+ end
34
+
35
+ def fresh?
36
+ headers.ttl && headers.ttl > 0
37
+ end
38
+
39
+ def expires_at
40
+ headers.ttl ? Time.now + headers.ttl : Time.at(0)
41
+ end
42
+
43
+ CACHEABLE_CODES = [200, 203, 300, 301, 302, 404, 410]
44
+
45
+ def cacheable?
46
+ return false if headers.cache_control && headers.cache_control.no_store?
47
+ return false unless CACHEABLE_CODES.include?(code)
48
+
49
+ !! (headers.etag || headers.last_modified || fresh?)
50
+ end
51
+
52
+ def inspect
53
+ "(Response #{code})"
54
+ end
55
+
56
+ def to_hash
57
+ { 'code' => code,
58
+ 'body' => body,
59
+ 'headers' => headers.to_hash }
60
+ end
61
+
62
+ def self.from_hash(h)
63
+ new(h['code'], h['body'], h['headers'])
64
+ end
65
+
66
+ def parse(type=nil)
67
+ case
68
+ when type == :json || headers.json? then Oj.load(body)
69
+ when type == :html || headers.html? then HTML.new(body)
70
+ when type == :xml || headers.xml? then XML.new(body)
71
+ else body
72
+ end
73
+ end
74
+
75
+ class Headers
76
+ include Enumerable
77
+
78
+ def initialize(hash={})
79
+ @hash = hash
80
+ @hash['Date'] ||= Time.now.httpdate
81
+ @normalized = {}
82
+ @hash.each do |k,v|
83
+ @normalized[k.to_s.downcase] = v
84
+ end
85
+ end
86
+
87
+ def self.parse(str)
88
+ return new(str) if str.kind_of?(Hash)
89
+ return str if str.kind_of?(Headers)
90
+
91
+ hash = {}
92
+
93
+ str.lines.each do |line|
94
+ next if line =~ /^HTTP\/\d/
95
+ k, v = line.split(':', 2).map { |s| s.strip }
96
+
97
+ if k && v
98
+ if hash[k]
99
+ hash[k] = Array(hash[k])
100
+ hash[k] << v
101
+ else
102
+ hash[k] = v
103
+ end
104
+ end
105
+ end
106
+
107
+ new(hash)
108
+ end
109
+
110
+ def merge(headers)
111
+ Headers.new(to_hash.merge(headers.to_hash))
112
+ end
113
+
114
+ def inspect
115
+ "(Headers #{@normalized})"
116
+ end
117
+
118
+ def each(&block)
119
+ @normalized.each(&block)
120
+ end
121
+
122
+ def [](k)
123
+ @hash[k] || @normalized[k.to_s.downcase]
124
+ end
125
+
126
+ def server
127
+ @normalized['server']
128
+ end
129
+
130
+ def date
131
+ Time.parse(@normalized['date'])
132
+ end
133
+
134
+ def age
135
+ (@normalized['age'] || (Time.now - date)).to_i
136
+ end
137
+
138
+ def expires
139
+ if d = @normalized['expires']
140
+ Time.parse(d)
141
+ end
142
+ end
143
+
144
+ def max_age
145
+ (cache_control &&
146
+ (cache_control.shared_max_age || cache_control.max_age)) ||
147
+ (expires && (expires - Time.now))
148
+ end
149
+
150
+ def ttl
151
+ max_age && (max_age - age)
152
+ end
153
+
154
+ def last_modified
155
+ @normalized['last-modified']
156
+ end
157
+
158
+ def etag
159
+ @normalized['etag']
160
+ end
161
+
162
+ def content_type
163
+ @normalized['content-type']
164
+ end
165
+
166
+ def json?
167
+ content_type.to_s.downcase['json']
168
+ end
169
+
170
+ def html?
171
+ content_type.to_s.downcase['html']
172
+ end
173
+
174
+ def xml?
175
+ content_type.to_s.downcase['xml']
176
+ end
177
+
178
+ def content_length
179
+ if (length = @normalized['content-length']) && length =~ /^\d+$/
180
+ length.to_i
181
+ end
182
+ end
183
+
184
+ def cache_control
185
+ if @normalized['cache-control']
186
+ @cache_control ||= CacheControl.parse(@normalized['cache-control'])
187
+ end
188
+ end
189
+
190
+ def set_cookie
191
+ @normalized['set-cookie']
192
+ end
193
+
194
+ def location
195
+ @normalized['location']
196
+ end
197
+
198
+ def to_hash
199
+ @hash
200
+ end
201
+
202
+ class CacheControl
203
+ attr_reader :directives
204
+
205
+ def initialize(directives)
206
+ @directives = directives
207
+ end
208
+
209
+ def public?
210
+ @directives['public']
211
+ end
212
+
213
+ def private?
214
+ @directives['private']
215
+ end
216
+
217
+ def no_store?
218
+ @directives['no-store']
219
+ end
220
+
221
+ def no_cache?
222
+ @directives['no-cache']
223
+ end
224
+
225
+ def must_revalidate?
226
+ @directives['must-revalidate']
227
+ end
228
+
229
+ def max_age
230
+ @directives['max-age'] && @directives['max-age'].to_i
231
+ end
232
+
233
+ def shared_max_age
234
+ @directives['s-max-age'] && @directives['s-max-age'].to_i
235
+ end
236
+
237
+ def to_s
238
+ directives.map { |k,v| v == true ? k : "#{k}=#{v}" }.join(', ')
239
+ end
240
+
241
+ def inspect
242
+ "(CacheControl #{directives})"
243
+ end
244
+
245
+ def self.parse(s)
246
+ h = {}
247
+
248
+ Array(s).join(',').gsub(' ', '').split(',').each do |p|
249
+ k, v = p.split('=')
250
+
251
+ h[k.downcase] = (v || true)
252
+ end
253
+
254
+ new(h)
255
+ end
256
+ end
257
+ end
258
+ end
259
+
260
+ end
261
+