rbkb-http 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,406 @@
1
+ require 'uri'
2
+
3
+ module Rbkb::Http
4
+
5
# A base class for RequestHeaders and ResponseHeaders.
#
# Provides the shared implementations of to_raw, to_raw_array, capture and
# the class method parse.
#
# The headers are stored internally (in self, an Array) as a list of
# name/value pairs in the form of:
#
#   [ ["Name1", "value1"], ["Name2", "value2"], ... ]
#
# which is rendered by to_raw() to (or captured by capture() from):
#
#   Name1: value1
#   Name2: value2
#   ...
#
# This has the benefit of letting the data= accessor automatically render a
# Hash or any other Enumerable to a Headers object through the use of to_a.
# However it has the caveat that named pairs are expected on various
# operations.
class Headers < Array
  include CommonInterface

  # Class method to instantiate a new Headers object extended with
  # RequestHeaders.
  def self.request_hdr(*args)
    Headers.new(*args).extend(RequestHeaders)
  end

  # Class method to instantiate a new Headers object extended with
  # ResponseHeaders.
  def self.response_hdr(*args)
    Headers.new(*args).extend(ResponseHeaders)
  end

  # Instantiates a new Headers object and returns the result of capture(str).
  # Note, this method does not distinguish between ResponseHeaders or
  # RequestHeaders, and so the object may need to be extended with one
  # or the other, if you need access to specific behaviors from either.
  def self.parse(str)
    new.capture(str)
  end

  # Instantiates a new Headers object and returns the result of
  # capture_full_headers(str, first_obj)
  def self.parse_full_headers(str, first_obj)
    new.capture_full_headers(str, first_obj)
  end

  # Instantiates a new Headers object.
  #
  # Arguments:
  #   raw:  When the first argument is an Enumerable it is converted with
  #         'to_a' and stored through data=. Anything else is handed to
  #         _common_init unchanged (presumably Strings end up parsed with
  #         capture there -- _common_init is defined outside this class).
  #
  #   opts: Remaining arguments are forwarded to _common_init.
  #
  def initialize(*args)
    super()
    raw = args.first
    if raw.kind_of?(Enumerable)
      # blank out the raw slot so _common_init does not try to handle it
      args[0] = nil
      _common_init(*args)
      self.data = raw.to_a
    else
      _common_init(*args)
    end
  end

  attr_reader :base

  # Sets the @base instance variable. Only nil or a kind of Base object is
  # accepted; anything else raises a RuntimeError.
  def base=(b)
    unless b.nil? || b.kind_of?(Base)
      raise "base must be a kind of Base object or nil"
    end
    @base = b
  end

  # The data method provides a common interface to access internal
  # non-raw information stored in the object.
  #
  # The Headers incarnation returns the internal headers array
  # (actually self).
  def data
    self
  end

  # The data= method provides a common interface to set internal
  # non-raw information stored in the object.
  #
  # The current contents are replaced. Another Headers object has its
  # entries copied over as-is (the pairs themselves are shared, not
  # duplicated). Anything else is converted with to_a and rebuilt pair by
  # pair with these rules:
  #   * 1-dimensional elements are expanded to tuples with 'nil' as the
  #     second value.
  #
  #   * Numeric names are converted to Strings. Values are stored untouched.
  def data=(d)
    if d.kind_of?(Headers)
      replace(d)
    else
      replace([])
      d.to_a.each do |k, v|
        k = k.to_s if k.is_a?(Numeric)
        self << [k, v]
      end
    end
    return self
  end

  # The to_raw_array method returns an interim formatted array of raw
  # "Name: value" strings, one per header.
  def to_raw_array
    map { |name, value| "#{name}: #{value}" }
  end

  # Returns every [name, value] pair whose name matches k, ignoring case.
  def get_all(k)
    select { |h| _same_header_name?(h[0], k) }
  end

  # Returns only the values of every header whose name matches k.
  def get_all_values_for(k)
    get_all(k).map { |_name, value| value }
  end
  alias all_values_for get_all_values_for

  # Returns the first [name, value] pair whose name matches k (ignoring
  # case), or nil when there is none.
  def get_header(k)
    find { |h| _same_header_name?(h[0], k) }
  end

  # Returns the value of the first header matching k, or nil.
  def get_value_for(k)
    found = get_header(k)
    found[1] if found
  end
  alias get_header_value get_value_for
  alias value_for get_value_for

  # Returns a header value after "fully" parsing it. Any semi-colon separated
  # key=value or key parameters after the first value in the header will be
  # returned as an extra HeaderParams array in addition to the value, i.e.
  # the result is [value, header_params]. Returns nil when the header is
  # absent.
  #
  # XXX NOTE, the current implementation will incorrectly parse several headers
  # that may legally contain ';' with no special meaning.
  # For example, 'Referer: http://example.com;sometimes_a_url_parameter=1'
  def get_parameterized_value(k)
    v = get_value_for(k)
    return nil unless v

    if (i = v.index(';'))
      [ v[0, i], HeaderParams.parse(v[(i + 1)..-1]) ]
    else
      [ v, HeaderParams.new ]
    end
  end
  alias parameterized_value get_parameterized_value

  # Sets header k from a [value, params] array as returned by
  # get_parameterized_value. params (when present) is rendered with its
  # to_raw method and appended to the value.
  #
  # Raises a RuntimeError when v is not an Array.
  def set_parameterized_value(k, v)
    raise "v is not an array. use set_header()" unless v.kind_of? Array
    value, parms = v
    # dup: String#to_s returns the receiver itself, so appending without a
    # copy would silently modify the caller's value string.
    rendered = value.to_s.dup
    rendered << parms.to_raw if parms
    set_header(k, rendered)
  end

  # Sets the value of every header matching k to v, or appends a new
  # [k, v] pair when none matches. Returns the affected pairs.
  def set_header(k, v)
    matches = get_all(k)

    if matches.empty?
      self << [k, v]
      return [[k, v]]
    end

    matches.each { |h| h[1] = v }
    matches
  end
  alias set_all_for set_header

  # Removes every header whose name matches k (ignoring case).
  def delete_header(k)
    delete_if { |h| _same_header_name?(h[0], k) }
  end

  # The to_raw method returns a raw string of headers as they appear
  # on the wire: CRLF separated and CRLF terminated.
  def to_raw
    to_raw_array.join("\r\n") << "\r\n"
  end

  # Captures a raw string of headers into this instance's internal array.
  # Note: This method expects not to include the first element such as a
  # RequestAction or ResponseStatus. See capture_full_headers for a version
  # that can handle this.
  #
  # When a block is given it receives (self, lines) before the lines are
  # parsed, and may modify the lines array in place.
  #
  # Raises a RuntimeError when str is not a String.
  def capture(str)
    raise "arg 0 must be a string" unless str.is_a?(String)
    heads = str.split(/\s*\r?\n/)

    # pass interim parsed headers to a block if given
    yield(self, heads) if block_given?

    replace([]) if capture_complete?
    heads.each do |line|
      name, value = line.split(/\s*:\s*/, 2)
      self << [name, value]
    end
    return self
  end

  # See capture_full_headers. This method is used to resolve the parser
  # for the first entity above the HTTP headers. This instance is designed
  # to raise an exception when capturing.
  def get_first_obj; raise "get_first_obj called on base stub"; end

  # This method parses a full set of raw headers from the 'str' argument.
  # Unlike the regular capture method, the string is expected to start
  # with a line which will be parsed by first_obj using its own capture
  # method. For example, first_obj would parse something like
  # "GET / HTTP/1.1" for RequestAction or "HTTP/1.1 200 OK" for
  # ResponseStatus. If first_obj is not defined, there will be an attempt
  # to resolve it by calling get_first_obj which should return the
  # appropriate type of object or raise an exception.
  #
  # Returns a 2 element array containing [first_entity, headers]
  # where first entity is the instantiated first_obj object and headers
  # is self.
  def capture_full_headers(str, first_obj=nil)
    # the empty block is kept from the original call; the get_first_obj
    # implementations visible in this file do not use it
    first_obj ||= get_first_obj() {|x|}

    first = nil
    capture(str) do |this, heads|
      first = first_obj.capture(heads.shift)
      yield(heads) if block_given?
    end
    return [first, self]
  end

  # This method will non-destructively reset the capture state on this object.
  # The existing headers are maintained when this is called.
  # See also: capture_complete? reset_capture!
  def reset_capture
    @capture_state = nil
    self
  end

  # This method will destructively reset the capture state on this object.
  # The existing headers array is emptied when this is called.
  # Returns self, like reset_capture.
  # See also: capture_complete?, reset_capture
  def reset_capture!
    @capture_state = nil
    self.data = []
    self
  end

  # Indicates whether this object is ready to capture fresh data, or is
  # waiting for additional data or a reset from a previous incomplete or
  # otherwise broken capture. See also: reset_capture, reset_capture!
  def capture_complete?
    !@capture_state
  end

  private

  # Case-insensitive header name comparison. Both sides go through to_s so
  # that nil names (capture stores [nil, nil] for blank lines) do not raise.
  def _same_header_name?(name, k)
    name.to_s.downcase == k.to_s.downcase
  end
end
271
+
272
+
273
# A mixin which gives a Headers object HTTP request specific behaviors
# and features.
#
# To instantiate a new request header, use Headers.request_hdr
module RequestHeaders
  # Not referenced anywhere in the visible code; presumably the headers
  # whose values must not be split on ';' -- TODO confirm against callers.
  NO_PARAMETERS = %w[Referer Host]

  # Resolves the parser for the first entity above the HTTP headers.
  # The incarnation for RequestHeaders returns a new RequestAction built
  # from the given arguments.
  # See Headers.capture_full_headers for more information.
  def get_first_obj(*args)
    RequestAction.new(*args)
  end
end
287
+
288
+
289
# A mixin which gives a Headers object HTTP response specific behaviors
# and features.
#
# To instantiate a new response header, use Headers.response_hdr
module ResponseHeaders

  # Resolves the parser for the first entity above the HTTP headers.
  # The incarnation for ResponseHeaders returns a new ResponseStatus built
  # from the given arguments.
  # See Headers.capture_full_headers for more information.
  def get_first_obj(*args)
    status_parser = ResponseStatus.new(*args)
    status_parser
  end
end
302
+
303
+
304
# A class for HTTP request actions, i.e. the first
# line sent in an HTTP request, as in "GET / HTTP/1.1"
class RequestAction
  include CommonInterface

  # Instantiates a new RequestAction and returns the result of capture(str)
  def self.parse(str)
    new.capture(str)
  end

  attr_accessor :verb, :uri, :version

  # Runs the shared _common_init and then fills in whatever it left unset:
  # verb "GET", uri "/" and version "HTTP/1.1".
  def initialize(*args)
    _common_init(*args)
    @verb    ||= "GET"
    @uri     ||= URI.parse("/")
    @version ||= "HTTP/1.1"
  end

  # Renders the action as "VERB URI[ VERSION]". The version is left out
  # when it is not set.
  def to_raw
    parts = [ @verb, @uri ]
    parts.push(@version) if @version
    parts.join(" ")
  end

  # This method parses a request action String into the current instance.
  # The version token is optional. Raises a RuntimeError when str is not a
  # String or does not look like a request action.
  def capture(str)
    raise "arg 0 must be a string" unless str.is_a?(String)
    m = /^([^\s]+)\s+([^\s]+)(?:\s+([^\s]+))?\s*$/.match(str)
    raise "invalid action #{str.inspect}" if m.nil?

    @verb = m[1]
    @uri = URI.parse(m[2])
    @version = m[3]
    self
  end

  # Returns the URI path as a String if defined
  def path
    @uri && @uri.path
  end

  # Returns the URI query as a String if it is defined
  def query
    @uri && @uri.query
  end

  # Returns the URI query parameters as a FormUrlencodedParams object if
  # the query string is defined.
  # XXX note parameters cannot currently be modified in this form.
  def parameters
    return nil unless query
    FormUrlencodedParams.parse(query)
  end

  attr_reader :base

  # Sets @base. Raises a RuntimeError unless b is a kind of Base object.
  def base=(b)
    raise "base must be a kind of Base object" unless b.is_a?(Base)
    @base = b
  end

end
365
+
366
+
367
# A class for HTTP response status messages, i.e. the first
# line returned by a server, as in "HTTP/1.0 200 OK"
class ResponseStatus
  include CommonInterface

  # Instantiates a new ResponseStatus and returns the result of capture(str)
  def self.parse(str)
    new.capture(str)
  end

  attr_accessor :version, :code, :text

  # Runs the shared _common_init and defaults the version to
  # DEFAULT_HTTP_VERSION when it was left unset.
  def initialize(*args)
    _common_init(*args)
    @version ||= DEFAULT_HTTP_VERSION
  end

  # Renders the status line as "VERSION CODE TEXT".
  def to_raw
    fields = [@version, @code, @text]
    fields.join(" ")
  end

  # Parses a status line String into the current instance. The code is
  # stored as an Integer; the reason text is optional. Raises a RuntimeError
  # when str is not a String or does not look like a status line.
  def capture(str)
    raise "arg 0 must be a string" unless str.is_a?(String)
    m = /^([^\s]+)\s+(\d+)(?:\s+(.*))?$/.match(str)
    raise "invalid status #{str.inspect}" if m.nil?

    @version = m[1]
    # the pattern only lets digits into group 2, so it always converts
    @code = m[2].to_i
    @text = m[3]
    self
  end

  attr_reader :base

  # Sets @base. Raises a RuntimeError unless b is a kind of Base object.
  def base=(b)
    raise "base must be a kind of Base object" unless b.is_a?(Base)
    @base = b
  end
end
405
+ end
406
+
@@ -0,0 +1,220 @@
1
+ module Rbkb::Http
2
+
3
# The Parameters class is for handling named parameter values. This is a
# stub base class from which to derive specific parameter parsers such as:
#
#   FormUrlencodedParams for request query string parameters and POST
#   content using application/x-www-form-urlencoded format.
#
#   MultipartFormParams for POST content using multipart/form-data
#
# Parameters are stored in self (an Array) as [name, value] pairs. Names are
# compared with ==, i.e. case-sensitively.
class Parameters < Array
  include CommonInterface

  # Instantiates a new object and returns the result of capture(str).
  # capture is supplied by the concrete subclasses.
  def self.parse(str)
    new.capture(str)
  end

  # Hands all arguments to the shared _common_init.
  def initialize(*args)
    _common_init(*args)
  end

  # Returns every [name, value] pair named k.
  def get_all(k)
    select { |pair| pair[0] == k }
  end

  # Returns the values of every parameter named k.
  def get_all_values_for(k)
    get_all(k).map { |_name, value| value }
  end
  alias all_values_for get_all_values_for

  # Returns the first [name, value] pair named k, or nil.
  def get_param(k)
    find { |pair| pair[0] == k }
  end

  # Returns the value of the first parameter named k, or nil.
  def get_value_for(k)
    pair = get_param(k)
    pair[1] if pair
  end
  alias get_param_value get_value_for
  alias value_for get_value_for

  # Sets the value of the first parameter named k, or appends a new
  # [k, v] pair when there is none yet. Returns [[k, v]].
  def set_param(k, v)
    if (pair = get_param(k))
      pair[1] = v
    else
      # append to self: there is no existing pair to write into here
      self << [k, v]
    end
    return [[k, v]]
  end

  # Sets the value of every parameter named k, or appends a new [k, v]
  # pair when there is none. Returns the affected pairs.
  def set_all_for(k, v)
    matches = get_all(k)
    if matches.empty?
      self << [k, v]
      return [[k, v]]
    end

    matches.each { |pair| pair[1] = v }
    matches
  end

  # Removes every parameter named k.
  def delete_param(k)
    delete_if { |pair| pair[0] == k }
  end
end
66
+
67
# The HeaderParams class is for the ';' separated key=value (or bare key)
# parameters which can follow the first value of a header, as in
# 'form-data; name="upload"; filename="a.txt"'.
class HeaderParams < Parameters
  # Renders the parameters so they can be appended directly to a header
  # value: every parameter is preceded by "; ", so a non-empty list renders
  # as "; a=1; b" and an empty list as "".
  #
  # quote_val: when true, values are wrapped in double quotes.
  def to_raw(quote_val=false)
    segments = map do |k, v|
      if v
        "#{k}=#{quote_val ? "\"#{v}\"" : v}"
      else
        "#{k}"
      end
    end
    # the leading empty segment is what produces the leading "; "
    [""].concat(segments).join("; ")
  end

  # Parses a ';' separated parameter string and appends the resulting
  # [name, value] pairs to self. Names are stripped of surrounding
  # whitespace; a value wrapped in matching single or double quotes has the
  # quotes removed. Empty segments (as in "a=1;;b=2" or a leading ';') are
  # skipped.
  #
  # Raises a RuntimeError when str is not a String.
  def capture(str)
    raise "arg 0 must be a string" unless str.is_a? String
    str.split(/\s*;\s*/).each do |segment|
      var, val = segment.split('=', 2)
      # an empty segment splits to [], leaving no name to strip
      next if var.nil?

      if val && (quoted = /^(['"])(.*)\1$/.match(val))
        val = quoted[2]
      end
      self << [var.strip, val]
    end
    return self
  end
end
90
+
91
+
92
# The FormUrlencodedParams class is for Parameters values in the
# form of 'q=foo&l=1&z=baz' as found in GET query strings and
# application/x-www-form-urlencoded or application/x-url-encoded POST
# contents.
class FormUrlencodedParams < Parameters
  # Renders the parameters as 'k=v' pairs joined with '&'. A parameter
  # without a value is rendered as just its name.
  #
  # url_enc: when true, names and values are passed through their urlenc
  #          method first (urlenc is not defined in this file). Missing
  #          (nil) names or values are left alone instead of raising.
  def to_raw(url_enc=false)
    map do |k, v|
      if url_enc
        k = k.urlenc unless k.nil?
        v = v.urlenc unless v.nil?
      end
      v ? "#{k}=#{v}" : "#{k}"
    end.join('&')
  end

  # Parses a '&' separated parameter string and appends the resulting
  # [name, value] pairs to self. A parameter without '=' gets a nil value.
  #
  # url_dec: when true, names and values are passed through their urldec
  #          method (urldec is not defined in this file). Missing (nil)
  #          names or values are left alone instead of raising.
  #
  # Raises a RuntimeError when str is not a String.
  def capture(str, url_dec=false)
    raise "arg 0 must be a string" unless str.is_a? String
    str.split('&').each do |pair|
      k, v = pair.split('=', 2)
      if url_dec
        k = k.urldec unless k.nil?
        v = v.urldec unless v.nil?
      end
      self << [k, v]
    end
    return self
  end
end
124
+
125
# The TextPlainFormParams class is for Parameters values in the
# form of 'text/plain' post data. These are usually simple key=value
# pairs separated by a CR?LF.
#
# XXX Note, safari seems to think these should be urlencoded, and not
# newline separated. joy!
class TextPlainFormParams < Parameters
  # Renders the parameters as 'k=v' lines joined with CRLF. A parameter
  # without a value is rendered as just its name.
  #
  # NOTE(review): values are passed through urlenc here, but capture does
  # not decode them again -- confirm whether that asymmetry is intended.
  def to_raw
    lines = map do |k, v|
      v ? "#{k}=#{v.urlenc}" : "#{k}"
    end
    # double quotes are required: '\r\n' in single quotes is the four
    # literal characters backslash, r, backslash, n
    lines.join("\r\n")
  end

  # Parses newline (CR?LF) separated key=value lines and appends the
  # resulting [name, value] pairs to self. A line without '=' gets a nil
  # value.
  #
  # Raises a RuntimeError when str is not a String.
  def capture(str)
    raise "arg 0 must be a string" unless str.is_a? String
    str.split(/\r?\n/).each do |line|
      var, val = line.split('=', 2)
      self << [var, val]
    end
    return self
  end
end
151
+
152
+ require 'strscan'
153
+
154
# The MultipartFormParams class is for Parameters in POST data when using
# the multipart/form-data content type. This is often used for file uploads.
#
# Each parameter is a [name, body] pair. part_headers holds the Headers
# object captured for the part at the same index.
class MultipartFormParams < Parameters
  attr_accessor :boundary, :part_headers

  # A boundary is required for parsing and rendering. It is resolved inside
  # the _common_init block in this order: a value assigned by the caller's
  # block, then opts[:boundary], then a randomly generated string.
  def initialize(*args)
    _common_init(*args) do |this|
      yield this if block_given?
      this.boundary ||=
        ( this.opts[:boundary] || rand(0xffffffffffffffff).to_s(16).rjust(48,'-') )
    end
  end

  # Renders all parts as a multipart body: every part is introduced by a
  # "--boundary" line, followed by its headers, an empty line and the
  # value. The body is closed with "--boundary--".
  #
  # The Content-Disposition header of each part is updated in place so its
  # 'name' parameter matches the parameter name. Parts without captured
  # headers get a fresh Headers object with a form-data disposition.
  def to_raw
    ret = ""
    each_with_index do |(name, value), i|
      ret << "--#{boundary}\r\n"
      # @part_headers is only populated by capture
      hdrs = (@part_headers && @part_headers[i]) || Headers.new
      if (cd = hdrs.get_parameterized_value("Content-Disposition"))
        v, parms = cd
        parms.set_all_for("name", name) if name
        # set_parameterized_value expects the [value, params] pair as one array
        hdrs.set_parameterized_value("Content-Disposition", [v, parms])
      else
        hdrs.set_header("Content-Disposition", "form-data; name=#{name}")
      end

      ret << hdrs.to_raw
      # empty line separating the part headers from the part body;
      # capture relies on it to split the two
      ret << "\r\n"
      ret << "#{value}\r\n"
    end
    ret << "--#{boundary}--"
  end

  # Parses a multipart/form-data body into self and part_headers, replacing
  # any previous contents. Every part, including the last one before the
  # closing "--boundary--" delimiter, must carry a Content-Disposition
  # header with a 'name' parameter.
  #
  # Raises a RuntimeError when str is not a String, when the data does not
  # start with the boundary, when a part has no usable Content-Disposition
  # name, or when the closing delimiter is missing.
  def capture(str)
    raise "arg 0 must be a string" unless String === str
    @part_headers = []
    self.replace([])

    s = StringScanner.new(str)
    esc = Regexp.escape(@boundary)
    bound = /\-\-#{esc}\r?\n/
    start = s.scan_until(bound)
    unless start && start.index(@boundary) == 2
      raise "unexpected start data #{start.inspect}"
    end

    # Matches either a separator between parts or the closing delimiter at
    # the start of a line. Group 1 is only set for the closing delimiter.
    delim = /\-\-#{esc}\r?\n|^\-\-#{esc}(--)(?:\r?\n|$)/
    terminated = false

    while (chunk = s.scan_until(delim))
      terminated = !s[1].nil?
      part = chunk[0, chunk.length - s.matched.length].chomp
      # a closing delimiter directly after a separator carries no part
      break if terminated && part.empty?

      phdr, body = part.split(/^\r?\n/, 2)
      head = Headers.parse(phdr)
      _value, parms = head.get_parameterized_value('Content-Disposition')
      name = parms && parms.get_value_for("name")
      raise "invalid chunk at #{s.pos} bytes" unless name

      @part_headers << head
      self << [name, body]
      break if terminated
    end

    raise "expected boundary terminator at #{s.pos}" unless terminated
    return self
  end
end
219
+ end
220
+
@@ -0,0 +1,76 @@
1
+ module Rbkb::Http
2
+
3
# A Request encapsulates all the entities in a HTTP request message
# including the action header, general headers, and body.
class Request < Base
  attr_accessor :action

  alias first_entity action
  alias first_entity= action=

  # Returns the parameters of the request action (see
  # RequestAction#parameters).
  def action_parameters
    @action.parameters
  end

  # Returns a parameters object for the body, selected by Content-Type:
  # FormUrlencodedParams for url-encoded forms, MultipartFormParams for
  # multipart/form-data (using the 'boundary' Content-Type parameter) and
  # TextPlainFormParams for text/plain. Returns nil for any other, or a
  # missing, Content-Type.
  def body_parameters
    ctype, ct_parms = @headers.get_parameterized_value('Content-Type')
    case ctype
    when /^application\/(?:x-)?(?:www-form-url|url-)encoded(?:\W|$)/
      FormUrlencodedParams.new(@body)
    when /^multipart\/form-data$/
      MultipartFormParams.new(@body, :boundary => ct_parms.get_value_for('boundary'))
    when /^text\/plain$/
      # safari just gives us url-encoded parameters for text/plain.
      # Joy!
      if @headers.get_value_for('User-Agent') =~ /\WSafari\W/
        FormUrlencodedParams.new(@body)
      else
        TextPlainFormParams.new(@body)
      end
    end
  end

  # Returns a new Headers object extended as RequestHeaders. This is the
  # default object which will be used when composing fresh Request header
  # entities.
  def default_headers_obj(*args)
    Headers.new(*args).extend(RequestHeaders)
  end

  # Returns a new Body object. This is the default object which will
  # be used when composing fresh Request body entities.
  def default_body_obj(*args)
    Body.new(*args)
  end

  # Returns a raw HTTP request for this instance. The instance must have
  # an action element defined at the bare minimum; missing headers and body
  # are filled in with the default objects.
  #
  # tmp_body: the body object to render. Defaults to this request's body.
  #
  # Options read from @opts:
  #   :static_length         - replaces the body with a plain Body and sets
  #                            Content-Length to this value
  #   :ignore_content_length - removes the Content-Length header
  #
  # Raises a RuntimeError when no action is set.
  def to_raw(tmp_body=@body)
    raise "this request has no action entity" unless first_entity()
    self.headers ||= default_headers_obj()
    self.body ||= default_body_obj()
    # The parameter default is evaluated before the default body above is
    # created, so it is still nil for a request which had no body.
    tmp_body ||= @body

    if len=@opts[:static_length]
      @body = Body.new(@body, @body.opts) {|x| x.base = self}
      @headers.set_header("Content-Length", len.to_i)
    elsif @opts[:ignore_content_length]
      @headers.delete_header("Content-Length")
    end

    bstr = tmp_body.to_raw
    hdrs = (@headers).to_raw_array.unshift(first_entity.to_raw)
    return "#{hdrs.join("\r\n")}\r\n\r\n#{bstr}"
  end


  # Parses a raw HTTP request and captures data into the current instance.
  # The string is split into headers and body at the first empty line.
  # A BoundBody is used when content_length is set, a plain Body otherwise.
  #
  # Raises a RuntimeError when str is not a String.
  def capture(str)
    raise "arg 0 must be a string" unless String === str
    hstr, bstr = str.split(/\s*\r?\n\r?\n/, 2)
    capture_headers(hstr)
    self.body = content_length ? BoundBody.new : Body.new
    capture_body(bstr)
    return self
  end
end
76
+ end