rbkb-http 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,406 @@
1
+ require 'uri'
2
+
3
+ module Rbkb::Http
4
+
5
+ # A base class for RequestHeaders and ResponseHeaders
6
+ #
7
+ # Includes common implementations of to_raw, to_raw_array, capture, and
8
+ # the class method parse
9
+ #
10
+ # The headers are stored internally as an array of name/value pairs.
11
+ #
12
+ # The headers are generally name/value pairs in the form of:
13
+ #
14
+ # [ ["Name1", "value1"], ["Name2", "value2"], ... ]
15
+ #
16
+ # Which will be rendered with to_raw() to (or captured with capture() from):
17
+ #
18
+ # Name1: value1
19
+ # Name2: value2
20
+ # ...
21
+ #
22
+ # This has the benefit of letting the data= accessor automatically render a
23
+ # Hash or any other Enumerable to a Headers object through the use of to_a.
24
+ # However it has the caveat that named pairs are expected on various
25
+ # operations.
26
class Headers < Array
  include CommonInterface

  # Class method to instantiate a new Headers object extended with the
  # RequestHeaders mixin.
  def self.request_hdr(*args)
    Headers.new(*args).extend(RequestHeaders)
  end

  # Class method to instantiate a new Headers object extended with the
  # ResponseHeaders mixin.
  def self.response_hdr(*args)
    Headers.new(*args).extend(ResponseHeaders)
  end

  # Instantiates a new Headers object and returns the result of capture(str)
  # Note, this method does not distinguish between ResponseHeaders or
  # RequestHeaders, and so the object may need to be extended with one
  # or the other, if you need access to specific behaviors from either.
  def self.parse(str)
    new().capture(str)
  end

  # Instantiates a new Headers object and returns the result of
  # capture_full_headers(str, first_obj)
  def self.parse_full_headers(str, first_obj)
    new().capture_full_headers(str, first_obj)
  end

  # Instantiates a new Headers object.
  #
  # Arguments:
  #   raw:  If the first argument is an Enumerable it is converted with
  #         'to_a' and stored through data=. Any other first argument is
  #         passed on to _common_init untouched.
  #         NOTE(review): _common_init comes from CommonInterface, which is
  #         not visible here; presumably it is what parses a raw String with
  #         capture -- confirm.
  #
  #   opts: Remaining arguments are forwarded to _common_init.
  #
  def initialize(*args)
    super()
    if args.first.kind_of? Enumerable
      raw = args.first
      # the Enumerable is stored below, so hide it from _common_init
      args[0] = nil
      _common_init(*args)
      self.data = raw.to_a
    else
      _common_init(*args)
    end
  end

  attr_reader :base

  # Sets the @base instance variable when b is nil or a kind of Base
  # object. Raises a RuntimeError for anything else.
  def base=(b)
    if b.nil? or b.kind_of? Base
      @base = b
    else
      raise "base must be a kind of Base object or nil"
    end
  end

  # The data method provides a common interface to access internal
  # non-raw information stored in the object.
  #
  # The Headers incarnation returns the internal headers array
  # (actually self).
  def data
    self
  end

  # The data= method provides a common interface to set the internal
  # non-raw information stored in the object.
  #
  # The contents of this object are replaced:
  #   * Another Headers object has its elements copied in with replace.
  #
  #   * Anything else is converted with 'to_a' and each element is stored
  #     as a [name, value] tuple. 1-dimensional elements end up as tuples
  #     with 'nil' as the second value.
  #
  #   * Numeric names are converted to strings.
  #
  # Returns self.
  def data=(d)
    if d.kind_of? Headers
      self.replace d
    else
      self.replace []
      d.to_a.each do |k, v|
        k = k.to_s if k.is_a? Numeric
        self << [k,v]
      end
    end
    return self
  end

  # The to_raw_array method returns an interim formatted array of raw
  # "Name: value" strings.
  def to_raw_array
    self.map {|h,v| "#{h}: #{v}" }
  end

  # Returns every [name, value] tuple whose name matches k, ignoring case.
  def get_all(k)
    self.select {|h| _name_match?(h[0], k) }
  end

  # Returns the values of every header whose name matches k, ignoring case.
  def get_all_values_for(k)
    self.get_all(k).collect {|h,v| v }
  end
  alias all_values_for get_all_values_for

  # Returns the first [name, value] tuple whose name matches k (ignoring
  # case), or nil when there is none.
  def get_header(k)
    self.find {|h| _name_match?(h[0], k) }
  end

  # Returns the value of the first header whose name matches k (ignoring
  # case), or nil when there is none.
  def get_value_for(k)
    if h=self.get_header(k)
      return h[1]
    end
  end
  alias get_header_value get_value_for
  alias value_for get_value_for

  # Returns a header value after "fully" parsing it. Any semi-colon separated
  # key=value or key parameters after the first value in the header will be
  # returned as an extra HeaderParams array in addition to the value.
  #
  # Returns [value, HeaderParams], or nil when the header has no value.
  #
  # XXX NOTE, the current implementation will incorrectly parse several headers
  # that may legally contain ';' with no special meaning.
  # For example, 'Referer: http://example.com;sometimes_a_url_parameter=1'
  def get_parameterized_value(k)
    if v=get_value_for(k)
      if i=v.index(';')
        val = v[0,i]
        parms = v[(i+1)..-1]
        [ val, HeaderParams.parse( parms ) ]
      else
        [ v, HeaderParams.new ]
      end
    end
  end
  alias parameterized_value get_parameterized_value

  # Sets header k to a value followed by its rendered parameters.
  #
  # Arguments:
  #   k:     header name
  #   v:     either a [value, params] array (as returned by
  #          get_parameterized_value), or a bare value when parms is
  #          supplied separately.
  #   parms: optional parameters object responding to to_raw. Only consulted
  #          when the array form of v carries no parameters of its own.
  #
  # Raises a RuntimeError when v is not an array and no parms were given.
  # Returns whatever set_header returns.
  def set_parameterized_value(k, v, parms=nil)
    if v.kind_of? Array
      val = v[0]
      parms = v[1] if v[1]
    elsif parms.nil?
      raise "v is not an array. use set_header()"
    else
      val = v
    end
    # Build a fresh string: appending to val.to_s would modify the caller's
    # own String, since String#to_s returns the receiver itself.
    set_header(k, "#{val}#{parms.to_raw if parms}")
  end

  # Sets the value of every header matching k (ignoring case) to v. When no
  # header matches, a new [k, v] tuple is appended instead.
  #
  # Returns an array of the affected [name, value] tuples.
  def set_header(k,v)
    sel = get_all(k)

    if sel.empty?
      self << [k,v]
      return [[k,v]]
    else
      sel.each {|h| h[1] = v }
      return sel
    end
  end
  alias set_all_for set_header

  # Removes every header whose name matches k, ignoring case.
  def delete_header(k)
    self.delete_if {|h| _name_match?(h[0], k) }
  end

  # The to_raw method returns a raw string of headers as they appear
  # on the wire.
  def to_raw
    to_raw_array.join("\r\n") << "\r\n"
  end

  # Captures a raw string of headers into this instance's internal array.
  # Note: This method expects not to include the first element such as a
  # RequestAction or ResponseStatus. See capture_full_headers for a version
  # that can handle this.
  #
  # When a block is given it is called with (self, lines) before the lines
  # are stored, so the block may inspect or modify the interim array.
  #
  # Raises a RuntimeError unless str is a String. Returns self.
  def capture(str)
    raise "arg 0 must be a string" unless str.is_a?(String)
    heads = str.split(/\s*\r?\n/)

    # pass interim parsed headers to a block if given
    yield(self, heads) if block_given?

    self.replace [] if capture_complete?
    heads.each do |s|
      # a line without ':' yields a nil value, an empty line a nil name
      k,v = s.split(/\s*:\s*/, 2)
      self << [k,v]
    end
    return self
  end

  # See capture_full_headers. This method is used to resolve the parser
  # for the first entity above the HTTP headers. This instance is designed
  # to raise an exception when capturing.
  def get_first_obj; raise "get_first_obj called on base stub"; end

  # This method parses a full set of raw headers from the 'str' argument.
  # Unlike the regular capture method, the string is expected to start
  # with a line which will be parsed by first_obj using its own capture
  # method. For example, first_obj would parse something like
  # "GET / HTTP/1.1" for RequestAction or "HTTP/1.1 200 OK" for
  # ResponseStatus. If first_obj is not defined, there will be an attempt
  # to resolve it by calling get_first_obj which should return the
  # appropriate type of object or raise an exception.
  #
  # When a block is given it is called with the remaining header lines
  # after the first line has been removed.
  #
  # Returns a 2 element array containing [first_entity, headers]
  # where first entity is the result of first_obj.capture and headers
  # is self.
  def capture_full_headers(str, first_obj=nil)
    first_obj ||= get_first_obj() {|x|}

    first = nil
    capture(str) do |this, heads|
      first = first_obj.capture(heads.shift)
      yield(heads) if block_given?
    end
    return [first, self]
  end

  # This method will non-destructively reset the capture state on this object.
  # The existing headers are maintained when this is called.
  # See also: capture_complete? reset_capture!
  def reset_capture
    @capture_state = nil
    self
  end

  # This method will destructively reset the capture state on this object.
  # The existing headers array is emptied when this is called.
  # Returns self, like reset_capture.
  # See also: capture_complete?, reset_capture
  def reset_capture!
    @capture_state = nil
    self.data = []
    self
  end

  # Indicates whether this object is ready to capture fresh data, or is
  # waiting for additional data or a reset from a previous incomplete or
  # otherwise broken capture. See also: reset_capture, reset_capture!
  def capture_complete?
    not @capture_state
  end

  # Case-insensitive header name comparison. Both sides go through to_s so
  # that tuples with a nil name (capture stores one for an empty line) do
  # not raise NoMethodError during lookups.
  def _name_match?(name, k)
    name.to_s.downcase == k.to_s.downcase
  end
  private :_name_match?

end
271
+
272
+
273
+ # A mixin for HTTP Request headers to add specific request header
274
+ # behaviors and features.
275
+ #
276
+ # To instantiate a new request header, use Headers.request_hdr
277
module RequestHeaders
  # Header names listed as not taking ';' parameters. Frozen so the shared
  # list cannot be modified by accident.
  # NOTE(review): nothing in this file consults this list yet.
  NO_PARAMETERS = ["Referer", "Host"].freeze

  # This method is used to resolve the parser for the first entity above the
  # HTTP headers. The incarnation for RequestHeaders returns a new
  # RequestAction built from the given arguments.
  # See Headers.capture_full_headers for more information.
  def get_first_obj(*args)
    RequestAction.new(*args)
  end
end
287
+
288
+
289
+ # A mixin for HTTP Response headers to add specific response header
290
+ # behaviors and features.
291
+ #
292
+ # To instantiate a new response header, use Headers.response_hdr
293
module ResponseHeaders

  # Resolves the parser for the first entity above the HTTP headers.
  # For response headers that is a new ResponseStatus object built from
  # whatever arguments were supplied.
  # See Headers.capture_full_headers for how the result is used.
  def get_first_obj(*args)
    status_parser = ResponseStatus.new(*args)
    status_parser
  end
end
302
+
303
+
304
+ # A class for HTTP request actions, i.e. the first
305
+ # header sent in an HTTP request, as in "GET / HTTP/1.1"
306
class RequestAction
  include CommonInterface

  attr_accessor :verb, :uri, :version
  attr_reader :base

  # Builds a new RequestAction and captures str into it.
  def self.parse(str)
    new().capture(str)
  end

  # Runs the common initializer, then fills in "GET", "/" and "HTTP/1.1"
  # for whichever of verb, uri and version are still unset afterwards.
  def initialize(*args)
    _common_init(*args)
    @verb = "GET" unless @verb
    @uri = URI.parse("/") unless @uri
    @version = "HTTP/1.1" unless @version
  end

  # Renders the action line: verb and uri, followed by the version when
  # one is set, separated by single spaces.
  def to_raw
    fields = @version ? [ @verb, @uri, @version ] : [ @verb, @uri ]
    fields.join(" ")
  end

  # Parses a request action String such as "GET / HTTP/1.1" into this
  # instance. The version is optional and is set to nil when absent.
  #
  # Raises a RuntimeError when str is not a String or does not look like
  # an action line. Returns self.
  def capture(str)
    raise "arg 0 must be a string" unless str.is_a?(String)
    m = /^([^\s]+)\s+([^\s]+)(?:\s+([^\s]+))?\s*$/.match(str)
    raise "invalid action #{str.inspect}" unless m

    _whole, verb, target, version = m.to_a
    @verb = verb
    @uri = URI.parse(target)
    @version = version
    self
  end

  # Returns the path of the URI, or nil when no URI is set.
  def path
    @uri ? @uri.path : nil
  end

  # Returns the query of the URI, or nil when no URI is set.
  def query
    @uri ? @uri.query : nil
  end

  # Returns the URI query parsed by FormUrlencodedParams.parse, or nil when
  # there is no query string.
  # XXX note parameters cannot currently be modified in this form.
  def parameters
    return nil unless query
    FormUrlencodedParams.parse(query)
  end

  # Sets @base. Only Base objects are accepted; anything else raises a
  # RuntimeError.
  def base=(b)
    raise "base must be a kind of Base object" unless b.is_a?(Base)
    @base = b
  end

end
365
+
366
+
367
+ # A class for HTTP response status messages, i.e. the first
368
+ # header returned by a server, as in "HTTP/1.0 200 OK"
369
class ResponseStatus
  include CommonInterface

  attr_accessor :version, :code, :text
  attr_reader :base

  # Builds a new ResponseStatus and captures str into it.
  def self.parse(str)
    new().capture(str)
  end

  # Runs the common initializer, then falls back to DEFAULT_HTTP_VERSION
  # when no version has been set.
  def initialize(*args)
    _common_init(*args)
    @version = DEFAULT_HTTP_VERSION unless @version
  end

  # Renders the status line as version, code and text separated by spaces.
  def to_raw
    fields = [@version, @code, @text]
    fields.join(" ")
  end

  # Parses a status line such as "HTTP/1.0 200 OK" into this instance.
  # The code is stored as an Integer; the text is optional and is nil when
  # absent.
  #
  # Raises a RuntimeError when str is not a String or does not look like a
  # status line. Returns self.
  def capture(str)
    raise "arg 0 must be a string" unless str.is_a?(String)
    m = /^([^\s]+)\s+(\d+)(?:\s+(.*))?$/.match(str)
    raise "invalid status #{str.inspect}" unless m

    _whole, version, digits, text = m.to_a
    @version = version
    # the pattern above only lets digits through, so this is always numeric
    @code = digits.to_i
    @text = text
    self
  end

  # Sets @base. Only Base objects are accepted; anything else raises a
  # RuntimeError.
  def base=(b)
    raise "base must be a kind of Base object" unless b.is_a?(Base)
    @base = b
  end
end
405
+ end
406
+
@@ -0,0 +1,220 @@
1
+ module Rbkb::Http
2
+
3
+ # The Parameters class is for handling named parameter values. This is a
4
+ # stub base class from which to derive specific parameter parsers such as:
5
+ #
6
+ # FormUrlencodedParams for request query string parameters and POST
7
+ # content using application/x-www-form-urlencoded format.
8
+ #
9
+ # MultiPartFormParams for POST content using multipart/form-data
10
class Parameters < Array
  include CommonInterface

  # Instantiates a new object and returns the result of capture(str).
  # capture is expected to be supplied by the derived class.
  def self.parse(str)
    new().capture(str)
  end

  # All arguments are handed to _common_init.
  def initialize(*args)
    _common_init(*args)
  end

  # Returns every [name, value] tuple whose name equals k.
  def get_all(k)
    self.select {|p| p[0] == k}
  end

  # Returns the values of every parameter whose name equals k.
  def get_all_values_for(k)
    self.get_all(k).collect {|p,v| v }
  end
  alias all_values_for get_all_values_for

  # Returns the first [name, value] tuple whose name equals k, or nil.
  def get_param(k)
    self.find {|p| p[0] == k}
  end

  # Returns the value of the first parameter whose name equals k, or nil.
  def get_value_for(k)
    if p=self.get_param(k)
      return p[1]
    end
  end
  alias get_param_value get_value_for
  alias value_for get_value_for

  # Sets the value of the first parameter named k to v, or appends a new
  # [k, v] tuple when no such parameter exists.
  #
  # Returns [[k, v]].
  def set_param(k, v)
    if p=self.get_param(k)
      p[1]=v
    else
      # p is nil in this branch, so the new tuple has to go onto self
      self << [k,v]
    end
    return [[k,v]]
  end

  # Sets the value of every parameter named k to v, or appends a new
  # [k, v] tuple when no such parameter exists.
  #
  # Returns an array of the affected tuples.
  def set_all_for(k, v)
    sel=self.get_all(k)
    if sel.empty?
      self << [k,v]
      return [[k,v]]
    else
      sel.each {|p| p[1] = v}
      return sel
    end
  end

  # Removes every parameter whose name equals k.
  def delete_param(k)
    self.delete_if {|p| p[0] == k }
  end
end
66
+
67
# Parameters found after the first ';' of a header value, for example the
# ' charset=utf-8' part of 'text/html; charset=utf-8'.
class HeaderParams < Parameters
  # Renders the parameters so they can be appended directly to a header
  # value: every parameter is preceded by "; ". An empty object renders as
  # an empty string.
  #
  # Arguments:
  #   quote_val: when true, values are wrapped in double quotes.
  def to_raw(quote_val=false)
    # the leading nil renders as "" so that join emits the first "; "
    ([nil] + self).map do |k,v|
      if v
        "#{k}=#{quote_val ? "\"#{v}\"" : v}"
      else
        "#{k}"
      end
    end.join("; ")
  end

  # Parses ';' separated 'key=value' or bare 'key' parameters and appends
  # them to this object. Values wrapped in matching single or double quotes
  # have the quotes removed. Empty segments (a leading ';' or ';;') are
  # skipped, which lets the output of to_raw be parsed back.
  #
  # Raises a RuntimeError unless str is a String. Returns self.
  def capture(str)
    raise "arg 0 must be a string" unless str.is_a? String
    str.split(/\s*;\s*/).each do |p|
      var, val = p.split('=', 2)
      # an empty segment splits to nothing at all; there is no name to store
      next if var.nil?
      if val =~ /^(['"])(.*)\1$/
        val = $2
      end
      self << [var.strip, val]
    end
    return self
  end
end
90
+
91
+
92
+ # The FormUrlencodedParams class is for Parameters values in the
93
+ # form of 'q=foo&l=1&z=baz' as found in GET query strings and
94
+ # application/x-www-form-urlencoded or application/x-url-encoded POST
95
+ # contents.
96
class FormUrlencodedParams < Parameters
  # Renders the parameters as 'k=v' pairs joined with '&'. A parameter
  # whose value is nil is rendered as the bare name.
  #
  # Arguments:
  #   url_enc: when true, names and values are passed through urlenc first.
  #            NOTE(review): urlenc is not defined in this file; presumably
  #            it is a String extension provided by rbkb -- confirm.
  def to_raw(url_enc=false)
    self.map do |k,v|
      if url_enc
        # valueless parameters carry a nil value, which cannot be encoded
        k = k.urlenc if k
        v = v.urlenc if v
      end
      if v
        "#{k}=#{v}"
      else
        "#{k}"
      end
    end.join('&')
  end

  # Parses a 'q=foo&l=1&z=baz' style string and appends the parameters to
  # this object. A segment without '=' is stored with a nil value.
  #
  # Arguments:
  #   str:     the raw parameter string
  #   url_dec: when true, names and values are passed through urldec.
  #            NOTE(review): urldec is not defined in this file; presumably
  #            it is a String extension provided by rbkb -- confirm.
  #
  # Raises a RuntimeError unless str is a String. Returns self.
  def capture(str, url_dec=false)
    raise "arg 0 must be a string" unless str.is_a? String
    str.split('&').each do |p|
      k,v = p.split('=',2)
      if url_dec
        # 'a&b=1' yields a nil value and '&&' a nil name; leave those alone
        k = k.urldec if k
        v = v.urldec if v
      end
      self << [k, v]
    end
    return self
  end
end
124
+
125
+ # The TextPlainFormParams class is for Parameters values in the
126
+ # form of 'text/plain' post data. These are usually simple key=value
127
+ # pairs separated by a CR?LF.
128
+ #
129
+ # XXX Note, safari seems to think these should be urlencoded, and not
130
+ # newline separated. joy!
131
class TextPlainFormParams < Parameters
  # Renders the parameters as 'k=v' lines separated by CRLF. Values are
  # passed through urlenc; a parameter whose value is nil is rendered as
  # the bare name.
  # NOTE(review): urlenc is not defined in this file; presumably it is a
  # String extension provided by rbkb -- confirm.
  def to_raw
    self.map do |k,v|
      if v
        "#{k}=#{v.urlenc}"
      else
        "#{k}"
      end
    end.join("\r\n") # double quotes: '\r\n' would be a literal backslash sequence
  end

  # Parses newline (LF or CRLF) separated 'key=value' lines and appends
  # them to this object. A line without '=' is stored with a nil value.
  #
  # Raises a RuntimeError unless str is a String. Returns self.
  def capture(str)
    raise "arg 0 must be a string" unless str.is_a? String
    str.split(/\r?\n/).each do |p|
      var,val = p.split('=',2)
      self << [var,val]
    end
    return self
  end
end
151
+
152
+ require 'strscan'
153
+
154
+ # The MultipartFormParams class is for Parameters in POST data when using
155
+ # the multipart/form-data content type. This is often used for file uploads.
156
class MultipartFormParams < Parameters
  attr_accessor :boundary, :part_headers

  # A boundary is required. It may be assigned in the block passed to new
  # or given as the :boundary option; otherwise a random one is generated.
  # NOTE(review): 'opts' and the block handling of _common_init come from
  # CommonInterface, which is not visible here -- confirm.
  def initialize(*args)
    _common_init(*args) do |this|
      yield this if block_given?
      this.boundary ||=
        ( this.opts[:boundary] || rand(0xffffffffffffffff).to_s(16).rjust(48,'-') )
    end
  end

  # Renders the parameters as a multipart/form-data body.
  #
  # Every part is written as a "--boundary" line, the part headers, an
  # empty line and the value; the body ends with the "--boundary--"
  # terminator. Lines are separated with CRLF. The Content-Disposition
  # header of each part is created or updated so its 'name' parameter
  # matches the parameter name. Parts without captured headers get a fresh
  # Headers object.
  def to_raw
    @part_headers ||= []
    ret = ""
    self.each_with_index do |p,i|
      name, value = p
      ret << "--#{boundary}\r\n"
      hdrs = (@part_headers[i] ||= Headers.new)
      if cd = hdrs.get_parameterized_value("Content-Disposition")
        v, parms = cd
        parms.set_all_for("name", name) if name
        hdrs.set_parameterized_value("Content-Disposition", [v, parms])
      else
        hdrs.set_header("Content-Disposition", "form-data; name=#{name}")
      end

      ret << hdrs.to_raw
      # empty line separating the part headers from the part body
      ret << "\r\n"
      ret << "#{value}\r\n"
    end
    ret << "--#{boundary}--"
  end

  # Parses a multipart/form-data body, replacing the current parameters
  # and part headers.
  #
  # The data must start with a "--boundary" line and end with a
  # "--boundary--" terminator. Every part must carry a Content-Disposition
  # header with a 'name' parameter; the part is stored as [name, body] and
  # its headers are stored at the same index in part_headers.
  #
  # Raises a RuntimeError on malformed input. Returns self.
  def capture(str)
    raise "arg 0 must be a string" unless String === str
    @part_headers = []
    self.replace([])

    s = StringScanner.new(str)
    esc = Regexp.escape(@boundary)
    opening = /\-\-#{esc}\r?\n/
    start = s.scan_until(opening)
    unless start && start.index(@boundary) == 2
      raise "unexpected start data #{start.inspect}"
    end

    # Matches both the delimiter between parts and the closing terminator.
    # Group 1 is only set for the terminator. Scanning for the plain
    # delimiter alone would never reach the last part, since that part is
    # followed by the terminator.
    delim = /\-\-#{esc}(?:(\-\-)(?:\r?\n|\z)|\r?\n)/

    loop do
      chunk = s.scan_until(delim)
      raise "expected boundary terminator at #{s.pos}" unless chunk
      last = !s[1].nil?

      part = chunk[0, chunk.size - s.matched_size].chomp
      # nothing between the previous delimiter and the terminator
      break if last && part.empty?

      phdr, body = part.split(/^\r?\n/, 2)
      head = Headers.parse(phdr)
      _value, parms = head.get_parameterized_value('Content-Disposition')
      if parms and name=parms.get_value_for("name")
        @part_headers << head
        self << [name, body]
      else
        raise "invalid chunk at #{s.pos} bytes"
      end

      break if last
    end
    return self
  end
end
219
+ end
220
+
@@ -0,0 +1,76 @@
1
+ module Rbkb::Http
2
+
3
+ # A Request encapsulates all the entities in a HTTP request message
4
+ # including the action header, general headers, and body.
5
class Request < Base
  attr_accessor :action

  alias first_entity action
  alias first_entity= action=

  # Returns the parameters of the action entity (see
  # RequestAction#parameters).
  def action_parameters
    @action.parameters
  end

  # Returns a parameters object for the body, chosen by the Content-Type
  # header:
  #
  #   * form url-encoded types  -> FormUrlencodedParams
  #   * multipart/form-data     -> MultipartFormParams, with the boundary
  #                                taken from the Content-Type parameters
  #   * text/plain              -> TextPlainFormParams, or
  #                                FormUrlencodedParams when the User-Agent
  #                                looks like Safari
  #
  # Returns nil for any other or missing content type.
  def body_parameters
    ctype, ct_parms = @headers.get_parameterized_value('Content-Type')
    case ctype
    when /^application\/(?:x-)?(?:www-form-url|url-)encoded(?:\W|$)/
      FormUrlencodedParams.new(@body)
    when /^multipart\/form-data$/
      MultipartFormParams.new(@body, :boundary => ct_parms.get_value_for('boundary'))
    when /^text\/plain$/
      # safari just gives us url-encoded parameters for text/plain.
      # Joy!
      if @headers.get_value_for('User-Agent') =~ /\WSafari\W/
        FormUrlencodedParams.new(@body)
      else
        TextPlainFormParams.new(@body)
      end
    end
  end

  # Returns a new Headers object extended as RequestHeaders. This is the
  # default object which will be used when composing fresh Request header
  # entities.
  def default_headers_obj(*args)
    Headers.new(*args).extend(RequestHeaders)
  end

  # Returns a new Body object. This is the default object which will
  # be used when composing fresh Request body entities.
  def default_body_obj(*args)
    Body.new(*args)
  end

  # Returns a raw HTTP request for this instance. The instance must have
  # an action element defined at the bare minimum.
  #
  # Arguments:
  #   tmp_body: the body to render; defaults to this request's body.
  #
  # Missing headers and body are replaced with the default objects first.
  # With the :static_length option the body is rebuilt as a plain Body and
  # Content-Length is set to that value; with :ignore_content_length the
  # Content-Length header is removed.
  #
  # Raises a RuntimeError when there is no action entity.
  def to_raw(tmp_body=@body)
    raise "this request has no action entity" unless first_entity()
    self.headers ||= default_headers_obj()
    self.body ||= default_body_obj()
    # The tmp_body default was evaluated before the default body above was
    # created, so it is still nil for a request without a body.
    # NOTE(review): assumes the body= accessor inherited from Base stores
    # into @body -- confirm.
    tmp_body ||= @body

    if len=@opts[:static_length]
      @body = Body.new(@body, @body.opts) {|x| x.base = self}
      @headers.set_header("Content-Length", len.to_i)
    elsif @opts[:ignore_content_length]
      @headers.delete_header("Content-Length")
    end

    bstr = tmp_body.to_raw
    hdrs = (@headers).to_raw_array.unshift(first_entity.to_raw)
    return "#{hdrs.join("\r\n")}\r\n\r\n#{bstr}"
  end

  # Parses a raw HTTP request and captures data into the current instance.
  # The string is split at the first empty line into a header section and
  # a body section. The body becomes a BoundBody when content_length is
  # set and a Body otherwise.
  #
  # Raises a RuntimeError unless str is a String. Returns self.
  def capture(str)
    raise "arg 0 must be a string" unless String === str
    hstr, bstr = str.split(/\s*\r?\n\r?\n/, 2)
    capture_headers(hstr)
    self.body = content_length ? BoundBody.new : Body.new
    capture_body(bstr)
    return self
  end
end
76
+ end