http_tools 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -11,9 +11,11 @@ module HTTPTools
11
11
  #
12
12
  # Example:
13
13
  # parser = HTTPTools::Parser.new
14
- # parser.on(:status) {|status, message| puts "#{status} #{message}"}
15
- # parser.on(:headers) {|headers| puts headers.inspect}
16
- # parser.on(:body) {|body| puts body}
14
+ # parser.on(:header) do |header|
15
+ # puts "#{parser.status_code} #{parser.message}"
16
+ # puts parser.header.inspect
17
+ # end
18
+ # parser.on(:stream) {|chunk| print chunk}
17
19
  #
18
20
  # parser << "HTTP/1.1 200 OK\r\n"
19
21
  # parser << "Content-Length: 20\r\n\r\n"
@@ -27,16 +29,42 @@ module HTTPTools
27
29
  class Parser
28
30
  include Encoding
29
31
 
32
+ COLON = ":".freeze
30
33
  KEY_TERMINATOR = ": ".freeze
31
34
  CONTENT_LENGTH = "Content-Length".freeze
32
35
  TRANSFER_ENCODING = "Transfer-Encoding".freeze
33
36
  TRAILER = "Trailer".freeze
37
+ CONNECTION = "Connection".freeze
38
+ CLOSE = "close".freeze
34
39
  CHUNKED = "chunked".freeze
35
- EVENTS = ["method", "path", "uri", "fragment", "version", "status", "key",
36
- "value", "headers", "stream", "body", "trailers", "finished",
37
- "error"].map {|event| event.freeze}.freeze
40
+ EVENTS = %W{header stream trailer finish error}.map do |event|
41
+ event.freeze
42
+ end.freeze
43
+
44
+ REQUEST_METHOD = "REQUEST_METHOD".freeze
45
+ PATH_INFO = "PATH_INFO".freeze
46
+ QUERY_STRING = "QUERY_STRING".freeze
47
+ REQUEST_URI = "REQUEST_URI".freeze
48
+ FRAGMENT = "FRAGMENT".freeze
49
+
50
+ PROTOTYPE_ENV = {
51
+ "SCRIPT_NAME" => "".freeze,
52
+ PATH_INFO => "/".freeze,
53
+ QUERY_STRING => "".freeze,
54
+ "rack.version" => [1, 1].freeze,
55
+ "rack.url_scheme" => "http".freeze,
56
+ "rack.errors" => STDERR,
57
+ "rack.multithread" => false,
58
+ "rack.multiprocess" => false,
59
+ "rack.run_once" => false}.freeze
60
+
61
+ HTTP_ = "HTTP_".freeze
62
+ LOWERCASE = "a-z-".freeze
63
+ UPPERCASE = "A-Z_".freeze
38
64
 
39
65
  attr_reader :state # :nodoc:
66
+ attr_reader :request_method, :path_info, :query_string, :request_uri,
67
+ :fragment, :version, :status_code, :message, :header, :trailer
40
68
 
41
69
  # Force the parser to parse a trailer even if the Trailer header is missing.
42
70
  attr_accessor :force_trailer
@@ -44,40 +72,19 @@ module HTTPTools
44
72
  # Skip parsing the body, e.g. with the response to a HEAD request.
45
73
  attr_accessor :force_no_body
46
74
 
47
- # :call-seq: Parser.new(delegate=nil) -> parser
75
+ # Allow a response with no status line or headers if it looks like HTML.
76
+ attr_accessor :allow_html_without_header
77
+
78
+ # :call-seq: Parser.new -> parser
48
79
  #
49
80
  # Create a new HTTPTools::Parser.
50
81
  #
51
- # delegate is an object that will recieve callbacks for events during
52
- # parsing. The delegate's methods should be named on_[event name], e.g.
53
- # on_status, on_body, etc. See #add_listener for more.
54
- #
55
- # Example:
56
- # class ExampleDelegate
57
- # def on_status(status, message)
58
- # puts "#{status} #{message}"
59
- # end
60
- # end
61
- # parser = HTTPTools::Parser.new(ExampleDelegate.new)
62
- #
63
- # If a callback is set for an event, it will take precedence over the
64
- # delegate for that event.
65
- #
66
- def initialize(delegate=nil)
82
+ def initialize
67
83
  @state = :start
68
84
  @buffer = StringScanner.new("")
69
85
  @buffer_backup_reference = @buffer
70
- @status = nil
71
- @headers = {}
72
- @last_key = nil
73
- @content_left = nil
74
- @body = nil
75
- if delegate
76
- EVENTS.each do |event|
77
- id = "on_#{event}"
78
- add_listener(event, delegate.method(id)) if delegate.respond_to?(id)
79
- end
80
- end
86
+ @header = {}
87
+ @trailer = {}
81
88
  end
82
89
 
83
90
  # :call-seq: parser.concat(data) -> parser
@@ -96,6 +103,29 @@ module HTTPTools
96
103
  end
97
104
  alias << concat
98
105
 
106
+ # :call-seq: parser.env -> hash or nil
107
+ #
108
+ # Returns a Rack compatible environment hash. Will return nil if called
109
+ # before headers are complete.
110
+ #
111
+ # The following are not supplied, and must be added to make the environment
112
+ # hash fully Rack compliant: SERVER_NAME, SERVER_PORT, rack.input
113
+ #
114
+ def env
115
+ return unless @header_complete
116
+ env = PROTOTYPE_ENV.merge(
117
+ REQUEST_METHOD => @request_method,
118
+ REQUEST_URI => @request_uri)
119
+ if @path_info
120
+ env[PATH_INFO] = @path_info
121
+ env[QUERY_STRING] = @query_string
122
+ end
123
+ env[FRAGMENT] = @fragment if @fragment
124
+ @header.each {|k, val| env[HTTP_ + k.tr(LOWERCASE, UPPERCASE)] = val}
125
+ @trailer.each {|k, val| env[HTTP_ + k.tr(LOWERCASE, UPPERCASE)] = val}
126
+ env
127
+ end
128
+
99
129
  # :call-seq: parser.finish -> parser
100
130
  #
101
131
  # Used to notify the parser that the request has finished in a case where it
@@ -121,8 +151,12 @@ module HTTPTools
121
151
  #
122
152
  def finish
123
153
  if @state == :body_on_close
124
- @body_callback.call(@body) if @body_callback
125
154
  @state = end_of_message
155
+ elsif @state == :body_chunked && @header[CONNECTION] == CLOSE &&
156
+ !@header[TRAILER] && @buffer.eos?
157
+ @state = end_of_message
158
+ elsif @state == :start && @buffer.string.length < 1
159
+ raise EmptyMessageError.new("Message empty")
126
160
  else
127
161
  raise MessageIncompleteError.new("Message ended early")
128
162
  end
@@ -153,12 +187,17 @@ module HTTPTools
153
187
  @buffer = @buffer_backup_reference
154
188
  @buffer.string.replace("")
155
189
  @buffer.reset
156
- # @status = nil
157
- @headers = {}
190
+ @request_method = nil
191
+ @path_info = nil
192
+ @query_string = nil
193
+ @request_uri = nil
194
+ @fragment = nil
195
+ @version = nil
196
+ @status_code = nil
197
+ @header = {}
158
198
  @trailer = {}
159
- # @last_key = nil
160
- # @content_left = nil
161
- @body = nil
199
+ @last_key = nil
200
+ @content_left = nil
162
201
  self
163
202
  end
164
203
 
@@ -167,44 +206,20 @@ module HTTPTools
167
206
  # parser.on(event) {|arg1 [, arg2]| block} -> parser
168
207
  # parser.on(event, proc) -> parser
169
208
  #
170
- # Available events are :method, :path, :version, :status, :headers, :stream,
171
- # :body, and :error.
209
+ # Available events are :header, :stream, :trailer, :finish, and :error.
172
210
  #
173
211
  # Adding a second callback for an event will overwrite the existing callback
174
212
  # for that event.
175
213
  #
176
214
  # Events:
177
- # [method] Supplied with one argument, the HTTP method as a String,
178
- # e.g. "GET"
179
- #
180
- # [path] Supplied with two arguments, the request path as a String,
181
- # e.g. "/example.html", and the query string as a String,
182
- # e.g. "query=foo"
183
- # (this callback is only called if the request uri is a path)
184
- #
185
- # [uri] Supplied with one argument, the request uri as a String,
186
- # e.g. "/example.html?query=foo"
187
- #
188
- # [fragment] Supplied with one argument, the fragment from the request
189
- # uri, if present
190
- #
191
- # [version] Supplied with one argument, the HTTP version as a String,
192
- # e.g. "1.1"
193
- #
194
- # [status] Supplied with two arguments, the HTTP status code as a
195
- # Numeric, e.g. 200, and the HTTP status message as a String,
196
- # e.g. "OK"
197
- #
198
- # [headers] Supplied with one argument, the message headers as a Hash,
199
- # e.g. {"Content-Length" => "20"}
215
+ # [header] Called when headers are complete
200
216
  #
201
217
  # [stream] Supplied with one argument, the last chunk of body data fed
202
218
  # in to the parser as a String, e.g. "<h1>Hello"
203
219
  #
204
- # [body] Supplied with one argument, the message body as a String,
205
- # e.g. "<h1>Hello world</h1>"
220
+ # [trailer] Called on the completion of the trailer, if present
206
221
  #
207
- # [finished] Supplied with one argument, any data left in the parser's
222
+ # [finish] Supplied with one argument, any data left in the parser's
208
223
  # buffer after the end of the HTTP message (likely nil, but
209
224
  # possibly the start of the next message)
210
225
  #
@@ -221,37 +236,31 @@ module HTTPTools
221
236
 
222
237
  private
223
238
  def start
224
- method = @buffer.scan(/[a-z]+ /i)
225
- if method
226
- if @method_callback
227
- method.chop!
228
- method.upcase!
229
- @method_callback.call(method)
230
- end
239
+ @request_method = @buffer.scan(/[a-z]+ /i)
240
+ if @request_method
241
+ @request_method.chop!
242
+ @request_method.upcase!
231
243
  uri
232
244
  elsif @buffer.skip(/HTTP\//i)
233
245
  response_http_version
234
- elsif @buffer.check(/[a-z]+\Z/i)
246
+ elsif @buffer.check(/[a-z]*\Z/i)
235
247
  :start
248
+ elsif @allow_html_without_header && @buffer.check(/\s*</i)
249
+ skip_header
236
250
  else
237
251
  raise ParseError.new("Protocol or method not recognised")
238
252
  end
239
253
  end
240
254
 
241
255
  def uri
242
- uri = @buffer.scan(/[a-z0-9;\/?:@&=+$,%_.!~*')(#-]*(?=( |\r\n))/i)
243
- if uri
244
- fragment = uri.slice!(/#[a-z0-9;\/?:@&=+$,%_.!~*')(-]+\Z/i)
245
- if @path_callback && uri =~ /^\//i
246
- path = uri.dup
247
- query = path.slice!(/\?[a-z0-9;\/?:@&=+$,%_.!~*')(-]*/i)
248
- query.slice!(0) if query
249
- @path_callback.call(path, query)
250
- end
251
- @uri_callback.call(uri) if @uri_callback
252
- if fragment && @fragment_callback
253
- fragment.slice!(0)
254
- @fragment_callback.call(fragment)
256
+ @request_uri= @buffer.scan(/[a-z0-9;\/?:@&=+$,%_.!~*')(#-]*(?=( |\r\n))/i)
257
+ if @request_uri
258
+ @fragment = @request_uri.slice!(/#[a-z0-9;\/?:@&=+$,%_.!~*')(-]+\Z/i)
259
+ @fragment.slice!(0) if @fragment
260
+ if @request_uri =~ /^\//i
261
+ @path_info = @request_uri.dup
262
+ @query_string = @path_info.slice!(/\?[a-z0-9;\/?:@&=+$,%_.!~*')(-]*/i)
263
+ @query_string ? @query_string.slice!(0) : @query_string = ""
255
264
  end
256
265
  space_before_http
257
266
  elsif @buffer.check(/[a-z0-9;\/?:@&=+$,%_.!~*')(#-]+\Z/i)
@@ -280,12 +289,9 @@ module HTTPTools
280
289
  end
281
290
 
282
291
  def request_http_version
283
- version = @buffer.scan(/[0-9]+\.[0-9]+\r\n/i)
284
- if version
285
- if @version_callback
286
- version.chop!
287
- @version_callback.call(version)
288
- end
292
+ @version = @buffer.scan(/[0-9]+\.[0-9x]+\r\n/i)
293
+ if @version
294
+ @version.chop!
289
295
  key_or_newline
290
296
  elsif @buffer.eos? || @buffer.check(/\d+(\.(\d+\r?)?)?\Z/i)
291
297
  :request_http_version
@@ -295,12 +301,9 @@ module HTTPTools
295
301
  end
296
302
 
297
303
  def response_http_version
298
- version = @buffer.scan(/[0-9]+\.[0-9]+ /i)
299
- if version
300
- if @version_callback
301
- version.chop!
302
- @version_callback.call(version)
303
- end
304
+ @version = @buffer.scan(/[0-9]+\.[0-9x]+ /i)
305
+ if @version
306
+ version.chop!
304
307
  status
305
308
  elsif @buffer.eos? || @buffer.check(/\d+(\.(\d+)?)?\Z/i)
306
309
  :response_http_version
@@ -309,13 +312,23 @@ module HTTPTools
309
312
  end
310
313
  end
311
314
 
315
+ def skip_header
316
+ @version = "0.0"
317
+ @status_code = 200
318
+ @message = ""
319
+ @header_complete = true
320
+ @header_callback.call if @header_callback
321
+ body
322
+ end
323
+
312
324
  def status
313
- status = @buffer.scan(/\d\d\d [a-z -]+\r?\n/i)
325
+ status = @buffer.scan(/\d\d\d[^\x00-\x1f\x7f]*\r?\n/i)
314
326
  if status
315
- @status = status.slice!(0, 3).to_i
316
- @status_callback.call(@status, status.strip) if @status_callback
327
+ @status_code = status.slice!(0, 3).to_i
328
+ @message = status.strip
317
329
  key_or_newline
318
- elsif @buffer.eos? || @buffer.check(/\d(\d(\d( ([a-z]+\r?)?)?)?)?\Z/i)
330
+ elsif @buffer.eos? ||
331
+ @buffer.check(/\d(\d(\d( ([^\x00-\x1f\x7f]+\r?)?)?)?)?\Z/i)
319
332
  :status
320
333
  else
321
334
  raise ParseError.new("Invalid status line")
@@ -323,27 +336,45 @@ module HTTPTools
323
336
  end
324
337
 
325
338
  def key_or_newline
326
- @last_key = @buffer.scan(/[!-9;-~]+: /i)
339
+ @last_key = @buffer.scan(/[ -9;-~]+: /i)
327
340
  if @last_key
328
341
  @last_key.chomp!(KEY_TERMINATOR)
329
342
  value
330
- elsif @buffer.skip(/\n|\r\n/i)
331
- @headers_callback.call(@headers) if @headers_callback
343
+ elsif @buffer.skip(/\r?\n/i)
344
+ @header_complete = true
345
+ @header_callback.call if @header_callback
332
346
  body
333
- elsif @buffer.eos? || @buffer.check(/[!-9;-~]+:?\Z/i)
347
+ elsif @buffer.eos? || @buffer.check(/([ -9;-~]+:?|\r)\Z/i)
334
348
  :key_or_newline
349
+ elsif @last_key = @buffer.scan(/[ -9;-~]+:(?=[^ ])/i)
350
+ @last_key.chomp!(COLON)
351
+ value
352
+ else
353
+ skip_bad_header
354
+ end
355
+ end
356
+
357
+ def skip_bad_header
358
+ if @buffer.skip(/[^\x00\n\x7f]*\n/)
359
+ key_or_newline
360
+ elsif @buffer.check(/[^\x00\n\x7f]+\Z/)
361
+ :skip_bad_header
335
362
  else
336
363
  raise ParseError.new("Illegal character in field name")
337
364
  end
338
365
  end
339
366
 
340
367
  def value
341
- value = @buffer.scan(/[ -~]+\r?\n/i)
368
+ value = @buffer.scan(/[^\x00\n\x7f]*\r?\n/i)
342
369
  if value
343
370
  value.chop!
344
- @headers[@last_key] = value
371
+ if ARRAY_VALUE_HEADERS[@last_key]
372
+ @header.fetch(@last_key) {@header[@last_key] = []}.push(value)
373
+ else
374
+ @header[@last_key] = value
375
+ end
345
376
  key_or_newline
346
- elsif @buffer.eos? || @buffer.check(/[ -~]+\Z/i)
377
+ elsif @buffer.eos? || @buffer.check(/[^\x00\n\x7f]+\r?\Z/i)
347
378
  :value
348
379
  else
349
380
  raise ParseError.new("Illegal character in field body")
@@ -351,18 +382,14 @@ module HTTPTools
351
382
  end
352
383
 
353
384
  def body
354
- if @force_no_body || NO_BODY[@status]
385
+ if @force_no_body || NO_BODY[@status_code]
355
386
  end_of_message
356
- elsif @buffer.eos?
357
- :body
358
387
  else
359
- @body = "" if @body_callback
360
- @buffer = @buffer.rest # Switch @buffer from StringScanner to String
361
- length = @headers[CONTENT_LENGTH]
388
+ length = @header[CONTENT_LENGTH]
362
389
  if length
363
390
  @content_left = length.to_i
364
391
  body_with_length
365
- elsif @headers[TRANSFER_ENCODING] == CHUNKED
392
+ elsif @header[TRANSFER_ENCODING] == CHUNKED
366
393
  body_chunked
367
394
  else
368
395
  body_on_close
@@ -370,48 +397,35 @@ module HTTPTools
370
397
  end
371
398
  end
372
399
 
373
- #--
374
- # From this point on @buffer is a String, not a StringScanner.
375
- # This is because 1. we don't need a StringScanner anymore, 2. if we
376
- # switched to a diffrent instace variable we'd need a condition in #concat
377
- # to feed the data in to the new instace variable, which would slow us down.
378
- #++
379
-
380
400
  def body_with_length
381
- if @buffer.length > 0
382
- chunk = @buffer.slice!(0, @content_left)
401
+ if !@buffer.eos?
402
+ chunk = @buffer.string.slice(@buffer.pos, @content_left)
383
403
  @stream_callback.call(chunk) if @stream_callback
384
- @body << chunk if @body_callback
385
- @content_left -= chunk.length
404
+ chunk_length = chunk.length
405
+ @buffer.pos += chunk_length
406
+ @content_left -= chunk_length
386
407
  if @content_left < 1
387
- @body_callback.call(@body) if @body_callback
388
408
  end_of_message
389
409
  else
390
410
  :body_with_length
391
411
  end
412
+ elsif @content_left < 1 # zero length body
413
+ @stream_callback.call("") if @stream_callback
414
+ end_of_message
392
415
  else
393
416
  :body_with_length
394
417
  end
395
418
  end
396
419
 
397
420
  def body_chunked
398
- decoded, remainder = transfer_encoding_chunked_decode(@buffer)
421
+ decoded, remainder = transfer_encoding_chunked_decode(nil, @buffer)
399
422
  if decoded
400
423
  @stream_callback.call(decoded) if @stream_callback
401
- @body << decoded if @body_callback
402
424
  end
403
425
  if remainder
404
- @buffer = remainder
405
426
  :body_chunked
406
427
  else
407
- @buffer.slice!(/.*0\r\n/m)
408
- @body_callback.call(@body) if @body_callback
409
- if @headers[TRAILER] || @force_trailer
410
- @trailer = {}
411
- # @buffer switches back to a StringScanner for the trailer.
412
- @buffer_backup_reference.string.replace(@buffer)
413
- @buffer_backup_reference.reset
414
- @buffer = @buffer_backup_reference
428
+ if @header[TRAILER] || @force_trailer
415
429
  trailer_key_or_newline
416
430
  else
417
431
  end_of_message
@@ -420,37 +434,36 @@ module HTTPTools
420
434
  end
421
435
 
422
436
  def body_on_close
423
- @stream_callback.call(@buffer) if @stream_callback
424
- @body << @buffer if @body_callback
425
- @buffer = ""
437
+ chunk = @buffer.rest
438
+ @buffer.terminate
439
+ @stream_callback.call(chunk) if @stream_callback
426
440
  :body_on_close
427
441
  end
428
442
 
429
- #--
430
- # @buffer switches back to a StringScanner for the trailer.
431
- #++
432
-
433
443
  def trailer_key_or_newline
434
- if @last_key = @buffer.scan(/[!-9;-~]+: /i)
444
+ if @last_key = @buffer.scan(/[ -9;-~]+: /i)
435
445
  @last_key.chomp!(KEY_TERMINATOR)
436
446
  trailer_value
437
- elsif @buffer.skip(/\n|\r\n/i)
438
- @trailer_callback.call(@trailer) if @trailer_callback
447
+ elsif @buffer.skip(/\r?\n/i)
448
+ @trailer_callback.call if @trailer_callback
439
449
  end_of_message
440
- elsif @buffer.eos? || @buffer.check(/[!-9;-~]+:?\Z/i)
450
+ elsif @buffer.eos? || @buffer.check(/([ -9;-~]+:?|\r)\Z/i)
441
451
  :trailer_key_or_newline
452
+ elsif @last_key = @buffer.scan(/[ -9;-~]+:(?=[^ ])/i)
453
+ @last_key.chomp!(COLON)
454
+ trailer_value
442
455
  else
443
456
  raise ParseError.new("Illegal character in field name")
444
457
  end
445
458
  end
446
459
 
447
460
  def trailer_value
448
- value = @buffer.scan(/[ -~]+\r?\n/i)
461
+ value = @buffer.scan(/[^\000\n\177]+\r?\n/i)
449
462
  if value
450
463
  value.chop!
451
464
  @trailer[@last_key] = value
452
465
  trailer_key_or_newline
453
- elsif @buffer.eos? || @buffer.check(/[ -~]+\Z/i)
466
+ elsif @buffer.eos? || @buffer.check(/[^\x00\n\x7f]+\r?\Z/i)
454
467
  :trailer_value
455
468
  else
456
469
  raise ParseError.new("Illegal character in field body")
@@ -460,8 +473,8 @@ module HTTPTools
460
473
  def end_of_message
461
474
  raise EndOfMessageError.new("Message ended") if @state == :end_of_message
462
475
  remainder = @buffer.respond_to?(:rest) ? @buffer.rest : @buffer
463
- if @finished_callback
464
- @finished_callback.call((remainder if remainder.length > 0))
476
+ if @finish_callback
477
+ @finish_callback.call((remainder if remainder.length > 0))
465
478
  end
466
479
  :end_of_message
467
480
  end