http_tools 0.1.0 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -11,9 +11,11 @@ module HTTPTools
11
11
  #
12
12
  # Example:
13
13
  # parser = HTTPTools::Parser.new
14
- # parser.on(:status) {|status, message| puts "#{status} #{message}"}
15
- # parser.on(:headers) {|headers| puts headers.inspect}
16
- # parser.on(:body) {|body| puts body}
14
+ # parser.on(:header) do |header|
15
+ # puts "#{parser.status_code} #{parser.message}"
16
+ # puts parser.header.inspect
17
+ # end
18
+ # parser.on(:stream) {|chunk| print chunk}
17
19
  #
18
20
  # parser << "HTTP/1.1 200 OK\r\n"
19
21
  # parser << "Content-Length: 20\r\n\r\n"
@@ -27,16 +29,42 @@ module HTTPTools
27
29
  class Parser
28
30
  include Encoding
29
31
 
32
+ COLON = ":".freeze
30
33
  KEY_TERMINATOR = ": ".freeze
31
34
  CONTENT_LENGTH = "Content-Length".freeze
32
35
  TRANSFER_ENCODING = "Transfer-Encoding".freeze
33
36
  TRAILER = "Trailer".freeze
37
+ CONNECTION = "Connection".freeze
38
+ CLOSE = "close".freeze
34
39
  CHUNKED = "chunked".freeze
35
- EVENTS = ["method", "path", "uri", "fragment", "version", "status", "key",
36
- "value", "headers", "stream", "body", "trailers", "finished",
37
- "error"].map {|event| event.freeze}.freeze
40
+ EVENTS = %W{header stream trailer finish error}.map do |event|
41
+ event.freeze
42
+ end.freeze
43
+
44
+ REQUEST_METHOD = "REQUEST_METHOD".freeze
45
+ PATH_INFO = "PATH_INFO".freeze
46
+ QUERY_STRING = "QUERY_STRING".freeze
47
+ REQUEST_URI = "REQUEST_URI".freeze
48
+ FRAGMENT = "FRAGMENT".freeze
49
+
50
+ PROTOTYPE_ENV = {
51
+ "SCRIPT_NAME" => "".freeze,
52
+ PATH_INFO => "/".freeze,
53
+ QUERY_STRING => "".freeze,
54
+ "rack.version" => [1, 1].freeze,
55
+ "rack.url_scheme" => "http".freeze,
56
+ "rack.errors" => STDERR,
57
+ "rack.multithread" => false,
58
+ "rack.multiprocess" => false,
59
+ "rack.run_once" => false}.freeze
60
+
61
+ HTTP_ = "HTTP_".freeze
62
+ LOWERCASE = "a-z-".freeze
63
+ UPPERCASE = "A-Z_".freeze
38
64
 
39
65
  attr_reader :state # :nodoc:
66
+ attr_reader :request_method, :path_info, :query_string, :request_uri,
67
+ :fragment, :version, :status_code, :message, :header, :trailer
40
68
 
41
69
  # Force parser to expect and parse a trailer when Trailer header missing.
42
70
  attr_accessor :force_trailer
@@ -44,40 +72,19 @@ module HTTPTools
44
72
  # Skip parsing the body, e.g. with the response to a HEAD request.
45
73
  attr_accessor :force_no_body
46
74
 
47
- # :call-seq: Parser.new(delegate=nil) -> parser
75
+ # Allow responses with no status line or headers if it looks like HTML.
76
+ attr_accessor :allow_html_without_header
77
+
78
+ # :call-seq: Parser.new -> parser
48
79
  #
49
80
  # Create a new HTTPTools::Parser.
50
81
  #
51
- # delegate is an object that will recieve callbacks for events during
52
- # parsing. The delegate's methods should be named on_[event name], e.g.
53
- # on_status, on_body, etc. See #add_listener for more.
54
- #
55
- # Example:
56
- # class ExampleDelegate
57
- # def on_status(status, message)
58
- # puts "#{status} #{message}"
59
- # end
60
- # end
61
- # parser = HTTPTools::Parser.new(ExampleDelegate.new)
62
- #
63
- # If a callback is set for an event, it will take precedence over the
64
- # delegate for that event.
65
- #
66
- def initialize(delegate=nil)
82
+ def initialize
67
83
  @state = :start
68
84
  @buffer = StringScanner.new("")
69
85
  @buffer_backup_reference = @buffer
70
- @status = nil
71
- @headers = {}
72
- @last_key = nil
73
- @content_left = nil
74
- @body = nil
75
- if delegate
76
- EVENTS.each do |event|
77
- id = "on_#{event}"
78
- add_listener(event, delegate.method(id)) if delegate.respond_to?(id)
79
- end
80
- end
86
+ @header = {}
87
+ @trailer = {}
81
88
  end
82
89
 
83
90
  # :call-seq: parser.concat(data) -> parser
@@ -96,6 +103,29 @@ module HTTPTools
96
103
  end
97
104
  alias << concat
98
105
 
106
+ # :call-seq: parser.env -> hash or nil
107
+ #
108
+ # Returns a Rack compatible environment hash. Will return nil if called
109
+ # before headers are complete.
110
+ #
111
+ # The following are not supplied, and must be added to make the environment
112
+ # hash fully Rack compliant: SERVER_NAME, SERVER_PORT, rack.input
113
+ #
114
+ def env
115
+ return unless @header_complete
116
+ env = PROTOTYPE_ENV.merge(
117
+ REQUEST_METHOD => @request_method,
118
+ REQUEST_URI => @request_uri)
119
+ if @path_info
120
+ env[PATH_INFO] = @path_info
121
+ env[QUERY_STRING] = @query_string
122
+ end
123
+ env[FRAGMENT] = @fragment if @fragment
124
+ @header.each {|k, val| env[HTTP_ + k.tr(LOWERCASE, UPPERCASE)] = val}
125
+ @trailer.each {|k, val| env[HTTP_ + k.tr(LOWERCASE, UPPERCASE)] = val}
126
+ env
127
+ end
128
+
99
129
  # :call-seq: parser.finish -> parser
100
130
  #
101
131
  # Used to notify the parser that the request has finished in a case where it
@@ -121,8 +151,12 @@ module HTTPTools
121
151
  #
122
152
  def finish
123
153
  if @state == :body_on_close
124
- @body_callback.call(@body) if @body_callback
125
154
  @state = end_of_message
155
+ elsif @state == :body_chunked && @header[CONNECTION] == CLOSE &&
156
+ !@header[TRAILER] && @buffer.eos?
157
+ @state = end_of_message
158
+ elsif @state == :start && @buffer.string.length < 1
159
+ raise EmptyMessageError.new("Message empty")
126
160
  else
127
161
  raise MessageIncompleteError.new("Message ended early")
128
162
  end
@@ -153,12 +187,17 @@ module HTTPTools
153
187
  @buffer = @buffer_backup_reference
154
188
  @buffer.string.replace("")
155
189
  @buffer.reset
156
- # @status = nil
157
- @headers = {}
190
+ @request_method = nil
191
+ @path_info = nil
192
+ @query_string = nil
193
+ @request_uri = nil
194
+ @fragment = nil
195
+ @version = nil
196
+ @status_code = nil
197
+ @header = {}
158
198
  @trailer = {}
159
- # @last_key = nil
160
- # @content_left = nil
161
- @body = nil
199
+ @last_key = nil
200
+ @content_left = nil
162
201
  self
163
202
  end
164
203
 
@@ -167,44 +206,20 @@ module HTTPTools
167
206
  # parser.on(event) {|arg1 [, arg2]| block} -> parser
168
207
  # parser.on(event, proc) -> parser
169
208
  #
170
- # Available events are :method, :path, :version, :status, :headers, :stream,
171
- # :body, and :error.
209
+ # Available events are :header, :stream, :trailer, :finish, and :error.
172
210
  #
173
211
  # Adding a second callback for an event will overwrite the existing callback
174
212
  # or delegate.
175
213
  #
176
214
  # Events:
177
- # [method] Supplied with one argument, the HTTP method as a String,
178
- # e.g. "GET"
179
- #
180
- # [path] Supplied with two arguments, the request path as a String,
181
- # e.g. "/example.html", and the query string as a String,
182
- # e.g. "query=foo"
183
- # (this callback is only called if the request uri is a path)
184
- #
185
- # [uri] Supplied with one argument, the request uri as a String,
186
- # e.g. "/example.html?query=foo"
187
- #
188
- # [fragment] Supplied with one argument, the fragment from the request
189
- # uri, if present
190
- #
191
- # [version] Supplied with one argument, the HTTP version as a String,
192
- # e.g. "1.1"
193
- #
194
- # [status] Supplied with two arguments, the HTTP status code as a
195
- # Numeric, e.g. 200, and the HTTP status message as a String,
196
- # e.g. "OK"
197
- #
198
- # [headers] Supplied with one argument, the message headers as a Hash,
199
- # e.g. {"Content-Length" => "20"}
215
+ # [header] Called when headers are complete
200
216
  #
201
217
  # [stream] Supplied with one argument, the last chunk of body data fed
202
218
  # in to the parser as a String, e.g. "<h1>Hello"
203
219
  #
204
- # [body] Supplied with one argument, the message body as a String,
205
- # e.g. "<h1>Hello world</h1>"
220
+ # [trailer] Called on the completion of the trailer, if present
206
221
  #
207
- # [finished] Supplied with one argument, any data left in the parser's
222
+ # [finish] Supplied with one argument, any data left in the parser's
208
223
  # buffer after the end of the HTTP message (likely nil, but
209
224
  # possibly the start of the next message)
210
225
  #
@@ -221,37 +236,31 @@ module HTTPTools
221
236
 
222
237
  private
223
238
  def start
224
- method = @buffer.scan(/[a-z]+ /i)
225
- if method
226
- if @method_callback
227
- method.chop!
228
- method.upcase!
229
- @method_callback.call(method)
230
- end
239
+ @request_method = @buffer.scan(/[a-z]+ /i)
240
+ if @request_method
241
+ @request_method.chop!
242
+ @request_method.upcase!
231
243
  uri
232
244
  elsif @buffer.skip(/HTTP\//i)
233
245
  response_http_version
234
- elsif @buffer.check(/[a-z]+\Z/i)
246
+ elsif @buffer.check(/[a-z]*\Z/i)
235
247
  :start
248
+ elsif @allow_html_without_header && @buffer.check(/\s*</i)
249
+ skip_header
236
250
  else
237
251
  raise ParseError.new("Protocol or method not recognised")
238
252
  end
239
253
  end
240
254
 
241
255
  def uri
242
- uri = @buffer.scan(/[a-z0-9;\/?:@&=+$,%_.!~*')(#-]*(?=( |\r\n))/i)
243
- if uri
244
- fragment = uri.slice!(/#[a-z0-9;\/?:@&=+$,%_.!~*')(-]+\Z/i)
245
- if @path_callback && uri =~ /^\//i
246
- path = uri.dup
247
- query = path.slice!(/\?[a-z0-9;\/?:@&=+$,%_.!~*')(-]*/i)
248
- query.slice!(0) if query
249
- @path_callback.call(path, query)
250
- end
251
- @uri_callback.call(uri) if @uri_callback
252
- if fragment && @fragment_callback
253
- fragment.slice!(0)
254
- @fragment_callback.call(fragment)
256
+ @request_uri= @buffer.scan(/[a-z0-9;\/?:@&=+$,%_.!~*')(#-]*(?=( |\r\n))/i)
257
+ if @request_uri
258
+ @fragment = @request_uri.slice!(/#[a-z0-9;\/?:@&=+$,%_.!~*')(-]+\Z/i)
259
+ @fragment.slice!(0) if @fragment
260
+ if @request_uri =~ /^\//i
261
+ @path_info = @request_uri.dup
262
+ @query_string = @path_info.slice!(/\?[a-z0-9;\/?:@&=+$,%_.!~*')(-]*/i)
263
+ @query_string ? @query_string.slice!(0) : @query_string = ""
255
264
  end
256
265
  space_before_http
257
266
  elsif @buffer.check(/[a-z0-9;\/?:@&=+$,%_.!~*')(#-]+\Z/i)
@@ -280,12 +289,9 @@ module HTTPTools
280
289
  end
281
290
 
282
291
  def request_http_version
283
- version = @buffer.scan(/[0-9]+\.[0-9]+\r\n/i)
284
- if version
285
- if @version_callback
286
- version.chop!
287
- @version_callback.call(version)
288
- end
292
+ @version = @buffer.scan(/[0-9]+\.[0-9x]+\r\n/i)
293
+ if @version
294
+ @version.chop!
289
295
  key_or_newline
290
296
  elsif @buffer.eos? || @buffer.check(/\d+(\.(\d+\r?)?)?\Z/i)
291
297
  :request_http_version
@@ -295,12 +301,9 @@ module HTTPTools
295
301
  end
296
302
 
297
303
  def response_http_version
298
- version = @buffer.scan(/[0-9]+\.[0-9]+ /i)
299
- if version
300
- if @version_callback
301
- version.chop!
302
- @version_callback.call(version)
303
- end
304
+ @version = @buffer.scan(/[0-9]+\.[0-9x]+ /i)
305
+ if @version
306
+ version.chop!
304
307
  status
305
308
  elsif @buffer.eos? || @buffer.check(/\d+(\.(\d+)?)?\Z/i)
306
309
  :response_http_version
@@ -309,13 +312,23 @@ module HTTPTools
309
312
  end
310
313
  end
311
314
 
315
+ def skip_header
316
+ @version = "0.0"
317
+ @status_code = 200
318
+ @message = ""
319
+ @header_complete = true
320
+ @header_callback.call if @header_callback
321
+ body
322
+ end
323
+
312
324
  def status
313
- status = @buffer.scan(/\d\d\d [a-z -]+\r?\n/i)
325
+ status = @buffer.scan(/\d\d\d[^\x00-\x1f\x7f]*\r?\n/i)
314
326
  if status
315
- @status = status.slice!(0, 3).to_i
316
- @status_callback.call(@status, status.strip) if @status_callback
327
+ @status_code = status.slice!(0, 3).to_i
328
+ @message = status.strip
317
329
  key_or_newline
318
- elsif @buffer.eos? || @buffer.check(/\d(\d(\d( ([a-z]+\r?)?)?)?)?\Z/i)
330
+ elsif @buffer.eos? ||
331
+ @buffer.check(/\d(\d(\d( ([^\x00-\x1f\x7f]+\r?)?)?)?)?\Z/i)
319
332
  :status
320
333
  else
321
334
  raise ParseError.new("Invalid status line")
@@ -323,27 +336,45 @@ module HTTPTools
323
336
  end
324
337
 
325
338
  def key_or_newline
326
- @last_key = @buffer.scan(/[!-9;-~]+: /i)
339
+ @last_key = @buffer.scan(/[ -9;-~]+: /i)
327
340
  if @last_key
328
341
  @last_key.chomp!(KEY_TERMINATOR)
329
342
  value
330
- elsif @buffer.skip(/\n|\r\n/i)
331
- @headers_callback.call(@headers) if @headers_callback
343
+ elsif @buffer.skip(/\r?\n/i)
344
+ @header_complete = true
345
+ @header_callback.call if @header_callback
332
346
  body
333
- elsif @buffer.eos? || @buffer.check(/[!-9;-~]+:?\Z/i)
347
+ elsif @buffer.eos? || @buffer.check(/([ -9;-~]+:?|\r)\Z/i)
334
348
  :key_or_newline
349
+ elsif @last_key = @buffer.scan(/[ -9;-~]+:(?=[^ ])/i)
350
+ @last_key.chomp!(COLON)
351
+ value
352
+ else
353
+ skip_bad_header
354
+ end
355
+ end
356
+
357
+ def skip_bad_header
358
+ if @buffer.skip(/[^\x00\n\x7f]*\n/)
359
+ key_or_newline
360
+ elsif @buffer.check(/[^\x00\n\x7f]+\Z/)
361
+ :skip_bad_header
335
362
  else
336
363
  raise ParseError.new("Illegal character in field name")
337
364
  end
338
365
  end
339
366
 
340
367
  def value
341
- value = @buffer.scan(/[ -~]+\r?\n/i)
368
+ value = @buffer.scan(/[^\x00\n\x7f]*\r?\n/i)
342
369
  if value
343
370
  value.chop!
344
- @headers[@last_key] = value
371
+ if ARRAY_VALUE_HEADERS[@last_key]
372
+ @header.fetch(@last_key) {@header[@last_key] = []}.push(value)
373
+ else
374
+ @header[@last_key] = value
375
+ end
345
376
  key_or_newline
346
- elsif @buffer.eos? || @buffer.check(/[ -~]+\Z/i)
377
+ elsif @buffer.eos? || @buffer.check(/[^\x00\n\x7f]+\r?\Z/i)
347
378
  :value
348
379
  else
349
380
  raise ParseError.new("Illegal character in field body")
@@ -351,18 +382,14 @@ module HTTPTools
351
382
  end
352
383
 
353
384
  def body
354
- if @force_no_body || NO_BODY[@status]
385
+ if @force_no_body || NO_BODY[@status_code]
355
386
  end_of_message
356
- elsif @buffer.eos?
357
- :body
358
387
  else
359
- @body = "" if @body_callback
360
- @buffer = @buffer.rest # Switch @buffer from StringScanner to String
361
- length = @headers[CONTENT_LENGTH]
388
+ length = @header[CONTENT_LENGTH]
362
389
  if length
363
390
  @content_left = length.to_i
364
391
  body_with_length
365
- elsif @headers[TRANSFER_ENCODING] == CHUNKED
392
+ elsif @header[TRANSFER_ENCODING] == CHUNKED
366
393
  body_chunked
367
394
  else
368
395
  body_on_close
@@ -370,48 +397,35 @@ module HTTPTools
370
397
  end
371
398
  end
372
399
 
373
- #--
374
- # From this point on @buffer is a String, not a StringScanner.
375
- # This is because 1. we don't need a StringScanner anymore, 2. if we
376
- # switched to a diffrent instace variable we'd need a condition in #concat
377
- # to feed the data in to the new instace variable, which would slow us down.
378
- #++
379
-
380
400
  def body_with_length
381
- if @buffer.length > 0
382
- chunk = @buffer.slice!(0, @content_left)
401
+ if !@buffer.eos?
402
+ chunk = @buffer.string.slice(@buffer.pos, @content_left)
383
403
  @stream_callback.call(chunk) if @stream_callback
384
- @body << chunk if @body_callback
385
- @content_left -= chunk.length
404
+ chunk_length = chunk.length
405
+ @buffer.pos += chunk_length
406
+ @content_left -= chunk_length
386
407
  if @content_left < 1
387
- @body_callback.call(@body) if @body_callback
388
408
  end_of_message
389
409
  else
390
410
  :body_with_length
391
411
  end
412
+ elsif @content_left < 1 # zero length body
413
+ @stream_callback.call("") if @stream_callback
414
+ end_of_message
392
415
  else
393
416
  :body_with_length
394
417
  end
395
418
  end
396
419
 
397
420
  def body_chunked
398
- decoded, remainder = transfer_encoding_chunked_decode(@buffer)
421
+ decoded, remainder = transfer_encoding_chunked_decode(nil, @buffer)
399
422
  if decoded
400
423
  @stream_callback.call(decoded) if @stream_callback
401
- @body << decoded if @body_callback
402
424
  end
403
425
  if remainder
404
- @buffer = remainder
405
426
  :body_chunked
406
427
  else
407
- @buffer.slice!(/.*0\r\n/m)
408
- @body_callback.call(@body) if @body_callback
409
- if @headers[TRAILER] || @force_trailer
410
- @trailer = {}
411
- # @buffer switches back to a StringScanner for the trailer.
412
- @buffer_backup_reference.string.replace(@buffer)
413
- @buffer_backup_reference.reset
414
- @buffer = @buffer_backup_reference
428
+ if @header[TRAILER] || @force_trailer
415
429
  trailer_key_or_newline
416
430
  else
417
431
  end_of_message
@@ -420,37 +434,36 @@ module HTTPTools
420
434
  end
421
435
 
422
436
  def body_on_close
423
- @stream_callback.call(@buffer) if @stream_callback
424
- @body << @buffer if @body_callback
425
- @buffer = ""
437
+ chunk = @buffer.rest
438
+ @buffer.terminate
439
+ @stream_callback.call(chunk) if @stream_callback
426
440
  :body_on_close
427
441
  end
428
442
 
429
- #--
430
- # @buffer switches back to a StringScanner for the trailer.
431
- #++
432
-
433
443
  def trailer_key_or_newline
434
- if @last_key = @buffer.scan(/[!-9;-~]+: /i)
444
+ if @last_key = @buffer.scan(/[ -9;-~]+: /i)
435
445
  @last_key.chomp!(KEY_TERMINATOR)
436
446
  trailer_value
437
- elsif @buffer.skip(/\n|\r\n/i)
438
- @trailer_callback.call(@trailer) if @trailer_callback
447
+ elsif @buffer.skip(/\r?\n/i)
448
+ @trailer_callback.call if @trailer_callback
439
449
  end_of_message
440
- elsif @buffer.eos? || @buffer.check(/[!-9;-~]+:?\Z/i)
450
+ elsif @buffer.eos? || @buffer.check(/([ -9;-~]+:?|\r)\Z/i)
441
451
  :trailer_key_or_newline
452
+ elsif @last_key = @buffer.scan(/[ -9;-~]+:(?=[^ ])/i)
453
+ @last_key.chomp!(COLON)
454
+ trailer_value
442
455
  else
443
456
  raise ParseError.new("Illegal character in field name")
444
457
  end
445
458
  end
446
459
 
447
460
  def trailer_value
448
- value = @buffer.scan(/[ -~]+\r?\n/i)
461
+ value = @buffer.scan(/[^\000\n\177]+\r?\n/i)
449
462
  if value
450
463
  value.chop!
451
464
  @trailer[@last_key] = value
452
465
  trailer_key_or_newline
453
- elsif @buffer.eos? || @buffer.check(/[ -~]+\Z/i)
466
+ elsif @buffer.eos? || @buffer.check(/[^\x00\n\x7f]+\r?\Z/i)
454
467
  :trailer_value
455
468
  else
456
469
  raise ParseError.new("Illegal character in field body")
@@ -460,8 +473,8 @@ module HTTPTools
460
473
  def end_of_message
461
474
  raise EndOfMessageError.new("Message ended") if @state == :end_of_message
462
475
  remainder = @buffer.respond_to?(:rest) ? @buffer.rest : @buffer
463
- if @finished_callback
464
- @finished_callback.call((remainder if remainder.length > 0))
476
+ if @finish_callback
477
+ @finish_callback.call((remainder if remainder.length > 0))
465
478
  end
466
479
  :end_of_message
467
480
  end