http_tools 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,5 @@
1
1
  require 'strscan'
2
+ require 'stringio'
2
3
 
3
4
  module HTTPTools
4
5
 
@@ -15,7 +16,7 @@ module HTTPTools
15
16
  # puts parser.status_code + " " + parser.request_method
16
17
  # puts parser.header.inspect
17
18
  # end
18
- # parser.on(:stream) {|chunk| print chunk}
19
+ # parser.on(:finish) {print parser.body}
19
20
  #
20
21
  # parser << "HTTP/1.1 200 OK\r\n"
21
22
  # parser << "Content-Length: 20\r\n\r\n"
@@ -27,8 +28,7 @@ module HTTPTools
27
28
  # <h1>Hello world</h1>
28
29
  #
29
30
  class Parser
30
- include Encoding
31
-
31
+ # :stopdoc:
32
32
  COLON = ":".freeze
33
33
  KEY_TERMINATOR = ": ".freeze
34
34
  CONTENT_LENGTH = "Content-Length".freeze
@@ -37,7 +37,6 @@ module HTTPTools
37
37
  CONNECTION = "Connection".freeze
38
38
  CLOSE = "close".freeze
39
39
  CHUNKED = "chunked".freeze
40
- EVENTS = %W{header stream trailer finish error}.map {|e| e.freeze}.freeze
41
40
 
42
41
  REQUEST_METHOD = "REQUEST_METHOD".freeze
43
42
  PATH_INFO = "PATH_INFO".freeze
@@ -45,6 +44,7 @@ module HTTPTools
45
44
  SERVER_NAME = "SERVER_NAME".freeze
46
45
  SERVER_PORT = "SERVER_PORT".freeze
47
46
  HTTP_HOST = "HTTP_HOST".freeze
47
+ RACK_INPUT = "rack.input".freeze
48
48
 
49
49
  PROTOTYPE_ENV = {
50
50
  "SCRIPT_NAME" => "".freeze,
@@ -59,10 +59,12 @@ module HTTPTools
59
59
  LOWERCASE = "a-z-".freeze
60
60
  UPPERCASE = "A-Z_".freeze
61
61
  NO_HTTP_ = {"CONTENT_LENGTH" => true, "CONTENT_TYPE" => true}
62
+ # :startdoc:
63
+ EVENTS = %W{header stream trailer finish error}.map {|e| e.freeze}.freeze
62
64
 
63
65
  attr_reader :state # :nodoc:
64
66
  attr_reader :request_method, :path_info, :query_string, :request_uri,
65
- :version, :status_code, :message, :header, :trailer
67
+ :version, :status_code, :message, :header, :body, :trailer
66
68
 
67
69
  # Force parser to expect and parse a trailer when Trailer header missing.
68
70
  attr_accessor :force_trailer
@@ -79,9 +81,21 @@ module HTTPTools
79
81
  #
80
82
  def initialize
81
83
  @state = :start
82
- @buffer = StringScanner.new("")
84
+ @buffer = @scanner = StringScanner.new("")
83
85
  @header = {}
84
86
  @trailer = {}
87
+ @force_no_body = nil
88
+ @allow_html_without_header = nil
89
+ @force_trailer = nil
90
+ @status_code = nil
91
+ @content_left = nil
92
+ @chunked = nil
93
+ @body = nil
94
+ @header_callback = nil
95
+ @stream_callback = method(:setup_stream_callback)
96
+ @trailer_callback = nil
97
+ @finish_callback = nil
98
+ @error_callback = nil
85
99
  end
86
100
 
87
101
  # :call-seq: parser.concat(data) -> parser
@@ -105,8 +119,9 @@ module HTTPTools
105
119
  # Returns a Rack compatible environment hash. Will return nil if called
106
120
  # before headers are complete.
107
121
  #
108
- # "rack.input" is not supplied and must be added to make the environment
109
- # hash fully Rack compliant.
122
+ # "rack.input" is only supplied if #env is called after parsing the request
123
+ # has finished, and no listener is set for the `stream` event, otherwise
124
+ # you must add it yourself to make the environment hash fully Rack compliant
110
125
  #
111
126
  def env
112
127
  return unless @header_complete
@@ -123,6 +138,9 @@ module HTTPTools
123
138
  env[SERVER_NAME] = host
124
139
  env[SERVER_PORT] = port || "80"
125
140
  @trailer.each {|k, val| env[HTTP_ + k.tr(LOWERCASE, UPPERCASE)] = val}
141
+ if @body || @stream_callback == method(:setup_stream_callback)
142
+ env[RACK_INPUT] = StringIO.new(@body || "")
143
+ end
126
144
  env
127
145
  end
128
146
 
@@ -151,9 +169,10 @@ module HTTPTools
151
169
  #
152
170
  def finish
153
171
  if @state == :body_on_close
172
+ @buffer = @scanner
154
173
  @state = end_of_message
155
- elsif @state == :body_chunked && @header[CONNECTION] == CLOSE &&
156
- !@header[TRAILER] && @buffer.eos?
174
+ elsif @state == :body_chunked && @buffer.eos? && !@trailer_expected &&
175
+ @header.any? {|k,v| CONNECTION.casecmp(k) == 0 && CLOSE.casecmp(v) == 0}
157
176
  @state = end_of_message
158
177
  elsif @state == :start && @buffer.string.length < 1
159
178
  raise EmptyMessageError.new("Message empty")
@@ -204,6 +223,8 @@ module HTTPTools
204
223
  @trailer = {}
205
224
  @last_key = nil
206
225
  @content_left = nil
226
+ @chunked = nil
227
+ @trailer_expected = nil
207
228
  self
208
229
  end
209
230
 
@@ -214,14 +235,15 @@ module HTTPTools
214
235
  #
215
236
  # Available events are :header, :stream, :trailer, :finish, and :error.
216
237
  #
217
- # Adding a second callback for an event will overwite the existing callback
218
- # or delegate.
238
+ # Adding a second callback for an event will overwrite the existing callback.
219
239
  #
220
240
  # Events:
221
241
  # [header] Called when headers are complete
222
242
  #
223
243
  # [stream] Supplied with one argument, the last chunk of body data fed
224
- # in to the parser as a String, e.g. "<h1>Hello"
244
+ # in to the parser as a String, e.g. "<h1>Hello". If no
245
+ # listener is set for this event the body can be retrieved with
246
+ # #body
225
247
  #
226
248
  # [trailer] Called on the completion of the trailer, if present
227
249
  #
@@ -240,6 +262,12 @@ module HTTPTools
240
262
  end
241
263
  alias on add_listener
242
264
 
265
+ def inspect # :nodoc:
266
+ str = to_s
267
+ str[-1, 0] = " #{posstr} #{state}"
268
+ str
269
+ end
270
+
243
271
  private
244
272
  def start
245
273
  @request_method = @buffer.scan(/[a-z]+ /i)
@@ -254,7 +282,7 @@ module HTTPTools
254
282
  elsif @allow_html_without_header && @buffer.check(/\s*</i)
255
283
  skip_header
256
284
  else
257
- raise ParseError.new("Protocol or method not recognised")
285
+ raise ParseError.new("Protocol or method not recognised at " + posstr)
258
286
  end
259
287
  end
260
288
 
@@ -269,7 +297,7 @@ module HTTPTools
269
297
  elsif @buffer.check(/[a-z0-9;\/?:@&=+$,%_.!~*')(#-]+\Z/i)
270
298
  :uri
271
299
  else
272
- raise ParseError.new("URI or path not recognised")
300
+ raise ParseError.new("URI or path not recognised at " + posstr)
273
301
  end
274
302
  end
275
303
 
@@ -285,7 +313,7 @@ module HTTPTools
285
313
  @buffer.check(/ (H(T(T(P(\/(\d+(\.(\d+\r?)?)?)?)?)?)?)?)?\Z/i)
286
314
  :request_http_version
287
315
  else
288
- raise ParseError.new("Invalid version specifier")
316
+ raise ParseError.new("Invalid version specifier at " + posstr)
289
317
  end
290
318
  end
291
319
 
@@ -299,7 +327,7 @@ module HTTPTools
299
327
  @buffer.check(/H(T(T(P(\/(\d+(\.(\d+\r?)?)?)?)?)?)?)?\Z/i)
300
328
  :response_http_version
301
329
  else
302
- raise ParseError.new("Invalid version specifier")
330
+ raise ParseError.new("Invalid version specifier at " + posstr)
303
331
  end
304
332
  end
305
333
 
@@ -309,7 +337,7 @@ module HTTPTools
309
337
  @message = ""
310
338
  @header_complete = true
311
339
  @header_callback.call if @header_callback
312
- body
340
+ start_body
313
341
  end
314
342
 
315
343
  def status
@@ -322,7 +350,7 @@ module HTTPTools
322
350
  @buffer.check(/\d(\d(\d( ([^\x00-\x1f\x7f]+\r?)?)?)?)?\Z/i)
323
351
  :status
324
352
  else
325
- raise ParseError.new("Invalid status line")
353
+ raise ParseError.new("Invalid status line at " + posstr)
326
354
  end
327
355
  end
328
356
 
@@ -334,7 +362,7 @@ module HTTPTools
334
362
  elsif @buffer.skip(/\r?\n/i)
335
363
  @header_complete = true
336
364
  @header_callback.call if @header_callback
337
- body
365
+ start_body
338
366
  elsif @buffer.eos? || @buffer.check(/([ -9;-~]+:?|\r)\Z/i)
339
367
  :key_or_newline
340
368
  elsif @last_key = @buffer.scan(/[ -9;-~]+:(?=[^ ])/i)
@@ -351,7 +379,7 @@ module HTTPTools
351
379
  elsif @buffer.check(/[^\x00\n\x7f]+\Z/)
352
380
  :skip_bad_header
353
381
  else
354
- raise ParseError.new("Illegal character in field name")
382
+ raise ParseError.new("Illegal character in field name at " + posstr)
355
383
  end
356
384
  end
357
385
 
@@ -359,51 +387,58 @@ module HTTPTools
359
387
  value = @buffer.scan(/[^\x00\n\x7f]*\n/i)
360
388
  if value
361
389
  value.chop!
362
- if ARRAY_VALUE_HEADERS.key?(@last_key)
363
- @header.fetch(@last_key) {@header[@last_key] = []}.push(value)
390
+ if @header.key?(@last_key)
391
+ @header[@last_key] << "\n#{value}"
364
392
  else
365
393
  @header[@last_key] = value
366
394
  end
395
+ if CONTENT_LENGTH.casecmp(@last_key) == 0
396
+ @content_left = value.to_i
397
+ elsif TRANSFER_ENCODING.casecmp(@last_key) == 0
398
+ @chunked = CHUNKED.casecmp(value) == 0
399
+ end
367
400
  key_or_newline
368
401
  elsif @buffer.eos? || @buffer.check(/[^\x00\n\x7f]+\Z/i)
369
402
  :value
370
403
  else
371
- raise ParseError.new("Illegal character in field body")
404
+ raise ParseError.new("Illegal character in field body at " + posstr)
372
405
  end
373
406
  end
374
407
 
375
- def body
376
- if @request_method &&
377
- !(@header.key?(CONTENT_LENGTH) || @header.key?(TRANSFER_ENCODING)) ||
408
+ def start_body
409
+ if @request_method && !(@content_left || @chunked) ||
378
410
  NO_BODY.key?(@status_code) || @force_no_body
379
411
  end_of_message
412
+ elsif @content_left
413
+ @buffer = [@buffer.rest]
414
+ body_with_length
415
+ elsif @chunked
416
+ @trailer_expected = @header.any? {|k,v| TRAILER.casecmp(k) == 0}
417
+ body_chunked
380
418
  else
381
- length = @header[CONTENT_LENGTH]
382
- if length
383
- @content_left = length.to_i
384
- body_with_length
385
- elsif @header[TRANSFER_ENCODING] == CHUNKED
386
- body_chunked
387
- else
388
- body_on_close
389
- end
419
+ @buffer = [@buffer.rest]
420
+ body_on_close
390
421
  end
391
422
  end
392
423
 
393
424
  def body_with_length
394
- if !@buffer.eos?
395
- chunk = @buffer.string.slice(@buffer.pos, @content_left)
396
- @stream_callback.call(chunk) if @stream_callback
425
+ chunk = @buffer.shift
426
+ if !chunk.empty?
397
427
  chunk_length = chunk.length
398
- @buffer.pos += chunk_length
428
+ if chunk_length > @content_left
429
+ @scanner << chunk.slice!(@content_left..-1)
430
+ end
431
+ @stream_callback.call(chunk) if @stream_callback
399
432
  @content_left -= chunk_length
400
433
  if @content_left < 1
434
+ @buffer = @scanner
401
435
  end_of_message
402
436
  else
403
437
  :body_with_length
404
438
  end
405
439
  elsif @content_left < 1 # zero length body
406
440
  @stream_callback.call("") if @stream_callback
441
+ @buffer = @scanner
407
442
  end_of_message
408
443
  else
409
444
  :body_with_length
@@ -411,24 +446,38 @@ module HTTPTools
411
446
  end
412
447
 
413
448
  def body_chunked
414
- decoded, remainder = transfer_encoding_chunked_decode(nil, @buffer)
415
- if decoded
416
- @stream_callback.call(decoded) if @stream_callback
417
- end
418
- if remainder
419
- :body_chunked
420
- else
421
- if @header[TRAILER] || @force_trailer
422
- trailer_key_or_newline
423
- else
424
- end_of_message
449
+ while true
450
+ start_pos = @buffer.pos
451
+ hex_chunk_length = @buffer.scan(/[0-9a-f]+ *\r?\n/i)
452
+ break :body_chunked unless hex_chunk_length
453
+
454
+ chunk_length = hex_chunk_length.to_i(16)
455
+ if chunk_length == 0
456
+ if @trailer_expected || @force_trailer
457
+ break trailer_key_or_newline
458
+ else
459
+ break end_of_message
460
+ end
461
+ end
462
+
463
+ begin
464
+ chunk = @buffer.rest.slice(0, chunk_length)
465
+ @buffer.pos += chunk_length
466
+ if chunk && @buffer.skip(/\r?\n/i)
467
+ @stream_callback.call(chunk) if @stream_callback
468
+ else
469
+ @buffer.pos = start_pos
470
+ break :body_chunked
471
+ end
472
+ rescue RangeError
473
+ @buffer.pos = start_pos
474
+ break :body_chunked
425
475
  end
426
476
  end
427
477
  end
428
478
 
429
479
  def body_on_close
430
- chunk = @buffer.rest
431
- @buffer.terminate
480
+ chunk = @buffer.shift
432
481
  @stream_callback.call(chunk) if @stream_callback
433
482
  :body_on_close
434
483
  end
@@ -446,7 +495,7 @@ module HTTPTools
446
495
  @last_key.chomp!(COLON)
447
496
  trailer_value
448
497
  else
449
- raise ParseError.new("Illegal character in field name")
498
+ raise ParseError.new("Illegal character in field name at " + posstr)
450
499
  end
451
500
  end
452
501
 
@@ -454,12 +503,16 @@ module HTTPTools
454
503
  value = @buffer.scan(/[^\000\n\177]+\n/i)
455
504
  if value
456
505
  value.chop!
457
- @trailer[@last_key] = value
506
+ if @trailer.key?(@last_key)
507
+ @trailer[@last_key] << "\n#{value}"
508
+ else
509
+ @trailer[@last_key] = value
510
+ end
458
511
  trailer_key_or_newline
459
512
  elsif @buffer.eos? || @buffer.check(/[^\x00\n\x7f]+\Z/i)
460
513
  :trailer_value
461
514
  else
462
- raise ParseError.new("Illegal character in field body")
515
+ raise ParseError.new("Illegal character in field body at " + posstr)
463
516
  end
464
517
  end
465
518
 
@@ -477,5 +530,31 @@ module HTTPTools
477
530
  end
478
531
  alias error raise
479
532
 
533
+ def setup_stream_callback(chunk)
534
+ @body = ""
535
+ stream_callback(chunk)
536
+ @stream_callback = method(:stream_callback)
537
+ end
538
+
539
+ def stream_callback(chunk)
540
+ @body << chunk
541
+ end
542
+
543
+ def line_char(string, position)
544
+ line_count = 1
545
+ char_count = 0
546
+ string.each_line do |line|
547
+ break if line.length + char_count > position
548
+ line_count += 1
549
+ char_count += line.length
550
+ end
551
+ [line_count, position + 1 - char_count]
552
+ end
553
+
554
+ def posstr
555
+ line, char = line_char(@buffer.string, @buffer.pos)
556
+ "line #{line}, char #{char}"
557
+ end
558
+
480
559
  end
481
- end
560
+ end
@@ -0,0 +1,16 @@
1
+ base = File.expand_path(File.dirname(__FILE__) + '/../../lib')
2
+ require base + '/http_tools'
3
+ require 'rubygems'
4
+ require 'ruby-prof'
5
+
6
+ body = "x" * 1024 * 1024 * 1
7
+ response = "HTTP/1.1 200 OK\r\nDate: Mon, 06 Jun 2011 14:55:51 GMT\r\nServer: Apache/2.2.17 (Unix) mod_ssl/2.2.17 OpenSSL/0.9.8l DAV/2 mod_fastcgi/2.4.2\r\nLast-Modified: Mon, 06 Jun 2011 14:55:49 GMT\r\nETag: \"3f18045-400-4a50c4c87c740\"\r\nAccept-Ranges: bytes\r\nContent-Length: #{body.length}\r\nContent-Type: text/plain\r\n\r\n"
8
+ chunks = []
9
+ 64.times {|i| chunks << body[i * 64, body.length / 64]}
10
+ parser = HTTPTools::Parser.new
11
+
12
+ result = RubyProf.profile do
13
+ parser << response
14
+ chunks.each {|chunk| parser << chunk}
15
+ end
16
+ RubyProf::FlatPrinter.new(result).print(STDOUT, 0)
@@ -3,7 +3,7 @@ require base + '/http_tools'
3
3
  require 'test/unit'
4
4
  require 'uri'
5
5
 
6
- class RequestTest < Test::Unit::TestCase
6
+ class BuilderRequestTest < Test::Unit::TestCase
7
7
 
8
8
  def test_get
9
9
  result = HTTPTools::Builder.request(:get, "www.example.com", "/test")
@@ -18,9 +18,27 @@ class RequestTest < Test::Unit::TestCase
18
18
  end
19
19
 
20
20
  def test_headers
21
- result = HTTPTools::Builder.request(:get, "www.foobar.com", "/", "x-test" => "foo")
21
+ result = HTTPTools::Builder.request(:get, "www.foobar.com", "/", "X-Test" => "foo")
22
22
 
23
- assert_equal("GET / HTTP/1.1\r\nHost: www.foobar.com\r\nx-test: foo\r\n\r\n", result)
23
+ assert_equal("GET / HTTP/1.1\r\nHost: www.foobar.com\r\nX-Test: foo\r\n\r\n", result)
24
+ end
25
+
26
+ def test_newline_separated_multi_value_headers
27
+ result = HTTPTools::Builder.request(:get, "www.foo.com", "/", "X-Test" => "foo\nbar")
28
+
29
+ assert_equal("GET / HTTP/1.1\r\nHost: www.foo.com\r\nX-Test: foo\r\nX-Test: bar\r\n\r\n", result)
30
+ end
31
+
32
+ def test_array_multi_value_headers
33
+ result = HTTPTools::Builder.request(:get, "www.foo.com", "/", "X-Test" => ["foo", "bar"])
34
+
35
+ assert_equal("GET / HTTP/1.1\r\nHost: www.foo.com\r\nX-Test: foo\r\nX-Test: bar\r\n\r\n", result)
36
+ end
37
+
38
+ def test_non_string_headers
39
+ result = HTTPTools::Builder.request(:get, "www.foobar.com", "/", "X-Test" => 42)
40
+
41
+ assert_equal("GET / HTTP/1.1\r\nHost: www.foobar.com\r\nX-Test: 42\r\n\r\n", result)
24
42
  end
25
43
 
26
44
  end