cae-multipart_parser 1.1.0 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 58b85fbc0eb96b1b5d3d124d74b726df472fb1ed
4
- data.tar.gz: 34cf8eabe80ace9e5df827b27b1355cd377d0e2d
3
+ metadata.gz: ba2747bd66a2c381c5d6b0845d5005af0c193638
4
+ data.tar.gz: 59fa00307201bb96569afcf18146e0466abc3dad
5
5
  SHA512:
6
- metadata.gz: 5ec5356efc56b49734c768631fda770bf8e51dd3aa6edd60cc9fa4d24bd604ba077fdac53268e86e398e1215e84415603fc54f750932f0ecf722989bf06824bd
7
- data.tar.gz: 39a4915b557153bd5f39630471d2a03b29b58e496cf7ca7fdbadbabe8f7d9b001baa8e48dd018b99389baaade121731e575fc715e709e29936bd60cbc8fbf2af
6
+ metadata.gz: 69c00279283004fa1f1145a8b0b66afc915ef510628f8fd530d60c881fb01755ceed5114705d1a94a0d535cd11d2f55c03e2f4d94931d509c920d2790c45095a
7
+ data.tar.gz: 2accc6e0407347dfebe6154a06dc484a3d2ab67bd918722218d26ffddc9df6a4c4b5417d18f7fec30af787aaeb55ae485fefec6a90952b78d1d7d477ffd2a17a
data/README.md CHANGED
@@ -1,7 +1,6 @@
1
1
  # Event-driven HTTP Multipart Parser
2
2
 
3
- This is based on https://github.com/danabr/multipart-parser, with
4
- modifications to suit my use-case.
3
+ This is based on https://github.com/danabr/multipart-parser, with modifications to suit my use-case.
5
4
 
6
5
  It currently depends on finding a `Content-Length` part header to avoid having to scan the entire body. It will raise `Cae::MultipartParser::Parser::ContentLengthUnsetError` if this header is not present.
7
6
 
@@ -12,15 +11,25 @@ It currently depends on finding a `Content-Length` part header to avoid having t
12
11
  parser = Cae::MultipartParser::Parser.new(boundary: boundary)
13
12
 
14
13
  parser.parse fh do |part|
15
- part.on(:headers) do |headers|
16
- # headers is a Hash
14
+ # part.headers and part.content_length should be set now
15
+ # headers are underscored and uppercased:
16
+ if part.headers['CONTENT_TYPE'] == 'text/html'
17
+ # ...
17
18
  end
18
- part.on(:data) do |data|
19
- # data is a chunk of body data
20
- # this may be called multiple times
19
+
20
+ # content_length is an integer:
21
+ if part.content_length < 1024
22
+ # ...
21
23
  end
22
- part.on(:end) do
23
- # part is finished, there will be no more data callbacks
24
+
25
+ while part.body.read(chunksize, buf)
26
+ # buf contains up to chunksize bytes of data.
27
+ # Do not assume you are done just because fewer than chunksize bytes
28
+ # were returned; short reads can happen for internal, not-yet-fixed reasons.
29
+ # Only stop when #read returns nil. Note that #read is NOT IO#read,
30
+ # but is mostly compatible.
24
31
  end
32
+
33
+ # all the part body has now been read
25
34
  end
26
- ```
35
+ ```
@@ -44,6 +44,8 @@ module Cae
44
44
  while i < length
45
45
  c = buffer[i]
46
46
 
47
+ #p "state=#{@state} index=#{@index} chars=#{buffer[i, 40]}"
48
+
47
49
  case @state
48
50
  when :start
49
51
  if @index == @boundary_length - 2
@@ -88,29 +90,17 @@ module Cae
88
90
  # this must populate #content_length
89
91
  @part.parse_header_str @headers
90
92
 
91
- @part_data_remaining = @part.content_length
92
- raise ContentLengthUnsetError if @part_data_remaining == 0
93
-
94
- # allow caller to setup callbacks
95
- yield @part
93
+ raise ContentLengthUnsetError if @part.content_length == 0
96
94
 
97
- @part.callback :headers, @part.headers
98
-
99
- data_start = i
100
- @index = 0
101
- @state = :part_data
102
- next # keep i pointing at current char for :part_data
103
-
104
- when :part_data
105
95
  chunk_remaining = length - data_start
106
- cb_len = @part_data_remaining > chunk_remaining ? chunk_remaining : @part_data_remaining
107
-
108
- @part.callback :data, buffer[data_start, cb_len]
96
+ cb_len = @part.content_length > chunk_remaining ? chunk_remaining : @part.content_length
97
+ @part.body = Part::Body.new(io, @part.content_length, buffer[i, cb_len])
98
+ yield @part
109
99
 
110
- @part_data_remaining -= cb_len
111
100
  i += cb_len
112
101
 
113
- @state = :boundary if @part_data_remaining == 0
102
+ @index = 0
103
+ @state = :boundary
114
104
  next # we've bumped i already, don't increment it
115
105
 
116
106
  when :boundary
@@ -1,30 +1,34 @@
1
1
  # vim: et sw=2 ts=2 sts=2
2
+
3
+ require 'cae/multipart_parser/part/body'
4
+
2
5
  module Cae
3
6
  module MultipartParser
4
7
  class Part
5
8
 
6
9
  attr_reader :headers
10
+ attr_accessor :body
7
11
 
8
12
  def initialize
9
13
  @callbacks = {}
10
14
  @headers = {}
15
+ @body = nil
11
16
  end
12
17
 
13
18
  def parse_header_str(str)
14
- # munge multiline headers back into one line.
19
+ # Munge multiline headers back into one line.
15
20
  str = str.gsub /\r\n\s+/, ' '
16
21
 
17
- # split header string into a hash
18
- str.split(/\r\n/).each do |h|
19
- key, value = h.split ':'
22
+ # Split header string into a hash. Returns the initial hash.
23
+ str.split(/\r\n/).each_with_object(@headers) do |line, headers|
24
+ key, value = line.split ':'
20
25
 
21
- # normalize content-length -> CONTENT_LENGTH
26
+ # normalize Content-Length -> CONTENT_LENGTH
22
27
  key.upcase!
23
28
  key.tr! '-', '_'
24
29
 
25
- @headers[key] = value.lstrip
30
+ headers[key] = value.lstrip
26
31
  end
27
- @headers
28
32
  end
29
33
 
30
34
  def content_length
@@ -0,0 +1,49 @@
1
+ # vim: et sw=2 ts=2 sts=2
2
module Cae
  module MultipartParser
    class Part
      # A length-limited, read-only view over a backing filehandle.
      #
      # The body hands out at most +read_limit+ bytes in total: first from
      # an optional, already-read buffer, then from the backing filehandle.
      # Once the limit is used up, #read returns nil.
      class Body

        # fh          - backing filehandle; must respond to read(length, outbuf)
        # read_limit  - total number of bytes this body may return
        # read_buffer - optional String of bytes that were already read from
        #               fh; they are returned before fh is touched
        def initialize(fh, read_limit, read_buffer = nil)
          # Backing filehandle
          @fh = fh

          # After we've read this amount, act like the backing filehandle is empty
          @read_limit = read_limit

          # An empty buffer is normalised to nil, so it can never cause an
          # empty string to be returned from #read.
          @read_buffer = (read_buffer.nil? || read_buffer.empty?) ? nil : read_buffer
        end

        # Read up to +length+ bytes.
        #
        # length - maximum number of bytes wanted
        # outbuf - optional String to receive the data; its contents are
        #          replaced and the same object is returned
        #
        # Returns a String holding between 1 and +length+ bytes, or nil once
        # the read limit is exhausted, the backing filehandle returns nil, or
        # +length+ is 0. A result shorter than +length+ does not by itself
        # mean the end has been reached. Unlike IO#read, a zero-length request
        # returns nil rather than an empty string.
        def read(length, outbuf = nil)
          # Never hand out more bytes than remain under the limit.
          length = @read_limit if length > @read_limit
          return nil if length.zero?

          # Serve buffered bytes before doing a real read.
          if @read_buffer
            chunk = @read_buffer.byteslice(0, length)
            rest  = @read_buffer.byteslice(chunk.bytesize, @read_buffer.bytesize)

            # Drop the buffer as soon as it is drained, so the next call goes
            # straight to the filehandle instead of returning "".
            @read_buffer = rest.empty? ? nil : rest
            @read_limit -= chunk.bytesize

            # Fill the caller's buffer in place; its object_id must not change.
            outbuf ||= String.new
            outbuf.replace(chunk)
            return outbuf
          end

          chunk = @fh.read(length, outbuf)
          # The filehandle returns nil if it ends before the limit is reached;
          # pass that on as end-of-body instead of calling a method on nil.
          @read_limit -= chunk.bytesize if chunk
          chunk
        end

      end
    end
  end
end
@@ -1,5 +1,5 @@
1
1
  module Cae
2
2
  module MultipartParser
3
- VERSION = "1.1.0"
3
+ VERSION = "2.0.0"
4
4
  end
5
5
  end
@@ -18,56 +18,40 @@ describe Cae::MultipartParser::Parser do
18
18
  body = generate_body(boundary, [part])
19
19
  fh = StringIO.new body
20
20
 
21
- r = parser.parse fh do |part|
22
- end
23
-
21
+ r = parser.parse(fh){ }
24
22
  r.must_equal body.length
25
23
  end
26
24
 
27
- it "calls the :headers callback with a hash" do
25
+ it "sets part#headers" do
28
26
  part = SecureRandom.random_bytes(1024) # random data
29
27
  fh = StringIO.new generate_body(boundary, [part])
30
28
 
31
29
  headers = nil
32
- parser.parse fh do |part|
33
- part.on(:headers){|h| headers = h }
34
- end
30
+ parser.parse(fh){|part| headers = part.headers }
35
31
  headers.must_be_kind_of Hash
36
32
  end
37
33
 
38
- it "calls the :data callback with the original data" do
34
+ it "passes the original data to the part#body handle" do
39
35
  part = SecureRandom.random_bytes(1024 * 1024) # 1MB of random data
40
36
  fh = StringIO.new generate_body(boundary, [part])
41
37
  ret = ''
42
38
  parser.parse fh do |part|
43
- part.on(:data){|data| ret << data }
39
+ part.body.must_be_kind_of Cae::MultipartParser::Part::Body
40
+ while x = part.body.read(1024)
41
+ ret << x
42
+ end
44
43
  end
45
44
 
46
45
  ret.must_equal part
47
46
  end
48
47
 
49
- it "calls the :end callback after the part is done" do
48
+ it "yields for each part" do
50
49
  part = SecureRandom.random_bytes(1024) # random data
51
- fh = StringIO.new generate_body(boundary, [part])
50
+ parts = [part, part]
51
+ fh = StringIO.new generate_body(boundary, parts)
52
52
  done = 0
53
- parser.parse fh do |part|
54
- part.on(:end){ done += 1 }
55
- end
56
-
57
- done.must_equal 1
58
- end
59
-
60
- it "calls callbacks after each part is done" do
61
- part = SecureRandom.random_bytes(1024) # random data
62
- fh = StringIO.new generate_body(boundary, [part, part])
63
- headers, done = 0, 0
64
- parser.parse fh do |part|
65
- part.on(:headers){ headers += 1 }
66
- part.on(:end){ done += 1 }
67
- end
68
-
69
- headers.must_equal 2
70
- done.must_equal 2
53
+ parser.parse(fh){|part| done += 1 }
54
+ done.must_equal parts.count
71
55
  end
72
56
  end
73
57
  end
@@ -0,0 +1,69 @@
1
+ # vim: et sw=2 ts=2 sts=2
2
+
3
+ require File.expand_path("spec_helper", File.dirname(__FILE__))
4
+
5
# Drain +fh+ by calling fh.read(chunksize, outbuf) until it returns nil,
# and return everything that was read as one String.
#
# fh        - any object responding to read(length, outbuf)
# chunksize - number of bytes to request per read
# outbuf    - optional String reused as the read buffer for every call;
#             a fresh one is allocated when omitted
def read_all(fh, chunksize, outbuf = nil)
  # Use the caller's buffer when one is passed in.
  outbuf ||= String.new
  collected = String.new
  collected << outbuf while fh.read(chunksize, outbuf)
  collected
end
13
+
14
+ describe Cae::MultipartParser::Part::Body do
15
+ let(:fh_data) { '12345' }
16
+ let(:initial_buffer) { nil }
17
+ let(:size_limit) { 5 }
18
+ let(:expected) { ((initial_buffer || '') + fh_data)[0, size_limit] }
19
+ let(:fh) { StringIO.new fh_data }
20
+ let(:body) do
21
+ Cae::MultipartParser::Part::Body.new(fh, size_limit, initial_buffer)
22
+ end
23
+
24
+ (1..15).each do |chunksize|
25
+ it "should work with read chunk size #{chunksize}" do
26
+ read_all(body, chunksize).must_equal expected
27
+ end
28
+ end
29
+
30
+ describe "with an initial buffer" do
31
+ let(:initial_buffer) { 'abcde' }
32
+ let(:size_limit) { 10 }
33
+
34
+ (1..15).each do |chunksize|
35
+ it "should work with read chunk size #{chunksize}" do
36
+ read_all(body, chunksize).must_equal expected
37
+ end
38
+ end
39
+
40
+ it "should not reallocate if passed an outbuf" do
41
+ outbuf = String.new
42
+ refute_changes(->{outbuf.object_id}) do
43
+ while body.read(1, outbuf)
44
+ # no-op
45
+ end
46
+ end
47
+ end
48
+
49
+ it "should return nil when empty" do
50
+ read_all(body, size_limit) # empty the "file"
51
+ body.read(1).must_equal nil
52
+ end
53
+
54
+ describe "with a shorter size_limit than data available" do
55
+ let(:size_limit) { 5 }
56
+ it "should stop returning data when size_limit is hit" do
57
+ read_all(body, 1).must_equal expected
58
+ end
59
+
60
+ it "should return nil when 'empty'" do
61
+ read_all(body, size_limit) # empty the "file"
62
+ body.read(1).must_equal nil
63
+ end
64
+ end
65
+
66
+ end
67
+
68
+
69
+ end
@@ -23,3 +23,11 @@ def generate_body(boundary, arr)
23
23
  req = Net::HTTP::Post::Multipart.new '/', parts, {}, boundary
24
24
  req.body_stream.read
25
25
  end
26
+
27
+
28
+ def refute_changes(what)
29
+ old = what.call
30
+ yield
31
+ assert_equal old, what.call
32
+ end
33
+
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cae-multipart_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.0
4
+ version: 2.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Chris Elsworth
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-09-05 00:00:00.000000000 Z
11
+ date: 2015-09-08 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: multipart-post
@@ -54,8 +54,10 @@ files:
54
54
  - lib/cae/multipart_parser.rb
55
55
  - lib/cae/multipart_parser/parser.rb
56
56
  - lib/cae/multipart_parser/part.rb
57
+ - lib/cae/multipart_parser/part/body.rb
57
58
  - lib/cae/multipart_parser/version.rb
58
59
  - spec/parser_spec.rb
60
+ - spec/part_body_spec.rb
59
61
  - spec/part_spec.rb
60
62
  - spec/spec_helper.rb
61
63
  homepage: https://github.com/celsworth/cae-multipart_parser
@@ -84,6 +86,7 @@ specification_version: 4
84
86
  summary: Event-driven HTTP Multipart parser
85
87
  test_files:
86
88
  - spec/parser_spec.rb
89
+ - spec/part_body_spec.rb
87
90
  - spec/part_spec.rb
88
91
  - spec/spec_helper.rb
89
92
  has_rdoc: