cae-multipart_parser 1.1.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 58b85fbc0eb96b1b5d3d124d74b726df472fb1ed
4
- data.tar.gz: 34cf8eabe80ace9e5df827b27b1355cd377d0e2d
3
+ metadata.gz: ba2747bd66a2c381c5d6b0845d5005af0c193638
4
+ data.tar.gz: 59fa00307201bb96569afcf18146e0466abc3dad
5
5
  SHA512:
6
- metadata.gz: 5ec5356efc56b49734c768631fda770bf8e51dd3aa6edd60cc9fa4d24bd604ba077fdac53268e86e398e1215e84415603fc54f750932f0ecf722989bf06824bd
7
- data.tar.gz: 39a4915b557153bd5f39630471d2a03b29b58e496cf7ca7fdbadbabe8f7d9b001baa8e48dd018b99389baaade121731e575fc715e709e29936bd60cbc8fbf2af
6
+ metadata.gz: 69c00279283004fa1f1145a8b0b66afc915ef510628f8fd530d60c881fb01755ceed5114705d1a94a0d535cd11d2f55c03e2f4d94931d509c920d2790c45095a
7
+ data.tar.gz: 2accc6e0407347dfebe6154a06dc484a3d2ab67bd918722218d26ffddc9df6a4c4b5417d18f7fec30af787aaeb55ae485fefec6a90952b78d1d7d477ffd2a17a
data/README.md CHANGED
@@ -1,7 +1,6 @@
1
1
  # Event-driven HTTP Multipart Parser
2
2
 
3
- This is based on https://github.com/danabr/multipart-parser, with
4
- modifications to suit my use-case.
3
+ This is based on https://github.com/danabr/multipart-parser, with modifications to suit my use-case.
5
4
 
6
5
  It currently depends on finding a `Content-Length` part header to avoid having to scan the entire body. It will raise `Cae::MultipartParser::Parser::ContentLengthUnsetError` if this header is not present.
7
6
 
@@ -12,15 +11,25 @@ It currently depends on finding a `Content-Length` part header to avoid having t
12
11
  parser = Cae::MultipartParser::Parser.new(boundary: boundary)
13
12
 
14
13
  parser.parse fh do |part|
15
- part.on(:headers) do |headers|
16
- # headers is a Hash
14
+ # part.headers and part.content_length should be set now
15
+ # headers are underscored and uppercased:
16
+ if part.headers['CONTENT_TYPE'] == 'text/html'
17
+ # ...
17
18
  end
18
- part.on(:data) do |data|
19
- # data is a chunk of body data
20
- # this may be called multiple times
19
+
20
+ # content_length is an integer:
21
+ if part.content_length < 1024
22
+ # ...
21
23
  end
22
- part.on(:end) do
23
- # part is finished, there will be no more data callbacks
24
+
25
+ while part.body.read(chunksize, buf)
26
+ # buf contains up to chunksize bytes of data.
27
+ # A read that returns fewer than chunksize bytes does NOT mean the part
28
+ # is finished; this is a known internal quirk that is yet to be fixed.
29
+ # Only stop when #read returns nil. Note that #read is NOT IO#read,
30
+ # but is mostly compatible.
24
31
  end
32
+
33
+ # all the part body has now been read
25
34
  end
26
- ```
35
+ ```
@@ -44,6 +44,8 @@ module Cae
44
44
  while i < length
45
45
  c = buffer[i]
46
46
 
47
+ #p "state=#{@state} index=#{@index} chars=#{buffer[i, 40]}"
48
+
47
49
  case @state
48
50
  when :start
49
51
  if @index == @boundary_length - 2
@@ -88,29 +90,17 @@ module Cae
88
90
  # this must populate #content_length
89
91
  @part.parse_header_str @headers
90
92
 
91
- @part_data_remaining = @part.content_length
92
- raise ContentLengthUnsetError if @part_data_remaining == 0
93
-
94
- # allow caller to setup callbacks
95
- yield @part
93
+ raise ContentLengthUnsetError if @part.content_length == 0
96
94
 
97
- @part.callback :headers, @part.headers
98
-
99
- data_start = i
100
- @index = 0
101
- @state = :part_data
102
- next # keep i pointing at current char for :part_data
103
-
104
- when :part_data
105
95
  chunk_remaining = length - data_start
106
- cb_len = @part_data_remaining > chunk_remaining ? chunk_remaining : @part_data_remaining
107
-
108
- @part.callback :data, buffer[data_start, cb_len]
96
+ cb_len = @part.content_length > chunk_remaining ? chunk_remaining : @part.content_length
97
+ @part.body = Part::Body.new(io, @part.content_length, buffer[i, cb_len])
98
+ yield @part
109
99
 
110
- @part_data_remaining -= cb_len
111
100
  i += cb_len
112
101
 
113
- @state = :boundary if @part_data_remaining == 0
102
+ @index = 0
103
+ @state = :boundary
114
104
  next # we've bumped i already, don't increment it
115
105
 
116
106
  when :boundary
@@ -1,30 +1,34 @@
1
1
  # vim: et sw=2 ts=2 sts=2
2
+
3
+ require 'cae/multipart_parser/part/body'
4
+
2
5
  module Cae
3
6
  module MultipartParser
4
7
  class Part
5
8
 
6
9
  attr_reader :headers
10
+ attr_accessor :body
7
11
 
8
12
  def initialize
9
13
  @callbacks = {}
10
14
  @headers = {}
15
+ @body = nil
11
16
  end
12
17
 
13
18
  def parse_header_str(str)
14
- # munge multiline headers back into one line.
19
+ # Munge multiline headers back into one line.
15
20
  str = str.gsub /\r\n\s+/, ' '
16
21
 
17
- # split header string into a hash
18
- str.split(/\r\n/).each do |h|
19
- key, value = h.split ':'
22
+ # Split header string into a hash. Returns the initial hash.
23
+ str.split(/\r\n/).each_with_object(@headers) do |line, headers|
24
+ key, value = line.split ':'
20
25
 
21
- # normalize content-length -> CONTENT_LENGTH
26
+ # normalize Content-Length -> CONTENT_LENGTH
22
27
  key.upcase!
23
28
  key.tr! '-', '_'
24
29
 
25
- @headers[key] = value.lstrip
30
+ headers[key] = value.lstrip
26
31
  end
27
- @headers
28
32
  end
29
33
 
30
34
  def content_length
@@ -0,0 +1,49 @@
1
+ # vim: et sw=2 ts=2 sts=2
2
+ module Cae
3
+ module MultipartParser
4
+ class Part
5
+ class Body
6
+
7
+ def initialize(fh, read_limit, read_buffer = nil)
8
+ # Backing filehandle
9
+ @fh = fh
10
+
11
+ # After we've read this amount, act like the backing filehandle is empty
12
+ @read_limit = read_limit
13
+
14
+ # If anything is in this, we'll return it on the first read()
15
+ @read_buffer = read_buffer
16
+ end
17
+
18
+ def read(length, outbuf = nil)
19
+ # Check we're not trying to read more bytes than are available
20
+ length = @read_limit > length ? length : @read_limit
21
+
22
+ # Early nil return if there's nothing available. This technically
23
+ # breaks compatibility with IO#read, but I don't care.
24
+ # (IO#read returns an empty string if passed length is 0; we'll return nil)
25
+ return nil if length == 0
26
+
27
+ # if there's anything in @read_buffer, return it before doing a real read
28
+ if @read_buffer
29
+ # initialise outbuf if it wasn't passed in.
30
+ outbuf ||= String.new
31
+
32
+ # copy contents into outbuf, being careful NOT to change the object_id
33
+ outbuf.clear
34
+ outbuf << @read_buffer[0, length]
35
+
36
+ # advance buffer pointer; an exactly-consumed buffer becomes "" (yielding one empty read) and only then nil
37
+ @read_limit -= outbuf.length
38
+ @read_buffer = @read_buffer[length, @read_limit]
39
+
40
+ return outbuf
41
+ end
42
+
43
+ @fh.read(length, outbuf).tap{|o| @read_limit -= o.length }
44
+ end
45
+
46
+ end
47
+ end
48
+ end
49
+ end
@@ -1,5 +1,5 @@
1
1
  module Cae
2
2
  module MultipartParser
3
- VERSION = "1.1.0"
3
+ VERSION = "2.0.0"
4
4
  end
5
5
  end
@@ -18,56 +18,40 @@ describe Cae::MultipartParser::Parser do
18
18
  body = generate_body(boundary, [part])
19
19
  fh = StringIO.new body
20
20
 
21
- r = parser.parse fh do |part|
22
- end
23
-
21
+ r = parser.parse(fh){ }
24
22
  r.must_equal body.length
25
23
  end
26
24
 
27
- it "calls the :headers callback with a hash" do
25
+ it "sets part#headers" do
28
26
  part = SecureRandom.random_bytes(1024) # random data
29
27
  fh = StringIO.new generate_body(boundary, [part])
30
28
 
31
29
  headers = nil
32
- parser.parse fh do |part|
33
- part.on(:headers){|h| headers = h }
34
- end
30
+ parser.parse(fh){|part| headers = part.headers }
35
31
  headers.must_be_kind_of Hash
36
32
  end
37
33
 
38
- it "calls the :data callback with the original data" do
34
+ it "passes the original data to the part#body handle" do
39
35
  part = SecureRandom.random_bytes(1024 * 1024) # 1MB of random data
40
36
  fh = StringIO.new generate_body(boundary, [part])
41
37
  ret = ''
42
38
  parser.parse fh do |part|
43
- part.on(:data){|data| ret << data }
39
+ part.body.must_be_kind_of Cae::MultipartParser::Part::Body
40
+ while x = part.body.read(1024)
41
+ ret << x
42
+ end
44
43
  end
45
44
 
46
45
  ret.must_equal part
47
46
  end
48
47
 
49
- it "calls the :end callback after the part is done" do
48
+ it "yields for each part" do
50
49
  part = SecureRandom.random_bytes(1024) # random data
51
- fh = StringIO.new generate_body(boundary, [part])
50
+ parts = [part, part]
51
+ fh = StringIO.new generate_body(boundary, parts)
52
52
  done = 0
53
- parser.parse fh do |part|
54
- part.on(:end){ done += 1 }
55
- end
56
-
57
- done.must_equal 1
58
- end
59
-
60
- it "calls callbacks after each part is done" do
61
- part = SecureRandom.random_bytes(1024) # random data
62
- fh = StringIO.new generate_body(boundary, [part, part])
63
- headers, done = 0, 0
64
- parser.parse fh do |part|
65
- part.on(:headers){ headers += 1 }
66
- part.on(:end){ done += 1 }
67
- end
68
-
69
- headers.must_equal 2
70
- done.must_equal 2
53
+ parser.parse(fh){|part| done += 1 }
54
+ done.must_equal parts.count
71
55
  end
72
56
  end
73
57
  end
@@ -0,0 +1,69 @@
1
+ # vim: et sw=2 ts=2 sts=2
2
+
3
+ require File.expand_path("spec_helper", File.dirname(__FILE__))
4
+
5
+ def read_all(fh, chunksize, outbuf = nil)
6
+ str = String.new
7
+ outbuf = String.new
8
+ while fh.read(chunksize, outbuf)
9
+ str << outbuf
10
+ end
11
+ str
12
+ end
13
+
14
+ describe Cae::MultipartParser::Part::Body do
15
+ let(:fh_data) { '12345' }
16
+ let(:initial_buffer) { nil }
17
+ let(:size_limit) { 5 }
18
+ let(:expected) { ((initial_buffer || '') + fh_data)[0, size_limit] }
19
+ let(:fh) { StringIO.new fh_data }
20
+ let(:body) do
21
+ Cae::MultipartParser::Part::Body.new(fh, size_limit, initial_buffer)
22
+ end
23
+
24
+ (1..15).each do |chunksize|
25
+ it "should work with read chunk size #{chunksize}" do
26
+ read_all(body, chunksize).must_equal expected
27
+ end
28
+ end
29
+
30
+ describe "with an initial buffer" do
31
+ let(:initial_buffer) { 'abcde' }
32
+ let(:size_limit) { 10 }
33
+
34
+ (1..15).each do |chunksize|
35
+ it "should work with read chunk size #{chunksize}" do
36
+ read_all(body, chunksize).must_equal expected
37
+ end
38
+ end
39
+
40
+ it "should not reallocate if passed an outbuf" do
41
+ outbuf = String.new
42
+ refute_changes(->{outbuf.object_id}) do
43
+ while body.read(1, outbuf)
44
+ # no-op
45
+ end
46
+ end
47
+ end
48
+
49
+ it "should return nil when empty" do
50
+ read_all(body, size_limit) # empty the "file"
51
+ body.read(1).must_equal nil
52
+ end
53
+
54
+ describe "with a shorter size_limit than data available" do
55
+ let(:size_limit) { 5 }
56
+ it "should stop returning data when size_limit is hit" do
57
+ read_all(body, 1).must_equal expected
58
+ end
59
+
60
+ it "should return nil when 'empty'" do
61
+ read_all(body, size_limit) # empty the "file"
62
+ body.read(1).must_equal nil
63
+ end
64
+ end
65
+
66
+ end
67
+
68
+
69
+ end
@@ -23,3 +23,11 @@ def generate_body(boundary, arr)
23
23
  req = Net::HTTP::Post::Multipart.new '/', parts, {}, boundary
24
24
  req.body_stream.read
25
25
  end
26
+
27
+
28
+ def refute_changes(what)
29
+ old = what.call
30
+ yield
31
+ assert_equal old, what.call
32
+ end
33
+
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cae-multipart_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.0
4
+ version: 2.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Chris Elsworth
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-09-05 00:00:00.000000000 Z
11
+ date: 2015-09-08 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: multipart-post
@@ -54,8 +54,10 @@ files:
54
54
  - lib/cae/multipart_parser.rb
55
55
  - lib/cae/multipart_parser/parser.rb
56
56
  - lib/cae/multipart_parser/part.rb
57
+ - lib/cae/multipart_parser/part/body.rb
57
58
  - lib/cae/multipart_parser/version.rb
58
59
  - spec/parser_spec.rb
60
+ - spec/part_body_spec.rb
59
61
  - spec/part_spec.rb
60
62
  - spec/spec_helper.rb
61
63
  homepage: https://github.com/celsworth/cae-multipart_parser
@@ -84,6 +86,7 @@ specification_version: 4
84
86
  summary: Event-driven HTTP Multipart parser
85
87
  test_files:
86
88
  - spec/parser_spec.rb
89
+ - spec/part_body_spec.rb
87
90
  - spec/part_spec.rb
88
91
  - spec/spec_helper.rb
89
92
  has_rdoc: