cae-multipart_parser 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: b40160cadbf8cc8a3f697358faf9fbc86189f69a
4
+ data.tar.gz: 9c53814388b57e07c4fe9067f1bcac1c642d5498
5
+ SHA512:
6
+ metadata.gz: 01ed9067615cf6c444f92558b6d6232ff3b431afbd09264fbc5348f1a83183cb7e8c20d3b095f0e825fad8c1c788c12b19719cf554fa57232227693e3181c47b
7
+ data.tar.gz: 9cd5bf6885f0a60098497089797c9dc0345279a2458d69b8f11e198662f70d084af9a8bd9ca522a73e35e63881db17f38ee462564710050fd535fac6e8905d88
data/.gitignore ADDED
@@ -0,0 +1,15 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
10
+ *.bundle
11
+ *.so
12
+ *.o
13
+ *.a
14
+ mkmf.log
15
+ *.swp
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in cae-multipart_parser.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2015 Chris Elsworth
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,26 @@
1
+ # Event-driven HTTP Multipart Parser
2
+
3
+ This is based on https://github.com/danabr/multipart-parser, with
4
+ modifications to suit my use-case.
5
+
6
+ The parser currently requires every part to carry a `Content-Length` header; knowing the part's size up front lets it skip straight to the next boundary instead of scanning the part body for it. It raises `Cae::MultipartParser::Parser::ContentLengthUnsetError` if this header is not present.
7
+
8
+
9
+ ## Usage
10
+
11
+ ```ruby
12
+ parser = Cae::MultipartParser::Parser.new(boundary: boundary)
13
+
14
+ parser.parse fh do |part|
15
+ part.on(:headers) do |headers|
16
+ # headers is a Hash
17
+ end
18
+ part.on(:data) do |data|
19
+ # data is a chunk of body data
20
+ # this may be called multiple times
21
+ end
22
+ part.on(:end) do
23
+ # part is finished, there will be no more data callbacks
24
+ end
25
+ end
26
+ ```
data/Rakefile ADDED
@@ -0,0 +1,14 @@
1
+ ### Specs
2
+
3
# Runs every spec file in one Ruby process.
#
# +env+ is a Hash of environment variable overrides applied for the duration
# of the run; the previous values are put back afterwards, even when the
# spec run fails.
spec = proc do |env|
  # Remember what each variable held so it can be restored (nil means the
  # variable was unset, and assigning nil to ENV removes it again).
  previous = env.keys.each_with_object({}) { |name, saved| saved[name] = ENV[name] }
  env.each { |name, value| ENV[name] = value }

  begin
    # No `-rubygems` switch: it loads the `ubygems` shim, which current
    # RubyGems releases no longer ship, and RubyGems is loaded by default.
    sh "#{FileUtils::RUBY} -I lib -e 'ARGV.each{|f| require f}' ./spec/*_spec.rb"
  ensure
    previous.each { |name, value| ENV[name] = value }
  end
end

desc "Run specs"
task "spec" do
  spec.call({})
end

task :default => :spec
@@ -0,0 +1,22 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'cae/multipart_parser/version'
5
+
6
+ Gem::Specification.new do |spec|
7
+ spec.name = "cae-multipart_parser"
8
+ spec.version = Cae::MultipartParser::VERSION
9
+ spec.authors = ["Chris Elsworth"]
10
+ spec.email = ["chris@shagged.org"]
11
+ spec.summary = "Event-driven HTTP Multipart parser"
12
+ spec.homepage = "https://github.com/celsworth/cae-multipart_parser"
13
+ spec.license = "MIT"
14
+
15
+ spec.files = `git ls-files -z`.split("\x0")
16
+ spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
17
+ spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
18
+ spec.require_paths = ["lib"]
19
+
20
+ spec.add_development_dependency "multipart-post"
21
+ spec.add_development_dependency "minitest"
22
+ end
@@ -0,0 +1,165 @@
1
+ # vim: et sw=2 ts=2 sts=2
2
+
3
module Cae
  module MultipartParser
    # Event-driven parser for multipart bodies.
    #
    # The parser is a character-level state machine for boundaries and
    # headers. Part bodies are not scanned: each part must announce its size
    # in a Content-Length part header, and that many bytes are handed to the
    # part's :data callback in slices before the next boundary is expected.
    #
    # State (@state, @index, the current part and its remaining byte count)
    # lives in instance variables, so it survives from one read buffer to the
    # next.
    class Parser

      CR = "\r".freeze
      LF = "\n".freeze
      DASH = "-".freeze

      # Every boundary except the very first one is preceded by CRLF.
      BOUNDARY_PREFIX = (CR + LF + DASH + DASH).freeze

      # Raised when a part has no usable (numeric) Content-Length header.
      ContentLengthUnsetError = Class.new(StandardError)

      # Number of bytes requested from the IO per read.
      attr_accessor :read_buffer_size

      # @param opts [Hash] options; +:boundary+ (String) is required and is
      #   the boundary token without the leading dashes.
      # @raise [ArgumentError] if +:boundary+ is missing.
      def initialize(opts = {})
        boundary = opts[:boundary]
        raise ArgumentError, "the :boundary option is required" if boundary.nil?

        # remember offsets into our state in between calls
        @index = 0

        @state = :start

        # default 2MB read buffer
        @read_buffer_size = 2 * 1024 * 1024

        @boundary = BOUNDARY_PREFIX + boundary
        @boundary_length = @boundary.length
      end

      # Parse data from the IO +io+, calling callbacks appropriately.
      #
      # For every part, the optional block is yielded the Part once its
      # headers have been parsed, so the caller can register callbacks with
      # Part#on. The part then receives :headers, zero or more :data, and
      # finally :end.
      #
      # @param io [#read] source of the multipart body.
      # @yieldparam part [Part] the part whose headers were just parsed.
      # @return [Integer] the number of bytes parsed.
      # @raise [ContentLengthUnsetError] if a part has no numeric
      #   Content-Length header.
      # @raise [RuntimeError] if the input does not match the expected
      #   multipart framing.
      def parse(io)
        parsed = 0

        buffer = String.new
        while io.read(@read_buffer_size, buffer)
          length = buffer.length
          i = 0

          while i < length
            c = buffer[i]

            # A bare `break` below means "unexpected character": it leaves
            # i short of length, which is reported after the loop.
            case @state
            when :start
              if @index == @boundary_length - 2
                break unless c == CR
                @index += 1
              elsif @index == @boundary_length - 1
                break unless c == LF
                # reached end of boundary, we're into the first part
                @state = :headers_start
              else
                # there is no leading \r\n on the first boundary, hence index+2
                break unless c == @boundary[@index + 2]
                @index += 1
              end

            when :headers_start
              @part = Part.new
              @state = :headers
              @index = 0
              @headers = ''.dup
              next # keep i pointing at current char for :headers

            when :headers
              # @index counts how much of the terminating \r\n\r we have seen
              if (c == CR && @index == 0) || (c == LF && @index == 1)
                # keep \r\n to split on later
                @headers << c
                @index += 1
              elsif c == CR && @index == 2
                # don't keep final \r, update state to check for final \n
                @state = :headers_almost_done
              else
                # normal header char, reset index and keep char
                @headers << c
                @index = 0
              end

            when :headers_almost_done
              break unless c == LF
              @state = :part_start

            when :part_start
              @part.parse_header_str @headers

              # Distinguish "header missing or garbage" from an explicit
              # zero: an empty part (Content-Length: 0) is legitimate.
              unless @part.headers['Content-Length'].to_s =~ /\A\s*\d+\s*\z/
                raise ContentLengthUnsetError,
                      "part has no numeric Content-Length header"
              end
              @part_data_remaining = @part.content_length

              # allow caller to setup callbacks
              yield @part if block_given?

              @part.callback :headers, @part.headers

              @index = 0
              @state = :part_data
              next # keep i pointing at current char for :part_data

            when :part_data
              # hand over as much of this buffer as still belongs to the part
              chunk_remaining = length - i
              cb_len = [@part_data_remaining, chunk_remaining].min

              @part.callback :data, buffer[i, cb_len] if cb_len > 0

              @part_data_remaining -= cb_len
              i += cb_len

              @state = :boundary if @part_data_remaining == 0
              next # we've bumped i already, don't increment it

            when :boundary
              break unless c == @boundary[@index]
              @index += 1
              @state = :boundary_almost_done if @index == @boundary_length

            when :boundary_almost_done
              # work out whether this is a part boundary or the final boundary
              if c == CR
                @state = :boundary_part_almost_done
              elsif c == DASH
                @state = :boundary_last_almost_done
              else
                break # this isn't a boundary after all
              end

            when :boundary_part_almost_done
              # final character of an inter-part boundary must be LF
              break unless c == LF
              @part.callback :end
              @state = :headers_start

            when :boundary_last_almost_done
              # final character of final boundary must be -
              break unless c == DASH
              @part.callback :end
              # no branch handles :end, so anything after the final boundary
              # is consumed without being inspected
              @state = :end

            end # case

            i += 1

          end # while

          if i != length
            raise "unexpected char at #{i} (#{buffer[i].inspect})"
          end

          parsed += length

        end # while

        parsed
      end

    end
  end
end
@@ -0,0 +1,44 @@
1
+ # vim: et sw=2 ts=2 sts=2
2
module Cae
  module MultipartParser
    # A single part of a multipart body: its parsed headers plus the
    # callbacks the caller registered for it.
    class Part

      # Hash of header name => value, populated by #parse_header_str.
      attr_reader :headers

      def initialize
        @callbacks = {}
        @headers = {}
      end

      # Parse a raw CRLF-separated header block into #headers.
      #
      # Folded (multiline) headers are joined into one line, names are
      # normalised to Capitalised-Dash-Form and surrounding whitespace is
      # removed from names and values. Lines without a colon are skipped.
      #
      # @param str [String] the raw header text.
      # @return [Hash] the accumulated headers.
      def parse_header_str(str)
        # munge multiline headers back into one line. Only a space or tab
        # marks a continuation; matching \s here would also swallow an
        # empty line and glue two separate headers together.
        unfolded = str.gsub(/\r\n[ \t]+/, ' ')

        # split header string into a hash
        unfolded.split(/\r\n/).each do |line|
          # limit of 2: everything after the first colon is the value, so
          # values containing colons (dates, quoted names) stay intact
          key, value = line.split(':', 2)
          next if value.nil? || key.strip.empty?

          # normalize content-length -> Content-Length
          key = key.strip.split('-').map(&:capitalize).join('-')

          @headers[key] = value.strip
        end
        @headers
      end

      # @return [Integer] the Content-Length header as an integer, or 0 when
      #   the header is absent.
      def content_length
        @headers['Content-Length'].to_i
      end

      # Register +callback+ for +event+, replacing any previous one.
      def on(event, &callback)
        @callbacks[event] = callback
      end

      # Invoke the callback registered for +event+ with +arg+; does nothing
      # (and returns nil) when no callback is registered.
      #
      # Parser will call :headers, :data, :end
      def callback(event, arg = nil)
        @callbacks[event].call(arg) if @callbacks.key?(event)
      end

    end
  end
end
@@ -0,0 +1,5 @@
1
module Cae
  module MultipartParser
    # Release version of the gem; frozen so it cannot be mutated at runtime.
    VERSION = "1.0.0".freeze
  end
end
@@ -0,0 +1,2 @@
1
+ require 'cae/multipart_parser/parser'
2
+ require 'cae/multipart_parser/part'
@@ -0,0 +1,73 @@
1
+ # vim: et sw=2 ts=2 sts=2
2
+
3
+ require File.expand_path("spec_helper", File.dirname(__FILE__))
4
+
5
+ describe Cae::MultipartParser::Parser do
6
+ let(:boundary) do
7
+ # create a random boundary
8
+ ("-" * 24) + SecureRandom.random_bytes(8).unpack('H*').first
9
+ end
10
+
11
+ let(:parser) do
12
+ Cae::MultipartParser::Parser.new(boundary: boundary)
13
+ end
14
+
15
+ describe "#parse" do
16
+ it "returns the number of bytes parsed" do
17
+ part = SecureRandom.random_bytes(1024) # random data
18
+ body = generate_body(boundary, [part])
19
+ fh = StringIO.new body
20
+
21
+ r = parser.parse fh do |part|
22
+ end
23
+
24
+ r.must_equal body.length
25
+ end
26
+
27
+ it "calls the :headers callback with a hash" do
28
+ part = SecureRandom.random_bytes(1024) # random data
29
+ fh = StringIO.new generate_body(boundary, [part])
30
+
31
+ headers = nil
32
+ parser.parse fh do |part|
33
+ part.on(:headers){|h| headers = h }
34
+ end
35
+ headers.must_be_kind_of Hash
36
+ end
37
+
38
+ it "calls the :data callback with the original data" do
39
+ part = SecureRandom.random_bytes(1024 * 1024) # 1MB of random data
40
+ fh = StringIO.new generate_body(boundary, [part])
41
+ ret = ''
42
+ parser.parse fh do |part|
43
+ part.on(:data){|data| ret << data }
44
+ end
45
+
46
+ ret.must_equal part
47
+ end
48
+
49
+ it "calls the :end callback after the part is done" do
50
+ part = SecureRandom.random_bytes(1024) # random data
51
+ fh = StringIO.new generate_body(boundary, [part])
52
+ done = 0
53
+ parser.parse fh do |part|
54
+ part.on(:end){ done += 1 }
55
+ end
56
+
57
+ done.must_equal 1
58
+ end
59
+
60
+ it "calls callbacks after each part is done" do
61
+ part = SecureRandom.random_bytes(1024) # random data
62
+ fh = StringIO.new generate_body(boundary, [part, part])
63
+ headers, done = 0, 0
64
+ parser.parse fh do |part|
65
+ part.on(:headers){ headers += 1 }
66
+ part.on(:end){ done += 1 }
67
+ end
68
+
69
+ headers.must_equal 2
70
+ done.must_equal 2
71
+ end
72
+ end
73
+ end
data/spec/part_spec.rb ADDED
@@ -0,0 +1,82 @@
1
+ # vim: et sw=2 ts=2 sts=2
2
+
3
+ require File.expand_path("spec_helper", File.dirname(__FILE__))
4
+
5
+ describe Cae::MultipartParser::Part do
6
+ let(:part) { Cae::MultipartParser::Part.new }
7
+
8
+ describe '#parse_header_str' do
9
+ it "parses a simple input" do
10
+ input = "Content-Type: text/html"
11
+ part.parse_header_str(input).must_equal({
12
+ 'Content-Type' => 'text/html'
13
+ })
14
+ end
15
+
16
+ it "ignores trailing CRLF" do
17
+ input = "Content-Type: text/html\r\n"
18
+ part.parse_header_str(input).must_equal({
19
+ 'Content-Type' => 'text/html'
20
+ })
21
+ end
22
+
23
+ it "parses multiple lines" do
24
+ input = "Content-Type: text/html\r\nContent-Length: 100"
25
+ part.parse_header_str(input).must_equal({
26
+ 'Content-Type' => 'text/html',
27
+ 'Content-Length' => '100'
28
+ })
29
+ end
30
+
31
+ it "parses multiline headers" do
32
+ input = "Content-Type: multipart/form-data;\r\n\tboundary=foo"
33
+ part.parse_header_str(input).must_equal({
34
+ 'Content-Type' => 'multipart/form-data; boundary=foo'
35
+ })
36
+ end
37
+
38
+ it "parses multiple multiline headers" do
39
+ input = "Content-Type: multipart/form-data;\r\n\tboundary=foo\r\nContent-Type2: multipart/form-data;\r\n\tboundary=bar"
40
+ part.parse_header_str(input).must_equal({
41
+ 'Content-Type' => 'multipart/form-data; boundary=foo',
42
+ 'Content-Type2' => 'multipart/form-data; boundary=bar'
43
+ })
44
+ end
45
+ end
46
+
47
+ describe '#content_length' do
48
+ it "considers Content-Length to be zero if unset" do
49
+ input = "Content-Type: text/html"
50
+ part.parse_header_str(input)
51
+ part.content_length.must_equal 0
52
+ end
53
+
54
+ it "exposes Content-Length if present in headers" do
55
+ input = "Content-Type: text/html\r\nContent-Length: 100"
56
+ part.parse_header_str(input)
57
+ part.content_length.must_equal 100
58
+ end
59
+ end
60
+
61
+ describe "#callback" do
62
+ it "ignores an unregistered callback type" do
63
+ part.callback(:unregistered).must_equal nil
64
+ end
65
+
66
+ it "calls a registered callback type" do
67
+ foo = 0
68
+ cb = ->(arg){ foo = arg }
69
+ part.on(:registered, &cb)
70
+ part.callback(:registered, 1)
71
+ foo.must_equal 1
72
+ end
73
+
74
+ it "defaults to a nil arg" do
75
+ foo = true
76
+ cb = ->(arg){ foo = arg }
77
+ part.on(:registered, &cb)
78
+ part.callback(:registered)
79
+ foo.must_equal nil
80
+ end
81
+ end
82
+ end
@@ -0,0 +1,25 @@
1
+ # vim: et sw=2 ts=2 sts=2
2
+
3
+ $:.unshift(File.expand_path("../lib", File.dirname(__FILE__)))
4
+
5
+ require "rubygems"
6
+
7
+ gem 'minitest'
8
+ require "minitest/autorun"
9
+
10
+ require 'cae/multipart_parser'
11
+
12
+ require 'securerandom'
13
+ require 'net/http/post/multipart'
14
+
15
+ # create a multipart body out of the given array
16
+ def generate_body(boundary, arr)
17
+ parts = {}
18
+ arr.each_with_index do |part, idx|
19
+ fileno = "file#{idx}"
20
+ parts[fileno] = UploadIO.new(StringIO.new(part), fileno, 'application/binary')
21
+ end
22
+
23
+ req = Net::HTTP::Post::Multipart.new '/', parts, {}, boundary
24
+ req.body_stream.read
25
+ end
metadata ADDED
@@ -0,0 +1,89 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: cae-multipart_parser
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Chris Elsworth
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-09-03 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: multipart-post
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: minitest
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ description:
42
+ email:
43
+ - chris@shagged.org
44
+ executables: []
45
+ extensions: []
46
+ extra_rdoc_files: []
47
+ files:
48
+ - ".gitignore"
49
+ - Gemfile
50
+ - LICENSE.txt
51
+ - README.md
52
+ - Rakefile
53
+ - cae-multipart_parser.gemspec
54
+ - lib/cae/multipart_parser.rb
55
+ - lib/cae/multipart_parser/parser.rb
56
+ - lib/cae/multipart_parser/part.rb
57
+ - lib/cae/multipart_parser/version.rb
58
+ - spec/parser_spec.rb
59
+ - spec/part_spec.rb
60
+ - spec/spec_helper.rb
61
+ homepage: https://github.com/celsworth/cae-multipart_parser
62
+ licenses:
63
+ - MIT
64
+ metadata: {}
65
+ post_install_message:
66
+ rdoc_options: []
67
+ require_paths:
68
+ - lib
69
+ required_ruby_version: !ruby/object:Gem::Requirement
70
+ requirements:
71
+ - - ">="
72
+ - !ruby/object:Gem::Version
73
+ version: '0'
74
+ required_rubygems_version: !ruby/object:Gem::Requirement
75
+ requirements:
76
+ - - ">="
77
+ - !ruby/object:Gem::Version
78
+ version: '0'
79
+ requirements: []
80
+ rubyforge_project:
81
+ rubygems_version: 2.4.5.1
82
+ signing_key:
83
+ specification_version: 4
84
+ summary: Event-driven HTTP Multipart parser
85
+ test_files:
86
+ - spec/parser_spec.rb
87
+ - spec/part_spec.rb
88
+ - spec/spec_helper.rb
89
+ has_rdoc: