cae-multipart_parser 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: b40160cadbf8cc8a3f697358faf9fbc86189f69a
4
+ data.tar.gz: 9c53814388b57e07c4fe9067f1bcac1c642d5498
5
+ SHA512:
6
+ metadata.gz: 01ed9067615cf6c444f92558b6d6232ff3b431afbd09264fbc5348f1a83183cb7e8c20d3b095f0e825fad8c1c788c12b19719cf554fa57232227693e3181c47b
7
+ data.tar.gz: 9cd5bf6885f0a60098497089797c9dc0345279a2458d69b8f11e198662f70d084af9a8bd9ca522a73e35e63881db17f38ee462564710050fd535fac6e8905d88
data/.gitignore ADDED
@@ -0,0 +1,15 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
10
+ *.bundle
11
+ *.so
12
+ *.o
13
+ *.a
14
+ mkmf.log
15
+ *.swp
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in cae-multipart_parser.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2015 Chris Elsworth
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,26 @@
1
+ # Event-driven HTTP Multipart Parser
2
+
3
+ This is based on https://github.com/danabr/multipart-parser, with
4
+ modifications to suit my use-case.
5
+
6
+ It currently depends on finding a `Content-Length` part header to avoid having to scan the entire body. It will raise `Cae::MultipartParser::Parser::ContentLengthUnsetError` if this header is not present.
7
+
8
+
9
+ ## Usage
10
+
11
+ ```ruby
12
+ parser = Cae::MultipartParser::Parser.new(boundary: boundary)
13
+
14
+ parser.parse fh do |part|
15
+ part.on(:headers) do |headers|
16
+ # headers is a Hash
17
+ end
18
+ part.on(:data) do |data|
19
+ # data is a chunk of body data
20
+ # this may be called multiple times
21
+ end
22
+ part.on(:end) do
23
+ # part is finished, there will be no more data callbacks
24
+ end
25
+ end
26
+ ```
data/Rakefile ADDED
@@ -0,0 +1,14 @@
1
### Specs

# Runs every spec file under ./spec in a child Ruby process.
#
# +env+ is a Hash of environment variables to set for the duration of the run.
# The values those variables had before the run are put back afterwards, even
# when the spec run fails.
spec = proc do |env|
  previous = env.keys.each_with_object({}) { |key, saved| saved[key] = ENV[key] }
  begin
    env.each { |key, value| ENV[key] = value }
    # No -rubygems switch: RubyGems is loaded by default on Ruby >= 1.9, and
    # the ubygems.rb shim the switch depended on was removed in Ruby 2.5, so
    # passing it makes the child process abort with a LoadError.
    sh "#{FileUtils::RUBY} -I lib -e 'ARGV.each{|f| require f}' ./spec/*_spec.rb"
  ensure
    # Assigning nil deletes a variable that did not exist before the run.
    previous.each { |key, value| ENV[key] = value }
  end
end

desc "Run specs"
task "spec" do
  spec.call({})
end

task :default => :spec
@@ -0,0 +1,22 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'cae/multipart_parser/version'
5
+
6
+ Gem::Specification.new do |spec|
7
+ spec.name = "cae-multipart_parser"
8
+ spec.version = Cae::MultipartParser::VERSION
9
+ spec.authors = ["Chris Elsworth"]
10
+ spec.email = ["chris@shagged.org"]
11
+ spec.summary = "Event-driven HTTP Multipart parser"
12
+ spec.homepage = "https://github.com/celsworth/cae-multipart_parser"
13
+ spec.license = "MIT"
14
+
15
+ spec.files = `git ls-files -z`.split("\x0")
16
+ spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
17
+ spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
18
+ spec.require_paths = ["lib"]
19
+
20
+ spec.add_development_dependency "multipart-post"
21
+ spec.add_development_dependency "minitest"
22
+ end
@@ -0,0 +1,165 @@
1
+ # vim: et sw=2 ts=2 sts=2
2
+
3
+ module Cae
4
+ module MultipartParser
5
# Event-driven parser for multipart bodies.
#
# The parser is a character-level state machine for boundaries and headers;
# part bodies are skipped over in bulk using each part's Content-Length
# header, so the body itself is never scanned for the boundary string.
#
# State (@state, @index, the current part and the number of body bytes still
# expected) lives in instance variables, so a body may be split across any
# number of reads of the IO.
class Parser

  CR = "\r".freeze
  LF = "\n".freeze
  DASH = "-".freeze

  BOUNDARY_PREFIX = (CR + LF + DASH + DASH).freeze

  # Raised when a part carries no usable Content-Length header.
  ContentLengthUnsetError = Class.new(StandardError)

  # Number of bytes requested from the IO per read.
  attr_accessor :read_buffer_size

  # @param opts [Hash] options; +:boundary+ is the multipart boundary string
  #   (without the leading dashes).
  def initialize(opts = {})
    # remember offsets into our state in between calls
    @index = 0

    @state = :start

    # default 2MB read buffer
    @read_buffer_size = 2 * 1024 * 1024

    # every boundary except the very first is preceded by CRLF
    @boundary = BOUNDARY_PREFIX + opts[:boundary]
    @boundary_length = @boundary.length
  end

  # Parse data from the IO +io+, calling callbacks appropriately.
  #
  # When a block is given, each part is yielded once its headers have been
  # parsed so the caller can register :headers, :data and :end callbacks on
  # it. The block is optional; without it the body is still validated and
  # counted.
  #
  # @param io [#read] source of the multipart body.
  # @yieldparam part [Part] the part whose headers were just parsed.
  # @return [Integer] the number of bytes parsed.
  # @raise [ContentLengthUnsetError] if a part has no Content-Length header,
  #   or one that does not start with a digit.
  # @raise [RuntimeError] if the body does not match the expected framing.
  def parse(io)
    parsed = 0

    buffer = String.new
    while io.read(@read_buffer_size, buffer)
      length = buffer.length
      i = 0

      while i < length
        c = buffer[i]

        # A `break` below leaves i short of length, which is reported as an
        # unexpected character after the loop. Once @state is :end no branch
        # matches and any trailing bytes are simply skipped.
        case @state
        when :start
          if @index == @boundary_length - 2
            break unless c == CR
            @index += 1
          elsif @index == @boundary_length - 1
            break unless c == LF
            # reached end of boundary, we're into the first part
            @state = :headers_start
          else
            # there is no leading \r\n on the first boundary, hence index+2
            break unless c == @boundary[@index + 2] # unexpected character
            @index += 1
          end

        when :headers_start
          @part = Part.new
          @state = :headers
          @index = 0
          @headers = ''
          next # keep i pointing at current char for :headers

        when :headers
          # @index counts how much of a CRLF CRLF terminator has been seen
          if (c == CR && @index == 0) || (c == LF && @index == 1)
            # keep \r\n to split on later
            @headers << c
            @index += 1
          elsif c == CR && @index == 2
            # don't keep final \r, update state to check for final \n
            @state = :headers_almost_done
          else
            # normal header char, reset index and keep char
            @headers << c
            @index = 0
          end

        when :headers_almost_done
          break unless c == LF # unexpected character
          @state = :part_start

        when :part_start
          @part.parse_header_str @headers

          # The header has to be present and numeric. An explicit zero is a
          # legitimate empty part, so presence is tested on the raw header
          # text rather than on the integer value.
          unless @part.headers['Content-Length'].to_s =~ /\A\s*\d/
            raise ContentLengthUnsetError
          end
          @part_data_remaining = @part.content_length

          # allow caller to setup callbacks
          yield @part if block_given?

          @part.callback :headers, @part.headers

          @index = 0
          @state = :part_data
          next # keep i pointing at current char for :part_data

        when :part_data
          # hand over as much of the body as this buffer holds
          available = length - i
          cb_len = [@part_data_remaining, available].min

          # an empty part produces no :data callback at all
          @part.callback :data, buffer[i, cb_len] if cb_len > 0

          @part_data_remaining -= cb_len
          i += cb_len

          @state = :boundary if @part_data_remaining == 0
          next # we've bumped i already, don't increment it

        when :boundary
          break unless c == @boundary[@index] # unexpected character
          @index += 1
          @state = :boundary_almost_done if @index == @boundary_length

        when :boundary_almost_done
          # work out whether this is a part boundary or the final boundary
          if c == CR
            @state = :boundary_part_almost_done
          elsif c == DASH
            @state = :boundary_last_almost_done
          else
            break # unexpected character, this isn't a boundary after all
          end
          @index += 1

        when :boundary_part_almost_done
          # final character of an inter-part boundary must be LF
          break unless c == LF # unexpected character
          @part.callback :end
          @state = :headers_start

        when :boundary_last_almost_done
          # final character of final boundary must be -
          break unless c == DASH # unexpected character
          @part.callback :end
          @state = :end

        end # case

        i += 1

      end # while

      if i != length
        raise "unexpected char at #{i} (#{buffer[i].inspect})"
      end

      parsed += length

    end # while

    parsed
  end

end
164
+ end
165
+ end
@@ -0,0 +1,44 @@
1
+ # vim: et sw=2 ts=2 sts=2
2
+ module Cae
3
+ module MultipartParser
4
# One part of a multipart body: its parsed headers plus the callbacks the
# caller registered for it.
class Part

  # Hash of header name => value, with names normalised to
  # Capitalised-Words form (e.g. "Content-Length").
  attr_reader :headers

  def initialize
    @callbacks = {}
    @headers = {}
  end

  # Parses a block of CRLF-separated header lines into #headers.
  #
  # Folded (multiline) headers are joined with a single space. Lines that
  # contain no colon, including blank lines, are ignored.
  #
  # @param str [String] raw header text.
  # @return [Hash] the accumulated headers.
  def parse_header_str(str)
    # Munge multiline headers back into one line. Only a CRLF followed by
    # space or tab is a continuation; \s would also swallow blank lines.
    unfolded = str.gsub(/\r\n[ \t]+/, ' ')

    # split header string into a hash
    unfolded.split(/\r\n/).each do |line|
      next unless line.include?(':')

      # Split on the first colon only: values such as quoted filenames or
      # timestamps may themselves contain colons.
      key, value = line.split(':', 2)

      # normalize content-length -> Content-Length
      key = key.split('-').map(&:capitalize).join('-')

      @headers[key] = value.to_s.lstrip
    end
    @headers
  end

  # @return [Integer] the Content-Length header as an integer, 0 when the
  #   header is absent.
  def content_length
    @headers['Content-Length'].to_i
  end

  # Registers +callback+ for +event+, replacing any earlier registration.
  def on(event, &callback)
    @callbacks[event] = callback
  end

  # Parser will call :headers, :data, :end
  #
  # Invokes the callback registered for +event+ with +arg+; does nothing and
  # returns nil when no callback is registered.
  def callback(event, arg = nil)
    handler = @callbacks[event]
    handler.call(arg) if handler
  end

end
43
+ end
44
+ end
@@ -0,0 +1,5 @@
1
+ module Cae
2
+ module MultipartParser
3
+ VERSION = "1.0.0"
4
+ end
5
+ end
@@ -0,0 +1,2 @@
1
+ require 'cae/multipart_parser/parser'
2
+ require 'cae/multipart_parser/part'
@@ -0,0 +1,73 @@
1
+ # vim: et sw=2 ts=2 sts=2
2
+
3
+ require File.expand_path("spec_helper", File.dirname(__FILE__))
4
+
5
+ describe Cae::MultipartParser::Parser do
6
+ let(:boundary) do
7
+ # create a random boundary
8
+ ("-" * 24) + SecureRandom.random_bytes(8).unpack('H*').first
9
+ end
10
+
11
+ let(:parser) do
12
+ Cae::MultipartParser::Parser.new(boundary: boundary)
13
+ end
14
+
15
+ describe "#parse" do
16
+ it "returns the number of bytes parsed" do
17
+ part = SecureRandom.random_bytes(1024) # random data
18
+ body = generate_body(boundary, [part])
19
+ fh = StringIO.new body
20
+
21
+ r = parser.parse fh do |part|
22
+ end
23
+
24
+ r.must_equal body.length
25
+ end
26
+
27
+ it "calls the :headers callback with a hash" do
28
+ part = SecureRandom.random_bytes(1024) # random data
29
+ fh = StringIO.new generate_body(boundary, [part])
30
+
31
+ headers = nil
32
+ parser.parse fh do |part|
33
+ part.on(:headers){|h| headers = h }
34
+ end
35
+ headers.must_be_kind_of Hash
36
+ end
37
+
38
+ it "calls the :data callback with the original data" do
39
+ part = SecureRandom.random_bytes(1024 * 1024) # 1MB of random data
40
+ fh = StringIO.new generate_body(boundary, [part])
41
+ ret = ''
42
+ parser.parse fh do |part|
43
+ part.on(:data){|data| ret << data }
44
+ end
45
+
46
+ ret.must_equal part
47
+ end
48
+
49
+ it "calls the :end callback after the part is done" do
50
+ part = SecureRandom.random_bytes(1024) # random data
51
+ fh = StringIO.new generate_body(boundary, [part])
52
+ done = 0
53
+ parser.parse fh do |part|
54
+ part.on(:end){ done += 1 }
55
+ end
56
+
57
+ done.must_equal 1
58
+ end
59
+
60
+ it "calls callbacks after each part is done" do
61
+ part = SecureRandom.random_bytes(1024) # random data
62
+ fh = StringIO.new generate_body(boundary, [part, part])
63
+ headers, done = 0, 0
64
+ parser.parse fh do |part|
65
+ part.on(:headers){ headers += 1 }
66
+ part.on(:end){ done += 1 }
67
+ end
68
+
69
+ headers.must_equal 2
70
+ done.must_equal 2
71
+ end
72
+ end
73
+ end
data/spec/part_spec.rb ADDED
@@ -0,0 +1,82 @@
1
+ # vim: et sw=2 ts=2 sts=2
2
+
3
+ require File.expand_path("spec_helper", File.dirname(__FILE__))
4
+
5
+ describe Cae::MultipartParser::Part do
6
+ let(:part) { Cae::MultipartParser::Part.new }
7
+
8
+ describe '#parse_header_str' do
9
+ it "parses a simple input" do
10
+ input = "Content-Type: text/html"
11
+ part.parse_header_str(input).must_equal({
12
+ 'Content-Type' => 'text/html'
13
+ })
14
+ end
15
+
16
+ it "ignores trailing CRLF" do
17
+ input = "Content-Type: text/html\r\n"
18
+ part.parse_header_str(input).must_equal({
19
+ 'Content-Type' => 'text/html'
20
+ })
21
+ end
22
+
23
+ it "parses multiple lines" do
24
+ input = "Content-Type: text/html\r\nContent-Length: 100"
25
+ part.parse_header_str(input).must_equal({
26
+ 'Content-Type' => 'text/html',
27
+ 'Content-Length' => '100'
28
+ })
29
+ end
30
+
31
+ it "parses multiline headers" do
32
+ input = "Content-Type: multipart/form-data;\r\n\tboundary=foo"
33
+ part.parse_header_str(input).must_equal({
34
+ 'Content-Type' => 'multipart/form-data; boundary=foo'
35
+ })
36
+ end
37
+
38
+ it "parses multiple multiline headers" do
39
+ input = "Content-Type: multipart/form-data;\r\n\tboundary=foo\r\nContent-Type2: multipart/form-data;\r\n\tboundary=bar"
40
+ part.parse_header_str(input).must_equal({
41
+ 'Content-Type' => 'multipart/form-data; boundary=foo',
42
+ 'Content-Type2' => 'multipart/form-data; boundary=bar'
43
+ })
44
+ end
45
+ end
46
+
47
+ describe '#content_length' do
48
+ it "considers Content-Length to be zero if unset" do
49
+ input = "Content-Type: text/html"
50
+ part.parse_header_str(input)
51
+ part.content_length.must_equal 0
52
+ end
53
+
54
+ it "exposes Content-Length if present in headers" do
55
+ input = "Content-Type: text/html\r\nContent-Length: 100"
56
+ part.parse_header_str(input)
57
+ part.content_length.must_equal 100
58
+ end
59
+ end
60
+
61
+ describe "#callback" do
62
+ it "ignores an unregistered callback type" do
63
+ part.callback(:unregistered).must_equal nil
64
+ end
65
+
66
+ it "calls a registered callback type" do
67
+ foo = 0
68
+ cb = ->(arg){ foo = arg }
69
+ part.on(:registered, &cb)
70
+ part.callback(:registered, 1)
71
+ foo.must_equal 1
72
+ end
73
+
74
+ it "defaults to a nil arg" do
75
+ foo = true
76
+ cb = ->(arg){ foo = arg }
77
+ part.on(:registered, &cb)
78
+ part.callback(:registered)
79
+ foo.must_equal nil
80
+ end
81
+ end
82
+ end
@@ -0,0 +1,25 @@
1
+ # vim: et sw=2 ts=2 sts=2
2
+
3
+ $:.unshift(File.expand_path("../lib", File.dirname(__FILE__)))
4
+
5
+ require "rubygems"
6
+
7
+ gem 'minitest'
8
+ require "minitest/autorun"
9
+
10
+ require 'cae/multipart_parser'
11
+
12
+ require 'securerandom'
13
+ require 'net/http/post/multipart'
14
+
15
# Builds a multipart request body string in which each element of +arr+
# becomes one file upload named "file0", "file1", ... separated by +boundary+.
def generate_body(boundary, arr)
  uploads = arr.each_with_index.each_with_object({}) do |(content, position), acc|
    name = "file#{position}"
    acc[name] = UploadIO.new(StringIO.new(content), name, 'application/binary')
  end

  Net::HTTP::Post::Multipart.new('/', uploads, {}, boundary).body_stream.read
end
metadata ADDED
@@ -0,0 +1,89 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: cae-multipart_parser
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Chris Elsworth
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-09-03 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: multipart-post
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: minitest
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ description:
42
+ email:
43
+ - chris@shagged.org
44
+ executables: []
45
+ extensions: []
46
+ extra_rdoc_files: []
47
+ files:
48
+ - ".gitignore"
49
+ - Gemfile
50
+ - LICENSE.txt
51
+ - README.md
52
+ - Rakefile
53
+ - cae-multipart_parser.gemspec
54
+ - lib/cae/multipart_parser.rb
55
+ - lib/cae/multipart_parser/parser.rb
56
+ - lib/cae/multipart_parser/part.rb
57
+ - lib/cae/multipart_parser/version.rb
58
+ - spec/parser_spec.rb
59
+ - spec/part_spec.rb
60
+ - spec/spec_helper.rb
61
+ homepage: https://github.com/celsworth/cae-multipart_parser
62
+ licenses:
63
+ - MIT
64
+ metadata: {}
65
+ post_install_message:
66
+ rdoc_options: []
67
+ require_paths:
68
+ - lib
69
+ required_ruby_version: !ruby/object:Gem::Requirement
70
+ requirements:
71
+ - - ">="
72
+ - !ruby/object:Gem::Version
73
+ version: '0'
74
+ required_rubygems_version: !ruby/object:Gem::Requirement
75
+ requirements:
76
+ - - ">="
77
+ - !ruby/object:Gem::Version
78
+ version: '0'
79
+ requirements: []
80
+ rubyforge_project:
81
+ rubygems_version: 2.4.5.1
82
+ signing_key:
83
+ specification_version: 4
84
+ summary: Event-driven HTTP Multipart parser
85
+ test_files:
86
+ - spec/parser_spec.rb
87
+ - spec/part_spec.rb
88
+ - spec/spec_helper.rb
89
+ has_rdoc: