strelka 0.0.1.pre.265 → 0.0.1.pre.279

Sign up to get free protection for your applications and to get access to all the features.
@@ -165,7 +165,7 @@ module Strelka::App::RestResources
165
165
  ### OPTIONS /resources
166
166
  def add_options_handler( route, rsrcobj, options )
167
167
  # :TODO: Documentation for HTML mode (possibly using http://swagger.wordnik.com/)
168
- self.log.debug "Adding OPTIONS handler for %p" % [ route, rsrcobj ]
168
+ self.log.debug "Adding OPTIONS handler for %s (%p)" % [ route, rsrcobj ]
169
169
  self.add_route( :OPTIONS, route, options ) do |req|
170
170
  self.log.debug "OPTIONS handler!"
171
171
  verbs = self.class.resource_verbs[ route ].sort
@@ -167,7 +167,8 @@ module Strelka::App::Templating
167
167
  ### and return them in a hash keyed by name (Symbol).
168
168
  def load_template_map
169
169
  return self.class.template_map.inject( {} ) do |map, (name, path)|
170
- map[ name ] = Inversion::Template.load( path )
170
+ enc = Encoding.default_internal || Encoding::UTF_8
171
+ map[ name ] = Inversion::Template.load( path, encoding: enc )
171
172
  map
172
173
  end
173
174
  end
@@ -177,7 +178,8 @@ module Strelka::App::Templating
177
178
  ### If none was declared, returns +nil+.
178
179
  def load_layout_template
179
180
  return nil unless ( lt_path = self.class.layout_template )
180
- return Inversion::Template.load( lt_path )
181
+ enc = Encoding.default_internal || Encoding::UTF_8
182
+ return Inversion::Template.load( lt_path, encoding: enc )
181
183
  end
182
184
 
183
185
 
@@ -22,6 +22,9 @@ module Strelka
22
22
 
23
23
  end # class RequestError
24
24
 
25
+ # An exception that's raised when parsing of some structured data fails.
26
+ class ParseError < Error; end
27
+
25
28
  # An exception raised when there is a problem with an application plugin.
26
29
  class PluginError < Error; end
27
30
 
@@ -12,6 +12,7 @@ require 'strelka' unless defined?( Strelka )
12
12
  require 'strelka/httpresponse'
13
13
  require 'strelka/cookieset'
14
14
  require 'strelka/mixins'
15
+ require 'strelka/multipartparser'
15
16
 
16
17
  # An HTTP request class.
17
18
  class Strelka::HTTPRequest < Mongrel2::HTTPRequest
@@ -175,6 +176,9 @@ class Strelka::HTTPRequest < Mongrel2::HTTPRequest
175
176
  end
176
177
 
177
178
 
179
+ # multipart/form-data: http://tools.ietf.org/html/rfc2388
180
+ # Content-disposition header: http://tools.ietf.org/html/rfc2183
181
+
178
182
  ### Return a Hash of request form data.
179
183
  def parse_form_data
180
184
  unless self.headers.content_type
@@ -191,8 +195,13 @@ class Strelka::HTTPRequest < Mongrel2::HTTPRequest
191
195
  when 'text/x-yaml', 'application/x-yaml'
192
196
  return YAML.load( self.body )
193
197
  when 'multipart/form-data'
194
- raise NotImplementedError, "%p doesn't handle multipart form data yet" %
195
- [ self.class ]
198
+ boundary = self.headers.content_type[ /\bboundary=(\S+)/, 1 ] or
199
+ raise Strelka::ParseError, "no boundary found for form data: %p" %
200
+ [ self.headers.content_type ]
201
+ boundary = dequote( boundary )
202
+
203
+ parser = Strelka::MultipartParser.new( self.body, boundary )
204
+ return parser.parse
196
205
  else
197
206
  raise Strelka::Error, "don't know how to handle %p form data" %
198
207
  [ self.headers.content_type ]
@@ -204,6 +213,13 @@ class Strelka::HTTPRequest < Mongrel2::HTTPRequest
204
213
  private
205
214
  #######
206
215
 
216
+ ### Strip surrounding double quotes from a copy of the specified string
217
+ ### and return it.
218
def dequote( string )
	# Match a leading RFC-style quoted-string: a double quote, then any run of
	# characters that are neither a quote nor a backslash, or a backslash
	# followed by any character (a quoted-pair), then the closing quote.
	# Excluding the backslash from the character class is what lets the
	# quoted-pair branch see escaped quotes. \A anchors at the start of the
	# string rather than at the start of any line.
	# Returns the text between the quotes (escapes are left as-is), or an
	# unmodified copy of +string+ if it isn't quoted.
	return string[ /\A"((?:[^"\\]|\\.)*)"/, 1 ] || string.dup
end
221
+
222
+
207
223
  ### Return the given +enum+ containing query arguments (such as those returned from
208
224
  ### URI.decode_www_form) as a Hash, combining multiple values for the same key
209
225
  ### into an Array.
@@ -192,7 +192,7 @@ module Strelka
192
192
  return obj if obj.class.name == 'RSpec::Mocks::Mock'
193
193
 
194
194
  return case obj
195
- when NilClass, Numeric, TrueClass, FalseClass, Symbol
195
+ when NilClass, Numeric, TrueClass, FalseClass, Symbol, Module
196
196
  obj
197
197
 
198
198
  when Array
@@ -0,0 +1,358 @@
1
+ #!/usr/bin/env ruby
2
+ # vim: set nosta noet ts=4 sw=4:
3
+ # encoding: utf-8
4
+
5
+ require 'tmpdir'
6
+ require 'tempfile'
7
+ require 'pathname'
8
+ require 'stringio'
9
+
10
+ require 'strelka' unless defined?( Strelka )
11
+
12
+ # A parser for extracting uploaded files and parameters from the body of a
13
+ # multipart/form-data request.
14
+ #
15
+ # == Synopsis
16
+ #
17
+ # require 'strelka/multipartparser'
18
+ #
19
+ # parser = Strelka::MultipartParser.new( io, 'boundary' )
20
+ # fields = parser.parse
21
+ #
22
+ # == Authors
23
+ #
24
+ # * Michael Granger <ged@FaerieMUD.org>
25
+ # * Mahlon E. Smith <mahlon@martini.nu>
26
+ #
27
+ ### A class for parsing multipart mime documents from a stream.
28
+ class Strelka::MultipartParser
29
+ extend Loggability,
30
+ Strelka::MethodUtilities
31
+ include Configurability
32
+
33
+ # Loggability API -- log to Strelka's logger
34
+ log_to :strelka
35
+
36
+ # Configurability API -- use the 'multipartparser' section of the config
37
+ config_key :multipartparser
38
+
39
+
40
+ # Line-ending regexp. Supports UNIX line-endings for testing.
41
CRLF_REGEXP = /\r?\n/

# Pattern for matching a blank line (two consecutive line-endings)
BLANK_LINE_REGEXP = /#{CRLF_REGEXP}{2}/

# Line-ending for RFC5322 header fields; EOL *not* followed by a WSP char
# (space, \x20, or horizontal tab, \x09). An EOL that *is* followed by WSP is
# a folded continuation of the same header field.
HEADER_FIELD_EOL = /#{CRLF_REGEXP}(?!\x20|\x09)/

# Configurability API -- configuration defaults
CONFIG_DEFAULTS = {
	# Read buffer size, in bytes
	bufsize: 524288,
	# Directory uploaded files are spooled to
	spooldir: Pathname( Dir.tmpdir ) + 'strelka-mimeparts',
}
54
+
55
+ # A mixin that extends the IO objects for uploaded files.
56
module FileInputField

	# The value of the part's Content-Type header
	attr_accessor :content_type

	# The size of the spooled upload
	attr_accessor :content_length

	# The name of the uploaded file, as given by the client
	attr_accessor :filename

end # module FileInputField
61
+
62
+ ##
63
+ # The configured buffer size to use when parsing
64
+ singleton_attr_accessor :bufsize
65
+
66
+ ##
67
+ # The configured spool directory for storing attachments
68
+ singleton_attr_accessor :spooldir
69
+
70
+
71
+ ### Configurability API -- configure the parser with the 'multipartparser' section
72
+ ### of the config:
73
+ ###
74
+ ### bufsize:: the size of the buffer (in bytes) to use when reading the
75
+ ### document. Larger sizes use more heap, but are faster.
76
+ ### spooldir:: the directory to spool file upload parts to.
77
def self::configure( options=nil )
	if options
		self.log.debug "Configuring the %p: %p" % [ self, options ]

		# Integer(nil) and Pathname(nil) both raise TypeError, so a missing
		# option has to be detected *before* coercion for the default to apply.
		bufsize  = options[:bufsize]
		spooldir = options[:spooldir]

		self.bufsize  = bufsize  ? Integer( bufsize )   : CONFIG_DEFAULTS[:bufsize]
		self.spooldir = spooldir ? Pathname( spooldir ) : CONFIG_DEFAULTS[:spooldir]
	else
		# No config section at all: fall back to the defaults wholesale.
		self.log.debug "Configuring %p with defaults: %p" % [ self, CONFIG_DEFAULTS ]
		self.bufsize  = CONFIG_DEFAULTS[:bufsize]
		self.spooldir = CONFIG_DEFAULTS[:spooldir]
	end
end
88
+
89
+
90
+ ### Create a new Strelka::MultipartParser that reads from +io+ (an IO or a String) and splits its parts on +boundary+.
91
+ def initialize( io, boundary )
92
+ io = StringIO.new( io ) unless io.respond_to?( :read )
93
+ boundary = '--' + boundary # unless boundary.start_with?( '--' )
94
+
95
+ @bufsize = self.class.bufsize || CONFIG_DEFAULTS[:bufsize]
96
+ @spooldir = self.class.spooldir || CONFIG_DEFAULTS[:spooldir]
97
+ @io = io
98
+ @boundary = boundary
99
+ @fields = {}
100
+ @buffer = ''
101
+
102
+ # Ensure that the buffer can contain at least a whole boundary,
103
+ # otherwise we can't scan for it.
104
+ @bufsize = @boundary.bytesize * 1.5 if @bufsize < @boundary.bytesize * 1.5
105
+ @spooldir.mkpath
106
+ end
107
+
108
+
109
+ ######
110
+ public
111
+ ######
112
+
113
+ # Parsed form fields
114
+ attr_reader :fields
115
+
116
+ # The current buffer for unparsed data
117
+ attr_reader :buffer
118
+
119
+
120
+ ### Parse the form data from the IO and return it as a Hash.
121
def parse
	self.log.debug "Starting parse: %p" % [ self ]

	# The body has to open with a boundary line; strip it off.
	self.strip_boundary or
		raise Strelka::ParseError, "No initial boundary"

	# Scan parts until the boundary just consumed was the closing one, i.e.,
	# what's left in the buffer starts with the trailing '--'. At least one
	# part is always scanned.
	loop do
		self.scan_part
		break if @buffer.start_with?( '--' )
	end

	self.log.debug "Finished parse. %d fields" % [ self.fields.length ]
	return self.fields
end
136
+
137
+
138
+
139
+ #########
140
+ protected
141
+ #########
142
+
143
+ ### Scan a part from the buffer.
144
def scan_part
	headers = self.scan_headers
	disposition = headers['content-disposition']

	# Every part of a multipart/form-data body has to carry a form-data
	# Content-Disposition; a missing or different one is a malformed request.
	unless disposition && disposition.start_with?( 'form-data' )
		raise Strelka::ParseError, "don't know what to do with %p parts" % [ disposition ]
	end

	# The field name is everything between the quotes; stopping at the closing
	# quote keeps following parameters (e.g. filename) out of the name.
	key = disposition[ /\bname="([^"]+)"/i, 1 ] or
		raise Strelka::ParseError, "no field name: %p" % [ disposition ]
	val = nil

	# :TODO: Support for content-type and content-transfer-encoding headers for parts.

	# If it's a file, spool it out to a tempfile
	if disposition =~ /\bfilename=/i
		# Strip any client-side directory (up to the last backslash) off the name.
		file = disposition[ /\bfilename="(?:[^"]*\\)?([^"]+)"/i, 1 ]

		# A file part with no usable filename (e.g., an empty file input) isn't
		# recorded as a field, but its body and trailing boundary still have
		# to be consumed or the next part would be scanned from the wrong place.
		unless file
			self.log.debug "Skipping file part %p with no filename" % [ key ]
			discarded, _ = self.spool_file_upload
			discarded.close!
			return self.strip_boundary
		end

		self.log.debug "Parsing an uploaded file %p (%p)" % [ key, file ]
		val = self.scan_file_field( file, headers )

	# otherwise just read it as a regular parameter
	else
		self.log.debug "Parsing a form parameter (%p)" % [ key ]
		val = self.scan_regular_field( key )
	end

	# Convert the value to an Array if there are more than one
	if @fields.key?( key )
		@fields[ key ] = [ @fields[key] ] unless @fields[ key ].is_a?( Array )
		@fields[ key ] << val
	else
		@fields[ key ] = val
	end

	self.strip_boundary
end
178
+
179
+
180
+ ### Scan the buffer for MIME headers and return them as a Hash.
181
def scan_headers
	# Drop the line-ending left over from the boundary line, but only if it's
	# actually at the head of the buffer.
	@buffer.slice!( /\A#{CRLF_REGEXP}/ )

	# Find the blank line that ends the header section, pulling in more data
	# until it shows up. Each pass reads new data or raises, so this can't spin
	# on an unchanged buffer.
	pos = nil
	until ( pos = @buffer.index(BLANK_LINE_REGEXP) )
		self.log.debug "Couldn't find a blank line in the first %d bytes (%p)" %
			[ @buffer.bytesize, @buffer[0..100] ]
		self.read_some_more or
			raise Strelka::ParseError, "EOF while searching for headers"
	end

	# Everything before the blank line is header data; this is empty if the
	# part has no headers at all.
	headerlines = @buffer.slice!( 0, pos )

	# put headers into a hash, keyed by downcased field name
	headers = headerlines.strip.split( HEADER_FIELD_EOL ).each_with_object( {} ) do |line, hash|
		unfolded = line.gsub( CRLF_REGEXP, '' ) # Un-fold long headers
		key, val = unfolded.split( /:\s*/, 2 )
		hash[ key.downcase ] = val
	end
	self.log.debug "Scanned headers: %p" % [headers]

	# remove the blank line itself from the parse buffer
	@buffer.slice!( /\A#{BLANK_LINE_REGEXP}/ )

	return headers
end
212
+
213
+
214
+ ### Scan the value after the scan pointer for the specified metadata
215
+ ### +parameter+.
216
+ def scan_regular_field( key )
217
+ param = ''
218
+
219
+ self.log.debug "Scanning form parameter: %p" % [key]
220
+ while param.empty?
221
+ if start = @buffer.index( @boundary )
222
+ self.log.debug "Found the end of the parameter."
223
+ param = @buffer.slice!( 0, start )
224
+ else
225
+ self.read_some_more or raise Strelka::ParseError,
226
+ "EOF while scanning a form parameter"
227
+ end
228
+ end
229
+
230
+ return param.chomp
231
+ end
232
+
233
+
234
+ ### Scan the body of the current document part, spooling the data to a tempfile
235
+ ### on disk and returning the resulting filehandle.
236
def scan_file_field( filename, headers )
	self.log.info "Parsing file '%s'" % [ filename ]

	# Write the part's body out to disk, then decorate the resulting file
	# object with the upload's metadata.
	spooled, bytecount = self.spool_file_upload
	spooled.extend( FileInputField )
	spooled.filename = filename
	spooled.content_type = headers['content-type']
	spooled.content_length = bytecount

	self.log.debug "Scanned file %p to: %s (%d bytes)" %
		[ spooled.filename, spooled.path, bytecount ]
	return spooled
end
249
+
250
+
251
+ ### Scan the file data at the head of the buffer, spooling it into a temporary
252
+ ### file in the spool directory. Returns the (closed) tempfile object and the
253
+ ### size of the data that was spooled to it.
254
def spool_file_upload
	self.log.debug "Spooling file from upload"
	tmpfile = Tempfile.open( 'filedata', @spooldir.to_s, encoding: 'ascii-8bit' )
	size = 0

	begin
		# :TODO: Use mmap(2) to map the resulting IOs from mongrel's spool file
		# rather than writing them all out to disk a second time.
		until tmpfile.closed?

			# look for end, store everything until boundary
			if start = @buffer.index( @boundary )
				self.log.debug "Found the end of the file"
				leavings = @buffer.slice!( 0, start )

				# Drop the line-ending that separates the data from the boundary
				# line: CRLF normally, or a bare LF for UNIX-style input. Nothing
				# else is removed, so no file data is lost.
				if leavings.end_with?( "\r\n" )
					leavings.slice!( -2, 2 )
				elsif leavings.end_with?( "\n" )
					leavings.slice!( -1, 1 )
				end

				tmpfile.write( leavings )
				size += leavings.bytesize
				tmpfile.close

			# not at the end yet, buffer this chunk to disk
			elsif @buffer.bytesize >= @bufsize
				# make sure we're never writing a portion of the boundary
				# out while we're buffering: hold back the last @bufsize bytes
				buf = @buffer.slice!( 0, @buffer.bytesize - @bufsize )
				tmpfile.write( buf )
				size += buf.bytesize
			end

			# put some more data into the buffer
			unless tmpfile.closed?
				self.read_some_more or
					raise Strelka::ParseError, "EOF while spooling file upload"
			end
		end
	rescue
		# Don't leave a half-written spool file behind if the upload can't be
		# read to its end.
		tmpfile.close!
		raise
	end

	return tmpfile, size
end
291
+
292
+
293
+ ### Strip data from the head of the buffer that matches +pat+, returning it
294
+ ### if successful, or returning +nil+ if not. The matched data should fit within
295
+ ### the parser's chunk size.
296
def strip( pat )
	# Top the buffer up first so the pattern has a full chunk to match against.
	self.read_chunk

	# Only a match sitting at the very start of the buffer counts.
	at_head = ( @buffer.index(pat) == 0 )
	return at_head ? @buffer.slice!( pat ) : nil
end
301
+
302
+
303
+ ### Strip the boundary that's at the front of the buffer, reading more
304
+ ### data into it as necessary. Returns the boundary if successful, or +nil+ if
305
+ ### there wasn't a boundary in the buffer.
306
def strip_boundary
	# Log the boundary alongside the first 40 characters of what it's being
	# matched against.
	preview = @buffer[ 0, 40 ]
	self.log.debug "Stripping boundary:\n%p at:\n%p" % [ @boundary, preview ]

	return self.strip( @boundary )
end
310
+
311
+
312
+ ### Read data from the state's IO until the buffer contains at least the number
313
+ ### of bytes in the chunksize, or the IO is at EOF.
314
def read_chunk
	# One chunk is the parser's buffer size; the result is whatever
	# #read_at_least reports.
	return self.read_at_least( @bufsize )
end
319
+
320
+
321
+ ### Read at least +bytecount+ bytes from the io, appending the data onto the
322
+ ### buffer.
323
def read_at_least( bytecount )
	# Nothing left to read at all
	return false if @io.eof?

	# Keep pulling data in until the buffer is big enough or the input runs
	# out. If the buffer is already big enough, nothing is read.
	loop do
		break if @buffer.bytesize >= bytecount || @io.eof?
		self.read_some_more
	end

	return true
end
336
+
337
+
338
+ ### Try to read another chunk of data from the IO into the buffer, returning
339
+ ### true if any data was read, or false if the IO is at EOF.
340
def read_some_more
	return false if @io.eof?
	size_before = @buffer.bytesize

	@buffer << @io.read( @bufsize )

	# If that read didn't add anything, yield to other threads and try again
	# until the buffer grows or the input is exhausted.
	while @buffer.bytesize <= size_before
		return false if @io.eof?
		Thread.pass
		@buffer << @io.read( @bufsize )
	end

	return true
end
356
+
357
+ end # class Strelka::MultipartParser
358
+