lifter 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,111 @@
1
+ require 'zlib'
2
+
3
require 'monitor'

module Lifter
  # A fixed-size pool of worker threads, each fed by its own queue.
  #
  # Every job is pushed with a tag. The tag is hashed (CRC32) to pick the worker, so all jobs
  # sharing a tag land on the same queue and run in the order they were pushed.
  class ThreadPool
    # pool_size - number of worker threads (and queues) to create.
    def initialize(pool_size)
      @pool_size = pool_size

      # Guards @pending and @cleared, which are touched by both callers and workers.
      @monitor = Monitor.new

      @queues = {}
      @workers = {}

      @pending = {} # job_tag => number of queued jobs that have not been picked up yet
      @cleared = [] # job_tags whose queued jobs must be skipped

      spawn_workers
    end

    # Add a job closure to the thread pool, tagged with a given job_tag to allow for consistent
    # execution ordering.
    #
    # job_tag - any object; its to_s form is used as the tag and must not be empty.
    # job     - the block to run on a worker thread.
    #
    # Returns the number of jobs now pending for the tag.
    # Raises ArgumentError if the tag is empty or no block was given.
    def push(job_tag, &job)
      job_tag = job_tag.to_s

      raise ArgumentError, 'job_tag must be defined' if job_tag.empty?
      raise ArgumentError, 'a job block must be given' if job.nil?

      worker_id = Zlib.crc32(job_tag) % @pool_size

      # Register the job as pending before the worker can see it. Otherwise a fast worker could
      # decrement the counter first and leave a phantom pending entry behind.
      pending = add_pending(job_tag)
      @queues[worker_id].push([job_tag, job])

      pending
    end

    # For a given job_tag, prevents any future pending jobs from running.
    #
    # The tag is normalised with to_s, exactly as in #push, so symbols and strings are
    # interchangeable.
    def clear(job_tag)
      job_tag = job_tag.to_s

      @monitor.synchronize do
        @cleared << job_tag unless @cleared.include?(job_tag)
      end
    end

    private

    # Returns whether jobs for job_tag are currently marked as cleared. Once no more jobs are
    # pending for the tag, the mark is dropped so later pushes run normally again.
    def cleared?(job_tag)
      @monitor.synchronize do
        cleared = @cleared.include?(job_tag)
        @cleared.delete(job_tag) if count_pending(job_tag).zero?
        cleared
      end
    end

    # Increments the pending counter for job_tag and returns the new count.
    def add_pending(job_tag)
      @monitor.synchronize do
        @pending[job_tag] = count_pending(job_tag) + 1
      end
    end

    # Returns the number of jobs still pending for job_tag (0 when unknown).
    def count_pending(job_tag)
      @monitor.synchronize { @pending[job_tag] || 0 }
    end

    # Decrements the pending counter for job_tag. The entry is removed as soon as it reaches
    # zero so @pending does not accumulate one stale key per tag ever seen.
    def remove_pending(job_tag)
      @monitor.synchronize do
        remaining = count_pending(job_tag) - 1

        if remaining > 0
          @pending[job_tag] = remaining
        else
          @pending.delete(job_tag)
        end
      end
    end

    # Creates one queue and one worker thread per slot in the pool.
    def spawn_workers
      @pool_size.times do |worker_id|
        queue = Queue.new

        @queues[worker_id] = queue
        @workers[worker_id] = Thread.new { run_worker(queue) }
      end
    end

    # Worker loop: pops jobs off the queue forever and runs those that were not cleared.
    def run_worker(queue)
      loop do
        job_tag, job = queue.pop
        remove_pending(job_tag)

        next if cleared?(job_tag)

        begin
          job.call
        rescue StandardError => e
          # A failing job is treated as fatal: report it and call Kernel#exit.
          # Failed jobs are not retried.
          puts e.to_s
          puts e.backtrace
          exit
        end
      end
    end
  end
end
@@ -0,0 +1,3 @@
1
module Lifter
  # Release number of the lifter gem; read by the gemspec.
  VERSION = '0.1.0'.freeze
end
@@ -0,0 +1,41 @@
1
+ require 'http'
2
+
3
module Lifter
  # An outgoing HTTP request built from a configured endpoint (url + method), with
  # caller-supplied headers and params, sent through the HTTP library.
  class Webhook
    # NOTE: the :method reader shadows Object#method on Webhook instances.
    attr_reader :url, :method
    attr_accessor :headers, :params

    # endpoint - any object responding to #url and #method.
    def initialize(endpoint)
      @url, @method = endpoint.url, endpoint.method
      @headers = {}
      @params = {}
    end

    # Stores a block under @on_failure. Nothing in this class invokes it.
    def on_failure(&block)
      @on_failure = block
    end

    # Sends the request and returns whatever the HTTP client returns.
    #
    # GET passes the params via the :params option; POST and PUT pass them via :form.
    #
    # Raises StandardError for any other method.
    def deliver
      client = HTTP.headers(@headers)
      verb = @method.to_sym

      return client.get(@url, params: @params) if verb == :get
      return client.post(@url, form: @params) if verb == :post
      return client.put(@url, form: @params) if verb == :put

      raise StandardError, 'unsupported http method in webhook'
    end
  end
end
data/lib/lifter.rb ADDED
@@ -0,0 +1,14 @@
1
+ require 'multipart_parser/parser'
2
+ require 'multipart_parser/reader'
3
+
4
+ require 'lifter/thread_pool'
5
+ require 'lifter/file_pool'
6
+ require 'lifter/webhook'
7
+ require 'lifter/config'
8
+ require 'lifter/connection'
9
+ require 'lifter/payloads/multipart_payload'
10
+ require 'lifter/payloads/inline_payload'
11
+ require 'lifter/file_upload'
12
+ require 'lifter/file_manager'
13
+ require 'lifter/server'
14
+ require 'lifter/version'
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2011,2012 Daniel Abrahamsson
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,246 @@
1
module MultipartParser
  # A low level parser for multipart messages,
  # based on the node-formidable parser.
  #
  # The parser is a character-at-a-time state machine. Data may be fed in
  # arbitrary chunks through #write; the position inside a boundary or
  # header, the pending flags and the current state are kept between calls.
  class Parser

    def initialize
      @boundary = nil
      @boundary_chars = nil
      @lookbehind = nil
      @state = :parser_uninitialized
      @index = 0 # Index into boundary or header
      @flags = {}
      @marks = {} # Keep track of different parts
      @callbacks = {}
    end

    # Initializes the parser, using the given boundary
    # (the bare boundary value, without the leading dashes).
    def init_with_boundary(boundary)
      @boundary = "\r\n--" + boundary
      # Scratch space for characters that looked like a boundary;
      # they are replayed as part data if the match turns out to be false.
      @lookbehind = "\0" * (@boundary.length + 8)
      @state = :start

      # Set of characters occurring in the boundary, used to skip ahead
      # quickly through part data.
      @boundary_chars = {}
      @boundary.each_byte do |b|
        @boundary_chars[b.chr] = true
      end
    end

    # Registers a callback to be called when the
    # given event occurs. Each callback is expected to
    # take three parameters: buffer, start_index, and end_index.
    # All of these parameters may be nil, depending on the callback.
    # Valid callbacks are:
    # :end
    # :header_field
    # :header_value
    # :header_end
    # :headers_end
    # :part_begin
    # :part_data
    # :part_end
    def on(event, &callback)
      @callbacks[event] = callback
    end

    # Writes data to the parser.
    # Returns how far into the buffer parsing got.
    # In practice, this means that if the return value
    # is less than the buffer length, a parse error occurred.
    def write(buffer)
      # Without a boundary nothing can be parsed. Bail out before touching
      # @boundary, which is still nil in this state.
      return 0 if @state == :parser_uninitialized

      i = 0
      buffer_length = buffer.length
      index = @index
      flags = @flags.dup
      state = @state
      lookbehind = @lookbehind
      boundary = @boundary
      boundary_chars = @boundary_chars
      boundary_length = @boundary.length
      boundary_end = boundary_length - 1

      while i < buffer_length
        c = buffer[i, 1]
        case state
        when :start
          index = 0
          state = :start_boundary
        when :start_boundary # Differs in that it has no preceding \r\n
          if index == boundary_length - 2
            return i unless c == "\r"
            index += 1
          elsif index - 1 == boundary_length - 2
            return i unless c == "\n"
            # Boundary read successfully, begin next part
            callback(:part_begin)
            state = :header_field_start
          else
            return i unless c == boundary[index+2, 1] # Unexpected character
            index += 1
          end
          i += 1
        when :header_field_start
          state = :header_field
          @marks[:header_field] = i
          index = 0
        when :header_field
          if c == "\r"
            # An empty line: no more headers for this part
            @marks.delete :header_field
            state = :headers_almost_done
          else
            index += 1
            unless c == "-" # Skip hyphens
              if c == ":"
                return i if index == 1 # Empty header field
                data_callback(:header_field, buffer, i, :clear => true)
                state = :header_value_start
              else
                # Only letters (and hyphens, handled above) are accepted
                cl = c.downcase
                return i if cl < "a" || cl > "z"
              end
            end
          end
          i += 1
        when :header_value_start
          if c == " " # Skip spaces
            i += 1
          else
            @marks[:header_value] = i
            state = :header_value
          end
        when :header_value
          if c == "\r"
            data_callback(:header_value, buffer, i, :clear => true)
            callback(:header_end)
            state = :header_value_almost_done
          end
          i += 1
        when :header_value_almost_done
          return i unless c == "\n"
          state = :header_field_start
          i += 1
        when :headers_almost_done
          return i unless c == "\n"
          callback(:headers_end)
          state = :part_data_start
          i += 1
        when :part_data_start
          state = :part_data
          @marks[:part_data] = i
        when :part_data
          prev_index = index

          if index == 0
            # Boyer-Moore derived algorithm to safely skip non-boundary data
            # See http://debuggable.com/posts/parsing-file-uploads-at-500-
            # mb-s-with-node-js:4c03862e-351c-4faa-bb67-4365cbdd56cb
            while i + boundary_length <= buffer_length
              break if boundary_chars.has_key? buffer[i + boundary_end].chr
              i += boundary_length
            end
            c = buffer[i, 1]
          end

          if index < boundary_length
            if boundary[index, 1] == c
              if index == 0
                # Possible boundary start: flush the data seen so far
                data_callback(:part_data, buffer, i, :clear => true)
              end
              index += 1
            else # It was not the boundary we found, after all
              index = 0
            end
          elsif index == boundary_length
            index += 1
            if c == "\r"
              flags[:part_boundary] = true
            elsif c == "-"
              flags[:last_boundary] = true
            else # We did not find a boundary after all
              index = 0
            end
          elsif index - 1 == boundary_length
            if flags[:part_boundary]
              index = 0
              if c == "\n"
                flags.delete :part_boundary
                callback(:part_end)
                callback(:part_begin)
                state = :header_field_start
                i += 1
                next # Ugly way to break out of the case statement
              end
            elsif flags[:last_boundary]
              if c == "-"
                callback(:part_end)
                callback(:end)
                state = :end
              else
                index = 0 # False alarm
              end
            else
              index = 0
            end
          end

          if index > 0
            # When matching a possible boundary, keep a lookbehind
            # reference in case it turns out to be a false lead
            lookbehind[index-1] = c
          elsif prev_index > 0
            # If our boundary turns out to be rubbish,
            # the captured lookbehind belongs to part_data
            callback(:part_data, lookbehind, 0, prev_index)
            @marks[:part_data] = i

            # Reconsider the current character as it might be the
            # beginning of a new sequence.
            i -= 1
          end

          i += 1
        when :end
          i += 1
        else
          return i
        end
      end

      # Flush whatever is still marked; the marks are reset to 0 so the
      # data run continues at the start of the next buffer.
      data_callback(:header_field, buffer, buffer_length)
      data_callback(:header_value, buffer, buffer_length)
      data_callback(:part_data, buffer, buffer_length)

      @index = index
      @state = state
      @flags = flags

      return buffer_length
    end

    private

    # Issues a callback. Empty ranges (start == the_end) are not reported.
    def callback(event, buffer = nil, start = nil, the_end = nil)
      return if !start.nil? && start == the_end
      if @callbacks.has_key? event
        @callbacks[event].call(buffer, start, the_end)
      end
    end

    # Issues a data callback for the range from the stored mark to the_end.
    # Does nothing if no mark is set for data_type.
    # The only valid option is :clear,
    # which, if true, removes the mark afterwards.
    # If not specified, the mark is reset to 0 instead.
    def data_callback(data_type, buffer, the_end, options = {})
      return unless @marks.has_key? data_type
      callback(data_type, buffer, @marks[data_type], the_end)
      unless options[:clear]
        @marks[data_type] = 0
      else
        @marks.delete data_type
      end
    end
  end
end
@@ -0,0 +1,156 @@
1
module MultipartParser
  class NotMultipartError < StandardError; end

  # A more high level interface to MultipartParser.
  #
  # Wraps a Parser, assembles the header fragments it reports into Part
  # objects and forwards part data to the callbacks registered on each Part.
  class Reader

    # Initializes a MultipartReader, that will
    # read a request with the given boundary value.
    def initialize(boundary)
      @parser = Parser.new
      @parser.init_with_boundary(boundary)
      @header_field = ''
      @header_value = ''
      @part = nil
      @ended = false
      @on_error = nil
      @on_part = nil
      @on_end = nil

      init_parser_callbacks
    end

    # Returns true if the parser has finished parsing
    def ended?
      @ended
    end

    # Sets a code block to call
    # when part headers have been parsed.
    # The block receives the Part.
    def on_part(&callback)
      @on_part = callback
    end

    # Sets a code block to call when the
    # closing boundary has been parsed.
    def on_end(&callback)
      @on_end = callback
    end

    # Sets a code block to call when
    # a parser error occurs.
    # The block receives an error message.
    def on_error(&callback)
      @on_error = callback
    end

    # Write data from the given buffer (String)
    # into the reader. If the parser could not
    # consume the whole buffer, the error callback is invoked.
    def write(buffer)
      bytes_parsed = @parser.write(buffer)
      if bytes_parsed != buffer.size
        msg = "Parser error, #{bytes_parsed} of #{buffer.length} bytes parsed"
        @on_error.call(msg) unless @on_error.nil?
      end
    end

    # Extracts a boundary value from a Content-Type header.
    # Note that it is the header value you provide here.
    # Raises NotMultipartError if content_type is invalid.
    def self.extract_boundary_value(content_type)
      unless content_type =~ /multipart/i
        raise NotMultipartError.new("Not a multipart content type!")
      end

      unless content_type =~ /boundary=(?:"([^"]+)"|([^;]+))/i
        raise NotMultipartError.new("No multipart boundary")
      end

      # First group: quoted boundary, second group: bare boundary
      $1 || $2
    end

    # One part of a multipart message: its headers plus
    # callbacks that receive the part's data.
    class Part
      attr_accessor :filename, :headers, :name, :mime

      def initialize
        @headers = {}
        @data_callback = nil
        @end_callback = nil
      end

      # Calls the data callback with the given data
      def emit_data(data)
        @data_callback.call(data) unless @data_callback.nil?
      end

      # Calls the end callback
      def emit_end
        @end_callback.call unless @end_callback.nil?
      end

      # Sets a block to be called when part data
      # is read. The block should take one parameter,
      # namely the read data.
      def on_data(&callback)
        @data_callback = callback
      end

      # Sets a block to be called when all data
      # for the part has been read.
      def on_end(&callback)
        @end_callback = callback
      end
    end

    private

    # Wires the low level parser events to this reader.
    def init_parser_callbacks
      @parser.on(:part_begin) do
        @part = Part.new
        @header_field = ''
        @header_value = ''
      end

      # Header names and values may arrive in several fragments
      @parser.on(:header_field) do |b, start, the_end|
        @header_field << b[start...the_end]
      end

      @parser.on(:header_value) do |b, start, the_end|
        @header_value << b[start...the_end]
      end

      @parser.on(:header_end) do
        store_current_header
      end

      @parser.on(:headers_end) do
        @on_part.call(@part) unless @on_part.nil?
      end

      @parser.on(:part_data) do |b, start, the_end|
        @part.emit_data b[start...the_end]
      end

      @parser.on(:part_end) do
        @part.emit_end
      end

      @parser.on(:end) do
        @ended = true
        @on_end.call unless @on_end.nil?
      end
    end

    # Records the header collected so far on the current part
    # (header names are stored downcased) and resets the buffers.
    def store_current_header
      @header_field.downcase!
      @part.headers[@header_field] = @header_value
      if @header_field == 'content-disposition'
        # \b keeps this from matching the tail of filename="..."
        if @header_value =~ /\bname="([^"]+)"/i
          @part.name = $1
        end
        if @header_value =~ /filename="([^;]+)"/i
          match = $1
          # Keep only what follows the last backslash
          start = (match.rindex("\\") || -1)+1
          @part.filename = match[start...(match.length)]
        end
      elsif @header_field == 'content-type'
        @part.mime = @header_value
      end
      @header_field = ''
      @header_value = ''
    end
  end
end
data/lifter.gemspec ADDED
@@ -0,0 +1,29 @@
1
# Make lib/ loadable so the version constant can be read without installing the gem.
lib = File.expand_path("../lib", __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require "lifter/version"

Gem::Specification.new do |spec|
  # Identity
  spec.name = "lifter"
  spec.version = Lifter::VERSION
  spec.licenses = ["MIT"]
  spec.homepage = "https://github.com/sumofparts/lifter"

  spec.authors = ["Michael Amundson"]
  spec.email = ["sumofparts@uh-oh.co"]

  spec.summary = "Painless file uploads"
  spec.description =
    "A Ruby daemon for managing concurrent large file uploads independent of a web application."

  # Packaged files are whatever git tracks.
  spec.files = `git ls-files`.split("\n")
  spec.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
  spec.executables = `git ls-files -- bin/*`.split("\n").map { |path| File.basename(path) }
  spec.require_paths = ["lib"]

  spec.add_runtime_dependency "http_parser.rb", "~> 0.6.0"
  spec.add_runtime_dependency "http", "~> 0.9.8"
  spec.add_runtime_dependency "eventmachine", "~> 1.0.8"

  spec.add_development_dependency "bundler", "~> 1.0"
end
data/test/test.html ADDED
@@ -0,0 +1,14 @@
1
+ <html>
2
+ <head>
3
+ <title>Upload Test</title>
4
+ </head>
5
+
6
+ <body>
7
+ <form action="http://127.0.0.1:8080/upload" method="post" enctype="multipart/form-data">
8
+ <input type="hidden" name="upload[token]" value="xyzzy" />
9
+ <input type="file" name="upload[file1]" />
10
+ <input type="file" name="upload[file2]" />
11
+ <button type="submit">Upload!</button>
12
+ </form>
13
+ </body>
14
+ </html>
data/test/test.rb ADDED
@@ -0,0 +1,52 @@
1
+ require 'lifter'
2
+
3
+ server = Lifter::Server.new do |config|
4
+ # The host and port to listen on for this Lifter server. Typically, Lifter is backed by nginx
5
+ # or Apache, although it can directly listen to public network interfaces.
6
+ #
7
+ host '127.0.0.1'
8
+ port 8080
9
+
10
+ # A file system path to store in-progress uploads and completed uploads. What happens to uploads
11
+ # after they complete is outside of the scope of Lifter.
12
+ #
13
+ working_dir 'tmp/uploads'
14
+
15
+ # Define maximum size in bytes of a file upload. Files larger than this will be automatically
16
+ # removed, the connection closed, and no uploaded webhook will fire.
17
+ #
18
+ max_upload_size 500 * 1024 * 1024
19
+
20
+ # Specify desired digest type for file uploads. Passed in uploaded webhook after upload completes.
21
+ # Possible options: md5, sha1, sha256, sha512.
22
+ #
23
+ upload_hash_method :sha1
24
+
25
+ # Configure maximum number of bytes to pass along in authorize webhook.
26
+ #
27
+ upload_prologue_size 1024
28
+
29
+ # A request to this webhook is made once <upload_prologue_size> bytes have been received by the upload
30
+ # endpoint. The webhook request contains all of the original query params and headers of the
31
+ # upload request, the first <upload_prologue_size> bytes of data, HTTP headers reflecting the request IP,
32
+ # file name, claimed file size, and file MIME type.
33
+ #
34
+ # In the event the webhook returns a non-200 response, the upload connection is terminated and
35
+ # all uploaded data is removed.
36
+ #
37
+ # In the event the upload is multipart, this endpoint will be called once for each file, as soon
38
+ # as the first <upload_prologue_size> bytes of data are received. Non-200 responses for one part will not
39
+ # remove data from other parts, although the connection will still be terminated.
40
+ #
41
+ authorize_webhook :post, 'http://127.0.0.1:8081/uploads/authorize'
42
+
43
+ # A request to this webhook is made once a single file upload completes. In the event the upload
44
+ # is multipart with multiple files, this endpoint will be called once for each file, upon
45
+ # completion of the file.
46
+ #
47
+ # An authorize webhook is always sent prior to sending this webhook.
48
+ #
49
+ completed_webhook :post, 'http://127.0.0.1:8081/uploads/ingest'
50
+ end
51
+
52
+ server.start