lifter 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,111 @@
1
+ require 'monitor'
+ require 'zlib'
2
+
3
module Lifter
  # A fixed-size pool of worker threads.
  #
  # Every worker owns one FIFO queue. A job is routed to a worker by hashing its
  # tag (CRC32 modulo the pool size), so all jobs sharing a tag land on the same
  # worker and run in the order they were pushed.
  class ThreadPool
    # pool_size - number of worker threads (and queues) to spawn.
    def initialize(pool_size)
      @pool_size = pool_size

      # Guards @pending and @cleared, which are touched by callers and workers.
      @monitor = Monitor.new

      @queues = {}
      @workers = {}

      @pending = {} # job_tag => number of queued jobs not yet dequeued
      @cleared = [] # job_tags whose queued jobs must be skipped

      spawn_workers
    end

    # Add a job closure to the thread pool, tagged with a given job_tag to allow for consistent
    # execution ordering.
    #
    # job_tag - anything responding to to_s; the string form must not be empty.
    # job     - block to run on the worker.
    #
    # Returns the pending count recorded for the tag.
    # Raises ArgumentError when the tag is empty.
    def push(job_tag, &job)
      job_tag = job_tag.to_s

      raise ArgumentError.new('job_tag must be defined') if job_tag.empty?

      worker_id = Zlib.crc32(job_tag) % @pool_size
      queue = @queues[worker_id]

      # Count the job *before* it becomes visible to the worker. Enqueueing first
      # let the worker dequeue and decrement before the increment happened, which
      # left a phantom pending count of 1 for the tag.
      pending = add_pending(job_tag)
      queue.push([job_tag, job])

      pending
    end

    # For a given job_tag, prevents any future pending jobs from running.
    #
    # The tag is normalised with to_s exactly as #push does, so clear(:tag) and
    # clear('tag') address the same jobs.
    def clear(job_tag)
      job_tag = job_tag.to_s

      @monitor.synchronize do
        @cleared << job_tag unless @cleared.include?(job_tag)
      end
    end

    # Reports whether the tag is currently marked as cleared. The mark is dropped
    # once no more jobs are pending for the tag, so later pushes run normally.
    private def cleared?(job_tag)
      @monitor.synchronize do
        cleared = @cleared.include?(job_tag)
        @cleared.delete(job_tag) if count_pending(job_tag) == 0
        cleared
      end
    end

    # Increments the pending counter for the tag and returns the new count.
    private def add_pending(job_tag)
      @monitor.synchronize do
        @pending[job_tag] = count_pending(job_tag) + 1
      end
    end

    # Returns the number of pending jobs recorded for the tag (0 when unknown).
    private def count_pending(job_tag)
      @monitor.synchronize do
        @pending[job_tag] || 0
      end
    end

    # Decrements the pending counter for the tag. The entry is removed as soon as
    # the count would reach zero, so tags that are no longer in use do not
    # accumulate in @pending.
    private def remove_pending(job_tag)
      @monitor.synchronize do
        count = count_pending(job_tag)

        if count <= 1
          @pending.delete(job_tag)
        else
          @pending[job_tag] = count - 1
        end
      end
    end

    # Creates one queue and one worker thread per slot in the pool.
    private def spawn_workers
      (0...@pool_size).each do |worker_id|
        queue = Queue.new

        @queues[worker_id] = queue

        worker = Thread.new do
          loop do
            # Blocks until a job is available for this worker.
            job_tag, job = queue.pop
            remove_pending(job_tag)

            next if cleared?(job_tag)

            begin
              job.call
            rescue StandardError => e
              # A failing job is treated as fatal: the error is printed and `exit`
              # raises SystemExit. The re-queue statements that used to follow
              # `exit` could never execute and have been removed.
              puts e.to_s
              puts e.backtrace
              exit
            end
          end
        end

        @workers[worker_id] = worker
      end
    end
  end
end
@@ -0,0 +1,3 @@
1
+ module Lifter
2
+ VERSION = "0.1.0".freeze # Version string of the lifter gem; frozen so the shared constant cannot be mutated in place.
3
+ end
@@ -0,0 +1,41 @@
1
+ require 'http'
2
+
3
module Lifter
  # A single outgoing webhook request: target URL, HTTP method, headers and
  # params, plus the logic to send it through the `http` gem.
  class Webhook
    attr_reader :url, :method
    attr_accessor :headers, :params

    # endpoint - object responding to #url and #method (the HTTP verb, as a
    #            String or Symbol).
    def initialize(endpoint)
      @url = endpoint.url
      @method = endpoint.method
      @headers = {}
      @params = {}
      @on_failure = nil
    end

    # Stores a block intended to handle delivery failures.
    #
    # NOTE(review): #deliver does not invoke this block; confirm where (or
    # whether) it is supposed to be triggered.
    def on_failure(&block)
      @on_failure = block
    end

    # Sends the webhook with the configured headers and params.
    #
    # GET passes the params as query parameters; POST and PUT send them as a
    # form body. The method is matched case-insensitively, so "POST", "post"
    # and :post are all accepted.
    #
    # Returns whatever the HTTP client call returns.
    # Raises StandardError when the method is not get, post or put.
    def deliver
      http_stub = HTTP.headers(@headers)

      case @method.to_s.downcase.to_sym
      when :get
        http_stub.get(@url, params: @params)
      when :post
        http_stub.post(@url, form: @params)
      when :put
        http_stub.put(@url, form: @params)
      else
        raise StandardError.new('unsupported http method in webhook')
      end
    end
  end
end
data/lib/lifter.rb ADDED
@@ -0,0 +1,14 @@
1
+ require 'multipart_parser/parser'
2
+ require 'multipart_parser/reader'
3
+
4
+ require 'lifter/thread_pool'
5
+ require 'lifter/file_pool'
6
+ require 'lifter/webhook'
7
+ require 'lifter/config'
8
+ require 'lifter/connection'
9
+ require 'lifter/payloads/multipart_payload'
10
+ require 'lifter/payloads/inline_payload'
11
+ require 'lifter/file_upload'
12
+ require 'lifter/file_manager'
13
+ require 'lifter/server'
14
+ require 'lifter/version'
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2011,2012 Daniel Abrahamsson
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,246 @@
1
module MultipartParser
  # A low level parser for multipart messages,
  # based on the node-formidable parser.
  #
  # The parser is incremental: feed it consecutive chunks of the message with
  # #write and it reports what it finds through the callbacks registered
  # with #on. Parsing state is kept between calls to #write.
  class Parser
    def initialize
      @boundary = nil
      @boundary_chars = nil
      @lookbehind = nil
      @state = :parser_uninitialized
      @index = 0 # Index into boundary or header
      @flags = {}
      @marks = {} # Keep track of different parts
      @callbacks = {}
    end

    # Initializes the parser, using the given boundary
    # (the bare boundary value, without the leading "--").
    def init_with_boundary(boundary)
      @boundary = "\r\n--" + boundary
      # Scratch space for bytes that matched the start of a boundary and may
      # have to be handed back as part data if the match falls through.
      @lookbehind = "\0" * (@boundary.length + 8)
      @state = :start

      # Set of characters occurring in the boundary, used to skip ahead
      # quickly through part data.
      @boundary_chars = {}
      @boundary.each_byte do |b|
        @boundary_chars[b.chr] = true
      end
    end

    # Registers a callback to be called when the
    # given event occurs. Each callback is expected to
    # take three parameters: buffer, start_index, and end_index.
    # All of these parameters may be nil, depending on the callback.
    # Valid callbacks are:
    # :end
    # :header_field
    # :header_value
    # :header_end
    # :headers_end
    # :part_begin
    # :part_data
    # :part_end
    def on(event, &callback)
      @callbacks[event] = callback
    end

    # Writes data to the parser.
    # Returns the number of bytes parsed.
    # In practice, this means that if the return value
    # is less than the buffer length, a parse error occurred.
    # A parser that has not been given a boundary yet parses nothing
    # and returns 0.
    def write(buffer)
      # Without a boundary there is nothing to match against; previously this
      # case raised NoMethodError on @boundary.length instead of reporting
      # zero parsed bytes.
      return 0 if @state == :parser_uninitialized

      i = 0
      buffer_length = buffer.length
      index = @index
      flags = @flags.dup
      state = @state
      lookbehind = @lookbehind
      boundary = @boundary
      boundary_chars = @boundary_chars
      boundary_length = @boundary.length
      boundary_end = boundary_length - 1

      while i < buffer_length
        c = buffer[i, 1]
        case state
        when :parser_uninitialized
          return i
        when :start
          index = 0
          state = :start_boundary
        when :start_boundary # Differs in that it has no preceding \r\n
          if index == boundary_length - 2
            return i unless c == "\r"
            index += 1
          elsif index - 1 == boundary_length - 2
            return i unless c == "\n"
            # Boundary read successfully, begin next part
            callback(:part_begin)
            state = :header_field_start
          else
            return i unless c == boundary[index + 2, 1] # Unexpected character
            index += 1
          end
          i += 1
        when :header_field_start
          state = :header_field
          @marks[:header_field] = i
          index = 0
        when :header_field
          if c == "\r"
            @marks.delete :header_field
            state = :headers_almost_done
          else
            index += 1
            unless c == "-" # Skip hyphens
              if c == ":"
                return i if index == 1 # Empty header field
                data_callback(:header_field, buffer, i, :clear => true)
                state = :header_value_start
              else
                cl = c.downcase
                return i if cl < "a" || cl > "z"
              end
            end
          end
          i += 1
        when :header_value_start
          if c == " " # Skip spaces
            i += 1
          else
            @marks[:header_value] = i
            state = :header_value
          end
        when :header_value
          if c == "\r"
            data_callback(:header_value, buffer, i, :clear => true)
            callback(:header_end)
            state = :header_value_almost_done
          end
          i += 1
        when :header_value_almost_done
          return i unless c == "\n"
          state = :header_field_start
          i += 1
        when :headers_almost_done
          return i unless c == "\n"
          callback(:headers_end)
          state = :part_data_start
          i += 1
        when :part_data_start
          state = :part_data
          @marks[:part_data] = i
        when :part_data
          prev_index = index

          if index == 0
            # Boyer-Moore derived algorithm to safely skip non-boundary data
            # See http://debuggable.com/posts/parsing-file-uploads-at-500-
            # mb-s-with-node-js:4c03862e-351c-4faa-bb67-4365cbdd56cb
            while i + boundary_length <= buffer_length
              break if boundary_chars.key?(buffer[i + boundary_end].chr)
              i += boundary_length
            end
            c = buffer[i, 1]
          end

          if index < boundary_length
            if boundary[index, 1] == c
              if index == 0
                # Possible start of a boundary: flush the data seen so far.
                data_callback(:part_data, buffer, i, :clear => true)
              end
              index += 1
            else # It was not the boundary we found, after all
              index = 0
            end
          elsif index == boundary_length
            index += 1
            if c == "\r"
              flags[:part_boundary] = true
            elsif c == "-"
              flags[:last_boundary] = true
            else # We did not find a boundary after all
              index = 0
            end
          elsif index - 1 == boundary_length
            if flags[:part_boundary]
              index = 0
              if c == "\n"
                flags.delete :part_boundary
                callback(:part_end)
                callback(:part_begin)
                state = :header_field_start
                i += 1
                next # Ugly way to break out of the case statement
              end
            elsif flags[:last_boundary]
              if c == "-"
                callback(:part_end)
                callback(:end)
                state = :end
              else
                index = 0 # False alarm
              end
            else
              index = 0
            end
          end

          if index > 0
            # When matching a possible boundary, keep a lookbehind
            # reference in case it turns out to be a false lead
            lookbehind[index - 1] = c
          elsif prev_index > 0
            # If our boundary turns out to be rubbish,
            # the captured lookbehind belongs to part_data
            callback(:part_data, lookbehind, 0, prev_index)
            @marks[:part_data] = i

            # Reconsider the current character as it might be the
            # beginning of a new sequence.
            i -= 1
          end

          i += 1
        when :end
          i += 1
        else
          return i
        end
      end

      # Emit whatever is still marked in this buffer; the marks are reset to 0
      # so the data continues at the start of the next buffer.
      data_callback(:header_field, buffer, buffer_length)
      data_callback(:header_value, buffer, buffer_length)
      data_callback(:part_data, buffer, buffer_length)

      @index = index
      @state = state
      @flags = flags

      buffer_length
    end

    private

    # Issues a callback. Nothing is issued for an empty data range
    # (start equal to the_end).
    def callback(event, buffer = nil, start = nil, the_end = nil)
      return if !start.nil? && start == the_end
      if @callbacks.key?(event)
        @callbacks[event].call(buffer, start, the_end)
      end
    end

    # Issues a data callback for the range from the current mark of data_type
    # up to the_end. Does nothing when no mark is set for data_type.
    # The only valid option is :clear,
    # which, if true, removes the mark afterwards.
    # If not specified, the mark is reset to 0 instead, i.e. the data is
    # expected to continue at the beginning of the next buffer.
    def data_callback(data_type, buffer, the_end, options = {})
      return unless @marks.key?(data_type)
      callback(data_type, buffer, @marks[data_type], the_end)
      if options[:clear]
        @marks.delete data_type
      else
        @marks[data_type] = 0
      end
    end
  end
end
@@ -0,0 +1,156 @@
1
module MultipartParser
  # Raised when a Content-Type value is not multipart or has no boundary.
  class NotMultipartError < StandardError; end

  # A more high level interface to MultipartParser.
  #
  # The reader drives a Parser and turns its low level events into Part
  # objects carrying the parsed headers, name, filename and MIME type.
  class Reader
    # Initializes a MultipartReader, that will
    # read a request with the given boundary value.
    def initialize(boundary)
      @parser = Parser.new
      @parser.init_with_boundary(boundary)
      @header_field = ''
      @header_value = ''
      @part = nil
      @ended = false
      @on_error = nil
      @on_part = nil
      @on_end = nil

      init_parser_callbacks
    end

    # Returns true if the parser has finished parsing
    def ended?
      @ended
    end

    # Sets a code block to call
    # when part headers have been parsed.
    # The block receives the Part.
    def on_part(&callback)
      @on_part = callback
    end

    # Sets a code block to call when the end of the
    # multipart message has been reached.
    def on_end(&callback)
      @on_end = callback
    end

    # Sets a code block to call when
    # a parser error occurs. The block receives an error message.
    def on_error(&callback)
      @on_error = callback
    end

    # Write data from the given buffer (String)
    # into the reader. If the parser could not consume the whole
    # buffer, the error callback (if any) is called.
    def write(buffer)
      bytes_parsed = @parser.write(buffer)
      if bytes_parsed != buffer.size
        msg = "Parser error, #{bytes_parsed} of #{buffer.length} bytes parsed"
        @on_error.call(msg) unless @on_error.nil?
      end
    end

    # Extracts a boundary value from a Content-Type header.
    # Note that it is the header value you provide here.
    # A quoted boundary is returned without its quotes; an unquoted one is
    # returned up to the next ";" with surrounding whitespace removed.
    # Raises NotMultipartError if content_type is invalid.
    def self.extract_boundary_value(content_type)
      unless content_type =~ /multipart/i
        raise NotMultipartError.new("Not a multipart content type!")
      end

      match = /boundary=(?:"([^"]+)"|([^;]+))/i.match(content_type)
      raise NotMultipartError.new("No multipart boundary") if match.nil?

      match[1] || match[2].strip
    end

    # One part of a multipart message.
    class Part
      attr_accessor :filename, :headers, :name, :mime

      def initialize
        @headers = {}
        @data_callback = nil
        @end_callback = nil
      end

      # Calls the data callback with the given data
      def emit_data(data)
        @data_callback.call(data) unless @data_callback.nil?
      end

      # Calls the end callback
      def emit_end
        @end_callback.call unless @end_callback.nil?
      end

      # Sets a block to be called when part data
      # is read. The block should take one parameter,
      # namely the read data.
      def on_data(&callback)
        @data_callback = callback
      end

      # Sets a block to be called when all data
      # for the part has been read.
      def on_end(&callback)
        @end_callback = callback
      end
    end

    private

    # Wires the low level parser events to the reader's state and callbacks.
    def init_parser_callbacks
      @parser.on(:part_begin) do
        @part = Part.new
        @header_field = ''
        @header_value = ''
      end

      # Header names and values may arrive in several pieces; accumulate them.
      @parser.on(:header_field) do |b, start, the_end|
        @header_field << b[start...the_end]
      end

      @parser.on(:header_value) do |b, start, the_end|
        @header_value << b[start...the_end]
      end

      @parser.on(:header_end) do
        @header_field.downcase!
        @part.headers[@header_field] = @header_value
        if @header_field == 'content-disposition'
          # \b keeps `name=` from matching the tail of `filename=`, which used
          # to assign the filename as the part name whenever filename was
          # listed first.
          name = @header_value[/\bname="([^"]+)"/i, 1]
          @part.name = name if name

          # Capture up to the closing quote, so that file names containing
          # ";" are recognised as well.
          filename = @header_value[/\bfilename="([^"]+)"/i, 1]
          if filename
            # Drop any directory prefix ending in a backslash.
            start = (filename.rindex("\\") || -1) + 1
            @part.filename = filename[start...filename.length]
          end
        elsif @header_field == 'content-type'
          @part.mime = @header_value
        end
        @header_field = ''
        @header_value = ''
      end

      @parser.on(:headers_end) do
        @on_part.call(@part) unless @on_part.nil?
      end

      @parser.on(:part_data) do |b, start, the_end|
        @part.emit_data b[start...the_end]
      end

      @parser.on(:part_end) do
        @part.emit_end
      end

      @parser.on(:end) do
        @ended = true
        @on_end.call unless @on_end.nil?
      end
    end
  end
end
data/lifter.gemspec ADDED
@@ -0,0 +1,29 @@
1
+ lib = File.expand_path("../lib", __FILE__) # absolute path of the lib directory next to this gemspec
2
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib) # make lib requirable, without adding it twice
3
+ require "lifter/version" # provides Lifter::VERSION used for gem.version below
4
+
5
+ Gem::Specification.new do |gem|
6
+ gem.authors = ["Michael Amundson"]
7
+ gem.email = ["sumofparts@uh-oh.co"]
8
+
9
+ gem.description = <<-DESCRIPTION.strip.gsub(/\s+/, " ")
10
+ A Ruby daemon for managing concurrent large file uploads independent of a web application.
11
+ DESCRIPTION
12
+
13
+ gem.summary = "Painless file uploads"
14
+ gem.homepage = "https://github.com/sumofparts/lifter"
15
+ gem.licenses = ["MIT"]
16
+
17
+ gem.executables = `git ls-files -- bin/*`.split("\n").map { |f| File.basename(f) } # base names of the git-tracked files under bin/
18
+ gem.files = `git ls-files`.split("\n") # every file tracked by git
19
+ gem.test_files = `git ls-files -- {test,spec,features}/*`.split("\n") # git-tracked files under test/, spec/ or features/
20
+ gem.name = "lifter"
21
+ gem.require_paths = ["lib"]
22
+ gem.version = Lifter::VERSION
23
+
24
+ gem.add_runtime_dependency "http_parser.rb", "~> 0.6.0"
25
+ gem.add_runtime_dependency "http", "~> 0.9.8"
26
+ gem.add_runtime_dependency "eventmachine", "~> 1.0.8"
27
+
28
+ gem.add_development_dependency "bundler", "~> 1.0"
29
+ end
data/test/test.html ADDED
@@ -0,0 +1,14 @@
1
+ <html>
2
+ <head>
3
+ <title>Upload Test</title>
4
+ </head>
5
+
6
+ <body>
7
+ <form action="http://127.0.0.1:8080/upload" method="post" enctype="multipart/form-data">
8
+ <input type="hidden" name="upload[token]" value="xyzzy" />
9
+ <input type="file" name="upload[file1]" />
10
+ <input type="file" name="upload[file2]" />
11
+ <button type="submit">Upload!</button>
12
+ </form>
13
+ </body>
14
+ </html>
data/test/test.rb ADDED
@@ -0,0 +1,52 @@
1
+ require 'lifter'
2
+
3
+ server = Lifter::Server.new do |config| # NOTE(review): the settings below are called without a receiver, so the block is presumably evaluated in the config's context; confirm against Lifter::Server
4
+ # The host and port to listen on for this Lifter server. Typically, Lifter is backed by nginx
5
+ # or Apache, although it can directly listen to public network interfaces.
6
+ #
7
+ host '127.0.0.1'
8
+ port 8080
9
+
10
+ # A file system path to store in-progress uploads and completed uploads. What happens to uploads
11
+ # after they complete is outside of the scope of Lifter.
12
+ #
13
+ working_dir 'tmp/uploads'
14
+
15
+ # Define maximum size in bytes of a file upload. Files larger than this will be automatically
16
+ # removed, the connection closed, and no uploaded webhook will fire.
17
+ #
18
+ max_upload_size 500 * 1024 * 1024
19
+
20
+ # Specify desired digest type for file uploads. Passed in uploaded webhook after upload completes.
21
+ # Possible options: md5, sha1, sha256, sha512.
22
+ #
23
+ upload_hash_method :sha1
24
+
25
+ # Configure maximum number of bytes to pass along in authorize webhook.
26
+ #
27
+ upload_prologue_size 1024
28
+
29
+ # A request to this webhook is made once <upload_prologue_size> bytes have been received by the upload
30
+ # endpoint. The webhook request contains all of the original query params and headers of the
31
+ # upload request, the first <upload_prologue_size> bytes of data, HTTP headers reflecting the request IP,
32
+ # file name, claimed file size, and file MIME type.
33
+ #
34
+ # In the event the webhook returns a non-200 response, the upload connection is terminated and
35
+ # all uploaded data is removed.
36
+ #
37
+ # In the event the upload is multipart, this endpoint will be called once for each file, as soon
38
+ # as the first <upload_prologue_size> bytes of data are received. Non-200 responses for one part will not
39
+ # remove data from other parts, although the connection will still be terminated.
40
+ #
41
+ authorize_webhook :post, 'http://127.0.0.1:8081/uploads/authorize'
42
+
43
+ # A request to this webhook is made once a single file upload completes. In the event the upload
44
+ # is multipart with multiple files, this endpoint will be called once for each file, upon
45
+ # completion of the file.
46
+ #
47
+ # An authorize webhook is always sent prior to sending this webhook.
48
+ #
49
+ completed_webhook :post, 'http://127.0.0.1:8081/uploads/ingest'
50
+ end
51
+
52
+ server.start