RubyGems - lifter - Versions diffs - 0.1.0 - Mend

lifter 0.1.0

Files changed (23) hide show

checksums.yaml +7 -0
data/.gitignore +5 -0
data/Gemfile +2 -0
data/LICENSE +20 -0
data/lib/lifter/config.rb +54 -0
data/lib/lifter/connection.rb +170 -0
data/lib/lifter/file_manager.rb +158 -0
data/lib/lifter/file_pool.rb +20 -0
data/lib/lifter/file_upload.rb +74 -0
data/lib/lifter/payloads/inline_payload.rb +10 -0
data/lib/lifter/payloads/multipart_payload.rb +85 -0
data/lib/lifter/server.rb +26 -0
data/lib/lifter/thread_pool.rb +111 -0
data/lib/lifter/version.rb +3 -0
data/lib/lifter/webhook.rb +41 -0
data/lib/lifter.rb +14 -0
data/lib/multipart_parser/LICENSE +20 -0
data/lib/multipart_parser/parser.rb +246 -0
data/lib/multipart_parser/reader.rb +156 -0
data/lifter.gemspec +29 -0
data/test/test.html +14 -0
data/test/test.rb +52 -0
metadata +124 -0

data/lib/lifter/thread_pool.rb ADDED Viewed

@@ -0,0 +1,111 @@
+require 'monitor'
+require 'zlib'
+module Lifter
+  # Fixed-size pool of worker threads, each draining its own FIFO queue.
+  #
+  # Jobs are routed to a worker by hashing their tag, so every job pushed with the same tag is
+  # handled by the same worker, in push order. A tag can be cleared to make the workers skip the
+  # jobs that are still queued for it.
+  class ThreadPool
+    # pool_size - number of worker threads (and queues) to create. Workers are started right away.
+    def initialize(pool_size)
+      @pool_size = pool_size
+      # Monitor is re-entrant, which lets the private helpers nest synchronize calls.
+      @monitor = Monitor.new
+      @queues = {}
+      @workers = {}
+      # job_tag => number of jobs queued for that tag which no worker has picked up yet.
+      @pending = {}
+      # Tags whose queued jobs must be skipped.
+      @cleared = []
+      spawn_workers
+    end
+    # Add a job closure to the thread pool, tagged with a given job_tag to allow for consistent
+    # execution ordering.
+    #
+    # job_tag - any object; its to_s value is the tag. Raises ArgumentError when that is empty.
+    # job     - block to run on the worker that owns the tag.
+    #
+    # Returns the number of jobs pending for the tag, including this one.
+    def push(job_tag, &job)
+      job_tag = job_tag.to_s
+      raise ArgumentError, 'job_tag must be defined' if job_tag.empty?
+      job_hash = Zlib.crc32(job_tag)
+      worker_id = job_hash % @pool_size
+      # Count the job before the worker can see it. Enqueueing first would let the worker
+      # decrement the counter ahead of this increment and leave a stale pending count behind.
+      pending = add_pending(job_tag)
+      @queues[worker_id].push([job_tag, job])
+      pending
+    end
+    # For a given job_tag, prevents any future pending jobs from running.
+    #
+    # The tag is normalised with to_s, exactly as in #push, so clear(:upload) also affects jobs
+    # pushed as 'upload'.
+    def clear(job_tag)
+      job_tag = job_tag.to_s
+      @monitor.synchronize do
+        @cleared << job_tag unless @cleared.include?(job_tag)
+      end
+    end
+    # Returns true when job_tag is currently cleared. Once no jobs remain pending for the tag the
+    # cleared marker is dropped, so jobs pushed afterwards run normally.
+    private def cleared?(job_tag)
+      cleared = false
+      @monitor.synchronize do
+        cleared = @cleared.include?(job_tag)
+        @cleared.delete(job_tag) if count_pending(job_tag) == 0
+      end
+      cleared
+    end
+    # Increments the pending counter for job_tag and returns the new count.
+    private def add_pending(job_tag)
+      @monitor.synchronize do
+        @pending[job_tag] = count_pending(job_tag) + 1
+      end
+    end
+    # Returns the number of pending jobs for job_tag (0 when the tag is unknown).
+    private def count_pending(job_tag)
+      @monitor.synchronize do
+        @pending[job_tag] || 0
+      end
+    end
+    # Decrements the pending counter for job_tag. The entry is removed as soon as the count
+    # reaches zero so that @pending does not accumulate one key per tag ever seen.
+    private def remove_pending(job_tag)
+      @monitor.synchronize do
+        count = count_pending(job_tag)
+        if count <= 1
+          @pending.delete(job_tag)
+        else
+          @pending[job_tag] = count - 1
+        end
+      end
+    end
+    # Creates one queue and one worker thread per pool slot. Each worker blocks on its queue,
+    # updates the pending bookkeeping and runs the job unless its tag has been cleared.
+    private def spawn_workers
+      (0...@pool_size).each do |worker_id|
+        queue = Queue.new
+        @queues[worker_id] = queue
+        worker = Thread.new do
+          loop do
+            job_tag, job = queue.pop
+            remove_pending(job_tag)
+            next if cleared?(job_tag)
+            begin
+              job.call
+            rescue StandardError => e
+              # A failing job is treated as fatal: the error is printed and Kernel#exit is called.
+              # NOTE(review): confirm that exiting, rather than retrying the job, is the intended
+              # policy.
+              puts e.to_s
+              puts e.backtrace
+              exit
+            end
+          end
+        end
+        @workers[worker_id] = worker
+      end
+    end
+  end
+end

data/lib/lifter/version.rb ADDED Viewed

@@ -0,0 +1,3 @@
+module Lifter
+  # Version of the lifter gem; read by the gemspec.
+  VERSION = '0.1.0'.freeze
+end

data/lib/lifter/webhook.rb ADDED Viewed

@@ -0,0 +1,41 @@
+require 'http'
+module Lifter
+  # An outgoing HTTP request aimed at a configured endpoint.
+  #
+  # The URL and HTTP method are taken from the endpoint at construction time; headers and params
+  # start out empty and can be replaced before the request is sent with #deliver.
+  class Webhook
+    attr_reader :url, :method
+    attr_accessor :headers, :params
+    # endpoint - object responding to #url and #method.
+    def initialize(endpoint)
+      @url, @method = endpoint.url, endpoint.method
+      @headers = {}
+      @params = {}
+    end
+    # Stores a block meant to handle delivery failures.
+    # NOTE(review): the stored block is not invoked anywhere in this class - confirm where
+    # failures are supposed to be reported.
+    def on_failure(&block)
+      @on_failure = block
+    end
+    # Sends the request with the configured headers. For :get the params are handed to the client
+    # as the `params:` option, for :post and :put as the `form:` option.
+    #
+    # Returns whatever the HTTP client call returns.
+    # Raises StandardError for any other HTTP method.
+    def deliver
+      client = HTTP.headers(@headers)
+      verb = @method.to_sym
+      return client.get(@url, params: @params) if verb == :get
+      return client.post(@url, form: @params) if verb == :post
+      return client.put(@url, form: @params) if verb == :put
+      raise StandardError.new('unsupported http method in webhook')
+    end
+  end
+end

data/lib/lifter.rb ADDED Viewed

@@ -0,0 +1,14 @@
+require 'multipart_parser/parser'
+require 'multipart_parser/reader'
+require 'lifter/thread_pool'
+require 'lifter/file_pool'
+require 'lifter/webhook'
+require 'lifter/config'
+require 'lifter/connection'
+require 'lifter/payloads/multipart_payload'
+require 'lifter/payloads/inline_payload'
+require 'lifter/file_upload'
+require 'lifter/file_manager'
+require 'lifter/server'
+require 'lifter/version'

data/lib/multipart_parser/LICENSE ADDED Viewed

@@ -0,0 +1,20 @@
+Copyright (c) 2011,2012 Daniel Abrahamsson
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

data/lib/multipart_parser/parser.rb ADDED Viewed

@@ -0,0 +1,246 @@
+module MultipartParser
+  # A low level parser for multipart messages,
+  # based on the node-formidable parser.
+  #
+  # The parser is a character-driven state machine. Data is fed in with #write, possibly in
+  # several chunks, and the parser reports what it finds through callbacks registered with #on.
+  class Parser
+    def initialize
+      @boundary = nil
+      @boundary_chars = nil
+      @lookbehind = nil
+      @state = :parser_uninitialized
+      @index = 0  # Index into boundary or header
+      @flags = {}
+      @marks = {} # Keep track of different parts
+      @callbacks = {}
+    end
+    # Initializes the parser, using the given boundary
+    # (the bare boundary value, without the leading "--").
+    def init_with_boundary(boundary)
+      @boundary = "\r\n--" + boundary
+      # Scratch space for characters that matched a prefix of the boundary inside part data.
+      @lookbehind = "\0"*(@boundary.length + 8)
+      @state = :start
+      # Lookup table of every character occurring in the boundary, used to skip part data quickly.
+      @boundary_chars = {}
+      @boundary.each_byte do |b|
+        @boundary_chars[b.chr] = true
+      end
+    end
+    # Registers a callback to be called when the
+    # given event occurs. Each callback is expected to
+    # take three parameters: buffer, start_index, and end_index.
+    # All of these parameters may be nil, depending on the callback.
+    # Valid callbacks are:
+    # :end
+    # :header_field
+    # :header_value
+    # :header_end
+    # :headers_end
+    # :part_begin
+    # :part_data
+    # :part_end
+    def on(event, &callback)
+      @callbacks[event] = callback
+    end
+    # Writes data to the parser.
+    # Returns the number of bytes parsed.
+    # In practice, this means that if the return value
+    # is less than the buffer length, a parse error occurred.
+    # A parser that has not been given a boundary parses nothing and returns 0.
+    def write(buffer)
+      # Without a boundary there is nothing to match against; bail out before touching @boundary.
+      return 0 if @state == :parser_uninitialized
+      i = 0
+      buffer_length = buffer.length
+      # Work on local copies; they are written back only when the whole buffer was consumed.
+      index = @index
+      flags = @flags.dup
+      state = @state
+      lookbehind = @lookbehind
+      boundary = @boundary
+      boundary_chars = @boundary_chars
+      boundary_length = @boundary.length
+      boundary_end = boundary_length - 1
+      while i < buffer_length
+        c = buffer[i, 1]
+        case state
+          when :start
+            index = 0
+            state = :start_boundary
+          when :start_boundary # Differs in that it has no preceding \r\n
+            if index == boundary_length - 2
+              return i unless c == "\r"
+              index += 1
+            elsif index - 1 == boundary_length - 2
+              return i unless c == "\n"
+              # Boundary read successfully, begin next part
+              callback(:part_begin)
+              state = :header_field_start
+            else
+              return i unless c == boundary[index+2, 1] # Unexpected character
+              index += 1
+            end
+            i += 1
+          when :header_field_start
+            state = :header_field
+            @marks[:header_field] = i
+            index = 0
+          when :header_field
+            if c == "\r"
+              # An empty line instead of a header name: the header section is over.
+              @marks.delete :header_field
+              state = :headers_almost_done
+            else
+              index += 1
+              unless c == "-" # Skip hyphens
+                if c == ":"
+                  return i if index == 1 # Empty header field
+                  data_callback(:header_field, buffer, i, :clear => true)
+                  state = :header_value_start
+                else
+                  # Header names may only contain letters (and hyphens, handled above).
+                  cl = c.downcase
+                  return i if cl < "a" || cl > "z"
+                end
+              end
+            end
+            i += 1
+          when :header_value_start
+            if c == " " # Skip spaces
+              i += 1
+            else
+              @marks[:header_value] = i
+              state = :header_value
+            end
+          when :header_value
+            if c == "\r"
+              data_callback(:header_value, buffer, i, :clear => true)
+              callback(:header_end)
+              state = :header_value_almost_done
+            end
+            i += 1
+          when :header_value_almost_done
+            return i unless c == "\n"
+            state = :header_field_start
+            i += 1
+          when :headers_almost_done
+            return i unless c == "\n"
+            callback(:headers_end)
+            state = :part_data_start
+            i += 1
+          when :part_data_start
+            state = :part_data
+            @marks[:part_data] = i
+          when :part_data
+            prev_index = index
+            if index == 0
+              # Boyer-Moore derived algorithm to safely skip non-boundary data
+              # See http://debuggable.com/posts/parsing-file-uploads-at-500-
+              # mb-s-with-node-js:4c03862e-351c-4faa-bb67-4365cbdd56cb
+              while i + boundary_length <= buffer_length
+                break if boundary_chars.has_key? buffer[i + boundary_end].chr
+                i += boundary_length
+              end
+              c = buffer[i, 1]
+            end
+            if index < boundary_length
+              if boundary[index, 1] == c
+                if index == 0
+                  # Possible start of a boundary: flush the part data collected so far.
+                  data_callback(:part_data, buffer, i, :clear => true)
+                end
+                index += 1
+              else # It was not the boundary we found, after all
+                index = 0
+              end
+            elsif index == boundary_length
+              # The full boundary matched; the next character tells which kind it is.
+              index += 1
+              if c == "\r"
+                flags[:part_boundary] = true
+              elsif c == "-"
+                flags[:last_boundary] = true
+              else # We did not find a boundary after all
+                index = 0
+              end
+            elsif index - 1 == boundary_length
+              if flags[:part_boundary]
+                index = 0
+                if c == "\n"
+                  flags.delete :part_boundary
+                  callback(:part_end)
+                  callback(:part_begin)
+                  state = :header_field_start
+                  i += 1
+                  next # Ugly way to break out of the case statement
+                end
+              elsif flags[:last_boundary]
+                if c == "-"
+                  callback(:part_end)
+                  callback(:end)
+                  state = :end
+                else
+                  index = 0 # False alarm
+                end
+              else
+                index = 0
+              end
+            end
+            if index > 0
+              # When matching a possible boundary, keep a lookbehind
+              # reference in case it turns out to be a false lead
+              lookbehind[index-1] = c
+            elsif prev_index > 0
+              # If our boundary turns out to be rubbish,
+              # the captured lookbehind belongs to part_data
+              callback(:part_data, lookbehind, 0, prev_index)
+              @marks[:part_data] = i
+              # Reconsider the current character as it might be the
+              # beginning of a new sequence.
+              i -= 1
+            end
+            i += 1
+          when :end
+            # Everything after the closing boundary is ignored.
+            i += 1
+          else
+            return i
+        end
+      end
+      # End of this buffer: emit whatever is still marked. Marks that were open are reset to 0 so
+      # the same item continues at the start of the next buffer.
+      data_callback(:header_field, buffer, buffer_length)
+      data_callback(:header_value, buffer, buffer_length)
+      data_callback(:part_data, buffer, buffer_length)
+      @index = index
+      @state = state
+      @flags = flags
+      return buffer_length
+    end
+    private
+    # Issues a callback.
+    # Nothing is issued for an empty range (start == the_end) or for an event without a
+    # registered callback.
+    def callback(event, buffer = nil, start = nil, the_end = nil)
+      return if !start.nil? && start == the_end
+      if @callbacks.has_key? event
+        @callbacks[event].call(buffer, start, the_end)
+      end
+    end
+    # Issues a data callback for the range from the mark of data_type up to the_end.
+    # Does nothing when no mark is set for data_type.
+    # The only valid option is :clear,
+    # which, if true, removes the mark afterwards.
+    # If not specified, the mark is reset to 0.
+    def data_callback(data_type, buffer, the_end, options = {})
+      return unless @marks.has_key? data_type
+      callback(data_type, buffer, @marks[data_type], the_end)
+      unless options[:clear]
+        @marks[data_type] = 0
+      else
+        @marks.delete data_type
+      end
+    end
+  end
+end

data/lib/multipart_parser/reader.rb ADDED Viewed

@@ -0,0 +1,156 @@
+module MultipartParser
+  # Raised when a Content-Type value is not multipart or carries no boundary.
+  class NotMultipartError < StandardError; end;
+  # A more high level interface to MultipartParser.
+  #
+  # Wraps a Parser and turns its low level events into Part objects: headers are collected per
+  # part, and part data is forwarded to the callbacks registered on each Part.
+  class Reader
+    # Initializes a MultipartReader, that will
+    # read a request with the given boundary value.
+    def initialize(boundary)
+      @parser = Parser.new
+      @parser.init_with_boundary(boundary)
+      @header_field = ''
+      @header_value = ''
+      @part = nil
+      @ended = false
+      @on_error = nil
+      @on_part = nil
+      @on_end = nil
+      init_parser_callbacks
+    end
+    # Returns true if the parser has finished parsing
+    def ended?
+      @ended
+    end
+    # Sets a code block to call
+    # when part headers have been parsed. The block receives the Part.
+    def on_part(&callback)
+      @on_part = callback
+    end
+    # Sets a code block to call when the closing boundary has been read.
+    def on_end(&callback)
+      @on_end = callback
+    end
+    # Sets a code block to call when
+    # a parser error occurs. The block receives an error message.
+    def on_error(&callback)
+      @on_error = callback
+    end
+    # Write data from the given buffer (String)
+    # into the reader. If the parser does not consume the whole buffer,
+    # the error callback (if any) is invoked.
+    def write(buffer)
+      bytes_parsed = @parser.write(buffer)
+      if bytes_parsed != buffer.size
+        msg = "Parser error, #{bytes_parsed} of #{buffer.length} bytes parsed"
+        @on_error.call(msg) unless @on_error.nil?
+      end
+    end
+    # Extracts a boundary value from a Content-Type header.
+    # Note that it is the header value you provide here.
+    # Raises NotMultipartError if content_type is invalid.
+    def self.extract_boundary_value(content_type)
+      unless content_type =~ /multipart/i
+        raise NotMultipartError.new("Not a multipart content type!")
+      end
+      unless content_type =~ /boundary=(?:"([^"]+)"|([^;]+))/i
+        raise NotMultipartError.new("No multipart boundary")
+      end
+      # $1 holds a quoted boundary, $2 an unquoted one.
+      $1 || $2
+    end
+    # One part of a multipart message: its headers plus callbacks for its data.
+    class Part
+      attr_accessor :filename, :headers, :name, :mime
+      def initialize
+        @headers = {}
+        @data_callback = nil
+        @end_callback = nil
+      end
+      # Calls the data callback with the given data
+      def emit_data(data)
+        @data_callback.call(data) unless @data_callback.nil?
+      end
+      # Calls the end callback
+      def emit_end
+        @end_callback.call unless @end_callback.nil?
+      end
+      # Sets a block to be called when part data
+      # is read. The block should take one parameter,
+      # namely the read data.
+      def on_data(&callback)
+        @data_callback = callback
+      end
+      # Sets a block to be called when all data
+      # for the part has been read.
+      def on_end(&callback)
+        @end_callback = callback
+      end
+    end
+    private
+    # Wires the parser events to the reader state and to the user supplied callbacks.
+    def init_parser_callbacks
+      @parser.on(:part_begin) do
+        @part = Part.new
+        @header_field = ''
+        @header_value = ''
+      end
+      # Header names and values can arrive in several pieces, so they are accumulated.
+      @parser.on(:header_field) do |b, start, the_end|
+        @header_field << b[start...the_end]
+      end
+      @parser.on(:header_value) do |b, start, the_end|
+        @header_value << b[start...the_end]
+      end
+      @parser.on(:header_end) do
+        @header_field.downcase!
+        @part.headers[@header_field] = @header_value
+        if @header_field == 'content-disposition'
+          # The name parameter must start the value or follow ';' or whitespace; otherwise the
+          # tail of filename="..." would be mistaken for it.
+          if @header_value =~ /(?:\A|[;\s])name="([^"]+)"/i
+            @part.name = $1
+          end
+          if @header_value =~ /filename="([^;]+)"/i
+            # Keep only what follows the last backslash, dropping any Windows style directory.
+            match = $1
+            start = (match.rindex("\\") || -1)+1
+            @part.filename = match[start...(match.length)]
+          end
+        elsif @header_field == 'content-type'
+          @part.mime = @header_value
+        end
+        @header_field = ''
+        @header_value = ''
+      end
+      @parser.on(:headers_end) do
+        @on_part.call(@part) unless @on_part.nil?
+      end
+      @parser.on(:part_data) do |b, start, the_end|
+        @part.emit_data b[start...the_end]
+      end
+      @parser.on(:part_end) do
+        @part.emit_end
+      end
+      @parser.on(:end) do
+        @ended = true
+        @on_end.call unless @on_end.nil?
+      end
+    end
+  end
+end

data/lifter.gemspec ADDED Viewed

@@ -0,0 +1,29 @@
+# Gem specification for lifter. lib/ is put on the load path first so that the version constant
+# can be required; package contents are taken from the files tracked by git.
+lib_dir = File.expand_path("../lib", __FILE__)
+$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
+require "lifter/version"
+Gem::Specification.new do |spec|
+  # Identity and metadata.
+  spec.name          = "lifter"
+  spec.version       = Lifter::VERSION
+  spec.authors       = ["Michael Amundson"]
+  spec.email         = ["sumofparts@uh-oh.co"]
+  spec.homepage      = "https://github.com/sumofparts/lifter"
+  spec.licenses      = ["MIT"]
+  spec.summary       = "Painless file uploads"
+  # The heredoc is collapsed onto a single line: surrounding whitespace is stripped and every
+  # run of whitespace becomes one space.
+  spec.description   = <<-DESCRIPTION.strip.gsub(/\s+/, " ")
+    A Ruby daemon for managing concurrent large file uploads independent of a web application.
+  DESCRIPTION
+  # Package contents, as listed by git.
+  spec.files         = `git ls-files`.split("\n")
+  spec.executables   = `git ls-files -- bin/*`.split("\n").map { |path| File.basename(path) }
+  spec.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
+  spec.require_paths = ["lib"]
+  # Dependencies.
+  spec.add_runtime_dependency "http_parser.rb", "~> 0.6.0"
+  spec.add_runtime_dependency "http", "~> 0.9.8"
+  spec.add_runtime_dependency "eventmachine", "~> 1.0.8"
+  spec.add_development_dependency "bundler", "~> 1.0"
+end

data/test/test.html ADDED Viewed

@@ -0,0 +1,14 @@
+<html>
+  <head>
+    <title>Upload Test</title>
+  </head>
+  <body>
+    <form action="http://127.0.0.1:8080/upload" method="post" enctype="multipart/form-data">
+      <input type="hidden" name="upload[token]" value="xyzzy" />
+      <input type="file" name="upload[file1]" />
+      <input type="file" name="upload[file2]" />
+      <button type="submit">Upload!</button>
+    </form>
+  </body>
+</html>

data/test/test.rb ADDED Viewed

@@ -0,0 +1,52 @@
+require 'lifter'
+# Example configuration: builds a Lifter server from the settings below and starts it.
+server = Lifter::Server.new do |config|
+  # The host and port to listen on for this Lifter server. Typically, Lifter is backed by nginx
+  # or Apache, although it can directly listen to public network interfaces.
+  #
+  host '127.0.0.1'
+  port 8080
+  # A file system path to store in-progress uploads and completed uploads. What happens to uploads
+  # after they complete is outside of the scope of Lifter.
+  #
+  working_dir 'tmp/uploads'
+  # Define maximum size in bytes of a file upload. Files larger than this will be automatically
+  # removed, the connection closed, and no completed webhook will fire.
+  #
+  max_upload_size 500 * 1024 * 1024
+  # Specify desired digest type for file uploads. Passed in completed webhook after upload completes.
+  # Possible options: md5, sha1, sha256, sha512.
+  #
+  upload_hash_method :sha1
+  # Configure maximum number of bytes to pass along in authorize webhook.
+  #
+  upload_prologue_size 1024
+  # A request to this webhook is made once upload_prologue_size bytes have been received by the
+  # upload endpoint. The webhook request contains all of the original query params and headers of
+  # the upload request, the first upload_prologue_size bytes of data, HTTP headers reflecting the
+  # request IP, file name, claimed file size, and file MIME type.
+  #
+  # In the event the webhook returns a non-200 response, the upload connection is terminated and
+  # all uploaded data is removed.
+  #
+  # In the event the upload is multipart, this endpoint will be called once for each file, as soon
+  # as the first upload_prologue_size bytes of data are received. Non-200 responses for one part
+  # will not remove data from other parts, although the connection will still be terminated.
+  #
+  authorize_webhook :post, 'http://127.0.0.1:8081/uploads/authorize'
+  # A request to this webhook is made once a single file upload completes. In the event the upload
+  # is multipart with multiple files, this endpoint will be called once for each file, upon
+  # completion of the file.
+  #
+  # An authorize webhook is always sent prior to sending this webhook.
+  #
+  completed_webhook :post, 'http://127.0.0.1:8081/uploads/ingest'
+end
+server.start