RubyGems - pipes - Versions diffs - 0.1.0 - Mend

pipes 0.1.0

Files changed (24) hide show

data/.gitignore +24 -0
data/.rspec +1 -0
data/.rvmrc +1 -0
data/Gemfile +3 -0
data/LICENSE.txt +22 -0
data/README.md +331 -0
data/Rakefile +8 -0
data/lib/pipes.rb +46 -0
data/lib/pipes/resque_hooks.rb +18 -0
data/lib/pipes/runner.rb +112 -0
data/lib/pipes/stage_parser.rb +152 -0
data/lib/pipes/store.rb +122 -0
data/lib/pipes/utils.rb +7 -0
data/lib/pipes/version.rb +3 -0
data/pipes.gemspec +24 -0
data/spec/mock_jobs.rb +58 -0
data/spec/pipes/resque_hooks_spec.rb +22 -0
data/spec/pipes/runner_spec.rb +110 -0
data/spec/pipes/stage_parser_spec.rb +169 -0
data/spec/pipes/store_spec.rb +181 -0
data/spec/pipes/utils_spec.rb +14 -0
data/spec/pipes_spec.rb +46 -0
data/spec/spec_helper.rb +13 -0
metadata +140 -0

@@ -0,0 +1,112 @@
+module Pipes
+  # This is the entry point to running jobs.
+  #
+  # In most cases, this is the sole API used to start up some jobs and run
+  # a series of stages (a pipe).
+  #
+  class Runner
+    # Entry point to begin running jobs.
+    #
+    # eg, Pipes::Runner.run(MyStrategies::ContentWriter)
+    #       ie, You want to run a single job from somewhere in the app.
+    #     Pipes::Runner.run('MyStrategies::ContentWriter')
+    #       ie, Params were passed for a single job
+    #     Pipes::Runner.run([MyStrategies::ContentWriter, YourStrategies::Publisher])
+    #       ie, You want to run multiple jobs from somewhere in the app.
+    #     Pipes::Runner.run(['MyStrategies::ContentWriter', 'YourStrategies::Publisher'])
+    #       ie, Params were passed for multiple jobs
+    #     Pipes::Runner.run(:content_writers)
+    #       ie, You want to run an entire stage
+    #     Pipes::Runner.run([:content_writers, :publishers])
+    #       ie, You want to run multiple stages
+    #
+    def self.run(jobs, *args)
+      options = args.last.is_a?(Hash) ? args.pop : {}
+      self.new(jobs, args, options)
+    end
+    # Set up the runner.
+    #
+    def initialize(jobs, job_args, options)
+      @job_args, @options = job_args, options
+      @requested = normalize_jobs(jobs)
+      # Resolve if the option has been explicitly passed or it's specified in the config.
+      if @options[:resolve] or (@options[:resolve] != false and Pipes.resolve)
+        @requested = include_dependencies(@requested)
+      end
+      Store.add_pipe(construct_pipe, options)
+    end
+    private
+    # Normalize requested jobs into an array of classes.
+    #
+    def normalize_jobs(jobs)
+      if jobs.is_a?(Array)
+        jobs.map { |job| normalize_job(job) }
+      else
+        [normalize_job(jobs)]
+      end.flatten
+    end
+    # Normalize requested job, based on type requested
+    #
+    def normalize_job(job)
+      if job.is_a?(String)
+        Utils.constantize(job)
+      elsif job.is_a?(Symbol)
+        stage_parser.jobs_in_stage(job)
+      else
+        job
+      end
+    end
+    # Given a list of jobs, include dependencies of those jobs in
+    # the returned array.
+    #
+    def include_dependencies(jobs)
+      jobs.inject([]) do |resolved, job|
+        resolved << [job] + stage_parser.dependents_for(job)
+      end.flatten
+    end
+    # Filter jobs by only the ones being requested and build out the pipe
+    # array, including options.
+    #
+    def construct_pipe
+      # Of all the stages listed in the config...
+      stages.inject([]) do |filtered_stages, (stage_name, jobs)|
+        filtered = filtered_jobs(stage_parser.jobs_in_stage(stage_name))
+        # Add it unless all jobs have been filtered out
+        if !filtered.empty?
+          filtered_stages << {name: stage_name, jobs: filtered}
+        else; filtered_stages; end
+      end
+    end
+    # Construct an array of jobs that have been requested.
+    #
+    def filtered_jobs(jobs)
+      jobs.inject([]) do |filtered_jobs, registered_job|
+        # Is the configured job being requested for this pipe?
+        if @requested.include?(registered_job)
+          filtered_jobs << {class: registered_job, args: @job_args}
+        else; filtered_jobs; end
+      end
+    end
+    def stage_parser
+      @parser ||= StageParser.new
+    end
+    def stages
+      @stages ||= stage_parser.stages_with_resolved_dependencies
+    end
+  end
+end

data/lib/pipes/stage_parser.rb ADDED

@@ -0,0 +1,152 @@
+require 'abyss'
+module Pipes
+  class StageParser
+    def initialize(stages = nil)
+      @stages = stages || Abyss.configuration.stages.configurations
+      resolve_dependencies
+    end
+    # Grab all stage names.
+    #
+    def stage_names
+      @stages.keys
+    end
+    # Grab all jobs for the given stage.
+    #
+    def jobs_in_stage(stage)
+      array_for_stage(@stages[stage])
+    end
+    # Recursively grab dependencies for a given job.
+    #
+    def dependents_for(job)
+      if !@dependencies[job] or @dependencies[job].empty?
+        []
+      else
+        recursive_dependencies = @dependencies[job].map{ |strat| dependents_for(strat) }
+        (@dependencies[job] + recursive_dependencies).flatten.uniq
+      end
+    end
+    # Normalize configured stages so they have a consistent form.
+    #
+    # This will return a structure exactly the same as that defined in the config,
+    # except, all the "magic" dependencies (symbols to other stages, references
+    # to classes, and arrays of both) have been replaced with the name of the actual
+    # dependency, ie the class.
+    #
+    # Further, each job has been converted to a hash, with the job as the
+    # key and the dependencies as the the values.
+    #
+    # This data is normalized so that it can be used within the interface, and what
+    # to do about the dependencies is up to the implementation.
+    #
+    def stages_with_resolved_dependencies
+      # Looping over all stages...
+      @stages.inject({}) do |resolved_stages, (name, stage)|
+        # Looping over all jobs...
+        resolved_stages[name] = stage.inject([]) do |resolved_stage, job|
+          job = job.keys[0] if job.is_a? Hash
+          # Normalze to new hash form
+          resolved_stage << {job => @dependencies[job]}
+        end
+        resolved_stages
+      end
+    end
+    private
+    # Populates @dependencies hash in the form of:
+    # {
+    #   SomeClass => [OtherClass, AnotherClass],
+    #   ...
+    # }
+    #
+    # Loop over and resolve dependencies on a job-by-job basis.
+    #
+    # Work from the bottom up since dependencies can only be specified for
+    # lower-priority stages (ie lower stages won't reference higher ones)
+    #
+    def resolve_dependencies
+      @dependencies = {}
+      reversed = Hash[@stages.to_a.reverse]
+      reversed.each do |name, stage|
+        stage.each do |job|
+          # Does the job have dependents?
+          if job.is_a? Hash
+            strat, dependents = job.to_a.first
+            @dependencies[strat] = dependencies_for_job(dependents)
+          else
+            # Defined job is a simple class (eg Publisher)
+            @dependencies[job] = []
+          end
+        end
+      end
+    end
+    # If the job has dependents, figure out how to resolve.
+    #
+    def dependencies_for_job(dependents)
+      if dependents.is_a? Symbol
+        # Referring to another stage (eg :publishers)
+        dependents_for_stage(dependents)
+      elsif dependents.is_a? Array
+        # Referring to an array of dependencies (eg [:publishers, Publisher2])
+        dependencies_from_array(dependents)
+      else
+        # Referring to another job (eg Publisher1)
+        [dependents] + dependents_for(dependents)
+      end
+    end
+    # Iterate over all jobs for this stage and find dependents.
+    #
+    def dependents_for_stage(stage_name)
+      stage = @stages[stage_name.to_sym]
+      stage.inject([]) do |klasses, job|
+        # Does the job have dependents?
+        if job.is_a? Hash
+          strat, dependents = job.to_a.first
+          klasses << strat
+          klasses << dependencies_for_job(dependents)
+        else
+          # Defined job is a simple class (eg Publisher)
+          klasses << [job] + dependents_for(job)
+        end
+      end.flatten.uniq
+    end
+    # When dependencies are defined as an array, loop over the array and resolve.
+    #
+    def dependencies_from_array(dependents)
+      # Referring to an array of dependents
+      # Can be a mixed array (eg [:publishers, Publisher2])
+      dependents.inject([]) do |klasses, dependent|
+        if dependent.is_a? Symbol
+          # Referring to an array of stages (eg [:publishers, :emailers])
+          klasses << dependents_for_stage(dependent)
+        else
+          # Referring to an array of jobs (eg [Publisher1, Publisher2])
+          klasses << [dependent] + dependents_for(dependent)
+        end
+      end.flatten.uniq
+    end
+    # Just list the jobs in the stage, ignoring dependencies.
+    #
+    def array_for_stage(stage)
+      stage.inject([]) do |arr, job|
+        arr << if job.is_a? Hash
+          # Take just the job class, without any dependents
+          job.keys[0]
+        else
+          job
+        end
+      end
+    end
+  end
+end

data/lib/pipes/store.rb ADDED

@@ -0,0 +1,122 @@
+require 'redis/objects'
+require 'redis/list'
+require 'redis/counter'
+module Pipes
+  # Stages are stored in Redis in the following manner:
+  # pipes:stages:stage_1 [{class: 'ContentWriterStrategy', args: ['en-US']}, ...]
+  # pipes:stages:stage_2 [{class: 'PublisherStrategy', args: ['en-US']}]
+  #
+  # The jobs stored in Redis are Marshalled Ruby objects, so the structure is
+  # more-or-less arbitrary, though at a performance cost.
+  #
+  # Jobs are queued up in the following steps
+  #   1. Strategies in stage n? No, look in stage n+1 until last stage.
+  #                             Yes, shift off the next stage and queue up its jobs
+  #   2. Strategies run concurrently. Keep track of how many are currently running to
+  #      know when the next stage should be started.
+  #
+  class Store
+    # Add a new set of stages to Redis.
+    #
+    # stages  - array of {name: ..., jobs: [...]} hashes; each job is appended
+    #           to its stage's pending list when valid_for_queue? allows it.
+    # options - passed through to valid_for_queue? (see :allow_duplicates).
+    #
+    # Finishes by calling next_stage.
+    #
+    def self.add_pipe(stages, options = {})
+      stages.each do |stage|
+        stage[:jobs].each do |job|
+          pending = pending_jobs(stage[:name])
+          pending << job if valid_for_queue?(stage[:name], pending, job, options)
+        end
+      end
+      next_stage
+    end
+    # Fire off the next available stage, if available.
+    #
+    # Does nothing unless the remaining-jobs counter equals 0.
+    #
+    def self.next_stage
+      return unless remaining_jobs == 0
+      # Always start at the first stage, in case new strategies have been added mid-pipe
+      stages.each do |stage|
+        if !(jobs = pending_jobs(stage)).empty?
+          # Only the first stage with pending jobs is run; it is then emptied
+          run_stage(jobs)
+          clear(stage)
+          return
+        end
+      end
+    end
+    # Actually enqueue the jobs.
+    #
+    # The remaining-jobs counter is reset to the number of jobs before any
+    # of them is enqueued through Resque.
+    #
+    def self.run_stage(jobs)
+      remaining_jobs.clear
+      remaining_jobs.incr(jobs.count)
+      jobs.each do |job|
+        Resque.enqueue(job[:class], *job[:args])
+      end
+    end
+    # Register that a job has finished.
+    #
+    # Decrements the remaining-jobs counter and moves on to the next stage
+    # once it reaches 0.
+    #
+    def self.done
+      if remaining_jobs.decrement == 0
+        next_stage
+      end
+    end
+    # Clear a specific stage queue.
+    #
+    def self.clear(stage)
+      pending_jobs(stage).clear
+    end
+    # Find all stage queues in Redis (even ones not configured), and clear them.
+    #
+    # Also resets the remaining-jobs counter.
+    #
+    def self.clear_all
+      stage_keys = Redis.current.keys "#{@redis_stages_key}:*"
+      Redis.current.del *stage_keys unless stage_keys.empty?
+      remaining_jobs.clear
+    end
+    # NOTE(review): a bare `private` only affects instance methods; the
+    # `def self.` helpers below stay publicly callable. Hiding them would
+    # need private_class_method - confirm no caller relies on them first.
+    private
+    # Decide whether a job may be appended to a stage's pending list.
+    #
+    # When options[:allow_duplicates] is set and does not include this stage,
+    # a job is rejected if another pending job has the same class. In every
+    # case a job identical to one already pending is rejected.
+    #
+    def self.valid_for_queue?(stage, pending, job, options)
+      # allow_duplicates checks just the class for duplication
+      if options[:allow_duplicates] and !Array(options[:allow_duplicates]).include?(stage)
+        # The block parameter shadows the outer `job` inside the block only
+        pending_classes = pending.map { |job| job[:class] }
+        return false if pending_classes.include?(job[:class])
+      end
+      # Is this exact job already queued up?
+      !pending.include?(job)
+    end
+    # Configured stage names; a new StageParser is built on every call.
+    #
+    def self.stages
+      StageParser.new.stage_names
+    end
+    # Redis key for the named stage's pending list.
+    #
+    def self.stage_key(name)
+      "#{@redis_stages_key}:#{name}"
+    end
+    # Marshalled Redis list holding the jobs waiting in the given stage.
+    #
+    def self.pending_jobs(stage)
+      Redis::List.new(stage_key(stage), marshal: true)
+    end
+    # Memoized Redis counter of jobs that have not yet called .done.
+    #
+    def self.remaining_jobs
+      @remaining_jobs ||= Redis::Counter.new(@redis_remaining_key)
+    end
+    # Key prefix: @namespace, preceded by "<Pipes.namespace>:" when one is set.
+    #
+    def self.namespace
+      "#{Pipes.namespace + ':' if Pipes.namespace}#{@namespace}"
+    end
+    # The assignments below run once, while the class body is evaluated.
+    # They must stay after self.namespace, and they capture whatever
+    # Pipes.namespace returns at that moment.
+    @namespace           = 'pipes'
+    # All pending stages for the current job
+    @redis_stages_key    = "#{namespace}:stages"
+    # Remaining jobs to call .done, ie jobs still in the workers
+    @redis_remaining_key = "#{namespace}:stage_remaining"
+  end
+end

data/lib/pipes/utils.rb ADDED

@@ -0,0 +1,7 @@
+module Pipes
+  module Utils
+    def self.constantize(str)
+      str.split('::').reject(&:empty?).inject(Kernel) { |const, name| const.const_get(name) }
+    end
+  end
+end

data/lib/pipes/version.rb ADDED

@@ -0,0 +1,3 @@
+module Pipes
+  # Version string of the pipes gem.
+  VERSION = '0.1.0'
+end

data/pipes.gemspec ADDED

@@ -0,0 +1,24 @@
+# -*- encoding: utf-8 -*-
+lib = File.expand_path('../lib', __FILE__)
+$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
+require 'pipes/version'
+# Gem specification for pipes.
+Gem::Specification.new do |gem|
+  gem.name          = "pipes"
+  # Version comes from the constant defined in pipes/version
+  gem.version       = Pipes::VERSION
+  gem.authors       = ["Mike Pack"]
+  gem.email         = ["mikepackdev@gmail.com"]
+  gem.description   = %q{A Redis-backed concurrency management system}
+  gem.summary       = %q{A Redis-backed concurrency management system}
+  gem.homepage      = "http://www.github.com/mikepack/pipes"
+  # File list is whatever `git ls-files` prints, split on the record
+  # separator ($/), so this shells out to git when the spec is evaluated
+  gem.files         = `git ls-files`.split($/)
+  # Test files are the packaged files whose path starts with spec/
+  gem.test_files    = gem.files.grep(%r{^(spec)/})
+  gem.require_paths = ["lib"]
+  # Runtime dependencies, each pinned with a pessimistic (~>) constraint
+  gem.add_dependency 'resque', '~> 1.22.0'
+  gem.add_dependency 'redis-objects', '~> 0.5.3'
+  gem.add_dependency 'abyss', '~> 0.4.0'
+  # Development-only dependency, no version constraint
+  gem.add_development_dependency 'rspec'
+end

data/spec/mock_jobs.rb ADDED

@@ -0,0 +1,58 @@
+# These are Resque jobs
+# Mock writer jobs; all three declare the :content_writers queue.
+module Writers
+  class ContentWriter
+    @queue = :content_writers
+    # Sleeps for 5 seconds; the locale argument is not used.
+    def self.perform(locale)
+      sleep 5
+    end
+  end
+  class AnotherContentWriter
+    @queue = :content_writers
+    # Sleeps for 5 seconds; the locale argument is not used.
+    def self.perform(locale)
+      sleep 5
+    end
+  end
+  # NOTE(review): presumably left out of the configured stages on purpose,
+  # going by the name - confirm against the spec configuration.
+  class UnregisteredStrategy
+    @queue = :content_writers
+    # No-op; takes no arguments.
+    def self.perform; end
+  end
+end
+# Mock publisher job on the :publishers queue.
+module Publishers
+  class Publisher
+    @queue = :publishers
+    # Sleeps for 5 seconds; the locale argument is not used.
+    def self.perform(locale)
+      sleep 5
+    end
+  end
+end
+# Mock messenger job; declares no @queue and performs nothing.
+module Messengers
+  class SMS
+    def self.perform; end
+  end
+end
+# Mock uploader job; declares no @queue and performs nothing.
+module Uploaders
+  class Rsync
+    def self.perform; end
+  end
+end
+# Mock emailer jobs; neither declares a @queue and both perform nothing.
+module Emailers
+  class Email
+    def self.perform; end
+  end
+  class AnotherEmail
+    def self.perform; end
+  end
+end
+# Mock notifier job; declares no @queue and performs nothing.
+module Notifiers
+  class Twitter
+    def self.perform; end
+  end
+end