ruby-pipeline 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,14 @@
1
#! /usr/bin/ruby

gem 'ruby-pipeline'
require 'client'

# scratch directory for per-job data; defaults to the current directory
scratch_dir = ARGV[0] || "./"

# locate the server's ring service and grab the shared queues
RubyPipeline::Client.start_client(scratch_dir)

# pull jobs off the queue and run them, forever
RubyPipeline::Client.start_work

# keep the process alive while DRb services remote requests
DRb.thread.join
@@ -0,0 +1,30 @@
1
#! /usr/bin/ruby

gem 'ruby-pipeline'
require 'server'

# directory that holds jobs/ and receives finished/ results
working_dir = ARGV[0] || "./"

# bring up the ring server and host the two queues
RubyPipeline::Server.start_server(working_dir)

# queue every job already sitting in the working directory
RubyPipeline::Server.start_jobs

# watcher thread: poll once a minute for job directories added after
# startup ("check the working directory for new dirs")
Thread.new do
  loop do
    sleep 60
    RubyPipeline::Server.start_jobs
  end
end

# collector thread: download results as clients report jobs finished
Thread.new { RubyPipeline::Server.finish_jobs }

# all done — block forever while DRb serves requests
DRb.thread.join
@@ -0,0 +1,55 @@
1
+ require 'rinda/ring'
2
+ require 'rinda/tuplespace'
3
+ require 'fileutils'
4
+ require 'drb'
5
+
6
+ require 'job'
7
+ require 'client_env'
8
+
9
module RubyPipeline

  # Client side of the pipeline: discovers the server's Rinda-hosted
  # queues, then repeatedly pops jobs, runs them, and pushes the results
  # back on the 'finished' queue.
  module Client
    # All the work we need to do before we can get started: connect to
    # the ring server and look up the two shared queues. working_dir is
    # the scratch root under which per-job directories are created.
    # Exits the process with status -1 if no ring server can be reached.
    def self.start_client(working_dir)
      # set up the working directory and remove its contents
      @working_dir = working_dir

      # XXX this line is a little too dangerous
      #FileUtils.rm_r(Dir.glob("#{@working_dir}/**"))

      # start DRb so we can hold live proxies (the queues and the jobs'
      # RemoteFiles are DRb references, not copies)
      DRb.start_service

      begin
        # NOTE(review): assumes exactly one ring server on the network —
        # the server side makes the same assumption
        ring_server = Rinda::RingFinger.primary
        # tuples are [:name, :Queue, <queue proxy>, <description>];
        # index 2 is the queue object itself
        @job_queue = ring_server.read( [:name, :Queue, nil, 'jobs'] )[2]
        @finished_queue = ring_server.read( [:name, :Queue, nil, 'finished'] )[2]
      rescue
        # bare rescue catches StandardError from the ring lookup;
        # report the underlying error and bail out
        puts "Couldn't connect to the server; is it started?"
        puts $!
        exit(-1)
      end

      # XXX it's not too hard to add a "universal" job that all clients run
      # before starting real jobs. like binaries that are installed locally,
      # or whatever.
    end

    # The main work loop. Blocks on the job queue; presumably Queue#pop
    # never yields nil, so in practice this loops until the process dies.
    def self.start_work
      while job = @job_queue.pop
        # create the environment (this also downloads the job's inputs)
        env = ClientEnv.new(@working_dir, job)

        # do the work (forks a child process to run the job's code)
        env.run

        # post whatever we produced back to the server: a new Job whose
        # files are the result paths, with no code attached
        job = Job.new("#{job.name}_finished", nil, env.results)
        @finished_queue.push job

        # all done! remove the downloaded inputs from scratch space
        env.cleanup
      end
    end
  end
end
@@ -0,0 +1,47 @@
1
require 'fileutils'
require 'job'
2
+
3
# the client runs the server's code in the context of one of these objects.
# this helps us isolate the server code (at least a little bit) from the rest
# of the pipeline environment, and later on we can use it to establish a
# neat little environment for the client code
module RubyPipeline
  # Per-job scratch environment on the client: downloads the job's input
  # files into a dedicated directory, runs the job's code in a forked
  # child, and reports which files the job produced.
  class ClientEnv
    # working_dir: the client's scratch root. job: a RubyPipeline::Job
    # whose files are RemoteFile proxies served over DRb. Downloads every
    # input file into working_dir/<job name>/.
    # NOTE(review): this file never requires 'fileutils' itself — it
    # relies on a requirer having loaded it; confirm.
    def initialize(working_dir, job)
      # create this job's directory
      @dir, @job = "#{working_dir}/#{job.name}", job
      FileUtils.mkdir_p(@dir)

      # pull down all the files, streaming each chunk-by-chunk from the
      # server (RemoteFile#get yields buffers)
      # NOTE(review): mode "w" is text mode — binary inputs could be
      # corrupted on newline-translating platforms; confirm "wb" isn't
      # needed
      @job.files.each do |remote_file|
        File.open(@dir + "/" + remote_file.filename, "w") do |dest|
          remote_file.get { |f| dest << f }
        end
      end
    end

    # fork a new process, change the working directory, and run the code.
    # Blocks until the child exits (Kernel.fork is Unix-only).
    # XXX instance_eval of server-supplied code executes arbitrary Ruby on
    # this machine — acceptable only on a trusted network.
    def run
      pid = Kernel.fork do
        Dir.chdir(@dir)
        self.instance_eval(@job.code)
      end

      Process.waitpid(pid)
    end

    # remove the job's *input* files from the scratch directory; anything
    # the job created is deliberately left in place for #results
    def cleanup
      FileUtils.rm(@job.files.collect { |f| @dir + "/" + f.filename })
    end

    # paths of the files the job produced
    def results
      # all the files that we DIDNT add are considered results
      theirs = @job.files.collect { |f| f.filename }
      ours = Dir.glob("#{@dir}/*").delete_if do |f|
        theirs.include?(File.basename(f))
      end

      ours
    end
  end
end
@@ -0,0 +1,15 @@
1
+ require 'remote_file'
2
+
3
# this is what ends up in the queues
module RubyPipeline
  # A unit of work shipped from the server to a client. name identifies
  # the job (and its directory), code is a string of Ruby evaluated on
  # the client, and files are the inputs, each wrapped as a RemoteFile
  # proxy so clients can stream the bytes over DRb.
  class Job
    attr_accessor :code, :files, :name

    # code is a string of ruby code that gets executed on the client.
    # files is an array of filenames, each of which will be available in
    # the working directory of the client when the client starts.
    #
    # Fix: files defaults to nil, but the original called files.collect
    # unconditionally, raising NoMethodError whenever the default was
    # used. Treat nil as "no input files".
    def initialize(name, code, files = nil)
      @name, @code = name, code
      @files = (files || []).collect { |f| RemoteFile.new(f) }
    end
  end
end
@@ -0,0 +1,29 @@
1
+ require 'drb'
2
+
3
# drb files are hard, so we wrap it all up in a class. this is how we get
# big files to the clients without having an existing distributed file
# system in place
module RubyPipeline
  # Server-side handle to a local file that clients read over DRb.
  # DRbUndumped keeps the object on the serving side: remote callers get
  # a proxy and pull the bytes through #get instead of marshalling the
  # whole file across the wire.
  class RemoteFile
    include DRbUndumped

    # filename: path to the file on the serving host's filesystem
    def initialize(filename)
      @filename = filename
    end

    # we flatten the filesystem out, maybe we shouldnt
    def filename
      File.basename(@filename)
    end

    # Yields the bytes of the file, buf_size at a time.
    # Fix: open in binary mode ("rb") so the transfer is byte-exact on
    # platforms that would otherwise perform newline translation in text
    # mode. (dave, thanks for the buffer size suggestion; that just about
    # quadrupled performance)
    def get(buf_size=8192)
      File.open(@filename, "rb") do |f|
        while buf = f.read(buf_size)
          yield buf
        end
      end
    end
  end
end
@@ -0,0 +1,77 @@
1
+ require 'set'
2
+ require 'rinda/ring'
3
+ require 'rinda/tuplespace'
4
+ require 'drb'
5
+
6
+ require 'job'
7
+
8
module RubyPipeline

  # Server side of the pipeline: hosts a Rinda ring server plus two
  # queues ('jobs' and 'finished'), turns directories under
  # <working_dir>/jobs into Jobs, and downloads results reported by
  # clients into <working_dir>/finished.
  module Server
    # start a rinda server, create the two queues, host them both, and
    # host whatever universal files are provided.
    # working_dir must contain config/client_code.rb (the code every
    # client runs) and a jobs/ directory.
    def self.start_server(working_dir)
      @working_dir = working_dir

      DRb.start_service

      # XXX we assume no other ringservers are running on the network
      Rinda::RingServer.new Rinda::TupleSpace.new

      @job_queue = Queue.new
      @finished_queue = Queue.new

      # whoops! you MUST provide drbobjects for the object to be
      # synchronized between hosts. otherwise it just gets marshalled
      # and unmarshalled.
      Rinda::RingProvider.new(:Queue,
        DRbObject.new(@job_queue), 'jobs').provide
      Rinda::RingProvider.new(:Queue,
        DRbObject.new(@finished_queue), 'finished').provide

      # used for keeping track of which jobs we've already started
      @started_jobs = Set.new
      @code = File.read("#{@working_dir}/config/client_code.rb")

      # this is where we'd set up the job for the universal files
    end

    # start any new jobs currently sitting in the working directory.
    # Safe to call repeatedly: @started_jobs remembers which directories
    # have already been queued.
    def self.start_jobs
      Dir.glob("#{@working_dir}/jobs/*") do |path|

        # notice that jobs must have a unique directory name to be
        # considered unique jobs; Set#add? returns nil for duplicates,
        # so already-queued directories are skipped
        if File.directory?(path) && @started_jobs.add?(path)
          job_name = File.basename path
          job = Job.new(job_name, @code,
                        Dir.glob("#{@working_dir}/jobs/#{job_name}/**"))
          @job_queue.enq(job)
        end
      end
    end

    # looks for finished jobs and downloads the results. Blocks forever
    # on the finished queue — run it in its own thread.
    def self.finish_jobs
      while finished = @finished_queue.pop
        # set up the directory for the results.
        # Fix: the original interpolated "#{@working_dir}finished" with no
        # separator, which only worked when working_dir happened to end in
        # "/"; every other path in this module uses an explicit slash.
        dir = "#{@working_dir}/finished/#{finished.name}"

        # XXX i have no idea why this doesnt work, but it causes ruby
        # to hang right here when we try to use fileutils, so we just
        # use the shell. we officially have a unix dependency! there are
        # similar calls all over the place, so im totally mystified
        #FileUtils.mkdir_p(dir)
        # XXX dir is interpolated into a shell command unquoted — job
        # names containing spaces or shell metacharacters will break (or
        # worse); acceptable only on a trusted network
        `mkdir -p #{dir}`

        # grab all the files for the finished job, streaming each one
        # back from the client that produced it
        finished.files.each do |remote_file|
          File.open("#{dir}/#{remote_file.filename}", "w") do |dest|
            remote_file.get { |f| dest << f }
          end
        end
      end
    end
  end
end
metadata ADDED
@@ -0,0 +1,53 @@
1
+ --- !ruby/object:Gem::Specification
2
+ rubygems_version: 0.9.4
3
+ specification_version: 1
4
+ name: ruby-pipeline
5
+ version: !ruby/object:Gem::Version
6
+ version: 1.0.0
7
+ date: 2007-09-15 00:00:00 -04:00
8
+ summary: A simple batch-process management system for small clusters
9
+ require_paths:
10
+ - lib
11
+ email: amckinle@andrew.cmu.edu
12
+ homepage: http://ruby-pipeline.rubyforge.org
13
+ rubyforge_project:
14
+ description:
15
+ autorequire:
16
+ default_executable:
17
+ bindir: bin
18
+ has_rdoc: false
19
+ required_ruby_version: !ruby/object:Gem::Version::Requirement
20
+ requirements:
21
+ - - ">"
22
+ - !ruby/object:Gem::Version
23
+ version: 0.0.0
24
+ version:
25
+ platform: ruby
26
+ signing_key:
27
+ cert_chain:
28
+ post_install_message:
29
+ authors:
30
+ - Austin McKinley
31
+ files:
32
+ - bin/pipeline-client
33
+ - bin/pipeline-server
34
+ - lib/server.rb
35
+ - lib/client_env.rb
36
+ - lib/job.rb
37
+ - lib/client.rb
38
+ - lib/remote_file.rb
39
+ test_files: []
40
+
41
+ rdoc_options: []
42
+
43
+ extra_rdoc_files: []
44
+
45
+ executables:
46
+ - pipeline-client
47
+ - pipeline-server
48
+ extensions: []
49
+
50
+ requirements: []
51
+
52
+ dependencies: []
53
+