RubyGems - ruby-pipeline - Versions diffs - 1.0.0 - Mend

ruby-pipeline 1.0.0

Files changed (8) hide show

data/bin/pipeline-client ADDED

@@ -0,0 +1,14 @@
+#! /usr/bin/ruby
+gem 'ruby-pipeline'
+require 'client'
+working_dir = ARGV[0] || "./"
+# let's get this party started!
+RubyPipeline::Client.start_client(working_dir)
+# start working
+RubyPipeline::Client.start_work
+DRb.thread.join

data/bin/pipeline-server ADDED

@@ -0,0 +1,30 @@
+#! /usr/bin/ruby
+gem 'ruby-pipeline'
+require 'server'
+# get the working directory
+working_dir = ARGV[0] || "./"
+# all systems go!
+RubyPipeline::Server.start_server(working_dir)
+# start all the jobs currently in our working directory
+RubyPipeline::Server.start_jobs
+# start a thread to watch for jobs added later on. this means "check the
+# working directory for new dirs"
+Thread.new do
+  loop do
+    sleep 60
+    RubyPipeline::Server.start_jobs
+  end
+end
+# start looking for finished jobs
+Thread.new do
+  RubyPipeline::Server.finish_jobs
+end
+# all done
+DRb.thread.join

data/lib/client.rb ADDED

@@ -0,0 +1,55 @@
+require 'rinda/ring'
+require 'rinda/tuplespace'
+require 'fileutils'
+require 'drb'
+require 'job'
+require 'client_env'
+module RubyPipeline
+  module Client
+    # all the work we need to do before we can get started
+    def self.start_client(working_dir)
+      # set up the working directory and remove its contents
+      @working_dir = working_dir
+      # XXX this line is a little too dangerous
+      #FileUtils.rm_r(Dir.glob("#{@working_dir}/**"))
+      DRb.start_service
+      begin
+        ring_server = Rinda::RingFinger.primary
+        @job_queue = ring_server.read( [:name, :Queue, nil, 'jobs'] )[2]
+        @finished_queue = ring_server.read( [:name, :Queue, nil, 'finished'] )[2]
+      rescue
+        puts "Couldn't connect to the server; is it started?"
+        puts $!
+        exit(-1)
+      end
+      # XXX it's not too hard to add a "universal" job that all clients run
+      # before starting real jobs. like binaries that are installed locally,
+      # or whatever.
+    end
+    # the main work loop
+    def self.start_work
+      while job = @job_queue.pop
+        # create the environment
+        env = ClientEnv.new(@working_dir, job)
+        # do the work
+        env.run
+        # post whatever we produced back to the server
+        job = Job.new("#{job.name}_finished", nil, env.results)
+        @finished_queue.push job
+        # all done!
+        env.cleanup
+      end
+    end
+  end
+end

data/lib/client_env.rb ADDED

@@ -0,0 +1,47 @@
+require 'fileutils'
+require 'job'
+# the client runs the server's code in the context of one of these objects.
+# this helps us isolate the server code (at least a little bit) from the rest
+# of the of pipeline environment, and later on we can use it to establish a
+# neat little environment for the client code
+module RubyPipeline
+  class ClientEnv
+    def initialize(working_dir, job)
+      # create this job's directory
+      @dir, @job = "#{working_dir}/#{job.name}", job
+      FileUtils.mkdir_p(@dir)
+      # pull down all the files
+      @job.files.each do |remote_file|
+        File.open(@dir + "/" + remote_file.filename, "w") do |dest|
+          remote_file.get { |f| dest << f }
+        end
+      end
+    end
+    # fork a new process, change the working directory, and run the code
+    def run
+      pid = Kernel.fork do
+        Dir.chdir(@dir)
+        self.instance_eval(@job.code)
+      end
+      Process.waitpid(pid)
+    end
+    # remove the contents of the job's scratch directory
+    def cleanup
+      FileUtils.rm(@job.files.collect { |f| @dir + "/" + f.filename })
+    end
+    def results
+      # all the files that we DIDNT add are considered results
+      theirs = @job.files.collect { |f| f.filename }
+      ours = Dir.glob("#{@dir}/*").delete_if do |f|
+        theirs.include?(File.basename(f))
+      end
+      ours
+    end
+  end
+end

data/lib/job.rb ADDED

@@ -0,0 +1,15 @@
+require 'remote_file'
+# this is what ends up in the queues
+module RubyPipeline
+  class Job
+    attr_accessor :code, :files, :name
+    # code is a string of ruby code that gets executed on the client. files is an
+    # array of filenames, each of which will be available in the working
+    # directory of the client when the client starts
+    def initialize(name, code, files = nil)
+      @name, @code, @files = name, code, files.collect { |f| RemoteFile.new(f) }
+    end
+  end
+end

data/lib/remote_file.rb ADDED

@@ -0,0 +1,29 @@
+require 'drb'
+# drb files are hard, so we wrap it all up in a class. this is how we get
+# big files to the clients without having an existing distributed file
+# system in place
+module RubyPipeline
+  class RemoteFile
+    include DRbUndumped
+    def initialize(filename)
+      @filename = filename
+    end
+    # we flatten the filesystem out, maybe we shouldnt
+    def filename
+      File.basename(@filename)
+    end
+    # returns the bytes of the file, buf_size at a time. dave, thanks for
+    # the buffer size suggestion; that just about quadrupled performance
+    def get(buf_size=8192)
+      File.open(@filename) do |f|
+        while buf = f.read(buf_size)
+          yield buf
+        end
+      end
+    end
+  end
+end

data/lib/server.rb ADDED

@@ -0,0 +1,77 @@
+require 'set'
+require 'rinda/ring'
+require 'rinda/tuplespace'
+require 'drb'
+require 'job'
+module RubyPipeline
+  module Server
+    # start a rinda server, create the two queues,
+    # host them both, and host whatever universal
+    # files are provided
+    def self.start_server(working_dir)
+      @working_dir = working_dir
+      DRb.start_service
+      # XXX we assume no other ringservers are running on the network
+      Rinda::RingServer.new Rinda::TupleSpace.new
+      @job_queue = Queue.new
+      @finished_queue = Queue.new
+      # whoops! you MUST provide drbobjects for the object to be synchronized between
+      # hosts. otherwise it just gets marshalled and unmarshalled.
+      Rinda::RingProvider.new(:Queue,
+                              DRbObject.new(@job_queue), 'jobs').provide
+      Rinda::RingProvider.new(:Queue,
+                              DRbObject.new(@finished_queue), 'finished').provide
+      # used for keeping track of which jobs we've already started
+      @started_jobs = Set.new
+      @code = File.read("#{@working_dir}/config/client_code.rb")
+      # this is where we'd set up the job for the universal files
+    end
+    # start any new jobs currently sitting in the working
+    # directory
+    def self.start_jobs
+      Dir.glob("#{@working_dir}/jobs/*") do |path|
+        # notice that jobs must have a unique directory name to be considered
+        # unique jobs
+        if File.directory?(path) && @started_jobs.add?(path)
+          job_name = File.basename path
+          job = Job.new(job_name, @code,
+                        Dir.glob("#{@working_dir}/jobs/#{job_name}/**"))
+          @job_queue.enq(job)
+        end
+      end
+    end
+    # looks for finished jobs and downloads the results
+    def self.finish_jobs
+      while finished = @finished_queue.pop
+        # set up the directory for the results
+        dir = "#{@working_dir}finished/#{finished.name}"
+        # XXX i have no idea why this doesnt work, but it causes ruby
+        # to hang right here when we try to use fileutils, so we just
+        # use the shell. we officially have a unix dependency! there are
+        # similar calls all over the place, so im totally mystified
+        #FileUtils.mkdir_p(dir)
+        `mkdir -p #{dir}`
+        # grab all the files for the finished job
+        finished.files.each do |remote_file|
+          File.open("#{dir}/#{remote_file.filename}", "w") do |dest|
+            remote_file.get { |f| dest << f }
+          end
+        end
+      end
+    end
+  end
+end

metadata ADDED

@@ -0,0 +1,53 @@
+--- !ruby/object:Gem::Specification
+rubygems_version: 0.9.4
+specification_version: 1
+name: ruby-pipeline
+version: !ruby/object:Gem::Version
+  version: 1.0.0
+date: 2007-09-15 00:00:00 -04:00
+summary: A simple batch-process management system for small clusters
+require_paths:
+- lib
+email: amckinle@andrew.cmu.edu
+homepage: http://ruby-pipeline.rubyforge.org
+rubyforge_project:
+description:
+autorequire:
+default_executable:
+bindir: bin
+has_rdoc: false
+required_ruby_version: !ruby/object:Gem::Version::Requirement
+  requirements:
+  - - ">"
+    - !ruby/object:Gem::Version
+      version: 0.0.0
+  version:
+platform: ruby
+signing_key:
+cert_chain:
+post_install_message:
+authors:
+- Austin McKinley
+files:
+- bin/pipeline-client
+- bin/pipeline-server
+- lib/server.rb
+- lib/client_env.rb
+- lib/job.rb
+- lib/client.rb
+- lib/remote_file.rb
+test_files: []
+rdoc_options: []
+extra_rdoc_files: []
+executables:
+- pipeline-client
+- pipeline-server
+extensions: []
+requirements: []
+dependencies: []