ruby-pipeline 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,14 @@
1
#! /usr/bin/ruby

gem 'ruby-pipeline'
require 'client'

# scratch directory for per-job data; defaults to the current directory
scratch_dir = ARGV[0] || "./"

# locate the server's ring service and grab the shared queues
RubyPipeline::Client.start_client(scratch_dir)

# pull jobs off the queue and run them, forever
RubyPipeline::Client.start_work

# keep the process alive while DRb services remote requests
DRb.thread.join
@@ -0,0 +1,30 @@
1
#! /usr/bin/ruby

gem 'ruby-pipeline'
require 'server'

# directory that holds jobs/ and receives finished/ results
working_dir = ARGV[0] || "./"

# bring up the ring server and host the two queues
RubyPipeline::Server.start_server(working_dir)

# queue every job already sitting in the working directory
RubyPipeline::Server.start_jobs

# watcher thread: poll once a minute for job directories added after
# startup ("check the working directory for new dirs")
Thread.new do
  loop do
    sleep 60
    RubyPipeline::Server.start_jobs
  end
end

# collector thread: download results as clients report jobs finished
Thread.new { RubyPipeline::Server.finish_jobs }

# all done — block forever while DRb serves requests
DRb.thread.join
@@ -0,0 +1,55 @@
1
+ require 'rinda/ring'
2
+ require 'rinda/tuplespace'
3
+ require 'fileutils'
4
+ require 'drb'
5
+
6
+ require 'job'
7
+ require 'client_env'
8
+
9
module RubyPipeline

  # Client side of the pipeline: discovers the server's Rinda-hosted
  # queues, then repeatedly pops jobs, runs them, and pushes the results
  # back on the 'finished' queue.
  module Client
    # All the work we need to do before we can get started: connect to
    # the ring server and look up the two shared queues. working_dir is
    # the scratch root under which per-job directories are created.
    # Exits the process with status -1 if no ring server can be reached.
    def self.start_client(working_dir)
      # set up the working directory and remove its contents
      @working_dir = working_dir

      # XXX this line is a little too dangerous
      #FileUtils.rm_r(Dir.glob("#{@working_dir}/**"))

      # start DRb so we can hold live proxies (the queues and the jobs'
      # RemoteFiles are DRb references, not copies)
      DRb.start_service

      begin
        # NOTE(review): assumes exactly one ring server on the network —
        # the server side makes the same assumption
        ring_server = Rinda::RingFinger.primary
        # tuples are [:name, :Queue, <queue proxy>, <description>];
        # index 2 is the queue object itself
        @job_queue = ring_server.read( [:name, :Queue, nil, 'jobs'] )[2]
        @finished_queue = ring_server.read( [:name, :Queue, nil, 'finished'] )[2]
      rescue
        # bare rescue catches StandardError from the ring lookup;
        # report the underlying error and bail out
        puts "Couldn't connect to the server; is it started?"
        puts $!
        exit(-1)
      end

      # XXX it's not too hard to add a "universal" job that all clients run
      # before starting real jobs. like binaries that are installed locally,
      # or whatever.
    end

    # The main work loop. Blocks on the job queue; presumably Queue#pop
    # never yields nil, so in practice this loops until the process dies.
    def self.start_work
      while job = @job_queue.pop
        # create the environment (this also downloads the job's inputs)
        env = ClientEnv.new(@working_dir, job)

        # do the work (forks a child process to run the job's code)
        env.run

        # post whatever we produced back to the server: a new Job whose
        # files are the result paths, with no code attached
        job = Job.new("#{job.name}_finished", nil, env.results)
        @finished_queue.push job

        # all done! remove the downloaded inputs from scratch space
        env.cleanup
      end
    end
  end
end
@@ -0,0 +1,47 @@
1
require 'fileutils'
require 'job'
2
+
3
# the client runs the server's code in the context of one of these objects.
# this helps us isolate the server code (at least a little bit) from the rest
# of the pipeline environment, and later on we can use it to establish a
# neat little environment for the client code
module RubyPipeline
  # Per-job scratch environment on the client: downloads the job's input
  # files into a dedicated directory, runs the job's code in a forked
  # child, and reports which files the job produced.
  class ClientEnv
    # working_dir: the client's scratch root. job: a RubyPipeline::Job
    # whose files are RemoteFile proxies served over DRb. Downloads every
    # input file into working_dir/<job name>/.
    # NOTE(review): this file never requires 'fileutils' itself — it
    # relies on a requirer having loaded it; confirm.
    def initialize(working_dir, job)
      # create this job's directory
      @dir, @job = "#{working_dir}/#{job.name}", job
      FileUtils.mkdir_p(@dir)

      # pull down all the files, streaming each chunk-by-chunk from the
      # server (RemoteFile#get yields buffers)
      # NOTE(review): mode "w" is text mode — binary inputs could be
      # corrupted on newline-translating platforms; confirm "wb" isn't
      # needed
      @job.files.each do |remote_file|
        File.open(@dir + "/" + remote_file.filename, "w") do |dest|
          remote_file.get { |f| dest << f }
        end
      end
    end

    # fork a new process, change the working directory, and run the code.
    # Blocks until the child exits (Kernel.fork is Unix-only).
    # XXX instance_eval of server-supplied code executes arbitrary Ruby on
    # this machine — acceptable only on a trusted network.
    def run
      pid = Kernel.fork do
        Dir.chdir(@dir)
        self.instance_eval(@job.code)
      end

      Process.waitpid(pid)
    end

    # remove the job's *input* files from the scratch directory; anything
    # the job created is deliberately left in place for #results
    def cleanup
      FileUtils.rm(@job.files.collect { |f| @dir + "/" + f.filename })
    end

    # paths of the files the job produced
    def results
      # all the files that we DIDNT add are considered results
      theirs = @job.files.collect { |f| f.filename }
      ours = Dir.glob("#{@dir}/*").delete_if do |f|
        theirs.include?(File.basename(f))
      end

      ours
    end
  end
end
@@ -0,0 +1,15 @@
1
+ require 'remote_file'
2
+
3
# this is what ends up in the queues
module RubyPipeline
  # A unit of work shipped from the server to a client. name identifies
  # the job (and its directory), code is a string of Ruby evaluated on
  # the client, and files are the inputs, each wrapped as a RemoteFile
  # proxy so clients can stream the bytes over DRb.
  class Job
    attr_accessor :code, :files, :name

    # code is a string of ruby code that gets executed on the client.
    # files is an array of filenames, each of which will be available in
    # the working directory of the client when the client starts.
    #
    # Fix: files defaults to nil, but the original called files.collect
    # unconditionally, raising NoMethodError whenever the default was
    # used. Treat nil as "no input files".
    def initialize(name, code, files = nil)
      @name, @code = name, code
      @files = (files || []).collect { |f| RemoteFile.new(f) }
    end
  end
end
@@ -0,0 +1,29 @@
1
+ require 'drb'
2
+
3
# drb files are hard, so we wrap it all up in a class. this is how we get
# big files to the clients without having an existing distributed file
# system in place
module RubyPipeline
  # Server-side handle to a local file that clients read over DRb.
  # DRbUndumped keeps the object on the serving side: remote callers get
  # a proxy and pull the bytes through #get instead of marshalling the
  # whole file across the wire.
  class RemoteFile
    include DRbUndumped

    # filename: path to the file on the serving host's filesystem
    def initialize(filename)
      @filename = filename
    end

    # we flatten the filesystem out, maybe we shouldnt
    def filename
      File.basename(@filename)
    end

    # Yields the bytes of the file, buf_size at a time.
    # Fix: open in binary mode ("rb") so the transfer is byte-exact on
    # platforms that would otherwise perform newline translation in text
    # mode. (dave, thanks for the buffer size suggestion; that just about
    # quadrupled performance)
    def get(buf_size=8192)
      File.open(@filename, "rb") do |f|
        while buf = f.read(buf_size)
          yield buf
        end
      end
    end
  end
end
@@ -0,0 +1,77 @@
1
+ require 'set'
2
+ require 'rinda/ring'
3
+ require 'rinda/tuplespace'
4
+ require 'drb'
5
+
6
+ require 'job'
7
+
8
module RubyPipeline

  # Server side of the pipeline: hosts a Rinda ring server plus two
  # queues ('jobs' and 'finished'), turns directories under
  # <working_dir>/jobs into Jobs, and downloads results reported by
  # clients into <working_dir>/finished.
  module Server
    # start a rinda server, create the two queues, host them both, and
    # host whatever universal files are provided.
    # working_dir must contain config/client_code.rb (the code every
    # client runs) and a jobs/ directory.
    def self.start_server(working_dir)
      @working_dir = working_dir

      DRb.start_service

      # XXX we assume no other ringservers are running on the network
      Rinda::RingServer.new Rinda::TupleSpace.new

      @job_queue = Queue.new
      @finished_queue = Queue.new

      # whoops! you MUST provide drbobjects for the object to be
      # synchronized between hosts. otherwise it just gets marshalled
      # and unmarshalled.
      Rinda::RingProvider.new(:Queue,
        DRbObject.new(@job_queue), 'jobs').provide
      Rinda::RingProvider.new(:Queue,
        DRbObject.new(@finished_queue), 'finished').provide

      # used for keeping track of which jobs we've already started
      @started_jobs = Set.new
      @code = File.read("#{@working_dir}/config/client_code.rb")

      # this is where we'd set up the job for the universal files
    end

    # start any new jobs currently sitting in the working directory.
    # Safe to call repeatedly: @started_jobs remembers which directories
    # have already been queued.
    def self.start_jobs
      Dir.glob("#{@working_dir}/jobs/*") do |path|

        # notice that jobs must have a unique directory name to be
        # considered unique jobs; Set#add? returns nil for duplicates,
        # so already-queued directories are skipped
        if File.directory?(path) && @started_jobs.add?(path)
          job_name = File.basename path
          job = Job.new(job_name, @code,
                        Dir.glob("#{@working_dir}/jobs/#{job_name}/**"))
          @job_queue.enq(job)
        end
      end
    end

    # looks for finished jobs and downloads the results. Blocks forever
    # on the finished queue — run it in its own thread.
    def self.finish_jobs
      while finished = @finished_queue.pop
        # set up the directory for the results.
        # Fix: the original interpolated "#{@working_dir}finished" with no
        # separator, which only worked when working_dir happened to end in
        # "/"; every other path in this module uses an explicit slash.
        dir = "#{@working_dir}/finished/#{finished.name}"

        # XXX i have no idea why this doesnt work, but it causes ruby
        # to hang right here when we try to use fileutils, so we just
        # use the shell. we officially have a unix dependency! there are
        # similar calls all over the place, so im totally mystified
        #FileUtils.mkdir_p(dir)
        # XXX dir is interpolated into a shell command unquoted — job
        # names containing spaces or shell metacharacters will break (or
        # worse); acceptable only on a trusted network
        `mkdir -p #{dir}`

        # grab all the files for the finished job, streaming each one
        # back from the client that produced it
        finished.files.each do |remote_file|
          File.open("#{dir}/#{remote_file.filename}", "w") do |dest|
            remote_file.get { |f| dest << f }
          end
        end
      end
    end
  end
end
metadata ADDED
@@ -0,0 +1,53 @@
1
+ --- !ruby/object:Gem::Specification
2
+ rubygems_version: 0.9.4
3
+ specification_version: 1
4
+ name: ruby-pipeline
5
+ version: !ruby/object:Gem::Version
6
+ version: 1.0.0
7
+ date: 2007-09-15 00:00:00 -04:00
8
+ summary: A simple batch-process management system for small clusters
9
+ require_paths:
10
+ - lib
11
+ email: amckinle@andrew.cmu.edu
12
+ homepage: http://ruby-pipeline.rubyforge.org
13
+ rubyforge_project:
14
+ description:
15
+ autorequire:
16
+ default_executable:
17
+ bindir: bin
18
+ has_rdoc: false
19
+ required_ruby_version: !ruby/object:Gem::Version::Requirement
20
+ requirements:
21
+ - - ">"
22
+ - !ruby/object:Gem::Version
23
+ version: 0.0.0
24
+ version:
25
+ platform: ruby
26
+ signing_key:
27
+ cert_chain:
28
+ post_install_message:
29
+ authors:
30
+ - Austin McKinley
31
+ files:
32
+ - bin/pipeline-client
33
+ - bin/pipeline-server
34
+ - lib/server.rb
35
+ - lib/client_env.rb
36
+ - lib/job.rb
37
+ - lib/client.rb
38
+ - lib/remote_file.rb
39
+ test_files: []
40
+
41
+ rdoc_options: []
42
+
43
+ extra_rdoc_files: []
44
+
45
+ executables:
46
+ - pipeline-client
47
+ - pipeline-server
48
+ extensions: []
49
+
50
+ requirements: []
51
+
52
+ dependencies: []
53
+