ruby-pipeline 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,14 @@
1
#! /usr/bin/ruby

# Command-line entry point for a pipeline worker.
#
# Usage: pipeline-client [working_dir]

gem 'ruby-pipeline'
require 'client'

# The scratch directory is the first argument; default to the current directory.
scratch_dir = ARGV.first || "./"

pipeline_client = RubyPipeline::Client

# Prepare the client, then enter the work loop.
pipeline_client.start_client(scratch_dir)
pipeline_client.start_work

# Wait on the DRb service thread.
DRb.thread.join
@@ -0,0 +1,30 @@
1
#! /usr/bin/ruby

# Command-line entry point for the pipeline server.
#
# Usage: pipeline-server [working_dir]

gem 'ruby-pipeline'
require 'server'

# Get the working directory; default to the current directory.
working_dir = ARGV[0] || "./"

# All systems go!
RubyPipeline::Server.start_server(working_dir)

# Start all the jobs currently in our working directory.
RubyPipeline::Server.start_jobs

# Start a thread to watch for jobs added later on. This means "check the
# working directory for new dirs" once a minute.
Thread.new do
  loop do
    sleep 60
    begin
      RubyPipeline::Server.start_jobs
    rescue StandardError => e
      # An uncaught exception would end this thread for good while the server
      # keeps running, so new jobs would silently never be picked up. Log the
      # failure and try again on the next tick instead.
      warn "pipeline-server: scanning for new jobs failed: #{e.class}: #{e.message}"
    end
  end
end

# Start looking for finished jobs.
Thread.new do
  loop do
    begin
      RubyPipeline::Server.finish_jobs
      # finish_jobs only returns once its queue hands back nil/false; in that
      # case stop, exactly as before.
      break
    rescue StandardError => e
      # One failed download must not stop the collection of later results.
      warn "pipeline-server: collecting a finished job failed: #{e.class}: #{e.message}"
    end
  end
end

# Keep the process alive for as long as the DRb service is running.
DRb.thread.join
@@ -0,0 +1,55 @@
1
+ require 'rinda/ring'
2
+ require 'rinda/tuplespace'
3
+ require 'fileutils'
4
+ require 'drb'
5
+
6
+ require 'job'
7
+ require 'client_env'
8
+
9
module RubyPipeline

  # Worker side of the pipeline: looks up the shared queues published by the
  # server and runs the jobs it pops from them.
  module Client
    # All the work we need to do before we can get started: remember the
    # working directory, start the local DRb service and fetch the 'jobs' and
    # 'finished' queues from the Rinda ring server.
    #
    # working_dir - directory under which each job gets its own scratch dir.
    #
    # Prints a diagnostic and exits with status -1 when the queues cannot be
    # looked up.
    def self.start_client(working_dir)
      @working_dir = working_dir

      # XXX this line is a little too dangerous
      #FileUtils.rm_r(Dir.glob("#{@working_dir}/**"))

      DRb.start_service

      begin
        ring_server = Rinda::RingFinger.primary
        # Each tuple is [:name, :Queue, <queue object>, <description>]; the
        # queue itself sits at index 2.
        @job_queue = ring_server.read([:name, :Queue, nil, 'jobs'])[2]
        @finished_queue = ring_server.read([:name, :Queue, nil, 'finished'])[2]
      rescue StandardError => e
        puts "Couldn't connect to the server; is it started?"
        puts e
        exit(-1)
      end

      # XXX it's not too hard to add a "universal" job that all clients run
      # before starting real jobs. like binaries that are installed locally,
      # or whatever.
    end

    # The main work loop. Pops jobs until the queue hands back nil/false. For
    # each job it builds a ClientEnv, runs it and pushes a "<name>_finished"
    # job carrying the produced files onto the finished queue.
    def self.start_work
      while (job = @job_queue.pop)
        # Create the environment (this also downloads the job's input files).
        env = ClientEnv.new(@working_dir, job)

        begin
          # Do the work.
          env.run

          # Post whatever we produced back to the server. A separate local is
          # used so the job being processed is not overwritten.
          finished = Job.new("#{job.name}_finished", nil, env.results)
          @finished_queue.push(finished)
        ensure
          # Always remove the downloaded inputs, even when running or
          # reporting the job raised; otherwise they pile up in the scratch
          # directory.
          env.cleanup
        end
      end
    end
  end
end
@@ -0,0 +1,47 @@
1
+ require 'job'
2
+
3
# FileUtils is used below; require it here so this file also works when it is
# loaded on its own.
require 'fileutils'

# The client runs the server's code in the context of one of these objects.
# This helps us isolate the server code (at least a little bit) from the rest
# of the pipeline environment, and later on we can use it to establish a
# neat little environment for the client code.
module RubyPipeline
  class ClientEnv
    # Creates the job's scratch directory ("<working_dir>/<job name>") and
    # downloads every input file into it.
    #
    # working_dir - parent directory for job scratch directories.
    # job         - object responding to #name, #code and #files; each file
    #               must respond to #filename and to #get, which yields the
    #               file contents chunk by chunk.
    def initialize(working_dir, job)
      # Create this job's directory.
      @dir, @job = "#{working_dir}/#{job.name}", job
      FileUtils.mkdir_p(@dir)

      # Pull down all the files. Binary mode keeps the payload byte-for-byte
      # identical on every platform.
      @job.files.each do |remote_file|
        File.open("#{@dir}/#{remote_file.filename}", "wb") do |dest|
          remote_file.get { |chunk| dest << chunk }
        end
      end
    end

    # Forks a new process, changes into the scratch directory and evaluates
    # the job's code in the context of this object. Blocks until the child
    # exits and returns what Process.waitpid returns.
    def run
      pid = Kernel.fork do
        Dir.chdir(@dir)
        # NOTE(review): this executes whatever code string the job carries,
        # with this process's privileges. Only run clients against a server
        # you trust.
        instance_eval(@job.code)
      end

      Process.waitpid(pid)
    end

    # Removes the downloaded input files from the scratch directory. Result
    # files are left in place. Inputs that the job code already deleted are
    # ignored instead of raising.
    def cleanup
      FileUtils.rm_f(@job.files.collect { |f| "#{@dir}/#{f.filename}" })
    end

    # Returns the paths of the regular files in the scratch directory that we
    # DIDN'T download: those are considered results. Subdirectories are
    # skipped because only plain files can be streamed chunk by chunk.
    def results
      inputs = @job.files.collect { |f| f.filename }

      Dir.glob("#{@dir}/*").select do |path|
        File.file?(path) && !inputs.include?(File.basename(path))
      end
    end
  end
end
@@ -0,0 +1,15 @@
1
+ require 'remote_file'
2
+
3
# This is what ends up in the queues.
module RubyPipeline
  class Job
    attr_accessor :code, :files, :name

    # name  - identifier of the job.
    # code  - a string of ruby code that gets executed on the client.
    # files - an array of filenames, each of which will be available in the
    #         working directory of the client when the client starts. Each
    #         one is wrapped in a RemoteFile. nil (the default) means "no
    #         files" and a single filename is treated as a one-element list.
    def initialize(name, code, files = nil)
      @name = name
      @code = code
      # Array() turns the nil default into [] instead of raising.
      @files = Array(files).collect { |f| RemoteFile.new(f) }
    end
  end
end
@@ -0,0 +1,29 @@
1
+ require 'drb'
2
+
3
# drb files are hard, so we wrap it all up in a class. This is how we get
# big files to the clients without having an existing distributed file
# system in place.
module RubyPipeline
  class RemoteFile
    # Instances are passed by reference over DRb rather than being copied.
    include DRbUndumped

    # filename - path of the file on the machine that creates this object.
    def initialize(filename)
      @filename = filename
    end

    # Returns only the base name of the file: we flatten the filesystem out,
    # maybe we shouldn't.
    def filename
      File.basename(@filename)
    end

    # Yields the bytes of the file, buf_size at a time. dave, thanks for
    # the buffer size suggestion; that just about quadrupled performance.
    #
    # buf_size - maximum number of bytes per yielded chunk (default 8192).
    #
    # The file is opened in binary mode so the bytes handed out are exactly
    # the bytes on disk, with no newline translation on any platform.
    def get(buf_size = 8192)
      File.open(@filename, "rb") do |f|
        while (buf = f.read(buf_size))
          yield buf
        end
      end
    end
  end
end
@@ -0,0 +1,77 @@
1
+ require 'set'
2
+ require 'rinda/ring'
3
+ require 'rinda/tuplespace'
4
+ require 'drb'
5
+
6
+ require 'job'
7
+
8
# FileUtils is needed by finish_jobs below.
require 'fileutils'

module RubyPipeline

  # Server side of the pipeline: publishes the job and finished queues,
  # enqueues one job per directory under "<working_dir>/jobs" and stores the
  # results of finished jobs under "<working_dir>/finished".
  module Server
    # Start a rinda server, create the two queues and host them both.
    #
    # working_dir - directory containing config/client_code.rb and jobs/.
    def self.start_server(working_dir)
      @working_dir = working_dir

      DRb.start_service

      # XXX we assume no other ringservers are running on the network
      Rinda::RingServer.new Rinda::TupleSpace.new

      @job_queue = Queue.new
      @finished_queue = Queue.new

      # whoops! you MUST provide drbobjects for the object to be synchronized
      # between hosts. otherwise it just gets marshalled and unmarshalled.
      Rinda::RingProvider.new(:Queue,
        DRbObject.new(@job_queue), 'jobs').provide
      Rinda::RingProvider.new(:Queue,
        DRbObject.new(@finished_queue), 'finished').provide

      # Used for keeping track of which jobs we've already started.
      @started_jobs = Set.new
      # The code every job carries to the clients.
      @code = File.read("#{@working_dir}/config/client_code.rb")

      # this is where we'd set up the job for the universal files
    end

    # Start any new jobs currently sitting in the working directory. Every
    # directory directly under "<working_dir>/jobs" that has not been seen
    # before becomes one job, named after the directory.
    def self.start_jobs
      Dir.glob("#{@working_dir}/jobs/*") do |path|
        # Notice that jobs must have a unique directory name to be considered
        # unique jobs. Set#add? returns nil for paths we already started.
        next unless File.directory?(path) && @started_jobs.add?(path)

        # Only regular files can be streamed to a client, so nested
        # directories inside a job directory are left out.
        inputs = Dir.glob("#{path}/*").select { |entry| File.file?(entry) }
        @job_queue.enq(Job.new(File.basename(path), @code, inputs))
      end
    end

    # Looks for finished jobs and downloads the results into
    # "<working_dir>/finished/<finished job name>". Loops until the queue
    # hands back nil/false.
    def self.finish_jobs
      while (finished = @finished_queue.pop)
        # Set up the directory for the results. File.join supplies the
        # separator, so this also works when the working directory has no
        # trailing slash.
        dir = File.join(@working_dir, "finished", finished.name)

        # NOTE(review): this used to shell out to `mkdir -p` because
        # FileUtils seemed to hang here. This file never required
        # 'fileutils', so the likely cause was a NameError silently killing
        # the background thread -- confirm. Using FileUtils drops the unix
        # dependency and copes with spaces or shell metacharacters in names.
        FileUtils.mkdir_p(dir)

        # Grab all the files for the finished job; binary mode keeps the
        # bytes untouched.
        finished.files.each do |remote_file|
          File.open(File.join(dir, remote_file.filename), "wb") do |dest|
            remote_file.get { |chunk| dest << chunk }
          end
        end
      end
    end
  end
end
metadata ADDED
@@ -0,0 +1,53 @@
1
+ --- !ruby/object:Gem::Specification
2
+ rubygems_version: 0.9.4
3
+ specification_version: 1
4
+ name: ruby-pipeline
5
+ version: !ruby/object:Gem::Version
6
+ version: 1.0.0
7
+ date: 2007-09-15 00:00:00 -04:00
8
+ summary: A simple batch-process management system for small clusters
9
+ require_paths:
10
+ - lib
11
+ email: amckinle@andrew.cmu.edu
12
+ homepage: http://ruby-pipeline.rubyforge.org
13
+ rubyforge_project:
14
+ description:
15
+ autorequire:
16
+ default_executable:
17
+ bindir: bin
18
+ has_rdoc: false
19
+ required_ruby_version: !ruby/object:Gem::Version::Requirement
20
+ requirements:
21
+ - - ">"
22
+ - !ruby/object:Gem::Version
23
+ version: 0.0.0
24
+ version:
25
+ platform: ruby
26
+ signing_key:
27
+ cert_chain:
28
+ post_install_message:
29
+ authors:
30
+ - Austin McKinley
31
+ files:
32
+ - bin/pipeline-client
33
+ - bin/pipeline-server
34
+ - lib/server.rb
35
+ - lib/client_env.rb
36
+ - lib/job.rb
37
+ - lib/client.rb
38
+ - lib/remote_file.rb
39
+ test_files: []
40
+
41
+ rdoc_options: []
42
+
43
+ extra_rdoc_files: []
44
+
45
+ executables:
46
+ - pipeline-client
47
+ - pipeline-server
48
+ extensions: []
49
+
50
+ requirements: []
51
+
52
+ dependencies: []
53
+