ruby-pipeline 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/bin/pipeline-client +14 -0
- data/bin/pipeline-server +30 -0
- data/lib/client.rb +55 -0
- data/lib/client_env.rb +47 -0
- data/lib/job.rb +15 -0
- data/lib/remote_file.rb +29 -0
- data/lib/server.rb +77 -0
- metadata +53 -0
data/bin/pipeline-client
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#! /usr/bin/ruby
# Client entry point: connect to a running pipeline server and start
# pulling jobs off its queue.

gem 'ruby-pipeline'
require 'client'

# first CLI argument is the scratch directory; default to the current dir
dir = ARGV[0] || "./"

# let's get this party started! (locates the server's ring and queues)
RubyPipeline::Client.start_client(dir)

# start working: process jobs until the queue runs dry
RubyPipeline::Client.start_work

# keep the DRb service alive
DRb.thread.join
|
data/bin/pipeline-server
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
#! /usr/bin/ruby
# Server entry point: host the job/finished queues, seed them with jobs
# from the working directory, and collect results as they come back.

gem 'ruby-pipeline'
require 'server'

# get the working directory from the command line (default: current dir)
work_dir = ARGV[0] || "./"

# all systems go! start the ring server and the two shared queues
RubyPipeline::Server.start_server(work_dir)

# enqueue every job already sitting in our working directory
RubyPipeline::Server.start_jobs

# watch for jobs added later on: re-scan the working directory for new
# job dirs once a minute
Thread.new do
  loop do
    sleep 60
    RubyPipeline::Server.start_jobs
  end
end

# in parallel, look for finished jobs and download their results
Thread.new do
  RubyPipeline::Server.finish_jobs
end

# block forever on the DRb service thread
DRb.thread.join
|
data/lib/client.rb
ADDED
@@ -0,0 +1,55 @@
|
|
1
|
+
require 'rinda/ring'
|
2
|
+
require 'rinda/tuplespace'
|
3
|
+
require 'fileutils'
|
4
|
+
require 'drb'
|
5
|
+
|
6
|
+
require 'job'
|
7
|
+
require 'client_env'
|
8
|
+
|
9
|
+
module RubyPipeline

  module Client
    # All the work we need to do before we can get started: remember the
    # scratch directory, start a local DRb service, locate the ring server
    # on the network, and grab handles to the two shared queues.
    def self.start_client(working_dir)
      @working_dir = working_dir

      # XXX this line is a little too dangerous
      #FileUtils.rm_r(Dir.glob("#{@working_dir}/**"))

      DRb.start_service

      begin
        finger = Rinda::RingFinger.primary
        # the queue handle rides in slot 2 of the advertised tuple
        @job_queue      = finger.read([:name, :Queue, nil, 'jobs'])[2]
        @finished_queue = finger.read([:name, :Queue, nil, 'finished'])[2]
      rescue
        puts "Couldn't connect to the server; is it started?"
        puts $!
        exit(-1)
      end

      # XXX it's not too hard to add a "universal" job that all clients run
      # before starting real jobs. like binaries that are installed locally,
      # or whatever.
    end

    # The main work loop: pop a job, run it inside a scratch ClientEnv,
    # push whatever it produced back to the server, then clean up.
    def self.start_work
      loop do
        job = @job_queue.pop
        break unless job

        # create the environment (downloads the job's input files)
        env = ClientEnv.new(@working_dir, job)

        # do the work
        env.run

        # post whatever we produced back to the server
        @finished_queue.push(Job.new("#{job.name}_finished", nil, env.results))

        # all done!
        env.cleanup
      end
    end
  end
end
|
data/lib/client_env.rb
ADDED
@@ -0,0 +1,47 @@
|
|
1
|
+
require 'job'
|
2
|
+
|
3
|
+
require 'fileutils' # used throughout; don't rely on callers having loaded it

# the client runs the server's code in the context of one of these objects.
# this helps us isolate the server code (at least a little bit) from the rest
# of the pipeline environment, and later on we can use it to establish a
# neat little environment for the client code
module RubyPipeline
  class ClientEnv
    # working_dir: the client's scratch area. job: anything responding to
    # #name and #files (each file must respond to #filename and #get).
    # Creates a per-job directory and downloads every input file into it.
    def initialize(working_dir, job)
      # create this job's directory
      @dir, @job = "#{working_dir}/#{job.name}", job
      FileUtils.mkdir_p(@dir)

      # pull down all the files
      @job.files.each do |remote_file|
        File.open(@dir + "/" + remote_file.filename, "w") do |dest|
          remote_file.get { |f| dest << f }
        end
      end
    end

    # fork a new process, change the working directory, and run the code.
    # NOTE(review): instance_eval of a server-supplied string is arbitrary
    # code execution by design — only connect to a trusted server.
    def run
      pid = Kernel.fork do
        Dir.chdir(@dir)
        self.instance_eval(@job.code)
      end

      Process.waitpid(pid)
    end

    # remove the input files we downloaded into the job's scratch directory
    # (result files are left in place so the server can still fetch them)
    def cleanup
      FileUtils.rm(@job.files.collect { |f| @dir + "/" + f.filename })
    end

    # all the files that we DIDNT add are considered results
    def results
      theirs = @job.files.collect { |f| f.filename }
      Dir.glob("#{@dir}/*").delete_if do |f|
        theirs.include?(File.basename(f))
      end
    end
  end
end
|
data/lib/job.rb
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
require 'remote_file'
|
2
|
+
|
3
|
+
# this is what ends up in the queues
module RubyPipeline
  class Job
    attr_accessor :code, :files, :name

    # code is a string of ruby code that gets executed on the client. files is
    # an array of filenames, each of which will be available in the working
    # directory of the client when the client starts. files may be omitted
    # (or nil) for jobs that carry no file payload.
    def initialize(name, code, files = nil)
      @name, @code = name, code
      # bugfix: guard against nil — the old code called files.collect
      # directly and raised NoMethodError whenever the documented default
      # (files = nil) was actually used
      @files = (files || []).collect { |f| RemoteFile.new(f) }
    end
  end
end
|
data/lib/remote_file.rb
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
require 'drb'
|
2
|
+
|
3
|
+
# drb files are hard, so we wrap it all up in a class. this is how we get
# big files to the clients without having an existing distributed file
# system in place
module RubyPipeline
  class RemoteFile
    # DRbUndumped keeps the object on this host; peers get a proxy and the
    # bytes stream over the wire via #get instead of being marshalled
    include DRbUndumped

    # remember where the file lives on this host
    def initialize(filename)
      @filename = filename
    end

    # we flatten the filesystem out, maybe we shouldnt
    def filename
      File.basename(@filename)
    end

    # returns the bytes of the file, buf_size at a time. dave, thanks for
    # the buffer size suggestion; that just about quadrupled performance
    def get(buf_size = 8192)
      File.open(@filename) do |io|
        loop do
          chunk = io.read(buf_size)
          break unless chunk
          yield chunk
        end
      end
    end
  end
end
|
data/lib/server.rb
ADDED
@@ -0,0 +1,77 @@
|
|
1
|
+
require 'set'
|
2
|
+
require 'rinda/ring'
|
3
|
+
require 'rinda/tuplespace'
|
4
|
+
require 'drb'
|
5
|
+
|
6
|
+
require 'job'
|
7
|
+
|
8
|
+
require 'shellwords' # for escaping the shelled-out mkdir argument below

module RubyPipeline

  module Server
    # start a rinda server, create the two queues,
    # host them both, and host whatever universal
    # files are provided
    def self.start_server(working_dir)
      @working_dir = working_dir

      DRb.start_service

      # XXX we assume no other ringservers are running on the network
      Rinda::RingServer.new Rinda::TupleSpace.new

      @job_queue = Queue.new
      @finished_queue = Queue.new

      # whoops! you MUST provide drbobjects for the object to be synchronized between
      # hosts. otherwise it just gets marshalled and unmarshalled.
      Rinda::RingProvider.new(:Queue,
        DRbObject.new(@job_queue), 'jobs').provide
      Rinda::RingProvider.new(:Queue,
        DRbObject.new(@finished_queue), 'finished').provide

      # used for keeping track of which jobs we've already started
      @started_jobs = Set.new
      # the ruby source every client will instance_eval for each job
      @code = File.read("#{@working_dir}/config/client_code.rb")

      # this is where we'd set up the job for the universal files
    end

    # start any new jobs currently sitting in the working
    # directory
    def self.start_jobs
      Dir.glob("#{@working_dir}/jobs/*") do |path|

        # notice that jobs must have a unique directory name to be considered
        # unique jobs (Set#add? returns nil for already-seen paths)
        if File.directory?(path) && @started_jobs.add?(path)
          job_name = File.basename path
          job = Job.new(job_name, @code,
            Dir.glob("#{@working_dir}/jobs/#{job_name}/**"))
          @job_queue.enq(job)
        end
      end
    end

    # looks for finished jobs and downloads the results
    def self.finish_jobs
      while finished = @finished_queue.pop
        # set up the directory for the results
        # (bugfix: the old path was "#{@working_dir}finished/..." with no
        # separator, which only worked when working_dir ended in "/" —
        # every other path in this file uses "#{@working_dir}/...")
        dir = "#{@working_dir}/finished/#{finished.name}"

        # XXX i have no idea why this doesnt work, but it causes ruby
        # to hang right here when we try to use fileutils, so we just
        # use the shell. we officially have a unix dependency! there are
        # similar calls all over the place, so im totally mystified
        #FileUtils.mkdir_p(dir)
        # shell-escape the path so job names containing spaces or shell
        # metacharacters can't break (or inject into) the command
        `mkdir -p #{dir.shellescape}`

        # grab all the files for the finished job
        finished.files.each do |remote_file|
          File.open("#{dir}/#{remote_file.filename}", "w") do |dest|
            remote_file.get { |f| dest << f }
          end
        end
      end
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,53 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
rubygems_version: 0.9.4
|
3
|
+
specification_version: 1
|
4
|
+
name: ruby-pipeline
|
5
|
+
version: !ruby/object:Gem::Version
|
6
|
+
version: 1.0.0
|
7
|
+
date: 2007-09-15 00:00:00 -04:00
|
8
|
+
summary: A simple batch-process management system for small clusters
|
9
|
+
require_paths:
|
10
|
+
- lib
|
11
|
+
email: amckinle@andrew.cmu.edu
|
12
|
+
homepage: http://ruby-pipeline.rubyforge.org
|
13
|
+
rubyforge_project:
|
14
|
+
description:
|
15
|
+
autorequire:
|
16
|
+
default_executable:
|
17
|
+
bindir: bin
|
18
|
+
has_rdoc: false
|
19
|
+
required_ruby_version: !ruby/object:Gem::Version::Requirement
|
20
|
+
requirements:
|
21
|
+
- - ">"
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: 0.0.0
|
24
|
+
version:
|
25
|
+
platform: ruby
|
26
|
+
signing_key:
|
27
|
+
cert_chain:
|
28
|
+
post_install_message:
|
29
|
+
authors:
|
30
|
+
- Austin McKinley
|
31
|
+
files:
|
32
|
+
- bin/pipeline-client
|
33
|
+
- bin/pipeline-server
|
34
|
+
- lib/server.rb
|
35
|
+
- lib/client_env.rb
|
36
|
+
- lib/job.rb
|
37
|
+
- lib/client.rb
|
38
|
+
- lib/remote_file.rb
|
39
|
+
test_files: []
|
40
|
+
|
41
|
+
rdoc_options: []
|
42
|
+
|
43
|
+
extra_rdoc_files: []
|
44
|
+
|
45
|
+
executables:
|
46
|
+
- pipeline-client
|
47
|
+
- pipeline-server
|
48
|
+
extensions: []
|
49
|
+
|
50
|
+
requirements: []
|
51
|
+
|
52
|
+
dependencies: []
|
53
|
+
|