stapel 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (6) hide show
  1. checksums.yaml +7 -0
  2. data/bin/stapel +51 -0
  3. data/lib/machine.rb +44 -0
  4. data/lib/pool.rb +27 -0
  5. data/lib/worker.rb +45 -0
  6. metadata +82 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 7aa52485f766d6fa3f1753c76edd0fe1c7d4fd22
4
+ data.tar.gz: c76735760f6cc822ccaf9b1381f76b12d3a649ed
5
+ SHA512:
6
+ metadata.gz: 823f20bcc19751e612e77a95614cc26bf79567353bf4c120e5f5bb748f9c3cd5a2bc8046d7f48b62d1dfa461878ca71d6661e25e07513943624d9e43a091fa47
7
+ data.tar.gz: b5431fa08b220b00b2c8f39de551f1317ede0b644eea03348685c91e899613eebae575e5897322028c9326c0c04c9d7b71b6dd8b9c8f5cd585c770a16a58ee5a
data/bin/stapel ADDED
@@ -0,0 +1,51 @@
1
+ #!/usr/bin/env ruby
2
+ require_relative '../lib/pool.rb'
3
+ require_relative '../lib/machine.rb'
4
+ require_relative '../lib/worker.rb'
5
+ # project layout:
6
+ # inputs
7
+ # job1 ... any filename
8
+ # job2
9
+ # output
10
+ # job1 #this will be copied from ~user/output
11
+ # files written by job1
12
+ # job2.cpy #unfinished copy of job, will be retried when master is restarted
13
+ # data
14
+ # some_file #additional files that your runner needs (libs, binaries, whatever)
15
+ # config.yaml #contains the ssh settings: a 'default' section and the list of 'clients'
16
+ # run #the script that is executed on the worker for each input
17
+ # init #the script that is executed on the worker once to setup the worker
18
+ # cleanup #the script that is executed on the worker after a worker is finished
19
+ # aggregate #the script that will be executed on the master to aggregate all results after all jobs finished
20
+
21
# Runs the project's optional "aggregate" script on the master.
#
# The script is looked up directly inside +path_to_env+ and is invoked with
# +output+ as its only argument. Nothing happens when the script is missing.
#
# @param path_to_env [String] path to the project directory
# @param output [String] path handed to the aggregate script
# @return [Boolean, nil] the result of Kernel#system, or nil when there is
#   no aggregate script
def aggregate(path_to_env, output)
  aggregate_path = File.join(path_to_env, 'aggregate')
  # File.exists? was deprecated for years and removed in Ruby 3.2.
  system(aggregate_path, output) if File.exist?(aggregate_path)
end
25
+
26
# Processes every pending input of a project and aggregates the results.
#
# One thread is started per configured worker slot (+num_workers+ per
# machine); each thread runs run_worker_on_machine against the shared pool.
# Once all of those threads have finished, the aggregate step is run on the
# pool's output directory.
#
# @param path_to_env [String] path to the project directory
# @return [Boolean, nil] whatever aggregate returns
def run(path_to_env)
  pool = WorkPool.new(path_to_env)
  threads = Machine.get_all(path_to_env).flat_map do |machine|
    Array.new(machine.num_workers) do
      Thread.new { run_worker_on_machine(path_to_env, machine, pool) }
    end
  end
  # Join only the threads started here; joining Thread.list would also wait
  # on any unrelated thread living in the process.
  threads.each(&:join)
  aggregate(path_to_env, pool.output)
end
36
+
37
# Drives a single worker on +machine+: uploads the environment, then keeps
# taking inputs from +pool+, running each one and downloading its result,
# until the pool returns nothing. The worker's cleanup is run afterwards,
# even when an error occurred, provided the worker was created.
#
# @param path_to_env [String] path to the project directory
# @param machine [Machine] machine the worker runs on
# @param pool [WorkPool] shared pool of pending inputs
def run_worker_on_machine(path_to_env, machine, pool)
  worker = Worker.new(path_to_env, machine, pool)
  worker.upload_environment
  input = pool.take
  while input
    worker.run_input(input)
    worker.download_result(input)
    input = pool.take
  end
ensure
  # worker is still nil when Worker.new itself raised.
  worker&.cleanup
end
47
+
48
+
49
# An exception in any worker thread terminates the whole process.
Thread.abort_on_exception = true
# Exactly one argument is expected: the path to the project directory.
abort 'usage: stapel path/to/project' unless ARGV.length == 1
run(File.expand_path(ARGV[0]))
data/lib/machine.rb ADDED
@@ -0,0 +1,44 @@
1
+ require 'yaml'
2
# Holds the SSH connection parameters of a single machine and wraps the
# remote operations performed on it (command execution, upload, download).
#
# NOTE: Net::SSH, Net::SCP and OpenStruct are referenced by #ssh, #scp and
# #scp_r, so they must already be loaded when those methods are called.
class Machine
  attr_accessor :host, :user, :keys, :num_workers, :port

  # @param params [Hash] settings with the string keys 'host', 'user',
  #   'keys', 'num_workers' and 'port'; missing keys leave the attribute nil
  def initialize(params)
    @host = params['host']
    @user = params['user']
    @keys = params['keys']
    @num_workers = params['num_workers']
    @port = params['port']
  end

  # Builds one Machine per entry of the 'clients' list in
  # <path_to_env>/config.yaml. Every entry is merged over the optional
  # 'default' section, so client values win over defaults.
  #
  # @param path_to_env [String] path to the project directory
  # @return [Array<Machine>] empty when no clients are configured
  def self.get_all(path_to_env)
    config = YAML.load_file(File.join(path_to_env, 'config.yaml')) || {}
    # Both sections are optional: a missing 'default' means "no defaults",
    # a missing 'clients' means "no machines".
    defaults = config['default'] || {}
    clients = config['clients'] || []
    clients.map { |client| new(defaults.merge(client)) }
  end

  # @return [Hash] options passed to Net::SSH / Net::SCP; key-based
  #   authentication only, with password prompts disabled
  def to_ssh_args
    {
      user: @user,
      port: @port,
      keys: @keys,
      keys_only: true,
      number_of_password_prompts: 0
    }
  end

  # Runs +cmd+ on the machine over a fresh SSH connection.
  #
  # @param cmd [String] command line to execute remotely
  # @return [OpenStruct] with the collected +stdout+ and +stderr+ strings
  def ssh(cmd)
    Net::SSH.start(@host, @user, to_ssh_args) do |session|
      res = OpenStruct.new(stdout: '', stderr: '')
      # stream is used as the OpenStruct key the data is appended to.
      session.exec!(cmd) { |_channel, stream, data| res[stream] << data }
      return res
    end
  end

  # Recursively uploads +local_path+ to +remote_path+ on the machine.
  #
  # @param local_path [String] existing local file or directory
  # @param remote_path [String] destination on the machine
  # @raise [RuntimeError] when +local_path+ does not exist
  def scp(local_path, remote_path)
    # File.exists? was removed in Ruby 3.2.
    raise "file not found: #{local_path}" unless File.exist?(local_path)
    # doesn't change the name of the last folder in local path to remote_path...
    Net::SCP.upload!(@host, @user, local_path, remote_path, ssh: to_ssh_args, recursive: true)
  end

  # Recursively downloads +remote_path+ from the machine to +local_path+.
  #
  # @param remote_path [String] source on the machine
  # @param local_path [String] local destination
  def scp_r(remote_path, local_path)
    Net::SCP.download!(@host, @user, remote_path, local_path, ssh: to_ssh_args, recursive: true)
  end

  # Prints a log line prefixed with the host, the current time and +tag+.
  #
  # @param tag [String] short label of the step being logged
  # @param text [Object] message; interpolated into the line
  def log(tag, text)
    puts "[#{@host},#{Time.now},#{tag}]: #{text}"
  end
end
data/lib/pool.rb ADDED
@@ -0,0 +1,27 @@
1
+ require 'fileutils'
2
+ require 'pathname'
3
+
4
# Thread-safe pool of the inputs that still have to be processed.
#
# Pending inputs are the paths found under <path_to_env>/inputs minus the
# paths found under <path_to_env>/output (both relative to their directory).
# Leftover "*.cpy" entries below the output directory are deleted first.
#
# NOTE(review): "done" entries are read from <path_to_env>/output while
# #output points at a fresh <path_to_env>/outputs_<timestamp> directory;
# confirm that this difference is intended.
class WorkPool
  attr_accessor :output

  # @param path_to_env [String] path to the project directory
  def initialize(path_to_env)
    input_prefix = Pathname.new(File.join(path_to_env, 'inputs'))
    output_prefix = Pathname.new(File.join(path_to_env, 'output'))
    Dir.glob("#{output_prefix}/**/*.cpy").each { |copy| FileUtils.rm_r(copy) }
    @active = inputs_under(input_prefix) - inputs_under(output_prefix)
    @mut = Mutex.new
    @output = File.join(path_to_env, "outputs_#{Time.now.to_i}")
    # The name only has one-second resolution, so the directory may already
    # exist when a pool was created moments ago; mkdir_p tolerates that.
    FileUtils.mkdir_p(@output)
  end

  # Lists everything below +prefix+ (files and directories, recursively).
  #
  # @param prefix [Pathname] directory to scan
  # @return [Array<String>] paths relative to +prefix+
  def inputs_under(prefix)
    Dir.glob("#{prefix}/**/*").map do |path|
      Pathname.new(path).relative_path_from(prefix).to_s
    end
  end

  # Removes one pending input from the pool.
  #
  # @return [String, nil] the input's relative path, or nil once the pool
  #   is empty
  def take
    @mut.synchronize { @active.pop }
  end
end
data/lib/worker.rb ADDED
@@ -0,0 +1,45 @@
1
+ require 'ostruct'
2
+ require 'net/ssh'
3
+ require 'net/scp'
4
+
5
require 'shellwords'

# Represents a single worker on one machine. Each worker owns a private
# temporary directory on the machine into which the project environment is
# uploaded and inside which the init, run and cleanup scripts are executed.
class Worker
  # Creates the worker's temporary directory on +machine+.
  #
  # @param path_to_env [String] local path to the project directory
  # @param machine [Machine] machine the worker runs on
  # @param pool [WorkPool] pool whose +output+ directory receives the results
  # @raise [RuntimeError] when the temporary directory could not be created
  def initialize(path_to_env, machine, pool)
    @path_to_env = path_to_env
    @machine = machine
    @pool = pool
    # Let mktemp report an absolute path instead of assuming that the login
    # directory is /home/<user> (not true for root or non-/home layouts).
    @target_dir = @machine.ssh('mktemp -d "$PWD/stapel_tempdir_XXXXXX"').stdout.strip
    raise 'failed to create temp dir' unless @target_dir.include?('stapel_tempdir')
    @path_to_output = pool.output
  end

  # Uploads the given entries of the project directory into the worker's
  # temporary directory.
  #
  # @param paths [Array<String>] names relative to the project directory
  def upload(*paths)
    paths.each { |path| @machine.scp(File.join(@path_to_env, path), @target_dir) }
  end

  # Uploads the scripts, inputs and data, then runs ./init in the temporary
  # directory.
  def upload_environment
    upload('run', 'init', 'inputs', 'data', 'cleanup')
    @machine.log('upload', 'done')
    @machine.log('init', @machine.ssh("cd #{remote_dir}; ./init"))
    @machine.log('init', 'done')
  end

  # Recreates an empty remote "output" directory and runs ./run on +input+.
  #
  # @param input [String] input path relative to the "inputs" directory
  def run_input(input)
    # this should include the project file name...
    input_path = File.join(@target_dir, 'inputs', input)
    @machine.ssh("cd #{remote_dir}; rm -rf output; mkdir output")
    @machine.log('run', "on #{input}")
    # Escape the path so inputs containing spaces or shell metacharacters
    # reach the run script as a single argument.
    @machine.log('run', @machine.ssh("cd #{remote_dir}; ./run #{Shellwords.escape(input_path)}"))
  end

  # Downloads the remote "output" directory into "<input>.cpy" below the
  # pool's output directory, then moves it to its final name and removes the
  # ".cpy" directory.
  #
  # @param input [String] input path relative to the "inputs" directory
  def download_result(input)
    local_cpy_path = File.join(@path_to_output, "#{input}.cpy")
    remote_output_path = File.join(@target_dir, 'output')
    local_output_path = File.join(@path_to_output, input)
    @machine.scp_r(remote_output_path, local_cpy_path)
    FileUtils.mv(File.join(local_cpy_path, 'output'), local_output_path)
    FileUtils.rmdir(local_cpy_path)
    @machine.log('download', 'done')
  end

  # Runs ./cleanup inside the temporary directory, if one was created.
  def cleanup
    @machine.log('cleanup', @machine.ssh("cd #{remote_dir}; ./cleanup")) if @target_dir
  end

  private

  # The temporary directory, escaped for use inside a remote shell command.
  def remote_dir
    Shellwords.escape(@target_dir)
  end
end
metadata ADDED
@@ -0,0 +1,82 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: stapel
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Cornelius Aschermann
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2016-08-03 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: net-scp
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.1'
20
+ - - ">="
21
+ - !ruby/object:Gem::Version
22
+ version: 1.2.1
23
+ type: :runtime
24
+ prerelease: false
25
+ version_requirements: !ruby/object:Gem::Requirement
26
+ requirements:
27
+ - - "~>"
28
+ - !ruby/object:Gem::Version
29
+ version: '1.1'
30
+ - - ">="
31
+ - !ruby/object:Gem::Version
32
+ version: 1.2.1
33
+ - !ruby/object:Gem::Dependency
34
+ name: net-ssh
35
+ requirement: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - "~>"
38
+ - !ruby/object:Gem::Version
39
+ version: '3.2'
40
+ type: :runtime
41
+ prerelease: false
42
+ version_requirements: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - "~>"
45
+ - !ruby/object:Gem::Version
46
+ version: '3.2'
47
+ description: A tool for dead simple batch processing on multiple machines
48
+ email: coco@hexgolems.com
49
+ executables:
50
+ - stapel
51
+ extensions: []
52
+ extra_rdoc_files: []
53
+ files:
54
+ - bin/stapel
55
+ - lib/machine.rb
56
+ - lib/pool.rb
57
+ - lib/worker.rb
58
+ homepage: http://github.com/eqv/stapel
59
+ licenses:
60
+ - MIT
61
+ metadata: {}
62
+ post_install_message:
63
+ rdoc_options: []
64
+ require_paths:
65
+ - lib
66
+ required_ruby_version: !ruby/object:Gem::Requirement
67
+ requirements:
68
+ - - ">="
69
+ - !ruby/object:Gem::Version
70
+ version: '0'
71
+ required_rubygems_version: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ requirements: []
77
+ rubyforge_project:
78
+ rubygems_version: 2.2.2
79
+ signing_key:
80
+ specification_version: 4
81
+ summary: Batch processing made easy
82
+ test_files: []