workflow_manager 0.6.0 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: fe50e7ccd102e4cd531cf2afb47e1222ca0b06e434ab34fdfff6a0f3a0fd35c3
4
- data.tar.gz: fd07c0b863627a1a8065e7f710931ba99c0609e5ef9e9e82f107c35eee678b68
3
+ metadata.gz: 81101870809e71203ee0a00d65a210fb3b43077273515e51329ad443396621a8
4
+ data.tar.gz: 9651fae73c5e930ad5fb424eff4d27888f6aa667b480cf2381607d74b5cfb624
5
5
  SHA512:
6
- metadata.gz: 6a9701699b79fd61f889b86dbc3060fe63f6d5e8b6e2532dabb0635b5c3f37f335563dfeda5ba677328ff88e1c2197df60c84e9c9526b4a316eac751b9e3b939
7
- data.tar.gz: d42a9d888a80e7f8b549a64595d13e3c268499d4f78d00805c070c6d60feb9febd20f335e7e8af398f910f6e1eade7b60cf200e83d99451230a4e0f37e134e0f
6
+ metadata.gz: dceef4d0268fd963c5c9b2285d38301973a32c10c976393b90fcc125dd07c750c9c29c75b4d3f10fcad5f770329507453d9e9b2a4b075e88f86a66d40d2e94e2
7
+ data.tar.gz: cfe1de2fd651090060dba921b37fd01d8f442876c5581831a6d41ccafd1597eb0e9c31513741482f2ee3d7fe168daf015841aaaffa4530e27fa511bf8115e52a
data/.gitignore CHANGED
@@ -15,3 +15,4 @@ spec/reports
15
15
  test/tmp
16
16
  test/version_tmp
17
17
  tmp
18
+ command.log
data/Gemfile CHANGED
@@ -1,4 +1,5 @@
1
1
  source 'https://rubygems.org'
2
2
 
3
- # Specify your gem's dependencies in workflow_manager.gemspec
4
- gemspec
3
+ gem 'redis'
4
+ gem 'sidekiq'
5
+ gem 'workflow_manager', :path => '/srv/GT/analysis/masaomi/FGCZ/prototype_workflow_manager_with_sidekiq_20210122/workflow_manager/'
data/bin/wfm_monitoring CHANGED
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env ruby
2
2
  # encoding: utf-8
3
3
  # 20121112 masa workflow manager client
4
- Version = '20200722-161135'
4
+ Version = '20210625-165025'
5
5
 
6
6
  require 'drb/drb'
7
7
  require 'workflow_manager/optparse_ex'
@@ -54,4 +54,5 @@ sge_options << "-n #{opt.nodes}" if opt.nodes
54
54
  script_content = File.read(script_file)
55
55
  workflow_manager = DRbObject.new_with_uri(uri)
56
56
  #puts workflow_manager.start_monitoring(script_file, user, 0, script_content, project_number, sge_options.join(' '), opt.log)
57
- puts workflow_manager.start_monitoring2(script_file, script_content, user, project_number, sge_options.join(' '), opt.log)
57
+ #puts workflow_manager.start_monitoring2(script_file, script_content, user, project_number, sge_options.join(' '), opt.log)
58
+ puts workflow_manager.start_monitoring3(script_file, script_content, user, project_number, sge_options.join(' '), opt.log)
data/bin/workflow_manager CHANGED
@@ -8,7 +8,7 @@ Version = WorkflowManager::VERSION
8
8
  opt = OptionParser.new do |o|
9
9
  o.banner = "Version: #{Version}\nUsage:\n #{File.basename(__FILE__)} -d [druby://host:port] -m [development|production]"
10
10
  o.on(:server, 'druby://localhost:12345', '-d server', '--server', 'workflow manager URI (default: druby://localhost:12345)')
11
- o.on(:mode, 'development', '-m mode', '--mode', 'development|production (default: development)')
11
+ o.on(:mode, 'production', '-m mode', '--mode', 'development|production (default: production)')
12
12
  o.parse!(ARGV)
13
13
  end
14
14
 
@@ -43,4 +43,20 @@ if opt.mode
43
43
  end
44
44
  DRb.start_service(uri, WorkflowManager::Server.new)
45
45
  puts DRb.uri
46
- DRb.thread.join
46
+ #DRb.thread.join
47
+
48
+ sleep 1
49
+
50
+ sidekiq_pid = fork do
51
+ exec("sidekiq -C config/environments/sidekiq.yml -r ./lib/job_checker.rb")
52
+ end
53
+
54
+ begin
55
+ DRb.thread.join
56
+ puts "__END__"
57
+ rescue SignalException
58
+ Process.kill("HUP", sidekiq_pid)
59
+ sleep 1
60
+ puts "__CORRECTLY_END__"
61
+ end
62
+
@@ -7,6 +7,6 @@ WorkflowManager::Server.configure do |config|
7
7
  config.interval = 30
8
8
  config.resubmit = 0
9
9
  config.redis_conf = "config/environments/redis.conf"
10
- config.cluster = WorkflowManager::FGCZCluster.new('FGCZCluster')
10
+ config.cluster = WorkflowManager::FGCZDebian10Cluster.new('FGCZDebian10Cluster')
11
11
  end
12
12
 
@@ -1,9 +1,9 @@
1
1
  loglevel debug
2
2
  logfile ./logs/redis.log
3
- databases 2
3
+ databases 4
4
4
  save 300 10
5
5
  rdbcompression yes
6
6
  dir ./dbs
7
7
  dbfilename redis.rdb
8
8
  maxmemory 10gb
9
- port 6379
9
+ port 6380
@@ -0,0 +1,8 @@
1
+ :verbose: false
2
+ :pidfile: ./tmp/pids/sidekiq.pid
3
+ :logfile: ./log/sidekiq.log
4
+ :concurrency: 100
5
+ :queues:
6
+ - default
7
+ - test
8
+
@@ -0,0 +1,81 @@
1
+ require 'sidekiq'
2
+ require 'redis'
3
+
4
+ WORKER_INTERVAL = 10 # [s]
5
# Location of the bundled redis.conf, resolved relative to this file.
REDIS_CONF = File.expand_path("../../config/environments/redis.conf", __FILE__)

# Extracts the `port` directive from a redis.conf-style file.
# Blank lines, comment lines and directives without a numeric value are
# ignored (the previous Hash[*...] parser raised NoMethodError on a blank
# line and produced port 0 when no `port` directive existed). When the
# directive occurs more than once, the last occurrence wins.
# Returns +default+ when the file is missing or names no numeric port.
read_redis_port = lambda do |conf_path, default|
  next default unless File.exist?(conf_path)

  port = nil
  File.foreach(conf_path) do |line|
    directive, value = line.split
    port = value.to_i if directive == "port" && /\A\d+\z/ =~ value.to_s
  end
  port || default
end

# Redis port shared by the state/log/project databases and by Sidekiq.
PORT = read_redis_port.call(REDIS_CONF, 6379)
# Sidekiq keeps its queues in database 3 of the same Redis instance.
SIDEKIQ_URL = "redis://localhost:#{PORT}/3"
warn "redis.conf: #{REDIS_CONF}"
warn "Redis port: #{PORT}"
warn "Sidekiq URL: #{SIDEKIQ_URL}"
16
+
17
# Point both the Sidekiq server (job runner) and the Sidekiq client
# (enqueuer) at the same Redis URL. Each hook gets its own options hash.
%i[configure_server configure_client].each do |hook|
  Sidekiq.public_send(hook) do |config|
    config.redis = { url: SIDEKIQ_URL }
  end
end
24
+
25
# Hash-style shortcuts on the redis client:
# db[key] reads via #get, db[key] = value writes via #set.
class Redis
  alias :[] :get
  alias :[]= :set
end
29
+
30
# Sidekiq worker that watches a single cluster job. It polls `sacct` every
# WORKER_INTERVAL seconds and mirrors the reported state into Redis
# (db 0: status record, db 1: log file path, db 2: per-project job list)
# until the state is no longer RUNNING, PENDING or the '---' placeholder.
class JobChecker
  include Sidekiq::Worker
  sidekiq_options queue: :default, retry: 5

  # Polling continues while the sacct state matches this pattern.
  # NOTE(review): '---' is presumably the dashed rule sacct prints under its
  # header while the job has no accounting row yet - confirm.
  ACTIVE_STATE = /RUNNING|PENDING|---/

  # Writes +script_content+ (plus a trailing end marker) to a new,
  # timestamp-suffixed file inside +log_dir+.
  # Not called by #perform in this class.
  #
  # @param log_dir [String] directory that receives the script
  # @param script_basename [String] script name; only its basename is used
  # @param script_content [String] body of the job script
  # @return [String] path of the file that was written
  def generate_new_job_script(log_dir, script_basename, script_content)
    file_name = File.basename(script_basename) + "_" + Time.now.strftime("%Y%m%d%H%M%S%L")
    new_job_script = File.join(log_dir, file_name)
    # File.open instead of Kernel#open: the latter spawns a process when the
    # path starts with "|".
    File.open(new_job_script, 'w') do |out|
      out.print script_content
      out.print "\necho __SCRIPT END__\n"
    end
    new_job_script
  end

  # Builds the comma-separated status record stored in db 0:
  # status,script,start/now,user,project. The start time is captured on the
  # first call after @start_time was reset.
  #
  # @return [String]
  def update_time_status(status, script_basename, user, project_number)
    @start_time ||= Time.now.strftime("%Y-%m-%d %H:%M:%S")
    time = Time.now.strftime("%Y-%m-%d %H:%M:%S")
    [status, script_basename, [@start_time, time].join("/"), user, project_number].join(',')
  end

  # Polls the job until it leaves the active states.
  #
  # @param job_id [String, Integer] scheduler job id
  # @param script_basename [String] name recorded in the status record
  # @param log_file [String] path stored in the log DB under +job_id+
  # @param user [String] user recorded in the status record
  # @param project_id [String, Integer] key of the per-project job list
  # @raise [RuntimeError] when sacct prints nothing (Sidekiq then retries)
  def perform(job_id, script_basename, log_file, user, project_id)
    puts "JobID (in JobChecker): #{job_id}"
    db0 = Redis.new(port: PORT, db: 0) # state + alpha DB
    db1 = Redis.new(port: PORT, db: 1) # log DB
    db2 = Redis.new(port: PORT, db: 2) # project jobs DB
    db1[job_id] = log_file
    pre_state = nil
    @start_time = nil
    loop do
      state = current_state(job_id)
      # Written on every poll so the record's end time keeps advancing.
      db0[job_id] = update_time_status(state, script_basename, user, project_id)
      record_project_state(db2, project_id, job_id, state) unless state == pre_state
      pre_state = state
      # Stop right away on a terminal state instead of sleeping once more.
      break unless state =~ ACTIVE_STATE
      sleep WORKER_INTERVAL
    end
  ensure
    # Release the three connections even when polling raised.
    [db0, db1, db2].compact.each(&:close)
  end

  private

  # Runs sacct without a shell and returns the last line of its output,
  # stripped.
  def current_state(job_id)
    output = IO.popen(["sacct", "--jobs=#{job_id}", "--format=state"], &:read)
    last_line = output.to_s.split(/\n/).last
    raise "sacct returned no output for job #{job_id}" unless last_line
    last_line.strip
  end

  # Stores +state+ for +job_id+ in the project's job list, which is kept as
  # the inspect string of a flat [job_id, state, ...] array, capped at the
  # last 200 elements.
  def record_project_state(db2, project_id, job_id, state)
    # NOTE(review): eval of data read back from Redis. Kept because the
    # server-side reader evals the same serialized format; anyone able to
    # write to this Redis database can execute code here.
    project_jobs = Hash[*eval((db2[project_id] || []).to_s)]
    project_jobs[job_id] = state
    db2[project_id] = project_jobs.to_a.flatten.last(200).to_s
  end
end
81
+
data/lib/worker4.rb ADDED
@@ -0,0 +1,80 @@
1
+ require 'sidekiq'
2
+ require 'redis'
3
+
4
+ WORKER_INTERVAL = 10 # [s]
5
+
6
# Point both the Sidekiq server (job runner) and the Sidekiq client
# (enqueuer) at database 3 of the local Redis on port 6380.
%i[configure_server configure_client].each do |hook|
  Sidekiq.public_send(hook) do |config|
    config.redis = { url: 'redis://localhost:6380/3' }
  end
end
13
+
14
# Hash-style shortcuts on the redis client:
# db[key] reads via #get, db[key] = value writes via #set.
class Redis
  alias :[] :get
  alias :[]= :set
end
18
+
19
# Sidekiq worker that submits a job script with `sbatch` and then polls
# `sacct` every WORKER_INTERVAL seconds, mirroring the state into Redis
# (db 0: status record, db 1: log file path, db 2: per-project job list)
# until the state is no longer RUNNING, PENDING or the '---' placeholder.
class JobWorker
  include Sidekiq::Worker
  sidekiq_options queue: :default, retry: 5

  # Port of the Redis instance holding the state/log/project databases.
  REDIS_PORT = 6380
  # User name written into every status record by this worker.
  DEFAULT_USER = "sushi_lover"
  # Polling continues while the sacct state matches this pattern.
  # NOTE(review): '---' is presumably the dashed rule sacct prints under its
  # header while the job has no accounting row yet - confirm.
  ACTIVE_STATE = /RUNNING|PENDING|---/

  # Writes +script_content+ (plus a trailing end marker) to a new,
  # timestamp-suffixed file inside +log_dir+.
  #
  # @param log_dir [String] directory that receives the script
  # @param script_basename [String] script name; only its basename is used
  # @param script_content [String] body of the job script
  # @return [String] path of the file that was written
  def generate_new_job_script(log_dir, script_basename, script_content)
    file_name = File.basename(script_basename) + "_" + Time.now.strftime("%Y%m%d%H%M%S%L")
    new_job_script = File.join(log_dir, file_name)
    # File.open instead of Kernel#open: the latter spawns a process when the
    # path starts with "|".
    File.open(new_job_script, 'w') do |out|
      out.print script_content
      out.print "\necho __SCRIPT END__\n"
    end
    new_job_script
  end

  # Builds the comma-separated status record stored in db 0:
  # status,script,start/now,user,project. The start time is captured on the
  # first call after @start_time was reset.
  #
  # @return [String]
  def update_time_status(status, script_basename, user, project_number)
    @start_time ||= Time.now.strftime("%Y-%m-%d %H:%M:%S")
    time = Time.now.strftime("%Y-%m-%d %H:%M:%S")
    [status, script_basename, [@start_time, time].join("/"), user, project_number].join(',')
  end

  # Submits the script and follows the job until it leaves the active states.
  #
  # @param project_id [String, Integer] key of the per-project job list
  # @param log_dir [String] directory for the generated script and its logs
  # @param script_basename [String] name recorded in the status record
  # @param script_content [String] body of the job script
  # @raise [RuntimeError] when sbatch reports no job id or sacct prints
  #   nothing (Sidekiq then retries the whole job)
  def perform(project_id, log_dir, script_basename, script_content)
    job_script = generate_new_job_script(log_dir, script_basename, script_content)
    log_file = job_script + "_o.log"
    job_id = submit(job_script, log_file, job_script + "_e.log")
    puts "JobID: #{job_id}"
    db0 = Redis.new(port: REDIS_PORT, db: 0) # state + alpha DB
    db1 = Redis.new(port: REDIS_PORT, db: 1) # log DB
    db2 = Redis.new(port: REDIS_PORT, db: 2) # project jobs DB
    db1[job_id] = log_file
    pre_state = nil
    @start_time = nil
    loop do
      state = current_state(job_id)
      puts "state: #{state}"
      # Written on every poll so the record's end time keeps advancing.
      db0[job_id] = update_time_status(state, script_basename, DEFAULT_USER, project_id)
      record_project_state(db2, project_id, job_id, state) unless state == pre_state
      pre_state = state
      # Stop right away on a terminal state instead of sleeping once more.
      break unless state =~ ACTIVE_STATE
      sleep WORKER_INTERVAL
    end
  ensure
    # Release the three connections even when submission or polling raised.
    [db0, db1, db2].compact.each(&:close)
  end

  private

  # Runs sbatch without a shell (so paths containing spaces or shell
  # metacharacters are passed through intact) and returns the job id, taken
  # as the last word of sbatch's output.
  def submit(job_script, log_file, err_file)
    argv = ["sbatch", "-o", log_file, "-e", err_file, "-N", "1", job_script]
    puts argv.join(" ")
    job_id = IO.popen(argv, &:read).to_s.split.last
    raise "sbatch did not report a job id for #{job_script}" unless job_id
    job_id
  end

  # Runs sacct without a shell and returns the last line of its output,
  # stripped.
  def current_state(job_id)
    argv = ["sacct", "--jobs=#{job_id}", "--format=state"]
    puts argv.join(" ")
    last_line = IO.popen(argv, &:read).to_s.split(/\n/).last
    raise "sacct returned no output for job #{job_id}" unless last_line
    last_line.strip
  end

  # Stores +state+ for +job_id+ in the project's job list, which is kept as
  # the inspect string of a flat [job_id, state, ...] array, capped at the
  # last 200 elements.
  def record_project_state(db2, project_id, job_id, state)
    # NOTE(review): eval of data read back from Redis; anyone able to write
    # to this Redis database can execute code here. Kept so the serialized
    # format stays unchanged.
    project_jobs = Hash[*eval((db2[project_id] || []).to_s)]
    project_jobs[job_id] = state
    p project_jobs
    db2[project_id] = project_jobs.to_a.flatten.last(200).to_s
  end
end
80
+
@@ -4,6 +4,9 @@
4
4
  require 'drb/drb'
5
5
  require 'fileutils'
6
6
  require 'csv'
7
+
8
+ require 'job_checker'
9
+
7
10
  begin
8
11
  require 'redis'
9
12
  DB_MODE = "Redis"
@@ -159,6 +162,7 @@ module WorkflowManager
159
162
  when "Redis"
160
163
  RedisDB.new(1, @redis_conf)
161
164
  end
165
+ @jobs = RedisDB.new(2, @redis_conf)
162
166
 
163
167
  @system_log = File.join(@log_dir, "system.log")
164
168
  @mutex = Mutex.new
@@ -172,15 +176,34 @@ module WorkflowManager
172
176
  log_puts("DB = #{DB_MODE}")
173
177
  log_puts("Cluster = #{@cluster.name}")
174
178
  log_puts("Server starts")
179
+ log_puts("Recovery check")
180
+ recovery_job_checker
181
+ end
182
# Re-enqueues a JobChecker for every stored job whose status is still
# RUNNING or PENDING. Called once during server start-up.
#
# Status records are comma-separated, e.g.
#   RUNNING,QC_ventricles_100k.sh,2021-07-30 09:47:04/2021-07-30 09:47:04,masaomi,1535
# i.e. state, script name, start/last-update time, user, project number.
def recovery_job_checker
  @logs.transaction do |logs|
    @statuses.transaction do |statuses|
      statuses.each do |job_id, status|
        # A missing or empty record cannot be parsed; skip it rather than
        # abort server start-up with NoMethodError.
        record = status.to_s
        next if record.empty?

        stat, script_basename, _time, user, project_number = record.split(",")
        next unless %w[RUNNING PENDING].include?(stat)

        log_file = logs[job_id]
        # Reported both in the system log and on the console.
        log_puts("JobID (in recovery check): #{job_id}")
        puts "JobID (in recovery check): #{job_id}"
        JobChecker.perform_async(job_id, script_basename, log_file, user, project_number)
      end
    end
  end
end
176
199
  def hello
177
- 'hello test hoge, '+ @cluster.name
200
+ 'hello hoge hoge bar boo bundle, '+ @cluster.name
178
201
  end
179
202
  def copy_commands(org_dir, dest_parent_dir, now=nil)
180
203
  @cluster.copy_commands(org_dir, dest_parent_dir, now)
181
204
  end
182
205
  def kill_job(job_id)
183
- status(job_id, 'fail')
206
+ status(job_id, 'FAIL')
184
207
  status = `#{@cluster.kill_command(job_id)}`
185
208
  end
186
209
  def delete_command(target)
@@ -272,6 +295,17 @@ module WorkflowManager
272
295
  Thread.current.kill
273
296
  end
274
297
  end
298
# Submits a script through the configured cluster and hands monitoring of
# the resulting job to a JobChecker background job.
#
# @param script_path [String] script path; passed to the cluster and used
#   (basename only) as the name in the status record
# @param script_content [String] body of the script to submit
# @param user [String] user recorded with the job
# @param project_number [Integer, String] project recorded with the job
# @param sge_options [String] scheduler options forwarded to the cluster
# @param log_dir [String] accepted for signature compatibility; not used here
# @return [Object, nil] the job id returned by the cluster (nil when the
#   cluster returned none)
def start_monitoring3(script_path, script_content, user='sushi_lover', project_number=0, sge_options='', log_dir='')
  script_basename = File.basename(script_path)
  job_id, log_file, _command = @cluster.submit_job(script_path, script_content, sge_options)
  puts "JobID (in WorkflowManager): #{job_id}"
  # Without a job id there is nothing to poll; enqueueing a checker would
  # only tie up a worker on an empty id.
  return job_id if job_id.nil? || job_id.to_s.empty?

  # NOTE(review): presumably gives the scheduler a moment to register the
  # job before the first poll - confirm.
  sleep 1
  JobChecker.perform_async(job_id, script_basename, log_file, user, project_number)
  job_id
end
275
309
  def start_monitoring2(script_path, script_content, user='sushi_lover', project_number=0, sge_options='', log_dir='')
276
310
  # script_path is only used to generate a log file name
277
311
  # It is not used to read the script contents
@@ -418,7 +452,7 @@ module WorkflowManager
418
452
  #@statuses.open(@db_stat)
419
453
  @statuses.transaction do |statuses|
420
454
  if new_status and stat = statuses[job_id.to_s]
421
- status_list = ['success', 'running', 'pending', 'fail']
455
# Status names a caller may set. 'COMPLETED' was misspelled 'CONPLETED',
# which made that status impossible to set.
status_list = ['COMPLETED', 'RUNNING', 'PENDING', 'FAIL']
422
456
  if status_list.include?(new_status)
423
457
  items = stat.split(/,/)
424
458
  items.shift
@@ -438,17 +472,19 @@ module WorkflowManager
438
472
  job_idsh = if job_ids
439
473
  Hash[*(job_ids.split(',')).map{|job_id| [job_id, true]}.flatten]
440
474
  end
441
- @statuses.transaction do |statuses|
442
- statuses.each do |key, value|
443
- if project_number
444
- if x = value.split(/,/)[4].to_i==project_number.to_i
445
- s << [key, value]
446
- end
447
- else
448
- s << [key, value]
475
+ s_ = {}
476
+ unless job_ids
477
+ @jobs.transaction do |jobs|
478
+ if project_jobs = jobs[project_number]
479
+ s_ = Hash[*eval(project_jobs)]
449
480
  end
450
481
  end
451
482
  end
483
+ @statuses.transaction do |statuses|
484
+ s_.each do |job_id, stat|
485
+ s << [job_id, statuses[job_id]]
486
+ end
487
+ end
452
488
  if job_ids
453
489
  s = s.select{|job_id, stat| job_idsh[job_id]}
454
490
  end
@@ -1,3 +1,3 @@
1
1
  module WorkflowManager
2
- VERSION = "0.6.0"
2
+ VERSION = "0.7.0"
3
3
  end
@@ -0,0 +1,11 @@
1
+ #!/usr/bin/env ruby
2
+ # encoding: utf-8
3
+ # Version = '20210625-104318'
4
+
5
require './lib/worker4'

# Enqueue one JobWorker run for the sample script under project 1001.
script_file = "./test/test_job1.sh"
log_dir = "./logs"
script_content = File.read(script_file)
JobWorker.perform_async(1001, log_dir, File.basename(script_file), script_content)
p "submitted test_job1.sh"
@@ -0,0 +1,11 @@
1
+ #!/usr/bin/env ruby
2
+ # encoding: utf-8
3
+ # Version = '20210625-162836'
4
+
5
require 'workflow_manager'
# JobWorker lives in lib/worker4.rb; workflow_manager only loads
# job_checker (JobChecker), so without this require the call below raises
# NameError.
# NOTE(review): confirm JobWorker (not JobChecker) is the intended worker.
require './lib/worker4'

# Enqueue one JobWorker run for the sample script under project 1001.
script_file = "./test/test_job1.sh"
script_content = File.read(script_file)
log_dir = "./logs"
script_basename = File.basename(script_file)
JobWorker.perform_async(1001, log_dir, script_basename, script_content)
p "submitted test_job1.sh"
data/test/job_list.rb ADDED
@@ -0,0 +1,50 @@
1
+ #!/usr/bin/env ruby
2
+ # encoding: utf-8
3
+ # Version = '20210723-134812'
4
+
5
# Redis port: first command-line argument, 6380 when omitted.
PORT = (ARGV[0] || 6380).to_i
require 'redis'

# Prints every key of +db+ with its value as tab-separated lines, sorted by
# key.
def show_all(db)
  db.keys.sort.each do |key|
    puts [key, db.get(key)].join("\t")
  end
end

# Dump databases 0..2 in order, each preceded by a "db<N>\t<note>" header
# and followed by a blank line. The per-database dumps that used to follow
# an unconditional `exit` were unreachable and have been removed.
["state DB", "log DB", "project job DB"].each_with_index do |note, index|
  puts ["db#{index}", note].join("\t")
  show_all(Redis.new(port: PORT, db: index))
  puts
end
data/test/test_job1.sh ADDED
@@ -0,0 +1,5 @@
1
+ #!/bin/bash
2
+
3
+ echo "Job1"
4
+ sleep 60
5
+ echo "END"
@@ -19,6 +19,6 @@ Gem::Specification.new do |spec|
19
19
  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
20
20
  spec.require_paths = ["lib"]
21
21
 
22
- spec.add_development_dependency "bundler", "~> 1.3"
22
+ spec.add_development_dependency "bundler", "~> 2.2.10"
23
23
  spec.add_development_dependency "rake"
24
24
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: workflow_manager
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.0
4
+ version: 0.7.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Functional Genomics Center Zurich
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-12-18 00:00:00.000000000 Z
11
+ date: 2021-07-30 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -16,14 +16,14 @@ dependencies:
16
16
  requirements:
17
17
  - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: '1.3'
19
+ version: 2.2.10
20
20
  type: :development
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: '1.3'
26
+ version: 2.2.10
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: rake
29
29
  requirement: !ruby/object:Gem::Requirement
@@ -76,6 +76,9 @@ files:
76
76
  - config/environments/development.rb
77
77
  - config/environments/production.rb
78
78
  - config/environments/redis.conf
79
+ - config/environments/sidekiq.yml
80
+ - lib/job_checker.rb
81
+ - lib/worker4.rb
79
82
  - lib/workflow_manager.rb
80
83
  - lib/workflow_manager/cluster.rb
81
84
  - lib/workflow_manager/optparse_ex.rb
@@ -84,6 +87,10 @@ files:
84
87
  - spec/cluster_spec.rb
85
88
  - spec/server_spec.rb
86
89
  - spec/spec_helper.rb
90
+ - test/call_worker4.rb
91
+ - test/call_worker_method.rb
92
+ - test/job_list.rb
93
+ - test/test_job1.sh
87
94
  - workflow_manager.gemspec
88
95
  homepage: ''
89
96
  licenses:
@@ -104,7 +111,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
104
111
  - !ruby/object:Gem::Version
105
112
  version: '0'
106
113
  requirements: []
107
- rubygems_version: 3.0.3
114
+ rubygems_version: 3.0.3.1
108
115
  signing_key:
109
116
  specification_version: 4
110
117
  summary: Workflow Manager manages job submissions using dRuby.
@@ -112,3 +119,7 @@ test_files:
112
119
  - spec/cluster_spec.rb
113
120
  - spec/server_spec.rb
114
121
  - spec/spec_helper.rb
122
+ - test/call_worker4.rb
123
+ - test/call_worker_method.rb
124
+ - test/job_list.rb
125
+ - test/test_job1.sh