workflow_manager 0.6.0 → 0.7.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: fe50e7ccd102e4cd531cf2afb47e1222ca0b06e434ab34fdfff6a0f3a0fd35c3
4
- data.tar.gz: fd07c0b863627a1a8065e7f710931ba99c0609e5ef9e9e82f107c35eee678b68
3
+ metadata.gz: 81101870809e71203ee0a00d65a210fb3b43077273515e51329ad443396621a8
4
+ data.tar.gz: 9651fae73c5e930ad5fb424eff4d27888f6aa667b480cf2381607d74b5cfb624
5
5
  SHA512:
6
- metadata.gz: 6a9701699b79fd61f889b86dbc3060fe63f6d5e8b6e2532dabb0635b5c3f37f335563dfeda5ba677328ff88e1c2197df60c84e9c9526b4a316eac751b9e3b939
7
- data.tar.gz: d42a9d888a80e7f8b549a64595d13e3c268499d4f78d00805c070c6d60feb9febd20f335e7e8af398f910f6e1eade7b60cf200e83d99451230a4e0f37e134e0f
6
+ metadata.gz: dceef4d0268fd963c5c9b2285d38301973a32c10c976393b90fcc125dd07c750c9c29c75b4d3f10fcad5f770329507453d9e9b2a4b075e88f86a66d40d2e94e2
7
+ data.tar.gz: cfe1de2fd651090060dba921b37fd01d8f442876c5581831a6d41ccafd1597eb0e9c31513741482f2ee3d7fe168daf015841aaaffa4530e27fa511bf8115e52a
data/.gitignore CHANGED
@@ -15,3 +15,4 @@ spec/reports
15
15
  test/tmp
16
16
  test/version_tmp
17
17
  tmp
18
+ command.log
data/Gemfile CHANGED
@@ -1,4 +1,5 @@
1
1
  source 'https://rubygems.org'
2
2
 
3
- # Specify your gem's dependencies in workflow_manager.gemspec
4
- gemspec
3
+ gem 'redis'
4
+ gem 'sidekiq'
5
+ gem 'workflow_manager', :path => '/srv/GT/analysis/masaomi/FGCZ/prototype_workflow_manager_with_sidekiq_20210122/workflow_manager/'
data/bin/wfm_monitoring CHANGED
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env ruby
2
2
  # encoding: utf-8
3
3
  # 20121112 masa workflow manager client
4
- Version = '20200722-161135'
4
+ Version = '20210625-165025'
5
5
 
6
6
  require 'drb/drb'
7
7
  require 'workflow_manager/optparse_ex'
@@ -54,4 +54,5 @@ sge_options << "-n #{opt.nodes}" if opt.nodes
54
54
  script_content = File.read(script_file)
55
55
  workflow_manager = DRbObject.new_with_uri(uri)
56
56
  #puts workflow_manager.start_monitoring(script_file, user, 0, script_content, project_number, sge_options.join(' '), opt.log)
57
- puts workflow_manager.start_monitoring2(script_file, script_content, user, project_number, sge_options.join(' '), opt.log)
57
+ #puts workflow_manager.start_monitoring2(script_file, script_content, user, project_number, sge_options.join(' '), opt.log)
58
+ puts workflow_manager.start_monitoring3(script_file, script_content, user, project_number, sge_options.join(' '), opt.log)
data/bin/workflow_manager CHANGED
@@ -8,7 +8,7 @@ Version = WorkflowManager::VERSION
8
8
  opt = OptionParser.new do |o|
9
9
  o.banner = "Version: #{Version}\nUsage:\n #{File.basename(__FILE__)} -d [druby://host:port] -m [development|production]"
10
10
  o.on(:server, 'druby://localhost:12345', '-d server', '--server', 'workflow manager URI (default: druby://localhost:12345)')
11
- o.on(:mode, 'development', '-m mode', '--mode', 'development|production (default: development)')
11
+ o.on(:mode, 'production', '-m mode', '--mode', 'development|production (default: production)')
12
12
  o.parse!(ARGV)
13
13
  end
14
14
 
@@ -43,4 +43,20 @@ if opt.mode
43
43
  end
44
44
  DRb.start_service(uri, WorkflowManager::Server.new)
45
45
  puts DRb.uri
46
- DRb.thread.join
46
+ #DRb.thread.join
47
+
48
+ sleep 1
49
+
50
+ sidekiq_pid = fork do
51
+ exec("sidekiq -C config/environments/sidekiq.yml -r ./lib/job_checker.rb")
52
+ end
53
+
54
+ begin
55
+ DRb.thread.join
56
+ puts "__END__"
57
+ rescue SignalException
58
+ Process.kill("HUP", sidekiq_pid)
59
+ sleep 1
60
+ puts "__CORRECTLY_END__"
61
+ end
62
+
@@ -7,6 +7,6 @@ WorkflowManager::Server.configure do |config|
7
7
  config.interval = 30
8
8
  config.resubmit = 0
9
9
  config.redis_conf = "config/environments/redis.conf"
10
- config.cluster = WorkflowManager::FGCZCluster.new('FGCZCluster')
10
+ config.cluster = WorkflowManager::FGCZDebian10Cluster.new('FGCZDebian10Cluster')
11
11
  end
12
12
 
@@ -1,9 +1,9 @@
1
1
  loglevel debug
2
2
  logfile ./logs/redis.log
3
- databases 2
3
+ databases 4
4
4
  save 300 10
5
5
  rdbcompression yes
6
6
  dir ./dbs
7
7
  dbfilename redis.rdb
8
8
  maxmemory 10gb
9
- port 6379
9
+ port 6380
@@ -0,0 +1,8 @@
# Sidekiq configuration for the workflow_manager job checker/worker.
:verbose: false
:pidfile: ./tmp/pids/sidekiq.pid
# NOTE(review): the rest of the project logs under ./logs/ (see
# config/environments/redis.conf) — confirm whether ./log/ is intentional.
:logfile: ./log/sidekiq.log
# High concurrency: each job checker sleeps most of the time while
# polling sacct, so many can share one process.
:concurrency: 100
:queues:
  - default
  - test
@@ -0,0 +1,81 @@
require 'sidekiq'
require 'redis'

# Polling interval between successive sacct state checks, in seconds.
WORKER_INTERVAL = 10 # [s]

# Derive the Redis port from the shared redis.conf so this worker and the
# DRb server always talk to the same Redis instance; fall back to the
# Redis default port when the config file is absent.
REDIS_CONF = File.expand_path("../../config/environments/redis.conf", __FILE__)
PORT = if File.exist?(REDIS_CONF)
         redis_conf = Hash[*File.readlines(REDIS_CONF).map{|line| line.chomp.split}.map{|e| [e[0], e[1,100].join(",")]}.flatten]
         redis_conf["port"].to_i
       else
         6379
       end
SIDEKIQ_URL = "redis://localhost:#{PORT}/3"
warn "redis.conf: #{REDIS_CONF}"
warn "Redis port: #{PORT}"
warn "Sidekiq URL: #{SIDEKIQ_URL}"

Sidekiq.configure_server do |config|
  config.redis = { url: SIDEKIQ_URL }
end

Sidekiq.configure_client do |config|
  config.redis = { url: SIDEKIQ_URL }
end

# Convenience aliases so a Redis connection can be used like a Hash
# (db[key] / db[key] = value).
class Redis
  alias_method :[], :get
  alias_method :[]=, :set
end

# Sidekiq worker that watches an already-submitted SLURM job via `sacct`
# and mirrors its state into Redis until the job leaves RUNNING/PENDING.
class JobChecker
  include Sidekiq::Worker
  sidekiq_options queue: :default, retry: 5

  # Write +script_content+ to a uniquely named copy (millisecond-timestamp
  # suffix) under +log_dir+ and return its path.
  # NOTE(review): not called from #perform; kept for interface parity with
  # JobWorker (lib/worker4.rb) — confirm before removing.
  def generate_new_job_script(log_dir, script_basename, script_content)
    new_job_script = File.basename(script_basename) + "_" + Time.now.strftime("%Y%m%d%H%M%S%L")
    new_job_script = File.join(log_dir, new_job_script)
    # Block form closes the file even if writing raises.
    File.open(new_job_script, 'w') do |out|
      out.print script_content
      out.print "\necho __SCRIPT END__\n"
    end
    new_job_script
  end

  # Build the CSV status line "STATE,script,start/now,user,project".
  # Memoizes @start_time on first call so the start timestamp stays stable
  # across polls of the same job.
  def update_time_status(status, script_basename, user, project_number)
    @start_time ||= Time.now.strftime("%Y-%m-%d %H:%M:%S")
    time = Time.now.strftime("%Y-%m-%d %H:%M:%S")
    [status, script_basename, [@start_time, time].join("/"), user, project_number].join(',')
  end

  # Poll `sacct` for +job_id+ every WORKER_INTERVAL seconds and record:
  #   db0: job state line, db1: log file path, db2: per-project job map.
  def perform(job_id, script_basename, log_file, user, project_id)
    puts "JobID (in JobChecker): #{job_id}"
    db0 = Redis.new(port: PORT, db: 0) # state + alpha DB
    db1 = Redis.new(port: PORT, db: 1) # log DB
    db2 = Redis.new(port: PORT, db: 2) # project jobs DB
    db1[job_id] = log_file
    pre_state = nil
    @start_time = nil
    begin
      command = "sacct --jobs=#{job_id} --format=state"
      ret = `#{command}`
      state = ret.split(/\n/).last.strip
      db0[job_id] = update_time_status(state, script_basename, user, project_id)

      unless state == pre_state
        # Fixed: removed the redundant second db0 write that was here —
        # the state line is already recorded unconditionally above.
        # NOTE(review): eval on data read back from Redis is fragile and
        # unsafe if the DB is ever written by untrusted code; a real
        # serialization format (e.g. JSON) would be preferable.
        project_jobs = Hash[*eval((db2[project_id] || []).to_s)]
        project_jobs[job_id] = state
        # Keep only the most recent 100 jobs (200 flattened elements).
        db2[project_id] = project_jobs.to_a.flatten.last(200).to_s
      end
      pre_state = state
      sleep WORKER_INTERVAL
    end while state =~ /RUNNING/ or state =~ /PENDING/ or state =~ /---/
  end
end
data/lib/worker4.rb ADDED
@@ -0,0 +1,80 @@
require 'sidekiq'
require 'redis'

# Polling interval between successive sacct state checks, in seconds.
WORKER_INTERVAL = 10 # [s]

# NOTE(review): the Redis port (6380) and Sidekiq URL are hard-coded here,
# while lib/job_checker.rb derives them from config/environments/redis.conf.
# Consider sharing that logic so the two files cannot drift apart.
Sidekiq.configure_server do |config|
  config.redis = { url: 'redis://localhost:6380/3' }
end

Sidekiq.configure_client do |config|
  config.redis = { url: 'redis://localhost:6380/3' }
end

# Convenience aliases so a Redis connection can be used like a Hash
# (db[key] / db[key] = value).
class Redis
  alias_method :[], :get
  alias_method :[]=, :set
end

# Sidekiq worker that submits a job script via `sbatch` and then polls
# `sacct` until the job leaves RUNNING/PENDING, mirroring its state into
# Redis. Largely duplicates JobChecker in lib/job_checker.rb — candidate
# for consolidation.
class JobWorker
  include Sidekiq::Worker
  sidekiq_options queue: :default, retry: 5

  # Write +script_content+ to a uniquely named copy (millisecond-timestamp
  # suffix) under +log_dir+ and return its path.
  def generate_new_job_script(log_dir, script_basename, script_content)
    new_job_script = File.basename(script_basename) + "_" + Time.now.strftime("%Y%m%d%H%M%S%L")
    new_job_script = File.join(log_dir, new_job_script)
    # Block form closes the file even if writing raises.
    File.open(new_job_script, 'w') do |out|
      out.print script_content
      out.print "\necho __SCRIPT END__\n"
    end
    new_job_script
  end

  # Build the CSV status line "STATE,script,start/now,user,project".
  # Memoizes @start_time on first call so the start timestamp stays stable.
  def update_time_status(status, script_basename, user, project_number)
    @start_time ||= Time.now.strftime("%Y-%m-%d %H:%M:%S")
    time = Time.now.strftime("%Y-%m-%d %H:%M:%S")
    [status, script_basename, [@start_time, time].join("/"), user, project_number].join(',')
  end

  # Submit the script with sbatch, then poll its state and record:
  #   db0: job state line, db1: log file path, db2: per-project job map.
  def perform(project_id, log_dir, script_basename, script_content)
    job_script = generate_new_job_script(log_dir, script_basename, script_content)
    log_file = job_script + "_o.log"
    err_file = job_script + "_e.log"
    command = "sbatch -o #{log_file} -e #{err_file} -N 1 #{job_script}"
    puts command
    ret = `#{command}`
    # sbatch prints "Submitted batch job <id>"; the id is the last token.
    job_id = ret.chomp.split.last
    puts "JobID: #{job_id}"
    db0 = Redis.new(port: 6380, db: 0) # state + alpha DB
    db1 = Redis.new(port: 6380, db: 1) # log DB
    db2 = Redis.new(port: 6380, db: 2) # project jobs DB
    db1[job_id] = log_file
    pre_state = nil
    @start_time = nil
    begin
      command = "sacct --jobs=#{job_id} --format=state"
      puts command
      ret = `#{command}`
      state = ret.split(/\n/).last.strip
      puts "state: #{state}"
      db0[job_id] = update_time_status(state, script_basename, "sushi_lover", project_id)

      unless state == pre_state
        # Fixed: removed the redundant second db0 write that was here —
        # the state line is already recorded unconditionally above.
        # NOTE(review): eval on data read back from Redis is fragile;
        # prefer a real serialization format (e.g. JSON).
        project_jobs = Hash[*eval((db2[project_id] || []).to_s)]
        project_jobs[job_id] = state
        p project_jobs
        # Keep only the most recent 100 jobs (200 flattened elements).
        db2[project_id] = project_jobs.to_a.flatten.last(200).to_s
      end
      pre_state = state
      sleep WORKER_INTERVAL
    end while state =~ /RUNNING/ or state =~ /PENDING/ or state =~ /---/
  end
end
@@ -4,6 +4,9 @@
4
4
  require 'drb/drb'
5
5
  require 'fileutils'
6
6
  require 'csv'
7
+
8
+ require 'job_checker'
9
+
7
10
  begin
8
11
  require 'redis'
9
12
  DB_MODE = "Redis"
@@ -159,6 +162,7 @@ module WorkflowManager
159
162
  when "Redis"
160
163
  RedisDB.new(1, @redis_conf)
161
164
  end
165
+ @jobs = RedisDB.new(2, @redis_conf)
162
166
 
163
167
  @system_log = File.join(@log_dir, "system.log")
164
168
  @mutex = Mutex.new
@@ -172,15 +176,34 @@ module WorkflowManager
172
176
  log_puts("DB = #{DB_MODE}")
173
177
  log_puts("Cluster = #{@cluster.name}")
174
178
  log_puts("Server starts")
179
+ log_puts("Recovery check")
180
+ recovery_job_checker
181
+ end
182
+ def recovery_job_checker
183
+ @logs.transaction do |logs|
184
+ @statuses.transaction do |statuses|
185
+ statuses.each do |job_id, status|
186
+ # puts [job_id, status].join(",")
187
+ # 120249,RUNNING,QC_ventricles_100k.sh,2021-07-30 09:47:04/2021-07-30 09:47:04,masaomi,1535
188
+ stat, script_basename, time, user, project_number = status.split(",")
189
+ if stat == "RUNNING" or stat == "PENDING"
190
+ log_file = logs[job_id]
191
+ log_puts("JobID (in recovery check): #{job_id}")
192
+ puts "JobID (in recovery check): #{job_id}"
193
+ JobChecker.perform_async(job_id, script_basename, log_file, user, project_number)
194
+ end
195
+ end
196
+ end
197
+ end
175
198
  end
176
199
  def hello
177
- 'hello test hoge, '+ @cluster.name
200
+ 'hello hoge hoge bar boo bundle, '+ @cluster.name
178
201
  end
179
202
  def copy_commands(org_dir, dest_parent_dir, now=nil)
180
203
  @cluster.copy_commands(org_dir, dest_parent_dir, now)
181
204
  end
182
205
  def kill_job(job_id)
183
- status(job_id, 'fail')
206
+ status(job_id, 'FAIL')
184
207
  status = `#{@cluster.kill_command(job_id)}`
185
208
  end
186
209
  def delete_command(target)
@@ -272,6 +295,17 @@ module WorkflowManager
272
295
  Thread.current.kill
273
296
  end
274
297
  end
298
+ def start_monitoring3(script_path, script_content, user='sushi_lover', project_number=0, sge_options='', log_dir='')
299
+ script_basename = File.basename(script_path)
300
+ job_id, log_file, command = @cluster.submit_job(script_path, script_content, sge_options)
301
+ #p command
302
+ #p log_file
303
+ #p job_id
304
+ puts "JobID (in WorkflowManager): #{job_id}"
305
+ sleep 1
306
+ JobChecker.perform_async(job_id, script_basename, log_file, user, project_number)
307
+ job_id
308
+ end
275
309
  def start_monitoring2(script_path, script_content, user='sushi_lover', project_number=0, sge_options='', log_dir='')
276
310
  # script_path is only used to generate a log file name
277
311
  # It is not used to read the script contents
@@ -418,7 +452,7 @@ module WorkflowManager
418
452
  #@statuses.open(@db_stat)
419
453
  @statuses.transaction do |statuses|
420
454
  if new_status and stat = statuses[job_id.to_s]
421
- status_list = ['success', 'running', 'pending', 'fail']
455
+ status_list = ['COMPLETED', 'RUNNING', 'PENDING', 'FAIL']
422
456
  if status_list.include?(new_status)
423
457
  items = stat.split(/,/)
424
458
  items.shift
@@ -438,17 +472,19 @@ module WorkflowManager
438
472
  job_idsh = if job_ids
439
473
  Hash[*(job_ids.split(',')).map{|job_id| [job_id, true]}.flatten]
440
474
  end
441
- @statuses.transaction do |statuses|
442
- statuses.each do |key, value|
443
- if project_number
444
- if x = value.split(/,/)[4].to_i==project_number.to_i
445
- s << [key, value]
446
- end
447
- else
448
- s << [key, value]
475
+ s_ = {}
476
+ unless job_ids
477
+ @jobs.transaction do |jobs|
478
+ if project_jobs = jobs[project_number]
479
+ s_ = Hash[*eval(project_jobs)]
449
480
  end
450
481
  end
451
482
  end
483
+ @statuses.transaction do |statuses|
484
+ s_.each do |job_id, stat|
485
+ s << [job_id, statuses[job_id]]
486
+ end
487
+ end
452
488
  if job_ids
453
489
  s = s.select{|job_id, stat| job_idsh[job_id]}
454
490
  end
@@ -1,3 +1,3 @@
# Gem version constant, read by the gemspec and the server banner.
module WorkflowManager
  VERSION = "0.7.0"
end
@@ -0,0 +1,11 @@
#!/usr/bin/env ruby
# encoding: utf-8
# Version = '20210625-104318'
#
# Smoke test: enqueue test/test_job1.sh as a Sidekiq JobWorker job
# for project 1001, logging under ./logs.

require './lib/worker4'

script_file = "./test/test_job1.sh"
script_content = File.read(script_file)
log_dir = "./logs"
script_basename = File.basename(script_file)
JobWorker.perform_async(1001, log_dir, script_basename, script_content)
p "submitted test_job1.sh"
@@ -0,0 +1,11 @@
#!/usr/bin/env ruby
# encoding: utf-8
# Version = '20210625-162836'
#
# Smoke test: enqueue test/test_job1.sh through the Sidekiq JobWorker.
#
# FIX: requiring only 'workflow_manager' does not define JobWorker —
# workflow_manager loads lib/job_checker.rb (JobChecker), while JobWorker
# lives in lib/worker4.rb — so the perform_async call below raised
# NameError. Load worker4 explicitly as well.

require 'workflow_manager'
require './lib/worker4'

script_file = "./test/test_job1.sh"
script_content = File.read(script_file)
log_dir = "./logs"
script_basename = File.basename(script_file)
JobWorker.perform_async(1001, log_dir, script_basename, script_content)
p "submitted test_job1.sh"
data/test/job_list.rb ADDED
@@ -0,0 +1,50 @@
#!/usr/bin/env ruby
# encoding: utf-8
# Version = '20210723-134812'
#
# Debug utility: dump the contents of the workflow manager Redis
# databases (state DB, log DB, project-job DB).
# Usage: ruby job_list.rb [port]   (default port: 6380)
#
# FIX: removed ~20 lines of unreachable code that followed a bare `exit`
# in the original — it duplicated exactly what the loop above already
# prints for db0/db1/db2.

PORT = (ARGV[0] || 6380).to_i
require 'redis'
db0 = Redis.new(port: PORT, db: 0)
db1 = Redis.new(port: PORT, db: 1)
db2 = Redis.new(port: PORT, db: 2)

class Redis
  # Print every key/value pair of this database, sorted by key,
  # one tab-separated pair per line.
  def show_all
    keys.sort.each do |key|
      puts [key, get(key)].join("\t")
    end
  end
end

dbs = [db0, db1, db2]
db_notes = ["state DB", "log DB", "project job DB"]

dbs.each.with_index do |db, i|
  puts ["db#{i}", db_notes[i]].join("\t")
  db.show_all
  puts
end
data/test/test_job1.sh ADDED
@@ -0,0 +1,5 @@
#!/bin/bash
# Minimal SLURM test job: print a start marker, idle for a minute so the
# job is observable in RUNNING state, then print the end marker.

echo "Job1"
sleep 60
echo "END"
@@ -19,6 +19,6 @@ Gem::Specification.new do |spec|
19
19
  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
20
20
  spec.require_paths = ["lib"]
21
21
 
22
- spec.add_development_dependency "bundler", "~> 1.3"
22
+ spec.add_development_dependency "bundler", "~> 2.2.10"
23
23
  spec.add_development_dependency "rake"
24
24
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: workflow_manager
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.0
4
+ version: 0.7.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Functional Genomics Center Zurich
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-12-18 00:00:00.000000000 Z
11
+ date: 2021-07-30 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -16,14 +16,14 @@ dependencies:
16
16
  requirements:
17
17
  - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: '1.3'
19
+ version: 2.2.10
20
20
  type: :development
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: '1.3'
26
+ version: 2.2.10
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: rake
29
29
  requirement: !ruby/object:Gem::Requirement
@@ -76,6 +76,9 @@ files:
76
76
  - config/environments/development.rb
77
77
  - config/environments/production.rb
78
78
  - config/environments/redis.conf
79
+ - config/environments/sidekiq.yml
80
+ - lib/job_checker.rb
81
+ - lib/worker4.rb
79
82
  - lib/workflow_manager.rb
80
83
  - lib/workflow_manager/cluster.rb
81
84
  - lib/workflow_manager/optparse_ex.rb
@@ -84,6 +87,10 @@ files:
84
87
  - spec/cluster_spec.rb
85
88
  - spec/server_spec.rb
86
89
  - spec/spec_helper.rb
90
+ - test/call_worker4.rb
91
+ - test/call_worker_method.rb
92
+ - test/job_list.rb
93
+ - test/test_job1.sh
87
94
  - workflow_manager.gemspec
88
95
  homepage: ''
89
96
  licenses:
@@ -104,7 +111,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
104
111
  - !ruby/object:Gem::Version
105
112
  version: '0'
106
113
  requirements: []
107
- rubygems_version: 3.0.3
114
+ rubygems_version: 3.0.3.1
108
115
  signing_key:
109
116
  specification_version: 4
110
117
  summary: Workflow Manager manages job submissions using dRuby.
@@ -112,3 +119,7 @@ test_files:
112
119
  - spec/cluster_spec.rb
113
120
  - spec/server_spec.rb
114
121
  - spec/spec_helper.rb
122
+ - test/call_worker4.rb
123
+ - test/call_worker_method.rb
124
+ - test/job_list.rb
125
+ - test/test_job1.sh