cnvrg 1.6.0.5 → 1.6.0.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/cnvrg/api.rb +1 -1
- data/lib/cnvrg/cli.rb +12 -1
- data/lib/cnvrg/helpers/executer.rb +101 -0
- data/lib/cnvrg/job_cli.rb +56 -13
- data/lib/cnvrg/project.rb +40 -1
- data/lib/cnvrg/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: eec6f04b04951d9939b8a6b139fb4566d524b68ca01ace06e7f2be48fa92b6bb
|
4
|
+
data.tar.gz: 1aa365dca2cae40afb8dc1b8bddfd0029e9f376c708c8bed2ba66741094a2016
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6fb5ccb7658268ca0ccfcad00a1e490468f91bf77de5313b61aead3885fe64c4c2f50248a89a24488cf016f8cc53fa53a98a5d3f891deb6740dfecd4c682adec
|
7
|
+
data.tar.gz: 787f8aa03cfc15989ba97ee583144e83070aa31d774886269af1910d15a2bd81338a2ae4e1a44b60a1a8b60de5b2db9cc995df0cda1f12f046af9e7fefa67283
|
data/lib/cnvrg/api.rb
CHANGED
data/lib/cnvrg/cli.rb
CHANGED
@@ -865,6 +865,7 @@ module Cnvrg
|
|
865
865
|
begin
|
866
866
|
verify_logged_in(false)
|
867
867
|
log_start(__method__, args, options)
|
868
|
+
return if check_pod_restart[1] ## It means that all datasets downloaded successfully
|
868
869
|
commit = options["commit"] || commit
|
869
870
|
only_tree = options["only_tree"] || only_tree
|
870
871
|
read = options["read"] || read || false
|
@@ -1800,6 +1801,7 @@ module Cnvrg
|
|
1800
1801
|
begin
|
1801
1802
|
verify_logged_in(false)
|
1802
1803
|
log_start(__method__, args, options)
|
1804
|
+
return if check_pod_restart[0] ## It means that project downloaded successfully
|
1803
1805
|
url_parts = project_url.split("/")
|
1804
1806
|
project_index = Cnvrg::Helpers.look_for_in_path(project_url, "projects")
|
1805
1807
|
slug = url_parts[project_index + 1]
|
@@ -2449,6 +2451,7 @@ module Cnvrg
|
|
2449
2451
|
@dataset = Dataset.new(dataset_dir)
|
2450
2452
|
@files = Cnvrg::Datafiles.new(@dataset.owner, @dataset.slug, dataset: @dataset)
|
2451
2453
|
all_files = all_files
|
2454
|
+
@dataset.generate_idx
|
2452
2455
|
res = @dataset.compare_idx_download(all_files: all_files, desired_commit: commit)
|
2453
2456
|
unless CLI.is_response_success(res, false)
|
2454
2457
|
log_message("Cant find the desired commit, please check it or try to download without it.", Thor::Shell::Color::RED)
|
@@ -5367,6 +5370,15 @@ module Cnvrg
|
|
5367
5370
|
@image.update_image_activity(project.last_local_commit, nil)
|
5368
5371
|
end
|
5369
5372
|
|
5373
|
+
desc 'check_pod_restart', description: 'Check pod restart', hide: true
|
5374
|
+
def check_pod_restart
|
5375
|
+
Cnvrg::CLI.new.log_start(__method__, args, options)
|
5376
|
+
@project = Project.new(owner: ENV['CNVRG_OWNER'], slug: ENV['CNVRG_PROJECT'])
|
5377
|
+
@project.check_job_pod_restart
|
5378
|
+
rescue => e
|
5379
|
+
Cnvrg::Logger.log_error(e)
|
5380
|
+
[false, false]
|
5381
|
+
end
|
5370
5382
|
|
5371
5383
|
no_tasks do
|
5372
5384
|
def get_instance_type(instances)
|
@@ -6161,7 +6173,6 @@ module Cnvrg
|
|
6161
6173
|
end
|
6162
6174
|
end
|
6163
6175
|
|
6164
|
-
|
6165
6176
|
end
|
6166
6177
|
|
6167
6178
|
|
data/lib/cnvrg/helpers/executer.rb
CHANGED
@@ -1,4 +1,10 @@
|
|
1
1
|
class Cnvrg::Helpers::Executer
|
2
|
+
module CommandsStatus
|
3
|
+
ACK = :ack
|
4
|
+
STARTED = :started
|
5
|
+
FAILED = :failed
|
6
|
+
SUCCESS = :success
|
7
|
+
end
|
2
8
|
def initialize(project: nil, job_type: nil, job_id: nil, image: nil)
|
3
9
|
@image = image
|
4
10
|
@project = project || Cnvrg::Project.new(owner: ENV['CNVRG_OWNER'], slug: ENV['CNVRG_PROJECT'])
|
@@ -53,6 +59,10 @@ class Cnvrg::Helpers::Executer
|
|
53
59
|
# :title => command title, can replace the on_error, on_success fields
|
54
60
|
# :on_error_log => log to be logged on exit_code != 0
|
55
61
|
# :on_success_log => log to be logged on exit_code == 0
|
62
|
+
# when type == 'notify_command'
|
63
|
+
# notify to commands api about command progress
|
64
|
+
# when type == 'spawn'
|
65
|
+
# run in another process and detach from it
|
56
66
|
#
|
57
67
|
retries = cmd[:retries] || 1
|
58
68
|
resp = []
|
@@ -82,6 +92,22 @@ class Cnvrg::Helpers::Executer
|
|
82
92
|
commands.map{|k| k.with_indifferent_access}
|
83
93
|
end
|
84
94
|
|
95
|
+
def get_commands
|
96
|
+
resp = Cnvrg::API.request("#{base_url}/commands", "GET")
|
97
|
+
commands = resp["commands"]
|
98
|
+
commands.map{|k| k.with_indifferent_access}
|
99
|
+
end
|
100
|
+
|
101
|
+
def get_command(command_slug)
|
102
|
+
resp = Cnvrg::API.request("#{base_url}/commands/#{command_slug}", "GET")
|
103
|
+
command = resp["command"]
|
104
|
+
command.with_indifferent_access
|
105
|
+
end
|
106
|
+
|
107
|
+
def update_command(status, context, command_slug)
|
108
|
+
Cnvrg::API.request("#{base_url}/commands/#{command_slug}", "PUT", {status: status, context: context, timestamp: Time.now})
|
109
|
+
end
|
110
|
+
|
85
111
|
|
86
112
|
def update_git_commit
|
87
113
|
git_commit = `git rev-parse --verify HEAD`
|
@@ -93,11 +119,19 @@ class Cnvrg::Helpers::Executer
|
|
93
119
|
Cnvrg::API.request("#{base_url}/datasets/#{dataset}", "PUT", {status: status})
|
94
120
|
end
|
95
121
|
|
122
|
+
def monitor_command(command, command_slug)
|
123
|
+
monitor_single_command(command, command_slug)
|
124
|
+
end
|
125
|
+
|
96
126
|
private
|
97
127
|
def execute_helper(cmd)
|
98
128
|
case cmd[:type]
|
99
129
|
when 'notify'
|
100
130
|
return run_and_notify(cmd)
|
131
|
+
when 'notify_command'
|
132
|
+
return notify_command(cmd)
|
133
|
+
when 'spawn'
|
134
|
+
return spawn_command(cmd)
|
101
135
|
when 'file_exists'
|
102
136
|
if File.exists? cmd[:file]
|
103
137
|
return execute_cmds(cmd[:exists_commands]) if cmd[:exists_commands].present?
|
@@ -111,6 +145,22 @@ class Cnvrg::Helpers::Executer
|
|
111
145
|
end
|
112
146
|
end
|
113
147
|
|
148
|
+
def notify_command(cmd)
|
149
|
+
exit_status, output, errors, start_time, end_time = regular_command(cmd)
|
150
|
+
context ={
|
151
|
+
exit_status: exit_status,
|
152
|
+
output: output,
|
153
|
+
errors: errors,
|
154
|
+
start_time: start_time,
|
155
|
+
end_time: end_time
|
156
|
+
}
|
157
|
+
if exit_status == 0
|
158
|
+
update_command(CommandsStatus::SUCCESS, context, cmd[:command_slug])
|
159
|
+
else
|
160
|
+
update_command(CommandsStatus::FAILED, context, cmd[:command_slug])
|
161
|
+
end
|
162
|
+
end
|
163
|
+
|
114
164
|
def run_and_notify(cmd)
|
115
165
|
with_logs = cmd[:logs]
|
116
166
|
cmd = init_cmd_logs(cmd)
|
@@ -134,6 +184,11 @@ class Cnvrg::Helpers::Executer
|
|
134
184
|
end
|
135
185
|
|
136
186
|
|
187
|
+
def spawn_command(cmd={})
|
188
|
+
pid = Process.spawn(cmd[:command])
|
189
|
+
Process.detach(pid)
|
190
|
+
end
|
191
|
+
|
137
192
|
def regular_command(cmd = {})
|
138
193
|
errors = []
|
139
194
|
output = []
|
@@ -205,4 +260,50 @@ class Cnvrg::Helpers::Executer
|
|
205
260
|
end
|
206
261
|
end
|
207
262
|
|
263
|
+
|
264
|
+
|
265
|
+
def monitor_single_command(cmd, command_slug)
|
266
|
+
log = ""
|
267
|
+
PTY.spawn(cmd[:command]) do |stdout, stdin, pid, stderr|
|
268
|
+
begin
|
269
|
+
stdout.each do |line|
|
270
|
+
log += line + "\n"
|
271
|
+
if cmd[:success_log] and line.match(cmd[:success_log])
|
272
|
+
puts "Match found!"
|
273
|
+
context = {log: line, pid: Process.pid}
|
274
|
+
update_command(CommandsStatus::SUCCESS, context, command_slug)
|
275
|
+
end
|
276
|
+
end
|
277
|
+
|
278
|
+
if stderr
|
279
|
+
stderr.each do |err|
|
280
|
+
log += err + "\n"
|
281
|
+
#context = {error: err}
|
282
|
+
#update_command(CommandsStatus::FAILED, context, command_slug)
|
283
|
+
#log << {time: Time.now, message: err, type: "stderr"}
|
284
|
+
end
|
285
|
+
end
|
286
|
+
|
287
|
+
rescue Errno::EIO => e
|
288
|
+
Cnvrg::Logger.log_error(e)
|
289
|
+
context = {error: e.message}
|
290
|
+
update_command(CommandsStatus::FAILED, context, command_slug)
|
291
|
+
rescue Errno::ENOENT => e
|
292
|
+
exp_success = false
|
293
|
+
context = {error: e.message}
|
294
|
+
update_command(CommandsStatus::FAILED, context, command_slug)
|
295
|
+
Cnvrg::Logger.info("command \"#{cmd[:command]}\" couldn't be executed, verify command is valid")
|
296
|
+
Cnvrg::Logger.log_error(e)
|
297
|
+
rescue => e
|
298
|
+
#res = @exp.end(log, 1, start_commit, 0, 0)
|
299
|
+
context = {error: e.message}
|
300
|
+
update_command(CommandsStatus::FAILED, context, command_slug)
|
301
|
+
Cnvrg::Logger.info("Error occurred,aborting")
|
302
|
+
Cnvrg::Logger.log_error(e)
|
303
|
+
exit(0)
|
304
|
+
end
|
305
|
+
::Process.wait pid
|
306
|
+
end
|
307
|
+
end
|
308
|
+
|
208
309
|
end
|
data/lib/cnvrg/job_cli.rb
CHANGED
@@ -4,10 +4,15 @@ module Cnvrg
|
|
4
4
|
desc 'log', description: '', hide: true
|
5
5
|
method_option :level, :type => :string, :aliases => ["-l", "--level"], :default => 'info'
|
6
6
|
method_option :step, :type => :string, :aliases => ["-s", "--step"], :default => nil
|
7
|
+
method_option :restart, :type => :boolean, :aliases => ["-r", "--restart"], :default => false
|
7
8
|
def log(*logs)
|
8
9
|
Cnvrg::CLI.new.log_start(__method__, args, options)
|
9
10
|
@project = Project.new(owner: ENV['CNVRG_OWNER'], slug: ENV['CNVRG_PROJECT'])
|
10
|
-
|
11
|
+
if options['restart'] == @project.check_job_pod_restart[0] or options['step'] == "ready"
|
12
|
+
@project.job_log(logs, level: options['level'], step: options['step'])
|
13
|
+
else
|
14
|
+
@project.job_log(nil, level: options['level'], step: options['step'])
|
15
|
+
end
|
11
16
|
end
|
12
17
|
|
13
18
|
|
@@ -38,19 +43,57 @@ module Cnvrg
|
|
38
43
|
commands = @executer.get_requirements_commands
|
39
44
|
@executer.execute_cmds(commands)
|
40
45
|
end
|
41
|
-
desc 'start/stop tensorboard', hide: true
|
42
|
-
def tensorboard(start:true)
|
43
|
-
cli = Cnvrg::CLI.new
|
44
|
-
cli.log_start(__method__, args, options)
|
45
|
-
@project = Project.new(nil, owner: ENV['CNVRG_OWNER'], slug: ENV['CNVRG_PROJECT'])
|
46
|
-
@executer = Helpers::Executer.new(project: @project, job_type: ENV['CNVRG_JOB_TYPE'], job_id: ENV['CNVRG_JOB_ID'])
|
47
|
-
commands = []
|
48
|
-
if start
|
49
|
-
commands << "nohup tensorboard --logdir=/home/ds/notebooks </dev/null &"
|
50
|
-
@executer.execute_cmds(commands)
|
51
|
-
else
|
52
46
|
|
53
|
-
|
47
|
+
desc 'poll_commands', description: 'Poll waiting commands', hide: true
|
48
|
+
def poll_commands
|
49
|
+
cli = Cnvrg::CLI.new
|
50
|
+
cli.log_start(__method__, args, options)
|
51
|
+
@project = Project.new(nil, owner: ENV['CNVRG_OWNER'], slug: ENV['CNVRG_PROJECT'])
|
52
|
+
@executer = Helpers::Executer.new(project: @project, job_type: ENV['CNVRG_JOB_TYPE'], job_id: ENV['CNVRG_JOB_ID'])
|
53
|
+
while true
|
54
|
+
begin
|
55
|
+
commands = @executer.get_commands
|
56
|
+
@executer.execute_cmds(commands)
|
57
|
+
rescue => e
|
58
|
+
Cnvrg::Logger.log_error(e)
|
59
|
+
end
|
60
|
+
sleep 10
|
61
|
+
end
|
62
|
+
end
|
63
|
+
|
64
|
+
desc 'run_command', description: 'Poll waiting commands', hide: true
|
65
|
+
method_option :command_slug, :type => :string, :aliases => ["-s", "--slug"], :default => nil
|
66
|
+
def run_command
|
67
|
+
Cnvrg::Logger.info("in run command")
|
68
|
+
cli = Cnvrg::CLI.new
|
69
|
+
cli.log_start(__method__, args, options)
|
70
|
+
command_slug = options['command_slug']
|
71
|
+
#TODO handle if missing
|
72
|
+
@project = Project.new(nil, owner: ENV['CNVRG_OWNER'], slug: ENV['CNVRG_PROJECT'])
|
73
|
+
@executer = Helpers::Executer.new(project: @project, job_type: ENV['CNVRG_JOB_TYPE'], job_id: ENV['CNVRG_JOB_ID'])
|
74
|
+
command = @executer.get_command(command_slug)
|
75
|
+
@executer.monitor_command(command, command_slug)
|
76
|
+
end
|
77
|
+
|
78
|
+
desc 'set_pod_restart', description: 'Set pod restart', hide: true
|
79
|
+
def set_pod_restart
|
80
|
+
Cnvrg::CLI.new.log_start(__method__, args, options)
|
81
|
+
@project = Project.new(owner: ENV['CNVRG_OWNER'], slug: ENV['CNVRG_PROJECT'])
|
82
|
+
@project.set_job_pod_restart
|
83
|
+
end
|
84
|
+
|
85
|
+
desc 'pre_pod_restart', description: 'Pre pod restart actions', hide: true
|
86
|
+
def pre_pod_restart
|
87
|
+
Cnvrg::CLI.new.log_start(__method__, args, options)
|
88
|
+
@project = Project.new(owner: ENV['CNVRG_OWNER'], slug: ENV['CNVRG_PROJECT'])
|
89
|
+
@project.pre_job_pod_restart
|
90
|
+
end
|
91
|
+
|
92
|
+
desc 'started', description: 'set job status to started after job restart', hide: true
|
93
|
+
def started
|
94
|
+
Cnvrg::CLI.new.log_start(__method__, args, options)
|
95
|
+
@project = Project.new(owner: ENV['CNVRG_OWNER'], slug: ENV['CNVRG_PROJECT'])
|
96
|
+
@project.set_job_started
|
54
97
|
end
|
55
98
|
end
|
56
99
|
end
|
data/lib/cnvrg/project.rb
CHANGED
@@ -649,7 +649,9 @@ module Cnvrg
|
|
649
649
|
|
650
650
|
|
651
651
|
def update_job_jupyter_token(job_type, job_id, token)
|
652
|
-
|
652
|
+
owner = self.owner || ENV['CNVRG_OWNER']
|
653
|
+
slug = self.slug || ENV['CNVRG_PROJECT']
|
654
|
+
base_url = "users/#{owner}/projects/#{slug}/jobs/#{job_type.underscore}/#{job_id}"
|
653
655
|
Cnvrg::API.request("#{base_url}/update_jupyter_token", "POST", {token: token})
|
654
656
|
end
|
655
657
|
|
@@ -736,6 +738,43 @@ module Cnvrg
|
|
736
738
|
end
|
737
739
|
end
|
738
740
|
|
741
|
+
def set_job_pod_restart(job_type: nil, job_id: nil)
|
742
|
+
job_type ||= ENV['CNVRG_JOB_TYPE']
|
743
|
+
job_id ||= ENV['CNVRG_JOB_ID']
|
744
|
+
if job_type.blank? or job_id.blank?
|
745
|
+
raise StandardError.new("Cant find job env variables")
|
746
|
+
end
|
747
|
+
Cnvrg::API.request("users/#{@owner}/projects/#{@slug}/jobs/#{job_type.underscore}/#{job_id}/set_pod_restart", "POST", {job_type: job_type, job_id: job_id})
|
748
|
+
end
|
749
|
+
|
750
|
+
def check_job_pod_restart(job_type: nil, job_id: nil)
|
751
|
+
job_type ||= ENV['CNVRG_JOB_TYPE']
|
752
|
+
job_id ||= ENV['CNVRG_JOB_ID']
|
753
|
+
if job_type.blank? or job_id.blank?
|
754
|
+
raise StandardError.new("Cant find job env variables")
|
755
|
+
end
|
756
|
+
resp = Cnvrg::API.request("users/#{@owner}/projects/#{@slug}/jobs/#{job_type.underscore}/#{job_id}/check_pod_restart", "GET", {job_type: job_type, job_id: job_id})
|
757
|
+
return [false, false] if resp.blank?
|
758
|
+
Cnvrg::Logger.log_info("Checked for pod restart got response #{resp}")
|
759
|
+
[resp['project_downloaded'], resp['dataset_downloaded']]
|
760
|
+
end
|
739
761
|
|
762
|
+
def pre_job_pod_restart
|
763
|
+
job_type ||= ENV['CNVRG_JOB_TYPE']
|
764
|
+
job_id ||= ENV['CNVRG_JOB_ID']
|
765
|
+
if job_type.blank? or job_id.blank?
|
766
|
+
raise StandardError.new("Cant find job env variables")
|
767
|
+
end
|
768
|
+
Cnvrg::API.request("users/#{@owner}/projects/#{@slug}/jobs/#{job_type.underscore}/#{job_id}/pre_pod_restart", "POST", {job_type: job_type, job_id: job_id})
|
769
|
+
end
|
770
|
+
|
771
|
+
def set_job_started
|
772
|
+
job_type ||= ENV['CNVRG_JOB_TYPE']
|
773
|
+
job_id ||= ENV['CNVRG_JOB_ID']
|
774
|
+
if job_type.blank? or job_id.blank?
|
775
|
+
raise StandardError.new("Cant find job env variables")
|
776
|
+
end
|
777
|
+
Cnvrg::API.request("users/#{@owner}/projects/#{@slug}/jobs/#{job_type.underscore}/#{job_id}/set_started", "POST", {job_type: job_type, job_id: job_id})
|
778
|
+
end
|
740
779
|
end
|
741
780
|
end
|
data/lib/cnvrg/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cnvrg
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.6.0.5
|
4
|
+
version: 1.6.0.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Yochay Ettun
|
@@ -476,7 +476,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
476
476
|
- !ruby/object:Gem::Version
|
477
477
|
version: '0'
|
478
478
|
requirements: []
|
479
|
-
rubygems_version: 3.0.
|
479
|
+
rubygems_version: 3.0.4
|
480
480
|
signing_key:
|
481
481
|
specification_version: 4
|
482
482
|
summary: A CLI tool for interacting with cnvrg.io.
|