cnvrg 1.6.38 → 1.9.5

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,188 @@
1
+ class Cnvrg::Helpers::Agent
2
+
3
# Lifecycle states reported for a command.
module Status
  STARTED = :started
  RUNNING = :running
  FINISHED = :finished
  # Deliberately a String rather than a Symbol: Executer#execute_cmds compares
  # it with the "status" field read from an API response. Frozen so the shared
  # constant cannot be mutated in place.
  ABORTED = "aborted".freeze
end
9
+
10
# Tags accepted by log_internal's +level:+ argument.
module LogLevel
  INFO  = :info   # default level, emitted as a JSON record
  PURE  = :pure   # line is printed exactly as received
  ERROR = :error  # emitted as a JSON record tagged as an error
end
15
+
16
+ #### This class represent a single command in the system.
17
+ #### it runs under an executer (machine_activity) so it should have all the executer
18
+ #### params
19
# Builds one command that runs under an executer (machine activity).
#
# @param executer [Object] owner of the command; +slave_id+ is read from it
#   when the command targets the slave container
# @param slug [String] command identifier, used in URLs and log records
# @param command [String] shell command line to run
# @param container_name [String, nil] "slave" (any case) wraps the command in
#   +docker exec+; anything else, including nil, runs it as given
# @param send_log_interval [Numeric, nil] seconds between periodic log pushes
# @param timeout [Numeric, nil] seconds before the command is timed out;
#   blank or negative means no limit
# @param logs_regex [Array<String>, nil] patterns used to filter reported logs
# @param send_logs [Boolean] when true every output line is reported
# @param files_exist [Array<String>] files that must exist before running
# @param retries [Integer, nil] extra attempts requested after a failure
# @param sleep_before_retry [Numeric] seconds to wait before each retry
# @param single_quotes [Boolean] quote the wrapped command with ' instead of "
# @param docker_user [String, nil] value for +docker exec --user+
# @param use_bash [Boolean] run through "bash -l" instead of "sh"
# @param kwargs [Hash] extra keys are accepted and ignored
def initialize(executer: nil, slug: nil, command: nil, container_name: nil, send_log_interval: 60, timeout: -1, logs_regex: [], async: false, send_logs: false, files_exist: [], retries: 0, sleep_before_retry: 30, single_quotes: false, docker_user: nil, use_bash: false, **kwargs)
  @executer = executer
  @slug = slug
  @files_exist = files_exist
  @container_name = container_name
  # container_name defaults to nil, so coerce before downcasing instead of
  # raising NoMethodError for commands that do not name a container.
  @run_in_slave = @container_name.to_s.downcase == "slave"
  @log_interval = send_log_interval
  # https://ruby-doc.org/stdlib-2.5.1/libdoc/timeout/rdoc/Timeout.html timeout should be 0 for running forever
  @timeout = (timeout.blank? || timeout.negative?) ? 0 : timeout
  @logs_regex = logs_regex || []
  @async = async
  @command = command
  @send_logs = send_logs
  # How many times the user asked to re-run the command. Falls back to 0 so a
  # nil value cannot reach the retry arithmetic later on.
  @retries = retries.try(:to_i) || 0
  @sleep_before_retry = sleep_before_retry
  @real_execution_retries = 0 ## How many times the command really executed until success
  @single_quotes = single_quotes
  @docker_user = docker_user.present? ? " --user #{docker_user}" : ""
  @shell_type = use_bash ? "bash -l" : "sh"
  if @run_in_slave
    # NOTE(review): the command is spliced into the quoted string unescaped;
    # a command containing the chosen quote character will break the wrapper.
    quote = @single_quotes ? "'" : "\""
    @command = "docker exec #{@docker_user} -it #{@executer.slave_id} #{@shell_type} -c #{quote}#{@command}#{quote}"
  end
  @output = []
  @errors = []
  @exit_status = nil
  @is_running = true
  @pid = nil
end
58
+
59
# API path of this command: the executer's activity URL, then "commands",
# then this command's slug.
def base_url
  segments = [@executer.activity_url, "commands", @slug]
  segments.join("/")
end
62
+
63
# Checks the command's file preconditions.
#
# @return [Boolean] true when no files are required or all of them exist;
#   false (after logging the first missing path) otherwise
def should_run?
  # Array() also tolerates nil and a single path given as a String.
  # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
  missing = Array(@files_exist).find { |file| !File.exist?(file) }
  return true if missing.nil?

  log_internal("Can't find file #{missing}, stopping the job")
  false
end
74
+
75
+
76
# Runs the command end to end: announces the start, executes it when the file
# preconditions hold (exit status 127 is recorded when they do not), then
# logs and reports the final status.
def exec!
  log_internal("Command: #{@command} with slug: #{@slug} started!")
  runnable = should_run?
  @exit_status = 127 unless runnable
  if runnable
    send_logs(status: Status::STARTED)
    periodic_thread
    execute_command
  end
  retries_note = @real_execution_retries > 0 ? " after #{@real_execution_retries} retries" : ""
  log_internal("Command: #{@command} with slug: #{@slug} finished#{retries_note}")
  send_logs(exit_status: @exit_status, status: Status::FINISHED)
end
90
+
91
# Drains both buffers.
#
# @return [Array(Array, Array)] everything currently held in @output and
#   @errors, in that order; both buffers are left empty
def get_logs_to_send
  [@output, @errors].map { |buffer| buffer.pop(buffer.length) }
end
96
+
97
+
98
# Starts a background thread that pushes buffered logs every @log_interval
# seconds until an exit status has been recorded. The thread exits at once
# when no interval is configured.
#
# @return [Thread] the started thread
def periodic_thread
  Thread.new do
    until @exit_status.present?
      Thread.exit if @log_interval.blank?
      sleep(@log_interval)
      send_logs
    end
  end
end
107
+
108
# Consumes one retry: decrements the remaining budget, waits
# @sleep_before_retry seconds, counts the attempt and runs the command again.
#
# @return whatever execute_command returns
def retry_command
  @retries = @retries - 1
  sleep(@sleep_before_retry)
  @real_execution_retries = @real_execution_retries + 1
  execute_command
end
114
+
115
+ def execute_command
116
+ Timeout.timeout(@timeout) do
117
+ PTY.spawn(@command) do |stdout, stdin, pid, stderr|
118
+ @pid = pid
119
+ begin
120
+ if stdout.present?
121
+ stdout.each do |line|
122
+ log_internal(line, level: LogLevel::PURE)
123
+ line = line.strip.gsub(/\e\[([;\d]+)?m/, '')
124
+ @output << {log: line, timestamp: Time.now}
125
+ end
126
+ end
127
+
128
+ if stderr.present?
129
+ stderr.each do |line|
130
+ line = line.strip.gsub(/\e\[([;\d]+)?m/, '')
131
+ log_internal(line, level: LogLevel::ERROR)
132
+ @errors << {log: line, timestamp: Time.now}
133
+ end
134
+ end
135
+ rescue Errno::EIO => e
136
+ next
137
+ rescue => e
138
+ log_internal(e.message, level: LogLevel::ERROR)
139
+ log_internal(e.backtrace.join("\n"), level: LogLevel::ERROR)
140
+ @errors << {log: e.message, timestamp: Time.now}
141
+ end
142
+ ::Process.wait pid
143
+ end
144
+ end
145
+ @exit_status = $?.exitstatus
146
+ rescue Timeout::Error
147
+ Process.kill(0, @pid)
148
+ @errors << {log: "Command timed out!", timestamp: Time.now}
149
+ log_internal("Command timed out!", level: LogLevel::ERROR)
150
+ @exit_status = 124
151
+ ensure
152
+ retry_command if @retries != 0 and @exit_status !=0
153
+ @exit_status
154
+ end
155
+
156
+ private
157
# Pushes buffered logs and the command state to the API.
#
# Regular logs are filtered by the configured regexes unless the command
# failed (non-zero exit status), in which case everything is sent. Nothing is
# sent when there are no logs, no exit status and the status is RUNNING, which
# is what lets the periodic thread call this on every tick.
def send_logs(exit_status: nil, status: Status::RUNNING)
  logs, error_logs = get_logs_to_send
  not_failed = exit_status.blank? || exit_status == 0
  logs = filter_logs_by_regex(logs) if not_failed

  nothing_to_report = logs.blank? && error_logs.blank? && exit_status.blank? && status == Status::RUNNING
  return if nothing_to_report

  payload = {
    logs: logs,
    error_logs: error_logs,
    exit_status: exit_status,
    status: status,
    execution_retries: @real_execution_retries,
    pid: @pid
  }
  Cnvrg::API.request(base_url, 'PUT', payload)
end
170
+
171
# Writes one log entry to standard output and flushes it.
#
# PURE entries are printed verbatim; every other level is printed as a JSON
# record carrying the level, a timestamp, the command slug and the executer's
# machine activity.
def log_internal(log, level: LogLevel::INFO)
  rendered =
    if level == LogLevel::PURE
      log
    else
      {log: log, level: level, timestamp: Time.now, command: @slug, machine_activity: @executer.machine_activity}.to_json
    end
  puts(rendered)
  STDOUT.flush
end
179
+
180
# Selects the log entries that should be reported.
#
# @param logs [Array<Hash>] entries with a :log string
# @return [Array<Hash>] every entry when @send_logs is set, otherwise only the
#   entries whose :log matches at least one pattern in @logs_regex
def filter_logs_by_regex(logs)
  # With nothing to filter the patterns are never compiled, so an invalid
  # pattern still only raises when there is a line to test it against.
  return logs.dup if @send_logs || logs.empty?

  # Compile each pattern once instead of once per log line.
  patterns = @logs_regex.map { |regexp_str| Regexp.new(regexp_str) }
  logs.select { |log| patterns.any? { |pattern| pattern.match?(log[:log]) } }
end
188
+ end
@@ -1,309 +1,213 @@
1
+ require 'cnvrg/helpers/agent'
1
2
  class Cnvrg::Helpers::Executer
2
- module CommandsStatus
3
- ACK = :ack
4
- STARTED = :started
5
- FAILED = :failed
6
- SUCCESS = :success
7
- end
8
- def initialize(project: nil, job_type: nil, job_id: nil, image: nil)
9
- @image = image
10
- @project = project || Cnvrg::Project.new(owner: ENV['CNVRG_OWNER'], slug: ENV['CNVRG_PROJECT'])
11
- @job_type = job_type || ENV['CNVRG_JOB_TYPE']
12
- @job_id = job_id || ENV['CNVRG_JOB_ID']
13
- if @job_id.blank?
14
- Cnvrg::CLI.log_message("Cant find job, exiting.", 'red')
15
- exit(1)
16
- end
17
- end
3
+ attr_reader :machine_activity, :agent_id, :slave_id
18
4
 
19
5
 
20
- def self.get_executer
21
- if ENV["CNVRG_JOB_ID"].blank?
22
- return
23
- end
24
- self.new
25
- end
26
-
27
- def fetch_commands(block: nil, key: nil)
28
- resp = Cnvrg::API.request("#{base_url}/commands", "GET", {block: block})
29
- commands = decrypt_commands(resp["commands"], resp["key"], resp["iv"])
30
- commands.map{|k| k.with_indifferent_access}
6
+ ### this class represent a machine_activity. it will poll the commands, communicate with the
7
+ # server (poll commands) and let the server know the status of this executer.
8
# Stores the executer's identity and polling interval and creates the queues
# it works from. The container ids start as nil; they are filled in later by
# executer_stats.
def initialize(owner: nil, machine_activity: nil, poll_every: 30, job_id: nil)
  @owner, @job_id = owner, job_id
  @poll_every = poll_every
  @machine_activity = machine_activity
  @commands_q, @files_q = Queue.new, Queue.new
  @agent_id = @slave_id = nil
end
32
18
 
33
- def decrypt_commands(text,key,iv)
34
- text, key, iv = [text,key,iv].map{|x| x.unpack('m')[0]}
35
- decipher = OpenSSL::Cipher::AES256.new :CBC
36
- decipher.decrypt
37
- decipher.key = key
38
- decipher.iv = iv
39
- commands = decipher.update(text) + decipher.final
40
- JSON.parse(commands)
41
- end
42
-
43
- def execute(cmd)
44
- ## execute the command for running
45
- # cmd will have to following fields
46
- #
47
- # :command => the command to execute
48
- # :type => the command type, 'notify' or nil
49
- # :timeout => the timeout for the command in seconds (default is 60 hours)
50
- # :retries => integer, default 1
51
- #
52
- # when type == 'file_exists'
53
- # 'file' => string => file to check (fullpath)
54
- # 'exists_commands' => list of commands in case file exists
55
- # 'non_exists_commands' => list of commands in case file doesnt exists
56
- # when type == 'notify'
57
- # :before_execute_log => log to be logged before execution
58
- # :logs => boolean => add the execution logs to the job logs
59
- # :title => command title, can replace the on_error, on_success fields
60
- # :on_error_log => log to be logged on exit_code != 0
61
- # :on_success_log => log to be logged on exit_code == 0
62
- # when type == 'notify_command'
63
- # notify to commands api about command progress
64
- # when type == 'spawn'
65
- # run in another process and detach from it
66
- #
67
- retries = cmd[:retries] || 1
68
- resp = []
69
- retries.times.each do
70
- resp = execute_helper(cmd)
71
- exit_status, _, _, _, _ = resp
72
- return resp if exit_status == 0
19
+ def create_file_cmd(path, content)
20
+ if path.include? "~"
21
+ path = File.expand_path(path)
73
22
  end
74
- return resp
23
+ FileUtils.mkdir_p(File.dirname(path))
24
+ File.open(path, "w+"){|f| f.write(content)}
75
25
  end
76
26
 
77
- def execute_cmds(cmds)
78
- cmds.each do |command|
79
- puts "===================="
80
- if command[:type] == "file_exists"
81
- puts "Looking for file #{command[:file]}"
82
- else
83
- puts "Execute #{command[:command]}" unless command[:no_stdout]
84
- end
85
- execute(command)
27
# Writes every path => content pair via create_file_cmd.
# A nil argument is treated as "no files".
def handle_files(files)
  entries = files || {}
  entries.each { |path, content| create_file_cmd(path, content) }
end
88
32
 
89
- def get_requirements_commands
90
- resp = Cnvrg::API.request("#{base_url}/requirements", "GET")
91
- commands = resp["commands"]
92
- commands.map{|k| k.with_indifferent_access}
33
# API path of this machine activity under its owner.
def activity_url
  segments = ['users', @owner, 'machine_activities', @machine_activity]
  segments.join("/")
end
36
+
37
# Collects (once) a description of the agent and slave containers.
#
# The first call resolves the container ids, the pod and node names, then
# shells out locally for the agent values and through run_in_slave for the
# slave values. The result is memoized in @stats.
#
# @return [Hash] :pod_name, :node_name, :agent and :slave entries
def executer_stats
  return @stats if @stats.present?

  Cnvrg::Logger.log_info("getting containers")
  @agent_id, @slave_id = containers
  Cnvrg::Logger.log_info("got containers")
  pod_name, node_name = get_node_and_pod_names

  agent_stats = {
    container_id: @agent_id,
    workdir: `pwd`.strip,
    homedir: current_homedir,
    user: `whoami`.strip,
    user_id: `id -u`.strip,
    group_id: `id -g`.strip,
    cnvrg: Cnvrg::VERSION
  }
  slave_stats = {
    container_id: @slave_id,
    workdir: run_in_slave('pwd'),
    homedir: slave_homedir,
    spark_path: spark_path,
    user: run_in_slave('whoami'),
    cnvrg: run_in_slave('which cnvrg'),
    has_bash: run_in_slave('which bash'),
    user_id: run_in_slave('id -u'),
    group_id: run_in_slave('id -g'),
    python_version: run_in_slave('python --version'),
    python3_version: run_in_slave('python3 --version'),
    pip_version: run_in_slave('pip --version'),
    pip3_version: run_in_slave('pip3 --version')
  }
  @stats = {pod_name: pod_name, node_name: node_name, agent: agent_stats, slave: slave_stats}
end
73
+
74
# Waits until both the agent and the slave container show up in `docker ps`.
#
# Rows are filtered by the job id, or by the host name when KUBERNETES_PORT is
# set. The listing is polled every 5 seconds until both ids are found; there
# is no upper bound on the wait.
#
# @return [Array(String, String)] agent container id, slave container id
def containers
  agent_id = nil
  slave_id = nil
  loop do
    grep_by = ENV['KUBERNETES_PORT'].present? ? "$(hostname)" : @job_id
    rows = `docker ps --format "table {{.ID}},{{.Names}}" | grep -i #{grep_by}`.split("\n").map(&:strip)
    # Each row is "<id>,<names>"; keep the id of the first matching row.
    agent_id = rows.find { |row| row.include?("agent") }&.split(",")&.first
    slave_id = rows.find { |row| row.include?("slave") }&.split(",")&.first
    break if agent_id.present? && slave_id.present?

    # Only wait when another lookup is actually needed.
    sleep(5)
  end
  [agent_id, slave_id]
end
94
90
 
95
- def get_commands
96
- resp = Cnvrg::API.request("#{base_url}/commands", "GET")
97
- commands = resp["commands"]
98
- commands.map{|k| k.with_indifferent_access}
91
# Home directory of the current process.
#
# Reads HOME straight from the environment. Grepping `env` for "HOME" also
# matches variables such as JAVA_HOME and could return one of their values.
#
# @return [String, nil] value of HOME, or nil when it is not set
def current_homedir
  ENV["HOME"]
end
100
94
 
101
- def get_command(command_slug)
102
- resp = Cnvrg::API.request("#{base_url}/commands/#{command_slug}", "GET")
103
- command = resp["command"]
104
- command.with_indifferent_access
95
# SPARK_HOME as seen inside the slave container.
#
# The grep is anchored so only the SPARK_HOME entry itself matches, and the
# entry is split on the first "=" only so a value containing "=" stays whole.
#
# @return [String, nil] the value, or nil when SPARK_HOME is not set
def spark_path
  entry = run_in_slave('env | grep "^SPARK_HOME="').strip.split("\n").first
  entry.to_s.split("=", 2)[1]
end
106
98
 
107
- def update_command(status, context, command_slug)
108
- Cnvrg::API.request("#{base_url}/commands/#{command_slug}", "PUT", {status: status, context: context, timestamp: Time.now})
99
# HOME as seen inside the slave container.
#
# The grep is anchored so entries such as SPARK_HOME or JAVA_HOME are not
# picked up, and the entry is split on the first "=" only.
#
# @return [String, nil] the value, or nil when HOME is not set
def slave_homedir()
  entry = run_in_slave('env | grep "^HOME="').split("\n").first
  entry.to_s.split("=", 2)[1]
end
110
102
 
111
-
112
- def update_git_commit
113
- git_commit = `git rev-parse --verify HEAD`
114
- return if git_commit.blank?
115
- Cnvrg::API.request("#{base_url}/update_git_commit", "POST", {git_commit: git_commit.strip!})
103
# Environment of the slave container.
#
# @return [Array<Array(String, String)>] one [name, value] pair per line of
#   `env` output; each line is split on the first "=" only, so values that
#   contain "=" are kept intact
def slave_env
  run_in_slave("env").split("\n").map { |entry| entry.split("=", 2) }
end
117
106
 
118
- def set_dataset_status(dataset: nil, status: nil)
119
- Cnvrg::API.request("#{base_url}/datasets/#{dataset}", "PUT", {status: status})
107
# Runs a shell command inside the slave container.
#
# The command is handed to `docker exec ... sh -c` as a single argument, with
# no intermediate shell on this side, so quotes inside +command+ reach the
# container's shell untouched.
#
# @param command [String] shell command line to run in the container
# @return [String] the command's standard output, stripped
def run_in_slave(command)
  IO.popen(["docker", "exec", "-i", @slave_id.to_s, "sh", "-c", command], &:read).strip
end
121
110
 
122
- def monitor_command(command, command_slug)
123
- monitor_single_command(command, command_slug)
124
- end
125
111
 
126
- private
127
- def execute_helper(cmd)
128
- case cmd[:type]
129
- when 'notify'
130
- return run_and_notify(cmd)
131
- when 'notify_command'
132
- return notify_command(cmd)
133
- when 'spawn'
134
- return spawn_command(cmd)
135
- when 'file_exists'
136
- if File.exists? cmd[:file]
137
- return execute_cmds(cmd[:exists_commands]) if cmd[:exists_commands].present?
138
- else
139
- return execute_cmds(cmd[:non_exists_commands]) if cmd[:non_exists_commands].present?
140
- end
141
- when 'create_file'
142
- return create_file(cmd)
143
- else
144
- return regular_command(cmd)
112
# Asks the server for pending work: writes any returned files and queues any
# returned commands. Errors are logged rather than raised.
def poll
  resp = Cnvrg::API.request([activity_url, "commands"].join('/'), "POST")
  handle_files(resp["files"])
  # A response without "commands" means nothing to queue; it is not an error.
  (resp["commands"] || []).each { |cmd| @commands_q.push(cmd) }
rescue => e
  Cnvrg::Logger.log_error(e)
end
121
+
122
# Registers this executer by sending its stats to the server.
#
# When the server answers with a different machine activity slug, the slug is
# written to /conf/.machine_activity.yml and adopted for later requests.
# Errors are logged rather than raised.
def init
  resp = Cnvrg::API.request(activity_url, "PUT", {stats: executer_stats})
  machine_activity = resp["machine_activity"]
  Cnvrg::Logger.log_info("Got back machine activity #{machine_activity}")
  return unless machine_activity.present? && @machine_activity != machine_activity

  Cnvrg::Logger.log_info("Changing to machine activity #{machine_activity}")
  machine_activity_yml = {slug: machine_activity}
  File.open("/conf/.machine_activity.yml", "w+") do |f|
    f.write machine_activity_yml.to_yaml
  end
  @machine_activity = machine_activity
rescue => e
  Cnvrg::Logger.log_error(e)
end
147
135
 
148
- def notify_command(cmd)
149
- exit_status, output, errors, start_time, end_time = regular_command(cmd)
150
- context ={
151
- exit_status: exit_status,
152
- output: output,
153
- errors: errors,
154
- start_time: start_time,
155
- end_time: end_time
156
- }
157
- if exit_status == 0
158
- update_command(CommandsStatus::SUCCESS, context, cmd[:command_slug])
159
- else
160
- update_command(CommandsStatus::FAILED, context, cmd[:command_slug])
136
# Polls the server forever, pausing @poll_every seconds between polls.
# Never returns.
def polling_thread
  loop do
    poll
    sleep(@poll_every)
  end
end
163
142
 
164
- def run_and_notify(cmd)
165
- with_logs = cmd[:logs]
166
- cmd = init_cmd_logs(cmd)
167
- job_log(cmd[:before_execute_log]) if cmd[:before_execute_log].present?
168
- exit_status, output, errors, start_time, end_time = regular_command(cmd)
169
- logs = []
170
- if exit_status == 0
171
- logs = output.map{|log| log[:logs]} if with_logs
172
- job_log(logs + cmd[:on_success_log])
173
- else
174
- logs = output + errors
175
- logs = logs.sort_by{|x| x[:timestamp]}.map{|x| x[:logs]} if with_logs
176
- job_log(logs + cmd[:on_error_log], level: 'error')
143
+ def main_thread
144
+ init
145
+ Thread.new do
146
+ polling_thread
177
147
  end
178
- return [exit_status, output, errors, start_time, end_time]
179
- end
180
-
181
- def merge_log_block(logs)
182
- logs.group_by {|log| log[:timestamp].to_s}
183
- .map {|ts, logz| {timestamp: ts, logs: logz.map {|l| l[:log]}.join("\n")}}
148
+ execute_cmds
184
149
  end
185
150
 
151
+ def execute_cmds
152
+ pids = []
153
+ while true
154
+ if @commands_q.empty?
155
+ sleep(5)
156
+ next
157
+ end
158
+ cmd = @commands_q.pop.symbolize_keys
159
+ command_json = Cnvrg::API.request([activity_url, "commands", cmd[:slug]].join('/'), "GET")
186
160
 
187
- def spawn_command(cmd={})
188
- pid = Process.spawn(cmd[:command])
189
- Process.detach(pid)
190
- end
161
+ cmd_status = command_json["status"] rescue ""
191
162
 
192
- def regular_command(cmd = {})
193
- errors = []
194
- output = []
195
- start_time = Time.now
196
- timeout = cmd[:timeout] || 5*60
197
- exit_status = nil
198
- t = Thread.new do
199
- PTY.spawn(cmd[:command]) do |stdout, stdin, pid, stderr|
200
- begin
201
- if stdout.present?
202
- stdout.each do |line|
203
- puts line
204
- output << {log: line.strip, timestamp: Time.now}
205
- end
206
- end
207
- if stderr.present?
208
- stderr.each do |line|
209
- errors << {log: line.strip, timestamp: Time.now}
210
- end
211
- end
212
- rescue Errno::EIO
213
- rescue => e
214
- errors << {log: e.message, timestamp: Time.now}
215
- end
216
- ::Process.wait pid
163
+ if cmd_status == Cnvrg::Helpers::Agent::Status::ABORTED
164
+ Cnvrg::Logger.log_info("stopping job because command #{cmd[:slug]} with status #{cmd_status}")
165
+ next
217
166
  end
218
- exit_status = $?.exitstatus
219
- end
220
- while t.status
221
- if Time.now - start_time > timeout
222
- puts "Kill thread because of timeout..."
223
- errors << {log: "Timeout", timestamp: Time.now}
224
- Thread.kill(t)
225
- exit_status = 100 ##killed
167
+ pid = Process.fork do
168
+ Cnvrg::Helpers::Agent.new(executer: self, **cmd).exec!
169
+ end
170
+ if cmd[:async].blank?
171
+ Process.waitpid(pid)
172
+ else
173
+ Process.detach(pid)
226
174
  end
227
- sleep 1
175
+ pids << pid
176
+ ######
228
177
  end
229
- end_time = Time.now
230
- [exit_status, merge_log_block(output), merge_log_block(errors), start_time, end_time]
178
+ pids
231
179
  end
232
180
 
233
- def create_file(cmd)
234
- cmd = init_cmd_logs(cmd)
235
- File.open(cmd[:path], "w+"){|f| f.write(cmd[:content])}
181
# Collapses log entries that share a timestamp.
#
# @param logs [Array<Hash>] entries with :timestamp and :log
# @return [Array<Hash>] one {timestamp:, logs:} hash per distinct timestamp
#   (as a String), with that timestamp's lines joined by newlines
def merge_log_block(logs)
  by_timestamp = logs.group_by { |entry| entry[:timestamp].to_s }
  by_timestamp.map do |stamp, entries|
    lines = entries.map { |entry| entry[:log] }
    {timestamp: stamp, logs: lines.join("\n")}
  end
end
237
185
 
238
- def init_cmd_logs(cmd)
239
- if cmd[:title]
240
- cmd[:before_execute_log] ||= ["Running #{cmd[:title]}"]
241
- cmd[:on_error_log] ||= ["#{cmd[:title]} was failed during running"]
242
- cmd[:on_success_log] ||= ["#{cmd[:title]} executed successfully"]
186
+ def get_node_and_pod_names
187
+ pod_name = `hostname`.strip rescue nil
188
+ node_name = nil
189
+ if pod_name.present?
190
+ pod_describe = `kubectl -n cnvrg get pod #{pod_name} -o json` rescue nil
191
+ pod_describe = JSON.parse(pod_describe) rescue {}
192
+ node_name = pod_describe["spec"]["nodeName"] rescue nil
243
193
  end
244
- cmd[:on_success_log] ||= []
245
- cmd[:on_error_log] ||= []
246
- [:before_execute_log, :on_success_log, :on_error_log].each{|x| cmd[x] = [cmd[x]].flatten}
247
- cmd
194
+ [pod_name, node_name]
248
195
  end
249
196
 
250
- def base_url
251
- "users/#{@project.owner}/projects/#{@project.slug}/jobs/#{@job_type.underscore}/#{@job_id}"
197
# Collects the pod's and the node's events and posts them to the activity's
# "job_events" endpoint.
def pre_pod_stop
  pod_name, node_name = get_node_and_pod_names
  events = {
    pod_events: get_pod_events(pod_name),
    node_events: get_node_events(node_name)
  }
  endpoint = [activity_url, "job_events"].join('/')
  Cnvrg::API.request(endpoint, "POST", events)
end
253
203
 
254
-
255
- def job_log(logs, level: 'info', step: nil)
256
- if @job_type == "image"
257
- @image.job_log(logs, level: level, step: step)
258
- else
259
- @project.job_log(logs, level: level, step: step, job_type: @job_type, job_id: @job_id)
260
- end
204
# Fetches the events of a pod in the "cnvrg" namespace through kubectl.
#
# @param pod_name [String, nil]
# @return [String, nil] kubectl's JSON output, or nil for a blank name
def get_pod_events(pod_name)
  unless pod_name.blank?
    `kubectl get event --namespace cnvrg --field-selector involvedObject.name=#{pod_name} -o json`
  end
end
262
208
 
263
-
264
-
265
- def monitor_single_command(cmd, command_slug)
266
- log = ""
267
- PTY.spawn(cmd[:command]) do |stdout, stdin, pid, stderr|
268
- begin
269
- stdout.each do |line|
270
- log += line + "\n"
271
- if cmd[:success_log] and line.match(cmd[:success_log])
272
- puts "Match found!"
273
- context = {log: line, pid: Process.pid}
274
- update_command(CommandsStatus::SUCCESS, context, command_slug)
275
- end
276
- end
277
-
278
- if stderr
279
- stderr.each do |err|
280
- log += err + "\n"
281
- #context = {error: err}
282
- #update_command(CommandsStatus::FAILED, context, command_slug)
283
- #log << {time: Time.now, message: err, type: "stderr"}
284
- end
285
- end
286
-
287
- rescue Errno::EIO => e
288
- Cnvrg::Logger.log_error(e)
289
- context = {error: e.message}
290
- update_command(CommandsStatus::FAILED, context, command_slug)
291
- rescue Errno::ENOENT => e
292
- exp_success = false
293
- context = {error: e.message}
294
- update_command(CommandsStatus::FAILED, context, command_slug)
295
- Cnvrg::Logger.info("command \"#{cmd[:command]}\" couldn't be executed, verify command is valid")
296
- Cnvrg::Logger.log_error(e)
297
- rescue => e
298
- #res = @exp.end(log, 1, start_commit, 0, 0)
299
- context = {error: e.message}
300
- update_command(CommandsStatus::FAILED, context, command_slug)
301
- Cnvrg::Logger.info("Error occurred,aborting")
302
- Cnvrg::Logger.log_error(e)
303
- exit(0)
304
- end
305
- ::Process.wait pid
306
- end
209
# Fetches the events of a node, across all namespaces, through kubectl.
#
# @param node_name [String, nil]
# @return [String, nil] kubectl's JSON output, or nil for a blank name
def get_node_events(node_name)
  unless node_name.blank?
    `kubectl get event --all-namespaces --field-selector involvedObject.name=#{node_name} -o json`
  end
end
308
-
309
213
  end