cnvrg 1.9.9.9.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/bin/cnvrg +9 -0
- data/cnvrg.gemspec +47 -0
- data/lib/cnvrg.rb +7 -0
- data/lib/cnvrg/Images.rb +351 -0
- data/lib/cnvrg/api.rb +247 -0
- data/lib/cnvrg/api_v2.rb +14 -0
- data/lib/cnvrg/auth.rb +79 -0
- data/lib/cnvrg/cli.rb +5715 -0
- data/lib/cnvrg/cli/flow.rb +166 -0
- data/lib/cnvrg/cli/library_cli.rb +33 -0
- data/lib/cnvrg/cli/subcommand.rb +28 -0
- data/lib/cnvrg/cli/task.rb +116 -0
- data/lib/cnvrg/colors.rb +8 -0
- data/lib/cnvrg/connect_job_ssh.rb +31 -0
- data/lib/cnvrg/data.rb +335 -0
- data/lib/cnvrg/datafiles.rb +1325 -0
- data/lib/cnvrg/dataset.rb +892 -0
- data/lib/cnvrg/downloader/client.rb +101 -0
- data/lib/cnvrg/downloader/clients/azure_client.rb +45 -0
- data/lib/cnvrg/downloader/clients/gcp_client.rb +50 -0
- data/lib/cnvrg/downloader/clients/s3_client.rb +78 -0
- data/lib/cnvrg/experiment.rb +209 -0
- data/lib/cnvrg/files.rb +1047 -0
- data/lib/cnvrg/flow.rb +137 -0
- data/lib/cnvrg/helpers.rb +422 -0
- data/lib/cnvrg/helpers/agent.rb +188 -0
- data/lib/cnvrg/helpers/executer.rb +213 -0
- data/lib/cnvrg/hyper.rb +21 -0
- data/lib/cnvrg/image.rb +113 -0
- data/lib/cnvrg/image_cli.rb +25 -0
- data/lib/cnvrg/job_cli.rb +73 -0
- data/lib/cnvrg/job_ssh.rb +48 -0
- data/lib/cnvrg/logger.rb +111 -0
- data/lib/cnvrg/org_helpers.rb +5 -0
- data/lib/cnvrg/project.rb +822 -0
- data/lib/cnvrg/result.rb +29 -0
- data/lib/cnvrg/runner.rb +49 -0
- data/lib/cnvrg/ssh.rb +94 -0
- data/lib/cnvrg/storage.rb +128 -0
- data/lib/cnvrg/task.rb +165 -0
- data/lib/cnvrg/version.rb +3 -0
- metadata +460 -0
data/lib/cnvrg/flow.rb
ADDED
@@ -0,0 +1,137 @@
|
|
1
|
+
module Cnvrg
|
2
|
+
class Flows
|
3
|
+
# Builds a handle for an existing flow.
#
# flow_slug - slug or title of the flow; resolved through
#             Flows.resolve_flow_title.
# project   - project object to use; when nil, a Cnvrg::Project is built
#             from Cnvrg::CLI.get_project_home.
#
# Raises StandardError (from resolve_flow_title) when the flow can't be found.
def initialize(flow_slug, project: nil)
  @project = project || Cnvrg::Project.new(Cnvrg::CLI.get_project_home)
  # Resolve against @project rather than the raw argument, so the default
  # project is used when the caller did not pass one.
  @flow_info = Flows.resolve_flow_title(flow_slug, @project)
  @slug = @flow_info["slug"]
  @tasks = {}
  @relations = {}
  @title = nil
  @base_resource = @project.base_resource + "flows/#{@slug}"
  @public_url = "#{@project.url}/flows/#{@slug}"
  # self.reload_flow
end
|
14
|
+
|
15
|
+
# Looks up a flow of +project+ by slug first, then by title
# (both compared case-insensitively).
#
# title   - slug or title to search for; coerced with to_s so non-String
#           identifiers can be passed.
# project - object responding to base_resource.
#
# Returns the matching flow entry from the API "result" list.
# Raises StandardError when the API response is blank or no flow matches.
def self.resolve_flow_title(title, project)
  resp = Cnvrg::API.request("#{project.base_resource}/flows", 'GET')
  if resp.blank?
    raise StandardError.new("Can't resolve flow")
  end

  wanted = title.to_s.downcase
  # A response without "result" is treated as an empty list so the caller
  # gets the "Can't find flow" error instead of a NoMethodError.
  flows = resp["result"] || []
  # to_s guards against entries that lack a slug or a title.
  res = flows.find { |flow| flow["slug"].to_s.downcase == wanted }
  res ||= flows.find { |flow| flow["title"].to_s.downcase == wanted }
  if res.blank?
    raise StandardError.new("Can't find flow with title #{title}")
  end
  res
end
|
27
|
+
|
28
|
+
# URL of the "new flow version" page, built from @public_url.
def edit_href
  format("%s/flow_versions/new", @public_url)
end
|
31
|
+
|
32
|
+
# URL of the edit page (see edit_href) with the given version passed as the
# flow_version_slug query parameter. The version is inserted as-is.
def edit_version_href(version)
  format("%s?flow_version_slug=%s", edit_href, version)
end
|
35
|
+
|
36
|
+
# API path of a flow version under @base_resource.
# Falls back to the 'latest' version when none (or a falsy one) is given.
def version_href(version=nil)
  selector = version || 'latest'
  "#{@base_resource}/flow_versions/#{selector}"
end
|
39
|
+
|
40
|
+
# Fetches a flow version from the server and writes its "api_recipe" to a
# YAML file.
#
# version - version identifier handed to version_href.
# file    - destination path; when blank, "flow-<slug>.yml" is used, with the
#           slug downcased and spaces replaced by underscores.
#
# Returns the path that was written.
# Raises StandardError when the response status is not 200 (including when
# the API returns no response at all).
def export(version, file: nil)
  # `|| {}` mirrors create_flow: a nil response must end in the error below,
  # not in a NoMethodError on nil.
  resp = Cnvrg::API.request(version_href(version), 'GET') || {}
  if resp["status"] != 200
    raise StandardError.new("Cant find flow version: #{version} for flow: #{@slug}")
  end
  api_recipe = resp["flow_version"]["api_recipe"]
  file = file.presence || "flow-#{@slug.downcase.gsub("\s", "_")}.yml"
  File.open(file, "w") { |f| f.write api_recipe.to_yaml }
  file
end
|
51
|
+
|
52
|
+
# Placeholder: the body is empty, so calling this returns nil and the
# +version+ argument is ignored.
def get_version(version)
|
53
|
+
|
54
|
+
|
55
|
+
end
|
56
|
+
|
57
|
+
# Creates a flow from +recipe+ on the server.
#
# project - object responding to base_resource.
# recipe  - recipe object; sent JSON-encoded under the flow_version key.
# run     - when true, posts to the ".../flows/run" endpoint instead.
#
# Returns [Flows instance, flow version id] on a 200 response.
# Raises StandardError with the server message on 400, and with a generic
# message for any other outcome (including no response).
def self.create_flow(project, recipe, run: false)
  endpoint = "#{project.base_resource}flows"
  endpoint += "/run" if run
  resp = Cnvrg::API.request(endpoint, 'POST', {flow_version: recipe.to_json}) || {}

  case resp["status"]
  when 200
    created = resp["flow_version"]
    return [Flows.new(created["flow_id"], project: project), created["id"]]
  when 400
    raise StandardError.new(resp["message"])
  end

  raise StandardError.new("Can't create new flow")
end
|
70
|
+
|
71
|
+
# Loads the flow recipe stored as YAML at @fullpath.
# NOTE(review): @fullpath is not assigned anywhere in the visible part of
# this class — confirm where it is expected to be set.
#
# Returns the parsed YAML document.
# Raises StandardError when the file does not exist.
def get_flow
  # File.exists? was deprecated and then removed in Ruby 3.2; File.exist? is
  # the supported spelling.
  unless File.exist?(@fullpath)
    raise StandardError.new("Cant find flow in #{@fullpath}")
  end
  YAML.load_file(@fullpath)
end
|
77
|
+
|
78
|
+
# Overwrites the file at @fullpath with +new_flow+ serialized as YAML.
# Returns the number of bytes written.
def set_flow(new_flow)
  File.write(@fullpath, new_flow.to_yaml)
end
|
81
|
+
|
82
|
+
# Stores +slug+ under the :slug key of the flow recipe on disk:
# reads the recipe via get_flow, updates it and writes it back via set_flow.
# Returns whatever set_flow returns.
def set_flow_slug(slug)
  set_flow(get_flow.tap { |recipe| recipe[:slug] = slug })
end
|
87
|
+
|
88
|
+
# Refreshes @title, @slug, @relations and @tasks from the recipe returned by
# get_flow.
#
# Every value of every relation is treated as a task name. A task defined
# inline under the recipe's :tasks key is built from that content; any other
# name is handed to Cnvrg::Task as a path.
#
# Returns @relations.
def reload_flow
  flow = get_flow
  @title = flow[:title]
  @slug = flow[:slug]
  # Default to an empty list (as :tasks already does) so a recipe without
  # relations does not crash on nil.each.
  @relations = flow[:relations] || []
  local_tasks = flow[:tasks] || {}
  @relations.each do |relation|
    relation.values.each do |task|
      if local_tasks[task].present?
        @tasks[task] = Cnvrg::Task.new(@project.local_path, content: local_tasks[task])
      else
        @tasks[task] = Cnvrg::Task.new(@project.local_path, path: task)
      end
    end
  end
end
|
104
|
+
|
105
|
+
|
106
|
+
# Asks the server to run this flow by POSTing to
# "<@base_resource>/<@slug>/run".
# NOTE(review): @base_resource already ends with the slug (see initialize),
# so the slug appears twice in the URL — confirm this matches the server route.
#
# Returns the response when Cnvrg::CLI.is_response_success accepts it;
# otherwise logs "Cant run flow <slug>" and returns the logger's result.
def run
  endpoint = "#{@base_resource}/#{@slug}/run"
  resp = Cnvrg::API.request(endpoint, 'POST')
  return resp if Cnvrg::CLI.is_response_success(resp)

  Cnvrg::CLI.log_message("Cant run flow #{@slug}")
end
|
113
|
+
|
114
|
+
### in use for yaml file
|
115
|
+
# def run
|
116
|
+
# resp = Cnvrg::API.request(@base_resource, 'POST', {data: to_api})
|
117
|
+
# Cnvrg::CLI.is_response_success(resp, true)
|
118
|
+
# flow_slug = resp['result']['flow']
|
119
|
+
# self.set_flow_slug(flow_slug)
|
120
|
+
# url = Cnvrg::Helpers.remote_url + resp['result']['url']
|
121
|
+
# return url
|
122
|
+
# end
|
123
|
+
|
124
|
+
|
125
|
+
private
|
126
|
+
# Serializes the flow into the hash shape sent to the API:
# relations, tasks (each task converted with its own to_api), title and slug.
def to_api
  serialized_tasks = @tasks.each_with_object({}) do |(name, task), acc|
    acc[name] = task.to_api
  end

  { relations: @relations, tasks: serialized_tasks, title: @title, slug: @slug }
end
|
134
|
+
|
135
|
+
|
136
|
+
end
|
137
|
+
end
|
@@ -0,0 +1,422 @@
|
|
1
|
+
module Cnvrg
|
2
|
+
module Helpers
|
3
|
+
|
4
|
+
extend self
|
5
|
+
# Number of worker threads to use, read from CNVRG_PARALLEL_THREADS.
# Missing, non-numeric or non-positive values fall back to 15;
# the result is capped at 100000.
def parallel_threads()
  requested = ENV["CNVRG_PARALLEL_THREADS"].to_i
  requested = 15 unless requested > 0
  requested > 100000 ? 100000 : requested
end
|
15
|
+
|
16
|
+
# Option hash built from the Cnvrg::CLI parallelism constants.
# NOTE(review): the key is spelled :in_thread (singular); if these options
# are meant for the `parallel` gem, its documented key is :in_threads —
# confirm before relying on the thread count being honoured.
def self.parallel_options
  options = {}
  options[:in_processes] = Cnvrg::CLI::ParallelProcesses
  options[:in_thread] = Cnvrg::CLI::ParallelThreads
  options[:isolation] = true
  options
end
|
23
|
+
# The check mark character (U+2713) as a UTF-8 string.
def checkmark
  "\u2713".encode('utf-8')
end
|
27
|
+
|
28
|
+
# Reports whether http://www.google.com/ can be fetched.
#
# Uses URI.open (from open-uri, which the original Kernel#open call on a URL
# already depended on): since Ruby 3.0 Kernel#open no longer opens URLs, so
# the old form always ended in the rescue. The block form also closes the
# response instead of leaking it.
#
# Returns true when the request succeeds, false on any StandardError.
def internet_connection?
  URI.open("http://www.google.com/") { true }
rescue
  false
end
|
35
|
+
|
36
|
+
# Runs the given block until it completes without raising, at most +tries+
# times, pausing one second between attempts.
#
# tries - maximum number of attempts; must be a positive Integer.
#
# Each failure is logged through Cnvrg::Logger with its 1-based attempt
# number.
#
# Returns true as soon as an attempt succeeds.
# Raises ArgumentError when +tries+ is not a positive Integer (previously
# this ended in `raise nil`), or the last error once every attempt failed.
def try_until_success(tries: 3)
  unless tries.is_a?(Integer) && tries > 0
    raise ArgumentError, "tries must be a positive integer"
  end

  last_error = nil
  1.upto(tries) do |attempt|
    begin
      yield
      return true
    rescue => e
      Cnvrg::Logger.log_info("Error while trying for the #{attempt} time")
      Cnvrg::Logger.log_error(e)
      last_error = e
      # No point in waiting when there is no further attempt.
      sleep(1) if attempt < tries
    end
  end
  raise last_error
end
|
51
|
+
|
52
|
+
# Loads the CLI configuration from ~/.cnvrg/config.yml.
#
# Returns the parsed YAML document, or an empty Hash when the file is
# missing or empty.
def get_config
  config_path = File.join(File.expand_path('~'), ".cnvrg", "config.yml")
  return {} unless File.exist?(config_path)

  # YAML.load_file gives a falsy value for an empty file; callers index the
  # result (e.g. config[:version]), so fall back to an empty Hash.
  YAML.load_file(config_path) || {}
end
|
62
|
+
|
63
|
+
# Writes +config+ as YAML to ~/.cnvrg/config.yml, replacing any existing
# file.
#
# The ~/.cnvrg directory is created when missing; without that, the first
# write on a fresh machine fails with Errno::ENOENT.
#
# Returns +config+.
def set_config(config)
  config_dir = File.join(File.expand_path('~'), ".cnvrg")
  FileUtils.mkdir_p(config_dir)
  File.open(File.join(config_dir, "config.yml"), "w") { |f| f.write config.to_yaml }
  config
end
|
68
|
+
|
69
|
+
|
70
|
+
# Base URL of the cnvrg web application, derived from the :api entry of
# ~/.cnvrg/config.yml by dropping its "/api" path segment.
#
# Returns "https://app.cnvrg.io" when the config file is missing, unreadable,
# empty, or has no :api entry.
def remote_url
  default_url = "https://app.cnvrg.io"
  config_path = File.expand_path('~') + "/.cnvrg/config.yml"

  config = begin
    return default_url unless File.exist?(config_path)
    YAML.load_file(config_path)
  rescue
    return default_url
  end
  return default_url if !config || config.empty?

  api = config.to_h[:api]
  return default_url if api.nil?

  # Remove "/api" only where it is a whole path segment. A plain
  # gsub("/api", "") also hits the "//api" at the start of hosts such as
  # https://api.example.com and corrupts the URL.
  api.gsub(%r{(?<!/)/api(?=/|\z)}, "")
end
|
89
|
+
|
90
|
+
# Server version stored under :version in the CLI config, as an Integer.
# Returns 0 when no usable version is stored.
def server_version
  stored = get_config[:version].try(:to_i)
  stored.nil? ? 0 : stored
end
|
94
|
+
|
95
|
+
# Persists +version+ under :version in the CLI config.
# Nothing is written (and nil is returned) when the stored version, as a
# string, already equals +version+; otherwise returns set_config's result.
def update_version(version)
  settings = get_config
  return if settings[:version].to_s.eql?(version)

  settings[:version] = version
  set_config(settings)
end
|
103
|
+
|
104
|
+
# Whether SSL certificates should be verified, taken from the :verify_ssl
# entry of ~/.cnvrg/config.yml.
# Returns true when the file is missing, unreadable, empty, or has no
# :verify_ssl entry; otherwise returns the stored value.
def is_verify_ssl
  config_file = File.expand_path('~') + "/.cnvrg/config.yml"

  begin
    return true unless File.exist?(config_file)
    settings = YAML.load_file(config_file)
  rescue
    return true
  end
  return true if !settings || settings.empty?

  flag = settings.to_h[:verify_ssl]
  flag.nil? ? true : flag
end
|
124
|
+
|
125
|
+
# Name of the host operating system: "windows", "mac", "ubuntu" or "linux",
# checked in that order; "N/A" when none of the predicates matches.
def os
  return "windows" if windows?
  return "mac" if mac?
  return "ubuntu" if ubuntu?
  return "linux" if linux?

  "N/A"
end
|
141
|
+
|
142
|
+
# True when Ruby runs on a native Windows build.
#
# Matches any mswin/mingw platform string. The previous pattern
# (mswin32|mingw32) missed 64-bit builds such as "x64-mingw-ucrt" and
# "x64-mswin64".
def windows?
  !!(RUBY_PLATFORM =~ /mswin|mingw/)
end
|
145
|
+
|
146
|
+
# True when RUBY_PLATFORM contains "-darwin" followed by a digit.
def mac?
  !(/-darwin\d/ =~ RUBY_PLATFORM).nil?
end
|
149
|
+
|
150
|
+
# True for every platform that is neither mac? nor windows?.
def linux?
  !(mac? || windows?)
end
|
153
|
+
|
154
|
+
# True when /etc/lsb-release exists as a regular file.
#
# Checked directly with File.file? instead of spawning a shell for
# `[ -f /etc/lsb-release ]`; the condition is the same but no subprocess
# (or POSIX shell) is needed.
# NOTE(review): other distributions can ship /etc/lsb-release too, so this
# is only a heuristic for Ubuntu.
def ubuntu?
  File.file?("/etc/lsb-release")
end
|
158
|
+
|
159
|
+
# Returns the template text for a new cnvrg ignore file: a few explanatory
# comment lines followed by the default ignore patterns. Surrounding
# whitespace is stripped from the template.
def cnvrgignore_content
|
160
|
+
#TODO: cnvrg ignore add .conflict
|
161
|
+
%{
|
162
|
+
# cnvrg ignore: Ignore the following directories and files
|
163
|
+
# for example:
|
164
|
+
# some_dir/
|
165
|
+
# some_file.txt
|
166
|
+
.git*
|
167
|
+
.gitignore
|
168
|
+
*.conflict
|
169
|
+
*.deleted
|
170
|
+
}.strip
|
171
|
+
end
|
172
|
+
|
173
|
+
# Returns the template text for a hyperparameter-optimization file: an
# explanatory header followed by a sample `parameters:` list with one
# integer, one float, one discrete and one categorical entry.
# Unlike the other templates in this module, the text is returned unstripped.
def hyper_content
|
174
|
+
%{# Hyperparameter Optimization is the process of choosing a set of parameters for a learning algorithm, usually with the goal of optimizing a measure of the algorithm's performance on an independent data set.
|
175
|
+
|
176
|
+
# Below is the list of parameters that will be used in the optimization process. Each parameter has a param_name that should match the argument that is feeded to the experiment s.t kernel => --kernel='rbf'
|
177
|
+
|
178
|
+
parameters:
|
179
|
+
# Integer parameter is a range of possible values between a minimum (inclusive)
|
180
|
+
# and maximum (not inclusive) values. Values are floored (0.7 => 0)
|
181
|
+
- param_name: "learning_rate"
|
182
|
+
type: "integer"
|
183
|
+
min: 0 # inclusive
|
184
|
+
max: 10 # not inclusive
|
185
|
+
scale: "linear"
|
186
|
+
steps: 4 # The number of linear steps to produce.
|
187
|
+
|
188
|
+
|
189
|
+
# Float parameter is a range of possible values between a minimum (inclusive)
|
190
|
+
# and maximum (not inclusive) values.
|
191
|
+
#
|
192
|
+
- param_name: "learning_rate"
|
193
|
+
type: "float" # precision is 9 after period
|
194
|
+
min: 0.00001
|
195
|
+
max: 0.1
|
196
|
+
scale: "log2" # Could be log10 as well
|
197
|
+
steps: 2
|
198
|
+
|
199
|
+
# Discrete parameter is an array of numerical values.
|
200
|
+
#
|
201
|
+
- param_name: "c"
|
202
|
+
type: "discrete"
|
203
|
+
values: [0, 0.1 ,0.001]
|
204
|
+
|
205
|
+
# Categorical parameter is an array of string values
|
206
|
+
#
|
207
|
+
- param_name: "kernel"
|
208
|
+
type: "categorical"
|
209
|
+
values: ["linear", "poly", "rbf"]
|
210
|
+
|
211
|
+
}
|
212
|
+
end
|
213
|
+
|
214
|
+
# Returns the template text for a project README: a "# README" heading, a
# short introduction and a bullet list of suggested topics. Surrounding
# whitespace is stripped from the template.
def readme_content
|
215
|
+
%{
|
216
|
+
# README
|
217
|
+
|
218
|
+
This README would normally contain some context and description about the project.
|
219
|
+
|
220
|
+
Things you may want to cover:
|
221
|
+
|
222
|
+
* Data description
|
223
|
+
|
224
|
+
* Benchmark and measurement guidelines
|
225
|
+
|
226
|
+
* Used algorithms
|
227
|
+
|
228
|
+
* Scores
|
229
|
+
|
230
|
+
* Configurations
|
231
|
+
|
232
|
+
* Requirements
|
233
|
+
|
234
|
+
* How to run the experiments
|
235
|
+
|
236
|
+
* ...}.strip
|
237
|
+
end
|
238
|
+
|
239
|
+
# Domain name used for cnvrg entries.
# NOTE(review): presumably the machine name in the user's netrc file —
# confirm against the callers.
def netrc_domain
  'cnvrg.io'
end
|
242
|
+
|
243
|
+
# Index of the first "/"-separated segment of +path+ that equals +name+,
# or -1 when no segment matches.
def look_for_in_path(path, name)
  path.split("/").index(name) || -1
end
|
252
|
+
|
253
|
+
# Extracts the owner and slug that surround the +breaker+ segment of a URL,
# e.g. ".../<owner>/projects/<slug>" with breaker "projects".
#
# url     - "/"-separated URL or path.
# breaker - segment separating owner (before it) from slug (after it).
#
# Returns [owner, slug]. Either element is nil when it does not exist; both
# are nil when +breaker+ is not a segment of +url+. (Previously a missing
# breaker, or one in first position, silently returned segments taken from
# the wrong end of the URL through negative indexing.)
def extract_owner_slug_from_url(url, breaker)
  url_parts = url.split("/")
  breaker_index = url_parts.index(breaker)
  return nil, nil if breaker_index.nil?

  slug = url_parts[breaker_index + 1]
  # Index 0 has no predecessor; url_parts[-1] would wrap around to the end.
  owner = breaker_index.zero? ? nil : url_parts[breaker_index - 1]
  return owner, slug
end
|
260
|
+
|
261
|
+
# cpu
|
262
|
+
|
263
|
+
# CPU time consumed by the current process, in microseconds.
def cpu_time
  clock_id = Process::CLOCK_PROCESS_CPUTIME_ID
  Process.clock_gettime(clock_id, :microsecond)
end
|
266
|
+
|
267
|
+
# Reading of the monotonic clock, in microseconds. Only differences between
# two readings are meaningful.
def wall_time
  clock_id = Process::CLOCK_MONOTONIC
  Process.clock_gettime(clock_id, :microsecond)
end
|
270
|
+
|
271
|
+
# Decrypts a Base64-encoded AES-256-CBC ciphertext.
#
# key - raw cipher key.
# iv  - Base64-encoded initialization vector.
# str - Base64-encoded ciphertext.
#
# Returns the plaintext tagged as UTF-8. On any StandardError the error is
# printed with puts and nil is returned.
def decrypt(key,iv,str)
  decipher = OpenSSL::Cipher.new("aes-256-cbc").decrypt
  decipher.key = key
  decipher.iv = Base64.decode64(iv.encode('utf-8'))

  ciphertext = Base64.decode64(str.encode('utf-8'))
  plaintext = decipher.update(ciphertext) << decipher.final
  plaintext.force_encoding('utf-8')
rescue => e
  puts e
end
|
292
|
+
|
293
|
+
# memory
|
294
|
+
#
|
295
|
+
# Placeholder: the body is empty, so calling this returns nil and the
# +pid+ argument is ignored.
def get_mem(pid)
|
296
|
+
end
|
297
|
+
|
298
|
+
# Builds an S3 client and upload settings from the encrypted storage
# properties in +files+.
#
# The document at files["path_sts"] is fetched (up to 20 attempts, 5 seconds
# apart); its first two lines are used as the key and the Base64 IV handed
# to Cnvrg::Helpers.decrypt for the credential fields.
#
# files - Hash with the keys "path_sts", "sts_a", "sts_s", "sts_st",
#         "region", "bucket", "is_s3", "server_side_encryption" and, for
#         non-S3 storage, "endpoint".
#
# Returns {client:, key:, iv:, bucket:, upload_options:}, or a Cnvrg::Result
# describing the failure when the document could not be fetched.
def get_s3_props(files) #will return client and decryptor
  sts_path = files["path_sts"]
  body = nil
  20.times do
    begin
      open_options = Helpers.is_verify_ssl ? {} : {ssl_verify_mode: OpenSSL::SSL::VERIFY_NONE}
      # URI.open instead of Kernel#open: since Ruby 3.0 Kernel#open no longer
      # fetches URLs, which made every attempt fail. The block form also
      # closes the response once it has been read.
      body = URI.open(sts_path, open_options, &:read)
      break
    rescue StandardError
      sleep(5)
    end
  end
  if body.nil?
    return Cnvrg::Result.new(false,"couldn't download some files", "error in sts", "" )
  end

  key, iv = body.split("\n")

  access = Cnvrg::Helpers.decrypt(key, iv, files["sts_a"])
  secret = Cnvrg::Helpers.decrypt(key, iv, files["sts_s"])
  session = Cnvrg::Helpers.decrypt(key, iv, files["sts_st"])
  region = Cnvrg::Helpers.decrypt(key, iv, files["region"])
  bucket = Cnvrg::Helpers.decrypt(key, iv, files["bucket"])
  is_s3 = files["is_s3"]
  server_side_encryption = files["server_side_encryption"]

  client_options = {
    :access_key_id => access,
    :secret_access_key => secret,
    :region => region,
    :http_open_timeout => 60,
    :retry_limit => 20
  }
  # A missing "is_s3" flag is treated as S3.
  if is_s3 or is_s3.nil?
    client_options[:session_token] = session
    use_accelerate_endpoint = true
  else
    # S3-compatible storage: custom endpoint, path-style addressing.
    # NOTE(review): certificate verification is switched off here
    # unconditionally, regardless of the verify_ssl setting.
    client_options[:endpoint] = Cnvrg::Helpers.decrypt(key, iv, files["endpoint"])
    client_options[:force_path_style] = true
    client_options[:ssl_verify_peer] = false
    use_accelerate_endpoint = false
  end
  client = Aws::S3::Client.new(client_options)

  upload_options = {:use_accelerate_endpoint => use_accelerate_endpoint}
  upload_options[:server_side_encryption] = server_side_encryption if server_side_encryption

  return {client: client, key: key, iv: iv, bucket: bucket, upload_options: upload_options}
end
|
361
|
+
|
362
|
+
# Downloads the "tfevents" files of an experiment's commit and moves them
# into a directory named after the experiment slug.
#
# exp     - Hash with "slug" and "end_commit".
# project - project object; used for clone, owner and slug.
# commit  - commit to read from; defaults to exp["end_commit"].
#
# The destination directory is removed and recreated after the download,
# then each file is moved to "<slug>/<original relative path>".
# Returns the list of file paths that were moved.
def get_experiment_events_log_from_server(exp, project, commit: nil)
  dest_dir = exp["slug"]
  commit ||= exp["end_commit"]

  response = project.clone(0, commit)
  Cnvrg::CLI.is_response_success(response, false)
  clone_result = response["result"]
  commit_sha1 = clone_result["commit"]
  event_files = clone_result["tree"].keys.select { |path| path.include?("tfevents") }

  @files = Cnvrg::Files.new(project.owner, project.slug, project_home: "", project: project)
  @files.download_files(event_files, commit_sha1, progress: nil)

  FileUtils.rm_rf("#{dest_dir}")
  FileUtils.mkdir_p(dest_dir)
  event_files.each do |path|
    FileUtils.mkdir_p("#{dest_dir}/#{File.dirname(path)}")
    FileUtils.mv(path, "#{dest_dir}/#{path}")
  end
end
|
380
|
+
|
381
|
+
# Copies the "tfevents" files of a running experiment out of its pod with
# kubectl, into a local directory named after the experiment slug.
#
# exp       - Hash with "slug"; the pod is found by grepping the pod list
#             for it.
# namespace - Kubernetes namespace passed to every kubectl call.
#
# The pod's working directory is listed recursively with `ls -R`; every
# entry whose name contains "tfevents" is copied from the "agent" container.
#
# Returns true when done, false when no pod matches or any error is raised
# (the error is logged through Cnvrg::Logger).
def get_experiment_events_log_via_kubectl(exp, namespace)
  slug = exp["slug"]
  pods_listing = `kubectl -n #{namespace} get pods | grep #{slug}`
  pod_name = pods_listing.split(" ").first
  return false if pod_name.blank?

  FileUtils.mkdir_p(slug)
  remote_root = `kubectl -n #{namespace} exec #{pod_name} -c agent -- pwd`.strip
  listing = `kubectl -n #{namespace} exec #{pod_name} -c agent -- /bin/bash -c "ls -R #{remote_root}"`

  # `ls -R` prints one blank-line separated section per directory, each
  # starting with a "<dir>:" header line.
  event_files = listing.split("\n\n").flat_map do |section|
    entries = section.split("\n")
    folder = nil
    if entries.first.include?(":")
      folder = entries.first.gsub(":", "").sub(remote_root + "/", "")
      entries = entries.drop(1)
    end
    entries.select { |entry| entry.include?("tfevents") }.map { |entry| "#{folder}/#{entry}" }
  end

  event_files.each do |remote_file|
    FileUtils.mkdir_p("#{slug}/#{File.dirname(remote_file)}")
    `kubectl -n #{namespace} cp #{pod_name}:#{remote_file} -c agent #{slug}/#{remote_file}`
  end

  true
rescue => e
  Cnvrg::Logger.log_error(e)
  false
end
|
420
|
+
end
|
421
|
+
|
422
|
+
end
|