cnvrg 1.9.9.9.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/bin/cnvrg +9 -0
- data/cnvrg.gemspec +47 -0
- data/lib/cnvrg.rb +7 -0
- data/lib/cnvrg/Images.rb +351 -0
- data/lib/cnvrg/api.rb +247 -0
- data/lib/cnvrg/api_v2.rb +14 -0
- data/lib/cnvrg/auth.rb +79 -0
- data/lib/cnvrg/cli.rb +5715 -0
- data/lib/cnvrg/cli/flow.rb +166 -0
- data/lib/cnvrg/cli/library_cli.rb +33 -0
- data/lib/cnvrg/cli/subcommand.rb +28 -0
- data/lib/cnvrg/cli/task.rb +116 -0
- data/lib/cnvrg/colors.rb +8 -0
- data/lib/cnvrg/connect_job_ssh.rb +31 -0
- data/lib/cnvrg/data.rb +335 -0
- data/lib/cnvrg/datafiles.rb +1325 -0
- data/lib/cnvrg/dataset.rb +892 -0
- data/lib/cnvrg/downloader/client.rb +101 -0
- data/lib/cnvrg/downloader/clients/azure_client.rb +45 -0
- data/lib/cnvrg/downloader/clients/gcp_client.rb +50 -0
- data/lib/cnvrg/downloader/clients/s3_client.rb +78 -0
- data/lib/cnvrg/experiment.rb +209 -0
- data/lib/cnvrg/files.rb +1047 -0
- data/lib/cnvrg/flow.rb +137 -0
- data/lib/cnvrg/helpers.rb +422 -0
- data/lib/cnvrg/helpers/agent.rb +188 -0
- data/lib/cnvrg/helpers/executer.rb +213 -0
- data/lib/cnvrg/hyper.rb +21 -0
- data/lib/cnvrg/image.rb +113 -0
- data/lib/cnvrg/image_cli.rb +25 -0
- data/lib/cnvrg/job_cli.rb +73 -0
- data/lib/cnvrg/job_ssh.rb +48 -0
- data/lib/cnvrg/logger.rb +111 -0
- data/lib/cnvrg/org_helpers.rb +5 -0
- data/lib/cnvrg/project.rb +822 -0
- data/lib/cnvrg/result.rb +29 -0
- data/lib/cnvrg/runner.rb +49 -0
- data/lib/cnvrg/ssh.rb +94 -0
- data/lib/cnvrg/storage.rb +128 -0
- data/lib/cnvrg/task.rb +165 -0
- data/lib/cnvrg/version.rb +3 -0
- metadata +460 -0
data/lib/cnvrg/flow.rb
ADDED
@@ -0,0 +1,137 @@
|
|
1
|
+
module Cnvrg
  # Handle for one flow of a cnvrg project.
  #
  # The flow is looked up on the server (by slug first, then by title) when the
  # object is built; the instance then exposes the API paths / web URLs derived
  # from the resolved slug, plus helpers that read and write a local flow YAML.
  class Flows
    # @param flow_slug [String, Integer] slug or title of the flow to resolve
    # @param project [Cnvrg::Project, nil] project owning the flow; when nil the
    #   project found at Cnvrg::CLI.get_project_home is used
    # @raise [StandardError] when the flow cannot be resolved
    def initialize(flow_slug, project: nil)
      @project = project || Cnvrg::Project.new(Cnvrg::CLI.get_project_home)
      # Resolve against @project: the `project` keyword may legitimately be nil.
      @flow_info = Flows.resolve_flow_title(flow_slug, @project)
      @slug = @flow_info["slug"]
      @tasks = {}
      @relations = {}
      @title = nil
      @base_resource = @project.base_resource + "flows/#{@slug}"
      @public_url = "#{@project.url}/flows/#{@slug}"
      # self.reload_flow
    end

    # Finds the flow whose slug (preferred) or title matches +title+,
    # case-insensitively.
    #
    # @param title [String, Integer] slug or title to look for
    # @param project [Cnvrg::Project] project whose flows are listed
    # @return [Hash] the matching entry of the server's "result" list
    # @raise [StandardError] when the listing is blank or nothing matches
    def self.resolve_flow_title(title, project)
      # NOTE(review): the other URLs in this class append "flows" to
      # base_resource without an extra "/"; confirm the server accepts this form.
      resp = Cnvrg::API.request("#{project.base_resource}/flows", 'GET')
      if resp.blank?
        raise StandardError.new("Can't resolve flow")
      end

      # create_flow passes the numeric flow id, and entries may lack a title,
      # so compare string forms instead of calling downcase on raw values.
      wanted = title.to_s.downcase
      flows = Array(resp["result"])
      res = flows.find { |flow| flow["slug"].to_s.downcase == wanted }
      res ||= flows.find { |flow| flow["title"].to_s.downcase == wanted }
      if res.blank?
        raise StandardError.new("Can't find flow with title #{title}")
      end
      res
    end

    # @return [String] web URL for creating a new version of this flow
    def edit_href
      "#{@public_url}/flow_versions/new"
    end

    # @param version [String] flow version slug
    # @return [String] web URL for editing the given flow version
    def edit_version_href(version)
      "#{edit_href}?flow_version_slug=#{version}"
    end

    # @param version [String, nil] flow version; nil selects 'latest'
    # @return [String] API path of the flow version
    def version_href(version=nil)
      "#{@base_resource}/flow_versions/#{version || 'latest'}"
    end

    # Downloads the recipe of a flow version and writes it to a YAML file.
    #
    # @param version [String, nil] flow version to export
    # @param file [String, nil] target path; defaults to "flow-<slug>.yml"
    # @return [String] path of the written file
    # @raise [StandardError] when the server does not answer with status 200
    def export(version, file: nil)
      resp = Cnvrg::API.request(version_href(version), 'GET')
      # A nil/blank response is treated like any other failed lookup.
      if resp.blank? || resp["status"] != 200
        raise StandardError.new("Cant find flow version: #{version} for flow: #{@slug}")
      end
      api_recipe = resp["flow_version"]["api_recipe"]
      file = file.presence || "flow-#{@slug.downcase.gsub("\s", "_")}.yml"
      File.open(file, "w") { |f| f.write api_recipe.to_yaml }
      file
    end

    # Not implemented: the body is empty, so this always returns nil.
    def get_version(version)
    end

    # Creates a flow from a recipe and optionally starts it.
    #
    # @param project [Cnvrg::Project] project that will own the flow
    # @param recipe [#to_json] flow recipe, sent as the `flow_version` parameter
    # @param run [Boolean] when true the request is posted to ".../flows/run"
    # @return [Array] two elements: the new Flows object and the flow version id
    # @raise [StandardError] with the server message on status 400, or a generic
    #   message for any other non-200 outcome
    def self.create_flow(project, recipe, run: false)
      url = "#{project.base_resource}flows"
      url += "/run" if run
      resp = Cnvrg::API.request(url, 'POST', {flow_version: recipe.to_json}) || {}
      if resp["status"] == 200
        return [Flows.new(resp["flow_version"]["flow_id"], project: project), resp["flow_version"]["id"]]
      elsif resp["status"] == 400
        raise StandardError.new(resp["message"])
      end
      raise StandardError.new("Can't create new flow")
    end

    # Loads the local flow YAML.
    #
    # NOTE(review): @fullpath is never assigned anywhere in this class; unless it
    # is set elsewhere this method always raises.
    #
    # @return [Object] the parsed YAML document
    # @raise [StandardError] when the flow file does not exist
    def get_flow
      # File.exists? was removed in Ruby 3.2; File.exist? works on every version.
      unless @fullpath && File.exist?(@fullpath)
        raise StandardError.new("Cant find flow in #{@fullpath}")
      end
      YAML.load_file(@fullpath)
    end

    # Serializes +new_flow+ as YAML into the local flow file.
    def set_flow(new_flow)
      File.open(@fullpath, "w") { |file| file.write new_flow.to_yaml }
    end

    # Stores +slug+ under the :slug key of the local flow file.
    def set_flow_slug(slug)
      flow = self.get_flow
      flow[:slug] = slug
      self.set_flow(flow)
    end

    # Re-reads title, slug, relations and tasks from the local flow file.
    # Tasks described inline under :tasks are built from that content; any other
    # task named in a relation is built from its name as a path.
    def reload_flow
      flow = self.get_flow
      @title = flow[:title]
      @slug = flow[:slug]
      # A flow file without relations simply yields no tasks.
      @relations = flow[:relations] || []
      local_tasks = flow[:tasks] || {}
      @relations.each do |relation|
        relation.values.each do |task|
          if local_tasks[task].present?
            @tasks[task] = Cnvrg::Task.new(@project.local_path, content: local_tasks[task])
          else
            @tasks[task] = Cnvrg::Task.new(@project.local_path, path: task)
          end
        end
      end
    end

    # Asks the server to run this flow.
    #
    # NOTE(review): @base_resource already ends with the slug, so the slug
    # appears twice in the request path; confirm this matches the server route.
    #
    # @return [Object] the server response on success; otherwise the result of
    #   Cnvrg::CLI.log_message
    def run
      resp = Cnvrg::API.request("#{@base_resource}/#{@slug}/run", 'POST')
      return resp if Cnvrg::CLI.is_response_success(resp)
      Cnvrg::CLI.log_message("Cant run flow #{@slug}")
    end

    ### in use for yaml file
    # def run
    #   resp = Cnvrg::API.request(@base_resource, 'POST', {data: to_api})
    #   Cnvrg::CLI.is_response_success(resp, true)
    #   flow_slug = resp['result']['flow']
    #   self.set_flow_slug(flow_slug)
    #   url = Cnvrg::Helpers.remote_url + resp['result']['url']
    #   return url
    # end

    private

    # Hash form of the flow (relations, tasks, title, slug) for API payloads.
    def to_api
      {
        relations: @relations,
        tasks: @tasks.map { |name, task| [name, task.to_api] }.to_h,
        title: @title,
        slug: @slug
      }
    end
  end
end
|
@@ -0,0 +1,422 @@
|
|
1
|
+
module Cnvrg
|
2
|
+
module Helpers
|
3
|
+
|
4
|
+
extend self
|
5
|
+
# Number of worker threads to use, taken from the CNVRG_PARALLEL_THREADS
# environment variable.
#
# @return [Integer] the configured value, 15 when the variable is unset or not
#   a positive number, and never more than 100000
def parallel_threads
  requested = ENV["CNVRG_PARALLEL_THREADS"].to_i
  # Unset, non-numeric, zero and negative values all fall back to 15 threads.
  requested = 15 unless requested > 0
  # Upper bound of 100k threads.
  requested > 100000 ? 100000 : requested
end
|
15
|
+
|
16
|
+
# Option hash built from the parallelism constants declared on Cnvrg::CLI.
#
# NOTE(review): the thread key is spelled :in_thread (singular); presumably the
# consumer is the `parallel` gem, which documents :in_threads - confirm intent
# before renaming.
#
# @return [Hash] with the keys :in_processes, :in_thread and :isolation
def self.parallel_options
  options = {}
  options[:in_processes] = Cnvrg::CLI::ParallelProcesses
  options[:in_thread] = Cnvrg::CLI::ParallelThreads
  options[:isolation] = true
  options
end
|
23
|
+
# @return [String] the check mark character (U+2713) encoded as UTF-8
def checkmark
  "\u2713".encode('utf-8')
end
|
27
|
+
|
28
|
+
# Checks connectivity by fetching http://www.google.com/.
#
# Relies on open-uri being loaded (it supplies URI::HTTP#open). The previous
# implementation called Kernel#open on the URL, which no longer opens URLs on
# Ruby 3, and it never closed the handle; the block form closes it.
#
# @return [Boolean] true when the page could be opened, false on any
#   StandardError (DNS failure, timeout, HTTP error, ...)
def internet_connection?
  URI.parse("http://www.google.com/").open { true }
rescue
  false
end
|
35
|
+
|
36
|
+
# Runs the given block until it completes without raising, at most +tries+
# times, pausing one second between attempts.
#
# @param tries [Integer] maximum number of attempts; must be positive
# @yield the operation to attempt
# @return [true] as soon as one attempt succeeds
# @raise [ArgumentError] when +tries+ is not positive (this used to surface as
#   a TypeError from `raise nil`)
# @raise [StandardError] the error of the last attempt when all attempts fail
def try_until_success(tries: 3)
  raise ArgumentError, "tries must be positive" unless tries > 0

  last_error = nil
  tries.times do |attempt|
    begin
      yield
      return true
    rescue => e
      last_error = e
      # Report a 1-based attempt number.
      Cnvrg::Logger.log_info("Error while trying for the #{attempt + 1} time")
      Cnvrg::Logger.log_error(e)
      # No point in waiting once there is no attempt left.
      sleep(1) if attempt < tries - 1
    end
  end
  raise last_error
end
|
51
|
+
|
52
|
+
# Reads ~/.cnvrg/config.yml.
#
# @return [Object] the parsed YAML document, or an empty Hash when the file is
#   missing or empty (an empty file used to yield `false`, which broke callers
#   that index into the result)
def get_config
  config_path = File.join(File.expand_path('~'), ".cnvrg", "config.yml")
  return {} unless File.exist?(config_path)

  # The config uses Symbol keys, which Psych 4's safe-by-default load_file
  # rejects; unsafe_load_file keeps the historical behavior where it exists.
  # The file is the user's own local configuration.
  loaded = if YAML.respond_to?(:unsafe_load_file)
             YAML.unsafe_load_file(config_path)
           else
             YAML.load_file(config_path)
           end
  loaded || {}
end
|
62
|
+
|
63
|
+
# Writes +config+ as YAML to ~/.cnvrg/config.yml.
#
# @param config [#to_yaml] configuration to persist
# @return [Object] the +config+ argument, unchanged
def set_config(config)
  config_dir = File.join(File.expand_path('~'), ".cnvrg")
  # The directory may not exist yet; writing used to fail with ENOENT then.
  FileUtils.mkdir_p(config_dir)
  File.open(File.join(config_dir, "config.yml"), "w") { |f| f.write config.to_yaml }
  config
end
|
68
|
+
|
69
|
+
|
70
|
+
# Base URL of the cnvrg server, derived from the :api entry of
# ~/.cnvrg/config.yml.
#
# @return [String] the configured API URL without its trailing "/api" segment,
#   or "https://app.cnvrg.io" when the file is missing, unreadable, not a
#   mapping, or has no :api entry
def remote_url
  default_url = "https://app.cnvrg.io"
  config_path = File.join(File.expand_path('~'), ".cnvrg", "config.yml")
  return default_url unless File.exist?(config_path)

  begin
    # Symbol keys are rejected by Psych 4's load_file; use the unsafe loader
    # where it exists (the file is the user's own local configuration).
    config = if YAML.respond_to?(:unsafe_load_file)
               YAML.unsafe_load_file(config_path)
             else
               YAML.load_file(config_path)
             end
  rescue
    return default_url
  end

  # Empty files and scalar documents carry no settings.
  api = config.is_a?(Hash) ? config[:api] : nil
  return default_url if api.nil?

  # Strip only the trailing "/api" segment: removing every "/api" occurrence
  # mangled hosts such as "https://api.example.com/api".
  api.to_s.sub(%r{/api/?\z}, "")
end
|
89
|
+
|
90
|
+
# Server version recorded under :version in the config returned by get_config.
#
# @return [Integer] the stored version converted with to_i, or 0 when there is
#   none
def server_version
  stored = get_config[:version]
  stored.try(:to_i) || 0
end
|
94
|
+
|
95
|
+
# Stores +version+ under :version in the config, unless it is already there.
#
# @param version [String, Integer] version to record
# @return [nil, Object] nil when nothing changed, otherwise the result of
#   set_config
def update_version(version)
  config = get_config
  # Compare string forms on both sides: String#eql? against a non-String
  # never matched, so the config file was rewritten on every call.
  return if config[:version].to_s == version.to_s

  config[:version] = version
  set_config(config)
end
|
103
|
+
|
104
|
+
# Whether SSL certificates should be verified, per the :verify_ssl entry of
# ~/.cnvrg/config.yml.
#
# @return [Object] the configured :verify_ssl value; true when the file is
#   missing, unreadable, not a mapping, or has no :verify_ssl entry
def is_verify_ssl
  config_path = File.join(File.expand_path('~'), ".cnvrg", "config.yml")
  return true unless File.exist?(config_path)

  begin
    # Symbol keys are rejected by Psych 4's load_file; use the unsafe loader
    # where it exists (the file is the user's own local configuration).
    config = if YAML.respond_to?(:unsafe_load_file)
               YAML.unsafe_load_file(config_path)
             else
               YAML.load_file(config_path)
             end
  rescue
    return true
  end

  # A scalar document used to raise NoMethodError on to_h; treat it as "unset".
  return true unless config.is_a?(Hash)

  verify = config[:verify_ssl]
  verify.nil? ? true : verify
end
|
124
|
+
|
125
|
+
# Name of the host operating system, based on the platform predicates of this
# module, checked in the order windows, mac, ubuntu, linux.
#
# @return [String] "windows", "mac", "ubuntu", "linux" or "N/A"
def os
  return "windows" if windows?
  return "mac" if mac?
  return "ubuntu" if ubuntu?
  return "linux" if linux?

  "N/A"
end
|
141
|
+
|
142
|
+
# @return [Boolean] true when RUBY_PLATFORM names a native Windows build
def windows?
  # Match every mswin/mingw flavour: the former "mswin32|mingw32" pattern
  # missed platforms such as "x64-mingw-ucrt" and "x64-mswin64_140".
  !!(RUBY_PLATFORM =~ /mswin|mingw/)
end
|
145
|
+
|
146
|
+
# @return [Boolean] true when RUBY_PLATFORM contains "-darwin" followed by a
#   digit
def mac?
  (RUBY_PLATFORM =~ /-darwin\d/) ? true : false
end
|
149
|
+
|
150
|
+
# Anything that is neither macOS nor Windows is reported as Linux.
#
# @return [Boolean]
def linux?
  !(mac? || windows?)
end
|
153
|
+
|
154
|
+
# Reports Ubuntu when /etc/lsb-release exists as a regular file.
#
# Same check as the former `[ -f /etc/lsb-release ]` shell snippet, without
# spawning a shell (which fails outright where no POSIX shell is available).
#
# @return [Boolean]
def ubuntu?
  File.file?("/etc/lsb-release")
end
|
158
|
+
|
159
|
+
# Returns the default ignore-file text: the %{...} literal below (comment
# header plus the .git*, .gitignore, *.conflict and *.deleted patterns) with
# leading and trailing whitespace stripped.
def cnvrgignore_content
|
160
|
+
#TODO: cnvrg ignore add .conflict
|
161
|
+
%{
|
162
|
+
# cnvrg ignore: Ignore the following directories and files
|
163
|
+
# for example:
|
164
|
+
# some_dir/
|
165
|
+
# some_file.txt
|
166
|
+
.git*
|
167
|
+
.gitignore
|
168
|
+
*.conflict
|
169
|
+
*.deleted
|
170
|
+
}.strip
|
171
|
+
end
|
172
|
+
|
173
|
+
# Returns the %{...} literal below unmodified (no strip): a commented YAML
# template with example hyperparameter definitions of type integer, float,
# discrete and categorical under a top-level "parameters:" key.
def hyper_content
|
174
|
+
%{# Hyperparameter Optimization is the process of choosing a set of parameters for a learning algorithm, usually with the goal of optimizing a measure of the algorithm's performance on an independent data set.
|
175
|
+
|
176
|
+
# Below is the list of parameters that will be used in the optimization process. Each parameter has a param_name that should match the argument that is feeded to the experiment s.t kernel => --kernel='rbf'
|
177
|
+
|
178
|
+
parameters:
|
179
|
+
# Integer parameter is a range of possible values between a minimum (inclusive)
|
180
|
+
# and maximum (not inclusive) values. Values are floored (0.7 => 0)
|
181
|
+
- param_name: "learning_rate"
|
182
|
+
type: "integer"
|
183
|
+
min: 0 # inclusive
|
184
|
+
max: 10 # not inclusive
|
185
|
+
scale: "linear"
|
186
|
+
steps: 4 # The number of linear steps to produce.
|
187
|
+
|
188
|
+
|
189
|
+
# Float parameter is a range of possible values between a minimum (inclusive)
|
190
|
+
# and maximum (not inclusive) values.
|
191
|
+
#
|
192
|
+
- param_name: "learning_rate"
|
193
|
+
type: "float" # precision is 9 after period
|
194
|
+
min: 0.00001
|
195
|
+
max: 0.1
|
196
|
+
scale: "log2" # Could be log10 as well
|
197
|
+
steps: 2
|
198
|
+
|
199
|
+
# Discrete parameter is an array of numerical values.
|
200
|
+
#
|
201
|
+
- param_name: "c"
|
202
|
+
type: "discrete"
|
203
|
+
values: [0, 0.1 ,0.001]
|
204
|
+
|
205
|
+
# Categorical parameter is an array of string values
|
206
|
+
#
|
207
|
+
- param_name: "kernel"
|
208
|
+
type: "categorical"
|
209
|
+
values: ["linear", "poly", "rbf"]
|
210
|
+
|
211
|
+
}
|
212
|
+
end
|
213
|
+
|
214
|
+
# Returns the placeholder README text below (a "# README" heading followed by
# a bullet list of suggested topics), with leading and trailing whitespace
# stripped.
def readme_content
|
215
|
+
%{
|
216
|
+
# README
|
217
|
+
|
218
|
+
This README would normally contain some context and description about the project.
|
219
|
+
|
220
|
+
Things you may want to cover:
|
221
|
+
|
222
|
+
* Data description
|
223
|
+
|
224
|
+
* Benchmark and measurement guidelines
|
225
|
+
|
226
|
+
* Used algorithms
|
227
|
+
|
228
|
+
* Scores
|
229
|
+
|
230
|
+
* Configurations
|
231
|
+
|
232
|
+
* Requirements
|
233
|
+
|
234
|
+
* How to run the experiments
|
235
|
+
|
236
|
+
* ...}.strip
|
237
|
+
end
|
238
|
+
|
239
|
+
# @return [String] the fixed domain name "cnvrg.io"
def netrc_domain
  'cnvrg.io'
end
|
242
|
+
|
243
|
+
# Position of the first "/"-separated segment of +path+ equal to +name+.
#
# @param path [String] slash-separated path or URL
# @param name [String] segment to look for
# @return [Integer] zero-based segment index, or -1 when no segment matches
def look_for_in_path(path, name)
  path.split("/").index(name) || -1
end
|
252
|
+
|
253
|
+
# Extracts the segments surrounding +breaker+ in a "/"-separated URL: the one
# before it is the owner, the one after it is the slug.
#
# @param url [String] e.g. "https://host/<owner>/projects/<slug>"
# @param breaker [String] separating segment, e.g. "projects"
# @return [Array] two elements, owner and slug; either is nil when the
#   corresponding segment does not exist, and both are nil when +breaker+ is
#   not a segment of +url+
def extract_owner_slug_from_url(url, breaker)
  url_parts = url.split("/")
  breaker_index = url_parts.index(breaker)
  # The old -1 "not found" sentinel was used as an array index and returned
  # unrelated segments.
  return nil, nil if breaker_index.nil?

  slug = url_parts[breaker_index + 1]
  # With the breaker as first segment, index -1 would wrap to the last one.
  owner = breaker_index.zero? ? nil : url_parts[breaker_index - 1]
  return owner, slug
end
|
260
|
+
|
261
|
+
# cpu
|
262
|
+
|
263
|
+
# @return [Integer] CPU time of the current process (CLOCK_PROCESS_CPUTIME_ID),
#   in microseconds
def cpu_time
  clock = Process::CLOCK_PROCESS_CPUTIME_ID
  Process.clock_gettime(clock, :microsecond)
end
|
266
|
+
|
267
|
+
# @return [Integer] reading of the monotonic clock (CLOCK_MONOTONIC), in
#   microseconds
def wall_time
  clock = Process::CLOCK_MONOTONIC
  Process.clock_gettime(clock, :microsecond)
end
|
270
|
+
|
271
|
+
# Decrypts a Base64-encoded AES-256-CBC ciphertext.
#
# @param key [String] cipher key, used as given
# @param iv [String] Base64-encoded initialization vector
# @param str [String] Base64-encoded ciphertext
# @return [String, nil] the plaintext tagged as UTF-8; on any StandardError the
#   error is printed with puts and nil is returned
def decrypt(key, iv, str)
  decipher = OpenSSL::Cipher.new("aes-256-cbc")
  decipher.decrypt
  decipher.key = key
  decipher.iv = Base64.decode64(iv.encode('utf-8'))

  ciphertext = Base64.decode64(str.encode('utf-8'))
  plaintext = decipher.update(ciphertext)
  plaintext << decipher.final
  plaintext.force_encoding('utf-8')
rescue => e
  # Best effort: report the problem and hand back nil (the value of puts).
  puts e
end
|
292
|
+
|
293
|
+
# memory
|
294
|
+
#
|
295
|
+
# Placeholder: the body is empty, so the method ignores +pid+ and returns nil.
def get_mem(pid)
|
296
|
+
end
|
297
|
+
|
298
|
+
# Builds an S3 client and the related upload settings from the encrypted
# storage description sent by the server.
#
# The document at files["path_sts"] carries the key on its first line and the
# IV on its second; both are used to decrypt the credential fields of +files+.
# The download is attempted up to 20 times with a 5 second pause after each
# failure.
#
# @param files [Hash] reads the keys "path_sts", "sts_a", "sts_s", "sts_st",
#   "region", "bucket", "is_s3", "server_side_encryption" and, when "is_s3" is
#   false, "endpoint"
# @return [Hash] with :client, :key, :iv, :bucket and :upload_options on success
# @return [Cnvrg::Result] a failed result when the document cannot be fetched
def get_s3_props(files) #will return client and decryptor
  sts_path = files["path_sts"]
  body = nil
  retries = 0
  while body.nil? && retries < 20
    begin
      # URI.open is the open-uri entry point on current Rubies (Kernel#open
      # stopped opening URLs in Ruby 3); older Rubies only patch Kernel#open.
      opener = URI.respond_to?(:open) ? URI : Kernel
      open_args = [sts_path]
      open_args << {ssl_verify_mode: OpenSSL::SSL::VERIFY_NONE} unless Helpers.is_verify_ssl
      # Block form so the handle is closed once the body has been read.
      body = opener.open(*open_args) { |io| io.read }
    rescue
      retries += 1
      sleep(5)
    end
  end
  if body.nil?
    return Cnvrg::Result.new(false, "couldn't download some files", "error in sts", "")
  end

  key, iv = body.split("\n")

  access = Cnvrg::Helpers.decrypt(key, iv, files["sts_a"])
  secret = Cnvrg::Helpers.decrypt(key, iv, files["sts_s"])
  session = Cnvrg::Helpers.decrypt(key, iv, files["sts_st"])
  region = Cnvrg::Helpers.decrypt(key, iv, files["region"])
  bucket = Cnvrg::Helpers.decrypt(key, iv, files["bucket"])
  is_s3 = files["is_s3"]
  server_side_encryption = files["server_side_encryption"]

  # Settings shared by both storage flavours.
  client_options = {
    :access_key_id => access,
    :secret_access_key => secret,
    :region => region,
    :http_open_timeout => 60,
    :retry_limit => 20
  }
  # A missing "is_s3" flag is treated as S3.
  if is_s3.nil? || is_s3
    client_options[:session_token] = session
    use_accelerate_endpoint = true
  else
    # Custom endpoint: path-style addressing, no peer verification and no
    # session token.
    client_options[:endpoint] = Cnvrg::Helpers.decrypt(key, iv, files["endpoint"])
    client_options[:force_path_style] = true
    client_options[:ssl_verify_peer] = false
    use_accelerate_endpoint = false
  end
  client = Aws::S3::Client.new(client_options)

  upload_options = {:use_accelerate_endpoint => use_accelerate_endpoint}
  upload_options[:server_side_encryption] = server_side_encryption if server_side_encryption

  return {client: client, key: key, iv: iv, bucket: bucket, upload_options: upload_options}
end
|
361
|
+
|
362
|
+
# Downloads the "tfevents" files of an experiment's commit and moves them into
# a directory named after the experiment slug (relative to the current working
# directory). That directory is removed and recreated before the files are
# moved in.
#
# @param exp [Hash] experiment record; reads "slug" and "end_commit"
# @param project [Cnvrg::Project] project owning the experiment
# @param commit [String, nil] commit to read; defaults to exp["end_commit"]
# @return [Array<String>] the tfevents file paths taken from the commit tree
def get_experiment_events_log_from_server(exp, project, commit: nil)
  dest_dir = exp["slug"]
  commit ||= exp["end_commit"]
  response = project.clone(0, commit)
  # Second argument is the "should exit" flag; the result is not inspected here.
  Cnvrg::CLI.is_response_success(response, false)

  result = response["result"]
  commit_sha1 = result["commit"]
  event_files = result["tree"].keys.select { |path| path.include?("tfevents") }

  @files = Cnvrg::Files.new(project.owner, project.slug, project_home: "", project: project)
  @files.download_files(event_files, commit_sha1, progress: nil)

  # Start from an empty destination directory.
  FileUtils.rm_rf("#{dest_dir}")
  FileUtils.mkdir_p(dest_dir)
  event_files.each do |path|
    FileUtils.mkdir_p("#{dest_dir}/#{File.dirname(path)}")
    FileUtils.mv(path, "#{dest_dir}/#{path}")
  end
end
|
380
|
+
|
381
|
+
# Copies every "tfevents" file found under the working directory of the
# experiment's pod (container "agent") into a local directory named after the
# experiment slug, using the kubectl command line.
#
# NOTE(review): namespace, slug and the remote paths are interpolated into
# shell command strings without escaping; confirm they can never carry
# untrusted input.
#
# @param exp [Hash] experiment record; reads "slug"
# @param namespace [String] Kubernetes namespace of the pod
# @return [Boolean] true when the copy loop completed; false when no pod matched
#   or any StandardError was raised (the error is logged)
def get_experiment_events_log_via_kubectl(exp, namespace)
  dest_dir = exp["slug"]
  pod_listing = `kubectl -n #{namespace} get pods | grep #{exp["slug"]}`

  # First column of the first matching line is the pod name.
  pod_name = pod_listing.split(" ")[0]
  return false if pod_name.blank?

  FileUtils.mkdir_p(dest_dir)
  working_dir = `kubectl -n #{namespace} exec #{pod_name} -c agent -- pwd`
  working_dir.strip!
  listing = `kubectl -n #{namespace} exec #{pod_name} -c agent -- /bin/bash -c "ls -R #{working_dir}"`

  all_files = []
  # `ls -R` separates directories with blank lines; a section that starts with
  # a "<dir>:" header names the directory its entries live in.
  listing.split("\n\n").each do |section|
    entries = section.split("\n")
    folder = nil
    if entries.first.include?(":")
      folder = entries.first.gsub(":", "").sub(working_dir + "/", "")
      entries = entries.drop(1)
    end
    entries.each do |entry|
      all_files << "#{folder}/#{entry}" if entry.include?("tfevents")
    end
  end

  all_files.each do |file|
    FileUtils.mkdir_p("#{dest_dir}/#{File.dirname(file)}")
    `kubectl -n #{namespace} cp #{pod_name}:#{file} -c agent #{dest_dir}/#{file}`
  end

  true
rescue => e
  Cnvrg::Logger.log_error(e)
  false
end
|
420
|
+
end
|
421
|
+
|
422
|
+
end
|