cnvrg 1.6.38 → 1.9.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/cnvrg.gemspec +1 -4
- data/lib/cnvrg/Images.rb +0 -148
- data/lib/cnvrg/api.rb +8 -8
- data/lib/cnvrg/api_v2.rb +14 -0
- data/lib/cnvrg/cli.rb +288 -781
- data/lib/cnvrg/connect_job_ssh.rb +31 -0
- data/lib/cnvrg/data.rb +65 -12
- data/lib/cnvrg/datafiles.rb +483 -201
- data/lib/cnvrg/dataset.rb +65 -29
- data/lib/cnvrg/experiment.rb +10 -4
- data/lib/cnvrg/files.rb +46 -14
- data/lib/cnvrg/helpers.rb +34 -26
- data/lib/cnvrg/helpers/agent.rb +188 -0
- data/lib/cnvrg/helpers/executer.rb +162 -258
- data/lib/cnvrg/job_cli.rb +28 -53
- data/lib/cnvrg/job_ssh.rb +47 -0
- data/lib/cnvrg/logger.rb +4 -0
- data/lib/cnvrg/project.rb +45 -16
- data/lib/cnvrg/ssh.rb +0 -1
- data/lib/cnvrg/version.rb +1 -1
- metadata +9 -33
data/lib/cnvrg/dataset.rb
CHANGED
@@ -7,7 +7,7 @@ module Cnvrg
|
|
7
7
|
IDXParallelThreads ||= Cnvrg::Helpers.parallel_threads
|
8
8
|
IDXParallelProcesses ||= Parallel.processor_count
|
9
9
|
|
10
|
-
def initialize(project_home = '', dataset_url: '')
|
10
|
+
def initialize(project_home = '', dataset_url: '', dataset_info: '')
|
11
11
|
begin
|
12
12
|
@info = {}
|
13
13
|
if project_home.present?
|
@@ -17,6 +17,11 @@ module Cnvrg
|
|
17
17
|
@title = config[:dataset_name]
|
18
18
|
@slug = config[:dataset_slug]
|
19
19
|
@owner = config[:owner]
|
20
|
+
elsif dataset_info.present?
|
21
|
+
@title = dataset_info[:slug]
|
22
|
+
@slug = dataset_info[:slug]
|
23
|
+
@owner = dataset_info[:owner]
|
24
|
+
@local_path = Dir.pwd
|
20
25
|
else
|
21
26
|
owner, slug = Cnvrg::Helpers.extract_owner_slug_from_url(dataset_url, 'datasets')
|
22
27
|
@title = slug
|
@@ -87,7 +92,6 @@ module Cnvrg
|
|
87
92
|
files
|
88
93
|
end
|
89
94
|
|
90
|
-
|
91
95
|
def get_stats(commit: nil, query: nil)
|
92
96
|
response = Cnvrg::API.request("users/#{@owner}/datasets/#{@slug}/clone", 'POST', {commit: commit, query: query})
|
93
97
|
Cnvrg::CLI.is_response_success(response, true)
|
@@ -100,7 +104,6 @@ module Cnvrg
|
|
100
104
|
response['result']['files']['keys']
|
101
105
|
end
|
102
106
|
|
103
|
-
|
104
107
|
def backup_idx
|
105
108
|
Cnvrg::Logger.log_info("Backup idx")
|
106
109
|
if File.exists? "#{self.local_path}/.cnvrg/idx.yml"
|
@@ -123,7 +126,6 @@ module Cnvrg
|
|
123
126
|
File.open(".cnvrg/config.yml", "w+") {|f| f.write config.to_yaml}
|
124
127
|
end
|
125
128
|
|
126
|
-
|
127
129
|
def self.delete(dataset_slug, owner)
|
128
130
|
response = Cnvrg::API.request("users/#{owner}/datasets/#{dataset_slug}/delete", 'DELETE')
|
129
131
|
return response
|
@@ -142,14 +144,12 @@ module Cnvrg
|
|
142
144
|
response = Cnvrg::API.request("users/#{self.owner}/datasets/#{self.slug}/volumes/create", 'POST', {data_commit: commit})
|
143
145
|
CLI.is_response_success(response)
|
144
146
|
return response
|
145
|
-
|
146
147
|
end
|
147
148
|
|
148
149
|
def list(owner)
|
149
150
|
response = Cnvrg::API.request("users/#{owner}/datasets/list", 'GET')
|
150
151
|
CLI.is_response_success(response)
|
151
152
|
return response
|
152
|
-
|
153
153
|
end
|
154
154
|
|
155
155
|
def search_queries
|
@@ -160,7 +160,6 @@ module Cnvrg
|
|
160
160
|
row << [query["name"], query["slug"], query["created_at"].in_time_zone.to_s, query["username"]]
|
161
161
|
end
|
162
162
|
return row
|
163
|
-
|
164
163
|
end
|
165
164
|
|
166
165
|
def get_query_file(query_slug)
|
@@ -171,7 +170,6 @@ module Cnvrg
|
|
171
170
|
row << [file["name"], file["fullpath"], file["s3_url"]]
|
172
171
|
end
|
173
172
|
return row
|
174
|
-
|
175
173
|
end
|
176
174
|
|
177
175
|
def download_tags_yaml
|
@@ -186,11 +184,13 @@ module Cnvrg
|
|
186
184
|
end
|
187
185
|
end
|
188
186
|
|
189
|
-
def list_commits
|
190
|
-
response = Cnvrg::API.request(
|
187
|
+
def list_commits(commit_sha1: nil)
|
188
|
+
response = Cnvrg::API.request(
|
189
|
+
"users/#{self.owner}/datasets/#{self.slug}/list_commits?commit=#{commit_sha1}",
|
190
|
+
'GET'
|
191
|
+
)
|
191
192
|
CLI.is_response_success(response)
|
192
193
|
return response
|
193
|
-
|
194
194
|
end
|
195
195
|
|
196
196
|
def upload_tags_via_yml(tag_file = nil)
|
@@ -277,7 +277,6 @@ module Cnvrg
|
|
277
277
|
end
|
278
278
|
end
|
279
279
|
return ignore_list.flatten
|
280
|
-
|
281
280
|
end
|
282
281
|
|
283
282
|
|
@@ -293,7 +292,6 @@ module Cnvrg
|
|
293
292
|
create_ignore = true
|
294
293
|
end
|
295
294
|
|
296
|
-
|
297
295
|
cnvrgignore = Helpers.cnvrgignore_content
|
298
296
|
begin
|
299
297
|
response = Cnvrg::API.request("cli/create_dataset", 'POST', {title: dataset_name, owner: owner, is_public: is_public, bucket: bucket})
|
@@ -351,14 +349,14 @@ module Cnvrg
|
|
351
349
|
end
|
352
350
|
|
353
351
|
def self.blank_clone(owner, dataset_name, dataset_slug)
|
354
|
-
list_dirs = ["
|
352
|
+
list_dirs = ["#{dataset_slug}/.cnvrg"
|
355
353
|
]
|
356
354
|
list_files = [
|
357
|
-
"
|
355
|
+
"#{dataset_slug}/.cnvrg/config.yml"
|
358
356
|
]
|
359
357
|
create_ignore = false
|
360
358
|
if !File.exist? ".cnvrgignore"
|
361
|
-
list_files << "
|
359
|
+
list_files << "#{dataset_slug}/.cnvrgignore"
|
362
360
|
create_ignore = true
|
363
361
|
end
|
364
362
|
|
@@ -372,8 +370,8 @@ module Cnvrg
|
|
372
370
|
|
373
371
|
FileUtils.mkdir_p list_dirs
|
374
372
|
FileUtils.touch list_files
|
375
|
-
File.open("
|
376
|
-
File.open("
|
373
|
+
File.open("#{dataset_slug}/.cnvrg/config.yml", "w+") {|f| f.write config.to_yaml}
|
374
|
+
File.open("#{dataset_slug}/.cnvrgignore", "w+") {|f| f.write cnvrgignore} unless !create_ignore
|
377
375
|
rescue => e
|
378
376
|
return false
|
379
377
|
end
|
@@ -385,16 +383,21 @@ module Cnvrg
|
|
385
383
|
File.open(self.local_path + "/.cnvrgignore", "w+") {|f| f.write cnvrgignore}
|
386
384
|
end
|
387
385
|
|
388
|
-
def self.
|
386
|
+
def self.verify_dataset(dataset_slug)
|
387
|
+
config = YAML.load_file("/data/#{dataset_title}/.cnvrg/config.yml") rescue {}
|
388
|
+
config[:success] == true
|
389
|
+
end
|
390
|
+
|
391
|
+
def self.verify_datasets(dataset_titles, timeout = nil)
|
389
392
|
start_time = Time.now.to_i
|
390
393
|
Cnvrg::Logger.log_info("Verifying datasets #{dataset_titles}")
|
391
394
|
Cnvrg::Logger.log_info("Timeout is #{timeout}")
|
392
395
|
while true
|
393
396
|
begin
|
394
397
|
current_time = Time.now.to_i
|
395
|
-
return false if timeout < current_time - start_time
|
398
|
+
return false if (timeout.present? and timeout < current_time - start_time)
|
396
399
|
all_are_ready = dataset_titles.all? do |dataset_title|
|
397
|
-
config = YAML.load_file("
|
400
|
+
config = YAML.load_file("#{dataset_title}/.cnvrg/config.yml")
|
398
401
|
config[:success] == true
|
399
402
|
end
|
400
403
|
return true if all_are_ready
|
@@ -408,6 +411,30 @@ module Cnvrg
|
|
408
411
|
end
|
409
412
|
end
|
410
413
|
|
414
|
+
def self.scan_datasets()
|
415
|
+
Cnvrg::Logger.log_info("Looking up datasets")
|
416
|
+
datasets = Dir.entries(Dir.pwd).map do |entry|
|
417
|
+
if File.directory? File.join(Dir.pwd,entry) and !(entry =='.' || entry == '..')
|
418
|
+
begin
|
419
|
+
config = YAML.load_file("#{Dir.pwd}/#{entry}/.cnvrg/config.yml") rescue nil
|
420
|
+
local_commit = YAML.load_file("#{Dir.pwd}/#{entry}/.cnvrg/idx.yml")[:commit] rescue nil
|
421
|
+
if config.present? and config[:success] == true and config[:dataset_name].present? and config[:dataset_slug].present? and local_commit.present?
|
422
|
+
{
|
423
|
+
"dataset_slug": config[:dataset_slug],
|
424
|
+
"dataset_name": config[:dataset_name],
|
425
|
+
"local_commit": local_commit,
|
426
|
+
}
|
427
|
+
else
|
428
|
+
nil
|
429
|
+
end
|
430
|
+
rescue
|
431
|
+
nil
|
432
|
+
end
|
433
|
+
end
|
434
|
+
end.compact.uniq
|
435
|
+
datasets
|
436
|
+
end
|
437
|
+
|
411
438
|
def clone(commit)
|
412
439
|
return
|
413
440
|
end
|
@@ -470,7 +497,6 @@ module Cnvrg
|
|
470
497
|
if File.exist?(file_path)
|
471
498
|
File.open(file_path, "a") {|f| f.puts(":success: true")}
|
472
499
|
end
|
473
|
-
rescue
|
474
500
|
end
|
475
501
|
|
476
502
|
def self.init_container(owner, dataset_slug, dataset_name)
|
@@ -504,7 +530,6 @@ module Cnvrg
|
|
504
530
|
return YAML.load_file("#{self.local_path}/.cnvrg/idx.yml")
|
505
531
|
end
|
506
532
|
{commit: nil, tree: {}}
|
507
|
-
|
508
533
|
end
|
509
534
|
|
510
535
|
def set_idx(idx)
|
@@ -572,7 +597,7 @@ module Cnvrg
|
|
572
597
|
end
|
573
598
|
|
574
599
|
def write_idx(tree = nil, commit = nil)
|
575
|
-
if tree.
|
600
|
+
if tree.nil?
|
576
601
|
tree = self.generate_idx[:tree]
|
577
602
|
tree = tree.map {|k, v| (v.present?) ? [k, {sha1: v[:sha1], commit_time: Time.now}] : [k, v]}.to_h
|
578
603
|
end
|
@@ -701,7 +726,6 @@ module Cnvrg
|
|
701
726
|
idx.try(:fetch, :partial_commit)
|
702
727
|
end
|
703
728
|
|
704
|
-
|
705
729
|
def current_status(new_branch)
|
706
730
|
commit = last_local_commit
|
707
731
|
response = Cnvrg::API.request("users/#{self.owner}/datasets/#{self.slug}/status_current", 'POST', {current_commit: commit, new_branch: new_branch})
|
@@ -731,7 +755,6 @@ module Cnvrg
|
|
731
755
|
rescue
|
732
756
|
return []
|
733
757
|
end
|
734
|
-
|
735
758
|
end
|
736
759
|
|
737
760
|
|
@@ -748,7 +771,6 @@ module Cnvrg
|
|
748
771
|
idx_hash[:tree] = ""
|
749
772
|
else
|
750
773
|
idx_hash = YAML.load_file("#{self.local_path}/.cnvrg/idx.yml")
|
751
|
-
|
752
774
|
end
|
753
775
|
idx_hash[:next_commit] = commit_sha1
|
754
776
|
File.open("#{self.local_path}/.cnvrg/idx.yml", 'w') {|f| f.write idx_hash.to_yaml}
|
@@ -810,11 +832,9 @@ module Cnvrg
|
|
810
832
|
|
811
833
|
def update_idx(idx)
|
812
834
|
File.open("#{self.local_path}/.cnvrg/idx.yml", 'w') {|f| f.write idx.to_yaml}
|
813
|
-
|
814
835
|
return true
|
815
836
|
end
|
816
837
|
|
817
|
-
|
818
838
|
def update_idx_with_commit!(commit)
|
819
839
|
idx_hash = YAML.load_file("#{self.local_path}/.cnvrg/idx.yml")
|
820
840
|
idx_hash[:commit] = commit
|
@@ -852,5 +872,21 @@ module Cnvrg
|
|
852
872
|
return {validation: Data::ConfigValidation::SUCCESS, message: "Directory is already linked to #{slug}"}
|
853
873
|
end
|
854
874
|
|
875
|
+
def self.stop_if_dataset_present(dataset_home, dataset_name, commit: nil)
|
876
|
+
|
877
|
+
cli = Cnvrg::CLI.new()
|
878
|
+
config = YAML.load_file(dataset_home + "/.cnvrg/config.yml")
|
879
|
+
if commit.present?
|
880
|
+
local_commit = YAML.load_file(dataset_home + "/.cnvrg/idx.yml")[:commit] rescue nil
|
881
|
+
return if commit != local_commit or local_commit.blank?
|
882
|
+
end
|
883
|
+
if config[:dataset_name] == dataset_name
|
884
|
+
cli.log_message("Dataset already present, clone aborted")
|
885
|
+
exit(0)
|
886
|
+
end
|
887
|
+
rescue => e
|
888
|
+
nil
|
889
|
+
end
|
890
|
+
|
855
891
|
end
|
856
892
|
end
|
data/lib/cnvrg/experiment.rb
CHANGED
@@ -6,11 +6,12 @@ module Cnvrg
|
|
6
6
|
attr_reader :sync_delay_time
|
7
7
|
attr_reader :output_dir
|
8
8
|
|
9
|
-
def initialize(owner, project_slug)
|
9
|
+
def initialize(owner, project_slug, job_id: nil)
|
10
10
|
@project_slug = project_slug
|
11
11
|
@owner = owner
|
12
|
+
@command = nil
|
12
13
|
@base_resource = "users/#{owner}/projects/#{project_slug}/"
|
13
|
-
@slug =
|
14
|
+
@slug = job_id
|
14
15
|
@sync_before_terminate = nil
|
15
16
|
@sync_delay_time = nil
|
16
17
|
@output_dir = nil
|
@@ -37,6 +38,8 @@ module Cnvrg
|
|
37
38
|
@sync_before_terminate = res.to_h["result"].to_h["sync_before_terminate"]
|
38
39
|
@sync_delay_time = res.to_h["result"].to_h["sync_delay_time"]
|
39
40
|
@output_dir = res.to_h["result"].to_h["output_dir"]
|
41
|
+
@command = res.to_h["result"].to_h["command"] rescue nil
|
42
|
+
|
40
43
|
return res
|
41
44
|
|
42
45
|
end
|
@@ -123,8 +126,7 @@ module Cnvrg
|
|
123
126
|
dataset_query:data_query,git_commit:git_commit,git_branch:git_branch,
|
124
127
|
restart_if_stuck:restart_if_stuck, local_folders: local_folders, title:title,
|
125
128
|
prerun: prerun, requirements: requirements, recurring: recurring,
|
126
|
-
email_notification_error: email_notification_error,
|
127
|
-
email_notification_success: email_notification_success,
|
129
|
+
email_notification_error: email_notification_error, email_notification_success: email_notification_success,
|
128
130
|
emails_to_notify: emails_to_notify})
|
129
131
|
|
130
132
|
return response
|
@@ -172,6 +174,10 @@ module Cnvrg
|
|
172
174
|
end
|
173
175
|
end
|
174
176
|
|
177
|
+
def get_cmd
|
178
|
+
return @command
|
179
|
+
end
|
180
|
+
|
175
181
|
def restart_spot_instance
|
176
182
|
|
177
183
|
restart = false
|
data/lib/cnvrg/files.rb
CHANGED
@@ -86,7 +86,7 @@ module Cnvrg
|
|
86
86
|
end
|
87
87
|
end
|
88
88
|
|
89
|
-
def upload_multiple_files(files_list, commit_sha1, progress: nil)
|
89
|
+
def upload_multiple_files(files_list, commit_sha1, progress: nil, suppress_exceptions: false)
|
90
90
|
#open files on the server.
|
91
91
|
Cnvrg::Logger.log_info("Uploading project files")
|
92
92
|
return if files_list.blank?
|
@@ -95,7 +95,7 @@ module Cnvrg
|
|
95
95
|
return self.upload_files_old(files_list, commit_sha1, progress: progress)
|
96
96
|
end
|
97
97
|
|
98
|
-
files_list = files_list.map{|x| [x,self.parse_file(x)]}.to_h
|
98
|
+
files_list = files_list.map{|x| [x, self.parse_file(x)] if self.parse_file(x)}.compact.to_h
|
99
99
|
resp = Cnvrg::API.request(@base_resource + "upload_files", 'POST', {files: files_list, commit: commit_sha1})
|
100
100
|
unless Cnvrg::CLI.is_response_success(resp, false)
|
101
101
|
raise SignalException.new("Cant upload files to the server.")
|
@@ -106,6 +106,7 @@ module Cnvrg
|
|
106
106
|
|
107
107
|
#upload files
|
108
108
|
token_mutex = Mutex.new
|
109
|
+
buffered_errors = {}
|
109
110
|
blob_ids = Parallel.map(files.keys, in_threads: ParallelThreads) do |file|
|
110
111
|
|
111
112
|
token_mutex.synchronize {
|
@@ -115,21 +116,33 @@ module Cnvrg
|
|
115
116
|
begin
|
116
117
|
Cnvrg::Helpers.try_until_success{self.upload_single_file(files[file].merge(files_list[file]))}
|
117
118
|
rescue => e
|
118
|
-
|
119
|
-
Cnvrg::CLI.log_message("Failed to upload #{file}: #{e.message}", 'red')
|
119
|
+
Cnvrg::CLI.log_message("Failed to upload #{file}: #{e.message}", 'red') unless suppress_exceptions
|
120
120
|
Cnvrg::Logger.log_error(e)
|
121
121
|
Cnvrg::Logger.log_method(bind: binding)
|
122
|
-
|
122
|
+
|
123
|
+
buffered_errors[file] = "Failed to upload #{file}: #{e.message}" if suppress_exceptions
|
124
|
+
|
125
|
+
raise e unless suppress_exceptions
|
123
126
|
end
|
124
127
|
progress.progress += 1 if progress.present?
|
125
|
-
|
128
|
+
|
129
|
+
unless buffered_errors.key?(file)
|
130
|
+
files[file]["bv_id"]
|
131
|
+
else
|
132
|
+
nil
|
133
|
+
end
|
126
134
|
end
|
127
135
|
|
136
|
+
# remove nil files (failed files) from blob_ids
|
137
|
+
blob_ids.compact!
|
138
|
+
|
128
139
|
#save files on the server.
|
129
140
|
resp = Cnvrg::API.request(@base_resource + "upload_files_save", 'POST', {blob_ids: blob_ids, commit: commit_sha1})
|
130
141
|
unless Cnvrg::CLI.is_response_success(resp, false)
|
131
142
|
raise SignalException.new("Cant save uploaded files to the server.")
|
132
143
|
end
|
144
|
+
|
145
|
+
return buffered_errors
|
133
146
|
end
|
134
147
|
|
135
148
|
|
@@ -146,7 +159,7 @@ module Cnvrg
|
|
146
159
|
end
|
147
160
|
end
|
148
161
|
|
149
|
-
def delete_files_from_server(files, commit_sha1)
|
162
|
+
def delete_files_from_server(files, commit_sha1, suppress_exceptions: false)
|
150
163
|
#files are absolute path files here. ^^
|
151
164
|
if Cnvrg::Helpers.server_version < 1
|
152
165
|
return self.delete_files_from_server_old(files, commit_sha1)
|
@@ -156,8 +169,14 @@ module Cnvrg
|
|
156
169
|
return if files.blank?
|
157
170
|
resp = Cnvrg::API.request(@base_resource + "delete_files", 'DELETE', {files: files, commit: commit_sha1})
|
158
171
|
unless Cnvrg::CLI.is_response_success(resp, false)
|
159
|
-
raise SignalException.new("Cant delete the following files from the server.")
|
172
|
+
raise SignalException.new("Cant delete the following files from the server.") unless suppress_exceptions
|
173
|
+
Cnvrg::Logger.log_error_message("Cant delete the following files from the server: ")
|
174
|
+
Cnvrg::Logger.log_error_message(files.to_s)
|
160
175
|
end
|
176
|
+
rescue => e
|
177
|
+
Cnvrg::Logger.log_error_message("An exception raised in delete_files_from_server: ")
|
178
|
+
Cnvrg::Logger.log_error(e)
|
179
|
+
raise e unless suppress_exceptions
|
161
180
|
end
|
162
181
|
|
163
182
|
def upload_single_file(file)
|
@@ -176,6 +195,8 @@ module Cnvrg
|
|
176
195
|
sha1 = OpenSSL::Digest::SHA1.file(abs_path).hexdigest
|
177
196
|
|
178
197
|
{relative_path: file, absolute_path: abs_path, file_name: file_name, file_size: file_size, content_type: content_type, sha1: sha1}
|
198
|
+
rescue => e
|
199
|
+
return false
|
179
200
|
end
|
180
201
|
|
181
202
|
def upload_old(absolute_path, relative_path, commit_sha1)
|
@@ -706,6 +727,7 @@ module Cnvrg
|
|
706
727
|
unless Cnvrg::CLI.is_response_success(res, false)
|
707
728
|
raise SignalException.new("Cant download files from the server.")
|
708
729
|
end
|
730
|
+
puts res
|
709
731
|
self.download_multpile_files_s3(res['result'], @project_home, postfix: postfix, progress: progress)
|
710
732
|
end
|
711
733
|
|
@@ -911,12 +933,22 @@ module Cnvrg
|
|
911
933
|
end
|
912
934
|
end
|
913
935
|
|
914
|
-
def start_commit(new_branch,force:false, exp_start_commit:nil, job_slug: nil, job_type: nil, start_commit: nil, message: nil)
|
915
|
-
|
916
|
-
|
917
|
-
|
918
|
-
|
919
|
-
|
936
|
+
def start_commit(new_branch,force:false, exp_start_commit:nil, job_slug: nil, job_type: nil, start_commit: nil, message: nil, debug_mode: false)
|
937
|
+
response = Cnvrg::API.request(
|
938
|
+
"#{base_resource}/commit/start",
|
939
|
+
'POST',
|
940
|
+
{
|
941
|
+
project_slug: @project_slug, username: @owner,
|
942
|
+
new_branch: new_branch, force:force,
|
943
|
+
exp_start_commit:exp_start_commit, start_commit: start_commit,
|
944
|
+
job_slug: job_slug, job_type: job_type, message: message,
|
945
|
+
debug_mode: debug_mode
|
946
|
+
}
|
947
|
+
)
|
948
|
+
|
949
|
+
Cnvrg::CLI.is_response_success(response,false)
|
950
|
+
return response
|
951
|
+
end
|
920
952
|
|
921
953
|
def end_commit(commit_sha1,force:false,message:"")
|
922
954
|
response = Cnvrg::API.request("#{base_resource}/commit/end", 'POST', {commit_sha1: commit_sha1,force:force,message:message})
|
data/lib/cnvrg/helpers.rb
CHANGED
@@ -359,9 +359,9 @@ parameters:
|
|
359
359
|
return {client: client, key: key, iv: iv, bucket: bucket, upload_options: upload_options}
|
360
360
|
end
|
361
361
|
|
362
|
-
def get_experiment_events_log_from_server(exp, project)
|
362
|
+
def get_experiment_events_log_from_server(exp, project, commit: nil)
|
363
363
|
dest_dir = exp["slug"]
|
364
|
-
commit = exp["end_commit"]
|
364
|
+
commit = commit || exp["end_commit"]
|
365
365
|
response = project.clone(0, commit)
|
366
366
|
Cnvrg::CLI.is_response_success(response, should_exit=false)
|
367
367
|
commit_sha1 = response["result"]["commit"]
|
@@ -372,42 +372,50 @@ parameters:
|
|
372
372
|
FileUtils.rm_rf("#{dest_dir}")
|
373
373
|
FileUtils.mkdir_p(dest_dir)
|
374
374
|
files.each do |f|
|
375
|
-
|
375
|
+
file_dir = "#{dest_dir}/#{File.dirname(f)}"
|
376
|
+
FileUtils.mkdir_p(file_dir)
|
377
|
+
FileUtils.mv(f, "#{dest_dir}/#{f}")
|
376
378
|
end
|
377
379
|
end
|
378
380
|
|
379
381
|
def get_experiment_events_log_via_kubectl(exp, namespace)
|
380
382
|
dest_dir = exp["slug"]
|
381
|
-
result = `kubectl -n #{namespace} get pods | grep #{exp["
|
383
|
+
result = `kubectl -n #{namespace} get pods | grep #{exp["slug"]}`
|
384
|
+
|
382
385
|
pod_name = result.split(" ")[0]
|
383
|
-
if pod_name.
|
384
|
-
|
385
|
-
|
386
|
-
|
387
|
-
|
388
|
-
|
389
|
-
|
390
|
-
|
391
|
-
|
392
|
-
|
393
|
-
|
394
|
-
|
395
|
-
|
396
|
-
|
397
|
-
files
|
398
|
-
if file.include?("tfevents")
|
399
|
-
all_files << "#{folder}/#{file}"
|
400
|
-
end
|
401
|
-
end
|
386
|
+
return false if pod_name.blank?
|
387
|
+
FileUtils.mkdir_p(dest_dir)
|
388
|
+
working_dir = `kubectl -n #{namespace} exec #{pod_name} -c agent -- pwd`
|
389
|
+
working_dir.strip!
|
390
|
+
res = `kubectl -n #{namespace} exec #{pod_name} -c agent -- /bin/bash -c "ls -R #{working_dir}"`
|
391
|
+
files_and_folders = res.split("\n\n")
|
392
|
+
all_files = []
|
393
|
+
|
394
|
+
files_and_folders.each do |file_and_folder|
|
395
|
+
files = file_and_folder.split("\n")
|
396
|
+
if files.first.include?(":")
|
397
|
+
folder = files.first.gsub(":", "")
|
398
|
+
|
399
|
+
folder = folder.sub(working_dir + "/", "")
|
400
|
+
files = files.drop(1)
|
402
401
|
end
|
403
|
-
|
404
|
-
|
405
|
-
|
402
|
+
files.each do |file|
|
403
|
+
if file.include?("tfevents")
|
404
|
+
all_files << "#{folder}/#{file}"
|
405
|
+
end
|
406
406
|
end
|
407
|
+
end
|
407
408
|
|
409
|
+
all_files.each do |file|
|
410
|
+
file_dir = "#{dest_dir}/#{File.dirname(file)}"
|
411
|
+
FileUtils.mkdir_p(file_dir)
|
412
|
+
res = `kubectl -n #{namespace} cp #{pod_name}:#{file} -c agent #{dest_dir}/#{file}`
|
408
413
|
end
|
414
|
+
|
415
|
+
return true
|
409
416
|
rescue => e
|
410
417
|
Cnvrg::Logger.log_error(e)
|
418
|
+
return false
|
411
419
|
end
|
412
420
|
end
|
413
421
|
|