cnvrg 2.0.20 → 2.1.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Readme.md +9 -1
- data/lib/cnvrg/cli.rb +8 -8
- data/lib/cnvrg/data.rb +1 -1
- data/lib/cnvrg/datafiles.rb +2 -1
- data/lib/cnvrg/downloader/clients/azure_client.rb +22 -3
- data/lib/cnvrg/helpers/agent.rb +0 -1
- data/lib/cnvrg/helpers/executer.rb +32 -9
- data/lib/cnvrg/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: aa682e9cf25c1d37b533721888ded60b3ebc397717fc904dcd7703a592dd8853
|
4
|
+
data.tar.gz: bba09312bac1e44ec8b64358dead4036e173832cb605e56803bc368b8eff6dbc
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d8144e807411faa6c507c037309abd5e4919b317253147b6bb1eec8eaab2614ad84d32823bfc9dde405f881cefe4be9d975dd2dc21e47392c8416ddd0ed6c253
|
7
|
+
data.tar.gz: 46aebd82a4901774d605f40c26aee7607a0a3c407ddf1c3cc4f91625f678e5f2f0bcb8e53c92b3c5812150a6b4b98f491e3c21adace85196d82314e35ef7646e
|
data/Readme.md
CHANGED
@@ -80,4 +80,12 @@
|
|
80
80
|
* DEV-13271 - Bug: CLI - on upload folders in working dir containing .cnvrg, dir not uploading - dir is on .cnvrgignore
|
81
81
|
## Version v2.0.20
|
82
82
|
2022-02-27
|
83
|
-
* DEV-12288 - Bug: wrong error message when upload fails
|
83
|
+
* DEV-12288 - Bug: wrong error message when upload fails
|
84
|
+
## Version v2.1.1
|
85
|
+
2022-05-01
|
86
|
+
## Version v2.1.2
|
87
|
+
2022-05-08
|
88
|
+
* DEV-13815 - Bug: CLI - remove "cnvrg data sync" command
|
89
|
+
## Version v2.1.3
|
90
|
+
2022-05-16
|
91
|
+
* DEV-13981 - Bug: CLI - dataset query clone stuck at 50% then "Killed"
|
data/lib/cnvrg/cli.rb
CHANGED
@@ -1008,6 +1008,7 @@ module Cnvrg
|
|
1008
1008
|
abs_path = dataset_home + "/" + relative_path_dir
|
1009
1009
|
abs_path = dataset_home if flatten
|
1010
1010
|
fullpath = abs_path + "/" + file_name
|
1011
|
+
fullpath = fullpath.gsub("//", "/")
|
1011
1012
|
|
1012
1013
|
begin
|
1013
1014
|
FileUtils.mkdir_p(abs_path) unless File.exist? (fullpath)
|
@@ -1018,14 +1019,14 @@ module Cnvrg
|
|
1018
1019
|
begin
|
1019
1020
|
unless File.exist?(fullpath)
|
1020
1021
|
downloader.safe_operation("#{abs_path}/#{file_name}") do
|
1021
|
-
|
1022
|
+
download = open(f["url"])
|
1023
|
+
IO.copy_stream(download, fullpath)
|
1022
1024
|
end
|
1023
1025
|
end
|
1024
1026
|
rescue => e
|
1025
1027
|
log_message("Could not download file: #{f["fullpath"]}", Thor::Shell::Color::RED)
|
1026
1028
|
exit(1)
|
1027
1029
|
end
|
1028
|
-
|
1029
1030
|
end
|
1030
1031
|
#@executer.set_dataset_status(dataset: dataset.slug, status: "cloned") if @executer.present?
|
1031
1032
|
rescue Interrupt
|
@@ -2061,6 +2062,8 @@ module Cnvrg
|
|
2061
2062
|
method_option :init, :type => :boolean, :aliases => ["--initial"], :desc => "initial sync", :default => false
|
2062
2063
|
method_option :message, :type => :string, :aliases => ["--message"], :desc => "create commit with message", :default => nil
|
2063
2064
|
def sync_data_new(new_branch, force, verbose, commit, all_files, tags ,parallel, chunk_size, init, message)
|
2065
|
+
log_message("This method is deprecated, please use 'data put' instead. for more info visit our docs: https://app.cnvrg.io/docs/cli/install.html#upload-files-to-a-dataset", Thor::Shell::Color::BLUE, !options["verbose"])
|
2066
|
+
return
|
2064
2067
|
verify_logged_in(true)
|
2065
2068
|
log_start(__method__, args, options)
|
2066
2069
|
log_message('Syncing dataset', Thor::Shell::Color::BLUE, !options["verbose"])
|
@@ -3416,9 +3419,6 @@ module Cnvrg
|
|
3416
3419
|
end
|
3417
3420
|
|
3418
3421
|
end_commit = @project.last_local_commit
|
3419
|
-
if end_commit.present?
|
3420
|
-
@exp.job_log(["Experiment end commit: #{end_commit}"])
|
3421
|
-
end
|
3422
3422
|
|
3423
3423
|
# log_thread.join
|
3424
3424
|
stats_thread.join if docker_stats
|
@@ -5013,10 +5013,10 @@ module Cnvrg
|
|
5013
5013
|
else
|
5014
5014
|
log_message("#{exp_name} is running should get logs", Thor::Shell::Color::BLUE)
|
5015
5015
|
success, num_of_new_files = Cnvrg::Helpers.get_experiment_events_log_via_kubectl(exp, namespace)
|
5016
|
-
if !success and exp["last_successful_commit"].present? and !copied_commits.include?(exp["last_successful_commit"])
|
5016
|
+
if !success and exp["last_successful_commit"].present? and !copied_commits.include?(exp["last_successful_commit"]["sha1"])
|
5017
5017
|
log_message("Failed to get kube files, using last commit", Thor::Shell::Color::BLUE)
|
5018
|
-
num_of_new_files = Cnvrg::Helpers.get_experiment_events_log_from_server(exp, @project, commit: exp["last_successful_commit"])
|
5019
|
-
copied_commits << exp["last_successful_commit"]
|
5018
|
+
num_of_new_files = Cnvrg::Helpers.get_experiment_events_log_from_server(exp, @project, commit: exp["last_successful_commit"]["sha1"])
|
5019
|
+
copied_commits << exp["last_successful_commit"]["sha1"]
|
5020
5020
|
end
|
5021
5021
|
end
|
5022
5022
|
|
data/lib/cnvrg/data.rb
CHANGED
@@ -101,7 +101,7 @@ module Cnvrg
|
|
101
101
|
message = options["message"]
|
102
102
|
cli.upload_data_new(new_branch, verbose, sync, force, tags, chunk_size, message:message)
|
103
103
|
end
|
104
|
-
desc 'data sync', 'Synchronise local dataset directory with remote server'
|
104
|
+
desc 'data sync', 'Synchronise local dataset directory with remote server', :hide => true
|
105
105
|
method_option :new_branch, :type => :boolean, :aliases => ["-nb"], :desc => "create new branch of commits"
|
106
106
|
method_option :force, :type => :boolean, :aliases => ["-f","--force"], :default => false
|
107
107
|
method_option :verbose, :type => :boolean, :aliases => ["-v"], :default => false
|
data/lib/cnvrg/datafiles.rb
CHANGED
@@ -1393,13 +1393,13 @@ module Cnvrg
|
|
1393
1393
|
in_threads: threads,
|
1394
1394
|
isolation: true
|
1395
1395
|
}
|
1396
|
+
|
1396
1397
|
Parallel.map(files["keys"], parallel_options) do |f|
|
1397
1398
|
begin
|
1398
1399
|
file_path = f['name']
|
1399
1400
|
file_path = File.basename(f['name']) if flatten
|
1400
1401
|
local_path = @dataset.local_path + '/' + file_path
|
1401
1402
|
Cnvrg::Logger.log_info("Downloading #{local_path}")
|
1402
|
-
progressbar.progress += 1 if progressbar.present?
|
1403
1403
|
if local_path.end_with? "/"
|
1404
1404
|
@downloader.mkdir(local_path, recursive: true)
|
1405
1405
|
next
|
@@ -1420,6 +1420,7 @@ module Cnvrg
|
|
1420
1420
|
end
|
1421
1421
|
|
1422
1422
|
resp = @downloader.safe_download(storage_path, local_path)
|
1423
|
+
progressbar.progress += 1 if progressbar.present?
|
1423
1424
|
Cnvrg::Logger.log_info("Download #{local_path} success resp: #{resp}")
|
1424
1425
|
rescue => e
|
1425
1426
|
Cnvrg::Logger.log_error(e)
|
data/lib/cnvrg/downloader/clients/azure_client.rb
CHANGED
@@ -1,4 +1,6 @@
|
|
1
|
+
require 'open-uri'
|
1
2
|
require 'azure/storage/blob'
|
3
|
+
require 'azure/storage/common/core'
|
2
4
|
|
3
5
|
module Cnvrg
|
4
6
|
module Downloader
|
@@ -13,10 +15,27 @@ module Cnvrg
|
|
13
15
|
|
14
16
|
def download(storage_path, local_path, decrypt: true)
|
15
17
|
prepare_download(local_path)
|
18
|
+
|
16
19
|
storage_path = Cnvrg::Helpers.decrypt(@key, @iv, storage_path) if decrypt
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
+
|
21
|
+
# We generate a temp uri in order to stream the file instead of using "get_blob" that overflows memory
|
22
|
+
uri = client.send(:blob_uri, @container, storage_path)
|
23
|
+
|
24
|
+
generator = Azure::Storage::Common::Core::Auth::SharedAccessSignature.new(@account_name, @access_key)
|
25
|
+
|
26
|
+
expiring_url = generator.signed_uri(
|
27
|
+
uri,
|
28
|
+
false,
|
29
|
+
service: 'b',
|
30
|
+
resource: 'b',
|
31
|
+
permissions: 'r',
|
32
|
+
start: (Time.now - (5 * 60)).utc.iso8601, # start 5 minutes ago
|
33
|
+
expiry: (Time.now + 60 * 60 * 2).utc.iso8601 # expire in 2 hours
|
34
|
+
)
|
35
|
+
|
36
|
+
# Stream the file without loading it all into memory
|
37
|
+
download = open(expiring_url)
|
38
|
+
IO.copy_stream(download, local_path)
|
20
39
|
end
|
21
40
|
|
22
41
|
def upload(storage_path, local_path)
|
data/lib/cnvrg/helpers/agent.rb
CHANGED
data/lib/cnvrg/helpers/executer.rb
CHANGED
@@ -89,7 +89,7 @@ class Cnvrg::Helpers::Executer
|
|
89
89
|
while agent_id.blank? or main_id.blank?
|
90
90
|
grep_by = @job_id
|
91
91
|
grep_by = "$(hostname)" if ENV['KUBERNETES_PORT'].present?
|
92
|
-
cntrs = `docker ps --format "table {{.ID}},{{.Names}}" | grep -i #{grep_by}`.split("\n").map{|x| x.strip}
|
92
|
+
cntrs = `docker ps --format "table {{.ID}},{{.Names}}" 2> /dev/null | grep -i #{grep_by}`.split("\n").map{|x| x.strip}
|
93
93
|
agent_id = cntrs.find{|container_name| container_name.include? "agent"}.split(",").first rescue nil
|
94
94
|
main_id = cntrs.find{|container_name| container_name.include? @main_name}.split(",").first rescue nil
|
95
95
|
sleep(2)
|
@@ -168,6 +168,9 @@ class Cnvrg::Helpers::Executer
|
|
168
168
|
while !success and retries < 100
|
169
169
|
begin
|
170
170
|
resp = Cnvrg::API.request(activity_url, "PUT", {stats: executer_stats})
|
171
|
+
if !resp
|
172
|
+
raise StandardError.new("Failed to send request to server")
|
173
|
+
end
|
171
174
|
machine_activity = resp["machine_activity"]
|
172
175
|
success = true
|
173
176
|
puts("Connected to server")
|
@@ -216,6 +219,7 @@ class Cnvrg::Helpers::Executer
|
|
216
219
|
def wait_for_main
|
217
220
|
copy_file_to_main
|
218
221
|
start_tiny_if_missing
|
222
|
+
retries = 0
|
219
223
|
puts("Waiting for main container")
|
220
224
|
STDOUT.flush
|
221
225
|
got_response = false
|
@@ -233,9 +237,12 @@ class Cnvrg::Helpers::Executer
|
|
233
237
|
got_response = true
|
234
238
|
end
|
235
239
|
rescue => e
|
236
|
-
|
237
|
-
|
238
|
-
|
240
|
+
retries += 1
|
241
|
+
if retries > 3
|
242
|
+
puts("Failed to connect to main")
|
243
|
+
puts(e.message)
|
244
|
+
STDOUT.flush
|
245
|
+
end
|
239
246
|
sleep(0.1)
|
240
247
|
next
|
241
248
|
end
|
@@ -265,13 +272,30 @@ class Cnvrg::Helpers::Executer
|
|
265
272
|
end
|
266
273
|
|
267
274
|
def execute_cmds
|
268
|
-
|
275
|
+
pids_by_slug = {}
|
269
276
|
while true
|
270
277
|
if @commands_q.empty?
|
271
278
|
sleep(5)
|
272
279
|
next
|
273
280
|
end
|
274
281
|
cmd = @commands_q.pop.symbolize_keys
|
282
|
+
|
283
|
+
if cmd[:wait_slug].present?
|
284
|
+
if pids_by_slug[cmd[:wait_slug]].present?
|
285
|
+
other_pid = pids_by_slug[cmd[:wait_slug]]
|
286
|
+
begin
|
287
|
+
Process.waitpid(other_pid, Process::WNOHANG)
|
288
|
+
running = true
|
289
|
+
rescue Errno::ECHILD => e
|
290
|
+
running = false
|
291
|
+
end
|
292
|
+
if running
|
293
|
+
@commands_q.push(cmd)
|
294
|
+
sleep(5)
|
295
|
+
next
|
296
|
+
end
|
297
|
+
end
|
298
|
+
end
|
275
299
|
command_json = Cnvrg::API.request([activity_url, "commands", cmd[:slug]].join('/'), "GET")
|
276
300
|
|
277
301
|
cmd_status = command_json["status"] rescue ""
|
@@ -288,10 +312,9 @@ class Cnvrg::Helpers::Executer
|
|
288
312
|
else
|
289
313
|
Process.detach(pid)
|
290
314
|
end
|
291
|
-
|
315
|
+
pids_by_slug[cmd[:slug]] = pid
|
292
316
|
######
|
293
317
|
end
|
294
|
-
pids
|
295
318
|
end
|
296
319
|
|
297
320
|
def merge_log_block(logs)
|
@@ -303,7 +326,7 @@ class Cnvrg::Helpers::Executer
|
|
303
326
|
pod_name = `hostname`.strip rescue nil
|
304
327
|
node_name = nil
|
305
328
|
if pod_name.present?
|
306
|
-
pod_describe = `kubectl get pod #{pod_name} -o json` rescue nil
|
329
|
+
pod_describe = `kubectl get pod #{pod_name} -o json 2> /dev/null` rescue nil
|
307
330
|
pod_describe = JSON.parse(pod_describe) rescue {}
|
308
331
|
node_name = pod_describe["spec"]["nodeName"] rescue nil
|
309
332
|
end
|
@@ -366,4 +389,4 @@ class Cnvrg::Helpers::Executer
|
|
366
389
|
conn.options.open_timeout = open_timeout
|
367
390
|
conn
|
368
391
|
end
|
369
|
-
end
|
392
|
+
end
|
data/lib/cnvrg/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cnvrg
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.0.20
|
4
|
+
version: 2.1.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Yochay Ettun
|
@@ -10,7 +10,7 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
|
-
date: 2022-02-27 00:00:00.000000000 Z
|
13
|
+
date: 2022-05-16 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: bundler
|