cnvrg 2.0.20 → 2.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Readme.md +9 -1
- data/lib/cnvrg/cli.rb +8 -8
- data/lib/cnvrg/data.rb +1 -1
- data/lib/cnvrg/datafiles.rb +2 -1
- data/lib/cnvrg/downloader/clients/azure_client.rb +22 -3
- data/lib/cnvrg/helpers/agent.rb +0 -1
- data/lib/cnvrg/helpers/executer.rb +32 -9
- data/lib/cnvrg/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: aa682e9cf25c1d37b533721888ded60b3ebc397717fc904dcd7703a592dd8853
|
4
|
+
data.tar.gz: bba09312bac1e44ec8b64358dead4036e173832cb605e56803bc368b8eff6dbc
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d8144e807411faa6c507c037309abd5e4919b317253147b6bb1eec8eaab2614ad84d32823bfc9dde405f881cefe4be9d975dd2dc21e47392c8416ddd0ed6c253
|
7
|
+
data.tar.gz: 46aebd82a4901774d605f40c26aee7607a0a3c407ddf1c3cc4f91625f678e5f2f0bcb8e53c92b3c5812150a6b4b98f491e3c21adace85196d82314e35ef7646e
|
data/Readme.md
CHANGED
@@ -80,4 +80,12 @@
|
|
80
80
|
* DEV-13271 - Bug: CLI - on upload folders in working dir containing .cnvrg, dir not uploading - dir is on .cnvrgignore
|
81
81
|
## Version v2.0.20
|
82
82
|
2022-02-27
|
83
|
-
* DEV-12288 - Bug: wrong error message when upload fails
|
83
|
+
* DEV-12288 - Bug: wrong error message when upload fails
|
84
|
+
## Version v2.1.1
|
85
|
+
2022-05-01
|
86
|
+
## Version v2.1.2
|
87
|
+
2022-05-08
|
88
|
+
* DEV-13815 - Bug: CLI - remove "cnvrg data sync" command
|
89
|
+
## Version v2.1.3
|
90
|
+
2022-05-16
|
91
|
+
* DEV-13981 - Bug: CLI - dataset query clone stuck at 50% then "Killed"
|
data/lib/cnvrg/cli.rb
CHANGED
@@ -1008,6 +1008,7 @@ module Cnvrg
|
|
1008
1008
|
abs_path = dataset_home + "/" + relative_path_dir
|
1009
1009
|
abs_path = dataset_home if flatten
|
1010
1010
|
fullpath = abs_path + "/" + file_name
|
1011
|
+
fullpath = fullpath.gsub("//", "/")
|
1011
1012
|
|
1012
1013
|
begin
|
1013
1014
|
FileUtils.mkdir_p(abs_path) unless File.exist? (fullpath)
|
@@ -1018,14 +1019,14 @@ module Cnvrg
|
|
1018
1019
|
begin
|
1019
1020
|
unless File.exist?(fullpath)
|
1020
1021
|
downloader.safe_operation("#{abs_path}/#{file_name}") do
|
1021
|
-
|
1022
|
+
download = open(f["url"])
|
1023
|
+
IO.copy_stream(download, fullpath)
|
1022
1024
|
end
|
1023
1025
|
end
|
1024
1026
|
rescue => e
|
1025
1027
|
log_message("Could not download file: #{f["fullpath"]}", Thor::Shell::Color::RED)
|
1026
1028
|
exit(1)
|
1027
1029
|
end
|
1028
|
-
|
1029
1030
|
end
|
1030
1031
|
#@executer.set_dataset_status(dataset: dataset.slug, status: "cloned") if @executer.present?
|
1031
1032
|
rescue Interrupt
|
@@ -2061,6 +2062,8 @@ module Cnvrg
|
|
2061
2062
|
method_option :init, :type => :boolean, :aliases => ["--initial"], :desc => "initial sync", :default => false
|
2062
2063
|
method_option :message, :type => :string, :aliases => ["--message"], :desc => "create commit with message", :default => nil
|
2063
2064
|
def sync_data_new(new_branch, force, verbose, commit, all_files, tags ,parallel, chunk_size, init, message)
|
2065
|
+
log_message("This method is deprecated, please use 'data put' instead. for more info visit our docs: https://app.cnvrg.io/docs/cli/install.html#upload-files-to-a-dataset", Thor::Shell::Color::BLUE, !options["verbose"])
|
2066
|
+
return
|
2064
2067
|
verify_logged_in(true)
|
2065
2068
|
log_start(__method__, args, options)
|
2066
2069
|
log_message('Syncing dataset', Thor::Shell::Color::BLUE, !options["verbose"])
|
@@ -3416,9 +3419,6 @@ module Cnvrg
|
|
3416
3419
|
end
|
3417
3420
|
|
3418
3421
|
end_commit = @project.last_local_commit
|
3419
|
-
if end_commit.present?
|
3420
|
-
@exp.job_log(["Experiment end commit: #{end_commit}"])
|
3421
|
-
end
|
3422
3422
|
|
3423
3423
|
# log_thread.join
|
3424
3424
|
stats_thread.join if docker_stats
|
@@ -5013,10 +5013,10 @@ module Cnvrg
|
|
5013
5013
|
else
|
5014
5014
|
log_message("#{exp_name} is running should get logs", Thor::Shell::Color::BLUE)
|
5015
5015
|
success, num_of_new_files = Cnvrg::Helpers.get_experiment_events_log_via_kubectl(exp, namespace)
|
5016
|
-
if !success and exp["last_successful_commit"].present? and !copied_commits.include?(exp["last_successful_commit"])
|
5016
|
+
if !success and exp["last_successful_commit"].present? and !copied_commits.include?(exp["last_successful_commit"]["sha1"])
|
5017
5017
|
log_message("Failed to get kube files, using last commit", Thor::Shell::Color::BLUE)
|
5018
|
-
num_of_new_files = Cnvrg::Helpers.get_experiment_events_log_from_server(exp, @project, commit: exp["last_successful_commit"])
|
5019
|
-
copied_commits << exp["last_successful_commit"]
|
5018
|
+
num_of_new_files = Cnvrg::Helpers.get_experiment_events_log_from_server(exp, @project, commit: exp["last_successful_commit"]["sha1"])
|
5019
|
+
copied_commits << exp["last_successful_commit"]["sha1"]
|
5020
5020
|
end
|
5021
5021
|
end
|
5022
5022
|
|
data/lib/cnvrg/data.rb
CHANGED
@@ -101,7 +101,7 @@ module Cnvrg
|
|
101
101
|
message = options["message"]
|
102
102
|
cli.upload_data_new(new_branch, verbose, sync, force, tags, chunk_size, message:message)
|
103
103
|
end
|
104
|
-
desc 'data sync', 'Synchronise local dataset directory with remote server'
|
104
|
+
desc 'data sync', 'Synchronise local dataset directory with remote server', :hide => true
|
105
105
|
method_option :new_branch, :type => :boolean, :aliases => ["-nb"], :desc => "create new branch of commits"
|
106
106
|
method_option :force, :type => :boolean, :aliases => ["-f","--force"], :default => false
|
107
107
|
method_option :verbose, :type => :boolean, :aliases => ["-v"], :default => false
|
data/lib/cnvrg/datafiles.rb
CHANGED
@@ -1393,13 +1393,13 @@ module Cnvrg
|
|
1393
1393
|
in_threads: threads,
|
1394
1394
|
isolation: true
|
1395
1395
|
}
|
1396
|
+
|
1396
1397
|
Parallel.map(files["keys"], parallel_options) do |f|
|
1397
1398
|
begin
|
1398
1399
|
file_path = f['name']
|
1399
1400
|
file_path = File.basename(f['name']) if flatten
|
1400
1401
|
local_path = @dataset.local_path + '/' + file_path
|
1401
1402
|
Cnvrg::Logger.log_info("Downloading #{local_path}")
|
1402
|
-
progressbar.progress += 1 if progressbar.present?
|
1403
1403
|
if local_path.end_with? "/"
|
1404
1404
|
@downloader.mkdir(local_path, recursive: true)
|
1405
1405
|
next
|
@@ -1420,6 +1420,7 @@ module Cnvrg
|
|
1420
1420
|
end
|
1421
1421
|
|
1422
1422
|
resp = @downloader.safe_download(storage_path, local_path)
|
1423
|
+
progressbar.progress += 1 if progressbar.present?
|
1423
1424
|
Cnvrg::Logger.log_info("Download #{local_path} success resp: #{resp}")
|
1424
1425
|
rescue => e
|
1425
1426
|
Cnvrg::Logger.log_error(e)
|
data/lib/cnvrg/downloader/clients/azure_client.rb
CHANGED
@@ -1,4 +1,6 @@
|
|
1
|
+
require 'open-uri'
|
1
2
|
require 'azure/storage/blob'
|
3
|
+
require 'azure/storage/common/core'
|
2
4
|
|
3
5
|
module Cnvrg
|
4
6
|
module Downloader
|
@@ -13,10 +15,27 @@ module Cnvrg
|
|
13
15
|
|
14
16
|
def download(storage_path, local_path, decrypt: true)
|
15
17
|
prepare_download(local_path)
|
18
|
+
|
16
19
|
storage_path = Cnvrg::Helpers.decrypt(@key, @iv, storage_path) if decrypt
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
+
|
21
|
+
# We generate a temp uri in order to stream the file instead of using "get_blob" that overflows memory
|
22
|
+
uri = client.send(:blob_uri, @container, storage_path)
|
23
|
+
|
24
|
+
generator = Azure::Storage::Common::Core::Auth::SharedAccessSignature.new(@account_name, @access_key)
|
25
|
+
|
26
|
+
expiring_url = generator.signed_uri(
|
27
|
+
uri,
|
28
|
+
false,
|
29
|
+
service: 'b',
|
30
|
+
resource: 'b',
|
31
|
+
permissions: 'r',
|
32
|
+
start: (Time.now - (5 * 60)).utc.iso8601, # start 5 minutes ago
|
33
|
+
expiry: (Time.now + 60 * 60 * 2).utc.iso8601 # expire in 2 hours
|
34
|
+
)
|
35
|
+
|
36
|
+
# Stream the file without loading it all into memory
|
37
|
+
download = open(expiring_url)
|
38
|
+
IO.copy_stream(download, local_path)
|
20
39
|
end
|
21
40
|
|
22
41
|
def upload(storage_path, local_path)
|
data/lib/cnvrg/helpers/agent.rb
CHANGED
data/lib/cnvrg/helpers/executer.rb
CHANGED
@@ -89,7 +89,7 @@ class Cnvrg::Helpers::Executer
|
|
89
89
|
while agent_id.blank? or main_id.blank?
|
90
90
|
grep_by = @job_id
|
91
91
|
grep_by = "$(hostname)" if ENV['KUBERNETES_PORT'].present?
|
92
|
-
cntrs = `docker ps --format "table {{.ID}},{{.Names}}" | grep -i #{grep_by}`.split("\n").map{|x| x.strip}
|
92
|
+
cntrs = `docker ps --format "table {{.ID}},{{.Names}}" 2> /dev/null | grep -i #{grep_by}`.split("\n").map{|x| x.strip}
|
93
93
|
agent_id = cntrs.find{|container_name| container_name.include? "agent"}.split(",").first rescue nil
|
94
94
|
main_id = cntrs.find{|container_name| container_name.include? @main_name}.split(",").first rescue nil
|
95
95
|
sleep(2)
|
@@ -168,6 +168,9 @@ class Cnvrg::Helpers::Executer
|
|
168
168
|
while !success and retries < 100
|
169
169
|
begin
|
170
170
|
resp = Cnvrg::API.request(activity_url, "PUT", {stats: executer_stats})
|
171
|
+
if !resp
|
172
|
+
raise StandardError.new("Failed to send request to server")
|
173
|
+
end
|
171
174
|
machine_activity = resp["machine_activity"]
|
172
175
|
success = true
|
173
176
|
puts("Connected to server")
|
@@ -216,6 +219,7 @@ class Cnvrg::Helpers::Executer
|
|
216
219
|
def wait_for_main
|
217
220
|
copy_file_to_main
|
218
221
|
start_tiny_if_missing
|
222
|
+
retries = 0
|
219
223
|
puts("Waiting for main container")
|
220
224
|
STDOUT.flush
|
221
225
|
got_response = false
|
@@ -233,9 +237,12 @@ class Cnvrg::Helpers::Executer
|
|
233
237
|
got_response = true
|
234
238
|
end
|
235
239
|
rescue => e
|
236
|
-
|
237
|
-
|
238
|
-
|
240
|
+
retries += 1
|
241
|
+
if retries > 3
|
242
|
+
puts("Failed to connect to main")
|
243
|
+
puts(e.message)
|
244
|
+
STDOUT.flush
|
245
|
+
end
|
239
246
|
sleep(0.1)
|
240
247
|
next
|
241
248
|
end
|
@@ -265,13 +272,30 @@ class Cnvrg::Helpers::Executer
|
|
265
272
|
end
|
266
273
|
|
267
274
|
def execute_cmds
|
268
|
-
|
275
|
+
pids_by_slug = {}
|
269
276
|
while true
|
270
277
|
if @commands_q.empty?
|
271
278
|
sleep(5)
|
272
279
|
next
|
273
280
|
end
|
274
281
|
cmd = @commands_q.pop.symbolize_keys
|
282
|
+
|
283
|
+
if cmd[:wait_slug].present?
|
284
|
+
if pids_by_slug[cmd[:wait_slug]].present?
|
285
|
+
other_pid = pids_by_slug[cmd[:wait_slug]]
|
286
|
+
begin
|
287
|
+
Process.waitpid(other_pid, Process::WNOHANG)
|
288
|
+
running = true
|
289
|
+
rescue Errno::ECHILD => e
|
290
|
+
running = false
|
291
|
+
end
|
292
|
+
if running
|
293
|
+
@commands_q.push(cmd)
|
294
|
+
sleep(5)
|
295
|
+
next
|
296
|
+
end
|
297
|
+
end
|
298
|
+
end
|
275
299
|
command_json = Cnvrg::API.request([activity_url, "commands", cmd[:slug]].join('/'), "GET")
|
276
300
|
|
277
301
|
cmd_status = command_json["status"] rescue ""
|
@@ -288,10 +312,9 @@ class Cnvrg::Helpers::Executer
|
|
288
312
|
else
|
289
313
|
Process.detach(pid)
|
290
314
|
end
|
291
|
-
|
315
|
+
pids_by_slug[cmd[:slug]] = pid
|
292
316
|
######
|
293
317
|
end
|
294
|
-
pids
|
295
318
|
end
|
296
319
|
|
297
320
|
def merge_log_block(logs)
|
@@ -303,7 +326,7 @@ class Cnvrg::Helpers::Executer
|
|
303
326
|
pod_name = `hostname`.strip rescue nil
|
304
327
|
node_name = nil
|
305
328
|
if pod_name.present?
|
306
|
-
pod_describe = `kubectl get pod #{pod_name} -o json` rescue nil
|
329
|
+
pod_describe = `kubectl get pod #{pod_name} -o json 2> /dev/null` rescue nil
|
307
330
|
pod_describe = JSON.parse(pod_describe) rescue {}
|
308
331
|
node_name = pod_describe["spec"]["nodeName"] rescue nil
|
309
332
|
end
|
@@ -366,4 +389,4 @@ class Cnvrg::Helpers::Executer
|
|
366
389
|
conn.options.open_timeout = open_timeout
|
367
390
|
conn
|
368
391
|
end
|
369
|
-
end
|
392
|
+
end
|
data/lib/cnvrg/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cnvrg
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.0.20
|
4
|
+
version: 2.1.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Yochay Ettun
|
@@ -10,7 +10,7 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
|
-
date: 2022-
|
13
|
+
date: 2022-05-16 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: bundler
|