cnvrg 2.0.18 → 2.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Readme.md +22 -1
- data/lib/cnvrg/cli.rb +23 -11
- data/lib/cnvrg/data.rb +2 -2
- data/lib/cnvrg/datafiles.rb +2 -1
- data/lib/cnvrg/downloader/clients/azure_client.rb +22 -3
- data/lib/cnvrg/files.rb +1 -1
- data/lib/cnvrg/helpers/agent.rb +6 -1
- data/lib/cnvrg/helpers/executer.rb +32 -9
- data/lib/cnvrg/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ffe47307abd2feac46497f34ceec95fab9a866324a235db3ab6f0c61129a7a9d
|
4
|
+
data.tar.gz: cd8316866861c8b16ec4a4d1d001d8da20f79e34ab95ea38a850a8bce79d6d66
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9b73987d1023e4aa2600ca11555078194398e7e10cb1d0314569aac8ab24a7385a41c4e86e70a169a4502f122e56baa1bde68ea5cb13706c666996c7a92b5244
|
7
|
+
data.tar.gz: 8dbd96c0b77e8c88d52254d04af38efa20fcab1c003027c8ee4b8e5918dc01ff35ae35505ec5d36687183925847438d5cb49eb8063a2db2ed2784a13e09900d3
|
data/Readme.md
CHANGED
@@ -74,4 +74,25 @@
|
|
74
74
|
* DEV-10581 - Bug: CLI - getting 404 response in "cnvrg set_default_owner"
|
75
75
|
## Version v2.0.18
|
76
76
|
2022-01-31
|
77
|
-
* DEV-12637 - Bug: Dataset - creating file from CLI/SDK in a folder with + sign, replaces + with space AND creates 2 folders
|
77
|
+
* DEV-12637 - Bug: Dataset - creating file from CLI/SDK in a folder with + sign, replaces + with space AND creates 2 folders
|
78
|
+
## Version v2.0.19
|
79
|
+
2022-02-22
|
80
|
+
* DEV-13271 - Bug: CLI - on upload folders in working dir containing .cnvrg, dir not uploading - dir is on .cnvrgignore
|
81
|
+
## Version v2.0.20
|
82
|
+
2022-02-27
|
83
|
+
* DEV-12288 - Bug: wrong error message when upload fails
|
84
|
+
## Version v2.1.1
|
85
|
+
2022-05-01
|
86
|
+
## Version v2.1.2
|
87
|
+
2022-05-08
|
88
|
+
* DEV-13815 - Bug: CLI - remove "cnvrg data sync" command
|
89
|
+
## Version v2.1.3
|
90
|
+
2022-05-16
|
91
|
+
* DEV-13981 - Bug: CLI - dataset query clone stuck at 50% then "Killed"
|
92
|
+
## Version v2.1.4
|
93
|
+
2022-05-22
|
94
|
+
* DEV-14182 - Bug: CLI - hide 'data upload' command
|
95
|
+
## Version v2.1.5
|
96
|
+
2022-07-31
|
97
|
+
* DEV-14244 - Bug: CLI - "failed to upload ongoing stats" due to NaN in float
|
98
|
+
* DEV-14633 - Bug: End sync did not complete, causing the experiment to get stuck in "terminating"
|
data/lib/cnvrg/cli.rb
CHANGED
@@ -1008,6 +1008,7 @@ module Cnvrg
|
|
1008
1008
|
abs_path = dataset_home + "/" + relative_path_dir
|
1009
1009
|
abs_path = dataset_home if flatten
|
1010
1010
|
fullpath = abs_path + "/" + file_name
|
1011
|
+
fullpath = fullpath.gsub("//", "/")
|
1011
1012
|
|
1012
1013
|
begin
|
1013
1014
|
FileUtils.mkdir_p(abs_path) unless File.exist? (fullpath)
|
@@ -1018,14 +1019,14 @@ module Cnvrg
|
|
1018
1019
|
begin
|
1019
1020
|
unless File.exist?(fullpath)
|
1020
1021
|
downloader.safe_operation("#{abs_path}/#{file_name}") do
|
1021
|
-
|
1022
|
+
download = open(f["url"])
|
1023
|
+
IO.copy_stream(download, fullpath)
|
1022
1024
|
end
|
1023
1025
|
end
|
1024
1026
|
rescue => e
|
1025
1027
|
log_message("Could not download file: #{f["fullpath"]}", Thor::Shell::Color::RED)
|
1026
1028
|
exit(1)
|
1027
1029
|
end
|
1028
|
-
|
1029
1030
|
end
|
1030
1031
|
#@executer.set_dataset_status(dataset: dataset.slug, status: "cloned") if @executer.present?
|
1031
1032
|
rescue Interrupt
|
@@ -2061,6 +2062,8 @@ module Cnvrg
|
|
2061
2062
|
method_option :init, :type => :boolean, :aliases => ["--initial"], :desc => "initial sync", :default => false
|
2062
2063
|
method_option :message, :type => :string, :aliases => ["--message"], :desc => "create commit with message", :default => nil
|
2063
2064
|
def sync_data_new(new_branch, force, verbose, commit, all_files, tags ,parallel, chunk_size, init, message)
|
2065
|
+
log_message("This method is deprecated, please use 'data put' instead. for more info visit our docs: https://app.cnvrg.io/docs/cli/install.html#upload-files-to-a-dataset", Thor::Shell::Color::BLUE, !options["verbose"])
|
2066
|
+
return
|
2064
2067
|
verify_logged_in(true)
|
2065
2068
|
log_start(__method__, args, options)
|
2066
2069
|
log_message('Syncing dataset', Thor::Shell::Color::BLUE, !options["verbose"])
|
@@ -2085,6 +2088,8 @@ module Cnvrg
|
|
2085
2088
|
method_option :message, :type => :string, :aliases => ["--message"], :desc => "create commit with message", :default => nil
|
2086
2089
|
|
2087
2090
|
def upload_data_new(new_branch, verbose, sync, force, tags, chunk_size, message:nil, total_deleted: 0, total_downloaded: 0)
|
2091
|
+
log_message("This method is deprecated, please use 'data put' instead. for more info visit our docs: https://app.cnvrg.io/docs/cli/install.html#upload-files-to-a-dataset", Thor::Shell::Color::BLUE, !options["verbose"])
|
2092
|
+
return
|
2088
2093
|
begin
|
2089
2094
|
commit, files_list = invoke :start_commit_data,[], :new_branch=> new_branch, :direct=>false, :force =>force, :chunk_size => chunk_size, :message => message
|
2090
2095
|
files_to_upload, upload_errors = invoke :upload_data_files,[commit, files_list: files_list],:new_branch=>new_branch, :verbose =>verbose, :force =>force, :sync =>sync, :chunk_size => chunk_size
|
@@ -2377,7 +2382,11 @@ module Cnvrg
|
|
2377
2382
|
if ignore.nil? or ignore.empty?
|
2378
2383
|
ignore = ignore_list
|
2379
2384
|
end
|
2380
|
-
|
2385
|
+
|
2386
|
+
if job_type != "Experiment"
|
2387
|
+
data_ignore = data_dir_include()
|
2388
|
+
end
|
2389
|
+
|
2381
2390
|
if !data_ignore.nil?
|
2382
2391
|
if ignore.nil? or ignore.empty?
|
2383
2392
|
ignore = data_ignore
|
@@ -3412,9 +3421,6 @@ module Cnvrg
|
|
3412
3421
|
end
|
3413
3422
|
|
3414
3423
|
end_commit = @project.last_local_commit
|
3415
|
-
if end_commit.present?
|
3416
|
-
@exp.job_log(["Experiment end commit: #{end_commit}"])
|
3417
|
-
end
|
3418
3424
|
|
3419
3425
|
# log_thread.join
|
3420
3426
|
stats_thread.join if docker_stats
|
@@ -4727,8 +4733,14 @@ module Cnvrg
|
|
4727
4733
|
end
|
4728
4734
|
end
|
4729
4735
|
else
|
4730
|
-
|
4731
|
-
|
4736
|
+
begin
|
4737
|
+
timestamp, value = data_result&.first&.dig('value')
|
4738
|
+
stat_value = value.present? ? ("%.2f" % value) : 0 # converting 34.685929244444445 to 34.69
|
4739
|
+
rescue => e
|
4740
|
+
Cnvrg::Logger.log_info("Failed converting string into float with error: #{e.message}")
|
4741
|
+
Cnvrg::Logger.log_error(e)
|
4742
|
+
stat_value = 0
|
4743
|
+
end
|
4732
4744
|
stat_value = stat_value.to_i == stat_value.to_f ? stat_value.to_i : stat_value.to_f # converting 34.00 to 34
|
4733
4745
|
if query_name.include? 'block'
|
4734
4746
|
stats['block_io'] = {} if stats['block_io'].blank?
|
@@ -5009,10 +5021,10 @@ module Cnvrg
|
|
5009
5021
|
else
|
5010
5022
|
log_message("#{exp_name} is running should get logs", Thor::Shell::Color::BLUE)
|
5011
5023
|
success, num_of_new_files = Cnvrg::Helpers.get_experiment_events_log_via_kubectl(exp, namespace)
|
5012
|
-
if !success and exp["last_successful_commit"].present? and !copied_commits.include?(exp["last_successful_commit"])
|
5024
|
+
if !success and exp["last_successful_commit"].present? and !copied_commits.include?(exp["last_successful_commit"]["sha1"])
|
5013
5025
|
log_message("Failed to get kube files, using last commit", Thor::Shell::Color::BLUE)
|
5014
|
-
num_of_new_files = Cnvrg::Helpers.get_experiment_events_log_from_server(exp, @project, commit: exp["last_successful_commit"])
|
5015
|
-
copied_commits << exp["last_successful_commit"]
|
5026
|
+
num_of_new_files = Cnvrg::Helpers.get_experiment_events_log_from_server(exp, @project, commit: exp["last_successful_commit"]["sha1"])
|
5027
|
+
copied_commits << exp["last_successful_commit"]["sha1"]
|
5016
5028
|
end
|
5017
5029
|
end
|
5018
5030
|
|
data/lib/cnvrg/data.rb
CHANGED
@@ -81,7 +81,7 @@ module Cnvrg
|
|
81
81
|
end
|
82
82
|
end
|
83
83
|
|
84
|
-
desc "data upload", "Upload files from local dataset directory to remote server"
|
84
|
+
desc "data upload", "Upload files from local dataset directory to remote server", :hide => true
|
85
85
|
method_option :verbose, :type => :boolean, :aliases => ["-v"], :default => false
|
86
86
|
method_option :new_branch, :type => :boolean, :aliases => ["-nb"], :desc => "create new branch of commits"
|
87
87
|
method_option :force, :type => :boolean, :aliases => ["-f","--force"], :default => false
|
@@ -101,7 +101,7 @@ module Cnvrg
|
|
101
101
|
message = options["message"]
|
102
102
|
cli.upload_data_new(new_branch, verbose, sync, force, tags, chunk_size, message:message)
|
103
103
|
end
|
104
|
-
desc 'data sync', 'Synchronise local dataset directory with remote server'
|
104
|
+
desc 'data sync', 'Synchronise local dataset directory with remote server', :hide => true
|
105
105
|
method_option :new_branch, :type => :boolean, :aliases => ["-nb"], :desc => "create new branch of commits"
|
106
106
|
method_option :force, :type => :boolean, :aliases => ["-f","--force"], :default => false
|
107
107
|
method_option :verbose, :type => :boolean, :aliases => ["-v"], :default => false
|
data/lib/cnvrg/datafiles.rb
CHANGED
@@ -1393,13 +1393,13 @@ module Cnvrg
|
|
1393
1393
|
in_threads: threads,
|
1394
1394
|
isolation: true
|
1395
1395
|
}
|
1396
|
+
|
1396
1397
|
Parallel.map(files["keys"], parallel_options) do |f|
|
1397
1398
|
begin
|
1398
1399
|
file_path = f['name']
|
1399
1400
|
file_path = File.basename(f['name']) if flatten
|
1400
1401
|
local_path = @dataset.local_path + '/' + file_path
|
1401
1402
|
Cnvrg::Logger.log_info("Downloading #{local_path}")
|
1402
|
-
progressbar.progress += 1 if progressbar.present?
|
1403
1403
|
if local_path.end_with? "/"
|
1404
1404
|
@downloader.mkdir(local_path, recursive: true)
|
1405
1405
|
next
|
@@ -1420,6 +1420,7 @@ module Cnvrg
|
|
1420
1420
|
end
|
1421
1421
|
|
1422
1422
|
resp = @downloader.safe_download(storage_path, local_path)
|
1423
|
+
progressbar.progress += 1 if progressbar.present?
|
1423
1424
|
Cnvrg::Logger.log_info("Download #{local_path} success resp: #{resp}")
|
1424
1425
|
rescue => e
|
1425
1426
|
Cnvrg::Logger.log_error(e)
|
data/lib/cnvrg/downloader/clients/azure_client.rb
CHANGED
@@ -1,4 +1,6 @@
|
|
1
|
+
require 'open-uri'
|
1
2
|
require 'azure/storage/blob'
|
3
|
+
require 'azure/storage/common/core'
|
2
4
|
|
3
5
|
module Cnvrg
|
4
6
|
module Downloader
|
@@ -13,10 +15,27 @@ module Cnvrg
|
|
13
15
|
|
14
16
|
def download(storage_path, local_path, decrypt: true)
|
15
17
|
prepare_download(local_path)
|
18
|
+
|
16
19
|
storage_path = Cnvrg::Helpers.decrypt(@key, @iv, storage_path) if decrypt
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
+
|
21
|
+
# We generate a temp uri in order to stream the file instead of using "get_blob" that overflows memory
|
22
|
+
uri = client.send(:blob_uri, @container, storage_path)
|
23
|
+
|
24
|
+
generator = Azure::Storage::Common::Core::Auth::SharedAccessSignature.new(@account_name, @access_key)
|
25
|
+
|
26
|
+
expiring_url = generator.signed_uri(
|
27
|
+
uri,
|
28
|
+
false,
|
29
|
+
service: 'b',
|
30
|
+
resource: 'b',
|
31
|
+
permissions: 'r',
|
32
|
+
start: (Time.now - (5 * 60)).utc.iso8601, # start 5 minutes ago
|
33
|
+
expiry: (Time.now + 60 * 60 * 2).utc.iso8601 # expire in 2 hours
|
34
|
+
)
|
35
|
+
|
36
|
+
# Stream the file without loading it all into memory
|
37
|
+
download = open(expiring_url)
|
38
|
+
IO.copy_stream(download, local_path)
|
20
39
|
end
|
21
40
|
|
22
41
|
def upload(storage_path, local_path)
|
data/lib/cnvrg/files.rb
CHANGED
@@ -106,7 +106,7 @@ module Cnvrg
|
|
106
106
|
commit: commit_sha1
|
107
107
|
})
|
108
108
|
unless Cnvrg::CLI.is_response_success(resp, false)
|
109
|
-
raise StandardError.new("
|
109
|
+
raise StandardError.new("Cant upload files to the server")
|
110
110
|
end
|
111
111
|
# resolve bucket
|
112
112
|
res = resp['result']
|
data/lib/cnvrg/helpers/agent.rb
CHANGED
@@ -59,7 +59,6 @@ class Cnvrg::Helpers::Agent
|
|
59
59
|
not File.exists? file
|
60
60
|
end
|
61
61
|
return true if file_doesnt_exists.blank?
|
62
|
-
log_internal("Can't find file #{file_doesnt_exists}, stopping the job")
|
63
62
|
return false
|
64
63
|
end
|
65
64
|
true
|
@@ -180,11 +179,17 @@ class Cnvrg::Helpers::Agent
|
|
180
179
|
end
|
181
180
|
end
|
182
181
|
@exit_status = $?.exitstatus
|
182
|
+
rescue NoMethodError => e
|
183
|
+
log_internal("No Method Error: #{e}", level: LogLevel::ERROR)
|
184
|
+
@exit_status = 129
|
183
185
|
rescue Timeout::Error
|
184
186
|
Process.kill(0, @pid)
|
185
187
|
@errors << {log: "Command timed out!", timestamp: Time.now}
|
186
188
|
log_internal("Command timed out!", level: LogLevel::ERROR)
|
187
189
|
@exit_status = 124
|
190
|
+
rescue => e
|
191
|
+
log_internal("Error: #{e}", level: LogLevel::ERROR)
|
192
|
+
@exit_status = 129
|
188
193
|
ensure
|
189
194
|
retry_command if @retries != 0 and @exit_status !=0
|
190
195
|
@exit_status
|
data/lib/cnvrg/helpers/executer.rb
CHANGED
@@ -89,7 +89,7 @@ class Cnvrg::Helpers::Executer
|
|
89
89
|
while agent_id.blank? or main_id.blank?
|
90
90
|
grep_by = @job_id
|
91
91
|
grep_by = "$(hostname)" if ENV['KUBERNETES_PORT'].present?
|
92
|
-
cntrs = `docker ps --format "table {{.ID}},{{.Names}}" | grep -i #{grep_by}`.split("\n").map{|x| x.strip}
|
92
|
+
cntrs = `docker ps --format "table {{.ID}},{{.Names}}" 2> /dev/null | grep -i #{grep_by}`.split("\n").map{|x| x.strip}
|
93
93
|
agent_id = cntrs.find{|container_name| container_name.include? "agent"}.split(",").first rescue nil
|
94
94
|
main_id = cntrs.find{|container_name| container_name.include? @main_name}.split(",").first rescue nil
|
95
95
|
sleep(2)
|
@@ -168,6 +168,9 @@ class Cnvrg::Helpers::Executer
|
|
168
168
|
while !success and retries < 100
|
169
169
|
begin
|
170
170
|
resp = Cnvrg::API.request(activity_url, "PUT", {stats: executer_stats})
|
171
|
+
if !resp
|
172
|
+
raise StandardError.new("Failed to send request to server")
|
173
|
+
end
|
171
174
|
machine_activity = resp["machine_activity"]
|
172
175
|
success = true
|
173
176
|
puts("Connected to server")
|
@@ -216,6 +219,7 @@ class Cnvrg::Helpers::Executer
|
|
216
219
|
def wait_for_main
|
217
220
|
copy_file_to_main
|
218
221
|
start_tiny_if_missing
|
222
|
+
retries = 0
|
219
223
|
puts("Waiting for main container")
|
220
224
|
STDOUT.flush
|
221
225
|
got_response = false
|
@@ -233,9 +237,12 @@ class Cnvrg::Helpers::Executer
|
|
233
237
|
got_response = true
|
234
238
|
end
|
235
239
|
rescue => e
|
236
|
-
|
237
|
-
|
238
|
-
|
240
|
+
retries += 1
|
241
|
+
if retries > 3
|
242
|
+
puts("Failed to connect to main")
|
243
|
+
puts(e.message)
|
244
|
+
STDOUT.flush
|
245
|
+
end
|
239
246
|
sleep(0.1)
|
240
247
|
next
|
241
248
|
end
|
@@ -265,13 +272,30 @@ class Cnvrg::Helpers::Executer
|
|
265
272
|
end
|
266
273
|
|
267
274
|
def execute_cmds
|
268
|
-
|
275
|
+
pids_by_slug = {}
|
269
276
|
while true
|
270
277
|
if @commands_q.empty?
|
271
278
|
sleep(5)
|
272
279
|
next
|
273
280
|
end
|
274
281
|
cmd = @commands_q.pop.symbolize_keys
|
282
|
+
|
283
|
+
if cmd[:wait_slug].present?
|
284
|
+
if pids_by_slug[cmd[:wait_slug]].present?
|
285
|
+
other_pid = pids_by_slug[cmd[:wait_slug]]
|
286
|
+
begin
|
287
|
+
Process.waitpid(other_pid, Process::WNOHANG)
|
288
|
+
running = true
|
289
|
+
rescue Errno::ECHILD => e
|
290
|
+
running = false
|
291
|
+
end
|
292
|
+
if running
|
293
|
+
@commands_q.push(cmd)
|
294
|
+
sleep(5)
|
295
|
+
next
|
296
|
+
end
|
297
|
+
end
|
298
|
+
end
|
275
299
|
command_json = Cnvrg::API.request([activity_url, "commands", cmd[:slug]].join('/'), "GET")
|
276
300
|
|
277
301
|
cmd_status = command_json["status"] rescue ""
|
@@ -288,10 +312,9 @@ class Cnvrg::Helpers::Executer
|
|
288
312
|
else
|
289
313
|
Process.detach(pid)
|
290
314
|
end
|
291
|
-
|
315
|
+
pids_by_slug[cmd[:slug]] = pid
|
292
316
|
######
|
293
317
|
end
|
294
|
-
pids
|
295
318
|
end
|
296
319
|
|
297
320
|
def merge_log_block(logs)
|
@@ -303,7 +326,7 @@ class Cnvrg::Helpers::Executer
|
|
303
326
|
pod_name = `hostname`.strip rescue nil
|
304
327
|
node_name = nil
|
305
328
|
if pod_name.present?
|
306
|
-
pod_describe = `kubectl get pod #{pod_name} -o json` rescue nil
|
329
|
+
pod_describe = `kubectl get pod #{pod_name} -o json 2> /dev/null` rescue nil
|
307
330
|
pod_describe = JSON.parse(pod_describe) rescue {}
|
308
331
|
node_name = pod_describe["spec"]["nodeName"] rescue nil
|
309
332
|
end
|
@@ -366,4 +389,4 @@ class Cnvrg::Helpers::Executer
|
|
366
389
|
conn.options.open_timeout = open_timeout
|
367
390
|
conn
|
368
391
|
end
|
369
|
-
end
|
392
|
+
end
|
data/lib/cnvrg/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cnvrg
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.0.18
|
4
|
+
version: 2.1.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Yochay Ettun
|
@@ -10,7 +10,7 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
|
-
date: 2022-
|
13
|
+
date: 2022-08-07 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: bundler
|