cnvrg 2.0.20 → 2.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Readme.md +19 -1
- data/lib/cnvrg/cli.rb +20 -12
- data/lib/cnvrg/data.rb +2 -2
- data/lib/cnvrg/datafiles.rb +2 -1
- data/lib/cnvrg/downloader/clients/azure_client.rb +22 -3
- data/lib/cnvrg/helpers/agent.rb +6 -1
- data/lib/cnvrg/helpers/executer.rb +32 -9
- data/lib/cnvrg/project.rb +2 -2
- data/lib/cnvrg/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 296eba8c8dab87e1a16b7980c1e80b013be95af52cfc184a12cf366f676a3a2b
|
4
|
+
data.tar.gz: '08ad62abd898bb6bb1a9099237f5ebd854f87359ecc571036ce351b259127d78'
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f5174c705ed765c76538401ea14ea2c14e267d98836b3d8e8b66a72d31d4f7ee2eeb313fbd080cd15a513bb4149f907aef075be2d7f6bab4b96afefb4ba5f341
|
7
|
+
data.tar.gz: 355b440e9a009571e2f097599d14418fcdbe0a79cfaae8473637722745923f8b138b4fdbc0e3ee60107122835f98468811661b770ffee4008f8b141250361fce
|
data/Readme.md
CHANGED
@@ -80,4 +80,22 @@
|
|
80
80
|
* DEV-13271 - Bug: CLI - on upload folders in working dir containing .cnvrg, dir not uploading - dir is on .cnvrgignore
|
81
81
|
## Version v2.0.20
|
82
82
|
2022-02-27
|
83
|
-
* DEV-12288 - Bug: wrong error message when upload fails
|
83
|
+
* DEV-12288 - Bug: wrong error message when upload fails
|
84
|
+
## Version v2.1.1
|
85
|
+
2022-05-01
|
86
|
+
## Version v2.1.2
|
87
|
+
2022-05-08
|
88
|
+
* DEV-13815 - Bug: CLI - remove "cnvrg data sync" command
|
89
|
+
## Version v2.1.3
|
90
|
+
2022-05-16
|
91
|
+
* DEV-13981 - Bug: CLI - dataset query clone stuck at 50% then "Killed"
|
92
|
+
## Version v2.1.4
|
93
|
+
2022-05-22
|
94
|
+
* DEV-14182 - Bug: Cli - hide 'data upload' command
|
95
|
+
## Version v2.1.5
|
96
|
+
2022-07-31
|
97
|
+
* DEV-14244 - Bug: CLI - "failed to upload ongoing stats" due to NaN in float
|
98
|
+
* DEV-14633 - Bug: End sync did not complete, causing the experiment to get stuck in "terminating"
|
99
|
+
## Version v2.1.6
|
100
|
+
2022-08-09
|
101
|
+
* DEV-14682 - Bug: git-Walki: CLI/SDK experiments goes into debug mode for Github+SSH integrated projects
|
data/lib/cnvrg/cli.rb
CHANGED
@@ -1008,6 +1008,7 @@ module Cnvrg
|
|
1008
1008
|
abs_path = dataset_home + "/" + relative_path_dir
|
1009
1009
|
abs_path = dataset_home if flatten
|
1010
1010
|
fullpath = abs_path + "/" + file_name
|
1011
|
+
fullpath = fullpath.gsub("//", "/")
|
1011
1012
|
|
1012
1013
|
begin
|
1013
1014
|
FileUtils.mkdir_p(abs_path) unless File.exist? (fullpath)
|
@@ -1018,14 +1019,14 @@ module Cnvrg
|
|
1018
1019
|
begin
|
1019
1020
|
unless File.exist?(fullpath)
|
1020
1021
|
downloader.safe_operation("#{abs_path}/#{file_name}") do
|
1021
|
-
|
1022
|
+
download = open(f["url"])
|
1023
|
+
IO.copy_stream(download, fullpath)
|
1022
1024
|
end
|
1023
1025
|
end
|
1024
1026
|
rescue => e
|
1025
1027
|
log_message("Could not download file: #{f["fullpath"]}", Thor::Shell::Color::RED)
|
1026
1028
|
exit(1)
|
1027
1029
|
end
|
1028
|
-
|
1029
1030
|
end
|
1030
1031
|
#@executer.set_dataset_status(dataset: dataset.slug, status: "cloned") if @executer.present?
|
1031
1032
|
rescue Interrupt
|
@@ -1847,7 +1848,7 @@ module Cnvrg
|
|
1847
1848
|
log_start(__method__, args, options)
|
1848
1849
|
project_home = Dir.pwd
|
1849
1850
|
soft = options["soft"] || false
|
1850
|
-
Project.stop_if_project_present(project_home, slug) if soft
|
1851
|
+
Project.stop_if_project_present(project_home, slug, owner) if soft
|
1851
1852
|
clone_resp = Project.clone_dir_remote(slug, owner, slug,true)
|
1852
1853
|
exit 1 if not clone_resp
|
1853
1854
|
idx_status = Project.new(get_project_home).generate_idx(files:[])
|
@@ -1917,7 +1918,7 @@ module Cnvrg
|
|
1917
1918
|
clone_resp = false
|
1918
1919
|
project_home = Dir.pwd
|
1919
1920
|
|
1920
|
-
Project.stop_if_project_present(project_home, project_name) if soft
|
1921
|
+
Project.stop_if_project_present(project_home, project_name, owner) if soft
|
1921
1922
|
|
1922
1923
|
if remote and !git
|
1923
1924
|
clone_resp = Project.clone_dir_remote(slug, owner, project_name,git)
|
@@ -2061,6 +2062,8 @@ module Cnvrg
|
|
2061
2062
|
method_option :init, :type => :boolean, :aliases => ["--initial"], :desc => "initial sync", :default => false
|
2062
2063
|
method_option :message, :type => :string, :aliases => ["--message"], :desc => "create commit with message", :default => nil
|
2063
2064
|
def sync_data_new(new_branch, force, verbose, commit, all_files, tags ,parallel, chunk_size, init, message)
|
2065
|
+
log_message("This method is deprecated, please use 'data put' instead. for more info visit our docs: https://app.cnvrg.io/docs/cli/install.html#upload-files-to-a-dataset", Thor::Shell::Color::BLUE, !options["verbose"])
|
2066
|
+
return
|
2064
2067
|
verify_logged_in(true)
|
2065
2068
|
log_start(__method__, args, options)
|
2066
2069
|
log_message('Syncing dataset', Thor::Shell::Color::BLUE, !options["verbose"])
|
@@ -2085,6 +2088,8 @@ module Cnvrg
|
|
2085
2088
|
method_option :message, :type => :string, :aliases => ["--message"], :desc => "create commit with message", :default => nil
|
2086
2089
|
|
2087
2090
|
def upload_data_new(new_branch, verbose, sync, force, tags, chunk_size, message:nil, total_deleted: 0, total_downloaded: 0)
|
2091
|
+
log_message("This method is deprecated, please use 'data put' instead. for more info visit our docs: https://app.cnvrg.io/docs/cli/install.html#upload-files-to-a-dataset", Thor::Shell::Color::BLUE, !options["verbose"])
|
2092
|
+
return
|
2088
2093
|
begin
|
2089
2094
|
commit, files_list = invoke :start_commit_data,[], :new_branch=> new_branch, :direct=>false, :force =>force, :chunk_size => chunk_size, :message => message
|
2090
2095
|
files_to_upload, upload_errors = invoke :upload_data_files,[commit, files_list: files_list],:new_branch=>new_branch, :verbose =>verbose, :force =>force, :sync =>sync, :chunk_size => chunk_size
|
@@ -3416,9 +3421,6 @@ module Cnvrg
|
|
3416
3421
|
end
|
3417
3422
|
|
3418
3423
|
end_commit = @project.last_local_commit
|
3419
|
-
if end_commit.present?
|
3420
|
-
@exp.job_log(["Experiment end commit: #{end_commit}"])
|
3421
|
-
end
|
3422
3424
|
|
3423
3425
|
# log_thread.join
|
3424
3426
|
stats_thread.join if docker_stats
|
@@ -4731,8 +4733,14 @@ module Cnvrg
|
|
4731
4733
|
end
|
4732
4734
|
end
|
4733
4735
|
else
|
4734
|
-
|
4735
|
-
|
4736
|
+
begin
|
4737
|
+
timestamp, value = data_result&.first&.dig('value')
|
4738
|
+
stat_value = value.present? ? ("%.2f" % value) : 0 # converting 34.685929244444445 to 34.69
|
4739
|
+
rescue => e
|
4740
|
+
Cnvrg::Logger.log_info("Failed converting string into float with error: #{e.message}")
|
4741
|
+
Cnvrg::Logger.log_error(e)
|
4742
|
+
stat_value = 0
|
4743
|
+
end
|
4736
4744
|
stat_value = stat_value.to_i == stat_value.to_f ? stat_value.to_i : stat_value.to_f # converting 34.00 to 34
|
4737
4745
|
if query_name.include? 'block'
|
4738
4746
|
stats['block_io'] = {} if stats['block_io'].blank?
|
@@ -5013,10 +5021,10 @@ module Cnvrg
|
|
5013
5021
|
else
|
5014
5022
|
log_message("#{exp_name} is running should get logs", Thor::Shell::Color::BLUE)
|
5015
5023
|
success, num_of_new_files = Cnvrg::Helpers.get_experiment_events_log_via_kubectl(exp, namespace)
|
5016
|
-
if !success and exp["last_successful_commit"].present? and !copied_commits.include?(exp["last_successful_commit"])
|
5024
|
+
if !success and exp["last_successful_commit"].present? and !copied_commits.include?(exp["last_successful_commit"]["sha1"])
|
5017
5025
|
log_message("Failed to get kube files, using last commit", Thor::Shell::Color::BLUE)
|
5018
|
-
num_of_new_files = Cnvrg::Helpers.get_experiment_events_log_from_server(exp, @project, commit: exp["last_successful_commit"])
|
5019
|
-
copied_commits << exp["last_successful_commit"]
|
5026
|
+
num_of_new_files = Cnvrg::Helpers.get_experiment_events_log_from_server(exp, @project, commit: exp["last_successful_commit"]["sha1"])
|
5027
|
+
copied_commits << exp["last_successful_commit"]["sha1"]
|
5020
5028
|
end
|
5021
5029
|
end
|
5022
5030
|
|
data/lib/cnvrg/data.rb
CHANGED
@@ -81,7 +81,7 @@ module Cnvrg
|
|
81
81
|
end
|
82
82
|
end
|
83
83
|
|
84
|
-
desc "data upload", "Upload files from local dataset directory to remote server"
|
84
|
+
desc "data upload", "Upload files from local dataset directory to remote server", :hide => true
|
85
85
|
method_option :verbose, :type => :boolean, :aliases => ["-v"], :default => false
|
86
86
|
method_option :new_branch, :type => :boolean, :aliases => ["-nb"], :desc => "create new branch of commits"
|
87
87
|
method_option :force, :type => :boolean, :aliases => ["-f","--force"], :default => false
|
@@ -101,7 +101,7 @@ module Cnvrg
|
|
101
101
|
message = options["message"]
|
102
102
|
cli.upload_data_new(new_branch, verbose, sync, force, tags, chunk_size, message:message)
|
103
103
|
end
|
104
|
-
desc 'data sync', 'Synchronise local dataset directory with remote server'
|
104
|
+
desc 'data sync', 'Synchronise local dataset directory with remote server', :hide => true
|
105
105
|
method_option :new_branch, :type => :boolean, :aliases => ["-nb"], :desc => "create new branch of commits"
|
106
106
|
method_option :force, :type => :boolean, :aliases => ["-f","--force"], :default => false
|
107
107
|
method_option :verbose, :type => :boolean, :aliases => ["-v"], :default => false
|
data/lib/cnvrg/datafiles.rb
CHANGED
@@ -1393,13 +1393,13 @@ module Cnvrg
|
|
1393
1393
|
in_threads: threads,
|
1394
1394
|
isolation: true
|
1395
1395
|
}
|
1396
|
+
|
1396
1397
|
Parallel.map(files["keys"], parallel_options) do |f|
|
1397
1398
|
begin
|
1398
1399
|
file_path = f['name']
|
1399
1400
|
file_path = File.basename(f['name']) if flatten
|
1400
1401
|
local_path = @dataset.local_path + '/' + file_path
|
1401
1402
|
Cnvrg::Logger.log_info("Downloading #{local_path}")
|
1402
|
-
progressbar.progress += 1 if progressbar.present?
|
1403
1403
|
if local_path.end_with? "/"
|
1404
1404
|
@downloader.mkdir(local_path, recursive: true)
|
1405
1405
|
next
|
@@ -1420,6 +1420,7 @@ module Cnvrg
|
|
1420
1420
|
end
|
1421
1421
|
|
1422
1422
|
resp = @downloader.safe_download(storage_path, local_path)
|
1423
|
+
progressbar.progress += 1 if progressbar.present?
|
1423
1424
|
Cnvrg::Logger.log_info("Download #{local_path} success resp: #{resp}")
|
1424
1425
|
rescue => e
|
1425
1426
|
Cnvrg::Logger.log_error(e)
|
data/lib/cnvrg/downloader/clients/azure_client.rb
CHANGED
@@ -1,4 +1,6 @@
|
|
1
|
+
require 'open-uri'
|
1
2
|
require 'azure/storage/blob'
|
3
|
+
require 'azure/storage/common/core'
|
2
4
|
|
3
5
|
module Cnvrg
|
4
6
|
module Downloader
|
@@ -13,10 +15,27 @@ module Cnvrg
|
|
13
15
|
|
14
16
|
def download(storage_path, local_path, decrypt: true)
|
15
17
|
prepare_download(local_path)
|
18
|
+
|
16
19
|
storage_path = Cnvrg::Helpers.decrypt(@key, @iv, storage_path) if decrypt
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
+
|
21
|
+
# We generate a temp uri in order to stream the file instead of using "get_blob" that overflows memory
|
22
|
+
uri = client.send(:blob_uri, @container, storage_path)
|
23
|
+
|
24
|
+
generator = Azure::Storage::Common::Core::Auth::SharedAccessSignature.new(@account_name, @access_key)
|
25
|
+
|
26
|
+
expiring_url = generator.signed_uri(
|
27
|
+
uri,
|
28
|
+
false,
|
29
|
+
service: 'b',
|
30
|
+
resource: 'b',
|
31
|
+
permissions: 'r',
|
32
|
+
start: (Time.now - (5 * 60)).utc.iso8601, # start 5 minutes ago
|
33
|
+
expiry: (Time.now + 60 * 60 * 2).utc.iso8601 # expire in 2 hours
|
34
|
+
)
|
35
|
+
|
36
|
+
# Stream the file without loading it all into memory
|
37
|
+
download = open(expiring_url)
|
38
|
+
IO.copy_stream(download, local_path)
|
20
39
|
end
|
21
40
|
|
22
41
|
def upload(storage_path, local_path)
|
data/lib/cnvrg/helpers/agent.rb
CHANGED
@@ -59,7 +59,6 @@ class Cnvrg::Helpers::Agent
|
|
59
59
|
not File.exists? file
|
60
60
|
end
|
61
61
|
return true if file_doesnt_exists.blank?
|
62
|
-
log_internal("Can't find file #{file_doesnt_exists}, stopping the job")
|
63
62
|
return false
|
64
63
|
end
|
65
64
|
true
|
@@ -180,11 +179,17 @@ class Cnvrg::Helpers::Agent
|
|
180
179
|
end
|
181
180
|
end
|
182
181
|
@exit_status = $?.exitstatus
|
182
|
+
rescue NoMethodError => e
|
183
|
+
log_internal("No Method Error: #{e}", level: LogLevel::ERROR)
|
184
|
+
@exit_status = 129
|
183
185
|
rescue Timeout::Error
|
184
186
|
Process.kill(0, @pid)
|
185
187
|
@errors << {log: "Command timed out!", timestamp: Time.now}
|
186
188
|
log_internal("Command timed out!", level: LogLevel::ERROR)
|
187
189
|
@exit_status = 124
|
190
|
+
rescue => e
|
191
|
+
log_internal("Error: #{e}", level: LogLevel::ERROR)
|
192
|
+
@exit_status = 129
|
188
193
|
ensure
|
189
194
|
retry_command if @retries != 0 and @exit_status !=0
|
190
195
|
@exit_status
|
data/lib/cnvrg/helpers/executer.rb
CHANGED
@@ -89,7 +89,7 @@ class Cnvrg::Helpers::Executer
|
|
89
89
|
while agent_id.blank? or main_id.blank?
|
90
90
|
grep_by = @job_id
|
91
91
|
grep_by = "$(hostname)" if ENV['KUBERNETES_PORT'].present?
|
92
|
-
cntrs = `docker ps --format "table {{.ID}},{{.Names}}" | grep -i #{grep_by}`.split("\n").map{|x| x.strip}
|
92
|
+
cntrs = `docker ps --format "table {{.ID}},{{.Names}}" 2> /dev/null | grep -i #{grep_by}`.split("\n").map{|x| x.strip}
|
93
93
|
agent_id = cntrs.find{|container_name| container_name.include? "agent"}.split(",").first rescue nil
|
94
94
|
main_id = cntrs.find{|container_name| container_name.include? @main_name}.split(",").first rescue nil
|
95
95
|
sleep(2)
|
@@ -168,6 +168,9 @@ class Cnvrg::Helpers::Executer
|
|
168
168
|
while !success and retries < 100
|
169
169
|
begin
|
170
170
|
resp = Cnvrg::API.request(activity_url, "PUT", {stats: executer_stats})
|
171
|
+
if !resp
|
172
|
+
raise StandardError.new("Failed to send request to server")
|
173
|
+
end
|
171
174
|
machine_activity = resp["machine_activity"]
|
172
175
|
success = true
|
173
176
|
puts("Connected to server")
|
@@ -216,6 +219,7 @@ class Cnvrg::Helpers::Executer
|
|
216
219
|
def wait_for_main
|
217
220
|
copy_file_to_main
|
218
221
|
start_tiny_if_missing
|
222
|
+
retries = 0
|
219
223
|
puts("Waiting for main container")
|
220
224
|
STDOUT.flush
|
221
225
|
got_response = false
|
@@ -233,9 +237,12 @@ class Cnvrg::Helpers::Executer
|
|
233
237
|
got_response = true
|
234
238
|
end
|
235
239
|
rescue => e
|
236
|
-
|
237
|
-
|
238
|
-
|
240
|
+
retries += 1
|
241
|
+
if retries > 3
|
242
|
+
puts("Failed to connect to main")
|
243
|
+
puts(e.message)
|
244
|
+
STDOUT.flush
|
245
|
+
end
|
239
246
|
sleep(0.1)
|
240
247
|
next
|
241
248
|
end
|
@@ -265,13 +272,30 @@ class Cnvrg::Helpers::Executer
|
|
265
272
|
end
|
266
273
|
|
267
274
|
def execute_cmds
|
268
|
-
|
275
|
+
pids_by_slug = {}
|
269
276
|
while true
|
270
277
|
if @commands_q.empty?
|
271
278
|
sleep(5)
|
272
279
|
next
|
273
280
|
end
|
274
281
|
cmd = @commands_q.pop.symbolize_keys
|
282
|
+
|
283
|
+
if cmd[:wait_slug].present?
|
284
|
+
if pids_by_slug[cmd[:wait_slug]].present?
|
285
|
+
other_pid = pids_by_slug[cmd[:wait_slug]]
|
286
|
+
begin
|
287
|
+
Process.waitpid(other_pid, Process::WNOHANG)
|
288
|
+
running = true
|
289
|
+
rescue Errno::ECHILD => e
|
290
|
+
running = false
|
291
|
+
end
|
292
|
+
if running
|
293
|
+
@commands_q.push(cmd)
|
294
|
+
sleep(5)
|
295
|
+
next
|
296
|
+
end
|
297
|
+
end
|
298
|
+
end
|
275
299
|
command_json = Cnvrg::API.request([activity_url, "commands", cmd[:slug]].join('/'), "GET")
|
276
300
|
|
277
301
|
cmd_status = command_json["status"] rescue ""
|
@@ -288,10 +312,9 @@ class Cnvrg::Helpers::Executer
|
|
288
312
|
else
|
289
313
|
Process.detach(pid)
|
290
314
|
end
|
291
|
-
|
315
|
+
pids_by_slug[cmd[:slug]] = pid
|
292
316
|
######
|
293
317
|
end
|
294
|
-
pids
|
295
318
|
end
|
296
319
|
|
297
320
|
def merge_log_block(logs)
|
@@ -303,7 +326,7 @@ class Cnvrg::Helpers::Executer
|
|
303
326
|
pod_name = `hostname`.strip rescue nil
|
304
327
|
node_name = nil
|
305
328
|
if pod_name.present?
|
306
|
-
pod_describe = `kubectl get pod #{pod_name} -o json` rescue nil
|
329
|
+
pod_describe = `kubectl get pod #{pod_name} -o json 2> /dev/null` rescue nil
|
307
330
|
pod_describe = JSON.parse(pod_describe) rescue {}
|
308
331
|
node_name = pod_describe["spec"]["nodeName"] rescue nil
|
309
332
|
end
|
@@ -366,4 +389,4 @@ class Cnvrg::Helpers::Executer
|
|
366
389
|
conn.options.open_timeout = open_timeout
|
367
390
|
conn
|
368
391
|
end
|
369
|
-
end
|
392
|
+
end
|
data/lib/cnvrg/project.rb
CHANGED
@@ -829,12 +829,12 @@ module Cnvrg
|
|
829
829
|
Cnvrg::API.request("users/#{@owner}/projects/#{@slug}/jobs/#{job_type.underscore}/#{job_id}/set_started", "POST", {job_type: job_type, job_id: job_id})
|
830
830
|
end
|
831
831
|
|
832
|
-
def self.stop_if_project_present(project_home, project_name)
|
832
|
+
def self.stop_if_project_present(project_home, project_name, owner)
|
833
833
|
cli = Cnvrg::CLI.new()
|
834
834
|
config = YAML.load_file(project_home + "/.cnvrg/config.yml")
|
835
835
|
local_commit = YAML.load_file(project_home + "/.cnvrg/idx.yml")[:commit] rescue nil
|
836
836
|
return if local_commit.blank?
|
837
|
-
if config[:project_name] == project_name
|
837
|
+
if config[:project_name] == project_name && config[:owner] == owner
|
838
838
|
cli.log_message("Project already present, clone aborted")
|
839
839
|
exit(0)
|
840
840
|
end
|
data/lib/cnvrg/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cnvrg
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.0.20
|
4
|
+
version: 2.1.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Yochay Ettun
|
@@ -10,7 +10,7 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
|
-
date: 2022-
|
13
|
+
date: 2022-08-09 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: bundler
|