cnvrg 2.0.18 → 2.1.5

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 30e82404c79b7780d736c0f4996ab1173cbbbfbc13a68c58a0dc0364775c7d5d
4
- data.tar.gz: 6a846acb27781403e27c39ffd36f2b2149b771307b49375356cabd8f769fafba
3
+ metadata.gz: ffe47307abd2feac46497f34ceec95fab9a866324a235db3ab6f0c61129a7a9d
4
+ data.tar.gz: cd8316866861c8b16ec4a4d1d001d8da20f79e34ab95ea38a850a8bce79d6d66
5
5
  SHA512:
6
- metadata.gz: af34ebd4026b57c3111686f066f5c88cda3851ba27fe13a8273848274b614ba088d055ba1334c85842aea1142a113701e1de63ddf0ca5dc141c891762678dcac
7
- data.tar.gz: 79fe82877378207d25a7ab4a7f73414fde0d90a5df128e4c1bd513a3f441822e5a67f9d8f04986b266ee111a66084875008a862926e285f94960cb2a90b49198
6
+ metadata.gz: 9b73987d1023e4aa2600ca11555078194398e7e10cb1d0314569aac8ab24a7385a41c4e86e70a169a4502f122e56baa1bde68ea5cb13706c666996c7a92b5244
7
+ data.tar.gz: 8dbd96c0b77e8c88d52254d04af38efa20fcab1c003027c8ee4b8e5918dc01ff35ae35505ec5d36687183925847438d5cb49eb8063a2db2ed2784a13e09900d3
data/Readme.md CHANGED
@@ -74,4 +74,25 @@
74
74
  * DEV-10581 - Bug: CLI - getting 404 response in "cnvrg set_default_owner"
75
75
  ## Version v2.0.18
76
76
  2022-01-31
77
- * DEV-12637 - Bug: Dataset - creating file from CLI/SDK in a folder with + sign, replaces + with space AND creates 2 folders
77
+ * DEV-12637 - Bug: Dataset - creating file from CLI/SDK in a folder with + sign, replaces + with space AND creates 2 folders
78
+ ## Version v2.0.19
79
+ 2022-02-22
80
+ * DEV-13271 - Bug: CLI - on upload folders in working dir containing .cnvrg, dir not uploading - dir is on .cnvrgignore
81
+ ## Version v2.0.20
82
+ 2022-02-27
83
+ * DEV-12288 - Bug: wrong error message when upload fails
84
+ ## Version v2.1.1
85
+ 2022-05-01
86
+ ## Version v2.1.2
87
+ 2022-05-08
88
+ * DEV-13815 - Bug: CLI - remove "cnvrg data sync" command
89
+ ## Version v2.1.3
90
+ 2022-05-16
91
+ * DEV-13981 - Bug: CLI - dataset query clone stuck at 50% then "Killed"
92
+ ## Version v2.1.4
93
+ 2022-05-22
94
+ * DEV-14182 - Bug: CLI - hide 'data upload' command
95
+ ## Version v2.1.5
96
+ 2022-07-31
97
+ * DEV-14244 - Bug: CLI - "failed to upload ongoing stats" due to NaN in float
98
+ * DEV-14633 - Bug: End sync did not complete, causing the experiment to get stuck in "terminating"
data/lib/cnvrg/cli.rb CHANGED
@@ -1008,6 +1008,7 @@ module Cnvrg
1008
1008
  abs_path = dataset_home + "/" + relative_path_dir
1009
1009
  abs_path = dataset_home if flatten
1010
1010
  fullpath = abs_path + "/" + file_name
1011
+ fullpath = fullpath.gsub("//", "/")
1011
1012
 
1012
1013
  begin
1013
1014
  FileUtils.mkdir_p(abs_path) unless File.exist? (fullpath)
@@ -1018,14 +1019,14 @@ module Cnvrg
1018
1019
  begin
1019
1020
  unless File.exist?(fullpath)
1020
1021
  downloader.safe_operation("#{abs_path}/#{file_name}") do
1021
- File.open(fullpath, "w") { |file| file.write open(f["url"]).read }
1022
+ download = open(f["url"])
1023
+ IO.copy_stream(download, fullpath)
1022
1024
  end
1023
1025
  end
1024
1026
  rescue => e
1025
1027
  log_message("Could not download file: #{f["fullpath"]}", Thor::Shell::Color::RED)
1026
1028
  exit(1)
1027
1029
  end
1028
-
1029
1030
  end
1030
1031
  #@executer.set_dataset_status(dataset: dataset.slug, status: "cloned") if @executer.present?
1031
1032
  rescue Interrupt
@@ -2061,6 +2062,8 @@ module Cnvrg
2061
2062
  method_option :init, :type => :boolean, :aliases => ["--initial"], :desc => "initial sync", :default => false
2062
2063
  method_option :message, :type => :string, :aliases => ["--message"], :desc => "create commit with message", :default => nil
2063
2064
  def sync_data_new(new_branch, force, verbose, commit, all_files, tags ,parallel, chunk_size, init, message)
2065
+ log_message("This method is deprecated, please use 'data put' instead. for more info visit our docs: https://app.cnvrg.io/docs/cli/install.html#upload-files-to-a-dataset", Thor::Shell::Color::BLUE, !options["verbose"])
2066
+ return
2064
2067
  verify_logged_in(true)
2065
2068
  log_start(__method__, args, options)
2066
2069
  log_message('Syncing dataset', Thor::Shell::Color::BLUE, !options["verbose"])
@@ -2085,6 +2088,8 @@ module Cnvrg
2085
2088
  method_option :message, :type => :string, :aliases => ["--message"], :desc => "create commit with message", :default => nil
2086
2089
 
2087
2090
  def upload_data_new(new_branch, verbose, sync, force, tags, chunk_size, message:nil, total_deleted: 0, total_downloaded: 0)
2091
+ log_message("This method is deprecated, please use 'data put' instead. for more info visit our docs: https://app.cnvrg.io/docs/cli/install.html#upload-files-to-a-dataset", Thor::Shell::Color::BLUE, !options["verbose"])
2092
+ return
2088
2093
  begin
2089
2094
  commit, files_list = invoke :start_commit_data,[], :new_branch=> new_branch, :direct=>false, :force =>force, :chunk_size => chunk_size, :message => message
2090
2095
  files_to_upload, upload_errors = invoke :upload_data_files,[commit, files_list: files_list],:new_branch=>new_branch, :verbose =>verbose, :force =>force, :sync =>sync, :chunk_size => chunk_size
@@ -2377,7 +2382,11 @@ module Cnvrg
2377
2382
  if ignore.nil? or ignore.empty?
2378
2383
  ignore = ignore_list
2379
2384
  end
2380
- data_ignore = data_dir_include()
2385
+
2386
+ if job_type != "Experiment"
2387
+ data_ignore = data_dir_include()
2388
+ end
2389
+
2381
2390
  if !data_ignore.nil?
2382
2391
  if ignore.nil? or ignore.empty?
2383
2392
  ignore = data_ignore
@@ -3412,9 +3421,6 @@ module Cnvrg
3412
3421
  end
3413
3422
 
3414
3423
  end_commit = @project.last_local_commit
3415
- if end_commit.present?
3416
- @exp.job_log(["Experiment end commit: #{end_commit}"])
3417
- end
3418
3424
 
3419
3425
  # log_thread.join
3420
3426
  stats_thread.join if docker_stats
@@ -4727,8 +4733,14 @@ module Cnvrg
4727
4733
  end
4728
4734
  end
4729
4735
  else
4730
- timestamp, value = data_result&.first&.dig('value')
4731
- stat_value = value.present? ? ("%.2f" % value) : 0 # converting 34.685929244444445 to 34.69
4736
+ begin
4737
+ timestamp, value = data_result&.first&.dig('value')
4738
+ stat_value = value.present? ? ("%.2f" % value) : 0 # converting 34.685929244444445 to 34.69
4739
+ rescue => e
4740
+ Cnvrg::Logger.log_info("Failed converting string into float with error: #{e.message}")
4741
+ Cnvrg::Logger.log_error(e)
4742
+ stat_value = 0
4743
+ end
4732
4744
  stat_value = stat_value.to_i == stat_value.to_f ? stat_value.to_i : stat_value.to_f # converting 34.00 to 34
4733
4745
  if query_name.include? 'block'
4734
4746
  stats['block_io'] = {} if stats['block_io'].blank?
@@ -5009,10 +5021,10 @@ module Cnvrg
5009
5021
  else
5010
5022
  log_message("#{exp_name} is running should get logs", Thor::Shell::Color::BLUE)
5011
5023
  success, num_of_new_files = Cnvrg::Helpers.get_experiment_events_log_via_kubectl(exp, namespace)
5012
- if !success and exp["last_successful_commit"].present? and !copied_commits.include?(exp["last_successful_commit"])
5024
+ if !success and exp["last_successful_commit"].present? and !copied_commits.include?(exp["last_successful_commit"]["sha1"])
5013
5025
  log_message("Failed to get kube files, using last commit", Thor::Shell::Color::BLUE)
5014
- num_of_new_files = Cnvrg::Helpers.get_experiment_events_log_from_server(exp, @project, commit: exp["last_successful_commit"])
5015
- copied_commits << exp["last_successful_commit"]
5026
+ num_of_new_files = Cnvrg::Helpers.get_experiment_events_log_from_server(exp, @project, commit: exp["last_successful_commit"]["sha1"])
5027
+ copied_commits << exp["last_successful_commit"]["sha1"]
5016
5028
  end
5017
5029
  end
5018
5030
 
data/lib/cnvrg/data.rb CHANGED
@@ -81,7 +81,7 @@ module Cnvrg
81
81
  end
82
82
  end
83
83
 
84
- desc "data upload", "Upload files from local dataset directory to remote server"
84
+ desc "data upload", "Upload files from local dataset directory to remote server", :hide => true
85
85
  method_option :verbose, :type => :boolean, :aliases => ["-v"], :default => false
86
86
  method_option :new_branch, :type => :boolean, :aliases => ["-nb"], :desc => "create new branch of commits"
87
87
  method_option :force, :type => :boolean, :aliases => ["-f","--force"], :default => false
@@ -101,7 +101,7 @@ module Cnvrg
101
101
  message = options["message"]
102
102
  cli.upload_data_new(new_branch, verbose, sync, force, tags, chunk_size, message:message)
103
103
  end
104
- desc 'data sync', 'Synchronise local dataset directory with remote server'
104
+ desc 'data sync', 'Synchronise local dataset directory with remote server', :hide => true
105
105
  method_option :new_branch, :type => :boolean, :aliases => ["-nb"], :desc => "create new branch of commits"
106
106
  method_option :force, :type => :boolean, :aliases => ["-f","--force"], :default => false
107
107
  method_option :verbose, :type => :boolean, :aliases => ["-v"], :default => false
@@ -1393,13 +1393,13 @@ module Cnvrg
1393
1393
  in_threads: threads,
1394
1394
  isolation: true
1395
1395
  }
1396
+
1396
1397
  Parallel.map(files["keys"], parallel_options) do |f|
1397
1398
  begin
1398
1399
  file_path = f['name']
1399
1400
  file_path = File.basename(f['name']) if flatten
1400
1401
  local_path = @dataset.local_path + '/' + file_path
1401
1402
  Cnvrg::Logger.log_info("Downloading #{local_path}")
1402
- progressbar.progress += 1 if progressbar.present?
1403
1403
  if local_path.end_with? "/"
1404
1404
  @downloader.mkdir(local_path, recursive: true)
1405
1405
  next
@@ -1420,6 +1420,7 @@ module Cnvrg
1420
1420
  end
1421
1421
 
1422
1422
  resp = @downloader.safe_download(storage_path, local_path)
1423
+ progressbar.progress += 1 if progressbar.present?
1423
1424
  Cnvrg::Logger.log_info("Download #{local_path} success resp: #{resp}")
1424
1425
  rescue => e
1425
1426
  Cnvrg::Logger.log_error(e)
@@ -1,4 +1,6 @@
1
+ require 'open-uri'
1
2
  require 'azure/storage/blob'
3
+ require 'azure/storage/common/core'
2
4
 
3
5
  module Cnvrg
4
6
  module Downloader
@@ -13,10 +15,27 @@ module Cnvrg
13
15
 
14
16
  def download(storage_path, local_path, decrypt: true)
15
17
  prepare_download(local_path)
18
+
16
19
  storage_path = Cnvrg::Helpers.decrypt(@key, @iv, storage_path) if decrypt
17
- blob, content = client.get_blob(@container, storage_path)
18
- ::File.open(local_path, 'wb') {|f| f.write(content)}
19
- blob
20
+
21
+ # We generate a temp uri in order to stream the file instead of using "get_blob" that overflows memory
22
+ uri = client.send(:blob_uri, @container, storage_path)
23
+
24
+ generator = Azure::Storage::Common::Core::Auth::SharedAccessSignature.new(@account_name, @access_key)
25
+
26
+ expiring_url = generator.signed_uri(
27
+ uri,
28
+ false,
29
+ service: 'b',
30
+ resource: 'b',
31
+ permissions: 'r',
32
+ start: (Time.now - (5 * 60)).utc.iso8601, # start 5 minutes ago
33
+ expiry: (Time.now + 60 * 60 * 2).utc.iso8601 # expire in 2 hours
34
+ )
35
+
36
+ # Stream the file without loading it all into memory
37
+ download = open(expiring_url)
38
+ IO.copy_stream(download, local_path)
20
39
  end
21
40
 
22
41
  def upload(storage_path, local_path)
data/lib/cnvrg/files.rb CHANGED
@@ -106,7 +106,7 @@ module Cnvrg
106
106
  commit: commit_sha1
107
107
  })
108
108
  unless Cnvrg::CLI.is_response_success(resp, false)
109
- raise StandardError.new("unsupported character: folder name can not include / \\ * : ? \" | ")
109
+ raise StandardError.new("Cant upload files to the server")
110
110
  end
111
111
  # resolve bucket
112
112
  res = resp['result']
@@ -59,7 +59,6 @@ class Cnvrg::Helpers::Agent
59
59
  not File.exists? file
60
60
  end
61
61
  return true if file_doesnt_exists.blank?
62
- log_internal("Can't find file #{file_doesnt_exists}, stopping the job")
63
62
  return false
64
63
  end
65
64
  true
@@ -180,11 +179,17 @@ class Cnvrg::Helpers::Agent
180
179
  end
181
180
  end
182
181
  @exit_status = $?.exitstatus
182
+ rescue NoMethodError => e
183
+ log_internal("No Method Error: #{e}", level: LogLevel::ERROR)
184
+ @exit_status = 129
183
185
  rescue Timeout::Error
184
186
  Process.kill(0, @pid)
185
187
  @errors << {log: "Command timed out!", timestamp: Time.now}
186
188
  log_internal("Command timed out!", level: LogLevel::ERROR)
187
189
  @exit_status = 124
190
+ rescue => e
191
+ log_internal("Error: #{e}", level: LogLevel::ERROR)
192
+ @exit_status = 129
188
193
  ensure
189
194
  retry_command if @retries != 0 and @exit_status !=0
190
195
  @exit_status
@@ -89,7 +89,7 @@ class Cnvrg::Helpers::Executer
89
89
  while agent_id.blank? or main_id.blank?
90
90
  grep_by = @job_id
91
91
  grep_by = "$(hostname)" if ENV['KUBERNETES_PORT'].present?
92
- cntrs = `docker ps --format "table {{.ID}},{{.Names}}" | grep -i #{grep_by}`.split("\n").map{|x| x.strip}
92
+ cntrs = `docker ps --format "table {{.ID}},{{.Names}}" 2> /dev/null | grep -i #{grep_by}`.split("\n").map{|x| x.strip}
93
93
  agent_id = cntrs.find{|container_name| container_name.include? "agent"}.split(",").first rescue nil
94
94
  main_id = cntrs.find{|container_name| container_name.include? @main_name}.split(",").first rescue nil
95
95
  sleep(2)
@@ -168,6 +168,9 @@ class Cnvrg::Helpers::Executer
168
168
  while !success and retries < 100
169
169
  begin
170
170
  resp = Cnvrg::API.request(activity_url, "PUT", {stats: executer_stats})
171
+ if !resp
172
+ raise StandardError.new("Failed to send request to server")
173
+ end
171
174
  machine_activity = resp["machine_activity"]
172
175
  success = true
173
176
  puts("Connected to server")
@@ -216,6 +219,7 @@ class Cnvrg::Helpers::Executer
216
219
  def wait_for_main
217
220
  copy_file_to_main
218
221
  start_tiny_if_missing
222
+ retries = 0
219
223
  puts("Waiting for main container")
220
224
  STDOUT.flush
221
225
  got_response = false
@@ -233,9 +237,12 @@ class Cnvrg::Helpers::Executer
233
237
  got_response = true
234
238
  end
235
239
  rescue => e
236
- puts("Failed to connect to main")
237
- puts(e)
238
- STDOUT.flush
240
+ retries += 1
241
+ if retries > 3
242
+ puts("Failed to connect to main")
243
+ puts(e.message)
244
+ STDOUT.flush
245
+ end
239
246
  sleep(0.1)
240
247
  next
241
248
  end
@@ -265,13 +272,30 @@ class Cnvrg::Helpers::Executer
265
272
  end
266
273
 
267
274
  def execute_cmds
268
- pids = []
275
+ pids_by_slug = {}
269
276
  while true
270
277
  if @commands_q.empty?
271
278
  sleep(5)
272
279
  next
273
280
  end
274
281
  cmd = @commands_q.pop.symbolize_keys
282
+
283
+ if cmd[:wait_slug].present?
284
+ if pids_by_slug[cmd[:wait_slug]].present?
285
+ other_pid = pids_by_slug[cmd[:wait_slug]]
286
+ begin
287
+ Process.waitpid(other_pid, Process::WNOHANG)
288
+ running = true
289
+ rescue Errno::ECHILD => e
290
+ running = false
291
+ end
292
+ if running
293
+ @commands_q.push(cmd)
294
+ sleep(5)
295
+ next
296
+ end
297
+ end
298
+ end
275
299
  command_json = Cnvrg::API.request([activity_url, "commands", cmd[:slug]].join('/'), "GET")
276
300
 
277
301
  cmd_status = command_json["status"] rescue ""
@@ -288,10 +312,9 @@ class Cnvrg::Helpers::Executer
288
312
  else
289
313
  Process.detach(pid)
290
314
  end
291
- pids << pid
315
+ pids_by_slug[cmd[:slug]] = pid
292
316
  ######
293
317
  end
294
- pids
295
318
  end
296
319
 
297
320
  def merge_log_block(logs)
@@ -303,7 +326,7 @@ class Cnvrg::Helpers::Executer
303
326
  pod_name = `hostname`.strip rescue nil
304
327
  node_name = nil
305
328
  if pod_name.present?
306
- pod_describe = `kubectl get pod #{pod_name} -o json` rescue nil
329
+ pod_describe = `kubectl get pod #{pod_name} -o json 2> /dev/null` rescue nil
307
330
  pod_describe = JSON.parse(pod_describe) rescue {}
308
331
  node_name = pod_describe["spec"]["nodeName"] rescue nil
309
332
  end
@@ -366,4 +389,4 @@ class Cnvrg::Helpers::Executer
366
389
  conn.options.open_timeout = open_timeout
367
390
  conn
368
391
  end
369
- end
392
+ end
data/lib/cnvrg/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Cnvrg
2
- VERSION = '2.0.18'
2
+ VERSION = '2.1.5'
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cnvrg
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.18
4
+ version: 2.1.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Yochay Ettun
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2022-01-31 00:00:00.000000000 Z
13
+ date: 2022-08-07 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: bundler