cnvrg 2.0.20 → 2.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 7d5e7aa6c49c6bbe9ce99dcc85f90825d67ef8a4d066c84dd5978da99afb116b
4
- data.tar.gz: 81e5d10c09beddd7da049ac29d2bd71fc611f3c2ca5bd4b2e2431457573b683d
3
+ metadata.gz: aa682e9cf25c1d37b533721888ded60b3ebc397717fc904dcd7703a592dd8853
4
+ data.tar.gz: bba09312bac1e44ec8b64358dead4036e173832cb605e56803bc368b8eff6dbc
5
5
  SHA512:
6
- metadata.gz: c338755e158c6e1c03dc84c16f3015c1801875d98e58ad788da47b5cf2374b06fa62445279ddd9585b9e2dd3a62bee381af968f1e5925c2b74a9eedf0652b17b
7
- data.tar.gz: 674fe36d75e00c70919067c2539a85c3c56c6850852faf00781f8419a3a3a1dabb3d925be56316a9d20903a869300b761a893f01a0cea94eef6703f99c34d5a8
6
+ metadata.gz: d8144e807411faa6c507c037309abd5e4919b317253147b6bb1eec8eaab2614ad84d32823bfc9dde405f881cefe4be9d975dd2dc21e47392c8416ddd0ed6c253
7
+ data.tar.gz: 46aebd82a4901774d605f40c26aee7607a0a3c407ddf1c3cc4f91625f678e5f2f0bcb8e53c92b3c5812150a6b4b98f491e3c21adace85196d82314e35ef7646e
data/Readme.md CHANGED
@@ -80,4 +80,12 @@
80
80
  * DEV-13271 - Bug: CLI - on upload folders in working dir containing .cnvrg, dir not uploading - dir is on .cnvrgignore
81
81
  ## Version v2.0.20
82
82
  2022-02-27
83
- * DEV-12288 - Bug: wrong error message when upload fails
83
+ * DEV-12288 - Bug: wrong error message when upload fails
84
+ ## Version v2.1.1
85
+ 2022-05-01
86
+ ## Version v2.1.2
87
+ 2022-05-08
88
+ * DEV-13815 - Bug: CLI - remove "cnvrg data sync" command
89
+ ## Version v2.1.3
90
+ 2022-05-16
91
+ * DEV-13981 - Bug: CLI - dataset query clone stuck at 50% then "Killed"
data/lib/cnvrg/cli.rb CHANGED
@@ -1008,6 +1008,7 @@ module Cnvrg
1008
1008
  abs_path = dataset_home + "/" + relative_path_dir
1009
1009
  abs_path = dataset_home if flatten
1010
1010
  fullpath = abs_path + "/" + file_name
1011
+ fullpath = fullpath.gsub("//", "/")
1011
1012
 
1012
1013
  begin
1013
1014
  FileUtils.mkdir_p(abs_path) unless File.exist? (fullpath)
@@ -1018,14 +1019,14 @@ module Cnvrg
1018
1019
  begin
1019
1020
  unless File.exist?(fullpath)
1020
1021
  downloader.safe_operation("#{abs_path}/#{file_name}") do
1021
- File.open(fullpath, "w") { |file| file.write open(f["url"]).read }
1022
+ download = open(f["url"])
1023
+ IO.copy_stream(download, fullpath)
1022
1024
  end
1023
1025
  end
1024
1026
  rescue => e
1025
1027
  log_message("Could not download file: #{f["fullpath"]}", Thor::Shell::Color::RED)
1026
1028
  exit(1)
1027
1029
  end
1028
-
1029
1030
  end
1030
1031
  #@executer.set_dataset_status(dataset: dataset.slug, status: "cloned") if @executer.present?
1031
1032
  rescue Interrupt
@@ -2061,6 +2062,8 @@ module Cnvrg
2061
2062
  method_option :init, :type => :boolean, :aliases => ["--initial"], :desc => "initial sync", :default => false
2062
2063
  method_option :message, :type => :string, :aliases => ["--message"], :desc => "create commit with message", :default => nil
2063
2064
  def sync_data_new(new_branch, force, verbose, commit, all_files, tags ,parallel, chunk_size, init, message)
2065
+ log_message("This method is deprecated, please use 'data put' instead. for more info visit our docs: https://app.cnvrg.io/docs/cli/install.html#upload-files-to-a-dataset", Thor::Shell::Color::BLUE, !options["verbose"])
2066
+ return
2064
2067
  verify_logged_in(true)
2065
2068
  log_start(__method__, args, options)
2066
2069
  log_message('Syncing dataset', Thor::Shell::Color::BLUE, !options["verbose"])
@@ -3416,9 +3419,6 @@ module Cnvrg
3416
3419
  end
3417
3420
 
3418
3421
  end_commit = @project.last_local_commit
3419
- if end_commit.present?
3420
- @exp.job_log(["Experiment end commit: #{end_commit}"])
3421
- end
3422
3422
 
3423
3423
  # log_thread.join
3424
3424
  stats_thread.join if docker_stats
@@ -5013,10 +5013,10 @@ module Cnvrg
5013
5013
  else
5014
5014
  log_message("#{exp_name} is running should get logs", Thor::Shell::Color::BLUE)
5015
5015
  success, num_of_new_files = Cnvrg::Helpers.get_experiment_events_log_via_kubectl(exp, namespace)
5016
- if !success and exp["last_successful_commit"].present? and !copied_commits.include?(exp["last_successful_commit"])
5016
+ if !success and exp["last_successful_commit"].present? and !copied_commits.include?(exp["last_successful_commit"]["sha1"])
5017
5017
  log_message("Failed to get kube files, using last commit", Thor::Shell::Color::BLUE)
5018
- num_of_new_files = Cnvrg::Helpers.get_experiment_events_log_from_server(exp, @project, commit: exp["last_successful_commit"])
5019
- copied_commits << exp["last_successful_commit"]
5018
+ num_of_new_files = Cnvrg::Helpers.get_experiment_events_log_from_server(exp, @project, commit: exp["last_successful_commit"]["sha1"])
5019
+ copied_commits << exp["last_successful_commit"]["sha1"]
5020
5020
  end
5021
5021
  end
5022
5022
 
data/lib/cnvrg/data.rb CHANGED
@@ -101,7 +101,7 @@ module Cnvrg
101
101
  message = options["message"]
102
102
  cli.upload_data_new(new_branch, verbose, sync, force, tags, chunk_size, message:message)
103
103
  end
104
- desc 'data sync', 'Synchronise local dataset directory with remote server'
104
+ desc 'data sync', 'Synchronise local dataset directory with remote server', :hide => true
105
105
  method_option :new_branch, :type => :boolean, :aliases => ["-nb"], :desc => "create new branch of commits"
106
106
  method_option :force, :type => :boolean, :aliases => ["-f","--force"], :default => false
107
107
  method_option :verbose, :type => :boolean, :aliases => ["-v"], :default => false
@@ -1393,13 +1393,13 @@ module Cnvrg
1393
1393
  in_threads: threads,
1394
1394
  isolation: true
1395
1395
  }
1396
+
1396
1397
  Parallel.map(files["keys"], parallel_options) do |f|
1397
1398
  begin
1398
1399
  file_path = f['name']
1399
1400
  file_path = File.basename(f['name']) if flatten
1400
1401
  local_path = @dataset.local_path + '/' + file_path
1401
1402
  Cnvrg::Logger.log_info("Downloading #{local_path}")
1402
- progressbar.progress += 1 if progressbar.present?
1403
1403
  if local_path.end_with? "/"
1404
1404
  @downloader.mkdir(local_path, recursive: true)
1405
1405
  next
@@ -1420,6 +1420,7 @@ module Cnvrg
1420
1420
  end
1421
1421
 
1422
1422
  resp = @downloader.safe_download(storage_path, local_path)
1423
+ progressbar.progress += 1 if progressbar.present?
1423
1424
  Cnvrg::Logger.log_info("Download #{local_path} success resp: #{resp}")
1424
1425
  rescue => e
1425
1426
  Cnvrg::Logger.log_error(e)
@@ -1,4 +1,6 @@
1
+ require 'open-uri'
1
2
  require 'azure/storage/blob'
3
+ require 'azure/storage/common/core'
2
4
 
3
5
  module Cnvrg
4
6
  module Downloader
@@ -13,10 +15,27 @@ module Cnvrg
13
15
 
14
16
  def download(storage_path, local_path, decrypt: true)
15
17
  prepare_download(local_path)
18
+
16
19
  storage_path = Cnvrg::Helpers.decrypt(@key, @iv, storage_path) if decrypt
17
- blob, content = client.get_blob(@container, storage_path)
18
- ::File.open(local_path, 'wb') {|f| f.write(content)}
19
- blob
20
+
21
+ # We generate a temp uri in order to stream the file instead of using "get_blob" that overflows memory
22
+ uri = client.send(:blob_uri, @container, storage_path)
23
+
24
+ generator = Azure::Storage::Common::Core::Auth::SharedAccessSignature.new(@account_name, @access_key)
25
+
26
+ expiring_url = generator.signed_uri(
27
+ uri,
28
+ false,
29
+ service: 'b',
30
+ resource: 'b',
31
+ permissions: 'r',
32
+ start: (Time.now - (5 * 60)).utc.iso8601, # start 5 minutes ago
33
+ expiry: (Time.now + 60 * 60 * 2).utc.iso8601 # expire in 2 hours
34
+ )
35
+
36
+ # Stream the file without loading it all into memory
37
+ download = open(expiring_url)
38
+ IO.copy_stream(download, local_path)
20
39
  end
21
40
 
22
41
  def upload(storage_path, local_path)
@@ -59,7 +59,6 @@ class Cnvrg::Helpers::Agent
59
59
  not File.exists? file
60
60
  end
61
61
  return true if file_doesnt_exists.blank?
62
- log_internal("Can't find file #{file_doesnt_exists}, stopping the job")
63
62
  return false
64
63
  end
65
64
  true
@@ -89,7 +89,7 @@ class Cnvrg::Helpers::Executer
89
89
  while agent_id.blank? or main_id.blank?
90
90
  grep_by = @job_id
91
91
  grep_by = "$(hostname)" if ENV['KUBERNETES_PORT'].present?
92
- cntrs = `docker ps --format "table {{.ID}},{{.Names}}" | grep -i #{grep_by}`.split("\n").map{|x| x.strip}
92
+ cntrs = `docker ps --format "table {{.ID}},{{.Names}}" 2> /dev/null | grep -i #{grep_by}`.split("\n").map{|x| x.strip}
93
93
  agent_id = cntrs.find{|container_name| container_name.include? "agent"}.split(",").first rescue nil
94
94
  main_id = cntrs.find{|container_name| container_name.include? @main_name}.split(",").first rescue nil
95
95
  sleep(2)
@@ -168,6 +168,9 @@ class Cnvrg::Helpers::Executer
168
168
  while !success and retries < 100
169
169
  begin
170
170
  resp = Cnvrg::API.request(activity_url, "PUT", {stats: executer_stats})
171
+ if !resp
172
+ raise StandardError.new("Failed to send request to server")
173
+ end
171
174
  machine_activity = resp["machine_activity"]
172
175
  success = true
173
176
  puts("Connected to server")
@@ -216,6 +219,7 @@ class Cnvrg::Helpers::Executer
216
219
  def wait_for_main
217
220
  copy_file_to_main
218
221
  start_tiny_if_missing
222
+ retries = 0
219
223
  puts("Waiting for main container")
220
224
  STDOUT.flush
221
225
  got_response = false
@@ -233,9 +237,12 @@ class Cnvrg::Helpers::Executer
233
237
  got_response = true
234
238
  end
235
239
  rescue => e
236
- puts("Failed to connect to main")
237
- puts(e)
238
- STDOUT.flush
240
+ retries += 1
241
+ if retries > 3
242
+ puts("Failed to connect to main")
243
+ puts(e.message)
244
+ STDOUT.flush
245
+ end
239
246
  sleep(0.1)
240
247
  next
241
248
  end
@@ -265,13 +272,30 @@ class Cnvrg::Helpers::Executer
265
272
  end
266
273
 
267
274
  def execute_cmds
268
- pids = []
275
+ pids_by_slug = {}
269
276
  while true
270
277
  if @commands_q.empty?
271
278
  sleep(5)
272
279
  next
273
280
  end
274
281
  cmd = @commands_q.pop.symbolize_keys
282
+
283
+ if cmd[:wait_slug].present?
284
+ if pids_by_slug[cmd[:wait_slug]].present?
285
+ other_pid = pids_by_slug[cmd[:wait_slug]]
286
+ begin
287
+ Process.waitpid(other_pid, Process::WNOHANG)
288
+ running = true
289
+ rescue Errno::ECHILD => e
290
+ running = false
291
+ end
292
+ if running
293
+ @commands_q.push(cmd)
294
+ sleep(5)
295
+ next
296
+ end
297
+ end
298
+ end
275
299
  command_json = Cnvrg::API.request([activity_url, "commands", cmd[:slug]].join('/'), "GET")
276
300
 
277
301
  cmd_status = command_json["status"] rescue ""
@@ -288,10 +312,9 @@ class Cnvrg::Helpers::Executer
288
312
  else
289
313
  Process.detach(pid)
290
314
  end
291
- pids << pid
315
+ pids_by_slug[cmd[:slug]] = pid
292
316
  ######
293
317
  end
294
- pids
295
318
  end
296
319
 
297
320
  def merge_log_block(logs)
@@ -303,7 +326,7 @@ class Cnvrg::Helpers::Executer
303
326
  pod_name = `hostname`.strip rescue nil
304
327
  node_name = nil
305
328
  if pod_name.present?
306
- pod_describe = `kubectl get pod #{pod_name} -o json` rescue nil
329
+ pod_describe = `kubectl get pod #{pod_name} -o json 2> /dev/null` rescue nil
307
330
  pod_describe = JSON.parse(pod_describe) rescue {}
308
331
  node_name = pod_describe["spec"]["nodeName"] rescue nil
309
332
  end
@@ -366,4 +389,4 @@ class Cnvrg::Helpers::Executer
366
389
  conn.options.open_timeout = open_timeout
367
390
  conn
368
391
  end
369
- end
392
+ end
data/lib/cnvrg/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Cnvrg
2
- VERSION = '2.0.20'
2
+ VERSION = '2.1.3'
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cnvrg
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.20
4
+ version: 2.1.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Yochay Ettun
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2022-03-02 00:00:00.000000000 Z
13
+ date: 2022-05-16 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: bundler