cnvrg 2.0.20 → 2.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 7d5e7aa6c49c6bbe9ce99dcc85f90825d67ef8a4d066c84dd5978da99afb116b
4
- data.tar.gz: 81e5d10c09beddd7da049ac29d2bd71fc611f3c2ca5bd4b2e2431457573b683d
3
+ metadata.gz: 296eba8c8dab87e1a16b7980c1e80b013be95af52cfc184a12cf366f676a3a2b
4
+ data.tar.gz: '08ad62abd898bb6bb1a9099237f5ebd854f87359ecc571036ce351b259127d78'
5
5
  SHA512:
6
- metadata.gz: c338755e158c6e1c03dc84c16f3015c1801875d98e58ad788da47b5cf2374b06fa62445279ddd9585b9e2dd3a62bee381af968f1e5925c2b74a9eedf0652b17b
7
- data.tar.gz: 674fe36d75e00c70919067c2539a85c3c56c6850852faf00781f8419a3a3a1dabb3d925be56316a9d20903a869300b761a893f01a0cea94eef6703f99c34d5a8
6
+ metadata.gz: f5174c705ed765c76538401ea14ea2c14e267d98836b3d8e8b66a72d31d4f7ee2eeb313fbd080cd15a513bb4149f907aef075be2d7f6bab4b96afefb4ba5f341
7
+ data.tar.gz: 355b440e9a009571e2f097599d14418fcdbe0a79cfaae8473637722745923f8b138b4fdbc0e3ee60107122835f98468811661b770ffee4008f8b141250361fce
data/Readme.md CHANGED
@@ -80,4 +80,22 @@
80
80
  * DEV-13271 - Bug: CLI - on upload folders in working dir containing .cnvrg, dir not uploading - dir is on .cnvrgignore
81
81
  ## Version v2.0.20
82
82
  2022-02-27
83
- * DEV-12288 - Bug: wrong error message when upload fails
83
+ * DEV-12288 - Bug: wrong error message when upload fails
84
+ ## Version v2.1.1
85
+ 2022-05-01
86
+ ## Version v2.1.2
87
+ 2022-05-08
88
+ * DEV-13815 - Bug: CLI - remove "cnvrg data sync" command
89
+ ## Version v2.1.3
90
+ 2022-05-16
91
+ * DEV-13981 - Bug: CLI - dataset query clone stuck at 50% then "Killed"
92
+ ## Version v2.1.4
93
+ 2022-05-22
94
+ * DEV-14182 - Bug: CLI - hide 'data upload' command
95
+ ## Version v2.1.5
96
+ 2022-07-31
97
+ * DEV-14244 - Bug: CLI - "failed to upload ongoing stats" due to NaN in float
98
+ * DEV-14633 - Bug: End sync did not complete, causing the experiment to get stuck in "terminating"
99
+ ## Version v2.1.6
100
+ 2022-08-09
101
+ * DEV-14682 - Bug: git-Walki: CLI/SDK experiments go into debug mode for GitHub+SSH integrated projects
data/lib/cnvrg/cli.rb CHANGED
@@ -1008,6 +1008,7 @@ module Cnvrg
1008
1008
  abs_path = dataset_home + "/" + relative_path_dir
1009
1009
  abs_path = dataset_home if flatten
1010
1010
  fullpath = abs_path + "/" + file_name
1011
+ fullpath = fullpath.gsub("//", "/")
1011
1012
 
1012
1013
  begin
1013
1014
  FileUtils.mkdir_p(abs_path) unless File.exist? (fullpath)
@@ -1018,14 +1019,14 @@ module Cnvrg
1018
1019
  begin
1019
1020
  unless File.exist?(fullpath)
1020
1021
  downloader.safe_operation("#{abs_path}/#{file_name}") do
1021
- File.open(fullpath, "w") { |file| file.write open(f["url"]).read }
1022
+ download = open(f["url"])
1023
+ IO.copy_stream(download, fullpath)
1022
1024
  end
1023
1025
  end
1024
1026
  rescue => e
1025
1027
  log_message("Could not download file: #{f["fullpath"]}", Thor::Shell::Color::RED)
1026
1028
  exit(1)
1027
1029
  end
1028
-
1029
1030
  end
1030
1031
  #@executer.set_dataset_status(dataset: dataset.slug, status: "cloned") if @executer.present?
1031
1032
  rescue Interrupt
@@ -1847,7 +1848,7 @@ module Cnvrg
1847
1848
  log_start(__method__, args, options)
1848
1849
  project_home = Dir.pwd
1849
1850
  soft = options["soft"] || false
1850
- Project.stop_if_project_present(project_home, slug) if soft
1851
+ Project.stop_if_project_present(project_home, slug, owner) if soft
1851
1852
  clone_resp = Project.clone_dir_remote(slug, owner, slug,true)
1852
1853
  exit 1 if not clone_resp
1853
1854
  idx_status = Project.new(get_project_home).generate_idx(files:[])
@@ -1917,7 +1918,7 @@ module Cnvrg
1917
1918
  clone_resp = false
1918
1919
  project_home = Dir.pwd
1919
1920
 
1920
- Project.stop_if_project_present(project_home, project_name) if soft
1921
+ Project.stop_if_project_present(project_home, project_name, owner) if soft
1921
1922
 
1922
1923
  if remote and !git
1923
1924
  clone_resp = Project.clone_dir_remote(slug, owner, project_name,git)
@@ -2061,6 +2062,8 @@ module Cnvrg
2061
2062
  method_option :init, :type => :boolean, :aliases => ["--initial"], :desc => "initial sync", :default => false
2062
2063
  method_option :message, :type => :string, :aliases => ["--message"], :desc => "create commit with message", :default => nil
2063
2064
  def sync_data_new(new_branch, force, verbose, commit, all_files, tags ,parallel, chunk_size, init, message)
2065
+ log_message("This method is deprecated, please use 'data put' instead. for more info visit our docs: https://app.cnvrg.io/docs/cli/install.html#upload-files-to-a-dataset", Thor::Shell::Color::BLUE, !options["verbose"])
2066
+ return
2064
2067
  verify_logged_in(true)
2065
2068
  log_start(__method__, args, options)
2066
2069
  log_message('Syncing dataset', Thor::Shell::Color::BLUE, !options["verbose"])
@@ -2085,6 +2088,8 @@ module Cnvrg
2085
2088
  method_option :message, :type => :string, :aliases => ["--message"], :desc => "create commit with message", :default => nil
2086
2089
 
2087
2090
  def upload_data_new(new_branch, verbose, sync, force, tags, chunk_size, message:nil, total_deleted: 0, total_downloaded: 0)
2091
+ log_message("This method is deprecated, please use 'data put' instead. for more info visit our docs: https://app.cnvrg.io/docs/cli/install.html#upload-files-to-a-dataset", Thor::Shell::Color::BLUE, !options["verbose"])
2092
+ return
2088
2093
  begin
2089
2094
  commit, files_list = invoke :start_commit_data,[], :new_branch=> new_branch, :direct=>false, :force =>force, :chunk_size => chunk_size, :message => message
2090
2095
  files_to_upload, upload_errors = invoke :upload_data_files,[commit, files_list: files_list],:new_branch=>new_branch, :verbose =>verbose, :force =>force, :sync =>sync, :chunk_size => chunk_size
@@ -3416,9 +3421,6 @@ module Cnvrg
3416
3421
  end
3417
3422
 
3418
3423
  end_commit = @project.last_local_commit
3419
- if end_commit.present?
3420
- @exp.job_log(["Experiment end commit: #{end_commit}"])
3421
- end
3422
3424
 
3423
3425
  # log_thread.join
3424
3426
  stats_thread.join if docker_stats
@@ -4731,8 +4733,14 @@ module Cnvrg
4731
4733
  end
4732
4734
  end
4733
4735
  else
4734
- timestamp, value = data_result&.first&.dig('value')
4735
- stat_value = value.present? ? ("%.2f" % value) : 0 # converting 34.685929244444445 to 34.69
4736
+ begin
4737
+ timestamp, value = data_result&.first&.dig('value')
4738
+ stat_value = value.present? ? ("%.2f" % value) : 0 # converting 34.685929244444445 to 34.69
4739
+ rescue => e
4740
+ Cnvrg::Logger.log_info("Failed converting string into float with error: #{e.message}")
4741
+ Cnvrg::Logger.log_error(e)
4742
+ stat_value = 0
4743
+ end
4736
4744
  stat_value = stat_value.to_i == stat_value.to_f ? stat_value.to_i : stat_value.to_f # converting 34.00 to 34
4737
4745
  if query_name.include? 'block'
4738
4746
  stats['block_io'] = {} if stats['block_io'].blank?
@@ -5013,10 +5021,10 @@ module Cnvrg
5013
5021
  else
5014
5022
  log_message("#{exp_name} is running should get logs", Thor::Shell::Color::BLUE)
5015
5023
  success, num_of_new_files = Cnvrg::Helpers.get_experiment_events_log_via_kubectl(exp, namespace)
5016
- if !success and exp["last_successful_commit"].present? and !copied_commits.include?(exp["last_successful_commit"])
5024
+ if !success and exp["last_successful_commit"].present? and !copied_commits.include?(exp["last_successful_commit"]["sha1"])
5017
5025
  log_message("Failed to get kube files, using last commit", Thor::Shell::Color::BLUE)
5018
- num_of_new_files = Cnvrg::Helpers.get_experiment_events_log_from_server(exp, @project, commit: exp["last_successful_commit"])
5019
- copied_commits << exp["last_successful_commit"]
5026
+ num_of_new_files = Cnvrg::Helpers.get_experiment_events_log_from_server(exp, @project, commit: exp["last_successful_commit"]["sha1"])
5027
+ copied_commits << exp["last_successful_commit"]["sha1"]
5020
5028
  end
5021
5029
  end
5022
5030
 
data/lib/cnvrg/data.rb CHANGED
@@ -81,7 +81,7 @@ module Cnvrg
81
81
  end
82
82
  end
83
83
 
84
- desc "data upload", "Upload files from local dataset directory to remote server"
84
+ desc "data upload", "Upload files from local dataset directory to remote server", :hide => true
85
85
  method_option :verbose, :type => :boolean, :aliases => ["-v"], :default => false
86
86
  method_option :new_branch, :type => :boolean, :aliases => ["-nb"], :desc => "create new branch of commits"
87
87
  method_option :force, :type => :boolean, :aliases => ["-f","--force"], :default => false
@@ -101,7 +101,7 @@ module Cnvrg
101
101
  message = options["message"]
102
102
  cli.upload_data_new(new_branch, verbose, sync, force, tags, chunk_size, message:message)
103
103
  end
104
- desc 'data sync', 'Synchronise local dataset directory with remote server'
104
+ desc 'data sync', 'Synchronise local dataset directory with remote server', :hide => true
105
105
  method_option :new_branch, :type => :boolean, :aliases => ["-nb"], :desc => "create new branch of commits"
106
106
  method_option :force, :type => :boolean, :aliases => ["-f","--force"], :default => false
107
107
  method_option :verbose, :type => :boolean, :aliases => ["-v"], :default => false
@@ -1393,13 +1393,13 @@ module Cnvrg
1393
1393
  in_threads: threads,
1394
1394
  isolation: true
1395
1395
  }
1396
+
1396
1397
  Parallel.map(files["keys"], parallel_options) do |f|
1397
1398
  begin
1398
1399
  file_path = f['name']
1399
1400
  file_path = File.basename(f['name']) if flatten
1400
1401
  local_path = @dataset.local_path + '/' + file_path
1401
1402
  Cnvrg::Logger.log_info("Downloading #{local_path}")
1402
- progressbar.progress += 1 if progressbar.present?
1403
1403
  if local_path.end_with? "/"
1404
1404
  @downloader.mkdir(local_path, recursive: true)
1405
1405
  next
@@ -1420,6 +1420,7 @@ module Cnvrg
1420
1420
  end
1421
1421
 
1422
1422
  resp = @downloader.safe_download(storage_path, local_path)
1423
+ progressbar.progress += 1 if progressbar.present?
1423
1424
  Cnvrg::Logger.log_info("Download #{local_path} success resp: #{resp}")
1424
1425
  rescue => e
1425
1426
  Cnvrg::Logger.log_error(e)
@@ -1,4 +1,6 @@
1
+ require 'open-uri'
1
2
  require 'azure/storage/blob'
3
+ require 'azure/storage/common/core'
2
4
 
3
5
  module Cnvrg
4
6
  module Downloader
@@ -13,10 +15,27 @@ module Cnvrg
13
15
 
14
16
  def download(storage_path, local_path, decrypt: true)
15
17
  prepare_download(local_path)
18
+
16
19
  storage_path = Cnvrg::Helpers.decrypt(@key, @iv, storage_path) if decrypt
17
- blob, content = client.get_blob(@container, storage_path)
18
- ::File.open(local_path, 'wb') {|f| f.write(content)}
19
- blob
20
+
21
+ # We generate a temp uri in order to stream the file instead of using "get_blob" that overflows memory
22
+ uri = client.send(:blob_uri, @container, storage_path)
23
+
24
+ generator = Azure::Storage::Common::Core::Auth::SharedAccessSignature.new(@account_name, @access_key)
25
+
26
+ expiring_url = generator.signed_uri(
27
+ uri,
28
+ false,
29
+ service: 'b',
30
+ resource: 'b',
31
+ permissions: 'r',
32
+ start: (Time.now - (5 * 60)).utc.iso8601, # start 5 minutes ago
33
+ expiry: (Time.now + 60 * 60 * 2).utc.iso8601 # expire in 2 hours
34
+ )
35
+
36
+ # Stream the file without loading it all into memory
37
+ download = open(expiring_url)
38
+ IO.copy_stream(download, local_path)
20
39
  end
21
40
 
22
41
  def upload(storage_path, local_path)
@@ -59,7 +59,6 @@ class Cnvrg::Helpers::Agent
59
59
  not File.exists? file
60
60
  end
61
61
  return true if file_doesnt_exists.blank?
62
- log_internal("Can't find file #{file_doesnt_exists}, stopping the job")
63
62
  return false
64
63
  end
65
64
  true
@@ -180,11 +179,17 @@ class Cnvrg::Helpers::Agent
180
179
  end
181
180
  end
182
181
  @exit_status = $?.exitstatus
182
+ rescue NoMethodError => e
183
+ log_internal("No Method Error: #{e}", level: LogLevel::ERROR)
184
+ @exit_status = 129
183
185
  rescue Timeout::Error
184
186
  Process.kill(0, @pid)
185
187
  @errors << {log: "Command timed out!", timestamp: Time.now}
186
188
  log_internal("Command timed out!", level: LogLevel::ERROR)
187
189
  @exit_status = 124
190
+ rescue => e
191
+ log_internal("Error: #{e}", level: LogLevel::ERROR)
192
+ @exit_status = 129
188
193
  ensure
189
194
  retry_command if @retries != 0 and @exit_status !=0
190
195
  @exit_status
@@ -89,7 +89,7 @@ class Cnvrg::Helpers::Executer
89
89
  while agent_id.blank? or main_id.blank?
90
90
  grep_by = @job_id
91
91
  grep_by = "$(hostname)" if ENV['KUBERNETES_PORT'].present?
92
- cntrs = `docker ps --format "table {{.ID}},{{.Names}}" | grep -i #{grep_by}`.split("\n").map{|x| x.strip}
92
+ cntrs = `docker ps --format "table {{.ID}},{{.Names}}" 2> /dev/null | grep -i #{grep_by}`.split("\n").map{|x| x.strip}
93
93
  agent_id = cntrs.find{|container_name| container_name.include? "agent"}.split(",").first rescue nil
94
94
  main_id = cntrs.find{|container_name| container_name.include? @main_name}.split(",").first rescue nil
95
95
  sleep(2)
@@ -168,6 +168,9 @@ class Cnvrg::Helpers::Executer
168
168
  while !success and retries < 100
169
169
  begin
170
170
  resp = Cnvrg::API.request(activity_url, "PUT", {stats: executer_stats})
171
+ if !resp
172
+ raise StandardError.new("Failed to send request to server")
173
+ end
171
174
  machine_activity = resp["machine_activity"]
172
175
  success = true
173
176
  puts("Connected to server")
@@ -216,6 +219,7 @@ class Cnvrg::Helpers::Executer
216
219
  def wait_for_main
217
220
  copy_file_to_main
218
221
  start_tiny_if_missing
222
+ retries = 0
219
223
  puts("Waiting for main container")
220
224
  STDOUT.flush
221
225
  got_response = false
@@ -233,9 +237,12 @@ class Cnvrg::Helpers::Executer
233
237
  got_response = true
234
238
  end
235
239
  rescue => e
236
- puts("Failed to connect to main")
237
- puts(e)
238
- STDOUT.flush
240
+ retries += 1
241
+ if retries > 3
242
+ puts("Failed to connect to main")
243
+ puts(e.message)
244
+ STDOUT.flush
245
+ end
239
246
  sleep(0.1)
240
247
  next
241
248
  end
@@ -265,13 +272,30 @@ class Cnvrg::Helpers::Executer
265
272
  end
266
273
 
267
274
  def execute_cmds
268
- pids = []
275
+ pids_by_slug = {}
269
276
  while true
270
277
  if @commands_q.empty?
271
278
  sleep(5)
272
279
  next
273
280
  end
274
281
  cmd = @commands_q.pop.symbolize_keys
282
+
283
+ if cmd[:wait_slug].present?
284
+ if pids_by_slug[cmd[:wait_slug]].present?
285
+ other_pid = pids_by_slug[cmd[:wait_slug]]
286
+ begin
287
+ Process.waitpid(other_pid, Process::WNOHANG)
288
+ running = true
289
+ rescue Errno::ECHILD => e
290
+ running = false
291
+ end
292
+ if running
293
+ @commands_q.push(cmd)
294
+ sleep(5)
295
+ next
296
+ end
297
+ end
298
+ end
275
299
  command_json = Cnvrg::API.request([activity_url, "commands", cmd[:slug]].join('/'), "GET")
276
300
 
277
301
  cmd_status = command_json["status"] rescue ""
@@ -288,10 +312,9 @@ class Cnvrg::Helpers::Executer
288
312
  else
289
313
  Process.detach(pid)
290
314
  end
291
- pids << pid
315
+ pids_by_slug[cmd[:slug]] = pid
292
316
  ######
293
317
  end
294
- pids
295
318
  end
296
319
 
297
320
  def merge_log_block(logs)
@@ -303,7 +326,7 @@ class Cnvrg::Helpers::Executer
303
326
  pod_name = `hostname`.strip rescue nil
304
327
  node_name = nil
305
328
  if pod_name.present?
306
- pod_describe = `kubectl get pod #{pod_name} -o json` rescue nil
329
+ pod_describe = `kubectl get pod #{pod_name} -o json 2> /dev/null` rescue nil
307
330
  pod_describe = JSON.parse(pod_describe) rescue {}
308
331
  node_name = pod_describe["spec"]["nodeName"] rescue nil
309
332
  end
@@ -366,4 +389,4 @@ class Cnvrg::Helpers::Executer
366
389
  conn.options.open_timeout = open_timeout
367
390
  conn
368
391
  end
369
- end
392
+ end
data/lib/cnvrg/project.rb CHANGED
@@ -829,12 +829,12 @@ module Cnvrg
829
829
  Cnvrg::API.request("users/#{@owner}/projects/#{@slug}/jobs/#{job_type.underscore}/#{job_id}/set_started", "POST", {job_type: job_type, job_id: job_id})
830
830
  end
831
831
 
832
- def self.stop_if_project_present(project_home, project_name)
832
+ def self.stop_if_project_present(project_home, project_name, owner)
833
833
  cli = Cnvrg::CLI.new()
834
834
  config = YAML.load_file(project_home + "/.cnvrg/config.yml")
835
835
  local_commit = YAML.load_file(project_home + "/.cnvrg/idx.yml")[:commit] rescue nil
836
836
  return if local_commit.blank?
837
- if config[:project_name] == project_name
837
+ if config[:project_name] == project_name && config[:owner] == owner
838
838
  cli.log_message("Project already present, clone aborted")
839
839
  exit(0)
840
840
  end
data/lib/cnvrg/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Cnvrg
2
- VERSION = '2.0.20'
2
+ VERSION = '2.1.6'
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cnvrg
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.20
4
+ version: 2.1.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Yochay Ettun
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2022-03-02 00:00:00.000000000 Z
13
+ date: 2022-08-09 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: bundler