cnvrg 2.0.20 → 2.1.6

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 7d5e7aa6c49c6bbe9ce99dcc85f90825d67ef8a4d066c84dd5978da99afb116b
4
- data.tar.gz: 81e5d10c09beddd7da049ac29d2bd71fc611f3c2ca5bd4b2e2431457573b683d
3
+ metadata.gz: 296eba8c8dab87e1a16b7980c1e80b013be95af52cfc184a12cf366f676a3a2b
4
+ data.tar.gz: '08ad62abd898bb6bb1a9099237f5ebd854f87359ecc571036ce351b259127d78'
5
5
  SHA512:
6
- metadata.gz: c338755e158c6e1c03dc84c16f3015c1801875d98e58ad788da47b5cf2374b06fa62445279ddd9585b9e2dd3a62bee381af968f1e5925c2b74a9eedf0652b17b
7
- data.tar.gz: 674fe36d75e00c70919067c2539a85c3c56c6850852faf00781f8419a3a3a1dabb3d925be56316a9d20903a869300b761a893f01a0cea94eef6703f99c34d5a8
6
+ metadata.gz: f5174c705ed765c76538401ea14ea2c14e267d98836b3d8e8b66a72d31d4f7ee2eeb313fbd080cd15a513bb4149f907aef075be2d7f6bab4b96afefb4ba5f341
7
+ data.tar.gz: 355b440e9a009571e2f097599d14418fcdbe0a79cfaae8473637722745923f8b138b4fdbc0e3ee60107122835f98468811661b770ffee4008f8b141250361fce
data/Readme.md CHANGED
@@ -80,4 +80,22 @@
80
80
  * DEV-13271 - Bug: CLI - on upload folders in working dir containing .cnvrg, dir not uploading - dir is on .cnvrgignore
81
81
  ## Version v2.0.20
82
82
  2022-02-27
83
- * DEV-12288 - Bug: wrong error message when upload fails
83
+ * DEV-12288 - Bug: wrong error message when upload fails
84
+ ## Version v2.1.1
85
+ 2022-05-01
86
+ ## Version v2.1.2
87
+ 2022-05-08
88
+ * DEV-13815 - Bug: CLI - remove "cnvrg data sync" command
89
+ ## Version v2.1.3
90
+ 2022-05-16
91
+ * DEV-13981 - Bug: CLI - dataset query clone stuck at 50% then "Killed"
92
+ ## Version v2.1.4
93
+ 2022-05-22
94
+ * DEV-14182 - Bug: CLI - hide 'data upload' command
95
+ ## Version v2.1.5
96
+ 2022-07-31
97
+ * DEV-14244 - Bug: CLI - "failed to upload ongoing stats" due to NaN in float
98
+ * DEV-14633 - Bug: End sync did not complete, causing the experiment to get stuck in "terminating"
99
+ ## Version v2.1.6
100
+ 2022-08-09
101
+ * DEV-14682 - Bug: git-Walki: CLI/SDK experiments go into debug mode for GitHub+SSH integrated projects
data/lib/cnvrg/cli.rb CHANGED
@@ -1008,6 +1008,7 @@ module Cnvrg
1008
1008
  abs_path = dataset_home + "/" + relative_path_dir
1009
1009
  abs_path = dataset_home if flatten
1010
1010
  fullpath = abs_path + "/" + file_name
1011
+ fullpath = fullpath.gsub("//", "/")
1011
1012
 
1012
1013
  begin
1013
1014
  FileUtils.mkdir_p(abs_path) unless File.exist? (fullpath)
@@ -1018,14 +1019,14 @@ module Cnvrg
1018
1019
  begin
1019
1020
  unless File.exist?(fullpath)
1020
1021
  downloader.safe_operation("#{abs_path}/#{file_name}") do
1021
- File.open(fullpath, "w") { |file| file.write open(f["url"]).read }
1022
+ download = open(f["url"])
1023
+ IO.copy_stream(download, fullpath)
1022
1024
  end
1023
1025
  end
1024
1026
  rescue => e
1025
1027
  log_message("Could not download file: #{f["fullpath"]}", Thor::Shell::Color::RED)
1026
1028
  exit(1)
1027
1029
  end
1028
-
1029
1030
  end
1030
1031
  #@executer.set_dataset_status(dataset: dataset.slug, status: "cloned") if @executer.present?
1031
1032
  rescue Interrupt
@@ -1847,7 +1848,7 @@ module Cnvrg
1847
1848
  log_start(__method__, args, options)
1848
1849
  project_home = Dir.pwd
1849
1850
  soft = options["soft"] || false
1850
- Project.stop_if_project_present(project_home, slug) if soft
1851
+ Project.stop_if_project_present(project_home, slug, owner) if soft
1851
1852
  clone_resp = Project.clone_dir_remote(slug, owner, slug,true)
1852
1853
  exit 1 if not clone_resp
1853
1854
  idx_status = Project.new(get_project_home).generate_idx(files:[])
@@ -1917,7 +1918,7 @@ module Cnvrg
1917
1918
  clone_resp = false
1918
1919
  project_home = Dir.pwd
1919
1920
 
1920
- Project.stop_if_project_present(project_home, project_name) if soft
1921
+ Project.stop_if_project_present(project_home, project_name, owner) if soft
1921
1922
 
1922
1923
  if remote and !git
1923
1924
  clone_resp = Project.clone_dir_remote(slug, owner, project_name,git)
@@ -2061,6 +2062,8 @@ module Cnvrg
2061
2062
  method_option :init, :type => :boolean, :aliases => ["--initial"], :desc => "initial sync", :default => false
2062
2063
  method_option :message, :type => :string, :aliases => ["--message"], :desc => "create commit with message", :default => nil
2063
2064
  def sync_data_new(new_branch, force, verbose, commit, all_files, tags ,parallel, chunk_size, init, message)
2065
+ log_message("This method is deprecated, please use 'data put' instead. for more info visit our docs: https://app.cnvrg.io/docs/cli/install.html#upload-files-to-a-dataset", Thor::Shell::Color::BLUE, !options["verbose"])
2066
+ return
2064
2067
  verify_logged_in(true)
2065
2068
  log_start(__method__, args, options)
2066
2069
  log_message('Syncing dataset', Thor::Shell::Color::BLUE, !options["verbose"])
@@ -2085,6 +2088,8 @@ module Cnvrg
2085
2088
  method_option :message, :type => :string, :aliases => ["--message"], :desc => "create commit with message", :default => nil
2086
2089
 
2087
2090
  def upload_data_new(new_branch, verbose, sync, force, tags, chunk_size, message:nil, total_deleted: 0, total_downloaded: 0)
2091
+ log_message("This method is deprecated, please use 'data put' instead. for more info visit our docs: https://app.cnvrg.io/docs/cli/install.html#upload-files-to-a-dataset", Thor::Shell::Color::BLUE, !options["verbose"])
2092
+ return
2088
2093
  begin
2089
2094
  commit, files_list = invoke :start_commit_data,[], :new_branch=> new_branch, :direct=>false, :force =>force, :chunk_size => chunk_size, :message => message
2090
2095
  files_to_upload, upload_errors = invoke :upload_data_files,[commit, files_list: files_list],:new_branch=>new_branch, :verbose =>verbose, :force =>force, :sync =>sync, :chunk_size => chunk_size
@@ -3416,9 +3421,6 @@ module Cnvrg
3416
3421
  end
3417
3422
 
3418
3423
  end_commit = @project.last_local_commit
3419
- if end_commit.present?
3420
- @exp.job_log(["Experiment end commit: #{end_commit}"])
3421
- end
3422
3424
 
3423
3425
  # log_thread.join
3424
3426
  stats_thread.join if docker_stats
@@ -4731,8 +4733,14 @@ module Cnvrg
4731
4733
  end
4732
4734
  end
4733
4735
  else
4734
- timestamp, value = data_result&.first&.dig('value')
4735
- stat_value = value.present? ? ("%.2f" % value) : 0 # converting 34.685929244444445 to 34.69
4736
+ begin
4737
+ timestamp, value = data_result&.first&.dig('value')
4738
+ stat_value = value.present? ? ("%.2f" % value) : 0 # converting 34.685929244444445 to 34.69
4739
+ rescue => e
4740
+ Cnvrg::Logger.log_info("Failed converting string into float with error: #{e.message}")
4741
+ Cnvrg::Logger.log_error(e)
4742
+ stat_value = 0
4743
+ end
4736
4744
  stat_value = stat_value.to_i == stat_value.to_f ? stat_value.to_i : stat_value.to_f # converting 34.00 to 34
4737
4745
  if query_name.include? 'block'
4738
4746
  stats['block_io'] = {} if stats['block_io'].blank?
@@ -5013,10 +5021,10 @@ module Cnvrg
5013
5021
  else
5014
5022
  log_message("#{exp_name} is running should get logs", Thor::Shell::Color::BLUE)
5015
5023
  success, num_of_new_files = Cnvrg::Helpers.get_experiment_events_log_via_kubectl(exp, namespace)
5016
- if !success and exp["last_successful_commit"].present? and !copied_commits.include?(exp["last_successful_commit"])
5024
+ if !success and exp["last_successful_commit"].present? and !copied_commits.include?(exp["last_successful_commit"]["sha1"])
5017
5025
  log_message("Failed to get kube files, using last commit", Thor::Shell::Color::BLUE)
5018
- num_of_new_files = Cnvrg::Helpers.get_experiment_events_log_from_server(exp, @project, commit: exp["last_successful_commit"])
5019
- copied_commits << exp["last_successful_commit"]
5026
+ num_of_new_files = Cnvrg::Helpers.get_experiment_events_log_from_server(exp, @project, commit: exp["last_successful_commit"]["sha1"])
5027
+ copied_commits << exp["last_successful_commit"]["sha1"]
5020
5028
  end
5021
5029
  end
5022
5030
 
data/lib/cnvrg/data.rb CHANGED
@@ -81,7 +81,7 @@ module Cnvrg
81
81
  end
82
82
  end
83
83
 
84
- desc "data upload", "Upload files from local dataset directory to remote server"
84
+ desc "data upload", "Upload files from local dataset directory to remote server", :hide => true
85
85
  method_option :verbose, :type => :boolean, :aliases => ["-v"], :default => false
86
86
  method_option :new_branch, :type => :boolean, :aliases => ["-nb"], :desc => "create new branch of commits"
87
87
  method_option :force, :type => :boolean, :aliases => ["-f","--force"], :default => false
@@ -101,7 +101,7 @@ module Cnvrg
101
101
  message = options["message"]
102
102
  cli.upload_data_new(new_branch, verbose, sync, force, tags, chunk_size, message:message)
103
103
  end
104
- desc 'data sync', 'Synchronise local dataset directory with remote server'
104
+ desc 'data sync', 'Synchronise local dataset directory with remote server', :hide => true
105
105
  method_option :new_branch, :type => :boolean, :aliases => ["-nb"], :desc => "create new branch of commits"
106
106
  method_option :force, :type => :boolean, :aliases => ["-f","--force"], :default => false
107
107
  method_option :verbose, :type => :boolean, :aliases => ["-v"], :default => false
@@ -1393,13 +1393,13 @@ module Cnvrg
1393
1393
  in_threads: threads,
1394
1394
  isolation: true
1395
1395
  }
1396
+
1396
1397
  Parallel.map(files["keys"], parallel_options) do |f|
1397
1398
  begin
1398
1399
  file_path = f['name']
1399
1400
  file_path = File.basename(f['name']) if flatten
1400
1401
  local_path = @dataset.local_path + '/' + file_path
1401
1402
  Cnvrg::Logger.log_info("Downloading #{local_path}")
1402
- progressbar.progress += 1 if progressbar.present?
1403
1403
  if local_path.end_with? "/"
1404
1404
  @downloader.mkdir(local_path, recursive: true)
1405
1405
  next
@@ -1420,6 +1420,7 @@ module Cnvrg
1420
1420
  end
1421
1421
 
1422
1422
  resp = @downloader.safe_download(storage_path, local_path)
1423
+ progressbar.progress += 1 if progressbar.present?
1423
1424
  Cnvrg::Logger.log_info("Download #{local_path} success resp: #{resp}")
1424
1425
  rescue => e
1425
1426
  Cnvrg::Logger.log_error(e)
@@ -1,4 +1,6 @@
1
+ require 'open-uri'
1
2
  require 'azure/storage/blob'
3
+ require 'azure/storage/common/core'
2
4
 
3
5
  module Cnvrg
4
6
  module Downloader
@@ -13,10 +15,27 @@ module Cnvrg
13
15
 
14
16
  def download(storage_path, local_path, decrypt: true)
15
17
  prepare_download(local_path)
18
+
16
19
  storage_path = Cnvrg::Helpers.decrypt(@key, @iv, storage_path) if decrypt
17
- blob, content = client.get_blob(@container, storage_path)
18
- ::File.open(local_path, 'wb') {|f| f.write(content)}
19
- blob
20
+
21
+ # We generate a temp uri in order to stream the file instead of using "get_blob" that overflows memory
22
+ uri = client.send(:blob_uri, @container, storage_path)
23
+
24
+ generator = Azure::Storage::Common::Core::Auth::SharedAccessSignature.new(@account_name, @access_key)
25
+
26
+ expiring_url = generator.signed_uri(
27
+ uri,
28
+ false,
29
+ service: 'b',
30
+ resource: 'b',
31
+ permissions: 'r',
32
+ start: (Time.now - (5 * 60)).utc.iso8601, # start 5 minutes ago
33
+ expiry: (Time.now + 60 * 60 * 2).utc.iso8601 # expire in 2 hours
34
+ )
35
+
36
+ # Stream the file without loading it all into memory
37
+ download = open(expiring_url)
38
+ IO.copy_stream(download, local_path)
20
39
  end
21
40
 
22
41
  def upload(storage_path, local_path)
@@ -59,7 +59,6 @@ class Cnvrg::Helpers::Agent
59
59
  not File.exists? file
60
60
  end
61
61
  return true if file_doesnt_exists.blank?
62
- log_internal("Can't find file #{file_doesnt_exists}, stopping the job")
63
62
  return false
64
63
  end
65
64
  true
@@ -180,11 +179,17 @@ class Cnvrg::Helpers::Agent
180
179
  end
181
180
  end
182
181
  @exit_status = $?.exitstatus
182
+ rescue NoMethodError => e
183
+ log_internal("No Method Error: #{e}", level: LogLevel::ERROR)
184
+ @exit_status = 129
183
185
  rescue Timeout::Error
184
186
  Process.kill(0, @pid)
185
187
  @errors << {log: "Command timed out!", timestamp: Time.now}
186
188
  log_internal("Command timed out!", level: LogLevel::ERROR)
187
189
  @exit_status = 124
190
+ rescue => e
191
+ log_internal("Error: #{e}", level: LogLevel::ERROR)
192
+ @exit_status = 129
188
193
  ensure
189
194
  retry_command if @retries != 0 and @exit_status !=0
190
195
  @exit_status
@@ -89,7 +89,7 @@ class Cnvrg::Helpers::Executer
89
89
  while agent_id.blank? or main_id.blank?
90
90
  grep_by = @job_id
91
91
  grep_by = "$(hostname)" if ENV['KUBERNETES_PORT'].present?
92
- cntrs = `docker ps --format "table {{.ID}},{{.Names}}" | grep -i #{grep_by}`.split("\n").map{|x| x.strip}
92
+ cntrs = `docker ps --format "table {{.ID}},{{.Names}}" 2> /dev/null | grep -i #{grep_by}`.split("\n").map{|x| x.strip}
93
93
  agent_id = cntrs.find{|container_name| container_name.include? "agent"}.split(",").first rescue nil
94
94
  main_id = cntrs.find{|container_name| container_name.include? @main_name}.split(",").first rescue nil
95
95
  sleep(2)
@@ -168,6 +168,9 @@ class Cnvrg::Helpers::Executer
168
168
  while !success and retries < 100
169
169
  begin
170
170
  resp = Cnvrg::API.request(activity_url, "PUT", {stats: executer_stats})
171
+ if !resp
172
+ raise StandardError.new("Failed to send request to server")
173
+ end
171
174
  machine_activity = resp["machine_activity"]
172
175
  success = true
173
176
  puts("Connected to server")
@@ -216,6 +219,7 @@ class Cnvrg::Helpers::Executer
216
219
  def wait_for_main
217
220
  copy_file_to_main
218
221
  start_tiny_if_missing
222
+ retries = 0
219
223
  puts("Waiting for main container")
220
224
  STDOUT.flush
221
225
  got_response = false
@@ -233,9 +237,12 @@ class Cnvrg::Helpers::Executer
233
237
  got_response = true
234
238
  end
235
239
  rescue => e
236
- puts("Failed to connect to main")
237
- puts(e)
238
- STDOUT.flush
240
+ retries += 1
241
+ if retries > 3
242
+ puts("Failed to connect to main")
243
+ puts(e.message)
244
+ STDOUT.flush
245
+ end
239
246
  sleep(0.1)
240
247
  next
241
248
  end
@@ -265,13 +272,30 @@ class Cnvrg::Helpers::Executer
265
272
  end
266
273
 
267
274
  def execute_cmds
268
- pids = []
275
+ pids_by_slug = {}
269
276
  while true
270
277
  if @commands_q.empty?
271
278
  sleep(5)
272
279
  next
273
280
  end
274
281
  cmd = @commands_q.pop.symbolize_keys
282
+
283
+ if cmd[:wait_slug].present?
284
+ if pids_by_slug[cmd[:wait_slug]].present?
285
+ other_pid = pids_by_slug[cmd[:wait_slug]]
286
+ begin
287
+ Process.waitpid(other_pid, Process::WNOHANG)
288
+ running = true
289
+ rescue Errno::ECHILD => e
290
+ running = false
291
+ end
292
+ if running
293
+ @commands_q.push(cmd)
294
+ sleep(5)
295
+ next
296
+ end
297
+ end
298
+ end
275
299
  command_json = Cnvrg::API.request([activity_url, "commands", cmd[:slug]].join('/'), "GET")
276
300
 
277
301
  cmd_status = command_json["status"] rescue ""
@@ -288,10 +312,9 @@ class Cnvrg::Helpers::Executer
288
312
  else
289
313
  Process.detach(pid)
290
314
  end
291
- pids << pid
315
+ pids_by_slug[cmd[:slug]] = pid
292
316
  ######
293
317
  end
294
- pids
295
318
  end
296
319
 
297
320
  def merge_log_block(logs)
@@ -303,7 +326,7 @@ class Cnvrg::Helpers::Executer
303
326
  pod_name = `hostname`.strip rescue nil
304
327
  node_name = nil
305
328
  if pod_name.present?
306
- pod_describe = `kubectl get pod #{pod_name} -o json` rescue nil
329
+ pod_describe = `kubectl get pod #{pod_name} -o json 2> /dev/null` rescue nil
307
330
  pod_describe = JSON.parse(pod_describe) rescue {}
308
331
  node_name = pod_describe["spec"]["nodeName"] rescue nil
309
332
  end
@@ -366,4 +389,4 @@ class Cnvrg::Helpers::Executer
366
389
  conn.options.open_timeout = open_timeout
367
390
  conn
368
391
  end
369
- end
392
+ end
data/lib/cnvrg/project.rb CHANGED
@@ -829,12 +829,12 @@ module Cnvrg
829
829
  Cnvrg::API.request("users/#{@owner}/projects/#{@slug}/jobs/#{job_type.underscore}/#{job_id}/set_started", "POST", {job_type: job_type, job_id: job_id})
830
830
  end
831
831
 
832
- def self.stop_if_project_present(project_home, project_name)
832
+ def self.stop_if_project_present(project_home, project_name, owner)
833
833
  cli = Cnvrg::CLI.new()
834
834
  config = YAML.load_file(project_home + "/.cnvrg/config.yml")
835
835
  local_commit = YAML.load_file(project_home + "/.cnvrg/idx.yml")[:commit] rescue nil
836
836
  return if local_commit.blank?
837
- if config[:project_name] == project_name
837
+ if config[:project_name] == project_name && config[:owner] == owner
838
838
  cli.log_message("Project already present, clone aborted")
839
839
  exit(0)
840
840
  end
data/lib/cnvrg/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Cnvrg
2
- VERSION = '2.0.20'
2
+ VERSION = '2.1.6'
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cnvrg
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.20
4
+ version: 2.1.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Yochay Ettun
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2022-03-02 00:00:00.000000000 Z
13
+ date: 2022-08-09 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: bundler