cnvrg 1.11.21 → 1.11.27

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: bc1bc67b5f62516c87e90a9b71d7d4921236eb8c13446b0c8ddac44ef9cf8544
4
- data.tar.gz: '09b88539fc1fb041151d7fb1ec9931d0ddccc6e7cc50f958467d6e9fb976ba5d'
3
+ metadata.gz: a0ea5180b8920a78032ea60f9ccedea54357c3207720ff802b5c323aac6e4773
4
+ data.tar.gz: 54f0e631a050d0232921c0103aba65cd6c0ca48085161ee2137bf3ef975550ea
5
5
  SHA512:
6
- metadata.gz: b7206c5c43fec47c6519e3fa9c798cbef0232ae0d9d70e708499d2312bbd5af802d6fc62e3b02f0f137f6322cd3486848fd9effbcbcca521171062ad22830e27
7
- data.tar.gz: 27905ef2f218a241db92e78533390dc0b3b3fa6aa8242a925efc106eacf43d2adc11fdfc12f978ce1155fa6e1948ab9f98026ca163cb57db3729ee6080b15481
6
+ metadata.gz: 2805218f0460fe9bc1315f489d48fb37ea9795836f379eb5b6ab071c42a10ee76d16decb24c62c518356b35ca58489a9ba7637240c842459ad1ed6a94799633f
7
+ data.tar.gz: ed5bf8a1b5867c2ab6705dede44534074771387c31e8049c12f1cbf5f538c72e6cedbc910378db09ae6f2e1ca23524a54fd4109847c1aa02f4538a341ec4c68b
data/lib/cnvrg/api.rb CHANGED
@@ -31,6 +31,10 @@ module Cnvrg
31
31
  end
32
32
  def self.request(resource, method = 'GET', data = {}, parse_request = true)
33
33
  resource = URI::encode resource
34
+
35
+ # We need to remove all double slashes from the url to work with the proxy
36
+ resource = resource.gsub(/[\/]{2,}/, "/").gsub("https:/", "https://").gsub("http:/", "http://")
37
+
34
38
  begin
35
39
  n = Netrc.read
36
40
  rescue => e
data/lib/cnvrg/cli.rb CHANGED
@@ -858,7 +858,7 @@ module Cnvrg
858
858
  method_option :read, :type => :boolean, :aliases => ["-r", "--read"], :default => false
859
859
  method_option :remote, :type => :boolean, :aliases => ["-h", "--remote"], :default => false
860
860
  method_option :soft, :type => :boolean, :aliases => ["-s", "--soft"], :default => false, :hide => true
861
- def clone_data(dataset_url, only_tree=false, commit=nil, query=nil, read=false, remote=false, flatten: false, relative: false, soft: false, threads: 15)
861
+ def clone_data(dataset_url, only_tree=false, commit=nil, query=nil, read=false, remote=false, flatten: false, relative: false, soft: false, threads: 15, cache_link: false)
862
862
  begin
863
863
  verify_logged_in(false)
864
864
  log_start(__method__, args, options)
@@ -904,7 +904,7 @@ module Cnvrg
904
904
 
905
905
  commit = response["result"]["commit"]
906
906
  files_count = response["result"]["file_count"]
907
- files = @files.get_clone_chunk(commit: commit)
907
+ files = @files.get_clone_chunk(commit: commit, cache_link: cache_link)
908
908
  downloaded_files = 0
909
909
  progressbar = ProgressBar.create(:title => "Download Progress",
910
910
  :progress_mark => '=',
@@ -917,7 +917,7 @@ module Cnvrg
917
917
 
918
918
  while files['keys'].length > 0
919
919
  Cnvrg::Logger.log_info("download multiple files, #{downloaded_files.size} files downloaded")
920
- @files.download_multiple_files_s3(files, @dataset.local_path, progressbar: progressbar, read_only: read, flatten: flatten, threads: threads)
920
+ @files.download_multiple_files_s3(files, @dataset.local_path, progressbar: progressbar, read_only: read, flatten: flatten, threads: threads, cache_link: cache_link)
921
921
 
922
922
  downloaded_files += files['keys'].length
923
923
  files = @files.get_clone_chunk(commit: commit, latest_id: files['latest'])
@@ -1201,15 +1201,18 @@ module Cnvrg
1201
1201
  end
1202
1202
 
1203
1203
  desc '', '', :hide => true
1204
- def data_put(dataset_url, files: [], dir: '', commit: '', chunk_size: 1000, force: false, override: false, threads: 15, message: nil)
1204
+ def data_put(dataset_url, files: [], dir: '', commit: '', chunk_size: 1000, force: false, override: false, threads: 15, message: nil, auto_cache: false, external_disk: nil)
1205
1205
  begin
1206
1206
  verify_logged_in(false)
1207
1207
  log_start(__method__, args, options)
1208
-
1208
+ if auto_cache && external_disk.blank?
1209
+ raise SignalException.new(1, "for auto caching external disk is required")
1210
+ end
1209
1211
  owner, slug = get_owner_slug(dataset_url)
1210
1212
  @dataset = Dataset.new(dataset_info: {:owner => owner, :slug => slug})
1211
1213
  @datafiles = Cnvrg::Datafiles.new(owner, slug, dataset: @dataset)
1212
1214
  @files = @datafiles.verify_files_exists(files)
1215
+ @files = @files.uniq { |t| t.gsub('./', '')}
1213
1216
 
1214
1217
  if @files.blank?
1215
1218
  raise SignalException.new(1, "Cant find files to upload, exiting.")
@@ -1227,7 +1230,7 @@ module Cnvrg
1227
1230
  Cnvrg::Logger.info("Put files in latest commit")
1228
1231
  response = @datafiles.last_valid_commit()
1229
1232
  unless response #means we failed in the start commit.
1230
- raise SignalException.new(1, "Cant put files into commit:#{commit}, check the dataset id and commitc")
1233
+ raise SignalException.new(1, "Cant put files into commit:#{commit}, check the dataset id and commit")
1231
1234
  end
1232
1235
  @commit = response['result']['sha1']
1233
1236
  else
@@ -1253,7 +1256,7 @@ module Cnvrg
1253
1256
  raise SignalException.new(1, res.msg)
1254
1257
  end
1255
1258
  Cnvrg::Logger.info("Saving commit on server")
1256
- res = @datafiles.end_commit(@commit,force, success: true, commit_type: "put")
1259
+ res = @datafiles.end_commit(@commit,force, success: true, commit_type: "put", auto_cache: auto_cache, external_disk: external_disk)
1257
1260
  msg = res['result']
1258
1261
  response = Cnvrg::Result.new(Cnvrg::CLI.is_response_success(res, true), msg)
1259
1262
  unless response.is_success?
@@ -1261,19 +1264,25 @@ module Cnvrg
1261
1264
  end
1262
1265
 
1263
1266
  log_message("Uploading files finished Successfully", Thor::Shell::Color::GREEN)
1267
+ if msg['cache_error'].present?
1268
+ log_message("Couldn't cache commit: #{msg['cache_error']}", Thor::Shell::Color::YELLOW)
1269
+ end
1264
1270
  rescue SignalException => e
1265
1271
  log_message(e.message, Thor::Shell::Color::RED)
1266
1272
  return false
1267
1273
  end
1268
1274
  end
1269
1275
 
1270
-
1271
1276
  desc '', '', :hide => true
1272
- def data_rm(dataset_url, regex_list: [], commit: '', message: nil)
1277
+ def data_rm(dataset_url, regex_list: [], commit: '', message: nil, auto_cache: false, external_disk: nil)
1273
1278
  begin
1274
1279
  verify_logged_in(false)
1275
1280
  log_start(__method__, args, options)
1276
1281
 
1282
+ if auto_cache && external_disk.blank?
1283
+ raise SignalException.new(1, "for auto caching external disk is required")
1284
+ end
1285
+
1277
1286
  owner, slug = get_owner_slug(dataset_url)
1278
1287
  @dataset = Dataset.new(dataset_info: {:owner => owner, :slug => slug})
1279
1288
  @datafiles = Cnvrg::Datafiles.new(owner, slug, dataset: @dataset)
@@ -1309,7 +1318,7 @@ module Cnvrg
1309
1318
  offset += chunk_size
1310
1319
  end
1311
1320
 
1312
- res = @datafiles.end_commit(@commit,false, success: true)
1321
+ res = @datafiles.end_commit(@commit,false, success: true, auto_cache: auto_cache, external_disk: external_disk)
1313
1322
  msg = res['result']
1314
1323
  response = Cnvrg::Result.new(Cnvrg::CLI.is_response_success(res, true), msg)
1315
1324
  unless response.is_success?
@@ -1317,6 +1326,9 @@ module Cnvrg
1317
1326
  end
1318
1327
 
1319
1328
  log_message("Deleting files finished Successfully", Thor::Shell::Color::GREEN)
1329
+ if msg['cache_error'].present?
1330
+ log_message("Couldn't cache commit: #{msg['cache_error']}", Thor::Shell::Color::YELLOW)
1331
+ end
1320
1332
  rescue SignalException => e
1321
1333
  log_message(e.message, Thor::Shell::Color::RED)
1322
1334
  return false
@@ -2308,7 +2320,6 @@ module Cnvrg
2308
2320
  @project = Project.new(get_project_home)
2309
2321
  chunk_size = chunk_size ? chunk_size : options["chunk_size"]
2310
2322
 
2311
-
2312
2323
  # Enable local/experiment exception logging
2313
2324
  suppress_exceptions = suppress_exceptions ? suppress_exceptions : options[:suppress_exceptions]
2314
2325
  if in_exp
@@ -2346,7 +2357,6 @@ module Cnvrg
2346
2357
  log_message("#{check} Project is up to date", Thor::Shell::Color::GREEN, (((options["sync"] or sync) and !direct) ? false : true))
2347
2358
  return true
2348
2359
  end
2349
- force = true
2350
2360
  end
2351
2361
 
2352
2362
  if ignore.nil? or ignore.empty?
data/lib/cnvrg/data.rb CHANGED
@@ -81,7 +81,6 @@ module Cnvrg
81
81
  end
82
82
  end
83
83
 
84
-
85
84
  desc "data upload", "Upload files from local dataset directory to remote server"
86
85
  method_option :verbose, :type => :boolean, :aliases => ["-v"], :default => false
87
86
  method_option :new_branch, :type => :boolean, :aliases => ["-nb"], :desc => "create new branch of commits"
@@ -155,6 +154,7 @@ module Cnvrg
155
154
  method_option :flatten, :type => :boolean, :aliases => ["-f", "--flatten"], :default => false
156
155
  method_option :soft, :type => :boolean, :aliases => ["-s", "--soft"], :default => false, :hide => true
157
156
  method_option :threads, :type => :numeric, :aliases => ["--threads"], :default => 15
157
+ method_option :cache_link, :type => :boolean, :aliases => ["--cache_link"], :default => false, :hide => true
158
158
  def clone(dataset_url)
159
159
  cli = Cnvrg::CLI.new()
160
160
  only_tree =options[:only_tree]
@@ -165,6 +165,7 @@ module Cnvrg
165
165
  soft = options[:soft]
166
166
  flatten = options[:flatten]
167
167
  threads = options[:threads]
168
+ cache_link = options[:cache_link]
168
169
  cli.clone_data(
169
170
  dataset_url,
170
171
  only_tree=only_tree,
@@ -175,7 +176,8 @@ module Cnvrg
175
176
  flatten: flatten,
176
177
  relative: options[:relative],
177
178
  soft: soft,
178
- threads: threads
179
+ threads: threads,
180
+ cache_link: cache_link
179
181
  )
180
182
  end
181
183
 
@@ -220,6 +222,8 @@ module Cnvrg
220
222
  method_option :threads, :type => :numeric, :aliases => ["-t","--threads"], :default => 15
221
223
  method_option :chunk_size, :type => :numeric, :aliases => ["-cs","--chunk"], :default => 1000
222
224
  method_option :message, :type => :string, :aliases => ["--message"], :desc => "create commit with message", :default => nil
225
+ method_option :auto_cache, :type => :boolean, :aliases => ["--auto_cache"], :desc => "auto_cache", :default => false
226
+ method_option :external_disk, :type => :string, :aliases => ["--external_disk"], :desc => "external_disk_title", :default => nil
223
227
 
224
228
  def put(dataset_url, *files)
225
229
  cli = Cnvrg::CLI.new()
@@ -231,6 +235,8 @@ module Cnvrg
231
235
  message = options[:message]
232
236
  threads = options[:threads]
233
237
  chunk_size = options[:chunk_size]
238
+ auto_cache = options[:auto_cache]
239
+ external_disk = options[:external_disk]
234
240
  cli.data_put(
235
241
  dataset_url,
236
242
  files: files,
@@ -240,16 +246,28 @@ module Cnvrg
240
246
  override: override,
241
247
  threads: threads,
242
248
  chunk_size: chunk_size,
243
- message: message
249
+ message: message,
250
+ auto_cache: auto_cache,
251
+ external_disk: external_disk
244
252
  )
245
253
  end
246
254
 
247
255
  desc 'data rm DATASET_URL FILES_PREFIX', 'Delete selected files from remote server'
248
256
  method_option :message, :type => :string, :aliases => ["--message"], :desc => "create commit with message", :default => nil
257
+ method_option :auto_cache, :type => :boolean, :aliases => ["--auto_cache"], :desc => "auto_cache", :default => false
258
+ method_option :external_disk, :type => :string, :aliases => ["--external_disk"], :desc => "external_disk_title", :default => nil
249
259
  def rm(dataset_url, *regex_list)
250
260
  cli = Cnvrg::CLI.new()
251
261
  message = options[:message]
252
- cli.data_rm(dataset_url, regex_list: regex_list, message: message)
262
+ auto_cache = options[:auto_cache]
263
+ external_disk = options[:external_disk]
264
+ cli.data_rm(
265
+ dataset_url,
266
+ regex_list: regex_list,
267
+ message: message,
268
+ auto_cache: auto_cache,
269
+ external_disk: external_disk
270
+ )
253
271
  end
254
272
 
255
273
  desc 'data clone_query --query=QUERY_SLUG DATASET_URL', 'Clone dataset with specific query'
@@ -47,6 +47,7 @@ module Cnvrg
47
47
  file = file[0..-2] if file.end_with? '/'
48
48
  if File.exists? file
49
49
  if File.directory? file
50
+ paths << file unless file == '.'
50
51
  paths += Dir.glob("#{file}/**/*")
51
52
  else
52
53
  paths << file
@@ -344,18 +345,20 @@ module Cnvrg
344
345
  cli = CLI.new
345
346
  cli.log_message("Using #{threads} threads with chunk size of #{chunk_size}.", Thor::Shell::Color::GREEN)
346
347
 
347
- progressbar = create_progressbar("Upload Progress", files.size)
348
+ num_files = files.size
349
+ progressbar = create_progressbar("Upload Progress", num_files)
348
350
  cli = CLI.new
349
351
 
350
352
  # Vars to handle the parallelism
351
353
  progress_mutex = Mutex.new
352
354
  file_queue = Queue.new
353
355
  progress_queue = Queue.new
356
+ dirs_queue = Queue.new
354
357
  worker_threads = []
355
358
  progress_threads = []
359
+ old_api = false
356
360
 
357
361
  # Vars to keep track of uploaded files and directories
358
- dirs = []
359
362
  uploaded_files = []
360
363
 
361
364
  begin
@@ -378,6 +381,36 @@ module Cnvrg
378
381
  end
379
382
  end
380
383
 
384
+ dir_thread = Thread.new do
385
+ dirs_to_create = []
386
+ loop do
387
+ dir = dirs_queue.deq(non_block: true) rescue nil
388
+ if dir.nil? && !progressbar.finished?
389
+ sleep 0.2
390
+ Cnvrg::Logger.info("directories thread status: progressbar.finished? #{progressbar.finished?} || dirs_queue.empty? #{dirs_queue.empty?} #{dirs_queue.size} || dirs_to_create.empty? #{dirs_to_create.empty?} #{dirs_to_create.size}")
391
+ else
392
+ dirs_to_create << dir
393
+
394
+ if dirs_to_create.size >= 1000 || progressbar.finished?
395
+ resp = Cnvrg::API.request(@base_resource + "create_dirs", "POST", { dirs: dirs_to_create, commit_sha1: commit_sha1 })
396
+ Cnvrg::Logger.info("uploaded directories chunk, finished with #{resp}")
397
+ if resp == false # if resp is false it means 404 which is old server
398
+ old_api = true
399
+ break
400
+ end
401
+ unless Cnvrg::CLI.is_response_success(resp, false)
402
+ dirs_to_create = []
403
+ time = Time.current
404
+ Cnvrg::Logger.log_error_message("Failed to create dirs: #{time}, #{resp.try(:fetch, "message")}")
405
+ next
406
+ end
407
+ dirs_to_create = []
408
+ end
409
+ break if progressbar.finished? && dirs_queue.empty? && dirs_to_create.empty?
410
+ end
411
+ end
412
+ end
413
+
381
414
  # init the thread that handles the file upload progress and saving them in the server
382
415
  threads.times do |i|
383
416
  progress_threads[i] = Thread.new do
@@ -385,52 +418,46 @@ module Cnvrg
385
418
  file = progress_queue.deq(non_block: true) rescue nil # to prevent deadlocks
386
419
  unless file.nil?
387
420
  blob_ids = []
388
- dirs_to_upload = []
389
-
390
421
  progress_mutex.synchronize {
391
422
  progressbar.progress += 1
392
423
  uploaded_files.append(file) if file[:success]
393
424
 
394
425
  if uploaded_files.size >= chunk_size or progressbar.finished?
395
426
  blob_ids = uploaded_files.map {|f| f['bv_id']}
396
- dirs_to_upload = dirs.clone
397
427
  uploaded_files = []
398
- dirs = []
399
428
  end
400
429
  }
401
430
 
402
431
  if blob_ids.present?
432
+ random_id = (0...10).map { ('a'..'z').to_a[rand(26)] }.join
403
433
  refresh_storage_token
404
- Cnvrg::Logger.info("Finished upload chunk of #{chunk_size} files, Sending Upload files save")
405
-
406
-
434
+ Cnvrg::Logger.info("chunk #{random_id}: Finished uploading chunk of #{chunk_size} files, Sending Upload files save")
407
435
  retry_count = 0
408
436
  loop do
409
- upload_resp = Cnvrg::API.request(@base_resource + "upload_files_save", "POST", {commit: commit_sha1, blob_ids: blob_ids, dirs: dirs_to_upload})
437
+ upload_resp = Cnvrg::API.request(@base_resource + "upload_files_save", "POST", {commit: commit_sha1, blob_ids: blob_ids})
410
438
 
411
439
  if not (Cnvrg::CLI.is_response_success(upload_resp, false))
412
440
  retry_count += 1
413
- Cnvrg::Logger.log_error_message("Failed request save files: #{Time.current}, retry: #{retry_count}")
414
- Cnvrg::Logger.info("Got an error message from server, #{upload_resp.try(:fetch, "message")}")
441
+ Cnvrg::Logger.log_error_message("chunk #{random_id}: Failed request save files: #{Time.current}, retry: #{retry_count}")
415
442
  if retry_count > 20
416
- puts "Failed to save files: #{Time.current}, trying next chunk"
443
+ puts "chunk #{random_id}: Failed to save files: #{Time.current}, trying next chunk"
417
444
  break
418
445
  end
419
446
  sleep 5
420
447
  next
421
448
  end
422
- Cnvrg::Logger.info("Chunk saved on server")
449
+ Cnvrg::Logger.info("chunk #{random_id}: Chunk saved on server")
423
450
  break
424
451
  end
425
452
  end
426
453
  else
427
454
  sleep(0.1)
428
455
  end
429
-
456
+ Cnvrg::Logger.info("progress_threads: progressbar.finished? #{progressbar.finished?}")
430
457
  if progressbar.finished?
431
458
  Cnvrg::Logger.info("Progress bar finished closing queues")
432
- file_queue.close()
433
- progress_queue.close()
459
+ file_queue.close
460
+ progress_queue.close
434
461
  Thread.exit
435
462
  end
436
463
  end
@@ -439,24 +466,43 @@ module Cnvrg
439
466
 
440
467
  file_chunks = files.each_slice(chunk_size).to_a
441
468
  # Fetch the required files from the server:
469
+ num_chunks = (num_files / 1000.0).ceil
470
+ chunk_index = 0
442
471
  Parallel.map((file_chunks), in_threads: threads) do |chunk|
443
- files_chunk = chunk.map{|p| p.gsub(/^\.\//, '')}
444
- Cnvrg::Logger.info("Generating chunk idx")
472
+ chunk_index += 1
473
+ self_chunk_index = chunk_index
474
+ files_chunk = chunk.map { |p| p.gsub(/^\.\//, '') }
475
+ Cnvrg::Logger.info("chunk #{self_chunk_index} / #{num_chunks}: Generating chunk idx")
445
476
  tree = @dataset.generate_chunked_idx(files_chunk, prefix: prefix, threads: threads, cli: cli)
446
- Cnvrg::Logger.info("Getting files info from server")
477
+ Cnvrg::Logger.info("chunk #{self_chunk_index} / #{num_chunks}: Finished Generating chunk idx")
478
+
479
+ # Handle directories:
480
+ unless old_api
481
+ while dirs_queue.size > 5000
482
+ sleep(0.1)
483
+ end
484
+ end
485
+ new_dirs = tree.keys.select { |k| tree[k].nil? }
486
+ if new_dirs.blank?
487
+ ## we need to send at least 1 file so the server can infer the dirs from it, for the case when the tree contains no folders
488
+ file = tree.keys.find { |k| tree[k] != nil }
489
+ dirs_queue.push(file) unless old_api
490
+ end
491
+ new_dirs.each { |dir| dirs_queue.push dir }
492
+
493
+ Cnvrg::Logger.info("chunk #{self_chunk_index} / #{num_chunks}: Getting files info from server")
447
494
  results = request_upload_files(commit_sha1, tree, override, new_branch, partial_commit)
495
+ Cnvrg::Logger.info("chunk #{self_chunk_index} / #{num_chunks}: Finished Getting files info from server")
448
496
  next unless results
449
497
 
450
498
  if results['files'].blank?
499
+ Cnvrg::Logger.info("chunk #{self_chunk_index} / #{num_chunks}: no files to upload skipping chunk")
451
500
  progress_mutex.synchronize { progressbar.progress += tree.keys.length }
452
501
  next
453
502
  end
454
503
 
455
- # Handle directories:
456
- new_dirs = tree.keys.select {|k| tree[k].nil?}
457
- dirs += new_dirs
458
-
459
504
  files_to_upload = results['files']
505
+ Cnvrg::Logger.info("chunk #{self_chunk_index} / #{num_chunks}: number of files to upload in this chunk: #{tree.keys.length - files_to_upload.length}")
460
506
  progress_mutex.synchronize {
461
507
  progressbar.progress += tree.keys.length - files_to_upload.length
462
508
  }
@@ -468,8 +514,13 @@ module Cnvrg
468
514
  file_queue.push tree[key].merge(files_to_upload[key])
469
515
  end
470
516
  end
471
- Cnvrg::Logger.info("Waiting to progress and workers to finish")
517
+
518
+ Cnvrg::Logger.info("Waiting dir_thread to finish")
519
+ dir_thread.join
520
+ dirs_queue.close
521
+ Cnvrg::Logger.info("Waiting progress_thread to finish")
472
522
  progress_threads.each(&:join)
523
+ Cnvrg::Logger.info("Waiting workers to finish")
473
524
  worker_threads.each(&:join)
474
525
  Thread.report_on_exception = true
475
526
  rescue => e
@@ -1209,7 +1260,7 @@ module Cnvrg
1209
1260
  false
1210
1261
  end
1211
1262
 
1212
- def end_commit(commit_sha1, force, success: true, uploaded_files: 0, commit_type: nil)
1263
+ def end_commit(commit_sha1, force, success: true, uploaded_files: 0, commit_type: nil, auto_cache: false, external_disk: nil)
1213
1264
  counter = 0
1214
1265
  begin
1215
1266
  counter += 1
@@ -1221,7 +1272,9 @@ module Cnvrg
1221
1272
  force:force,
1222
1273
  success: success,
1223
1274
  uploaded_files: uploaded_files,
1224
- commit_type: commit_type
1275
+ commit_type: commit_type,
1276
+ auto_cache: auto_cache,
1277
+ external_disk: external_disk
1225
1278
  }
1226
1279
  )
1227
1280
  is_success = Cnvrg::CLI.is_response_success(response, false)
@@ -1255,8 +1308,8 @@ module Cnvrg
1255
1308
  response['result']['files']
1256
1309
  end
1257
1310
 
1258
- def get_clone_chunk(latest_id: nil, chunk_size: 1000, commit: 'latest')
1259
- response = Cnvrg::API.request("#{@base_resource}/clone_chunk", 'POST',{commit: commit, chunk_size: chunk_size, latest_id: latest_id})
1311
+ def get_clone_chunk(latest_id: nil, chunk_size: 1000, commit: 'latest', cache_link: false)
1312
+ response = Cnvrg::API.request("#{@base_resource}/clone_chunk", 'POST',{commit: commit, chunk_size: chunk_size, latest_id: latest_id, cache_link: cache_link})
1260
1313
  unless Cnvrg::CLI.is_response_success(response, false)
1261
1314
  Cnvrg::Logger.log_info("#{{commit: commit, chunk_size: chunk_size, latest_id: latest_id}}")
1262
1315
  return nil
@@ -1323,7 +1376,7 @@ module Cnvrg
1323
1376
  end
1324
1377
  end
1325
1378
 
1326
- def download_multiple_files_s3(files, project_home, conflict: false, progressbar: nil, read_only:false, flatten: false, threads: 15)
1379
+ def download_multiple_files_s3(files, project_home, conflict: false, progressbar: nil, read_only:false, flatten: false, threads: 15, cache_link: false)
1327
1380
  begin
1328
1381
  refresh_storage_token
1329
1382
  parallel_options = {
@@ -1344,10 +1397,18 @@ module Cnvrg
1344
1397
  # blob
1345
1398
  local_path = "#{local_path}.conflict" if conflict
1346
1399
  storage_path = f["path"]
1347
- # if File.exists? local_path
1348
- # Cnvrg::Logger.log_info("Trying to download #{local_path} but its already exists, skipping..")
1349
- # next
1350
- # end
1400
+ # if File.exists? local_path
1401
+ # Cnvrg::Logger.log_info("Trying to download #{local_path} but its already exists, skipping..")
1402
+ # next
1403
+ # end
1404
+ if cache_link
1405
+ cached_commits = f['cached_commits']
1406
+
1407
+ if cached_commits.present?
1408
+ next if @downloader.link_file(cached_commits, local_path, @dataset.title, f['name'])
1409
+ end
1410
+ end
1411
+
1351
1412
  resp = @downloader.safe_download(storage_path, local_path)
1352
1413
  Cnvrg::Logger.log_info("Download #{local_path} success resp: #{resp}")
1353
1414
  rescue => e
data/lib/cnvrg/dataset.rb CHANGED
@@ -564,7 +564,8 @@ module Cnvrg
564
564
  safe_path = file
565
565
  safe_path = file[1..-1] if file.start_with? "/"
566
566
 
567
- label = safe_path.gsub(self.local_path + "/", "")
567
+ dataset_local_path = self.local_path + "/"
568
+ label = safe_path.start_with?(dataset_local_path) ? safe_path.sub(dataset_local_path, "") : safe_path
568
569
  label = "#{prefix}/#{label}" if prefix.present?
569
570
  if not Cnvrg::Files.valid_file_name?(label)
570
571
  if cli
@@ -598,6 +599,7 @@ module Cnvrg
598
599
  }
599
600
  end
600
601
  end
602
+
601
603
  if prefix.present? #add the prefix as dirs to the files
602
604
  #lets say the prefix is a/b/c so we want that a/, a/b/, a/b/c/ will be in our files_list
603
605
  dirs = prefix.split('/')
@@ -37,6 +37,21 @@ module Cnvrg
37
37
  ### need to be implemented..
38
38
  end
39
39
 
40
+ def link_file(cached_commits, local_path, dataset_title, file_name)
41
+ prepare_download(local_path)
42
+ cached_commits.each do |cached_commit|
43
+ nfs_path = "/nfs-disk/#{cached_commit}/#{dataset_title}/#{file_name}"
44
+ if File.exist? nfs_path
45
+ FileUtils.ln(nfs_path, local_path)
46
+ return true
47
+ end
48
+ end
49
+ false
50
+ rescue => e
51
+ Cnvrg::Logger.log_error(e)
52
+ false
53
+ end
54
+
40
55
  def safe_download(storage_path, local_path, decrypt: true)
41
56
  safe_operation(local_path) { self.download(storage_path, local_path, decrypt: decrypt) }
42
57
  end
data/lib/cnvrg/files.rb CHANGED
@@ -134,7 +134,6 @@ module Cnvrg
134
134
  end
135
135
  end
136
136
 
137
-
138
137
  blob_ids.concat blob_id_chunk
139
138
  end
140
139
 
data/lib/cnvrg/project.rb CHANGED
@@ -448,8 +448,8 @@ module Cnvrg
448
448
  next
449
449
  end
450
450
  if File.directory? e
451
-
452
- tree_idx[label + "/"] = nil
451
+ dir_name = (label.ends_with? "/") ? label : (label + "/")
452
+ tree_idx[dir_name] = nil
453
453
  else
454
454
  file_in_idx = old_idx[:tree][label] rescue nil
455
455
  last_modified = File.mtime(e).to_f
@@ -513,6 +513,7 @@ module Cnvrg
513
513
  #upload
514
514
  local_idx = self.generate_idx(deploy: deploy, files: specific_files)
515
515
  end
516
+
516
517
  commit = local_idx[:commit]
517
518
  tree = local_idx[:tree]
518
519
  ignore_list = self.send_ignore_list()
@@ -521,12 +522,12 @@ module Cnvrg
521
522
  if tree.present?
522
523
  added += local_idx[:tree].keys
523
524
  end
524
- response = {"result" => {"commit" => nil, "tree" => {"added" => added,
525
- "updated_on_server" => [],
526
- "updated_on_local" => [],
527
- "update_local" => [],
528
- "deleted" => [],
529
- "conflicts" => []}}}
525
+ response = { "result" => { "commit" => nil, "tree" => { "added" => added,
526
+ "updated_on_server" => [],
527
+ "updated_on_local" => [],
528
+ "update_local" => [],
529
+ "deleted" => [],
530
+ "conflicts" => [] } } }
530
531
  return response
531
532
  end
532
533
  #we dont want to send it on download - we only compare between commits sha1 in download.
@@ -534,6 +535,7 @@ module Cnvrg
534
535
  #the new server doesnt need the tree, but the old probably needs :X
535
536
  local_idx[:tree] = {} if Cnvrg::Helpers.server_version > 0
536
537
  end
538
+
537
539
  response = Cnvrg::API.request(@base_resource + "status", 'POST', {idx: local_idx, new_branch: new_branch,
538
540
  current_commit: commit, ignore: ignore_list, force: force, in_exp: in_exp, download: download})
539
541
 
data/lib/cnvrg/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Cnvrg
2
- VERSION = '1.11.21'
3
- end
2
+ VERSION = '1.11.27'
3
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cnvrg
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.11.21
4
+ version: 1.11.27
5
5
  platform: ruby
6
6
  authors:
7
7
  - Yochay Ettun
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2021-02-01 00:00:00.000000000 Z
13
+ date: 2021-03-08 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: bundler
@@ -453,7 +453,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
453
453
  - !ruby/object:Gem::Version
454
454
  version: '0'
455
455
  requirements: []
456
- rubygems_version: 3.0.9
456
+ rubygems_version: 3.1.2
457
457
  signing_key:
458
458
  specification_version: 4
459
459
  summary: A CLI tool for interacting with cnvrg.io.