cnvrg 1.11.21 → 1.11.27

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: bc1bc67b5f62516c87e90a9b71d7d4921236eb8c13446b0c8ddac44ef9cf8544
4
- data.tar.gz: '09b88539fc1fb041151d7fb1ec9931d0ddccc6e7cc50f958467d6e9fb976ba5d'
3
+ metadata.gz: a0ea5180b8920a78032ea60f9ccedea54357c3207720ff802b5c323aac6e4773
4
+ data.tar.gz: 54f0e631a050d0232921c0103aba65cd6c0ca48085161ee2137bf3ef975550ea
5
5
  SHA512:
6
- metadata.gz: b7206c5c43fec47c6519e3fa9c798cbef0232ae0d9d70e708499d2312bbd5af802d6fc62e3b02f0f137f6322cd3486848fd9effbcbcca521171062ad22830e27
7
- data.tar.gz: 27905ef2f218a241db92e78533390dc0b3b3fa6aa8242a925efc106eacf43d2adc11fdfc12f978ce1155fa6e1948ab9f98026ca163cb57db3729ee6080b15481
6
+ metadata.gz: 2805218f0460fe9bc1315f489d48fb37ea9795836f379eb5b6ab071c42a10ee76d16decb24c62c518356b35ca58489a9ba7637240c842459ad1ed6a94799633f
7
+ data.tar.gz: ed5bf8a1b5867c2ab6705dede44534074771387c31e8049c12f1cbf5f538c72e6cedbc910378db09ae6f2e1ca23524a54fd4109847c1aa02f4538a341ec4c68b
data/lib/cnvrg/api.rb CHANGED
@@ -31,6 +31,10 @@ module Cnvrg
31
31
  end
32
32
  def self.request(resource, method = 'GET', data = {}, parse_request = true)
33
33
  resource = URI::encode resource
34
+
35
+ # We need to remove all double slashes from the url to work with the proxy
36
+ resource = resource.gsub(/[\/]{2,}/, "/").gsub("https:/", "https://").gsub("http:/", "http://")
37
+
34
38
  begin
35
39
  n = Netrc.read
36
40
  rescue => e
data/lib/cnvrg/cli.rb CHANGED
@@ -858,7 +858,7 @@ module Cnvrg
858
858
  method_option :read, :type => :boolean, :aliases => ["-r", "--read"], :default => false
859
859
  method_option :remote, :type => :boolean, :aliases => ["-h", "--remote"], :default => false
860
860
  method_option :soft, :type => :boolean, :aliases => ["-s", "--soft"], :default => false, :hide => true
861
- def clone_data(dataset_url, only_tree=false, commit=nil, query=nil, read=false, remote=false, flatten: false, relative: false, soft: false, threads: 15)
861
+ def clone_data(dataset_url, only_tree=false, commit=nil, query=nil, read=false, remote=false, flatten: false, relative: false, soft: false, threads: 15, cache_link: false)
862
862
  begin
863
863
  verify_logged_in(false)
864
864
  log_start(__method__, args, options)
@@ -904,7 +904,7 @@ module Cnvrg
904
904
 
905
905
  commit = response["result"]["commit"]
906
906
  files_count = response["result"]["file_count"]
907
- files = @files.get_clone_chunk(commit: commit)
907
+ files = @files.get_clone_chunk(commit: commit, cache_link: cache_link)
908
908
  downloaded_files = 0
909
909
  progressbar = ProgressBar.create(:title => "Download Progress",
910
910
  :progress_mark => '=',
@@ -917,7 +917,7 @@ module Cnvrg
917
917
 
918
918
  while files['keys'].length > 0
919
919
  Cnvrg::Logger.log_info("download multiple files, #{downloaded_files.size} files downloaded")
920
- @files.download_multiple_files_s3(files, @dataset.local_path, progressbar: progressbar, read_only: read, flatten: flatten, threads: threads)
920
+ @files.download_multiple_files_s3(files, @dataset.local_path, progressbar: progressbar, read_only: read, flatten: flatten, threads: threads, cache_link: cache_link)
921
921
 
922
922
  downloaded_files += files['keys'].length
923
923
  files = @files.get_clone_chunk(commit: commit, latest_id: files['latest'])
@@ -1201,15 +1201,18 @@ module Cnvrg
1201
1201
  end
1202
1202
 
1203
1203
  desc '', '', :hide => true
1204
- def data_put(dataset_url, files: [], dir: '', commit: '', chunk_size: 1000, force: false, override: false, threads: 15, message: nil)
1204
+ def data_put(dataset_url, files: [], dir: '', commit: '', chunk_size: 1000, force: false, override: false, threads: 15, message: nil, auto_cache: false, external_disk: nil)
1205
1205
  begin
1206
1206
  verify_logged_in(false)
1207
1207
  log_start(__method__, args, options)
1208
-
1208
+ if auto_cache && external_disk.blank?
1209
+ raise SignalException.new(1, "for auto caching external disk is required")
1210
+ end
1209
1211
  owner, slug = get_owner_slug(dataset_url)
1210
1212
  @dataset = Dataset.new(dataset_info: {:owner => owner, :slug => slug})
1211
1213
  @datafiles = Cnvrg::Datafiles.new(owner, slug, dataset: @dataset)
1212
1214
  @files = @datafiles.verify_files_exists(files)
1215
+ @files = @files.uniq { |t| t.gsub('./', '')}
1213
1216
 
1214
1217
  if @files.blank?
1215
1218
  raise SignalException.new(1, "Cant find files to upload, exiting.")
@@ -1227,7 +1230,7 @@ module Cnvrg
1227
1230
  Cnvrg::Logger.info("Put files in latest commit")
1228
1231
  response = @datafiles.last_valid_commit()
1229
1232
  unless response #means we failed in the start commit.
1230
- raise SignalException.new(1, "Cant put files into commit:#{commit}, check the dataset id and commitc")
1233
+ raise SignalException.new(1, "Cant put files into commit:#{commit}, check the dataset id and commit")
1231
1234
  end
1232
1235
  @commit = response['result']['sha1']
1233
1236
  else
@@ -1253,7 +1256,7 @@ module Cnvrg
1253
1256
  raise SignalException.new(1, res.msg)
1254
1257
  end
1255
1258
  Cnvrg::Logger.info("Saving commit on server")
1256
- res = @datafiles.end_commit(@commit,force, success: true, commit_type: "put")
1259
+ res = @datafiles.end_commit(@commit,force, success: true, commit_type: "put", auto_cache: auto_cache, external_disk: external_disk)
1257
1260
  msg = res['result']
1258
1261
  response = Cnvrg::Result.new(Cnvrg::CLI.is_response_success(res, true), msg)
1259
1262
  unless response.is_success?
@@ -1261,19 +1264,25 @@ module Cnvrg
1261
1264
  end
1262
1265
 
1263
1266
  log_message("Uploading files finished Successfully", Thor::Shell::Color::GREEN)
1267
+ if msg['cache_error'].present?
1268
+ log_message("Couldn't cache commit: #{msg['cache_error']}", Thor::Shell::Color::YELLOW)
1269
+ end
1264
1270
  rescue SignalException => e
1265
1271
  log_message(e.message, Thor::Shell::Color::RED)
1266
1272
  return false
1267
1273
  end
1268
1274
  end
1269
1275
 
1270
-
1271
1276
  desc '', '', :hide => true
1272
- def data_rm(dataset_url, regex_list: [], commit: '', message: nil)
1277
+ def data_rm(dataset_url, regex_list: [], commit: '', message: nil, auto_cache: false, external_disk: nil)
1273
1278
  begin
1274
1279
  verify_logged_in(false)
1275
1280
  log_start(__method__, args, options)
1276
1281
 
1282
+ if auto_cache && external_disk.blank?
1283
+ raise SignalException.new(1, "for auto caching external disk is required")
1284
+ end
1285
+
1277
1286
  owner, slug = get_owner_slug(dataset_url)
1278
1287
  @dataset = Dataset.new(dataset_info: {:owner => owner, :slug => slug})
1279
1288
  @datafiles = Cnvrg::Datafiles.new(owner, slug, dataset: @dataset)
@@ -1309,7 +1318,7 @@ module Cnvrg
1309
1318
  offset += chunk_size
1310
1319
  end
1311
1320
 
1312
- res = @datafiles.end_commit(@commit,false, success: true)
1321
+ res = @datafiles.end_commit(@commit,false, success: true, auto_cache: auto_cache, external_disk: external_disk)
1313
1322
  msg = res['result']
1314
1323
  response = Cnvrg::Result.new(Cnvrg::CLI.is_response_success(res, true), msg)
1315
1324
  unless response.is_success?
@@ -1317,6 +1326,9 @@ module Cnvrg
1317
1326
  end
1318
1327
 
1319
1328
  log_message("Deleting files finished Successfully", Thor::Shell::Color::GREEN)
1329
+ if msg['cache_error'].present?
1330
+ log_message("Couldn't cache commit: #{msg['cache_error']}", Thor::Shell::Color::YELLOW)
1331
+ end
1320
1332
  rescue SignalException => e
1321
1333
  log_message(e.message, Thor::Shell::Color::RED)
1322
1334
  return false
@@ -2308,7 +2320,6 @@ module Cnvrg
2308
2320
  @project = Project.new(get_project_home)
2309
2321
  chunk_size = chunk_size ? chunk_size : options["chunk_size"]
2310
2322
 
2311
-
2312
2323
  # Enable local/experiment exception logging
2313
2324
  suppress_exceptions = suppress_exceptions ? suppress_exceptions : options[:suppress_exceptions]
2314
2325
  if in_exp
@@ -2346,7 +2357,6 @@ module Cnvrg
2346
2357
  log_message("#{check} Project is up to date", Thor::Shell::Color::GREEN, (((options["sync"] or sync) and !direct) ? false : true))
2347
2358
  return true
2348
2359
  end
2349
- force = true
2350
2360
  end
2351
2361
 
2352
2362
  if ignore.nil? or ignore.empty?
data/lib/cnvrg/data.rb CHANGED
@@ -81,7 +81,6 @@ module Cnvrg
81
81
  end
82
82
  end
83
83
 
84
-
85
84
  desc "data upload", "Upload files from local dataset directory to remote server"
86
85
  method_option :verbose, :type => :boolean, :aliases => ["-v"], :default => false
87
86
  method_option :new_branch, :type => :boolean, :aliases => ["-nb"], :desc => "create new branch of commits"
@@ -155,6 +154,7 @@ module Cnvrg
155
154
  method_option :flatten, :type => :boolean, :aliases => ["-f", "--flatten"], :default => false
156
155
  method_option :soft, :type => :boolean, :aliases => ["-s", "--soft"], :default => false, :hide => true
157
156
  method_option :threads, :type => :numeric, :aliases => ["--threads"], :default => 15
157
+ method_option :cache_link, :type => :boolean, :aliases => ["--cache_link"], :default => false, :hide => true
158
158
  def clone(dataset_url)
159
159
  cli = Cnvrg::CLI.new()
160
160
  only_tree =options[:only_tree]
@@ -165,6 +165,7 @@ module Cnvrg
165
165
  soft = options[:soft]
166
166
  flatten = options[:flatten]
167
167
  threads = options[:threads]
168
+ cache_link = options[:cache_link]
168
169
  cli.clone_data(
169
170
  dataset_url,
170
171
  only_tree=only_tree,
@@ -175,7 +176,8 @@ module Cnvrg
175
176
  flatten: flatten,
176
177
  relative: options[:relative],
177
178
  soft: soft,
178
- threads: threads
179
+ threads: threads,
180
+ cache_link: cache_link
179
181
  )
180
182
  end
181
183
 
@@ -220,6 +222,8 @@ module Cnvrg
220
222
  method_option :threads, :type => :numeric, :aliases => ["-t","--threads"], :default => 15
221
223
  method_option :chunk_size, :type => :numeric, :aliases => ["-cs","--chunk"], :default => 1000
222
224
  method_option :message, :type => :string, :aliases => ["--message"], :desc => "create commit with message", :default => nil
225
+ method_option :auto_cache, :type => :boolean, :aliases => ["--auto_cache"], :desc => "auto_cache", :default => false
226
+ method_option :external_disk, :type => :string, :aliases => ["--external_disk"], :desc => "external_disk_title", :default => nil
223
227
 
224
228
  def put(dataset_url, *files)
225
229
  cli = Cnvrg::CLI.new()
@@ -231,6 +235,8 @@ module Cnvrg
231
235
  message = options[:message]
232
236
  threads = options[:threads]
233
237
  chunk_size = options[:chunk_size]
238
+ auto_cache = options[:auto_cache]
239
+ external_disk = options[:external_disk]
234
240
  cli.data_put(
235
241
  dataset_url,
236
242
  files: files,
@@ -240,16 +246,28 @@ module Cnvrg
240
246
  override: override,
241
247
  threads: threads,
242
248
  chunk_size: chunk_size,
243
- message: message
249
+ message: message,
250
+ auto_cache: auto_cache,
251
+ external_disk: external_disk
244
252
  )
245
253
  end
246
254
 
247
255
  desc 'data rm DATASET_URL FILES_PREFIX', 'Delete selected files from remote server'
248
256
  method_option :message, :type => :string, :aliases => ["--message"], :desc => "create commit with message", :default => nil
257
+ method_option :auto_cache, :type => :boolean, :aliases => ["--auto_cache"], :desc => "auto_cache", :default => false
258
+ method_option :external_disk, :type => :string, :aliases => ["--external_disk"], :desc => "external_disk_title", :default => nil
249
259
  def rm(dataset_url, *regex_list)
250
260
  cli = Cnvrg::CLI.new()
251
261
  message = options[:message]
252
- cli.data_rm(dataset_url, regex_list: regex_list, message: message)
262
+ auto_cache = options[:auto_cache]
263
+ external_disk = options[:external_disk]
264
+ cli.data_rm(
265
+ dataset_url,
266
+ regex_list: regex_list,
267
+ message: message,
268
+ auto_cache: auto_cache,
269
+ external_disk: external_disk
270
+ )
253
271
  end
254
272
 
255
273
  desc 'data clone_query --query=QUERY_SLUG DATASET_URL', 'Clone dataset with specific query'
@@ -47,6 +47,7 @@ module Cnvrg
47
47
  file = file[0..-2] if file.end_with? '/'
48
48
  if File.exists? file
49
49
  if File.directory? file
50
+ paths << file unless file == '.'
50
51
  paths += Dir.glob("#{file}/**/*")
51
52
  else
52
53
  paths << file
@@ -344,18 +345,20 @@ module Cnvrg
344
345
  cli = CLI.new
345
346
  cli.log_message("Using #{threads} threads with chunk size of #{chunk_size}.", Thor::Shell::Color::GREEN)
346
347
 
347
- progressbar = create_progressbar("Upload Progress", files.size)
348
+ num_files = files.size
349
+ progressbar = create_progressbar("Upload Progress", num_files)
348
350
  cli = CLI.new
349
351
 
350
352
  # Vars to handle the parallelism
351
353
  progress_mutex = Mutex.new
352
354
  file_queue = Queue.new
353
355
  progress_queue = Queue.new
356
+ dirs_queue = Queue.new
354
357
  worker_threads = []
355
358
  progress_threads = []
359
+ old_api = false
356
360
 
357
361
  # Vars to keep track of uploaded files and directories
358
- dirs = []
359
362
  uploaded_files = []
360
363
 
361
364
  begin
@@ -378,6 +381,36 @@ module Cnvrg
378
381
  end
379
382
  end
380
383
 
384
+ dir_thread = Thread.new do
385
+ dirs_to_create = []
386
+ loop do
387
+ dir = dirs_queue.deq(non_block: true) rescue nil
388
+ if dir.nil? && !progressbar.finished?
389
+ sleep 0.2
390
+ Cnvrg::Logger.info("directories thread status: progressbar.finished? #{progressbar.finished?} || dirs_queue.empty? #{dirs_queue.empty?} #{dirs_queue.size} || dirs_to_create.empty? #{dirs_to_create.empty?} #{dirs_to_create.size}")
391
+ else
392
+ dirs_to_create << dir
393
+
394
+ if dirs_to_create.size >= 1000 || progressbar.finished?
395
+ resp = Cnvrg::API.request(@base_resource + "create_dirs", "POST", { dirs: dirs_to_create, commit_sha1: commit_sha1 })
396
+ Cnvrg::Logger.info("uploaded directories chunk, finished with #{resp}")
397
+ if resp == false # if resp is false it means 404 which is old server
398
+ old_api = true
399
+ break
400
+ end
401
+ unless Cnvrg::CLI.is_response_success(resp, false)
402
+ dirs_to_create = []
403
+ time = Time.current
404
+ Cnvrg::Logger.log_error_message("Failed to create dirs: #{time}, #{resp.try(:fetch, "message")}")
405
+ next
406
+ end
407
+ dirs_to_create = []
408
+ end
409
+ break if progressbar.finished? && dirs_queue.empty? && dirs_to_create.empty?
410
+ end
411
+ end
412
+ end
413
+
381
414
  # init the thread that handles the file upload progress and saving them in the server
382
415
  threads.times do |i|
383
416
  progress_threads[i] = Thread.new do
@@ -385,52 +418,46 @@ module Cnvrg
385
418
  file = progress_queue.deq(non_block: true) rescue nil # to prevent deadlocks
386
419
  unless file.nil?
387
420
  blob_ids = []
388
- dirs_to_upload = []
389
-
390
421
  progress_mutex.synchronize {
391
422
  progressbar.progress += 1
392
423
  uploaded_files.append(file) if file[:success]
393
424
 
394
425
  if uploaded_files.size >= chunk_size or progressbar.finished?
395
426
  blob_ids = uploaded_files.map {|f| f['bv_id']}
396
- dirs_to_upload = dirs.clone
397
427
  uploaded_files = []
398
- dirs = []
399
428
  end
400
429
  }
401
430
 
402
431
  if blob_ids.present?
432
+ random_id = (0...10).map { ('a'..'z').to_a[rand(26)] }.join
403
433
  refresh_storage_token
404
- Cnvrg::Logger.info("Finished upload chunk of #{chunk_size} files, Sending Upload files save")
405
-
406
-
434
+ Cnvrg::Logger.info("chunk #{random_id}: Finished uploading chunk of #{chunk_size} files, Sending Upload files save")
407
435
  retry_count = 0
408
436
  loop do
409
- upload_resp = Cnvrg::API.request(@base_resource + "upload_files_save", "POST", {commit: commit_sha1, blob_ids: blob_ids, dirs: dirs_to_upload})
437
+ upload_resp = Cnvrg::API.request(@base_resource + "upload_files_save", "POST", {commit: commit_sha1, blob_ids: blob_ids})
410
438
 
411
439
  if not (Cnvrg::CLI.is_response_success(upload_resp, false))
412
440
  retry_count += 1
413
- Cnvrg::Logger.log_error_message("Failed request save files: #{Time.current}, retry: #{retry_count}")
414
- Cnvrg::Logger.info("Got an error message from server, #{upload_resp.try(:fetch, "message")}")
441
+ Cnvrg::Logger.log_error_message("chunk #{random_id}: Failed request save files: #{Time.current}, retry: #{retry_count}")
415
442
  if retry_count > 20
416
- puts "Failed to save files: #{Time.current}, trying next chunk"
443
+ puts "chunk #{random_id}: Failed to save files: #{Time.current}, trying next chunk"
417
444
  break
418
445
  end
419
446
  sleep 5
420
447
  next
421
448
  end
422
- Cnvrg::Logger.info("Chunk saved on server")
449
+ Cnvrg::Logger.info("chunk #{random_id}: Chunk saved on server")
423
450
  break
424
451
  end
425
452
  end
426
453
  else
427
454
  sleep(0.1)
428
455
  end
429
-
456
+ Cnvrg::Logger.info("progress_threads: progressbar.finished? #{progressbar.finished?}")
430
457
  if progressbar.finished?
431
458
  Cnvrg::Logger.info("Progress bar finished closing queues")
432
- file_queue.close()
433
- progress_queue.close()
459
+ file_queue.close
460
+ progress_queue.close
434
461
  Thread.exit
435
462
  end
436
463
  end
@@ -439,24 +466,43 @@ module Cnvrg
439
466
 
440
467
  file_chunks = files.each_slice(chunk_size).to_a
441
468
  # Fetch the required files from the server:
469
+ num_chunks = (num_files / 1000.0).ceil
470
+ chunk_index = 0
442
471
  Parallel.map((file_chunks), in_threads: threads) do |chunk|
443
- files_chunk = chunk.map{|p| p.gsub(/^\.\//, '')}
444
- Cnvrg::Logger.info("Generating chunk idx")
472
+ chunk_index += 1
473
+ self_chunk_index = chunk_index
474
+ files_chunk = chunk.map { |p| p.gsub(/^\.\//, '') }
475
+ Cnvrg::Logger.info("chunk #{self_chunk_index} / #{num_chunks}: Generating chunk idx")
445
476
  tree = @dataset.generate_chunked_idx(files_chunk, prefix: prefix, threads: threads, cli: cli)
446
- Cnvrg::Logger.info("Getting files info from server")
477
+ Cnvrg::Logger.info("chunk #{self_chunk_index} / #{num_chunks}: Finished Generating chunk idx")
478
+
479
+ # Handle directories:
480
+ unless old_api
481
+ while dirs_queue.size > 5000
482
+ sleep(0.1)
483
+ end
484
+ end
485
+ new_dirs = tree.keys.select { |k| tree[k].nil? }
486
+ if new_dirs.blank?
487
+ ## we need to send at least 1 file so the server can inflate dirs from it, in case there are no folders in the tree
488
+ file = tree.keys.find { |k| tree[k] != nil }
489
+ dirs_queue.push(file) unless old_api
490
+ end
491
+ new_dirs.each { |dir| dirs_queue.push dir }
492
+
493
+ Cnvrg::Logger.info("chunk #{self_chunk_index} / #{num_chunks}: Getting files info from server")
447
494
  results = request_upload_files(commit_sha1, tree, override, new_branch, partial_commit)
495
+ Cnvrg::Logger.info("chunk #{self_chunk_index} / #{num_chunks}: Finished Getting files info from server")
448
496
  next unless results
449
497
 
450
498
  if results['files'].blank?
499
+ Cnvrg::Logger.info("chunk #{self_chunk_index} / #{num_chunks}: no files to upload skipping chunk")
451
500
  progress_mutex.synchronize { progressbar.progress += tree.keys.length }
452
501
  next
453
502
  end
454
503
 
455
- # Handle directories:
456
- new_dirs = tree.keys.select {|k| tree[k].nil?}
457
- dirs += new_dirs
458
-
459
504
  files_to_upload = results['files']
505
+ Cnvrg::Logger.info("chunk #{self_chunk_index} / #{num_chunks}: number of files already on server (skipped) in this chunk: #{tree.keys.length - files_to_upload.length}")
460
506
  progress_mutex.synchronize {
461
507
  progressbar.progress += tree.keys.length - files_to_upload.length
462
508
  }
@@ -468,8 +514,13 @@ module Cnvrg
468
514
  file_queue.push tree[key].merge(files_to_upload[key])
469
515
  end
470
516
  end
471
- Cnvrg::Logger.info("Waiting to progress and workers to finish")
517
+
518
+ Cnvrg::Logger.info("Waiting dir_thread to finish")
519
+ dir_thread.join
520
+ dirs_queue.close
521
+ Cnvrg::Logger.info("Waiting progress_thread to finish")
472
522
  progress_threads.each(&:join)
523
+ Cnvrg::Logger.info("Waiting workers to finish")
473
524
  worker_threads.each(&:join)
474
525
  Thread.report_on_exception = true
475
526
  rescue => e
@@ -1209,7 +1260,7 @@ module Cnvrg
1209
1260
  false
1210
1261
  end
1211
1262
 
1212
- def end_commit(commit_sha1, force, success: true, uploaded_files: 0, commit_type: nil)
1263
+ def end_commit(commit_sha1, force, success: true, uploaded_files: 0, commit_type: nil, auto_cache: false, external_disk: nil)
1213
1264
  counter = 0
1214
1265
  begin
1215
1266
  counter += 1
@@ -1221,7 +1272,9 @@ module Cnvrg
1221
1272
  force:force,
1222
1273
  success: success,
1223
1274
  uploaded_files: uploaded_files,
1224
- commit_type: commit_type
1275
+ commit_type: commit_type,
1276
+ auto_cache: auto_cache,
1277
+ external_disk: external_disk
1225
1278
  }
1226
1279
  )
1227
1280
  is_success = Cnvrg::CLI.is_response_success(response, false)
@@ -1255,8 +1308,8 @@ module Cnvrg
1255
1308
  response['result']['files']
1256
1309
  end
1257
1310
 
1258
- def get_clone_chunk(latest_id: nil, chunk_size: 1000, commit: 'latest')
1259
- response = Cnvrg::API.request("#{@base_resource}/clone_chunk", 'POST',{commit: commit, chunk_size: chunk_size, latest_id: latest_id})
1311
+ def get_clone_chunk(latest_id: nil, chunk_size: 1000, commit: 'latest', cache_link: false)
1312
+ response = Cnvrg::API.request("#{@base_resource}/clone_chunk", 'POST',{commit: commit, chunk_size: chunk_size, latest_id: latest_id, cache_link: cache_link})
1260
1313
  unless Cnvrg::CLI.is_response_success(response, false)
1261
1314
  Cnvrg::Logger.log_info("#{{commit: commit, chunk_size: chunk_size, latest_id: latest_id}}")
1262
1315
  return nil
@@ -1323,7 +1376,7 @@ module Cnvrg
1323
1376
  end
1324
1377
  end
1325
1378
 
1326
- def download_multiple_files_s3(files, project_home, conflict: false, progressbar: nil, read_only:false, flatten: false, threads: 15)
1379
+ def download_multiple_files_s3(files, project_home, conflict: false, progressbar: nil, read_only:false, flatten: false, threads: 15, cache_link: false)
1327
1380
  begin
1328
1381
  refresh_storage_token
1329
1382
  parallel_options = {
@@ -1344,10 +1397,18 @@ module Cnvrg
1344
1397
  # blob
1345
1398
  local_path = "#{local_path}.conflict" if conflict
1346
1399
  storage_path = f["path"]
1347
- # if File.exists? local_path
1348
- # Cnvrg::Logger.log_info("Trying to download #{local_path} but its already exists, skipping..")
1349
- # next
1350
- # end
1400
+ # if File.exists? local_path
1401
+ # Cnvrg::Logger.log_info("Trying to download #{local_path} but its already exists, skipping..")
1402
+ # next
1403
+ # end
1404
+ if cache_link
1405
+ cached_commits = f['cached_commits']
1406
+
1407
+ if cached_commits.present?
1408
+ next if @downloader.link_file(cached_commits, local_path, @dataset.title, f['name'])
1409
+ end
1410
+ end
1411
+
1351
1412
  resp = @downloader.safe_download(storage_path, local_path)
1352
1413
  Cnvrg::Logger.log_info("Download #{local_path} success resp: #{resp}")
1353
1414
  rescue => e
data/lib/cnvrg/dataset.rb CHANGED
@@ -564,7 +564,8 @@ module Cnvrg
564
564
  safe_path = file
565
565
  safe_path = file[1..-1] if file.start_with? "/"
566
566
 
567
- label = safe_path.gsub(self.local_path + "/", "")
567
+ dataset_local_path = self.local_path + "/"
568
+ label = safe_path.start_with?(dataset_local_path) ? safe_path.sub(dataset_local_path, "") : safe_path
568
569
  label = "#{prefix}/#{label}" if prefix.present?
569
570
  if not Cnvrg::Files.valid_file_name?(label)
570
571
  if cli
@@ -598,6 +599,7 @@ module Cnvrg
598
599
  }
599
600
  end
600
601
  end
602
+
601
603
  if prefix.present? #add the prefix as dirs to the files
602
604
  #lets say the prefix is a/b/c so we want that a/, a/b/, a/b/c/ will be in our files_list
603
605
  dirs = prefix.split('/')
@@ -37,6 +37,21 @@ module Cnvrg
37
37
  ### need to be implemented..
38
38
  end
39
39
 
40
+ def link_file(cached_commits, local_path, dataset_title, file_name)
41
+ prepare_download(local_path)
42
+ cached_commits.each do |cached_commit|
43
+ nfs_path = "/nfs-disk/#{cached_commit}/#{dataset_title}/#{file_name}"
44
+ if File.exist? nfs_path
45
+ FileUtils.ln(nfs_path, local_path)
46
+ return true
47
+ end
48
+ end
49
+ false
50
+ rescue => e
51
+ Cnvrg::Logger.log_error(e)
52
+ false
53
+ end
54
+
40
55
  def safe_download(storage_path, local_path, decrypt: true)
41
56
  safe_operation(local_path) { self.download(storage_path, local_path, decrypt: decrypt) }
42
57
  end
data/lib/cnvrg/files.rb CHANGED
@@ -134,7 +134,6 @@ module Cnvrg
134
134
  end
135
135
  end
136
136
 
137
-
138
137
  blob_ids.concat blob_id_chunk
139
138
  end
140
139
 
data/lib/cnvrg/project.rb CHANGED
@@ -448,8 +448,8 @@ module Cnvrg
448
448
  next
449
449
  end
450
450
  if File.directory? e
451
-
452
- tree_idx[label + "/"] = nil
451
+ dir_name = (label.ends_with? "/") ? label : (label + "/")
452
+ tree_idx[dir_name] = nil
453
453
  else
454
454
  file_in_idx = old_idx[:tree][label] rescue nil
455
455
  last_modified = File.mtime(e).to_f
@@ -513,6 +513,7 @@ module Cnvrg
513
513
  #upload
514
514
  local_idx = self.generate_idx(deploy: deploy, files: specific_files)
515
515
  end
516
+
516
517
  commit = local_idx[:commit]
517
518
  tree = local_idx[:tree]
518
519
  ignore_list = self.send_ignore_list()
@@ -521,12 +522,12 @@ module Cnvrg
521
522
  if tree.present?
522
523
  added += local_idx[:tree].keys
523
524
  end
524
- response = {"result" => {"commit" => nil, "tree" => {"added" => added,
525
- "updated_on_server" => [],
526
- "updated_on_local" => [],
527
- "update_local" => [],
528
- "deleted" => [],
529
- "conflicts" => []}}}
525
+ response = { "result" => { "commit" => nil, "tree" => { "added" => added,
526
+ "updated_on_server" => [],
527
+ "updated_on_local" => [],
528
+ "update_local" => [],
529
+ "deleted" => [],
530
+ "conflicts" => [] } } }
530
531
  return response
531
532
  end
532
533
  #we dont want to send it on download - we only compare between commits sha1 in download.
@@ -534,6 +535,7 @@ module Cnvrg
534
535
  #the new server doesnt need the tree, but the old probably needs :X
535
536
  local_idx[:tree] = {} if Cnvrg::Helpers.server_version > 0
536
537
  end
538
+
537
539
  response = Cnvrg::API.request(@base_resource + "status", 'POST', {idx: local_idx, new_branch: new_branch,
538
540
  current_commit: commit, ignore: ignore_list, force: force, in_exp: in_exp, download: download})
539
541
 
data/lib/cnvrg/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Cnvrg
2
- VERSION = '1.11.21'
3
- end
2
+ VERSION = '1.11.27'
3
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cnvrg
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.11.21
4
+ version: 1.11.27
5
5
  platform: ruby
6
6
  authors:
7
7
  - Yochay Ettun
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2021-02-01 00:00:00.000000000 Z
13
+ date: 2021-03-08 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: bundler
@@ -453,7 +453,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
453
453
  - !ruby/object:Gem::Version
454
454
  version: '0'
455
455
  requirements: []
456
- rubygems_version: 3.0.9
456
+ rubygems_version: 3.1.2
457
457
  signing_key:
458
458
  specification_version: 4
459
459
  summary: A CLI tool for interacting with cnvrg.io.