cnvrg 1.11.26 → 1.11.27
This diff shows the differences between the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/cnvrg/datafiles.rb +56 -39
- data/lib/cnvrg/files.rb +0 -1
- data/lib/cnvrg/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: a0ea5180b8920a78032ea60f9ccedea54357c3207720ff802b5c323aac6e4773
|
|
4
|
+
data.tar.gz: 54f0e631a050d0232921c0103aba65cd6c0ca48085161ee2137bf3ef975550ea
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 2805218f0460fe9bc1315f489d48fb37ea9795836f379eb5b6ab071c42a10ee76d16decb24c62c518356b35ca58489a9ba7637240c842459ad1ed6a94799633f
|
|
7
|
+
data.tar.gz: ed5bf8a1b5867c2ab6705dede44534074771387c31e8049c12f1cbf5f538c72e6cedbc910378db09ae6f2e1ca23524a54fd4109847c1aa02f4538a341ec4c68b
|
data/lib/cnvrg/datafiles.rb
CHANGED
|
@@ -345,7 +345,8 @@ module Cnvrg
|
|
|
345
345
|
cli = CLI.new
|
|
346
346
|
cli.log_message("Using #{threads} threads with chunk size of #{chunk_size}.", Thor::Shell::Color::GREEN)
|
|
347
347
|
|
|
348
|
-
|
|
348
|
+
num_files = files.size
|
|
349
|
+
progressbar = create_progressbar("Upload Progress", num_files)
|
|
349
350
|
cli = CLI.new
|
|
350
351
|
|
|
351
352
|
# Vars to handle the parallelism
|
|
@@ -355,6 +356,7 @@ module Cnvrg
|
|
|
355
356
|
dirs_queue = Queue.new
|
|
356
357
|
worker_threads = []
|
|
357
358
|
progress_threads = []
|
|
359
|
+
old_api = false
|
|
358
360
|
|
|
359
361
|
# Vars to keep track of uploaded files and directories
|
|
360
362
|
uploaded_files = []
|
|
@@ -382,23 +384,30 @@ module Cnvrg
|
|
|
382
384
|
dir_thread = Thread.new do
|
|
383
385
|
dirs_to_create = []
|
|
384
386
|
loop do
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
387
|
+
dir = dirs_queue.deq(non_block: true) rescue nil
|
|
388
|
+
if dir.nil? && !progressbar.finished?
|
|
389
|
+
sleep 0.2
|
|
390
|
+
Cnvrg::Logger.info("directories thread status: progressbar.finished? #{progressbar.finished?} || dirs_queue.empty? #{dirs_queue.empty?} #{dirs_queue.size} || dirs_to_create.empty? #{dirs_to_create.empty?} #{dirs_to_create.size}")
|
|
391
|
+
else
|
|
392
|
+
dirs_to_create << dir
|
|
393
|
+
|
|
394
|
+
if dirs_to_create.size >= 1000 || progressbar.finished?
|
|
395
|
+
resp = Cnvrg::API.request(@base_resource + "create_dirs", "POST", { dirs: dirs_to_create, commit_sha1: commit_sha1 })
|
|
396
|
+
Cnvrg::Logger.info("uploaded directories chunk, finished with #{resp}")
|
|
397
|
+
if resp == false # if resp is false it means 404 which is old server
|
|
398
|
+
old_api = true
|
|
399
|
+
break
|
|
400
|
+
end
|
|
401
|
+
unless Cnvrg::CLI.is_response_success(resp, false)
|
|
402
|
+
dirs_to_create = []
|
|
403
|
+
time = Time.current
|
|
404
|
+
Cnvrg::Logger.log_error_message("Failed to create dirs: #{time}, #{resp.try(:fetch, "message")}")
|
|
405
|
+
next
|
|
406
|
+
end
|
|
396
407
|
dirs_to_create = []
|
|
397
|
-
next
|
|
398
408
|
end
|
|
399
|
-
|
|
409
|
+
break if progressbar.finished? && dirs_queue.empty? && dirs_to_create.empty?
|
|
400
410
|
end
|
|
401
|
-
break if progressbar.finished? && dirs_queue.empty? && dirs_to_create.empty?
|
|
402
411
|
end
|
|
403
412
|
end
|
|
404
413
|
|
|
@@ -409,7 +418,6 @@ module Cnvrg
|
|
|
409
418
|
file = progress_queue.deq(non_block: true) rescue nil # to prevent deadlocks
|
|
410
419
|
unless file.nil?
|
|
411
420
|
blob_ids = []
|
|
412
|
-
|
|
413
421
|
progress_mutex.synchronize {
|
|
414
422
|
progressbar.progress += 1
|
|
415
423
|
uploaded_files.append(file) if file[:success]
|
|
@@ -421,32 +429,31 @@ module Cnvrg
|
|
|
421
429
|
}
|
|
422
430
|
|
|
423
431
|
if blob_ids.present?
|
|
432
|
+
random_id = (0...10).map { ('a'..'z').to_a[rand(26)] }.join
|
|
424
433
|
refresh_storage_token
|
|
425
|
-
Cnvrg::Logger.info("Finished
|
|
426
|
-
|
|
434
|
+
Cnvrg::Logger.info("chunk #{random_id}: Finished uploading chunk of #{chunk_size} files, Sending Upload files save")
|
|
427
435
|
retry_count = 0
|
|
428
436
|
loop do
|
|
429
437
|
upload_resp = Cnvrg::API.request(@base_resource + "upload_files_save", "POST", {commit: commit_sha1, blob_ids: blob_ids})
|
|
430
438
|
|
|
431
439
|
if not (Cnvrg::CLI.is_response_success(upload_resp, false))
|
|
432
440
|
retry_count += 1
|
|
433
|
-
Cnvrg::Logger.log_error_message("Failed request save files: #{Time.current}, retry: #{retry_count}")
|
|
434
|
-
Cnvrg::Logger.info("Got an error message from server, #{upload_resp.try(:fetch, "message")}")
|
|
441
|
+
Cnvrg::Logger.log_error_message("chunk #{random_id}: Failed request save files: #{Time.current}, retry: #{retry_count}")
|
|
435
442
|
if retry_count > 20
|
|
436
|
-
puts "Failed to save files: #{Time.current}, trying next chunk"
|
|
443
|
+
puts "chunk #{random_id}: Failed to save files: #{Time.current}, trying next chunk"
|
|
437
444
|
break
|
|
438
445
|
end
|
|
439
446
|
sleep 5
|
|
440
447
|
next
|
|
441
448
|
end
|
|
442
|
-
Cnvrg::Logger.info("Chunk saved on server")
|
|
449
|
+
Cnvrg::Logger.info("chunk #{random_id}: Chunk saved on server")
|
|
443
450
|
break
|
|
444
451
|
end
|
|
445
452
|
end
|
|
446
453
|
else
|
|
447
454
|
sleep(0.1)
|
|
448
455
|
end
|
|
449
|
-
|
|
456
|
+
Cnvrg::Logger.info("progress_threads: progressbar.finished? #{progressbar.finished?}")
|
|
450
457
|
if progressbar.finished?
|
|
451
458
|
Cnvrg::Logger.info("Progress bar finished closing queues")
|
|
452
459
|
file_queue.close
|
|
@@ -459,35 +466,43 @@ module Cnvrg
|
|
|
459
466
|
|
|
460
467
|
file_chunks = files.each_slice(chunk_size).to_a
|
|
461
468
|
# Fetch the required files from the server:
|
|
469
|
+
num_chunks = (num_files / 1000.0).ceil
|
|
470
|
+
chunk_index = 0
|
|
462
471
|
Parallel.map((file_chunks), in_threads: threads) do |chunk|
|
|
463
|
-
|
|
464
|
-
|
|
472
|
+
chunk_index += 1
|
|
473
|
+
self_chunk_index = chunk_index
|
|
474
|
+
files_chunk = chunk.map { |p| p.gsub(/^\.\//, '') }
|
|
475
|
+
Cnvrg::Logger.info("chunk #{self_chunk_index} / #{num_chunks}: Generating chunk idx")
|
|
465
476
|
tree = @dataset.generate_chunked_idx(files_chunk, prefix: prefix, threads: threads, cli: cli)
|
|
477
|
+
Cnvrg::Logger.info("chunk #{self_chunk_index} / #{num_chunks}: Finished Generating chunk idx")
|
|
466
478
|
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
if new_dirs.blank?
|
|
472
|
-
## we need to send 1 file so we will inflated dirs from in case when we dont have folders in the tree
|
|
473
|
-
file = tree.keys.find { |k| tree[k] != nil }
|
|
474
|
-
dirs_queue.push file
|
|
479
|
+
# Handle directories:
|
|
480
|
+
unless old_api
|
|
481
|
+
while dirs_queue.size > 5000
|
|
482
|
+
sleep(0.1)
|
|
475
483
|
end
|
|
484
|
+
end
|
|
485
|
+
new_dirs = tree.keys.select { |k| tree[k].nil? }
|
|
486
|
+
if new_dirs.blank?
|
|
487
|
+
## we need to send 1 file so we will inflated dirs from in case when we dont have folders in the tree
|
|
488
|
+
file = tree.keys.find { |k| tree[k] != nil }
|
|
489
|
+
dirs_queue.push(file) unless old_api
|
|
490
|
+
end
|
|
491
|
+
new_dirs.each { |dir| dirs_queue.push dir }
|
|
476
492
|
|
|
477
|
-
|
|
478
|
-
}
|
|
479
|
-
Cnvrg::Logger.info("Getting files info from server")
|
|
480
|
-
|
|
493
|
+
Cnvrg::Logger.info("chunk #{self_chunk_index} / #{num_chunks}: Getting files info from server")
|
|
481
494
|
results = request_upload_files(commit_sha1, tree, override, new_branch, partial_commit)
|
|
495
|
+
Cnvrg::Logger.info("chunk #{self_chunk_index} / #{num_chunks}: Finished Getting files info from server")
|
|
482
496
|
next unless results
|
|
483
497
|
|
|
484
498
|
if results['files'].blank?
|
|
499
|
+
Cnvrg::Logger.info("chunk #{self_chunk_index} / #{num_chunks}: no files to upload skipping chunk")
|
|
485
500
|
progress_mutex.synchronize { progressbar.progress += tree.keys.length }
|
|
486
501
|
next
|
|
487
502
|
end
|
|
488
503
|
|
|
489
504
|
files_to_upload = results['files']
|
|
490
|
-
|
|
505
|
+
Cnvrg::Logger.info("chunk #{self_chunk_index} / #{num_chunks}: number of files to upload in this chunk: #{tree.keys.length - files_to_upload.length}")
|
|
491
506
|
progress_mutex.synchronize {
|
|
492
507
|
progressbar.progress += tree.keys.length - files_to_upload.length
|
|
493
508
|
}
|
|
@@ -500,10 +515,12 @@ module Cnvrg
|
|
|
500
515
|
end
|
|
501
516
|
end
|
|
502
517
|
|
|
503
|
-
Cnvrg::Logger.info("Waiting
|
|
518
|
+
Cnvrg::Logger.info("Waiting dir_thread to finish")
|
|
504
519
|
dir_thread.join
|
|
505
520
|
dirs_queue.close
|
|
521
|
+
Cnvrg::Logger.info("Waiting progress_thread to finish")
|
|
506
522
|
progress_threads.each(&:join)
|
|
523
|
+
Cnvrg::Logger.info("Waiting workers to finish")
|
|
507
524
|
worker_threads.each(&:join)
|
|
508
525
|
Thread.report_on_exception = true
|
|
509
526
|
rescue => e
|
data/lib/cnvrg/files.rb
CHANGED
data/lib/cnvrg/version.rb
CHANGED