cnvrg 1.11.21 → 1.11.27
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/cnvrg/api.rb +4 -0
- data/lib/cnvrg/cli.rb +22 -12
- data/lib/cnvrg/data.rb +22 -4
- data/lib/cnvrg/datafiles.rb +95 -34
- data/lib/cnvrg/dataset.rb +3 -1
- data/lib/cnvrg/downloader/client.rb +15 -0
- data/lib/cnvrg/files.rb +0 -1
- data/lib/cnvrg/project.rb +10 -8
- data/lib/cnvrg/version.rb +2 -2
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a0ea5180b8920a78032ea60f9ccedea54357c3207720ff802b5c323aac6e4773
|
4
|
+
data.tar.gz: 54f0e631a050d0232921c0103aba65cd6c0ca48085161ee2137bf3ef975550ea
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2805218f0460fe9bc1315f489d48fb37ea9795836f379eb5b6ab071c42a10ee76d16decb24c62c518356b35ca58489a9ba7637240c842459ad1ed6a94799633f
|
7
|
+
data.tar.gz: ed5bf8a1b5867c2ab6705dede44534074771387c31e8049c12f1cbf5f538c72e6cedbc910378db09ae6f2e1ca23524a54fd4109847c1aa02f4538a341ec4c68b
|
data/lib/cnvrg/api.rb
CHANGED
@@ -31,6 +31,10 @@ module Cnvrg
|
|
31
31
|
end
|
32
32
|
def self.request(resource, method = 'GET', data = {}, parse_request = true)
|
33
33
|
resource = URI::encode resource
|
34
|
+
|
35
|
+
# We need to remove all double slashes from the url to work with the proxy
|
36
|
+
resource = resource.gsub(/[\/]{2,}/, "/").gsub("https:/", "https://").gsub("http:/", "http://")
|
37
|
+
|
34
38
|
begin
|
35
39
|
n = Netrc.read
|
36
40
|
rescue => e
|
data/lib/cnvrg/cli.rb
CHANGED
@@ -858,7 +858,7 @@ module Cnvrg
|
|
858
858
|
method_option :read, :type => :boolean, :aliases => ["-r", "--read"], :default => false
|
859
859
|
method_option :remote, :type => :boolean, :aliases => ["-h", "--remote"], :default => false
|
860
860
|
method_option :soft, :type => :boolean, :aliases => ["-s", "--soft"], :default => false, :hide => true
|
861
|
-
def clone_data(dataset_url, only_tree=false, commit=nil, query=nil, read=false, remote=false, flatten: false, relative: false, soft: false, threads: 15)
|
861
|
+
def clone_data(dataset_url, only_tree=false, commit=nil, query=nil, read=false, remote=false, flatten: false, relative: false, soft: false, threads: 15, cache_link: false)
|
862
862
|
begin
|
863
863
|
verify_logged_in(false)
|
864
864
|
log_start(__method__, args, options)
|
@@ -904,7 +904,7 @@ module Cnvrg
|
|
904
904
|
|
905
905
|
commit = response["result"]["commit"]
|
906
906
|
files_count = response["result"]["file_count"]
|
907
|
-
files = @files.get_clone_chunk(commit: commit)
|
907
|
+
files = @files.get_clone_chunk(commit: commit, cache_link: cache_link)
|
908
908
|
downloaded_files = 0
|
909
909
|
progressbar = ProgressBar.create(:title => "Download Progress",
|
910
910
|
:progress_mark => '=',
|
@@ -917,7 +917,7 @@ module Cnvrg
|
|
917
917
|
|
918
918
|
while files['keys'].length > 0
|
919
919
|
Cnvrg::Logger.log_info("download multiple files, #{downloaded_files.size} files downloaded")
|
920
|
-
@files.download_multiple_files_s3(files, @dataset.local_path, progressbar: progressbar, read_only: read, flatten: flatten, threads: threads)
|
920
|
+
@files.download_multiple_files_s3(files, @dataset.local_path, progressbar: progressbar, read_only: read, flatten: flatten, threads: threads, cache_link: cache_link)
|
921
921
|
|
922
922
|
downloaded_files += files['keys'].length
|
923
923
|
files = @files.get_clone_chunk(commit: commit, latest_id: files['latest'])
|
@@ -1201,15 +1201,18 @@ module Cnvrg
|
|
1201
1201
|
end
|
1202
1202
|
|
1203
1203
|
desc '', '', :hide => true
|
1204
|
-
def data_put(dataset_url, files: [], dir: '', commit: '', chunk_size: 1000, force: false, override: false, threads: 15, message: nil)
|
1204
|
+
def data_put(dataset_url, files: [], dir: '', commit: '', chunk_size: 1000, force: false, override: false, threads: 15, message: nil, auto_cache: false, external_disk: nil)
|
1205
1205
|
begin
|
1206
1206
|
verify_logged_in(false)
|
1207
1207
|
log_start(__method__, args, options)
|
1208
|
-
|
1208
|
+
if auto_cache && external_disk.blank?
|
1209
|
+
raise SignalException.new(1, "for auto caching external disk is required")
|
1210
|
+
end
|
1209
1211
|
owner, slug = get_owner_slug(dataset_url)
|
1210
1212
|
@dataset = Dataset.new(dataset_info: {:owner => owner, :slug => slug})
|
1211
1213
|
@datafiles = Cnvrg::Datafiles.new(owner, slug, dataset: @dataset)
|
1212
1214
|
@files = @datafiles.verify_files_exists(files)
|
1215
|
+
@files = @files.uniq { |t| t.gsub('./', '')}
|
1213
1216
|
|
1214
1217
|
if @files.blank?
|
1215
1218
|
raise SignalException.new(1, "Cant find files to upload, exiting.")
|
@@ -1227,7 +1230,7 @@ module Cnvrg
|
|
1227
1230
|
Cnvrg::Logger.info("Put files in latest commit")
|
1228
1231
|
response = @datafiles.last_valid_commit()
|
1229
1232
|
unless response #means we failed in the start commit.
|
1230
|
-
raise SignalException.new(1, "Cant put files into commit:#{commit}, check the dataset id and
|
1233
|
+
raise SignalException.new(1, "Cant put files into commit:#{commit}, check the dataset id and commit")
|
1231
1234
|
end
|
1232
1235
|
@commit = response['result']['sha1']
|
1233
1236
|
else
|
@@ -1253,7 +1256,7 @@ module Cnvrg
|
|
1253
1256
|
raise SignalException.new(1, res.msg)
|
1254
1257
|
end
|
1255
1258
|
Cnvrg::Logger.info("Saving commit on server")
|
1256
|
-
res = @datafiles.end_commit(@commit,force, success: true, commit_type: "put")
|
1259
|
+
res = @datafiles.end_commit(@commit,force, success: true, commit_type: "put", auto_cache: auto_cache, external_disk: external_disk)
|
1257
1260
|
msg = res['result']
|
1258
1261
|
response = Cnvrg::Result.new(Cnvrg::CLI.is_response_success(res, true), msg)
|
1259
1262
|
unless response.is_success?
|
@@ -1261,19 +1264,25 @@ module Cnvrg
|
|
1261
1264
|
end
|
1262
1265
|
|
1263
1266
|
log_message("Uploading files finished Successfully", Thor::Shell::Color::GREEN)
|
1267
|
+
if msg['cache_error'].present?
|
1268
|
+
log_message("Couldn't cache commit: #{msg['cache_error']}", Thor::Shell::Color::YELLOW)
|
1269
|
+
end
|
1264
1270
|
rescue SignalException => e
|
1265
1271
|
log_message(e.message, Thor::Shell::Color::RED)
|
1266
1272
|
return false
|
1267
1273
|
end
|
1268
1274
|
end
|
1269
1275
|
|
1270
|
-
|
1271
1276
|
desc '', '', :hide => true
|
1272
|
-
def data_rm(dataset_url, regex_list: [], commit: '', message: nil)
|
1277
|
+
def data_rm(dataset_url, regex_list: [], commit: '', message: nil, auto_cache: false, external_disk: nil)
|
1273
1278
|
begin
|
1274
1279
|
verify_logged_in(false)
|
1275
1280
|
log_start(__method__, args, options)
|
1276
1281
|
|
1282
|
+
if auto_cache && external_disk.blank?
|
1283
|
+
raise SignalException.new(1, "for auto caching external disk is required")
|
1284
|
+
end
|
1285
|
+
|
1277
1286
|
owner, slug = get_owner_slug(dataset_url)
|
1278
1287
|
@dataset = Dataset.new(dataset_info: {:owner => owner, :slug => slug})
|
1279
1288
|
@datafiles = Cnvrg::Datafiles.new(owner, slug, dataset: @dataset)
|
@@ -1309,7 +1318,7 @@ module Cnvrg
|
|
1309
1318
|
offset += chunk_size
|
1310
1319
|
end
|
1311
1320
|
|
1312
|
-
res = @datafiles.end_commit(@commit,false, success: true)
|
1321
|
+
res = @datafiles.end_commit(@commit,false, success: true, auto_cache: auto_cache, external_disk: external_disk)
|
1313
1322
|
msg = res['result']
|
1314
1323
|
response = Cnvrg::Result.new(Cnvrg::CLI.is_response_success(res, true), msg)
|
1315
1324
|
unless response.is_success?
|
@@ -1317,6 +1326,9 @@ module Cnvrg
|
|
1317
1326
|
end
|
1318
1327
|
|
1319
1328
|
log_message("Deleting files finished Successfully", Thor::Shell::Color::GREEN)
|
1329
|
+
if msg['cache_error'].present?
|
1330
|
+
log_message("Couldn't cache commit: #{msg['cache_error']}", Thor::Shell::Color::YELLOW)
|
1331
|
+
end
|
1320
1332
|
rescue SignalException => e
|
1321
1333
|
log_message(e.message, Thor::Shell::Color::RED)
|
1322
1334
|
return false
|
@@ -2308,7 +2320,6 @@ module Cnvrg
|
|
2308
2320
|
@project = Project.new(get_project_home)
|
2309
2321
|
chunk_size = chunk_size ? chunk_size : options["chunk_size"]
|
2310
2322
|
|
2311
|
-
|
2312
2323
|
# Enable local/experiment exception logging
|
2313
2324
|
suppress_exceptions = suppress_exceptions ? suppress_exceptions : options[:suppress_exceptions]
|
2314
2325
|
if in_exp
|
@@ -2346,7 +2357,6 @@ module Cnvrg
|
|
2346
2357
|
log_message("#{check} Project is up to date", Thor::Shell::Color::GREEN, (((options["sync"] or sync) and !direct) ? false : true))
|
2347
2358
|
return true
|
2348
2359
|
end
|
2349
|
-
force = true
|
2350
2360
|
end
|
2351
2361
|
|
2352
2362
|
if ignore.nil? or ignore.empty?
|
data/lib/cnvrg/data.rb
CHANGED
@@ -81,7 +81,6 @@ module Cnvrg
|
|
81
81
|
end
|
82
82
|
end
|
83
83
|
|
84
|
-
|
85
84
|
desc "data upload", "Upload files from local dataset directory to remote server"
|
86
85
|
method_option :verbose, :type => :boolean, :aliases => ["-v"], :default => false
|
87
86
|
method_option :new_branch, :type => :boolean, :aliases => ["-nb"], :desc => "create new branch of commits"
|
@@ -155,6 +154,7 @@ module Cnvrg
|
|
155
154
|
method_option :flatten, :type => :boolean, :aliases => ["-f", "--flatten"], :default => false
|
156
155
|
method_option :soft, :type => :boolean, :aliases => ["-s", "--soft"], :default => false, :hide => true
|
157
156
|
method_option :threads, :type => :numeric, :aliases => ["--threads"], :default => 15
|
157
|
+
method_option :cache_link, :type => :boolean, :aliases => ["--cache_link"], :default => false, :hide => true
|
158
158
|
def clone(dataset_url)
|
159
159
|
cli = Cnvrg::CLI.new()
|
160
160
|
only_tree =options[:only_tree]
|
@@ -165,6 +165,7 @@ module Cnvrg
|
|
165
165
|
soft = options[:soft]
|
166
166
|
flatten = options[:flatten]
|
167
167
|
threads = options[:threads]
|
168
|
+
cache_link = options[:cache_link]
|
168
169
|
cli.clone_data(
|
169
170
|
dataset_url,
|
170
171
|
only_tree=only_tree,
|
@@ -175,7 +176,8 @@ module Cnvrg
|
|
175
176
|
flatten: flatten,
|
176
177
|
relative: options[:relative],
|
177
178
|
soft: soft,
|
178
|
-
threads: threads
|
179
|
+
threads: threads,
|
180
|
+
cache_link: cache_link
|
179
181
|
)
|
180
182
|
end
|
181
183
|
|
@@ -220,6 +222,8 @@ module Cnvrg
|
|
220
222
|
method_option :threads, :type => :numeric, :aliases => ["-t","--threads"], :default => 15
|
221
223
|
method_option :chunk_size, :type => :numeric, :aliases => ["-cs","--chunk"], :default => 1000
|
222
224
|
method_option :message, :type => :string, :aliases => ["--message"], :desc => "create commit with message", :default => nil
|
225
|
+
method_option :auto_cache, :type => :boolean, :aliases => ["--auto_cache"], :desc => "auto_cache", :default => false
|
226
|
+
method_option :external_disk, :type => :string, :aliases => ["--external_disk"], :desc => "external_disk_title", :default => nil
|
223
227
|
|
224
228
|
def put(dataset_url, *files)
|
225
229
|
cli = Cnvrg::CLI.new()
|
@@ -231,6 +235,8 @@ module Cnvrg
|
|
231
235
|
message = options[:message]
|
232
236
|
threads = options[:threads]
|
233
237
|
chunk_size = options[:chunk_size]
|
238
|
+
auto_cache = options[:auto_cache]
|
239
|
+
external_disk = options[:external_disk]
|
234
240
|
cli.data_put(
|
235
241
|
dataset_url,
|
236
242
|
files: files,
|
@@ -240,16 +246,28 @@ module Cnvrg
|
|
240
246
|
override: override,
|
241
247
|
threads: threads,
|
242
248
|
chunk_size: chunk_size,
|
243
|
-
message: message
|
249
|
+
message: message,
|
250
|
+
auto_cache: auto_cache,
|
251
|
+
external_disk: external_disk
|
244
252
|
)
|
245
253
|
end
|
246
254
|
|
247
255
|
desc 'data rm DATASET_URL FILES_PREFIX', 'Delete selected files from remote server'
|
248
256
|
method_option :message, :type => :string, :aliases => ["--message"], :desc => "create commit with message", :default => nil
|
257
|
+
method_option :auto_cache, :type => :boolean, :aliases => ["--auto_cache"], :desc => "auto_cache", :default => false
|
258
|
+
method_option :external_disk, :type => :string, :aliases => ["--external_disk"], :desc => "external_disk_title", :default => nil
|
249
259
|
def rm(dataset_url, *regex_list)
|
250
260
|
cli = Cnvrg::CLI.new()
|
251
261
|
message = options[:message]
|
252
|
-
|
262
|
+
auto_cache = options[:auto_cache]
|
263
|
+
external_disk = options[:external_disk]
|
264
|
+
cli.data_rm(
|
265
|
+
dataset_url,
|
266
|
+
regex_list: regex_list,
|
267
|
+
message: message,
|
268
|
+
auto_cache: auto_cache,
|
269
|
+
external_disk: external_disk
|
270
|
+
)
|
253
271
|
end
|
254
272
|
|
255
273
|
desc 'data clone_query --query=QUERY_SLUG DATASET_URL', 'Clone dataset with specific query'
|
data/lib/cnvrg/datafiles.rb
CHANGED
@@ -47,6 +47,7 @@ module Cnvrg
|
|
47
47
|
file = file[0..-2] if file.end_with? '/'
|
48
48
|
if File.exists? file
|
49
49
|
if File.directory? file
|
50
|
+
paths << file unless file == '.'
|
50
51
|
paths += Dir.glob("#{file}/**/*")
|
51
52
|
else
|
52
53
|
paths << file
|
@@ -344,18 +345,20 @@ module Cnvrg
|
|
344
345
|
cli = CLI.new
|
345
346
|
cli.log_message("Using #{threads} threads with chunk size of #{chunk_size}.", Thor::Shell::Color::GREEN)
|
346
347
|
|
347
|
-
|
348
|
+
num_files = files.size
|
349
|
+
progressbar = create_progressbar("Upload Progress", num_files)
|
348
350
|
cli = CLI.new
|
349
351
|
|
350
352
|
# Vars to handle the parallelism
|
351
353
|
progress_mutex = Mutex.new
|
352
354
|
file_queue = Queue.new
|
353
355
|
progress_queue = Queue.new
|
356
|
+
dirs_queue = Queue.new
|
354
357
|
worker_threads = []
|
355
358
|
progress_threads = []
|
359
|
+
old_api = false
|
356
360
|
|
357
361
|
# Vars to keep track of uploaded files and directories
|
358
|
-
dirs = []
|
359
362
|
uploaded_files = []
|
360
363
|
|
361
364
|
begin
|
@@ -378,6 +381,36 @@ module Cnvrg
|
|
378
381
|
end
|
379
382
|
end
|
380
383
|
|
384
|
+
dir_thread = Thread.new do
|
385
|
+
dirs_to_create = []
|
386
|
+
loop do
|
387
|
+
dir = dirs_queue.deq(non_block: true) rescue nil
|
388
|
+
if dir.nil? && !progressbar.finished?
|
389
|
+
sleep 0.2
|
390
|
+
Cnvrg::Logger.info("directories thread status: progressbar.finished? #{progressbar.finished?} || dirs_queue.empty? #{dirs_queue.empty?} #{dirs_queue.size} || dirs_to_create.empty? #{dirs_to_create.empty?} #{dirs_to_create.size}")
|
391
|
+
else
|
392
|
+
dirs_to_create << dir
|
393
|
+
|
394
|
+
if dirs_to_create.size >= 1000 || progressbar.finished?
|
395
|
+
resp = Cnvrg::API.request(@base_resource + "create_dirs", "POST", { dirs: dirs_to_create, commit_sha1: commit_sha1 })
|
396
|
+
Cnvrg::Logger.info("uploaded directories chunk, finished with #{resp}")
|
397
|
+
if resp == false # if resp is false it means 404 which is old server
|
398
|
+
old_api = true
|
399
|
+
break
|
400
|
+
end
|
401
|
+
unless Cnvrg::CLI.is_response_success(resp, false)
|
402
|
+
dirs_to_create = []
|
403
|
+
time = Time.current
|
404
|
+
Cnvrg::Logger.log_error_message("Failed to create dirs: #{time}, #{resp.try(:fetch, "message")}")
|
405
|
+
next
|
406
|
+
end
|
407
|
+
dirs_to_create = []
|
408
|
+
end
|
409
|
+
break if progressbar.finished? && dirs_queue.empty? && dirs_to_create.empty?
|
410
|
+
end
|
411
|
+
end
|
412
|
+
end
|
413
|
+
|
381
414
|
# init the thread that handles the file upload progress and saving them in the server
|
382
415
|
threads.times do |i|
|
383
416
|
progress_threads[i] = Thread.new do
|
@@ -385,52 +418,46 @@ module Cnvrg
|
|
385
418
|
file = progress_queue.deq(non_block: true) rescue nil # to prevent deadlocks
|
386
419
|
unless file.nil?
|
387
420
|
blob_ids = []
|
388
|
-
dirs_to_upload = []
|
389
|
-
|
390
421
|
progress_mutex.synchronize {
|
391
422
|
progressbar.progress += 1
|
392
423
|
uploaded_files.append(file) if file[:success]
|
393
424
|
|
394
425
|
if uploaded_files.size >= chunk_size or progressbar.finished?
|
395
426
|
blob_ids = uploaded_files.map {|f| f['bv_id']}
|
396
|
-
dirs_to_upload = dirs.clone
|
397
427
|
uploaded_files = []
|
398
|
-
dirs = []
|
399
428
|
end
|
400
429
|
}
|
401
430
|
|
402
431
|
if blob_ids.present?
|
432
|
+
random_id = (0...10).map { ('a'..'z').to_a[rand(26)] }.join
|
403
433
|
refresh_storage_token
|
404
|
-
Cnvrg::Logger.info("Finished
|
405
|
-
|
406
|
-
|
434
|
+
Cnvrg::Logger.info("chunk #{random_id}: Finished uploading chunk of #{chunk_size} files, Sending Upload files save")
|
407
435
|
retry_count = 0
|
408
436
|
loop do
|
409
|
-
upload_resp = Cnvrg::API.request(@base_resource + "upload_files_save", "POST", {commit: commit_sha1, blob_ids: blob_ids
|
437
|
+
upload_resp = Cnvrg::API.request(@base_resource + "upload_files_save", "POST", {commit: commit_sha1, blob_ids: blob_ids})
|
410
438
|
|
411
439
|
if not (Cnvrg::CLI.is_response_success(upload_resp, false))
|
412
440
|
retry_count += 1
|
413
|
-
Cnvrg::Logger.log_error_message("Failed request save files: #{Time.current}, retry: #{retry_count}")
|
414
|
-
Cnvrg::Logger.info("Got an error message from server, #{upload_resp.try(:fetch, "message")}")
|
441
|
+
Cnvrg::Logger.log_error_message("chunk #{random_id}: Failed request save files: #{Time.current}, retry: #{retry_count}")
|
415
442
|
if retry_count > 20
|
416
|
-
puts "Failed to save files: #{Time.current}, trying next chunk"
|
443
|
+
puts "chunk #{random_id}: Failed to save files: #{Time.current}, trying next chunk"
|
417
444
|
break
|
418
445
|
end
|
419
446
|
sleep 5
|
420
447
|
next
|
421
448
|
end
|
422
|
-
Cnvrg::Logger.info("Chunk saved on server")
|
449
|
+
Cnvrg::Logger.info("chunk #{random_id}: Chunk saved on server")
|
423
450
|
break
|
424
451
|
end
|
425
452
|
end
|
426
453
|
else
|
427
454
|
sleep(0.1)
|
428
455
|
end
|
429
|
-
|
456
|
+
Cnvrg::Logger.info("progress_threads: progressbar.finished? #{progressbar.finished?}")
|
430
457
|
if progressbar.finished?
|
431
458
|
Cnvrg::Logger.info("Progress bar finished closing queues")
|
432
|
-
file_queue.close
|
433
|
-
progress_queue.close
|
459
|
+
file_queue.close
|
460
|
+
progress_queue.close
|
434
461
|
Thread.exit
|
435
462
|
end
|
436
463
|
end
|
@@ -439,24 +466,43 @@ module Cnvrg
|
|
439
466
|
|
440
467
|
file_chunks = files.each_slice(chunk_size).to_a
|
441
468
|
# Fetch the required files from the server:
|
469
|
+
num_chunks = (num_files / 1000.0).ceil
|
470
|
+
chunk_index = 0
|
442
471
|
Parallel.map((file_chunks), in_threads: threads) do |chunk|
|
443
|
-
|
444
|
-
|
472
|
+
chunk_index += 1
|
473
|
+
self_chunk_index = chunk_index
|
474
|
+
files_chunk = chunk.map { |p| p.gsub(/^\.\//, '') }
|
475
|
+
Cnvrg::Logger.info("chunk #{self_chunk_index} / #{num_chunks}: Generating chunk idx")
|
445
476
|
tree = @dataset.generate_chunked_idx(files_chunk, prefix: prefix, threads: threads, cli: cli)
|
446
|
-
Cnvrg::Logger.info("
|
477
|
+
Cnvrg::Logger.info("chunk #{self_chunk_index} / #{num_chunks}: Finished Generating chunk idx")
|
478
|
+
|
479
|
+
# Handle directories:
|
480
|
+
unless old_api
|
481
|
+
while dirs_queue.size > 5000
|
482
|
+
sleep(0.1)
|
483
|
+
end
|
484
|
+
end
|
485
|
+
new_dirs = tree.keys.select { |k| tree[k].nil? }
|
486
|
+
if new_dirs.blank?
|
487
|
+
## we need to send 1 file so we will inflated dirs from in case when we dont have folders in the tree
|
488
|
+
file = tree.keys.find { |k| tree[k] != nil }
|
489
|
+
dirs_queue.push(file) unless old_api
|
490
|
+
end
|
491
|
+
new_dirs.each { |dir| dirs_queue.push dir }
|
492
|
+
|
493
|
+
Cnvrg::Logger.info("chunk #{self_chunk_index} / #{num_chunks}: Getting files info from server")
|
447
494
|
results = request_upload_files(commit_sha1, tree, override, new_branch, partial_commit)
|
495
|
+
Cnvrg::Logger.info("chunk #{self_chunk_index} / #{num_chunks}: Finished Getting files info from server")
|
448
496
|
next unless results
|
449
497
|
|
450
498
|
if results['files'].blank?
|
499
|
+
Cnvrg::Logger.info("chunk #{self_chunk_index} / #{num_chunks}: no files to upload skipping chunk")
|
451
500
|
progress_mutex.synchronize { progressbar.progress += tree.keys.length }
|
452
501
|
next
|
453
502
|
end
|
454
503
|
|
455
|
-
# Handle directories:
|
456
|
-
new_dirs = tree.keys.select {|k| tree[k].nil?}
|
457
|
-
dirs += new_dirs
|
458
|
-
|
459
504
|
files_to_upload = results['files']
|
505
|
+
Cnvrg::Logger.info("chunk #{self_chunk_index} / #{num_chunks}: number of files to upload in this chunk: #{tree.keys.length - files_to_upload.length}")
|
460
506
|
progress_mutex.synchronize {
|
461
507
|
progressbar.progress += tree.keys.length - files_to_upload.length
|
462
508
|
}
|
@@ -468,8 +514,13 @@ module Cnvrg
|
|
468
514
|
file_queue.push tree[key].merge(files_to_upload[key])
|
469
515
|
end
|
470
516
|
end
|
471
|
-
|
517
|
+
|
518
|
+
Cnvrg::Logger.info("Waiting dir_thread to finish")
|
519
|
+
dir_thread.join
|
520
|
+
dirs_queue.close
|
521
|
+
Cnvrg::Logger.info("Waiting progress_thread to finish")
|
472
522
|
progress_threads.each(&:join)
|
523
|
+
Cnvrg::Logger.info("Waiting workers to finish")
|
473
524
|
worker_threads.each(&:join)
|
474
525
|
Thread.report_on_exception = true
|
475
526
|
rescue => e
|
@@ -1209,7 +1260,7 @@ module Cnvrg
|
|
1209
1260
|
false
|
1210
1261
|
end
|
1211
1262
|
|
1212
|
-
def end_commit(commit_sha1, force, success: true, uploaded_files: 0, commit_type: nil)
|
1263
|
+
def end_commit(commit_sha1, force, success: true, uploaded_files: 0, commit_type: nil, auto_cache: false, external_disk: nil)
|
1213
1264
|
counter = 0
|
1214
1265
|
begin
|
1215
1266
|
counter += 1
|
@@ -1221,7 +1272,9 @@ module Cnvrg
|
|
1221
1272
|
force:force,
|
1222
1273
|
success: success,
|
1223
1274
|
uploaded_files: uploaded_files,
|
1224
|
-
commit_type: commit_type
|
1275
|
+
commit_type: commit_type,
|
1276
|
+
auto_cache: auto_cache,
|
1277
|
+
external_disk: external_disk
|
1225
1278
|
}
|
1226
1279
|
)
|
1227
1280
|
is_success = Cnvrg::CLI.is_response_success(response, false)
|
@@ -1255,8 +1308,8 @@ module Cnvrg
|
|
1255
1308
|
response['result']['files']
|
1256
1309
|
end
|
1257
1310
|
|
1258
|
-
def get_clone_chunk(latest_id: nil, chunk_size: 1000, commit: 'latest')
|
1259
|
-
response = Cnvrg::API.request("#{@base_resource}/clone_chunk", 'POST',{commit: commit, chunk_size: chunk_size, latest_id: latest_id})
|
1311
|
+
def get_clone_chunk(latest_id: nil, chunk_size: 1000, commit: 'latest', cache_link: false)
|
1312
|
+
response = Cnvrg::API.request("#{@base_resource}/clone_chunk", 'POST',{commit: commit, chunk_size: chunk_size, latest_id: latest_id, cache_link: cache_link})
|
1260
1313
|
unless Cnvrg::CLI.is_response_success(response, false)
|
1261
1314
|
Cnvrg::Logger.log_info("#{{commit: commit, chunk_size: chunk_size, latest_id: latest_id}}")
|
1262
1315
|
return nil
|
@@ -1323,7 +1376,7 @@ module Cnvrg
|
|
1323
1376
|
end
|
1324
1377
|
end
|
1325
1378
|
|
1326
|
-
def download_multiple_files_s3(files, project_home, conflict: false, progressbar: nil, read_only:false, flatten: false, threads: 15)
|
1379
|
+
def download_multiple_files_s3(files, project_home, conflict: false, progressbar: nil, read_only:false, flatten: false, threads: 15, cache_link: false)
|
1327
1380
|
begin
|
1328
1381
|
refresh_storage_token
|
1329
1382
|
parallel_options = {
|
@@ -1344,10 +1397,18 @@ module Cnvrg
|
|
1344
1397
|
# blob
|
1345
1398
|
local_path = "#{local_path}.conflict" if conflict
|
1346
1399
|
storage_path = f["path"]
|
1347
|
-
|
1348
|
-
|
1349
|
-
|
1350
|
-
|
1400
|
+
# if File.exists? local_path
|
1401
|
+
# Cnvrg::Logger.log_info("Trying to download #{local_path} but its already exists, skipping..")
|
1402
|
+
# next
|
1403
|
+
# end
|
1404
|
+
if cache_link
|
1405
|
+
cached_commits = f['cached_commits']
|
1406
|
+
|
1407
|
+
if cached_commits.present?
|
1408
|
+
next if @downloader.link_file(cached_commits, local_path, @dataset.title, f['name'])
|
1409
|
+
end
|
1410
|
+
end
|
1411
|
+
|
1351
1412
|
resp = @downloader.safe_download(storage_path, local_path)
|
1352
1413
|
Cnvrg::Logger.log_info("Download #{local_path} success resp: #{resp}")
|
1353
1414
|
rescue => e
|
data/lib/cnvrg/dataset.rb
CHANGED
@@ -564,7 +564,8 @@ module Cnvrg
|
|
564
564
|
safe_path = file
|
565
565
|
safe_path = file[1..-1] if file.start_with? "/"
|
566
566
|
|
567
|
-
|
567
|
+
dataset_local_path = self.local_path + "/"
|
568
|
+
label = safe_path.start_with?(dataset_local_path) ? safe_path.sub(dataset_local_path, "") : safe_path
|
568
569
|
label = "#{prefix}/#{label}" if prefix.present?
|
569
570
|
if not Cnvrg::Files.valid_file_name?(label)
|
570
571
|
if cli
|
@@ -598,6 +599,7 @@ module Cnvrg
|
|
598
599
|
}
|
599
600
|
end
|
600
601
|
end
|
602
|
+
|
601
603
|
if prefix.present? #add the prefix as dirs to the files
|
602
604
|
#lets say the prefix is a/b/c so we want that a/, a/b/, a/b/c/ will be in our files_list
|
603
605
|
dirs = prefix.split('/')
|
data/lib/cnvrg/downloader/client.rb
CHANGED
@@ -37,6 +37,21 @@ module Cnvrg
|
|
37
37
|
### need to be implemented..
|
38
38
|
end
|
39
39
|
|
40
|
+
def link_file(cached_commits, local_path, dataset_title, file_name)
|
41
|
+
prepare_download(local_path)
|
42
|
+
cached_commits.each do |cached_commit|
|
43
|
+
nfs_path = "/nfs-disk/#{cached_commit}/#{dataset_title}/#{file_name}"
|
44
|
+
if File.exist? nfs_path
|
45
|
+
FileUtils.ln(nfs_path, local_path)
|
46
|
+
return true
|
47
|
+
end
|
48
|
+
end
|
49
|
+
false
|
50
|
+
rescue => e
|
51
|
+
Cnvrg::Logger.log_error(e)
|
52
|
+
false
|
53
|
+
end
|
54
|
+
|
40
55
|
def safe_download(storage_path, local_path, decrypt: true)
|
41
56
|
safe_operation(local_path) { self.download(storage_path, local_path, decrypt: decrypt) }
|
42
57
|
end
|
data/lib/cnvrg/files.rb
CHANGED
data/lib/cnvrg/project.rb
CHANGED
@@ -448,8 +448,8 @@ module Cnvrg
|
|
448
448
|
next
|
449
449
|
end
|
450
450
|
if File.directory? e
|
451
|
-
|
452
|
-
tree_idx[
|
451
|
+
dir_name = (label.ends_with? "/") ? label : (label + "/")
|
452
|
+
tree_idx[dir_name] = nil
|
453
453
|
else
|
454
454
|
file_in_idx = old_idx[:tree][label] rescue nil
|
455
455
|
last_modified = File.mtime(e).to_f
|
@@ -513,6 +513,7 @@ module Cnvrg
|
|
513
513
|
#upload
|
514
514
|
local_idx = self.generate_idx(deploy: deploy, files: specific_files)
|
515
515
|
end
|
516
|
+
|
516
517
|
commit = local_idx[:commit]
|
517
518
|
tree = local_idx[:tree]
|
518
519
|
ignore_list = self.send_ignore_list()
|
@@ -521,12 +522,12 @@ module Cnvrg
|
|
521
522
|
if tree.present?
|
522
523
|
added += local_idx[:tree].keys
|
523
524
|
end
|
524
|
-
response = {"result" => {"commit" => nil, "tree" => {"added" => added,
|
525
|
-
|
526
|
-
|
527
|
-
|
528
|
-
|
529
|
-
|
525
|
+
response = { "result" => { "commit" => nil, "tree" => { "added" => added,
|
526
|
+
"updated_on_server" => [],
|
527
|
+
"updated_on_local" => [],
|
528
|
+
"update_local" => [],
|
529
|
+
"deleted" => [],
|
530
|
+
"conflicts" => [] } } }
|
530
531
|
return response
|
531
532
|
end
|
532
533
|
#we dont want to send it on download - we only compare between commits sha1 in download.
|
@@ -534,6 +535,7 @@ module Cnvrg
|
|
534
535
|
#the new server doesnt need the tree, but the old probably needs :X
|
535
536
|
local_idx[:tree] = {} if Cnvrg::Helpers.server_version > 0
|
536
537
|
end
|
538
|
+
|
537
539
|
response = Cnvrg::API.request(@base_resource + "status", 'POST', {idx: local_idx, new_branch: new_branch,
|
538
540
|
current_commit: commit, ignore: ignore_list, force: force, in_exp: in_exp, download: download})
|
539
541
|
|
data/lib/cnvrg/version.rb
CHANGED
@@ -1,3 +1,3 @@
|
|
1
1
|
module Cnvrg
|
2
|
-
VERSION = '1.11.21'
|
3
|
-
end
|
2
|
+
VERSION = '1.11.27'
|
3
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cnvrg
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.11.21
|
4
|
+
version: 1.11.27
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Yochay Ettun
|
@@ -10,7 +10,7 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
|
-
date: 2021-
|
13
|
+
date: 2021-03-08 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: bundler
|
@@ -453,7 +453,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
453
453
|
- !ruby/object:Gem::Version
|
454
454
|
version: '0'
|
455
455
|
requirements: []
|
456
|
-
rubygems_version: 3.
|
456
|
+
rubygems_version: 3.1.2
|
457
457
|
signing_key:
|
458
458
|
specification_version: 4
|
459
459
|
summary: A CLI tool for interacting with cnvrg.io.
|