cnvrg 1.11.21 → 1.11.27
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/cnvrg/api.rb +4 -0
- data/lib/cnvrg/cli.rb +22 -12
- data/lib/cnvrg/data.rb +22 -4
- data/lib/cnvrg/datafiles.rb +95 -34
- data/lib/cnvrg/dataset.rb +3 -1
- data/lib/cnvrg/downloader/client.rb +15 -0
- data/lib/cnvrg/files.rb +0 -1
- data/lib/cnvrg/project.rb +10 -8
- data/lib/cnvrg/version.rb +2 -2
- metadata +3 -3
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: a0ea5180b8920a78032ea60f9ccedea54357c3207720ff802b5c323aac6e4773
|
|
4
|
+
data.tar.gz: 54f0e631a050d0232921c0103aba65cd6c0ca48085161ee2137bf3ef975550ea
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 2805218f0460fe9bc1315f489d48fb37ea9795836f379eb5b6ab071c42a10ee76d16decb24c62c518356b35ca58489a9ba7637240c842459ad1ed6a94799633f
|
|
7
|
+
data.tar.gz: ed5bf8a1b5867c2ab6705dede44534074771387c31e8049c12f1cbf5f538c72e6cedbc910378db09ae6f2e1ca23524a54fd4109847c1aa02f4538a341ec4c68b
|
data/lib/cnvrg/api.rb
CHANGED
|
@@ -31,6 +31,10 @@ module Cnvrg
|
|
|
31
31
|
end
|
|
32
32
|
def self.request(resource, method = 'GET', data = {}, parse_request = true)
|
|
33
33
|
resource = URI::encode resource
|
|
34
|
+
|
|
35
|
+
# We need to remoe all double slashes from the url to work with the proxy
|
|
36
|
+
resource = resource.gsub(/[\/]{2,}/, "/").gsub("https:/", "https://").gsub("http:/", "http://")
|
|
37
|
+
|
|
34
38
|
begin
|
|
35
39
|
n = Netrc.read
|
|
36
40
|
rescue => e
|
data/lib/cnvrg/cli.rb
CHANGED
|
@@ -858,7 +858,7 @@ module Cnvrg
|
|
|
858
858
|
method_option :read, :type => :boolean, :aliases => ["-r", "--read"], :default => false
|
|
859
859
|
method_option :remote, :type => :boolean, :aliases => ["-h", "--remote"], :default => false
|
|
860
860
|
method_option :soft, :type => :boolean, :aliases => ["-s", "--soft"], :default => false, :hide => true
|
|
861
|
-
def clone_data(dataset_url, only_tree=false, commit=nil, query=nil, read=false, remote=false, flatten: false, relative: false, soft: false, threads: 15)
|
|
861
|
+
def clone_data(dataset_url, only_tree=false, commit=nil, query=nil, read=false, remote=false, flatten: false, relative: false, soft: false, threads: 15, cache_link: false)
|
|
862
862
|
begin
|
|
863
863
|
verify_logged_in(false)
|
|
864
864
|
log_start(__method__, args, options)
|
|
@@ -904,7 +904,7 @@ module Cnvrg
|
|
|
904
904
|
|
|
905
905
|
commit = response["result"]["commit"]
|
|
906
906
|
files_count = response["result"]["file_count"]
|
|
907
|
-
files = @files.get_clone_chunk(commit: commit)
|
|
907
|
+
files = @files.get_clone_chunk(commit: commit, cache_link: cache_link)
|
|
908
908
|
downloaded_files = 0
|
|
909
909
|
progressbar = ProgressBar.create(:title => "Download Progress",
|
|
910
910
|
:progress_mark => '=',
|
|
@@ -917,7 +917,7 @@ module Cnvrg
|
|
|
917
917
|
|
|
918
918
|
while files['keys'].length > 0
|
|
919
919
|
Cnvrg::Logger.log_info("download multiple files, #{downloaded_files.size} files downloaded")
|
|
920
|
-
@files.download_multiple_files_s3(files, @dataset.local_path, progressbar: progressbar, read_only: read, flatten: flatten, threads: threads)
|
|
920
|
+
@files.download_multiple_files_s3(files, @dataset.local_path, progressbar: progressbar, read_only: read, flatten: flatten, threads: threads, cache_link: cache_link)
|
|
921
921
|
|
|
922
922
|
downloaded_files += files['keys'].length
|
|
923
923
|
files = @files.get_clone_chunk(commit: commit, latest_id: files['latest'])
|
|
@@ -1201,15 +1201,18 @@ module Cnvrg
|
|
|
1201
1201
|
end
|
|
1202
1202
|
|
|
1203
1203
|
desc '', '', :hide => true
|
|
1204
|
-
def data_put(dataset_url, files: [], dir: '', commit: '', chunk_size: 1000, force: false, override: false, threads: 15, message: nil)
|
|
1204
|
+
def data_put(dataset_url, files: [], dir: '', commit: '', chunk_size: 1000, force: false, override: false, threads: 15, message: nil, auto_cache: false, external_disk: nil)
|
|
1205
1205
|
begin
|
|
1206
1206
|
verify_logged_in(false)
|
|
1207
1207
|
log_start(__method__, args, options)
|
|
1208
|
-
|
|
1208
|
+
if auto_cache && external_disk.blank?
|
|
1209
|
+
raise SignalException.new(1, "for auto caching external disk is required")
|
|
1210
|
+
end
|
|
1209
1211
|
owner, slug = get_owner_slug(dataset_url)
|
|
1210
1212
|
@dataset = Dataset.new(dataset_info: {:owner => owner, :slug => slug})
|
|
1211
1213
|
@datafiles = Cnvrg::Datafiles.new(owner, slug, dataset: @dataset)
|
|
1212
1214
|
@files = @datafiles.verify_files_exists(files)
|
|
1215
|
+
@files = @files.uniq { |t| t.gsub('./', '')}
|
|
1213
1216
|
|
|
1214
1217
|
if @files.blank?
|
|
1215
1218
|
raise SignalException.new(1, "Cant find files to upload, exiting.")
|
|
@@ -1227,7 +1230,7 @@ module Cnvrg
|
|
|
1227
1230
|
Cnvrg::Logger.info("Put files in latest commit")
|
|
1228
1231
|
response = @datafiles.last_valid_commit()
|
|
1229
1232
|
unless response #means we failed in the start commit.
|
|
1230
|
-
raise SignalException.new(1, "Cant put files into commit:#{commit}, check the dataset id and
|
|
1233
|
+
raise SignalException.new(1, "Cant put files into commit:#{commit}, check the dataset id and commit")
|
|
1231
1234
|
end
|
|
1232
1235
|
@commit = response['result']['sha1']
|
|
1233
1236
|
else
|
|
@@ -1253,7 +1256,7 @@ module Cnvrg
|
|
|
1253
1256
|
raise SignalException.new(1, res.msg)
|
|
1254
1257
|
end
|
|
1255
1258
|
Cnvrg::Logger.info("Saving commit on server")
|
|
1256
|
-
res = @datafiles.end_commit(@commit,force, success: true, commit_type: "put")
|
|
1259
|
+
res = @datafiles.end_commit(@commit,force, success: true, commit_type: "put", auto_cache: auto_cache, external_disk: external_disk)
|
|
1257
1260
|
msg = res['result']
|
|
1258
1261
|
response = Cnvrg::Result.new(Cnvrg::CLI.is_response_success(res, true), msg)
|
|
1259
1262
|
unless response.is_success?
|
|
@@ -1261,19 +1264,25 @@ module Cnvrg
|
|
|
1261
1264
|
end
|
|
1262
1265
|
|
|
1263
1266
|
log_message("Uploading files finished Successfully", Thor::Shell::Color::GREEN)
|
|
1267
|
+
if msg['cache_error'].present?
|
|
1268
|
+
log_message("Couldn't cache commit: #{msg['cache_error']}", Thor::Shell::Color::YELLOW)
|
|
1269
|
+
end
|
|
1264
1270
|
rescue SignalException => e
|
|
1265
1271
|
log_message(e.message, Thor::Shell::Color::RED)
|
|
1266
1272
|
return false
|
|
1267
1273
|
end
|
|
1268
1274
|
end
|
|
1269
1275
|
|
|
1270
|
-
|
|
1271
1276
|
desc '', '', :hide => true
|
|
1272
|
-
def data_rm(dataset_url, regex_list: [], commit: '', message: nil)
|
|
1277
|
+
def data_rm(dataset_url, regex_list: [], commit: '', message: nil, auto_cache: false, external_disk: nil)
|
|
1273
1278
|
begin
|
|
1274
1279
|
verify_logged_in(false)
|
|
1275
1280
|
log_start(__method__, args, options)
|
|
1276
1281
|
|
|
1282
|
+
if auto_cache && external_disk.blank?
|
|
1283
|
+
raise SignalException.new(1, "for auto caching external disk is required")
|
|
1284
|
+
end
|
|
1285
|
+
|
|
1277
1286
|
owner, slug = get_owner_slug(dataset_url)
|
|
1278
1287
|
@dataset = Dataset.new(dataset_info: {:owner => owner, :slug => slug})
|
|
1279
1288
|
@datafiles = Cnvrg::Datafiles.new(owner, slug, dataset: @dataset)
|
|
@@ -1309,7 +1318,7 @@ module Cnvrg
|
|
|
1309
1318
|
offset += chunk_size
|
|
1310
1319
|
end
|
|
1311
1320
|
|
|
1312
|
-
res = @datafiles.end_commit(@commit,false, success: true)
|
|
1321
|
+
res = @datafiles.end_commit(@commit,false, success: true, auto_cache: auto_cache, external_disk: external_disk)
|
|
1313
1322
|
msg = res['result']
|
|
1314
1323
|
response = Cnvrg::Result.new(Cnvrg::CLI.is_response_success(res, true), msg)
|
|
1315
1324
|
unless response.is_success?
|
|
@@ -1317,6 +1326,9 @@ module Cnvrg
|
|
|
1317
1326
|
end
|
|
1318
1327
|
|
|
1319
1328
|
log_message("Deleting files finished Successfully", Thor::Shell::Color::GREEN)
|
|
1329
|
+
if msg['cache_error'].present?
|
|
1330
|
+
log_message("Couldn't cache commit: #{msg['cache_error']}", Thor::Shell::Color::YELLOW)
|
|
1331
|
+
end
|
|
1320
1332
|
rescue SignalException => e
|
|
1321
1333
|
log_message(e.message, Thor::Shell::Color::RED)
|
|
1322
1334
|
return false
|
|
@@ -2308,7 +2320,6 @@ module Cnvrg
|
|
|
2308
2320
|
@project = Project.new(get_project_home)
|
|
2309
2321
|
chunk_size = chunk_size ? chunk_size : options["chunk_size"]
|
|
2310
2322
|
|
|
2311
|
-
|
|
2312
2323
|
# Enable local/experiment exception logging
|
|
2313
2324
|
suppress_exceptions = suppress_exceptions ? suppress_exceptions : options[:suppress_exceptions]
|
|
2314
2325
|
if in_exp
|
|
@@ -2346,7 +2357,6 @@ module Cnvrg
|
|
|
2346
2357
|
log_message("#{check} Project is up to date", Thor::Shell::Color::GREEN, (((options["sync"] or sync) and !direct) ? false : true))
|
|
2347
2358
|
return true
|
|
2348
2359
|
end
|
|
2349
|
-
force = true
|
|
2350
2360
|
end
|
|
2351
2361
|
|
|
2352
2362
|
if ignore.nil? or ignore.empty?
|
data/lib/cnvrg/data.rb
CHANGED
|
@@ -81,7 +81,6 @@ module Cnvrg
|
|
|
81
81
|
end
|
|
82
82
|
end
|
|
83
83
|
|
|
84
|
-
|
|
85
84
|
desc "data upload", "Upload files from local dataset directory to remote server"
|
|
86
85
|
method_option :verbose, :type => :boolean, :aliases => ["-v"], :default => false
|
|
87
86
|
method_option :new_branch, :type => :boolean, :aliases => ["-nb"], :desc => "create new branch of commits"
|
|
@@ -155,6 +154,7 @@ module Cnvrg
|
|
|
155
154
|
method_option :flatten, :type => :boolean, :aliases => ["-f", "--flatten"], :default => false
|
|
156
155
|
method_option :soft, :type => :boolean, :aliases => ["-s", "--soft"], :default => false, :hide => true
|
|
157
156
|
method_option :threads, :type => :numeric, :aliases => ["--threads"], :default => 15
|
|
157
|
+
method_option :cache_link, :type => :boolean, :aliases => ["--cache_link"], :default => false, :hide => true
|
|
158
158
|
def clone(dataset_url)
|
|
159
159
|
cli = Cnvrg::CLI.new()
|
|
160
160
|
only_tree =options[:only_tree]
|
|
@@ -165,6 +165,7 @@ module Cnvrg
|
|
|
165
165
|
soft = options[:soft]
|
|
166
166
|
flatten = options[:flatten]
|
|
167
167
|
threads = options[:threads]
|
|
168
|
+
cache_link = options[:cache_link]
|
|
168
169
|
cli.clone_data(
|
|
169
170
|
dataset_url,
|
|
170
171
|
only_tree=only_tree,
|
|
@@ -175,7 +176,8 @@ module Cnvrg
|
|
|
175
176
|
flatten: flatten,
|
|
176
177
|
relative: options[:relative],
|
|
177
178
|
soft: soft,
|
|
178
|
-
threads: threads
|
|
179
|
+
threads: threads,
|
|
180
|
+
cache_link: cache_link
|
|
179
181
|
)
|
|
180
182
|
end
|
|
181
183
|
|
|
@@ -220,6 +222,8 @@ module Cnvrg
|
|
|
220
222
|
method_option :threads, :type => :numeric, :aliases => ["-t","--threads"], :default => 15
|
|
221
223
|
method_option :chunk_size, :type => :numeric, :aliases => ["-cs","--chunk"], :default => 1000
|
|
222
224
|
method_option :message, :type => :string, :aliases => ["--message"], :desc => "create commit with message", :default => nil
|
|
225
|
+
method_option :auto_cache, :type => :boolean, :aliases => ["--auto_cache"], :desc => "auto_cache", :default => false
|
|
226
|
+
method_option :external_disk, :type => :string, :aliases => ["--external_disk"], :desc => "external_disk_title", :default => nil
|
|
223
227
|
|
|
224
228
|
def put(dataset_url, *files)
|
|
225
229
|
cli = Cnvrg::CLI.new()
|
|
@@ -231,6 +235,8 @@ module Cnvrg
|
|
|
231
235
|
message = options[:message]
|
|
232
236
|
threads = options[:threads]
|
|
233
237
|
chunk_size = options[:chunk_size]
|
|
238
|
+
auto_cache = options[:auto_cache]
|
|
239
|
+
external_disk = options[:external_disk]
|
|
234
240
|
cli.data_put(
|
|
235
241
|
dataset_url,
|
|
236
242
|
files: files,
|
|
@@ -240,16 +246,28 @@ module Cnvrg
|
|
|
240
246
|
override: override,
|
|
241
247
|
threads: threads,
|
|
242
248
|
chunk_size: chunk_size,
|
|
243
|
-
message: message
|
|
249
|
+
message: message,
|
|
250
|
+
auto_cache: auto_cache,
|
|
251
|
+
external_disk: external_disk
|
|
244
252
|
)
|
|
245
253
|
end
|
|
246
254
|
|
|
247
255
|
desc 'data rm DATASET_URL FILES_PREFIX', 'Delete selected files from remote server'
|
|
248
256
|
method_option :message, :type => :string, :aliases => ["--message"], :desc => "create commit with message", :default => nil
|
|
257
|
+
method_option :auto_cache, :type => :boolean, :aliases => ["--auto_cache"], :desc => "auto_cache", :default => false
|
|
258
|
+
method_option :external_disk, :type => :string, :aliases => ["--external_disk"], :desc => "external_disk_title", :default => nil
|
|
249
259
|
def rm(dataset_url, *regex_list)
|
|
250
260
|
cli = Cnvrg::CLI.new()
|
|
251
261
|
message = options[:message]
|
|
252
|
-
|
|
262
|
+
auto_cache = options[:auto_cache]
|
|
263
|
+
external_disk = options[:external_disk]
|
|
264
|
+
cli.data_rm(
|
|
265
|
+
dataset_url,
|
|
266
|
+
regex_list: regex_list,
|
|
267
|
+
message: message,
|
|
268
|
+
auto_cache: auto_cache,
|
|
269
|
+
external_disk: external_disk
|
|
270
|
+
)
|
|
253
271
|
end
|
|
254
272
|
|
|
255
273
|
desc 'data clone_query --query=QUERY_SLUG DATASET_URL', 'Clone dataset with specific query'
|
data/lib/cnvrg/datafiles.rb
CHANGED
|
@@ -47,6 +47,7 @@ module Cnvrg
|
|
|
47
47
|
file = file[0..-2] if file.end_with? '/'
|
|
48
48
|
if File.exists? file
|
|
49
49
|
if File.directory? file
|
|
50
|
+
paths << file unless file == '.'
|
|
50
51
|
paths += Dir.glob("#{file}/**/*")
|
|
51
52
|
else
|
|
52
53
|
paths << file
|
|
@@ -344,18 +345,20 @@ module Cnvrg
|
|
|
344
345
|
cli = CLI.new
|
|
345
346
|
cli.log_message("Using #{threads} threads with chunk size of #{chunk_size}.", Thor::Shell::Color::GREEN)
|
|
346
347
|
|
|
347
|
-
|
|
348
|
+
num_files = files.size
|
|
349
|
+
progressbar = create_progressbar("Upload Progress", num_files)
|
|
348
350
|
cli = CLI.new
|
|
349
351
|
|
|
350
352
|
# Vars to handle the parallelism
|
|
351
353
|
progress_mutex = Mutex.new
|
|
352
354
|
file_queue = Queue.new
|
|
353
355
|
progress_queue = Queue.new
|
|
356
|
+
dirs_queue = Queue.new
|
|
354
357
|
worker_threads = []
|
|
355
358
|
progress_threads = []
|
|
359
|
+
old_api = false
|
|
356
360
|
|
|
357
361
|
# Vars to keep track of uploaded files and directories
|
|
358
|
-
dirs = []
|
|
359
362
|
uploaded_files = []
|
|
360
363
|
|
|
361
364
|
begin
|
|
@@ -378,6 +381,36 @@ module Cnvrg
|
|
|
378
381
|
end
|
|
379
382
|
end
|
|
380
383
|
|
|
384
|
+
dir_thread = Thread.new do
|
|
385
|
+
dirs_to_create = []
|
|
386
|
+
loop do
|
|
387
|
+
dir = dirs_queue.deq(non_block: true) rescue nil
|
|
388
|
+
if dir.nil? && !progressbar.finished?
|
|
389
|
+
sleep 0.2
|
|
390
|
+
Cnvrg::Logger.info("directories thread status: progressbar.finished? #{progressbar.finished?} || dirs_queue.empty? #{dirs_queue.empty?} #{dirs_queue.size} || dirs_to_create.empty? #{dirs_to_create.empty?} #{dirs_to_create.size}")
|
|
391
|
+
else
|
|
392
|
+
dirs_to_create << dir
|
|
393
|
+
|
|
394
|
+
if dirs_to_create.size >= 1000 || progressbar.finished?
|
|
395
|
+
resp = Cnvrg::API.request(@base_resource + "create_dirs", "POST", { dirs: dirs_to_create, commit_sha1: commit_sha1 })
|
|
396
|
+
Cnvrg::Logger.info("uploaded directories chunk, finished with #{resp}")
|
|
397
|
+
if resp == false # if resp is false it means 404 which is old server
|
|
398
|
+
old_api = true
|
|
399
|
+
break
|
|
400
|
+
end
|
|
401
|
+
unless Cnvrg::CLI.is_response_success(resp, false)
|
|
402
|
+
dirs_to_create = []
|
|
403
|
+
time = Time.current
|
|
404
|
+
Cnvrg::Logger.log_error_message("Failed to create dirs: #{time}, #{resp.try(:fetch, "message")}")
|
|
405
|
+
next
|
|
406
|
+
end
|
|
407
|
+
dirs_to_create = []
|
|
408
|
+
end
|
|
409
|
+
break if progressbar.finished? && dirs_queue.empty? && dirs_to_create.empty?
|
|
410
|
+
end
|
|
411
|
+
end
|
|
412
|
+
end
|
|
413
|
+
|
|
381
414
|
# init the thread that handles the file upload progress and saving them in the server
|
|
382
415
|
threads.times do |i|
|
|
383
416
|
progress_threads[i] = Thread.new do
|
|
@@ -385,52 +418,46 @@ module Cnvrg
|
|
|
385
418
|
file = progress_queue.deq(non_block: true) rescue nil # to prevent deadlocks
|
|
386
419
|
unless file.nil?
|
|
387
420
|
blob_ids = []
|
|
388
|
-
dirs_to_upload = []
|
|
389
|
-
|
|
390
421
|
progress_mutex.synchronize {
|
|
391
422
|
progressbar.progress += 1
|
|
392
423
|
uploaded_files.append(file) if file[:success]
|
|
393
424
|
|
|
394
425
|
if uploaded_files.size >= chunk_size or progressbar.finished?
|
|
395
426
|
blob_ids = uploaded_files.map {|f| f['bv_id']}
|
|
396
|
-
dirs_to_upload = dirs.clone
|
|
397
427
|
uploaded_files = []
|
|
398
|
-
dirs = []
|
|
399
428
|
end
|
|
400
429
|
}
|
|
401
430
|
|
|
402
431
|
if blob_ids.present?
|
|
432
|
+
random_id = (0...10).map { ('a'..'z').to_a[rand(26)] }.join
|
|
403
433
|
refresh_storage_token
|
|
404
|
-
Cnvrg::Logger.info("Finished
|
|
405
|
-
|
|
406
|
-
|
|
434
|
+
Cnvrg::Logger.info("chunk #{random_id}: Finished uploading chunk of #{chunk_size} files, Sending Upload files save")
|
|
407
435
|
retry_count = 0
|
|
408
436
|
loop do
|
|
409
|
-
upload_resp = Cnvrg::API.request(@base_resource + "upload_files_save", "POST", {commit: commit_sha1, blob_ids: blob_ids
|
|
437
|
+
upload_resp = Cnvrg::API.request(@base_resource + "upload_files_save", "POST", {commit: commit_sha1, blob_ids: blob_ids})
|
|
410
438
|
|
|
411
439
|
if not (Cnvrg::CLI.is_response_success(upload_resp, false))
|
|
412
440
|
retry_count += 1
|
|
413
|
-
Cnvrg::Logger.log_error_message("Failed request save files: #{Time.current}, retry: #{retry_count}")
|
|
414
|
-
Cnvrg::Logger.info("Got an error message from server, #{upload_resp.try(:fetch, "message")}")
|
|
441
|
+
Cnvrg::Logger.log_error_message("chunk #{random_id}: Failed request save files: #{Time.current}, retry: #{retry_count}")
|
|
415
442
|
if retry_count > 20
|
|
416
|
-
puts "Failed to save files: #{Time.current}, trying next chunk"
|
|
443
|
+
puts "chunk #{random_id}: Failed to save files: #{Time.current}, trying next chunk"
|
|
417
444
|
break
|
|
418
445
|
end
|
|
419
446
|
sleep 5
|
|
420
447
|
next
|
|
421
448
|
end
|
|
422
|
-
Cnvrg::Logger.info("Chunk saved on server")
|
|
449
|
+
Cnvrg::Logger.info("chunk #{random_id}: Chunk saved on server")
|
|
423
450
|
break
|
|
424
451
|
end
|
|
425
452
|
end
|
|
426
453
|
else
|
|
427
454
|
sleep(0.1)
|
|
428
455
|
end
|
|
429
|
-
|
|
456
|
+
Cnvrg::Logger.info("progress_threads: progressbar.finished? #{progressbar.finished?}")
|
|
430
457
|
if progressbar.finished?
|
|
431
458
|
Cnvrg::Logger.info("Progress bar finished closing queues")
|
|
432
|
-
file_queue.close
|
|
433
|
-
progress_queue.close
|
|
459
|
+
file_queue.close
|
|
460
|
+
progress_queue.close
|
|
434
461
|
Thread.exit
|
|
435
462
|
end
|
|
436
463
|
end
|
|
@@ -439,24 +466,43 @@ module Cnvrg
|
|
|
439
466
|
|
|
440
467
|
file_chunks = files.each_slice(chunk_size).to_a
|
|
441
468
|
# Fetch the required files from the server:
|
|
469
|
+
num_chunks = (num_files / 1000.0).ceil
|
|
470
|
+
chunk_index = 0
|
|
442
471
|
Parallel.map((file_chunks), in_threads: threads) do |chunk|
|
|
443
|
-
|
|
444
|
-
|
|
472
|
+
chunk_index += 1
|
|
473
|
+
self_chunk_index = chunk_index
|
|
474
|
+
files_chunk = chunk.map { |p| p.gsub(/^\.\//, '') }
|
|
475
|
+
Cnvrg::Logger.info("chunk #{self_chunk_index} / #{num_chunks}: Generating chunk idx")
|
|
445
476
|
tree = @dataset.generate_chunked_idx(files_chunk, prefix: prefix, threads: threads, cli: cli)
|
|
446
|
-
Cnvrg::Logger.info("
|
|
477
|
+
Cnvrg::Logger.info("chunk #{self_chunk_index} / #{num_chunks}: Finished Generating chunk idx")
|
|
478
|
+
|
|
479
|
+
# Handle directories:
|
|
480
|
+
unless old_api
|
|
481
|
+
while dirs_queue.size > 5000
|
|
482
|
+
sleep(0.1)
|
|
483
|
+
end
|
|
484
|
+
end
|
|
485
|
+
new_dirs = tree.keys.select { |k| tree[k].nil? }
|
|
486
|
+
if new_dirs.blank?
|
|
487
|
+
## we need to send 1 file so we will inflated dirs from in case when we dont have folders in the tree
|
|
488
|
+
file = tree.keys.find { |k| tree[k] != nil }
|
|
489
|
+
dirs_queue.push(file) unless old_api
|
|
490
|
+
end
|
|
491
|
+
new_dirs.each { |dir| dirs_queue.push dir }
|
|
492
|
+
|
|
493
|
+
Cnvrg::Logger.info("chunk #{self_chunk_index} / #{num_chunks}: Getting files info from server")
|
|
447
494
|
results = request_upload_files(commit_sha1, tree, override, new_branch, partial_commit)
|
|
495
|
+
Cnvrg::Logger.info("chunk #{self_chunk_index} / #{num_chunks}: Finished Getting files info from server")
|
|
448
496
|
next unless results
|
|
449
497
|
|
|
450
498
|
if results['files'].blank?
|
|
499
|
+
Cnvrg::Logger.info("chunk #{self_chunk_index} / #{num_chunks}: no files to upload skipping chunk")
|
|
451
500
|
progress_mutex.synchronize { progressbar.progress += tree.keys.length }
|
|
452
501
|
next
|
|
453
502
|
end
|
|
454
503
|
|
|
455
|
-
# Handle directories:
|
|
456
|
-
new_dirs = tree.keys.select {|k| tree[k].nil?}
|
|
457
|
-
dirs += new_dirs
|
|
458
|
-
|
|
459
504
|
files_to_upload = results['files']
|
|
505
|
+
Cnvrg::Logger.info("chunk #{self_chunk_index} / #{num_chunks}: number of files to upload in this chunk: #{tree.keys.length - files_to_upload.length}")
|
|
460
506
|
progress_mutex.synchronize {
|
|
461
507
|
progressbar.progress += tree.keys.length - files_to_upload.length
|
|
462
508
|
}
|
|
@@ -468,8 +514,13 @@ module Cnvrg
|
|
|
468
514
|
file_queue.push tree[key].merge(files_to_upload[key])
|
|
469
515
|
end
|
|
470
516
|
end
|
|
471
|
-
|
|
517
|
+
|
|
518
|
+
Cnvrg::Logger.info("Waiting dir_thread to finish")
|
|
519
|
+
dir_thread.join
|
|
520
|
+
dirs_queue.close
|
|
521
|
+
Cnvrg::Logger.info("Waiting progress_thread to finish")
|
|
472
522
|
progress_threads.each(&:join)
|
|
523
|
+
Cnvrg::Logger.info("Waiting workers to finish")
|
|
473
524
|
worker_threads.each(&:join)
|
|
474
525
|
Thread.report_on_exception = true
|
|
475
526
|
rescue => e
|
|
@@ -1209,7 +1260,7 @@ module Cnvrg
|
|
|
1209
1260
|
false
|
|
1210
1261
|
end
|
|
1211
1262
|
|
|
1212
|
-
def end_commit(commit_sha1, force, success: true, uploaded_files: 0, commit_type: nil)
|
|
1263
|
+
def end_commit(commit_sha1, force, success: true, uploaded_files: 0, commit_type: nil, auto_cache: false, external_disk: nil)
|
|
1213
1264
|
counter = 0
|
|
1214
1265
|
begin
|
|
1215
1266
|
counter += 1
|
|
@@ -1221,7 +1272,9 @@ module Cnvrg
|
|
|
1221
1272
|
force:force,
|
|
1222
1273
|
success: success,
|
|
1223
1274
|
uploaded_files: uploaded_files,
|
|
1224
|
-
commit_type: commit_type
|
|
1275
|
+
commit_type: commit_type,
|
|
1276
|
+
auto_cache: auto_cache,
|
|
1277
|
+
external_disk: external_disk
|
|
1225
1278
|
}
|
|
1226
1279
|
)
|
|
1227
1280
|
is_success = Cnvrg::CLI.is_response_success(response, false)
|
|
@@ -1255,8 +1308,8 @@ module Cnvrg
|
|
|
1255
1308
|
response['result']['files']
|
|
1256
1309
|
end
|
|
1257
1310
|
|
|
1258
|
-
def get_clone_chunk(latest_id: nil, chunk_size: 1000, commit: 'latest')
|
|
1259
|
-
response = Cnvrg::API.request("#{@base_resource}/clone_chunk", 'POST',{commit: commit, chunk_size: chunk_size, latest_id: latest_id})
|
|
1311
|
+
def get_clone_chunk(latest_id: nil, chunk_size: 1000, commit: 'latest', cache_link: false)
|
|
1312
|
+
response = Cnvrg::API.request("#{@base_resource}/clone_chunk", 'POST',{commit: commit, chunk_size: chunk_size, latest_id: latest_id, cache_link: cache_link})
|
|
1260
1313
|
unless Cnvrg::CLI.is_response_success(response, false)
|
|
1261
1314
|
Cnvrg::Logger.log_info("#{{commit: commit, chunk_size: chunk_size, latest_id: latest_id}}")
|
|
1262
1315
|
return nil
|
|
@@ -1323,7 +1376,7 @@ module Cnvrg
|
|
|
1323
1376
|
end
|
|
1324
1377
|
end
|
|
1325
1378
|
|
|
1326
|
-
def download_multiple_files_s3(files, project_home, conflict: false, progressbar: nil, read_only:false, flatten: false, threads: 15)
|
|
1379
|
+
def download_multiple_files_s3(files, project_home, conflict: false, progressbar: nil, read_only:false, flatten: false, threads: 15, cache_link: false)
|
|
1327
1380
|
begin
|
|
1328
1381
|
refresh_storage_token
|
|
1329
1382
|
parallel_options = {
|
|
@@ -1344,10 +1397,18 @@ module Cnvrg
|
|
|
1344
1397
|
# blob
|
|
1345
1398
|
local_path = "#{local_path}.conflict" if conflict
|
|
1346
1399
|
storage_path = f["path"]
|
|
1347
|
-
|
|
1348
|
-
|
|
1349
|
-
|
|
1350
|
-
|
|
1400
|
+
# if File.exists? local_path
|
|
1401
|
+
# Cnvrg::Logger.log_info("Trying to download #{local_path} but its already exists, skipping..")
|
|
1402
|
+
# next
|
|
1403
|
+
# end
|
|
1404
|
+
if cache_link
|
|
1405
|
+
cached_commits = f['cached_commits']
|
|
1406
|
+
|
|
1407
|
+
if cached_commits.present?
|
|
1408
|
+
next if @downloader.link_file(cached_commits, local_path, @dataset.title, f['name'])
|
|
1409
|
+
end
|
|
1410
|
+
end
|
|
1411
|
+
|
|
1351
1412
|
resp = @downloader.safe_download(storage_path, local_path)
|
|
1352
1413
|
Cnvrg::Logger.log_info("Download #{local_path} success resp: #{resp}")
|
|
1353
1414
|
rescue => e
|
data/lib/cnvrg/dataset.rb
CHANGED
|
@@ -564,7 +564,8 @@ module Cnvrg
|
|
|
564
564
|
safe_path = file
|
|
565
565
|
safe_path = file[1..-1] if file.start_with? "/"
|
|
566
566
|
|
|
567
|
-
|
|
567
|
+
dataset_local_path = self.local_path + "/"
|
|
568
|
+
label = safe_path.start_with?(dataset_local_path) ? safe_path.sub(dataset_local_path, "") : safe_path
|
|
568
569
|
label = "#{prefix}/#{label}" if prefix.present?
|
|
569
570
|
if not Cnvrg::Files.valid_file_name?(label)
|
|
570
571
|
if cli
|
|
@@ -598,6 +599,7 @@ module Cnvrg
|
|
|
598
599
|
}
|
|
599
600
|
end
|
|
600
601
|
end
|
|
602
|
+
|
|
601
603
|
if prefix.present? #add the prefix as dirs to the files
|
|
602
604
|
#lets say the prefix is a/b/c so we want that a/, a/b/, a/b/c/ will be in our files_list
|
|
603
605
|
dirs = prefix.split('/')
|
|
data/lib/cnvrg/downloader/client.rb
CHANGED
|
@@ -37,6 +37,21 @@ module Cnvrg
|
|
|
37
37
|
### need to be implemented..
|
|
38
38
|
end
|
|
39
39
|
|
|
40
|
+
def link_file(cached_commits, local_path, dataset_title, file_name)
|
|
41
|
+
prepare_download(local_path)
|
|
42
|
+
cached_commits.each do |cached_commit|
|
|
43
|
+
nfs_path = "/nfs-disk/#{cached_commit}/#{dataset_title}/#{file_name}"
|
|
44
|
+
if File.exist? nfs_path
|
|
45
|
+
FileUtils.ln(nfs_path, local_path)
|
|
46
|
+
return true
|
|
47
|
+
end
|
|
48
|
+
end
|
|
49
|
+
false
|
|
50
|
+
rescue => e
|
|
51
|
+
Cnvrg::Logger.log_error(e)
|
|
52
|
+
false
|
|
53
|
+
end
|
|
54
|
+
|
|
40
55
|
def safe_download(storage_path, local_path, decrypt: true)
|
|
41
56
|
safe_operation(local_path) { self.download(storage_path, local_path, decrypt: decrypt) }
|
|
42
57
|
end
|
data/lib/cnvrg/files.rb
CHANGED
data/lib/cnvrg/project.rb
CHANGED
|
@@ -448,8 +448,8 @@ module Cnvrg
|
|
|
448
448
|
next
|
|
449
449
|
end
|
|
450
450
|
if File.directory? e
|
|
451
|
-
|
|
452
|
-
tree_idx[
|
|
451
|
+
dir_name = (label.ends_with? "/") ? label : (label + "/")
|
|
452
|
+
tree_idx[dir_name] = nil
|
|
453
453
|
else
|
|
454
454
|
file_in_idx = old_idx[:tree][label] rescue nil
|
|
455
455
|
last_modified = File.mtime(e).to_f
|
|
@@ -513,6 +513,7 @@ module Cnvrg
|
|
|
513
513
|
#upload
|
|
514
514
|
local_idx = self.generate_idx(deploy: deploy, files: specific_files)
|
|
515
515
|
end
|
|
516
|
+
|
|
516
517
|
commit = local_idx[:commit]
|
|
517
518
|
tree = local_idx[:tree]
|
|
518
519
|
ignore_list = self.send_ignore_list()
|
|
@@ -521,12 +522,12 @@ module Cnvrg
|
|
|
521
522
|
if tree.present?
|
|
522
523
|
added += local_idx[:tree].keys
|
|
523
524
|
end
|
|
524
|
-
response = {"result" => {"commit" => nil, "tree" => {"added" => added,
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
|
|
525
|
+
response = { "result" => { "commit" => nil, "tree" => { "added" => added,
|
|
526
|
+
"updated_on_server" => [],
|
|
527
|
+
"updated_on_local" => [],
|
|
528
|
+
"update_local" => [],
|
|
529
|
+
"deleted" => [],
|
|
530
|
+
"conflicts" => [] } } }
|
|
530
531
|
return response
|
|
531
532
|
end
|
|
532
533
|
#we dont want to send it on download - we only compare between commits sha1 in download.
|
|
@@ -534,6 +535,7 @@ module Cnvrg
|
|
|
534
535
|
#the new server doesnt need the tree, but the old probably needs :X
|
|
535
536
|
local_idx[:tree] = {} if Cnvrg::Helpers.server_version > 0
|
|
536
537
|
end
|
|
538
|
+
|
|
537
539
|
response = Cnvrg::API.request(@base_resource + "status", 'POST', {idx: local_idx, new_branch: new_branch,
|
|
538
540
|
current_commit: commit, ignore: ignore_list, force: force, in_exp: in_exp, download: download})
|
|
539
541
|
|
data/lib/cnvrg/version.rb
CHANGED
|
@@ -1,3 +1,3 @@
|
|
|
1
1
|
module Cnvrg
|
|
2
|
-
VERSION = '1.11.21'
|
|
3
|
-
end
|
|
2
|
+
VERSION = '1.11.27'
|
|
3
|
+
end
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: cnvrg
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.11.21
|
|
4
|
+
version: 1.11.27
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Yochay Ettun
|
|
@@ -10,7 +10,7 @@ authors:
|
|
|
10
10
|
autorequire:
|
|
11
11
|
bindir: bin
|
|
12
12
|
cert_chain: []
|
|
13
|
-
date: 2021-
|
|
13
|
+
date: 2021-03-08 00:00:00.000000000 Z
|
|
14
14
|
dependencies:
|
|
15
15
|
- !ruby/object:Gem::Dependency
|
|
16
16
|
name: bundler
|
|
@@ -453,7 +453,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
453
453
|
- !ruby/object:Gem::Version
|
|
454
454
|
version: '0'
|
|
455
455
|
requirements: []
|
|
456
|
-
rubygems_version: 3.
|
|
456
|
+
rubygems_version: 3.1.2
|
|
457
457
|
signing_key:
|
|
458
458
|
specification_version: 4
|
|
459
459
|
summary: A CLI tool for interacting with cnvrg.io.
|