cnvrg 1.9.9.9.7

Sign up to get free protection for your applications and to get access to all the features.
Files changed (43) hide show
  1. checksums.yaml +7 -0
  2. data/bin/cnvrg +9 -0
  3. data/cnvrg.gemspec +47 -0
  4. data/lib/cnvrg.rb +7 -0
  5. data/lib/cnvrg/Images.rb +351 -0
  6. data/lib/cnvrg/api.rb +247 -0
  7. data/lib/cnvrg/api_v2.rb +14 -0
  8. data/lib/cnvrg/auth.rb +79 -0
  9. data/lib/cnvrg/cli.rb +5715 -0
  10. data/lib/cnvrg/cli/flow.rb +166 -0
  11. data/lib/cnvrg/cli/library_cli.rb +33 -0
  12. data/lib/cnvrg/cli/subcommand.rb +28 -0
  13. data/lib/cnvrg/cli/task.rb +116 -0
  14. data/lib/cnvrg/colors.rb +8 -0
  15. data/lib/cnvrg/connect_job_ssh.rb +31 -0
  16. data/lib/cnvrg/data.rb +335 -0
  17. data/lib/cnvrg/datafiles.rb +1325 -0
  18. data/lib/cnvrg/dataset.rb +892 -0
  19. data/lib/cnvrg/downloader/client.rb +101 -0
  20. data/lib/cnvrg/downloader/clients/azure_client.rb +45 -0
  21. data/lib/cnvrg/downloader/clients/gcp_client.rb +50 -0
  22. data/lib/cnvrg/downloader/clients/s3_client.rb +78 -0
  23. data/lib/cnvrg/experiment.rb +209 -0
  24. data/lib/cnvrg/files.rb +1047 -0
  25. data/lib/cnvrg/flow.rb +137 -0
  26. data/lib/cnvrg/helpers.rb +422 -0
  27. data/lib/cnvrg/helpers/agent.rb +188 -0
  28. data/lib/cnvrg/helpers/executer.rb +213 -0
  29. data/lib/cnvrg/hyper.rb +21 -0
  30. data/lib/cnvrg/image.rb +113 -0
  31. data/lib/cnvrg/image_cli.rb +25 -0
  32. data/lib/cnvrg/job_cli.rb +73 -0
  33. data/lib/cnvrg/job_ssh.rb +48 -0
  34. data/lib/cnvrg/logger.rb +111 -0
  35. data/lib/cnvrg/org_helpers.rb +5 -0
  36. data/lib/cnvrg/project.rb +822 -0
  37. data/lib/cnvrg/result.rb +29 -0
  38. data/lib/cnvrg/runner.rb +49 -0
  39. data/lib/cnvrg/ssh.rb +94 -0
  40. data/lib/cnvrg/storage.rb +128 -0
  41. data/lib/cnvrg/task.rb +165 -0
  42. data/lib/cnvrg/version.rb +3 -0
  43. metadata +460 -0
@@ -0,0 +1,892 @@
1
+ require 'fileutils'
2
+ module Cnvrg
3
+ class Dataset
4
+ attr_reader :slug, :owner, :title, :local_path, :working_dir
5
+
6
+ RemoteURL ||= "https://cnvrg.io"
7
+ IDXParallelThreads ||= Cnvrg::Helpers.parallel_threads
8
+ IDXParallelProcesses ||= Parallel.processor_count
9
+
10
# Builds a dataset handle from one of three sources, tried in this order:
#   1. project_home - a local dataset directory; fields come from its .cnvrg/config.yml
#   2. dataset_info - a hash carrying :slug and :owner
#   3. dataset_url  - a URL parsed by Cnvrg::Helpers.extract_owner_slug_from_url
# Any StandardError raised while resolving is swallowed, leaving the
# not-yet-assigned fields nil.
def initialize(project_home = '', dataset_url: '', dataset_info: '')
  @info = {}
  if project_home.present?
    @local_path = project_home
    @working_dir = project_home
    settings = YAML.load_file(project_home + "/.cnvrg/config.yml")
    @title = settings[:dataset_name]
    @slug = settings[:dataset_slug]
    @owner = settings[:owner]
  elsif dataset_info.present?
    # The info hash carries no title, so the slug doubles as the title.
    @title = dataset_info[:slug]
    @slug = dataset_info[:slug]
    @owner = dataset_info[:owner]
    @local_path = Dir.pwd
  else
    parsed_owner, parsed_slug = Cnvrg::Helpers.extract_owner_slug_from_url(dataset_url, 'datasets')
    @title = parsed_slug
    @slug = parsed_slug
    @owner = parsed_owner
    @local_path = Dir.pwd
  end
rescue StandardError
  # Deliberately best-effort: construction never raises.
  nil
end
36
+
37
# True when the server reports this dataset as a soft-link dataset.
# Goes through get_dataset (which memoizes the server answer) rather than
# reading @dataset_call directly, which is nil until get_dataset has run.
def soft_linked?
  get_dataset["dataset_type"] == "soft_link_dataset"
end
40
+
41
# Prepares ./<slug> as the local home of this dataset and writes its config
# through Dataset.clone.
#
# remote - when false and the directory already exists, the user is asked to
#          confirm; declining exits the process with status 1.
#
# An existing directory is deleted before cloning. Always returns true.
def init_home(remote: false)
  dataset_home = File.join(Dir.pwd, @slug)
  # Dir.exist? instead of Dir.exists?, which was removed in Ruby 3.2.
  if Dir.exist? dataset_home
    if !remote
      Cnvrg::CLI.log_message("Error: Conflict with dir #{@slug}", Thor::Shell::Color::RED)
      if Thor::Shell::Basic.new.no? "Sync to repository anyway? (current data might lost)", Thor::Shell::Color::YELLOW
        Cnvrg::CLI.log_message("Remove dir in order to clone #{@slug}", Thor::Shell::Color::RED)
        exit(1)
      end
    end
    FileUtils.rm_rf(dataset_home)
  end

  # The slug is used both as the directory name and as the dataset slug.
  Dataset.clone(@owner, @slug, @slug, remote)
  @local_path = dataset_home
  Cnvrg::CLI.log_message('')
  true
end
60
+
61
# Fetches the dataset description from the server's clone endpoint and caches
# it in @dataset_call; later calls return the cached value and ignore their
# arguments.
def get_dataset(commit: nil, query: nil)
  return @dataset_call if @dataset_call

  payload = { commit: commit, query:query}
  response = Cnvrg::API.request("users/#{owner}/datasets/#{slug}/clone", 'POST', payload)
  Cnvrg::CLI.is_response_success(response,true)
  @dataset_call = response["result"]
end
70
+
71
# True when the dataset type reported by get_dataset is "soft_link_dataset".
def softlinked?
  dataset_type = get_dataset["dataset_type"]
  dataset_type == "soft_link_dataset"
end
74
+
75
+
76
# Builds a downloader client for this dataset. Client parameters come from the
# "client" endpoint; when that request is not successful they come from
# get_storage_client_fallback instead.
def get_storage_client
  response = Cnvrg::API.request("users/#{@owner}/datasets/#{@slug}/client", 'GET')
  client_params =
    if Cnvrg::CLI.is_response_success(response, false)
      response['client']
    else
      get_storage_client_fallback
    end
  Cnvrg::Downloader::Client.factory(client_params)
end
85
+
86
# Fallback source of storage parameters: issues an empty download_multi
# request and returns its 'files' payload with a 'storage' entry added
# ('s3' when 'is_s3' is truthy, otherwise 'minio').
# Raises StandardError when the request is not successful.
def get_storage_client_fallback
  response = Cnvrg::API.request("users/#{@owner}/datasets/#{@slug}/download_multi", "POST", {files: []})
  unless Cnvrg::CLI.is_response_success(response, false)
    raise StandardError.new("Can't find dataset credentials")
  end

  response['files'].tap do |params|
    params['storage'] = params['is_s3'] ? 's3' : 'minio'
  end
end
94
+
95
# Requests the clone endpoint for the given commit/query and returns the
# 'result' part of the response (not cached, unlike get_dataset).
def get_stats(commit: nil, query: nil)
  payload = {commit: commit, query: query}
  response = Cnvrg::API.request("users/#{@owner}/datasets/#{@slug}/clone", 'POST', payload)
  Cnvrg::CLI.is_response_success(response, true)
  response['result']
end
100
+
101
# Requests one chunk of file keys from the clone_chunk endpoint.
# Returns the keys from the response, or nil when the request is not successful.
def get_clone_chunk(latest_id: nil, chunk_size: 1000, offset: 0, commit: 'latest')
  payload = {commit: commit, chunk_size: chunk_size, latest_id: latest_id, offset: offset}
  response = Cnvrg::API.request("users/#{@owner}/datasets/#{@slug}/clone_chunk", 'POST', payload)
  if Cnvrg::CLI.is_response_success(response, false)
    response['result']['files']['keys']
  else
    nil
  end
end
106
+
107
# Saves a copy of .cnvrg/idx.yml as .cnvrg/idx.yml.backup. When there is no
# idx yet, an empty idx ({commit: nil, tree: {}}) is written as the backup.
def backup_idx
  Cnvrg::Logger.log_info("Backup idx")
  idx_path = "#{self.local_path}/.cnvrg/idx.yml"
  backup_path = "#{self.local_path}/.cnvrg/idx.yml.backup"
  # File.exist? instead of File.exists?, which was removed in Ruby 3.2.
  if File.exist? idx_path
    FileUtils.cp idx_path, backup_path
  else
    idx = {commit: nil, tree: {}}
    File.open(backup_path, 'w') {|f| f.write idx.to_yaml}
  end
end
116
+
117
# Replaces the current idx with the content of .cnvrg/idx.yml.backup.
def restore_idx
  Cnvrg::Logger.log_info("Restore idx because an error.")
  Cnvrg::Logger.log_method(bind: binding)
  backup_path = "#{self.local_path}/.cnvrg/idx.yml.backup"
  saved_idx = YAML.load_file(backup_path)
  self.set_idx(saved_idx)
end
123
+
124
# Rewrites .cnvrg/config.yml (relative to the current directory) so that it
# points at another dataset.
#
# The title is stored under :dataset_name, the key every reader and writer of
# this config in this class uses. :dataset_home is still written so any
# consumer of the previously written key keeps working.
def change_url(owner: '', slug: '', title: '')
  config = {dataset_name: title, dataset_home: title, dataset_slug: slug, owner: owner}
  File.open(".cnvrg/config.yml", "w+") {|f| f.write config.to_yaml}
end
128
+
129
# Sends a DELETE request for the given dataset and returns the raw response.
def self.delete(dataset_slug, owner)
  Cnvrg::API.request("users/#{owner}/datasets/#{dataset_slug}/delete", 'DELETE')
end
133
+
134
# Returns the :commit recorded in .cnvrg/idx.yml, or nil when there is no idx file.
def last_local_commit
  return nil unless File.exist? "#{self.local_path}/.cnvrg/idx.yml"

  YAML.load_file(@local_path + "/.cnvrg/idx.yml")[:commit]
end
141
+
142
# Asks the server to create a volume from the last local commit and returns
# the raw response.
def snapshot
  payload = {data_commit: last_local_commit}
  response = Cnvrg::API.request("users/#{self.owner}/datasets/#{self.slug}/volumes/create", 'POST', payload)
  CLI.is_response_success(response)
  response
end
148
+
149
# Requests the datasets list of the given owner and returns the raw response.
def list(owner)
  endpoint = "users/#{owner}/datasets/list"
  response = Cnvrg::API.request(endpoint, 'GET')
  CLI.is_response_success(response)
  response
end
154
+
155
# Lists the saved queries of this dataset as table rows: a header row followed
# by one [name, slug, created_at, username] row per query.
def search_queries
  response = Cnvrg::API.request("users/#{self.owner}/datasets/#{self.slug}/queries/list", 'GET')
  CLI.is_response_success(response)
  table = [["name", "id", "created_at", "username"]]
  response["results"]["queries"].each_with_object(table) do |query, rows|
    created_at = query["created_at"].in_time_zone.to_s
    rows << [query["name"], query["slug"], created_at, query["username"]]
  end
end
164
+
165
# Lists the files matched by a saved query as table rows: a header row followed
# by one [name, fullpath, s3_url] row per file.
def get_query_file(query_slug)
  response = Cnvrg::API.request("users/#{self.owner}/datasets/#{self.slug}/search/#{query_slug}", 'GET')
  CLI.is_response_success(response)
  table = [["Name", "Full path", "URL"]]
  response["results"]["query_files"].each_with_object(table) do |file, rows|
    rows << [file["name"], file["fullpath"], file["s3_url"]]
  end
end
174
+
175
# Downloads the tags YAML of this dataset into working_dir, using the file name
# supplied by the server. Returns true on success, false when writing fails.
def download_tags_yaml
  response = Cnvrg::API.request("users/#{self.owner}/datasets/#{self.slug}/tags_yml", 'GET')
  CLI.is_response_success(response)
  begin
    target = "#{self.working_dir}/#{response["results"]["filename"]}"
    File.open(target, "w+") {|f| f.write response["results"]["file_content"]}
    true
  rescue
    false
  end
end
186
+
187
# Requests the commit list of this dataset, passing commit_sha1 as the
# `commit` query parameter, and returns the raw response.
def list_commits(commit_sha1: nil)
  endpoint = "users/#{self.owner}/datasets/#{self.slug}/list_commits?commit=#{commit_sha1}"
  response = Cnvrg::API.request(endpoint, 'GET')
  CLI.is_response_success(response)
  response
end
195
+
196
# Uploads tag records from a YAML file to the data_tags_create endpoint.
#
# tag_file - a path or an open File. An IO is closed after it has been read;
#            a plain path has nothing to close.
#
# Returns true when the server answers with status 200, false otherwise
# (including when no file is given).
def upload_tags_via_yml(tag_file = nil)
  return false if tag_file.nil?

  records_yml = YAML.load_file(tag_file)
  # Only IO-like arguments respond to close; a String path does not.
  tag_file.close if tag_file.respond_to?(:close)
  response = Cnvrg::API.request("users/#{self.owner}/datasets/#{self.slug}/data_tags_create", 'POST', {records_yml: records_yml})
  response["status"] == 200
end
206
+
207
# Web URL of this dataset.
# NOTE(review): this class defines `url` a second time further down and that
# later definition is the one Ruby keeps. This copy built a "/projects/" link;
# it now builds the same "/datasets/" link so the two cannot disagree.
def url
  url = Cnvrg::Helpers.remote_url
  "#{url}/#{self.owner}/datasets/#{self.slug}"
end
211
+
212
# Makes sure a .cnvrgignore file exists, looking first in the current directory
# and then in <dataset_name>/. When neither exists, the one under
# <dataset_name>/ is created with the default content.
# Returns nil when a file already exists and false when creation fails.
# The `remote` argument is not used.
def self.verify_cnvrgignore_exist(dataset_name, remote)
  ignore_path = File.exist?(".cnvrgignore") ? ".cnvrgignore" : "#{dataset_name}/.cnvrgignore"
  return if File.exist?(ignore_path)

  begin
    FileUtils.touch [ignore_path]
    default_content = Helpers.cnvrgignore_content
    File.open(ignore_path, "w+") {|f| f.write default_content}
  rescue => e
    return false
  end
end
232
+
233
# Appends the given entries to .cnvrgignore, one per line, after a blank line.
# Returns true on success or when there is nothing to add, false when the file
# cannot be written.
def update_ignore_list(new_ignore)
  return true if new_ignore.nil? || new_ignore.empty?

  File.open(self.local_path + "/.cnvrgignore", "a+") do |ignore_file|
    ignore_file.puts("\n")
    new_ignore.each { |entry| ignore_file.puts("#{entry}\n") }
  end
  true
rescue
  false
end
251
+
252
# Expands .cnvrgignore into a flat list of ignored paths.
#
# A missing .cnvrgignore (e.g. right after a clone) is generated first.
# Blank lines, comment lines and entries already collected are skipped.
# Directory entries contribute themselves plus everything below them;
# entries containing "*" are expanded with a recursive glob; anything else is
# taken literally.
# NOTE(review): the directory test and both globs resolve relative to the
# current working directory, not to local_path - confirm callers chdir first.
def get_ignore_list
  ignore_path = self.local_path + "/.cnvrgignore"
  # File.exist? instead of File.exists?, which was removed in Ruby 3.2.
  self.generate_cnvrg_ignore unless File.exist?(ignore_path)

  ignore_list = []
  return ignore_list unless File.exist?(ignore_path)

  # File.foreach closes the file when done; File.open(...).each_line left the
  # handle open until garbage collection.
  File.foreach(ignore_path) do |line|
    line = line.strip
    next if line.start_with?("#") || ignore_list.include?(line) || line.empty?

    if line.end_with?("/") || File.directory?(line)
      ignore_list << line
      ignore_list << Dir.glob("#{line}/**/*", File::FNM_DOTMATCH).flatten
    elsif line.include? "*"
      ignore_list << Dir.glob("**/*#{line}", File::FNM_DOTMATCH).flatten
    else
      ignore_list << line
    end
  end
  ignore_list.flatten
end
281
+
282
+
283
# Creates a dataset on the server and links the current directory to it by
# writing .cnvrg/config.yml. A default .cnvrgignore is written only when none
# existed before the call. Returns true on success, false on any StandardError
# raised while creating or writing.
def self.init(owner, dataset_name, is_public = false, bucket: nil)
  create_ignore = !File.exist?(".cnvrgignore")
  files_to_touch = [".cnvrg/config.yml"]
  files_to_touch << ".cnvrgignore" if create_ignore

  cnvrgignore = Helpers.cnvrgignore_content
  begin
    payload = {title: dataset_name, owner: owner, is_public: is_public, bucket: bucket}
    response = Cnvrg::API.request("cli/create_dataset", 'POST', payload)
    Cnvrg::CLI.is_response_success(response)
    created = JSON.parse response["result"]

    config = {dataset_name: dataset_name,
              dataset_slug: created["slug"],
              owner: owner}

    FileUtils.mkdir_p [".cnvrg"]
    FileUtils.touch files_to_touch
    File.open(".cnvrg/config.yml", "w+") {|f| f.write config.to_yaml}
    File.open(".cnvrgignore", "w+") {|f| f.write cnvrgignore} if create_ignore
  rescue => e
    return false
  end
  true
end
315
+
316
# Links the current directory to an existing dataset: writes .cnvrg/config.yml
# and an initial idx holding the dataset's init commit.
#
# Returns false when owner or slug is blank, nil when the dataset lookup is not
# successful, true once linked. Raises Exception when the server does not
# report an init commit; any StandardError is re-raised wrapped in Exception.
def self.link_dataset(owner: nil, slug: nil)
  begin
    return false if owner.blank? || slug.blank?

    response = Cnvrg::API.request("users/#{owner}/datasets/#{slug}", 'GET')
    return unless Cnvrg::CLI.is_response_success(response, false)

    result = response["result"]
    init_sha1 = result["init_commit_sha1"]

    # The initial idx must carry the init commit sha1 so the user can act on the
    # dataset; servers that do not return it are too old for this feature.
    if init_sha1.blank?
      raise Exception.new("This feature is not available for your cnvrg version. Please contact support for more information")
    end

    config = {dataset_name: result["title"],
              dataset_slug: result["slug"],
              owner: owner}

    FileUtils.mkdir_p [".cnvrg"]
    FileUtils.touch [".cnvrg/config.yml"]
    File.open(".cnvrg/config.yml", "w+") {|f| f.write config.to_yaml}

    Dataset.new(Dir.pwd).write_idx({}, init_sha1)
    true
  rescue => e
    raise Exception.new(e)
  end
end
350
+
351
# Creates an empty local skeleton for a dataset under ./<dataset_slug>/:
# .cnvrg/config.yml and, when the dataset directory has none yet, a default
# .cnvrgignore. Returns true on success, false on any StandardError raised
# while writing.
def self.blank_clone(owner, dataset_name, dataset_slug)
  config_path = "#{dataset_slug}/.cnvrg/config.yml"
  ignore_path = "#{dataset_slug}/.cnvrgignore"
  list_files = [config_path]
  # Look for the ignore file where it is going to be written (inside the
  # dataset directory). Checking the current directory could skip creation
  # because of an unrelated file, or overwrite an existing dataset ignore file.
  create_ignore = !File.exist?(ignore_path)
  list_files << ignore_path if create_ignore

  cnvrgignore = Helpers.cnvrgignore_content
  begin
    config = {dataset_name: dataset_name,
              dataset_slug: dataset_slug,
              owner: owner}

    FileUtils.mkdir_p ["#{dataset_slug}/.cnvrg"]
    FileUtils.touch list_files
    File.open(config_path, "w+") {|f| f.write config.to_yaml}
    File.open(ignore_path, "w+") {|f| f.write cnvrgignore} if create_ignore
  rescue => e
    return false
  end
  true
end
380
+
381
# Writes (or overwrites) .cnvrgignore in local_path with the default content.
def generate_cnvrg_ignore
  default_content = Helpers.cnvrgignore_content
  ignore_path = self.local_path + "/.cnvrgignore"
  File.open(ignore_path, "w+") {|f| f.write default_content}
end
385
+
386
# True when <base_dir>/<dataset_slug>/.cnvrg/config.yml exists and carries
# `success: true` (the marker appended by write_success).
#
# dataset_slug - directory name of the dataset under base_dir
# base_dir     - where datasets are mounted; defaults to "/data"
#
# The original referenced an undefined `dataset_title`; the resulting NameError
# was hidden by an inline rescue, so the method always returned false.
def self.verify_dataset(dataset_slug, base_dir: "/data")
  config_path = File.join(base_dir, dataset_slug.to_s, ".cnvrg/config.yml")
  config = begin
    YAML.load_file(config_path)
  rescue StandardError
    {}
  end
  # An empty config file loads as a non-Hash value.
  config.is_a?(Hash) && config[:success] == true
end
390
+
391
# Polls every 10 seconds until each listed dataset directory has a
# .cnvrg/config.yml carrying `success: true`.
# Returns true once all are ready, false once `timeout` seconds have elapsed
# (a nil timeout waits forever). Errors while checking are logged and retried.
def self.verify_datasets(dataset_titles, timeout = nil)
  started_at = Time.now.to_i
  Cnvrg::Logger.log_info("Verifying datasets #{dataset_titles}")
  Cnvrg::Logger.log_info("Timeout is #{timeout}")
  loop do
    begin
      elapsed = Time.now.to_i - started_at
      return false if timeout.present? && timeout < elapsed

      ready = dataset_titles.all? do |dataset_title|
        YAML.load_file("#{dataset_title}/.cnvrg/config.yml")[:success] == true
      end
      return true if ready

      Cnvrg::Logger.log_info("Sleeping..")
      sleep 10
    rescue => e
      Cnvrg::Logger.log_info("Got error")
      Cnvrg::Logger.log_error(e)
      sleep 10
    end
  end
end
413
+
414
# Scans the sub-directories of the current directory for fully cloned datasets.
# A directory qualifies when its config has success == true, a dataset name and
# slug, and its idx records a commit. Returns the unique list of
# {dataset_slug:, dataset_name:, local_commit:} hashes.
def self.scan_datasets()
  Cnvrg::Logger.log_info("Looking up datasets")
  found = Dir.entries(Dir.pwd).map do |entry|
    next unless File.directory?(File.join(Dir.pwd, entry))
    next if entry == '.' || entry == '..'

    begin
      config = YAML.load_file("#{Dir.pwd}/#{entry}/.cnvrg/config.yml") rescue nil
      local_commit = YAML.load_file("#{Dir.pwd}/#{entry}/.cnvrg/idx.yml")[:commit] rescue nil
      cloned = config.present? && config[:success] == true &&
               config[:dataset_name].present? && config[:dataset_slug].present? &&
               local_commit.present?
      next nil unless cloned

      {
        dataset_slug: config[:dataset_slug],
        dataset_name: config[:dataset_name],
        local_commit: local_commit,
      }
    rescue
      nil
    end
  end
  found.compact.uniq
end
437
+
438
# Instance-level clone is a no-op that returns nil; cloning is done by the
# class-level methods.
def clone(commit)
  nil
end
441
+
442
# Creates the local skeleton of a dataset: <dataset_name>/.cnvrg/config.yml
# holding the dataset name, slug and owner.
# Returns true on success; on any StandardError the message is printed and
# false is returned.
def self.clone(owner, dataset_name, dataset_slug, remote = false)
  meta_dir = "#{dataset_name}/.cnvrg"
  config_path = "#{dataset_name}/.cnvrg/config.yml"
  dirs = remote ? [meta_dir] : [dataset_name, meta_dir]
  settings = {dataset_name: dataset_name,
              dataset_slug: dataset_slug,
              owner: owner}

  FileUtils.mkdir_p dirs
  FileUtils.touch [config_path]
  File.open(config_path, "w+") {|f| f.write settings.to_yaml}
  true
rescue => e
  puts "Exception in clone request:#{e.message}"
  false
end
467
+
468
# Requests the file listing of a commit and returns it as a JSON string,
# or nil when the response is blank.
def list_files(commit_sha1: "latest", limit: 1000, offset: 0, expires: 3600)
  params = {commit_sha1: commit_sha1, limit: limit, offset: offset, expires: expires}
  response = Cnvrg::API.request("users/#{self.owner}/datasets/#{self.slug}/list", 'GET', params)
  response.blank? ? nil : response.to_json
end
473
+
474
# Downloads every tree of a commit into dataset_home, showing a progress bar,
# then marks the dataset as successfully cloned.
# Returns false when the tree list cannot be fetched, true otherwise.
def self.clone_tree(commit: 'latest', dataset_home: nil)
  @dataset = Cnvrg::Dataset.new(dataset_home)
  @files = Cnvrg::Datafiles.new(@dataset.owner, @dataset.slug, dataset: @dataset)
  trees = @files.get_trees(commit: commit)
  return false if trees.nil?

  progress_bar = ProgressBar.create(:title => "Download Progress",
                                    :progress_mark => '=',
                                    :format => "%b>>%i| %p%% %t",
                                    :starting_at => 0,
                                    :total => trees.size,
                                    :autofinish => true)
  trees.each do |tree|
    # The bar advances before the download of each tree starts.
    progress_bar.progress += 1
    @files.download_dir(dataset_home, tree)
  end
  progress_bar.finish
  @dataset.write_success
  true
end
493
+
494
# Marks the dataset as fully cloned by appending ":success: true" to its
# .cnvrg/config.yml. Does nothing when the config file is missing.
# The `in_folder` argument is not used.
def write_success(in_folder = false)
  base_dir = @local_path || @working_dir
  config_path = File.join(base_dir, ".cnvrg/config.yml")
  return unless File.exist?(config_path)

  File.open(config_path, "a") {|f| f.puts(":success: true")}
end
501
+
502
# Links the current directory to a dataset from inside a container: writes
# .cnvrg/config.yml and, when there was no .cnvrgignore before the call, one
# with the default content. Returns true on success, false on any
# StandardError raised while writing.
def self.init_container(owner, dataset_slug, dataset_name)
  cnvrgignore = Helpers.cnvrgignore_content
  begin
    # Decide before touching: the original checked for the file after
    # FileUtils.touch had created it, so the default content was never written
    # and a fresh .cnvrgignore stayed empty.
    create_ignore = !File.exist?(".cnvrgignore")

    FileUtils.mkdir_p [".cnvrg"]
    FileUtils.touch [".cnvrgignore", ".cnvrg/config.yml"]

    config = {dataset_name: dataset_name,
              dataset_slug: dataset_slug,
              owner: owner}
    File.open(".cnvrg/config.yml", "w+") {|f| f.write config.to_yaml}
    File.open(".cnvrgignore", "w+") {|f| f.write cnvrgignore} if create_ignore
  rescue => e
    return false
  end
  true
end
526
+
527
+
528
# Loads .cnvrg/idx.yml; when it does not exist an empty idx
# ({commit: nil, tree: {}}) is returned instead.
def get_idx
  idx_path = "#{self.local_path}/.cnvrg/idx.yml"
  # File.exist? instead of File.exists?, which was removed in Ruby 3.2.
  return YAML.load_file(idx_path) if File.exist?(idx_path)

  {commit: nil, tree: {}}
end
534
+
535
# Overwrites .cnvrg/idx.yml with the YAML form of the given idx.
def set_idx(idx)
  idx_path = "#{self.local_path}/.cnvrg/idx.yml"
  File.open(idx_path, 'w+') do |idx_file|
    idx_file.write idx.to_yaml
  end
end
538
+
539
# Web URL of this dataset: <remote url>/<owner>/datasets/<slug>.
def url
  base = Cnvrg::Helpers.remote_url
  "#{base}/#{self.owner}/datasets/#{self.slug}"
end
543
+
544
# Builds an idx tree for one chunk of local paths.
#
# list_files - absolute paths under local_path (files and directories)
# threads    - number of threads handed to Parallel.map
# prefix     - optional remote directory prepended to every label
#
# Returns a hash of label => nil for directories (label ends with "/") and
# label => {sha1:, file_name:, file_size:, content_type:, absolute_path:,
# relative_path:} for files. Raises StandardError for an invalid file name.
# When a prefix is given, each level of it (a, a/b, a/b/c) is appended to the
# caller's list_files array.
def generate_chunked_idx(list_files = [], threads: 15, prefix: '')
  tree = {}
  # Strip local_path only where it is the leading part of the path, matched
  # literally. The original used an unescaped, ^-anchored regex for
  # relative_path (metacharacters in the path changed the match) and a
  # replace-everywhere gsub for the label.
  local_prefix = /\A#{Regexp.escape(self.local_path + "/")}/
  Parallel.map(list_files, in_threads: threads) do |file|
    relative_path = file.sub(local_prefix, "")
    relative_path = "#{prefix}/#{relative_path}" if prefix.present?
    label = relative_path
    if not Cnvrg::Files.valid_file_name?(label)
      raise StandardError.new("#{label} is not a valid file name.")
    end
    if File.directory? file
      tree[label + "/"] = nil
    else
      sha1 = OpenSSL::Digest::SHA1.file(file).hexdigest
      file_name = File.basename file
      file_size = File.size(file).to_f
      mime_type = MimeMagic.by_path(file)
      # Unknown and text mime types are both reported as plain text.
      content_type = !(mime_type.nil? or mime_type.text?) ? mime_type.type : "text/plain"
      tree[label] = {sha1: sha1, file_name: file_name, file_size: file_size, content_type: content_type, absolute_path: file, relative_path: relative_path}
    end
  end
  if prefix.present?
    # For a prefix a/b/c the paths a, a/b and a/b/c are added to the file list.
    curr_path = []
    prefix.split('/').each do |dir|
      curr_path << dir
      list_files << curr_path.join('/')
    end
  end
  tree
end
577
+
578
# Points the local idx back at a known-good commit and drops any pending
# next_commit marker.
#
# commit - the commit to revert to; when blank, the server is asked for the
#          last valid commit. When none can be determined only the
#          next_commit marker is removed.
def revert_to_last_commit(commit: nil)
  if commit.blank?
    resp = Cnvrg::API.request("users/#{self.owner}/datasets/#{self.slug}/last_valid_commit", 'GET')
    if CLI.is_response_success(resp, false)
      commit = resp['result']['commit_sha1']
    end
  end
  # The method defined in this class is update_idx_with_commit! (with a bang);
  # the original called a bang-less name not defined here.
  update_idx_with_commit!(commit) if commit.present?
  revert_next_commit
end
588
+
589
# Lists every path under local_path, smallest file first.
# Always left out: "." / ".." entries and everything under .cnvrg; the
# .cnvrgignore file itself is kept.
#
# with_ignore - when true the full list is returned; otherwise entries named
#               by get_ignore_list are removed.
def list_all_files(with_ignore = false)
  dot_entry = /\/\.{1,2}$/
  cnvrg_dir = /^#{self.local_path}\/\.cnvrg\/*/
  conflict_file = /^#{self.local_path}\/\.cnvrgignore.conflict*/
  ignore_file_pattern = /^#{self.local_path}\/\.cnvrgignore/

  everything = Dir.glob("#{self.local_path}/**/*", File::FNM_DOTMATCH)
  kept = everything.reject do |path|
    # The .cnvrg pattern also matches ".cnvrgignore", hence the explicit exemption.
    excluded = (path =~ dot_entry) || (path =~ cnvrg_dir) || (path =~ conflict_file)
    excluded && !(path =~ ignore_file_pattern)
  end

  by_size = kept.sort_by {|path| File.size(path)}
  return by_size if with_ignore

  ignored = self.get_ignore_list.map {|ignore_file| "#{self.local_path}/#{ignore_file}"}
  by_size - ignored
end
598
+
599
# Writes .cnvrg/idx.yml from the given tree and commit. When no tree is given,
# one is generated from the local files and each file entry is stamped with the
# current time as its commit_time.
def write_idx(tree = nil, commit = nil)
  if tree.nil?
    stamped = self.generate_idx[:tree].map do |path, entry|
      entry.present? ? [path, {sha1: entry[:sha1], commit_time: Time.now}] : [path, entry]
    end
    tree = stamped.to_h
  end
  idx = {tree: tree, commit: commit}
  File.open("#{self.local_path}/.cnvrg/idx.yml", 'w') {|f| f.write idx.to_yaml}
end
607
+
608
# Replaces the :tree of the stored idx, keeping its other keys.
def write_tree(tree)
  current = self.get_idx
  current[:tree] = tree
  self.set_idx(current)
end
613
+
614
# Rebuilds .cnvrg/idx.yml from the files currently under local_path and
# returns the new idx.
#
# show_progress - when true a progress bar is shown while hashing.
#
# Directories are recorded as "<label>/" => nil, files as
# label => {sha1:, commit_time:}. Ignored paths are skipped. The previous
# :commit and a non-empty :next_commit are carried over.
def generate_idx(show_progress = false)
  idx_path = "#{self.local_path}/.cnvrg/idx.yml"
  # File.exist? instead of File.exists?, which was removed in Ruby 3.2.
  old_idx = File.exist?(idx_path) ? YAML.load_file(idx_path) : nil

  tree_idx = Hash.new(0)
  list = Dir.glob("#{self.local_path}/**/*", File::FNM_DOTMATCH).reject {|x| (x =~ /\/\.{1,2}$/) or (x =~ /^#{self.local_path}\/\.cnvrg\/*/) or (x =~ /^#{self.local_path}\/\.cnvrgignore.conflict*/) and not (x =~ /^#{self.local_path}\/\.cnvrgignore/)}
  list_ignore = self.get_ignore_list()

  parallel_options = {
      in_threads: IDXParallelThreads,
      isolation: true
  }
  if show_progress
    parallel_options[:progress] = {
        :title => "Checking Dataset",
        :progress_mark => '=',
        :format => "%b>>%i| %p%% %t",
        :starting_at => 0,
        :total => (list).size,
        :autofinish => true
    }
  end

  Parallel.map(list, parallel_options) do |e|
    label = e.gsub(self.local_path + "/", "")
    next if list_ignore.include? label

    if File.directory? e
      tree_idx[label + "/"] = nil
    else
      sha1 = OpenSSL::Digest::SHA1.file(e).hexdigest
      # NOTE(review): the old tree is looked up with string keys ("tree",
      # "sha1") while the idx is written with symbol keys elsewhere in this
      # class - confirm whether an unchanged entry is ever reused.
      if old_idx.nil? or old_idx.to_h["tree"].nil?
        tree_idx[label] = {sha1: sha1, commit_time: nil}
      elsif old_idx["tree"][label].nil? or old_idx["tree"][label]["sha1"] != sha1
        tree_idx[label] = {sha1: sha1, commit_time: nil}
      else
        tree_idx[label] = old_idx["tree"][label]
      end
    end
  end

  if !old_idx.nil? and !old_idx[:next_commit].nil? and !old_idx[:next_commit].empty?
    idx = {commit: old_idx.to_h[:commit], tree: tree_idx, next_commit: old_idx[:next_commit]}
  else
    idx = {commit: old_idx.to_h[:commit], tree: tree_idx}
  end
  idx_yaml = idx.to_yaml
  File.open(idx_path, 'w') {|f| f.write idx_yaml}
  idx
end
673
+
674
# Asks the server to create a volume for this dataset and returns the raw response.
def create_volume
  endpoint = "users/#{self.owner}/datasets/#{self.slug}/volumes/create"
  response = Cnvrg::API.request(endpoint, 'POST')
  CLI.is_response_success(response)
  response
end
679
+
680
# Posts the current commit to the download_updated_data endpoint and returns
# the raw response.
def download_updated_data(current_commit)
  payload = {current_commit: current_commit}
  response = Cnvrg::API.request("users/#{self.owner}/datasets/#{self.slug}/download_updated_data", 'POST', payload)
  CLI.is_response_success(response, false)
  response
end
685
+
686
# Compares the local idx with the server state and returns the status response.
#
# With force set, the server is not contacted: a response is built locally in
# which every path of the local tree is reported as added.
def compare_idx(new_branch, commit = last_local_commit, local_idx = nil, force = false, next_commit = nil)
  local_idx = self.generate_idx if local_idx.nil?
  ignore_list = self.get_ignore_list()

  if force
    added = local_idx[:tree] ? local_idx[:tree].keys.flatten : []
    forced_tree = {"added" => added,
                   "updated_on_server" => [],
                   "updated_on_local" => [],
                   "deleted" => [],
                   "conflicts" => []}
    return {"result" => {"commit" => next_commit, "tree" => forced_tree}}
  end

  payload = {idx: local_idx, new_branch: new_branch, current_commit: commit, ignore: ignore_list, next_commit: next_commit}
  response = Cnvrg::API.request("users/#{self.owner}/datasets/#{self.slug}/status", 'POST', payload)
  CLI.is_response_success(response, false)
  response
end
710
+
711
# Posts the local commit state and ignore patterns to the download_status
# endpoint and returns the raw response.
def compare_idx_download(all_files: false, desired_commit: nil)
  payload = {
    current_commit: self.last_local_commit,
    next_commit: self.get_next_commit,
    ignore: self.send_ignore_list(),
    all_files: all_files,
    desired_commit: desired_commit.presence
  }
  Cnvrg::API.request("users/#{self.owner}/datasets/#{self.slug}/download_status", 'POST', payload)
end
717
+
718
# Stores the given sha1 under :partial_commit in the idx.
def set_partial_commit(commit_sha1)
  current = self.get_idx
  current[:partial_commit] = commit_sha1
  self.set_idx(current)
end
723
+
724
# Returns the :partial_commit stored in the idx, or nil when none is recorded.
#
# The original used `try(:fetch, :partial_commit)`; Hash#fetch raises KeyError
# for a missing key, which is the normal case for an idx that never had a
# partial commit.
def get_partial_commit
  idx = self.get_idx
  idx.is_a?(Hash) ? idx[:partial_commit] : nil
end
728
+
729
# Posts the last local commit to the status_current endpoint and returns the
# raw response.
def current_status(new_branch)
  payload = {current_commit: last_local_commit, new_branch: new_branch}
  response = Cnvrg::API.request("users/#{self.owner}/datasets/#{self.slug}/status_current", 'POST', payload)
  CLI.is_response_success(response, true)
  response
end
735
+
736
# Translates .cnvrgignore into the pattern list sent to the server.
#
# Blank lines, comment lines and entries already collected are skipped.
# "dir/" becomes the two entries "dir" (all slashes removed) and "dir/.";
# every "*" in an entry becomes ".*"; anything else is passed through.
# Returns [] when the file cannot be read.
def send_ignore_list()
  ignore_list = []
  # File.foreach closes the file when done; File.open(...).each_line left the
  # handle open until garbage collection.
  File.foreach(self.local_path + "/.cnvrgignore") do |line|
    line = line.strip
    next if line.start_with?("#") || ignore_list.include?(line) || line.empty?

    if line.end_with? "/"
      ignore_list << line.gsub("/", "")
      ignore_list << line + "."
    elsif line.include? "*"
      ignore_list << line.gsub("*", ".*")
    else
      ignore_list << line
    end
  end
  ignore_list.flatten
rescue
  []
end
759
+
760
+
761
# Asks the server to compare the given commit with the last local commit and
# returns the raw response.
def compare_commits(commit)
  payload = {compare_commit: commit, current_commit: last_local_commit}
  response = Cnvrg::API.request("users/#{self.owner}/datasets/#{self.slug}/compare_commits", 'POST', payload)
  CLI.is_response_success(response, false)
  response
end
766
+
767
# Records commit_sha1 as :next_commit in .cnvrg/idx.yml, creating the idx
# (with empty-string :commit and :tree) when it does not exist. Returns true.
def set_next_commit(commit_sha1)
  idx_path = "#{self.local_path}/.cnvrg/idx.yml"
  idx_hash =
    if File.exist?(idx_path)
      YAML.load_file(idx_path)
    else
      {commit: "", tree: ""}
    end
  idx_hash[:next_commit] = commit_sha1
  File.open(idx_path, 'w') {|f| f.write idx_hash.to_yaml}
  true
end
780
+
781
# Returns the :next_commit recorded in .cnvrg/idx.yml, or nil when there is no
# idx file.
def get_next_commit()
  idx_path = "#{self.local_path}/.cnvrg/idx.yml"
  return nil unless File.exist?(idx_path)

  YAML.load_file(idx_path)[:next_commit]
end
788
+
789
# Promotes :next_commit to :commit: the idx is rewritten with only :commit
# (the former next_commit) and :tree. Returns nil when there is no idx file.
def remove_next_commit()
  idx_path = "#{self.local_path}/.cnvrg/idx.yml"
  return nil unless File.exist?(idx_path)

  stored = YAML.load_file(idx_path)
  promoted = {commit: stored[:next_commit], tree: stored[:tree]}
  File.open(idx_path, 'w') {|f| f.write promoted.to_yaml}
end
799
+
800
# Drops the :next_commit marker from .cnvrg/idx.yml, leaving the other keys
# untouched. Returns nil when there is no idx file.
def revert_next_commit()
  idx_path = "#{self.local_path}/.cnvrg/idx.yml"
  return nil unless File.exist?(idx_path)

  without_marker = YAML.load_file(idx_path).except(:next_commit)
  File.open(idx_path, 'w') {|f| f.write without_marker.to_yaml}
end
808
+
809
# Asks the server whether the given commit (the last local commit when blank)
# starts a new branch, passes the answer to update_is_new_branch and returns it.
# NOTE(review): the endpoint is under "projects/" and update_is_new_branch is
# not defined in the visible part of this class - confirm both are intended.
def compare_commit(commit)
  commit = last_local_commit if commit.nil? || commit.empty?
  payload = {current_commit: commit}
  response = Cnvrg::API.request("users/#{self.owner}/projects/#{self.slug}/commit/compare", 'POST', payload)
  CLI.is_response_success(response, false)
  update_is_new_branch(response["result"]["new_branch"])
  response["result"]["new_branch"]
end
818
+
819
# Stamps commit_time on the idx tree entries of the given paths and rewrites
# .cnvrg/idx.yml. Paths without a tree entry are silently skipped.
# Returns true.
def update_idx_with_files_commits!(files, commit_time)
  idx_path = "#{self.local_path}/.cnvrg/idx.yml"
  idx_hash = YAML.load_file(idx_path)

  files.each do |path|
    # to_h on a missing entry yields a throwaway hash, so nothing is stored for it.
    entry = idx_hash[:tree].to_h[path].to_h
    entry[:commit_time] = commit_time
  end
  # Keeps the :next_commit key present in the written file (nil when unset).
  idx_hash[:next_commit] = idx_hash[:next_commit]
  File.open(idx_path, 'w') {|f| f.write idx_hash.to_yaml}

  true
end
832
+
833
# Overwrites .cnvrg/idx.yml with the YAML form of the given idx. Returns true.
def update_idx(idx)
  idx_path = "#{self.local_path}/.cnvrg/idx.yml"
  File.open(idx_path, 'w') do |idx_file|
    idx_file.write idx.to_yaml
  end
  true
end
837
+
838
# Sets :commit in .cnvrg/idx.yml to the given value, keeping the other keys.
# Returns true.
def update_idx_with_commit!(commit)
  idx_path = "#{self.local_path}/.cnvrg/idx.yml"
  stored = YAML.load_file(idx_path)
  stored[:commit] = commit
  File.open(idx_path, 'w') {|f| f.write stored.to_yaml}
  true
end
845
+
846
# Reverts a local checkout by deleting the given working directory recursively.
# No server request is made.
def revert(working_dir)
  FileUtils.rm_rf(working_dir)
end
851
+
852
# Checks whether the current directory is linked to a dataset.
# Returns {validation:, message:}: validation is
# Data::ConfigValidation::FAILED with the reason of the first failed check, or
# Data::ConfigValidation::SUCCESS when .cnvrg/config.yml exists, is not empty
# and names a dataset, slug and owner.
def self.validate_config
  ## check that the .cnvrg folder exists:
  dot_cnvrg_exists = Dir[".cnvrg"].present?
  return {validation: Data::ConfigValidation::FAILED, message: ".cnvrg folder does not exists"} if not dot_cnvrg_exists

  ## check that the config.yml exists:
  config_file_exists = Dir[".cnvrg/*"].include? ".cnvrg/config.yml"
  # The original message read "config.yml exists", the opposite of the failure.
  return {validation: Data::ConfigValidation::FAILED, message: "config.yml does not exist"} if not config_file_exists

  ## check that the config.yml file not empty:
  config = YAML.load_file("#{Dir.getwd}/.cnvrg/config.yml")
  return {validation: Data::ConfigValidation::FAILED, message: "config.yml is empty"} if not config

  ## check that config.yml is valid:
  title = config[:dataset_name]
  slug = config[:dataset_slug]
  owner = config[:owner]
  return {validation: Data::ConfigValidation::FAILED, message: "config.yml is not valid or some keys are missing"} if title.blank? or slug.blank? or owner.blank?

  ## everything OK:
  {validation: Data::ConfigValidation::SUCCESS, message: "Directory is already linked to #{slug}"}
end
874
+
875
# Aborts a clone (exit status 0) when dataset_home already holds the named
# dataset. When a commit is given, the clone is only aborted if the local idx
# is at exactly that commit. Any StandardError (e.g. a missing config) is
# swallowed and nil is returned.
def self.stop_if_dataset_present(dataset_home, dataset_name, commit: nil)
  cli = Cnvrg::CLI.new()
  config = YAML.load_file(dataset_home + "/.cnvrg/config.yml")
  if commit.present?
    local_commit = YAML.load_file(dataset_home + "/.cnvrg/idx.yml")[:commit] rescue nil
    return if local_commit.blank? || commit != local_commit
  end
  return unless config[:dataset_name] == dataset_name

  cli.log_message("Dataset already present, clone aborted")
  exit(0)
rescue => e
  nil
end
890
+
891
+ end
892
+ end