cnvrg 1.9.9.9.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. checksums.yaml +7 -0
  2. data/bin/cnvrg +9 -0
  3. data/cnvrg.gemspec +47 -0
  4. data/lib/cnvrg.rb +7 -0
  5. data/lib/cnvrg/Images.rb +351 -0
  6. data/lib/cnvrg/api.rb +247 -0
  7. data/lib/cnvrg/api_v2.rb +14 -0
  8. data/lib/cnvrg/auth.rb +79 -0
  9. data/lib/cnvrg/cli.rb +5715 -0
  10. data/lib/cnvrg/cli/flow.rb +166 -0
  11. data/lib/cnvrg/cli/library_cli.rb +33 -0
  12. data/lib/cnvrg/cli/subcommand.rb +28 -0
  13. data/lib/cnvrg/cli/task.rb +116 -0
  14. data/lib/cnvrg/colors.rb +8 -0
  15. data/lib/cnvrg/connect_job_ssh.rb +31 -0
  16. data/lib/cnvrg/data.rb +335 -0
  17. data/lib/cnvrg/datafiles.rb +1325 -0
  18. data/lib/cnvrg/dataset.rb +892 -0
  19. data/lib/cnvrg/downloader/client.rb +101 -0
  20. data/lib/cnvrg/downloader/clients/azure_client.rb +45 -0
  21. data/lib/cnvrg/downloader/clients/gcp_client.rb +50 -0
  22. data/lib/cnvrg/downloader/clients/s3_client.rb +78 -0
  23. data/lib/cnvrg/experiment.rb +209 -0
  24. data/lib/cnvrg/files.rb +1047 -0
  25. data/lib/cnvrg/flow.rb +137 -0
  26. data/lib/cnvrg/helpers.rb +422 -0
  27. data/lib/cnvrg/helpers/agent.rb +188 -0
  28. data/lib/cnvrg/helpers/executer.rb +213 -0
  29. data/lib/cnvrg/hyper.rb +21 -0
  30. data/lib/cnvrg/image.rb +113 -0
  31. data/lib/cnvrg/image_cli.rb +25 -0
  32. data/lib/cnvrg/job_cli.rb +73 -0
  33. data/lib/cnvrg/job_ssh.rb +48 -0
  34. data/lib/cnvrg/logger.rb +111 -0
  35. data/lib/cnvrg/org_helpers.rb +5 -0
  36. data/lib/cnvrg/project.rb +822 -0
  37. data/lib/cnvrg/result.rb +29 -0
  38. data/lib/cnvrg/runner.rb +49 -0
  39. data/lib/cnvrg/ssh.rb +94 -0
  40. data/lib/cnvrg/storage.rb +128 -0
  41. data/lib/cnvrg/task.rb +165 -0
  42. data/lib/cnvrg/version.rb +3 -0
  43. metadata +460 -0
@@ -0,0 +1,892 @@
1
+ require 'fileutils'
2
+ module Cnvrg
3
+ class Dataset
4
+ attr_reader :slug, :owner, :title, :local_path, :working_dir
5
+
6
+ RemoteURL ||= "https://cnvrg.io"
7
+ IDXParallelThreads ||= Cnvrg::Helpers.parallel_threads
8
+ IDXParallelProcesses ||= Parallel.processor_count
9
+
10
# Builds a dataset handle from the first source that is present:
# a local dataset directory, a dataset info hash, or a dataset URL.
#
# @param project_home [String] directory containing .cnvrg/config.yml
# @param dataset_url [String] URL from which owner and slug are extracted
# @param dataset_info [Hash] hash read through the :slug and :owner keys
#
# Any StandardError raised while reading the source is swallowed, leaving
# the attributes that were not yet assigned as nil.
def initialize(project_home = '', dataset_url: '', dataset_info: '')
  @info = {}
  if project_home.present?
    @local_path = @working_dir = project_home
    settings = YAML.load_file(project_home + "/.cnvrg/config.yml")
    @title, @slug, @owner = settings.values_at(:dataset_name, :dataset_slug, :owner)
  elsif dataset_info.present?
    # The info hash carries no title, so the slug doubles as the title.
    @title = @slug = dataset_info[:slug]
    @owner = dataset_info[:owner]
    @local_path = Dir.pwd
  else
    url_owner, url_slug = Cnvrg::Helpers.extract_owner_slug_from_url(dataset_url, 'datasets')
    @title = @slug = url_slug
    @owner = url_owner
    @local_path = Dir.pwd
  end
rescue StandardError
  # Deliberately ignored: construction never raises.
end
36
+
37
# Reports whether the server describes this dataset as a soft-link dataset.
#
# The type is read through get_dataset instead of @dataset_call directly:
# @dataset_call is only assigned inside get_dataset, so reading it before
# that call raised NoMethodError on nil. This matches softlinked?.
#
# @return [Boolean]
def soft_linked?
  get_dataset["dataset_type"] == "soft_link_dataset"
end
40
+
41
# Prepares <cwd>/<slug> as the dataset home and writes its config via
# Dataset.clone.
#
# If the directory already exists it is removed first. When +remote+ is
# false the user is asked to confirm; declining exits the process with
# status 1.
#
# @param remote [Boolean] skip the interactive confirmation when true
# @return [true]
def init_home(remote: false)
  dataset_home = File.join(Dir.pwd, @slug)
  # Dir.exists? was removed in Ruby 3.2; Dir.exist? works on every version.
  if Dir.exist?(dataset_home)
    unless remote
      Cnvrg::CLI.log_message("Error: Conflict with dir #{@slug}", Thor::Shell::Color::RED)
      if Thor::Shell::Basic.new.no?("Sync to repository anyway? (current data might lost)", Thor::Shell::Color::YELLOW)
        Cnvrg::CLI.log_message("Remove dir in order to clone #{@slug}", Thor::Shell::Color::RED)
        exit(1)
      end
    end
    FileUtils.rm_rf(dataset_home)
  end

  # The slug is passed as both the directory name and the dataset slug.
  Dataset.clone(@owner, @slug, @slug, remote)
  @local_path = dataset_home
  Cnvrg::CLI.log_message('')
  true
end
60
+
61
# Returns the server's "clone" description of this dataset, memoized in
# @dataset_call after the first successful request.
#
# @param commit [String, nil] commit forwarded to the server
# @param query [String, nil] query forwarded to the server
# @return [Object] the "result" entry of the response
def get_dataset(commit: nil, query: nil)
  return @dataset_call if @dataset_call

  payload = { commit: commit, query: query }
  reply = Cnvrg::API.request("users/#{owner}/datasets/#{slug}/clone", 'POST', payload)
  Cnvrg::CLI.is_response_success(reply, true)
  @dataset_call = reply["result"]
end
70
+
71
# Reports whether get_dataset describes this dataset as a soft-link dataset.
#
# @return [Boolean]
def softlinked?
  kind = get_dataset["dataset_type"]
  kind == "soft_link_dataset"
end
74
+
75
+
76
# Builds a downloader client for this dataset's storage.
#
# Client parameters come from the "client" endpoint; when that request is
# not successful they are taken from get_storage_client_fallback instead.
#
# @return [Object] whatever Cnvrg::Downloader::Client.factory returns
def get_storage_client
  reply = Cnvrg::API.request("users/#{@owner}/datasets/#{@slug}/client", 'GET')
  client_params =
    if Cnvrg::CLI.is_response_success(reply, false)
      reply['client']
    else
      get_storage_client_fallback
    end
  Cnvrg::Downloader::Client.factory(client_params)
end
85
+
86
# Obtains storage parameters from the "download_multi" endpoint by asking
# for an empty file list.
#
# The returned hash gets a 'storage' entry: 's3' when 'is_s3' is truthy,
# otherwise 'minio'.
#
# @return [Hash] the response's 'files' entry with 'storage' added
# @raise [StandardError] when the request is not successful
def get_storage_client_fallback
  reply = Cnvrg::API.request("users/#{@owner}/datasets/#{@slug}/download_multi", "POST", {files: []})
  unless Cnvrg::CLI.is_response_success(reply, false)
    raise StandardError, "Can't find dataset credentials"
  end

  reply['files'].tap do |creds|
    creds['storage'] = creds['is_s3'] ? 's3' : 'minio'
  end
end
94
+
95
# Requests the "clone" endpoint for the given commit/query and returns its
# 'result' entry. Unlike get_dataset, nothing is cached.
#
# @param commit [String, nil]
# @param query [String, nil]
# @return [Object] the response's 'result' entry
def get_stats(commit: nil, query: nil)
  payload = {commit: commit, query: query}
  reply = Cnvrg::API.request("users/#{@owner}/datasets/#{@slug}/clone", 'POST', payload)
  Cnvrg::CLI.is_response_success(reply, true)
  reply['result']
end
100
+
101
# Fetches one chunk of file keys from the "clone_chunk" endpoint.
#
# @param latest_id [Object, nil] forwarded to the server as latest_id
# @param chunk_size [Integer] forwarded to the server as chunk_size
# @param offset [Integer] forwarded to the server as offset
# @param commit [String] commit to read, 'latest' by default
# @return [Object, nil] result -> files -> keys of the response, or nil when
#   the request is not successful
def get_clone_chunk(latest_id: nil, chunk_size: 1000, offset: 0, commit: 'latest')
  payload = {commit: commit, chunk_size: chunk_size, latest_id: latest_id, offset: offset}
  reply = Cnvrg::API.request("users/#{@owner}/datasets/#{@slug}/clone_chunk", 'POST', payload)
  if Cnvrg::CLI.is_response_success(reply, false)
    reply['result']['files']['keys']
  else
    nil
  end
end
106
+
107
# Saves a backup of the local index next to it as idx.yml.backup.
#
# When no idx.yml exists yet, an empty index ({commit: nil, tree: {}}) is
# written as the backup instead, so restore_idx always has a file to load.
def backup_idx
  Cnvrg::Logger.log_info("Backup idx")
  idx_path = "#{self.local_path}/.cnvrg/idx.yml"
  backup_path = "#{self.local_path}/.cnvrg/idx.yml.backup"
  # File.exists? was removed in Ruby 3.2; File.exist? works on every version.
  if File.exist?(idx_path)
    FileUtils.cp(idx_path, backup_path)
  else
    idx = {commit: nil, tree: {}}
    File.open(backup_path, 'w') { |f| f.write idx.to_yaml }
  end
end
116
+
117
# Replaces the local index with the content of idx.yml.backup.
# Logs the restore and passes the current binding to the logger first.
def restore_idx
  Cnvrg::Logger.log_info("Restore idx because an error.")
  Cnvrg::Logger.log_method(bind: binding)
  backup_file = "#{self.local_path}/.cnvrg/idx.yml.backup"
  idx = YAML.load_file(backup_file)
  set_idx(idx)
end
123
+
124
# Rewrites .cnvrg/config.yml (relative to the current directory) so the
# directory points at a different dataset.
#
# The title is stored under :dataset_name, the key that initialize and
# validate_config read. It is also kept under :dataset_home, the key this
# method wrote before, in case anything still reads it.
#
# @param owner [String]
# @param slug [String]
# @param title [String]
def change_url(owner: '', slug: '', title: '')
  config = {dataset_name: title, dataset_home: title, dataset_slug: slug, owner: owner}
  File.open(".cnvrg/config.yml", "w+") { |f| f.write config.to_yaml }
end
128
+
129
# Sends a DELETE request for the given dataset.
#
# @param dataset_slug [String]
# @param owner [String]
# @return [Object] the raw API response
def self.delete(dataset_slug, owner)
  Cnvrg::API.request("users/#{owner}/datasets/#{dataset_slug}/delete", 'DELETE')
end
133
+
134
# Reads the commit recorded in the local index.
#
# @return [Object, nil] the :commit entry of idx.yml, or nil when the index
#   file does not exist
def last_local_commit
  return nil unless File.exist?("#{self.local_path}/.cnvrg/idx.yml")

  YAML.load_file(@local_path + "/.cnvrg/idx.yml")[:commit]
end
141
+
142
# Posts to the "volumes/create" endpoint with the last local commit as
# data_commit.
#
# @return [Object] the raw API response
def snapshot
  payload = {data_commit: last_local_commit}
  reply = Cnvrg::API.request("users/#{self.owner}/datasets/#{self.slug}/volumes/create", 'POST', payload)
  CLI.is_response_success(reply)
  reply
end
148
+
149
# Requests the dataset list of the given owner.
#
# @param owner [String]
# @return [Object] the raw API response
def list(owner)
  reply = Cnvrg::API.request("users/#{owner}/datasets/list", 'GET')
  CLI.is_response_success(reply)
  reply
end
154
+
155
# Lists the saved queries of this dataset as table rows.
#
# @return [Array<Array>] a header row followed by one row per query:
#   name, slug, created_at (converted with in_time_zone, as a string),
#   username
def search_queries
  reply = Cnvrg::API.request("users/#{self.owner}/datasets/#{self.slug}/queries/list", 'GET')
  CLI.is_response_success(reply)
  header = ["name", "id", "created_at", "username"]
  rows = reply["results"]["queries"].map do |query|
    [query["name"], query["slug"], query["created_at"].in_time_zone.to_s, query["username"]]
  end
  [header] + rows
end
164
+
165
# Lists the files matched by a saved query as table rows.
#
# @param query_slug [String] slug of the saved query
# @return [Array<Array>] a header row followed by one row per file:
#   name, fullpath, s3_url
def get_query_file(query_slug)
  reply = Cnvrg::API.request("users/#{self.owner}/datasets/#{self.slug}/search/#{query_slug}", 'GET')
  CLI.is_response_success(reply)
  header = ["Name", "Full path", "URL"]
  rows = reply["results"]["query_files"].map do |file|
    [file["name"], file["fullpath"], file["s3_url"]]
  end
  [header] + rows
end
174
+
175
# Downloads the dataset's tags YAML and writes it into the working
# directory under the file name supplied by the server.
#
# @return [Boolean] true when the file was written, false when writing
#   raised
def download_tags_yaml
  reply = Cnvrg::API.request("users/#{self.owner}/datasets/#{self.slug}/tags_yml", 'GET')
  CLI.is_response_success(reply)
  begin
    target = "#{self.working_dir}/#{reply["results"]["filename"]}"
    File.open(target, "w+") { |f| f.write reply["results"]["file_content"] }
    true
  rescue
    false
  end
end
186
+
187
# Requests the commit list of this dataset.
#
# @param commit_sha1 [String, nil] sent as the "commit" query parameter
# @return [Object] the raw API response
def list_commits(commit_sha1: nil)
  endpoint = "users/#{self.owner}/datasets/#{self.slug}/list_commits?commit=#{commit_sha1}"
  reply = Cnvrg::API.request(endpoint, 'GET')
  CLI.is_response_success(reply)
  reply
end
195
+
196
# Uploads data tags described by a YAML file.
#
# @param tag_file [String, File] path to the YAML file, or an open file
#   object; an open file is closed after it has been read
# @return [Boolean] true when the server answers with status 200
def upload_tags_via_yml(tag_file = nil)
  records_yml = YAML.load_file(tag_file)
  # Only file-like arguments can be closed; a String path used to raise
  # NoMethodError here.
  tag_file.close if tag_file.respond_to?(:close)
  response = Cnvrg::API.request("users/#{self.owner}/datasets/#{self.slug}/data_tags_create", 'POST', {records_yml: records_yml})
  response["status"] == 200
end
206
+
207
# Web URL of this dataset.
#
# NOTE(review): this class defines `url` twice; the later definition
# replaces this one when the class is loaded. This copy used to build a
# ".../projects/..." URL; it now builds the same ".../datasets/..." URL as
# the later definition so the two cannot disagree.
#
# @return [String]
def url
  url = Cnvrg::Helpers.remote_url
  "#{url}/#{self.owner}/datasets/#{self.slug}"
end
211
+
212
# Makes sure a .cnvrgignore file exists, either in the current directory or
# inside +dataset_name+, creating the latter with the default content when
# neither is present.
#
# @param dataset_name [String] directory checked when the current directory
#   has no .cnvrgignore
# @param remote [Object] not used by this method
# @return [nil, Integer, false] nil when a file already exists, the result
#   of the write when one was created, false when creation raised
def self.verify_cnvrgignore_exist(dataset_name, remote)
  path = File.exist?(".cnvrgignore") ? ".cnvrgignore" : "#{dataset_name}/.cnvrgignore"
  return nil if File.exist?(path)

  begin
    FileUtils.touch([path])
    default_content = Helpers.cnvrgignore_content
    File.open(path, "w+") { |f| f.write default_content }
  rescue StandardError
    return false
  end
end
232
+
233
# Appends entries to this dataset's .cnvrgignore, one per line, preceded by
# a blank line.
#
# @param new_ignore [Array<String>, nil] entries to append
# @return [Boolean] true when nothing had to be added or the append
#   succeeded, false when writing raised
def update_ignore_list(new_ignore)
  return true if new_ignore.nil? || new_ignore.empty?

  begin
    File.open(self.local_path + "/.cnvrgignore", "a+") do |f|
      f.puts("\n")
      new_ignore.each { |entry| f.puts("#{entry}\n") }
    end
    true
  rescue
    false
  end
end
251
+
252
# Expands this dataset's .cnvrgignore into a flat list of ignored paths.
#
# Blank lines, lines starting with "#" and repeated lines are skipped.
# A line ending in "/" (or naming an existing directory) contributes itself
# plus everything Dir.glob finds beneath it; a line containing "*" is
# expanded with Dir.glob("**/*<line>"); any other line is taken literally.
# Globs are resolved against the current working directory.
#
# @return [Array<String>]
def get_ignore_list
  ignore_path = self.local_path + "/.cnvrgignore"
  # After a clone the ignore file may not exist yet; create the default one.
  # (File.exists? was removed in Ruby 3.2, hence File.exist?.)
  self.generate_cnvrg_ignore unless File.exist?(ignore_path)

  ignore_list = []
  return ignore_list unless File.exist?(ignore_path)

  # File.foreach closes the file when done; File.open(...).each_line left
  # the handle open.
  File.foreach(ignore_path) do |line|
    line = line.strip
    next if line.start_with?("#") || ignore_list.include?(line) || line.empty?

    if line.end_with?("/") || File.directory?(line)
      ignore_list << line
      ignore_list << Dir.glob("#{line}/**/*", File::FNM_DOTMATCH)
    elsif line.include?("*")
      ignore_list << Dir.glob("**/*#{line}", File::FNM_DOTMATCH)
    else
      ignore_list << line
    end
  end
  ignore_list.flatten
end
281
+
282
+
283
# Creates a dataset on the server and links the current directory to it by
# writing .cnvrg/config.yml (and a default .cnvrgignore when none exists).
#
# @param owner [String]
# @param dataset_name [String] title of the new dataset
# @param is_public [Boolean] forwarded to the server
# @param bucket [Object, nil] forwarded to the server
# @return [Boolean] false when any step inside the request/write sequence
#   raised, true otherwise
def self.init(owner, dataset_name, is_public = false, bucket: nil)
  list_dirs = [".cnvrg"]
  list_files = [".cnvrg/config.yml"]
  create_ignore = !File.exist?(".cnvrgignore")
  list_files << ".cnvrgignore" if create_ignore

  cnvrgignore = Helpers.cnvrgignore_content
  begin
    payload = {title: dataset_name, owner: owner, is_public: is_public, bucket: bucket}
    response = Cnvrg::API.request("cli/create_dataset", 'POST', payload)
    Cnvrg::CLI.is_response_success(response)
    # The server returns the created dataset as a JSON string.
    dataset_slug = JSON.parse(response["result"])["slug"]

    config = {dataset_name: dataset_name, dataset_slug: dataset_slug, owner: owner}

    FileUtils.mkdir_p list_dirs
    FileUtils.touch list_files
    File.open(".cnvrg/config.yml", "w+") { |f| f.write config.to_yaml }
    File.open(".cnvrgignore", "w+") { |f| f.write cnvrgignore } if create_ignore
  rescue StandardError
    return false
  end
  true
end
315
+
316
# Links the current directory to an existing dataset: writes
# .cnvrg/config.yml and an initial index holding the dataset's init commit.
#
# @param owner [String, nil]
# @param slug [String, nil]
# @return [Boolean, nil] false when owner or slug is blank, nil when the
#   lookup request is not successful, true once the directory is linked
# @raise [Exception] when the server reports no init commit, or wrapping any
#   StandardError raised along the way
def self.link_dataset(owner: nil, slug: nil)
  return false if owner.blank? || slug.blank?

  response = Cnvrg::API.request("users/#{owner}/datasets/#{slug}", 'GET')
  return unless Cnvrg::CLI.is_response_success(response, false)

  result = response["result"]
  sha1 = result["init_commit_sha1"]

  # The initial index must carry the init commit sha1 so the user can act on
  # the dataset; a blank sha1 means the server is an old version that does
  # not supply it.
  if sha1.blank?
    raise Exception.new("This feature is not available for your cnvrg version. Please contact support for more information")
  end

  config = {dataset_name: result["title"], dataset_slug: result["slug"], owner: owner}

  FileUtils.mkdir_p [".cnvrg"]
  FileUtils.touch [".cnvrg/config.yml"]
  File.open(".cnvrg/config.yml", "w+") { |f| f.write config.to_yaml }

  Dataset.new(Dir.pwd).write_idx({}, sha1)
  true
rescue => e
  raise Exception.new(e)
end
350
+
351
# Creates an empty local checkout directory <dataset_slug>/ containing
# .cnvrg/config.yml and, when the current directory has no .cnvrgignore, a
# default <dataset_slug>/.cnvrgignore.
#
# @param owner [String]
# @param dataset_name [String]
# @param dataset_slug [String] also used as the directory name
# @return [Boolean] false when creating the files raised, true otherwise
def self.blank_clone(owner, dataset_name, dataset_slug)
  config_path = "#{dataset_slug}/.cnvrg/config.yml"
  ignore_path = "#{dataset_slug}/.cnvrgignore"

  list_files = [config_path]
  # Note: existence is checked in the current directory, not in the clone.
  create_ignore = !File.exist?(".cnvrgignore")
  list_files << ignore_path if create_ignore

  cnvrgignore = Helpers.cnvrgignore_content
  begin
    config = {dataset_name: dataset_name, dataset_slug: dataset_slug, owner: owner}

    FileUtils.mkdir_p ["#{dataset_slug}/.cnvrg"]
    FileUtils.touch list_files
    File.open(config_path, "w+") { |f| f.write config.to_yaml }
    File.open(ignore_path, "w+") { |f| f.write cnvrgignore } if create_ignore
  rescue StandardError
    return false
  end
  true
end
380
+
381
# Writes the default ignore content to this dataset's .cnvrgignore,
# replacing whatever the file contained.
def generate_cnvrg_ignore
  default_content = Helpers.cnvrgignore_content
  target = self.local_path + "/.cnvrgignore"
  File.open(target, "w+") { |f| f.write default_content }
end
385
+
386
# Checks whether a dataset under +base_dir+ has been marked as successfully
# downloaded (its config.yml contains `:success: true`).
#
# The body used to reference an undefined `dataset_title`; the resulting
# NameError was swallowed by the inline rescue, so the method always
# returned false. It now uses its +dataset_slug+ argument.
#
# @param dataset_slug [String] directory name of the dataset
# @param base_dir [String] directory that holds the datasets
# @return [Boolean] false when the config is missing, unreadable, or not
#   marked successful
def self.verify_dataset(dataset_slug, base_dir = "/data")
  config =
    begin
      YAML.load_file(File.join(base_dir, dataset_slug.to_s, ".cnvrg/config.yml"))
    rescue StandardError
      {}
    end
  # An empty YAML file loads as a non-hash value; treat it as not verified.
  config.is_a?(Hash) && config[:success] == true
end
390
+
391
# Polls until every listed dataset directory has `:success: true` in its
# .cnvrg/config.yml, sleeping 10 seconds between attempts.
#
# Errors while checking (for example a config file that does not exist yet)
# are logged and followed by another 10 second sleep.
#
# @param dataset_titles [Array<String>] dataset directories to check
# @param timeout [Integer, nil] seconds after which polling gives up; nil
#   polls without limit
# @return [Boolean] true when all datasets are ready, false on timeout
def self.verify_datasets(dataset_titles, timeout = nil)
  started_at = Time.now.to_i
  Cnvrg::Logger.log_info("Verifying datasets #{dataset_titles}")
  Cnvrg::Logger.log_info("Timeout is #{timeout}")
  while true
    begin
      elapsed = Time.now.to_i - started_at
      return false if timeout.present? && timeout < elapsed

      ready = dataset_titles.all? do |dataset_title|
        YAML.load_file("#{dataset_title}/.cnvrg/config.yml")[:success] == true
      end
      return true if ready

      Cnvrg::Logger.log_info("Sleeping..")
      sleep 10
    rescue => e
      Cnvrg::Logger.log_info("Got error")
      Cnvrg::Logger.log_error(e)
      sleep 10
    end
  end
end
413
+
414
# Scans the sub-directories of the current directory for successfully
# downloaded datasets.
#
# A directory counts when its .cnvrg/config.yml has `success: true`, a
# dataset name and a dataset slug, and its .cnvrg/idx.yml records a commit.
#
# @return [Array<Hash>] unique hashes with the keys :dataset_slug,
#   :dataset_name and :local_commit
def self.scan_datasets()
  Cnvrg::Logger.log_info("Looking up datasets")
  found = Dir.entries(Dir.pwd).map do |entry|
    next unless File.directory?(File.join(Dir.pwd, entry))
    next if entry == '.' || entry == '..'

    begin
      # Unreadable or missing files simply disqualify the directory.
      config = YAML.load_file("#{Dir.pwd}/#{entry}/.cnvrg/config.yml") rescue nil
      local_commit = YAML.load_file("#{Dir.pwd}/#{entry}/.cnvrg/idx.yml")[:commit] rescue nil
      complete = config.present? && config[:success] == true &&
                 config[:dataset_name].present? && config[:dataset_slug].present? &&
                 local_commit.present?
      next nil unless complete

      {
        "dataset_slug": config[:dataset_slug],
        "dataset_name": config[:dataset_name],
        "local_commit": local_commit,
      }
    rescue
      nil
    end
  end
  found.compact.uniq
end
437
+
438
# Placeholder: accepts a commit and does nothing.
#
# @param commit [Object] ignored
# @return [nil]
def clone(commit)
  nil
end
441
+
442
# Creates the local skeleton for a cloned dataset:
# <dataset_name>/.cnvrg/config.yml holding name, slug and owner.
#
# @param owner [String]
# @param dataset_name [String] also used as the directory name
# @param dataset_slug [String]
# @param remote [Boolean] when false the dataset directory itself is listed
#   explicitly for creation as well
# @return [Boolean] false (after printing the error) when creating the
#   files raised, true otherwise
def self.clone(owner, dataset_name, dataset_slug, remote = false)
  begin
    meta_dir = "#{dataset_name}/.cnvrg"
    config_path = "#{dataset_name}/.cnvrg/config.yml"
    list_dirs = remote ? [meta_dir] : [dataset_name, meta_dir]

    config = {dataset_name: dataset_name, dataset_slug: dataset_slug, owner: owner}

    FileUtils.mkdir_p list_dirs
    FileUtils.touch [config_path]
    File.open(config_path, "w+") { |f| f.write config.to_yaml }
  rescue => e
    puts "Exception in clone request:#{e.message}"
    return false
  end
  true
end
467
+
468
# Requests a page of the dataset's file listing.
#
# @param commit_sha1 [String] commit to list, "latest" by default
# @param limit [Integer] forwarded to the server
# @param offset [Integer] forwarded to the server
# @param expires [Integer] forwarded to the server
# @return [String, nil] the response serialized as JSON, or nil when the
#   response is blank
def list_files(commit_sha1: "latest", limit: 1000, offset: 0, expires: 3600)
  params = {commit_sha1: commit_sha1, limit: limit, offset: offset, expires: expires}
  response = Cnvrg::API.request("users/#{self.owner}/datasets/#{self.slug}/list", 'GET', params)
  response.blank? ? nil : response.to_json
end
473
+
474
# Downloads a dataset directory by directory into +dataset_home+, showing a
# progress bar, and marks the checkout as successful when done.
#
# @param commit [String] commit to download, 'latest' by default
# @param dataset_home [String] local dataset directory
# @return [Boolean] false when no trees could be fetched, true otherwise
def self.clone_tree(commit: 'latest', dataset_home: nil)
  @dataset = Cnvrg::Dataset.new(dataset_home)
  @files = Cnvrg::Datafiles.new(@dataset.owner, @dataset.slug, dataset: @dataset)
  trees = @files.get_trees(commit: commit)
  return false if trees.nil?

  bar_options = {
    :title => "Download Progress",
    :progress_mark => '=',
    :format => "%b>>%i| %p%% %t",
    :starting_at => 0,
    :total => trees.size,
    :autofinish => true
  }
  pb = ProgressBar.create(**bar_options)
  trees.each do |tree|
    # The bar advances before the directory is downloaded.
    pb.progress += 1
    @files.download_dir(dataset_home, tree)
  end
  pb.finish
  @dataset.write_success
  true
end
493
+
494
# Marks the checkout as successfully downloaded by appending
# ":success: true" to its .cnvrg/config.yml. Does nothing when the config
# file is missing.
#
# @param in_folder [Boolean] not used by this method
def write_success(in_folder = false)
  config_file = File.join(@local_path || @working_dir, ".cnvrg/config.yml")
  return unless File.exist?(config_file)

  File.open(config_file, "a") { |f| f.puts(":success: true") }
end
501
+
502
# Links the current directory to a dataset by writing .cnvrg/config.yml,
# and creates a default .cnvrgignore when none exists.
#
# @param owner [String]
# @param dataset_slug [String]
# @param dataset_name [String]
# @return [Boolean] false when creating the files raised, true otherwise
def self.init_container(owner, dataset_slug, dataset_name)
  cnvrgignore = Helpers.cnvrgignore_content
  begin
    # Decide this BEFORE touching: touch creates .cnvrgignore, so checking
    # afterwards always found the file and the default content was never
    # written.
    create_ignore = !File.exist?(".cnvrgignore")

    FileUtils.mkdir_p [".cnvrg"]
    FileUtils.touch [".cnvrgignore", ".cnvrg/config.yml"]

    config = {dataset_name: dataset_name,
              dataset_slug: dataset_slug,
              owner: owner}
    File.open(".cnvrg/config.yml", "w+") { |f| f.write config.to_yaml }
    File.open(".cnvrgignore", "w+") { |f| f.write cnvrgignore } if create_ignore
  rescue StandardError
    return false
  end
  true
end
526
+
527
+
528
# Loads the local index.
#
# @return [Object] the parsed content of .cnvrg/idx.yml, or an empty index
#   ({commit: nil, tree: {}}) when the file does not exist
def get_idx
  idx_path = "#{self.local_path}/.cnvrg/idx.yml"
  # File.exists? was removed in Ruby 3.2; File.exist? works on every version.
  return YAML.load_file(idx_path) if File.exist?(idx_path)

  {commit: nil, tree: {}}
end
534
+
535
# Overwrites the local index file with the YAML form of +idx+.
#
# @param idx [Object] index to store
def set_idx(idx)
  idx_path = "#{self.local_path}/.cnvrg/idx.yml"
  File.open(idx_path, 'w+') do |f|
    f.write idx.to_yaml
  end
end
538
+
539
# Web URL of this dataset: <remote url>/<owner>/datasets/<slug>.
#
# @return [String]
def url
  base = Cnvrg::Helpers.remote_url
  "#{base}/#{self.owner}/datasets/#{self.slug}"
end
543
+
544
# Builds an index tree for a chunk of local files.
#
# Directories map to nil under "<label>/"; files map to a hash with sha1,
# file_name, file_size, content_type, absolute_path and relative_path.
# Labels and relative paths are relative to the dataset root and are
# prefixed with +prefix+ when one is given.
#
# @param list_files [Array<String>] absolute paths to index; when a prefix
#   is given, its cumulative directories are appended to this array
# @param threads [Integer] thread count handed to Parallel.map
# @param prefix [String] optional directory prefix for labels
# @return [Hash] label => entry
# @raise [StandardError] when a label is not a valid file name
def generate_chunked_idx(list_files = [], threads: 15, prefix: '')
  tree = {}
  # The root is escaped so that directories containing regex metacharacters
  # (parentheses, "+", "[" ...) are still stripped from relative paths.
  root_pattern = /^#{Regexp.escape(@local_path + "/")}/
  Parallel.map(list_files, in_threads: threads) do |file|
    # Label is prefix/path when a prefix exists, otherwise just path.
    label = file.gsub(self.local_path + "/", "")
    label = "#{prefix}/#{label}" if prefix.present?
    if not Cnvrg::Files.valid_file_name?(label)
      raise StandardError.new("#{label} is not a valid file name.")
    end
    if File.directory? file
      tree[label + "/"] = nil
    else
      sha1 = OpenSSL::Digest::SHA1.file(file).hexdigest
      file_name = File.basename file
      file_size = File.size(file).to_f
      mime_type = MimeMagic.by_path(file)
      # Unknown and textual types are both reported as text/plain.
      content_type = !(mime_type.nil? or mime_type.text?) ? mime_type.type : "text/plain"
      relative_path = file.gsub(root_pattern, "")
      relative_path = "#{prefix}/#{relative_path}" if prefix.present?
      tree[label] = {sha1: sha1, file_name: file_name, file_size: file_size, content_type: content_type, absolute_path: file, relative_path: relative_path}
    end
  end
  if prefix.present?
    # For a prefix a/b/c, append a, a/b and a/b/c to the caller's list.
    dirs = prefix.split('/')
    curr_path = []
    dirs.each do |dir|
      curr_path << dir
      list_files << curr_path.join('/')
    end
  end
  return tree
end
577
+
578
# Points the local index back at a known commit and drops any pending
# next_commit.
#
# @param commit [String, nil] commit to revert to; when blank, the server is
#   asked for the last valid commit
def revert_to_last_commit(commit: nil)
  if commit.blank?
    resp = Cnvrg::API.request("users/#{self.owner}/datasets/#{self.slug}/last_valid_commit", 'GET')
    commit = resp['result']['commit_sha1'] if CLI.is_response_success(resp, false)
  end
  # The writer is defined as update_idx_with_commit! (with a bang); the
  # bang-less name called here before is not defined in this class.
  update_idx_with_commit!(commit) if commit.present?
  revert_next_commit
end
588
+
589
# Lists every path under the dataset root, smallest files first.
#
# "." / ".." entries and anything under .cnvrg are dropped; paths starting
# with .cnvrgignore are always kept.
#
# @param with_ignore [Boolean] when true, ignored paths are kept in the
#   result; when false, entries from get_ignore_list are removed
# @return [Array<String>] absolute paths sorted by File.size
def list_all_files(with_ignore = false)
  candidates = Dir.glob("#{self.local_path}/**/*", File::FNM_DOTMATCH)
  kept = candidates.reject do |x|
    excluded = (x =~ /\/\.{1,2}$/) ||
               (x =~ /^#{self.local_path}\/\.cnvrg\/*/) ||
               (x =~ /^#{self.local_path}\/\.cnvrgignore.conflict*/)
    excluded && !(x =~ /^#{self.local_path}\/\.cnvrgignore/)
  end

  by_size = kept.sort_by { |fn| File.size(fn) }
  return by_size if with_ignore

  ignored = self.get_ignore_list.map { |ignore_file| "#{self.local_path}/#{ignore_file}" }
  by_size - ignored
end
598
+
599
# Writes the local index file from a tree and a commit.
#
# @param tree [Hash, nil] tree to store; when nil it is produced by
#   generate_idx and every present entry is reduced to its sha1 plus the
#   current time as commit_time
# @param commit [Object, nil] commit to record
def write_idx(tree = nil, commit = nil)
  if tree.nil?
    tree = self.generate_idx[:tree].map do |k, v|
      v.present? ? [k, {sha1: v[:sha1], commit_time: Time.now}] : [k, v]
    end.to_h
  end
  content = {tree: tree, commit: commit}
  File.open("#{self.local_path}/.cnvrg/idx.yml", 'w') { |f| f.write content.to_yaml }
end
607
+
608
# Replaces the :tree entry of the local index and saves it.
#
# @param tree [Object] new tree
def write_tree(tree)
  updated = get_idx.tap { |idx| idx[:tree] = tree }
  set_idx(updated)
end
613
+
614
# Rebuilds the local index from the files on disk and writes it to
# .cnvrg/idx.yml.
#
# Directories are stored as "<label>/" => nil, files as
# label => {sha1:, commit_time:}. Paths from get_ignore_list are skipped.
# The previous :commit, and a non-empty :next_commit, are carried over from
# the existing index.
#
# @param show_progress [Boolean] show a progress bar while hashing
# @return [Hash] the index that was written
def generate_idx(show_progress = false)
  idx_path = "#{self.local_path}/.cnvrg/idx.yml"
  # File.exists? was removed in Ruby 3.2; File.exist? works on every version.
  old_idx = File.exist?(idx_path) ? YAML.load_file(idx_path) : nil

  tree_idx = Hash.new(0)
  list = Dir.glob("#{self.local_path}/**/*", File::FNM_DOTMATCH).reject {|x| (x =~ /\/\.{1,2}$/) or (x =~ /^#{self.local_path}\/\.cnvrg\/*/) or (x =~ /^#{self.local_path}\/\.cnvrgignore.conflict*/) and not (x =~ /^#{self.local_path}\/\.cnvrgignore/)}
  list_ignore = self.get_ignore_list()

  parallel_options = {}
  if show_progress
    parallel_options[:progress] = {
      :title => "Checking Dataset",
      :progress_mark => '=',
      :format => "%b>>%i| %p%% %t",
      :starting_at => 0,
      :total => (list).size,
      :autofinish => true
    }
  end
  parallel_options[:in_threads] = IDXParallelThreads
  parallel_options[:isolation] = true

  Parallel.map(list, parallel_options) do |e|
    label = e.gsub(self.local_path + "/", "")
    next if list_ignore.include? label

    if File.directory? e
      tree_idx[label + "/"] = nil
    else
      sha1 = OpenSSL::Digest::SHA1.file(e).hexdigest
      # NOTE(review): the old index is read here with string keys ("tree",
      # "sha1") while this class writes it with symbol keys (:tree, :sha1),
      # so for an index written by this class the first branch is always
      # taken and commit_time is reset to nil. Left as is - confirm the
      # intended behavior before changing.
      if old_idx.nil? or old_idx.to_h["tree"].nil?
        tree_idx[label] = {sha1: sha1, commit_time: nil}
      elsif old_idx["tree"][label].nil? or old_idx["tree"][label]["sha1"] != sha1
        tree_idx[label] = {sha1: sha1, commit_time: nil}
      else
        tree_idx[label] = old_idx["tree"][label]
      end
    end
  end

  if !old_idx.nil? and !old_idx[:next_commit].nil? and !old_idx[:next_commit].empty?
    idx = {commit: old_idx.to_h[:commit], tree: tree_idx, next_commit: old_idx[:next_commit]}
  else
    idx = {commit: old_idx.to_h[:commit], tree: tree_idx}
  end
  idx_yaml = idx.to_yaml
  File.open(idx_path, 'w') {|f| f.write idx_yaml}
  return idx
end
673
+
674
# Posts to the "volumes/create" endpoint without a payload.
#
# @return [Object] the raw API response
def create_volume
  reply = Cnvrg::API.request("users/#{self.owner}/datasets/#{self.slug}/volumes/create", 'POST')
  CLI.is_response_success(reply)
  reply
end
679
+
680
# Asks the server what changed relative to +current_commit+.
#
# @param current_commit [Object] commit the local copy is at
# @return [Object] the raw API response
def download_updated_data(current_commit)
  payload = {current_commit: current_commit}
  reply = Cnvrg::API.request("users/#{self.owner}/datasets/#{self.slug}/download_updated_data", 'POST', payload)
  CLI.is_response_success(reply, false)
  reply
end
685
+
686
# Compares the local index with the server.
#
# With +force+ no request is made: a response is built locally in which
# every key of the local tree counts as added.
#
# @param new_branch [Object] forwarded to the server
# @param commit [Object] current commit, the last local commit by default
# @param local_idx [Hash, nil] local index; generated when nil
# @param force [Boolean] build the response locally instead of asking
# @param next_commit [Object, nil] forwarded / echoed as the commit
# @return [Hash, Object] the locally built hash or the raw API response
def compare_idx(new_branch, commit = last_local_commit, local_idx = nil, force = false, next_commit = nil)
  local_idx = self.generate_idx if local_idx.nil?
  ignore_list = self.get_ignore_list()

  if force
    added = local_idx[:tree] ? local_idx[:tree].keys.flatten : []
    tree_diff = {
      "added" => added,
      "updated_on_server" => [],
      "updated_on_local" => [],
      "deleted" => [],
      "conflicts" => []
    }
    return {"result" => {"commit" => next_commit, "tree" => tree_diff}}
  end

  payload = {idx: local_idx, new_branch: new_branch, current_commit: commit, ignore: ignore_list, next_commit: next_commit}
  reply = Cnvrg::API.request("users/#{self.owner}/datasets/#{self.slug}/status", 'POST', payload)
  CLI.is_response_success(reply, false)
  reply
end
710
+
711
# Asks the server for the download status relative to the local commit
# state and ignore patterns.
#
# @param all_files [Boolean] forwarded to the server
# @param desired_commit [String, nil] forwarded when present
# @return [Object] the raw API response
def compare_idx_download(all_files: false, desired_commit: nil)
  payload = {
    current_commit: self.last_local_commit,
    next_commit: self.get_next_commit,
    ignore: self.send_ignore_list(),
    all_files: all_files,
    desired_commit: desired_commit.presence
  }
  Cnvrg::API.request("users/#{self.owner}/datasets/#{self.slug}/download_status", 'POST', payload)
end
717
+
718
# Records +commit_sha1+ as :partial_commit in the local index.
#
# @param commit_sha1 [Object]
def set_partial_commit(commit_sha1)
  updated = get_idx.tap { |idx| idx[:partial_commit] = commit_sha1 }
  set_idx(updated)
end
723
+
724
# Reads the :partial_commit entry of the local index.
#
# A missing entry yields nil. The earlier `fetch` without a default raised
# KeyError whenever no partial commit had been stored, which includes the
# empty index returned by get_idx.
#
# @return [Object, nil]
def get_partial_commit
  idx = self.get_idx
  return nil unless idx.respond_to?(:fetch)

  idx.fetch(:partial_commit, nil)
end
728
+
729
# Asks the server for the current status relative to the last local commit.
#
# @param new_branch [Object] forwarded to the server
# @return [Object] the raw API response
def current_status(new_branch)
  payload = {current_commit: last_local_commit, new_branch: new_branch}
  reply = Cnvrg::API.request("users/#{self.owner}/datasets/#{self.slug}/status_current", 'POST', payload)
  CLI.is_response_success(reply, true)
  reply
end
735
+
736
# Converts .cnvrgignore into the pattern list sent to the server.
#
# Blank lines, "#" comments and repeated lines are skipped. A line ending in
# "/" yields two entries: the line with every "/" removed, and the line
# followed by "."; in a line containing "*" each "*" becomes ".*"; any other
# line is sent unchanged.
#
# @return [Array<String>] patterns, or [] when the file cannot be read
def send_ignore_list()
  begin
    ignore_list = []
    # File.foreach closes the file when done; File.open(...).each_line left
    # the handle open.
    File.foreach(self.local_path + "/.cnvrgignore") do |line|
      line = line.strip
      next if line.start_with?("#") || ignore_list.include?(line) || line.empty?

      if line.end_with? "/"
        ignore_list << line.gsub("/", "")
        ignore_list << line + "."
      elsif line.include? "*"
        ignore_list << line.gsub("*", ".*")
      else
        ignore_list << line
      end
    end
    return ignore_list.flatten
  rescue
    return []
  end
end
759
+
760
+
761
# Asks the server to compare +commit+ with the last local commit.
#
# @param commit [Object] commit to compare against
# @return [Object] the raw API response
def compare_commits(commit)
  payload = {compare_commit: commit, current_commit: last_local_commit}
  reply = Cnvrg::API.request("users/#{self.owner}/datasets/#{self.slug}/compare_commits", 'POST', payload)
  CLI.is_response_success(reply, false)
  reply
end
766
+
767
# Stores +commit_sha1+ as :next_commit in the local index, creating the
# index (with empty-string commit and tree) when it does not exist.
#
# @param commit_sha1 [Object]
# @return [true]
def set_next_commit(commit_sha1)
  idx_path = "#{self.local_path}/.cnvrg/idx.yml"
  idx_hash = File.exist?(idx_path) ? YAML.load_file(idx_path) : {commit: "", tree: ""}
  idx_hash[:next_commit] = commit_sha1
  File.open(idx_path, 'w') { |f| f.write idx_hash.to_yaml }
  true
end
780
+
781
# Reads the :next_commit entry of the local index.
#
# @return [Object, nil] nil when the index file does not exist
def get_next_commit()
  idx_path = "#{self.local_path}/.cnvrg/idx.yml"
  return nil unless File.exist?(idx_path)

  YAML.load_file(idx_path)[:next_commit]
end
788
+
789
# Promotes :next_commit to :commit in the local index and drops every entry
# other than :commit and :tree. Does nothing when the index is missing.
#
# @return [Object, nil] nil when the index file does not exist
def remove_next_commit()
  idx_path = "#{self.local_path}/.cnvrg/idx.yml"
  return nil unless File.exist?(idx_path)

  current = YAML.load_file(idx_path)
  promoted = {commit: current[:next_commit], tree: current[:tree]}
  File.open(idx_path, 'w') { |f| f.write promoted.to_yaml }
end
799
+
800
# Removes the :next_commit entry from the local index, leaving everything
# else untouched. Does nothing when the index is missing.
#
# @return [Object, nil] nil when the index file does not exist
def revert_next_commit()
  idx_path = "#{self.local_path}/.cnvrg/idx.yml"
  return nil unless File.exist?(idx_path)

  remaining = YAML.load_file(idx_path).except(:next_commit)
  File.open(idx_path, 'w') { |f| f.write remaining.to_yaml }
end
808
+
809
# Asks the server whether +commit+ would start a new branch and passes the
# answer to update_is_new_branch.
#
# @param commit [String, nil] commit to compare; the last local commit is
#   used when nil or empty
# @return [Object] the "new_branch" entry of the response's result
def compare_commit(commit)
  commit = last_local_commit if commit.nil? || commit.empty?
  reply = Cnvrg::API.request("users/#{self.owner}/projects/#{self.slug}/commit/compare", 'POST', {current_commit: commit})
  CLI.is_response_success(reply, false)
  update_is_new_branch(reply["result"]["new_branch"])
  reply["result"]["new_branch"]
end
818
+
819
# Stamps +commit_time+ onto the given paths in the local index and saves it.
# Paths without an entry in the tree are left out.
#
# @param files [Array<String>] tree labels to stamp
# @param commit_time [Object] value stored as :commit_time
# @return [true]
def update_idx_with_files_commits!(files, commit_time)
  idx_path = "#{self.local_path}/.cnvrg/idx.yml"
  idx_hash = YAML.load_file(idx_path)

  files.each do |path|
    entry = idx_hash[:tree].to_h[path].to_h
    entry[:commit_time] = commit_time
  end
  # Self-assignment: keeps the value but makes sure the key is present.
  idx_hash[:next_commit] = idx_hash[:next_commit]
  File.open(idx_path, 'w') { |f| f.write idx_hash.to_yaml }

  true
end
832
+
833
# Overwrites the local index file with the YAML form of +idx+.
#
# @param idx [Object] index to store
# @return [true]
def update_idx(idx)
  idx_path = "#{self.local_path}/.cnvrg/idx.yml"
  File.open(idx_path, 'w') do |f|
    f.write idx.to_yaml
  end
  true
end
837
+
838
# Sets the :commit entry of the local index and saves it.
#
# @param commit [Object] commit to record
# @return [true]
def update_idx_with_commit!(commit)
  idx_path = "#{self.local_path}/.cnvrg/idx.yml"
  idx_hash = YAML.load_file(idx_path)
  idx_hash[:commit] = commit
  File.open(idx_path, 'w') { |f| f.write idx_hash.to_yaml }
  true
end
845
+
846
# Deletes +working_dir+ and everything in it. No server request is made.
#
# @param working_dir [String] directory to remove
def revert(working_dir)
  FileUtils.rm_rf(working_dir)
end
851
+
852
# Checks whether the current directory is linked to a dataset.
#
# The checks run in order: the .cnvrg folder exists, it contains
# config.yml, the file is not empty, and it names a dataset (name, slug and
# owner all present).
#
# @return [Hash] {validation:, message:} where validation is
#   Data::ConfigValidation::FAILED or ::SUCCESS
def self.validate_config
  failure = ->(message) { {validation: Data::ConfigValidation::FAILED, message: message} }

  return failure.call(".cnvrg folder does not exists") unless Dir[".cnvrg"].present?

  # This message used to read "config.yml exists", the opposite of the
  # condition being reported.
  unless Dir[".cnvrg/*"].include?(".cnvrg/config.yml")
    return failure.call("config.yml does not exist")
  end

  config = YAML.load_file("#{Dir.getwd}/.cnvrg/config.yml")
  return failure.call("config.yml is empty") unless config

  title = config[:dataset_name]
  slug = config[:dataset_slug]
  owner = config[:owner]
  if title.blank? || slug.blank? || owner.blank?
    return failure.call("config.yml is not valid or some keys are missing")
  end

  {validation: Data::ConfigValidation::SUCCESS, message: "Directory is already linked to #{slug}"}
end
874
+
875
# Exits the process (status 0) when +dataset_home+ already holds the
# requested dataset, so that it is not cloned again.
#
# When +commit+ is given the method returns without exiting unless the
# locally recorded commit is present and equal to it. Any StandardError
# (for example a missing config file) makes the method return nil.
#
# @param dataset_home [String] local dataset directory
# @param dataset_name [String] name expected in the directory's config
# @param commit [String, nil] commit that must already be checked out
# @return [nil]
def self.stop_if_dataset_present(dataset_home, dataset_name, commit: nil)
  cli = Cnvrg::CLI.new()
  config = YAML.load_file(dataset_home + "/.cnvrg/config.yml")
  if commit.present?
    local_commit = (YAML.load_file(dataset_home + "/.cnvrg/idx.yml")[:commit] rescue nil)
    return if commit != local_commit || local_commit.blank?
  end
  return nil unless config[:dataset_name] == dataset_name

  cli.log_message("Dataset already present, clone aborted")
  exit(0)
rescue StandardError
  nil
end
890
+
891
+ end
892
+ end