cnvrg 0.5.6 → 0.5.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -27,15 +27,16 @@ module Cnvrg
  method_option :force, :type => :boolean, :aliases => ["-f","--force"], :default => false
  method_option :sync, :type => :boolean, :aliases => ["-s","--sync"], :default => false
  method_option :tags, :type => :boolean, :aliases => ["--tags"], :desc => "upload file tags", :default => false
-
+ method_option :chunk_size, :type => :numeric, :aliases => ["--chunk"], :desc => "upload file tags", :default => 1000
  def upload
  cli = Cnvrg::CLI.new()
  verbose = options["verbose"]
  sync = options["sync"]
  force = options["force"]
  new_branch = options["new_branch"]
+ chunk_size = options["chunk_size"]
  tags = options["tags"]
- cli.upload_data_new(new_branch, verbose,sync,force, tags)
+ cli.upload_data_new(new_branch, verbose,sync,force, tags, chunk_size)
  end
  desc 'sync', 'sync_data_new'
  method_option :new_branch, :type => :boolean, :aliases => ["-nb"], :desc => "create new branch of commits"
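The upload command now accepts a `--chunk` option (default 1000) and forwards it to `upload_data_new` as an additional trailing argument. A minimal sketch of the resulting call with placeholder values; only the method name and argument order come from the diff above, and in practice Thor populates these from the CLI flags:

    cli = Cnvrg::CLI.new
    cli.upload_data_new(false,   # new_branch
                        true,    # verbose
                        false,   # sync
                        false,   # force
                        false,   # tags
                        1000)    # chunk_size, taken from --chunk (new in 0.5.8)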
@@ -45,6 +46,8 @@ module Cnvrg
  method_option :all_files, :type => :boolean, :aliases => ["--all"], :desc => "download specified commit", :default => false
  method_option :tags, :type => :boolean, :aliases => ["--tags"], :desc => "upload file tags", :default => false
  method_option :parallel, :type => :numeric, :aliases => ["-p", "--parallel"], :desc => "uparallel upload at the same time", :default => 15
+ method_option :chunk_size, :type => :numeric, :aliases => ["--chunk_size"], :desc => "chunk size to communicate with the server", :default => 1000
+ method_option :init, :type => :boolean, :aliases => ["--initial"], :desc => "chunk size to communicate with the server", :default => false

  def sync_data_new()
  cli = Cnvrg::CLI.new()
@@ -55,9 +58,9 @@ module Cnvrg
  all_files = options["all_files"]
  tags = options["tags"]
  parallel=options["parallel"]
-
-
- cli.sync_data_new(new_branch, force, verbose,commit,all_files, tags,parallel)
+ chunk_size = options["chunk_size"]
+ init = options["init"]
+ cli.sync_data_new(new_branch, force, verbose,commit,all_files, tags,parallel, chunk_size, init)
  end
  desc 'data download', 'pull data'
  method_option :new_branch, :type => :boolean, :aliases => ["-nb"], :desc => "create new branch of commits", :default => false
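`sync_data_new` likewise gains `chunk_size` and `init` as trailing arguments, populated from the new `--chunk_size` and `--initial` flags. A sketch of the updated call with placeholder values; the argument order is taken from the diff:

    cli = Cnvrg::CLI.new
    cli.sync_data_new(false,  # new_branch
                      false,  # force
                      false,  # verbose
                      nil,    # commit
                      false,  # all_files
                      false,  # tags
                      15,     # parallel
                      1000,   # chunk_size (new)
                      false)  # init (new)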
@@ -73,7 +76,6 @@ module Cnvrg
  new_branch = options["new_branch"]
  commit = options["commit"]
  all_files = options["all_files"]
-
  cli.download_data_new(verbose,sync,new_branch, commit,all_files)

  end
@@ -87,9 +89,7 @@ module Cnvrg
  only_tree =options[:only_tree]
  commit =options[:commit]
  query =options[:query]
-
- cli.clone_data(dataset_url, only_tree=only_tree,commit=commit, query=query)
-
+ cli.clone_data(dataset_url, only_tree=only_tree,commit=commit, query=query)
  end

  desc 'data clone_query', 'clone query datset'
@@ -20,6 +20,34 @@ module Cnvrg
  @base_resource = "users/#{owner}/datasets/#{dataset_slug}/"
  end

+ def upload_multiple_files(commit_sha1, tree, threads: ParallelThreads, force: false, new_branch: false)
+ random_file_name = (0...8).map { (65 + rand(26)).chr }.join #needed to create a file for post_file..
+ # each file have: {sha1: sha1, file_name: file_name, file_size: file_size, content_type: content_type, absolute_path, relative_path}
+
+ #this call should also implement compare_idx...
+ upload_resp = Cnvrg::API.request(@base_resource + "upload_files", 'POST', {commit_sha1: commit_sha1, tree: tree, force: force, is_branch: new_branch})
+ return Cnvrg::Result.new(false, "Failed to upload files") unless Cnvrg::CLI.is_response_success(upload_resp, false)
+ results = upload_resp['result'].with_indifferent_access
+ props = Cnvrg::Helpers.get_s3_props(results)
+ client = props[:client]
+ bucket = props[:bucket]
+ upload_options = props[:upload_options]
+ s3_bucket = Aws::S3::Resource.new(client: client).bucket(bucket)
+ files = results['files']
+ Parallel.map((files.keys), {in_threads: threads}) do |k|
+ o = tree[k].merge(files[k])
+ upload_single_file(o, s3_bucket, upload_options)
+ end
+ return files.keys.length
+ end
+
+ def upload_single_file(file, s3_bucket, upload_options={})
+ file = file.with_indifferent_access
+ resp = s3_bucket.
+ object(file[:path]).
+ upload_file(file[:absolute_path], upload_options)
+ end
+
  def upload_file(absolute_path, relative_path, commit_sha1)
  file_name = File.basename relative_path
  file_size = File.size(absolute_path).to_f
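The new `upload_multiple_files` expects a `tree` hash keyed by relative path, each value holding the fields listed in its inline comment (sha1, file_name, file_size, content_type, absolute_path, relative_path); the server response then supplies the matching S3 object paths. A hedged sketch of a caller, assuming the tree comes from the new `generate_chunked_idx` shown further down and that `dataset`, `datafiles`, and `commit_sha1` are placeholder names for a Dataset instance, an instance of the class above, and the current commit:

    # Sketch only: the 1000-entry slicing mirrors the new chunk_size default.
    tree = dataset.generate_chunked_idx(dataset.list_all_files)
    tree.keys.each_slice(1000) do |keys|
      chunk = keys.map { |k| [k, tree[k]] }.to_h
      datafiles.upload_multiple_files(commit_sha1, chunk)
    end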
@@ -62,6 +90,8 @@ module Cnvrg
  upload_resp = Cnvrg::API.request(@base_resource + "upload_file", 'POST_FILE', {absolute_path: absolute_path, relative_path: relative_path,
  commit_sha1: commit_sha1, file_name: file_name,
  file_size: file_size, file_content_type: content_type, sha1: sha1})
+ puts upload_resp
+
  end

  if Cnvrg::CLI.is_response_success(upload_resp, false)
@@ -69,7 +99,7 @@ module Cnvrg

  return s3_res
  end
- end
+ end
  def upload_tar_file(absolute_path, relative_path, commit_sha1)
  begin
  file_name = File.basename relative_path
@@ -653,12 +683,12 @@ module Cnvrg
  def delete_commit_files_local(deleted)
  begin
  FileUtils.rm_rf(deleted) unless (deleted.nil? or deleted.empty?)
- return true
+ return Cnvrg::Result.new(true, '')
  rescue => e
- return false
+ return Cnvrg::Result.new(false, '')
  end

- return true
+ return Cnvrg::Result.new(true, '')

  end
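`delete_commit_files_local` now wraps its outcome in a `Cnvrg::Result` instead of returning a bare boolean, so callers check the result object. A one-line sketch (the receiver name is a placeholder; `is_success?` appears on Result objects elsewhere in this diff):

    res = datafiles.delete_commit_files_local(deleted)
    puts "local cleanup failed" unless res.is_success?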
 
@@ -716,9 +746,9 @@ module Cnvrg

  end

- def end_commit(commit_sha1,force)
+ def end_commit(commit_sha1,force, is_success)
  begin
- response = Cnvrg::API.request("#{base_resource}/commit/end", 'POST', {commit_sha1: commit_sha1,force:force})
+ response = Cnvrg::API.request("#{base_resource}/commit/end", 'POST', {commit_sha1: commit_sha1,force:force, success:is_success})
  Cnvrg::CLI.is_response_success(response, true)
  return response
  rescue => e
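`end_commit` now takes a third argument reporting whether the upload succeeded, which is posted to the server as `success`. A sketch of the updated call (receiver and the boolean are placeholders for whatever success signal the caller tracks):

    datafiles.end_commit(commit_sha1, force, upload_ok)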
@@ -736,5 +766,120 @@ module Cnvrg
  response = Cnvrg::API.request("#{base_resource}/commit/rollback", 'POST', {commit_sha1: commit_sha1})
  Cnvrg::CLI.is_response_success(response, false)
  end
+
+ def clone_in_chunks(commit: 'latest', chunk_size: 1000)
+ begin
+
+ end
+ end
+
+ def get_clone_chunk(latest_id: nil, chunk_size: 1000, commit: 'latest')
+ response = Cnvrg::API.request("#{@base_resource}/clone_chunk", 'POST',{commit: commit, chunk_size: chunk_size, latest_id: latest_id})
+ return nil unless Cnvrg::CLI.is_response_success(response, false)
+ response['result']['files']
+ end
+
+ def download_files_in_chunks(files, chunk_size: 1000, conflict: false, commit: 'latest')
+ begin
+ ttl_files = 0
+ files.each_slice(chunk_size).each do |files|
+ ttl_files += download_files_chunk(files, conflict: conflict)
+ end
+ return Cnvrg::Result.new(true, "Download Completed")
+ rescue Exception => e
+ return Cnvrg::Result.new(false, "Can`t download files")
+ end
+ end
+
+ def download_files_chunk(files, conflict: false)
+ (1..5).each do |i|
+ response = Cnvrg::API.request("users/#{@owner}/datasets/#{@dataset_slug}/download_multi", 'POST', {files: files})
+ next unless Cnvrg::CLI.is_response_success(response, false) #trying to api request 5 times.
+ files = response['files']
+ data_home = "#{Dir.pwd}/#{response['name']}"
+ res = download_multiple_files_s3(files, data_home, conflict: conflict)
+ next unless res.is_success? #try again..
+ return files['keys'].length
+ end
+ end
+
+ def download_multiple_chunks(commit, chunk_size=1000)
+ last_chunk_size = chunk_size
+ q = { commit: commit, chunk_size: chunk_size}
+ overall = 0
+ while last_chunk_size > 0
+ response = Cnvrg::API.request("users/#{@owner}/datasets/#{@dataset_slug}/clone", 'POST', q)
+ if Cnvrg::CLI.is_response_success(response, false)
+ files = response['files']
+ data_home = "#{Dir.pwd}/#{response['name']}"
+ last_chunk_size = files['keys'].length
+ break if last_chunk_size == 0
+ res = download_multiple_files_s3(files, data_home)
+ overall += last_chunk_size
+ q[:latest] = files['latest']
+ else
+ last_chunk_size = 0
+ end
+ end
+ Cnvrg::Result.new(true, "Cloned #{overall} files!")
+ end
+
+ def generate_parallel_idx
+
+ end
+
+ def download_multiple_files_s3(files, project_home, conflict: false)
+ begin
+ props = Cnvrg::Helpers.get_s3_props(files)
+ client = props[:client]
+ iv = props[:iv]
+ key = props[:key]
+ bucket = props[:bucket]
+ download_succ_count = 0
+ parallel_options = {
+ in_threads: Cnvrg::Helpers.parallel_threads,
+ isolation: true
+ }
+ Parallel.map(files["keys"], parallel_options) do |f|
+
+ file_path = f["name"]
+ if file_path.end_with? "/"
+ # dir
+ if download_dir(file_path, file_path)
+ download_succ_count += 1
+ else
+ return Cnvrg::Result.new(false,"Could not create directory: #{file_path}")
+ raise Parallel::Kill
+ end
+ else
+ # blob
+ begin
+ file_key = Cnvrg::Helpers.decrypt(key,iv, f["path"])
+ resp = false
+ file_path = "#{file_path}.conflict" if conflict
+ File.open(project_home+"/"+file_path, 'w+') do |file|
+ resp = client.get_object({bucket:bucket,
+ key:file_key}, target: file)
+ end
+ if resp
+ download_succ_count +=1
+ else
+ return Cnvrg::Result.new(false,"Could not create file: #{file_path}")
+ end
+ rescue => e
+ puts(e)
+ puts(e.backtrace)
+ return Cnvrg::Result.new(false,"Could not create file: #{file_path}", e.message, e.backtrace)
+ raise Parallel::Kill
+ end
+ end
+ end
+ if download_succ_count == files["keys"].size
+ return Cnvrg::Result.new(true,"Done.\nDownloaded #{download_succ_count} files")
+ end
+ rescue => e
+ return Cnvrg::Result.new(false,"Could not download some files", e.message, e.backtrace)
+ end
+ end
  end
  end
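These additions sketch a paginated clone: `get_clone_chunk` fetches one server-side page of file records keyed by a `latest_id` cursor, and `download_files_in_chunks` slices the resulting list and downloads each slice via `download_files_chunk`. A hedged driver loop, assuming each record is a hash carrying an `id` field usable as the cursor (that field name is not shown in the diff) and that `datafiles` is an instance of this class:

    latest_id = nil
    loop do
      files = datafiles.get_clone_chunk(latest_id: latest_id, chunk_size: 1000)
      break if files.nil? || files.empty?
      datafiles.download_files_in_chunks(files, chunk_size: 1000)
      latest_id = files.last['id']   # assumed cursor field
    end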
@@ -292,15 +292,47 @@ module Cnvrg
  url = Cnvrg::Helpers.remote_url
  "#{url}/#{self.owner}/datasets/#{self.slug}"
  end
+ def generate_chunked_idx(list_files=[], threads: IDXParallelThreads)
+ tree = {}
+ Parallel.map(list_files, in_threads: IDXParallelThreads) do |file|
+ label = file.gsub(self.local_path + "/", "")
+ if File.directory? file
+ tree[label+"/"] = nil
+ else
+ sha1 = OpenSSL::Digest::SHA1.file(file).hexdigest
+ file_name = File.basename file
+ file_size = File.size(file).to_f
+ mime_type = MimeMagic.by_path(file)
+ content_type = !(mime_type.nil? or mime_type.text?) ? mime_type.type : "text/plain"
+ relative_path = file.gsub(/^#{@local_path + "/"}/, "")
+ tree[label] = {sha1: sha1, file_name: file_name, file_size: file_size, content_type: content_type, absolute_path: file, relative_path: relative_path}
+ end
+ end
+ return tree
+ end
+
+ def list_all_files
+ list = Dir.glob("#{self.local_path}/**/*", File::FNM_DOTMATCH).reject { |x| (x =~ /\/\.{1,2}$/) or (x =~ /^#{self.local_path}\/\.cnvrg\/*/) or (x =~/^#{self.local_path}\/\.cnvrgignore.conflict*/) and not (x =~/^#{self.local_path}\/\.cnvrgignore/) }
+ list_ignore = self.get_ignore_list()
+ return list.select{|file| !list_ignore.include? file}
+ end
+
+ def write_idx(tree=nil, commit=nil)
+ if tree.blank?
+ tree = self.generate_idx[:tree]
+ tree = tree.map{|k,v| (v.present?)? [k, {sha1: v[:sha1], commit_time: Time.now}] : [k,v]}.to_h
+ end
+ idx = {tree: tree, commit: commit}
+ File.open("#{self.local_path}/.cnvrg/idx.yml", 'w') { |f| f.write idx.to_yaml }
+ end
+
  def generate_idx(show_progress=false)
  if File.exists? "#{self.local_path}/.cnvrg/idx.yml"
  old_idx = YAML.load_file("#{self.local_path}/.cnvrg/idx.yml")
  else
  old_idx = nil
  end
-
  tree_idx = Hash.new(0)
-
  list = Dir.glob("#{self.local_path}/**/*", File::FNM_DOTMATCH).reject { |x| (x =~ /\/\.{1,2}$/) or (x =~ /^#{self.local_path}\/\.cnvrg\/*/) or (x =~/^#{self.local_path}\/\.cnvrgignore.conflict*/) and not (x =~/^#{self.local_path}\/\.cnvrgignore/) }
  list_ignore = self.get_ignore_list()
  if show_progress
@@ -348,10 +380,8 @@ module Cnvrg
  idx = {commit: old_idx.to_h[:commit], tree: tree_idx, next_commit:old_idx[:next_commit] }
  else
  idx = {commit: old_idx.to_h[:commit], tree: tree_idx}
-
  end
  idx_yaml = idx.to_yaml
-
  File.open("#{self.local_path}/.cnvrg/idx.yml", 'w') { |f| f.write idx_yaml }
  return idx
  end
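`generate_chunked_idx` builds the same per-file metadata as `generate_idx`, but over an explicit file list and in parallel; `list_all_files` supplies that list with the `.cnvrg` internals and ignore list filtered out; and `write_idx` persists a tree plus its commit to `.cnvrg/idx.yml`. A short sketch combining the three (the receiver is a Dataset instance and `commit_sha1` is a placeholder):

    tree = dataset.generate_chunked_idx(dataset.list_all_files)
    dataset.write_idx(tree, commit_sha1)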
@@ -361,9 +391,8 @@ module Cnvrg
  return response
  end

- def downlowd_updated_data(current_commit)
-
- response = Cnvrg::API.request("users/#{self.owner}/datasets/#{self.slug}/downlowd_updated_data", 'POST', {current_commit: current_commit})
+ def download_updated_data(current_commit)
+ response = Cnvrg::API.request("users/#{self.owner}/datasets/#{self.slug}/download_updated_data", 'POST', {current_commit: current_commit})
  CLI.is_response_success(response,false)
  return response
  end
@@ -455,58 +455,13 @@ module Cnvrg

  end
  end
- def download_multiple_files_s3(files, project_home)
+ def download_multpile_files_s3(files, project_home)
  begin
-
- sts_path = files["path_sts"]
- retries = 0
- success= false
- while !success and retries < 20
- begin
- if !Helpers.is_verify_ssl
- body = open(sts_path, {ssl_verify_mode: OpenSSL::SSL::VERIFY_NONE}).read
- else
- body = open(sts_path).read
- end
- success = true
- rescue => e
- retries +=1
- sleep(5)
-
- end
- end
- if !success
- return Cnvrg::Result.new(false,"couldn't download some files", "error in sts", "" )
- end
- split = body.split("\n")
- key = split[0]
- iv = split[1]
-
- access = Cnvrg::Helpers.decrypt(key, iv, files["sts_a"])
-
- secret = Cnvrg::Helpers.decrypt(key,iv, files["sts_s"])
-
- session = Cnvrg::Helpers.decrypt(key,iv, files["sts_st"])
- region = Cnvrg::Helpers.decrypt(key,iv, files["region"])
-
- bucket = Cnvrg::Helpers.decrypt(key,iv, files["bucket"])
- is_s3 = files["is_s3"]
- if is_s3 or is_s3.nil?
- client = Aws::S3::Client.new(
- :access_key_id =>access,
- :secret_access_key => secret,
- :session_token => session,
- :region => region,
- :http_open_timeout => 60, :retry_limit => 20)
- else
- endpoint = Cnvrg::Helpers.decrypt(key,iv, files["endpoint_url"])
- client = Aws::S3::Client.new(
- :access_key_id =>access,
- :secret_access_key => secret,
- :region => region,
- :endpoint=> endpoint,:force_path_style=> true,:ssl_verify_peer=>false,
- :http_open_timeout => 60, :retry_limit => 20)
- end
+ props = Cnvrg::Helpers.get_s3_props(files)
+ client = props[:client]
+ iv = props[:iv]
+ key = props[:key]
+ bucket = props[:bucket]
  download_succ_count = 0
  parallel_options = {
  :progress => {
@@ -539,11 +494,12 @@ module Cnvrg
  File.open(project_home+"/"+file_path, 'w+') do |file|
  resp = client.get_object({bucket:bucket,
  key:file_key}, target: file)
+ #TODO update idx here!
  end
  if resp
  download_succ_count +=1
  else
- return Cnvrg::Result(false,"Could not create file: #{file_path}")
+ return Cnvrg::Result.new(false,"Could not create file: #{file_path}")
  end

@@ -697,10 +653,9 @@ module Cnvrg
  return true

  end
- def start_commit(new_branch,force:false, exp_start_commit:nil)
-
+ def start_commit(new_branch,force:false, exp_start_commit:nil, job_slug: nil, job_type: nil)
  response = Cnvrg::API.request("#{base_resource}/commit/start", 'POST', {project_slug: @project_slug, new_branch: new_branch,force:force,
- username: @owner, exp_start_commit:exp_start_commit})
+ username: @owner, exp_start_commit:exp_start_commit, job_slug: job_slug, job_type: job_type})
  Cnvrg::CLI.is_response_success(response,false)
  return response
  end
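`start_commit` can now tag the commit with the job that created it via the optional `job_slug` and `job_type` keywords, forwarded verbatim to the `commit/start` endpoint. A sketch with placeholder values (receiver name and the two values are assumptions, not from the diff):

    project_files.start_commit(false, force: false, exp_start_commit: nil,
                               job_slug: 'job-slug', job_type: 'Experiment')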
@@ -219,5 +219,61 @@ parameters:
  def get_mem(pid)
  end

+ def get_s3_props(files) #will return client and decryptor
+ sts_path = files["path_sts"]
+ retries = 0
+ success= false
+ while !success and retries < 20
+ begin
+ if !Helpers.is_verify_ssl
+ body = open(sts_path, {ssl_verify_mode: OpenSSL::SSL::VERIFY_NONE}).read
+ else
+ body = open(sts_path).read
+ end
+ success = true
+ rescue => e
+ retries +=1
+ sleep(5)
+
+ end
+ end
+ if !success
+ return Cnvrg::Result.new(false,"couldn't download some files", "error in sts", "" )
+ end
+ split = body.split("\n")
+ key = split[0]
+ iv = split[1]
+
+ access = Cnvrg::Helpers.decrypt(key, iv, files["sts_a"])
+
+ secret = Cnvrg::Helpers.decrypt(key,iv, files["sts_s"])
+
+ session = Cnvrg::Helpers.decrypt(key,iv, files["sts_st"])
+ region = Cnvrg::Helpers.decrypt(key,iv, files["region"])
+
+ bucket = Cnvrg::Helpers.decrypt(key,iv, files["bucket"])
+ is_s3 = files["is_s3"]
+ if is_s3 or is_s3.nil?
+ client = Aws::S3::Client.new(
+ :access_key_id =>access,
+ :secret_access_key => secret,
+ :session_token => session,
+ :region => region,
+ :http_open_timeout => 60, :retry_limit => 20)
+ use_accelerate_endpoint = true
+ else
+ endpoint = Cnvrg::Helpers.decrypt(key,iv, files["endpoint_url"])
+ client = Aws::S3::Client.new(
+ :access_key_id =>access,
+ :secret_access_key => secret,
+ :region => region,
+ :endpoint=> endpoint,:force_path_style=> true,:ssl_verify_peer=>false,
+ :http_open_timeout => 60, :retry_limit => 20)
+ use_accelerate_endpoint = false
+ end
+ upload_options = {:use_accelerate_endpoint => use_accelerate_endpoint,:server_side_encryption => 'AES256'}
+ return {client: client, key: key, iv: iv, bucket: bucket, upload_options: upload_options}
+ end
+
  end
  end
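`get_s3_props` centralizes the STS download, decryption, and S3 client construction that previously lived inside `download_multiple_files_s3`, and both the upload and download paths in this release consume its return hash. A hedged usage sketch mirroring how `upload_single_file` uses it above (the `files` payload is whatever the cnvrg API returns; `remote_key` and `local_path` are placeholders):

    props = Cnvrg::Helpers.get_s3_props(files)
    s3_bucket = Aws::S3::Resource.new(client: props[:client]).bucket(props[:bucket])
    s3_bucket.object(remote_key).upload_file(local_path, props[:upload_options])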