cnvrg 0.5.6 → 0.5.8

@@ -27,15 +27,16 @@ module Cnvrg
  method_option :force, :type => :boolean, :aliases => ["-f","--force"], :default => false
  method_option :sync, :type => :boolean, :aliases => ["-s","--sync"], :default => false
  method_option :tags, :type => :boolean, :aliases => ["--tags"], :desc => "upload file tags", :default => false
-
+ method_option :chunk_size, :type => :numeric, :aliases => ["--chunk"], :desc => "chunk size to communicate with the server", :default => 1000
  def upload
  cli = Cnvrg::CLI.new()
  verbose = options["verbose"]
  sync = options["sync"]
  force = options["force"]
  new_branch = options["new_branch"]
+ chunk_size = options["chunk_size"]
  tags = options["tags"]
- cli.upload_data_new(new_branch, verbose,sync,force, tags)
+ cli.upload_data_new(new_branch, verbose,sync,force, tags, chunk_size)
  end
  desc 'sync', 'sync_data_new'
  method_option :new_branch, :type => :boolean, :aliases => ["-nb"], :desc => "create new branch of commits"
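
The upload command now takes a chunk size and forwards it to upload_data_new. A minimal sketch, assuming a Cnvrg::Datafiles-style client named datafiles and a tree hash shaped like the one built later in this diff, of how a caller might batch the index into chunks of that size before pushing it to the server:

# Hedged sketch (not part of the diff): push the index tree in slices of chunk_size files.
tree.each_slice(chunk_size).each do |slice|
  datafiles.upload_multiple_files(commit_sha1, slice.to_h, threads: 15)
end
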
@@ -45,6 +46,8 @@ module Cnvrg
  method_option :all_files, :type => :boolean, :aliases => ["--all"], :desc => "download specified commit", :default => false
  method_option :tags, :type => :boolean, :aliases => ["--tags"], :desc => "upload file tags", :default => false
  method_option :parallel, :type => :numeric, :aliases => ["-p", "--parallel"], :desc => "uparallel upload at the same time", :default => 15
+ method_option :chunk_size, :type => :numeric, :aliases => ["--chunk_size"], :desc => "chunk size to communicate with the server", :default => 1000
+ method_option :init, :type => :boolean, :aliases => ["--initial"], :desc => "initial sync", :default => false

  def sync_data_new()
  cli = Cnvrg::CLI.new()
@@ -55,9 +58,9 @@ module Cnvrg
  all_files = options["all_files"]
  tags = options["tags"]
  parallel=options["parallel"]
-
-
- cli.sync_data_new(new_branch, force, verbose,commit,all_files, tags,parallel)
+ chunk_size = options["chunk_size"]
+ init = options["init"]
+ cli.sync_data_new(new_branch, force, verbose,commit,all_files, tags,parallel, chunk_size, init)
  end
  desc 'data download', 'pull data'
  method_option :new_branch, :type => :boolean, :aliases => ["-nb"], :desc => "create new branch of commits", :default => false
@@ -73,7 +76,6 @@ module Cnvrg
  new_branch = options["new_branch"]
  commit = options["commit"]
  all_files = options["all_files"]
-
  cli.download_data_new(verbose,sync,new_branch, commit,all_files)

  end
@@ -87,9 +89,7 @@ module Cnvrg
  only_tree =options[:only_tree]
  commit =options[:commit]
  query =options[:query]
-
- cli.clone_data(dataset_url, only_tree=only_tree,commit=commit, query=query)
-
+ cli.clone_data(dataset_url, only_tree=only_tree,commit=commit, query=query)
  end

  desc 'data clone_query', 'clone query datset'
@@ -20,6 +20,34 @@ module Cnvrg
  @base_resource = "users/#{owner}/datasets/#{dataset_slug}/"
  end

+ def upload_multiple_files(commit_sha1, tree, threads: ParallelThreads, force: false, new_branch: false)
+ random_file_name = (0...8).map { (65 + rand(26)).chr }.join #needed to create a file for post_file..
+ # each file have: {sha1: sha1, file_name: file_name, file_size: file_size, content_type: content_type, absolute_path, relative_path}
+
+ #this call should also implement compare_idx...
+ upload_resp = Cnvrg::API.request(@base_resource + "upload_files", 'POST', {commit_sha1: commit_sha1, tree: tree, force: force, is_branch: new_branch})
+ return Cnvrg::Result.new(false, "Failed to upload files") unless Cnvrg::CLI.is_response_success(upload_resp, false)
+ results = upload_resp['result'].with_indifferent_access
+ props = Cnvrg::Helpers.get_s3_props(results)
+ client = props[:client]
+ bucket = props[:bucket]
+ upload_options = props[:upload_options]
+ s3_bucket = Aws::S3::Resource.new(client: client).bucket(bucket)
+ files = results['files']
+ Parallel.map((files.keys), {in_threads: threads}) do |k|
+ o = tree[k].merge(files[k])
+ upload_single_file(o, s3_bucket, upload_options)
+ end
+ return files.keys.length
+ end
+
+ def upload_single_file(file, s3_bucket, upload_options={})
+ file = file.with_indifferent_access
+ resp = s3_bucket.
+ object(file[:path]).
+ upload_file(file[:absolute_path], upload_options)
+ end
+
  def upload_file(absolute_path, relative_path, commit_sha1)
  file_name = File.basename relative_path
  file_size = File.size(absolute_path).to_f
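
upload_multiple_files posts one chunk of the tree to the server and then streams each returned file to S3 in parallel. A hedged usage sketch; the Datafiles class name and constructor arguments are assumptions inferred from @base_resource:

# Hypothetical call site (not shown in the diff): upload one chunk of files for a commit.
datafiles = Cnvrg::Datafiles.new(owner, dataset_slug)
uploaded = datafiles.upload_multiple_files(commit_sha1, tree_chunk, threads: 15)
puts "uploaded #{uploaded} files in this chunk"
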
@@ -62,6 +90,8 @@ module Cnvrg
  upload_resp = Cnvrg::API.request(@base_resource + "upload_file", 'POST_FILE', {absolute_path: absolute_path, relative_path: relative_path,
  commit_sha1: commit_sha1, file_name: file_name,
  file_size: file_size, file_content_type: content_type, sha1: sha1})
+ puts upload_resp
+
  end

  if Cnvrg::CLI.is_response_success(upload_resp, false)
@@ -69,7 +99,7 @@ module Cnvrg

  return s3_res
  end
- end
+ end
  def upload_tar_file(absolute_path, relative_path, commit_sha1)
  begin
  file_name = File.basename relative_path
@@ -653,12 +683,12 @@ module Cnvrg
  def delete_commit_files_local(deleted)
  begin
  FileUtils.rm_rf(deleted) unless (deleted.nil? or deleted.empty?)
- return true
+ return Cnvrg::Result.new(true, '')
  rescue => e
- return false
+ return Cnvrg::Result.new(false, '')
  end

- return true
+ return Cnvrg::Result.new(true, '')

  end

@@ -716,9 +746,9 @@ module Cnvrg

  end

- def end_commit(commit_sha1,force)
+ def end_commit(commit_sha1,force, is_success)
  begin
- response = Cnvrg::API.request("#{base_resource}/commit/end", 'POST', {commit_sha1: commit_sha1,force:force})
+ response = Cnvrg::API.request("#{base_resource}/commit/end", 'POST', {commit_sha1: commit_sha1,force:force, success:is_success})
  Cnvrg::CLI.is_response_success(response, true)
  return response
  rescue => e
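
end_commit now also tells the server whether the upload succeeded. A hedged sketch of the call, assuming the caller compares the count returned by upload_multiple_files against the chunk it sent:

# Hypothetical sketch: close the commit and report success of the chunked upload.
success = uploaded_count == tree_chunk.keys.length
datafiles.end_commit(commit_sha1, force, success)
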
@@ -736,5 +766,120 @@ module Cnvrg
  response = Cnvrg::API.request("#{base_resource}/commit/rollback", 'POST', {commit_sha1: commit_sha1})
  Cnvrg::CLI.is_response_success(response, false)
  end
+
+ def clone_in_chunks(commit: 'latest', chunk_size: 1000)
+ begin
+
+ end
+ end
+
+ def get_clone_chunk(latest_id: nil, chunk_size: 1000, commit: 'latest')
+ response = Cnvrg::API.request("#{@base_resource}/clone_chunk", 'POST',{commit: commit, chunk_size: chunk_size, latest_id: latest_id})
+ return nil unless Cnvrg::CLI.is_response_success(response, false)
+ response['result']['files']
+ end
+
+ def download_files_in_chunks(files, chunk_size: 1000, conflict: false, commit: 'latest')
+ begin
+ ttl_files = 0
+ files.each_slice(chunk_size).each do |files|
+ ttl_files += download_files_chunk(files, conflict: conflict)
+ end
+ return Cnvrg::Result.new(true, "Download Completed")
+ rescue Exception => e
+ return Cnvrg::Result.new(false, "Can`t download files")
+ end
+ end
+
+ def download_files_chunk(files, conflict: false)
+ (1..5).each do |i|
+ response = Cnvrg::API.request("users/#{@owner}/datasets/#{@dataset_slug}/download_multi", 'POST', {files: files})
+ next unless Cnvrg::CLI.is_response_success(response, false) #trying to api request 5 times.
+ files = response['files']
+ data_home = "#{Dir.pwd}/#{response['name']}"
+ res = download_multiple_files_s3(files, data_home, conflict: conflict)
+ next unless res.is_success? #try again..
+ return files['keys'].length
+ end
+ end
+
+ def download_multiple_chunks(commit, chunk_size=1000)
+ last_chunk_size = chunk_size
+ q = { commit: commit, chunk_size: chunk_size}
+ overall = 0
+ while last_chunk_size > 0
+ response = Cnvrg::API.request("users/#{@owner}/datasets/#{@dataset_slug}/clone", 'POST', q)
+ if Cnvrg::CLI.is_response_success(response, false)
+ files = response['files']
+ data_home = "#{Dir.pwd}/#{response['name']}"
+ last_chunk_size = files['keys'].length
+ break if last_chunk_size == 0
+ res = download_multiple_files_s3(files, data_home)
+ overall += last_chunk_size
+ q[:latest] = files['latest']
+ else
+ last_chunk_size = 0
+ end
+ end
+ Cnvrg::Result.new(true, "Cloned #{overall} files!")
+ end
+
+ def generate_parallel_idx
+
+ end
+
+ def download_multiple_files_s3(files, project_home, conflict: false)
+ begin
+ props = Cnvrg::Helpers.get_s3_props(files)
+ client = props[:client]
+ iv = props[:iv]
+ key = props[:key]
+ bucket = props[:bucket]
+ download_succ_count = 0
+ parallel_options = {
+ in_threads: Cnvrg::Helpers.parallel_threads,
+ isolation: true
+ }
+ Parallel.map(files["keys"], parallel_options) do |f|
+
+ file_path = f["name"]
+ if file_path.end_with? "/"
+ # dir
+ if download_dir(file_path, file_path)
+ download_succ_count += 1
+ else
+ return Cnvrg::Result.new(false,"Could not create directory: #{file_path}")
+ raise Parallel::Kill
+ end
+ else
+ # blob
+ begin
+ file_key = Cnvrg::Helpers.decrypt(key,iv, f["path"])
+ resp = false
+ file_path = "#{file_path}.conflict" if conflict
+ File.open(project_home+"/"+file_path, 'w+') do |file|
+ resp = client.get_object({bucket:bucket,
+ key:file_key}, target: file)
+ end
+ if resp
+ download_succ_count +=1
+ else
+ return Cnvrg::Result.new(false,"Could not create file: #{file_path}")
+ end
+ rescue => e
+ puts(e)
+ puts(e.backtrace)
+ return Cnvrg::Result.new(false,"Could not create file: #{file_path}", e.message, e.backtrace)
+ raise Parallel::Kill
+ end
+ end
+ end
+ if download_succ_count == files["keys"].size
+ return Cnvrg::Result.new(true,"Done.\nDownloaded #{download_succ_count} files")
+ end
+ rescue => e
+ return Cnvrg::Result.new(false,"Could not download some files", e.message, e.backtrace)
+ end
+ end
  end
  end
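
get_clone_chunk pages through a commit's file list and download_files_in_chunks pulls the listed files down in slices, which is presumably what the still-empty clone_in_chunks is meant to drive. A hedged sketch of that loop; the paging key read from each file entry is an assumption:

# Hypothetical pagination loop: fetch pages of files until the server returns an empty page,
# downloading each page in slices of chunk_size.
latest_id = nil
loop do
  files = datafiles.get_clone_chunk(latest_id: latest_id, chunk_size: chunk_size, commit: commit)
  break if files.nil? || files.empty?
  datafiles.download_files_in_chunks(files, chunk_size: chunk_size, commit: commit)
  latest_id = files.last['id'] # paging key assumed
end
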
@@ -292,15 +292,47 @@ module Cnvrg
  url = Cnvrg::Helpers.remote_url
  "#{url}/#{self.owner}/datasets/#{self.slug}"
  end
+ def generate_chunked_idx(list_files=[], threads: IDXParallelThreads)
+ tree = {}
+ Parallel.map(list_files, in_threads: IDXParallelThreads) do |file|
+ label = file.gsub(self.local_path + "/", "")
+ if File.directory? file
+ tree[label+"/"] = nil
+ else
+ sha1 = OpenSSL::Digest::SHA1.file(file).hexdigest
+ file_name = File.basename file
+ file_size = File.size(file).to_f
+ mime_type = MimeMagic.by_path(file)
+ content_type = !(mime_type.nil? or mime_type.text?) ? mime_type.type : "text/plain"
+ relative_path = file.gsub(/^#{@local_path + "/"}/, "")
+ tree[label] = {sha1: sha1, file_name: file_name, file_size: file_size, content_type: content_type, absolute_path: file, relative_path: relative_path}
+ end
+ end
+ return tree
+ end
+
+ def list_all_files
+ list = Dir.glob("#{self.local_path}/**/*", File::FNM_DOTMATCH).reject { |x| (x =~ /\/\.{1,2}$/) or (x =~ /^#{self.local_path}\/\.cnvrg\/*/) or (x =~/^#{self.local_path}\/\.cnvrgignore.conflict*/) and not (x =~/^#{self.local_path}\/\.cnvrgignore/) }
+ list_ignore = self.get_ignore_list()
+ return list.select{|file| !list_ignore.include? file}
+ end
+
+ def write_idx(tree=nil, commit=nil)
+ if tree.blank?
+ tree = self.generate_idx[:tree]
+ tree = tree.map{|k,v| (v.present?)? [k, {sha1: v[:sha1], commit_time: Time.now}] : [k,v]}.to_h
+ end
+ idx = {tree: tree, commit: commit}
+ File.open("#{self.local_path}/.cnvrg/idx.yml", 'w') { |f| f.write idx.to_yaml }
+ end
+
  def generate_idx(show_progress=false)
  if File.exists? "#{self.local_path}/.cnvrg/idx.yml"
  old_idx = YAML.load_file("#{self.local_path}/.cnvrg/idx.yml")
  else
  old_idx = nil
  end
-
  tree_idx = Hash.new(0)
-
  list = Dir.glob("#{self.local_path}/**/*", File::FNM_DOTMATCH).reject { |x| (x =~ /\/\.{1,2}$/) or (x =~ /^#{self.local_path}\/\.cnvrg\/*/) or (x =~/^#{self.local_path}\/\.cnvrgignore.conflict*/) and not (x =~/^#{self.local_path}\/\.cnvrgignore/) }
  list_ignore = self.get_ignore_list()
  if show_progress
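
list_all_files, generate_chunked_idx and write_idx split index generation so it can run per chunk instead of over the whole dataset at once. A hedged sketch of how they might be chained, assuming a Cnvrg::Dataset instance named dataset:

# Hypothetical sketch: index tracked files chunk by chunk, then persist the idx file.
files = dataset.list_all_files
files.each_slice(chunk_size) do |slice|
  tree = dataset.generate_chunked_idx(slice, threads: 15)
  # ... upload this chunk of the tree here ...
end
dataset.write_idx(nil, commit_sha1) # falls back to generate_idx when tree is blank
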
@@ -348,10 +380,8 @@ module Cnvrg
  idx = {commit: old_idx.to_h[:commit], tree: tree_idx, next_commit:old_idx[:next_commit] }
  else
  idx = {commit: old_idx.to_h[:commit], tree: tree_idx}
-
  end
  idx_yaml = idx.to_yaml
-
  File.open("#{self.local_path}/.cnvrg/idx.yml", 'w') { |f| f.write idx_yaml }
  return idx
  end
@@ -361,9 +391,8 @@ module Cnvrg
  return response
  end

- def downlowd_updated_data(current_commit)
-
- response = Cnvrg::API.request("users/#{self.owner}/datasets/#{self.slug}/downlowd_updated_data", 'POST', {current_commit: current_commit})
+ def download_updated_data(current_commit)
+ response = Cnvrg::API.request("users/#{self.owner}/datasets/#{self.slug}/download_updated_data", 'POST', {current_commit: current_commit})
  CLI.is_response_success(response,false)
  return response
  end
@@ -455,58 +455,13 @@ module Cnvrg

  end
  end
- def download_multiple_files_s3(files, project_home)
+ def download_multpile_files_s3(files, project_home)
  begin
-
- sts_path = files["path_sts"]
- retries = 0
- success= false
- while !success and retries < 20
- begin
- if !Helpers.is_verify_ssl
- body = open(sts_path, {ssl_verify_mode: OpenSSL::SSL::VERIFY_NONE}).read
- else
- body = open(sts_path).read
- end
- success = true
- rescue => e
- retries +=1
- sleep(5)
-
- end
- end
- if !success
- return Cnvrg::Result.new(false,"couldn't download some files", "error in sts", "" )
- end
- split = body.split("\n")
- key = split[0]
- iv = split[1]
-
- access = Cnvrg::Helpers.decrypt(key, iv, files["sts_a"])
-
- secret = Cnvrg::Helpers.decrypt(key,iv, files["sts_s"])
-
- session = Cnvrg::Helpers.decrypt(key,iv, files["sts_st"])
- region = Cnvrg::Helpers.decrypt(key,iv, files["region"])
-
- bucket = Cnvrg::Helpers.decrypt(key,iv, files["bucket"])
- is_s3 = files["is_s3"]
- if is_s3 or is_s3.nil?
- client = Aws::S3::Client.new(
- :access_key_id =>access,
- :secret_access_key => secret,
- :session_token => session,
- :region => region,
- :http_open_timeout => 60, :retry_limit => 20)
- else
- endpoint = Cnvrg::Helpers.decrypt(key,iv, files["endpoint_url"])
- client = Aws::S3::Client.new(
- :access_key_id =>access,
- :secret_access_key => secret,
- :region => region,
- :endpoint=> endpoint,:force_path_style=> true,:ssl_verify_peer=>false,
- :http_open_timeout => 60, :retry_limit => 20)
- end
+ props = Cnvrg::Helpers.get_s3_props(files)
+ client = props[:client]
+ iv = props[:iv]
+ key = props[:key]
+ bucket = props[:bucket]
  download_succ_count = 0
  parallel_options = {
  :progress => {
@@ -539,11 +494,12 @@ module Cnvrg
  File.open(project_home+"/"+file_path, 'w+') do |file|
  resp = client.get_object({bucket:bucket,
  key:file_key}, target: file)
+ #TODO update idx here!
  end
  if resp
  download_succ_count +=1
  else
- return Cnvrg::Result(false,"Could not create file: #{file_path}")
+ return Cnvrg::Result.new(false,"Could not create file: #{file_path}")
  end


@@ -697,10 +653,9 @@ module Cnvrg
  return true

  end
- def start_commit(new_branch,force:false, exp_start_commit:nil)
-
+ def start_commit(new_branch,force:false, exp_start_commit:nil, job_slug: nil, job_type: nil)
  response = Cnvrg::API.request("#{base_resource}/commit/start", 'POST', {project_slug: @project_slug, new_branch: new_branch,force:force,
- username: @owner, exp_start_commit:exp_start_commit})
+ username: @owner, exp_start_commit:exp_start_commit, job_slug: job_slug, job_type: job_type})
  Cnvrg::CLI.is_response_success(response,false)
  return response
  end
@@ -219,5 +219,61 @@ parameters:
  def get_mem(pid)
  end

+ def get_s3_props(files) #will return client and decryptor
+ sts_path = files["path_sts"]
+ retries = 0
+ success= false
+ while !success and retries < 20
+ begin
+ if !Helpers.is_verify_ssl
+ body = open(sts_path, {ssl_verify_mode: OpenSSL::SSL::VERIFY_NONE}).read
+ else
+ body = open(sts_path).read
+ end
+ success = true
+ rescue => e
+ retries +=1
+ sleep(5)
+
+ end
+ end
+ if !success
+ return Cnvrg::Result.new(false,"couldn't download some files", "error in sts", "" )
+ end
+ split = body.split("\n")
+ key = split[0]
+ iv = split[1]
+
+ access = Cnvrg::Helpers.decrypt(key, iv, files["sts_a"])
+
+ secret = Cnvrg::Helpers.decrypt(key,iv, files["sts_s"])
+
+ session = Cnvrg::Helpers.decrypt(key,iv, files["sts_st"])
+ region = Cnvrg::Helpers.decrypt(key,iv, files["region"])
+
+ bucket = Cnvrg::Helpers.decrypt(key,iv, files["bucket"])
+ is_s3 = files["is_s3"]
+ if is_s3 or is_s3.nil?
+ client = Aws::S3::Client.new(
+ :access_key_id =>access,
+ :secret_access_key => secret,
+ :session_token => session,
+ :region => region,
+ :http_open_timeout => 60, :retry_limit => 20)
+ use_accelerate_endpoint = true
+ else
+ endpoint = Cnvrg::Helpers.decrypt(key,iv, files["endpoint_url"])
+ client = Aws::S3::Client.new(
+ :access_key_id =>access,
+ :secret_access_key => secret,
+ :region => region,
+ :endpoint=> endpoint,:force_path_style=> true,:ssl_verify_peer=>false,
+ :http_open_timeout => 60, :retry_limit => 20)
+ use_accelerate_endpoint = false
+ end
+ upload_options = {:use_accelerate_endpoint => use_accelerate_endpoint,:server_side_encryption => 'AES256'}
+ return {client: client, key: key, iv: iv, bucket: bucket, upload_options: upload_options}
+ end
+
  end
  end
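
get_s3_props gathers the STS fetch, key/iv decryption and S3 client construction that the upload and download paths now share. A hedged sketch of consuming its return value, mirroring upload_multiple_files above; the object key and local path are placeholders:

# Hypothetical sketch: turn the server's storage descriptor into an S3 bucket handle and upload one file.
props = Cnvrg::Helpers.get_s3_props(response['result'])
bucket = Aws::S3::Resource.new(client: props[:client]).bucket(props[:bucket])
bucket.object("datasets/example/example.txt").upload_file("/tmp/example.txt", props[:upload_options])
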