cnvrg 1.6.38 → 1.9.5

@@ -7,7 +7,7 @@ module Cnvrg
  IDXParallelThreads ||= Cnvrg::Helpers.parallel_threads
  IDXParallelProcesses ||= Parallel.processor_count
 
- def initialize(project_home = '', dataset_url: '')
+ def initialize(project_home = '', dataset_url: '', dataset_info: '')
  begin
  @info = {}
  if project_home.present?
@@ -17,6 +17,11 @@ module Cnvrg
  @title = config[:dataset_name]
  @slug = config[:dataset_slug]
  @owner = config[:owner]
+ elsif dataset_info.present?
+ @title = dataset_info[:slug]
+ @slug = dataset_info[:slug]
+ @owner = dataset_info[:owner]
+ @local_path = Dir.pwd
  else
  owner, slug = Cnvrg::Helpers.extract_owner_slug_from_url(dataset_url, 'datasets')
  @title = slug
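
The new dataset_info: keyword lets the object be constructed directly from a known slug/owner pair instead of a local .cnvrg config or a dataset URL. A minimal usage sketch, assuming this is the Cnvrg::Dataset class and using hypothetical slug/owner values:

    # dataset_info only needs :slug and :owner; the current working directory
    # becomes the local path.
    dataset = Cnvrg::Dataset.new(dataset_info: { slug: 'my-dataset', owner: 'my-org' })
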
@@ -87,7 +92,6 @@ module Cnvrg
  files
  end
 
-
  def get_stats(commit: nil, query: nil)
  response = Cnvrg::API.request("users/#{@owner}/datasets/#{@slug}/clone", 'POST', {commit: commit, query: query})
  Cnvrg::CLI.is_response_success(response, true)
@@ -100,7 +104,6 @@ module Cnvrg
  response['result']['files']['keys']
  end
 
-
  def backup_idx
  Cnvrg::Logger.log_info("Backup idx")
  if File.exists? "#{self.local_path}/.cnvrg/idx.yml"
@@ -123,7 +126,6 @@ module Cnvrg
  File.open(".cnvrg/config.yml", "w+") {|f| f.write config.to_yaml}
  end
 
-
  def self.delete(dataset_slug, owner)
  response = Cnvrg::API.request("users/#{owner}/datasets/#{dataset_slug}/delete", 'DELETE')
  return response
@@ -142,14 +144,12 @@ module Cnvrg
  response = Cnvrg::API.request("users/#{self.owner}/datasets/#{self.slug}/volumes/create", 'POST', {data_commit: commit})
  CLI.is_response_success(response)
  return response
-
  end
 
  def list(owner)
  response = Cnvrg::API.request("users/#{owner}/datasets/list", 'GET')
  CLI.is_response_success(response)
  return response
-
  end
 
  def search_queries
@@ -160,7 +160,6 @@ module Cnvrg
  row << [query["name"], query["slug"], query["created_at"].in_time_zone.to_s, query["username"]]
  end
  return row
-
  end
 
  def get_query_file(query_slug)
@@ -171,7 +170,6 @@ module Cnvrg
  row << [file["name"], file["fullpath"], file["s3_url"]]
  end
  return row
-
  end
 
  def download_tags_yaml
@@ -186,11 +184,13 @@ module Cnvrg
  end
  end
 
- def list_commits
- response = Cnvrg::API.request("users/#{self.owner}/datasets/#{self.slug}/list_commits", 'GET')
+ def list_commits(commit_sha1: nil)
+ response = Cnvrg::API.request(
+ "users/#{self.owner}/datasets/#{self.slug}/list_commits?commit=#{commit_sha1}",
+ 'GET'
+ )
  CLI.is_response_success(response)
  return response
-
  end
 
  def upload_tags_via_yml(tag_file = nil)
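
list_commits now takes an optional commit_sha1 and forwards it to the server as a query parameter. A sketch of both call styles, assuming an already-initialized dataset object and a hypothetical sha1:

    dataset.list_commits                          # unscoped, as before
    dataset.list_commits(commit_sha1: 'abc123')   # scoped to a specific commit
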
@@ -277,7 +277,6 @@ module Cnvrg
  end
  end
  return ignore_list.flatten
-
  end
 
 
@@ -293,7 +292,6 @@ module Cnvrg
  create_ignore = true
  end
 
-
  cnvrgignore = Helpers.cnvrgignore_content
  begin
  response = Cnvrg::API.request("cli/create_dataset", 'POST', {title: dataset_name, owner: owner, is_public: is_public, bucket: bucket})
@@ -351,14 +349,14 @@ module Cnvrg
  end
 
  def self.blank_clone(owner, dataset_name, dataset_slug)
- list_dirs = [".cnvrg"
+ list_dirs = ["#{dataset_slug}/.cnvrg"
  ]
  list_files = [
- ".cnvrg/config.yml"
+ "#{dataset_slug}/.cnvrg/config.yml"
  ]
  create_ignore = false
  if !File.exist? ".cnvrgignore"
- list_files << ".cnvrgignore"
+ list_files << "#{dataset_slug}/.cnvrgignore"
  create_ignore = true
  end
 
@@ -372,8 +370,8 @@ module Cnvrg
 
  FileUtils.mkdir_p list_dirs
  FileUtils.touch list_files
- File.open(".cnvrg/config.yml", "w+") {|f| f.write config.to_yaml}
- File.open(".cnvrgignore", "w+") {|f| f.write cnvrgignore} unless !create_ignore
+ File.open("#{dataset_slug}/.cnvrg/config.yml", "w+") {|f| f.write config.to_yaml}
+ File.open("#{dataset_slug}/.cnvrgignore", "w+") {|f| f.write cnvrgignore} unless !create_ignore
  rescue => e
  return false
  end
@@ -385,16 +383,21 @@ module Cnvrg
  File.open(self.local_path + "/.cnvrgignore", "w+") {|f| f.write cnvrgignore}
  end
 
- def self.verify_datasets(dataset_titles, timeout = 100)
+ def self.verify_dataset(dataset_slug)
+ config = YAML.load_file("/data/#{dataset_title}/.cnvrg/config.yml") rescue {}
+ config[:success] == true
+ end
+
+ def self.verify_datasets(dataset_titles, timeout = nil)
  start_time = Time.now.to_i
  Cnvrg::Logger.log_info("Verifying datasets #{dataset_titles}")
  Cnvrg::Logger.log_info("Timeout is #{timeout}")
  while true
  begin
  current_time = Time.now.to_i
- return false if timeout < current_time - start_time
+ return false if (timeout.present? and timeout < current_time - start_time)
  all_are_ready = dataset_titles.all? do |dataset_title|
- config = YAML.load_file("/data/#{dataset_title}/.cnvrg/config.yml")
+ config = YAML.load_file("#{dataset_title}/.cnvrg/config.yml")
  config[:success] == true
  end
  return true if all_are_ready
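
verify_datasets now defaults to no timeout, polling until every dataset's .cnvrg/config.yml reports :success: true, and it reads the config relative to the dataset title rather than under /data. A hedged sketch of the calling side, with hypothetical titles:

    Cnvrg::Dataset.verify_datasets(['images', 'labels'])   # waits indefinitely
    Cnvrg::Dataset.verify_datasets(['images'], 120)         # gives up after ~120 seconds
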
@@ -408,6 +411,30 @@ module Cnvrg
  end
  end
 
+ def self.scan_datasets()
+ Cnvrg::Logger.log_info("Looking up datasets")
+ datasets = Dir.entries(Dir.pwd).map do |entry|
+ if File.directory? File.join(Dir.pwd,entry) and !(entry =='.' || entry == '..')
+ begin
+ config = YAML.load_file("#{Dir.pwd}/#{entry}/.cnvrg/config.yml") rescue nil
+ local_commit = YAML.load_file("#{Dir.pwd}/#{entry}/.cnvrg/idx.yml")[:commit] rescue nil
+ if config.present? and config[:success] == true and config[:dataset_name].present? and config[:dataset_slug].present? and local_commit.present?
+ {
+ "dataset_slug": config[:dataset_slug],
+ "dataset_name": config[:dataset_name],
+ "local_commit": local_commit,
+ }
+ else
+ nil
+ end
+ rescue
+ nil
+ end
+ end
+ end.compact.uniq
+ datasets
+ end
+
  def clone(commit)
  return
  end
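
scan_datasets walks the current working directory and returns one hash per subdirectory that has a successful .cnvrg/config.yml and a local commit in .cnvrg/idx.yml. A sketch of the expected return shape, assuming Cnvrg::Dataset as the receiver and hypothetical values:

    Cnvrg::Dataset.scan_datasets
    # => [{ dataset_slug: 'images', dataset_name: 'images', local_commit: 'abc123' }]
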
@@ -470,7 +497,6 @@ module Cnvrg
  if File.exist?(file_path)
  File.open(file_path, "a") {|f| f.puts(":success: true")}
  end
- rescue
  end
 
  def self.init_container(owner, dataset_slug, dataset_name)
@@ -504,7 +530,6 @@ module Cnvrg
  return YAML.load_file("#{self.local_path}/.cnvrg/idx.yml")
  end
  {commit: nil, tree: {}}
-
  end
 
  def set_idx(idx)
@@ -572,7 +597,7 @@ module Cnvrg
  end
 
  def write_idx(tree = nil, commit = nil)
- if tree.blank?
+ if tree.nil?
  tree = self.generate_idx[:tree]
  tree = tree.map {|k, v| (v.present?) ? [k, {sha1: v[:sha1], commit_time: Time.now}] : [k, v]}.to_h
  end
@@ -701,7 +726,6 @@ module Cnvrg
  idx.try(:fetch, :partial_commit)
  end
 
-
  def current_status(new_branch)
  commit = last_local_commit
  response = Cnvrg::API.request("users/#{self.owner}/datasets/#{self.slug}/status_current", 'POST', {current_commit: commit, new_branch: new_branch})
@@ -731,7 +755,6 @@ module Cnvrg
  rescue
  return []
  end
-
  end
 
 
@@ -748,7 +771,6 @@ module Cnvrg
  idx_hash[:tree] = ""
  else
  idx_hash = YAML.load_file("#{self.local_path}/.cnvrg/idx.yml")
-
  end
  idx_hash[:next_commit] = commit_sha1
  File.open("#{self.local_path}/.cnvrg/idx.yml", 'w') {|f| f.write idx_hash.to_yaml}
@@ -810,11 +832,9 @@ module Cnvrg
 
  def update_idx(idx)
  File.open("#{self.local_path}/.cnvrg/idx.yml", 'w') {|f| f.write idx.to_yaml}
-
  return true
  end
 
-
  def update_idx_with_commit!(commit)
  idx_hash = YAML.load_file("#{self.local_path}/.cnvrg/idx.yml")
  idx_hash[:commit] = commit
@@ -852,5 +872,21 @@ module Cnvrg
  return {validation: Data::ConfigValidation::SUCCESS, message: "Directory is already linked to #{slug}"}
  end
 
+ def self.stop_if_dataset_present(dataset_home, dataset_name, commit: nil)
+
+ cli = Cnvrg::CLI.new()
+ config = YAML.load_file(dataset_home + "/.cnvrg/config.yml")
+ if commit.present?
+ local_commit = YAML.load_file(dataset_home + "/.cnvrg/idx.yml")[:commit] rescue nil
+ return if commit != local_commit or local_commit.blank?
+ end
+ if config[:dataset_name] == dataset_name
+ cli.log_message("Dataset already present, clone aborted")
+ exit(0)
+ end
+ rescue => e
+ nil
+ end
+
  end
  end
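
stop_if_dataset_present short-circuits a clone: if the target directory already holds a dataset with the same name (and, when given, the same commit), it logs a message and exits with status 0; any error is swallowed. A hedged usage sketch with hypothetical paths and values:

    # Exits the CLI early if 'my-dataset' is already cloned there at that commit.
    Cnvrg::Dataset.stop_if_dataset_present('/data/my-dataset', 'my-dataset', commit: 'abc123')
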
@@ -6,11 +6,12 @@ module Cnvrg
  attr_reader :sync_delay_time
  attr_reader :output_dir
 
- def initialize(owner, project_slug)
+ def initialize(owner, project_slug, job_id: nil)
  @project_slug = project_slug
  @owner = owner
+ @command = nil
  @base_resource = "users/#{owner}/projects/#{project_slug}/"
- @slug = nil
+ @slug = job_id
  @sync_before_terminate = nil
  @sync_delay_time = nil
  @output_dir = nil
@@ -37,6 +38,8 @@ module Cnvrg
  @sync_before_terminate = res.to_h["result"].to_h["sync_before_terminate"]
  @sync_delay_time = res.to_h["result"].to_h["sync_delay_time"]
  @output_dir = res.to_h["result"].to_h["output_dir"]
+ @command = res.to_h["result"].to_h["command"] rescue nil
+
  return res
 
  end
@@ -123,8 +126,7 @@ module Cnvrg
  dataset_query:data_query,git_commit:git_commit,git_branch:git_branch,
  restart_if_stuck:restart_if_stuck, local_folders: local_folders, title:title,
  prerun: prerun, requirements: requirements, recurring: recurring,
- email_notification_error: email_notification_error,
- email_notification_success: email_notification_success,
+ email_notification_error: email_notification_error, email_notification_success: email_notification_success,
  emails_to_notify: emails_to_notify})
 
  return response
@@ -172,6 +174,10 @@ module Cnvrg
  end
  end
 
+ def get_cmd
+ return @command
+ end
+
  def restart_spot_instance
 
  restart = false
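
The constructor can now be bound to an existing job via job_id:, and the command reported by the server is cached and exposed through get_cmd. A sketch under the assumption that this hunk belongs to the experiment/job API class (the class name is not shown in the diff; Cnvrg::Experiment and the slugs below are hypothetical):

    job = Cnvrg::Experiment.new('my-org', 'my-project', job_id: 'xyz123')
    job.get_cmd   # => nil until the job details have been fetched from the server
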
@@ -86,7 +86,7 @@ module Cnvrg
  end
  end
 
- def upload_multiple_files(files_list, commit_sha1, progress: nil)
+ def upload_multiple_files(files_list, commit_sha1, progress: nil, suppress_exceptions: false)
  #open files on the server.
  Cnvrg::Logger.log_info("Uploading project files")
  return if files_list.blank?
@@ -95,7 +95,7 @@ module Cnvrg
  return self.upload_files_old(files_list, commit_sha1, progress: progress)
  end
 
- files_list = files_list.map{|x| [x,self.parse_file(x)]}.to_h
+ files_list = files_list.map{|x| [x, self.parse_file(x)] if self.parse_file(x)}.compact.to_h
  resp = Cnvrg::API.request(@base_resource + "upload_files", 'POST', {files: files_list, commit: commit_sha1})
  unless Cnvrg::CLI.is_response_success(resp, false)
  raise SignalException.new("Cant upload files to the server.")
@@ -106,6 +106,7 @@ module Cnvrg
 
  #upload files
  token_mutex = Mutex.new
+ buffered_errors = {}
  blob_ids = Parallel.map(files.keys, in_threads: ParallelThreads) do |file|
 
  token_mutex.synchronize {
@@ -115,21 +116,33 @@ module Cnvrg
  begin
  Cnvrg::Helpers.try_until_success{self.upload_single_file(files[file].merge(files_list[file]))}
  rescue => e
-
- Cnvrg::CLI.log_message("Failed to upload #{file}: #{e.message}", 'red')
+ Cnvrg::CLI.log_message("Failed to upload #{file}: #{e.message}", 'red') unless suppress_exceptions
  Cnvrg::Logger.log_error(e)
  Cnvrg::Logger.log_method(bind: binding)
- raise e
+
+ buffered_errors[file] = "Failed to upload #{file}: #{e.message}" if suppress_exceptions
+
+ raise e unless suppress_exceptions
  end
  progress.progress += 1 if progress.present?
- files[file]["bv_id"]
+
+ unless buffered_errors.key?(file)
+ files[file]["bv_id"]
+ else
+ nil
+ end
  end
 
+ # remove nil files (failed files) from blob_ids
+ blob_ids.compact!
+
  #save files on the server.
  resp = Cnvrg::API.request(@base_resource + "upload_files_save", 'POST', {blob_ids: blob_ids, commit: commit_sha1})
  unless Cnvrg::CLI.is_response_success(resp, false)
  raise SignalException.new("Cant save uploaded files to the server.")
  end
+
+ return buffered_errors
  end
 
 
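
With suppress_exceptions: true, a failed single-file upload no longer aborts the whole batch: the failure is recorded, the file is dropped from blob_ids before the save call, and the method returns a hash mapping each failed path to its error message. A hedged sketch of the calling side (the datafiles receiver and paths are hypothetical):

    errors = datafiles.upload_multiple_files(['a.txt', 'b.txt'], commit_sha1, suppress_exceptions: true)
    errors.each { |path, msg| Cnvrg::Logger.log_info("Skipped #{path}: #{msg}") } if errors.present?
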
@@ -146,7 +159,7 @@ module Cnvrg
  end
  end
 
- def delete_files_from_server(files, commit_sha1)
+ def delete_files_from_server(files, commit_sha1, suppress_exceptions: false)
  #files are absolute path files here. ^^
  if Cnvrg::Helpers.server_version < 1
  return self.delete_files_from_server_old(files, commit_sha1)
@@ -156,8 +169,14 @@ module Cnvrg
  return if files.blank?
  resp = Cnvrg::API.request(@base_resource + "delete_files", 'DELETE', {files: files, commit: commit_sha1})
  unless Cnvrg::CLI.is_response_success(resp, false)
- raise SignalException.new("Cant delete the following files from the server.")
+ raise SignalException.new("Cant delete the following files from the server.") unless suppress_exceptions
+ Cnvrg::Logger.log_error_message("Cant delete the following files from the server: ")
+ Cnvrg::Logger.log_error_message(files.to_s)
  end
+ rescue => e
+ Cnvrg::Logger.log_error_message("An exception raised in delete_files_from_server: ")
+ Cnvrg::Logger.log_error(e)
+ raise e unless suppress_exceptions
  end
 
  def upload_single_file(file)
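
delete_files_from_server gains the same suppress_exceptions switch: when true, a bad response or a raised error is logged instead of propagated. A brief sketch (receiver and path hypothetical):

    datafiles.delete_files_from_server(['/abs/path/old.txt'], commit_sha1, suppress_exceptions: true)
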
@@ -176,6 +195,8 @@ module Cnvrg
  sha1 = OpenSSL::Digest::SHA1.file(abs_path).hexdigest
 
  {relative_path: file, absolute_path: abs_path, file_name: file_name, file_size: file_size, content_type: content_type, sha1: sha1}
+ rescue => e
+ return false
  end
 
  def upload_old(absolute_path, relative_path, commit_sha1)
@@ -706,6 +727,7 @@ module Cnvrg
  unless Cnvrg::CLI.is_response_success(res, false)
  raise SignalException.new("Cant download files from the server.")
  end
+ puts res
  self.download_multpile_files_s3(res['result'], @project_home, postfix: postfix, progress: progress)
  end
 
@@ -911,12 +933,22 @@ module Cnvrg
  end
  end
 
- def start_commit(new_branch,force:false, exp_start_commit:nil, job_slug: nil, job_type: nil, start_commit: nil, message: nil)
- response = Cnvrg::API.request("#{base_resource}/commit/start", 'POST', {project_slug: @project_slug, new_branch: new_branch,force:force,
- username: @owner, exp_start_commit:exp_start_commit, job_slug: job_slug, job_type: job_type, start_commit: start_commit, message: message})
- Cnvrg::CLI.is_response_success(response,false)
- return response
- end
+ def start_commit(new_branch,force:false, exp_start_commit:nil, job_slug: nil, job_type: nil, start_commit: nil, message: nil, debug_mode: false)
+ response = Cnvrg::API.request(
+ "#{base_resource}/commit/start",
+ 'POST',
+ {
+ project_slug: @project_slug, username: @owner,
+ new_branch: new_branch, force:force,
+ exp_start_commit:exp_start_commit, start_commit: start_commit,
+ job_slug: job_slug, job_type: job_type, message: message,
+ debug_mode: debug_mode
+ }
+ )
+
+ Cnvrg::CLI.is_response_success(response,false)
+ return response
+ end
 
  def end_commit(commit_sha1,force:false,message:"")
  response = Cnvrg::API.request("#{base_resource}/commit/end", 'POST', {commit_sha1: commit_sha1,force:force,message:message})
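
start_commit keeps its payload but now forwards a debug_mode flag to the commit/start endpoint. A hedged example call (new_branch is the positional argument; the values below are hypothetical):

    project.start_commit(false, job_slug: 'job-1', job_type: 'Experiment', message: 'debug run', debug_mode: true)
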
@@ -359,9 +359,9 @@ parameters:
  return {client: client, key: key, iv: iv, bucket: bucket, upload_options: upload_options}
  end
 
- def get_experiment_events_log_from_server(exp, project)
+ def get_experiment_events_log_from_server(exp, project, commit: nil)
  dest_dir = exp["slug"]
- commit = exp["end_commit"]
+ commit = commit || exp["end_commit"]
  response = project.clone(0, commit)
  Cnvrg::CLI.is_response_success(response, should_exit=false)
  commit_sha1 = response["result"]["commit"]
@@ -372,42 +372,50 @@ parameters:
  FileUtils.rm_rf("#{dest_dir}")
  FileUtils.mkdir_p(dest_dir)
  files.each do |f|
- FileUtils.mv(f, "#{dest_dir}/#{File.basename(f)}")
+ file_dir = "#{dest_dir}/#{File.dirname(f)}"
+ FileUtils.mkdir_p(file_dir)
+ FileUtils.mv(f, "#{dest_dir}/#{f}")
  end
  end
 
  def get_experiment_events_log_via_kubectl(exp, namespace)
  dest_dir = exp["slug"]
- result = `kubectl -n #{namespace} get pods | grep #{exp["machine_activity"]}`
+ result = `kubectl -n #{namespace} get pods | grep #{exp["slug"]}`
+
  pod_name = result.split(" ")[0]
- if pod_name.present?
- FileUtils.mkdir_p(dest_dir)
- working_dir = `kubectl -n #{namespace} exec #{pod_name} -- pwd`
- working_dir.strip!
- res = `kubectl -n #{namespace} exec #{pod_name} -- /bin/bash -c "ls -R #{working_dir}"`
- files_and_folders = res.split("\n\n")
- all_files = []
-
- files_and_folders.each do |file_and_folder|
- files = file_and_folder.split("\n")
- if files.first.include?(":")
- folder = files.first.gsub(":", "")
- files = files.drop(1)
- end
- files.each do |file|
- if file.include?("tfevents")
- all_files << "#{folder}/#{file}"
- end
- end
+ return false if pod_name.blank?
+ FileUtils.mkdir_p(dest_dir)
+ working_dir = `kubectl -n #{namespace} exec #{pod_name} -c agent -- pwd`
+ working_dir.strip!
+ res = `kubectl -n #{namespace} exec #{pod_name} -c agent -- /bin/bash -c "ls -R #{working_dir}"`
+ files_and_folders = res.split("\n\n")
+ all_files = []
+
+ files_and_folders.each do |file_and_folder|
+ files = file_and_folder.split("\n")
+ if files.first.include?(":")
+ folder = files.first.gsub(":", "")
+
+ folder = folder.sub(working_dir + "/", "")
+ files = files.drop(1)
  end
-
- all_files.each do |file|
- res = `kubectl -n #{namespace} cp #{pod_name}:#{file} #{dest_dir}/#{File.basename(file)}`
+ files.each do |file|
+ if file.include?("tfevents")
+ all_files << "#{folder}/#{file}"
+ end
  end
+ end
 
+ all_files.each do |file|
+ file_dir = "#{dest_dir}/#{File.dirname(file)}"
+ FileUtils.mkdir_p(file_dir)
+ res = `kubectl -n #{namespace} cp #{pod_name}:#{file} -c agent #{dest_dir}/#{file}`
  end
+
+ return true
  rescue => e
  Cnvrg::Logger.log_error(e)
+ return false
  end
  end
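
The kubectl path now looks the pod up by the experiment slug, runs inside the agent container, keeps the tfevents files' directory structure under dest_dir, and returns true/false instead of failing silently. A sketch of how a caller might use that return value (both methods come from this file; the namespace and commit are hypothetical):

    # Fall back to downloading from the server when the pod is gone or the copy fails.
    unless get_experiment_events_log_via_kubectl(exp, 'cnvrg-jobs')
      get_experiment_events_log_from_server(exp, project, commit: exp['end_commit'])
    end
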