cnvrg 1.6.38 → 1.9.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/cnvrg.gemspec +1 -4
- data/lib/cnvrg/Images.rb +0 -148
- data/lib/cnvrg/api.rb +8 -8
- data/lib/cnvrg/api_v2.rb +14 -0
- data/lib/cnvrg/cli.rb +288 -781
- data/lib/cnvrg/connect_job_ssh.rb +31 -0
- data/lib/cnvrg/data.rb +65 -12
- data/lib/cnvrg/datafiles.rb +483 -201
- data/lib/cnvrg/dataset.rb +65 -29
- data/lib/cnvrg/experiment.rb +10 -4
- data/lib/cnvrg/files.rb +46 -14
- data/lib/cnvrg/helpers.rb +34 -26
- data/lib/cnvrg/helpers/agent.rb +188 -0
- data/lib/cnvrg/helpers/executer.rb +162 -258
- data/lib/cnvrg/job_cli.rb +28 -53
- data/lib/cnvrg/job_ssh.rb +47 -0
- data/lib/cnvrg/logger.rb +4 -0
- data/lib/cnvrg/project.rb +45 -16
- data/lib/cnvrg/ssh.rb +0 -1
- data/lib/cnvrg/version.rb +1 -1
- metadata +9 -33
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e708ef034df38ed0b4f5c1ac4bb02fa79a26c93b188f571256f75dbc9d2eaaa6
|
4
|
+
data.tar.gz: 6badf54b65660776e63c02c7d3c5dbbab83d0e1e83f6e877b48d77fad5ba3036
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 21d89ec4fb99c4102bc1e8e0e50df516339a1c9e9660ee8f0dd8acf3ae30bd27067f5ea4fe979de3b737bd6f748ced98f023100487a4226b7f21eed17975142c
|
7
|
+
data.tar.gz: 91fb2d10994c11e9b28ef3bbc128f847ac2efd641892c29ec1ec2b16d4b125266e85a6166153b66ab9e9e1c475190f6eca771e42d739a02c1136dbe8cb6c3abb
|
data/cnvrg.gemspec
CHANGED
@@ -31,7 +31,7 @@ Gem::Specification.new do |spec|
|
|
31
31
|
spec.add_runtime_dependency 'open4', '~> 1.3', '>= 1.3.4'
|
32
32
|
spec.add_runtime_dependency 'highline', '~> 1.7', '>= 1.7.8'
|
33
33
|
spec.add_runtime_dependency 'thor', '~> 0.19.0','>=0.19.1'
|
34
|
-
spec.add_runtime_dependency 'aws-sdk', '~>
|
34
|
+
spec.add_runtime_dependency 'aws-sdk', '~> 3.0'
|
35
35
|
spec.add_runtime_dependency 'signet', '~> 0.11.0'
|
36
36
|
spec.add_runtime_dependency 'google-cloud-env', '~> 1.2.1'
|
37
37
|
spec.add_runtime_dependency 'google-cloud-core', '~> 1.3.2'
|
@@ -40,11 +40,8 @@ Gem::Specification.new do |spec|
|
|
40
40
|
spec.add_runtime_dependency 'urlcrypt', '~> 0.1.1'
|
41
41
|
spec.add_runtime_dependency 'parallel', '~> 1.12.0'
|
42
42
|
spec.add_runtime_dependency 'azure-storage-blob', '~> 1.1.0'
|
43
|
-
|
44
43
|
spec.add_runtime_dependency 'logstash-logger', '~> 0.22.1'
|
45
|
-
spec.add_runtime_dependency 'docker-api', '~> 1.33'
|
46
44
|
spec.add_runtime_dependency 'activesupport', '~> 5.2.0'
|
47
45
|
spec.add_runtime_dependency 'ruby-progressbar'
|
48
|
-
spec.add_runtime_dependency 'net-ssh'
|
49
46
|
spec.add_runtime_dependency 'down'
|
50
47
|
end
|
data/lib/cnvrg/Images.rb
CHANGED
@@ -1,7 +1,5 @@
|
|
1
1
|
require 'fileutils'
|
2
2
|
require 'cnvrg/files'
|
3
|
-
require 'docker'
|
4
|
-
require 'net/ssh'
|
5
3
|
require 'mimemagic'
|
6
4
|
|
7
5
|
|
@@ -175,58 +173,6 @@ module Cnvrg
|
|
175
173
|
response = Cnvrg::API.request("users/#{owner}/images/#{slug}/commit_custom_image", 'POST', {image_logs:logs})
|
176
174
|
return response
|
177
175
|
end
|
178
|
-
def self.ssh_to_machine(resp)
|
179
|
-
|
180
|
-
sts_path = resp["result"]["sts_path"]
|
181
|
-
|
182
|
-
uri = URI.parse(sts_path)
|
183
|
-
|
184
|
-
http_object = Net::HTTP.new(uri.host, uri.port)
|
185
|
-
http_object.use_ssl = true if uri.scheme == 'https'
|
186
|
-
request = Net::HTTP::Get.new(sts_path)
|
187
|
-
|
188
|
-
body = ""
|
189
|
-
http_object.start do |http|
|
190
|
-
response = http.request request
|
191
|
-
body = response.read_body
|
192
|
-
end
|
193
|
-
|
194
|
-
URLcrypt::key = [body].pack('H*')
|
195
|
-
|
196
|
-
ip = URLcrypt.decrypt(resp["result"]["machine_i"])
|
197
|
-
|
198
|
-
user = URLcrypt.decrypt(resp["result"]["machine_u"])
|
199
|
-
key = URLcrypt.decrypt(resp["result"]["machine_k"])
|
200
|
-
tempssh = Tempfile.new "sshkey"
|
201
|
-
tempssh.write open(key).read
|
202
|
-
tempssh.rewind
|
203
|
-
key_path = tempssh.path
|
204
|
-
count = 0
|
205
|
-
while count < 5
|
206
|
-
|
207
|
-
begin
|
208
|
-
ssh = Net::SSH.start(ip, user=user, :keys => key_path, :timeout => 10)
|
209
|
-
if !ssh.nil?
|
210
|
-
return ssh
|
211
|
-
else
|
212
|
-
count+=1
|
213
|
-
sleep(2)
|
214
|
-
|
215
|
-
end
|
216
|
-
rescue
|
217
|
-
count+=1
|
218
|
-
sleep(2)
|
219
|
-
|
220
|
-
|
221
|
-
end
|
222
|
-
end
|
223
|
-
if tempssh
|
224
|
-
tempssh.close
|
225
|
-
tempssh.unlink
|
226
|
-
end
|
227
|
-
return false
|
228
|
-
end
|
229
|
-
|
230
176
|
|
231
177
|
|
232
178
|
def create_custom_image(new_image_name,working_dir,stored_commands)
|
@@ -270,100 +216,6 @@ module Cnvrg
|
|
270
216
|
File.open(@working_dir+"/.cnvrg/config.yml", "w+") { |f| f.write config.to_yaml }
|
271
217
|
end
|
272
218
|
|
273
|
-
def get_container(stop=false)
|
274
|
-
begin
|
275
|
-
container_id=is_container_exist()
|
276
|
-
|
277
|
-
if !container_id
|
278
|
-
return create_container()
|
279
|
-
else
|
280
|
-
container = Docker::Container.get(container_id)
|
281
|
-
status = container.json["State"]["Status"]
|
282
|
-
|
283
|
-
if status == "running"
|
284
|
-
return container
|
285
|
-
else
|
286
|
-
if stop
|
287
|
-
return false
|
288
|
-
end
|
289
|
-
res = container.start()
|
290
|
-
if res.info["State"]["Status"].eql? "exited" and res.info["State"]["Error"].include? "port is already allocated"
|
291
|
-
return create_container()
|
292
|
-
end
|
293
|
-
return container
|
294
|
-
end
|
295
|
-
end
|
296
|
-
rescue => e
|
297
|
-
if e.message.include? "No such container"
|
298
|
-
|
299
|
-
return create_container()
|
300
|
-
else
|
301
|
-
return false
|
302
|
-
end
|
303
|
-
end
|
304
|
-
|
305
|
-
end
|
306
|
-
|
307
|
-
def create_container(port=7654, is_remote=false)
|
308
|
-
begin
|
309
|
-
image_settings = {
|
310
|
-
'Image' => "#{@image_name}:latest",
|
311
|
-
'User' => 'ds',
|
312
|
-
'Cmd' => '/usr/local/cnvrg/run_ipython.sh',
|
313
|
-
'WorkingDir' => '/home/ds/notebooks',
|
314
|
-
'ExposedPorts' => {
|
315
|
-
'8888/tcp' => {},
|
316
|
-
},
|
317
|
-
'HostConfig' => {
|
318
|
-
'Binds' => ["#{@working_dir}:/home/ds/notebooks"],
|
319
|
-
'PortBindings' => {
|
320
|
-
'8888/tcp' => [
|
321
|
-
{'HostPort' => "#{port}", 'HostIp' => 'localhost'}
|
322
|
-
],
|
323
|
-
},
|
324
|
-
},
|
325
|
-
}
|
326
|
-
container = Docker::Container.create(image_settings)
|
327
|
-
container.start()
|
328
|
-
netrc = File.open(File.expand_path('~')+"/.netrc", "rb")
|
329
|
-
netrc_content = netrc.read
|
330
|
-
container.store_file("/home/ds/.netrc", netrc_content)
|
331
|
-
command = ["/bin/bash", "-lc", "sudo chmod 600 /home/ds/.netrc"]
|
332
|
-
p = container.exec(command, tty: true)
|
333
|
-
command = ["/bin/bash", "-lc", "sudo chown -R ds /home/ds/.netrc"]
|
334
|
-
p = container.exec(command, tty: true)
|
335
|
-
config = File.open(File.expand_path('~')+"/.cnvrg/config.yml", "rb")
|
336
|
-
config_content = config.read
|
337
|
-
container.store_file("/home/ds/.cnvrg/config.yml", config_content)
|
338
|
-
command = ["/bin/bash", "-lc", "sudo chown -R ds /home/ds/.cnvrg"]
|
339
|
-
container.exec(command, tty: true)
|
340
|
-
# Libraries instlled
|
341
|
-
save_installed_libraries(container)
|
342
|
-
config = {project_name: @project_name,
|
343
|
-
project_slug: @project_slug,
|
344
|
-
owner: @owner,
|
345
|
-
docker: true, image_base: @image_name, image_tag: @image_tag, container: container.id, port: port, image_slug: @image_slug}
|
346
|
-
|
347
|
-
File.open(@working_dir+"/.cnvrg/config.yml", "w+") { |f| f.write config.to_yaml }
|
348
|
-
|
349
|
-
|
350
|
-
return container
|
351
|
-
|
352
|
-
|
353
|
-
rescue => e
|
354
|
-
if e.message.include? "is not running"
|
355
|
-
return create_container(port-1)
|
356
|
-
end
|
357
|
-
return false
|
358
|
-
rescue SignalException
|
359
|
-
|
360
|
-
say "\nAborting", Thor::Shell::Color::RED
|
361
|
-
exit(1)
|
362
|
-
end
|
363
|
-
|
364
|
-
|
365
|
-
end
|
366
|
-
|
367
219
|
def save_installed_libraries(container)
|
368
220
|
begin
|
369
221
|
command = ['/bin/bash', '-lc', '/opt/ds/bin/pip freeze']
|
data/lib/cnvrg/api.rb
CHANGED
@@ -77,20 +77,22 @@ module Cnvrg
|
|
77
77
|
if response.to_hash[:status] == 404
|
78
78
|
return false
|
79
79
|
end
|
80
|
-
if parse_request
|
80
|
+
if parse_request
|
81
81
|
JSON.parse(response.body)
|
82
82
|
else
|
83
83
|
response
|
84
84
|
end
|
85
|
-
|
85
|
+
when 'POST', 'PUT'
|
86
86
|
conn.options.timeout = 4200
|
87
|
-
conn.options.open_timeout=180
|
87
|
+
conn.options.open_timeout = 180
|
88
|
+
conn.headers['Content-Type'] = "application/json"
|
88
89
|
retries = 0
|
89
90
|
success = false
|
91
|
+
data = data || {}
|
90
92
|
while !success and retries < 20
|
91
93
|
begin
|
92
|
-
response = conn.post "#{resource}", data if method.eql? 'POST'
|
93
|
-
response = conn.put "#{resource}", data if method.eql? 'PUT'
|
94
|
+
response = conn.post "#{resource}", data.to_json if method.eql? 'POST'
|
95
|
+
response = conn.put "#{resource}", data.to_json if method.eql? 'PUT'
|
94
96
|
success = true
|
95
97
|
Cnvrg::API.parse_version(response)
|
96
98
|
|
@@ -113,7 +115,7 @@ module Cnvrg
|
|
113
115
|
end
|
114
116
|
when 'POST_JSON'
|
115
117
|
conn.options.timeout = 4200
|
116
|
-
conn.options.open_timeout =4200
|
118
|
+
conn.options.open_timeout = 4200
|
117
119
|
conn.headers['Content-Type'] = "application/json"
|
118
120
|
new_data = JSON.dump(data)
|
119
121
|
|
@@ -124,8 +126,6 @@ module Cnvrg
|
|
124
126
|
begin
|
125
127
|
response = conn.post "#{resource}", new_data
|
126
128
|
success = true
|
127
|
-
Cnvrg::API.parse_version(response)
|
128
|
-
|
129
129
|
rescue => e
|
130
130
|
Cnvrg::Logger.log_error(e)
|
131
131
|
sleep(5)
|
data/lib/cnvrg/api_v2.rb
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
module Cnvrg
|
2
|
+
class API_V2 < API
|
3
|
+
ENDPOINT_VERSION = 'v2'
|
4
|
+
|
5
|
+
def self.endpoint_uri
|
6
|
+
api = get_api()
|
7
|
+
return "#{api}/#{Cnvrg::API_V2::ENDPOINT_VERSION}"
|
8
|
+
end
|
9
|
+
|
10
|
+
def self.is_response_success(response)
|
11
|
+
raise Exception.new("Bad status in response #{response.status}") if response.status != 200
|
12
|
+
end
|
13
|
+
end
|
14
|
+
end
|
data/lib/cnvrg/cli.rb
CHANGED
@@ -12,7 +12,6 @@ require 'digest' # sha1up
|
|
12
12
|
require "highline/import"
|
13
13
|
require 'socket'
|
14
14
|
require 'thor'
|
15
|
-
require 'docker'
|
16
15
|
require 'socket'
|
17
16
|
require 'timeout'
|
18
17
|
require 'fileutils'
|
@@ -28,13 +27,11 @@ require 'cnvrg/auth'
|
|
28
27
|
require 'cnvrg/project'
|
29
28
|
require 'cnvrg/files'
|
30
29
|
require 'cnvrg/experiment'
|
31
|
-
require 'cnvrg/Images'
|
32
30
|
require 'cnvrg/image'
|
33
31
|
require 'cnvrg/dataset'
|
34
32
|
require 'cnvrg/datafiles'
|
35
33
|
require 'cnvrg/data'
|
36
34
|
require 'cnvrg/storage'
|
37
|
-
require 'cnvrg/ssh'
|
38
35
|
require 'cnvrg/result'
|
39
36
|
require 'cnvrg/logger'
|
40
37
|
require 'cnvrg/org_helpers'
|
@@ -49,6 +46,9 @@ require 'cnvrg/downloader/clients/s3_client'
|
|
49
46
|
require 'cnvrg/downloader/clients/gcp_client'
|
50
47
|
require 'cnvrg/downloader/clients/azure_client'
|
51
48
|
require 'cnvrg/job_cli'
|
49
|
+
require 'cnvrg/job_ssh'
|
50
|
+
require 'cnvrg/connect_job_ssh'
|
51
|
+
require 'cnvrg/api_v2'
|
52
52
|
|
53
53
|
class Thor
|
54
54
|
module Base
|
@@ -175,6 +175,9 @@ module Cnvrg
|
|
175
175
|
desc "job", "manage running jobs", :hide => false
|
176
176
|
subcommand "job", JobCli
|
177
177
|
|
178
|
+
desc "ssh", "ssh into running jobs", :hide => false
|
179
|
+
subcommand "ssh", JobSsh
|
180
|
+
|
178
181
|
desc "image [COMMAND]", "build existing images", :hide => true
|
179
182
|
subcommand "image", ImageCli
|
180
183
|
|
@@ -819,9 +822,9 @@ module Cnvrg
|
|
819
822
|
end
|
820
823
|
|
821
824
|
desc 'data verify', 'Verify datasets', :hide => true
|
822
|
-
method_option :timeout, :type => :numeric, :aliases => ["-t", "--timeout"], :desc => "Time to wait before returning final answer", :default =>
|
825
|
+
method_option :timeout, :type => :numeric, :aliases => ["-t", "--timeout"], :desc => "Time to wait before returning final answer", :default => nil
|
823
826
|
|
824
|
-
def verify_datasets(dataset_titles, timeout=
|
827
|
+
def verify_datasets(dataset_titles, timeout=nil)
|
825
828
|
begin
|
826
829
|
verify_logged_in(false)
|
827
830
|
log_start(__method__, args, options)
|
@@ -830,21 +833,31 @@ module Cnvrg
|
|
830
833
|
log_message("All datasets are verified", Thor::Shell::Color::BLUE) if verified
|
831
834
|
log_message("Failed to verify datasets", Thor::Shell::Color::RED) if !verified
|
832
835
|
exit(1) if !verified
|
833
|
-
|
834
836
|
rescue SignalException
|
835
837
|
say "\nAborting", Thor::Shell::Color::RED
|
836
838
|
exit(1)
|
837
839
|
end
|
838
840
|
end
|
839
841
|
|
842
|
+
desc 'data scan', 'Lookup datasets', :hide => true
|
843
|
+
def scan_datasets()
|
844
|
+
begin
|
845
|
+
verify_logged_in(false)
|
846
|
+
log_start(__method__, args, options)
|
847
|
+
log_message("Scanning datasets", Thor::Shell::Color::BLUE)
|
848
|
+
datasets = Dataset.scan_datasets()
|
849
|
+
puts(datasets.to_json)
|
850
|
+
end
|
851
|
+
end
|
852
|
+
|
840
853
|
desc 'data clone', 'Clone dataset', :hide => true
|
841
854
|
method_option :commit, :type => :string, :aliases => ["-c", "--commit"], :default => ""
|
842
855
|
method_option :only_tree, :type => :boolean, :aliases => ["-t", "--tree"], :default => false
|
843
856
|
method_option :query, :type => :string, :aliases => ["-q", "--query"], :default => nil
|
844
857
|
method_option :read, :type => :boolean, :aliases => ["-r", "--read"], :default => false
|
845
858
|
method_option :remote, :type => :boolean, :aliases => ["-h", "--remote"], :default => false
|
846
|
-
|
847
|
-
def clone_data(dataset_url,only_tree=false,commit=nil,query=nil,read=false,remote=false, relative: false)
|
859
|
+
method_option :soft, :type => :boolean, :aliases => ["-s", "--soft"], :default => false, :hide => true
|
860
|
+
def clone_data(dataset_url, only_tree=false, commit=nil, query=nil, read=false, remote=false, flatten: false, relative: false, soft: false)
|
848
861
|
begin
|
849
862
|
verify_logged_in(false)
|
850
863
|
log_start(__method__, args, options)
|
@@ -853,10 +866,10 @@ module Cnvrg
|
|
853
866
|
read = options["read"] || read || false
|
854
867
|
remote = options["remote"] || remote || false
|
855
868
|
query = options['query'].presence || query.presence
|
869
|
+
soft = options['soft'] || soft
|
856
870
|
if query.present?
|
857
|
-
return clone_data_query(dataset_url, query)
|
871
|
+
return clone_data_query(dataset_url, query, flatten, soft: soft)
|
858
872
|
end
|
859
|
-
@executer = Cnvrg::Helpers::Executer.get_executer
|
860
873
|
|
861
874
|
url_parts = dataset_url.split("/")
|
862
875
|
project_index = Cnvrg::Helpers.look_for_in_path(dataset_url, "datasets")
|
@@ -868,6 +881,8 @@ module Cnvrg
|
|
868
881
|
dataset_name = response["result"]["name"]
|
869
882
|
dataset_home = Dir.pwd+"/"+dataset_name
|
870
883
|
|
884
|
+
Dataset.stop_if_dataset_present(dataset_home, dataset_name, commit: response["result"]["commit"]) if soft
|
885
|
+
|
871
886
|
check = Helpers.checkmark
|
872
887
|
if @dataset.init_home(remote:remote)
|
873
888
|
log_message("Cloning #{dataset_name}", Thor::Shell::Color::BLUE)
|
@@ -875,14 +890,12 @@ module Cnvrg
|
|
875
890
|
log_message("Downloading files", Thor::Shell::Color::BLUE)
|
876
891
|
if @dataset.softlinked?
|
877
892
|
@files.cp_ds(relative: relative)
|
878
|
-
@executer.set_dataset_status(dataset: @dataset.slug, status: "cloned") if @executer
|
879
893
|
log_message("#{check} Clone finished successfully", Thor::Shell::Color::GREEN)
|
880
894
|
@dataset.write_success
|
881
895
|
return
|
882
896
|
end
|
883
897
|
|
884
898
|
if only_tree
|
885
|
-
|
886
899
|
success = Dataset.clone_tree(commit: commit, dataset_home: dataset_home)
|
887
900
|
return if success
|
888
901
|
end
|
@@ -900,7 +913,7 @@ module Cnvrg
|
|
900
913
|
|
901
914
|
while files['keys'].length > 0
|
902
915
|
Cnvrg::Logger.log_info("download multiple files, #{downloaded_files.size} files downloaded")
|
903
|
-
@files.download_multiple_files_s3(files, @dataset.local_path, progressbar: progressbar, read_only: read)
|
916
|
+
@files.download_multiple_files_s3(files, @dataset.local_path, progressbar: progressbar, read_only: read, flatten: flatten)
|
904
917
|
|
905
918
|
downloaded_files += files['keys'].length
|
906
919
|
files = @files.get_clone_chunk(commit: commit, latest_id: files['latest'])
|
@@ -908,7 +921,6 @@ module Cnvrg
|
|
908
921
|
progressbar.finish
|
909
922
|
if downloaded_files == files_count
|
910
923
|
Dataset.verify_cnvrgignore_exist(dataset_name, false)
|
911
|
-
@executer.set_dataset_status(dataset: @dataset.slug, status: "cloned") if @executer
|
912
924
|
log_message("#{check} Clone finished successfully", Thor::Shell::Color::GREEN)
|
913
925
|
@dataset.write_success
|
914
926
|
### if read, dont generate idx (but create idx.yml) if not read, generate idx.
|
@@ -930,12 +942,14 @@ module Cnvrg
|
|
930
942
|
|
931
943
|
desc 'data clone_query', 'Clone dataset _query', :hide => true
|
932
944
|
method_option :query, :type => :string, :aliases => ["-q", "--query"], :default => ""
|
933
|
-
|
945
|
+
method_option :soft, :type => :boolean, :aliases => ["-s", "--soft"], :default => false, :hide => true
|
946
|
+
def clone_data_query(dataset_url, query=nil, flatten=false, soft: false)
|
934
947
|
begin
|
935
948
|
verify_logged_in(false)
|
936
|
-
|
949
|
+
#@executer = Cnvrg::Helpers::Executer.get_executer
|
937
950
|
log_start(__method__, args, options)
|
938
951
|
query = options["query"] || query
|
952
|
+
soft = options["soft"] || soft
|
939
953
|
if !query.present?
|
940
954
|
log_message("Argument missing : query", Thor::Shell::Color::RED)
|
941
955
|
exit(1)
|
@@ -945,13 +959,14 @@ module Cnvrg
|
|
945
959
|
project_index = Cnvrg::Helpers.look_for_in_path(dataset_url, "datasets")
|
946
960
|
slug = url_parts[project_index + 1]
|
947
961
|
owner = url_parts[project_index - 1]
|
948
|
-
|
949
962
|
response = Cnvrg::API.request("users/#{owner}/datasets/#{slug}/search/#{query}", 'GET')
|
950
963
|
Cnvrg::CLI.is_response_success(response,true)
|
951
964
|
dataset_name = response["results"]["name"]
|
952
965
|
dataset_slug = response["results"]["slug"]
|
953
|
-
dataset_home =
|
966
|
+
dataset_home = Dir.pwd+"/"+dataset_slug
|
967
|
+
Dataset.stop_if_dataset_present(dataset_home, dataset_name) if soft
|
954
968
|
|
969
|
+
# dataset_home = Dir.pwd
|
955
970
|
if Dataset.blank_clone(owner, dataset_name, dataset_slug)
|
956
971
|
dataset = Dataset.new(dataset_home)
|
957
972
|
log_message("Cloning #{dataset_name}", Thor::Shell::Color::BLUE)
|
@@ -966,6 +981,7 @@ module Cnvrg
|
|
966
981
|
},
|
967
982
|
in_threads: ParallelThreads
|
968
983
|
}
|
984
|
+
|
969
985
|
begin
|
970
986
|
log_message("Downloading files", Thor::Shell::Color::BLUE)
|
971
987
|
Parallel.map((response["results"]["query_files"]), parallel_options) do |f|
|
@@ -974,6 +990,7 @@ module Cnvrg
|
|
974
990
|
file_name = relative_path_dir.pop()
|
975
991
|
relative_path_dir = relative_path_dir.join("/")
|
976
992
|
abs_path = dataset_home + "/" + relative_path_dir
|
993
|
+
abs_path = dataset_home if flatten
|
977
994
|
begin
|
978
995
|
FileUtils.mkdir_p(abs_path) unless File.exist? (abs_path + "/" + file_name)
|
979
996
|
rescue
|
@@ -981,14 +998,14 @@ module Cnvrg
|
|
981
998
|
exit(1)
|
982
999
|
end
|
983
1000
|
begin
|
984
|
-
File.write "#{abs_path}/#{file_name}", open(f["
|
985
|
-
rescue
|
1001
|
+
File.write "#{abs_path}/#{file_name}", open(f["url"]).read unless File.exist? (abs_path + "/" + file_name)
|
1002
|
+
rescue => e
|
986
1003
|
log_message("Could not download file: #{f["fullpath"]}", Thor::Shell::Color::RED)
|
987
1004
|
exit(1)
|
988
1005
|
end
|
989
1006
|
|
990
1007
|
end
|
991
|
-
|
1008
|
+
#@executer.set_dataset_status(dataset: dataset.slug, status: "cloned") if @executer.present?
|
992
1009
|
rescue Interrupt
|
993
1010
|
log_message("Couldn't download", Thor::Shell::Color::RED)
|
994
1011
|
exit(1)
|
@@ -998,7 +1015,7 @@ module Cnvrg
|
|
998
1015
|
check = Helpers.checkmark
|
999
1016
|
log_message("#{check} Clone finished successfully", Thor::Shell::Color::GREEN)
|
1000
1017
|
dataset.write_success(in_folder=true)
|
1001
|
-
rescue
|
1018
|
+
rescue => e
|
1002
1019
|
exit(1)
|
1003
1020
|
end
|
1004
1021
|
end
|
@@ -1008,32 +1025,6 @@ module Cnvrg
|
|
1008
1025
|
end
|
1009
1026
|
end
|
1010
1027
|
|
1011
|
-
desc 'init_data_container', 'Init dataset directory', :hide => true
|
1012
|
-
method_option :login_content, :type => :string, :aliases => ["-l"], :default => ""
|
1013
|
-
|
1014
|
-
def init_data_container(container)
|
1015
|
-
begin
|
1016
|
-
login_content = options["login_content"]
|
1017
|
-
|
1018
|
-
container = Docker::Container.get(container)
|
1019
|
-
command = ["/bin/bash", "-lc", "sudo echo -e \"#{login_content}\" >/home/ds/.netrc"]
|
1020
|
-
container.exec(command, tty: true)
|
1021
|
-
command = ["/bin/bash", "-lc", "mkdir /home/ds/.cnvrg"]
|
1022
|
-
container.exec(command, tty: true)
|
1023
|
-
command = ["/bin/bash", "-lc", "mkdir /home/ds/.cnvrg/tmp"]
|
1024
|
-
container.exec(command, tty: true)
|
1025
|
-
command = ["/bin/bash", "-lc", "sudo chown -R ds /home/ds/.cnvrg /home/ds/.netrc"]
|
1026
|
-
container.exec(command, tty: true)
|
1027
|
-
command = ["/bin/bash", "-lc", "sudo chmod 0600 /home/ds/.netrc"]
|
1028
|
-
container.exec(command, tty: true)
|
1029
|
-
|
1030
|
-
rescue SignalException
|
1031
|
-
|
1032
|
-
say "\nAborting", Thor::Shell::Color::RED
|
1033
|
-
exit(1)
|
1034
|
-
end
|
1035
|
-
end
|
1036
|
-
|
1037
1028
|
desc 'data_snap', 'Init dataset directory', :hide => true
|
1038
1029
|
method_option :public, :type => :boolean, :aliases => ["-p", "--public"], :default => false
|
1039
1030
|
|
@@ -1184,17 +1175,29 @@ module Cnvrg
|
|
1184
1175
|
end
|
1185
1176
|
|
1186
1177
|
desc '', '', :hide => true
|
1187
|
-
def
|
1178
|
+
def get_owner_slug(url_or_slug)
|
1179
|
+
if url_or_slug =~ URI::regexp
|
1180
|
+
# Find owner and slug in url
|
1181
|
+
url_parts = url_or_slug.split("/")
|
1182
|
+
project_index = Cnvrg::Helpers.look_for_in_path(url_or_slug, "datasets")
|
1183
|
+
slug = url_parts[project_index + 1]
|
1184
|
+
owner = url_parts[project_index - 1]
|
1185
|
+
else
|
1186
|
+
# Find owner in config file
|
1187
|
+
owner = CLI.get_owner
|
1188
|
+
slug = url_or_slug
|
1189
|
+
end
|
1190
|
+
return owner, slug
|
1191
|
+
end
|
1192
|
+
|
1193
|
+
desc '', '', :hide => true
|
1194
|
+
def data_put(dataset_url, files: [], dir: '', commit: '', chunk_size: 1000, force: false, threads: 15, message: nil)
|
1188
1195
|
begin
|
1189
1196
|
verify_logged_in(false)
|
1190
1197
|
log_start(__method__, args, options)
|
1191
1198
|
|
1192
|
-
|
1193
|
-
|
1194
|
-
project_index = Cnvrg::Helpers.look_for_in_path(dataset_url, "datasets")
|
1195
|
-
slug = url_parts[project_index + 1]
|
1196
|
-
owner = url_parts[project_index - 1]
|
1197
|
-
@dataset = Dataset.new(dataset_url: dataset_url)
|
1199
|
+
owner, slug = get_owner_slug(dataset_url)
|
1200
|
+
@dataset = Dataset.new(dataset_info: {:owner => owner, :slug => slug})
|
1198
1201
|
@datafiles = Cnvrg::Datafiles.new(owner, slug, dataset: @dataset)
|
1199
1202
|
@files = @datafiles.verify_files_exists(files)
|
1200
1203
|
|
@@ -1218,28 +1221,33 @@ module Cnvrg
|
|
1218
1221
|
else
|
1219
1222
|
@commit = commit
|
1220
1223
|
end
|
1221
|
-
|
1224
|
+
|
1225
|
+
# dir shouldnt have starting or ending slash.
|
1222
1226
|
dir = dir[0..-2] if dir.end_with? '/'
|
1223
1227
|
dir = dir[1..-1] if dir.start_with? '/'
|
1224
1228
|
|
1225
|
-
@
|
1226
|
-
|
1227
|
-
|
1228
|
-
|
1229
|
+
@datafiles.upload_multiple_files_optimized(
|
1230
|
+
@files,
|
1231
|
+
@commit,
|
1232
|
+
force: force,
|
1233
|
+
chunk_size: chunk_size,
|
1234
|
+
prefix: dir,
|
1235
|
+
threads: threads
|
1236
|
+
)
|
1237
|
+
|
1238
|
+
# This is for backwards compatibility only and should be removed in future versions:
|
1239
|
+
res = @datafiles.put_commit(@commit)
|
1240
|
+
unless res.is_success?
|
1241
|
+
raise SignalException.new(1, res.msg)
|
1229
1242
|
end
|
1230
|
-
|
1231
|
-
|
1232
|
-
|
1233
|
-
|
1234
|
-
|
1235
|
-
|
1236
|
-
res = @datafiles.end_commit(@commit,false, success: true )
|
1237
|
-
msg = res['result']
|
1238
|
-
response = Cnvrg::Result.new(Cnvrg::CLI.is_response_success(res, true), msg)
|
1239
|
-
unless response.is_success?
|
1240
|
-
raise SignalException.new(1, res.msg)
|
1241
|
-
end
|
1243
|
+
|
1244
|
+
res = @datafiles.end_commit(@commit,false, success: true, commit_type: "put")
|
1245
|
+
msg = res['result']
|
1246
|
+
response = Cnvrg::Result.new(Cnvrg::CLI.is_response_success(res, true), msg)
|
1247
|
+
unless response.is_success?
|
1248
|
+
raise SignalException.new(1, res.msg)
|
1242
1249
|
end
|
1250
|
+
|
1243
1251
|
log_message("Uploading files finished Successfully", Thor::Shell::Color::GREEN)
|
1244
1252
|
rescue SignalException => e
|
1245
1253
|
log_message(e.message, Thor::Shell::Color::RED)
|
@@ -1248,7 +1256,49 @@ module Cnvrg
|
|
1248
1256
|
end
|
1249
1257
|
|
1250
1258
|
|
1259
|
+
desc '', '', :hide => true
|
1260
|
+
def data_rm(dataset_url, regex_list: [], commit: '', message: nil)
|
1261
|
+
begin
|
1262
|
+
verify_logged_in(false)
|
1263
|
+
log_start(__method__, args, options)
|
1251
1264
|
|
1265
|
+
owner, slug = get_owner_slug(dataset_url)
|
1266
|
+
@dataset = Dataset.new(dataset_info: {:owner => owner, :slug => slug})
|
1267
|
+
@datafiles = Cnvrg::Datafiles.new(owner, slug, dataset: @dataset)
|
1268
|
+
|
1269
|
+
# Init a new commit
|
1270
|
+
response = @datafiles.start_commit(false, true, chunks: 1, message: message )
|
1271
|
+
unless response #means we failed in the start commit.
|
1272
|
+
raise SignalException.new(1, "Cant put files into dataset, check the dataset id")
|
1273
|
+
end
|
1274
|
+
@commit = response['result']['commit_sha1']
|
1275
|
+
files_to_delete, folders_to_delete, job_id = @datafiles.delete_multiple_files(@commit, regex_list)
|
1276
|
+
log_message("Deleting #{files_to_delete} files and #{folders_to_delete} folders", Thor::Shell::Color::GREEN)
|
1277
|
+
|
1278
|
+
total_files = files_to_delete + folders_to_delete
|
1279
|
+
current_progress = 0
|
1280
|
+
progressbar = @datafiles.create_progressbar("Delete Progress", total_files)
|
1281
|
+
chunk_size = 1000
|
1282
|
+
offset = 0
|
1283
|
+
while current_progress < total_files
|
1284
|
+
current_progress = @datafiles.delete_file_chunk(@commit, regex_list, chunk_size, offset)
|
1285
|
+
progressbar.progress = current_progress
|
1286
|
+
offset += chunk_size
|
1287
|
+
end
|
1288
|
+
|
1289
|
+
res = @datafiles.end_commit(@commit,false, success: true)
|
1290
|
+
msg = res['result']
|
1291
|
+
response = Cnvrg::Result.new(Cnvrg::CLI.is_response_success(res, true), msg)
|
1292
|
+
unless response.is_success?
|
1293
|
+
raise SignalException.new(1, res.msg)
|
1294
|
+
end
|
1295
|
+
|
1296
|
+
log_message("Deleting files finished Successfully", Thor::Shell::Color::GREEN)
|
1297
|
+
rescue SignalException => e
|
1298
|
+
log_message(e.message, Thor::Shell::Color::RED)
|
1299
|
+
return false
|
1300
|
+
end
|
1301
|
+
end
|
1252
1302
|
|
1253
1303
|
desc 'upload_data', 'Upload data files', :hide => true
|
1254
1304
|
method_option :ignore, :type => :array, :aliases => ["-i", "--i"], :desc => "ignore following files"
|
@@ -1699,18 +1749,22 @@ module Cnvrg
|
|
1699
1749
|
end
|
1700
1750
|
|
1701
1751
|
desc 'data commits', 'List all commits for a specific dataset', :hide => true
|
1702
|
-
|
1703
|
-
|
1704
|
-
verify_logged_in(true)
|
1752
|
+
def list_dataset_commits(dataset_url, commit_sha1: nil)
|
1753
|
+
verify_logged_in(false)
|
1705
1754
|
log_start(__method__, args, options)
|
1706
1755
|
|
1707
|
-
|
1708
|
-
|
1709
|
-
|
1756
|
+
if dataset_url == "."
|
1757
|
+
dataset_dir = is_cnvrg_dir(Dir.pwd)
|
1758
|
+
@dataset = Dataset.new(dataset_dir)
|
1759
|
+
else
|
1760
|
+
owner, slug = get_owner_slug(dataset_url)
|
1761
|
+
@dataset = Dataset.new(dataset_info: {:owner => owner, :slug => slug})
|
1762
|
+
end
|
1763
|
+
|
1764
|
+
result = @dataset.list_commits(commit_sha1:commit_sha1)
|
1710
1765
|
list = result["result"]["list"]
|
1711
1766
|
|
1712
1767
|
print_table(list)
|
1713
|
-
|
1714
1768
|
end
|
1715
1769
|
|
1716
1770
|
desc 'commits', 'List all commits for a specific Project'
|
@@ -1741,17 +1795,17 @@ module Cnvrg
|
|
1741
1795
|
|
1742
1796
|
|
1743
1797
|
desc 'git_clone', 'Clone project'
|
1798
|
+
method_option :soft, :type => :boolean, :aliases => ["-s", "--soft"], :default => false, :hide => true
|
1744
1799
|
def git_clone(slug, owner)
|
1745
1800
|
verify_logged_in(false)
|
1746
1801
|
log_start(__method__, args, options)
|
1747
|
-
|
1802
|
+
project_home = Dir.pwd
|
1803
|
+
soft = options["soft"] || false
|
1804
|
+
Project.stop_if_project_present(project_home, slug) if soft
|
1748
1805
|
clone_resp = Project.clone_dir_remote(slug, owner, slug,true)
|
1749
|
-
|
1806
|
+
exit 1 if not clone_resp
|
1807
|
+
idx_status = Project.new(get_project_home).generate_idx(files:[])
|
1750
1808
|
FileUtils.mkdir_p File.join(get_project_home, ENV['CNVRG_OUTPUT_DIR']) if ENV['CNVRG_OUTPUT_DIR'].present?
|
1751
|
-
@executer = Cnvrg::Helpers::Executer.get_executer
|
1752
|
-
if @executer.present?
|
1753
|
-
@executer.update_git_commit
|
1754
|
-
end
|
1755
1809
|
end
|
1756
1810
|
|
1757
1811
|
|
@@ -1791,7 +1845,7 @@ module Cnvrg
|
|
1791
1845
|
desc 'clone PROJECT_URL', 'Clone project'
|
1792
1846
|
method_option :remote, :type => :boolean, :aliases => ["-r", "--r"], :default => false
|
1793
1847
|
method_option :commit, :type => :string, :aliases => ["-c", "--c"], :default => nil
|
1794
|
-
|
1848
|
+
method_option :soft, :type => :boolean, :aliases => ["-s", "--soft"], :default => false, :hide => true
|
1795
1849
|
def clone(project_url)
|
1796
1850
|
begin
|
1797
1851
|
verify_logged_in(false)
|
@@ -1801,6 +1855,8 @@ module Cnvrg
|
|
1801
1855
|
slug = url_parts[project_index + 1]
|
1802
1856
|
owner = url_parts[project_index - 1]
|
1803
1857
|
remote = options["remote"] || false
|
1858
|
+
soft = options["soft"] || false
|
1859
|
+
|
1804
1860
|
|
1805
1861
|
response = Cnvrg::API.request("users/#{owner}/projects/#{slug}/get_project", 'GET')
|
1806
1862
|
Cnvrg::CLI.is_response_success(response)
|
@@ -1814,6 +1870,8 @@ module Cnvrg
|
|
1814
1870
|
clone_resp = false
|
1815
1871
|
project_home = Dir.pwd
|
1816
1872
|
|
1873
|
+
Project.stop_if_project_present(project_home, project_name) if soft
|
1874
|
+
|
1817
1875
|
if remote and !git
|
1818
1876
|
clone_resp = Project.clone_dir_remote(slug, owner, project_name,git)
|
1819
1877
|
elsif git
|
@@ -1954,8 +2012,6 @@ module Cnvrg
|
|
1954
2012
|
method_option :parallel, :type => :numeric, :aliases => ["-p", "--parallel"], :desc => "uparallel upload at the same time", :default => 15
|
1955
2013
|
method_option :init, :type => :boolean, :aliases => ["--initial"], :desc => "initial sync", :default => false
|
1956
2014
|
method_option :message, :type => :string, :aliases => ["--message"], :desc => "create commit with message", :default => nil
|
1957
|
-
|
1958
|
-
|
1959
2015
|
def sync_data_new(new_branch, force, verbose, commit, all_files, tags ,parallel, chunk_size, init, message)
|
1960
2016
|
verify_logged_in(true)
|
1961
2017
|
log_start(__method__, args, options)
|
@@ -1964,11 +2020,13 @@ module Cnvrg
|
|
1964
2020
|
# w(verbose=false, new_branch=false,sync=false, commit=nil,all_files=true)
|
1965
2021
|
total_deleted, total_downloaded = invoke :download_data_new,[verbose, new_branch, true, commit, all_files], :new_branch=>new_branch, :direct=>false, :force =>force
|
1966
2022
|
end
|
1967
|
-
|
2023
|
+
|
1968
2024
|
invoke :upload_data_new,[new_branch, verbose, true, force, tags, chunk_size, message:message, total_deleted: total_deleted, total_downloaded: total_downloaded],
|
1969
2025
|
:new_branch=>new_branch, :direct=>false, :force =>force, :sync =>true, :tags =>tags, :parallel => parallel, :message => message
|
1970
2026
|
|
1971
2027
|
end
|
2028
|
+
|
2029
|
+
|
1972
2030
|
desc 'upload_data_new', 'upload_data_new', :hide => true
|
1973
2031
|
method_option :verbose, :type => :boolean, :aliases => ["-v"], :default => false
|
1974
2032
|
method_option :new_branch, :type => :boolean, :aliases => ["-nb"], :desc => "create new branch of commits"
|
@@ -2214,13 +2272,24 @@ module Cnvrg
|
|
2214
2272
|
method_option :git_diff, :type => :boolean, :aliases => ["--git_diff"], :default => false
|
2215
2273
|
method_option :job_slug, :type => :string, :aliases => ["--job"], :default => nil, :hide=>true
|
2216
2274
|
method_option :job_type, :type => :string, :aliases => [ "--job_type"], :default => nil, :hide=>true
|
2275
|
+
method_option :suppress_exceptions, :type => :boolean, :aliases => ["--suppress-exceptions"], :default => true
|
2276
|
+
method_option :debug_mode, :type => :boolean, :aliases => ["--debug-mode"], :default => false
|
2217
2277
|
|
2218
|
-
def upload(link = false, sync = false, direct = false, ignore_list = "", in_exp = false, force = false, output_dir = "output", job_type = nil, job_slug = nil)
|
2278
|
+
def upload(link = false, sync = false, direct = false, ignore_list = "", in_exp = false, force = false, output_dir = "output", job_type = nil, job_slug = nil, suppress_exceptions = true)
|
2219
2279
|
begin
|
2220
2280
|
# we are passing "force" twice.. doesnt really make sense :\\
|
2221
2281
|
verify_logged_in(true)
|
2222
2282
|
log_start(__method__, args, options)
|
2223
2283
|
@project = Project.new(get_project_home)
|
2284
|
+
|
2285
|
+
# Enable local/experiment exception logging
|
2286
|
+
suppress_exceptions = suppress_exceptions ? suppress_exceptions : options[:suppress_exceptions]
|
2287
|
+
if in_exp
|
2288
|
+
exp_obj = Experiment.new(@project.owner, @project.slug, job_id: job_slug)
|
2289
|
+
else
|
2290
|
+
exp_obj = nil
|
2291
|
+
end
|
2292
|
+
|
2224
2293
|
commit_msg = options["message"]
|
2225
2294
|
if commit_msg.nil? or commit_msg.empty?
|
2226
2295
|
commit_msg = ""
|
@@ -2292,8 +2361,6 @@ module Cnvrg
|
|
2292
2361
|
end
|
2293
2362
|
update_count = 0
|
2294
2363
|
update_total = result["added"].size + result["updated_on_local"].size + result["deleted"].size
|
2295
|
-
successful_updates = []
|
2296
|
-
successful_deletions = []
|
2297
2364
|
if options["verbose"]
|
2298
2365
|
if update_total == 1
|
2299
2366
|
log_message("Updating #{update_total} file", Thor::Shell::Color::BLUE)
|
@@ -2313,8 +2380,11 @@ module Cnvrg
|
|
2313
2380
|
end
|
2314
2381
|
job_type = options['job_type'] || job_type
|
2315
2382
|
job_slug = options['job_slug'] || job_slug
|
2316
|
-
commit_sha1 = @files.start_commit(
|
2317
|
-
|
2383
|
+
commit_sha1 = @files.start_commit(
|
2384
|
+
new_branch, force: force, exp_start_commit: exp_start_commit,
|
2385
|
+
job_type: job_type, job_slug: job_slug, start_commit: current_commit,message: options["message"],
|
2386
|
+
debug_mode: options["debug_mode"]
|
2387
|
+
)["result"]["commit_sha1"]
|
2318
2388
|
# upload / update
|
2319
2389
|
# delete
|
2320
2390
|
to_upload = result["added"] + result["updated_on_local"]
|
@@ -2325,32 +2395,30 @@ module Cnvrg
|
|
2325
2395
|
:starting_at => 0,
|
2326
2396
|
:total => (to_upload.size + deleted.size),
|
2327
2397
|
:autofinish => true)
|
2328
|
-
@files.upload_multiple_files(to_upload, commit_sha1, progress: progressbar)
|
2329
2398
|
|
2330
|
-
@files.
|
2399
|
+
buffered_errors = @files.upload_multiple_files(to_upload, commit_sha1, progress: progressbar, suppress_exceptions: suppress_exceptions)
|
2400
|
+
@files.delete_files_from_server(deleted, commit_sha1, suppress_exceptions: suppress_exceptions)
|
2331
2401
|
|
2332
2402
|
progressbar.finish
|
2403
|
+
|
2404
|
+
if buffered_errors.is_a?(Hash)
|
2405
|
+
buffered_errors.keys.each do |file|
|
2406
|
+
to_upload.delete(file)
|
2407
|
+
Cnvrg::CLI.log_message(buffered_errors[file], 'red')
|
2408
|
+
exp_obj.job_log([buffered_errors[file]]) unless exp_obj.nil?
|
2409
|
+
end
|
2410
|
+
end
|
2411
|
+
|
2333
2412
|
res = @files.end_commit(commit_sha1, force: force, message: commit_msg)
|
2334
2413
|
unless Cnvrg::CLI.is_response_success(res, false)
|
2335
2414
|
raise StandardError.new("Cant end commit")
|
2336
2415
|
end
|
2416
|
+
|
2337
2417
|
# save idx
|
2338
2418
|
@project.update_idx_with_files_commits!((to_upload + deleted), res["result"]["commit_time"])
|
2339
2419
|
@project.update_idx_with_commit!(commit_sha1)
|
2340
2420
|
if options["verbose"]
|
2341
2421
|
log_message("#{check} Done", Thor::Shell::Color::BLUE)
|
2342
|
-
if successful_updates.size > 0
|
2343
|
-
successful_updates.flatten!
|
2344
|
-
log_message("Updated:", Thor::Shell::Color::GREEN)
|
2345
|
-
suc = successful_updates.map {|x| x = Helpers.checkmark() + " " + x}
|
2346
|
-
log_message(suc.join("\n"), Thor::Shell::Color::GREEN)
|
2347
|
-
end
|
2348
|
-
if successful_deletions.size > 0
|
2349
|
-
successful_deletions.flatten!
|
2350
|
-
log_message("Deleted:", Thor::Shell::Color::GREEN)
|
2351
|
-
del = successful_updates.map {|x| x = Helpers.checkmark() + " " + x}
|
2352
|
-
log_message(del.join("\n"), Thor::Shell::Color::GREEN)
|
2353
|
-
end
|
2354
2422
|
log_message("Total of #{update_count} / #{update_total} files.", Thor::Shell::Color::GREEN)
|
2355
2423
|
else
|
2356
2424
|
if return_id
|
@@ -2375,9 +2443,13 @@ module Cnvrg
|
|
2375
2443
|
if e.is_a? SignalException
|
2376
2444
|
say "\nAborting", Thor::Shell::Color::BLUE
|
2377
2445
|
say "\nRolling back all changes", Thor::Shell::Color::BLUE
|
2446
|
+
|
2447
|
+
exp_obj.job_log(["Aborting", "Rolling back all changes"]) unless exp_obj.nil?
|
2378
2448
|
else
|
2379
2449
|
log_message(error_message, Thor::Shell::Color::RED)
|
2380
2450
|
log_error(e)
|
2451
|
+
|
2452
|
+
exp_obj.job_log([error_message, e]) unless exp_obj.nil?
|
2381
2453
|
end
|
2382
2454
|
@files.rollback_commit(commit_sha1) unless commit_sha1.nil?
|
2383
2455
|
print_res = {
|
@@ -2896,6 +2968,10 @@ module Cnvrg
|
|
2896
2968
|
method_option :files, :type => :string, :aliases => ["--files"], :default => nil
|
2897
2969
|
method_option :output_dir, :type => :string, :aliases => ["--output_dir"], :default => nil
|
2898
2970
|
method_option :git_diff, :type => :boolean, :aliases => ["--git_diff"], :default => false
|
2971
|
+
method_option :suppress_exceptions, :type => :boolean, :aliases => ["--suppress-exceptions"], :default => true
|
2972
|
+
method_option :debug_mode, :type => :boolean, :aliases => ["--debug-mode"], :default => false
|
2973
|
+
method_option :git_diff, :type => :boolean, :aliases => ["--git_diff"], :default => false
|
2974
|
+
|
2899
2975
|
def sync(direct = true)
|
2900
2976
|
verify_logged_in(true) if direct
|
2901
2977
|
@project = Project.new(get_project_home)
|
@@ -2907,16 +2983,20 @@ module Cnvrg
|
|
2907
2983
|
is_git = ENV['CNVRG_GIT_PROJECT'] == "true" || @project.is_git
|
2908
2984
|
in_exp = options["in_exp"] || (job_slug.present? and job_type.present?)
|
2909
2985
|
in_exp = false if job_type.present? and job_type == "NotebookSession"
|
2986
|
+
output_dir = options["output_dir"] || ENV['CNVRG_OUTPUT_DIR']
|
2987
|
+
|
2910
2988
|
run_download = true
|
2911
|
-
if
|
2989
|
+
if (job_type == "NotebookSession" and is_git) or job_type == "Experiment" or options['force']
|
2912
2990
|
run_download = false
|
2913
2991
|
end
|
2914
|
-
|
2992
|
+
|
2993
|
+
if run_download or options['debug_mode']
|
2915
2994
|
invoke :download, [true, "", in_exp ], :new_branch => options["new_branch"], :verbose => options["verbose"], :sync => true
|
2916
2995
|
end
|
2917
|
-
invoke :upload, [false, true, direct, "",in_exp,options[:force],
|
2996
|
+
invoke :upload, [false, true, direct, "",in_exp,options[:force], output_dir, job_type, job_slug ], :new_branch => options["new_branch"], :verbose => options["verbose"], :sync => true,
|
2918
2997
|
:ignore => options[:ignore], :force => options[:force], :message => options[:message], :deploy => options["deploy"], :return_id => options["return_id"],
|
2919
|
-
:files => options["files"], :output_dir =>
|
2998
|
+
:files => options["files"], :output_dir => output_dir, :job_slug => job_slug, :job_type => job_type, :suppress_exceptions => options["suppress_exceptions"], :debug_mode => options['debug_mode'], :git_diff => options["git_diff"]
|
2999
|
+
|
2920
3000
|
end
|
2921
3001
|
|
2922
3002
|
desc 'run cmd', 'Runs an experiment'
|
@@ -3061,6 +3141,8 @@ module Cnvrg
|
|
3061
3141
|
method_option :data, :type => :string, :aliases => ["-d", "--data"], :default => ""
|
3062
3142
|
method_option :data_commit, :type => :string, :aliases => ["-dc", "--data_commit"], :default => ""
|
3063
3143
|
method_option :ignore, :type => :string, :aliases => ["-i", "--ignore"], :desc => "ignore following files", :default => ""
|
3144
|
+
method_option :docker_id, :type => :string, :aliases => ["--docker_id"], :desc => "docker id to watch", :default => ""
|
3145
|
+
method_option :gpu_util_from_docker, :type => :boolean, :aliases => ["--gpu-util-from-docker"], :desc => "take gpu utilization from job docker", :default => false
|
3064
3146
|
method_option :remote, :type => :boolean, :aliases => ["--remote"], :default => false
|
3065
3147
|
method_option :gpu, :type => :boolean, :aliases => ["--gpu"], :default => false
|
3066
3148
|
method_option :force, :type => :boolean, :aliases => ["-f", "--force"], :default => false
|
@@ -3068,6 +3150,7 @@ module Cnvrg
|
|
3068
3150
|
method_option :periodic_sync, :type => :string, :aliases => ["-ps", "--periodic_sync"], :default => ""
|
3069
3151
|
method_option :output_dir, :type => :string, :aliases => ["-o", "--output_dir"], :default => nil
|
3070
3152
|
method_option :data_query, :type => :string, :aliases => ["-q", "--query"], :default => nil
|
3153
|
+
method_option :use_bash, :type => :boolean, :aliases => ["-b", "--use_bash"], :default => false
|
3071
3154
|
|
3072
3155
|
def exec(*cmd)
|
3073
3156
|
log = []
|
@@ -3134,8 +3217,12 @@ module Cnvrg
|
|
3134
3217
|
end
|
3135
3218
|
remote = options["remote"]
|
3136
3219
|
if remote
|
3137
|
-
docker_id
|
3138
|
-
|
3220
|
+
if options["docker_id"].present?
|
3221
|
+
docker_id = options["docker_id"]
|
3222
|
+
else
|
3223
|
+
docker_id = `cat /etc/hostname`
|
3224
|
+
docker_id = docker_id.strip()
|
3225
|
+
end
|
3139
3226
|
end
|
3140
3227
|
is_on_gpu = options["gpu"]
|
3141
3228
|
start_commit = @project.last_local_commit
|
@@ -3145,9 +3232,9 @@ module Cnvrg
|
|
3145
3232
|
|
3146
3233
|
platform = RUBY_PLATFORM
|
3147
3234
|
machine_name = Socket.gethostname
|
3235
|
+
machine_activity_slug = ENV["CNVRG_MACHINE_ACTIVITY"]
|
3148
3236
|
begin
|
3149
|
-
|
3150
|
-
@exp.start(cmd, platform, machine_name, start_commit, title, email_notification, machine_activity, script_path, sync_before_terminate, periodic_sync)
|
3237
|
+
@exp.start(cmd, platform, machine_name, start_commit, title, email_notification, machine_activity_slug, script_path, sync_before_terminate, periodic_sync)
|
3151
3238
|
log_message("Experiment's live results: #{Cnvrg::Helpers.remote_url}/#{@project.owner}/projects/#{@project.slug}/experiments/#{@exp.slug}", Thor::Shell::Color::GREEN)
|
3152
3239
|
log_message("Running: #{cmd}\n", Thor::Shell::Color::BLUE)
|
3153
3240
|
unless @exp.slug.nil?
|
@@ -3165,7 +3252,7 @@ module Cnvrg
|
|
3165
3252
|
begin
|
3166
3253
|
stats = remote ? usage_metrics_in_docker(docker_id) : Helpers.ubuntu? ? {memory: memory_usage, cpu: cpu_usage} : {}
|
3167
3254
|
if is_on_gpu
|
3168
|
-
gu = gpu_util
|
3255
|
+
gu = gpu_util(take_from_docker: options["gpu_util_from_docker"], docker_id: docker_id)
|
3169
3256
|
stats['gpu_util'] = gu[0]
|
3170
3257
|
stats['gpu'] = gu[1]
|
3171
3258
|
end
|
@@ -3177,6 +3264,16 @@ module Cnvrg
|
|
3177
3264
|
end
|
3178
3265
|
end
|
3179
3266
|
start_time = Time.now
|
3267
|
+
shell_type = options["use_bash"] ? "bash -l" : "sh"
|
3268
|
+
if @exp.get_cmd.present?
|
3269
|
+
cmd = @exp.get_cmd
|
3270
|
+
if options["docker_id"].present? # Escape for docker exec
|
3271
|
+
cmd = cmd.gsub("\"", "\\\"")
|
3272
|
+
end
|
3273
|
+
end
|
3274
|
+
if options["docker_id"].present?
|
3275
|
+
cmd = "docker exec -it #{options["docker_id"]} #{shell_type} -c \"#{cmd}\""
|
3276
|
+
end
|
3180
3277
|
PTY.spawn(@exp.as_env, cmd) do |stdout, stdin, pid, stderr|
|
3181
3278
|
begin
|
3182
3279
|
stdout.each do |line|
|
@@ -3191,7 +3288,7 @@ module Cnvrg
|
|
3191
3288
|
puts line
|
3192
3289
|
end
|
3193
3290
|
log << cur_log
|
3194
|
-
if log.size >=
|
3291
|
+
if log.size >= 1
|
3195
3292
|
@exp.upload_temp_log(log) unless log.empty?
|
3196
3293
|
log = []
|
3197
3294
|
elsif (start_time + 15.seconds) <= Time.now
|
@@ -3241,29 +3338,26 @@ module Cnvrg
|
|
3241
3338
|
exp_success = false
|
3242
3339
|
end
|
3243
3340
|
|
3244
|
-
|
3245
|
-
|
3246
|
-
|
3247
|
-
|
3248
|
-
|
3249
|
-
|
3250
|
-
|
3251
|
-
# invoke :upload, [false, false, true, ignore, true, true], :output_dir => output_dir, :force=>true, :job_type=>'Experiment', :job_slug=>@exp.slug
|
3252
|
-
end
|
3253
|
-
else
|
3254
|
-
upload(false, false, true, ignore, true, true,nil,"Experiment",@exp.slug )
|
3255
|
-
|
3256
|
-
# invoke :upload, [false, false, true, ignore,true, true], :job_type=>'Experiment', :job_slug=>@exp.slug, :force=>true
|
3341
|
+
if sync_after
|
3342
|
+
@exp.job_log(["Syncing Experiment"])
|
3343
|
+
# Sync after run
|
3344
|
+
if @project.is_git
|
3345
|
+
output_dir = output_dir || @exp.output_dir
|
3346
|
+
if output_dir.present?
|
3347
|
+
upload(false, false, true, ignore, true, true, output_dir, "Experiment", @exp.slug, true )
|
3257
3348
|
end
|
3258
|
-
|
3349
|
+
else
|
3350
|
+
upload(false, false, true, ignore, true, true, nil, "Experiment", @exp.slug, true )
|
3259
3351
|
end
|
3352
|
+
end
|
3353
|
+
|
3260
3354
|
end_commit = @project.last_local_commit
|
3261
3355
|
if end_commit.present?
|
3262
3356
|
@exp.job_log(["Experiment end commit: #{end_commit}"])
|
3263
3357
|
end
|
3264
3358
|
|
3265
3359
|
# log_thread.join
|
3266
|
-
|
3360
|
+
stats_thread.join
|
3267
3361
|
|
3268
3362
|
res = @exp.end(log, exit_status, end_commit, cpu_average, memory_average, end_time: end_time)
|
3269
3363
|
|
@@ -3411,8 +3505,8 @@ module Cnvrg
|
|
3411
3505
|
local_folders_options = options["local_folders"]
|
3412
3506
|
options_hash.except!("schedule", "recurring", "machine_type", "image", "upload_output", "grid", "data", "data_commit", "title",
|
3413
3507
|
"local", "small", "medium", "large", "gpu", "gpuxl", "gpuxxl","max_time","dataset_only_tree",
|
3414
|
-
"data_query", "git_commit","git_branch", "restart_if_stuck","local_folders","output_dir", "commit", "datasets",
|
3415
|
-
"email_notification_error", "email_notification_success", "emails")
|
3508
|
+
"data_query", "git_commit","git_branch", "restart_if_stuck","local_folders","output_dir", "commit", "datasets",
|
3509
|
+
"requirements", "prerun", "email_notification_error", "email_notification_success", "emails")
|
3416
3510
|
exec_options = options_hash.map {|x| "--#{x[0]}=#{x[1]}"}.flatten.join(" ")
|
3417
3511
|
command = "#{exec_options} #{remote} #{upload_output_option} #{cmd.flatten.join(" ")}"
|
3418
3512
|
commit_to_run = options["commit"] || nil
|
@@ -4237,144 +4331,6 @@ module Cnvrg
|
|
4237
4331
|
|
4238
4332
|
end
|
4239
4333
|
|
4240
|
-
method_option :small, :type => :boolean, :aliases => ["-sm", "--small"], :default => false
|
4241
|
-
method_option :medium, :type => :boolean, :aliases => ["-md", "--medium"], :default => false
|
4242
|
-
method_option :large, :type => :boolean, :aliases => ["-lg", "--large"], :default => false
|
4243
|
-
method_option :gpu, :type => :boolean, :aliases => ["--gpu"], :default => false
|
4244
|
-
method_option :gpuxl, :type => :boolean, :aliases => ["--gpuxl"], :default => false
|
4245
|
-
method_option :gpuxxl, :type => :boolean, :aliases => ["--gpuxxl"], :default => false
|
4246
|
-
method_option :image, :type => :string, :aliases => ["-i", "--image"], :default => ""
|
4247
|
-
method_option :public, :type => :boolean, :aliases => ["-p", "--public"], :default => false
|
4248
|
-
method_option :base, :type => :boolean, :aliases => ["-b", "--base"], :default => false
|
4249
|
-
method_option :python3, :type => :boolean, :aliases => ["--python3"], :default => false
|
4250
|
-
method_option :docker_path, :type => :string, :aliases => ["--docker_path"], :default => ""
|
4251
|
-
|
4252
|
-
|
4253
|
-
desc 'create_custom_image', 'run commands inside containers', :hide => true
|
4254
|
-
|
4255
|
-
def build_image(image_name)
|
4256
|
-
begin
|
4257
|
-
verify_logged_in(false)
|
4258
|
-
log_start(__method__, args, options)
|
4259
|
-
instances = {"small" => options["small"], "medium" => options["medium"], "large" => options["large"],
|
4260
|
-
"gpu" => options["gpu"], "gpuxl" => options["gpuxl"], "gpuxxl" => options["gpuxxl"]}
|
4261
|
-
instance_type = get_instance_type(instances)
|
4262
|
-
image_extend = options["image"]
|
4263
|
-
public = options["public"]
|
4264
|
-
base = options["base"]
|
4265
|
-
python3 = options["python3"]
|
4266
|
-
docker_path = options["docker_path"]
|
4267
|
-
owner = CLI.get_owner
|
4268
|
-
checks = Helpers.checkmark()
|
4269
|
-
tar_path = nil
|
4270
|
-
if !docker_path.nil? and !docker_path.empty?
|
4271
|
-
docker_path = File.absolute_path(docker_path)
|
4272
|
-
#create tar of the docker path: it could be a docker file, and it could be a docker folder
|
4273
|
-
tar_path = File.expand_path('~') + "/.cnvrg/tmp/docker_#{File.basename docker_path}.tar.gz"
|
4274
|
-
resp = create_docker_tar(docker_path, tar_path)
|
4275
|
-
if !resp
|
4276
|
-
log_message("Couldn't create tar from docker path", Thor::Shell::Color::RED)
|
4277
|
-
FileUtils.rm_rf tar_path
|
4278
|
-
exit(1)
|
4279
|
-
end
|
4280
|
-
files = Cnvrg::Files.new(owner, "")
|
4281
|
-
resp = Images.create_new_custom_image_with_docker(instance_type, owner, image_name, public, base, image_extend, python3, tar_path, files)
|
4282
|
-
if resp
|
4283
|
-
end
|
4284
|
-
else
|
4285
|
-
log_message("Creating machine for your custom image, this may take a few moments...", Thor::Shell::Color::BLUE)
|
4286
|
-
resp = Images.create_new_custom_image(instance_type, owner, image_name, public, base, image_extend, python3, nil)
|
4287
|
-
|
4288
|
-
end
|
4289
|
-
|
4290
|
-
if Cnvrg::CLI.is_response_success(resp, false)
|
4291
|
-
image_slug = resp["result"]["slug"]
|
4292
|
-
container = resp["result"]["machine_c"]
|
4293
|
-
log_message("#{checks} Created image and machine successfully", Thor::Shell::Color::GREEN)
|
4294
|
-
log_message("Connecting to machine", Thor::Shell::Color::BLUE)
|
4295
|
-
ssh = Ssh.new(resp)
|
4296
|
-
if !ssh.is_ssh
|
4297
|
-
log_message("Couldn't connect to machine,aborting", Thor::Shell::Color::RED)
|
4298
|
-
Images.revoke_custom_new_image(owner, image_slug)
|
4299
|
-
end
|
4300
|
-
log_message("run command until ctrl + c or quit is initiated", Thor::Shell::Color::BLUE)
|
4301
|
-
begin
|
4302
|
-
logs = []
|
4303
|
-
|
4304
|
-
while true
|
4305
|
-
command = ask("$>")
|
4306
|
-
logs << {time: Time.now,
|
4307
|
-
message: command,
|
4308
|
-
type: "stdout"
|
4309
|
-
}
|
4310
|
-
if command.eql? "quit"
|
4311
|
-
log_message("Commiting Image..", Thor::Shell::Color::BLUE)
|
4312
|
-
break
|
4313
|
-
end
|
4314
|
-
res = ssh.exec_command(command)
|
4315
|
-
begin
|
4316
|
-
res_parsed = JSON.parse(res)
|
4317
|
-
res = res_parsed.join(",")
|
4318
|
-
end
|
4319
|
-
|
4320
|
-
puts res
|
4321
|
-
logs << {time: Time.now,
|
4322
|
-
message: res,
|
4323
|
-
type: "stdout"
|
4324
|
-
}
|
4325
|
-
logs.flatten!
|
4326
|
-
|
4327
|
-
end
|
4328
|
-
|
4329
|
-
rescue SignalException
|
4330
|
-
log_message("Commiting Image..", Thor::Shell::Color::BLUE)
|
4331
|
-
|
4332
|
-
end
|
4333
|
-
resp = Images.commit_custom_image(owner, image_slug, logs)
|
4334
|
-
if Cnvrg::CLI.is_response_success(resp, false)
|
4335
|
-
log_message("#{checks} Image commited successfuly, email will be sent when image is ready", Thor::Shell::Color::GREEN)
|
4336
|
-
else
|
4337
|
-
if image_slug
|
4338
|
-
Images.revoke_custom_new_image(owner, image_slug)
|
4339
|
-
end
|
4340
|
-
if ssh
|
4341
|
-
ssh.close_ssh()
|
4342
|
-
end
|
4343
|
-
log_message("Image couldn't be commited, rolling back changes", Thor::Shell::Color::RED)
|
4344
|
-
|
4345
|
-
exit(1)
|
4346
|
-
end
|
4347
|
-
if ssh
|
4348
|
-
ssh.close_ssh()
|
4349
|
-
end
|
4350
|
-
|
4351
|
-
|
4352
|
-
end
|
4353
|
-
rescue => e
|
4354
|
-
log_message("Error occurd, aborting", Thor::Shell::Color::RED)
|
4355
|
-
|
4356
|
-
log_error(e)
|
4357
|
-
if image_slug
|
4358
|
-
Images.revoke_custom_new_image(owner, image_slug)
|
4359
|
-
end
|
4360
|
-
if ssh
|
4361
|
-
ssh.close_ssh()
|
4362
|
-
end
|
4363
|
-
|
4364
|
-
|
4365
|
-
rescue SignalException
|
4366
|
-
if image_slug
|
4367
|
-
Images.revoke_custom_new_image(owner, image_slug)
|
4368
|
-
end
|
4369
|
-
if ssh
|
4370
|
-
ssh.close_ssh
|
4371
|
-
end
|
4372
|
-
say "\nAborting"
|
4373
|
-
exit(1)
|
4374
|
-
end
|
4375
|
-
|
4376
|
-
end
|
4377
|
-
|
4378
4334
|
|
4379
4335
|
desc 'build', 'run commands inside containers', :hide => true
|
4380
4336
|
method_option :install, :type => :string, :aliases => ["--i"], :default => nil, :desc => "Install from the given instructions file"
|
@@ -4568,66 +4524,7 @@ module Cnvrg
|
|
4568
4524
|
end
|
4569
4525
|
|
4570
4526
|
|
4571
|
-
desc 'upload_image', 'commit notebook changes to create a new notebook image', :hide =>true
|
4572
|
-
|
4573
|
-
def upload_image_old(image_id, is_public, is_base, *message)
|
4574
|
-
verify_logged_in(true)
|
4575
|
-
log_start(__method__, args, options)
|
4576
|
-
image = Docker::Image.get(image_id)
|
4577
|
-
project_home = get_project_home
|
4578
|
-
@project = Project.new(project_home)
|
4579
|
-
last_local_commit = @project.last_local_commit
|
4580
|
-
image_name = @project.slug + "#{last_local_commit}"
|
4581
|
-
path = File.expand_path('~') + "/.cnvrg/tmp/#{image_name}.tar"
|
4582
|
-
owner = Cnvrg::CLI.get_owner()
|
4583
|
-
if !message.nil? or !message.empty?
|
4584
|
-
message = message.join(" ")
|
4585
|
-
end
|
4586
|
-
|
4587
|
-
log_message("Saving image's current state", Thor::Shell::Color::BLUE)
|
4588
|
-
image.save(path)
|
4589
|
-
|
4590
|
-
begin
|
4591
|
-
log_message("Compressing image file to upload", Thor::Shell::Color::BLUE)
|
4592
|
-
gzipRes = system("gzip -f #{path}")
|
4593
|
-
if !gzipRes
|
4594
|
-
|
4595
|
-
log_message("Couldn't create tar file from image", Thor::Shell::Color::RED)
|
4596
|
-
exit(1)
|
4597
|
-
end
|
4598
|
-
path = path + ".gz"
|
4599
|
-
@files = Cnvrg::Files.new(owner, "")
|
4600
|
-
|
4601
|
-
exit_status = $?.exitstatus
|
4602
|
-
if exit_status == 0
|
4603
|
-
log_message("Uploading image file", Thor::Shell::Color::BLUE)
|
4604
|
-
|
4605
|
-
diff = container_changes(Dir.pwd)
|
4606
|
-
res = @files.upload_image(path, image_name, owner, is_public, is_base, diff[1], diff[0], diff[2], message, image.commit_id)
|
4607
|
-
if res
|
4608
|
-
File.delete(path)
|
4609
|
-
image_loc = is_project_with_docker(Dir.pwd)
|
4610
|
-
image_loc.update_slug(res["result"]["id"])
|
4611
|
-
|
4612
|
-
checks = Helpers.checkmark()
|
4613
|
-
log_message("#{checks} Done", Thor::Shell::Color::GREEN)
|
4614
|
-
else
|
4615
|
-
log_message("Couldn't upload image", Thor::Shell::Color::RED)
|
4616
|
-
|
4617
|
-
end
|
4618
|
-
else
|
4619
|
-
log_message("Couldn't create image file for: #{image_name}", Thor::Shell::Color::RED)
|
4620
|
-
exit(1)
|
4621
|
-
end
|
4622
|
-
rescue => e
|
4623
|
-
log_message("Couldn't upload image file for: #{image_name}", Thor::Shell::Color::RED)
|
4624
|
-
log_error(e)
|
4625
|
-
rescue SignalException
|
4626
4527
|
|
4627
|
-
say "Couldn't upload image file for: #{image_name}", Thor::Shell::Color::RED
|
4628
|
-
exit(1)
|
4629
|
-
end
|
4630
|
-
end
|
4631
4528
|
|
4632
4529
|
desc '', '', :hide => true
|
4633
4530
|
|
@@ -4638,278 +4535,30 @@ module Cnvrg
|
|
4638
4535
|
|
4639
4536
|
end
|
4640
4537
|
|
4641
|
-
desc '', '', :hide => true
|
4642
|
-
|
4643
|
-
|
4644
|
-
|
4645
|
-
|
4646
|
-
|
4647
|
-
|
4648
|
-
|
4649
|
-
|
4650
|
-
|
4651
|
-
|
4652
|
-
|
4653
|
-
|
4654
|
-
|
4655
|
-
|
4656
|
-
|
4657
|
-
|
4658
|
-
|
4659
|
-
|
4660
|
-
|
4661
|
-
def tensor_port_container(container_id)
|
4662
|
-
container = Docker::Container.get(container_id)
|
4663
|
-
say container.json["HostConfig"]["PortBindings"]["6006/tcp"][0]["HostPort"]
|
4664
|
-
end
|
4665
|
-
|
4666
|
-
desc '', '', :hide => true
|
4667
|
-
|
4668
|
-
def stop_container(container_id)
|
4669
|
-
container = Docker::Container.get(container_id)
|
4670
|
-
container.stop()
|
4671
|
-
container.remove()
|
4672
|
-
|
4673
|
-
end
|
4674
|
-
|
4675
|
-
desc '', '', :hide => true
|
4676
|
-
method_option :login, :type => :string, :aliases => ["-l"], :default => ""
|
4677
|
-
method_option :app_dir, :type => :string, :aliases => ["-d"], :default => "/home/ds/notebooks"
|
4678
|
-
method_option :cmd, :type => :string, :aliases => ["-c"], :default => "/usr/local/cnvrg/run_ipython.sh"
|
4679
|
-
|
4680
|
-
|
4681
|
-
def config_remote(image_name, port = 7654, tensport = 6006)
|
4682
|
-
local_images = Docker::Image.all
|
4683
|
-
|
4684
|
-
docker_image_local = local_images.map {|x| x.info["RepoTags"]}.flatten.select {|y| y.eql? "#{image_name}:latest"}.flatten
|
4685
|
-
if docker_image_local.empty?
|
4686
|
-
say "no image"
|
4687
|
-
exit(1)
|
4688
|
-
end
|
4689
|
-
|
4690
|
-
begin
|
4691
|
-
login_content = options["login"]
|
4692
|
-
app_dir = options["app_dir"]
|
4693
|
-
cmd = options["cmd"]
|
4694
|
-
volume_from = options["volume"]
|
4695
|
-
|
4696
|
-
image_settings = {
|
4697
|
-
'Image' => "#{image_name}:latest",
|
4698
|
-
|
4699
|
-
'Cmd' => cmd,
|
4700
|
-
'WorkingDir' => app_dir,
|
4701
|
-
'ExposedPorts' => {
|
4702
|
-
'8888/tcp' => {},
|
4703
|
-
},
|
4704
|
-
'HostConfig' => {
|
4705
|
-
'Binds' => ["/var/run/docker.sock:/var/run/docker.sock", "/usr/bin/docker:/usr/bin/docker"],
|
4706
|
-
'PortBindings' => {
|
4707
|
-
'8888/tcp' => [
|
4708
|
-
{'HostPort' => "#{port}", 'HostIp' => 'localhost'}
|
4709
|
-
],
|
4710
|
-
'6006/tcp' => [
|
4711
|
-
{'HostPort' => "#{tensport}", 'HostIp' => 'localhost'}
|
4712
|
-
],
|
4713
|
-
},
|
4714
|
-
},
|
4715
|
-
}
|
4716
|
-
container = Docker::Container.create(image_settings)
|
4717
|
-
container.start()
|
4718
|
-
command = ["/bin/bash", "-lc", "sudo echo -e \"#{login_content}\" >/home/ds/.netrc"]
|
4719
|
-
container.exec(command, tty: true)
|
4720
|
-
# command = ["/bin/bash", "-lc", "mkdir /home/ds/.cnvrg"]
|
4721
|
-
# container.exec(command, tty: true)
|
4722
|
-
# command = ["/bin/bash", "-lc", "mkdir /home/ds/.cnvrg/tmp"]
|
4723
|
-
# container.exec(command, tty: true)
|
4724
|
-
command = ["/bin/bash", "-lc", "sudo chown -R ds:ds /home/ds/.netrc"]
|
4725
|
-
container.exec(command, tty: true)
|
4726
|
-
command = ["/bin/bash", "-lc", "sudo chmod 0600 /home/ds/.netrc"]
|
4727
|
-
container.exec(command, tty: true)
|
4728
|
-
say "#{container.id}:#{port}##{tensport}"
|
4729
|
-
rescue => e
|
4730
|
-
puts e
|
4731
|
-
if e.message.include? "is not running"
|
4732
|
-
return config_remote(image_name, port - 1, tensport - 1)
|
4733
|
-
end
|
4734
|
-
|
4735
|
-
if container
|
4736
|
-
container.kill()
|
4737
|
-
end
|
4738
|
-
return false
|
4739
|
-
end
|
4740
|
-
end
|
4741
|
-
|
4742
|
-
|
4743
|
-
desc '', '', :hide => true
|
4744
|
-
method_option :login, :type => :string, :aliases => ["-l"], :default => ""
|
4745
|
-
|
4746
|
-
def config_netrc(container)
|
4747
|
-
|
4748
|
-
login_content = options["login"]
|
4749
|
-
|
4750
|
-
container = Docker::Container.get(container)
|
4751
|
-
command = ["/bin/bash", "-lc", "sudo echo -e \"#{login_content}\" >/home/ds/.netrc"]
|
4752
|
-
container.exec(command, tty: true)
|
4753
|
-
command = ["/bin/bash", "-lc", "sudo chown -R ds:ds /home/ds/.netrc"]
|
4754
|
-
container.exec(command, tty: true)
|
4755
|
-
command = ["/bin/bash", "-lc", "sudo chmod 0600 /home/ds/.netrc"]
|
4756
|
-
container.exec(command, tty: true)
|
4757
|
-
say "OK"
|
4758
|
-
|
4759
|
-
end
|
4760
|
-
|
4761
|
-
desc '', '', :hide => true
|
4762
|
-
method_option :login, :type => :string, :aliases => ["-l", "--l"], :default => ""
|
4763
|
-
method_option :app_dir, :type => :string, :aliases => ["-d", "--d"], :default => "/home/ds/notebooks"
|
4764
|
-
method_option :cmd, :type => :string, :aliases => ["-c", "--c"], :default => "/usr/local/cnvrg/run_ipython.sh"
|
4765
|
-
|
4766
|
-
|
4767
|
-
def config_remote_gpu(image_name, port = 7654, tensport = 6006)
|
4768
|
-
local_images = Docker::Image.all
|
4769
|
-
|
4770
|
-
docker_image_local = local_images.map {|x| x.info["RepoTags"]}.flatten.select {|y| y.eql? "#{image_name}:latest"}.flatten
|
4771
|
-
if docker_image_local.empty?
|
4772
|
-
say "no image"
|
4773
|
-
exit(1)
|
4774
|
-
end
|
4775
|
-
|
4776
|
-
begin
|
4777
|
-
login_content = options["login"]
|
4778
|
-
app_dir = options["app_dir"]
|
4779
|
-
cmd = options["cmd"]
|
4780
|
-
|
4781
|
-
# image_settings = {
|
4782
|
-
# 'Image' => "#{image_name}:latest",
|
4783
|
-
# 'User' => 'ds',
|
4784
|
-
# 'Cmd' => cmd,
|
4785
|
-
# 'WorkingDir' => app_dir,
|
4786
|
-
# 'ExposedPorts' => {
|
4787
|
-
# '8888/tcp' => {},
|
4788
|
-
# },
|
4789
|
-
# 'HostConfig' => {
|
4790
|
-
# 'PortBindings' => {
|
4791
|
-
# '8888/tcp' => [
|
4792
|
-
# {'HostPort' => "#{port}", 'HostIp' => 'localhost'}
|
4793
|
-
# ],
|
4794
|
-
# '6006/tcp' => [
|
4795
|
-
# {'HostPort' => "6006", 'HostIp' => 'localhost'}
|
4796
|
-
# ],
|
4797
|
-
# },
|
4798
|
-
# },
|
4799
|
-
# }
|
4800
|
-
|
4801
|
-
container_id = `nvidia-docker run -itd -p #{port}:8888 -p #{tensport}:6006 -w #{app_dir} -v /usr/bin/nvidia-smi:/usr/bin/nvidia-smi -v /var/run/docker.sock:/var/run/docker.sock -v /usr/bin/docker:/usr/bin/docker #{image_name}:latest #{cmd} `
|
4802
|
-
container_id = container_id.gsub("\n", "")
|
4803
|
-
container = Docker::Container.get(container_id)
|
4804
|
-
# container.start()
|
4805
|
-
command = ["/bin/bash", "-lc", "sudo echo -e \"#{login_content}\" >/home/ds/.netrc"]
|
4806
|
-
container.exec(command, tty: true)
|
4807
|
-
command = ["/bin/bash", "-lc", "sudo chown -R ds:ds /home/ds/.netrc"]
|
4808
|
-
container.exec(command, tty: true)
|
4809
|
-
command = ["/bin/bash", "-lc", "sudo chmod 0600 /home/ds/.netrc"]
|
4810
|
-
container.exec(command, tty: true)
|
4811
|
-
say "#{container.id}:#{port}##{tensport}"
|
4812
|
-
rescue => e
|
4813
|
-
if e.message.include? "is not running"
|
4814
|
-
puts "running asgain with: #{port - 1} #{tensport - 1}"
|
4815
|
-
return config_remote_gpu(image_name, port - 1, tensport - 1)
|
4816
|
-
end
|
4817
|
-
|
4818
|
-
if container
|
4819
|
-
container.kill()
|
4538
|
+
desc 'Collect and send job utilization', '', :hide => true
|
4539
|
+
method_option :docker_id, :type => :string, :aliases => ["--docker_id"], :desc => "docker id to watch"
|
4540
|
+
method_option :is_on_gpu, :type => :boolean, :aliases => ["--is_on_gpu"], :desc => "is on gpu", :default => true
|
4541
|
+
def get_utilization()
|
4542
|
+
@exp = Experiment.new(ENV['CNVRG_OWNER'], ENV['CNVRG_PROJECT'], job_id: ENV['CNVRG_JOB_ID'])
|
4543
|
+
docker_id = options["docker_id"]
|
4544
|
+
while true do
|
4545
|
+
sleep 30
|
4546
|
+
begin
|
4547
|
+
stats = usage_metrics_in_docker(docker_id)
|
4548
|
+
if options["is_on_gpu"]
|
4549
|
+
gu = gpu_util(take_from_docker: true, docker_id: docker_id)
|
4550
|
+
stats['gpu_util'] = gu[0]
|
4551
|
+
stats['gpu'] = gu[1]
|
4552
|
+
end
|
4553
|
+
stats['docker_id'] = docker_id
|
4554
|
+
@exp.send_machine_stats [stats] unless stats.empty?
|
4555
|
+
rescue => e
|
4556
|
+
log_error(e)
|
4557
|
+
log_message("Failed to upload ongoing stats, continuing with experiment", Thor::Shell::Color::YELLOW)
|
4820
4558
|
end
|
4821
|
-
return false
|
4822
4559
|
end
|
4823
4560
|
end
|
4824
4561
|
|
4825
|
-
desc '', '', :hide => true
|
4826
|
-
method_option :login, :type => :string, :aliases => ["-l"], :default => ""
|
4827
|
-
|
4828
|
-
def config_flask_remote(image_name, port = 80)
|
4829
|
-
local_images = Docker::Image.all
|
4830
|
-
|
4831
|
-
docker_image_local = local_images.map {|x| x.info["RepoTags"]}.flatten.select {|y| y.eql? "#{image_name}:latest"}.flatten
|
4832
|
-
if docker_image_local.empty?
|
4833
|
-
say "no image"
|
4834
|
-
exit(1)
|
4835
|
-
end
|
4836
|
-
|
4837
|
-
begin
|
4838
|
-
login_content = options["login"]
|
4839
|
-
image_settings = {
|
4840
|
-
'Image' => "#{image_name}:latest",
|
4841
|
-
'User' => 'ds',
|
4842
|
-
'Cmd' => '/usr/local/cnvrg/start_super.sh',
|
4843
|
-
'WorkingDir' => '/home/ds/app',
|
4844
|
-
'ExposedPorts' => {
|
4845
|
-
'80/tcp' => {},
|
4846
|
-
},
|
4847
|
-
'HostConfig' => {
|
4848
|
-
'PortBindings' => {
|
4849
|
-
'80/tcp' => [
|
4850
|
-
{'HostPort' => "#{port}", 'HostIp' => 'localhost'}
|
4851
|
-
],
|
4852
|
-
},
|
4853
|
-
},
|
4854
|
-
}
|
4855
|
-
container = Docker::Container.create(image_settings)
|
4856
|
-
container.start()
|
4857
|
-
command = ["/bin/bash", "-lc", "sudo echo -e \"#{login_content}\" >/home/ds/.netrc"]
|
4858
|
-
container.exec(command, tty: true)
|
4859
|
-
command = ["/bin/bash", "-lc", "sudo chown -R ds:ds /home/ds/.netrc"]
|
4860
|
-
container.exec(command, tty: true)
|
4861
|
-
command = ["/bin/bash", "-lc", "sudo chmod 0600 /home/ds/.netrc"]
|
4862
|
-
container.exec(command, tty: true)
|
4863
|
-
say "#{container.id}:#{port}"
|
4864
|
-
rescue => e
|
4865
|
-
pus e
|
4866
|
-
if e.message.include? "is not running"
|
4867
|
-
return "port is taken"
|
4868
|
-
end
|
4869
|
-
puts "error"
|
4870
|
-
if container
|
4871
|
-
container.kill()
|
4872
|
-
end
|
4873
|
-
return false
|
4874
|
-
end
|
4875
|
-
end
|
4876
|
-
|
4877
|
-
desc '', '', :hide => true
|
4878
|
-
method_option :login, :type => :string, :aliases => ["-l"], :default => ""
|
4879
|
-
|
4880
|
-
def config_flask_remote_gpu(image_name, port = 80)
|
4881
|
-
local_images = Docker::Image.all
|
4882
|
-
|
4883
|
-
docker_image_local = local_images.map {|x| x.info["RepoTags"]}.flatten.select {|y| y.eql? "#{image_name}:latest"}.flatten
|
4884
|
-
if docker_image_local.empty?
|
4885
|
-
say "no image"
|
4886
|
-
exit(1)
|
4887
|
-
end
|
4888
|
-
|
4889
|
-
begin
|
4890
|
-
login_content = options["login"]
|
4891
|
-
container_id = `nvidia-docker run -itd -p 80:80 -w /home/ds/app #{image_name}:latest /usr/local/cnvrg/start_super.sh`
|
4892
|
-
container_id = container_id.gsub("\n", "")
|
4893
|
-
container = Docker::Container.get(container_id)
|
4894
|
-
command = ["/bin/bash", "-lc", "sudo echo -e \"#{login_content}\" >/home/ds/.netrc"]
|
4895
|
-
container.exec(command, tty: true)
|
4896
|
-
command = ["/bin/bash", "-lc", "sudo chown -R ds:ds /home/ds/.netrc"]
|
4897
|
-
container.exec(command, tty: true)
|
4898
|
-
command = ["/bin/bash", "-lc", "sudo chmod 0600 /home/ds/.netrc"]
|
4899
|
-
container.exec(command, tty: true)
|
4900
|
-
say "#{container.id}:#{port}"
|
4901
|
-
rescue => e
|
4902
|
-
puts e
|
4903
|
-
if e.message.include? "is not running"
|
4904
|
-
return "port is taken"
|
4905
|
-
end
|
4906
|
-
puts "error"
|
4907
|
-
if container
|
4908
|
-
container.kill()
|
4909
|
-
end
|
4910
|
-
return false
|
4911
|
-
end
|
4912
|
-
end
|
4913
4562
|
|
4914
4563
|
desc '', '', :hide => true
|
4915
4564
|
|
@@ -4935,39 +4584,10 @@ module Cnvrg
|
|
4935
4584
|
|
4936
4585
|
end
|
4937
4586
|
|
4938
|
-
desc '
|
4939
|
-
|
4940
|
-
|
4941
|
-
|
4942
|
-
method_option :gpu, :type => :boolean, :aliases => ["-g","--gpu"], :default => false
|
4943
|
-
def upload_image(image_name,image_path)
|
4944
|
-
begin
|
4945
|
-
verify_logged_in(false)
|
4946
|
-
log_start(__method__, args, options)
|
4947
|
-
|
4948
|
-
@image = Cnvrg::Images.new()
|
4949
|
-
say "Uploading new docker image file", Thor::Shell::Color::BLUE
|
4950
|
-
workdir = options[:workdir]
|
4951
|
-
description = options[:description]
|
4952
|
-
user = options[:user]
|
4953
|
-
is_gpu = options[:gpu]
|
4954
|
-
res = @image.upload_docker_image(image_path, image_name, workdir, user, description, is_gpu)
|
4955
|
-
if res["status"] == 200
|
4956
|
-
image_slug = res["id"]
|
4957
|
-
owner = CLI.get_owner
|
4958
|
-
image_url = "#{Cnvrg::Helpers.remote_url}/#{owner}/settings/images/#{image_slug}"
|
4959
|
-
log_message("Successfully uploaded image: #{image_url}", Thor::Shell::Color::GREEN, true)
|
4960
|
-
|
4961
|
-
|
4962
|
-
else
|
4963
|
-
log_message("Couldn't upload image: #{image_name}", Thor::Shell::Color::RED, true)
|
4964
|
-
|
4965
|
-
end
|
4966
|
-
rescue => e
|
4967
|
-
log_error(e)
|
4968
|
-
end
|
4969
|
-
|
4970
|
-
|
4587
|
+
desc 'file_exists', description: '', hide: true
|
4588
|
+
def file_exists(file)
|
4589
|
+
exit(0) if File.exists? file
|
4590
|
+
exit(1)
|
4971
4591
|
end
|
4972
4592
|
|
4973
4593
|
|
@@ -5147,29 +4767,40 @@ module Cnvrg
|
|
5147
4767
|
method_option :project_slug, :type => :string, :aliases => ["-s"], :desc => "project slug"
|
5148
4768
|
method_option :project_owner, :type => :string, :aliases => ["-o"], :desc => "project slug"
|
5149
4769
|
method_option :frequency, :type => :numeric, :aliases => ["-f"], :desc => "poll frequency"
|
4770
|
+
method_option :fetch_slugs, :type => :boolean, :default => false, :desc => "Fetch experiments slugs to compare"
|
5150
4771
|
|
5151
4772
|
def compare_experiments
|
5152
4773
|
verify_logged_in(true)
|
5153
4774
|
log_start(__method__, args, options)
|
5154
4775
|
exps_map = {}
|
4776
|
+
copied_commits = []
|
5155
4777
|
|
5156
|
-
if options[:slugs].blank?
|
4778
|
+
if options[:slugs].blank? and options[:fetch_slugs].blank?
|
5157
4779
|
log_message("No experiments slugs given", Thor::Shell::Color::RED)
|
5158
4780
|
return false
|
5159
4781
|
end
|
5160
|
-
|
5161
|
-
|
5162
|
-
log_message("No experiments slugs given", Thor::Shell::Color::RED)
|
5163
|
-
return false
|
4782
|
+
if options[:slugs].present?
|
4783
|
+
slugs = options[:slugs].split(",")
|
5164
4784
|
end
|
4785
|
+
|
5165
4786
|
frequency = options[:frequency] || 5
|
5166
4787
|
namespace = options[:namespace]
|
5167
4788
|
project_dir = is_cnvrg_dir(Dir.pwd)
|
5168
4789
|
@project = Project.new(project_home=project_dir, slug: options[:project_slug], owner: options[:project_owner])
|
4790
|
+
fetch_slugs = options[:fetch_slugs]
|
4791
|
+
webapp_slug = ENV["CNVRG_JOB_ID"]
|
4792
|
+
if fetch_slugs and webapp_slug.present?
|
4793
|
+
slugs = @project.fetch_webapp_slugs(webapp_slug)
|
4794
|
+
end
|
4795
|
+
if slugs.blank?
|
4796
|
+
log_message("No experiments slugs given", Thor::Shell::Color::RED)
|
4797
|
+
return false
|
4798
|
+
end
|
5169
4799
|
|
4800
|
+
log_message("compare is running")
|
5170
4801
|
while true
|
4802
|
+
log_message("compare is running for slugs #{slugs}")
|
5171
4803
|
slugs.each do |exp_slug|
|
5172
|
-
|
5173
4804
|
begin
|
5174
4805
|
if exps_map[exp_slug].blank?
|
5175
4806
|
exp = @project.get_experiment(exp_slug)["experiment"]
|
@@ -5183,15 +4814,23 @@ module Cnvrg
|
|
5183
4814
|
log_message("#{exp_name} has ended, getting files from end commit", Thor::Shell::Color::BLUE)
|
5184
4815
|
Cnvrg::Helpers.get_experiment_events_log_from_server(exp, @project)
|
5185
4816
|
exps_map[exp_slug] = exp
|
5186
|
-
|
4817
|
+
else
|
5187
4818
|
log_message("#{exp_name} is running should get logs", Thor::Shell::Color::BLUE)
|
5188
|
-
Cnvrg::Helpers.get_experiment_events_log_via_kubectl(exp, namespace)
|
4819
|
+
success = Cnvrg::Helpers.get_experiment_events_log_via_kubectl(exp, namespace)
|
4820
|
+
if !success and exp["last_successful_commit"].present? and !copied_commits.include?(exp["last_successful_commit"])
|
4821
|
+
log_message("Failed to get kube files, using last commit", Thor::Shell::Color::BLUE)
|
4822
|
+
Cnvrg::Helpers.get_experiment_events_log_from_server(exp, @project, commit: exp["last_successful_commit"])
|
4823
|
+
copied_commits << exp["last_successful_commit"]
|
4824
|
+
end
|
5189
4825
|
end
|
5190
4826
|
rescue => e
|
5191
4827
|
Cnvrg::Logger.log_error(e)
|
5192
4828
|
end
|
5193
4829
|
end
|
5194
4830
|
sleep frequency
|
4831
|
+
if fetch_slugs
|
4832
|
+
slugs = @project.fetch_webapp_slugs(webapp_slug, slugs: slugs)
|
4833
|
+
end
|
5195
4834
|
end
|
5196
4835
|
end
|
5197
4836
|
|
@@ -5277,127 +4916,6 @@ module Cnvrg
|
|
5277
4916
|
end
|
5278
4917
|
|
5279
4918
|
|
5280
|
-
desc 'pull_image', 'downloads and loads an image', :hide => true
|
5281
|
-
|
5282
|
-
def pull_image(image_name)
|
5283
|
-
begin
|
5284
|
-
verify_logged_in(false)
|
5285
|
-
log_start(__method__, args, options)
|
5286
|
-
owner = Cnvrg::CLI.get_owner()
|
5287
|
-
image = Cnvrg::Images.image_exist(owner, image_name)
|
5288
|
-
if !image
|
5289
|
-
log_message("Couldn't find image in cnvrg repository", Thor::Shell::Color::RED)
|
5290
|
-
exit(1)
|
5291
|
-
end
|
5292
|
-
path = download_image(image_name, image["slug"])
|
5293
|
-
if path
|
5294
|
-
log_message("Building image", Thor::Shell::Color::BLUE)
|
5295
|
-
Docker.options[:read_timeout] = 216000
|
5296
|
-
image = Docker::Image.build_from_dir(path, {'dockerfile' => 'Dockerfile.cpu', 't' => "#{image_name}:latest"}) do |v|
|
5297
|
-
begin
|
5298
|
-
if (log = JSON.parse(v)) && log.has_key?("stream")
|
5299
|
-
next if log["stream"].starts_with? "Step"
|
5300
|
-
$stdout.puts log["stream"]
|
5301
|
-
end
|
5302
|
-
rescue
|
5303
|
-
end
|
5304
|
-
|
5305
|
-
end
|
5306
|
-
|
5307
|
-
if not image.nil?
|
5308
|
-
FileUtils.rm_rf(path)
|
5309
|
-
checks = Helpers.checkmark()
|
5310
|
-
log_message("#{checks} Image built successfully", Thor::Shell::Color::GREEN)
|
5311
|
-
return image
|
5312
|
-
else
|
5313
|
-
|
5314
|
-
log_message("Could not build image", Thor::Shell::Color::RED)
|
5315
|
-
return false
|
5316
|
-
end
|
5317
|
-
else
|
5318
|
-
|
5319
|
-
log_message("Could not download image", Thor::Shell::Color::RED)
|
5320
|
-
return false
|
5321
|
-
|
5322
|
-
|
5323
|
-
end
|
5324
|
-
|
5325
|
-
# else
|
5326
|
-
# path = download_image(image_name,image["slug"])
|
5327
|
-
# if path
|
5328
|
-
# image = Docker::Image.import(path)
|
5329
|
-
# image.tag('repo' => image_name, 'tag' => 'latest')
|
5330
|
-
# if not image.nil?
|
5331
|
-
# say "Finished downloading image, cleaning up..", Thor::Shell::Color::GREEN
|
5332
|
-
# FileUtils.rm(path)
|
5333
|
-
# checks = Helpers.checkmark()
|
5334
|
-
# say "#{checks} Done", Thor::Shell::Color::GREEN
|
5335
|
-
# log_end(0)
|
5336
|
-
# return image
|
5337
|
-
# log_end(0)
|
5338
|
-
# else
|
5339
|
-
# say "Could not download image", Thor::Shell::Color::RED
|
5340
|
-
# return false
|
5341
|
-
# end
|
5342
|
-
#
|
5343
|
-
# end
|
5344
|
-
# end
|
5345
|
-
rescue => e
|
5346
|
-
|
5347
|
-
log_message "Error: couldn't build image", Thor::Shell::Color::RED
|
5348
|
-
log_error(e)
|
5349
|
-
|
5350
|
-
rescue SignalException
|
5351
|
-
say "\nAborting"
|
5352
|
-
exit(1)
|
5353
|
-
ensure
|
5354
|
-
if path
|
5355
|
-
FileUtils.rm_rf(path)
|
5356
|
-
|
5357
|
-
end
|
5358
|
-
end
|
5359
|
-
|
5360
|
-
|
5361
|
-
end
|
5362
|
-
|
5363
|
-
desc 'set_image', 'set image to a porject', :hide => true
|
5364
|
-
|
5365
|
-
def set_image(docker_image)
|
5366
|
-
verify_logged_in(true)
|
5367
|
-
log_start(__method__, args, options)
|
5368
|
-
working_dir = is_cnvrg_dir
|
5369
|
-
project = Project.new(working_dir)
|
5370
|
-
|
5371
|
-
local_images = Docker::Image.all
|
5372
|
-
docker_image_local = local_images.map {|x| x.info["RepoTags"]}.flatten.select {|y| y.include? docker_image}.flatten
|
5373
|
-
if docker_image_local.size == 0
|
5374
|
-
|
5375
|
-
if yes? "Image wasn't found locally, pull image from cnvrg repository?", Thor::Shell::Color::YELLOW
|
5376
|
-
image = pull(docker_image)
|
5377
|
-
if image
|
5378
|
-
log_message("downloaded image: #{docker_image}", Thor::Shell::Color::BLUE)
|
5379
|
-
@image = Images.new(working_dir, docker_image)
|
5380
|
-
else
|
5381
|
-
log_message("Could not create a new project with docker, image was not found", Thor::Shell::Color::RED)
|
5382
|
-
exit(1)
|
5383
|
-
end
|
5384
|
-
else
|
5385
|
-
log_message("Could not create a new project with docker, image was not found", Thor::Shell::Color::RED)
|
5386
|
-
exit(1)
|
5387
|
-
|
5388
|
-
end
|
5389
|
-
elsif docker_image_local.size == 1
|
5390
|
-
log_message("found image: #{docker_image_local[0]}, setting it up..", Thor::Shell::Color::BLUE)
|
5391
|
-
@image = Images.new(working_dir, docker_image_local[0])
|
5392
|
-
elsif docker_image_local.size > 1
|
5393
|
-
log_message("found #{docker_image_local.size} images, choose the image name you want to use", Thor::Shell::Color::BLUE)
|
5394
|
-
image_name = ask "#{docker_image_local.join("\n")}\n", Thor::Shell::Color::BLUE
|
5395
|
-
image_name = image_name.strip
|
5396
|
-
@image = Images.new(working_dir, image_name)
|
5397
|
-
end
|
5398
|
-
@image.update_image_activity(project.last_local_commit, nil)
|
5399
|
-
end
|
5400
|
-
|
5401
4919
|
desc 'check_pod_restart', 'Check pod restart', :hide => true
|
5402
4920
|
def check_pod_restart
|
5403
4921
|
Cnvrg::CLI.new.log_start(__method__, args, options)
|
@@ -5672,7 +5190,7 @@ module Cnvrg
|
|
5672
5190
|
|
5673
5191
|
if dirs.size == 0
|
5674
5192
|
log_message("Couldn't find cnvrg directory. Please start a new project", Thor::Shell::Color::RED)
|
5675
|
-
|
5193
|
+
puts Thread.current.backtrace
|
5676
5194
|
exit(1)
|
5677
5195
|
end
|
5678
5196
|
return dirs.join("/")
|
@@ -5775,7 +5293,7 @@ module Cnvrg
|
|
5775
5293
|
is_cnvrg = is_cnvrg_dir
|
5776
5294
|
if !is_cnvrg
|
5777
5295
|
say "You're not in a cnvrg project directory", Thor::Shell::Color::RED
|
5778
|
-
exit(
|
5296
|
+
exit(1)
|
5779
5297
|
end
|
5780
5298
|
|
5781
5299
|
end
|
@@ -5921,21 +5439,6 @@ module Cnvrg
|
|
5921
5439
|
|
5922
5440
|
end
|
5923
5441
|
|
5924
|
-
def container_changes(dir)
|
5925
|
-
container_id = is_project_with_docker(dir)
|
5926
|
-
if not container_id
|
5927
|
-
return false
|
5928
|
-
end
|
5929
|
-
container = Docker::Container.get(container_id)
|
5930
|
-
command = ['/bin/bash', '-lc', '/opt/ds/bin/pip freeze']
|
5931
|
-
pip = container.exec(command, tty: true)[0]
|
5932
|
-
command = ["/bin/bash", "-lc", "dpkg -l"]
|
5933
|
-
dpkg = container.exec(command, tty: true)[0]
|
5934
|
-
command = ["/bin/bash", "-lc", "cat /home/ds/.bash_history"]
|
5935
|
-
history = container.exec(command, tty: true)[0]
|
5936
|
-
diff = [pip, dpkg, history]
|
5937
|
-
return diff
|
5938
|
-
end
|
5939
5442
|
|
5940
5443
|
def is_port_taken(ip = Cnvrg::CLI::IP, port = Cnvrg::CLI::PORT, seconds = 1)
|
5941
5444
|
Timeout::timeout(seconds) do
|
@@ -6118,13 +5621,17 @@ module Cnvrg
|
|
6118
5621
|
|
6119
5622
|
end
|
6120
5623
|
|
6121
|
-
def gpu_util
|
5624
|
+
def gpu_util(take_from_docker: false, docker_id: nil)
|
6122
5625
|
if !Helpers.ubuntu?
|
6123
5626
|
return 0.0
|
6124
5627
|
end
|
6125
5628
|
stats = [[],[]]
|
6126
5629
|
begin
|
6127
|
-
|
5630
|
+
if take_from_docker
|
5631
|
+
gpu_stats = `docker exec -it #{docker_id} sh -c 'nvidia-smi --query-gpu=utilization.gpu,utilization.memory --format=csv'`
|
5632
|
+
else
|
5633
|
+
gpu_stats = `nvidia-smi --query-gpu=utilization.gpu,utilization.memory --format=csv`
|
5634
|
+
end
|
6128
5635
|
|
6129
5636
|
if !gpu_stats.nil?
|
6130
5637
|
gpu_stats = gpu_stats.split("\n")[1..-1]
|