cnvrg 1.6.36 → 1.9.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/cnvrg.gemspec +1 -4
- data/lib/cnvrg/Images.rb +0 -148
- data/lib/cnvrg/api.rb +8 -8
- data/lib/cnvrg/api_v2.rb +14 -0
- data/lib/cnvrg/cli.rb +298 -787
- data/lib/cnvrg/cli/library_cli.rb +1 -1
- data/lib/cnvrg/connect_job_ssh.rb +31 -0
- data/lib/cnvrg/data.rb +65 -12
- data/lib/cnvrg/datafiles.rb +324 -39
- data/lib/cnvrg/dataset.rb +65 -29
- data/lib/cnvrg/experiment.rb +10 -4
- data/lib/cnvrg/files.rb +46 -15
- data/lib/cnvrg/helpers.rb +34 -26
- data/lib/cnvrg/helpers/agent.rb +188 -0
- data/lib/cnvrg/helpers/executer.rb +162 -258
- data/lib/cnvrg/job_cli.rb +30 -56
- data/lib/cnvrg/job_ssh.rb +48 -0
- data/lib/cnvrg/logger.rb +4 -0
- data/lib/cnvrg/project.rb +53 -17
- data/lib/cnvrg/ssh.rb +0 -1
- data/lib/cnvrg/version.rb +1 -1
- metadata +10 -34
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 10a6bb8d2946d743e8dd1f609369d503c9bf44a9ba748e1e2dfb33df57444aa0
|
|
4
|
+
data.tar.gz: 382e9b28d7edb8856bcd12d5accabf31eb0f264055b4f652da94508a97458b3c
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: db580ef688cf3c3a1e3c95f62c3c520fa606a13ebc678fee420c312e89b7c2c62e39783d25a7962d1f1ba5fa439c968626164443c0828d4fae5604eb881794fd
|
|
7
|
+
data.tar.gz: 296974a98310ef9ba922124385723a33380cd6fd412d80d18cb3cb0a73b070a1a119d61ce26b8a4fe560e6f7a8b591fb222e3ba6ccc09c046d891a02436358bd
|
data/cnvrg.gemspec
CHANGED
|
@@ -31,7 +31,7 @@ Gem::Specification.new do |spec|
|
|
|
31
31
|
spec.add_runtime_dependency 'open4', '~> 1.3', '>= 1.3.4'
|
|
32
32
|
spec.add_runtime_dependency 'highline', '~> 1.7', '>= 1.7.8'
|
|
33
33
|
spec.add_runtime_dependency 'thor', '~> 0.19.0','>=0.19.1'
|
|
34
|
-
spec.add_runtime_dependency 'aws-sdk', '~>
|
|
34
|
+
spec.add_runtime_dependency 'aws-sdk-s3', '~> 1'
|
|
35
35
|
spec.add_runtime_dependency 'signet', '~> 0.11.0'
|
|
36
36
|
spec.add_runtime_dependency 'google-cloud-env', '~> 1.2.1'
|
|
37
37
|
spec.add_runtime_dependency 'google-cloud-core', '~> 1.3.2'
|
|
@@ -40,11 +40,8 @@ Gem::Specification.new do |spec|
|
|
|
40
40
|
spec.add_runtime_dependency 'urlcrypt', '~> 0.1.1'
|
|
41
41
|
spec.add_runtime_dependency 'parallel', '~> 1.12.0'
|
|
42
42
|
spec.add_runtime_dependency 'azure-storage-blob', '~> 1.1.0'
|
|
43
|
-
|
|
44
43
|
spec.add_runtime_dependency 'logstash-logger', '~> 0.22.1'
|
|
45
|
-
spec.add_runtime_dependency 'docker-api', '~> 1.33'
|
|
46
44
|
spec.add_runtime_dependency 'activesupport', '~> 5.2.0'
|
|
47
45
|
spec.add_runtime_dependency 'ruby-progressbar'
|
|
48
|
-
spec.add_runtime_dependency 'net-ssh'
|
|
49
46
|
spec.add_runtime_dependency 'down'
|
|
50
47
|
end
|
data/lib/cnvrg/Images.rb
CHANGED
|
@@ -1,7 +1,5 @@
|
|
|
1
1
|
require 'fileutils'
|
|
2
2
|
require 'cnvrg/files'
|
|
3
|
-
require 'docker'
|
|
4
|
-
require 'net/ssh'
|
|
5
3
|
require 'mimemagic'
|
|
6
4
|
|
|
7
5
|
|
|
@@ -175,58 +173,6 @@ module Cnvrg
|
|
|
175
173
|
response = Cnvrg::API.request("users/#{owner}/images/#{slug}/commit_custom_image", 'POST', {image_logs:logs})
|
|
176
174
|
return response
|
|
177
175
|
end
|
|
178
|
-
def self.ssh_to_machine(resp)
|
|
179
|
-
|
|
180
|
-
sts_path = resp["result"]["sts_path"]
|
|
181
|
-
|
|
182
|
-
uri = URI.parse(sts_path)
|
|
183
|
-
|
|
184
|
-
http_object = Net::HTTP.new(uri.host, uri.port)
|
|
185
|
-
http_object.use_ssl = true if uri.scheme == 'https'
|
|
186
|
-
request = Net::HTTP::Get.new(sts_path)
|
|
187
|
-
|
|
188
|
-
body = ""
|
|
189
|
-
http_object.start do |http|
|
|
190
|
-
response = http.request request
|
|
191
|
-
body = response.read_body
|
|
192
|
-
end
|
|
193
|
-
|
|
194
|
-
URLcrypt::key = [body].pack('H*')
|
|
195
|
-
|
|
196
|
-
ip = URLcrypt.decrypt(resp["result"]["machine_i"])
|
|
197
|
-
|
|
198
|
-
user = URLcrypt.decrypt(resp["result"]["machine_u"])
|
|
199
|
-
key = URLcrypt.decrypt(resp["result"]["machine_k"])
|
|
200
|
-
tempssh = Tempfile.new "sshkey"
|
|
201
|
-
tempssh.write open(key).read
|
|
202
|
-
tempssh.rewind
|
|
203
|
-
key_path = tempssh.path
|
|
204
|
-
count = 0
|
|
205
|
-
while count < 5
|
|
206
|
-
|
|
207
|
-
begin
|
|
208
|
-
ssh = Net::SSH.start(ip, user=user, :keys => key_path, :timeout => 10)
|
|
209
|
-
if !ssh.nil?
|
|
210
|
-
return ssh
|
|
211
|
-
else
|
|
212
|
-
count+=1
|
|
213
|
-
sleep(2)
|
|
214
|
-
|
|
215
|
-
end
|
|
216
|
-
rescue
|
|
217
|
-
count+=1
|
|
218
|
-
sleep(2)
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
end
|
|
222
|
-
end
|
|
223
|
-
if tempssh
|
|
224
|
-
tempssh.close
|
|
225
|
-
tempssh.unlink
|
|
226
|
-
end
|
|
227
|
-
return false
|
|
228
|
-
end
|
|
229
|
-
|
|
230
176
|
|
|
231
177
|
|
|
232
178
|
def create_custom_image(new_image_name,working_dir,stored_commands)
|
|
@@ -270,100 +216,6 @@ module Cnvrg
|
|
|
270
216
|
File.open(@working_dir+"/.cnvrg/config.yml", "w+") { |f| f.write config.to_yaml }
|
|
271
217
|
end
|
|
272
218
|
|
|
273
|
-
def get_container(stop=false)
|
|
274
|
-
begin
|
|
275
|
-
container_id=is_container_exist()
|
|
276
|
-
|
|
277
|
-
if !container_id
|
|
278
|
-
return create_container()
|
|
279
|
-
else
|
|
280
|
-
container = Docker::Container.get(container_id)
|
|
281
|
-
status = container.json["State"]["Status"]
|
|
282
|
-
|
|
283
|
-
if status == "running"
|
|
284
|
-
return container
|
|
285
|
-
else
|
|
286
|
-
if stop
|
|
287
|
-
return false
|
|
288
|
-
end
|
|
289
|
-
res = container.start()
|
|
290
|
-
if res.info["State"]["Status"].eql? "exited" and res.info["State"]["Error"].include? "port is already allocated"
|
|
291
|
-
return create_container()
|
|
292
|
-
end
|
|
293
|
-
return container
|
|
294
|
-
end
|
|
295
|
-
end
|
|
296
|
-
rescue => e
|
|
297
|
-
if e.message.include? "No such container"
|
|
298
|
-
|
|
299
|
-
return create_container()
|
|
300
|
-
else
|
|
301
|
-
return false
|
|
302
|
-
end
|
|
303
|
-
end
|
|
304
|
-
|
|
305
|
-
end
|
|
306
|
-
|
|
307
|
-
def create_container(port=7654, is_remote=false)
|
|
308
|
-
begin
|
|
309
|
-
image_settings = {
|
|
310
|
-
'Image' => "#{@image_name}:latest",
|
|
311
|
-
'User' => 'ds',
|
|
312
|
-
'Cmd' => '/usr/local/cnvrg/run_ipython.sh',
|
|
313
|
-
'WorkingDir' => '/home/ds/notebooks',
|
|
314
|
-
'ExposedPorts' => {
|
|
315
|
-
'8888/tcp' => {},
|
|
316
|
-
},
|
|
317
|
-
'HostConfig' => {
|
|
318
|
-
'Binds' => ["#{@working_dir}:/home/ds/notebooks"],
|
|
319
|
-
'PortBindings' => {
|
|
320
|
-
'8888/tcp' => [
|
|
321
|
-
{'HostPort' => "#{port}", 'HostIp' => 'localhost'}
|
|
322
|
-
],
|
|
323
|
-
},
|
|
324
|
-
},
|
|
325
|
-
}
|
|
326
|
-
container = Docker::Container.create(image_settings)
|
|
327
|
-
container.start()
|
|
328
|
-
netrc = File.open(File.expand_path('~')+"/.netrc", "rb")
|
|
329
|
-
netrc_content = netrc.read
|
|
330
|
-
container.store_file("/home/ds/.netrc", netrc_content)
|
|
331
|
-
command = ["/bin/bash", "-lc", "sudo chmod 600 /home/ds/.netrc"]
|
|
332
|
-
p = container.exec(command, tty: true)
|
|
333
|
-
command = ["/bin/bash", "-lc", "sudo chown -R ds /home/ds/.netrc"]
|
|
334
|
-
p = container.exec(command, tty: true)
|
|
335
|
-
config = File.open(File.expand_path('~')+"/.cnvrg/config.yml", "rb")
|
|
336
|
-
config_content = config.read
|
|
337
|
-
container.store_file("/home/ds/.cnvrg/config.yml", config_content)
|
|
338
|
-
command = ["/bin/bash", "-lc", "sudo chown -R ds /home/ds/.cnvrg"]
|
|
339
|
-
container.exec(command, tty: true)
|
|
340
|
-
# Libraries instlled
|
|
341
|
-
save_installed_libraries(container)
|
|
342
|
-
config = {project_name: @project_name,
|
|
343
|
-
project_slug: @project_slug,
|
|
344
|
-
owner: @owner,
|
|
345
|
-
docker: true, image_base: @image_name, image_tag: @image_tag, container: container.id, port: port, image_slug: @image_slug}
|
|
346
|
-
|
|
347
|
-
File.open(@working_dir+"/.cnvrg/config.yml", "w+") { |f| f.write config.to_yaml }
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
return container
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
rescue => e
|
|
354
|
-
if e.message.include? "is not running"
|
|
355
|
-
return create_container(port-1)
|
|
356
|
-
end
|
|
357
|
-
return false
|
|
358
|
-
rescue SignalException
|
|
359
|
-
|
|
360
|
-
say "\nAborting", Thor::Shell::Color::RED
|
|
361
|
-
exit(1)
|
|
362
|
-
end
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
end
|
|
366
|
-
|
|
367
219
|
def save_installed_libraries(container)
|
|
368
220
|
begin
|
|
369
221
|
command = ['/bin/bash', '-lc', '/opt/ds/bin/pip freeze']
|
data/lib/cnvrg/api.rb
CHANGED
|
@@ -77,20 +77,22 @@ module Cnvrg
|
|
|
77
77
|
if response.to_hash[:status] == 404
|
|
78
78
|
return false
|
|
79
79
|
end
|
|
80
|
-
if parse_request
|
|
80
|
+
if parse_request
|
|
81
81
|
JSON.parse(response.body)
|
|
82
82
|
else
|
|
83
83
|
response
|
|
84
84
|
end
|
|
85
|
-
|
|
85
|
+
when 'POST', 'PUT'
|
|
86
86
|
conn.options.timeout = 4200
|
|
87
|
-
conn.options.open_timeout=180
|
|
87
|
+
conn.options.open_timeout = 180
|
|
88
|
+
conn.headers['Content-Type'] = "application/json"
|
|
88
89
|
retries = 0
|
|
89
90
|
success = false
|
|
91
|
+
data = data || {}
|
|
90
92
|
while !success and retries < 20
|
|
91
93
|
begin
|
|
92
|
-
response = conn.post "#{resource}", data if method.eql? 'POST'
|
|
93
|
-
response = conn.put "#{resource}", data if method.eql? 'PUT'
|
|
94
|
+
response = conn.post "#{resource}", data.to_json if method.eql? 'POST'
|
|
95
|
+
response = conn.put "#{resource}", data.to_json if method.eql? 'PUT'
|
|
94
96
|
success = true
|
|
95
97
|
Cnvrg::API.parse_version(response)
|
|
96
98
|
|
|
@@ -113,7 +115,7 @@ module Cnvrg
|
|
|
113
115
|
end
|
|
114
116
|
when 'POST_JSON'
|
|
115
117
|
conn.options.timeout = 4200
|
|
116
|
-
conn.options.open_timeout =4200
|
|
118
|
+
conn.options.open_timeout = 4200
|
|
117
119
|
conn.headers['Content-Type'] = "application/json"
|
|
118
120
|
new_data = JSON.dump(data)
|
|
119
121
|
|
|
@@ -124,8 +126,6 @@ module Cnvrg
|
|
|
124
126
|
begin
|
|
125
127
|
response = conn.post "#{resource}", new_data
|
|
126
128
|
success = true
|
|
127
|
-
Cnvrg::API.parse_version(response)
|
|
128
|
-
|
|
129
129
|
rescue => e
|
|
130
130
|
Cnvrg::Logger.log_error(e)
|
|
131
131
|
sleep(5)
|
data/lib/cnvrg/api_v2.rb
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
module Cnvrg
|
|
2
|
+
class API_V2 < API
|
|
3
|
+
ENDPOINT_VERSION = 'v2'
|
|
4
|
+
|
|
5
|
+
def self.endpoint_uri
|
|
6
|
+
api = get_api()
|
|
7
|
+
return "#{api}/#{Cnvrg::API_V2::ENDPOINT_VERSION}"
|
|
8
|
+
end
|
|
9
|
+
|
|
10
|
+
def self.is_response_success(response)
|
|
11
|
+
raise Exception.new("Bad status in response #{response.status}") if response.status != 200
|
|
12
|
+
end
|
|
13
|
+
end
|
|
14
|
+
end
|
data/lib/cnvrg/cli.rb
CHANGED
|
@@ -12,7 +12,6 @@ require 'digest' # sha1up
|
|
|
12
12
|
require "highline/import"
|
|
13
13
|
require 'socket'
|
|
14
14
|
require 'thor'
|
|
15
|
-
require 'docker'
|
|
16
15
|
require 'socket'
|
|
17
16
|
require 'timeout'
|
|
18
17
|
require 'fileutils'
|
|
@@ -28,13 +27,11 @@ require 'cnvrg/auth'
|
|
|
28
27
|
require 'cnvrg/project'
|
|
29
28
|
require 'cnvrg/files'
|
|
30
29
|
require 'cnvrg/experiment'
|
|
31
|
-
require 'cnvrg/Images'
|
|
32
30
|
require 'cnvrg/image'
|
|
33
31
|
require 'cnvrg/dataset'
|
|
34
32
|
require 'cnvrg/datafiles'
|
|
35
33
|
require 'cnvrg/data'
|
|
36
34
|
require 'cnvrg/storage'
|
|
37
|
-
require 'cnvrg/ssh'
|
|
38
35
|
require 'cnvrg/result'
|
|
39
36
|
require 'cnvrg/logger'
|
|
40
37
|
require 'cnvrg/org_helpers'
|
|
@@ -49,6 +46,9 @@ require 'cnvrg/downloader/clients/s3_client'
|
|
|
49
46
|
require 'cnvrg/downloader/clients/gcp_client'
|
|
50
47
|
require 'cnvrg/downloader/clients/azure_client'
|
|
51
48
|
require 'cnvrg/job_cli'
|
|
49
|
+
require 'cnvrg/job_ssh'
|
|
50
|
+
require 'cnvrg/connect_job_ssh'
|
|
51
|
+
require 'cnvrg/api_v2'
|
|
52
52
|
|
|
53
53
|
class Thor
|
|
54
54
|
module Base
|
|
@@ -175,6 +175,9 @@ module Cnvrg
|
|
|
175
175
|
desc "job", "manage running jobs", :hide => false
|
|
176
176
|
subcommand "job", JobCli
|
|
177
177
|
|
|
178
|
+
desc "ssh", "ssh into running jobs", :hide => false
|
|
179
|
+
subcommand "ssh", JobSsh
|
|
180
|
+
|
|
178
181
|
desc "image [COMMAND]", "build existing images", :hide => true
|
|
179
182
|
subcommand "image", ImageCli
|
|
180
183
|
|
|
@@ -819,9 +822,9 @@ module Cnvrg
|
|
|
819
822
|
end
|
|
820
823
|
|
|
821
824
|
desc 'data verify', 'Verify datasets', :hide => true
|
|
822
|
-
method_option :timeout, :type => :numeric, :aliases => ["-t", "--timeout"], :desc => "Time to wait before returning final answer", :default =>
|
|
825
|
+
method_option :timeout, :type => :numeric, :aliases => ["-t", "--timeout"], :desc => "Time to wait before returning final answer", :default => nil
|
|
823
826
|
|
|
824
|
-
def verify_datasets(dataset_titles, timeout=
|
|
827
|
+
def verify_datasets(dataset_titles, timeout=nil)
|
|
825
828
|
begin
|
|
826
829
|
verify_logged_in(false)
|
|
827
830
|
log_start(__method__, args, options)
|
|
@@ -830,21 +833,31 @@ module Cnvrg
|
|
|
830
833
|
log_message("All datasets are verified", Thor::Shell::Color::BLUE) if verified
|
|
831
834
|
log_message("Failed to verify datasets", Thor::Shell::Color::RED) if !verified
|
|
832
835
|
exit(1) if !verified
|
|
833
|
-
|
|
834
836
|
rescue SignalException
|
|
835
837
|
say "\nAborting", Thor::Shell::Color::RED
|
|
836
838
|
exit(1)
|
|
837
839
|
end
|
|
838
840
|
end
|
|
839
841
|
|
|
842
|
+
desc 'data scan', 'Lookup datasets', :hide => true
|
|
843
|
+
def scan_datasets()
|
|
844
|
+
begin
|
|
845
|
+
verify_logged_in(false)
|
|
846
|
+
log_start(__method__, args, options)
|
|
847
|
+
log_message("Scanning datasets", Thor::Shell::Color::BLUE)
|
|
848
|
+
datasets = Dataset.scan_datasets()
|
|
849
|
+
puts(datasets.to_json)
|
|
850
|
+
end
|
|
851
|
+
end
|
|
852
|
+
|
|
840
853
|
desc 'data clone', 'Clone dataset', :hide => true
|
|
841
854
|
method_option :commit, :type => :string, :aliases => ["-c", "--commit"], :default => ""
|
|
842
855
|
method_option :only_tree, :type => :boolean, :aliases => ["-t", "--tree"], :default => false
|
|
843
856
|
method_option :query, :type => :string, :aliases => ["-q", "--query"], :default => nil
|
|
844
857
|
method_option :read, :type => :boolean, :aliases => ["-r", "--read"], :default => false
|
|
845
858
|
method_option :remote, :type => :boolean, :aliases => ["-h", "--remote"], :default => false
|
|
846
|
-
|
|
847
|
-
def clone_data(dataset_url,only_tree=false,commit=nil,query=nil,read=false,remote=false, relative: false)
|
|
859
|
+
method_option :soft, :type => :boolean, :aliases => ["-s", "--soft"], :default => false, :hide => true
|
|
860
|
+
def clone_data(dataset_url, only_tree=false, commit=nil, query=nil, read=false, remote=false, flatten: false, relative: false, soft: false)
|
|
848
861
|
begin
|
|
849
862
|
verify_logged_in(false)
|
|
850
863
|
log_start(__method__, args, options)
|
|
@@ -853,10 +866,10 @@ module Cnvrg
|
|
|
853
866
|
read = options["read"] || read || false
|
|
854
867
|
remote = options["remote"] || remote || false
|
|
855
868
|
query = options['query'].presence || query.presence
|
|
869
|
+
soft = options['soft'] || soft
|
|
856
870
|
if query.present?
|
|
857
|
-
return clone_data_query(dataset_url, query)
|
|
871
|
+
return clone_data_query(dataset_url, query, flatten, soft: soft)
|
|
858
872
|
end
|
|
859
|
-
@executer = Cnvrg::Helpers::Executer.get_executer
|
|
860
873
|
|
|
861
874
|
url_parts = dataset_url.split("/")
|
|
862
875
|
project_index = Cnvrg::Helpers.look_for_in_path(dataset_url, "datasets")
|
|
@@ -868,6 +881,8 @@ module Cnvrg
|
|
|
868
881
|
dataset_name = response["result"]["name"]
|
|
869
882
|
dataset_home = Dir.pwd+"/"+dataset_name
|
|
870
883
|
|
|
884
|
+
Dataset.stop_if_dataset_present(dataset_home, dataset_name, commit: response["result"]["commit"]) if soft
|
|
885
|
+
|
|
871
886
|
check = Helpers.checkmark
|
|
872
887
|
if @dataset.init_home(remote:remote)
|
|
873
888
|
log_message("Cloning #{dataset_name}", Thor::Shell::Color::BLUE)
|
|
@@ -875,14 +890,12 @@ module Cnvrg
|
|
|
875
890
|
log_message("Downloading files", Thor::Shell::Color::BLUE)
|
|
876
891
|
if @dataset.softlinked?
|
|
877
892
|
@files.cp_ds(relative: relative)
|
|
878
|
-
@executer.set_dataset_status(dataset: @dataset.slug, status: "cloned") if @executer
|
|
879
893
|
log_message("#{check} Clone finished successfully", Thor::Shell::Color::GREEN)
|
|
880
894
|
@dataset.write_success
|
|
881
895
|
return
|
|
882
896
|
end
|
|
883
897
|
|
|
884
898
|
if only_tree
|
|
885
|
-
|
|
886
899
|
success = Dataset.clone_tree(commit: commit, dataset_home: dataset_home)
|
|
887
900
|
return if success
|
|
888
901
|
end
|
|
@@ -900,7 +913,7 @@ module Cnvrg
|
|
|
900
913
|
|
|
901
914
|
while files['keys'].length > 0
|
|
902
915
|
Cnvrg::Logger.log_info("download multiple files, #{downloaded_files.size} files downloaded")
|
|
903
|
-
@files.download_multiple_files_s3(files, @dataset.local_path, progressbar: progressbar, read_only: read)
|
|
916
|
+
@files.download_multiple_files_s3(files, @dataset.local_path, progressbar: progressbar, read_only: read, flatten: flatten)
|
|
904
917
|
|
|
905
918
|
downloaded_files += files['keys'].length
|
|
906
919
|
files = @files.get_clone_chunk(commit: commit, latest_id: files['latest'])
|
|
@@ -908,7 +921,6 @@ module Cnvrg
|
|
|
908
921
|
progressbar.finish
|
|
909
922
|
if downloaded_files == files_count
|
|
910
923
|
Dataset.verify_cnvrgignore_exist(dataset_name, false)
|
|
911
|
-
@executer.set_dataset_status(dataset: @dataset.slug, status: "cloned") if @executer
|
|
912
924
|
log_message("#{check} Clone finished successfully", Thor::Shell::Color::GREEN)
|
|
913
925
|
@dataset.write_success
|
|
914
926
|
### if read, dont generate idx (but create idx.yml) if not read, generate idx.
|
|
@@ -930,12 +942,14 @@ module Cnvrg
|
|
|
930
942
|
|
|
931
943
|
desc 'data clone_query', 'Clone dataset _query', :hide => true
|
|
932
944
|
method_option :query, :type => :string, :aliases => ["-q", "--query"], :default => ""
|
|
933
|
-
|
|
945
|
+
method_option :soft, :type => :boolean, :aliases => ["-s", "--soft"], :default => false, :hide => true
|
|
946
|
+
def clone_data_query(dataset_url, query=nil, flatten=false, soft: false)
|
|
934
947
|
begin
|
|
935
948
|
verify_logged_in(false)
|
|
936
|
-
|
|
949
|
+
#@executer = Cnvrg::Helpers::Executer.get_executer
|
|
937
950
|
log_start(__method__, args, options)
|
|
938
951
|
query = options["query"] || query
|
|
952
|
+
soft = options["soft"] || soft
|
|
939
953
|
if !query.present?
|
|
940
954
|
log_message("Argument missing : query", Thor::Shell::Color::RED)
|
|
941
955
|
exit(1)
|
|
@@ -945,13 +959,14 @@ module Cnvrg
|
|
|
945
959
|
project_index = Cnvrg::Helpers.look_for_in_path(dataset_url, "datasets")
|
|
946
960
|
slug = url_parts[project_index + 1]
|
|
947
961
|
owner = url_parts[project_index - 1]
|
|
948
|
-
|
|
949
962
|
response = Cnvrg::API.request("users/#{owner}/datasets/#{slug}/search/#{query}", 'GET')
|
|
950
963
|
Cnvrg::CLI.is_response_success(response,true)
|
|
951
964
|
dataset_name = response["results"]["name"]
|
|
952
965
|
dataset_slug = response["results"]["slug"]
|
|
953
|
-
dataset_home =
|
|
966
|
+
dataset_home = Dir.pwd+"/"+dataset_slug
|
|
967
|
+
Dataset.stop_if_dataset_present(dataset_home, dataset_name) if soft
|
|
954
968
|
|
|
969
|
+
# dataset_home = Dir.pwd
|
|
955
970
|
if Dataset.blank_clone(owner, dataset_name, dataset_slug)
|
|
956
971
|
dataset = Dataset.new(dataset_home)
|
|
957
972
|
log_message("Cloning #{dataset_name}", Thor::Shell::Color::BLUE)
|
|
@@ -966,6 +981,7 @@ module Cnvrg
|
|
|
966
981
|
},
|
|
967
982
|
in_threads: ParallelThreads
|
|
968
983
|
}
|
|
984
|
+
|
|
969
985
|
begin
|
|
970
986
|
log_message("Downloading files", Thor::Shell::Color::BLUE)
|
|
971
987
|
Parallel.map((response["results"]["query_files"]), parallel_options) do |f|
|
|
@@ -974,6 +990,7 @@ module Cnvrg
|
|
|
974
990
|
file_name = relative_path_dir.pop()
|
|
975
991
|
relative_path_dir = relative_path_dir.join("/")
|
|
976
992
|
abs_path = dataset_home + "/" + relative_path_dir
|
|
993
|
+
abs_path = dataset_home if flatten
|
|
977
994
|
begin
|
|
978
995
|
FileUtils.mkdir_p(abs_path) unless File.exist? (abs_path + "/" + file_name)
|
|
979
996
|
rescue
|
|
@@ -981,14 +998,14 @@ module Cnvrg
|
|
|
981
998
|
exit(1)
|
|
982
999
|
end
|
|
983
1000
|
begin
|
|
984
|
-
File.write "#{abs_path}/#{file_name}", open(f["
|
|
985
|
-
rescue
|
|
1001
|
+
File.write "#{abs_path}/#{file_name}", open(f["url"]).read unless File.exist? (abs_path + "/" + file_name)
|
|
1002
|
+
rescue => e
|
|
986
1003
|
log_message("Could not download file: #{f["fullpath"]}", Thor::Shell::Color::RED)
|
|
987
1004
|
exit(1)
|
|
988
1005
|
end
|
|
989
1006
|
|
|
990
1007
|
end
|
|
991
|
-
|
|
1008
|
+
#@executer.set_dataset_status(dataset: dataset.slug, status: "cloned") if @executer.present?
|
|
992
1009
|
rescue Interrupt
|
|
993
1010
|
log_message("Couldn't download", Thor::Shell::Color::RED)
|
|
994
1011
|
exit(1)
|
|
@@ -998,7 +1015,7 @@ module Cnvrg
|
|
|
998
1015
|
check = Helpers.checkmark
|
|
999
1016
|
log_message("#{check} Clone finished successfully", Thor::Shell::Color::GREEN)
|
|
1000
1017
|
dataset.write_success(in_folder=true)
|
|
1001
|
-
rescue
|
|
1018
|
+
rescue => e
|
|
1002
1019
|
exit(1)
|
|
1003
1020
|
end
|
|
1004
1021
|
end
|
|
@@ -1008,32 +1025,6 @@ module Cnvrg
|
|
|
1008
1025
|
end
|
|
1009
1026
|
end
|
|
1010
1027
|
|
|
1011
|
-
desc 'init_data_container', 'Init dataset directory', :hide => true
|
|
1012
|
-
method_option :login_content, :type => :string, :aliases => ["-l"], :default => ""
|
|
1013
|
-
|
|
1014
|
-
def init_data_container(container)
|
|
1015
|
-
begin
|
|
1016
|
-
login_content = options["login_content"]
|
|
1017
|
-
|
|
1018
|
-
container = Docker::Container.get(container)
|
|
1019
|
-
command = ["/bin/bash", "-lc", "sudo echo -e \"#{login_content}\" >/home/ds/.netrc"]
|
|
1020
|
-
container.exec(command, tty: true)
|
|
1021
|
-
command = ["/bin/bash", "-lc", "mkdir /home/ds/.cnvrg"]
|
|
1022
|
-
container.exec(command, tty: true)
|
|
1023
|
-
command = ["/bin/bash", "-lc", "mkdir /home/ds/.cnvrg/tmp"]
|
|
1024
|
-
container.exec(command, tty: true)
|
|
1025
|
-
command = ["/bin/bash", "-lc", "sudo chown -R ds /home/ds/.cnvrg /home/ds/.netrc"]
|
|
1026
|
-
container.exec(command, tty: true)
|
|
1027
|
-
command = ["/bin/bash", "-lc", "sudo chmod 0600 /home/ds/.netrc"]
|
|
1028
|
-
container.exec(command, tty: true)
|
|
1029
|
-
|
|
1030
|
-
rescue SignalException
|
|
1031
|
-
|
|
1032
|
-
say "\nAborting", Thor::Shell::Color::RED
|
|
1033
|
-
exit(1)
|
|
1034
|
-
end
|
|
1035
|
-
end
|
|
1036
|
-
|
|
1037
1028
|
desc 'data_snap', 'Init dataset directory', :hide => true
|
|
1038
1029
|
method_option :public, :type => :boolean, :aliases => ["-p", "--public"], :default => false
|
|
1039
1030
|
|
|
@@ -1184,17 +1175,29 @@ module Cnvrg
|
|
|
1184
1175
|
end
|
|
1185
1176
|
|
|
1186
1177
|
desc '', '', :hide => true
|
|
1187
|
-
def
|
|
1178
|
+
def get_owner_slug(url_or_slug)
|
|
1179
|
+
if url_or_slug =~ URI::regexp
|
|
1180
|
+
# Find owner and slug in url
|
|
1181
|
+
url_parts = url_or_slug.split("/")
|
|
1182
|
+
project_index = Cnvrg::Helpers.look_for_in_path(url_or_slug, "datasets")
|
|
1183
|
+
slug = url_parts[project_index + 1]
|
|
1184
|
+
owner = url_parts[project_index - 1]
|
|
1185
|
+
else
|
|
1186
|
+
# Find owner in config file
|
|
1187
|
+
owner = CLI.get_owner
|
|
1188
|
+
slug = url_or_slug
|
|
1189
|
+
end
|
|
1190
|
+
return owner, slug
|
|
1191
|
+
end
|
|
1192
|
+
|
|
1193
|
+
desc '', '', :hide => true
|
|
1194
|
+
def data_put(dataset_url, files: [], dir: '', commit: '', chunk_size: 1000, force: false, threads: 15, message: nil)
|
|
1188
1195
|
begin
|
|
1189
1196
|
verify_logged_in(false)
|
|
1190
1197
|
log_start(__method__, args, options)
|
|
1191
1198
|
|
|
1192
|
-
|
|
1193
|
-
|
|
1194
|
-
project_index = Cnvrg::Helpers.look_for_in_path(dataset_url, "datasets")
|
|
1195
|
-
slug = url_parts[project_index + 1]
|
|
1196
|
-
owner = url_parts[project_index - 1]
|
|
1197
|
-
@dataset = Dataset.new(dataset_url: dataset_url)
|
|
1199
|
+
owner, slug = get_owner_slug(dataset_url)
|
|
1200
|
+
@dataset = Dataset.new(dataset_info: {:owner => owner, :slug => slug})
|
|
1198
1201
|
@datafiles = Cnvrg::Datafiles.new(owner, slug, dataset: @dataset)
|
|
1199
1202
|
@files = @datafiles.verify_files_exists(files)
|
|
1200
1203
|
|
|
@@ -1218,28 +1221,33 @@ module Cnvrg
|
|
|
1218
1221
|
else
|
|
1219
1222
|
@commit = commit
|
|
1220
1223
|
end
|
|
1221
|
-
|
|
1224
|
+
|
|
1225
|
+
# dir shouldnt have starting or ending slash.
|
|
1222
1226
|
dir = dir[0..-2] if dir.end_with? '/'
|
|
1223
1227
|
dir = dir[1..-1] if dir.start_with? '/'
|
|
1224
1228
|
|
|
1225
|
-
@
|
|
1226
|
-
|
|
1227
|
-
|
|
1228
|
-
|
|
1229
|
+
@datafiles.upload_multiple_files_optimized(
|
|
1230
|
+
@files,
|
|
1231
|
+
@commit,
|
|
1232
|
+
force: force,
|
|
1233
|
+
chunk_size: chunk_size,
|
|
1234
|
+
prefix: dir,
|
|
1235
|
+
threads: threads
|
|
1236
|
+
)
|
|
1237
|
+
|
|
1238
|
+
# This is for backwards compatibility only and should be removed in future versions:
|
|
1239
|
+
res = @datafiles.put_commit(@commit)
|
|
1240
|
+
unless res.is_success?
|
|
1241
|
+
raise SignalException.new(1, res.msg)
|
|
1229
1242
|
end
|
|
1230
|
-
|
|
1231
|
-
|
|
1232
|
-
|
|
1233
|
-
|
|
1234
|
-
|
|
1235
|
-
|
|
1236
|
-
res = @datafiles.end_commit(@commit,false, success: true )
|
|
1237
|
-
msg = res['result']
|
|
1238
|
-
response = Cnvrg::Result.new(Cnvrg::CLI.is_response_success(res, true), msg)
|
|
1239
|
-
unless response.is_success?
|
|
1240
|
-
raise SignalException.new(1, res.msg)
|
|
1241
|
-
end
|
|
1243
|
+
|
|
1244
|
+
res = @datafiles.end_commit(@commit,false, success: true, commit_type: "put")
|
|
1245
|
+
msg = res['result']
|
|
1246
|
+
response = Cnvrg::Result.new(Cnvrg::CLI.is_response_success(res, true), msg)
|
|
1247
|
+
unless response.is_success?
|
|
1248
|
+
raise SignalException.new(1, res.msg)
|
|
1242
1249
|
end
|
|
1250
|
+
|
|
1243
1251
|
log_message("Uploading files finished Successfully", Thor::Shell::Color::GREEN)
|
|
1244
1252
|
rescue SignalException => e
|
|
1245
1253
|
log_message(e.message, Thor::Shell::Color::RED)
|
|
@@ -1248,7 +1256,49 @@ module Cnvrg
|
|
|
1248
1256
|
end
|
|
1249
1257
|
|
|
1250
1258
|
|
|
1259
|
+
desc '', '', :hide => true
|
|
1260
|
+
def data_rm(dataset_url, regex_list: [], commit: '', message: nil)
|
|
1261
|
+
begin
|
|
1262
|
+
verify_logged_in(false)
|
|
1263
|
+
log_start(__method__, args, options)
|
|
1251
1264
|
|
|
1265
|
+
owner, slug = get_owner_slug(dataset_url)
|
|
1266
|
+
@dataset = Dataset.new(dataset_info: {:owner => owner, :slug => slug})
|
|
1267
|
+
@datafiles = Cnvrg::Datafiles.new(owner, slug, dataset: @dataset)
|
|
1268
|
+
|
|
1269
|
+
# Init a new commit
|
|
1270
|
+
response = @datafiles.start_commit(false, true, chunks: 1, message: message )
|
|
1271
|
+
unless response #means we failed in the start commit.
|
|
1272
|
+
raise SignalException.new(1, "Cant put files into dataset, check the dataset id")
|
|
1273
|
+
end
|
|
1274
|
+
@commit = response['result']['commit_sha1']
|
|
1275
|
+
files_to_delete, folders_to_delete, job_id = @datafiles.delete_multiple_files(@commit, regex_list)
|
|
1276
|
+
log_message("Deleting #{files_to_delete} files and #{folders_to_delete} folders", Thor::Shell::Color::GREEN)
|
|
1277
|
+
|
|
1278
|
+
total_files = files_to_delete + folders_to_delete
|
|
1279
|
+
current_progress = 0
|
|
1280
|
+
progressbar = @datafiles.create_progressbar("Delete Progress", total_files)
|
|
1281
|
+
chunk_size = 1000
|
|
1282
|
+
offset = 0
|
|
1283
|
+
while current_progress < total_files
|
|
1284
|
+
current_progress = @datafiles.delete_file_chunk(@commit, regex_list, chunk_size, offset)
|
|
1285
|
+
progressbar.progress = current_progress
|
|
1286
|
+
offset += chunk_size
|
|
1287
|
+
end
|
|
1288
|
+
|
|
1289
|
+
res = @datafiles.end_commit(@commit,false, success: true)
|
|
1290
|
+
msg = res['result']
|
|
1291
|
+
response = Cnvrg::Result.new(Cnvrg::CLI.is_response_success(res, true), msg)
|
|
1292
|
+
unless response.is_success?
|
|
1293
|
+
raise SignalException.new(1, res.msg)
|
|
1294
|
+
end
|
|
1295
|
+
|
|
1296
|
+
log_message("Deleting files finished Successfully", Thor::Shell::Color::GREEN)
|
|
1297
|
+
rescue SignalException => e
|
|
1298
|
+
log_message(e.message, Thor::Shell::Color::RED)
|
|
1299
|
+
return false
|
|
1300
|
+
end
|
|
1301
|
+
end
|
|
1252
1302
|
|
|
1253
1303
|
desc 'upload_data', 'Upload data files', :hide => true
|
|
1254
1304
|
method_option :ignore, :type => :array, :aliases => ["-i", "--i"], :desc => "ignore following files"
|
|
@@ -1699,18 +1749,22 @@ module Cnvrg
|
|
|
1699
1749
|
end
|
|
1700
1750
|
|
|
1701
1751
|
desc 'data commits', 'List all commits for a specific dataset', :hide => true
|
|
1702
|
-
|
|
1703
|
-
|
|
1704
|
-
verify_logged_in(true)
|
|
1752
|
+
def list_dataset_commits(dataset_url, commit_sha1: nil)
|
|
1753
|
+
verify_logged_in(false)
|
|
1705
1754
|
log_start(__method__, args, options)
|
|
1706
1755
|
|
|
1707
|
-
|
|
1708
|
-
|
|
1709
|
-
|
|
1756
|
+
if dataset_url == "."
|
|
1757
|
+
dataset_dir = is_cnvrg_dir(Dir.pwd)
|
|
1758
|
+
@dataset = Dataset.new(dataset_dir)
|
|
1759
|
+
else
|
|
1760
|
+
owner, slug = get_owner_slug(dataset_url)
|
|
1761
|
+
@dataset = Dataset.new(dataset_info: {:owner => owner, :slug => slug})
|
|
1762
|
+
end
|
|
1763
|
+
|
|
1764
|
+
result = @dataset.list_commits(commit_sha1:commit_sha1)
|
|
1710
1765
|
list = result["result"]["list"]
|
|
1711
1766
|
|
|
1712
1767
|
print_table(list)
|
|
1713
|
-
|
|
1714
1768
|
end
|
|
1715
1769
|
|
|
1716
1770
|
desc 'commits', 'List all commits for a specific Project'
|
|
@@ -1741,17 +1795,17 @@ module Cnvrg
|
|
|
1741
1795
|
|
|
1742
1796
|
|
|
1743
1797
|
desc 'git_clone', 'Clone project'
|
|
1798
|
+
method_option :soft, :type => :boolean, :aliases => ["-s", "--soft"], :default => false, :hide => true
|
|
1744
1799
|
def git_clone(slug, owner)
|
|
1745
1800
|
verify_logged_in(false)
|
|
1746
1801
|
log_start(__method__, args, options)
|
|
1747
|
-
|
|
1802
|
+
project_home = Dir.pwd
|
|
1803
|
+
soft = options["soft"] || false
|
|
1804
|
+
Project.stop_if_project_present(project_home, slug) if soft
|
|
1748
1805
|
clone_resp = Project.clone_dir_remote(slug, owner, slug,true)
|
|
1749
|
-
|
|
1806
|
+
exit 1 if not clone_resp
|
|
1807
|
+
idx_status = Project.new(get_project_home).generate_idx(files:[])
|
|
1750
1808
|
FileUtils.mkdir_p File.join(get_project_home, ENV['CNVRG_OUTPUT_DIR']) if ENV['CNVRG_OUTPUT_DIR'].present?
|
|
1751
|
-
@executer = Cnvrg::Helpers::Executer.get_executer
|
|
1752
|
-
if @executer.present?
|
|
1753
|
-
@executer.update_git_commit
|
|
1754
|
-
end
|
|
1755
1809
|
end
|
|
1756
1810
|
|
|
1757
1811
|
|
|
@@ -1791,7 +1845,7 @@ module Cnvrg
|
|
|
1791
1845
|
desc 'clone PROJECT_URL', 'Clone project'
|
|
1792
1846
|
method_option :remote, :type => :boolean, :aliases => ["-r", "--r"], :default => false
|
|
1793
1847
|
method_option :commit, :type => :string, :aliases => ["-c", "--c"], :default => nil
|
|
1794
|
-
|
|
1848
|
+
method_option :soft, :type => :boolean, :aliases => ["-s", "--soft"], :default => false, :hide => true
|
|
1795
1849
|
def clone(project_url)
|
|
1796
1850
|
begin
|
|
1797
1851
|
verify_logged_in(false)
|
|
@@ -1801,6 +1855,8 @@ module Cnvrg
|
|
|
1801
1855
|
slug = url_parts[project_index + 1]
|
|
1802
1856
|
owner = url_parts[project_index - 1]
|
|
1803
1857
|
remote = options["remote"] || false
|
|
1858
|
+
soft = options["soft"] || false
|
|
1859
|
+
|
|
1804
1860
|
|
|
1805
1861
|
response = Cnvrg::API.request("users/#{owner}/projects/#{slug}/get_project", 'GET')
|
|
1806
1862
|
Cnvrg::CLI.is_response_success(response)
|
|
@@ -1814,6 +1870,8 @@ module Cnvrg
|
|
|
1814
1870
|
clone_resp = false
|
|
1815
1871
|
project_home = Dir.pwd
|
|
1816
1872
|
|
|
1873
|
+
Project.stop_if_project_present(project_home, project_name) if soft
|
|
1874
|
+
|
|
1817
1875
|
if remote and !git
|
|
1818
1876
|
clone_resp = Project.clone_dir_remote(slug, owner, project_name,git)
|
|
1819
1877
|
elsif git
|
|
@@ -1954,8 +2012,6 @@ module Cnvrg
|
|
|
1954
2012
|
method_option :parallel, :type => :numeric, :aliases => ["-p", "--parallel"], :desc => "uparallel upload at the same time", :default => 15
|
|
1955
2013
|
method_option :init, :type => :boolean, :aliases => ["--initial"], :desc => "initial sync", :default => false
|
|
1956
2014
|
method_option :message, :type => :string, :aliases => ["--message"], :desc => "create commit with message", :default => nil
|
|
1957
|
-
|
|
1958
|
-
|
|
1959
2015
|
def sync_data_new(new_branch, force, verbose, commit, all_files, tags ,parallel, chunk_size, init, message)
|
|
1960
2016
|
verify_logged_in(true)
|
|
1961
2017
|
log_start(__method__, args, options)
|
|
@@ -1964,11 +2020,13 @@ module Cnvrg
|
|
|
1964
2020
|
# w(verbose=false, new_branch=false,sync=false, commit=nil,all_files=true)
|
|
1965
2021
|
total_deleted, total_downloaded = invoke :download_data_new,[verbose, new_branch, true, commit, all_files], :new_branch=>new_branch, :direct=>false, :force =>force
|
|
1966
2022
|
end
|
|
1967
|
-
|
|
2023
|
+
|
|
1968
2024
|
invoke :upload_data_new,[new_branch, verbose, true, force, tags, chunk_size, message:message, total_deleted: total_deleted, total_downloaded: total_downloaded],
|
|
1969
2025
|
:new_branch=>new_branch, :direct=>false, :force =>force, :sync =>true, :tags =>tags, :parallel => parallel, :message => message
|
|
1970
2026
|
|
|
1971
2027
|
end
|
|
2028
|
+
|
|
2029
|
+
|
|
1972
2030
|
desc 'upload_data_new', 'upload_data_new', :hide => true
|
|
1973
2031
|
method_option :verbose, :type => :boolean, :aliases => ["-v"], :default => false
|
|
1974
2032
|
method_option :new_branch, :type => :boolean, :aliases => ["-nb"], :desc => "create new branch of commits"
|
|
@@ -2211,15 +2269,27 @@ module Cnvrg
|
|
|
2211
2269
|
method_option :return_id, :type => :boolean, :aliases => ["-r", "--return_id"], :default => false
|
|
2212
2270
|
method_option :files, :type => :string, :aliases => ["--files"], :default => nil
|
|
2213
2271
|
method_option :output_dir, :type => :string, :aliases => ["--output_dir"], :default => nil
|
|
2272
|
+
method_option :git_diff, :type => :boolean, :aliases => ["--git_diff"], :default => false
|
|
2214
2273
|
method_option :job_slug, :type => :string, :aliases => ["--job"], :default => nil, :hide=>true
|
|
2215
2274
|
method_option :job_type, :type => :string, :aliases => [ "--job_type"], :default => nil, :hide=>true
|
|
2275
|
+
method_option :suppress_exceptions, :type => :boolean, :aliases => ["--suppress-exceptions"], :default => true
|
|
2276
|
+
method_option :debug_mode, :type => :boolean, :aliases => ["--debug-mode"], :default => false
|
|
2216
2277
|
|
|
2217
|
-
def upload(link = false, sync = false, direct = false, ignore_list = "", in_exp = false, force = false, output_dir = "output", job_type = nil, job_slug = nil)
|
|
2278
|
+
def upload(link = false, sync = false, direct = false, ignore_list = "", in_exp = false, force = false, output_dir = "output", job_type = nil, job_slug = nil, suppress_exceptions = true)
|
|
2218
2279
|
begin
|
|
2219
2280
|
# we are passing "force" twice.. doesnt really make sense :\\
|
|
2220
2281
|
verify_logged_in(true)
|
|
2221
2282
|
log_start(__method__, args, options)
|
|
2222
2283
|
@project = Project.new(get_project_home)
|
|
2284
|
+
|
|
2285
|
+
# Enable local/experiment exception logging
|
|
2286
|
+
suppress_exceptions = suppress_exceptions ? suppress_exceptions : options[:suppress_exceptions]
|
|
2287
|
+
if in_exp
|
|
2288
|
+
exp_obj = Experiment.new(@project.owner, @project.slug, job_id: job_slug)
|
|
2289
|
+
else
|
|
2290
|
+
exp_obj = nil
|
|
2291
|
+
end
|
|
2292
|
+
|
|
2223
2293
|
commit_msg = options["message"]
|
|
2224
2294
|
if commit_msg.nil? or commit_msg.empty?
|
|
2225
2295
|
commit_msg = ""
|
|
@@ -2235,19 +2305,21 @@ module Cnvrg
|
|
|
2235
2305
|
spec_files_to_upload = spec_files_to_upload.split(",")
|
|
2236
2306
|
end
|
|
2237
2307
|
if @project.is_git
|
|
2308
|
+
list = []
|
|
2238
2309
|
git_output_dir = options["output_dir"] || output_dir
|
|
2239
2310
|
if git_output_dir.present?
|
|
2240
2311
|
if git_output_dir.ends_with? "/"
|
|
2241
2312
|
git_output_dir = git_output_dir[0..-2]
|
|
2242
2313
|
end
|
|
2243
2314
|
list = @project.generate_output_dir(git_output_dir)
|
|
2244
|
-
spec_files_to_upload = list
|
|
2245
|
-
if spec_files_to_upload.blank?
|
|
2246
|
-
log_message("#{check} Project is up to date", Thor::Shell::Color::GREEN, (((options["sync"] or sync) and !direct) ? false : true))
|
|
2247
|
-
return true
|
|
2248
|
-
end
|
|
2249
|
-
force = true
|
|
2250
2315
|
end
|
|
2316
|
+
list += @project.generate_git_diff if options["git_diff"]
|
|
2317
|
+
spec_files_to_upload = list
|
|
2318
|
+
if spec_files_to_upload.blank?
|
|
2319
|
+
log_message("#{check} Project is up to date", Thor::Shell::Color::GREEN, (((options["sync"] or sync) and !direct) ? false : true))
|
|
2320
|
+
return true
|
|
2321
|
+
end
|
|
2322
|
+
force = true
|
|
2251
2323
|
end
|
|
2252
2324
|
|
|
2253
2325
|
if ignore.nil? or ignore.empty?
|
|
@@ -2289,8 +2361,6 @@ module Cnvrg
|
|
|
2289
2361
|
end
|
|
2290
2362
|
update_count = 0
|
|
2291
2363
|
update_total = result["added"].size + result["updated_on_local"].size + result["deleted"].size
|
|
2292
|
-
successful_updates = []
|
|
2293
|
-
successful_deletions = []
|
|
2294
2364
|
if options["verbose"]
|
|
2295
2365
|
if update_total == 1
|
|
2296
2366
|
log_message("Updating #{update_total} file", Thor::Shell::Color::BLUE)
|
|
@@ -2310,8 +2380,11 @@ module Cnvrg
|
|
|
2310
2380
|
end
|
|
2311
2381
|
job_type = options['job_type'] || job_type
|
|
2312
2382
|
job_slug = options['job_slug'] || job_slug
|
|
2313
|
-
commit_sha1 = @files.start_commit(
|
|
2314
|
-
|
|
2383
|
+
commit_sha1 = @files.start_commit(
|
|
2384
|
+
new_branch, force: force, exp_start_commit: exp_start_commit,
|
|
2385
|
+
job_type: job_type, job_slug: job_slug, start_commit: current_commit,message: options["message"],
|
|
2386
|
+
debug_mode: options["debug_mode"]
|
|
2387
|
+
)["result"]["commit_sha1"]
|
|
2315
2388
|
# upload / update
|
|
2316
2389
|
# delete
|
|
2317
2390
|
to_upload = result["added"] + result["updated_on_local"]
|
|
@@ -2322,32 +2395,30 @@ module Cnvrg
|
|
|
2322
2395
|
:starting_at => 0,
|
|
2323
2396
|
:total => (to_upload.size + deleted.size),
|
|
2324
2397
|
:autofinish => true)
|
|
2325
|
-
@files.upload_multiple_files(to_upload, commit_sha1, progress: progressbar)
|
|
2326
2398
|
|
|
2327
|
-
@files.
|
|
2399
|
+
buffered_errors = @files.upload_multiple_files(to_upload, commit_sha1, progress: progressbar, suppress_exceptions: suppress_exceptions)
|
|
2400
|
+
@files.delete_files_from_server(deleted, commit_sha1, suppress_exceptions: suppress_exceptions)
|
|
2328
2401
|
|
|
2329
2402
|
progressbar.finish
|
|
2403
|
+
|
|
2404
|
+
if buffered_errors.is_a?(Hash)
|
|
2405
|
+
buffered_errors.keys.each do |file|
|
|
2406
|
+
to_upload.delete(file)
|
|
2407
|
+
Cnvrg::CLI.log_message(buffered_errors[file], 'red')
|
|
2408
|
+
exp_obj.job_log([buffered_errors[file]]) unless exp_obj.nil?
|
|
2409
|
+
end
|
|
2410
|
+
end
|
|
2411
|
+
|
|
2330
2412
|
res = @files.end_commit(commit_sha1, force: force, message: commit_msg)
|
|
2331
2413
|
unless Cnvrg::CLI.is_response_success(res, false)
|
|
2332
2414
|
raise StandardError.new("Cant end commit")
|
|
2333
2415
|
end
|
|
2416
|
+
|
|
2334
2417
|
# save idx
|
|
2335
2418
|
@project.update_idx_with_files_commits!((to_upload + deleted), res["result"]["commit_time"])
|
|
2336
2419
|
@project.update_idx_with_commit!(commit_sha1)
|
|
2337
2420
|
if options["verbose"]
|
|
2338
2421
|
log_message("#{check} Done", Thor::Shell::Color::BLUE)
|
|
2339
|
-
if successful_updates.size > 0
|
|
2340
|
-
successful_updates.flatten!
|
|
2341
|
-
log_message("Updated:", Thor::Shell::Color::GREEN)
|
|
2342
|
-
suc = successful_updates.map {|x| x = Helpers.checkmark() + " " + x}
|
|
2343
|
-
log_message(suc.join("\n"), Thor::Shell::Color::GREEN)
|
|
2344
|
-
end
|
|
2345
|
-
if successful_deletions.size > 0
|
|
2346
|
-
successful_deletions.flatten!
|
|
2347
|
-
log_message("Deleted:", Thor::Shell::Color::GREEN)
|
|
2348
|
-
del = successful_updates.map {|x| x = Helpers.checkmark() + " " + x}
|
|
2349
|
-
log_message(del.join("\n"), Thor::Shell::Color::GREEN)
|
|
2350
|
-
end
|
|
2351
2422
|
log_message("Total of #{update_count} / #{update_total} files.", Thor::Shell::Color::GREEN)
|
|
2352
2423
|
else
|
|
2353
2424
|
if return_id
|
|
@@ -2372,9 +2443,13 @@ module Cnvrg
|
|
|
2372
2443
|
if e.is_a? SignalException
|
|
2373
2444
|
say "\nAborting", Thor::Shell::Color::BLUE
|
|
2374
2445
|
say "\nRolling back all changes", Thor::Shell::Color::BLUE
|
|
2446
|
+
|
|
2447
|
+
exp_obj.job_log(["Aborting", "Rolling back all changes"]) unless exp_obj.nil?
|
|
2375
2448
|
else
|
|
2376
2449
|
log_message(error_message, Thor::Shell::Color::RED)
|
|
2377
2450
|
log_error(e)
|
|
2451
|
+
|
|
2452
|
+
exp_obj.job_log([error_message, e]) unless exp_obj.nil?
|
|
2378
2453
|
end
|
|
2379
2454
|
@files.rollback_commit(commit_sha1) unless commit_sha1.nil?
|
|
2380
2455
|
print_res = {
|
|
@@ -2892,6 +2967,11 @@ module Cnvrg
|
|
|
2892
2967
|
method_option :job_type, :type => :string, :aliases => ["-jt", "--job_type"], :default => nil
|
|
2893
2968
|
method_option :files, :type => :string, :aliases => ["--files"], :default => nil
|
|
2894
2969
|
method_option :output_dir, :type => :string, :aliases => ["--output_dir"], :default => nil
|
|
2970
|
+
method_option :git_diff, :type => :boolean, :aliases => ["--git_diff"], :default => false
|
|
2971
|
+
method_option :suppress_exceptions, :type => :boolean, :aliases => ["--suppress-exceptions"], :default => true
|
|
2972
|
+
method_option :debug_mode, :type => :boolean, :aliases => ["--debug-mode"], :default => false
|
|
2973
|
+
method_option :git_diff, :type => :boolean, :aliases => ["--git_diff"], :default => false
|
|
2974
|
+
|
|
2895
2975
|
def sync(direct = true)
|
|
2896
2976
|
verify_logged_in(true) if direct
|
|
2897
2977
|
@project = Project.new(get_project_home)
|
|
@@ -2903,16 +2983,20 @@ module Cnvrg
|
|
|
2903
2983
|
is_git = ENV['CNVRG_GIT_PROJECT'] == "true" || @project.is_git
|
|
2904
2984
|
in_exp = options["in_exp"] || (job_slug.present? and job_type.present?)
|
|
2905
2985
|
in_exp = false if job_type.present? and job_type == "NotebookSession"
|
|
2986
|
+
output_dir = options["output_dir"] || ENV['CNVRG_OUTPUT_DIR']
|
|
2987
|
+
|
|
2906
2988
|
run_download = true
|
|
2907
|
-
if
|
|
2989
|
+
if (job_type == "NotebookSession" and is_git) or job_type == "Experiment" or options['force']
|
|
2908
2990
|
run_download = false
|
|
2909
2991
|
end
|
|
2910
|
-
|
|
2992
|
+
|
|
2993
|
+
if run_download or options['debug_mode']
|
|
2911
2994
|
invoke :download, [true, "", in_exp ], :new_branch => options["new_branch"], :verbose => options["verbose"], :sync => true
|
|
2912
2995
|
end
|
|
2913
|
-
invoke :upload, [false, true, direct, "",in_exp,options[:force],
|
|
2996
|
+
invoke :upload, [false, true, direct, "",in_exp,options[:force], output_dir, job_type, job_slug ], :new_branch => options["new_branch"], :verbose => options["verbose"], :sync => true,
|
|
2914
2997
|
:ignore => options[:ignore], :force => options[:force], :message => options[:message], :deploy => options["deploy"], :return_id => options["return_id"],
|
|
2915
|
-
:files => options["files"], :output_dir => options["
|
|
2998
|
+
:files => options["files"], :output_dir => output_dir, :job_slug => job_slug, :job_type => job_type, :suppress_exceptions => options["suppress_exceptions"], :debug_mode => options['debug_mode'], :git_diff => options["git_diff"]
|
|
2999
|
+
|
|
2916
3000
|
end
|
|
2917
3001
|
|
|
2918
3002
|
desc 'run cmd', 'Runs an experiment'
|
|
@@ -3057,6 +3141,8 @@ module Cnvrg
|
|
|
3057
3141
|
method_option :data, :type => :string, :aliases => ["-d", "--data"], :default => ""
|
|
3058
3142
|
method_option :data_commit, :type => :string, :aliases => ["-dc", "--data_commit"], :default => ""
|
|
3059
3143
|
method_option :ignore, :type => :string, :aliases => ["-i", "--ignore"], :desc => "ignore following files", :default => ""
|
|
3144
|
+
method_option :docker_id, :type => :string, :aliases => ["--docker_id"], :desc => "docker id to watch", :default => ""
|
|
3145
|
+
method_option :gpu_util_from_docker, :type => :boolean, :aliases => ["--gpu-util-from-docker"], :desc => "take gpu utilization from job docker", :default => false
|
|
3060
3146
|
method_option :remote, :type => :boolean, :aliases => ["--remote"], :default => false
|
|
3061
3147
|
method_option :gpu, :type => :boolean, :aliases => ["--gpu"], :default => false
|
|
3062
3148
|
method_option :force, :type => :boolean, :aliases => ["-f", "--force"], :default => false
|
|
@@ -3064,6 +3150,7 @@ module Cnvrg
|
|
|
3064
3150
|
method_option :periodic_sync, :type => :string, :aliases => ["-ps", "--periodic_sync"], :default => ""
|
|
3065
3151
|
method_option :output_dir, :type => :string, :aliases => ["-o", "--output_dir"], :default => nil
|
|
3066
3152
|
method_option :data_query, :type => :string, :aliases => ["-q", "--query"], :default => nil
|
|
3153
|
+
method_option :use_bash, :type => :boolean, :aliases => ["-b", "--use_bash"], :default => false
|
|
3067
3154
|
|
|
3068
3155
|
def exec(*cmd)
|
|
3069
3156
|
log = []
|
|
@@ -3130,8 +3217,12 @@ module Cnvrg
|
|
|
3130
3217
|
end
|
|
3131
3218
|
remote = options["remote"]
|
|
3132
3219
|
if remote
|
|
3133
|
-
docker_id
|
|
3134
|
-
|
|
3220
|
+
if options["docker_id"].present?
|
|
3221
|
+
docker_id = options["docker_id"]
|
|
3222
|
+
else
|
|
3223
|
+
docker_id = `cat /etc/hostname`
|
|
3224
|
+
docker_id = docker_id.strip()
|
|
3225
|
+
end
|
|
3135
3226
|
end
|
|
3136
3227
|
is_on_gpu = options["gpu"]
|
|
3137
3228
|
start_commit = @project.last_local_commit
|
|
@@ -3141,9 +3232,9 @@ module Cnvrg
|
|
|
3141
3232
|
|
|
3142
3233
|
platform = RUBY_PLATFORM
|
|
3143
3234
|
machine_name = Socket.gethostname
|
|
3235
|
+
machine_activity_slug = ENV["CNVRG_MACHINE_ACTIVITY"]
|
|
3144
3236
|
begin
|
|
3145
|
-
|
|
3146
|
-
@exp.start(cmd, platform, machine_name, start_commit, title, email_notification, machine_activity, script_path, sync_before_terminate, periodic_sync)
|
|
3237
|
+
@exp.start(cmd, platform, machine_name, start_commit, title, email_notification, machine_activity_slug, script_path, sync_before_terminate, periodic_sync)
|
|
3147
3238
|
log_message("Experiment's live results: #{Cnvrg::Helpers.remote_url}/#{@project.owner}/projects/#{@project.slug}/experiments/#{@exp.slug}", Thor::Shell::Color::GREEN)
|
|
3148
3239
|
log_message("Running: #{cmd}\n", Thor::Shell::Color::BLUE)
|
|
3149
3240
|
unless @exp.slug.nil?
|
|
@@ -3161,7 +3252,7 @@ module Cnvrg
|
|
|
3161
3252
|
begin
|
|
3162
3253
|
stats = remote ? usage_metrics_in_docker(docker_id) : Helpers.ubuntu? ? {memory: memory_usage, cpu: cpu_usage} : {}
|
|
3163
3254
|
if is_on_gpu
|
|
3164
|
-
gu = gpu_util
|
|
3255
|
+
gu = gpu_util(take_from_docker: options["gpu_util_from_docker"], docker_id: docker_id)
|
|
3165
3256
|
stats['gpu_util'] = gu[0]
|
|
3166
3257
|
stats['gpu'] = gu[1]
|
|
3167
3258
|
end
|
|
@@ -3173,6 +3264,16 @@ module Cnvrg
|
|
|
3173
3264
|
end
|
|
3174
3265
|
end
|
|
3175
3266
|
start_time = Time.now
|
|
3267
|
+
shell_type = options["use_bash"] ? "bash -l" : "sh"
|
|
3268
|
+
if @exp.get_cmd.present?
|
|
3269
|
+
cmd = @exp.get_cmd
|
|
3270
|
+
if options["docker_id"].present? # Escape for docker exec
|
|
3271
|
+
cmd = cmd.gsub("\"", "\\\"")
|
|
3272
|
+
end
|
|
3273
|
+
end
|
|
3274
|
+
if options["docker_id"].present?
|
|
3275
|
+
cmd = "docker exec -it #{options["docker_id"]} #{shell_type} -c \"#{cmd}\""
|
|
3276
|
+
end
|
|
3176
3277
|
PTY.spawn(@exp.as_env, cmd) do |stdout, stdin, pid, stderr|
|
|
3177
3278
|
begin
|
|
3178
3279
|
stdout.each do |line|
|
|
@@ -3187,7 +3288,7 @@ module Cnvrg
|
|
|
3187
3288
|
puts line
|
|
3188
3289
|
end
|
|
3189
3290
|
log << cur_log
|
|
3190
|
-
if log.size >=
|
|
3291
|
+
if log.size >= 1
|
|
3191
3292
|
@exp.upload_temp_log(log) unless log.empty?
|
|
3192
3293
|
log = []
|
|
3193
3294
|
elsif (start_time + 15.seconds) <= Time.now
|
|
@@ -3237,29 +3338,26 @@ module Cnvrg
|
|
|
3237
3338
|
exp_success = false
|
|
3238
3339
|
end
|
|
3239
3340
|
|
|
3240
|
-
|
|
3241
|
-
|
|
3242
|
-
|
|
3243
|
-
|
|
3244
|
-
|
|
3245
|
-
|
|
3246
|
-
|
|
3247
|
-
# invoke :upload, [false, false, true, ignore, true, true], :output_dir => output_dir, :force=>true, :job_type=>'Experiment', :job_slug=>@exp.slug
|
|
3248
|
-
end
|
|
3249
|
-
else
|
|
3250
|
-
upload(false, false, true, ignore, true, true,nil,"Experiment",@exp.slug )
|
|
3251
|
-
|
|
3252
|
-
# invoke :upload, [false, false, true, ignore,true, true], :job_type=>'Experiment', :job_slug=>@exp.slug, :force=>true
|
|
3341
|
+
if sync_after
|
|
3342
|
+
@exp.job_log(["Syncing Experiment"])
|
|
3343
|
+
# Sync after run
|
|
3344
|
+
if @project.is_git
|
|
3345
|
+
output_dir = output_dir || @exp.output_dir
|
|
3346
|
+
if output_dir.present?
|
|
3347
|
+
upload(false, false, true, ignore, true, true, output_dir, "Experiment", @exp.slug, true )
|
|
3253
3348
|
end
|
|
3254
|
-
|
|
3349
|
+
else
|
|
3350
|
+
upload(false, false, true, ignore, true, true, nil, "Experiment", @exp.slug, true )
|
|
3255
3351
|
end
|
|
3352
|
+
end
|
|
3353
|
+
|
|
3256
3354
|
end_commit = @project.last_local_commit
|
|
3257
3355
|
if end_commit.present?
|
|
3258
3356
|
@exp.job_log(["Experiment end commit: #{end_commit}"])
|
|
3259
3357
|
end
|
|
3260
3358
|
|
|
3261
3359
|
# log_thread.join
|
|
3262
|
-
|
|
3360
|
+
stats_thread.join
|
|
3263
3361
|
|
|
3264
3362
|
res = @exp.end(log, exit_status, end_commit, cpu_average, memory_average, end_time: end_time)
|
|
3265
3363
|
|
|
@@ -3407,8 +3505,8 @@ module Cnvrg
|
|
|
3407
3505
|
local_folders_options = options["local_folders"]
|
|
3408
3506
|
options_hash.except!("schedule", "recurring", "machine_type", "image", "upload_output", "grid", "data", "data_commit", "title",
|
|
3409
3507
|
"local", "small", "medium", "large", "gpu", "gpuxl", "gpuxxl","max_time","dataset_only_tree",
|
|
3410
|
-
"data_query", "git_commit","git_branch", "restart_if_stuck","local_folders","output_dir", "commit", "datasets",
|
|
3411
|
-
"email_notification_error", "email_notification_success", "emails")
|
|
3508
|
+
"data_query", "git_commit","git_branch", "restart_if_stuck","local_folders","output_dir", "commit", "datasets",
|
|
3509
|
+
"requirements", "prerun", "email_notification_error", "email_notification_success", "emails")
|
|
3412
3510
|
exec_options = options_hash.map {|x| "--#{x[0]}=#{x[1]}"}.flatten.join(" ")
|
|
3413
3511
|
command = "#{exec_options} #{remote} #{upload_output_option} #{cmd.flatten.join(" ")}"
|
|
3414
3512
|
commit_to_run = options["commit"] || nil
|
|
@@ -4233,144 +4331,6 @@ module Cnvrg
|
|
|
4233
4331
|
|
|
4234
4332
|
end
|
|
4235
4333
|
|
|
4236
|
-
method_option :small, :type => :boolean, :aliases => ["-sm", "--small"], :default => false
|
|
4237
|
-
method_option :medium, :type => :boolean, :aliases => ["-md", "--medium"], :default => false
|
|
4238
|
-
method_option :large, :type => :boolean, :aliases => ["-lg", "--large"], :default => false
|
|
4239
|
-
method_option :gpu, :type => :boolean, :aliases => ["--gpu"], :default => false
|
|
4240
|
-
method_option :gpuxl, :type => :boolean, :aliases => ["--gpuxl"], :default => false
|
|
4241
|
-
method_option :gpuxxl, :type => :boolean, :aliases => ["--gpuxxl"], :default => false
|
|
4242
|
-
method_option :image, :type => :string, :aliases => ["-i", "--image"], :default => ""
|
|
4243
|
-
method_option :public, :type => :boolean, :aliases => ["-p", "--public"], :default => false
|
|
4244
|
-
method_option :base, :type => :boolean, :aliases => ["-b", "--base"], :default => false
|
|
4245
|
-
method_option :python3, :type => :boolean, :aliases => ["--python3"], :default => false
|
|
4246
|
-
method_option :docker_path, :type => :string, :aliases => ["--docker_path"], :default => ""
|
|
4247
|
-
|
|
4248
|
-
|
|
4249
|
-
desc 'create_custom_image', 'run commands inside containers', :hide => true
|
|
4250
|
-
|
|
4251
|
-
def build_image(image_name)
|
|
4252
|
-
begin
|
|
4253
|
-
verify_logged_in(false)
|
|
4254
|
-
log_start(__method__, args, options)
|
|
4255
|
-
instances = {"small" => options["small"], "medium" => options["medium"], "large" => options["large"],
|
|
4256
|
-
"gpu" => options["gpu"], "gpuxl" => options["gpuxl"], "gpuxxl" => options["gpuxxl"]}
|
|
4257
|
-
instance_type = get_instance_type(instances)
|
|
4258
|
-
image_extend = options["image"]
|
|
4259
|
-
public = options["public"]
|
|
4260
|
-
base = options["base"]
|
|
4261
|
-
python3 = options["python3"]
|
|
4262
|
-
docker_path = options["docker_path"]
|
|
4263
|
-
owner = CLI.get_owner
|
|
4264
|
-
checks = Helpers.checkmark()
|
|
4265
|
-
tar_path = nil
|
|
4266
|
-
if !docker_path.nil? and !docker_path.empty?
|
|
4267
|
-
docker_path = File.absolute_path(docker_path)
|
|
4268
|
-
#create tar of the docker path: it could be a docker file, and it could be a docker folder
|
|
4269
|
-
tar_path = File.expand_path('~') + "/.cnvrg/tmp/docker_#{File.basename docker_path}.tar.gz"
|
|
4270
|
-
resp = create_docker_tar(docker_path, tar_path)
|
|
4271
|
-
if !resp
|
|
4272
|
-
log_message("Couldn't create tar from docker path", Thor::Shell::Color::RED)
|
|
4273
|
-
FileUtils.rm_rf tar_path
|
|
4274
|
-
exit(1)
|
|
4275
|
-
end
|
|
4276
|
-
files = Cnvrg::Files.new(owner, "")
|
|
4277
|
-
resp = Images.create_new_custom_image_with_docker(instance_type, owner, image_name, public, base, image_extend, python3, tar_path, files)
|
|
4278
|
-
if resp
|
|
4279
|
-
end
|
|
4280
|
-
else
|
|
4281
|
-
log_message("Creating machine for your custom image, this may take a few moments...", Thor::Shell::Color::BLUE)
|
|
4282
|
-
resp = Images.create_new_custom_image(instance_type, owner, image_name, public, base, image_extend, python3, nil)
|
|
4283
|
-
|
|
4284
|
-
end
|
|
4285
|
-
|
|
4286
|
-
if Cnvrg::CLI.is_response_success(resp, false)
|
|
4287
|
-
image_slug = resp["result"]["slug"]
|
|
4288
|
-
container = resp["result"]["machine_c"]
|
|
4289
|
-
log_message("#{checks} Created image and machine successfully", Thor::Shell::Color::GREEN)
|
|
4290
|
-
log_message("Connecting to machine", Thor::Shell::Color::BLUE)
|
|
4291
|
-
ssh = Ssh.new(resp)
|
|
4292
|
-
if !ssh.is_ssh
|
|
4293
|
-
log_message("Couldn't connect to machine,aborting", Thor::Shell::Color::RED)
|
|
4294
|
-
Images.revoke_custom_new_image(owner, image_slug)
|
|
4295
|
-
end
|
|
4296
|
-
log_message("run command until ctrl + c or quit is initiated", Thor::Shell::Color::BLUE)
|
|
4297
|
-
begin
|
|
4298
|
-
logs = []
|
|
4299
|
-
|
|
4300
|
-
while true
|
|
4301
|
-
command = ask("$>")
|
|
4302
|
-
logs << {time: Time.now,
|
|
4303
|
-
message: command,
|
|
4304
|
-
type: "stdout"
|
|
4305
|
-
}
|
|
4306
|
-
if command.eql? "quit"
|
|
4307
|
-
log_message("Commiting Image..", Thor::Shell::Color::BLUE)
|
|
4308
|
-
break
|
|
4309
|
-
end
|
|
4310
|
-
res = ssh.exec_command(command)
|
|
4311
|
-
begin
|
|
4312
|
-
res_parsed = JSON.parse(res)
|
|
4313
|
-
res = res_parsed.join(",")
|
|
4314
|
-
end
|
|
4315
|
-
|
|
4316
|
-
puts res
|
|
4317
|
-
logs << {time: Time.now,
|
|
4318
|
-
message: res,
|
|
4319
|
-
type: "stdout"
|
|
4320
|
-
}
|
|
4321
|
-
logs.flatten!
|
|
4322
|
-
|
|
4323
|
-
end
|
|
4324
|
-
|
|
4325
|
-
rescue SignalException
|
|
4326
|
-
log_message("Commiting Image..", Thor::Shell::Color::BLUE)
|
|
4327
|
-
|
|
4328
|
-
end
|
|
4329
|
-
resp = Images.commit_custom_image(owner, image_slug, logs)
|
|
4330
|
-
if Cnvrg::CLI.is_response_success(resp, false)
|
|
4331
|
-
log_message("#{checks} Image commited successfuly, email will be sent when image is ready", Thor::Shell::Color::GREEN)
|
|
4332
|
-
else
|
|
4333
|
-
if image_slug
|
|
4334
|
-
Images.revoke_custom_new_image(owner, image_slug)
|
|
4335
|
-
end
|
|
4336
|
-
if ssh
|
|
4337
|
-
ssh.close_ssh()
|
|
4338
|
-
end
|
|
4339
|
-
log_message("Image couldn't be commited, rolling back changes", Thor::Shell::Color::RED)
|
|
4340
|
-
|
|
4341
|
-
exit(1)
|
|
4342
|
-
end
|
|
4343
|
-
if ssh
|
|
4344
|
-
ssh.close_ssh()
|
|
4345
|
-
end
|
|
4346
|
-
|
|
4347
|
-
|
|
4348
|
-
end
|
|
4349
|
-
rescue => e
|
|
4350
|
-
log_message("Error occurd, aborting", Thor::Shell::Color::RED)
|
|
4351
|
-
|
|
4352
|
-
log_error(e)
|
|
4353
|
-
if image_slug
|
|
4354
|
-
Images.revoke_custom_new_image(owner, image_slug)
|
|
4355
|
-
end
|
|
4356
|
-
if ssh
|
|
4357
|
-
ssh.close_ssh()
|
|
4358
|
-
end
|
|
4359
|
-
|
|
4360
|
-
|
|
4361
|
-
rescue SignalException
|
|
4362
|
-
if image_slug
|
|
4363
|
-
Images.revoke_custom_new_image(owner, image_slug)
|
|
4364
|
-
end
|
|
4365
|
-
if ssh
|
|
4366
|
-
ssh.close_ssh
|
|
4367
|
-
end
|
|
4368
|
-
say "\nAborting"
|
|
4369
|
-
exit(1)
|
|
4370
|
-
end
|
|
4371
|
-
|
|
4372
|
-
end
|
|
4373
|
-
|
|
4374
4334
|
|
|
4375
4335
|
desc 'build', 'run commands inside containers', :hide => true
|
|
4376
4336
|
method_option :install, :type => :string, :aliases => ["--i"], :default => nil, :desc => "Install from the given instructions file"
|
|
@@ -4564,66 +4524,7 @@ module Cnvrg
|
|
|
4564
4524
|
end
|
|
4565
4525
|
|
|
4566
4526
|
|
|
4567
|
-
desc 'upload_image', 'commit notebook changes to create a new notebook image', :hide =>true
|
|
4568
|
-
|
|
4569
|
-
def upload_image_old(image_id, is_public, is_base, *message)
|
|
4570
|
-
verify_logged_in(true)
|
|
4571
|
-
log_start(__method__, args, options)
|
|
4572
|
-
image = Docker::Image.get(image_id)
|
|
4573
|
-
project_home = get_project_home
|
|
4574
|
-
@project = Project.new(project_home)
|
|
4575
|
-
last_local_commit = @project.last_local_commit
|
|
4576
|
-
image_name = @project.slug + "#{last_local_commit}"
|
|
4577
|
-
path = File.expand_path('~') + "/.cnvrg/tmp/#{image_name}.tar"
|
|
4578
|
-
owner = Cnvrg::CLI.get_owner()
|
|
4579
|
-
if !message.nil? or !message.empty?
|
|
4580
|
-
message = message.join(" ")
|
|
4581
|
-
end
|
|
4582
|
-
|
|
4583
|
-
log_message("Saving image's current state", Thor::Shell::Color::BLUE)
|
|
4584
|
-
image.save(path)
|
|
4585
|
-
|
|
4586
|
-
begin
|
|
4587
|
-
log_message("Compressing image file to upload", Thor::Shell::Color::BLUE)
|
|
4588
|
-
gzipRes = system("gzip -f #{path}")
|
|
4589
|
-
if !gzipRes
|
|
4590
|
-
|
|
4591
|
-
log_message("Couldn't create tar file from image", Thor::Shell::Color::RED)
|
|
4592
|
-
exit(1)
|
|
4593
|
-
end
|
|
4594
|
-
path = path + ".gz"
|
|
4595
|
-
@files = Cnvrg::Files.new(owner, "")
|
|
4596
|
-
|
|
4597
|
-
exit_status = $?.exitstatus
|
|
4598
|
-
if exit_status == 0
|
|
4599
|
-
log_message("Uploading image file", Thor::Shell::Color::BLUE)
|
|
4600
|
-
|
|
4601
|
-
diff = container_changes(Dir.pwd)
|
|
4602
|
-
res = @files.upload_image(path, image_name, owner, is_public, is_base, diff[1], diff[0], diff[2], message, image.commit_id)
|
|
4603
|
-
if res
|
|
4604
|
-
File.delete(path)
|
|
4605
|
-
image_loc = is_project_with_docker(Dir.pwd)
|
|
4606
|
-
image_loc.update_slug(res["result"]["id"])
|
|
4607
|
-
|
|
4608
|
-
checks = Helpers.checkmark()
|
|
4609
|
-
log_message("#{checks} Done", Thor::Shell::Color::GREEN)
|
|
4610
|
-
else
|
|
4611
|
-
log_message("Couldn't upload image", Thor::Shell::Color::RED)
|
|
4612
|
-
|
|
4613
|
-
end
|
|
4614
|
-
else
|
|
4615
|
-
log_message("Couldn't create image file for: #{image_name}", Thor::Shell::Color::RED)
|
|
4616
|
-
exit(1)
|
|
4617
|
-
end
|
|
4618
|
-
rescue => e
|
|
4619
|
-
log_message("Couldn't upload image file for: #{image_name}", Thor::Shell::Color::RED)
|
|
4620
|
-
log_error(e)
|
|
4621
|
-
rescue SignalException
|
|
4622
4527
|
|
|
4623
|
-
say "Couldn't upload image file for: #{image_name}", Thor::Shell::Color::RED
|
|
4624
|
-
exit(1)
|
|
4625
|
-
end
|
|
4626
|
-
end
|
|
4627
4528
|
|
|
4628
4529
|
desc '', '', :hide => true
|
|
4629
4530
|
|
|
@@ -4634,278 +4535,30 @@ module Cnvrg
|
|
|
4634
4535
|
|
|
4635
4536
|
end
|
|
4636
4537
|
|
|
4637
|
-
desc '', '', :hide => true
|
|
4638
|
-
|
|
4639
|
-
|
|
4640
|
-
|
|
4641
|
-
|
|
4642
|
-
|
|
4643
|
-
|
|
4644
|
-
|
|
4645
|
-
|
|
4646
|
-
|
|
4647
|
-
|
|
4648
|
-
|
|
4649
|
-
|
|
4650
|
-
|
|
4651
|
-
|
|
4652
|
-
|
|
4653
|
-
|
|
4654
|
-
|
|
4655
|
-
|
|
4656
|
-
|
|
4657
|
-
def tensor_port_container(container_id)
|
|
4658
|
-
container = Docker::Container.get(container_id)
|
|
4659
|
-
say container.json["HostConfig"]["PortBindings"]["6006/tcp"][0]["HostPort"]
|
|
4660
|
-
end
|
|
4661
|
-
|
|
4662
|
-
desc '', '', :hide => true
|
|
4663
|
-
|
|
4664
|
-
def stop_container(container_id)
|
|
4665
|
-
container = Docker::Container.get(container_id)
|
|
4666
|
-
container.stop()
|
|
4667
|
-
container.remove()
|
|
4668
|
-
|
|
4669
|
-
end
|
|
4670
|
-
|
|
4671
|
-
desc '', '', :hide => true
|
|
4672
|
-
method_option :login, :type => :string, :aliases => ["-l"], :default => ""
|
|
4673
|
-
method_option :app_dir, :type => :string, :aliases => ["-d"], :default => "/home/ds/notebooks"
|
|
4674
|
-
method_option :cmd, :type => :string, :aliases => ["-c"], :default => "/usr/local/cnvrg/run_ipython.sh"
|
|
4675
|
-
|
|
4676
|
-
|
|
4677
|
-
def config_remote(image_name, port = 7654, tensport = 6006)
|
|
4678
|
-
local_images = Docker::Image.all
|
|
4679
|
-
|
|
4680
|
-
docker_image_local = local_images.map {|x| x.info["RepoTags"]}.flatten.select {|y| y.eql? "#{image_name}:latest"}.flatten
|
|
4681
|
-
if docker_image_local.empty?
|
|
4682
|
-
say "no image"
|
|
4683
|
-
exit(1)
|
|
4684
|
-
end
|
|
4685
|
-
|
|
4686
|
-
begin
|
|
4687
|
-
login_content = options["login"]
|
|
4688
|
-
app_dir = options["app_dir"]
|
|
4689
|
-
cmd = options["cmd"]
|
|
4690
|
-
volume_from = options["volume"]
|
|
4691
|
-
|
|
4692
|
-
image_settings = {
|
|
4693
|
-
'Image' => "#{image_name}:latest",
|
|
4694
|
-
|
|
4695
|
-
'Cmd' => cmd,
|
|
4696
|
-
'WorkingDir' => app_dir,
|
|
4697
|
-
'ExposedPorts' => {
|
|
4698
|
-
'8888/tcp' => {},
|
|
4699
|
-
},
|
|
4700
|
-
'HostConfig' => {
|
|
4701
|
-
'Binds' => ["/var/run/docker.sock:/var/run/docker.sock", "/usr/bin/docker:/usr/bin/docker"],
|
|
4702
|
-
'PortBindings' => {
|
|
4703
|
-
'8888/tcp' => [
|
|
4704
|
-
{'HostPort' => "#{port}", 'HostIp' => 'localhost'}
|
|
4705
|
-
],
|
|
4706
|
-
'6006/tcp' => [
|
|
4707
|
-
{'HostPort' => "#{tensport}", 'HostIp' => 'localhost'}
|
|
4708
|
-
],
|
|
4709
|
-
},
|
|
4710
|
-
},
|
|
4711
|
-
}
|
|
4712
|
-
container = Docker::Container.create(image_settings)
|
|
4713
|
-
container.start()
|
|
4714
|
-
command = ["/bin/bash", "-lc", "sudo echo -e \"#{login_content}\" >/home/ds/.netrc"]
|
|
4715
|
-
container.exec(command, tty: true)
|
|
4716
|
-
# command = ["/bin/bash", "-lc", "mkdir /home/ds/.cnvrg"]
|
|
4717
|
-
# container.exec(command, tty: true)
|
|
4718
|
-
# command = ["/bin/bash", "-lc", "mkdir /home/ds/.cnvrg/tmp"]
|
|
4719
|
-
# container.exec(command, tty: true)
|
|
4720
|
-
command = ["/bin/bash", "-lc", "sudo chown -R ds:ds /home/ds/.netrc"]
|
|
4721
|
-
container.exec(command, tty: true)
|
|
4722
|
-
command = ["/bin/bash", "-lc", "sudo chmod 0600 /home/ds/.netrc"]
|
|
4723
|
-
container.exec(command, tty: true)
|
|
4724
|
-
say "#{container.id}:#{port}##{tensport}"
|
|
4725
|
-
rescue => e
|
|
4726
|
-
puts e
|
|
4727
|
-
if e.message.include? "is not running"
|
|
4728
|
-
return config_remote(image_name, port - 1, tensport - 1)
|
|
4729
|
-
end
|
|
4730
|
-
|
|
4731
|
-
if container
|
|
4732
|
-
container.kill()
|
|
4733
|
-
end
|
|
4734
|
-
return false
|
|
4735
|
-
end
|
|
4736
|
-
end
|
|
4737
|
-
|
|
4738
|
-
|
|
4739
|
-
desc '', '', :hide => true
|
|
4740
|
-
method_option :login, :type => :string, :aliases => ["-l"], :default => ""
|
|
4741
|
-
|
|
4742
|
-
def config_netrc(container)
|
|
4743
|
-
|
|
4744
|
-
login_content = options["login"]
|
|
4745
|
-
|
|
4746
|
-
container = Docker::Container.get(container)
|
|
4747
|
-
command = ["/bin/bash", "-lc", "sudo echo -e \"#{login_content}\" >/home/ds/.netrc"]
|
|
4748
|
-
container.exec(command, tty: true)
|
|
4749
|
-
command = ["/bin/bash", "-lc", "sudo chown -R ds:ds /home/ds/.netrc"]
|
|
4750
|
-
container.exec(command, tty: true)
|
|
4751
|
-
command = ["/bin/bash", "-lc", "sudo chmod 0600 /home/ds/.netrc"]
|
|
4752
|
-
container.exec(command, tty: true)
|
|
4753
|
-
say "OK"
|
|
4754
|
-
|
|
4755
|
-
end
|
|
4756
|
-
|
|
4757
|
-
desc '', '', :hide => true
|
|
4758
|
-
method_option :login, :type => :string, :aliases => ["-l", "--l"], :default => ""
|
|
4759
|
-
method_option :app_dir, :type => :string, :aliases => ["-d", "--d"], :default => "/home/ds/notebooks"
|
|
4760
|
-
method_option :cmd, :type => :string, :aliases => ["-c", "--c"], :default => "/usr/local/cnvrg/run_ipython.sh"
|
|
4761
|
-
|
|
4762
|
-
|
|
4763
|
-
def config_remote_gpu(image_name, port = 7654, tensport = 6006)
|
|
4764
|
-
local_images = Docker::Image.all
|
|
4765
|
-
|
|
4766
|
-
docker_image_local = local_images.map {|x| x.info["RepoTags"]}.flatten.select {|y| y.eql? "#{image_name}:latest"}.flatten
|
|
4767
|
-
if docker_image_local.empty?
|
|
4768
|
-
say "no image"
|
|
4769
|
-
exit(1)
|
|
4770
|
-
end
|
|
4771
|
-
|
|
4772
|
-
begin
|
|
4773
|
-
login_content = options["login"]
|
|
4774
|
-
app_dir = options["app_dir"]
|
|
4775
|
-
cmd = options["cmd"]
|
|
4776
|
-
|
|
4777
|
-
# image_settings = {
|
|
4778
|
-
# 'Image' => "#{image_name}:latest",
|
|
4779
|
-
# 'User' => 'ds',
|
|
4780
|
-
# 'Cmd' => cmd,
|
|
4781
|
-
# 'WorkingDir' => app_dir,
|
|
4782
|
-
# 'ExposedPorts' => {
|
|
4783
|
-
# '8888/tcp' => {},
|
|
4784
|
-
# },
|
|
4785
|
-
# 'HostConfig' => {
|
|
4786
|
-
# 'PortBindings' => {
|
|
4787
|
-
# '8888/tcp' => [
|
|
4788
|
-
# {'HostPort' => "#{port}", 'HostIp' => 'localhost'}
|
|
4789
|
-
# ],
|
|
4790
|
-
# '6006/tcp' => [
|
|
4791
|
-
# {'HostPort' => "6006", 'HostIp' => 'localhost'}
|
|
4792
|
-
# ],
|
|
4793
|
-
# },
|
|
4794
|
-
# },
|
|
4795
|
-
# }
|
|
4796
|
-
|
|
4797
|
-
container_id = `nvidia-docker run -itd -p #{port}:8888 -p #{tensport}:6006 -w #{app_dir} -v /usr/bin/nvidia-smi:/usr/bin/nvidia-smi -v /var/run/docker.sock:/var/run/docker.sock -v /usr/bin/docker:/usr/bin/docker #{image_name}:latest #{cmd} `
|
|
4798
|
-
container_id = container_id.gsub("\n", "")
|
|
4799
|
-
container = Docker::Container.get(container_id)
|
|
4800
|
-
# container.start()
|
|
4801
|
-
command = ["/bin/bash", "-lc", "sudo echo -e \"#{login_content}\" >/home/ds/.netrc"]
|
|
4802
|
-
container.exec(command, tty: true)
|
|
4803
|
-
command = ["/bin/bash", "-lc", "sudo chown -R ds:ds /home/ds/.netrc"]
|
|
4804
|
-
container.exec(command, tty: true)
|
|
4805
|
-
command = ["/bin/bash", "-lc", "sudo chmod 0600 /home/ds/.netrc"]
|
|
4806
|
-
container.exec(command, tty: true)
|
|
4807
|
-
say "#{container.id}:#{port}##{tensport}"
|
|
4808
|
-
rescue => e
|
|
4809
|
-
if e.message.include? "is not running"
|
|
4810
|
-
puts "running asgain with: #{port - 1} #{tensport - 1}"
|
|
4811
|
-
return config_remote_gpu(image_name, port - 1, tensport - 1)
|
|
4812
|
-
end
|
|
4813
|
-
|
|
4814
|
-
if container
|
|
4815
|
-
container.kill()
|
|
4816
|
-
end
|
|
4817
|
-
return false
|
|
4818
|
-
end
|
|
4819
|
-
end
|
|
4820
|
-
|
|
4821
|
-
desc '', '', :hide => true
|
|
4822
|
-
method_option :login, :type => :string, :aliases => ["-l"], :default => ""
|
|
4823
|
-
|
|
4824
|
-
def config_flask_remote(image_name, port = 80)
|
|
4825
|
-
local_images = Docker::Image.all
|
|
4826
|
-
|
|
4827
|
-
docker_image_local = local_images.map {|x| x.info["RepoTags"]}.flatten.select {|y| y.eql? "#{image_name}:latest"}.flatten
|
|
4828
|
-
if docker_image_local.empty?
|
|
4829
|
-
say "no image"
|
|
4830
|
-
exit(1)
|
|
4831
|
-
end
|
|
4832
|
-
|
|
4833
|
-
begin
|
|
4834
|
-
login_content = options["login"]
|
|
4835
|
-
image_settings = {
|
|
4836
|
-
'Image' => "#{image_name}:latest",
|
|
4837
|
-
'User' => 'ds',
|
|
4838
|
-
'Cmd' => '/usr/local/cnvrg/start_super.sh',
|
|
4839
|
-
'WorkingDir' => '/home/ds/app',
|
|
4840
|
-
'ExposedPorts' => {
|
|
4841
|
-
'80/tcp' => {},
|
|
4842
|
-
},
|
|
4843
|
-
'HostConfig' => {
|
|
4844
|
-
'PortBindings' => {
|
|
4845
|
-
'80/tcp' => [
|
|
4846
|
-
{'HostPort' => "#{port}", 'HostIp' => 'localhost'}
|
|
4847
|
-
],
|
|
4848
|
-
},
|
|
4849
|
-
},
|
|
4850
|
-
}
|
|
4851
|
-
container = Docker::Container.create(image_settings)
|
|
4852
|
-
container.start()
|
|
4853
|
-
command = ["/bin/bash", "-lc", "sudo echo -e \"#{login_content}\" >/home/ds/.netrc"]
|
|
4854
|
-
container.exec(command, tty: true)
|
|
4855
|
-
command = ["/bin/bash", "-lc", "sudo chown -R ds:ds /home/ds/.netrc"]
|
|
4856
|
-
container.exec(command, tty: true)
|
|
4857
|
-
command = ["/bin/bash", "-lc", "sudo chmod 0600 /home/ds/.netrc"]
|
|
4858
|
-
container.exec(command, tty: true)
|
|
4859
|
-
say "#{container.id}:#{port}"
|
|
4860
|
-
rescue => e
|
|
4861
|
-
pus e
|
|
4862
|
-
if e.message.include? "is not running"
|
|
4863
|
-
return "port is taken"
|
|
4864
|
-
end
|
|
4865
|
-
puts "error"
|
|
4866
|
-
if container
|
|
4867
|
-
container.kill()
|
|
4538
|
+
desc 'Collect and send job utilization', '', :hide => true
|
|
4539
|
+
method_option :docker_id, :type => :string, :aliases => ["--docker_id"], :desc => "docker id to watch"
|
|
4540
|
+
method_option :is_on_gpu, :type => :boolean, :aliases => ["--is_on_gpu"], :desc => "is on gpu", :default => true
|
|
4541
|
+
def get_utilization()
|
|
4542
|
+
@exp = Experiment.new(ENV['CNVRG_OWNER'], ENV['CNVRG_PROJECT'], job_id: ENV['CNVRG_JOB_ID'])
|
|
4543
|
+
docker_id = options["docker_id"]
|
|
4544
|
+
while true do
|
|
4545
|
+
sleep 30
|
|
4546
|
+
begin
|
|
4547
|
+
stats = usage_metrics_in_docker(docker_id)
|
|
4548
|
+
if options["is_on_gpu"]
|
|
4549
|
+
gu = gpu_util(take_from_docker: true, docker_id: docker_id)
|
|
4550
|
+
stats['gpu_util'] = gu[0]
|
|
4551
|
+
stats['gpu'] = gu[1]
|
|
4552
|
+
end
|
|
4553
|
+
stats['docker_id'] = docker_id
|
|
4554
|
+
@exp.send_machine_stats [stats] unless stats.empty?
|
|
4555
|
+
rescue => e
|
|
4556
|
+
log_error(e)
|
|
4557
|
+
log_message("Failed to upload ongoing stats, continuing with experiment", Thor::Shell::Color::YELLOW)
|
|
4868
4558
|
end
|
|
4869
|
-
return false
|
|
4870
4559
|
end
|
|
4871
4560
|
end
|
|
4872
4561
|
|
|
4873
|
-
desc '', '', :hide => true
|
|
4874
|
-
method_option :login, :type => :string, :aliases => ["-l"], :default => ""
|
|
4875
|
-
|
|
4876
|
-
def config_flask_remote_gpu(image_name, port = 80)
|
|
4877
|
-
local_images = Docker::Image.all
|
|
4878
|
-
|
|
4879
|
-
docker_image_local = local_images.map {|x| x.info["RepoTags"]}.flatten.select {|y| y.eql? "#{image_name}:latest"}.flatten
|
|
4880
|
-
if docker_image_local.empty?
|
|
4881
|
-
say "no image"
|
|
4882
|
-
exit(1)
|
|
4883
|
-
end
|
|
4884
|
-
|
|
4885
|
-
begin
|
|
4886
|
-
login_content = options["login"]
|
|
4887
|
-
container_id = `nvidia-docker run -itd -p 80:80 -w /home/ds/app #{image_name}:latest /usr/local/cnvrg/start_super.sh`
|
|
4888
|
-
container_id = container_id.gsub("\n", "")
|
|
4889
|
-
container = Docker::Container.get(container_id)
|
|
4890
|
-
command = ["/bin/bash", "-lc", "sudo echo -e \"#{login_content}\" >/home/ds/.netrc"]
|
|
4891
|
-
container.exec(command, tty: true)
|
|
4892
|
-
command = ["/bin/bash", "-lc", "sudo chown -R ds:ds /home/ds/.netrc"]
|
|
4893
|
-
container.exec(command, tty: true)
|
|
4894
|
-
command = ["/bin/bash", "-lc", "sudo chmod 0600 /home/ds/.netrc"]
|
|
4895
|
-
container.exec(command, tty: true)
|
|
4896
|
-
say "#{container.id}:#{port}"
|
|
4897
|
-
rescue => e
|
|
4898
|
-
puts e
|
|
4899
|
-
if e.message.include? "is not running"
|
|
4900
|
-
return "port is taken"
|
|
4901
|
-
end
|
|
4902
|
-
puts "error"
|
|
4903
|
-
if container
|
|
4904
|
-
container.kill()
|
|
4905
|
-
end
|
|
4906
|
-
return false
|
|
4907
|
-
end
|
|
4908
|
-
end
|
|
4909
4562
|
|
|
4910
4563
|
desc '', '', :hide => true
|
|
4911
4564
|
|
|
@@ -4931,39 +4584,10 @@ module Cnvrg
|
|
|
4931
4584
|
|
|
4932
4585
|
end
|
|
4933
4586
|
|
|
4934
|
-
desc '
|
|
4935
|
-
|
|
4936
|
-
|
|
4937
|
-
|
|
4938
|
-
method_option :gpu, :type => :boolean, :aliases => ["-g","--gpu"], :default => false
|
|
4939
|
-
def upload_image(image_name,image_path)
|
|
4940
|
-
begin
|
|
4941
|
-
verify_logged_in(false)
|
|
4942
|
-
log_start(__method__, args, options)
|
|
4943
|
-
|
|
4944
|
-
@image = Cnvrg::Images.new()
|
|
4945
|
-
say "Uploading new docker image file", Thor::Shell::Color::BLUE
|
|
4946
|
-
workdir = options[:workdir]
|
|
4947
|
-
description = options[:description]
|
|
4948
|
-
user = options[:user]
|
|
4949
|
-
is_gpu = options[:gpu]
|
|
4950
|
-
res = @image.upload_docker_image(image_path, image_name, workdir, user, description, is_gpu)
|
|
4951
|
-
if res["status"] == 200
|
|
4952
|
-
image_slug = res["id"]
|
|
4953
|
-
owner = CLI.get_owner
|
|
4954
|
-
image_url = "#{Cnvrg::Helpers.remote_url}/#{owner}/settings/images/#{image_slug}"
|
|
4955
|
-
log_message("Successfully uploaded image: #{image_url}", Thor::Shell::Color::GREEN, true)
|
|
4956
|
-
|
|
4957
|
-
|
|
4958
|
-
else
|
|
4959
|
-
log_message("Couldn't upload image: #{image_name}", Thor::Shell::Color::RED, true)
|
|
4960
|
-
|
|
4961
|
-
end
|
|
4962
|
-
rescue => e
|
|
4963
|
-
log_error(e)
|
|
4964
|
-
end
|
|
4965
|
-
|
|
4966
|
-
|
|
4587
|
+
desc 'file_exists', '', :hide => true
|
|
4588
|
+
def file_exists(file)
|
|
4589
|
+
exit(0) if File.exists? file
|
|
4590
|
+
exit(1)
|
|
4967
4591
|
end
|
|
4968
4592
|
|
|
4969
4593
|
|
|
@@ -5143,29 +4767,40 @@ module Cnvrg
|
|
|
5143
4767
|
method_option :project_slug, :type => :string, :aliases => ["-s"], :desc => "project slug"
|
|
5144
4768
|
method_option :project_owner, :type => :string, :aliases => ["-o"], :desc => "project slug"
|
|
5145
4769
|
method_option :frequency, :type => :numeric, :aliases => ["-f"], :desc => "poll frequency"
|
|
4770
|
+
method_option :fetch_slugs, :type => :boolean, :default => false, :desc => "Fetch experiments slugs to compare"
|
|
5146
4771
|
|
|
5147
4772
|
def compare_experiments
|
|
5148
4773
|
verify_logged_in(true)
|
|
5149
4774
|
log_start(__method__, args, options)
|
|
5150
4775
|
exps_map = {}
|
|
4776
|
+
copied_commits = []
|
|
5151
4777
|
|
|
5152
|
-
if options[:slugs].blank?
|
|
4778
|
+
if options[:slugs].blank? and options[:fetch_slugs].blank?
|
|
5153
4779
|
log_message("No experiments slugs given", Thor::Shell::Color::RED)
|
|
5154
4780
|
return false
|
|
5155
4781
|
end
|
|
5156
|
-
|
|
5157
|
-
|
|
5158
|
-
log_message("No experiments slugs given", Thor::Shell::Color::RED)
|
|
5159
|
-
return false
|
|
4782
|
+
if options[:slugs].present?
|
|
4783
|
+
slugs = options[:slugs].split(",")
|
|
5160
4784
|
end
|
|
4785
|
+
|
|
5161
4786
|
frequency = options[:frequency] || 5
|
|
5162
4787
|
namespace = options[:namespace]
|
|
5163
4788
|
project_dir = is_cnvrg_dir(Dir.pwd)
|
|
5164
4789
|
@project = Project.new(project_home=project_dir, slug: options[:project_slug], owner: options[:project_owner])
|
|
4790
|
+
fetch_slugs = options[:fetch_slugs]
|
|
4791
|
+
webapp_slug = ENV["CNVRG_JOB_ID"]
|
|
4792
|
+
if fetch_slugs and webapp_slug.present?
|
|
4793
|
+
slugs = @project.fetch_webapp_slugs(webapp_slug)
|
|
4794
|
+
end
|
|
4795
|
+
if slugs.blank?
|
|
4796
|
+
log_message("No experiments slugs given", Thor::Shell::Color::RED)
|
|
4797
|
+
return false
|
|
4798
|
+
end
|
|
5165
4799
|
|
|
4800
|
+
log_message("compare is running")
|
|
5166
4801
|
while true
|
|
4802
|
+
log_message("compare is running for slugs #{slugs}")
|
|
5167
4803
|
slugs.each do |exp_slug|
|
|
5168
|
-
|
|
5169
4804
|
begin
|
|
5170
4805
|
if exps_map[exp_slug].blank?
|
|
5171
4806
|
exp = @project.get_experiment(exp_slug)["experiment"]
|
|
@@ -5179,15 +4814,23 @@ module Cnvrg
|
|
|
5179
4814
|
log_message("#{exp_name} has ended, getting files from end commit", Thor::Shell::Color::BLUE)
|
|
5180
4815
|
Cnvrg::Helpers.get_experiment_events_log_from_server(exp, @project)
|
|
5181
4816
|
exps_map[exp_slug] = exp
|
|
5182
|
-
|
|
4817
|
+
else
|
|
5183
4818
|
log_message("#{exp_name} is running should get logs", Thor::Shell::Color::BLUE)
|
|
5184
|
-
Cnvrg::Helpers.get_experiment_events_log_via_kubectl(exp, namespace)
|
|
4819
|
+
success = Cnvrg::Helpers.get_experiment_events_log_via_kubectl(exp, namespace)
|
|
4820
|
+
if !success and exp["last_successful_commit"].present? and !copied_commits.include?(exp["last_successful_commit"])
|
|
4821
|
+
log_message("Failed to get kube files, using last commit", Thor::Shell::Color::BLUE)
|
|
4822
|
+
Cnvrg::Helpers.get_experiment_events_log_from_server(exp, @project, commit: exp["last_successful_commit"])
|
|
4823
|
+
copied_commits << exp["last_successful_commit"]
|
|
4824
|
+
end
|
|
5185
4825
|
end
|
|
5186
4826
|
rescue => e
|
|
5187
4827
|
Cnvrg::Logger.log_error(e)
|
|
5188
4828
|
end
|
|
5189
4829
|
end
|
|
5190
4830
|
sleep frequency
|
|
4831
|
+
if fetch_slugs
|
|
4832
|
+
slugs = @project.fetch_webapp_slugs(webapp_slug, slugs: slugs)
|
|
4833
|
+
end
|
|
5191
4834
|
end
|
|
5192
4835
|
end
|
|
5193
4836
|
|
|
@@ -5273,127 +4916,6 @@ module Cnvrg
|
|
|
5273
4916
|
end
|
|
5274
4917
|
|
|
5275
4918
|
|
|
5276
|
-
desc 'pull_image', 'downloads and loads an image', :hide => true
|
|
5277
|
-
|
|
5278
|
-
def pull_image(image_name)
|
|
5279
|
-
begin
|
|
5280
|
-
verify_logged_in(false)
|
|
5281
|
-
log_start(__method__, args, options)
|
|
5282
|
-
owner = Cnvrg::CLI.get_owner()
|
|
5283
|
-
image = Cnvrg::Images.image_exist(owner, image_name)
|
|
5284
|
-
if !image
|
|
5285
|
-
log_message("Couldn't find image in cnvrg repository", Thor::Shell::Color::RED)
|
|
5286
|
-
exit(1)
|
|
5287
|
-
end
|
|
5288
|
-
path = download_image(image_name, image["slug"])
|
|
5289
|
-
if path
|
|
5290
|
-
log_message("Building image", Thor::Shell::Color::BLUE)
|
|
5291
|
-
Docker.options[:read_timeout] = 216000
|
|
5292
|
-
image = Docker::Image.build_from_dir(path, {'dockerfile' => 'Dockerfile.cpu', 't' => "#{image_name}:latest"}) do |v|
|
|
5293
|
-
begin
|
|
5294
|
-
if (log = JSON.parse(v)) && log.has_key?("stream")
|
|
5295
|
-
next if log["stream"].starts_with? "Step"
|
|
5296
|
-
$stdout.puts log["stream"]
|
|
5297
|
-
end
|
|
5298
|
-
rescue
|
|
5299
|
-
end
|
|
5300
|
-
|
|
5301
|
-
end
|
|
5302
|
-
|
|
5303
|
-
if not image.nil?
|
|
5304
|
-
FileUtils.rm_rf(path)
|
|
5305
|
-
checks = Helpers.checkmark()
|
|
5306
|
-
log_message("#{checks} Image built successfully", Thor::Shell::Color::GREEN)
|
|
5307
|
-
return image
|
|
5308
|
-
else
|
|
5309
|
-
|
|
5310
|
-
log_message("Could not build image", Thor::Shell::Color::RED)
|
|
5311
|
-
return false
|
|
5312
|
-
end
|
|
5313
|
-
else
|
|
5314
|
-
|
|
5315
|
-
log_message("Could not download image", Thor::Shell::Color::RED)
|
|
5316
|
-
return false
|
|
5317
|
-
|
|
5318
|
-
|
|
5319
|
-
end
|
|
5320
|
-
|
|
5321
|
-
# else
|
|
5322
|
-
# path = download_image(image_name,image["slug"])
|
|
5323
|
-
# if path
|
|
5324
|
-
# image = Docker::Image.import(path)
|
|
5325
|
-
# image.tag('repo' => image_name, 'tag' => 'latest')
|
|
5326
|
-
# if not image.nil?
|
|
5327
|
-
# say "Finished downloading image, cleaning up..", Thor::Shell::Color::GREEN
|
|
5328
|
-
# FileUtils.rm(path)
|
|
5329
|
-
# checks = Helpers.checkmark()
|
|
5330
|
-
# say "#{checks} Done", Thor::Shell::Color::GREEN
|
|
5331
|
-
# log_end(0)
|
|
5332
|
-
# return image
|
|
5333
|
-
# log_end(0)
|
|
5334
|
-
# else
|
|
5335
|
-
# say "Could not download image", Thor::Shell::Color::RED
|
|
5336
|
-
# return false
|
|
5337
|
-
# end
|
|
5338
|
-
#
|
|
5339
|
-
# end
|
|
5340
|
-
# end
|
|
5341
|
-
rescue => e
|
|
5342
|
-
|
|
5343
|
-
log_message "Error: couldn't build image", Thor::Shell::Color::RED
|
|
5344
|
-
log_error(e)
|
|
5345
|
-
|
|
5346
|
-
rescue SignalException
|
|
5347
|
-
say "\nAborting"
|
|
5348
|
-
exit(1)
|
|
5349
|
-
ensure
|
|
5350
|
-
if path
|
|
5351
|
-
FileUtils.rm_rf(path)
|
|
5352
|
-
|
|
5353
|
-
end
|
|
5354
|
-
end
|
|
5355
|
-
|
|
5356
|
-
|
|
5357
|
-
end
|
|
5358
|
-
|
|
5359
|
-
desc 'set_image', 'set image to a porject', :hide => true
|
|
5360
|
-
|
|
5361
|
-
def set_image(docker_image)
|
|
5362
|
-
verify_logged_in(true)
|
|
5363
|
-
log_start(__method__, args, options)
|
|
5364
|
-
working_dir = is_cnvrg_dir
|
|
5365
|
-
project = Project.new(working_dir)
|
|
5366
|
-
|
|
5367
|
-
local_images = Docker::Image.all
|
|
5368
|
-
docker_image_local = local_images.map {|x| x.info["RepoTags"]}.flatten.select {|y| y.include? docker_image}.flatten
|
|
5369
|
-
if docker_image_local.size == 0
|
|
5370
|
-
|
|
5371
|
-
if yes? "Image wasn't found locally, pull image from cnvrg repository?", Thor::Shell::Color::YELLOW
|
|
5372
|
-
image = pull(docker_image)
|
|
5373
|
-
if image
|
|
5374
|
-
log_message("downloaded image: #{docker_image}", Thor::Shell::Color::BLUE)
|
|
5375
|
-
@image = Images.new(working_dir, docker_image)
|
|
5376
|
-
else
|
|
5377
|
-
log_message("Could not create a new project with docker, image was not found", Thor::Shell::Color::RED)
|
|
5378
|
-
exit(1)
|
|
5379
|
-
end
|
|
5380
|
-
else
|
|
5381
|
-
log_message("Could not create a new project with docker, image was not found", Thor::Shell::Color::RED)
|
|
5382
|
-
exit(1)
|
|
5383
|
-
|
|
5384
|
-
end
|
|
5385
|
-
elsif docker_image_local.size == 1
|
|
5386
|
-
log_message("found image: #{docker_image_local[0]}, setting it up..", Thor::Shell::Color::BLUE)
|
|
5387
|
-
@image = Images.new(working_dir, docker_image_local[0])
|
|
5388
|
-
elsif docker_image_local.size > 1
|
|
5389
|
-
log_message("found #{docker_image_local.size} images, choose the image name you want to use", Thor::Shell::Color::BLUE)
|
|
5390
|
-
image_name = ask "#{docker_image_local.join("\n")}\n", Thor::Shell::Color::BLUE
|
|
5391
|
-
image_name = image_name.strip
|
|
5392
|
-
@image = Images.new(working_dir, image_name)
|
|
5393
|
-
end
|
|
5394
|
-
@image.update_image_activity(project.last_local_commit, nil)
|
|
5395
|
-
end
|
|
5396
|
-
|
|
5397
4919
|
desc 'check_pod_restart', 'Check pod restart', :hide => true
|
|
5398
4920
|
def check_pod_restart
|
|
5399
4921
|
Cnvrg::CLI.new.log_start(__method__, args, options)
|
|
@@ -5668,7 +5190,7 @@ module Cnvrg
|
|
|
5668
5190
|
|
|
5669
5191
|
if dirs.size == 0
|
|
5670
5192
|
log_message("Couldn't find cnvrg directory. Please start a new project", Thor::Shell::Color::RED)
|
|
5671
|
-
|
|
5193
|
+
puts Thread.current.backtrace
|
|
5672
5194
|
exit(1)
|
|
5673
5195
|
end
|
|
5674
5196
|
return dirs.join("/")
|
|
@@ -5771,7 +5293,7 @@ module Cnvrg
|
|
|
5771
5293
|
is_cnvrg = is_cnvrg_dir
|
|
5772
5294
|
if !is_cnvrg
|
|
5773
5295
|
say "You're not in a cnvrg project directory", Thor::Shell::Color::RED
|
|
5774
|
-
exit(
|
|
5296
|
+
exit(1)
|
|
5775
5297
|
end
|
|
5776
5298
|
|
|
5777
5299
|
end
|
|
@@ -5917,21 +5439,6 @@ module Cnvrg
|
|
|
5917
5439
|
|
|
5918
5440
|
end
|
|
5919
5441
|
|
|
5920
|
-
def container_changes(dir)
|
|
5921
|
-
container_id = is_project_with_docker(dir)
|
|
5922
|
-
if not container_id
|
|
5923
|
-
return false
|
|
5924
|
-
end
|
|
5925
|
-
container = Docker::Container.get(container_id)
|
|
5926
|
-
command = ['/bin/bash', '-lc', '/opt/ds/bin/pip freeze']
|
|
5927
|
-
pip = container.exec(command, tty: true)[0]
|
|
5928
|
-
command = ["/bin/bash", "-lc", "dpkg -l"]
|
|
5929
|
-
dpkg = container.exec(command, tty: true)[0]
|
|
5930
|
-
command = ["/bin/bash", "-lc", "cat /home/ds/.bash_history"]
|
|
5931
|
-
history = container.exec(command, tty: true)[0]
|
|
5932
|
-
diff = [pip, dpkg, history]
|
|
5933
|
-
return diff
|
|
5934
|
-
end
|
|
5935
5442
|
|
|
5936
5443
|
def is_port_taken(ip = Cnvrg::CLI::IP, port = Cnvrg::CLI::PORT, seconds = 1)
|
|
5937
5444
|
Timeout::timeout(seconds) do
|
|
@@ -6114,13 +5621,17 @@ module Cnvrg
|
|
|
6114
5621
|
|
|
6115
5622
|
end
|
|
6116
5623
|
|
|
6117
|
-
def gpu_util
|
|
5624
|
+
def gpu_util(take_from_docker: false, docker_id: nil)
|
|
6118
5625
|
if !Helpers.ubuntu?
|
|
6119
5626
|
return 0.0
|
|
6120
5627
|
end
|
|
6121
5628
|
stats = [[],[]]
|
|
6122
5629
|
begin
|
|
6123
|
-
|
|
5630
|
+
if take_from_docker
|
|
5631
|
+
gpu_stats = `docker exec -it #{docker_id} sh -c 'nvidia-smi --query-gpu=utilization.gpu,utilization.memory --format=csv'`
|
|
5632
|
+
else
|
|
5633
|
+
gpu_stats = `nvidia-smi --query-gpu=utilization.gpu,utilization.memory --format=csv`
|
|
5634
|
+
end
|
|
6124
5635
|
|
|
6125
5636
|
if !gpu_stats.nil?
|
|
6126
5637
|
gpu_stats = gpu_stats.split("\n")[1..-1]
|