cnvrg 1.6.38 → 1.9.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/cnvrg.gemspec +1 -4
- data/lib/cnvrg/Images.rb +0 -148
- data/lib/cnvrg/api.rb +8 -8
- data/lib/cnvrg/api_v2.rb +14 -0
- data/lib/cnvrg/cli.rb +288 -781
- data/lib/cnvrg/connect_job_ssh.rb +31 -0
- data/lib/cnvrg/data.rb +65 -12
- data/lib/cnvrg/datafiles.rb +483 -201
- data/lib/cnvrg/dataset.rb +65 -29
- data/lib/cnvrg/experiment.rb +10 -4
- data/lib/cnvrg/files.rb +46 -14
- data/lib/cnvrg/helpers.rb +34 -26
- data/lib/cnvrg/helpers/agent.rb +188 -0
- data/lib/cnvrg/helpers/executer.rb +162 -258
- data/lib/cnvrg/job_cli.rb +28 -53
- data/lib/cnvrg/job_ssh.rb +47 -0
- data/lib/cnvrg/logger.rb +4 -0
- data/lib/cnvrg/project.rb +45 -16
- data/lib/cnvrg/ssh.rb +0 -1
- data/lib/cnvrg/version.rb +1 -1
- metadata +9 -33
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: e708ef034df38ed0b4f5c1ac4bb02fa79a26c93b188f571256f75dbc9d2eaaa6
|
|
4
|
+
data.tar.gz: 6badf54b65660776e63c02c7d3c5dbbab83d0e1e83f6e877b48d77fad5ba3036
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 21d89ec4fb99c4102bc1e8e0e50df516339a1c9e9660ee8f0dd8acf3ae30bd27067f5ea4fe979de3b737bd6f748ced98f023100487a4226b7f21eed17975142c
|
|
7
|
+
data.tar.gz: 91fb2d10994c11e9b28ef3bbc128f847ac2efd641892c29ec1ec2b16d4b125266e85a6166153b66ab9e9e1c475190f6eca771e42d739a02c1136dbe8cb6c3abb
|
data/cnvrg.gemspec
CHANGED
|
@@ -31,7 +31,7 @@ Gem::Specification.new do |spec|
|
|
|
31
31
|
spec.add_runtime_dependency 'open4', '~> 1.3', '>= 1.3.4'
|
|
32
32
|
spec.add_runtime_dependency 'highline', '~> 1.7', '>= 1.7.8'
|
|
33
33
|
spec.add_runtime_dependency 'thor', '~> 0.19.0','>=0.19.1'
|
|
34
|
-
spec.add_runtime_dependency 'aws-sdk', '~>
|
|
34
|
+
spec.add_runtime_dependency 'aws-sdk', '~> 3.0'
|
|
35
35
|
spec.add_runtime_dependency 'signet', '~> 0.11.0'
|
|
36
36
|
spec.add_runtime_dependency 'google-cloud-env', '~> 1.2.1'
|
|
37
37
|
spec.add_runtime_dependency 'google-cloud-core', '~> 1.3.2'
|
|
@@ -40,11 +40,8 @@ Gem::Specification.new do |spec|
|
|
|
40
40
|
spec.add_runtime_dependency 'urlcrypt', '~> 0.1.1'
|
|
41
41
|
spec.add_runtime_dependency 'parallel', '~> 1.12.0'
|
|
42
42
|
spec.add_runtime_dependency 'azure-storage-blob', '~> 1.1.0'
|
|
43
|
-
|
|
44
43
|
spec.add_runtime_dependency 'logstash-logger', '~> 0.22.1'
|
|
45
|
-
spec.add_runtime_dependency 'docker-api', '~> 1.33'
|
|
46
44
|
spec.add_runtime_dependency 'activesupport', '~> 5.2.0'
|
|
47
45
|
spec.add_runtime_dependency 'ruby-progressbar'
|
|
48
|
-
spec.add_runtime_dependency 'net-ssh'
|
|
49
46
|
spec.add_runtime_dependency 'down'
|
|
50
47
|
end
|
data/lib/cnvrg/Images.rb
CHANGED
|
@@ -1,7 +1,5 @@
|
|
|
1
1
|
require 'fileutils'
|
|
2
2
|
require 'cnvrg/files'
|
|
3
|
-
require 'docker'
|
|
4
|
-
require 'net/ssh'
|
|
5
3
|
require 'mimemagic'
|
|
6
4
|
|
|
7
5
|
|
|
@@ -175,58 +173,6 @@ module Cnvrg
|
|
|
175
173
|
response = Cnvrg::API.request("users/#{owner}/images/#{slug}/commit_custom_image", 'POST', {image_logs:logs})
|
|
176
174
|
return response
|
|
177
175
|
end
|
|
178
|
-
def self.ssh_to_machine(resp)
|
|
179
|
-
|
|
180
|
-
sts_path = resp["result"]["sts_path"]
|
|
181
|
-
|
|
182
|
-
uri = URI.parse(sts_path)
|
|
183
|
-
|
|
184
|
-
http_object = Net::HTTP.new(uri.host, uri.port)
|
|
185
|
-
http_object.use_ssl = true if uri.scheme == 'https'
|
|
186
|
-
request = Net::HTTP::Get.new(sts_path)
|
|
187
|
-
|
|
188
|
-
body = ""
|
|
189
|
-
http_object.start do |http|
|
|
190
|
-
response = http.request request
|
|
191
|
-
body = response.read_body
|
|
192
|
-
end
|
|
193
|
-
|
|
194
|
-
URLcrypt::key = [body].pack('H*')
|
|
195
|
-
|
|
196
|
-
ip = URLcrypt.decrypt(resp["result"]["machine_i"])
|
|
197
|
-
|
|
198
|
-
user = URLcrypt.decrypt(resp["result"]["machine_u"])
|
|
199
|
-
key = URLcrypt.decrypt(resp["result"]["machine_k"])
|
|
200
|
-
tempssh = Tempfile.new "sshkey"
|
|
201
|
-
tempssh.write open(key).read
|
|
202
|
-
tempssh.rewind
|
|
203
|
-
key_path = tempssh.path
|
|
204
|
-
count = 0
|
|
205
|
-
while count < 5
|
|
206
|
-
|
|
207
|
-
begin
|
|
208
|
-
ssh = Net::SSH.start(ip, user=user, :keys => key_path, :timeout => 10)
|
|
209
|
-
if !ssh.nil?
|
|
210
|
-
return ssh
|
|
211
|
-
else
|
|
212
|
-
count+=1
|
|
213
|
-
sleep(2)
|
|
214
|
-
|
|
215
|
-
end
|
|
216
|
-
rescue
|
|
217
|
-
count+=1
|
|
218
|
-
sleep(2)
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
end
|
|
222
|
-
end
|
|
223
|
-
if tempssh
|
|
224
|
-
tempssh.close
|
|
225
|
-
tempssh.unlink
|
|
226
|
-
end
|
|
227
|
-
return false
|
|
228
|
-
end
|
|
229
|
-
|
|
230
176
|
|
|
231
177
|
|
|
232
178
|
def create_custom_image(new_image_name,working_dir,stored_commands)
|
|
@@ -270,100 +216,6 @@ module Cnvrg
|
|
|
270
216
|
File.open(@working_dir+"/.cnvrg/config.yml", "w+") { |f| f.write config.to_yaml }
|
|
271
217
|
end
|
|
272
218
|
|
|
273
|
-
def get_container(stop=false)
|
|
274
|
-
begin
|
|
275
|
-
container_id=is_container_exist()
|
|
276
|
-
|
|
277
|
-
if !container_id
|
|
278
|
-
return create_container()
|
|
279
|
-
else
|
|
280
|
-
container = Docker::Container.get(container_id)
|
|
281
|
-
status = container.json["State"]["Status"]
|
|
282
|
-
|
|
283
|
-
if status == "running"
|
|
284
|
-
return container
|
|
285
|
-
else
|
|
286
|
-
if stop
|
|
287
|
-
return false
|
|
288
|
-
end
|
|
289
|
-
res = container.start()
|
|
290
|
-
if res.info["State"]["Status"].eql? "exited" and res.info["State"]["Error"].include? "port is already allocated"
|
|
291
|
-
return create_container()
|
|
292
|
-
end
|
|
293
|
-
return container
|
|
294
|
-
end
|
|
295
|
-
end
|
|
296
|
-
rescue => e
|
|
297
|
-
if e.message.include? "No such container"
|
|
298
|
-
|
|
299
|
-
return create_container()
|
|
300
|
-
else
|
|
301
|
-
return false
|
|
302
|
-
end
|
|
303
|
-
end
|
|
304
|
-
|
|
305
|
-
end
|
|
306
|
-
|
|
307
|
-
def create_container(port=7654, is_remote=false)
|
|
308
|
-
begin
|
|
309
|
-
image_settings = {
|
|
310
|
-
'Image' => "#{@image_name}:latest",
|
|
311
|
-
'User' => 'ds',
|
|
312
|
-
'Cmd' => '/usr/local/cnvrg/run_ipython.sh',
|
|
313
|
-
'WorkingDir' => '/home/ds/notebooks',
|
|
314
|
-
'ExposedPorts' => {
|
|
315
|
-
'8888/tcp' => {},
|
|
316
|
-
},
|
|
317
|
-
'HostConfig' => {
|
|
318
|
-
'Binds' => ["#{@working_dir}:/home/ds/notebooks"],
|
|
319
|
-
'PortBindings' => {
|
|
320
|
-
'8888/tcp' => [
|
|
321
|
-
{'HostPort' => "#{port}", 'HostIp' => 'localhost'}
|
|
322
|
-
],
|
|
323
|
-
},
|
|
324
|
-
},
|
|
325
|
-
}
|
|
326
|
-
container = Docker::Container.create(image_settings)
|
|
327
|
-
container.start()
|
|
328
|
-
netrc = File.open(File.expand_path('~')+"/.netrc", "rb")
|
|
329
|
-
netrc_content = netrc.read
|
|
330
|
-
container.store_file("/home/ds/.netrc", netrc_content)
|
|
331
|
-
command = ["/bin/bash", "-lc", "sudo chmod 600 /home/ds/.netrc"]
|
|
332
|
-
p = container.exec(command, tty: true)
|
|
333
|
-
command = ["/bin/bash", "-lc", "sudo chown -R ds /home/ds/.netrc"]
|
|
334
|
-
p = container.exec(command, tty: true)
|
|
335
|
-
config = File.open(File.expand_path('~')+"/.cnvrg/config.yml", "rb")
|
|
336
|
-
config_content = config.read
|
|
337
|
-
container.store_file("/home/ds/.cnvrg/config.yml", config_content)
|
|
338
|
-
command = ["/bin/bash", "-lc", "sudo chown -R ds /home/ds/.cnvrg"]
|
|
339
|
-
container.exec(command, tty: true)
|
|
340
|
-
# Libraries instlled
|
|
341
|
-
save_installed_libraries(container)
|
|
342
|
-
config = {project_name: @project_name,
|
|
343
|
-
project_slug: @project_slug,
|
|
344
|
-
owner: @owner,
|
|
345
|
-
docker: true, image_base: @image_name, image_tag: @image_tag, container: container.id, port: port, image_slug: @image_slug}
|
|
346
|
-
|
|
347
|
-
File.open(@working_dir+"/.cnvrg/config.yml", "w+") { |f| f.write config.to_yaml }
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
return container
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
rescue => e
|
|
354
|
-
if e.message.include? "is not running"
|
|
355
|
-
return create_container(port-1)
|
|
356
|
-
end
|
|
357
|
-
return false
|
|
358
|
-
rescue SignalException
|
|
359
|
-
|
|
360
|
-
say "\nAborting", Thor::Shell::Color::RED
|
|
361
|
-
exit(1)
|
|
362
|
-
end
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
end
|
|
366
|
-
|
|
367
219
|
def save_installed_libraries(container)
|
|
368
220
|
begin
|
|
369
221
|
command = ['/bin/bash', '-lc', '/opt/ds/bin/pip freeze']
|
data/lib/cnvrg/api.rb
CHANGED
|
@@ -77,20 +77,22 @@ module Cnvrg
|
|
|
77
77
|
if response.to_hash[:status] == 404
|
|
78
78
|
return false
|
|
79
79
|
end
|
|
80
|
-
if parse_request
|
|
80
|
+
if parse_request
|
|
81
81
|
JSON.parse(response.body)
|
|
82
82
|
else
|
|
83
83
|
response
|
|
84
84
|
end
|
|
85
|
-
|
|
85
|
+
when 'POST', 'PUT'
|
|
86
86
|
conn.options.timeout = 4200
|
|
87
|
-
conn.options.open_timeout=180
|
|
87
|
+
conn.options.open_timeout = 180
|
|
88
|
+
conn.headers['Content-Type'] = "application/json"
|
|
88
89
|
retries = 0
|
|
89
90
|
success = false
|
|
91
|
+
data = data || {}
|
|
90
92
|
while !success and retries < 20
|
|
91
93
|
begin
|
|
92
|
-
response = conn.post "#{resource}", data if method.eql? 'POST'
|
|
93
|
-
response = conn.put "#{resource}", data if method.eql? 'PUT'
|
|
94
|
+
response = conn.post "#{resource}", data.to_json if method.eql? 'POST'
|
|
95
|
+
response = conn.put "#{resource}", data.to_json if method.eql? 'PUT'
|
|
94
96
|
success = true
|
|
95
97
|
Cnvrg::API.parse_version(response)
|
|
96
98
|
|
|
@@ -113,7 +115,7 @@ module Cnvrg
|
|
|
113
115
|
end
|
|
114
116
|
when 'POST_JSON'
|
|
115
117
|
conn.options.timeout = 4200
|
|
116
|
-
conn.options.open_timeout =4200
|
|
118
|
+
conn.options.open_timeout = 4200
|
|
117
119
|
conn.headers['Content-Type'] = "application/json"
|
|
118
120
|
new_data = JSON.dump(data)
|
|
119
121
|
|
|
@@ -124,8 +126,6 @@ module Cnvrg
|
|
|
124
126
|
begin
|
|
125
127
|
response = conn.post "#{resource}", new_data
|
|
126
128
|
success = true
|
|
127
|
-
Cnvrg::API.parse_version(response)
|
|
128
|
-
|
|
129
129
|
rescue => e
|
|
130
130
|
Cnvrg::Logger.log_error(e)
|
|
131
131
|
sleep(5)
|
data/lib/cnvrg/api_v2.rb
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
module Cnvrg
|
|
2
|
+
class API_V2 < API
|
|
3
|
+
ENDPOINT_VERSION = 'v2'
|
|
4
|
+
|
|
5
|
+
def self.endpoint_uri
|
|
6
|
+
api = get_api()
|
|
7
|
+
return "#{api}/#{Cnvrg::API_V2::ENDPOINT_VERSION}"
|
|
8
|
+
end
|
|
9
|
+
|
|
10
|
+
def self.is_response_success(response)
|
|
11
|
+
raise Exception.new("Bad status in response #{response.status}") if response.status != 200
|
|
12
|
+
end
|
|
13
|
+
end
|
|
14
|
+
end
|
data/lib/cnvrg/cli.rb
CHANGED
|
@@ -12,7 +12,6 @@ require 'digest' # sha1up
|
|
|
12
12
|
require "highline/import"
|
|
13
13
|
require 'socket'
|
|
14
14
|
require 'thor'
|
|
15
|
-
require 'docker'
|
|
16
15
|
require 'socket'
|
|
17
16
|
require 'timeout'
|
|
18
17
|
require 'fileutils'
|
|
@@ -28,13 +27,11 @@ require 'cnvrg/auth'
|
|
|
28
27
|
require 'cnvrg/project'
|
|
29
28
|
require 'cnvrg/files'
|
|
30
29
|
require 'cnvrg/experiment'
|
|
31
|
-
require 'cnvrg/Images'
|
|
32
30
|
require 'cnvrg/image'
|
|
33
31
|
require 'cnvrg/dataset'
|
|
34
32
|
require 'cnvrg/datafiles'
|
|
35
33
|
require 'cnvrg/data'
|
|
36
34
|
require 'cnvrg/storage'
|
|
37
|
-
require 'cnvrg/ssh'
|
|
38
35
|
require 'cnvrg/result'
|
|
39
36
|
require 'cnvrg/logger'
|
|
40
37
|
require 'cnvrg/org_helpers'
|
|
@@ -49,6 +46,9 @@ require 'cnvrg/downloader/clients/s3_client'
|
|
|
49
46
|
require 'cnvrg/downloader/clients/gcp_client'
|
|
50
47
|
require 'cnvrg/downloader/clients/azure_client'
|
|
51
48
|
require 'cnvrg/job_cli'
|
|
49
|
+
require 'cnvrg/job_ssh'
|
|
50
|
+
require 'cnvrg/connect_job_ssh'
|
|
51
|
+
require 'cnvrg/api_v2'
|
|
52
52
|
|
|
53
53
|
class Thor
|
|
54
54
|
module Base
|
|
@@ -175,6 +175,9 @@ module Cnvrg
|
|
|
175
175
|
desc "job", "manage running jobs", :hide => false
|
|
176
176
|
subcommand "job", JobCli
|
|
177
177
|
|
|
178
|
+
desc "ssh", "ssh into running jobs", :hide => false
|
|
179
|
+
subcommand "ssh", JobSsh
|
|
180
|
+
|
|
178
181
|
desc "image [COMMAND]", "build existing images", :hide => true
|
|
179
182
|
subcommand "image", ImageCli
|
|
180
183
|
|
|
@@ -819,9 +822,9 @@ module Cnvrg
|
|
|
819
822
|
end
|
|
820
823
|
|
|
821
824
|
desc 'data verify', 'Verify datasets', :hide => true
|
|
822
|
-
method_option :timeout, :type => :numeric, :aliases => ["-t", "--timeout"], :desc => "Time to wait before returning final answer", :default =>
|
|
825
|
+
method_option :timeout, :type => :numeric, :aliases => ["-t", "--timeout"], :desc => "Time to wait before returning final answer", :default => nil
|
|
823
826
|
|
|
824
|
-
def verify_datasets(dataset_titles, timeout=
|
|
827
|
+
def verify_datasets(dataset_titles, timeout=nil)
|
|
825
828
|
begin
|
|
826
829
|
verify_logged_in(false)
|
|
827
830
|
log_start(__method__, args, options)
|
|
@@ -830,21 +833,31 @@ module Cnvrg
|
|
|
830
833
|
log_message("All datasets are verified", Thor::Shell::Color::BLUE) if verified
|
|
831
834
|
log_message("Failed to verify datasets", Thor::Shell::Color::RED) if !verified
|
|
832
835
|
exit(1) if !verified
|
|
833
|
-
|
|
834
836
|
rescue SignalException
|
|
835
837
|
say "\nAborting", Thor::Shell::Color::RED
|
|
836
838
|
exit(1)
|
|
837
839
|
end
|
|
838
840
|
end
|
|
839
841
|
|
|
842
|
+
desc 'data scan', 'Lookup datasets', :hide => true
|
|
843
|
+
def scan_datasets()
|
|
844
|
+
begin
|
|
845
|
+
verify_logged_in(false)
|
|
846
|
+
log_start(__method__, args, options)
|
|
847
|
+
log_message("Scanning datasets", Thor::Shell::Color::BLUE)
|
|
848
|
+
datasets = Dataset.scan_datasets()
|
|
849
|
+
puts(datasets.to_json)
|
|
850
|
+
end
|
|
851
|
+
end
|
|
852
|
+
|
|
840
853
|
desc 'data clone', 'Clone dataset', :hide => true
|
|
841
854
|
method_option :commit, :type => :string, :aliases => ["-c", "--commit"], :default => ""
|
|
842
855
|
method_option :only_tree, :type => :boolean, :aliases => ["-t", "--tree"], :default => false
|
|
843
856
|
method_option :query, :type => :string, :aliases => ["-q", "--query"], :default => nil
|
|
844
857
|
method_option :read, :type => :boolean, :aliases => ["-r", "--read"], :default => false
|
|
845
858
|
method_option :remote, :type => :boolean, :aliases => ["-h", "--remote"], :default => false
|
|
846
|
-
|
|
847
|
-
def clone_data(dataset_url,only_tree=false,commit=nil,query=nil,read=false,remote=false, relative: false)
|
|
859
|
+
method_option :soft, :type => :boolean, :aliases => ["-s", "--soft"], :default => false, :hide => true
|
|
860
|
+
def clone_data(dataset_url, only_tree=false, commit=nil, query=nil, read=false, remote=false, flatten: false, relative: false, soft: false)
|
|
848
861
|
begin
|
|
849
862
|
verify_logged_in(false)
|
|
850
863
|
log_start(__method__, args, options)
|
|
@@ -853,10 +866,10 @@ module Cnvrg
|
|
|
853
866
|
read = options["read"] || read || false
|
|
854
867
|
remote = options["remote"] || remote || false
|
|
855
868
|
query = options['query'].presence || query.presence
|
|
869
|
+
soft = options['soft'] || soft
|
|
856
870
|
if query.present?
|
|
857
|
-
return clone_data_query(dataset_url, query)
|
|
871
|
+
return clone_data_query(dataset_url, query, flatten, soft: soft)
|
|
858
872
|
end
|
|
859
|
-
@executer = Cnvrg::Helpers::Executer.get_executer
|
|
860
873
|
|
|
861
874
|
url_parts = dataset_url.split("/")
|
|
862
875
|
project_index = Cnvrg::Helpers.look_for_in_path(dataset_url, "datasets")
|
|
@@ -868,6 +881,8 @@ module Cnvrg
|
|
|
868
881
|
dataset_name = response["result"]["name"]
|
|
869
882
|
dataset_home = Dir.pwd+"/"+dataset_name
|
|
870
883
|
|
|
884
|
+
Dataset.stop_if_dataset_present(dataset_home, dataset_name, commit: response["result"]["commit"]) if soft
|
|
885
|
+
|
|
871
886
|
check = Helpers.checkmark
|
|
872
887
|
if @dataset.init_home(remote:remote)
|
|
873
888
|
log_message("Cloning #{dataset_name}", Thor::Shell::Color::BLUE)
|
|
@@ -875,14 +890,12 @@ module Cnvrg
|
|
|
875
890
|
log_message("Downloading files", Thor::Shell::Color::BLUE)
|
|
876
891
|
if @dataset.softlinked?
|
|
877
892
|
@files.cp_ds(relative: relative)
|
|
878
|
-
@executer.set_dataset_status(dataset: @dataset.slug, status: "cloned") if @executer
|
|
879
893
|
log_message("#{check} Clone finished successfully", Thor::Shell::Color::GREEN)
|
|
880
894
|
@dataset.write_success
|
|
881
895
|
return
|
|
882
896
|
end
|
|
883
897
|
|
|
884
898
|
if only_tree
|
|
885
|
-
|
|
886
899
|
success = Dataset.clone_tree(commit: commit, dataset_home: dataset_home)
|
|
887
900
|
return if success
|
|
888
901
|
end
|
|
@@ -900,7 +913,7 @@ module Cnvrg
|
|
|
900
913
|
|
|
901
914
|
while files['keys'].length > 0
|
|
902
915
|
Cnvrg::Logger.log_info("download multiple files, #{downloaded_files.size} files downloaded")
|
|
903
|
-
@files.download_multiple_files_s3(files, @dataset.local_path, progressbar: progressbar, read_only: read)
|
|
916
|
+
@files.download_multiple_files_s3(files, @dataset.local_path, progressbar: progressbar, read_only: read, flatten: flatten)
|
|
904
917
|
|
|
905
918
|
downloaded_files += files['keys'].length
|
|
906
919
|
files = @files.get_clone_chunk(commit: commit, latest_id: files['latest'])
|
|
@@ -908,7 +921,6 @@ module Cnvrg
|
|
|
908
921
|
progressbar.finish
|
|
909
922
|
if downloaded_files == files_count
|
|
910
923
|
Dataset.verify_cnvrgignore_exist(dataset_name, false)
|
|
911
|
-
@executer.set_dataset_status(dataset: @dataset.slug, status: "cloned") if @executer
|
|
912
924
|
log_message("#{check} Clone finished successfully", Thor::Shell::Color::GREEN)
|
|
913
925
|
@dataset.write_success
|
|
914
926
|
### if read, dont generate idx (but create idx.yml) if not read, generate idx.
|
|
@@ -930,12 +942,14 @@ module Cnvrg
|
|
|
930
942
|
|
|
931
943
|
desc 'data clone_query', 'Clone dataset _query', :hide => true
|
|
932
944
|
method_option :query, :type => :string, :aliases => ["-q", "--query"], :default => ""
|
|
933
|
-
|
|
945
|
+
method_option :soft, :type => :boolean, :aliases => ["-s", "--soft"], :default => false, :hide => true
|
|
946
|
+
def clone_data_query(dataset_url, query=nil, flatten=false, soft: false)
|
|
934
947
|
begin
|
|
935
948
|
verify_logged_in(false)
|
|
936
|
-
|
|
949
|
+
#@executer = Cnvrg::Helpers::Executer.get_executer
|
|
937
950
|
log_start(__method__, args, options)
|
|
938
951
|
query = options["query"] || query
|
|
952
|
+
soft = options["soft"] || soft
|
|
939
953
|
if !query.present?
|
|
940
954
|
log_message("Argument missing : query", Thor::Shell::Color::RED)
|
|
941
955
|
exit(1)
|
|
@@ -945,13 +959,14 @@ module Cnvrg
|
|
|
945
959
|
project_index = Cnvrg::Helpers.look_for_in_path(dataset_url, "datasets")
|
|
946
960
|
slug = url_parts[project_index + 1]
|
|
947
961
|
owner = url_parts[project_index - 1]
|
|
948
|
-
|
|
949
962
|
response = Cnvrg::API.request("users/#{owner}/datasets/#{slug}/search/#{query}", 'GET')
|
|
950
963
|
Cnvrg::CLI.is_response_success(response,true)
|
|
951
964
|
dataset_name = response["results"]["name"]
|
|
952
965
|
dataset_slug = response["results"]["slug"]
|
|
953
|
-
dataset_home =
|
|
966
|
+
dataset_home = Dir.pwd+"/"+dataset_slug
|
|
967
|
+
Dataset.stop_if_dataset_present(dataset_home, dataset_name) if soft
|
|
954
968
|
|
|
969
|
+
# dataset_home = Dir.pwd
|
|
955
970
|
if Dataset.blank_clone(owner, dataset_name, dataset_slug)
|
|
956
971
|
dataset = Dataset.new(dataset_home)
|
|
957
972
|
log_message("Cloning #{dataset_name}", Thor::Shell::Color::BLUE)
|
|
@@ -966,6 +981,7 @@ module Cnvrg
|
|
|
966
981
|
},
|
|
967
982
|
in_threads: ParallelThreads
|
|
968
983
|
}
|
|
984
|
+
|
|
969
985
|
begin
|
|
970
986
|
log_message("Downloading files", Thor::Shell::Color::BLUE)
|
|
971
987
|
Parallel.map((response["results"]["query_files"]), parallel_options) do |f|
|
|
@@ -974,6 +990,7 @@ module Cnvrg
|
|
|
974
990
|
file_name = relative_path_dir.pop()
|
|
975
991
|
relative_path_dir = relative_path_dir.join("/")
|
|
976
992
|
abs_path = dataset_home + "/" + relative_path_dir
|
|
993
|
+
abs_path = dataset_home if flatten
|
|
977
994
|
begin
|
|
978
995
|
FileUtils.mkdir_p(abs_path) unless File.exist? (abs_path + "/" + file_name)
|
|
979
996
|
rescue
|
|
@@ -981,14 +998,14 @@ module Cnvrg
|
|
|
981
998
|
exit(1)
|
|
982
999
|
end
|
|
983
1000
|
begin
|
|
984
|
-
File.write "#{abs_path}/#{file_name}", open(f["
|
|
985
|
-
rescue
|
|
1001
|
+
File.write "#{abs_path}/#{file_name}", open(f["url"]).read unless File.exist? (abs_path + "/" + file_name)
|
|
1002
|
+
rescue => e
|
|
986
1003
|
log_message("Could not download file: #{f["fullpath"]}", Thor::Shell::Color::RED)
|
|
987
1004
|
exit(1)
|
|
988
1005
|
end
|
|
989
1006
|
|
|
990
1007
|
end
|
|
991
|
-
|
|
1008
|
+
#@executer.set_dataset_status(dataset: dataset.slug, status: "cloned") if @executer.present?
|
|
992
1009
|
rescue Interrupt
|
|
993
1010
|
log_message("Couldn't download", Thor::Shell::Color::RED)
|
|
994
1011
|
exit(1)
|
|
@@ -998,7 +1015,7 @@ module Cnvrg
|
|
|
998
1015
|
check = Helpers.checkmark
|
|
999
1016
|
log_message("#{check} Clone finished successfully", Thor::Shell::Color::GREEN)
|
|
1000
1017
|
dataset.write_success(in_folder=true)
|
|
1001
|
-
rescue
|
|
1018
|
+
rescue => e
|
|
1002
1019
|
exit(1)
|
|
1003
1020
|
end
|
|
1004
1021
|
end
|
|
@@ -1008,32 +1025,6 @@ module Cnvrg
|
|
|
1008
1025
|
end
|
|
1009
1026
|
end
|
|
1010
1027
|
|
|
1011
|
-
desc 'init_data_container', 'Init dataset directory', :hide => true
|
|
1012
|
-
method_option :login_content, :type => :string, :aliases => ["-l"], :default => ""
|
|
1013
|
-
|
|
1014
|
-
def init_data_container(container)
|
|
1015
|
-
begin
|
|
1016
|
-
login_content = options["login_content"]
|
|
1017
|
-
|
|
1018
|
-
container = Docker::Container.get(container)
|
|
1019
|
-
command = ["/bin/bash", "-lc", "sudo echo -e \"#{login_content}\" >/home/ds/.netrc"]
|
|
1020
|
-
container.exec(command, tty: true)
|
|
1021
|
-
command = ["/bin/bash", "-lc", "mkdir /home/ds/.cnvrg"]
|
|
1022
|
-
container.exec(command, tty: true)
|
|
1023
|
-
command = ["/bin/bash", "-lc", "mkdir /home/ds/.cnvrg/tmp"]
|
|
1024
|
-
container.exec(command, tty: true)
|
|
1025
|
-
command = ["/bin/bash", "-lc", "sudo chown -R ds /home/ds/.cnvrg /home/ds/.netrc"]
|
|
1026
|
-
container.exec(command, tty: true)
|
|
1027
|
-
command = ["/bin/bash", "-lc", "sudo chmod 0600 /home/ds/.netrc"]
|
|
1028
|
-
container.exec(command, tty: true)
|
|
1029
|
-
|
|
1030
|
-
rescue SignalException
|
|
1031
|
-
|
|
1032
|
-
say "\nAborting", Thor::Shell::Color::RED
|
|
1033
|
-
exit(1)
|
|
1034
|
-
end
|
|
1035
|
-
end
|
|
1036
|
-
|
|
1037
1028
|
desc 'data_snap', 'Init dataset directory', :hide => true
|
|
1038
1029
|
method_option :public, :type => :boolean, :aliases => ["-p", "--public"], :default => false
|
|
1039
1030
|
|
|
@@ -1184,17 +1175,29 @@ module Cnvrg
|
|
|
1184
1175
|
end
|
|
1185
1176
|
|
|
1186
1177
|
desc '', '', :hide => true
|
|
1187
|
-
def
|
|
1178
|
+
def get_owner_slug(url_or_slug)
|
|
1179
|
+
if url_or_slug =~ URI::regexp
|
|
1180
|
+
# Find owner and slug in url
|
|
1181
|
+
url_parts = url_or_slug.split("/")
|
|
1182
|
+
project_index = Cnvrg::Helpers.look_for_in_path(url_or_slug, "datasets")
|
|
1183
|
+
slug = url_parts[project_index + 1]
|
|
1184
|
+
owner = url_parts[project_index - 1]
|
|
1185
|
+
else
|
|
1186
|
+
# Find owner in config file
|
|
1187
|
+
owner = CLI.get_owner
|
|
1188
|
+
slug = url_or_slug
|
|
1189
|
+
end
|
|
1190
|
+
return owner, slug
|
|
1191
|
+
end
|
|
1192
|
+
|
|
1193
|
+
desc '', '', :hide => true
|
|
1194
|
+
def data_put(dataset_url, files: [], dir: '', commit: '', chunk_size: 1000, force: false, threads: 15, message: nil)
|
|
1188
1195
|
begin
|
|
1189
1196
|
verify_logged_in(false)
|
|
1190
1197
|
log_start(__method__, args, options)
|
|
1191
1198
|
|
|
1192
|
-
|
|
1193
|
-
|
|
1194
|
-
project_index = Cnvrg::Helpers.look_for_in_path(dataset_url, "datasets")
|
|
1195
|
-
slug = url_parts[project_index + 1]
|
|
1196
|
-
owner = url_parts[project_index - 1]
|
|
1197
|
-
@dataset = Dataset.new(dataset_url: dataset_url)
|
|
1199
|
+
owner, slug = get_owner_slug(dataset_url)
|
|
1200
|
+
@dataset = Dataset.new(dataset_info: {:owner => owner, :slug => slug})
|
|
1198
1201
|
@datafiles = Cnvrg::Datafiles.new(owner, slug, dataset: @dataset)
|
|
1199
1202
|
@files = @datafiles.verify_files_exists(files)
|
|
1200
1203
|
|
|
@@ -1218,28 +1221,33 @@ module Cnvrg
|
|
|
1218
1221
|
else
|
|
1219
1222
|
@commit = commit
|
|
1220
1223
|
end
|
|
1221
|
-
|
|
1224
|
+
|
|
1225
|
+
# dir shouldnt have starting or ending slash.
|
|
1222
1226
|
dir = dir[0..-2] if dir.end_with? '/'
|
|
1223
1227
|
dir = dir[1..-1] if dir.start_with? '/'
|
|
1224
1228
|
|
|
1225
|
-
@
|
|
1226
|
-
|
|
1227
|
-
|
|
1228
|
-
|
|
1229
|
+
@datafiles.upload_multiple_files_optimized(
|
|
1230
|
+
@files,
|
|
1231
|
+
@commit,
|
|
1232
|
+
force: force,
|
|
1233
|
+
chunk_size: chunk_size,
|
|
1234
|
+
prefix: dir,
|
|
1235
|
+
threads: threads
|
|
1236
|
+
)
|
|
1237
|
+
|
|
1238
|
+
# This is for backwards compatibility only and should be removed in future versions:
|
|
1239
|
+
res = @datafiles.put_commit(@commit)
|
|
1240
|
+
unless res.is_success?
|
|
1241
|
+
raise SignalException.new(1, res.msg)
|
|
1229
1242
|
end
|
|
1230
|
-
|
|
1231
|
-
|
|
1232
|
-
|
|
1233
|
-
|
|
1234
|
-
|
|
1235
|
-
|
|
1236
|
-
res = @datafiles.end_commit(@commit,false, success: true )
|
|
1237
|
-
msg = res['result']
|
|
1238
|
-
response = Cnvrg::Result.new(Cnvrg::CLI.is_response_success(res, true), msg)
|
|
1239
|
-
unless response.is_success?
|
|
1240
|
-
raise SignalException.new(1, res.msg)
|
|
1241
|
-
end
|
|
1243
|
+
|
|
1244
|
+
res = @datafiles.end_commit(@commit,false, success: true, commit_type: "put")
|
|
1245
|
+
msg = res['result']
|
|
1246
|
+
response = Cnvrg::Result.new(Cnvrg::CLI.is_response_success(res, true), msg)
|
|
1247
|
+
unless response.is_success?
|
|
1248
|
+
raise SignalException.new(1, res.msg)
|
|
1242
1249
|
end
|
|
1250
|
+
|
|
1243
1251
|
log_message("Uploading files finished Successfully", Thor::Shell::Color::GREEN)
|
|
1244
1252
|
rescue SignalException => e
|
|
1245
1253
|
log_message(e.message, Thor::Shell::Color::RED)
|
|
@@ -1248,7 +1256,49 @@ module Cnvrg
|
|
|
1248
1256
|
end
|
|
1249
1257
|
|
|
1250
1258
|
|
|
1259
|
+
desc '', '', :hide => true
|
|
1260
|
+
def data_rm(dataset_url, regex_list: [], commit: '', message: nil)
|
|
1261
|
+
begin
|
|
1262
|
+
verify_logged_in(false)
|
|
1263
|
+
log_start(__method__, args, options)
|
|
1251
1264
|
|
|
1265
|
+
owner, slug = get_owner_slug(dataset_url)
|
|
1266
|
+
@dataset = Dataset.new(dataset_info: {:owner => owner, :slug => slug})
|
|
1267
|
+
@datafiles = Cnvrg::Datafiles.new(owner, slug, dataset: @dataset)
|
|
1268
|
+
|
|
1269
|
+
# Init a new commit
|
|
1270
|
+
response = @datafiles.start_commit(false, true, chunks: 1, message: message )
|
|
1271
|
+
unless response #means we failed in the start commit.
|
|
1272
|
+
raise SignalException.new(1, "Cant put files into dataset, check the dataset id")
|
|
1273
|
+
end
|
|
1274
|
+
@commit = response['result']['commit_sha1']
|
|
1275
|
+
files_to_delete, folders_to_delete, job_id = @datafiles.delete_multiple_files(@commit, regex_list)
|
|
1276
|
+
log_message("Deleting #{files_to_delete} files and #{folders_to_delete} folders", Thor::Shell::Color::GREEN)
|
|
1277
|
+
|
|
1278
|
+
total_files = files_to_delete + folders_to_delete
|
|
1279
|
+
current_progress = 0
|
|
1280
|
+
progressbar = @datafiles.create_progressbar("Delete Progress", total_files)
|
|
1281
|
+
chunk_size = 1000
|
|
1282
|
+
offset = 0
|
|
1283
|
+
while current_progress < total_files
|
|
1284
|
+
current_progress = @datafiles.delete_file_chunk(@commit, regex_list, chunk_size, offset)
|
|
1285
|
+
progressbar.progress = current_progress
|
|
1286
|
+
offset += chunk_size
|
|
1287
|
+
end
|
|
1288
|
+
|
|
1289
|
+
res = @datafiles.end_commit(@commit,false, success: true)
|
|
1290
|
+
msg = res['result']
|
|
1291
|
+
response = Cnvrg::Result.new(Cnvrg::CLI.is_response_success(res, true), msg)
|
|
1292
|
+
unless response.is_success?
|
|
1293
|
+
raise SignalException.new(1, res.msg)
|
|
1294
|
+
end
|
|
1295
|
+
|
|
1296
|
+
log_message("Deleting files finished Successfully", Thor::Shell::Color::GREEN)
|
|
1297
|
+
rescue SignalException => e
|
|
1298
|
+
log_message(e.message, Thor::Shell::Color::RED)
|
|
1299
|
+
return false
|
|
1300
|
+
end
|
|
1301
|
+
end
|
|
1252
1302
|
|
|
1253
1303
|
desc 'upload_data', 'Upload data files', :hide => true
|
|
1254
1304
|
method_option :ignore, :type => :array, :aliases => ["-i", "--i"], :desc => "ignore following files"
|
|
@@ -1699,18 +1749,22 @@ module Cnvrg
|
|
|
1699
1749
|
end
|
|
1700
1750
|
|
|
1701
1751
|
desc 'data commits', 'List all commits for a specific dataset', :hide => true
|
|
1702
|
-
|
|
1703
|
-
|
|
1704
|
-
verify_logged_in(true)
|
|
1752
|
+
def list_dataset_commits(dataset_url, commit_sha1: nil)
|
|
1753
|
+
verify_logged_in(false)
|
|
1705
1754
|
log_start(__method__, args, options)
|
|
1706
1755
|
|
|
1707
|
-
|
|
1708
|
-
|
|
1709
|
-
|
|
1756
|
+
if dataset_url == "."
|
|
1757
|
+
dataset_dir = is_cnvrg_dir(Dir.pwd)
|
|
1758
|
+
@dataset = Dataset.new(dataset_dir)
|
|
1759
|
+
else
|
|
1760
|
+
owner, slug = get_owner_slug(dataset_url)
|
|
1761
|
+
@dataset = Dataset.new(dataset_info: {:owner => owner, :slug => slug})
|
|
1762
|
+
end
|
|
1763
|
+
|
|
1764
|
+
result = @dataset.list_commits(commit_sha1:commit_sha1)
|
|
1710
1765
|
list = result["result"]["list"]
|
|
1711
1766
|
|
|
1712
1767
|
print_table(list)
|
|
1713
|
-
|
|
1714
1768
|
end
|
|
1715
1769
|
|
|
1716
1770
|
desc 'commits', 'List all commits for a specific Project'
|
|
@@ -1741,17 +1795,17 @@ module Cnvrg
|
|
|
1741
1795
|
|
|
1742
1796
|
|
|
1743
1797
|
desc 'git_clone', 'Clone project'
|
|
1798
|
+
method_option :soft, :type => :boolean, :aliases => ["-s", "--soft"], :default => false, :hide => true
|
|
1744
1799
|
def git_clone(slug, owner)
|
|
1745
1800
|
verify_logged_in(false)
|
|
1746
1801
|
log_start(__method__, args, options)
|
|
1747
|
-
|
|
1802
|
+
project_home = Dir.pwd
|
|
1803
|
+
soft = options["soft"] || false
|
|
1804
|
+
Project.stop_if_project_present(project_home, slug) if soft
|
|
1748
1805
|
clone_resp = Project.clone_dir_remote(slug, owner, slug,true)
|
|
1749
|
-
|
|
1806
|
+
exit 1 if not clone_resp
|
|
1807
|
+
idx_status = Project.new(get_project_home).generate_idx(files:[])
|
|
1750
1808
|
FileUtils.mkdir_p File.join(get_project_home, ENV['CNVRG_OUTPUT_DIR']) if ENV['CNVRG_OUTPUT_DIR'].present?
|
|
1751
|
-
@executer = Cnvrg::Helpers::Executer.get_executer
|
|
1752
|
-
if @executer.present?
|
|
1753
|
-
@executer.update_git_commit
|
|
1754
|
-
end
|
|
1755
1809
|
end
|
|
1756
1810
|
|
|
1757
1811
|
|
|
@@ -1791,7 +1845,7 @@ module Cnvrg
|
|
|
1791
1845
|
desc 'clone PROJECT_URL', 'Clone project'
|
|
1792
1846
|
method_option :remote, :type => :boolean, :aliases => ["-r", "--r"], :default => false
|
|
1793
1847
|
method_option :commit, :type => :string, :aliases => ["-c", "--c"], :default => nil
|
|
1794
|
-
|
|
1848
|
+
method_option :soft, :type => :boolean, :aliases => ["-s", "--soft"], :default => false, :hide => true
|
|
1795
1849
|
def clone(project_url)
|
|
1796
1850
|
begin
|
|
1797
1851
|
verify_logged_in(false)
|
|
@@ -1801,6 +1855,8 @@ module Cnvrg
|
|
|
1801
1855
|
slug = url_parts[project_index + 1]
|
|
1802
1856
|
owner = url_parts[project_index - 1]
|
|
1803
1857
|
remote = options["remote"] || false
|
|
1858
|
+
soft = options["soft"] || false
|
|
1859
|
+
|
|
1804
1860
|
|
|
1805
1861
|
response = Cnvrg::API.request("users/#{owner}/projects/#{slug}/get_project", 'GET')
|
|
1806
1862
|
Cnvrg::CLI.is_response_success(response)
|
|
@@ -1814,6 +1870,8 @@ module Cnvrg
|
|
|
1814
1870
|
clone_resp = false
|
|
1815
1871
|
project_home = Dir.pwd
|
|
1816
1872
|
|
|
1873
|
+
Project.stop_if_project_present(project_home, project_name) if soft
|
|
1874
|
+
|
|
1817
1875
|
if remote and !git
|
|
1818
1876
|
clone_resp = Project.clone_dir_remote(slug, owner, project_name,git)
|
|
1819
1877
|
elsif git
|
|
@@ -1954,8 +2012,6 @@ module Cnvrg
|
|
|
1954
2012
|
method_option :parallel, :type => :numeric, :aliases => ["-p", "--parallel"], :desc => "uparallel upload at the same time", :default => 15
|
|
1955
2013
|
method_option :init, :type => :boolean, :aliases => ["--initial"], :desc => "initial sync", :default => false
|
|
1956
2014
|
method_option :message, :type => :string, :aliases => ["--message"], :desc => "create commit with message", :default => nil
|
|
1957
|
-
|
|
1958
|
-
|
|
1959
2015
|
def sync_data_new(new_branch, force, verbose, commit, all_files, tags ,parallel, chunk_size, init, message)
|
|
1960
2016
|
verify_logged_in(true)
|
|
1961
2017
|
log_start(__method__, args, options)
|
|
@@ -1964,11 +2020,13 @@ module Cnvrg
|
|
|
1964
2020
|
# w(verbose=false, new_branch=false,sync=false, commit=nil,all_files=true)
|
|
1965
2021
|
total_deleted, total_downloaded = invoke :download_data_new,[verbose, new_branch, true, commit, all_files], :new_branch=>new_branch, :direct=>false, :force =>force
|
|
1966
2022
|
end
|
|
1967
|
-
|
|
2023
|
+
|
|
1968
2024
|
invoke :upload_data_new,[new_branch, verbose, true, force, tags, chunk_size, message:message, total_deleted: total_deleted, total_downloaded: total_downloaded],
|
|
1969
2025
|
:new_branch=>new_branch, :direct=>false, :force =>force, :sync =>true, :tags =>tags, :parallel => parallel, :message => message
|
|
1970
2026
|
|
|
1971
2027
|
end
|
|
2028
|
+
|
|
2029
|
+
|
|
1972
2030
|
desc 'upload_data_new', 'upload_data_new', :hide => true
|
|
1973
2031
|
method_option :verbose, :type => :boolean, :aliases => ["-v"], :default => false
|
|
1974
2032
|
method_option :new_branch, :type => :boolean, :aliases => ["-nb"], :desc => "create new branch of commits"
|
|
@@ -2214,13 +2272,24 @@ module Cnvrg
|
|
|
2214
2272
|
method_option :git_diff, :type => :boolean, :aliases => ["--git_diff"], :default => false
|
|
2215
2273
|
method_option :job_slug, :type => :string, :aliases => ["--job"], :default => nil, :hide=>true
|
|
2216
2274
|
method_option :job_type, :type => :string, :aliases => [ "--job_type"], :default => nil, :hide=>true
|
|
2275
|
+
method_option :suppress_exceptions, :type => :boolean, :aliases => ["--suppress-exceptions"], :default => true
|
|
2276
|
+
method_option :debug_mode, :type => :boolean, :aliases => ["--debug-mode"], :default => false
|
|
2217
2277
|
|
|
2218
|
-
def upload(link = false, sync = false, direct = false, ignore_list = "", in_exp = false, force = false, output_dir = "output", job_type = nil, job_slug = nil)
|
|
2278
|
+
def upload(link = false, sync = false, direct = false, ignore_list = "", in_exp = false, force = false, output_dir = "output", job_type = nil, job_slug = nil, suppress_exceptions = true)
|
|
2219
2279
|
begin
|
|
2220
2280
|
# we are passing "force" twice.. doesnt really make sense :\\
|
|
2221
2281
|
verify_logged_in(true)
|
|
2222
2282
|
log_start(__method__, args, options)
|
|
2223
2283
|
@project = Project.new(get_project_home)
|
|
2284
|
+
|
|
2285
|
+
# Enable local/experiment exception logging
|
|
2286
|
+
suppress_exceptions = suppress_exceptions ? suppress_exceptions : options[:suppress_exceptions]
|
|
2287
|
+
if in_exp
|
|
2288
|
+
exp_obj = Experiment.new(@project.owner, @project.slug, job_id: job_slug)
|
|
2289
|
+
else
|
|
2290
|
+
exp_obj = nil
|
|
2291
|
+
end
|
|
2292
|
+
|
|
2224
2293
|
commit_msg = options["message"]
|
|
2225
2294
|
if commit_msg.nil? or commit_msg.empty?
|
|
2226
2295
|
commit_msg = ""
|
|
@@ -2292,8 +2361,6 @@ module Cnvrg
|
|
|
2292
2361
|
end
|
|
2293
2362
|
update_count = 0
|
|
2294
2363
|
update_total = result["added"].size + result["updated_on_local"].size + result["deleted"].size
|
|
2295
|
-
successful_updates = []
|
|
2296
|
-
successful_deletions = []
|
|
2297
2364
|
if options["verbose"]
|
|
2298
2365
|
if update_total == 1
|
|
2299
2366
|
log_message("Updating #{update_total} file", Thor::Shell::Color::BLUE)
|
|
@@ -2313,8 +2380,11 @@ module Cnvrg
|
|
|
2313
2380
|
end
|
|
2314
2381
|
job_type = options['job_type'] || job_type
|
|
2315
2382
|
job_slug = options['job_slug'] || job_slug
|
|
2316
|
-
commit_sha1 = @files.start_commit(
|
|
2317
|
-
|
|
2383
|
+
commit_sha1 = @files.start_commit(
|
|
2384
|
+
new_branch, force: force, exp_start_commit: exp_start_commit,
|
|
2385
|
+
job_type: job_type, job_slug: job_slug, start_commit: current_commit,message: options["message"],
|
|
2386
|
+
debug_mode: options["debug_mode"]
|
|
2387
|
+
)["result"]["commit_sha1"]
|
|
2318
2388
|
# upload / update
|
|
2319
2389
|
# delete
|
|
2320
2390
|
to_upload = result["added"] + result["updated_on_local"]
|
|
@@ -2325,32 +2395,30 @@ module Cnvrg
|
|
|
2325
2395
|
:starting_at => 0,
|
|
2326
2396
|
:total => (to_upload.size + deleted.size),
|
|
2327
2397
|
:autofinish => true)
|
|
2328
|
-
@files.upload_multiple_files(to_upload, commit_sha1, progress: progressbar)
|
|
2329
2398
|
|
|
2330
|
-
@files.
|
|
2399
|
+
buffered_errors = @files.upload_multiple_files(to_upload, commit_sha1, progress: progressbar, suppress_exceptions: suppress_exceptions)
|
|
2400
|
+
@files.delete_files_from_server(deleted, commit_sha1, suppress_exceptions: suppress_exceptions)
|
|
2331
2401
|
|
|
2332
2402
|
progressbar.finish
|
|
2403
|
+
|
|
2404
|
+
if buffered_errors.is_a?(Hash)
|
|
2405
|
+
buffered_errors.keys.each do |file|
|
|
2406
|
+
to_upload.delete(file)
|
|
2407
|
+
Cnvrg::CLI.log_message(buffered_errors[file], 'red')
|
|
2408
|
+
exp_obj.job_log([buffered_errors[file]]) unless exp_obj.nil?
|
|
2409
|
+
end
|
|
2410
|
+
end
|
|
2411
|
+
|
|
2333
2412
|
res = @files.end_commit(commit_sha1, force: force, message: commit_msg)
|
|
2334
2413
|
unless Cnvrg::CLI.is_response_success(res, false)
|
|
2335
2414
|
raise StandardError.new("Cant end commit")
|
|
2336
2415
|
end
|
|
2416
|
+
|
|
2337
2417
|
# save idx
|
|
2338
2418
|
@project.update_idx_with_files_commits!((to_upload + deleted), res["result"]["commit_time"])
|
|
2339
2419
|
@project.update_idx_with_commit!(commit_sha1)
|
|
2340
2420
|
if options["verbose"]
|
|
2341
2421
|
log_message("#{check} Done", Thor::Shell::Color::BLUE)
|
|
2342
|
-
if successful_updates.size > 0
|
|
2343
|
-
successful_updates.flatten!
|
|
2344
|
-
log_message("Updated:", Thor::Shell::Color::GREEN)
|
|
2345
|
-
suc = successful_updates.map {|x| x = Helpers.checkmark() + " " + x}
|
|
2346
|
-
log_message(suc.join("\n"), Thor::Shell::Color::GREEN)
|
|
2347
|
-
end
|
|
2348
|
-
if successful_deletions.size > 0
|
|
2349
|
-
successful_deletions.flatten!
|
|
2350
|
-
log_message("Deleted:", Thor::Shell::Color::GREEN)
|
|
2351
|
-
del = successful_updates.map {|x| x = Helpers.checkmark() + " " + x}
|
|
2352
|
-
log_message(del.join("\n"), Thor::Shell::Color::GREEN)
|
|
2353
|
-
end
|
|
2354
2422
|
log_message("Total of #{update_count} / #{update_total} files.", Thor::Shell::Color::GREEN)
|
|
2355
2423
|
else
|
|
2356
2424
|
if return_id
|
|
@@ -2375,9 +2443,13 @@ module Cnvrg
|
|
|
2375
2443
|
if e.is_a? SignalException
|
|
2376
2444
|
say "\nAborting", Thor::Shell::Color::BLUE
|
|
2377
2445
|
say "\nRolling back all changes", Thor::Shell::Color::BLUE
|
|
2446
|
+
|
|
2447
|
+
exp_obj.job_log(["Aborting", "Rolling back all changes"]) unless exp_obj.nil?
|
|
2378
2448
|
else
|
|
2379
2449
|
log_message(error_message, Thor::Shell::Color::RED)
|
|
2380
2450
|
log_error(e)
|
|
2451
|
+
|
|
2452
|
+
exp_obj.job_log([error_message, e]) unless exp_obj.nil?
|
|
2381
2453
|
end
|
|
2382
2454
|
@files.rollback_commit(commit_sha1) unless commit_sha1.nil?
|
|
2383
2455
|
print_res = {
|
|
@@ -2896,6 +2968,10 @@ module Cnvrg
|
|
|
2896
2968
|
method_option :files, :type => :string, :aliases => ["--files"], :default => nil
|
|
2897
2969
|
method_option :output_dir, :type => :string, :aliases => ["--output_dir"], :default => nil
|
|
2898
2970
|
method_option :git_diff, :type => :boolean, :aliases => ["--git_diff"], :default => false
|
|
2971
|
+
method_option :suppress_exceptions, :type => :boolean, :aliases => ["--suppress-exceptions"], :default => true
|
|
2972
|
+
method_option :debug_mode, :type => :boolean, :aliases => ["--debug-mode"], :default => false
|
|
2973
|
+
method_option :git_diff, :type => :boolean, :aliases => ["--git_diff"], :default => false
|
|
2974
|
+
|
|
2899
2975
|
def sync(direct = true)
|
|
2900
2976
|
verify_logged_in(true) if direct
|
|
2901
2977
|
@project = Project.new(get_project_home)
|
|
@@ -2907,16 +2983,20 @@ module Cnvrg
|
|
|
2907
2983
|
is_git = ENV['CNVRG_GIT_PROJECT'] == "true" || @project.is_git
|
|
2908
2984
|
in_exp = options["in_exp"] || (job_slug.present? and job_type.present?)
|
|
2909
2985
|
in_exp = false if job_type.present? and job_type == "NotebookSession"
|
|
2986
|
+
output_dir = options["output_dir"] || ENV['CNVRG_OUTPUT_DIR']
|
|
2987
|
+
|
|
2910
2988
|
run_download = true
|
|
2911
|
-
if
|
|
2989
|
+
if (job_type == "NotebookSession" and is_git) or job_type == "Experiment" or options['force']
|
|
2912
2990
|
run_download = false
|
|
2913
2991
|
end
|
|
2914
|
-
|
|
2992
|
+
|
|
2993
|
+
if run_download or options['debug_mode']
|
|
2915
2994
|
invoke :download, [true, "", in_exp ], :new_branch => options["new_branch"], :verbose => options["verbose"], :sync => true
|
|
2916
2995
|
end
|
|
2917
|
-
invoke :upload, [false, true, direct, "",in_exp,options[:force],
|
|
2996
|
+
invoke :upload, [false, true, direct, "",in_exp,options[:force], output_dir, job_type, job_slug ], :new_branch => options["new_branch"], :verbose => options["verbose"], :sync => true,
|
|
2918
2997
|
:ignore => options[:ignore], :force => options[:force], :message => options[:message], :deploy => options["deploy"], :return_id => options["return_id"],
|
|
2919
|
-
:files => options["files"], :output_dir =>
|
|
2998
|
+
:files => options["files"], :output_dir => output_dir, :job_slug => job_slug, :job_type => job_type, :suppress_exceptions => options["suppress_exceptions"], :debug_mode => options['debug_mode'], :git_diff => options["git_diff"]
|
|
2999
|
+
|
|
2920
3000
|
end
|
|
2921
3001
|
|
|
2922
3002
|
desc 'run cmd', 'Runs an experiment'
|
|
@@ -3061,6 +3141,8 @@ module Cnvrg
|
|
|
3061
3141
|
method_option :data, :type => :string, :aliases => ["-d", "--data"], :default => ""
|
|
3062
3142
|
method_option :data_commit, :type => :string, :aliases => ["-dc", "--data_commit"], :default => ""
|
|
3063
3143
|
method_option :ignore, :type => :string, :aliases => ["-i", "--ignore"], :desc => "ignore following files", :default => ""
|
|
3144
|
+
method_option :docker_id, :type => :string, :aliases => ["--docker_id"], :desc => "docker id to watch", :default => ""
|
|
3145
|
+
method_option :gpu_util_from_docker, :type => :boolean, :aliases => ["--gpu-util-from-docker"], :desc => "take gpu utilization from job docker", :default => false
|
|
3064
3146
|
method_option :remote, :type => :boolean, :aliases => ["--remote"], :default => false
|
|
3065
3147
|
method_option :gpu, :type => :boolean, :aliases => ["--gpu"], :default => false
|
|
3066
3148
|
method_option :force, :type => :boolean, :aliases => ["-f", "--force"], :default => false
|
|
@@ -3068,6 +3150,7 @@ module Cnvrg
|
|
|
3068
3150
|
method_option :periodic_sync, :type => :string, :aliases => ["-ps", "--periodic_sync"], :default => ""
|
|
3069
3151
|
method_option :output_dir, :type => :string, :aliases => ["-o", "--output_dir"], :default => nil
|
|
3070
3152
|
method_option :data_query, :type => :string, :aliases => ["-q", "--query"], :default => nil
|
|
3153
|
+
method_option :use_bash, :type => :boolean, :aliases => ["-b", "--use_bash"], :default => false
|
|
3071
3154
|
|
|
3072
3155
|
def exec(*cmd)
|
|
3073
3156
|
log = []
|
|
@@ -3134,8 +3217,12 @@ module Cnvrg
|
|
|
3134
3217
|
end
|
|
3135
3218
|
remote = options["remote"]
|
|
3136
3219
|
if remote
|
|
3137
|
-
docker_id
|
|
3138
|
-
|
|
3220
|
+
if options["docker_id"].present?
|
|
3221
|
+
docker_id = options["docker_id"]
|
|
3222
|
+
else
|
|
3223
|
+
docker_id = `cat /etc/hostname`
|
|
3224
|
+
docker_id = docker_id.strip()
|
|
3225
|
+
end
|
|
3139
3226
|
end
|
|
3140
3227
|
is_on_gpu = options["gpu"]
|
|
3141
3228
|
start_commit = @project.last_local_commit
|
|
@@ -3145,9 +3232,9 @@ module Cnvrg
|
|
|
3145
3232
|
|
|
3146
3233
|
platform = RUBY_PLATFORM
|
|
3147
3234
|
machine_name = Socket.gethostname
|
|
3235
|
+
machine_activity_slug = ENV["CNVRG_MACHINE_ACTIVITY"]
|
|
3148
3236
|
begin
|
|
3149
|
-
|
|
3150
|
-
@exp.start(cmd, platform, machine_name, start_commit, title, email_notification, machine_activity, script_path, sync_before_terminate, periodic_sync)
|
|
3237
|
+
@exp.start(cmd, platform, machine_name, start_commit, title, email_notification, machine_activity_slug, script_path, sync_before_terminate, periodic_sync)
|
|
3151
3238
|
log_message("Experiment's live results: #{Cnvrg::Helpers.remote_url}/#{@project.owner}/projects/#{@project.slug}/experiments/#{@exp.slug}", Thor::Shell::Color::GREEN)
|
|
3152
3239
|
log_message("Running: #{cmd}\n", Thor::Shell::Color::BLUE)
|
|
3153
3240
|
unless @exp.slug.nil?
|
|
@@ -3165,7 +3252,7 @@ module Cnvrg
|
|
|
3165
3252
|
begin
|
|
3166
3253
|
stats = remote ? usage_metrics_in_docker(docker_id) : Helpers.ubuntu? ? {memory: memory_usage, cpu: cpu_usage} : {}
|
|
3167
3254
|
if is_on_gpu
|
|
3168
|
-
gu = gpu_util
|
|
3255
|
+
gu = gpu_util(take_from_docker: options["gpu_util_from_docker"], docker_id: docker_id)
|
|
3169
3256
|
stats['gpu_util'] = gu[0]
|
|
3170
3257
|
stats['gpu'] = gu[1]
|
|
3171
3258
|
end
|
|
@@ -3177,6 +3264,16 @@ module Cnvrg
|
|
|
3177
3264
|
end
|
|
3178
3265
|
end
|
|
3179
3266
|
start_time = Time.now
|
|
3267
|
+
shell_type = options["use_bash"] ? "bash -l" : "sh"
|
|
3268
|
+
if @exp.get_cmd.present?
|
|
3269
|
+
cmd = @exp.get_cmd
|
|
3270
|
+
if options["docker_id"].present? # Escape for docker exec
|
|
3271
|
+
cmd = cmd.gsub("\"", "\\\"")
|
|
3272
|
+
end
|
|
3273
|
+
end
|
|
3274
|
+
if options["docker_id"].present?
|
|
3275
|
+
cmd = "docker exec -it #{options["docker_id"]} #{shell_type} -c \"#{cmd}\""
|
|
3276
|
+
end
|
|
3180
3277
|
PTY.spawn(@exp.as_env, cmd) do |stdout, stdin, pid, stderr|
|
|
3181
3278
|
begin
|
|
3182
3279
|
stdout.each do |line|
|
|
@@ -3191,7 +3288,7 @@ module Cnvrg
|
|
|
3191
3288
|
puts line
|
|
3192
3289
|
end
|
|
3193
3290
|
log << cur_log
|
|
3194
|
-
if log.size >=
|
|
3291
|
+
if log.size >= 1
|
|
3195
3292
|
@exp.upload_temp_log(log) unless log.empty?
|
|
3196
3293
|
log = []
|
|
3197
3294
|
elsif (start_time + 15.seconds) <= Time.now
|
|
@@ -3241,29 +3338,26 @@ module Cnvrg
|
|
|
3241
3338
|
exp_success = false
|
|
3242
3339
|
end
|
|
3243
3340
|
|
|
3244
|
-
|
|
3245
|
-
|
|
3246
|
-
|
|
3247
|
-
|
|
3248
|
-
|
|
3249
|
-
|
|
3250
|
-
|
|
3251
|
-
# invoke :upload, [false, false, true, ignore, true, true], :output_dir => output_dir, :force=>true, :job_type=>'Experiment', :job_slug=>@exp.slug
|
|
3252
|
-
end
|
|
3253
|
-
else
|
|
3254
|
-
upload(false, false, true, ignore, true, true,nil,"Experiment",@exp.slug )
|
|
3255
|
-
|
|
3256
|
-
# invoke :upload, [false, false, true, ignore,true, true], :job_type=>'Experiment', :job_slug=>@exp.slug, :force=>true
|
|
3341
|
+
if sync_after
|
|
3342
|
+
@exp.job_log(["Syncing Experiment"])
|
|
3343
|
+
# Sync after run
|
|
3344
|
+
if @project.is_git
|
|
3345
|
+
output_dir = output_dir || @exp.output_dir
|
|
3346
|
+
if output_dir.present?
|
|
3347
|
+
upload(false, false, true, ignore, true, true, output_dir, "Experiment", @exp.slug, true )
|
|
3257
3348
|
end
|
|
3258
|
-
|
|
3349
|
+
else
|
|
3350
|
+
upload(false, false, true, ignore, true, true, nil, "Experiment", @exp.slug, true )
|
|
3259
3351
|
end
|
|
3352
|
+
end
|
|
3353
|
+
|
|
3260
3354
|
end_commit = @project.last_local_commit
|
|
3261
3355
|
if end_commit.present?
|
|
3262
3356
|
@exp.job_log(["Experiment end commit: #{end_commit}"])
|
|
3263
3357
|
end
|
|
3264
3358
|
|
|
3265
3359
|
# log_thread.join
|
|
3266
|
-
|
|
3360
|
+
stats_thread.join
|
|
3267
3361
|
|
|
3268
3362
|
res = @exp.end(log, exit_status, end_commit, cpu_average, memory_average, end_time: end_time)
|
|
3269
3363
|
|
|
@@ -3411,8 +3505,8 @@ module Cnvrg
|
|
|
3411
3505
|
local_folders_options = options["local_folders"]
|
|
3412
3506
|
options_hash.except!("schedule", "recurring", "machine_type", "image", "upload_output", "grid", "data", "data_commit", "title",
|
|
3413
3507
|
"local", "small", "medium", "large", "gpu", "gpuxl", "gpuxxl","max_time","dataset_only_tree",
|
|
3414
|
-
"data_query", "git_commit","git_branch", "restart_if_stuck","local_folders","output_dir", "commit", "datasets",
|
|
3415
|
-
"email_notification_error", "email_notification_success", "emails")
|
|
3508
|
+
"data_query", "git_commit","git_branch", "restart_if_stuck","local_folders","output_dir", "commit", "datasets",
|
|
3509
|
+
"requirements", "prerun", "email_notification_error", "email_notification_success", "emails")
|
|
3416
3510
|
exec_options = options_hash.map {|x| "--#{x[0]}=#{x[1]}"}.flatten.join(" ")
|
|
3417
3511
|
command = "#{exec_options} #{remote} #{upload_output_option} #{cmd.flatten.join(" ")}"
|
|
3418
3512
|
commit_to_run = options["commit"] || nil
|
|
@@ -4237,144 +4331,6 @@ module Cnvrg
|
|
|
4237
4331
|
|
|
4238
4332
|
end
|
|
4239
4333
|
|
|
4240
|
-
method_option :small, :type => :boolean, :aliases => ["-sm", "--small"], :default => false
|
|
4241
|
-
method_option :medium, :type => :boolean, :aliases => ["-md", "--medium"], :default => false
|
|
4242
|
-
method_option :large, :type => :boolean, :aliases => ["-lg", "--large"], :default => false
|
|
4243
|
-
method_option :gpu, :type => :boolean, :aliases => ["--gpu"], :default => false
|
|
4244
|
-
method_option :gpuxl, :type => :boolean, :aliases => ["--gpuxl"], :default => false
|
|
4245
|
-
method_option :gpuxxl, :type => :boolean, :aliases => ["--gpuxxl"], :default => false
|
|
4246
|
-
method_option :image, :type => :string, :aliases => ["-i", "--image"], :default => ""
|
|
4247
|
-
method_option :public, :type => :boolean, :aliases => ["-p", "--public"], :default => false
|
|
4248
|
-
method_option :base, :type => :boolean, :aliases => ["-b", "--base"], :default => false
|
|
4249
|
-
method_option :python3, :type => :boolean, :aliases => ["--python3"], :default => false
|
|
4250
|
-
method_option :docker_path, :type => :string, :aliases => ["--docker_path"], :default => ""
|
|
4251
|
-
|
|
4252
|
-
|
|
4253
|
-
desc 'create_custom_image', 'run commands inside containers', :hide => true
|
|
4254
|
-
|
|
4255
|
-
def build_image(image_name)
|
|
4256
|
-
begin
|
|
4257
|
-
verify_logged_in(false)
|
|
4258
|
-
log_start(__method__, args, options)
|
|
4259
|
-
instances = {"small" => options["small"], "medium" => options["medium"], "large" => options["large"],
|
|
4260
|
-
"gpu" => options["gpu"], "gpuxl" => options["gpuxl"], "gpuxxl" => options["gpuxxl"]}
|
|
4261
|
-
instance_type = get_instance_type(instances)
|
|
4262
|
-
image_extend = options["image"]
|
|
4263
|
-
public = options["public"]
|
|
4264
|
-
base = options["base"]
|
|
4265
|
-
python3 = options["python3"]
|
|
4266
|
-
docker_path = options["docker_path"]
|
|
4267
|
-
owner = CLI.get_owner
|
|
4268
|
-
checks = Helpers.checkmark()
|
|
4269
|
-
tar_path = nil
|
|
4270
|
-
if !docker_path.nil? and !docker_path.empty?
|
|
4271
|
-
docker_path = File.absolute_path(docker_path)
|
|
4272
|
-
#create tar of the docker path: it could be a docker file, and it could be a docker folder
|
|
4273
|
-
tar_path = File.expand_path('~') + "/.cnvrg/tmp/docker_#{File.basename docker_path}.tar.gz"
|
|
4274
|
-
resp = create_docker_tar(docker_path, tar_path)
|
|
4275
|
-
if !resp
|
|
4276
|
-
log_message("Couldn't create tar from docker path", Thor::Shell::Color::RED)
|
|
4277
|
-
FileUtils.rm_rf tar_path
|
|
4278
|
-
exit(1)
|
|
4279
|
-
end
|
|
4280
|
-
files = Cnvrg::Files.new(owner, "")
|
|
4281
|
-
resp = Images.create_new_custom_image_with_docker(instance_type, owner, image_name, public, base, image_extend, python3, tar_path, files)
|
|
4282
|
-
if resp
|
|
4283
|
-
end
|
|
4284
|
-
else
|
|
4285
|
-
log_message("Creating machine for your custom image, this may take a few moments...", Thor::Shell::Color::BLUE)
|
|
4286
|
-
resp = Images.create_new_custom_image(instance_type, owner, image_name, public, base, image_extend, python3, nil)
|
|
4287
|
-
|
|
4288
|
-
end
|
|
4289
|
-
|
|
4290
|
-
if Cnvrg::CLI.is_response_success(resp, false)
|
|
4291
|
-
image_slug = resp["result"]["slug"]
|
|
4292
|
-
container = resp["result"]["machine_c"]
|
|
4293
|
-
log_message("#{checks} Created image and machine successfully", Thor::Shell::Color::GREEN)
|
|
4294
|
-
log_message("Connecting to machine", Thor::Shell::Color::BLUE)
|
|
4295
|
-
ssh = Ssh.new(resp)
|
|
4296
|
-
if !ssh.is_ssh
|
|
4297
|
-
log_message("Couldn't connect to machine,aborting", Thor::Shell::Color::RED)
|
|
4298
|
-
Images.revoke_custom_new_image(owner, image_slug)
|
|
4299
|
-
end
|
|
4300
|
-
log_message("run command until ctrl + c or quit is initiated", Thor::Shell::Color::BLUE)
|
|
4301
|
-
begin
|
|
4302
|
-
logs = []
|
|
4303
|
-
|
|
4304
|
-
while true
|
|
4305
|
-
command = ask("$>")
|
|
4306
|
-
logs << {time: Time.now,
|
|
4307
|
-
message: command,
|
|
4308
|
-
type: "stdout"
|
|
4309
|
-
}
|
|
4310
|
-
if command.eql? "quit"
|
|
4311
|
-
log_message("Commiting Image..", Thor::Shell::Color::BLUE)
|
|
4312
|
-
break
|
|
4313
|
-
end
|
|
4314
|
-
res = ssh.exec_command(command)
|
|
4315
|
-
begin
|
|
4316
|
-
res_parsed = JSON.parse(res)
|
|
4317
|
-
res = res_parsed.join(",")
|
|
4318
|
-
end
|
|
4319
|
-
|
|
4320
|
-
puts res
|
|
4321
|
-
logs << {time: Time.now,
|
|
4322
|
-
message: res,
|
|
4323
|
-
type: "stdout"
|
|
4324
|
-
}
|
|
4325
|
-
logs.flatten!
|
|
4326
|
-
|
|
4327
|
-
end
|
|
4328
|
-
|
|
4329
|
-
rescue SignalException
|
|
4330
|
-
log_message("Commiting Image..", Thor::Shell::Color::BLUE)
|
|
4331
|
-
|
|
4332
|
-
end
|
|
4333
|
-
resp = Images.commit_custom_image(owner, image_slug, logs)
|
|
4334
|
-
if Cnvrg::CLI.is_response_success(resp, false)
|
|
4335
|
-
log_message("#{checks} Image commited successfuly, email will be sent when image is ready", Thor::Shell::Color::GREEN)
|
|
4336
|
-
else
|
|
4337
|
-
if image_slug
|
|
4338
|
-
Images.revoke_custom_new_image(owner, image_slug)
|
|
4339
|
-
end
|
|
4340
|
-
if ssh
|
|
4341
|
-
ssh.close_ssh()
|
|
4342
|
-
end
|
|
4343
|
-
log_message("Image couldn't be commited, rolling back changes", Thor::Shell::Color::RED)
|
|
4344
|
-
|
|
4345
|
-
exit(1)
|
|
4346
|
-
end
|
|
4347
|
-
if ssh
|
|
4348
|
-
ssh.close_ssh()
|
|
4349
|
-
end
|
|
4350
|
-
|
|
4351
|
-
|
|
4352
|
-
end
|
|
4353
|
-
rescue => e
|
|
4354
|
-
log_message("Error occurd, aborting", Thor::Shell::Color::RED)
|
|
4355
|
-
|
|
4356
|
-
log_error(e)
|
|
4357
|
-
if image_slug
|
|
4358
|
-
Images.revoke_custom_new_image(owner, image_slug)
|
|
4359
|
-
end
|
|
4360
|
-
if ssh
|
|
4361
|
-
ssh.close_ssh()
|
|
4362
|
-
end
|
|
4363
|
-
|
|
4364
|
-
|
|
4365
|
-
rescue SignalException
|
|
4366
|
-
if image_slug
|
|
4367
|
-
Images.revoke_custom_new_image(owner, image_slug)
|
|
4368
|
-
end
|
|
4369
|
-
if ssh
|
|
4370
|
-
ssh.close_ssh
|
|
4371
|
-
end
|
|
4372
|
-
say "\nAborting"
|
|
4373
|
-
exit(1)
|
|
4374
|
-
end
|
|
4375
|
-
|
|
4376
|
-
end
|
|
4377
|
-
|
|
4378
4334
|
|
|
4379
4335
|
desc 'build', 'run commands inside containers', :hide => true
|
|
4380
4336
|
method_option :install, :type => :string, :aliases => ["--i"], :default => nil, :desc => "Install from the given instructions file"
|
|
@@ -4568,66 +4524,7 @@ module Cnvrg
|
|
|
4568
4524
|
end
|
|
4569
4525
|
|
|
4570
4526
|
|
|
4571
|
-
desc 'upload_image', 'commit notebook changes to create a new notebook image', :hide =>true
|
|
4572
|
-
|
|
4573
|
-
def upload_image_old(image_id, is_public, is_base, *message)
|
|
4574
|
-
verify_logged_in(true)
|
|
4575
|
-
log_start(__method__, args, options)
|
|
4576
|
-
image = Docker::Image.get(image_id)
|
|
4577
|
-
project_home = get_project_home
|
|
4578
|
-
@project = Project.new(project_home)
|
|
4579
|
-
last_local_commit = @project.last_local_commit
|
|
4580
|
-
image_name = @project.slug + "#{last_local_commit}"
|
|
4581
|
-
path = File.expand_path('~') + "/.cnvrg/tmp/#{image_name}.tar"
|
|
4582
|
-
owner = Cnvrg::CLI.get_owner()
|
|
4583
|
-
if !message.nil? or !message.empty?
|
|
4584
|
-
message = message.join(" ")
|
|
4585
|
-
end
|
|
4586
|
-
|
|
4587
|
-
log_message("Saving image's current state", Thor::Shell::Color::BLUE)
|
|
4588
|
-
image.save(path)
|
|
4589
|
-
|
|
4590
|
-
begin
|
|
4591
|
-
log_message("Compressing image file to upload", Thor::Shell::Color::BLUE)
|
|
4592
|
-
gzipRes = system("gzip -f #{path}")
|
|
4593
|
-
if !gzipRes
|
|
4594
|
-
|
|
4595
|
-
log_message("Couldn't create tar file from image", Thor::Shell::Color::RED)
|
|
4596
|
-
exit(1)
|
|
4597
|
-
end
|
|
4598
|
-
path = path + ".gz"
|
|
4599
|
-
@files = Cnvrg::Files.new(owner, "")
|
|
4600
|
-
|
|
4601
|
-
exit_status = $?.exitstatus
|
|
4602
|
-
if exit_status == 0
|
|
4603
|
-
log_message("Uploading image file", Thor::Shell::Color::BLUE)
|
|
4604
|
-
|
|
4605
|
-
diff = container_changes(Dir.pwd)
|
|
4606
|
-
res = @files.upload_image(path, image_name, owner, is_public, is_base, diff[1], diff[0], diff[2], message, image.commit_id)
|
|
4607
|
-
if res
|
|
4608
|
-
File.delete(path)
|
|
4609
|
-
image_loc = is_project_with_docker(Dir.pwd)
|
|
4610
|
-
image_loc.update_slug(res["result"]["id"])
|
|
4611
|
-
|
|
4612
|
-
checks = Helpers.checkmark()
|
|
4613
|
-
log_message("#{checks} Done", Thor::Shell::Color::GREEN)
|
|
4614
|
-
else
|
|
4615
|
-
log_message("Couldn't upload image", Thor::Shell::Color::RED)
|
|
4616
|
-
|
|
4617
|
-
end
|
|
4618
|
-
else
|
|
4619
|
-
log_message("Couldn't create image file for: #{image_name}", Thor::Shell::Color::RED)
|
|
4620
|
-
exit(1)
|
|
4621
|
-
end
|
|
4622
|
-
rescue => e
|
|
4623
|
-
log_message("Couldn't upload image file for: #{image_name}", Thor::Shell::Color::RED)
|
|
4624
|
-
log_error(e)
|
|
4625
|
-
rescue SignalException
|
|
4626
4527
|
|
|
4627
|
-
say "Couldn't upload image file for: #{image_name}", Thor::Shell::Color::RED
|
|
4628
|
-
exit(1)
|
|
4629
|
-
end
|
|
4630
|
-
end
|
|
4631
4528
|
|
|
4632
4529
|
desc '', '', :hide => true
|
|
4633
4530
|
|
|
@@ -4638,278 +4535,30 @@ module Cnvrg
|
|
|
4638
4535
|
|
|
4639
4536
|
end
|
|
4640
4537
|
|
|
4641
|
-
desc '', '', :hide => true
|
|
4642
|
-
|
|
4643
|
-
|
|
4644
|
-
|
|
4645
|
-
|
|
4646
|
-
|
|
4647
|
-
|
|
4648
|
-
|
|
4649
|
-
|
|
4650
|
-
|
|
4651
|
-
|
|
4652
|
-
|
|
4653
|
-
|
|
4654
|
-
|
|
4655
|
-
|
|
4656
|
-
|
|
4657
|
-
|
|
4658
|
-
|
|
4659
|
-
|
|
4660
|
-
|
|
4661
|
-
def tensor_port_container(container_id)
|
|
4662
|
-
container = Docker::Container.get(container_id)
|
|
4663
|
-
say container.json["HostConfig"]["PortBindings"]["6006/tcp"][0]["HostPort"]
|
|
4664
|
-
end
|
|
4665
|
-
|
|
4666
|
-
desc '', '', :hide => true
|
|
4667
|
-
|
|
4668
|
-
def stop_container(container_id)
|
|
4669
|
-
container = Docker::Container.get(container_id)
|
|
4670
|
-
container.stop()
|
|
4671
|
-
container.remove()
|
|
4672
|
-
|
|
4673
|
-
end
|
|
4674
|
-
|
|
4675
|
-
desc '', '', :hide => true
|
|
4676
|
-
method_option :login, :type => :string, :aliases => ["-l"], :default => ""
|
|
4677
|
-
method_option :app_dir, :type => :string, :aliases => ["-d"], :default => "/home/ds/notebooks"
|
|
4678
|
-
method_option :cmd, :type => :string, :aliases => ["-c"], :default => "/usr/local/cnvrg/run_ipython.sh"
|
|
4679
|
-
|
|
4680
|
-
|
|
4681
|
-
def config_remote(image_name, port = 7654, tensport = 6006)
|
|
4682
|
-
local_images = Docker::Image.all
|
|
4683
|
-
|
|
4684
|
-
docker_image_local = local_images.map {|x| x.info["RepoTags"]}.flatten.select {|y| y.eql? "#{image_name}:latest"}.flatten
|
|
4685
|
-
if docker_image_local.empty?
|
|
4686
|
-
say "no image"
|
|
4687
|
-
exit(1)
|
|
4688
|
-
end
|
|
4689
|
-
|
|
4690
|
-
begin
|
|
4691
|
-
login_content = options["login"]
|
|
4692
|
-
app_dir = options["app_dir"]
|
|
4693
|
-
cmd = options["cmd"]
|
|
4694
|
-
volume_from = options["volume"]
|
|
4695
|
-
|
|
4696
|
-
image_settings = {
|
|
4697
|
-
'Image' => "#{image_name}:latest",
|
|
4698
|
-
|
|
4699
|
-
'Cmd' => cmd,
|
|
4700
|
-
'WorkingDir' => app_dir,
|
|
4701
|
-
'ExposedPorts' => {
|
|
4702
|
-
'8888/tcp' => {},
|
|
4703
|
-
},
|
|
4704
|
-
'HostConfig' => {
|
|
4705
|
-
'Binds' => ["/var/run/docker.sock:/var/run/docker.sock", "/usr/bin/docker:/usr/bin/docker"],
|
|
4706
|
-
'PortBindings' => {
|
|
4707
|
-
'8888/tcp' => [
|
|
4708
|
-
{'HostPort' => "#{port}", 'HostIp' => 'localhost'}
|
|
4709
|
-
],
|
|
4710
|
-
'6006/tcp' => [
|
|
4711
|
-
{'HostPort' => "#{tensport}", 'HostIp' => 'localhost'}
|
|
4712
|
-
],
|
|
4713
|
-
},
|
|
4714
|
-
},
|
|
4715
|
-
}
|
|
4716
|
-
container = Docker::Container.create(image_settings)
|
|
4717
|
-
container.start()
|
|
4718
|
-
command = ["/bin/bash", "-lc", "sudo echo -e \"#{login_content}\" >/home/ds/.netrc"]
|
|
4719
|
-
container.exec(command, tty: true)
|
|
4720
|
-
# command = ["/bin/bash", "-lc", "mkdir /home/ds/.cnvrg"]
|
|
4721
|
-
# container.exec(command, tty: true)
|
|
4722
|
-
# command = ["/bin/bash", "-lc", "mkdir /home/ds/.cnvrg/tmp"]
|
|
4723
|
-
# container.exec(command, tty: true)
|
|
4724
|
-
command = ["/bin/bash", "-lc", "sudo chown -R ds:ds /home/ds/.netrc"]
|
|
4725
|
-
container.exec(command, tty: true)
|
|
4726
|
-
command = ["/bin/bash", "-lc", "sudo chmod 0600 /home/ds/.netrc"]
|
|
4727
|
-
container.exec(command, tty: true)
|
|
4728
|
-
say "#{container.id}:#{port}##{tensport}"
|
|
4729
|
-
rescue => e
|
|
4730
|
-
puts e
|
|
4731
|
-
if e.message.include? "is not running"
|
|
4732
|
-
return config_remote(image_name, port - 1, tensport - 1)
|
|
4733
|
-
end
|
|
4734
|
-
|
|
4735
|
-
if container
|
|
4736
|
-
container.kill()
|
|
4737
|
-
end
|
|
4738
|
-
return false
|
|
4739
|
-
end
|
|
4740
|
-
end
|
|
4741
|
-
|
|
4742
|
-
|
|
4743
|
-
desc '', '', :hide => true
|
|
4744
|
-
method_option :login, :type => :string, :aliases => ["-l"], :default => ""
|
|
4745
|
-
|
|
4746
|
-
def config_netrc(container)
|
|
4747
|
-
|
|
4748
|
-
login_content = options["login"]
|
|
4749
|
-
|
|
4750
|
-
container = Docker::Container.get(container)
|
|
4751
|
-
command = ["/bin/bash", "-lc", "sudo echo -e \"#{login_content}\" >/home/ds/.netrc"]
|
|
4752
|
-
container.exec(command, tty: true)
|
|
4753
|
-
command = ["/bin/bash", "-lc", "sudo chown -R ds:ds /home/ds/.netrc"]
|
|
4754
|
-
container.exec(command, tty: true)
|
|
4755
|
-
command = ["/bin/bash", "-lc", "sudo chmod 0600 /home/ds/.netrc"]
|
|
4756
|
-
container.exec(command, tty: true)
|
|
4757
|
-
say "OK"
|
|
4758
|
-
|
|
4759
|
-
end
|
|
4760
|
-
|
|
4761
|
-
desc '', '', :hide => true
|
|
4762
|
-
method_option :login, :type => :string, :aliases => ["-l", "--l"], :default => ""
|
|
4763
|
-
method_option :app_dir, :type => :string, :aliases => ["-d", "--d"], :default => "/home/ds/notebooks"
|
|
4764
|
-
method_option :cmd, :type => :string, :aliases => ["-c", "--c"], :default => "/usr/local/cnvrg/run_ipython.sh"
|
|
4765
|
-
|
|
4766
|
-
|
|
4767
|
-
def config_remote_gpu(image_name, port = 7654, tensport = 6006)
|
|
4768
|
-
local_images = Docker::Image.all
|
|
4769
|
-
|
|
4770
|
-
docker_image_local = local_images.map {|x| x.info["RepoTags"]}.flatten.select {|y| y.eql? "#{image_name}:latest"}.flatten
|
|
4771
|
-
if docker_image_local.empty?
|
|
4772
|
-
say "no image"
|
|
4773
|
-
exit(1)
|
|
4774
|
-
end
|
|
4775
|
-
|
|
4776
|
-
begin
|
|
4777
|
-
login_content = options["login"]
|
|
4778
|
-
app_dir = options["app_dir"]
|
|
4779
|
-
cmd = options["cmd"]
|
|
4780
|
-
|
|
4781
|
-
# image_settings = {
|
|
4782
|
-
# 'Image' => "#{image_name}:latest",
|
|
4783
|
-
# 'User' => 'ds',
|
|
4784
|
-
# 'Cmd' => cmd,
|
|
4785
|
-
# 'WorkingDir' => app_dir,
|
|
4786
|
-
# 'ExposedPorts' => {
|
|
4787
|
-
# '8888/tcp' => {},
|
|
4788
|
-
# },
|
|
4789
|
-
# 'HostConfig' => {
|
|
4790
|
-
# 'PortBindings' => {
|
|
4791
|
-
# '8888/tcp' => [
|
|
4792
|
-
# {'HostPort' => "#{port}", 'HostIp' => 'localhost'}
|
|
4793
|
-
# ],
|
|
4794
|
-
# '6006/tcp' => [
|
|
4795
|
-
# {'HostPort' => "6006", 'HostIp' => 'localhost'}
|
|
4796
|
-
# ],
|
|
4797
|
-
# },
|
|
4798
|
-
# },
|
|
4799
|
-
# }
|
|
4800
|
-
|
|
4801
|
-
container_id = `nvidia-docker run -itd -p #{port}:8888 -p #{tensport}:6006 -w #{app_dir} -v /usr/bin/nvidia-smi:/usr/bin/nvidia-smi -v /var/run/docker.sock:/var/run/docker.sock -v /usr/bin/docker:/usr/bin/docker #{image_name}:latest #{cmd} `
|
|
4802
|
-
container_id = container_id.gsub("\n", "")
|
|
4803
|
-
container = Docker::Container.get(container_id)
|
|
4804
|
-
# container.start()
|
|
4805
|
-
command = ["/bin/bash", "-lc", "sudo echo -e \"#{login_content}\" >/home/ds/.netrc"]
|
|
4806
|
-
container.exec(command, tty: true)
|
|
4807
|
-
command = ["/bin/bash", "-lc", "sudo chown -R ds:ds /home/ds/.netrc"]
|
|
4808
|
-
container.exec(command, tty: true)
|
|
4809
|
-
command = ["/bin/bash", "-lc", "sudo chmod 0600 /home/ds/.netrc"]
|
|
4810
|
-
container.exec(command, tty: true)
|
|
4811
|
-
say "#{container.id}:#{port}##{tensport}"
|
|
4812
|
-
rescue => e
|
|
4813
|
-
if e.message.include? "is not running"
|
|
4814
|
-
puts "running asgain with: #{port - 1} #{tensport - 1}"
|
|
4815
|
-
return config_remote_gpu(image_name, port - 1, tensport - 1)
|
|
4816
|
-
end
|
|
4817
|
-
|
|
4818
|
-
if container
|
|
4819
|
-
container.kill()
|
|
4538
|
+
desc 'Collect and send job utilization', '', :hide => true
|
|
4539
|
+
method_option :docker_id, :type => :string, :aliases => ["--docker_id"], :desc => "docker id to watch"
|
|
4540
|
+
method_option :is_on_gpu, :type => :boolean, :aliases => ["--is_on_gpu"], :desc => "is on gpu", :default => true
|
|
4541
|
+
def get_utilization()
|
|
4542
|
+
@exp = Experiment.new(ENV['CNVRG_OWNER'], ENV['CNVRG_PROJECT'], job_id: ENV['CNVRG_JOB_ID'])
|
|
4543
|
+
docker_id = options["docker_id"]
|
|
4544
|
+
while true do
|
|
4545
|
+
sleep 30
|
|
4546
|
+
begin
|
|
4547
|
+
stats = usage_metrics_in_docker(docker_id)
|
|
4548
|
+
if options["is_on_gpu"]
|
|
4549
|
+
gu = gpu_util(take_from_docker: true, docker_id: docker_id)
|
|
4550
|
+
stats['gpu_util'] = gu[0]
|
|
4551
|
+
stats['gpu'] = gu[1]
|
|
4552
|
+
end
|
|
4553
|
+
stats['docker_id'] = docker_id
|
|
4554
|
+
@exp.send_machine_stats [stats] unless stats.empty?
|
|
4555
|
+
rescue => e
|
|
4556
|
+
log_error(e)
|
|
4557
|
+
log_message("Failed to upload ongoing stats, continuing with experiment", Thor::Shell::Color::YELLOW)
|
|
4820
4558
|
end
|
|
4821
|
-
return false
|
|
4822
4559
|
end
|
|
4823
4560
|
end
|
|
4824
4561
|
|
|
4825
|
-
desc '', '', :hide => true
|
|
4826
|
-
method_option :login, :type => :string, :aliases => ["-l"], :default => ""
|
|
4827
|
-
|
|
4828
|
-
def config_flask_remote(image_name, port = 80)
|
|
4829
|
-
local_images = Docker::Image.all
|
|
4830
|
-
|
|
4831
|
-
docker_image_local = local_images.map {|x| x.info["RepoTags"]}.flatten.select {|y| y.eql? "#{image_name}:latest"}.flatten
|
|
4832
|
-
if docker_image_local.empty?
|
|
4833
|
-
say "no image"
|
|
4834
|
-
exit(1)
|
|
4835
|
-
end
|
|
4836
|
-
|
|
4837
|
-
begin
|
|
4838
|
-
login_content = options["login"]
|
|
4839
|
-
image_settings = {
|
|
4840
|
-
'Image' => "#{image_name}:latest",
|
|
4841
|
-
'User' => 'ds',
|
|
4842
|
-
'Cmd' => '/usr/local/cnvrg/start_super.sh',
|
|
4843
|
-
'WorkingDir' => '/home/ds/app',
|
|
4844
|
-
'ExposedPorts' => {
|
|
4845
|
-
'80/tcp' => {},
|
|
4846
|
-
},
|
|
4847
|
-
'HostConfig' => {
|
|
4848
|
-
'PortBindings' => {
|
|
4849
|
-
'80/tcp' => [
|
|
4850
|
-
{'HostPort' => "#{port}", 'HostIp' => 'localhost'}
|
|
4851
|
-
],
|
|
4852
|
-
},
|
|
4853
|
-
},
|
|
4854
|
-
}
|
|
4855
|
-
container = Docker::Container.create(image_settings)
|
|
4856
|
-
container.start()
|
|
4857
|
-
command = ["/bin/bash", "-lc", "sudo echo -e \"#{login_content}\" >/home/ds/.netrc"]
|
|
4858
|
-
container.exec(command, tty: true)
|
|
4859
|
-
command = ["/bin/bash", "-lc", "sudo chown -R ds:ds /home/ds/.netrc"]
|
|
4860
|
-
container.exec(command, tty: true)
|
|
4861
|
-
command = ["/bin/bash", "-lc", "sudo chmod 0600 /home/ds/.netrc"]
|
|
4862
|
-
container.exec(command, tty: true)
|
|
4863
|
-
say "#{container.id}:#{port}"
|
|
4864
|
-
rescue => e
|
|
4865
|
-
pus e
|
|
4866
|
-
if e.message.include? "is not running"
|
|
4867
|
-
return "port is taken"
|
|
4868
|
-
end
|
|
4869
|
-
puts "error"
|
|
4870
|
-
if container
|
|
4871
|
-
container.kill()
|
|
4872
|
-
end
|
|
4873
|
-
return false
|
|
4874
|
-
end
|
|
4875
|
-
end
|
|
4876
|
-
|
|
4877
|
-
desc '', '', :hide => true
|
|
4878
|
-
method_option :login, :type => :string, :aliases => ["-l"], :default => ""
|
|
4879
|
-
|
|
4880
|
-
def config_flask_remote_gpu(image_name, port = 80)
|
|
4881
|
-
local_images = Docker::Image.all
|
|
4882
|
-
|
|
4883
|
-
docker_image_local = local_images.map {|x| x.info["RepoTags"]}.flatten.select {|y| y.eql? "#{image_name}:latest"}.flatten
|
|
4884
|
-
if docker_image_local.empty?
|
|
4885
|
-
say "no image"
|
|
4886
|
-
exit(1)
|
|
4887
|
-
end
|
|
4888
|
-
|
|
4889
|
-
begin
|
|
4890
|
-
login_content = options["login"]
|
|
4891
|
-
container_id = `nvidia-docker run -itd -p 80:80 -w /home/ds/app #{image_name}:latest /usr/local/cnvrg/start_super.sh`
|
|
4892
|
-
container_id = container_id.gsub("\n", "")
|
|
4893
|
-
container = Docker::Container.get(container_id)
|
|
4894
|
-
command = ["/bin/bash", "-lc", "sudo echo -e \"#{login_content}\" >/home/ds/.netrc"]
|
|
4895
|
-
container.exec(command, tty: true)
|
|
4896
|
-
command = ["/bin/bash", "-lc", "sudo chown -R ds:ds /home/ds/.netrc"]
|
|
4897
|
-
container.exec(command, tty: true)
|
|
4898
|
-
command = ["/bin/bash", "-lc", "sudo chmod 0600 /home/ds/.netrc"]
|
|
4899
|
-
container.exec(command, tty: true)
|
|
4900
|
-
say "#{container.id}:#{port}"
|
|
4901
|
-
rescue => e
|
|
4902
|
-
puts e
|
|
4903
|
-
if e.message.include? "is not running"
|
|
4904
|
-
return "port is taken"
|
|
4905
|
-
end
|
|
4906
|
-
puts "error"
|
|
4907
|
-
if container
|
|
4908
|
-
container.kill()
|
|
4909
|
-
end
|
|
4910
|
-
return false
|
|
4911
|
-
end
|
|
4912
|
-
end
|
|
4913
4562
|
|
|
4914
4563
|
desc '', '', :hide => true
|
|
4915
4564
|
|
|
@@ -4935,39 +4584,10 @@ module Cnvrg
|
|
|
4935
4584
|
|
|
4936
4585
|
end
|
|
4937
4586
|
|
|
4938
|
-
desc '
|
|
4939
|
-
|
|
4940
|
-
|
|
4941
|
-
|
|
4942
|
-
method_option :gpu, :type => :boolean, :aliases => ["-g","--gpu"], :default => false
|
|
4943
|
-
def upload_image(image_name,image_path)
|
|
4944
|
-
begin
|
|
4945
|
-
verify_logged_in(false)
|
|
4946
|
-
log_start(__method__, args, options)
|
|
4947
|
-
|
|
4948
|
-
@image = Cnvrg::Images.new()
|
|
4949
|
-
say "Uploading new docker image file", Thor::Shell::Color::BLUE
|
|
4950
|
-
workdir = options[:workdir]
|
|
4951
|
-
description = options[:description]
|
|
4952
|
-
user = options[:user]
|
|
4953
|
-
is_gpu = options[:gpu]
|
|
4954
|
-
res = @image.upload_docker_image(image_path, image_name, workdir, user, description, is_gpu)
|
|
4955
|
-
if res["status"] == 200
|
|
4956
|
-
image_slug = res["id"]
|
|
4957
|
-
owner = CLI.get_owner
|
|
4958
|
-
image_url = "#{Cnvrg::Helpers.remote_url}/#{owner}/settings/images/#{image_slug}"
|
|
4959
|
-
log_message("Successfully uploaded image: #{image_url}", Thor::Shell::Color::GREEN, true)
|
|
4960
|
-
|
|
4961
|
-
|
|
4962
|
-
else
|
|
4963
|
-
log_message("Couldn't upload image: #{image_name}", Thor::Shell::Color::RED, true)
|
|
4964
|
-
|
|
4965
|
-
end
|
|
4966
|
-
rescue => e
|
|
4967
|
-
log_error(e)
|
|
4968
|
-
end
|
|
4969
|
-
|
|
4970
|
-
|
|
4587
|
+
desc 'file_exists', description: '', hide: true
|
|
4588
|
+
def file_exists(file)
|
|
4589
|
+
exit(0) if File.exists? file
|
|
4590
|
+
exit(1)
|
|
4971
4591
|
end
|
|
4972
4592
|
|
|
4973
4593
|
|
|
@@ -5147,29 +4767,40 @@ module Cnvrg
|
|
|
5147
4767
|
method_option :project_slug, :type => :string, :aliases => ["-s"], :desc => "project slug"
|
|
5148
4768
|
method_option :project_owner, :type => :string, :aliases => ["-o"], :desc => "project slug"
|
|
5149
4769
|
method_option :frequency, :type => :numeric, :aliases => ["-f"], :desc => "poll frequency"
|
|
4770
|
+
method_option :fetch_slugs, :type => :boolean, :default => false, :desc => "Fetch experiments slugs to compare"
|
|
5150
4771
|
|
|
5151
4772
|
def compare_experiments
|
|
5152
4773
|
verify_logged_in(true)
|
|
5153
4774
|
log_start(__method__, args, options)
|
|
5154
4775
|
exps_map = {}
|
|
4776
|
+
copied_commits = []
|
|
5155
4777
|
|
|
5156
|
-
if options[:slugs].blank?
|
|
4778
|
+
if options[:slugs].blank? and options[:fetch_slugs].blank?
|
|
5157
4779
|
log_message("No experiments slugs given", Thor::Shell::Color::RED)
|
|
5158
4780
|
return false
|
|
5159
4781
|
end
|
|
5160
|
-
|
|
5161
|
-
|
|
5162
|
-
log_message("No experiments slugs given", Thor::Shell::Color::RED)
|
|
5163
|
-
return false
|
|
4782
|
+
if options[:slugs].present?
|
|
4783
|
+
slugs = options[:slugs].split(",")
|
|
5164
4784
|
end
|
|
4785
|
+
|
|
5165
4786
|
frequency = options[:frequency] || 5
|
|
5166
4787
|
namespace = options[:namespace]
|
|
5167
4788
|
project_dir = is_cnvrg_dir(Dir.pwd)
|
|
5168
4789
|
@project = Project.new(project_home=project_dir, slug: options[:project_slug], owner: options[:project_owner])
|
|
4790
|
+
fetch_slugs = options[:fetch_slugs]
|
|
4791
|
+
webapp_slug = ENV["CNVRG_JOB_ID"]
|
|
4792
|
+
if fetch_slugs and webapp_slug.present?
|
|
4793
|
+
slugs = @project.fetch_webapp_slugs(webapp_slug)
|
|
4794
|
+
end
|
|
4795
|
+
if slugs.blank?
|
|
4796
|
+
log_message("No experiments slugs given", Thor::Shell::Color::RED)
|
|
4797
|
+
return false
|
|
4798
|
+
end
|
|
5169
4799
|
|
|
4800
|
+
log_message("compare is running")
|
|
5170
4801
|
while true
|
|
4802
|
+
log_message("compare is running for slugs #{slugs}")
|
|
5171
4803
|
slugs.each do |exp_slug|
|
|
5172
|
-
|
|
5173
4804
|
begin
|
|
5174
4805
|
if exps_map[exp_slug].blank?
|
|
5175
4806
|
exp = @project.get_experiment(exp_slug)["experiment"]
|
|
@@ -5183,15 +4814,23 @@ module Cnvrg
|
|
|
5183
4814
|
log_message("#{exp_name} has ended, getting files from end commit", Thor::Shell::Color::BLUE)
|
|
5184
4815
|
Cnvrg::Helpers.get_experiment_events_log_from_server(exp, @project)
|
|
5185
4816
|
exps_map[exp_slug] = exp
|
|
5186
|
-
|
|
4817
|
+
else
|
|
5187
4818
|
log_message("#{exp_name} is running should get logs", Thor::Shell::Color::BLUE)
|
|
5188
|
-
Cnvrg::Helpers.get_experiment_events_log_via_kubectl(exp, namespace)
|
|
4819
|
+
success = Cnvrg::Helpers.get_experiment_events_log_via_kubectl(exp, namespace)
|
|
4820
|
+
if !success and exp["last_successful_commit"].present? and !copied_commits.include?(exp["last_successful_commit"])
|
|
4821
|
+
log_message("Failed to get kube files, using last commit", Thor::Shell::Color::BLUE)
|
|
4822
|
+
Cnvrg::Helpers.get_experiment_events_log_from_server(exp, @project, commit: exp["last_successful_commit"])
|
|
4823
|
+
copied_commits << exp["last_successful_commit"]
|
|
4824
|
+
end
|
|
5189
4825
|
end
|
|
5190
4826
|
rescue => e
|
|
5191
4827
|
Cnvrg::Logger.log_error(e)
|
|
5192
4828
|
end
|
|
5193
4829
|
end
|
|
5194
4830
|
sleep frequency
|
|
4831
|
+
if fetch_slugs
|
|
4832
|
+
slugs = @project.fetch_webapp_slugs(webapp_slug, slugs: slugs)
|
|
4833
|
+
end
|
|
5195
4834
|
end
|
|
5196
4835
|
end
|
|
5197
4836
|
|
|
@@ -5277,127 +4916,6 @@ module Cnvrg
|
|
|
5277
4916
|
end
|
|
5278
4917
|
|
|
5279
4918
|
|
|
5280
|
-
desc 'pull_image', 'downloads and loads an image', :hide => true
|
|
5281
|
-
|
|
5282
|
-
def pull_image(image_name)
|
|
5283
|
-
begin
|
|
5284
|
-
verify_logged_in(false)
|
|
5285
|
-
log_start(__method__, args, options)
|
|
5286
|
-
owner = Cnvrg::CLI.get_owner()
|
|
5287
|
-
image = Cnvrg::Images.image_exist(owner, image_name)
|
|
5288
|
-
if !image
|
|
5289
|
-
log_message("Couldn't find image in cnvrg repository", Thor::Shell::Color::RED)
|
|
5290
|
-
exit(1)
|
|
5291
|
-
end
|
|
5292
|
-
path = download_image(image_name, image["slug"])
|
|
5293
|
-
if path
|
|
5294
|
-
log_message("Building image", Thor::Shell::Color::BLUE)
|
|
5295
|
-
Docker.options[:read_timeout] = 216000
|
|
5296
|
-
image = Docker::Image.build_from_dir(path, {'dockerfile' => 'Dockerfile.cpu', 't' => "#{image_name}:latest"}) do |v|
|
|
5297
|
-
begin
|
|
5298
|
-
if (log = JSON.parse(v)) && log.has_key?("stream")
|
|
5299
|
-
next if log["stream"].starts_with? "Step"
|
|
5300
|
-
$stdout.puts log["stream"]
|
|
5301
|
-
end
|
|
5302
|
-
rescue
|
|
5303
|
-
end
|
|
5304
|
-
|
|
5305
|
-
end
|
|
5306
|
-
|
|
5307
|
-
if not image.nil?
|
|
5308
|
-
FileUtils.rm_rf(path)
|
|
5309
|
-
checks = Helpers.checkmark()
|
|
5310
|
-
log_message("#{checks} Image built successfully", Thor::Shell::Color::GREEN)
|
|
5311
|
-
return image
|
|
5312
|
-
else
|
|
5313
|
-
|
|
5314
|
-
log_message("Could not build image", Thor::Shell::Color::RED)
|
|
5315
|
-
return false
|
|
5316
|
-
end
|
|
5317
|
-
else
|
|
5318
|
-
|
|
5319
|
-
log_message("Could not download image", Thor::Shell::Color::RED)
|
|
5320
|
-
return false
|
|
5321
|
-
|
|
5322
|
-
|
|
5323
|
-
end
|
|
5324
|
-
|
|
5325
|
-
# else
|
|
5326
|
-
# path = download_image(image_name,image["slug"])
|
|
5327
|
-
# if path
|
|
5328
|
-
# image = Docker::Image.import(path)
|
|
5329
|
-
# image.tag('repo' => image_name, 'tag' => 'latest')
|
|
5330
|
-
# if not image.nil?
|
|
5331
|
-
# say "Finished downloading image, cleaning up..", Thor::Shell::Color::GREEN
|
|
5332
|
-
# FileUtils.rm(path)
|
|
5333
|
-
# checks = Helpers.checkmark()
|
|
5334
|
-
# say "#{checks} Done", Thor::Shell::Color::GREEN
|
|
5335
|
-
# log_end(0)
|
|
5336
|
-
# return image
|
|
5337
|
-
# log_end(0)
|
|
5338
|
-
# else
|
|
5339
|
-
# say "Could not download image", Thor::Shell::Color::RED
|
|
5340
|
-
# return false
|
|
5341
|
-
# end
|
|
5342
|
-
#
|
|
5343
|
-
# end
|
|
5344
|
-
# end
|
|
5345
|
-
rescue => e
|
|
5346
|
-
|
|
5347
|
-
log_message "Error: couldn't build image", Thor::Shell::Color::RED
|
|
5348
|
-
log_error(e)
|
|
5349
|
-
|
|
5350
|
-
rescue SignalException
|
|
5351
|
-
say "\nAborting"
|
|
5352
|
-
exit(1)
|
|
5353
|
-
ensure
|
|
5354
|
-
if path
|
|
5355
|
-
FileUtils.rm_rf(path)
|
|
5356
|
-
|
|
5357
|
-
end
|
|
5358
|
-
end
|
|
5359
|
-
|
|
5360
|
-
|
|
5361
|
-
end
|
|
5362
|
-
|
|
5363
|
-
desc 'set_image', 'set image to a porject', :hide => true
|
|
5364
|
-
|
|
5365
|
-
def set_image(docker_image)
|
|
5366
|
-
verify_logged_in(true)
|
|
5367
|
-
log_start(__method__, args, options)
|
|
5368
|
-
working_dir = is_cnvrg_dir
|
|
5369
|
-
project = Project.new(working_dir)
|
|
5370
|
-
|
|
5371
|
-
local_images = Docker::Image.all
|
|
5372
|
-
docker_image_local = local_images.map {|x| x.info["RepoTags"]}.flatten.select {|y| y.include? docker_image}.flatten
|
|
5373
|
-
if docker_image_local.size == 0
|
|
5374
|
-
|
|
5375
|
-
if yes? "Image wasn't found locally, pull image from cnvrg repository?", Thor::Shell::Color::YELLOW
|
|
5376
|
-
image = pull(docker_image)
|
|
5377
|
-
if image
|
|
5378
|
-
log_message("downloaded image: #{docker_image}", Thor::Shell::Color::BLUE)
|
|
5379
|
-
@image = Images.new(working_dir, docker_image)
|
|
5380
|
-
else
|
|
5381
|
-
log_message("Could not create a new project with docker, image was not found", Thor::Shell::Color::RED)
|
|
5382
|
-
exit(1)
|
|
5383
|
-
end
|
|
5384
|
-
else
|
|
5385
|
-
log_message("Could not create a new project with docker, image was not found", Thor::Shell::Color::RED)
|
|
5386
|
-
exit(1)
|
|
5387
|
-
|
|
5388
|
-
end
|
|
5389
|
-
elsif docker_image_local.size == 1
|
|
5390
|
-
log_message("found image: #{docker_image_local[0]}, setting it up..", Thor::Shell::Color::BLUE)
|
|
5391
|
-
@image = Images.new(working_dir, docker_image_local[0])
|
|
5392
|
-
elsif docker_image_local.size > 1
|
|
5393
|
-
log_message("found #{docker_image_local.size} images, choose the image name you want to use", Thor::Shell::Color::BLUE)
|
|
5394
|
-
image_name = ask "#{docker_image_local.join("\n")}\n", Thor::Shell::Color::BLUE
|
|
5395
|
-
image_name = image_name.strip
|
|
5396
|
-
@image = Images.new(working_dir, image_name)
|
|
5397
|
-
end
|
|
5398
|
-
@image.update_image_activity(project.last_local_commit, nil)
|
|
5399
|
-
end
|
|
5400
|
-
|
|
5401
4919
|
desc 'check_pod_restart', 'Check pod restart', :hide => true
|
|
5402
4920
|
def check_pod_restart
|
|
5403
4921
|
Cnvrg::CLI.new.log_start(__method__, args, options)
|
|
@@ -5672,7 +5190,7 @@ module Cnvrg
|
|
|
5672
5190
|
|
|
5673
5191
|
if dirs.size == 0
|
|
5674
5192
|
log_message("Couldn't find cnvrg directory. Please start a new project", Thor::Shell::Color::RED)
|
|
5675
|
-
|
|
5193
|
+
puts Thread.current.backtrace
|
|
5676
5194
|
exit(1)
|
|
5677
5195
|
end
|
|
5678
5196
|
return dirs.join("/")
|
|
@@ -5775,7 +5293,7 @@ module Cnvrg
|
|
|
5775
5293
|
is_cnvrg = is_cnvrg_dir
|
|
5776
5294
|
if !is_cnvrg
|
|
5777
5295
|
say "You're not in a cnvrg project directory", Thor::Shell::Color::RED
|
|
5778
|
-
exit(
|
|
5296
|
+
exit(1)
|
|
5779
5297
|
end
|
|
5780
5298
|
|
|
5781
5299
|
end
|
|
@@ -5921,21 +5439,6 @@ module Cnvrg
|
|
|
5921
5439
|
|
|
5922
5440
|
end
|
|
5923
5441
|
|
|
5924
|
-
def container_changes(dir)
|
|
5925
|
-
container_id = is_project_with_docker(dir)
|
|
5926
|
-
if not container_id
|
|
5927
|
-
return false
|
|
5928
|
-
end
|
|
5929
|
-
container = Docker::Container.get(container_id)
|
|
5930
|
-
command = ['/bin/bash', '-lc', '/opt/ds/bin/pip freeze']
|
|
5931
|
-
pip = container.exec(command, tty: true)[0]
|
|
5932
|
-
command = ["/bin/bash", "-lc", "dpkg -l"]
|
|
5933
|
-
dpkg = container.exec(command, tty: true)[0]
|
|
5934
|
-
command = ["/bin/bash", "-lc", "cat /home/ds/.bash_history"]
|
|
5935
|
-
history = container.exec(command, tty: true)[0]
|
|
5936
|
-
diff = [pip, dpkg, history]
|
|
5937
|
-
return diff
|
|
5938
|
-
end
|
|
5939
5442
|
|
|
5940
5443
|
def is_port_taken(ip = Cnvrg::CLI::IP, port = Cnvrg::CLI::PORT, seconds = 1)
|
|
5941
5444
|
Timeout::timeout(seconds) do
|
|
@@ -6118,13 +5621,17 @@ module Cnvrg
|
|
|
6118
5621
|
|
|
6119
5622
|
end
|
|
6120
5623
|
|
|
6121
|
-
def gpu_util
|
|
5624
|
+
def gpu_util(take_from_docker: false, docker_id: nil)
|
|
6122
5625
|
if !Helpers.ubuntu?
|
|
6123
5626
|
return 0.0
|
|
6124
5627
|
end
|
|
6125
5628
|
stats = [[],[]]
|
|
6126
5629
|
begin
|
|
6127
|
-
|
|
5630
|
+
if take_from_docker
|
|
5631
|
+
gpu_stats = `docker exec -it #{docker_id} sh -c 'nvidia-smi --query-gpu=utilization.gpu,utilization.memory --format=csv'`
|
|
5632
|
+
else
|
|
5633
|
+
gpu_stats = `nvidia-smi --query-gpu=utilization.gpu,utilization.memory --format=csv`
|
|
5634
|
+
end
|
|
6128
5635
|
|
|
6129
5636
|
if !gpu_stats.nil?
|
|
6130
5637
|
gpu_stats = gpu_stats.split("\n")[1..-1]
|