cnvrg 1.6.32 → 1.9.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/cnvrg.gemspec +1 -4
- data/lib/cnvrg/Images.rb +0 -148
- data/lib/cnvrg/api.rb +8 -8
- data/lib/cnvrg/api_v2.rb +14 -0
- data/lib/cnvrg/cli.rb +299 -790
- data/lib/cnvrg/connect_job_ssh.rb +31 -0
- data/lib/cnvrg/data.rb +68 -12
- data/lib/cnvrg/datafiles.rb +333 -37
- data/lib/cnvrg/dataset.rb +65 -29
- data/lib/cnvrg/experiment.rb +10 -4
- data/lib/cnvrg/files.rb +68 -15
- data/lib/cnvrg/helpers.rb +34 -26
- data/lib/cnvrg/helpers/agent.rb +188 -0
- data/lib/cnvrg/helpers/executer.rb +162 -258
- data/lib/cnvrg/job_cli.rb +28 -53
- data/lib/cnvrg/job_ssh.rb +47 -0
- data/lib/cnvrg/logger.rb +4 -0
- data/lib/cnvrg/project.rb +53 -17
- data/lib/cnvrg/ssh.rb +0 -1
- data/lib/cnvrg/version.rb +1 -1
- metadata +9 -33
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: e708ef034df38ed0b4f5c1ac4bb02fa79a26c93b188f571256f75dbc9d2eaaa6
|
|
4
|
+
data.tar.gz: 6badf54b65660776e63c02c7d3c5dbbab83d0e1e83f6e877b48d77fad5ba3036
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 21d89ec4fb99c4102bc1e8e0e50df516339a1c9e9660ee8f0dd8acf3ae30bd27067f5ea4fe979de3b737bd6f748ced98f023100487a4226b7f21eed17975142c
|
|
7
|
+
data.tar.gz: 91fb2d10994c11e9b28ef3bbc128f847ac2efd641892c29ec1ec2b16d4b125266e85a6166153b66ab9e9e1c475190f6eca771e42d739a02c1136dbe8cb6c3abb
|
data/cnvrg.gemspec
CHANGED
|
@@ -31,7 +31,7 @@ Gem::Specification.new do |spec|
|
|
|
31
31
|
spec.add_runtime_dependency 'open4', '~> 1.3', '>= 1.3.4'
|
|
32
32
|
spec.add_runtime_dependency 'highline', '~> 1.7', '>= 1.7.8'
|
|
33
33
|
spec.add_runtime_dependency 'thor', '~> 0.19.0','>=0.19.1'
|
|
34
|
-
spec.add_runtime_dependency 'aws-sdk', '~>
|
|
34
|
+
spec.add_runtime_dependency 'aws-sdk', '~> 3.0'
|
|
35
35
|
spec.add_runtime_dependency 'signet', '~> 0.11.0'
|
|
36
36
|
spec.add_runtime_dependency 'google-cloud-env', '~> 1.2.1'
|
|
37
37
|
spec.add_runtime_dependency 'google-cloud-core', '~> 1.3.2'
|
|
@@ -40,11 +40,8 @@ Gem::Specification.new do |spec|
|
|
|
40
40
|
spec.add_runtime_dependency 'urlcrypt', '~> 0.1.1'
|
|
41
41
|
spec.add_runtime_dependency 'parallel', '~> 1.12.0'
|
|
42
42
|
spec.add_runtime_dependency 'azure-storage-blob', '~> 1.1.0'
|
|
43
|
-
|
|
44
43
|
spec.add_runtime_dependency 'logstash-logger', '~> 0.22.1'
|
|
45
|
-
spec.add_runtime_dependency 'docker-api', '~> 1.33'
|
|
46
44
|
spec.add_runtime_dependency 'activesupport', '~> 5.2.0'
|
|
47
45
|
spec.add_runtime_dependency 'ruby-progressbar'
|
|
48
|
-
spec.add_runtime_dependency 'net-ssh'
|
|
49
46
|
spec.add_runtime_dependency 'down'
|
|
50
47
|
end
|
data/lib/cnvrg/Images.rb
CHANGED
|
@@ -1,7 +1,5 @@
|
|
|
1
1
|
require 'fileutils'
|
|
2
2
|
require 'cnvrg/files'
|
|
3
|
-
require 'docker'
|
|
4
|
-
require 'net/ssh'
|
|
5
3
|
require 'mimemagic'
|
|
6
4
|
|
|
7
5
|
|
|
@@ -175,58 +173,6 @@ module Cnvrg
|
|
|
175
173
|
response = Cnvrg::API.request("users/#{owner}/images/#{slug}/commit_custom_image", 'POST', {image_logs:logs})
|
|
176
174
|
return response
|
|
177
175
|
end
|
|
178
|
-
def self.ssh_to_machine(resp)
|
|
179
|
-
|
|
180
|
-
sts_path = resp["result"]["sts_path"]
|
|
181
|
-
|
|
182
|
-
uri = URI.parse(sts_path)
|
|
183
|
-
|
|
184
|
-
http_object = Net::HTTP.new(uri.host, uri.port)
|
|
185
|
-
http_object.use_ssl = true if uri.scheme == 'https'
|
|
186
|
-
request = Net::HTTP::Get.new(sts_path)
|
|
187
|
-
|
|
188
|
-
body = ""
|
|
189
|
-
http_object.start do |http|
|
|
190
|
-
response = http.request request
|
|
191
|
-
body = response.read_body
|
|
192
|
-
end
|
|
193
|
-
|
|
194
|
-
URLcrypt::key = [body].pack('H*')
|
|
195
|
-
|
|
196
|
-
ip = URLcrypt.decrypt(resp["result"]["machine_i"])
|
|
197
|
-
|
|
198
|
-
user = URLcrypt.decrypt(resp["result"]["machine_u"])
|
|
199
|
-
key = URLcrypt.decrypt(resp["result"]["machine_k"])
|
|
200
|
-
tempssh = Tempfile.new "sshkey"
|
|
201
|
-
tempssh.write open(key).read
|
|
202
|
-
tempssh.rewind
|
|
203
|
-
key_path = tempssh.path
|
|
204
|
-
count = 0
|
|
205
|
-
while count < 5
|
|
206
|
-
|
|
207
|
-
begin
|
|
208
|
-
ssh = Net::SSH.start(ip, user=user, :keys => key_path, :timeout => 10)
|
|
209
|
-
if !ssh.nil?
|
|
210
|
-
return ssh
|
|
211
|
-
else
|
|
212
|
-
count+=1
|
|
213
|
-
sleep(2)
|
|
214
|
-
|
|
215
|
-
end
|
|
216
|
-
rescue
|
|
217
|
-
count+=1
|
|
218
|
-
sleep(2)
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
end
|
|
222
|
-
end
|
|
223
|
-
if tempssh
|
|
224
|
-
tempssh.close
|
|
225
|
-
tempssh.unlink
|
|
226
|
-
end
|
|
227
|
-
return false
|
|
228
|
-
end
|
|
229
|
-
|
|
230
176
|
|
|
231
177
|
|
|
232
178
|
def create_custom_image(new_image_name,working_dir,stored_commands)
|
|
@@ -270,100 +216,6 @@ module Cnvrg
|
|
|
270
216
|
File.open(@working_dir+"/.cnvrg/config.yml", "w+") { |f| f.write config.to_yaml }
|
|
271
217
|
end
|
|
272
218
|
|
|
273
|
-
def get_container(stop=false)
|
|
274
|
-
begin
|
|
275
|
-
container_id=is_container_exist()
|
|
276
|
-
|
|
277
|
-
if !container_id
|
|
278
|
-
return create_container()
|
|
279
|
-
else
|
|
280
|
-
container = Docker::Container.get(container_id)
|
|
281
|
-
status = container.json["State"]["Status"]
|
|
282
|
-
|
|
283
|
-
if status == "running"
|
|
284
|
-
return container
|
|
285
|
-
else
|
|
286
|
-
if stop
|
|
287
|
-
return false
|
|
288
|
-
end
|
|
289
|
-
res = container.start()
|
|
290
|
-
if res.info["State"]["Status"].eql? "exited" and res.info["State"]["Error"].include? "port is already allocated"
|
|
291
|
-
return create_container()
|
|
292
|
-
end
|
|
293
|
-
return container
|
|
294
|
-
end
|
|
295
|
-
end
|
|
296
|
-
rescue => e
|
|
297
|
-
if e.message.include? "No such container"
|
|
298
|
-
|
|
299
|
-
return create_container()
|
|
300
|
-
else
|
|
301
|
-
return false
|
|
302
|
-
end
|
|
303
|
-
end
|
|
304
|
-
|
|
305
|
-
end
|
|
306
|
-
|
|
307
|
-
def create_container(port=7654, is_remote=false)
|
|
308
|
-
begin
|
|
309
|
-
image_settings = {
|
|
310
|
-
'Image' => "#{@image_name}:latest",
|
|
311
|
-
'User' => 'ds',
|
|
312
|
-
'Cmd' => '/usr/local/cnvrg/run_ipython.sh',
|
|
313
|
-
'WorkingDir' => '/home/ds/notebooks',
|
|
314
|
-
'ExposedPorts' => {
|
|
315
|
-
'8888/tcp' => {},
|
|
316
|
-
},
|
|
317
|
-
'HostConfig' => {
|
|
318
|
-
'Binds' => ["#{@working_dir}:/home/ds/notebooks"],
|
|
319
|
-
'PortBindings' => {
|
|
320
|
-
'8888/tcp' => [
|
|
321
|
-
{'HostPort' => "#{port}", 'HostIp' => 'localhost'}
|
|
322
|
-
],
|
|
323
|
-
},
|
|
324
|
-
},
|
|
325
|
-
}
|
|
326
|
-
container = Docker::Container.create(image_settings)
|
|
327
|
-
container.start()
|
|
328
|
-
netrc = File.open(File.expand_path('~')+"/.netrc", "rb")
|
|
329
|
-
netrc_content = netrc.read
|
|
330
|
-
container.store_file("/home/ds/.netrc", netrc_content)
|
|
331
|
-
command = ["/bin/bash", "-lc", "sudo chmod 600 /home/ds/.netrc"]
|
|
332
|
-
p = container.exec(command, tty: true)
|
|
333
|
-
command = ["/bin/bash", "-lc", "sudo chown -R ds /home/ds/.netrc"]
|
|
334
|
-
p = container.exec(command, tty: true)
|
|
335
|
-
config = File.open(File.expand_path('~')+"/.cnvrg/config.yml", "rb")
|
|
336
|
-
config_content = config.read
|
|
337
|
-
container.store_file("/home/ds/.cnvrg/config.yml", config_content)
|
|
338
|
-
command = ["/bin/bash", "-lc", "sudo chown -R ds /home/ds/.cnvrg"]
|
|
339
|
-
container.exec(command, tty: true)
|
|
340
|
-
# Libraries instlled
|
|
341
|
-
save_installed_libraries(container)
|
|
342
|
-
config = {project_name: @project_name,
|
|
343
|
-
project_slug: @project_slug,
|
|
344
|
-
owner: @owner,
|
|
345
|
-
docker: true, image_base: @image_name, image_tag: @image_tag, container: container.id, port: port, image_slug: @image_slug}
|
|
346
|
-
|
|
347
|
-
File.open(@working_dir+"/.cnvrg/config.yml", "w+") { |f| f.write config.to_yaml }
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
return container
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
rescue => e
|
|
354
|
-
if e.message.include? "is not running"
|
|
355
|
-
return create_container(port-1)
|
|
356
|
-
end
|
|
357
|
-
return false
|
|
358
|
-
rescue SignalException
|
|
359
|
-
|
|
360
|
-
say "\nAborting", Thor::Shell::Color::RED
|
|
361
|
-
exit(1)
|
|
362
|
-
end
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
end
|
|
366
|
-
|
|
367
219
|
def save_installed_libraries(container)
|
|
368
220
|
begin
|
|
369
221
|
command = ['/bin/bash', '-lc', '/opt/ds/bin/pip freeze']
|
data/lib/cnvrg/api.rb
CHANGED
|
@@ -77,20 +77,22 @@ module Cnvrg
|
|
|
77
77
|
if response.to_hash[:status] == 404
|
|
78
78
|
return false
|
|
79
79
|
end
|
|
80
|
-
if parse_request
|
|
80
|
+
if parse_request
|
|
81
81
|
JSON.parse(response.body)
|
|
82
82
|
else
|
|
83
83
|
response
|
|
84
84
|
end
|
|
85
|
-
|
|
85
|
+
when 'POST', 'PUT'
|
|
86
86
|
conn.options.timeout = 4200
|
|
87
|
-
conn.options.open_timeout=180
|
|
87
|
+
conn.options.open_timeout = 180
|
|
88
|
+
conn.headers['Content-Type'] = "application/json"
|
|
88
89
|
retries = 0
|
|
89
90
|
success = false
|
|
91
|
+
data = data || {}
|
|
90
92
|
while !success and retries < 20
|
|
91
93
|
begin
|
|
92
|
-
response = conn.post "#{resource}", data if method.eql? 'POST'
|
|
93
|
-
response = conn.put "#{resource}", data if method.eql? 'PUT'
|
|
94
|
+
response = conn.post "#{resource}", data.to_json if method.eql? 'POST'
|
|
95
|
+
response = conn.put "#{resource}", data.to_json if method.eql? 'PUT'
|
|
94
96
|
success = true
|
|
95
97
|
Cnvrg::API.parse_version(response)
|
|
96
98
|
|
|
@@ -113,7 +115,7 @@ module Cnvrg
|
|
|
113
115
|
end
|
|
114
116
|
when 'POST_JSON'
|
|
115
117
|
conn.options.timeout = 4200
|
|
116
|
-
conn.options.open_timeout =4200
|
|
118
|
+
conn.options.open_timeout = 4200
|
|
117
119
|
conn.headers['Content-Type'] = "application/json"
|
|
118
120
|
new_data = JSON.dump(data)
|
|
119
121
|
|
|
@@ -124,8 +126,6 @@ module Cnvrg
|
|
|
124
126
|
begin
|
|
125
127
|
response = conn.post "#{resource}", new_data
|
|
126
128
|
success = true
|
|
127
|
-
Cnvrg::API.parse_version(response)
|
|
128
|
-
|
|
129
129
|
rescue => e
|
|
130
130
|
Cnvrg::Logger.log_error(e)
|
|
131
131
|
sleep(5)
|
data/lib/cnvrg/api_v2.rb
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
module Cnvrg
|
|
2
|
+
class API_V2 < API
|
|
3
|
+
ENDPOINT_VERSION = 'v2'
|
|
4
|
+
|
|
5
|
+
def self.endpoint_uri
|
|
6
|
+
api = get_api()
|
|
7
|
+
return "#{api}/#{Cnvrg::API_V2::ENDPOINT_VERSION}"
|
|
8
|
+
end
|
|
9
|
+
|
|
10
|
+
def self.is_response_success(response)
|
|
11
|
+
raise Exception.new("Bad status in response #{response.status}") if response.status != 200
|
|
12
|
+
end
|
|
13
|
+
end
|
|
14
|
+
end
|
data/lib/cnvrg/cli.rb
CHANGED
|
@@ -12,7 +12,6 @@ require 'digest' # sha1up
|
|
|
12
12
|
require "highline/import"
|
|
13
13
|
require 'socket'
|
|
14
14
|
require 'thor'
|
|
15
|
-
require 'docker'
|
|
16
15
|
require 'socket'
|
|
17
16
|
require 'timeout'
|
|
18
17
|
require 'fileutils'
|
|
@@ -28,13 +27,11 @@ require 'cnvrg/auth'
|
|
|
28
27
|
require 'cnvrg/project'
|
|
29
28
|
require 'cnvrg/files'
|
|
30
29
|
require 'cnvrg/experiment'
|
|
31
|
-
require 'cnvrg/Images'
|
|
32
30
|
require 'cnvrg/image'
|
|
33
31
|
require 'cnvrg/dataset'
|
|
34
32
|
require 'cnvrg/datafiles'
|
|
35
33
|
require 'cnvrg/data'
|
|
36
34
|
require 'cnvrg/storage'
|
|
37
|
-
require 'cnvrg/ssh'
|
|
38
35
|
require 'cnvrg/result'
|
|
39
36
|
require 'cnvrg/logger'
|
|
40
37
|
require 'cnvrg/org_helpers'
|
|
@@ -49,6 +46,9 @@ require 'cnvrg/downloader/clients/s3_client'
|
|
|
49
46
|
require 'cnvrg/downloader/clients/gcp_client'
|
|
50
47
|
require 'cnvrg/downloader/clients/azure_client'
|
|
51
48
|
require 'cnvrg/job_cli'
|
|
49
|
+
require 'cnvrg/job_ssh'
|
|
50
|
+
require 'cnvrg/connect_job_ssh'
|
|
51
|
+
require 'cnvrg/api_v2'
|
|
52
52
|
|
|
53
53
|
class Thor
|
|
54
54
|
module Base
|
|
@@ -175,6 +175,9 @@ module Cnvrg
|
|
|
175
175
|
desc "job", "manage running jobs", :hide => false
|
|
176
176
|
subcommand "job", JobCli
|
|
177
177
|
|
|
178
|
+
desc "ssh", "ssh into running jobs", :hide => false
|
|
179
|
+
subcommand "ssh", JobSsh
|
|
180
|
+
|
|
178
181
|
desc "image [COMMAND]", "build existing images", :hide => true
|
|
179
182
|
subcommand "image", ImageCli
|
|
180
183
|
|
|
@@ -819,9 +822,9 @@ module Cnvrg
|
|
|
819
822
|
end
|
|
820
823
|
|
|
821
824
|
desc 'data verify', 'Verify datasets', :hide => true
|
|
822
|
-
method_option :timeout, :type => :numeric, :aliases => ["-t", "--timeout"], :desc => "Time to wait before returning final answer", :default =>
|
|
825
|
+
method_option :timeout, :type => :numeric, :aliases => ["-t", "--timeout"], :desc => "Time to wait before returning final answer", :default => nil
|
|
823
826
|
|
|
824
|
-
def verify_datasets(dataset_titles, timeout=
|
|
827
|
+
def verify_datasets(dataset_titles, timeout=nil)
|
|
825
828
|
begin
|
|
826
829
|
verify_logged_in(false)
|
|
827
830
|
log_start(__method__, args, options)
|
|
@@ -830,21 +833,31 @@ module Cnvrg
|
|
|
830
833
|
log_message("All datasets are verified", Thor::Shell::Color::BLUE) if verified
|
|
831
834
|
log_message("Failed to verify datasets", Thor::Shell::Color::RED) if !verified
|
|
832
835
|
exit(1) if !verified
|
|
833
|
-
|
|
834
836
|
rescue SignalException
|
|
835
837
|
say "\nAborting", Thor::Shell::Color::RED
|
|
836
838
|
exit(1)
|
|
837
839
|
end
|
|
838
840
|
end
|
|
839
841
|
|
|
842
|
+
desc 'data scan', 'Lookup datasets', :hide => true
|
|
843
|
+
def scan_datasets()
|
|
844
|
+
begin
|
|
845
|
+
verify_logged_in(false)
|
|
846
|
+
log_start(__method__, args, options)
|
|
847
|
+
log_message("Scanning datasets", Thor::Shell::Color::BLUE)
|
|
848
|
+
datasets = Dataset.scan_datasets()
|
|
849
|
+
puts(datasets.to_json)
|
|
850
|
+
end
|
|
851
|
+
end
|
|
852
|
+
|
|
840
853
|
desc 'data clone', 'Clone dataset', :hide => true
|
|
841
854
|
method_option :commit, :type => :string, :aliases => ["-c", "--commit"], :default => ""
|
|
842
855
|
method_option :only_tree, :type => :boolean, :aliases => ["-t", "--tree"], :default => false
|
|
843
856
|
method_option :query, :type => :string, :aliases => ["-q", "--query"], :default => nil
|
|
844
857
|
method_option :read, :type => :boolean, :aliases => ["-r", "--read"], :default => false
|
|
845
858
|
method_option :remote, :type => :boolean, :aliases => ["-h", "--remote"], :default => false
|
|
846
|
-
|
|
847
|
-
def clone_data(dataset_url,only_tree=false,commit=nil,query=nil,read=false,remote=false, relative: false)
|
|
859
|
+
method_option :soft, :type => :boolean, :aliases => ["-s", "--soft"], :default => false, :hide => true
|
|
860
|
+
def clone_data(dataset_url, only_tree=false, commit=nil, query=nil, read=false, remote=false, flatten: false, relative: false, soft: false)
|
|
848
861
|
begin
|
|
849
862
|
verify_logged_in(false)
|
|
850
863
|
log_start(__method__, args, options)
|
|
@@ -853,10 +866,10 @@ module Cnvrg
|
|
|
853
866
|
read = options["read"] || read || false
|
|
854
867
|
remote = options["remote"] || remote || false
|
|
855
868
|
query = options['query'].presence || query.presence
|
|
869
|
+
soft = options['soft'] || soft
|
|
856
870
|
if query.present?
|
|
857
|
-
return clone_data_query(dataset_url, query)
|
|
871
|
+
return clone_data_query(dataset_url, query, flatten, soft: soft)
|
|
858
872
|
end
|
|
859
|
-
@executer = Cnvrg::Helpers::Executer.get_executer
|
|
860
873
|
|
|
861
874
|
url_parts = dataset_url.split("/")
|
|
862
875
|
project_index = Cnvrg::Helpers.look_for_in_path(dataset_url, "datasets")
|
|
@@ -868,6 +881,8 @@ module Cnvrg
|
|
|
868
881
|
dataset_name = response["result"]["name"]
|
|
869
882
|
dataset_home = Dir.pwd+"/"+dataset_name
|
|
870
883
|
|
|
884
|
+
Dataset.stop_if_dataset_present(dataset_home, dataset_name, commit: response["result"]["commit"]) if soft
|
|
885
|
+
|
|
871
886
|
check = Helpers.checkmark
|
|
872
887
|
if @dataset.init_home(remote:remote)
|
|
873
888
|
log_message("Cloning #{dataset_name}", Thor::Shell::Color::BLUE)
|
|
@@ -875,14 +890,12 @@ module Cnvrg
|
|
|
875
890
|
log_message("Downloading files", Thor::Shell::Color::BLUE)
|
|
876
891
|
if @dataset.softlinked?
|
|
877
892
|
@files.cp_ds(relative: relative)
|
|
878
|
-
@executer.set_dataset_status(dataset: @dataset.slug, status: "cloned") if @executer
|
|
879
893
|
log_message("#{check} Clone finished successfully", Thor::Shell::Color::GREEN)
|
|
880
894
|
@dataset.write_success
|
|
881
895
|
return
|
|
882
896
|
end
|
|
883
897
|
|
|
884
898
|
if only_tree
|
|
885
|
-
|
|
886
899
|
success = Dataset.clone_tree(commit: commit, dataset_home: dataset_home)
|
|
887
900
|
return if success
|
|
888
901
|
end
|
|
@@ -900,7 +913,7 @@ module Cnvrg
|
|
|
900
913
|
|
|
901
914
|
while files['keys'].length > 0
|
|
902
915
|
Cnvrg::Logger.log_info("download multiple files, #{downloaded_files.size} files downloaded")
|
|
903
|
-
@files.download_multiple_files_s3(files, @dataset.local_path, progressbar: progressbar, read_only: read)
|
|
916
|
+
@files.download_multiple_files_s3(files, @dataset.local_path, progressbar: progressbar, read_only: read, flatten: flatten)
|
|
904
917
|
|
|
905
918
|
downloaded_files += files['keys'].length
|
|
906
919
|
files = @files.get_clone_chunk(commit: commit, latest_id: files['latest'])
|
|
@@ -908,7 +921,6 @@ module Cnvrg
|
|
|
908
921
|
progressbar.finish
|
|
909
922
|
if downloaded_files == files_count
|
|
910
923
|
Dataset.verify_cnvrgignore_exist(dataset_name, false)
|
|
911
|
-
@executer.set_dataset_status(dataset: @dataset.slug, status: "cloned") if @executer
|
|
912
924
|
log_message("#{check} Clone finished successfully", Thor::Shell::Color::GREEN)
|
|
913
925
|
@dataset.write_success
|
|
914
926
|
### if read, dont generate idx (but create idx.yml) if not read, generate idx.
|
|
@@ -930,12 +942,14 @@ module Cnvrg
|
|
|
930
942
|
|
|
931
943
|
desc 'data clone_query', 'Clone dataset _query', :hide => true
|
|
932
944
|
method_option :query, :type => :string, :aliases => ["-q", "--query"], :default => ""
|
|
933
|
-
|
|
945
|
+
method_option :soft, :type => :boolean, :aliases => ["-s", "--soft"], :default => false, :hide => true
|
|
946
|
+
def clone_data_query(dataset_url, query=nil, flatten=false, soft: false)
|
|
934
947
|
begin
|
|
935
948
|
verify_logged_in(false)
|
|
936
|
-
|
|
949
|
+
#@executer = Cnvrg::Helpers::Executer.get_executer
|
|
937
950
|
log_start(__method__, args, options)
|
|
938
951
|
query = options["query"] || query
|
|
952
|
+
soft = options["soft"] || soft
|
|
939
953
|
if !query.present?
|
|
940
954
|
log_message("Argument missing : query", Thor::Shell::Color::RED)
|
|
941
955
|
exit(1)
|
|
@@ -945,13 +959,14 @@ module Cnvrg
|
|
|
945
959
|
project_index = Cnvrg::Helpers.look_for_in_path(dataset_url, "datasets")
|
|
946
960
|
slug = url_parts[project_index + 1]
|
|
947
961
|
owner = url_parts[project_index - 1]
|
|
948
|
-
|
|
949
962
|
response = Cnvrg::API.request("users/#{owner}/datasets/#{slug}/search/#{query}", 'GET')
|
|
950
963
|
Cnvrg::CLI.is_response_success(response,true)
|
|
951
964
|
dataset_name = response["results"]["name"]
|
|
952
965
|
dataset_slug = response["results"]["slug"]
|
|
953
|
-
dataset_home =
|
|
966
|
+
dataset_home = Dir.pwd+"/"+dataset_slug
|
|
967
|
+
Dataset.stop_if_dataset_present(dataset_home, dataset_name) if soft
|
|
954
968
|
|
|
969
|
+
# dataset_home = Dir.pwd
|
|
955
970
|
if Dataset.blank_clone(owner, dataset_name, dataset_slug)
|
|
956
971
|
dataset = Dataset.new(dataset_home)
|
|
957
972
|
log_message("Cloning #{dataset_name}", Thor::Shell::Color::BLUE)
|
|
@@ -966,6 +981,7 @@ module Cnvrg
|
|
|
966
981
|
},
|
|
967
982
|
in_threads: ParallelThreads
|
|
968
983
|
}
|
|
984
|
+
|
|
969
985
|
begin
|
|
970
986
|
log_message("Downloading files", Thor::Shell::Color::BLUE)
|
|
971
987
|
Parallel.map((response["results"]["query_files"]), parallel_options) do |f|
|
|
@@ -974,6 +990,7 @@ module Cnvrg
|
|
|
974
990
|
file_name = relative_path_dir.pop()
|
|
975
991
|
relative_path_dir = relative_path_dir.join("/")
|
|
976
992
|
abs_path = dataset_home + "/" + relative_path_dir
|
|
993
|
+
abs_path = dataset_home if flatten
|
|
977
994
|
begin
|
|
978
995
|
FileUtils.mkdir_p(abs_path) unless File.exist? (abs_path + "/" + file_name)
|
|
979
996
|
rescue
|
|
@@ -981,14 +998,14 @@ module Cnvrg
|
|
|
981
998
|
exit(1)
|
|
982
999
|
end
|
|
983
1000
|
begin
|
|
984
|
-
File.write "#{abs_path}/#{file_name}", open(f["
|
|
985
|
-
rescue
|
|
1001
|
+
File.write "#{abs_path}/#{file_name}", open(f["url"]).read unless File.exist? (abs_path + "/" + file_name)
|
|
1002
|
+
rescue => e
|
|
986
1003
|
log_message("Could not download file: #{f["fullpath"]}", Thor::Shell::Color::RED)
|
|
987
1004
|
exit(1)
|
|
988
1005
|
end
|
|
989
1006
|
|
|
990
1007
|
end
|
|
991
|
-
|
|
1008
|
+
#@executer.set_dataset_status(dataset: dataset.slug, status: "cloned") if @executer.present?
|
|
992
1009
|
rescue Interrupt
|
|
993
1010
|
log_message("Couldn't download", Thor::Shell::Color::RED)
|
|
994
1011
|
exit(1)
|
|
@@ -998,7 +1015,7 @@ module Cnvrg
|
|
|
998
1015
|
check = Helpers.checkmark
|
|
999
1016
|
log_message("#{check} Clone finished successfully", Thor::Shell::Color::GREEN)
|
|
1000
1017
|
dataset.write_success(in_folder=true)
|
|
1001
|
-
rescue
|
|
1018
|
+
rescue => e
|
|
1002
1019
|
exit(1)
|
|
1003
1020
|
end
|
|
1004
1021
|
end
|
|
@@ -1008,32 +1025,6 @@ module Cnvrg
|
|
|
1008
1025
|
end
|
|
1009
1026
|
end
|
|
1010
1027
|
|
|
1011
|
-
desc 'init_data_container', 'Init dataset directory', :hide => true
|
|
1012
|
-
method_option :login_content, :type => :string, :aliases => ["-l"], :default => ""
|
|
1013
|
-
|
|
1014
|
-
def init_data_container(container)
|
|
1015
|
-
begin
|
|
1016
|
-
login_content = options["login_content"]
|
|
1017
|
-
|
|
1018
|
-
container = Docker::Container.get(container)
|
|
1019
|
-
command = ["/bin/bash", "-lc", "sudo echo -e \"#{login_content}\" >/home/ds/.netrc"]
|
|
1020
|
-
container.exec(command, tty: true)
|
|
1021
|
-
command = ["/bin/bash", "-lc", "mkdir /home/ds/.cnvrg"]
|
|
1022
|
-
container.exec(command, tty: true)
|
|
1023
|
-
command = ["/bin/bash", "-lc", "mkdir /home/ds/.cnvrg/tmp"]
|
|
1024
|
-
container.exec(command, tty: true)
|
|
1025
|
-
command = ["/bin/bash", "-lc", "sudo chown -R ds /home/ds/.cnvrg /home/ds/.netrc"]
|
|
1026
|
-
container.exec(command, tty: true)
|
|
1027
|
-
command = ["/bin/bash", "-lc", "sudo chmod 0600 /home/ds/.netrc"]
|
|
1028
|
-
container.exec(command, tty: true)
|
|
1029
|
-
|
|
1030
|
-
rescue SignalException
|
|
1031
|
-
|
|
1032
|
-
say "\nAborting", Thor::Shell::Color::RED
|
|
1033
|
-
exit(1)
|
|
1034
|
-
end
|
|
1035
|
-
end
|
|
1036
|
-
|
|
1037
1028
|
desc 'data_snap', 'Init dataset directory', :hide => true
|
|
1038
1029
|
method_option :public, :type => :boolean, :aliases => ["-p", "--public"], :default => false
|
|
1039
1030
|
|
|
@@ -1184,17 +1175,29 @@ module Cnvrg
|
|
|
1184
1175
|
end
|
|
1185
1176
|
|
|
1186
1177
|
desc '', '', :hide => true
|
|
1187
|
-
def
|
|
1178
|
+
def get_owner_slug(url_or_slug)
|
|
1179
|
+
if url_or_slug =~ URI::regexp
|
|
1180
|
+
# Find owner and slug in url
|
|
1181
|
+
url_parts = url_or_slug.split("/")
|
|
1182
|
+
project_index = Cnvrg::Helpers.look_for_in_path(url_or_slug, "datasets")
|
|
1183
|
+
slug = url_parts[project_index + 1]
|
|
1184
|
+
owner = url_parts[project_index - 1]
|
|
1185
|
+
else
|
|
1186
|
+
# Find owner in config file
|
|
1187
|
+
owner = CLI.get_owner
|
|
1188
|
+
slug = url_or_slug
|
|
1189
|
+
end
|
|
1190
|
+
return owner, slug
|
|
1191
|
+
end
|
|
1192
|
+
|
|
1193
|
+
desc '', '', :hide => true
|
|
1194
|
+
def data_put(dataset_url, files: [], dir: '', commit: '', chunk_size: 1000, force: false, threads: 15, message: nil)
|
|
1188
1195
|
begin
|
|
1189
1196
|
verify_logged_in(false)
|
|
1190
1197
|
log_start(__method__, args, options)
|
|
1191
1198
|
|
|
1192
|
-
|
|
1193
|
-
|
|
1194
|
-
project_index = Cnvrg::Helpers.look_for_in_path(dataset_url, "datasets")
|
|
1195
|
-
slug = url_parts[project_index + 1]
|
|
1196
|
-
owner = url_parts[project_index - 1]
|
|
1197
|
-
@dataset = Dataset.new(dataset_url: dataset_url)
|
|
1199
|
+
owner, slug = get_owner_slug(dataset_url)
|
|
1200
|
+
@dataset = Dataset.new(dataset_info: {:owner => owner, :slug => slug})
|
|
1198
1201
|
@datafiles = Cnvrg::Datafiles.new(owner, slug, dataset: @dataset)
|
|
1199
1202
|
@files = @datafiles.verify_files_exists(files)
|
|
1200
1203
|
|
|
@@ -1204,7 +1207,7 @@ module Cnvrg
|
|
|
1204
1207
|
log_message("Uploading #{@files.size} files", Thor::Shell::Color::GREEN)
|
|
1205
1208
|
number_of_chunks = (@files.size.to_f / chunk_size).ceil
|
|
1206
1209
|
if commit.blank?
|
|
1207
|
-
response = @datafiles.start_commit(false, true, chunks: number_of_chunks)
|
|
1210
|
+
response = @datafiles.start_commit(false, true, chunks: number_of_chunks, message: message )
|
|
1208
1211
|
unless response #means we failed in the start commit.
|
|
1209
1212
|
raise SignalException.new(1, "Cant put files into dataset, check the dataset id")
|
|
1210
1213
|
end
|
|
@@ -1218,28 +1221,33 @@ module Cnvrg
|
|
|
1218
1221
|
else
|
|
1219
1222
|
@commit = commit
|
|
1220
1223
|
end
|
|
1221
|
-
|
|
1224
|
+
|
|
1225
|
+
# dir shouldnt have starting or ending slash.
|
|
1222
1226
|
dir = dir[0..-2] if dir.end_with? '/'
|
|
1223
1227
|
dir = dir[1..-1] if dir.start_with? '/'
|
|
1224
1228
|
|
|
1225
|
-
@
|
|
1226
|
-
|
|
1227
|
-
|
|
1228
|
-
|
|
1229
|
+
@datafiles.upload_multiple_files_optimized(
|
|
1230
|
+
@files,
|
|
1231
|
+
@commit,
|
|
1232
|
+
force: force,
|
|
1233
|
+
chunk_size: chunk_size,
|
|
1234
|
+
prefix: dir,
|
|
1235
|
+
threads: threads
|
|
1236
|
+
)
|
|
1237
|
+
|
|
1238
|
+
# This is for backwards compatibility only and should be removed in future versions:
|
|
1239
|
+
res = @datafiles.put_commit(@commit)
|
|
1240
|
+
unless res.is_success?
|
|
1241
|
+
raise SignalException.new(1, res.msg)
|
|
1229
1242
|
end
|
|
1230
|
-
|
|
1231
|
-
|
|
1232
|
-
|
|
1233
|
-
|
|
1234
|
-
|
|
1235
|
-
|
|
1236
|
-
res = @datafiles.end_commit(@commit,false, success: true )
|
|
1237
|
-
msg = res['result']
|
|
1238
|
-
response = Cnvrg::Result.new(Cnvrg::CLI.is_response_success(res, true), msg)
|
|
1239
|
-
unless response.is_success?
|
|
1240
|
-
raise SignalException.new(1, res.msg)
|
|
1241
|
-
end
|
|
1243
|
+
|
|
1244
|
+
res = @datafiles.end_commit(@commit,false, success: true, commit_type: "put")
|
|
1245
|
+
msg = res['result']
|
|
1246
|
+
response = Cnvrg::Result.new(Cnvrg::CLI.is_response_success(res, true), msg)
|
|
1247
|
+
unless response.is_success?
|
|
1248
|
+
raise SignalException.new(1, res.msg)
|
|
1242
1249
|
end
|
|
1250
|
+
|
|
1243
1251
|
log_message("Uploading files finished Successfully", Thor::Shell::Color::GREEN)
|
|
1244
1252
|
rescue SignalException => e
|
|
1245
1253
|
log_message(e.message, Thor::Shell::Color::RED)
|
|
@@ -1248,7 +1256,49 @@ module Cnvrg
|
|
|
1248
1256
|
end
|
|
1249
1257
|
|
|
1250
1258
|
|
|
1259
|
+
desc '', '', :hide => true
|
|
1260
|
+
def data_rm(dataset_url, regex_list: [], commit: '', message: nil)
|
|
1261
|
+
begin
|
|
1262
|
+
verify_logged_in(false)
|
|
1263
|
+
log_start(__method__, args, options)
|
|
1264
|
+
|
|
1265
|
+
owner, slug = get_owner_slug(dataset_url)
|
|
1266
|
+
@dataset = Dataset.new(dataset_info: {:owner => owner, :slug => slug})
|
|
1267
|
+
@datafiles = Cnvrg::Datafiles.new(owner, slug, dataset: @dataset)
|
|
1268
|
+
|
|
1269
|
+
# Init a new commit
|
|
1270
|
+
response = @datafiles.start_commit(false, true, chunks: 1, message: message )
|
|
1271
|
+
unless response #means we failed in the start commit.
|
|
1272
|
+
raise SignalException.new(1, "Cant put files into dataset, check the dataset id")
|
|
1273
|
+
end
|
|
1274
|
+
@commit = response['result']['commit_sha1']
|
|
1275
|
+
files_to_delete, folders_to_delete, job_id = @datafiles.delete_multiple_files(@commit, regex_list)
|
|
1276
|
+
log_message("Deleting #{files_to_delete} files and #{folders_to_delete} folders", Thor::Shell::Color::GREEN)
|
|
1251
1277
|
|
|
1278
|
+
total_files = files_to_delete + folders_to_delete
|
|
1279
|
+
current_progress = 0
|
|
1280
|
+
progressbar = @datafiles.create_progressbar("Delete Progress", total_files)
|
|
1281
|
+
chunk_size = 1000
|
|
1282
|
+
offset = 0
|
|
1283
|
+
while current_progress < total_files
|
|
1284
|
+
current_progress = @datafiles.delete_file_chunk(@commit, regex_list, chunk_size, offset)
|
|
1285
|
+
progressbar.progress = current_progress
|
|
1286
|
+
offset += chunk_size
|
|
1287
|
+
end
|
|
1288
|
+
|
|
1289
|
+
res = @datafiles.end_commit(@commit,false, success: true)
|
|
1290
|
+
msg = res['result']
|
|
1291
|
+
response = Cnvrg::Result.new(Cnvrg::CLI.is_response_success(res, true), msg)
|
|
1292
|
+
unless response.is_success?
|
|
1293
|
+
raise SignalException.new(1, res.msg)
|
|
1294
|
+
end
|
|
1295
|
+
|
|
1296
|
+
log_message("Deleting files finished Successfully", Thor::Shell::Color::GREEN)
|
|
1297
|
+
rescue SignalException => e
|
|
1298
|
+
log_message(e.message, Thor::Shell::Color::RED)
|
|
1299
|
+
return false
|
|
1300
|
+
end
|
|
1301
|
+
end
|
|
1252
1302
|
|
|
1253
1303
|
desc 'upload_data', 'Upload data files', :hide => true
|
|
1254
1304
|
method_option :ignore, :type => :array, :aliases => ["-i", "--i"], :desc => "ignore following files"
|
|
@@ -1699,18 +1749,22 @@ module Cnvrg
|
|
|
1699
1749
|
end
|
|
1700
1750
|
|
|
1701
1751
|
desc 'data commits', 'List all commits for a specific dataset', :hide => true
|
|
1702
|
-
|
|
1703
|
-
|
|
1704
|
-
verify_logged_in(true)
|
|
1752
|
+
def list_dataset_commits(dataset_url, commit_sha1: nil)
|
|
1753
|
+
verify_logged_in(false)
|
|
1705
1754
|
log_start(__method__, args, options)
|
|
1706
1755
|
|
|
1707
|
-
|
|
1708
|
-
|
|
1709
|
-
|
|
1756
|
+
if dataset_url == "."
|
|
1757
|
+
dataset_dir = is_cnvrg_dir(Dir.pwd)
|
|
1758
|
+
@dataset = Dataset.new(dataset_dir)
|
|
1759
|
+
else
|
|
1760
|
+
owner, slug = get_owner_slug(dataset_url)
|
|
1761
|
+
@dataset = Dataset.new(dataset_info: {:owner => owner, :slug => slug})
|
|
1762
|
+
end
|
|
1763
|
+
|
|
1764
|
+
result = @dataset.list_commits(commit_sha1:commit_sha1)
|
|
1710
1765
|
list = result["result"]["list"]
|
|
1711
1766
|
|
|
1712
1767
|
print_table(list)
|
|
1713
|
-
|
|
1714
1768
|
end
|
|
1715
1769
|
|
|
1716
1770
|
desc 'commits', 'List all commits for a specific Project'
|
|
@@ -1741,17 +1795,17 @@ module Cnvrg
|
|
|
1741
1795
|
|
|
1742
1796
|
|
|
1743
1797
|
desc 'git_clone', 'Clone project'
|
|
1798
|
+
method_option :soft, :type => :boolean, :aliases => ["-s", "--soft"], :default => false, :hide => true
|
|
1744
1799
|
def git_clone(slug, owner)
|
|
1745
1800
|
verify_logged_in(false)
|
|
1746
1801
|
log_start(__method__, args, options)
|
|
1747
|
-
|
|
1802
|
+
project_home = Dir.pwd
|
|
1803
|
+
soft = options["soft"] || false
|
|
1804
|
+
Project.stop_if_project_present(project_home, slug) if soft
|
|
1748
1805
|
clone_resp = Project.clone_dir_remote(slug, owner, slug,true)
|
|
1749
|
-
|
|
1806
|
+
exit 1 if not clone_resp
|
|
1807
|
+
idx_status = Project.new(get_project_home).generate_idx(files:[])
|
|
1750
1808
|
FileUtils.mkdir_p File.join(get_project_home, ENV['CNVRG_OUTPUT_DIR']) if ENV['CNVRG_OUTPUT_DIR'].present?
|
|
1751
|
-
@executer = Cnvrg::Helpers::Executer.get_executer
|
|
1752
|
-
if @executer.present?
|
|
1753
|
-
@executer.update_git_commit
|
|
1754
|
-
end
|
|
1755
1809
|
end
|
|
1756
1810
|
|
|
1757
1811
|
|
|
@@ -1791,7 +1845,7 @@ module Cnvrg
|
|
|
1791
1845
|
desc 'clone PROJECT_URL', 'Clone project'
|
|
1792
1846
|
method_option :remote, :type => :boolean, :aliases => ["-r", "--r"], :default => false
|
|
1793
1847
|
method_option :commit, :type => :string, :aliases => ["-c", "--c"], :default => nil
|
|
1794
|
-
|
|
1848
|
+
method_option :soft, :type => :boolean, :aliases => ["-s", "--soft"], :default => false, :hide => true
|
|
1795
1849
|
def clone(project_url)
|
|
1796
1850
|
begin
|
|
1797
1851
|
verify_logged_in(false)
|
|
@@ -1801,6 +1855,8 @@ module Cnvrg
|
|
|
1801
1855
|
slug = url_parts[project_index + 1]
|
|
1802
1856
|
owner = url_parts[project_index - 1]
|
|
1803
1857
|
remote = options["remote"] || false
|
|
1858
|
+
soft = options["soft"] || false
|
|
1859
|
+
|
|
1804
1860
|
|
|
1805
1861
|
response = Cnvrg::API.request("users/#{owner}/projects/#{slug}/get_project", 'GET')
|
|
1806
1862
|
Cnvrg::CLI.is_response_success(response)
|
|
@@ -1814,6 +1870,8 @@ module Cnvrg
|
|
|
1814
1870
|
clone_resp = false
|
|
1815
1871
|
project_home = Dir.pwd
|
|
1816
1872
|
|
|
1873
|
+
Project.stop_if_project_present(project_home, project_name) if soft
|
|
1874
|
+
|
|
1817
1875
|
if remote and !git
|
|
1818
1876
|
clone_resp = Project.clone_dir_remote(slug, owner, project_name,git)
|
|
1819
1877
|
elsif git
|
|
@@ -1837,8 +1895,6 @@ module Cnvrg
|
|
|
1837
1895
|
end
|
|
1838
1896
|
clone_resp = Project.clone_dir(slug, owner, project_name,git)
|
|
1839
1897
|
project_home = Dir.pwd + "/" + project_name
|
|
1840
|
-
|
|
1841
|
-
|
|
1842
1898
|
end
|
|
1843
1899
|
|
|
1844
1900
|
if clone_resp
|
|
@@ -1956,8 +2012,6 @@ module Cnvrg
|
|
|
1956
2012
|
method_option :parallel, :type => :numeric, :aliases => ["-p", "--parallel"], :desc => "uparallel upload at the same time", :default => 15
|
|
1957
2013
|
method_option :init, :type => :boolean, :aliases => ["--initial"], :desc => "initial sync", :default => false
|
|
1958
2014
|
method_option :message, :type => :string, :aliases => ["--message"], :desc => "create commit with message", :default => nil
|
|
1959
|
-
|
|
1960
|
-
|
|
1961
2015
|
def sync_data_new(new_branch, force, verbose, commit, all_files, tags ,parallel, chunk_size, init, message)
|
|
1962
2016
|
verify_logged_in(true)
|
|
1963
2017
|
log_start(__method__, args, options)
|
|
@@ -1966,11 +2020,13 @@ module Cnvrg
|
|
|
1966
2020
|
# w(verbose=false, new_branch=false,sync=false, commit=nil,all_files=true)
|
|
1967
2021
|
total_deleted, total_downloaded = invoke :download_data_new,[verbose, new_branch, true, commit, all_files], :new_branch=>new_branch, :direct=>false, :force =>force
|
|
1968
2022
|
end
|
|
1969
|
-
|
|
2023
|
+
|
|
1970
2024
|
invoke :upload_data_new,[new_branch, verbose, true, force, tags, chunk_size, message:message, total_deleted: total_deleted, total_downloaded: total_downloaded],
|
|
1971
2025
|
:new_branch=>new_branch, :direct=>false, :force =>force, :sync =>true, :tags =>tags, :parallel => parallel, :message => message
|
|
1972
2026
|
|
|
1973
2027
|
end
|
|
2028
|
+
|
|
2029
|
+
|
|
1974
2030
|
desc 'upload_data_new', 'upload_data_new', :hide => true
|
|
1975
2031
|
method_option :verbose, :type => :boolean, :aliases => ["-v"], :default => false
|
|
1976
2032
|
method_option :new_branch, :type => :boolean, :aliases => ["-nb"], :desc => "create new branch of commits"
|
|
@@ -2213,15 +2269,27 @@ module Cnvrg
|
|
|
2213
2269
|
method_option :return_id, :type => :boolean, :aliases => ["-r", "--return_id"], :default => false
|
|
2214
2270
|
method_option :files, :type => :string, :aliases => ["--files"], :default => nil
|
|
2215
2271
|
method_option :output_dir, :type => :string, :aliases => ["--output_dir"], :default => nil
|
|
2272
|
+
method_option :git_diff, :type => :boolean, :aliases => ["--git_diff"], :default => false
|
|
2216
2273
|
method_option :job_slug, :type => :string, :aliases => ["--job"], :default => nil, :hide=>true
|
|
2217
2274
|
method_option :job_type, :type => :string, :aliases => [ "--job_type"], :default => nil, :hide=>true
|
|
2275
|
+
method_option :suppress_exceptions, :type => :boolean, :aliases => ["--suppress-exceptions"], :default => true
|
|
2276
|
+
method_option :debug_mode, :type => :boolean, :aliases => ["--debug-mode"], :default => false
|
|
2218
2277
|
|
|
2219
|
-
def upload(link = false, sync = false, direct = false, ignore_list = "", in_exp = false, force = false, output_dir = "output", job_type = nil, job_slug = nil)
|
|
2278
|
+
def upload(link = false, sync = false, direct = false, ignore_list = "", in_exp = false, force = false, output_dir = "output", job_type = nil, job_slug = nil, suppress_exceptions = true)
|
|
2220
2279
|
begin
|
|
2221
2280
|
# we are passing "force" twice.. doesnt really make sense :\\
|
|
2222
2281
|
verify_logged_in(true)
|
|
2223
2282
|
log_start(__method__, args, options)
|
|
2224
2283
|
@project = Project.new(get_project_home)
|
|
2284
|
+
|
|
2285
|
+
# Enable local/experiment exception logging
|
|
2286
|
+
suppress_exceptions = suppress_exceptions ? suppress_exceptions : options[:suppress_exceptions]
|
|
2287
|
+
if in_exp
|
|
2288
|
+
exp_obj = Experiment.new(@project.owner, @project.slug, job_id: job_slug)
|
|
2289
|
+
else
|
|
2290
|
+
exp_obj = nil
|
|
2291
|
+
end
|
|
2292
|
+
|
|
2225
2293
|
commit_msg = options["message"]
|
|
2226
2294
|
if commit_msg.nil? or commit_msg.empty?
|
|
2227
2295
|
commit_msg = ""
|
|
@@ -2237,19 +2305,21 @@ module Cnvrg
|
|
|
2237
2305
|
spec_files_to_upload = spec_files_to_upload.split(",")
|
|
2238
2306
|
end
|
|
2239
2307
|
if @project.is_git
|
|
2308
|
+
list = []
|
|
2240
2309
|
git_output_dir = options["output_dir"] || output_dir
|
|
2241
2310
|
if git_output_dir.present?
|
|
2242
2311
|
if git_output_dir.ends_with? "/"
|
|
2243
2312
|
git_output_dir = git_output_dir[0..-2]
|
|
2244
2313
|
end
|
|
2245
2314
|
list = @project.generate_output_dir(git_output_dir)
|
|
2246
|
-
spec_files_to_upload = list
|
|
2247
|
-
if spec_files_to_upload.blank?
|
|
2248
|
-
log_message("#{check} Project is up to date", Thor::Shell::Color::GREEN, (((options["sync"] or sync) and !direct) ? false : true))
|
|
2249
|
-
return true
|
|
2250
|
-
end
|
|
2251
|
-
force = true
|
|
2252
2315
|
end
|
|
2316
|
+
list += @project.generate_git_diff if options["git_diff"]
|
|
2317
|
+
spec_files_to_upload = list
|
|
2318
|
+
if spec_files_to_upload.blank?
|
|
2319
|
+
log_message("#{check} Project is up to date", Thor::Shell::Color::GREEN, (((options["sync"] or sync) and !direct) ? false : true))
|
|
2320
|
+
return true
|
|
2321
|
+
end
|
|
2322
|
+
force = true
|
|
2253
2323
|
end
|
|
2254
2324
|
|
|
2255
2325
|
if ignore.nil? or ignore.empty?
|
|
@@ -2291,8 +2361,6 @@ module Cnvrg
|
|
|
2291
2361
|
end
|
|
2292
2362
|
update_count = 0
|
|
2293
2363
|
update_total = result["added"].size + result["updated_on_local"].size + result["deleted"].size
|
|
2294
|
-
successful_updates = []
|
|
2295
|
-
successful_deletions = []
|
|
2296
2364
|
if options["verbose"]
|
|
2297
2365
|
if update_total == 1
|
|
2298
2366
|
log_message("Updating #{update_total} file", Thor::Shell::Color::BLUE)
|
|
@@ -2312,8 +2380,11 @@ module Cnvrg
|
|
|
2312
2380
|
end
|
|
2313
2381
|
job_type = options['job_type'] || job_type
|
|
2314
2382
|
job_slug = options['job_slug'] || job_slug
|
|
2315
|
-
commit_sha1 = @files.start_commit(
|
|
2316
|
-
|
|
2383
|
+
commit_sha1 = @files.start_commit(
|
|
2384
|
+
new_branch, force: force, exp_start_commit: exp_start_commit,
|
|
2385
|
+
job_type: job_type, job_slug: job_slug, start_commit: current_commit,message: options["message"],
|
|
2386
|
+
debug_mode: options["debug_mode"]
|
|
2387
|
+
)["result"]["commit_sha1"]
|
|
2317
2388
|
# upload / update
|
|
2318
2389
|
# delete
|
|
2319
2390
|
to_upload = result["added"] + result["updated_on_local"]
|
|
@@ -2324,32 +2395,30 @@ module Cnvrg
|
|
|
2324
2395
|
:starting_at => 0,
|
|
2325
2396
|
:total => (to_upload.size + deleted.size),
|
|
2326
2397
|
:autofinish => true)
|
|
2327
|
-
@files.upload_multiple_files(to_upload, commit_sha1, progress: progressbar)
|
|
2328
2398
|
|
|
2329
|
-
@files.
|
|
2399
|
+
buffered_errors = @files.upload_multiple_files(to_upload, commit_sha1, progress: progressbar, suppress_exceptions: suppress_exceptions)
|
|
2400
|
+
@files.delete_files_from_server(deleted, commit_sha1, suppress_exceptions: suppress_exceptions)
|
|
2330
2401
|
|
|
2331
2402
|
progressbar.finish
|
|
2403
|
+
|
|
2404
|
+
if buffered_errors.is_a?(Hash)
|
|
2405
|
+
buffered_errors.keys.each do |file|
|
|
2406
|
+
to_upload.delete(file)
|
|
2407
|
+
Cnvrg::CLI.log_message(buffered_errors[file], 'red')
|
|
2408
|
+
exp_obj.job_log([buffered_errors[file]]) unless exp_obj.nil?
|
|
2409
|
+
end
|
|
2410
|
+
end
|
|
2411
|
+
|
|
2332
2412
|
res = @files.end_commit(commit_sha1, force: force, message: commit_msg)
|
|
2333
2413
|
unless Cnvrg::CLI.is_response_success(res, false)
|
|
2334
2414
|
raise StandardError.new("Cant end commit")
|
|
2335
2415
|
end
|
|
2416
|
+
|
|
2336
2417
|
# save idx
|
|
2337
2418
|
@project.update_idx_with_files_commits!((to_upload + deleted), res["result"]["commit_time"])
|
|
2338
2419
|
@project.update_idx_with_commit!(commit_sha1)
|
|
2339
2420
|
if options["verbose"]
|
|
2340
2421
|
log_message("#{check} Done", Thor::Shell::Color::BLUE)
|
|
2341
|
-
if successful_updates.size > 0
|
|
2342
|
-
successful_updates.flatten!
|
|
2343
|
-
log_message("Updated:", Thor::Shell::Color::GREEN)
|
|
2344
|
-
suc = successful_updates.map {|x| x = Helpers.checkmark() + " " + x}
|
|
2345
|
-
log_message(suc.join("\n"), Thor::Shell::Color::GREEN)
|
|
2346
|
-
end
|
|
2347
|
-
if successful_deletions.size > 0
|
|
2348
|
-
successful_deletions.flatten!
|
|
2349
|
-
log_message("Deleted:", Thor::Shell::Color::GREEN)
|
|
2350
|
-
del = successful_updates.map {|x| x = Helpers.checkmark() + " " + x}
|
|
2351
|
-
log_message(del.join("\n"), Thor::Shell::Color::GREEN)
|
|
2352
|
-
end
|
|
2353
2422
|
log_message("Total of #{update_count} / #{update_total} files.", Thor::Shell::Color::GREEN)
|
|
2354
2423
|
else
|
|
2355
2424
|
if return_id
|
|
@@ -2374,9 +2443,13 @@ module Cnvrg
|
|
|
2374
2443
|
if e.is_a? SignalException
|
|
2375
2444
|
say "\nAborting", Thor::Shell::Color::BLUE
|
|
2376
2445
|
say "\nRolling back all changes", Thor::Shell::Color::BLUE
|
|
2446
|
+
|
|
2447
|
+
exp_obj.job_log(["Aborting", "Rolling back all changes"]) unless exp_obj.nil?
|
|
2377
2448
|
else
|
|
2378
2449
|
log_message(error_message, Thor::Shell::Color::RED)
|
|
2379
2450
|
log_error(e)
|
|
2451
|
+
|
|
2452
|
+
exp_obj.job_log([error_message, e]) unless exp_obj.nil?
|
|
2380
2453
|
end
|
|
2381
2454
|
@files.rollback_commit(commit_sha1) unless commit_sha1.nil?
|
|
2382
2455
|
print_res = {
|
|
@@ -2894,6 +2967,11 @@ module Cnvrg
|
|
|
2894
2967
|
method_option :job_type, :type => :string, :aliases => ["-jt", "--job_type"], :default => nil
|
|
2895
2968
|
method_option :files, :type => :string, :aliases => ["--files"], :default => nil
|
|
2896
2969
|
method_option :output_dir, :type => :string, :aliases => ["--output_dir"], :default => nil
|
|
2970
|
+
method_option :git_diff, :type => :boolean, :aliases => ["--git_diff"], :default => false
|
|
2971
|
+
method_option :suppress_exceptions, :type => :boolean, :aliases => ["--suppress-exceptions"], :default => true
|
|
2972
|
+
method_option :debug_mode, :type => :boolean, :aliases => ["--debug-mode"], :default => false
|
|
2973
|
+
method_option :git_diff, :type => :boolean, :aliases => ["--git_diff"], :default => false
|
|
2974
|
+
|
|
2897
2975
|
def sync(direct = true)
|
|
2898
2976
|
verify_logged_in(true) if direct
|
|
2899
2977
|
@project = Project.new(get_project_home)
|
|
@@ -2905,16 +2983,20 @@ module Cnvrg
|
|
|
2905
2983
|
is_git = ENV['CNVRG_GIT_PROJECT'] == "true" || @project.is_git
|
|
2906
2984
|
in_exp = options["in_exp"] || (job_slug.present? and job_type.present?)
|
|
2907
2985
|
in_exp = false if job_type.present? and job_type == "NotebookSession"
|
|
2986
|
+
output_dir = options["output_dir"] || ENV['CNVRG_OUTPUT_DIR']
|
|
2987
|
+
|
|
2908
2988
|
run_download = true
|
|
2909
|
-
if
|
|
2989
|
+
if (job_type == "NotebookSession" and is_git) or job_type == "Experiment" or options['force']
|
|
2910
2990
|
run_download = false
|
|
2911
2991
|
end
|
|
2912
|
-
|
|
2992
|
+
|
|
2993
|
+
if run_download or options['debug_mode']
|
|
2913
2994
|
invoke :download, [true, "", in_exp ], :new_branch => options["new_branch"], :verbose => options["verbose"], :sync => true
|
|
2914
2995
|
end
|
|
2915
|
-
invoke :upload, [false, true, direct, "",in_exp,options[:force],
|
|
2996
|
+
invoke :upload, [false, true, direct, "",in_exp,options[:force], output_dir, job_type, job_slug ], :new_branch => options["new_branch"], :verbose => options["verbose"], :sync => true,
|
|
2916
2997
|
:ignore => options[:ignore], :force => options[:force], :message => options[:message], :deploy => options["deploy"], :return_id => options["return_id"],
|
|
2917
|
-
:files => options["files"], :output_dir => options["
|
|
2998
|
+
:files => options["files"], :output_dir => output_dir, :job_slug => job_slug, :job_type => job_type, :suppress_exceptions => options["suppress_exceptions"], :debug_mode => options['debug_mode'], :git_diff => options["git_diff"]
|
|
2999
|
+
|
|
2918
3000
|
end
|
|
2919
3001
|
|
|
2920
3002
|
desc 'run cmd', 'Runs an experiment'
|
|
@@ -3059,6 +3141,8 @@ module Cnvrg
|
|
|
3059
3141
|
method_option :data, :type => :string, :aliases => ["-d", "--data"], :default => ""
|
|
3060
3142
|
method_option :data_commit, :type => :string, :aliases => ["-dc", "--data_commit"], :default => ""
|
|
3061
3143
|
method_option :ignore, :type => :string, :aliases => ["-i", "--ignore"], :desc => "ignore following files", :default => ""
|
|
3144
|
+
method_option :docker_id, :type => :string, :aliases => ["--docker_id"], :desc => "docker id to watch", :default => ""
|
|
3145
|
+
method_option :gpu_util_from_docker, :type => :boolean, :aliases => ["--gpu-util-from-docker"], :desc => "take gpu utilization from job docker", :default => false
|
|
3062
3146
|
method_option :remote, :type => :boolean, :aliases => ["--remote"], :default => false
|
|
3063
3147
|
method_option :gpu, :type => :boolean, :aliases => ["--gpu"], :default => false
|
|
3064
3148
|
method_option :force, :type => :boolean, :aliases => ["-f", "--force"], :default => false
|
|
@@ -3066,6 +3150,7 @@ module Cnvrg
|
|
|
3066
3150
|
method_option :periodic_sync, :type => :string, :aliases => ["-ps", "--periodic_sync"], :default => ""
|
|
3067
3151
|
method_option :output_dir, :type => :string, :aliases => ["-o", "--output_dir"], :default => nil
|
|
3068
3152
|
method_option :data_query, :type => :string, :aliases => ["-q", "--query"], :default => nil
|
|
3153
|
+
method_option :use_bash, :type => :boolean, :aliases => ["-b", "--use_bash"], :default => false
|
|
3069
3154
|
|
|
3070
3155
|
def exec(*cmd)
|
|
3071
3156
|
log = []
|
|
@@ -3132,8 +3217,12 @@ module Cnvrg
|
|
|
3132
3217
|
end
|
|
3133
3218
|
remote = options["remote"]
|
|
3134
3219
|
if remote
|
|
3135
|
-
docker_id
|
|
3136
|
-
|
|
3220
|
+
if options["docker_id"].present?
|
|
3221
|
+
docker_id = options["docker_id"]
|
|
3222
|
+
else
|
|
3223
|
+
docker_id = `cat /etc/hostname`
|
|
3224
|
+
docker_id = docker_id.strip()
|
|
3225
|
+
end
|
|
3137
3226
|
end
|
|
3138
3227
|
is_on_gpu = options["gpu"]
|
|
3139
3228
|
start_commit = @project.last_local_commit
|
|
@@ -3143,9 +3232,9 @@ module Cnvrg
|
|
|
3143
3232
|
|
|
3144
3233
|
platform = RUBY_PLATFORM
|
|
3145
3234
|
machine_name = Socket.gethostname
|
|
3235
|
+
machine_activity_slug = ENV["CNVRG_MACHINE_ACTIVITY"]
|
|
3146
3236
|
begin
|
|
3147
|
-
|
|
3148
|
-
@exp.start(cmd, platform, machine_name, start_commit, title, email_notification, machine_activity, script_path, sync_before_terminate, periodic_sync)
|
|
3237
|
+
@exp.start(cmd, platform, machine_name, start_commit, title, email_notification, machine_activity_slug, script_path, sync_before_terminate, periodic_sync)
|
|
3149
3238
|
log_message("Experiment's live results: #{Cnvrg::Helpers.remote_url}/#{@project.owner}/projects/#{@project.slug}/experiments/#{@exp.slug}", Thor::Shell::Color::GREEN)
|
|
3150
3239
|
log_message("Running: #{cmd}\n", Thor::Shell::Color::BLUE)
|
|
3151
3240
|
unless @exp.slug.nil?
|
|
@@ -3163,7 +3252,7 @@ module Cnvrg
|
|
|
3163
3252
|
begin
|
|
3164
3253
|
stats = remote ? usage_metrics_in_docker(docker_id) : Helpers.ubuntu? ? {memory: memory_usage, cpu: cpu_usage} : {}
|
|
3165
3254
|
if is_on_gpu
|
|
3166
|
-
gu = gpu_util
|
|
3255
|
+
gu = gpu_util(take_from_docker: options["gpu_util_from_docker"], docker_id: docker_id)
|
|
3167
3256
|
stats['gpu_util'] = gu[0]
|
|
3168
3257
|
stats['gpu'] = gu[1]
|
|
3169
3258
|
end
|
|
@@ -3175,6 +3264,16 @@ module Cnvrg
|
|
|
3175
3264
|
end
|
|
3176
3265
|
end
|
|
3177
3266
|
start_time = Time.now
|
|
3267
|
+
shell_type = options["use_bash"] ? "bash -l" : "sh"
|
|
3268
|
+
if @exp.get_cmd.present?
|
|
3269
|
+
cmd = @exp.get_cmd
|
|
3270
|
+
if options["docker_id"].present? # Escape for docker exec
|
|
3271
|
+
cmd = cmd.gsub("\"", "\\\"")
|
|
3272
|
+
end
|
|
3273
|
+
end
|
|
3274
|
+
if options["docker_id"].present?
|
|
3275
|
+
cmd = "docker exec -it #{options["docker_id"]} #{shell_type} -c \"#{cmd}\""
|
|
3276
|
+
end
|
|
3178
3277
|
PTY.spawn(@exp.as_env, cmd) do |stdout, stdin, pid, stderr|
|
|
3179
3278
|
begin
|
|
3180
3279
|
stdout.each do |line|
|
|
@@ -3189,7 +3288,7 @@ module Cnvrg
|
|
|
3189
3288
|
puts line
|
|
3190
3289
|
end
|
|
3191
3290
|
log << cur_log
|
|
3192
|
-
if log.size >=
|
|
3291
|
+
if log.size >= 1
|
|
3193
3292
|
@exp.upload_temp_log(log) unless log.empty?
|
|
3194
3293
|
log = []
|
|
3195
3294
|
elsif (start_time + 15.seconds) <= Time.now
|
|
@@ -3239,29 +3338,26 @@ module Cnvrg
|
|
|
3239
3338
|
exp_success = false
|
|
3240
3339
|
end
|
|
3241
3340
|
|
|
3242
|
-
|
|
3243
|
-
|
|
3244
|
-
|
|
3245
|
-
|
|
3246
|
-
|
|
3247
|
-
|
|
3248
|
-
|
|
3249
|
-
# invoke :upload, [false, false, true, ignore, true, true], :output_dir => output_dir, :force=>true, :job_type=>'Experiment', :job_slug=>@exp.slug
|
|
3250
|
-
end
|
|
3251
|
-
else
|
|
3252
|
-
upload(false, false, true, ignore, true, true,nil,"Experiment",@exp.slug )
|
|
3253
|
-
|
|
3254
|
-
# invoke :upload, [false, false, true, ignore,true, true], :job_type=>'Experiment', :job_slug=>@exp.slug, :force=>true
|
|
3341
|
+
if sync_after
|
|
3342
|
+
@exp.job_log(["Syncing Experiment"])
|
|
3343
|
+
# Sync after run
|
|
3344
|
+
if @project.is_git
|
|
3345
|
+
output_dir = output_dir || @exp.output_dir
|
|
3346
|
+
if output_dir.present?
|
|
3347
|
+
upload(false, false, true, ignore, true, true, output_dir, "Experiment", @exp.slug, true )
|
|
3255
3348
|
end
|
|
3256
|
-
|
|
3349
|
+
else
|
|
3350
|
+
upload(false, false, true, ignore, true, true, nil, "Experiment", @exp.slug, true )
|
|
3257
3351
|
end
|
|
3352
|
+
end
|
|
3353
|
+
|
|
3258
3354
|
end_commit = @project.last_local_commit
|
|
3259
3355
|
if end_commit.present?
|
|
3260
3356
|
@exp.job_log(["Experiment end commit: #{end_commit}"])
|
|
3261
3357
|
end
|
|
3262
3358
|
|
|
3263
3359
|
# log_thread.join
|
|
3264
|
-
|
|
3360
|
+
stats_thread.join
|
|
3265
3361
|
|
|
3266
3362
|
res = @exp.end(log, exit_status, end_commit, cpu_average, memory_average, end_time: end_time)
|
|
3267
3363
|
|
|
@@ -3409,8 +3505,8 @@ module Cnvrg
|
|
|
3409
3505
|
local_folders_options = options["local_folders"]
|
|
3410
3506
|
options_hash.except!("schedule", "recurring", "machine_type", "image", "upload_output", "grid", "data", "data_commit", "title",
|
|
3411
3507
|
"local", "small", "medium", "large", "gpu", "gpuxl", "gpuxxl","max_time","dataset_only_tree",
|
|
3412
|
-
"data_query", "git_commit","git_branch", "restart_if_stuck","local_folders","output_dir", "commit", "datasets",
|
|
3413
|
-
"email_notification_error", "email_notification_success", "emails")
|
|
3508
|
+
"data_query", "git_commit","git_branch", "restart_if_stuck","local_folders","output_dir", "commit", "datasets",
|
|
3509
|
+
"requirements", "prerun", "email_notification_error", "email_notification_success", "emails")
|
|
3414
3510
|
exec_options = options_hash.map {|x| "--#{x[0]}=#{x[1]}"}.flatten.join(" ")
|
|
3415
3511
|
command = "#{exec_options} #{remote} #{upload_output_option} #{cmd.flatten.join(" ")}"
|
|
3416
3512
|
commit_to_run = options["commit"] || nil
|
|
@@ -4235,144 +4331,6 @@ module Cnvrg
|
|
|
4235
4331
|
|
|
4236
4332
|
end
|
|
4237
4333
|
|
|
4238
|
-
method_option :small, :type => :boolean, :aliases => ["-sm", "--small"], :default => false
|
|
4239
|
-
method_option :medium, :type => :boolean, :aliases => ["-md", "--medium"], :default => false
|
|
4240
|
-
method_option :large, :type => :boolean, :aliases => ["-lg", "--large"], :default => false
|
|
4241
|
-
method_option :gpu, :type => :boolean, :aliases => ["--gpu"], :default => false
|
|
4242
|
-
method_option :gpuxl, :type => :boolean, :aliases => ["--gpuxl"], :default => false
|
|
4243
|
-
method_option :gpuxxl, :type => :boolean, :aliases => ["--gpuxxl"], :default => false
|
|
4244
|
-
method_option :image, :type => :string, :aliases => ["-i", "--image"], :default => ""
|
|
4245
|
-
method_option :public, :type => :boolean, :aliases => ["-p", "--public"], :default => false
|
|
4246
|
-
method_option :base, :type => :boolean, :aliases => ["-b", "--base"], :default => false
|
|
4247
|
-
method_option :python3, :type => :boolean, :aliases => ["--python3"], :default => false
|
|
4248
|
-
method_option :docker_path, :type => :string, :aliases => ["--docker_path"], :default => ""
|
|
4249
|
-
|
|
4250
|
-
|
|
4251
|
-
desc 'create_custom_image', 'run commands inside containers', :hide => true
|
|
4252
|
-
|
|
4253
|
-
def build_image(image_name)
|
|
4254
|
-
begin
|
|
4255
|
-
verify_logged_in(false)
|
|
4256
|
-
log_start(__method__, args, options)
|
|
4257
|
-
instances = {"small" => options["small"], "medium" => options["medium"], "large" => options["large"],
|
|
4258
|
-
"gpu" => options["gpu"], "gpuxl" => options["gpuxl"], "gpuxxl" => options["gpuxxl"]}
|
|
4259
|
-
instance_type = get_instance_type(instances)
|
|
4260
|
-
image_extend = options["image"]
|
|
4261
|
-
public = options["public"]
|
|
4262
|
-
base = options["base"]
|
|
4263
|
-
python3 = options["python3"]
|
|
4264
|
-
docker_path = options["docker_path"]
|
|
4265
|
-
owner = CLI.get_owner
|
|
4266
|
-
checks = Helpers.checkmark()
|
|
4267
|
-
tar_path = nil
|
|
4268
|
-
if !docker_path.nil? and !docker_path.empty?
|
|
4269
|
-
docker_path = File.absolute_path(docker_path)
|
|
4270
|
-
#create tar of the docker path: it could be a docker file, and it could be a docker folder
|
|
4271
|
-
tar_path = File.expand_path('~') + "/.cnvrg/tmp/docker_#{File.basename docker_path}.tar.gz"
|
|
4272
|
-
resp = create_docker_tar(docker_path, tar_path)
|
|
4273
|
-
if !resp
|
|
4274
|
-
log_message("Couldn't create tar from docker path", Thor::Shell::Color::RED)
|
|
4275
|
-
FileUtils.rm_rf tar_path
|
|
4276
|
-
exit(1)
|
|
4277
|
-
end
|
|
4278
|
-
files = Cnvrg::Files.new(owner, "")
|
|
4279
|
-
resp = Images.create_new_custom_image_with_docker(instance_type, owner, image_name, public, base, image_extend, python3, tar_path, files)
|
|
4280
|
-
if resp
|
|
4281
|
-
end
|
|
4282
|
-
else
|
|
4283
|
-
log_message("Creating machine for your custom image, this may take a few moments...", Thor::Shell::Color::BLUE)
|
|
4284
|
-
resp = Images.create_new_custom_image(instance_type, owner, image_name, public, base, image_extend, python3, nil)
|
|
4285
|
-
|
|
4286
|
-
end
|
|
4287
|
-
|
|
4288
|
-
if Cnvrg::CLI.is_response_success(resp, false)
|
|
4289
|
-
image_slug = resp["result"]["slug"]
|
|
4290
|
-
container = resp["result"]["machine_c"]
|
|
4291
|
-
log_message("#{checks} Created image and machine successfully", Thor::Shell::Color::GREEN)
|
|
4292
|
-
log_message("Connecting to machine", Thor::Shell::Color::BLUE)
|
|
4293
|
-
ssh = Ssh.new(resp)
|
|
4294
|
-
if !ssh.is_ssh
|
|
4295
|
-
log_message("Couldn't connect to machine,aborting", Thor::Shell::Color::RED)
|
|
4296
|
-
Images.revoke_custom_new_image(owner, image_slug)
|
|
4297
|
-
end
|
|
4298
|
-
log_message("run command until ctrl + c or quit is initiated", Thor::Shell::Color::BLUE)
|
|
4299
|
-
begin
|
|
4300
|
-
logs = []
|
|
4301
|
-
|
|
4302
|
-
while true
|
|
4303
|
-
command = ask("$>")
|
|
4304
|
-
logs << {time: Time.now,
|
|
4305
|
-
message: command,
|
|
4306
|
-
type: "stdout"
|
|
4307
|
-
}
|
|
4308
|
-
if command.eql? "quit"
|
|
4309
|
-
log_message("Commiting Image..", Thor::Shell::Color::BLUE)
|
|
4310
|
-
break
|
|
4311
|
-
end
|
|
4312
|
-
res = ssh.exec_command(command)
|
|
4313
|
-
begin
|
|
4314
|
-
res_parsed = JSON.parse(res)
|
|
4315
|
-
res = res_parsed.join(",")
|
|
4316
|
-
end
|
|
4317
|
-
|
|
4318
|
-
puts res
|
|
4319
|
-
logs << {time: Time.now,
|
|
4320
|
-
message: res,
|
|
4321
|
-
type: "stdout"
|
|
4322
|
-
}
|
|
4323
|
-
logs.flatten!
|
|
4324
|
-
|
|
4325
|
-
end
|
|
4326
|
-
|
|
4327
|
-
rescue SignalException
|
|
4328
|
-
log_message("Commiting Image..", Thor::Shell::Color::BLUE)
|
|
4329
|
-
|
|
4330
|
-
end
|
|
4331
|
-
resp = Images.commit_custom_image(owner, image_slug, logs)
|
|
4332
|
-
if Cnvrg::CLI.is_response_success(resp, false)
|
|
4333
|
-
log_message("#{checks} Image commited successfuly, email will be sent when image is ready", Thor::Shell::Color::GREEN)
|
|
4334
|
-
else
|
|
4335
|
-
if image_slug
|
|
4336
|
-
Images.revoke_custom_new_image(owner, image_slug)
|
|
4337
|
-
end
|
|
4338
|
-
if ssh
|
|
4339
|
-
ssh.close_ssh()
|
|
4340
|
-
end
|
|
4341
|
-
log_message("Image couldn't be commited, rolling back changes", Thor::Shell::Color::RED)
|
|
4342
|
-
|
|
4343
|
-
exit(1)
|
|
4344
|
-
end
|
|
4345
|
-
if ssh
|
|
4346
|
-
ssh.close_ssh()
|
|
4347
|
-
end
|
|
4348
|
-
|
|
4349
|
-
|
|
4350
|
-
end
|
|
4351
|
-
rescue => e
|
|
4352
|
-
log_message("Error occurd, aborting", Thor::Shell::Color::RED)
|
|
4353
|
-
|
|
4354
|
-
log_error(e)
|
|
4355
|
-
if image_slug
|
|
4356
|
-
Images.revoke_custom_new_image(owner, image_slug)
|
|
4357
|
-
end
|
|
4358
|
-
if ssh
|
|
4359
|
-
ssh.close_ssh()
|
|
4360
|
-
end
|
|
4361
|
-
|
|
4362
|
-
|
|
4363
|
-
rescue SignalException
|
|
4364
|
-
if image_slug
|
|
4365
|
-
Images.revoke_custom_new_image(owner, image_slug)
|
|
4366
|
-
end
|
|
4367
|
-
if ssh
|
|
4368
|
-
ssh.close_ssh
|
|
4369
|
-
end
|
|
4370
|
-
say "\nAborting"
|
|
4371
|
-
exit(1)
|
|
4372
|
-
end
|
|
4373
|
-
|
|
4374
|
-
end
|
|
4375
|
-
|
|
4376
4334
|
|
|
4377
4335
|
desc 'build', 'run commands inside containers', :hide => true
|
|
4378
4336
|
method_option :install, :type => :string, :aliases => ["--i"], :default => nil, :desc => "Install from the given instructions file"
|
|
@@ -4566,66 +4524,7 @@ module Cnvrg
|
|
|
4566
4524
|
end
|
|
4567
4525
|
|
|
4568
4526
|
|
|
4569
|
-
desc 'upload_image', 'commit notebook changes to create a new notebook image', :hide =>true
|
|
4570
|
-
|
|
4571
|
-
def upload_image_old(image_id, is_public, is_base, *message)
|
|
4572
|
-
verify_logged_in(true)
|
|
4573
|
-
log_start(__method__, args, options)
|
|
4574
|
-
image = Docker::Image.get(image_id)
|
|
4575
|
-
project_home = get_project_home
|
|
4576
|
-
@project = Project.new(project_home)
|
|
4577
|
-
last_local_commit = @project.last_local_commit
|
|
4578
|
-
image_name = @project.slug + "#{last_local_commit}"
|
|
4579
|
-
path = File.expand_path('~') + "/.cnvrg/tmp/#{image_name}.tar"
|
|
4580
|
-
owner = Cnvrg::CLI.get_owner()
|
|
4581
|
-
if !message.nil? or !message.empty?
|
|
4582
|
-
message = message.join(" ")
|
|
4583
|
-
end
|
|
4584
|
-
|
|
4585
|
-
log_message("Saving image's current state", Thor::Shell::Color::BLUE)
|
|
4586
|
-
image.save(path)
|
|
4587
|
-
|
|
4588
|
-
begin
|
|
4589
|
-
log_message("Compressing image file to upload", Thor::Shell::Color::BLUE)
|
|
4590
|
-
gzipRes = system("gzip -f #{path}")
|
|
4591
|
-
if !gzipRes
|
|
4592
|
-
|
|
4593
|
-
log_message("Couldn't create tar file from image", Thor::Shell::Color::RED)
|
|
4594
|
-
exit(1)
|
|
4595
|
-
end
|
|
4596
|
-
path = path + ".gz"
|
|
4597
|
-
@files = Cnvrg::Files.new(owner, "")
|
|
4598
|
-
|
|
4599
|
-
exit_status = $?.exitstatus
|
|
4600
|
-
if exit_status == 0
|
|
4601
|
-
log_message("Uploading image file", Thor::Shell::Color::BLUE)
|
|
4602
|
-
|
|
4603
|
-
diff = container_changes(Dir.pwd)
|
|
4604
|
-
res = @files.upload_image(path, image_name, owner, is_public, is_base, diff[1], diff[0], diff[2], message, image.commit_id)
|
|
4605
|
-
if res
|
|
4606
|
-
File.delete(path)
|
|
4607
|
-
image_loc = is_project_with_docker(Dir.pwd)
|
|
4608
|
-
image_loc.update_slug(res["result"]["id"])
|
|
4609
|
-
|
|
4610
|
-
checks = Helpers.checkmark()
|
|
4611
|
-
log_message("#{checks} Done", Thor::Shell::Color::GREEN)
|
|
4612
|
-
else
|
|
4613
|
-
log_message("Couldn't upload image", Thor::Shell::Color::RED)
|
|
4614
|
-
|
|
4615
|
-
end
|
|
4616
|
-
else
|
|
4617
|
-
log_message("Couldn't create image file for: #{image_name}", Thor::Shell::Color::RED)
|
|
4618
|
-
exit(1)
|
|
4619
|
-
end
|
|
4620
|
-
rescue => e
|
|
4621
|
-
log_message("Couldn't upload image file for: #{image_name}", Thor::Shell::Color::RED)
|
|
4622
|
-
log_error(e)
|
|
4623
|
-
rescue SignalException
|
|
4624
4527
|
|
|
4625
|
-
say "Couldn't upload image file for: #{image_name}", Thor::Shell::Color::RED
|
|
4626
|
-
exit(1)
|
|
4627
|
-
end
|
|
4628
|
-
end
|
|
4629
4528
|
|
|
4630
4529
|
desc '', '', :hide => true
|
|
4631
4530
|
|
|
@@ -4636,278 +4535,30 @@ module Cnvrg
|
|
|
4636
4535
|
|
|
4637
4536
|
end
|
|
4638
4537
|
|
|
4639
|
-
desc '', '', :hide => true
|
|
4640
|
-
|
|
4641
|
-
|
|
4642
|
-
|
|
4643
|
-
|
|
4644
|
-
|
|
4645
|
-
|
|
4646
|
-
|
|
4647
|
-
|
|
4648
|
-
|
|
4649
|
-
|
|
4650
|
-
|
|
4651
|
-
|
|
4652
|
-
|
|
4653
|
-
|
|
4654
|
-
|
|
4655
|
-
|
|
4656
|
-
|
|
4657
|
-
|
|
4658
|
-
|
|
4659
|
-
def tensor_port_container(container_id)
|
|
4660
|
-
container = Docker::Container.get(container_id)
|
|
4661
|
-
say container.json["HostConfig"]["PortBindings"]["6006/tcp"][0]["HostPort"]
|
|
4662
|
-
end
|
|
4663
|
-
|
|
4664
|
-
desc '', '', :hide => true
|
|
4665
|
-
|
|
4666
|
-
def stop_container(container_id)
|
|
4667
|
-
container = Docker::Container.get(container_id)
|
|
4668
|
-
container.stop()
|
|
4669
|
-
container.remove()
|
|
4670
|
-
|
|
4671
|
-
end
|
|
4672
|
-
|
|
4673
|
-
desc '', '', :hide => true
|
|
4674
|
-
method_option :login, :type => :string, :aliases => ["-l"], :default => ""
|
|
4675
|
-
method_option :app_dir, :type => :string, :aliases => ["-d"], :default => "/home/ds/notebooks"
|
|
4676
|
-
method_option :cmd, :type => :string, :aliases => ["-c"], :default => "/usr/local/cnvrg/run_ipython.sh"
|
|
4677
|
-
|
|
4678
|
-
|
|
4679
|
-
def config_remote(image_name, port = 7654, tensport = 6006)
|
|
4680
|
-
local_images = Docker::Image.all
|
|
4681
|
-
|
|
4682
|
-
docker_image_local = local_images.map {|x| x.info["RepoTags"]}.flatten.select {|y| y.eql? "#{image_name}:latest"}.flatten
|
|
4683
|
-
if docker_image_local.empty?
|
|
4684
|
-
say "no image"
|
|
4685
|
-
exit(1)
|
|
4686
|
-
end
|
|
4687
|
-
|
|
4688
|
-
begin
|
|
4689
|
-
login_content = options["login"]
|
|
4690
|
-
app_dir = options["app_dir"]
|
|
4691
|
-
cmd = options["cmd"]
|
|
4692
|
-
volume_from = options["volume"]
|
|
4693
|
-
|
|
4694
|
-
image_settings = {
|
|
4695
|
-
'Image' => "#{image_name}:latest",
|
|
4696
|
-
|
|
4697
|
-
'Cmd' => cmd,
|
|
4698
|
-
'WorkingDir' => app_dir,
|
|
4699
|
-
'ExposedPorts' => {
|
|
4700
|
-
'8888/tcp' => {},
|
|
4701
|
-
},
|
|
4702
|
-
'HostConfig' => {
|
|
4703
|
-
'Binds' => ["/var/run/docker.sock:/var/run/docker.sock", "/usr/bin/docker:/usr/bin/docker"],
|
|
4704
|
-
'PortBindings' => {
|
|
4705
|
-
'8888/tcp' => [
|
|
4706
|
-
{'HostPort' => "#{port}", 'HostIp' => 'localhost'}
|
|
4707
|
-
],
|
|
4708
|
-
'6006/tcp' => [
|
|
4709
|
-
{'HostPort' => "#{tensport}", 'HostIp' => 'localhost'}
|
|
4710
|
-
],
|
|
4711
|
-
},
|
|
4712
|
-
},
|
|
4713
|
-
}
|
|
4714
|
-
container = Docker::Container.create(image_settings)
|
|
4715
|
-
container.start()
|
|
4716
|
-
command = ["/bin/bash", "-lc", "sudo echo -e \"#{login_content}\" >/home/ds/.netrc"]
|
|
4717
|
-
container.exec(command, tty: true)
|
|
4718
|
-
# command = ["/bin/bash", "-lc", "mkdir /home/ds/.cnvrg"]
|
|
4719
|
-
# container.exec(command, tty: true)
|
|
4720
|
-
# command = ["/bin/bash", "-lc", "mkdir /home/ds/.cnvrg/tmp"]
|
|
4721
|
-
# container.exec(command, tty: true)
|
|
4722
|
-
command = ["/bin/bash", "-lc", "sudo chown -R ds:ds /home/ds/.netrc"]
|
|
4723
|
-
container.exec(command, tty: true)
|
|
4724
|
-
command = ["/bin/bash", "-lc", "sudo chmod 0600 /home/ds/.netrc"]
|
|
4725
|
-
container.exec(command, tty: true)
|
|
4726
|
-
say "#{container.id}:#{port}##{tensport}"
|
|
4727
|
-
rescue => e
|
|
4728
|
-
puts e
|
|
4729
|
-
if e.message.include? "is not running"
|
|
4730
|
-
return config_remote(image_name, port - 1, tensport - 1)
|
|
4731
|
-
end
|
|
4732
|
-
|
|
4733
|
-
if container
|
|
4734
|
-
container.kill()
|
|
4735
|
-
end
|
|
4736
|
-
return false
|
|
4737
|
-
end
|
|
4738
|
-
end
|
|
4739
|
-
|
|
4740
|
-
|
|
4741
|
-
desc '', '', :hide => true
|
|
4742
|
-
method_option :login, :type => :string, :aliases => ["-l"], :default => ""
|
|
4743
|
-
|
|
4744
|
-
def config_netrc(container)
|
|
4745
|
-
|
|
4746
|
-
login_content = options["login"]
|
|
4747
|
-
|
|
4748
|
-
container = Docker::Container.get(container)
|
|
4749
|
-
command = ["/bin/bash", "-lc", "sudo echo -e \"#{login_content}\" >/home/ds/.netrc"]
|
|
4750
|
-
container.exec(command, tty: true)
|
|
4751
|
-
command = ["/bin/bash", "-lc", "sudo chown -R ds:ds /home/ds/.netrc"]
|
|
4752
|
-
container.exec(command, tty: true)
|
|
4753
|
-
command = ["/bin/bash", "-lc", "sudo chmod 0600 /home/ds/.netrc"]
|
|
4754
|
-
container.exec(command, tty: true)
|
|
4755
|
-
say "OK"
|
|
4756
|
-
|
|
4757
|
-
end
|
|
4758
|
-
|
|
4759
|
-
desc '', '', :hide => true
|
|
4760
|
-
method_option :login, :type => :string, :aliases => ["-l", "--l"], :default => ""
|
|
4761
|
-
method_option :app_dir, :type => :string, :aliases => ["-d", "--d"], :default => "/home/ds/notebooks"
|
|
4762
|
-
method_option :cmd, :type => :string, :aliases => ["-c", "--c"], :default => "/usr/local/cnvrg/run_ipython.sh"
|
|
4763
|
-
|
|
4764
|
-
|
|
4765
|
-
def config_remote_gpu(image_name, port = 7654, tensport = 6006)
|
|
4766
|
-
local_images = Docker::Image.all
|
|
4767
|
-
|
|
4768
|
-
docker_image_local = local_images.map {|x| x.info["RepoTags"]}.flatten.select {|y| y.eql? "#{image_name}:latest"}.flatten
|
|
4769
|
-
if docker_image_local.empty?
|
|
4770
|
-
say "no image"
|
|
4771
|
-
exit(1)
|
|
4772
|
-
end
|
|
4773
|
-
|
|
4774
|
-
begin
|
|
4775
|
-
login_content = options["login"]
|
|
4776
|
-
app_dir = options["app_dir"]
|
|
4777
|
-
cmd = options["cmd"]
|
|
4778
|
-
|
|
4779
|
-
# image_settings = {
|
|
4780
|
-
# 'Image' => "#{image_name}:latest",
|
|
4781
|
-
# 'User' => 'ds',
|
|
4782
|
-
# 'Cmd' => cmd,
|
|
4783
|
-
# 'WorkingDir' => app_dir,
|
|
4784
|
-
# 'ExposedPorts' => {
|
|
4785
|
-
# '8888/tcp' => {},
|
|
4786
|
-
# },
|
|
4787
|
-
# 'HostConfig' => {
|
|
4788
|
-
# 'PortBindings' => {
|
|
4789
|
-
# '8888/tcp' => [
|
|
4790
|
-
# {'HostPort' => "#{port}", 'HostIp' => 'localhost'}
|
|
4791
|
-
# ],
|
|
4792
|
-
# '6006/tcp' => [
|
|
4793
|
-
# {'HostPort' => "6006", 'HostIp' => 'localhost'}
|
|
4794
|
-
# ],
|
|
4795
|
-
# },
|
|
4796
|
-
# },
|
|
4797
|
-
# }
|
|
4798
|
-
|
|
4799
|
-
container_id = `nvidia-docker run -itd -p #{port}:8888 -p #{tensport}:6006 -w #{app_dir} -v /usr/bin/nvidia-smi:/usr/bin/nvidia-smi -v /var/run/docker.sock:/var/run/docker.sock -v /usr/bin/docker:/usr/bin/docker #{image_name}:latest #{cmd} `
|
|
4800
|
-
container_id = container_id.gsub("\n", "")
|
|
4801
|
-
container = Docker::Container.get(container_id)
|
|
4802
|
-
# container.start()
|
|
4803
|
-
command = ["/bin/bash", "-lc", "sudo echo -e \"#{login_content}\" >/home/ds/.netrc"]
|
|
4804
|
-
container.exec(command, tty: true)
|
|
4805
|
-
command = ["/bin/bash", "-lc", "sudo chown -R ds:ds /home/ds/.netrc"]
|
|
4806
|
-
container.exec(command, tty: true)
|
|
4807
|
-
command = ["/bin/bash", "-lc", "sudo chmod 0600 /home/ds/.netrc"]
|
|
4808
|
-
container.exec(command, tty: true)
|
|
4809
|
-
say "#{container.id}:#{port}##{tensport}"
|
|
4810
|
-
rescue => e
|
|
4811
|
-
if e.message.include? "is not running"
|
|
4812
|
-
puts "running asgain with: #{port - 1} #{tensport - 1}"
|
|
4813
|
-
return config_remote_gpu(image_name, port - 1, tensport - 1)
|
|
4814
|
-
end
|
|
4815
|
-
|
|
4816
|
-
if container
|
|
4817
|
-
container.kill()
|
|
4538
|
+
desc 'Collect and send job utilization', '', :hide => true
|
|
4539
|
+
method_option :docker_id, :type => :string, :aliases => ["--docker_id"], :desc => "docker id to watch"
|
|
4540
|
+
method_option :is_on_gpu, :type => :boolean, :aliases => ["--is_on_gpu"], :desc => "is on gpu", :default => true
|
|
4541
|
+
def get_utilization()
|
|
4542
|
+
@exp = Experiment.new(ENV['CNVRG_OWNER'], ENV['CNVRG_PROJECT'], job_id: ENV['CNVRG_JOB_ID'])
|
|
4543
|
+
docker_id = options["docker_id"]
|
|
4544
|
+
while true do
|
|
4545
|
+
sleep 30
|
|
4546
|
+
begin
|
|
4547
|
+
stats = usage_metrics_in_docker(docker_id)
|
|
4548
|
+
if options["is_on_gpu"]
|
|
4549
|
+
gu = gpu_util(take_from_docker: true, docker_id: docker_id)
|
|
4550
|
+
stats['gpu_util'] = gu[0]
|
|
4551
|
+
stats['gpu'] = gu[1]
|
|
4552
|
+
end
|
|
4553
|
+
stats['docker_id'] = docker_id
|
|
4554
|
+
@exp.send_machine_stats [stats] unless stats.empty?
|
|
4555
|
+
rescue => e
|
|
4556
|
+
log_error(e)
|
|
4557
|
+
log_message("Failed to upload ongoing stats, continuing with experiment", Thor::Shell::Color::YELLOW)
|
|
4818
4558
|
end
|
|
4819
|
-
return false
|
|
4820
4559
|
end
|
|
4821
4560
|
end
|
|
4822
4561
|
|
|
4823
|
-
desc '', '', :hide => true
|
|
4824
|
-
method_option :login, :type => :string, :aliases => ["-l"], :default => ""
|
|
4825
|
-
|
|
4826
|
-
def config_flask_remote(image_name, port = 80)
|
|
4827
|
-
local_images = Docker::Image.all
|
|
4828
|
-
|
|
4829
|
-
docker_image_local = local_images.map {|x| x.info["RepoTags"]}.flatten.select {|y| y.eql? "#{image_name}:latest"}.flatten
|
|
4830
|
-
if docker_image_local.empty?
|
|
4831
|
-
say "no image"
|
|
4832
|
-
exit(1)
|
|
4833
|
-
end
|
|
4834
|
-
|
|
4835
|
-
begin
|
|
4836
|
-
login_content = options["login"]
|
|
4837
|
-
image_settings = {
|
|
4838
|
-
'Image' => "#{image_name}:latest",
|
|
4839
|
-
'User' => 'ds',
|
|
4840
|
-
'Cmd' => '/usr/local/cnvrg/start_super.sh',
|
|
4841
|
-
'WorkingDir' => '/home/ds/app',
|
|
4842
|
-
'ExposedPorts' => {
|
|
4843
|
-
'80/tcp' => {},
|
|
4844
|
-
},
|
|
4845
|
-
'HostConfig' => {
|
|
4846
|
-
'PortBindings' => {
|
|
4847
|
-
'80/tcp' => [
|
|
4848
|
-
{'HostPort' => "#{port}", 'HostIp' => 'localhost'}
|
|
4849
|
-
],
|
|
4850
|
-
},
|
|
4851
|
-
},
|
|
4852
|
-
}
|
|
4853
|
-
container = Docker::Container.create(image_settings)
|
|
4854
|
-
container.start()
|
|
4855
|
-
command = ["/bin/bash", "-lc", "sudo echo -e \"#{login_content}\" >/home/ds/.netrc"]
|
|
4856
|
-
container.exec(command, tty: true)
|
|
4857
|
-
command = ["/bin/bash", "-lc", "sudo chown -R ds:ds /home/ds/.netrc"]
|
|
4858
|
-
container.exec(command, tty: true)
|
|
4859
|
-
command = ["/bin/bash", "-lc", "sudo chmod 0600 /home/ds/.netrc"]
|
|
4860
|
-
container.exec(command, tty: true)
|
|
4861
|
-
say "#{container.id}:#{port}"
|
|
4862
|
-
rescue => e
|
|
4863
|
-
pus e
|
|
4864
|
-
if e.message.include? "is not running"
|
|
4865
|
-
return "port is taken"
|
|
4866
|
-
end
|
|
4867
|
-
puts "error"
|
|
4868
|
-
if container
|
|
4869
|
-
container.kill()
|
|
4870
|
-
end
|
|
4871
|
-
return false
|
|
4872
|
-
end
|
|
4873
|
-
end
|
|
4874
|
-
|
|
4875
|
-
desc '', '', :hide => true
|
|
4876
|
-
method_option :login, :type => :string, :aliases => ["-l"], :default => ""
|
|
4877
|
-
|
|
4878
|
-
def config_flask_remote_gpu(image_name, port = 80)
|
|
4879
|
-
local_images = Docker::Image.all
|
|
4880
|
-
|
|
4881
|
-
docker_image_local = local_images.map {|x| x.info["RepoTags"]}.flatten.select {|y| y.eql? "#{image_name}:latest"}.flatten
|
|
4882
|
-
if docker_image_local.empty?
|
|
4883
|
-
say "no image"
|
|
4884
|
-
exit(1)
|
|
4885
|
-
end
|
|
4886
|
-
|
|
4887
|
-
begin
|
|
4888
|
-
login_content = options["login"]
|
|
4889
|
-
container_id = `nvidia-docker run -itd -p 80:80 -w /home/ds/app #{image_name}:latest /usr/local/cnvrg/start_super.sh`
|
|
4890
|
-
container_id = container_id.gsub("\n", "")
|
|
4891
|
-
container = Docker::Container.get(container_id)
|
|
4892
|
-
command = ["/bin/bash", "-lc", "sudo echo -e \"#{login_content}\" >/home/ds/.netrc"]
|
|
4893
|
-
container.exec(command, tty: true)
|
|
4894
|
-
command = ["/bin/bash", "-lc", "sudo chown -R ds:ds /home/ds/.netrc"]
|
|
4895
|
-
container.exec(command, tty: true)
|
|
4896
|
-
command = ["/bin/bash", "-lc", "sudo chmod 0600 /home/ds/.netrc"]
|
|
4897
|
-
container.exec(command, tty: true)
|
|
4898
|
-
say "#{container.id}:#{port}"
|
|
4899
|
-
rescue => e
|
|
4900
|
-
puts e
|
|
4901
|
-
if e.message.include? "is not running"
|
|
4902
|
-
return "port is taken"
|
|
4903
|
-
end
|
|
4904
|
-
puts "error"
|
|
4905
|
-
if container
|
|
4906
|
-
container.kill()
|
|
4907
|
-
end
|
|
4908
|
-
return false
|
|
4909
|
-
end
|
|
4910
|
-
end
|
|
4911
4562
|
|
|
4912
4563
|
desc '', '', :hide => true
|
|
4913
4564
|
|
|
@@ -4933,39 +4584,10 @@ module Cnvrg
|
|
|
4933
4584
|
|
|
4934
4585
|
end
|
|
4935
4586
|
|
|
4936
|
-
desc '
|
|
4937
|
-
|
|
4938
|
-
|
|
4939
|
-
|
|
4940
|
-
method_option :gpu, :type => :boolean, :aliases => ["-g","--gpu"], :default => false
|
|
4941
|
-
def upload_image(image_name,image_path)
|
|
4942
|
-
begin
|
|
4943
|
-
verify_logged_in(false)
|
|
4944
|
-
log_start(__method__, args, options)
|
|
4945
|
-
|
|
4946
|
-
@image = Cnvrg::Images.new()
|
|
4947
|
-
say "Uploading new docker image file", Thor::Shell::Color::BLUE
|
|
4948
|
-
workdir = options[:workdir]
|
|
4949
|
-
description = options[:description]
|
|
4950
|
-
user = options[:user]
|
|
4951
|
-
is_gpu = options[:gpu]
|
|
4952
|
-
res = @image.upload_docker_image(image_path, image_name, workdir, user, description, is_gpu)
|
|
4953
|
-
if res["status"] == 200
|
|
4954
|
-
image_slug = res["id"]
|
|
4955
|
-
owner = CLI.get_owner
|
|
4956
|
-
image_url = "#{Cnvrg::Helpers.remote_url}/#{owner}/settings/images/#{image_slug}"
|
|
4957
|
-
log_message("Successfully uploaded image: #{image_url}", Thor::Shell::Color::GREEN, true)
|
|
4958
|
-
|
|
4959
|
-
|
|
4960
|
-
else
|
|
4961
|
-
log_message("Couldn't upload image: #{image_name}", Thor::Shell::Color::RED, true)
|
|
4962
|
-
|
|
4963
|
-
end
|
|
4964
|
-
rescue => e
|
|
4965
|
-
log_error(e)
|
|
4966
|
-
end
|
|
4967
|
-
|
|
4968
|
-
|
|
4587
|
+
desc 'file_exists', description: '', hide: true
|
|
4588
|
+
def file_exists(file)
|
|
4589
|
+
exit(0) if File.exists? file
|
|
4590
|
+
exit(1)
|
|
4969
4591
|
end
|
|
4970
4592
|
|
|
4971
4593
|
|
|
@@ -5145,29 +4767,40 @@ module Cnvrg
|
|
|
5145
4767
|
method_option :project_slug, :type => :string, :aliases => ["-s"], :desc => "project slug"
|
|
5146
4768
|
method_option :project_owner, :type => :string, :aliases => ["-o"], :desc => "project slug"
|
|
5147
4769
|
method_option :frequency, :type => :numeric, :aliases => ["-f"], :desc => "poll frequency"
|
|
4770
|
+
method_option :fetch_slugs, :type => :boolean, :default => false, :desc => "Fetch experiments slugs to compare"
|
|
5148
4771
|
|
|
5149
4772
|
def compare_experiments
|
|
5150
4773
|
verify_logged_in(true)
|
|
5151
4774
|
log_start(__method__, args, options)
|
|
5152
4775
|
exps_map = {}
|
|
4776
|
+
copied_commits = []
|
|
5153
4777
|
|
|
5154
|
-
if options[:slugs].blank?
|
|
4778
|
+
if options[:slugs].blank? and options[:fetch_slugs].blank?
|
|
5155
4779
|
log_message("No experiments slugs given", Thor::Shell::Color::RED)
|
|
5156
4780
|
return false
|
|
5157
4781
|
end
|
|
5158
|
-
|
|
5159
|
-
|
|
5160
|
-
log_message("No experiments slugs given", Thor::Shell::Color::RED)
|
|
5161
|
-
return false
|
|
4782
|
+
if options[:slugs].present?
|
|
4783
|
+
slugs = options[:slugs].split(",")
|
|
5162
4784
|
end
|
|
4785
|
+
|
|
5163
4786
|
frequency = options[:frequency] || 5
|
|
5164
4787
|
namespace = options[:namespace]
|
|
5165
4788
|
project_dir = is_cnvrg_dir(Dir.pwd)
|
|
5166
4789
|
@project = Project.new(project_home=project_dir, slug: options[:project_slug], owner: options[:project_owner])
|
|
4790
|
+
fetch_slugs = options[:fetch_slugs]
|
|
4791
|
+
webapp_slug = ENV["CNVRG_JOB_ID"]
|
|
4792
|
+
if fetch_slugs and webapp_slug.present?
|
|
4793
|
+
slugs = @project.fetch_webapp_slugs(webapp_slug)
|
|
4794
|
+
end
|
|
4795
|
+
if slugs.blank?
|
|
4796
|
+
log_message("No experiments slugs given", Thor::Shell::Color::RED)
|
|
4797
|
+
return false
|
|
4798
|
+
end
|
|
5167
4799
|
|
|
4800
|
+
log_message("compare is running")
|
|
5168
4801
|
while true
|
|
4802
|
+
log_message("compare is running for slugs #{slugs}")
|
|
5169
4803
|
slugs.each do |exp_slug|
|
|
5170
|
-
|
|
5171
4804
|
begin
|
|
5172
4805
|
if exps_map[exp_slug].blank?
|
|
5173
4806
|
exp = @project.get_experiment(exp_slug)["experiment"]
|
|
@@ -5181,15 +4814,23 @@ module Cnvrg
|
|
|
5181
4814
|
log_message("#{exp_name} has ended, getting files from end commit", Thor::Shell::Color::BLUE)
|
|
5182
4815
|
Cnvrg::Helpers.get_experiment_events_log_from_server(exp, @project)
|
|
5183
4816
|
exps_map[exp_slug] = exp
|
|
5184
|
-
|
|
4817
|
+
else
|
|
5185
4818
|
log_message("#{exp_name} is running should get logs", Thor::Shell::Color::BLUE)
|
|
5186
|
-
Cnvrg::Helpers.get_experiment_events_log_via_kubectl(exp, namespace)
|
|
4819
|
+
success = Cnvrg::Helpers.get_experiment_events_log_via_kubectl(exp, namespace)
|
|
4820
|
+
if !success and exp["last_successful_commit"].present? and !copied_commits.include?(exp["last_successful_commit"])
|
|
4821
|
+
log_message("Failed to get kube files, using last commit", Thor::Shell::Color::BLUE)
|
|
4822
|
+
Cnvrg::Helpers.get_experiment_events_log_from_server(exp, @project, commit: exp["last_successful_commit"])
|
|
4823
|
+
copied_commits << exp["last_successful_commit"]
|
|
4824
|
+
end
|
|
5187
4825
|
end
|
|
5188
4826
|
rescue => e
|
|
5189
4827
|
Cnvrg::Logger.log_error(e)
|
|
5190
4828
|
end
|
|
5191
4829
|
end
|
|
5192
4830
|
sleep frequency
|
|
4831
|
+
if fetch_slugs
|
|
4832
|
+
slugs = @project.fetch_webapp_slugs(webapp_slug, slugs: slugs)
|
|
4833
|
+
end
|
|
5193
4834
|
end
|
|
5194
4835
|
end
|
|
5195
4836
|
|
|
@@ -5275,127 +4916,6 @@ module Cnvrg
|
|
|
5275
4916
|
end
|
|
5276
4917
|
|
|
5277
4918
|
|
|
5278
|
-
desc 'pull_image', 'downloads and loads an image', :hide => true
|
|
5279
|
-
|
|
5280
|
-
def pull_image(image_name)
|
|
5281
|
-
begin
|
|
5282
|
-
verify_logged_in(false)
|
|
5283
|
-
log_start(__method__, args, options)
|
|
5284
|
-
owner = Cnvrg::CLI.get_owner()
|
|
5285
|
-
image = Cnvrg::Images.image_exist(owner, image_name)
|
|
5286
|
-
if !image
|
|
5287
|
-
log_message("Couldn't find image in cnvrg repository", Thor::Shell::Color::RED)
|
|
5288
|
-
exit(1)
|
|
5289
|
-
end
|
|
5290
|
-
path = download_image(image_name, image["slug"])
|
|
5291
|
-
if path
|
|
5292
|
-
log_message("Building image", Thor::Shell::Color::BLUE)
|
|
5293
|
-
Docker.options[:read_timeout] = 216000
|
|
5294
|
-
image = Docker::Image.build_from_dir(path, {'dockerfile' => 'Dockerfile.cpu', 't' => "#{image_name}:latest"}) do |v|
|
|
5295
|
-
begin
|
|
5296
|
-
if (log = JSON.parse(v)) && log.has_key?("stream")
|
|
5297
|
-
next if log["stream"].starts_with? "Step"
|
|
5298
|
-
$stdout.puts log["stream"]
|
|
5299
|
-
end
|
|
5300
|
-
rescue
|
|
5301
|
-
end
|
|
5302
|
-
|
|
5303
|
-
end
|
|
5304
|
-
|
|
5305
|
-
if not image.nil?
|
|
5306
|
-
FileUtils.rm_rf(path)
|
|
5307
|
-
checks = Helpers.checkmark()
|
|
5308
|
-
log_message("#{checks} Image built successfully", Thor::Shell::Color::GREEN)
|
|
5309
|
-
return image
|
|
5310
|
-
else
|
|
5311
|
-
|
|
5312
|
-
log_message("Could not build image", Thor::Shell::Color::RED)
|
|
5313
|
-
return false
|
|
5314
|
-
end
|
|
5315
|
-
else
|
|
5316
|
-
|
|
5317
|
-
log_message("Could not download image", Thor::Shell::Color::RED)
|
|
5318
|
-
return false
|
|
5319
|
-
|
|
5320
|
-
|
|
5321
|
-
end
|
|
5322
|
-
|
|
5323
|
-
# else
|
|
5324
|
-
# path = download_image(image_name,image["slug"])
|
|
5325
|
-
# if path
|
|
5326
|
-
# image = Docker::Image.import(path)
|
|
5327
|
-
# image.tag('repo' => image_name, 'tag' => 'latest')
|
|
5328
|
-
# if not image.nil?
|
|
5329
|
-
# say "Finished downloading image, cleaning up..", Thor::Shell::Color::GREEN
|
|
5330
|
-
# FileUtils.rm(path)
|
|
5331
|
-
# checks = Helpers.checkmark()
|
|
5332
|
-
# say "#{checks} Done", Thor::Shell::Color::GREEN
|
|
5333
|
-
# log_end(0)
|
|
5334
|
-
# return image
|
|
5335
|
-
# log_end(0)
|
|
5336
|
-
# else
|
|
5337
|
-
# say "Could not download image", Thor::Shell::Color::RED
|
|
5338
|
-
# return false
|
|
5339
|
-
# end
|
|
5340
|
-
#
|
|
5341
|
-
# end
|
|
5342
|
-
# end
|
|
5343
|
-
rescue => e
|
|
5344
|
-
|
|
5345
|
-
log_message "Error: couldn't build image", Thor::Shell::Color::RED
|
|
5346
|
-
log_error(e)
|
|
5347
|
-
|
|
5348
|
-
rescue SignalException
|
|
5349
|
-
say "\nAborting"
|
|
5350
|
-
exit(1)
|
|
5351
|
-
ensure
|
|
5352
|
-
if path
|
|
5353
|
-
FileUtils.rm_rf(path)
|
|
5354
|
-
|
|
5355
|
-
end
|
|
5356
|
-
end
|
|
5357
|
-
|
|
5358
|
-
|
|
5359
|
-
end
|
|
5360
|
-
|
|
5361
|
-
desc 'set_image', 'set image to a porject', :hide => true
|
|
5362
|
-
|
|
5363
|
-
def set_image(docker_image)
|
|
5364
|
-
verify_logged_in(true)
|
|
5365
|
-
log_start(__method__, args, options)
|
|
5366
|
-
working_dir = is_cnvrg_dir
|
|
5367
|
-
project = Project.new(working_dir)
|
|
5368
|
-
|
|
5369
|
-
local_images = Docker::Image.all
|
|
5370
|
-
docker_image_local = local_images.map {|x| x.info["RepoTags"]}.flatten.select {|y| y.include? docker_image}.flatten
|
|
5371
|
-
if docker_image_local.size == 0
|
|
5372
|
-
|
|
5373
|
-
if yes? "Image wasn't found locally, pull image from cnvrg repository?", Thor::Shell::Color::YELLOW
|
|
5374
|
-
image = pull(docker_image)
|
|
5375
|
-
if image
|
|
5376
|
-
log_message("downloaded image: #{docker_image}", Thor::Shell::Color::BLUE)
|
|
5377
|
-
@image = Images.new(working_dir, docker_image)
|
|
5378
|
-
else
|
|
5379
|
-
log_message("Could not create a new project with docker, image was not found", Thor::Shell::Color::RED)
|
|
5380
|
-
exit(1)
|
|
5381
|
-
end
|
|
5382
|
-
else
|
|
5383
|
-
log_message("Could not create a new project with docker, image was not found", Thor::Shell::Color::RED)
|
|
5384
|
-
exit(1)
|
|
5385
|
-
|
|
5386
|
-
end
|
|
5387
|
-
elsif docker_image_local.size == 1
|
|
5388
|
-
log_message("found image: #{docker_image_local[0]}, setting it up..", Thor::Shell::Color::BLUE)
|
|
5389
|
-
@image = Images.new(working_dir, docker_image_local[0])
|
|
5390
|
-
elsif docker_image_local.size > 1
|
|
5391
|
-
log_message("found #{docker_image_local.size} images, choose the image name you want to use", Thor::Shell::Color::BLUE)
|
|
5392
|
-
image_name = ask "#{docker_image_local.join("\n")}\n", Thor::Shell::Color::BLUE
|
|
5393
|
-
image_name = image_name.strip
|
|
5394
|
-
@image = Images.new(working_dir, image_name)
|
|
5395
|
-
end
|
|
5396
|
-
@image.update_image_activity(project.last_local_commit, nil)
|
|
5397
|
-
end
|
|
5398
|
-
|
|
5399
4919
|
desc 'check_pod_restart', 'Check pod restart', :hide => true
|
|
5400
4920
|
def check_pod_restart
|
|
5401
4921
|
Cnvrg::CLI.new.log_start(__method__, args, options)
|
|
@@ -5670,7 +5190,7 @@ module Cnvrg
|
|
|
5670
5190
|
|
|
5671
5191
|
if dirs.size == 0
|
|
5672
5192
|
log_message("Couldn't find cnvrg directory. Please start a new project", Thor::Shell::Color::RED)
|
|
5673
|
-
|
|
5193
|
+
puts Thread.current.backtrace
|
|
5674
5194
|
exit(1)
|
|
5675
5195
|
end
|
|
5676
5196
|
return dirs.join("/")
|
|
@@ -5773,7 +5293,7 @@ module Cnvrg
|
|
|
5773
5293
|
is_cnvrg = is_cnvrg_dir
|
|
5774
5294
|
if !is_cnvrg
|
|
5775
5295
|
say "You're not in a cnvrg project directory", Thor::Shell::Color::RED
|
|
5776
|
-
exit(
|
|
5296
|
+
exit(1)
|
|
5777
5297
|
end
|
|
5778
5298
|
|
|
5779
5299
|
end
|
|
@@ -5919,21 +5439,6 @@ module Cnvrg
|
|
|
5919
5439
|
|
|
5920
5440
|
end
|
|
5921
5441
|
|
|
5922
|
-
def container_changes(dir)
|
|
5923
|
-
container_id = is_project_with_docker(dir)
|
|
5924
|
-
if not container_id
|
|
5925
|
-
return false
|
|
5926
|
-
end
|
|
5927
|
-
container = Docker::Container.get(container_id)
|
|
5928
|
-
command = ['/bin/bash', '-lc', '/opt/ds/bin/pip freeze']
|
|
5929
|
-
pip = container.exec(command, tty: true)[0]
|
|
5930
|
-
command = ["/bin/bash", "-lc", "dpkg -l"]
|
|
5931
|
-
dpkg = container.exec(command, tty: true)[0]
|
|
5932
|
-
command = ["/bin/bash", "-lc", "cat /home/ds/.bash_history"]
|
|
5933
|
-
history = container.exec(command, tty: true)[0]
|
|
5934
|
-
diff = [pip, dpkg, history]
|
|
5935
|
-
return diff
|
|
5936
|
-
end
|
|
5937
5442
|
|
|
5938
5443
|
def is_port_taken(ip = Cnvrg::CLI::IP, port = Cnvrg::CLI::PORT, seconds = 1)
|
|
5939
5444
|
Timeout::timeout(seconds) do
|
|
@@ -6116,13 +5621,17 @@ module Cnvrg
|
|
|
6116
5621
|
|
|
6117
5622
|
end
|
|
6118
5623
|
|
|
6119
|
-
def gpu_util
|
|
5624
|
+
def gpu_util(take_from_docker: false, docker_id: nil)
|
|
6120
5625
|
if !Helpers.ubuntu?
|
|
6121
5626
|
return 0.0
|
|
6122
5627
|
end
|
|
6123
5628
|
stats = [[],[]]
|
|
6124
5629
|
begin
|
|
6125
|
-
|
|
5630
|
+
if take_from_docker
|
|
5631
|
+
gpu_stats = `docker exec -it #{docker_id} sh -c 'nvidia-smi --query-gpu=utilization.gpu,utilization.memory --format=csv'`
|
|
5632
|
+
else
|
|
5633
|
+
gpu_stats = `nvidia-smi --query-gpu=utilization.gpu,utilization.memory --format=csv`
|
|
5634
|
+
end
|
|
6126
5635
|
|
|
6127
5636
|
if !gpu_stats.nil?
|
|
6128
5637
|
gpu_stats = gpu_stats.split("\n")[1..-1]
|