cnvrg 1.9.9.9.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/bin/cnvrg +9 -0
- data/cnvrg.gemspec +47 -0
- data/lib/cnvrg.rb +7 -0
- data/lib/cnvrg/Images.rb +351 -0
- data/lib/cnvrg/api.rb +247 -0
- data/lib/cnvrg/api_v2.rb +14 -0
- data/lib/cnvrg/auth.rb +79 -0
- data/lib/cnvrg/cli.rb +5715 -0
- data/lib/cnvrg/cli/flow.rb +166 -0
- data/lib/cnvrg/cli/library_cli.rb +33 -0
- data/lib/cnvrg/cli/subcommand.rb +28 -0
- data/lib/cnvrg/cli/task.rb +116 -0
- data/lib/cnvrg/colors.rb +8 -0
- data/lib/cnvrg/connect_job_ssh.rb +31 -0
- data/lib/cnvrg/data.rb +335 -0
- data/lib/cnvrg/datafiles.rb +1325 -0
- data/lib/cnvrg/dataset.rb +892 -0
- data/lib/cnvrg/downloader/client.rb +101 -0
- data/lib/cnvrg/downloader/clients/azure_client.rb +45 -0
- data/lib/cnvrg/downloader/clients/gcp_client.rb +50 -0
- data/lib/cnvrg/downloader/clients/s3_client.rb +78 -0
- data/lib/cnvrg/experiment.rb +209 -0
- data/lib/cnvrg/files.rb +1047 -0
- data/lib/cnvrg/flow.rb +137 -0
- data/lib/cnvrg/helpers.rb +422 -0
- data/lib/cnvrg/helpers/agent.rb +188 -0
- data/lib/cnvrg/helpers/executer.rb +213 -0
- data/lib/cnvrg/hyper.rb +21 -0
- data/lib/cnvrg/image.rb +113 -0
- data/lib/cnvrg/image_cli.rb +25 -0
- data/lib/cnvrg/job_cli.rb +73 -0
- data/lib/cnvrg/job_ssh.rb +48 -0
- data/lib/cnvrg/logger.rb +111 -0
- data/lib/cnvrg/org_helpers.rb +5 -0
- data/lib/cnvrg/project.rb +822 -0
- data/lib/cnvrg/result.rb +29 -0
- data/lib/cnvrg/runner.rb +49 -0
- data/lib/cnvrg/ssh.rb +94 -0
- data/lib/cnvrg/storage.rb +128 -0
- data/lib/cnvrg/task.rb +165 -0
- data/lib/cnvrg/version.rb +3 -0
- metadata +460 -0
@@ -0,0 +1,101 @@
|
|
1
|
+
|
2
|
+
module Cnvrg
  module Downloader
    OLD_SERVER_VERSION_MESSAGE = "Your server version is not relevant for this cli version please contact support for further help."
    # Upper bound, in seconds, for any single delay returned by Client#backoff_time.
    MAXIMUM_BACKOFF = 64
    # Number of attempts Client#safe_upload makes. Read once at load time from the
    # UPLOAD_FILE_RETRIES environment variable; defaults to 20 when it is unset.
    RETRIES = ENV['UPLOAD_FILE_RETRIES']&.to_i || 20
    # NOTE(review): these accessors are declared on the Downloader module, not on
    # Client, so Client instances do not receive them - confirm this is intended.
    attr_accessor :bucket, :client

    # Base class for the storage clients. It provides the shared helpers (key/iv
    # loading, local directory preparation, retrying uploads) and a factory that
    # picks the concrete client from a params hash. #download and #upload are
    # placeholders meant to be overridden by subclasses.
    class Client
      # @param params [Object] accepted for interface compatibility; not used here.
      def initialize(params)
        @key = ''
        @iv = ''
        @client = ''
        @bucket = ''
      end

      # Reads the resource at +sts_path+ and splits its content on newlines.
      # Subclasses destructure the result as +key, iv+.
      #
      # NOTE(review): Kernel#open is kept on purpose - presumably +sts_path+ may be
      # a URL handled by open-uri; confirm before switching to File.read.
      #
      # @param sts_path [String] location of the sts resource
      # @return [Array<String>] the lines of the sts content
      # @raise [StandardError] when the resource cannot be read or is blank
      def extract_key_iv(sts_path)
        sts = open(sts_path).read rescue nil
        raise StandardError.new("Cant open sts") if sts.blank?
        sts.split("\n")
      end

      # Removes +prefix+ from +file+ and then strips any leading slashes.
      #
      # Only the first occurrence of +prefix+ is removed, so a path whose later
      # components happen to contain the prefix text (for example prefix "data"
      # and file "data/my_data/f.txt") is no longer mangled.
      #
      # @param prefix [String, Regexp] text to cut from the path
      # @param file [String] storage path
      # @return [String] the path without the prefix and without leading slashes
      def cut_prefix(prefix, file)
        file.sub(prefix, '').gsub(/^\/*/, '')
      end

      # Placeholder; concrete clients override this.
      def download(storage_path, local_path)
        ### need to be implemented..
      end

      # Placeholder; concrete clients override this.
      def upload(storage_path, local_path)
        ### need to be implemented..
      end

      # Creates +path+; with +recursive: true+ missing parents are created too.
      def mkdir(path, recursive: false)
        recursive ? FileUtils.mkdir_p(path) : FileUtils.mkdir(path)
      end

      # Makes sure the directory that will contain +local_path+ exists.
      def prepare_download(local_path)
        mkdir(File.dirname(local_path), recursive: true)
      end

      # Decrypts +str+ with this client's key and iv.
      def decrypt(str)
        Cnvrg::Helpers.decrypt(@key, @iv, str)
      end

      # Calls #upload, retrying on StandardError with a capped, jittered backoff.
      #
      # At least one attempt is always made, even when RETRIES resolves to zero or
      # less (for example a non-numeric UPLOAD_FILE_RETRIES), and no delay is spent
      # after the final failed attempt.
      #
      # @param storage_path [String] destination path inside the storage
      # @param local_path [String] path of the local file
      # @return [true] when an attempt succeeds
      # @raise [StandardError] the last upload error once all attempts failed
      def safe_upload(storage_path, local_path)
        attempts = [RETRIES, 1].max
        (1..attempts).each do |n|
          begin
            upload(storage_path, local_path)
            return true
          rescue => e
            last_attempt = n >= attempts
            backoff_time_seconds = backoff_time(n)

            message = "Got error: #{e.class.name} with message: #{e.message} while uploading a single file: #{local_path}, retry: #{n} of: #{attempts}"
            message += if last_attempt
                         ", done retry, continuing to the next file"
                       else
                         ", next retry in: #{backoff_time_seconds} seconds"
                       end
            Cnvrg::Logger.log_error_message(message)

            # Nothing left to wait for: surface the failure immediately.
            raise e if last_attempt

            sleep backoff_time_seconds
          end
        end
        true
      end

      # Builds the concrete client selected by params["storage"].
      #
      # @param params [#as_json] connection parameters
      # @return [Client, nil] an S3, Azure or GCP client; nil for any other storage value
      def self.factory(params)
        params = params.as_json
        case params["storage"]
        when 's3', 'minio'
          Cnvrg::Downloader::Clients::S3Client.new(sts_path: params["path_sts"], access_key: params["sts_a"], secret: params["sts_s"], session_token: params["sts_st"], region: params["region"], bucket: params["bucket"], encryption: params["encryption"], endpoint: params["endpoint"], storage: params["storage"])
        when 'azure'
          azure_params = params.symbolize_keys.slice(:storage_account_name, :storage_access_key, :container, :sts)
          Cnvrg::Downloader::Clients::AzureClient.new(**azure_params)
        when 'gcp'
          Cnvrg::Downloader::Clients::GcpClient.new(project_id: params["project_id"], credentials: params["credentials"], bucket_name: params["bucket_name"], sts: params["sts"])
        end
      end

      private

      # Random jitter in the range [0, 1) seconds, with millisecond resolution.
      def random_number_milliseconds
        rand(1000) / 1000.0
      end

      # Delay before the next retry: 2**n seconds plus jitter, capped at MAXIMUM_BACKOFF.
      def backoff_time(n)
        [(2**n) + random_number_milliseconds, MAXIMUM_BACKOFF].min
      end
    end
  end
end
|
@@ -0,0 +1,45 @@
|
|
1
|
+
require 'azure/storage/blob'
|
2
|
+
|
3
|
+
module Cnvrg
  module Downloader
    module Clients
      # Storage client backed by Azure Blob Storage.
      class AzureClient < Client
        # Loads the key/iv pair from +sts+ and uses it to decrypt the account
        # name, access key and container name.
        def initialize(storage_account_name: nil, storage_access_key: nil, container: nil, sts: nil)
          @key, @iv = extract_key_iv(sts)
          @account_name = Cnvrg::Helpers.decrypt(@key, @iv, storage_account_name)
          @access_key = Cnvrg::Helpers.decrypt(@key, @iv, storage_access_key)
          @container = Cnvrg::Helpers.decrypt(@key, @iv, container)
        end

        # Fetches a blob and writes its content to +local_path+ in binary mode.
        #
        # @param storage_path [String] blob name; decrypted first unless +decrypt+ is false
        # @param local_path [String] destination file (parent directories are created)
        # @param decrypt [Boolean] whether +storage_path+ is encrypted
        # @return the blob object returned by the Azure client
        def download(storage_path, local_path, decrypt: true)
          prepare_download(local_path)
          storage_path = Cnvrg::Helpers.decrypt(@key, @iv, storage_path) if decrypt
          blob, content = client.get_blob(@container, storage_path)
          ::File.open(local_path, 'wb') { |f| f.write(content) }
          blob
        end

        # Uploads +local_path+ as a block blob named +storage_path+.
        #
        # The file is opened with a block so the handle is closed once the upload
        # returns or raises. Errors propagate to the caller unchanged.
        #
        # @return the value returned by create_block_blob
        def upload(storage_path, local_path)
          ::File.open(local_path, "rb") do |file|
            client.create_block_blob(@container, storage_path, file)
          end
        end

        # Lists one page of blob names.
        #
        # @param prefix [String, nil] only blobs whose name starts with this prefix
        # @param marker [String, nil] continuation token from a previous call
        # @param limit [Integer] maximum number of results requested
        # @return [Array] +[names, next_marker]+
        def fetch_files(prefix: nil, marker: nil, limit: 10000)
          blobs = client.list_blobs(@container, prefix: prefix, max_results: limit, marker: marker)
          next_marker = blobs.continuation_token
          files = blobs.map { |x| x.name }
          [files, next_marker]
        end

        private

        # Builds a fresh blob service on every call (no caching).
        def client
          Azure::Storage::Blob::BlobService.create(storage_account_name: @account_name, storage_access_key: @access_key)
        end
      end
    end
  end
end
|
@@ -0,0 +1,50 @@
|
|
1
|
+
require "google/cloud/storage"
|
2
|
+
|
3
|
+
module Cnvrg
  module Downloader
    module Clients
      # Storage client backed by Google Cloud Storage.
      class GcpClient < Client
        # Decrypts the connection settings with the key/iv read from +sts+, then
        # connects to the bucket. Any StandardError raised on the way is logged
        # and the process exits with status 1.
        def initialize(project_id: nil, credentials: nil, bucket_name: nil, sts: nil)
          begin
            @key, @iv = extract_key_iv(sts)
            @project_id = Cnvrg::Helpers.decrypt(@key, @iv, project_id)
            @credentials_path = Cnvrg::Helpers.decrypt(@key, @iv, credentials)
            @tempfile = nil
            @bucket_name = Cnvrg::Helpers.decrypt(@key, @iv, bucket_name)
            init_gcp_credentials
            @storage = Google::Cloud::Storage.new(project_id: @project_id, credentials: @credentials, retries: 20)
            @bucket = @storage.bucket(@bucket_name)
            # Touch the bucket so that a nil bucket fails here, inside the rescue.
            @bucket.name
          rescue => e
            Cnvrg::Logger.log_error(e)
            Cnvrg::Logger.log_info("Tried to init gcp client without success.")
            Cnvrg::CLI.log_message("Cannot init client. please contact support to check your bucket credentials.")
            exit(1)
          end
        end

        # Copies the content found at @credentials_path into a temporary file and
        # stores that file's path in @credentials.
        # The Tempfile is kept in @tempfile - presumably so it outlives this call.
        def init_gcp_credentials
          tmp = Tempfile.new
          raw = open(@credentials_path).read
          tmp.binmode
          tmp.write(raw)
          tmp.rewind
          @credentials = tmp.path
          @tempfile = tmp
        end

        # Downloads the object named by the decrypted +storage_path+ to +local_path+,
        # creating the parent directories first.
        def download(storage_path, local_path)
          prepare_download(local_path)
          remote_file = @bucket.file(decrypt(storage_path))
          remote_file.download(local_path)
        end

        # Uploads +local_path+ to the bucket as +storage_path+; errors propagate.
        def upload(storage_path, local_path)
          @bucket.create_file(local_path, storage_path)
        end
      end
    end
  end
end
|
@@ -0,0 +1,78 @@
|
|
1
|
+
module Cnvrg
  module Downloader
    module Clients
      # Storage client for S3 and for S3-compatible MinIO endpoints.
      class S3Client < Client
        # Decrypts the connection settings with the key/iv read from +sts_path+
        # and prepares the option hashes used for every request.
        #
        # @param storage [String, nil] 'minio' switches to path-style addressing on
        #   a custom endpoint; 's3' enables the accelerate endpoint for uploads
        # @param encryption [String, nil] server-side encryption value for uploads
        def initialize(sts_path: nil, access_key: nil, secret: nil, session_token: nil, region: nil, bucket: nil, encryption: nil, endpoint: nil, storage: nil)
          @key, @iv = extract_key_iv(sts_path)
          @access_key = Cnvrg::Helpers.decrypt(@key, @iv, access_key)
          @secret = Cnvrg::Helpers.decrypt(@key, @iv, secret)
          @session_token = Cnvrg::Helpers.decrypt(@key, @iv, session_token)
          @region = Cnvrg::Helpers.decrypt(@key, @iv, region)
          @bucket_name = Cnvrg::Helpers.decrypt(@key, @iv, bucket)
          @endpoint = Cnvrg::Helpers.decrypt(@key, @iv, endpoint)
          options = {
            :access_key_id => @access_key,
            :secret_access_key => @secret,
            :session_token => @session_token,
            :region => @region,
            :http_open_timeout => 60, :retry_limit => 20
          }
          if storage == 'minio'
            options.delete(:session_token)
            # NOTE(review): TLS peer verification is disabled for minio endpoints.
            options = options.merge({
              :force_path_style => true,
              :ssl_verify_peer => false,
              :endpoint => @endpoint,
            })
          end

          @options = options

          #@client = Aws::S3::Client.new(options)
          #@bucket = Aws::S3::Resource.new(client: @client).bucket(@bucket_name)
          @upload_options = {:use_accelerate_endpoint => storage == 's3'}
          if encryption.present?
            @upload_options[:server_side_encryption] = encryption
          end
        end

        # Streams an object into +local_path+.
        #
        # The target file is opened in binary mode (as AzureClient does) so that
        # object bytes are written untouched on platforms that translate newlines
        # in text mode.
        #
        # @param storage_path [String] object key; decrypted first unless +decrypt+ is false
        # @param local_path [String] destination file (parent directories are created)
        # @param decrypt [Boolean] whether +storage_path+ is encrypted
        # @return the response of get_object
        # @raise [StandardError] any failure, after it has been logged
        def download(storage_path, local_path, decrypt: true)
          prepare_download(local_path)
          storage_path = Cnvrg::Helpers.decrypt(@key, @iv, storage_path) if decrypt
          resp = nil
          File.open(local_path, 'w+b') do |file|
            resp = aws_client.get_object({bucket: @bucket_name, key: storage_path}, target: file)
          end
          resp
        rescue => e
          Cnvrg::Logger.log_error(e)
          raise e
        end

        # Uploads a local file; errors propagate to the caller unchanged.
        #
        # @param storage_path [String] path inside the bucket (after the bucket name)
        # @param local_path [String] full path of the local file
        # @return the value returned by upload_file
        def upload(storage_path, local_path)
          aws_bucket.object(storage_path).upload_file(local_path, @upload_options)
        end

        # Returns the keys of up to +limit+ objects under +prefix+, starting after +marker+.
        #
        # @return [Array<String>]
        def fetch_files(prefix: nil, marker: nil, limit: 1000)
          aws_bucket.objects(prefix: prefix, marker: marker).first(limit).map(&:key)
        end

        private

        # Builds a fresh S3 client on every call (no caching).
        def aws_client
          Aws::S3::Client.new(@options)
        end

        # Bucket resource bound to a fresh client.
        def aws_bucket
          Aws::S3::Resource.new(client: aws_client).bucket(@bucket_name)
        end
      end
    end
  end
end
|
@@ -0,0 +1,209 @@
|
|
1
|
+
require 'net/http'
|
2
|
+
module Cnvrg
  # Client-side handle for an experiment (or notebook session) of a project.
  # Most methods are thin wrappers that send a request through Cnvrg::API.
  class Experiment
    attr_reader :slug
    attr_reader :sync_before_terminate
    attr_reader :sync_delay_time
    attr_reader :output_dir

    # @param owner [String] owner used in the API paths
    # @param project_slug [String] project used in the API paths
    # @param job_id [String, nil] slug of an already existing job, if any
    def initialize(owner, project_slug, job_id: nil)
      @project_slug = project_slug
      @owner = owner
      @command = nil
      @base_resource = "users/#{owner}/projects/#{project_slug}/"
      @slug = job_id
      @sync_before_terminate = nil
      @sync_delay_time = nil
      @output_dir = nil
    end

    # Identification of this experiment as CNVRG_* key/value pairs (JSON form).
    def as_env
      {
        CNVRG_JOB_ID: @slug,
        CNVRG_JOB_TYPE: "Experiment",
        CNVRG_PROJECT: @project_slug,
        CNVRG_OWNER: @owner,
      }.as_json
    end

    # Starts an experiment on the server and stores the slug, sync settings,
    # output dir and command found in the response's "result".
    # +sync_before_terminate+ and +periodic_sync+ are accepted but not sent.
    #
    # @return the raw API response
    def start(input, platform, machine_name, start_commit, name, email_notification, machine_activity,script_path,
              sync_before_terminate, periodic_sync)
      res = Cnvrg::API.request(@base_resource + "experiment/start", 'POST',
                               {input: input, platform: platform, machine_name: machine_name, start_commit: start_commit,
                                title: name, email_notification: email_notification, machine_activity: machine_activity, script_path: script_path})
      Cnvrg::CLI.is_response_success(res, false)

      result = res.to_h["result"].to_h
      @slug = result["slug"]
      @sync_before_terminate = result["sync_before_terminate"]
      @sync_delay_time = result["sync_delay_time"]
      @output_dir = result["output_dir"]
      @command = result["command"]

      res
    end

    # Starts a notebook session and remembers the returned id as the slug.
    #
    # @return the new session id
    def start_notebook_session(kernel, start_commit, token, port, remote, notebook_path)
      res = Cnvrg::API.request(@base_resource + "notebook/start_session", 'POST',
                               {kernel: kernel, start_commit: start_commit,
                                token: token, port: port, remote: remote, notebook_path: notebook_path})
      Cnvrg::CLI.is_response_success(res)

      @slug = res["result"]["id"]
    end

    # Stops the notebook session identified by +notebook_slug+.
    #
    # @return the raw API response
    def end_notebook_session(notebook_slug)
      res = Cnvrg::API.request(@base_resource + "notebook/#{notebook_slug}/stop", 'GET')
      Cnvrg::CLI.is_response_success(res, false)
      res
    end

    # Writes +slug+ to <proj_dir>/.cnvrg/notebook_slug.
    # Best effort: any error is swallowed and nil is returned.
    def update_notebook_slug(proj_dir, slug)
      File.write(proj_dir + "/.cnvrg/notebook_slug", slug)
    rescue
      nil
    end

    # Reads the slug stored by #update_notebook_slug, with newlines and
    # surrounding whitespace removed.
    #
    # @return [String, nil] nil when the file cannot be read
    def get_notebook_slug(proj_dir)
      File.binread(proj_dir + "/.cnvrg/notebook_slug").gsub("\n", "").strip
    rescue
      nil
    end

    # Reads <working_dir>/.cnvrg/machine_activity and converts it with to_i.
    #
    # @return [Integer, nil] nil when the file cannot be read
    def get_machine_activity(working_dir)
      File.binread("#{working_dir}/.cnvrg/machine_activity").strip.to_i
    rescue
      nil
    end

    # Sends log lines to the server in batches of 10, pausing one second after
    # each batch. +job_type+ and +job_id+ are accepted but not used.
    def job_log(logs, level: 'info', step: nil, job_type: nil, job_id: nil)
      [logs].flatten.each_slice(10) do |temp_logs|
        Cnvrg::API.request("users/#{@owner}/projects/#{@project_slug}/jobs/experiment/#{@slug}/log", "POST", {job_type: "Experiment", job_id: @slug, logs: temp_logs, log_level: level, step: step, timestamp: Time.now})
        sleep(1)
      end
    end

    # Asks the server to run +command+ remotely.
    #
    # @return the raw API response
    def exec_remote(command, commit_to_run, instance_type, image_slug,schedule,local_timestamp, grid,path_to_cmd,data, data_commit,periodic_sync,
                    sync_before_terminate, max_time, ds_sync_options=0,output_dir=nil,data_query=nil,
                    git_commit=nil, git_branch=nil, restart_if_stuck=nil, local_folders=nil,title=nil, datasets=nil, prerun: true, requirements: true, recurring: nil,
                    email_notification_error: false, email_notification_success: false, emails_to_notify: nil)
      payload = {
        command: command, image_slug: image_slug,
        commit_sha1: commit_to_run,
        instance_type: instance_type,
        schedule: schedule,
        local_timestamp: local_timestamp,
        datasets: datasets,
        grid: grid,
        path_to_cmd: path_to_cmd, dataset_slug: data,
        dataset_commit: data_commit, max_time: max_time,
        periodic_sync: periodic_sync, sync_before_terminate: sync_before_terminate,
        dataset_sync_options: ds_sync_options, output_dir: output_dir,
        dataset_query: data_query, git_commit: git_commit, git_branch: git_branch,
        restart_if_stuck: restart_if_stuck, local_folders: local_folders, title: title,
        prerun: prerun, requirements: requirements, recurring: recurring,
        email_notification_error: email_notification_error, email_notification_success: email_notification_success,
        emails_to_notify: emails_to_notify
      }
      Cnvrg::API.request("users/#{@owner}/projects/#{@project_slug}/experiment/remote", 'POST', payload)
    end

    # Asks the server to start a remote notebook.
    #
    # @return the raw API response
    def remote_notebook(instance_type, commit, data, data_commit, notebook_type,ds_sync_options=0,data_query=nil, image = nil, datasets = nil)
      payload = {
        instance_type: instance_type, dataset_slug: data,
        dataset_commit: data_commit, image_slug: image,
        datasets: datasets,
        commit: commit, notebook_type: notebook_type, dataset_sync_options: ds_sync_options,
        dataset_query: data_query
      }
      Cnvrg::API.request("users/#{@owner}/projects/#{@project_slug}/notebook/remote", 'POST', payload)
    end

    # Uploads a chunk of temporary log output for this experiment.
    def upload_temp_log(temp_log)
      response = Cnvrg::API.request(@base_resource + "experiment/upload_temp_log", 'POST', {output: temp_log,
                                                                                          exp_slug: @slug})
      Cnvrg::CLI.is_response_success(response, false)
    end

    # Uploads machine stats, stamping each entry with the current time.
    # NOTE: merge! adds the :time key to the caller's hashes in place.
    def send_machine_stats(stats)
      response = Cnvrg::API.request(@base_resource + "experiment/upload_stats", "POST", {exp_slug: @slug, stats: stats.map { |s| s.merge!({time: Time.now}) }})
      Cnvrg::CLI.is_response_success(response, false)
    end

    # Reports the end of the experiment, trying up to 10 times until the
    # response is judged successful, then removes the temporary exec log.
    #
    # @param end_time [Time, nil] defaults to the current time
    def end(output, exit_status, end_commit, cpu_average, memory_average, end_time: nil)
      tries = 0
      success = false
      end_time ||= Time.now
      while tries < 10 && success.blank?
        # Randomised, growing delay: (tries * rand)**2 seconds; zero on the first try.
        sleep((tries * rand)**2)
        ## this call is super important so we cant let it crash.

        tries += 1
        response = Cnvrg::API.request(@base_resource + "experiment/end", 'POST', {output: output, exp_slug: @slug,
                                                                                exit_status: exit_status, end_commit: end_commit,
                                                                                cpu_average: cpu_average, memory_average: memory_average, end_time: end_time})
        success = Cnvrg::CLI.is_response_success(response, false)
      end

      begin
        FileUtils.rm_rf(["/home/ds/.cnvrg/tmp/exec.log"])
      rescue
      end
    end

    # Command stored by the last successful #start, or nil.
    def get_cmd
      @command
    end

    # Queries the instance metadata address for a spot termination time.
    #
    # NOTE(review): the decision is based on the response body not containing
    # "404", not on the HTTP status, and the request sets no explicit timeout -
    # confirm both are intended.
    #
    # @return [Boolean] true when the body contains neither "404" nor
    #   "Empty reply from server"; false otherwise or on any error
    def restart_spot_instance
      url = URI.parse('http://169.254.169.254/latest/meta-data/spot/termination-time')
      req = Net::HTTP::Get.new(url.to_s)
      res = Net::HTTP.start(url.host, url.port) { |http| http.request(req) }
      body = res.body
      return false if body.include?("Empty reply from server")

      !body.include?("404")
    rescue
      false
    end

    # Notifies the server through the spot-instance check endpoint.
    def send_restart_request(sha1=nil)
      Cnvrg::API.request(@base_resource + "experiment/check_spot_instance", 'POST', {exp_slug: @slug, end_commit: sha1})
    end
  end
end
|