cnvrg 1.9.9.9.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/bin/cnvrg +9 -0
- data/cnvrg.gemspec +47 -0
- data/lib/cnvrg.rb +7 -0
- data/lib/cnvrg/Images.rb +351 -0
- data/lib/cnvrg/api.rb +247 -0
- data/lib/cnvrg/api_v2.rb +14 -0
- data/lib/cnvrg/auth.rb +79 -0
- data/lib/cnvrg/cli.rb +5715 -0
- data/lib/cnvrg/cli/flow.rb +166 -0
- data/lib/cnvrg/cli/library_cli.rb +33 -0
- data/lib/cnvrg/cli/subcommand.rb +28 -0
- data/lib/cnvrg/cli/task.rb +116 -0
- data/lib/cnvrg/colors.rb +8 -0
- data/lib/cnvrg/connect_job_ssh.rb +31 -0
- data/lib/cnvrg/data.rb +335 -0
- data/lib/cnvrg/datafiles.rb +1325 -0
- data/lib/cnvrg/dataset.rb +892 -0
- data/lib/cnvrg/downloader/client.rb +101 -0
- data/lib/cnvrg/downloader/clients/azure_client.rb +45 -0
- data/lib/cnvrg/downloader/clients/gcp_client.rb +50 -0
- data/lib/cnvrg/downloader/clients/s3_client.rb +78 -0
- data/lib/cnvrg/experiment.rb +209 -0
- data/lib/cnvrg/files.rb +1047 -0
- data/lib/cnvrg/flow.rb +137 -0
- data/lib/cnvrg/helpers.rb +422 -0
- data/lib/cnvrg/helpers/agent.rb +188 -0
- data/lib/cnvrg/helpers/executer.rb +213 -0
- data/lib/cnvrg/hyper.rb +21 -0
- data/lib/cnvrg/image.rb +113 -0
- data/lib/cnvrg/image_cli.rb +25 -0
- data/lib/cnvrg/job_cli.rb +73 -0
- data/lib/cnvrg/job_ssh.rb +48 -0
- data/lib/cnvrg/logger.rb +111 -0
- data/lib/cnvrg/org_helpers.rb +5 -0
- data/lib/cnvrg/project.rb +822 -0
- data/lib/cnvrg/result.rb +29 -0
- data/lib/cnvrg/runner.rb +49 -0
- data/lib/cnvrg/ssh.rb +94 -0
- data/lib/cnvrg/storage.rb +128 -0
- data/lib/cnvrg/task.rb +165 -0
- data/lib/cnvrg/version.rb +3 -0
- metadata +460 -0
@@ -0,0 +1,101 @@
|
|
1
|
+
|
2
|
+
module Cnvrg
  # Storage transfer helpers: a base client plus a factory that picks the
  # concrete client for the configured storage backend.
  module Downloader
    OLD_SERVER_VERSION_MESSAGE = "Your server version is not relevant for this cli version please contact support for further help."
    # Upper bound, in seconds, for a single retry delay.
    MAXIMUM_BACKOFF = 64
    # Number of upload attempts made by Client#safe_upload. Overridable with
    # the UPLOAD_FILE_RETRIES environment variable (parsed with to_i).
    RETRIES = (ENV['UPLOAD_FILE_RETRIES'] || 20).to_i
    # NOTE(review): declared on the module, so it only affects objects that
    # include Cnvrg::Downloader. Kept for backward compatibility; the same
    # accessors are declared on Client below.
    attr_accessor :bucket, :client

    # Base class for the storage clients. #download and #upload are
    # placeholders here; the concrete clients provide the implementations.
    class Client
      attr_accessor :bucket, :client

      # @param params [Object] accepted but not used by the base class
      def initialize(params)
        @key = ''
        @iv = ''
        @client = ''
        @bucket = ''
      end

      # Reads the sts resource and returns its lines (the clients destructure
      # the result into key and iv).
      #
      # @param sts_path [String] location handed to Kernel#open
      # @return [Array<String>] content split on newlines
      # @raise [StandardError] "Cant open sts" when the resource cannot be
      #   read or is empty
      def extract_key_iv(sts_path)
        # NOTE(review): Kernel#open is kept because sts_path may not be a
        # plain local file; it also interprets a leading "|" as a command, so
        # sts_path must come from a trusted source.
        sts = begin
          # Block form closes the handle once the content has been read.
          open(sts_path, &:read)
        rescue StandardError
          nil
        end
        raise StandardError.new("Cant open sts") if sts.nil? || sts.strip.empty?
        sts.split("\n")
      end

      # Removes a leading +prefix+ and any leading slashes from +file+.
      # Only a prefix at the very start is removed; later occurrences of the
      # same text inside the path are left untouched.
      #
      # @param prefix [String]
      # @param file [String]
      # @return [String]
      def cut_prefix(prefix, file)
        lead = prefix.to_s
        remainder = file.start_with?(lead) ? file[lead.length..-1] : file
        remainder.sub(%r{\A/+}, '')
      end

      # Placeholder; concrete clients override it. Returns nil.
      def download(storage_path, local_path)
        ### need to be implemented..
      end

      # Placeholder; concrete clients override it. Returns nil.
      def upload(storage_path, local_path)
        ### need to be implemented..
      end

      # Creates +path+, including missing parents when +recursive+ is true.
      def mkdir(path, recursive: false)
        recursive ? FileUtils.mkdir_p(path) : FileUtils.mkdir(path)
      end

      # Makes sure the directory that will hold +local_path+ exists.
      def prepare_download(local_path)
        mkdir(File.dirname(local_path), recursive: true)
      end

      # Decrypts +str+ with this client's key and iv.
      def decrypt(str)
        Cnvrg::Helpers.decrypt(@key, @iv, str)
      end

      # Calls #upload, retrying with a capped, jittered exponential delay.
      # At least one attempt is always made, even when RETRIES is below 1.
      #
      # @return [true] once an attempt succeeds
      # @raise [StandardError] the last upload error when every attempt fails
      def safe_upload(storage_path, local_path)
        max_attempts = [RETRIES, 1].max
        last_error = nil
        1.upto(max_attempts) do |attempt|
          begin
            upload(storage_path, local_path)
            return true
          rescue => e
            last_error = e
            message = "Got error: #{e.class.name} with message: #{e.message} while uploading a single file: #{local_path}, retry: #{attempt} of: #{max_attempts}"
            if attempt < max_attempts
              delay = backoff_time(attempt)
              message += ", next retry in: #{delay} seconds"
              Cnvrg::Logger.log_error_message(message)
              sleep delay
            else
              # No attempt follows, so fail right away instead of sleeping.
              message += ", done retry, continuing to the next file"
              Cnvrg::Logger.log_error_message(message)
            end
          end
        end
        raise last_error
      end

      # Builds the client matching params["storage"].
      #
      # @param params [#as_json] settings; keys are read as strings after as_json
      # @return [Client, nil] nil when the storage value is not handled here
      def self.factory(params)
        params = params.as_json
        case params["storage"]
        when 's3', 'minio'
          return Cnvrg::Downloader::Clients::S3Client.new(
            sts_path: params["path_sts"],
            access_key: params["sts_a"],
            secret: params["sts_s"],
            session_token: params["sts_st"],
            region: params["region"],
            bucket: params["bucket"],
            encryption: params["encryption"],
            endpoint: params["endpoint"],
            storage: params["storage"]
          )
        when 'azure'
          azure_params = params.symbolize_keys.slice(*[:storage_account_name, :storage_access_key, :container, :sts])
          return Cnvrg::Downloader::Clients::AzureClient.new(**azure_params)
        when 'gcp'
          return Cnvrg::Downloader::Clients::GcpClient.new(
            project_id: params["project_id"],
            credentials: params["credentials"],
            bucket_name: params["bucket_name"],
            sts: params["sts"]
          )
        end
      end

      private

      # Random jitter in the range [0, 1) seconds, in millisecond steps.
      def random_number_milliseconds
        rand(1000) / 1000.0
      end

      # Delay before the next retry: 2**n seconds plus jitter, capped at
      # MAXIMUM_BACKOFF.
      def backoff_time(n)
        return [((2**n)+random_number_milliseconds), MAXIMUM_BACKOFF].min
      end
    end
  end
end
|
@@ -0,0 +1,45 @@
|
|
1
|
+
require 'azure/storage/blob'
|
2
|
+
|
3
|
+
module Cnvrg
  module Downloader
    module Clients
      # Storage client that talks to Azure Blob Storage.
      class AzureClient < Client
        # Reads key and iv from +sts+ and uses them to decrypt the account
        # name, access key and container name.
        def initialize(storage_account_name: nil, storage_access_key: nil, container: nil, sts: nil)
          @key, @iv = extract_key_iv(sts)
          @account_name = Cnvrg::Helpers.decrypt(@key, @iv, storage_account_name)
          @access_key = Cnvrg::Helpers.decrypt(@key, @iv, storage_access_key)
          @container = Cnvrg::Helpers.decrypt(@key, @iv, container)
        end

        # Fetches a blob and writes its content to +local_path+.
        #
        # @param storage_path [String] blob name; decrypted first when +decrypt+ is true
        # @param local_path [String] destination file (parent directories are created)
        # @param decrypt [Boolean] whether +storage_path+ arrives encrypted
        # @return [Object] the blob object returned by get_blob
        def download(storage_path, local_path, decrypt: true)
          prepare_download(local_path)
          storage_path = Cnvrg::Helpers.decrypt(@key, @iv, storage_path) if decrypt
          blob, content = client.get_blob(@container, storage_path)
          ::File.open(local_path, 'wb') { |f| f.write(content) }
          blob
        end

        # Uploads +local_path+ as the block blob +storage_path+. Errors from
        # the service propagate to the caller unchanged.
        #
        # @return [Object] whatever create_block_blob returns
        def upload(storage_path, local_path)
          # Block form guarantees the local file handle is closed, including
          # when the upload raises.
          ::File.open(local_path, "rb") do |io|
            client.create_block_blob(@container, storage_path, io)
          end
        end

        # Lists one page of blob names.
        #
        # @return [Array] two elements: the blob names and the continuation
        #   token to pass as +marker+ for the next page
        def fetch_files(prefix: nil, marker: nil, limit: 10000)
          blobs = client.list_blobs(@container, prefix: prefix, max_results: limit, marker: marker)
          next_marker = blobs.continuation_token
          files = blobs.map { |blob| blob.name }
          [files, next_marker]
        end

        private

        # Builds a new blob service object on every call.
        def client
          Azure::Storage::Blob::BlobService.create(storage_account_name: @account_name, storage_access_key: @access_key)
        end
      end
    end
  end
end
|
@@ -0,0 +1,50 @@
|
|
1
|
+
require "google/cloud/storage"
|
2
|
+
|
3
|
+
module Cnvrg
  module Downloader
    module Clients
      # Storage client that talks to Google Cloud Storage.
      class GcpClient < Client
        # Decrypts the connection settings, copies the credentials into a
        # temp file and opens the bucket. Any failure is logged and the
        # process exits with status 1.
        def initialize(project_id: nil, credentials: nil, bucket_name: nil, sts: nil)
          @key, @iv = extract_key_iv(sts)
          @project_id = Cnvrg::Helpers.decrypt(@key, @iv, project_id)
          @credentials_path = Cnvrg::Helpers.decrypt(@key, @iv, credentials)
          @tempfile = nil
          @bucket_name = Cnvrg::Helpers.decrypt(@key, @iv, bucket_name)
          init_gcp_credentials
          @storage = Google::Cloud::Storage.new(project_id: @project_id, credentials: @credentials, retries: 20)
          @bucket = @storage.bucket(@bucket_name)
          # Touch the bucket so an unusable (nil) bucket fails here and is
          # handled by the rescue below.
          @bucket.name
        rescue => e
          Cnvrg::Logger.log_error(e)
          Cnvrg::Logger.log_info("Tried to init gcp client without success.")
          Cnvrg::CLI.log_message("Cannot init client. please contact support to check your bucket credentials.")
          exit(1)
        end

        # Copies the content found at @credentials_path into a Tempfile and
        # stores the temp file's path in @credentials. The Tempfile object is
        # kept in @tempfile so it stays referenced by this client.
        def init_gcp_credentials
          # Block form closes the source handle after reading.
          content = open(@credentials_path, &:read)
          t = Tempfile.new
          t.binmode
          t.write(content)
          t.rewind
          @credentials = t.path
          @tempfile = t
        end

        # Downloads the object whose encrypted name is +storage_path+ into
        # +local_path+ (parent directories are created first).
        def download(storage_path, local_path)
          prepare_download(local_path)
          file = @bucket.file(decrypt(storage_path))
          file.download local_path
        end

        # Uploads +local_path+ as the object +storage_path+. Errors propagate
        # to the caller unchanged.
        def upload(storage_path, local_path)
          @bucket.create_file(local_path, storage_path)
        end
      end
    end
  end
end
|
@@ -0,0 +1,78 @@
|
|
1
|
+
module Cnvrg
  module Downloader
    module Clients
      # Storage client for S3 and for minio, which is reached through the
      # same AWS SDK client with a custom endpoint.
      class S3Client < Client
        # Decrypts the connection settings and prepares the option hashes
        # used to build SDK clients and to upload files.
        def initialize(sts_path: nil, access_key: nil, secret: nil, session_token: nil, region: nil, bucket: nil, encryption: nil, endpoint: nil, storage: nil)
          @key, @iv = extract_key_iv(sts_path)
          @access_key = Cnvrg::Helpers.decrypt(@key, @iv, access_key)
          @secret = Cnvrg::Helpers.decrypt(@key, @iv, secret)
          @session_token = Cnvrg::Helpers.decrypt(@key, @iv, session_token)
          @region = Cnvrg::Helpers.decrypt(@key, @iv, region)
          @bucket_name = Cnvrg::Helpers.decrypt(@key, @iv, bucket)
          @endpoint = Cnvrg::Helpers.decrypt(@key, @iv, endpoint)
          options = {
            :access_key_id => @access_key,
            :secret_access_key => @secret,
            :session_token => @session_token,
            :region => @region,
            :http_open_timeout => 60, :retry_limit => 20
          }
          if storage == 'minio'
            # minio: no session token, path-style addressing, custom endpoint.
            # NOTE(review): ssl_verify_peer is disabled here, so the server
            # certificate is not verified for minio endpoints.
            options.delete(:session_token)
            options = options.merge({
              :force_path_style => true,
              :ssl_verify_peer => false,
              :endpoint => @endpoint,
            })
          end

          @options = options

          #@client = Aws::S3::Client.new(options)
          #@bucket = Aws::S3::Resource.new(client: @client).bucket(@bucket_name)
          @upload_options = {:use_accelerate_endpoint => storage == 's3'}
          if encryption.present?
            @upload_options[:server_side_encryption] = encryption
          end
        end

        # Downloads an object into +local_path+.
        #
        # @param storage_path [String] object key; decrypted first when +decrypt+ is true
        # @param local_path [String] destination file (parent directories are created)
        # @return [Object] the get_object response
        # @raise [StandardError] any error is logged and re-raised
        def download(storage_path, local_path, decrypt: true)
          prepare_download(local_path)
          storage_path = Cnvrg::Helpers.decrypt(@key, @iv, storage_path) if decrypt
          resp = nil
          # Binary mode keeps object bytes intact on platforms that translate
          # newlines in text mode.
          File.open(local_path, 'w+b') do |file|
            resp = aws_client.get_object({bucket: @bucket_name, key: storage_path}, target: file)
          end
          resp
        rescue => e
          Cnvrg::Logger.log_error(e)
          raise e
        end

        # Uploads a local file to the bucket. Errors propagate unchanged.
        #
        # @param storage_path [String] key inside the bucket
        # @param local_path [String] full path of the local file
        # @return [Object] the result of upload_file
        def upload(storage_path, local_path)
          aws_bucket.object(storage_path).upload_file(local_path, @upload_options)
        end

        # Returns the keys of up to +limit+ objects under +prefix+, starting
        # after +marker+.
        def fetch_files(prefix: nil, marker: nil, limit: 1000)
          batch_files = aws_bucket.objects(prefix: prefix, marker: marker).first(limit)
          batch_files.to_a.map(&:key)
        end

        private

        # Builds a new SDK client from the stored options on every call.
        def aws_client
          Aws::S3::Client.new(@options)
        end

        # Bucket resource bound to a freshly built client.
        def aws_bucket
          Aws::S3::Resource.new(client: aws_client).bucket(@bucket_name)
        end
      end
    end
  end
end
|
@@ -0,0 +1,209 @@
|
|
1
|
+
require 'net/http'
|
2
|
+
module Cnvrg
  # Client-side handle for one experiment (or notebook session) of a project.
  # Most methods are thin wrappers around Cnvrg::API.request.
  class Experiment
    attr_reader :slug
    attr_reader :sync_before_terminate
    attr_reader :sync_delay_time
    attr_reader :output_dir

    # @param owner [String] used to build the API resource path
    # @param project_slug [String] used to build the API resource path
    # @param job_id [String, nil] stored as the experiment slug
    def initialize(owner, project_slug, job_id: nil)
      @project_slug = project_slug
      @owner = owner
      @command = nil
      @base_resource = "users/#{owner}/projects/#{project_slug}/"
      @slug = job_id
      @sync_before_terminate = nil
      @sync_delay_time = nil
      @output_dir = nil
    end

    # Job identification values, passed through as_json.
    def as_env
      return {
        CNVRG_JOB_ID: @slug,
        CNVRG_JOB_TYPE: "Experiment",
        CNVRG_PROJECT: @project_slug,
        CNVRG_OWNER: @owner,
      }.as_json
    end

    # Registers the start of an experiment and stores slug, sync settings,
    # output directory and command from the response's "result".
    # The +sync_before_terminate+ and +periodic_sync+ arguments are accepted
    # but not sent with the request.
    #
    # @return [Object] the raw API response
    def start(input, platform, machine_name, start_commit, name, email_notification, machine_activity, script_path,
              sync_before_terminate, periodic_sync)
      res = Cnvrg::API.request(@base_resource + "experiment/start", 'POST',
                               {input: input, platform: platform, machine_name: machine_name, start_commit: start_commit,
                                title: name, email_notification: email_notification, machine_activity: machine_activity, script_path: script_path})
      Cnvrg::CLI.is_response_success(res, false)

      result = res.to_h["result"].to_h
      @slug = result["slug"]
      @sync_before_terminate = result["sync_before_terminate"]
      @sync_delay_time = result["sync_delay_time"]
      @output_dir = result["output_dir"]
      @command = result["command"]

      return res
    end

    # Starts a notebook session and stores the returned id as the slug.
    #
    # @return [Object] the new slug
    def start_notebook_session(kernel, start_commit, token, port, remote, notebook_path)
      res = Cnvrg::API.request(@base_resource + "notebook/start_session", 'POST',
                               {kernel: kernel, start_commit: start_commit,
                                token: token, port: port, remote: remote, notebook_path: notebook_path})
      Cnvrg::CLI.is_response_success(res)

      @slug = res["result"]["id"]
      return @slug
    end

    # Asks the server to stop the given notebook.
    #
    # @return [Object] the raw API response
    def end_notebook_session(notebook_slug)
      res = Cnvrg::API.request(@base_resource + "notebook/#{notebook_slug}/stop", 'GET')
      Cnvrg::CLI.is_response_success(res, false)

      return res
    end

    # Best effort: writes +slug+ to <proj_dir>/.cnvrg/notebook_slug.
    # Any error is swallowed and nil is returned.
    def update_notebook_slug(proj_dir, slug)
      File.write(proj_dir + "/.cnvrg/notebook_slug", slug)
    rescue StandardError
      nil
    end

    # Reads the stored notebook slug with newlines and surrounding
    # whitespace removed.
    #
    # @return [String, nil] nil when the file cannot be read
    def get_notebook_slug(proj_dir)
      # binread opens, reads and closes the file in one call.
      raw = File.binread(proj_dir + "/.cnvrg/notebook_slug")
      raw.gsub("\n", "").to_s.strip
    rescue StandardError
      nil
    end

    # Reads <working_dir>/.cnvrg/machine_activity and converts it with to_i.
    #
    # @return [Integer, nil] nil when the file cannot be read
    def get_machine_activity(working_dir)
      File.binread("#{working_dir}/.cnvrg/machine_activity").to_s.strip.to_i
    rescue StandardError
      nil
    end

    # Sends log lines to the server in batches of 10, pausing one second
    # after each batch. +job_type+ and +job_id+ are accepted but the request
    # always uses "Experiment" and this experiment's slug.
    def job_log(logs, level: 'info', step: nil, job_type: nil, job_id: nil)
      logs = [logs].flatten
      logs.each_slice(10) do |temp_logs|
        Cnvrg::API.request("users/#{@owner}/projects/#{@project_slug}/jobs/experiment/#{@slug}/log", "POST", {job_type: "Experiment", job_id: @slug, logs: temp_logs, log_level: level, step: step, timestamp: Time.now})
        sleep(1)
      end
    end

    # Requests a remote experiment run; every argument is forwarded in the
    # request body under the key shown below.
    #
    # @return [Object] the raw API response
    def exec_remote(command, commit_to_run, instance_type, image_slug, schedule, local_timestamp, grid, path_to_cmd, data, data_commit, periodic_sync,
                    sync_before_terminate, max_time, ds_sync_options=0, output_dir=nil, data_query=nil,
                    git_commit=nil, git_branch=nil, restart_if_stuck=nil, local_folders=nil, title=nil, datasets=nil, prerun: true, requirements: true, recurring: nil,
                    email_notification_error: false, email_notification_success: false, emails_to_notify: nil)
      payload = {
        command: command,
        image_slug: image_slug,
        commit_sha1: commit_to_run,
        instance_type: instance_type,
        schedule: schedule,
        local_timestamp: local_timestamp,
        datasets: datasets,
        grid: grid,
        path_to_cmd: path_to_cmd,
        dataset_slug: data,
        dataset_commit: data_commit,
        max_time: max_time,
        periodic_sync: periodic_sync,
        sync_before_terminate: sync_before_terminate,
        dataset_sync_options: ds_sync_options,
        output_dir: output_dir,
        dataset_query: data_query,
        git_commit: git_commit,
        git_branch: git_branch,
        restart_if_stuck: restart_if_stuck,
        local_folders: local_folders,
        title: title,
        prerun: prerun,
        requirements: requirements,
        recurring: recurring,
        email_notification_error: email_notification_error,
        email_notification_success: email_notification_success,
        emails_to_notify: emails_to_notify
      }
      response = Cnvrg::API.request("users/#{@owner}/projects/#{@project_slug}/experiment/remote", 'POST', payload)

      return response
    end

    # Requests a remote notebook; arguments are forwarded in the request body.
    #
    # @return [Object] the raw API response
    def remote_notebook(instance_type, commit, data, data_commit, notebook_type, ds_sync_options=0, data_query=nil, image = nil, datasets = nil)
      payload = {
        instance_type: instance_type,
        dataset_slug: data,
        dataset_commit: data_commit,
        image_slug: image,
        datasets: datasets,
        commit: commit,
        notebook_type: notebook_type,
        dataset_sync_options: ds_sync_options,
        dataset_query: data_query
      }
      response = Cnvrg::API.request("users/#{@owner}/projects/#{@project_slug}/notebook/remote", 'POST', payload)
      return response
    end

    # Uploads a chunk of temporary log output for this experiment.
    def upload_temp_log(temp_log)
      response = Cnvrg::API.request(@base_resource + "experiment/upload_temp_log", 'POST', {output: temp_log,
                                                                                            exp_slug: @slug})
      Cnvrg::CLI.is_response_success(response, false)
    end

    # Uploads machine stats, stamping every entry with the current time.
    # The caller's hashes are left untouched; stamped copies are sent.
    def send_machine_stats(stats)
      stamped = stats.map { |s| s.merge({time: Time.now}) }
      response = Cnvrg::API.request(@base_resource + "experiment/upload_stats", "POST", {exp_slug: @slug, stats: stamped})
      Cnvrg::CLI.is_response_success(response, false)
    end

    # Reports the end of the experiment, trying up to 10 times until the
    # response is judged successful, then removes the temporary exec log.
    #
    # @param end_time [Time, nil] defaults to the current time
    def end(output, exit_status, end_commit, cpu_average, memory_average, end_time: nil)
      #if remote try to remove
      tries = 0
      success = false
      end_time ||= Time.now
      while tries < 10 && success.blank?
        # Randomised delay that grows quadratically with the attempt number
        # (zero before the first attempt).
        sleep((tries * rand) ** 2)
        ## this call is super important so we cant let it crash.

        tries += 1
        response = Cnvrg::API.request(@base_resource + "experiment/end", 'POST', {output: output, exp_slug: @slug,
                                                                                  exit_status: exit_status, end_commit: end_commit,
                                                                                  cpu_average: cpu_average, memory_average: memory_average, end_time: end_time})
        success = Cnvrg::CLI.is_response_success(response, false)
      end

      begin
        FileUtils.rm_rf(["/home/ds/.cnvrg/tmp/exec.log"])
      rescue
        # best effort cleanup; failures are ignored
      end
    end

    # @return [Object, nil] the command stored by #start, if any
    def get_cmd
      return @command
    end

    # Checks the instance metadata endpoint for a spot termination time.
    # Only a successful (2xx) response whose body mentions neither "404" nor
    # "Empty reply from server" counts as a termination notice; error
    # responses and connection failures yield false.
    #
    # @return [Boolean]
    def restart_spot_instance
      url = URI.parse('http://169.254.169.254/latest/meta-data/spot/termination-time')
      req = Net::HTTP::Get.new(url.to_s)
      res = Net::HTTP.start(url.host, url.port) { |http| http.request(req) }
      return false unless res.is_a?(Net::HTTPSuccess)

      body = res.body
      return false if body.nil?

      !body.include?("404") && !body.include?("Empty reply from server")
    rescue StandardError
      false
    end

    # Notifies the server so it can check the spot instance for this
    # experiment; +sha1+ is sent as end_commit.
    def send_restart_request(sha1=nil)
      Cnvrg::API.request(@base_resource + "experiment/check_spot_instance", 'POST', {exp_slug: @slug, end_commit: sha1})
    end
  end
end
|