cnvrg 1.9.9.9.7

Sign up to get free protection for your applications and to get access to all the features.
Files changed (43) hide show
  1. checksums.yaml +7 -0
  2. data/bin/cnvrg +9 -0
  3. data/cnvrg.gemspec +47 -0
  4. data/lib/cnvrg.rb +7 -0
  5. data/lib/cnvrg/Images.rb +351 -0
  6. data/lib/cnvrg/api.rb +247 -0
  7. data/lib/cnvrg/api_v2.rb +14 -0
  8. data/lib/cnvrg/auth.rb +79 -0
  9. data/lib/cnvrg/cli.rb +5715 -0
  10. data/lib/cnvrg/cli/flow.rb +166 -0
  11. data/lib/cnvrg/cli/library_cli.rb +33 -0
  12. data/lib/cnvrg/cli/subcommand.rb +28 -0
  13. data/lib/cnvrg/cli/task.rb +116 -0
  14. data/lib/cnvrg/colors.rb +8 -0
  15. data/lib/cnvrg/connect_job_ssh.rb +31 -0
  16. data/lib/cnvrg/data.rb +335 -0
  17. data/lib/cnvrg/datafiles.rb +1325 -0
  18. data/lib/cnvrg/dataset.rb +892 -0
  19. data/lib/cnvrg/downloader/client.rb +101 -0
  20. data/lib/cnvrg/downloader/clients/azure_client.rb +45 -0
  21. data/lib/cnvrg/downloader/clients/gcp_client.rb +50 -0
  22. data/lib/cnvrg/downloader/clients/s3_client.rb +78 -0
  23. data/lib/cnvrg/experiment.rb +209 -0
  24. data/lib/cnvrg/files.rb +1047 -0
  25. data/lib/cnvrg/flow.rb +137 -0
  26. data/lib/cnvrg/helpers.rb +422 -0
  27. data/lib/cnvrg/helpers/agent.rb +188 -0
  28. data/lib/cnvrg/helpers/executer.rb +213 -0
  29. data/lib/cnvrg/hyper.rb +21 -0
  30. data/lib/cnvrg/image.rb +113 -0
  31. data/lib/cnvrg/image_cli.rb +25 -0
  32. data/lib/cnvrg/job_cli.rb +73 -0
  33. data/lib/cnvrg/job_ssh.rb +48 -0
  34. data/lib/cnvrg/logger.rb +111 -0
  35. data/lib/cnvrg/org_helpers.rb +5 -0
  36. data/lib/cnvrg/project.rb +822 -0
  37. data/lib/cnvrg/result.rb +29 -0
  38. data/lib/cnvrg/runner.rb +49 -0
  39. data/lib/cnvrg/ssh.rb +94 -0
  40. data/lib/cnvrg/storage.rb +128 -0
  41. data/lib/cnvrg/task.rb +165 -0
  42. data/lib/cnvrg/version.rb +3 -0
  43. metadata +460 -0
@@ -0,0 +1,101 @@
1
+
2
module Cnvrg
  module Downloader
    OLD_SERVER_VERSION_MESSAGE = "Your server version is not relevant for this cli version please contact support for further help."

    # Upper bound, in seconds, for a single retry delay (see Client#backoff_time).
    MAXIMUM_BACKOFF = 64

    # Maximum number of attempts Client#safe_upload makes for one file.
    # Taken from the UPLOAD_FILE_RETRIES environment variable; falls back to 20
    # when the variable is unset, non-numeric or not positive, so that at least
    # one upload attempt is always made.
    RETRIES = begin
      configured = ENV['UPLOAD_FILE_RETRIES'].to_i
      configured > 0 ? configured : 20
    end

    # NOTE(review): this is declared on the module, not on Client, so it does not
    # add accessors to Client instances. Kept as-is for compatibility; confirm
    # whether it was meant to live inside the class.
    attr_accessor :bucket, :client

    # Base class for the storage clients. Provides shared helpers (key/iv
    # loading, path handling, retrying uploads) and the `factory` constructor.
    # Subclasses are expected to override #download and #upload.
    class Client
      # @param params [Object] accepted for interface compatibility; not used here.
      def initialize(params)
        @key = ''
        @iv = ''
        @client = ''
        @bucket = ''
      end

      # Reads the sts resource and returns its lines.
      #
      # Kernel#open is kept (instead of File.read) so whatever `open` accepted
      # before is still accepted -- presumably this may be a URL when open-uri is
      # loaded; TODO confirm. The block form closes the handle after reading.
      #
      # @param sts_path [String] location of the sts resource
      # @return [Array<String>] the content split on newlines
      # @raise [StandardError] when the resource cannot be read or is blank
      def extract_key_iv(sts_path)
        sts = begin
          open(sts_path) { |io| io.read }
        rescue StandardError
          nil
        end
        raise StandardError.new("Cant open sts") if sts.nil? || sts.strip.empty?
        sts.split("\n")
      end

      # Removes +prefix+ from the beginning of +file+ (only there -- later
      # occurrences inside the path are left intact) and then drops any leading
      # slashes from the result.
      #
      # @param prefix [String, nil] leading part to remove; nil is treated as ""
      # @param file [String] path to trim
      # @return [String] a new string, +file+ is not modified
      def cut_prefix(prefix, file)
        prefix = prefix.to_s
        remainder = file.start_with?(prefix) ? file[prefix.length..-1] : file
        remainder.sub(%r{\A/+}, '')
      end

      # Placeholder; concrete clients override this.
      def download(storage_path, local_path)
        ### need to be implemented..
      end

      # Placeholder; concrete clients override this.
      def upload(storage_path, local_path)
        ### need to be implemented..
      end

      # Creates +path+; with +recursive+ also creates missing parents and does
      # not fail when the directory already exists (FileUtils.mkdir_p).
      def mkdir(path, recursive: false)
        recursive ? FileUtils.mkdir_p(path) : FileUtils.mkdir(path)
      end

      # Makes sure the directory that will contain +local_path+ exists.
      def prepare_download(local_path)
        mkdir(File.dirname(local_path), recursive: true)
      end

      # Delegates to Cnvrg::Helpers.decrypt using this client's key and iv.
      def decrypt(str)
        Cnvrg::Helpers.decrypt(@key, @iv, str)
      end

      # Calls #upload, retrying up to RETRIES times when it raises a
      # StandardError. Every failure is logged; between attempts the method
      # sleeps for #backoff_time seconds. No sleep happens after the final
      # failed attempt -- the last error is re-raised immediately.
      #
      # @return [true] when an attempt succeeds
      # @raise [StandardError] the error of the last attempt when all fail
      def safe_upload(storage_path, local_path)
        1.upto(RETRIES) do |attempt|
          begin
            upload(storage_path, local_path)
            return true
          rescue => e
            last_attempt = attempt >= RETRIES
            backoff_time_seconds = backoff_time(attempt)

            message = "Got error: #{e.class.name} with message: #{e.message} while uploading a single file: #{local_path}, retry: #{attempt} of: #{RETRIES}"
            if last_attempt
              message += ", done retry, continuing to the next file"
            else
              message += ", next retry in: #{backoff_time_seconds} seconds"
            end
            Cnvrg::Logger.log_error_message(message)

            raise if last_attempt
            sleep backoff_time_seconds
          end
        end
        true
      end

      # Builds the concrete client selected by params["storage"].
      #
      # @param params [#as_json] connection settings; converted with `as_json`
      #   so keys are read as strings
      # @return [Client, nil] an S3Client ('s3'/'minio'), AzureClient ('azure')
      #   or GcpClient ('gcp'); nil for any other storage value
      def self.factory(params)
        params = params.as_json
        case params["storage"]
        when 's3', 'minio'
          return Cnvrg::Downloader::Clients::S3Client.new(sts_path: params["path_sts"], access_key: params["sts_a"], secret: params["sts_s"], session_token: params["sts_st"], region: params["region"], bucket: params["bucket"], encryption: params["encryption"], endpoint: params["endpoint"], storage: params["storage"])
        when 'azure'
          azure_params = params.symbolize_keys.slice(*[:storage_account_name, :storage_access_key, :container, :sts])
          return Cnvrg::Downloader::Clients::AzureClient.new(**azure_params)
        when 'gcp'
          return Cnvrg::Downloader::Clients::GcpClient.new(project_id: params["project_id"], credentials: params["credentials"], bucket_name: params["bucket_name"], sts: params["sts"])
        end
      end

      private

      # Random jitter in the range [0, 1) seconds, in millisecond steps.
      def random_number_milliseconds
        rand(1000) / 1000.0
      end

      # Delay before retry number +n+: 2**n seconds plus jitter, capped at
      # MAXIMUM_BACKOFF.
      def backoff_time(n)
        return [((2**n)+random_number_milliseconds), MAXIMUM_BACKOFF].min
      end

    end
  end
end
@@ -0,0 +1,45 @@
1
+ require 'azure/storage/blob'
2
+
3
module Cnvrg
  module Downloader
    module Clients
      # Storage client backed by Azure Blob Storage.
      class AzureClient < Client
        # All connection values arrive encrypted and are decrypted with the
        # key/iv read from +sts+.
        #
        # @param storage_account_name [String] encrypted account name
        # @param storage_access_key [String] encrypted access key
        # @param container [String] encrypted container name
        # @param sts [String] location of the key/iv resource
        def initialize(storage_account_name: nil, storage_access_key: nil, container: nil, sts: nil)
          @key, @iv = extract_key_iv(sts)
          @account_name = Cnvrg::Helpers.decrypt(@key, @iv, storage_account_name)
          @access_key = Cnvrg::Helpers.decrypt(@key, @iv, storage_access_key)
          @container = Cnvrg::Helpers.decrypt(@key, @iv, container)
        end

        # Fetches a blob and writes its content to +local_path+ (binary mode).
        #
        # @param storage_path [String] blob name, encrypted unless +decrypt+ is false
        # @param local_path [String] destination file; parent dirs are created
        # @param decrypt [Boolean] whether +storage_path+ must be decrypted first
        # @return the blob object returned by the Azure client
        def download(storage_path, local_path, decrypt: true)
          prepare_download(local_path)
          storage_path = Cnvrg::Helpers.decrypt(@key, @iv, storage_path) if decrypt
          blob, content = client.get_blob(@container, storage_path)
          ::File.open(local_path, 'wb') { |f| f.write(content) }
          blob
        end

        # Uploads the file at +local_path+ as a block blob named +storage_path+.
        # The local file is opened in block form so its handle is closed once
        # the upload returns or raises; errors propagate to the caller.
        #
        # @return whatever create_block_blob returns
        def upload(storage_path, local_path)
          File.open(local_path, "rb") do |io|
            client.create_block_blob(@container, storage_path, io)
          end
        end

        # Lists blob names in the container.
        #
        # @param prefix [String, nil] only names starting with this prefix
        # @param marker [String, nil] continuation token from a previous call
        # @param limit [Integer] maximum number of results requested
        # @return [Array(Array<String>, Object)] blob names and the continuation
        #   token for the next page
        def fetch_files(prefix: nil, marker: nil, limit: 10000)
          blobs = client.list_blobs(@container, prefix: prefix, max_results: limit, marker: marker)
          next_marker = blobs.continuation_token
          files = blobs.map(&:name)
          [files, next_marker]
        end

        private

        # Builds a new BlobService on every call (not memoized).
        def client
          Azure::Storage::Blob::BlobService.create(storage_account_name: @account_name, storage_access_key: @access_key)
        end
      end
    end
  end
end
@@ -0,0 +1,50 @@
1
+ require "google/cloud/storage"
2
+
3
module Cnvrg
  module Downloader
    module Clients
      # Storage client backed by Google Cloud Storage.
      class GcpClient < Client
        # Decrypts the connection values with the key/iv read from +sts+, then
        # connects to the bucket. Any StandardError during setup is logged and
        # the process exits with status 1.
        #
        # @param project_id [String] encrypted project id
        # @param credentials [String] encrypted location of the credentials file
        # @param bucket_name [String] encrypted bucket name
        # @param sts [String] location of the key/iv resource
        def initialize(project_id: nil, credentials: nil, bucket_name: nil, sts: nil)
          @key, @iv = extract_key_iv(sts)
          @project_id = Cnvrg::Helpers.decrypt(@key, @iv, project_id)
          @credentials_path = Cnvrg::Helpers.decrypt(@key, @iv, credentials)
          @tempfile = nil
          @bucket_name = Cnvrg::Helpers.decrypt(@key, @iv, bucket_name)
          init_gcp_credentials
          @storage = Google::Cloud::Storage.new(project_id: @project_id, credentials: @credentials, retries: 20)
          @bucket = @storage.bucket(@bucket_name)
          # Touch the bucket so an unusable lookup result fails here, inside the
          # rescue below -- presumably guards against a nil bucket; TODO confirm.
          @bucket.name
        rescue => e
          Cnvrg::Logger.log_error(e)
          Cnvrg::Logger.log_info("Tried to init gcp client without success.")
          Cnvrg::CLI.log_message("Cannot init client. please contact support to check your bucket credentials.")
          exit(1)
        end

        # Copies the credentials resource into a Tempfile and stores its path in
        # @credentials. The Tempfile is kept in @tempfile so it stays referenced
        # for the lifetime of the client. The source handle is closed after
        # reading.
        def init_gcp_credentials
          t = Tempfile.new
          f = open(@credentials_path) { |io| io.read }
          t.binmode
          t.write(f)
          t.rewind
          @credentials = t.path
          @tempfile = t
        end

        # Downloads an object to +local_path+.
        #
        # @param storage_path [String] object name, encrypted unless +decrypt+ is false
        # @param local_path [String] destination file; parent dirs are created
        # @param decrypt [Boolean] whether +storage_path+ must be decrypted first
        #   (defaults to true, matching the previous behavior and the keyword
        #   accepted by the Azure and S3 clients)
        def download(storage_path, local_path, decrypt: true)
          prepare_download(local_path)
          storage_path = Cnvrg::Helpers.decrypt(@key, @iv, storage_path) if decrypt
          file = @bucket.file(storage_path)
          file.download local_path
        end

        # Uploads the file at +local_path+ to +storage_path+ in the bucket.
        # Errors propagate to the caller.
        def upload(storage_path, local_path)
          @bucket.create_file(local_path, storage_path)
        end
      end
    end
  end
end
@@ -0,0 +1,78 @@
1
module Cnvrg
  module Downloader
    module Clients
      # Storage client for S3 and S3-compatible (minio) object stores.
      class S3Client < Client
        # Decrypts the connection values with the key/iv read from +sts_path+
        # and prepares the option hashes used to build AWS clients later.
        #
        # @param sts_path [String] location of the key/iv resource
        # @param access_key, secret, session_token, region, bucket, endpoint
        #   [String] encrypted connection values
        # @param encryption [String, nil] server-side encryption setting added
        #   to the upload options when present
        # @param storage [String] 's3' or 'minio'
        def initialize(sts_path: nil, access_key: nil, secret: nil, session_token: nil, region: nil, bucket: nil, encryption: nil, endpoint: nil, storage: nil)
          @key, @iv = extract_key_iv(sts_path)
          @access_key = Cnvrg::Helpers.decrypt(@key, @iv, access_key)
          @secret = Cnvrg::Helpers.decrypt(@key, @iv, secret)
          @session_token = Cnvrg::Helpers.decrypt(@key, @iv, session_token)
          @region = Cnvrg::Helpers.decrypt(@key, @iv, region)
          @bucket_name = Cnvrg::Helpers.decrypt(@key, @iv, bucket)
          @endpoint = Cnvrg::Helpers.decrypt(@key, @iv, endpoint)
          options = {
            :access_key_id => @access_key,
            :secret_access_key => @secret,
            :session_token => @session_token,
            :region => @region,
            :http_open_timeout => 60, :retry_limit => 20
          }
          if storage == 'minio'
            # minio: no session token, path-style addressing, custom endpoint.
            # NOTE(review): TLS peer verification is disabled here -- confirm
            # this is still required.
            options.delete(:session_token)
            options = options.merge({
              :force_path_style => true,
              :ssl_verify_peer => false,
              :endpoint => @endpoint,
            })
          end

          @options = options

          # Clients are built on demand in #aws_client / #aws_bucket instead of
          # being stored here:
          #@client = Aws::S3::Client.new(options)
          #@bucket = Aws::S3::Resource.new(client: @client).bucket(@bucket_name)
          @upload_options = {:use_accelerate_endpoint => storage == 's3'}
          if encryption.present?
            @upload_options[:server_side_encryption] = encryption
          end
        end

        # Downloads an object into +local_path+. The target is opened in binary
        # read/write mode so object bytes are written unchanged on every
        # platform. Errors are logged and re-raised.
        #
        # @param storage_path [String] object key, encrypted unless +decrypt+ is false
        # @param local_path [String] destination file; parent dirs are created
        # @param decrypt [Boolean] whether +storage_path+ must be decrypted first
        # @return the response of get_object
        def download(storage_path, local_path, decrypt: true)
          prepare_download(local_path)
          storage_path = Cnvrg::Helpers.decrypt(@key, @iv, storage_path) if decrypt
          resp = nil
          File.open(local_path, 'w+b') do |file|
            resp = aws_client.get_object({bucket: @bucket_name, key: storage_path}, target: file)
          end
          resp
        rescue => e
          Cnvrg::Logger.log_error(e)
          raise
        end

        # Uploads a local file.
        #
        # @param storage_path [String] key inside the bucket
        # @param local_path [String] full path of the local file
        # @return the result of Aws::S3::Object#upload_file
        def upload(storage_path, local_path)
          ### storage path is the path inside s3 (after the bucket)
          # local path is fullpath for the file /home/ubuntu/user.../hazilim.py
          aws_bucket.object(storage_path).upload_file(local_path, @upload_options)
        end

        # Lists up to +limit+ object keys.
        # NOTE(review): unlike AzureClient#fetch_files this returns only the
        # keys, without a continuation marker.
        #
        # @return [Array<String>] object keys
        def fetch_files(prefix: nil, marker: nil, limit: 1000)
          batch_files = aws_bucket.objects(prefix: prefix, marker: marker).first(limit)
          batch_files.to_a.map(&:key)
        end

        private

        # Builds a new Aws::S3::Client from the stored options on every call.
        def aws_client
          Aws::S3::Client.new(@options)
        end

        # Bucket resource bound to a freshly built client.
        def aws_bucket
          Aws::S3::Resource.new(client: aws_client).bucket(@bucket_name)
        end
      end
    end
  end
end
@@ -0,0 +1,209 @@
1
+ require 'net/http'
2
module Cnvrg
  # Client-side handle for an experiment (or notebook session) of a project.
  # Wraps the related Cnvrg::API requests and a few local state files kept
  # under "<dir>/.cnvrg".
  class Experiment
    attr_reader :slug
    attr_reader :sync_before_terminate
    attr_reader :sync_delay_time
    attr_reader :output_dir

    # @param owner [String] owner used in the API paths
    # @param project_slug [String] project used in the API paths
    # @param job_id [String, nil] existing experiment slug, if any
    def initialize(owner, project_slug, job_id: nil)
      @project_slug = project_slug
      @owner = owner
      @command = nil
      @base_resource = "users/#{owner}/projects/#{project_slug}/"
      @slug = job_id
      @sync_before_terminate = nil
      @sync_delay_time = nil
      @output_dir = nil
    end

    # Environment-style description of this job (via `as_json`).
    def as_env
      return {
          CNVRG_JOB_ID: @slug,
          CNVRG_JOB_TYPE: "Experiment",
          CNVRG_PROJECT: @project_slug,
          CNVRG_OWNER: @owner,
      }.as_json
    end

    # Registers a new experiment and stores the slug, sync settings, output
    # dir and command from the response.
    # NOTE(review): +sync_before_terminate+ and +periodic_sync+ are accepted
    # but not sent in the request.
    #
    # @return the raw API response
    def start(input, platform, machine_name, start_commit, name, email_notification, machine_activity,script_path,
              sync_before_terminate, periodic_sync)

      res = Cnvrg::API.request(@base_resource + "experiment/start", 'POST',
                               {input: input, platform: platform, machine_name: machine_name, start_commit: start_commit,
                                title: name, email_notification: email_notification, machine_activity: machine_activity,script_path:script_path})
      Cnvrg::CLI.is_response_success(res,false)

      result = res.to_h["result"].to_h
      @slug = result["slug"]
      @sync_before_terminate = result["sync_before_terminate"]
      @sync_delay_time = result["sync_delay_time"]
      @output_dir = result["output_dir"]
      @command = result["command"] rescue nil

      return res
    end

    # Starts a notebook session and stores the id from the response as the slug.
    #
    # @return the new session id
    def start_notebook_session(kernel, start_commit, token, port, remote, notebook_path)
      res = Cnvrg::API.request(@base_resource + "notebook/start_session", 'POST',
                               {kernel: kernel, start_commit: start_commit,
                                token: token, port: port, remote: remote, notebook_path: notebook_path})
      Cnvrg::CLI.is_response_success(res)

      @slug = res["result"]["id"]

      return @slug
    end

    # Stops the given notebook session.
    #
    # @return the raw API response
    def end_notebook_session(notebook_slug)
      res = Cnvrg::API.request(@base_resource + "notebook/#{notebook_slug}/stop", 'GET')
      Cnvrg::CLI.is_response_success(res,false)

      return res
    end

    # Best-effort write of +slug+ to "<proj_dir>/.cnvrg/notebook_slug";
    # any error is deliberately ignored (nil is returned).
    def update_notebook_slug(proj_dir, slug)
      file = proj_dir+"/.cnvrg/notebook_slug"
      FileUtils.touch file

      File.open(file, "w+") { |f| f.write slug }
    rescue
      nil
    end

    # Reads the slug stored by #update_notebook_slug, with newlines removed
    # and surrounding whitespace stripped.
    #
    # @return [String, nil] the slug, or nil when the file cannot be read
    def get_notebook_slug(proj_dir)
      notebook_slug = File.binread(proj_dir + "/.cnvrg/notebook_slug")
      notebook_slug = notebook_slug.gsub("\n", "")
      return notebook_slug.to_s.strip
    rescue
      return nil
    end

    # Reads "<working_dir>/.cnvrg/machine_activity" and converts it with to_i.
    #
    # @return [Integer, nil] the id, or nil when the file cannot be read
    def get_machine_activity(working_dir)
      machine_activity = File.binread("#{working_dir}/.cnvrg/machine_activity")
      return machine_activity.to_s.strip.to_i
    rescue
      return nil
    end

    # Posts log lines for this experiment in batches of 10, pausing one second
    # after each batch.
    # NOTE(review): +job_type+ and +job_id+ are accepted but the request always
    # uses "Experiment" and this experiment's slug.
    #
    # @param logs [String, Array<String>] one line or a (nested) list of lines
    def job_log(logs, level: 'info', step: nil, job_type: nil, job_id: nil)
      logs = [logs].flatten
      logs.each_slice(10).each do |temp_logs|
        Cnvrg::API.request("users/#{@owner}/projects/#{@project_slug}/jobs/experiment/#{@slug}/log", "POST", {job_type: "Experiment", job_id: @slug, logs: temp_logs, log_level: level, step: step, timestamp: Time.now})
        sleep(1)
      end
    end

    # Requests a remote experiment run; every argument is forwarded in the
    # request payload.
    #
    # @return the raw API response
    def exec_remote(command, commit_to_run, instance_type, image_slug,schedule,local_timestamp, grid,path_to_cmd,data, data_commit,periodic_sync,
                    sync_before_terminate, max_time, ds_sync_options=0,output_dir=nil,data_query=nil,
                    git_commit=nil, git_branch=nil, restart_if_stuck=nil, local_folders=nil,title=nil, datasets=nil, prerun: true, requirements: true, recurring: nil,
                    email_notification_error: false, email_notification_success: false, emails_to_notify: nil)
      response = Cnvrg::API.request("users/#{@owner}/projects/#{@project_slug}/experiment/remote", 'POST', {command: command, image_slug: image_slug,
                                                                                                          commit_sha1: commit_to_run,
                                                                                                          instance_type: instance_type,
                                                                                                          schedule:schedule,
                                                                                                          local_timestamp:local_timestamp,
                                                                                                          datasets: datasets,
                                                                                                          grid: grid,
                                                                                                          path_to_cmd:path_to_cmd,dataset_slug:data,
                                                                                                          dataset_commit: data_commit,max_time:max_time,
                                                                                                          periodic_sync:periodic_sync, sync_before_terminate:sync_before_terminate,
                                                                                                          dataset_sync_options:ds_sync_options,output_dir:output_dir,
                                                                                                          dataset_query:data_query,git_commit:git_commit,git_branch:git_branch,
                                                                                                          restart_if_stuck:restart_if_stuck, local_folders: local_folders, title:title,
                                                                                                          prerun: prerun, requirements: requirements, recurring: recurring,
                                                                                                          email_notification_error: email_notification_error, email_notification_success: email_notification_success,
                                                                                                          emails_to_notify: emails_to_notify})

      return response
    end

    # Requests a remote notebook; every argument is forwarded in the payload.
    #
    # @return the raw API response
    def remote_notebook(instance_type, commit, data, data_commit, notebook_type,ds_sync_options=0,data_query=nil, image = nil, datasets = nil)
      response = Cnvrg::API.request("users/#{@owner}/projects/#{@project_slug}/notebook/remote", 'POST', {instance_type: instance_type,dataset_slug:data,
                                                                                                        dataset_commit: data_commit,image_slug:image,
                                                                                                        datasets: datasets,
                                                                                                        commit:commit,notebook_type:notebook_type,dataset_sync_options:ds_sync_options,
                                                                                                        dataset_query:data_query})
      return response
    end

    # Uploads a chunk of temporary log output for this experiment.
    def upload_temp_log(temp_log)
      response = Cnvrg::API.request(@base_resource + "experiment/upload_temp_log", 'POST', {output: temp_log,
                                                                                           exp_slug: @slug})
      Cnvrg::CLI.is_response_success(response,false)
    end

    # Sends machine statistics, adding the current time to each entry.
    # The entries are copied (`merge`), so the caller's hashes are not modified.
    #
    # @param stats [Array<Hash>] one hash per sample
    def send_machine_stats(stats)
      stamped = stats.map { |s| s.merge({time: Time.now}) }
      response = Cnvrg::API.request(@base_resource + "experiment/upload_stats", "POST", {exp_slug: @slug, stats: stamped})
      Cnvrg::CLI.is_response_success(response,false)
    end

    # Reports the end of the experiment, trying up to 10 times until the
    # response is accepted, with a randomized, growing delay before each try
    # (no delay before the first). Afterwards removes the temporary exec log.
    #
    # @param end_time [Time, nil] defaults to the current time
    def end(output, exit_status, end_commit, cpu_average, memory_average, end_time: nil)
      #if remote try to remove
      tries = 0
      success = false
      end_time ||= Time.now
      while tries < 10 && success.blank?
        sleep((tries*rand) ** 2) ### randomized backoff that grows with the attempt number
        ## this call is super important so we cant let it crash.

        tries += 1
        response = Cnvrg::API.request(@base_resource + "experiment/end", 'POST', {output: output, exp_slug: @slug,
                                                                                 exit_status: exit_status, end_commit: end_commit,
                                                                                 cpu_average: cpu_average, memory_average: memory_average, end_time: end_time})
        success = Cnvrg::CLI.is_response_success(response,false)
      end

      begin
        FileUtils.rm_rf(["/home/ds/.cnvrg/tmp/exec.log"])
      rescue

      end
    end

    # @return the command received from #start, or nil
    def get_cmd
      return @command
    end

    # Asks the instance metadata endpoint whether a spot termination time has
    # been published. Only a successful (2xx) response whose body contains
    # neither "404" nor "Empty reply from server" counts; error responses and
    # any exception (e.g. endpoint unreachable) yield false.
    #
    # @return [Boolean] true when a termination notice appears to be present
    def restart_spot_instance
      url = URI.parse('http://169.254.169.254/latest/meta-data/spot/termination-time')
      req = Net::HTTP::Get.new(url.to_s)
      res = Net::HTTP.start(url.host, url.port) { |http| http.request(req) }
      return false unless res.is_a?(Net::HTTPSuccess)

      body = res.body
      return false if body.nil?

      !body.include?("404") && !body.include?("Empty reply from server")
    rescue
      false
    end

    # Sends the spot-instance check request for this experiment.
    #
    # @param sha1 [String, nil] sent as end_commit
    def send_restart_request(sha1=nil)
      Cnvrg::API.request(@base_resource + "experiment/check_spot_instance", 'POST', {exp_slug: @slug, end_commit: sha1})
    end
  end
end