cnvrg 1.9.9.9.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. checksums.yaml +7 -0
  2. data/bin/cnvrg +9 -0
  3. data/cnvrg.gemspec +47 -0
  4. data/lib/cnvrg.rb +7 -0
  5. data/lib/cnvrg/Images.rb +351 -0
  6. data/lib/cnvrg/api.rb +247 -0
  7. data/lib/cnvrg/api_v2.rb +14 -0
  8. data/lib/cnvrg/auth.rb +79 -0
  9. data/lib/cnvrg/cli.rb +5715 -0
  10. data/lib/cnvrg/cli/flow.rb +166 -0
  11. data/lib/cnvrg/cli/library_cli.rb +33 -0
  12. data/lib/cnvrg/cli/subcommand.rb +28 -0
  13. data/lib/cnvrg/cli/task.rb +116 -0
  14. data/lib/cnvrg/colors.rb +8 -0
  15. data/lib/cnvrg/connect_job_ssh.rb +31 -0
  16. data/lib/cnvrg/data.rb +335 -0
  17. data/lib/cnvrg/datafiles.rb +1325 -0
  18. data/lib/cnvrg/dataset.rb +892 -0
  19. data/lib/cnvrg/downloader/client.rb +101 -0
  20. data/lib/cnvrg/downloader/clients/azure_client.rb +45 -0
  21. data/lib/cnvrg/downloader/clients/gcp_client.rb +50 -0
  22. data/lib/cnvrg/downloader/clients/s3_client.rb +78 -0
  23. data/lib/cnvrg/experiment.rb +209 -0
  24. data/lib/cnvrg/files.rb +1047 -0
  25. data/lib/cnvrg/flow.rb +137 -0
  26. data/lib/cnvrg/helpers.rb +422 -0
  27. data/lib/cnvrg/helpers/agent.rb +188 -0
  28. data/lib/cnvrg/helpers/executer.rb +213 -0
  29. data/lib/cnvrg/hyper.rb +21 -0
  30. data/lib/cnvrg/image.rb +113 -0
  31. data/lib/cnvrg/image_cli.rb +25 -0
  32. data/lib/cnvrg/job_cli.rb +73 -0
  33. data/lib/cnvrg/job_ssh.rb +48 -0
  34. data/lib/cnvrg/logger.rb +111 -0
  35. data/lib/cnvrg/org_helpers.rb +5 -0
  36. data/lib/cnvrg/project.rb +822 -0
  37. data/lib/cnvrg/result.rb +29 -0
  38. data/lib/cnvrg/runner.rb +49 -0
  39. data/lib/cnvrg/ssh.rb +94 -0
  40. data/lib/cnvrg/storage.rb +128 -0
  41. data/lib/cnvrg/task.rb +165 -0
  42. data/lib/cnvrg/version.rb +3 -0
  43. metadata +460 -0
@@ -0,0 +1,101 @@
1
+
2
module Cnvrg
  module Downloader
    OLD_SERVER_VERSION_MESSAGE = "Your server version is not relevant for this cli version please contact support for further help."
    # Upper bound, in seconds, for a single pause between upload attempts.
    MAXIMUM_BACKOFF = 64
    # Number of upload attempts made by Client#safe_upload. Read once, at load
    # time, from the UPLOAD_FILE_RETRIES environment variable (default 20).
    RETRIES = (ENV['UPLOAD_FILE_RETRIES'] || 20).to_i
    # NOTE(review): declared on the Downloader module itself, so these accessors
    # only exist on objects that include Cnvrg::Downloader, not on Client
    # instances. Presumably meant for Client - confirm before moving.
    attr_accessor :bucket, :client

    # Base class for the storage back-ends (S3/MinIO, Azure, GCP).
    #
    # Sub-classes are expected to override #download and #upload; the versions
    # defined here are empty placeholders.
    class Client
      # @param params [Object] unused by the base class
      def initialize(params)
        @key = ''
        @iv = ''
        @client = ''
        @bucket = ''
      end

      # Reads the "sts" resource and returns its lines.
      #
      # Sub-classes in this package assign the result as `@key, @iv = ...`, so
      # the first two lines are used as key and iv.
      #
      # @param sts_path [String] location handed to Kernel#open
      # @return [Array<String>] content split on "\n"
      # @raise [StandardError] when the resource cannot be read or is blank
      def extract_key_iv(sts_path)
        sts = begin
          # Block form closes the handle as soon as the content has been read.
          open(sts_path) { |io| io.read }
        rescue StandardError
          nil
        end
        raise StandardError.new("Cant open sts") if sts.blank?
        sts.split("\n")
      end

      # Removes a leading +prefix+ from +file+ and then any leading slashes.
      #
      # Only a prefix at the very start of the name is removed; later
      # occurrences of the same text inside the name are left untouched.
      #
      # @param prefix [String]
      # @param file [String]
      # @return [String]
      def cut_prefix(prefix, file)
        remainder = file.start_with?(prefix) ? file[prefix.length..-1] : file
        remainder.sub(%r{\A/+}, '')
      end

      # Placeholder; concrete clients override it.
      def download(storage_path, local_path)
        ### need to be implemented..
      end

      # Placeholder; concrete clients override it.
      def upload(storage_path, local_path)
        ### need to be implemented..
      end

      # Creates +path+; with +recursive+ missing parents are created as well.
      def mkdir(path, recursive: false)
        recursive ? FileUtils.mkdir_p(path) : FileUtils.mkdir(path)
      end

      # Makes sure the directory that will hold +local_path+ exists.
      def prepare_download(local_path)
        mkdir(File.dirname(local_path), recursive: true)
      end

      # Decrypts +str+ with the key/iv held by this client.
      def decrypt(str)
        Cnvrg::Helpers.decrypt(@key, @iv, str)
      end

      # Calls #upload, retrying with exponential back-off when it raises.
      #
      # At least one attempt is always made, even when RETRIES is configured
      # as zero or a negative number. No pause is taken after the final failed
      # attempt; the last error is raised immediately instead.
      #
      # @param storage_path [String] forwarded to #upload
      # @param local_path [String] forwarded to #upload
      # @return [true] when an attempt succeeded
      # @raise [StandardError] the error of the last attempt when all failed
      def safe_upload(storage_path, local_path)
        attempts = [RETRIES, 1].max
        last_error = nil

        (1..attempts).each do |n|
          begin
            upload(storage_path, local_path)
            last_error = nil
            break
          rescue => e
            last_error = e
            more_attempts = n < attempts
            pause = backoff_time(n)

            message = "Got error: #{e.class.name} with message: #{e.message} while uploading a single file: #{local_path}, retry: #{n} of: #{attempts}"
            message += if more_attempts
                         ", next retry in: #{pause} seconds"
                       else
                         ", done retry, continuing to the next file"
                       end
            Cnvrg::Logger.log_error_message(message)

            # Waiting is only useful when another attempt follows.
            sleep pause if more_attempts
          end
        end

        raise last_error if last_error
        true
      end

      # Builds the concrete client selected by params["storage"].
      #
      # @param params [#as_json] settings; keys are read as strings after
      #   the `as_json` conversion
      # @return [Client, nil] nil when the storage type is not one of
      #   's3', 'minio', 'azure' or 'gcp'
      def self.factory(params)
        params = params.as_json
        case params["storage"]
        when 's3', 'minio'
          Cnvrg::Downloader::Clients::S3Client.new(sts_path: params["path_sts"], access_key: params["sts_a"], secret: params["sts_s"], session_token: params["sts_st"], region: params["region"], bucket: params["bucket"], encryption: params["encryption"], endpoint: params["endpoint"], storage: params["storage"])
        when 'azure'
          azure_params = params.symbolize_keys.slice(:storage_account_name, :storage_access_key, :container, :sts)
          Cnvrg::Downloader::Clients::AzureClient.new(**azure_params)
        when 'gcp'
          Cnvrg::Downloader::Clients::GcpClient.new(project_id: params["project_id"], credentials: params["credentials"], bucket_name: params["bucket_name"], sts: params["sts"])
        end
      end

      private

      # Random jitter in the range [0, 1) seconds with millisecond resolution.
      def random_number_milliseconds
        rand(1000) / 1000.0
      end

      # Pause before the next attempt: 2**n seconds plus jitter, capped at
      # MAXIMUM_BACKOFF.
      def backoff_time(n)
        [(2**n) + random_number_milliseconds, MAXIMUM_BACKOFF].min
      end
    end
  end
end
@@ -0,0 +1,45 @@
1
+ require 'azure/storage/blob'
2
+
3
module Cnvrg
  module Downloader
    module Clients
      # Downloader client backed by Azure Blob Storage.
      class AzureClient < Client
        # All values except +sts+ arrive encrypted and are decrypted with the
        # key/iv read from +sts+.
        #
        # @param storage_account_name [String] encrypted account name
        # @param storage_access_key [String] encrypted access key
        # @param container [String] encrypted container name
        # @param sts [String] location of the key/iv resource
        def initialize(storage_account_name: nil, storage_access_key: nil, container: nil, sts: nil)
          @key, @iv = extract_key_iv(sts)
          @account_name = Cnvrg::Helpers.decrypt(@key, @iv, storage_account_name)
          @access_key = Cnvrg::Helpers.decrypt(@key, @iv, storage_access_key)
          @container = Cnvrg::Helpers.decrypt(@key, @iv, container)
        end

        # Fetches a blob and writes its content to +local_path+.
        #
        # @param storage_path [String] blob name (encrypted unless +decrypt+ is false)
        # @param local_path [String] destination file; parent directories are created
        # @param decrypt [Boolean] whether +storage_path+ must be decrypted first
        # @return [Object] the blob object returned by the Azure client
        def download(storage_path, local_path, decrypt: true)
          prepare_download(local_path)
          storage_path = Cnvrg::Helpers.decrypt(@key, @iv, storage_path) if decrypt
          blob, content = client.get_blob(@container, storage_path)
          ::File.open(local_path, 'wb') { |f| f.write(content) }
          blob
        end

        # Uploads +local_path+ as a block blob named +storage_path+.
        # Errors raised by the Azure client propagate to the caller.
        #
        # @return [Object] whatever `create_block_blob` returns
        def upload(storage_path, local_path)
          # Block form guarantees the local file handle is closed, including
          # when the upload raises.
          File.open(local_path, "rb") do |io|
            client.create_block_blob(@container, storage_path, io)
          end
        end

        # Lists one page of blob names.
        #
        # @param prefix [String, nil] only names starting with this prefix
        # @param marker [String, nil] continuation token from a previous page
        # @param limit [Integer] maximum number of results requested
        # @return [Array(Array<String>, Object)] blob names and the
        #   continuation token for the next page
        def fetch_files(prefix: nil, marker: nil, limit: 10000)
          blobs = client.list_blobs(@container, prefix: prefix, max_results: limit, marker: marker)
          next_marker = blobs.continuation_token
          files = blobs.map { |blob| blob.name }
          [files, next_marker]
        end

        private

        # Builds a new blob service on every call (no instance is cached).
        def client
          Azure::Storage::Blob::BlobService.create(storage_account_name: @account_name, storage_access_key: @access_key)
        end
      end
    end
  end
end
@@ -0,0 +1,50 @@
1
+ require "google/cloud/storage"
2
+
3
module Cnvrg
  module Downloader
    module Clients
      # Downloader client backed by Google Cloud Storage.
      class GcpClient < Client
        # Decrypts the settings, materialises the credentials into a temp file
        # and opens the bucket. Any failure is logged, a message is shown to the
        # user and the process exits with status 1.
        #
        # @param project_id [String] encrypted project id
        # @param credentials [String] encrypted location of the credentials
        # @param bucket_name [String] encrypted bucket name
        # @param sts [String] location of the key/iv resource
        def initialize(project_id: nil, credentials: nil, bucket_name: nil, sts: nil)
          @key, @iv = extract_key_iv(sts)
          @project_id = Cnvrg::Helpers.decrypt(@key, @iv, project_id)
          @credentials_path = Cnvrg::Helpers.decrypt(@key, @iv, credentials)
          @tempfile = nil
          @bucket_name = Cnvrg::Helpers.decrypt(@key, @iv, bucket_name)
          init_gcp_credentials
          @storage = Google::Cloud::Storage.new(project_id: @project_id, credentials: @credentials, retries: 20)
          @bucket = @storage.bucket(@bucket_name)
          # Touch the bucket so an unusable one fails here, inside the rescue
          # (calling a method on nil raises).
          @bucket.name
        rescue => e
          Cnvrg::Logger.log_error(e)
          Cnvrg::Logger.log_info("Tried to init gcp client without success.")
          Cnvrg::CLI.log_message("Cannot init client. please contact support to check your bucket credentials.")
          exit(1)
        end

        # Copies the credentials found at @credentials_path into a Tempfile and
        # stores that file's path in @credentials.
        #
        # The Tempfile object is kept in @tempfile so it stays referenced for
        # the lifetime of the client.
        def init_gcp_credentials
          # Block form closes the source handle once the content has been read.
          raw = open(@credentials_path) { |io| io.read }
          t = Tempfile.new
          t.binmode
          t.write(raw)
          t.rewind
          @credentials = t.path
          @tempfile = t
        end

        # Downloads the object named by the encrypted +storage_path+ into
        # +local_path+, creating parent directories first.
        def download(storage_path, local_path)
          prepare_download(local_path)
          file = @bucket.file(decrypt(storage_path))
          file.download local_path
        end

        # Uploads +local_path+ to the bucket as +storage_path+.
        # Errors raised by the storage library propagate to the caller.
        def upload(storage_path, local_path)
          @bucket.create_file(local_path, storage_path)
        end
      end
    end
  end
end
@@ -0,0 +1,78 @@
1
module Cnvrg
  module Downloader
    module Clients
      # Downloader client for S3 and S3-compatible (MinIO) storage.
      class S3Client < Client
        # Decrypts the connection settings and prepares the option hashes used
        # for every AWS client created later on.
        #
        # @param sts_path [String] location of the key/iv resource
        # @param access_key [String] encrypted access key id
        # @param secret [String] encrypted secret access key
        # @param session_token [String] encrypted session token (dropped for MinIO)
        # @param region [String] encrypted region
        # @param bucket [String] encrypted bucket name
        # @param encryption [String, nil] server-side encryption value for uploads
        # @param endpoint [String] encrypted endpoint, used for MinIO only
        # @param storage [String] 's3' or 'minio'
        def initialize(sts_path: nil, access_key: nil, secret: nil, session_token: nil, region: nil, bucket: nil, encryption: nil, endpoint: nil, storage: nil)
          @key, @iv = extract_key_iv(sts_path)
          @access_key = Cnvrg::Helpers.decrypt(@key, @iv, access_key)
          @secret = Cnvrg::Helpers.decrypt(@key, @iv, secret)
          @session_token = Cnvrg::Helpers.decrypt(@key, @iv, session_token)
          @region = Cnvrg::Helpers.decrypt(@key, @iv, region)
          @bucket_name = Cnvrg::Helpers.decrypt(@key, @iv, bucket)
          @endpoint = Cnvrg::Helpers.decrypt(@key, @iv, endpoint)

          options = {
            :access_key_id => @access_key,
            :secret_access_key => @secret,
            :session_token => @session_token,
            :region => @region,
            :http_open_timeout => 60, :retry_limit => 20
          }
          if storage == 'minio'
            options.delete(:session_token)
            # NOTE(review): TLS peer verification is switched off for MinIO
            # endpoints; confirm this is still required.
            options = options.merge({
              :force_path_style => true,
              :ssl_verify_peer => false,
              :endpoint => @endpoint,
            })
          end
          @options = options

          @upload_options = {:use_accelerate_endpoint => storage == 's3'}
          @upload_options[:server_side_encryption] = encryption if encryption.present?
        end

        # Downloads an object into +local_path+.
        #
        # The target is opened in binary mode so the bytes are written exactly
        # as received on every platform.
        #
        # @param storage_path [String] object key (encrypted unless +decrypt+ is false)
        # @param local_path [String] destination file; parent directories are created
        # @param decrypt [Boolean] whether +storage_path+ must be decrypted first
        # @return [Object] the response of `get_object`
        # @raise [StandardError] any error is logged and re-raised
        def download(storage_path, local_path, decrypt: true)
          prepare_download(local_path)
          storage_path = Cnvrg::Helpers.decrypt(@key, @iv, storage_path) if decrypt
          resp = nil
          File.open(local_path, 'w+b') do |file|
            resp = aws_client.get_object({bucket: @bucket_name, key: storage_path}, target: file)
          end
          resp
        rescue => e
          Cnvrg::Logger.log_error(e)
          raise
        end

        # Uploads a local file.
        #
        # @param storage_path [String] key inside the bucket
        # @param local_path [String] full path of the local file
        # @return [Object] result of `upload_file`; errors propagate to the caller
        def upload(storage_path, local_path)
          aws_bucket.object(storage_path).upload_file(local_path, @upload_options)
        end

        # Lists up to +limit+ object keys.
        #
        # @param prefix [String, nil] only keys starting with this prefix
        # @param marker [String, nil] listing starts after this key
        # @param limit [Integer] maximum number of keys returned
        # @return [Array<String>] object keys only (no continuation marker)
        def fetch_files(prefix: nil, marker: nil, limit: 1000)
          aws_bucket.objects(prefix: prefix, marker: marker).first(limit).map(&:key)
        end

        private

        # Builds a new S3 client on every call (no instance is cached).
        def aws_client
          Aws::S3::Client.new(@options)
        end

        # Bucket resource bound to a freshly built client.
        def aws_bucket
          Aws::S3::Resource.new(client: aws_client).bucket(@bucket_name)
        end
      end
    end
  end
end
@@ -0,0 +1,209 @@
1
+ require 'net/http'
2
module Cnvrg
  # Client-side handle for an experiment (or notebook session) of a project.
  # Wraps the related API endpoints and a few files under `.cnvrg/`.
  class Experiment
    attr_reader :slug
    attr_reader :sync_before_terminate
    attr_reader :sync_delay_time
    attr_reader :output_dir

    # @param owner [String] project owner, used in the API paths
    # @param project_slug [String]
    # @param job_id [String, nil] slug of an already existing experiment
    def initialize(owner, project_slug, job_id: nil)
      @project_slug = project_slug
      @owner = owner
      @command = nil
      @base_resource = "users/#{owner}/projects/#{project_slug}/"
      @slug = job_id
      @sync_before_terminate = nil
      @sync_delay_time = nil
      @output_dir = nil
    end

    # Environment variables describing this job.
    def as_env
      {
        CNVRG_JOB_ID: @slug,
        CNVRG_JOB_TYPE: "Experiment",
        CNVRG_PROJECT: @project_slug,
        CNVRG_OWNER: @owner,
      }.as_json
    end

    # Registers the start of an experiment and stores the values returned by
    # the server (slug, sync settings, output dir, command) on this object.
    #
    # `sync_before_terminate` and `periodic_sync` are accepted but not sent.
    #
    # @return [Object] the raw API response
    def start(input, platform, machine_name, start_commit, name, email_notification, machine_activity, script_path,
              sync_before_terminate, periodic_sync)
      res = Cnvrg::API.request(@base_resource + "experiment/start", 'POST',
                               {input: input, platform: platform, machine_name: machine_name, start_commit: start_commit,
                                title: name, email_notification: email_notification, machine_activity: machine_activity, script_path: script_path})
      Cnvrg::CLI.is_response_success(res, false)

      result = res.to_h["result"].to_h
      @slug = result["slug"]
      @sync_before_terminate = result["sync_before_terminate"]
      @sync_delay_time = result["sync_delay_time"]
      @output_dir = result["output_dir"]
      @command = result["command"]

      res
    end

    # Starts a notebook session and remembers its id as this object's slug.
    #
    # @return [Object] the session id returned by the server
    def start_notebook_session(kernel, start_commit, token, port, remote, notebook_path)
      res = Cnvrg::API.request(@base_resource + "notebook/start_session", 'POST',
                               {kernel: kernel, start_commit: start_commit,
                                token: token, port: port, remote: remote, notebook_path: notebook_path})
      Cnvrg::CLI.is_response_success(res)

      @slug = res["result"]["id"]
    end

    # Stops the notebook identified by +notebook_slug+.
    #
    # @return [Object] the raw API response
    def end_notebook_session(notebook_slug)
      res = Cnvrg::API.request(@base_resource + "notebook/#{notebook_slug}/stop", 'GET')
      Cnvrg::CLI.is_response_success(res, false)
      res
    end

    # Stores +slug+ in `<proj_dir>/.cnvrg/notebook_slug`.
    # Best effort: any error is swallowed on purpose.
    def update_notebook_slug(proj_dir, slug)
      file = proj_dir + "/.cnvrg/notebook_slug"
      FileUtils.touch file
      File.open(file, "w+") { |f| f.write slug }
    rescue
    end

    # Reads the slug stored by #update_notebook_slug.
    #
    # Newlines and surrounding whitespace are removed; the rest of the slug is
    # returned untouched.
    #
    # @return [String, nil] nil when the file cannot be read
    def get_notebook_slug(proj_dir)
      File.binread(proj_dir + "/.cnvrg/notebook_slug").delete("\n").strip
    rescue
      nil
    end

    # Reads the machine activity id from `<working_dir>/.cnvrg/machine_activity`.
    #
    # @return [Integer, nil] nil when the file cannot be read
    def get_machine_activity(working_dir)
      File.binread("#{working_dir}/.cnvrg/machine_activity").to_s.strip.to_i
    rescue
      nil
    end

    # Sends log lines to the server in batches of ten, pausing one second
    # after every batch.
    #
    # `job_type` and `job_id` are accepted but the request always uses
    # "Experiment" and this object's slug.
    #
    # @param logs [String, Array<String>]
    # @param level [String] log level sent with every batch
    # @param step [Object, nil] sent as `step`
    def job_log(logs, level: 'info', step: nil, job_type: nil, job_id: nil)
      logs = [logs].flatten
      logs.each_slice(10).each do |temp_logs|
        Cnvrg::API.request("users/#{@owner}/projects/#{@project_slug}/jobs/experiment/#{@slug}/log", "POST", {job_type: "Experiment", job_id: @slug, logs: temp_logs, log_level: level, step: step, timestamp: Time.now})
        sleep(1)
      end
    end

    # Asks the server to run an experiment remotely.
    #
    # @return [Object] the raw API response
    def exec_remote(command, commit_to_run, instance_type, image_slug, schedule, local_timestamp, grid, path_to_cmd, data, data_commit, periodic_sync,
                    sync_before_terminate, max_time, ds_sync_options=0, output_dir=nil, data_query=nil,
                    git_commit=nil, git_branch=nil, restart_if_stuck=nil, local_folders=nil, title=nil, datasets=nil, prerun: true, requirements: true, recurring: nil,
                    email_notification_error: false, email_notification_success: false, emails_to_notify: nil)
      payload = {
        command: command, image_slug: image_slug,
        commit_sha1: commit_to_run,
        instance_type: instance_type,
        schedule: schedule,
        local_timestamp: local_timestamp,
        datasets: datasets,
        grid: grid,
        path_to_cmd: path_to_cmd, dataset_slug: data,
        dataset_commit: data_commit, max_time: max_time,
        periodic_sync: periodic_sync, sync_before_terminate: sync_before_terminate,
        dataset_sync_options: ds_sync_options, output_dir: output_dir,
        dataset_query: data_query, git_commit: git_commit, git_branch: git_branch,
        restart_if_stuck: restart_if_stuck, local_folders: local_folders, title: title,
        prerun: prerun, requirements: requirements, recurring: recurring,
        email_notification_error: email_notification_error, email_notification_success: email_notification_success,
        emails_to_notify: emails_to_notify
      }
      Cnvrg::API.request("users/#{@owner}/projects/#{@project_slug}/experiment/remote", 'POST', payload)
    end

    # Asks the server to start a remote notebook.
    #
    # @return [Object] the raw API response
    def remote_notebook(instance_type, commit, data, data_commit, notebook_type, ds_sync_options=0, data_query=nil, image = nil, datasets = nil)
      payload = {
        instance_type: instance_type, dataset_slug: data,
        dataset_commit: data_commit, image_slug: image,
        datasets: datasets,
        commit: commit, notebook_type: notebook_type, dataset_sync_options: ds_sync_options,
        dataset_query: data_query
      }
      Cnvrg::API.request("users/#{@owner}/projects/#{@project_slug}/notebook/remote", 'POST', payload)
    end

    # Uploads a chunk of temporary log output for this experiment.
    def upload_temp_log(temp_log)
      response = Cnvrg::API.request(@base_resource + "experiment/upload_temp_log", 'POST', {output: temp_log,
                                                                                           exp_slug: @slug})
      Cnvrg::CLI.is_response_success(response, false)
    end

    # Uploads machine statistics, stamping each entry with the current time.
    # The hashes passed in by the caller are not modified.
    #
    # @param stats [Array<Hash>]
    def send_machine_stats(stats)
      stamped = stats.map { |entry| entry.merge({time: Time.now}) }
      response = Cnvrg::API.request(@base_resource + "experiment/upload_stats", "POST", {exp_slug: @slug, stats: stamped})
      Cnvrg::CLI.is_response_success(response, false)
    end

    # Reports the end of the experiment, trying up to ten times with a
    # randomised, growing pause, then removes the temporary exec log.
    def end(output, exit_status, end_commit, cpu_average, memory_average, end_time: nil)
      tries = 0
      success = false
      end_time ||= Time.now
      while tries < 10 && success.blank?
        sleep((tries * rand)**2) ### randomised, growing backoff
        ## this call is super important so we cant let it crash.

        tries += 1
        response = Cnvrg::API.request(@base_resource + "experiment/end", 'POST', {output: output, exp_slug: @slug,
                                                                                 exit_status: exit_status, end_commit: end_commit,
                                                                                 cpu_average: cpu_average, memory_average: memory_average, end_time: end_time})
        success = Cnvrg::CLI.is_response_success(response, false)
      end

      begin
        FileUtils.rm_rf(["/home/ds/.cnvrg/tmp/exec.log"])
      rescue
        # best effort cleanup
      end
    end

    # Command returned by the server when the experiment was started.
    def get_cmd
      @command
    end

    # Checks the instance metadata endpoint for a spot termination notice.
    #
    # A notice is only reported for a successful (2xx) response whose body
    # does not look like an error; any other status and any network error
    # count as "no notice".
    #
    # @return [Boolean]
    def restart_spot_instance
      url = URI.parse('http://169.254.169.254/latest/meta-data/spot/termination-time')
      res = Net::HTTP.start(url.host, url.port) do |http|
        http.request(Net::HTTP::Get.new(url.to_s))
      end
      return false unless res.is_a?(Net::HTTPSuccess)

      body = res.body.to_s
      return false if body.include?("404") || body.include?("Empty reply from server")

      true
    rescue
      false
    end

    # Notifies the server about a spot instance check for this experiment.
    def send_restart_request(sha1=nil)
      Cnvrg::API.request(@base_resource + "experiment/check_spot_instance", 'POST', {exp_slug: @slug, end_commit: sha1})
    end
  end
end