cnvrg 1.11.9 → 1.11.15
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/cnvrg/api.rb +22 -3
- data/lib/cnvrg/cli.rb +3 -2
- data/lib/cnvrg/datafiles.rb +9 -1
- data/lib/cnvrg/downloader/client.rb +3 -1
- data/lib/cnvrg/files.rb +17 -17
- data/lib/cnvrg/helpers/agent.rb +7 -2
- data/lib/cnvrg/helpers/executer.rb +24 -10
- data/lib/cnvrg/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8798ab84db1e0742dc733a0f74b5830e2c5396099fc52bdb4a04ef108a43408e
|
4
|
+
data.tar.gz: 795988ec46ba506091136240bda258f923469e4b3000d58e84371df09f65840d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: '0497d165396b52d2975c3d3ab8c097596c173f04c63711519cf2c122fb9dc5f90ecc493aab7ba2cba0d7064dc14b52a7e1d7f2f6f5e68d3af020d4b3f1f5b35c'
|
7
|
+
data.tar.gz: 72c65364f4c9ad8d5acf68546f0a428c113dd9697613674ce83b52a3ce7fcec8064a7517d9b5b5ae7f21a8690ba935d028622688d72f87eb3e1927d7fe5ea9e2
|
data/lib/cnvrg/api.rb
CHANGED
@@ -65,6 +65,16 @@ module Cnvrg
|
|
65
65
|
response = conn.get "#{resource}", data
|
66
66
|
success = true
|
67
67
|
Cnvrg::API.parse_version(response)
|
68
|
+
if response.to_hash[:status].to_i != 200
|
69
|
+
Cnvrg::Logger.log_info("Got back bad status #{response.to_hash[:status]}")
|
70
|
+
end
|
71
|
+
if [503, 502, 429].include?(response.to_hash[:status].to_i)
|
72
|
+
Cnvrg::Logger.log_info("Got back status #{response.to_hash[:status]}, will retry in #{5 * retries} seconds")
|
73
|
+
success = false
|
74
|
+
sleep(5 * retries)
|
75
|
+
retries +=1
|
76
|
+
next
|
77
|
+
end
|
68
78
|
rescue => e
|
69
79
|
Cnvrg::Logger.log_error(e)
|
70
80
|
sleep(5)
|
@@ -95,11 +105,20 @@ module Cnvrg
|
|
95
105
|
response = conn.put "#{resource}", data.to_json if method.eql? 'PUT'
|
96
106
|
success = true
|
97
107
|
Cnvrg::API.parse_version(response)
|
98
|
-
|
108
|
+
if response.to_hash[:status].to_i != 200
|
109
|
+
Cnvrg::Logger.log_info("Got back bad status #{response.to_hash[:status]}")
|
110
|
+
end
|
111
|
+
if [503, 502, 429].include?(response.to_hash[:status].to_i)
|
112
|
+
Cnvrg::Logger.log_info("Got back status #{response.to_hash[:status]}, will retry in #{5 * retries} seconds")
|
113
|
+
success = false
|
114
|
+
sleep(5 * retries)
|
115
|
+
retries +=1
|
116
|
+
next
|
117
|
+
end
|
99
118
|
rescue => e
|
100
119
|
Cnvrg::Logger.log_error(e)
|
101
|
-
|
102
|
-
|
120
|
+
sleep(5)
|
121
|
+
retries +=1
|
103
122
|
end
|
104
123
|
end
|
105
124
|
if !success
|
data/lib/cnvrg/cli.rb
CHANGED
@@ -3382,10 +3382,10 @@ module Cnvrg
|
|
3382
3382
|
if @project.is_git
|
3383
3383
|
output_dir = output_dir || @exp.output_dir
|
3384
3384
|
if output_dir.present?
|
3385
|
-
upload(false, false, true, ignore, true,
|
3385
|
+
upload(false, false, true, ignore, true, false, output_dir, "Experiment", @exp.slug, true )
|
3386
3386
|
end
|
3387
3387
|
else
|
3388
|
-
upload(false, false, true, ignore, true,
|
3388
|
+
upload(false, false, true, ignore, true, false, nil, "Experiment", @exp.slug, true )
|
3389
3389
|
end
|
3390
3390
|
end
|
3391
3391
|
|
@@ -3652,6 +3652,7 @@ module Cnvrg
|
|
3652
3652
|
sleep 3
|
3653
3653
|
tries += 1
|
3654
3654
|
retry if tries <= 5
|
3655
|
+
exit(1)
|
3655
3656
|
end
|
3656
3657
|
end
|
3657
3658
|
end
|
data/lib/cnvrg/datafiles.rb
CHANGED
@@ -11,6 +11,7 @@ module Cnvrg
|
|
11
11
|
|
12
12
|
LARGE_FILE=1024*1024*5
|
13
13
|
MULTIPART_SPLIT=10000000
|
14
|
+
RETRIES = ENV['UPLOAD_FILE_RETRIES'].try(:to_i) || 10
|
14
15
|
|
15
16
|
attr_reader :base_resource
|
16
17
|
|
@@ -256,6 +257,7 @@ module Cnvrg
|
|
256
257
|
end
|
257
258
|
|
258
259
|
def delete_file_chunk(commit_sha1, regex_list, chunk_size, offset)
|
260
|
+
retry_count = 0
|
259
261
|
begin
|
260
262
|
resp = Cnvrg::API.request(
|
261
263
|
@base_resource + "delete_files_by_chunk",
|
@@ -268,13 +270,19 @@ module Cnvrg
|
|
268
270
|
}
|
269
271
|
)
|
270
272
|
unless Cnvrg::CLI.is_response_success(resp, false)
|
271
|
-
Cnvrg::Logger.log_method(bind: binding)
|
272
273
|
raise Exception.new("Got an error message from server, #{resp.try(:fetch, "message")}")
|
273
274
|
end
|
274
275
|
return resp["total_changes"]
|
275
276
|
rescue => e
|
276
277
|
Cnvrg::Logger.log_method(bind: binding)
|
277
278
|
Cnvrg::Logger.log_error(e)
|
279
|
+
|
280
|
+
if retry_count < RETRIES
|
281
|
+
sleep(2**retry_count) # Exponential backoff
|
282
|
+
retry_count += 1
|
283
|
+
retry
|
284
|
+
end
|
285
|
+
|
278
286
|
raise e
|
279
287
|
end
|
280
288
|
end
|
@@ -14,11 +14,13 @@ module Cnvrg
|
|
14
14
|
end
|
15
15
|
|
16
16
|
def extract_key_iv(sts_path)
|
17
|
-
count =
|
17
|
+
count = 0
|
18
18
|
begin
|
19
19
|
count += 1
|
20
20
|
sts = open(sts_path, {ssl_verify_mode: 0}).read rescue nil
|
21
21
|
rescue => e
|
22
|
+
backoff_time_seconds = backoff_time(count)
|
23
|
+
sleep backoff_time_seconds
|
22
24
|
Cnvrg::Logger.log_error(e)
|
23
25
|
retry if count <= 20
|
24
26
|
raise StandardError.new("Cant access storage: #{e.message}")
|
data/lib/cnvrg/files.rb
CHANGED
@@ -733,7 +733,7 @@ module Cnvrg
|
|
733
733
|
unless Cnvrg::CLI.is_response_success(res, false)
|
734
734
|
raise SignalException.new("Cant download files from the server.")
|
735
735
|
end
|
736
|
-
self.
|
736
|
+
self.download_multiple_files_s3(res['result'], @project_home, postfix: postfix, progress: progress, threads: threads)
|
737
737
|
end
|
738
738
|
|
739
739
|
|
@@ -750,7 +750,8 @@ module Cnvrg
|
|
750
750
|
conflicted.each{|file| self.delete_conflict(file); progress.progress += 1 if progress.present?}
|
751
751
|
end
|
752
752
|
|
753
|
-
def
|
753
|
+
def download_multiple_files_s3(files, project_home, postfix: '', progress: nil, threads: 15)
|
754
|
+
cli = Cnvrg::CLI.new()
|
754
755
|
begin
|
755
756
|
props = {}
|
756
757
|
client = props[:client]
|
@@ -774,11 +775,15 @@ module Cnvrg
|
|
774
775
|
file_path = f["name"]
|
775
776
|
if file_path.end_with? "/"
|
776
777
|
# dir
|
777
|
-
|
778
|
-
|
779
|
-
|
780
|
-
|
781
|
-
|
778
|
+
begin
|
779
|
+
if download_dir(file_path, file_path, project_home)
|
780
|
+
download_succ_count += 1
|
781
|
+
else
|
782
|
+
raise StandardError.new("Could not create directory #{file_path}.")
|
783
|
+
end
|
784
|
+
rescue => e
|
785
|
+
cli.log_message("Could not create directory #{file_path}. error: #{e.message}", Thor::Shell::Color::RED)
|
786
|
+
raise e
|
782
787
|
end
|
783
788
|
else
|
784
789
|
file_path += postfix
|
@@ -793,25 +798,20 @@ module Cnvrg
|
|
793
798
|
progress.progress += 1 if progress.present?
|
794
799
|
download_succ_count += 1
|
795
800
|
rescue => e
|
796
|
-
|
797
|
-
raise
|
801
|
+
cli.log_message("Could not download file #{file_path}. error: #{e.message}", Thor::Shell::Color::RED)
|
802
|
+
raise e
|
798
803
|
end
|
799
|
-
|
800
|
-
|
801
|
-
|
802
804
|
end
|
803
805
|
end
|
804
806
|
if download_succ_count == files["keys"].size
|
805
807
|
return Cnvrg::Result.new(true,"Done.\nDownloaded #{download_succ_count} files")
|
806
808
|
end
|
807
809
|
rescue => e
|
808
|
-
|
810
|
+
cli.log_error(e)
|
811
|
+
raise e
|
809
812
|
end
|
810
|
-
|
811
|
-
|
812
|
-
|
813
|
-
|
814
813
|
end
|
814
|
+
|
815
815
|
def download_file(absolute_path, relative_path, project_home, conflict=false)
|
816
816
|
res = Cnvrg::API.request(@base_resource + "download_file", 'POST', {absolute_path: absolute_path, relative_path: relative_path})
|
817
817
|
Cnvrg::CLI.is_response_success(res, false)
|
data/lib/cnvrg/helpers/agent.rb
CHANGED
@@ -75,9 +75,11 @@ class Cnvrg::Helpers::Agent
|
|
75
75
|
|
76
76
|
def exec!
|
77
77
|
log_internal("Command: #{@command} with slug: #{@slug} started!")
|
78
|
-
if
|
78
|
+
if @command.blank?
|
79
|
+
@exit_status = 0
|
80
|
+
elsif should_run?
|
79
81
|
send_logs(status: Status::STARTED)
|
80
|
-
periodic_thread
|
82
|
+
periodic_thread_handle = periodic_thread
|
81
83
|
execute_command
|
82
84
|
else
|
83
85
|
@exit_status = 127
|
@@ -86,6 +88,9 @@ class Cnvrg::Helpers::Agent
|
|
86
88
|
finish_log += " after #{@real_execution_retries} retries" if @real_execution_retries > 0
|
87
89
|
log_internal(finish_log)
|
88
90
|
send_logs(exit_status: @exit_status, status: Status::FINISHED)
|
91
|
+
if periodic_thread_handle.present?
|
92
|
+
periodic_thread_handle.join
|
93
|
+
end
|
89
94
|
end
|
90
95
|
|
91
96
|
def get_logs_to_send
|
@@ -120,17 +120,31 @@ class Cnvrg::Helpers::Executer
|
|
120
120
|
end
|
121
121
|
|
122
122
|
def init
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
123
|
+
retries = 0
|
124
|
+
success = false
|
125
|
+
puts("Agent started, connecting to #{Cnvrg::API.get_api}")
|
126
|
+
STDOUT.flush
|
127
|
+
while !success and retries < 100
|
128
|
+
begin
|
129
|
+
resp = Cnvrg::API.request(activity_url, "PUT", {stats: executer_stats})
|
130
|
+
machine_activity = resp["machine_activity"]
|
131
|
+
success = true
|
132
|
+
puts("Connected to server")
|
133
|
+
STDOUT.flush
|
134
|
+
Cnvrg::Logger.log_info("Got back machine activity #{machine_activity}")
|
135
|
+
if machine_activity.present? and @machine_activity != machine_activity
|
136
|
+
Cnvrg::Logger.log_info("Changing to machine activity #{machine_activity}")
|
137
|
+
machine_activity_yml = {slug: machine_activity}
|
138
|
+
File.open("/conf/.machine_activity.yml", "w+") {|f| f.write machine_activity_yml.to_yaml}
|
139
|
+
@machine_activity = machine_activity
|
140
|
+
end
|
141
|
+
rescue => e
|
142
|
+
Cnvrg::Logger.log_error(e)
|
143
|
+
Cnvrg::Logger.info("Sleeping for #{5 * retries}")
|
144
|
+
sleep(5 * retries)
|
145
|
+
retries +=1
|
146
|
+
end
|
131
147
|
end
|
132
|
-
rescue => e
|
133
|
-
Cnvrg::Logger.log_error(e)
|
134
148
|
end
|
135
149
|
|
136
150
|
def polling_thread
|
data/lib/cnvrg/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cnvrg
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.11.
|
4
|
+
version: 1.11.15
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Yochay Ettun
|
@@ -10,7 +10,7 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
|
-
date:
|
13
|
+
date: 2021-01-25 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: bundler
|