heroku_hatchet 6.0.0 → 7.1.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.circleci/config.yml +2 -0
- data/CHANGELOG.md +36 -1
- data/README.md +774 -174
- data/bin/hatchet +13 -6
- data/hatchet.gemspec +1 -2
- data/hatchet.json +2 -1
- data/hatchet.lock +2 -0
- data/lib/hatchet.rb +2 -3
- data/lib/hatchet/api_rate_limit.rb +6 -17
- data/lib/hatchet/app.rb +150 -41
- data/lib/hatchet/config.rb +1 -1
- data/lib/hatchet/git_app.rb +29 -2
- data/lib/hatchet/reaper.rb +159 -56
- data/lib/hatchet/reaper/app_age.rb +49 -0
- data/lib/hatchet/reaper/reaper_throttle.rb +55 -0
- data/lib/hatchet/shell_throttle.rb +71 -0
- data/lib/hatchet/test_run.rb +2 -1
- data/lib/hatchet/version.rb +1 -1
- data/spec/hatchet/allow_failure_git_spec.rb +42 -2
- data/spec/hatchet/app_spec.rb +145 -6
- data/spec/hatchet/ci_spec.rb +10 -1
- data/spec/hatchet/git_spec.rb +9 -3
- data/spec/hatchet/lock_spec.rb +63 -1
- data/spec/unit/reaper_spec.rb +169 -0
- data/spec/unit/shell_throttle.rb +28 -0
- metadata +16 -23
data/lib/hatchet/config.rb
CHANGED
@@ -46,7 +46,7 @@ module Hatchet
|
|
46
46
|
def path_for_name(name)
|
47
47
|
possible_paths = [repos[name.to_s], "repos/#{name}", name].compact
|
48
48
|
path = possible_paths.detect do |path|
|
49
|
-
Dir[path]
|
49
|
+
!Dir[path]&.empty?
|
50
50
|
end
|
51
51
|
raise BadRepoName.new(name, possible_paths) if path.nil? || path.empty?
|
52
52
|
path
|
data/lib/hatchet/git_app.rb
CHANGED
@@ -5,11 +5,38 @@ module Hatchet
|
|
5
5
|
"https://git.heroku.com/#{name}.git"
|
6
6
|
end
|
7
7
|
|
8
|
+
|
8
9
|
def push_without_retry!
|
9
|
-
output =
|
10
|
+
output = ""
|
11
|
+
|
12
|
+
ShellThrottle.new(platform_api: @platform_api).call do
|
13
|
+
output = git_push_heroku_yall
|
14
|
+
rescue FailedDeploy => e
|
15
|
+
if e.output.match?(/reached the API rate limit/)
|
16
|
+
throw(:throttle)
|
17
|
+
elsif @allow_failure
|
18
|
+
output = e.output
|
19
|
+
else
|
20
|
+
raise e
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
return output
|
25
|
+
end
|
26
|
+
|
27
|
+
private def git_push_heroku_yall
|
28
|
+
output = `git push #{git_repo} HEAD:main 2>&1`
|
29
|
+
|
10
30
|
if !$?.success?
|
11
|
-
raise
|
31
|
+
raise FailedDeployError.new(self, "Buildpack: #{@buildpack.inspect}\nRepo: #{git_repo}", output: output)
|
12
32
|
end
|
33
|
+
|
34
|
+
releases = platform_api.release.list(name)
|
35
|
+
if releases.last["status"] == "failed"
|
36
|
+
commit! # An empty commit allows us to deploy again
|
37
|
+
raise FailedReleaseError.new(self, "Buildpack: #{@buildpack.inspect}\nRepo: #{git_repo}", output: output)
|
38
|
+
end
|
39
|
+
|
13
40
|
return output
|
14
41
|
end
|
15
42
|
end
|
data/lib/hatchet/reaper.rb
CHANGED
@@ -1,88 +1,191 @@
|
|
1
1
|
require 'tmpdir'
|
2
2
|
|
3
3
|
module Hatchet
|
4
|
-
#
|
5
|
-
# the reaper is designed to allow the most recent apps to stay alive
|
6
|
-
# while keeping the total number of apps under the global Heroku limit.
|
7
|
-
# Any time you're worried about hitting the limit call @reaper.cycle
|
4
|
+
# This class lazily deletes hatchet apps
|
8
5
|
#
|
6
|
+
# When the reaper is called, it will check if the system has too many apps (based on HATCHET_APP_LIMIT); if so, it will attempt
|
7
|
+
# to delete an app to free up capacity. The goal of lazily deleting apps is to temporarily keep
|
8
|
+
# apps around for debugging if they fail.
|
9
|
+
#
|
10
|
+
# When App#teardown! is called on an app it is marked as being in a "finished" state by turning
|
11
|
+
# on maintenance mode. The reaper will delete these in order (oldest first).
|
12
|
+
#
|
13
|
+
# If no apps are marked as being "finished" then the reaper will check to see if the oldest app
|
14
|
+
# has been alive for a long enough period for its tests to finish (configured by HATCHET_ALIVE_TTL_MINUTES env var).
|
15
|
+
# If the "unfinished" app has been alive that long it will be deleted. If not, the system will sleep for a period of time
|
16
|
+
# in an attempt to allow other apps to move to be "finished".
|
17
|
+
#
|
18
|
+
# This class only limits the number of "hatchet" apps on the system. Previously there was a maximum of 100 apps on a
|
19
|
+
# Heroku account. Now a user can belong to multiple orgs and the total number of apps they have access to is no longer
|
20
|
+
# fixed at 100. Instead of hard coding a maximum limit, this failure mode is handled by forcing deletion of
|
21
|
+
# an app when app creation fails. In the future we may find a better way of detecting this failure mode.
|
22
|
+
#
|
23
|
+
# Notes:
|
24
|
+
#
|
25
|
+
# - The class uses a file mutex so that multiple processes on the same machine do not attempt to run the
|
26
|
+
# reaper at the same time.
|
27
|
+
# - AlreadyDeletedError will be raised if an app has already been deleted (possibly by another test run on
|
28
|
+
# another machine). When this happens, the system will automatically attempt to reap another app.
|
9
29
|
class Reaper
|
10
|
-
|
11
|
-
|
30
|
+
class AlreadyDeletedError < StandardError; end
|
31
|
+
|
32
|
+
HATCHET_APP_LIMIT = Integer(ENV["HATCHET_APP_LIMIT"] || 20) # the number of apps hatchet keeps around
|
12
33
|
DEFAULT_REGEX = /^#{Regexp.escape(Hatchet::APP_PREFIX)}[a-f0-9]+/
|
13
|
-
attr_accessor :apps
|
14
34
|
|
15
|
-
|
35
|
+
attr_accessor :io, :hatchet_app_limit
|
36
|
+
|
37
|
+
def initialize(api_rate_limit: , regex: DEFAULT_REGEX, io: STDOUT, hatchet_app_limit: HATCHET_APP_LIMIT, initial_sleep: 10)
|
16
38
|
@api_rate_limit = api_rate_limit
|
17
|
-
@regex
|
39
|
+
@regex = regex
|
40
|
+
@io = io
|
41
|
+
@finished_hatchet_apps = []
|
42
|
+
@unfinished_hatchet_apps = []
|
43
|
+
@app_count = 0
|
44
|
+
@hatchet_app_limit = hatchet_app_limit
|
45
|
+
@reaper_throttle = ReaperThrottle.new(initial_sleep: initial_sleep)
|
18
46
|
end
|
19
47
|
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
48
|
+
def cycle(app_exception_message: false)
|
49
|
+
# Protect against parallel deletion of the same app on the same system
|
50
|
+
mutex_file = File.open("#{Dir.tmpdir()}/hatchet_reaper_mutex", File::CREAT)
|
51
|
+
mutex_file.flock(File::LOCK_EX)
|
52
|
+
|
53
|
+
refresh_app_list if @finished_hatchet_apps.empty?
|
54
|
+
|
55
|
+
# To be safe try to delete an app even if we're not over the limit
|
56
|
+
# since the exception may have been caused by going over the maximum account limit
|
57
|
+
if app_exception_message
|
58
|
+
io.puts <<~EOM
|
59
|
+
WARNING: Running reaper due to exception on app
|
60
|
+
#{stats_string}
|
61
|
+
Exception: #{app_exception_message}
|
62
|
+
EOM
|
63
|
+
reap_once
|
64
|
+
end
|
65
|
+
|
66
|
+
while over_limit?
|
67
|
+
reap_once
|
68
|
+
end
|
69
|
+
ensure
|
70
|
+
mutex_file.close
|
25
71
|
end
|
26
72
|
|
27
|
-
def
|
28
|
-
#
|
29
|
-
|
30
|
-
mutex = File.open("#{Dir.tmpdir()}/hatchet_reaper_mutex", File::CREAT)
|
31
|
-
mutex.flock(File::LOCK_EX)
|
73
|
+
def stats_string
|
74
|
+
"total_app_count: #{@app_count}, hatchet_app_count: #{hatchet_app_count}/#{HATCHET_APP_LIMIT}, finished: #{@finished_hatchet_apps.length}, unfinished: #{@unfinished_hatchet_apps.length}"
|
75
|
+
end
|
32
76
|
|
33
|
-
|
34
|
-
|
77
|
+
def over_limit?
|
78
|
+
hatchet_app_count > hatchet_app_limit
|
79
|
+
end
|
35
80
|
|
36
|
-
|
81
|
+
# No guardrails, will delete all apps that match the hatchet namespace
|
82
|
+
def destroy_all
|
83
|
+
refresh_app_list
|
37
84
|
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
puts "Warning: Reached Heroku app limit of #{HEROKU_APP_LIMIT}."
|
44
|
-
break
|
85
|
+
(@finished_hatchet_apps + @unfinished_hatchet_apps).each do |app|
|
86
|
+
begin
|
87
|
+
destroy_with_log(name: app["name"], id: app["id"])
|
88
|
+
rescue AlreadyDeletedError
|
89
|
+
# Ignore, keep going
|
45
90
|
end
|
46
91
|
end
|
92
|
+
end
|
47
93
|
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
94
|
+
private def reap_once
|
95
|
+
refresh_app_list if @finished_hatchet_apps.empty?
|
96
|
+
|
97
|
+
if (app = @finished_hatchet_apps.pop)
|
98
|
+
destroy_with_log(name: app["name"], id: app["id"])
|
99
|
+
elsif (app = @unfinished_hatchet_apps.pop)
|
100
|
+
destroy_if_old_enough(app)
|
101
|
+
end
|
102
|
+
rescue AlreadyDeletedError
|
103
|
+
retry
|
104
|
+
end
|
105
|
+
|
106
|
+
# Checks to see if the given app is older than the HATCHET_ALIVE_TTL_MINUTES
|
107
|
+
# if so, then the app is deleted, otherwise the reaper sleeps for a period of time after which
|
108
|
+
# It can try again to delete another app. The hope is that some apps will be marked as finished
|
109
|
+
# in that time
|
110
|
+
private def destroy_if_old_enough(app)
|
111
|
+
age = AppAge.new(
|
112
|
+
created_at: app["created_at"],
|
113
|
+
ttl_minutes: ENV.fetch("HATCHET_ALIVE_TTL_MINUTES", "7").to_i
|
114
|
+
)
|
115
|
+
if age.can_delete?
|
116
|
+
io.puts "WARNING: Destroying an app without maintenance mode on, app: #{app["name"]}, app_age: #{age.in_minutes} minutes"
|
117
|
+
|
118
|
+
destroy_with_log(name: app["name"], id: app["id"])
|
119
|
+
else
|
120
|
+
# We're not going to delete it yet, so put it back
|
121
|
+
@unfinished_hatchet_apps << app
|
122
|
+
|
123
|
+
# Sleep, try again later
|
124
|
+
@reaper_throttle.call(max_sleep: age.sleep_for_ttl) do |sleep_for|
|
125
|
+
io.puts <<~EOM
|
126
|
+
WARNING: Attempting to destroy an app without maintenance mode on, but it is not old enough. app: #{app["name"]}, app_age: #{age.in_minutes} minutes
|
127
|
+
This can happen if App#teardown! is not called on an application, which will leave it in an 'unfinished' state
|
128
|
+
This can also happen if you're trying to run more tests concurrently than your currently set value for HATCHET_APP_COUNT
|
129
|
+
Sleeping: #{sleep_for} seconds before trying to find another app to reap"
|
130
|
+
#{stats_string}, HATCHET_ALIVE_TTL_MINUTES=#{age.ttl_minutes}
|
131
|
+
EOM
|
132
|
+
|
133
|
+
sleep(sleep_for)
|
134
|
+
end
|
57
135
|
end
|
58
|
-
raise e
|
59
|
-
ensure
|
60
|
-
# don't forget to close the mutex; this also releases our lock
|
61
|
-
mutex.close
|
62
136
|
end
|
63
137
|
|
64
|
-
def
|
65
|
-
|
66
|
-
destroy_by_id(name: oldest["name"], id: oldest["id"], details: "Hatchet app limit: #{HATCHET_APP_LIMT}")
|
138
|
+
private def get_heroku_apps
|
139
|
+
@api_rate_limit.call.app.list
|
67
140
|
end
|
68
141
|
|
69
|
-
def
|
70
|
-
|
71
|
-
|
72
|
-
|
142
|
+
private def refresh_app_list
|
143
|
+
apps = get_heroku_apps.
|
144
|
+
map {|app| app["created_at"] = DateTime.parse(app["created_at"].to_s); app }.
|
145
|
+
sort_by { |app| app["created_at"] }.
|
146
|
+
reverse # Ascending order, oldest is last
|
147
|
+
|
148
|
+
@app_count = apps.length
|
149
|
+
|
150
|
+
@finished_hatchet_apps.clear
|
151
|
+
@unfinished_hatchet_apps.clear
|
152
|
+
apps.each do |app|
|
153
|
+
next unless app["name"].match(@regex)
|
154
|
+
|
155
|
+
if app["maintenance"]
|
156
|
+
@finished_hatchet_apps << app
|
157
|
+
else
|
158
|
+
@unfinished_hatchet_apps << app
|
159
|
+
end
|
73
160
|
end
|
74
161
|
end
|
75
162
|
|
76
|
-
def
|
77
|
-
|
78
|
-
|
163
|
+
private def destroy_with_log(name:, id:)
|
164
|
+
message = "Destroying #{name.inspect}: #{id}, #{stats_string}"
|
165
|
+
|
79
166
|
@api_rate_limit.call.app.delete(id)
|
80
|
-
end
|
81
167
|
|
82
|
-
|
168
|
+
io.puts message
|
169
|
+
rescue Excon::Error::NotFound => e
|
170
|
+
body = e.response.body
|
171
|
+
request_id = e.response.headers["Request-Id"]
|
172
|
+
if body =~ /Couldn\'t find that app./
|
173
|
+
io.puts "Duplicate destoy attempted #{name.inspect}: #{id}, status: 404, request_id: #{request_id}"
|
174
|
+
raise AlreadyDeletedError.new
|
175
|
+
else
|
176
|
+
raise e
|
177
|
+
end
|
178
|
+
rescue Excon::Error::Forbidden => e
|
179
|
+
request_id = e.response.headers["Request-Id"]
|
180
|
+
io.puts "Duplicate destoy attempted #{name.inspect}: #{id}, status: 403, request_id: #{request_id}"
|
181
|
+
raise AlreadyDeletedError.new
|
182
|
+
end
|
83
183
|
|
84
|
-
def
|
85
|
-
@
|
184
|
+
private def hatchet_app_count
|
185
|
+
@finished_hatchet_apps.length + @unfinished_hatchet_apps.length
|
86
186
|
end
|
87
187
|
end
|
88
188
|
end
|
189
|
+
|
190
|
+
require_relative "reaper/app_age"
|
191
|
+
require_relative "reaper/reaper_throttle"
|
@@ -0,0 +1,49 @@
|
|
1
|
+
module Hatchet
|
2
|
+
class Reaper
|
3
|
+
# Class for figuring out how old a given time is relative to another time
|
4
|
+
#
|
5
|
+
# Expects inputs as a DateTime instance
|
6
|
+
#
|
7
|
+
# Example:
|
8
|
+
#
|
9
|
+
# time_now = DateTime.parse("2020-07-28T14:40:00Z")
|
10
|
+
# age = AppAge.new(created_at: DateTime.parse("2020-07-28T14:40:00Z"), time_now: time_now, ttl_minutes: 1)
|
11
|
+
# age.in_minutes # => 0.0
|
12
|
+
# age.too_young_to_die? # => true
|
13
|
+
# age.can_delete? # => false
|
14
|
+
# age.sleep_for_ttl #=> 60
|
15
|
+
class AppAge
  # DateTime subtraction is multiplied by this to turn the difference into seconds
  SECONDS_IN_A_DAY = 24 * 60 * 60

  attr_reader :ttl_minutes

  # created_at::  when the app was created
  # ttl_minutes:: how many minutes the app is allowed to live before it may be deleted
  # time_now::    the moment to measure against, defaults to the current time at offset 0
  def initialize(created_at:, ttl_minutes:, time_now: DateTime.now.new_offset(0))
    @seconds_ago = date_time_diff_in_seconds(time_now, created_at)
    @ttl_minutes = ttl_minutes
    @ttl_seconds = ttl_minutes * 60
  end

  # Difference between the two given times, scaled by SECONDS_IN_A_DAY
  def date_time_diff_in_seconds(now, whence)
    elapsed = now - whence
    elapsed * SECONDS_IN_A_DAY
  end

  # Inverse of #can_delete?
  def too_young_to_die?
    can_delete? ? false : true
  end

  # True once the age is strictly greater than the time to live
  def can_delete?
    @seconds_ago > @ttl_seconds
  end

  # Seconds left until the time to live is reached, 0 when it can already be deleted
  def sleep_for_ttl
    can_delete? ? 0 : @ttl_seconds - @seconds_ago
  end

  # Age expressed in minutes, rounded to two decimal places
  def in_minutes
    minutes = @seconds_ago / 60.0
    minutes.round(2)
  end
end
|
48
|
+
end
|
49
|
+
end
|
@@ -0,0 +1,55 @@
|
|
1
|
+
module Hatchet
|
2
|
+
class Reaper
|
3
|
+
# This class retains and increments a sleep value between executions
|
4
|
+
#
|
5
|
+
# Every time we pause, we increase the duration of the pause 2x. If we
|
6
|
+
# do not sleep for long enough then we will burn API requests that we don't need to make.
|
7
|
+
#
|
8
|
+
# To help prevent sleeping for too long, the reaper will sleep for a maximum amount of time
|
9
|
+
# equal to the age_sleep_for_ttl. If that happens, it's likely a fairly large value and the
|
10
|
+
# internal incremental value can be reset
|
11
|
+
#
|
12
|
+
# Example:
|
13
|
+
#
|
14
|
+
# reaper_throttle = ReaperThrottle.new(initial_sleep: 2)
|
15
|
+
# reaper_throttle.call(max_sleep: 5) do |sleep_for|
|
16
|
+
# puts sleep_for # => 2
|
17
|
+
# end
|
18
|
+
# reaper_throttle.call(max_sleep: 5) do |sleep_for|
|
19
|
+
# puts sleep_for # => 4
|
20
|
+
# end
|
21
|
+
# reaper_throttle.call(max_sleep: 5) do |sleep_for|
|
22
|
+
# puts sleep_for # => 5
|
23
|
+
# end
|
24
|
+
#
|
25
|
+
# # The throttle is now reset since it hit the max_sleep value
|
26
|
+
#
|
27
|
+
# reaper_throttle.call(max_sleep: 5) do |sleep_for|
|
28
|
+
# puts sleep_for # => 2
|
29
|
+
# end
|
30
|
+
class ReaperThrottle
  # initial_sleep:: the first sleep value handed out, and the value restored by #reset!
  def initialize(initial_sleep: )
    @initial_sleep = initial_sleep
    @sleep_for = @initial_sleep
  end

  # Yields the current sleep value, capped at max_sleep, to the given block.
  #
  # After the block runs, the stored value is reset if the cap was applied,
  # otherwise it is doubled for the next call.
  #
  # Raises a RuntimeError when no block is given.
  def call(max_sleep: )
    raise "Must call with a block" unless block_given?

    duration = @sleep_for <= max_sleep ? @sleep_for : max_sleep

    yield duration

    # The cap was hit, start over from the initial value
    return reset! if duration < @sleep_for

    @sleep_for = @sleep_for * 2
  end

  # Restores the stored sleep value to the initial one
  def reset!
    @sleep_for = @initial_sleep
  end
end
|
54
|
+
end
|
55
|
+
end
|
@@ -0,0 +1,71 @@
|
|
1
|
+
module Hatchet
|
2
|
+
# A class for throttling non-http resources
|
3
|
+
#
|
4
|
+
# Non-http calls can be rate-limited for example shell calls to `heroku run ` and `git push heroku`
|
5
|
+
# this class provides an easy interface to leverage the rate throttling behavior baked into `PlatformAPI`
|
6
|
+
# for things that do not have a real associated web request
|
7
|
+
#
|
8
|
+
# Example:
|
9
|
+
#
|
10
|
+
# output = ""
|
11
|
+
# ShellThrottle.new(platform_api: @platform_api).call do
|
12
|
+
# output = `git push heroku main`
|
13
|
+
# throw(:throttle) if output.match?(/reached the API rate limit/)
|
14
|
+
# end
|
15
|
+
# puts output
|
16
|
+
#
|
17
|
+
# In this example `git push heroku main` will retry and backoff until the output no longer matches `reached the API rate limit`.
|
18
|
+
#
|
19
|
+
class ShellThrottle
  # platform_api:: client whose `rate_limit.info["remaining"]` value is read
  #                when building the fake responses below
  def initialize(platform_api: )
    @platform_api = platform_api
  end

  # Runs the given block inside `PlatformAPI.rate_throttle`.
  #
  # When the block does `throw(:throttle)`, a fake 429 response is handed
  # to the rate throttle so that it treats the attempt as rate limited
  # (the retry/backoff behavior itself lives in PlatformAPI and is not
  # visible here).
  #
  # Returns the value of the block from the attempt that was not throttled.
  def call
    out = nil
    PlatformAPI.rate_throttle.call do
      catch(:throttle) do
        out = yield
        # Leave the method as soon as the block finishes without throwing.
        # A bare `return` here would make the method return nil and
        # silently drop the block's value.
        return out
      end

      # Only reached when the block threw :throttle
      try_again
    end
    return out
  end

  # Fake response representing a successful call
  private def success
    FakeResponse.new(status: 200, remaining: remaining)
  end

  # Fake response representing a rate limited call
  private def try_again
    FakeResponse.new(status: 429, remaining: remaining)
  end

  # Value reported under the "remaining" key of the platform API rate limit info
  private def remaining
    @platform_api.rate_limit.info["remaining"]
  end


  # Helper class to be used along with the PlatformAPI.rate_throttle interface
  # that expects a response object
  #
  # Example:
  #
  #   FakeResponse.new(status: 200, remaining: 2).status #=> 200
  #   FakeResponse.new(status: 200, remaining: 2).headers["RateLimit-Remaining"] #=> 2
  class FakeResponse
    attr_reader :status, :headers

    # status::    value exposed through #status
    # remaining:: value exposed through the "RateLimit-Remaining" header
    def initialize(status:, remaining: )
      @status = status

      @headers = {
        "RateLimit-Remaining" => remaining,
        "RateLimit-Multiplier" => 1,
        "Content-Type" => "text/plain".freeze
      }
    end
  end
end
|
71
|
+
end
|