outerbounds 0.3.179rc5__py3-none-any.whl → 0.3.180rc0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- outerbounds/_vendor/spinner/__init__.py +4 -0
- outerbounds/_vendor/spinner/spinners.py +478 -0
- outerbounds/_vendor/spinner.LICENSE +21 -0
- outerbounds/apps/_state_machine.py +358 -0
- outerbounds/apps/app_cli.py +587 -36
- outerbounds/apps/capsule.py +324 -68
- outerbounds/apps/cli_to_config.py +9 -1
- outerbounds/apps/config_schema.yaml +5 -0
- outerbounds/apps/utils.py +132 -0
- outerbounds/apps/validations.py +0 -12
- outerbounds/command_groups/cli.py +0 -2
- {outerbounds-0.3.179rc5.dist-info → outerbounds-0.3.180rc0.dist-info}/METADATA +3 -3
- {outerbounds-0.3.179rc5.dist-info → outerbounds-0.3.180rc0.dist-info}/RECORD +15 -12
- outerbounds/command_groups/flowprojects_cli.py +0 -137
- {outerbounds-0.3.179rc5.dist-info → outerbounds-0.3.180rc0.dist-info}/WHEEL +0 -0
- {outerbounds-0.3.179rc5.dist-info → outerbounds-0.3.180rc0.dist-info}/entry_points.txt +0 -0
outerbounds/apps/capsule.py
CHANGED
@@ -8,62 +8,61 @@ from typing import Optional, List, Dict, Any, Tuple
|
|
8
8
|
from .utils import TODOException, safe_requests_wrapper, MaximumRetriesExceeded
|
9
9
|
from .app_config import AppConfig, CAPSULE_DEBUG, AuthType
|
10
10
|
from . import experimental
|
11
|
+
from ._state_machine import (
|
12
|
+
_capsule_worker_semantic_status,
|
13
|
+
_capsule_worker_status_diff,
|
14
|
+
CapsuleWorkerSemanticStatus,
|
15
|
+
WorkerStatus,
|
16
|
+
DEPLOYMENT_READY_CONDITIONS,
|
17
|
+
)
|
11
18
|
|
12
19
|
|
13
20
|
class CapsuleStateMachine:
|
14
21
|
"""
|
15
|
-
|
16
|
-
|
17
|
-
|
22
|
+
- Every capsule create call will return an `identifier` and a `version` of the object.
|
23
|
+
- Each update call will return a new version.
|
24
|
+
- The status.currentlyServedVersion will be the version that is currently serving traffic.
|
25
|
+
- The status.updateInProgress will be True if an upgrade is in progress.
|
26
|
+
|
27
|
+
CapsuleState Transition:
|
28
|
+
- Every capsule create call will return an `identifier` and a `version` of the object.
|
29
|
+
- Happy Path:
|
30
|
+
- First time Create :
|
31
|
+
- wait for status.updateInProgress to be set to False
|
32
|
+
- (interleaved) Poll the worker endpoints to check their status
|
33
|
+
- showcase how many workers are coming up if things are on the cli side.
|
34
|
+
- If the user has set some flag like `--dont-wait-to-fully-finish` then we check the `status.currentlyServedVersion` to see if even one replica is ready to
|
35
|
+
serve traffic.
|
36
|
+
- once the status.updateInProgress is set to False, it means that the replicas are ready
|
37
|
+
- Upgrade:
|
38
|
+
- wait for status.updateInProgress to be set to False
|
39
|
+
- (interleaved) Poll the worker endpoints to check their status and signal the user the number of replicas coming up
|
40
|
+
- If the user has set some flag like `--dont-wait-to-fully-finish` then we check the `status.currentlyServedVersion` to see if even one replica is ready to
|
41
|
+
serve traffic.
|
42
|
+
- Unhappy Path:
|
43
|
+
- First time Create :
|
44
|
+
- wait for status.updateInProgress to be set to False,
|
45
|
+
- (interleaved) Poll the workers to check their status.
|
46
|
+
- If the worker pertaining to the current deployment instance version is crashlooping then crash the deployment process with the error messages and logs.
|
47
|
+
- Upgrade:
|
48
|
+
- wait for status.updateInProgress to be set to False,
|
49
|
+
- (interleaved) Poll the workers to check their status.
|
50
|
+
- If the worker pertaining to the current deployment instance version is crashlooping then crash the deployment process with the error messages and logs.
|
18
51
|
|
19
|
-
|
52
|
+
"""
|
20
53
|
|
21
|
-
def __init__(self, capsule_id: str):
|
54
|
+
def __init__(self, capsule_id: str, current_deployment_instance_version: str):
|
22
55
|
self._capsule_id = capsule_id
|
23
56
|
self._status_trail = []
|
24
|
-
|
25
|
-
def is_completely_new_capsule(self):
|
26
|
-
# This is a heuristic. Not a fully tested.
|
27
|
-
# If we create a completely new capsule then the status
|
28
|
-
# field might be a completely empty dictionary.
|
29
|
-
assert (
|
30
|
-
len(self._status_trail) > 0
|
31
|
-
), "status_trail cannot be none to infer if its a new capsule"
|
32
|
-
return self._empty_status(self._status_trail[0].get("status"))
|
57
|
+
self._current_deployment_instance_version = current_deployment_instance_version
|
33
58
|
|
34
59
|
def get_status_trail(self):
|
35
60
|
return self._status_trail
|
36
61
|
|
37
|
-
@staticmethod
|
38
|
-
def _empty_status(status):
|
39
|
-
if json.dumps(status) == "{}":
|
40
|
-
return True
|
41
|
-
return False
|
42
|
-
|
43
|
-
@staticmethod
|
44
|
-
def _parse_conditions(conditions):
|
45
|
-
curr_conditons = {}
|
46
|
-
for condition in conditions:
|
47
|
-
curr_conditons[condition["type"]] = condition["status"]
|
48
|
-
return curr_conditons
|
49
|
-
|
50
62
|
def add_status(self, status: dict):
|
51
63
|
assert type(status) == dict, "TODO: Make this check somewhere else"
|
52
64
|
self._status_trail.append({"timestamp": time.time(), "status": status})
|
53
65
|
|
54
|
-
@staticmethod
|
55
|
-
def _condition_change_emoji(previous_condition_status, current_condition_status):
|
56
|
-
if previous_condition_status == current_condition_status:
|
57
|
-
if previous_condition_status == "True":
|
58
|
-
return "✅"
|
59
|
-
else:
|
60
|
-
return "❌"
|
61
|
-
if previous_condition_status == "True" and current_condition_status == "False":
|
62
|
-
return "🔴 --> 🟢"
|
63
|
-
if previous_condition_status == "False" and current_condition_status == "True":
|
64
|
-
return "🚀"
|
65
|
-
return "🟡"
|
66
|
-
|
67
66
|
@property
|
68
67
|
def current_status(self):
|
69
68
|
return self._status_trail[-1].get("status")
|
@@ -84,6 +83,14 @@ class CapsuleStateMachine:
|
|
84
83
|
return f"https://{url}"
|
85
84
|
return None
|
86
85
|
|
86
|
+
@property
|
87
|
+
def update_in_progress(self):
|
88
|
+
return self.current_status.get("updateInProgress", False)
|
89
|
+
|
90
|
+
@property
|
91
|
+
def currently_served_version(self):
|
92
|
+
return self.current_status.get("currentlyServedVersion", None)
|
93
|
+
|
87
94
|
@property
|
88
95
|
def ready_to_serve_traffic(self):
|
89
96
|
if self.current_status.get("readyToServeTraffic", False):
|
@@ -97,17 +104,7 @@ class CapsuleStateMachine:
|
|
97
104
|
return self.current_status.get("availableReplicas", 0)
|
98
105
|
|
99
106
|
def report_current_status(self, logger):
|
100
|
-
|
101
|
-
return
|
102
|
-
previous_status, current_status = self._status_trail[-2].get(
|
103
|
-
"status"
|
104
|
-
), self._status_trail[-1].get("status")
|
105
|
-
if self._empty_status(current_status):
|
106
|
-
return
|
107
|
-
|
108
|
-
if self._empty_status(previous_status):
|
109
|
-
logger("💊 %s Deployment has started ... 🚀" % self._capsule_id)
|
110
|
-
return
|
107
|
+
pass
|
111
108
|
|
112
109
|
def check_for_debug(self, state_dir: str):
|
113
110
|
if CAPSULE_DEBUG:
|
@@ -118,6 +115,113 @@ class CapsuleStateMachine:
|
|
118
115
|
json.dump(self._status_trail, f, indent=4)
|
119
116
|
|
120
117
|
|
118
|
+
class CapsuleWorkersStateMachine:
|
119
|
+
def __init__(
|
120
|
+
self,
|
121
|
+
capsule_id: str,
|
122
|
+
end_state_capsule_version: str,
|
123
|
+
deployment_mode: str = DEPLOYMENT_READY_CONDITIONS.ATLEAST_ONE_RUNNING,
|
124
|
+
minimum_replicas: int = 1,
|
125
|
+
):
|
126
|
+
self._capsule_id = capsule_id
|
127
|
+
self._end_state_capsule_version = end_state_capsule_version
|
128
|
+
self._deployment_mode = deployment_mode
|
129
|
+
self._minimum_replicas = minimum_replicas
|
130
|
+
self._status_trail = []
|
131
|
+
|
132
|
+
def get_status_trail(self):
|
133
|
+
return self._status_trail
|
134
|
+
|
135
|
+
def add_status(self, worker_list_response: List[WorkerStatus]):
|
136
|
+
"""
|
137
|
+
worker_list_response: List[Dict[str, Any]]
|
138
|
+
[
|
139
|
+
{
|
140
|
+
"workerId": "c-4pqikm-659dd9ccdc-5hcwz",
|
141
|
+
"phase": "Running",
|
142
|
+
"activity": 0,
|
143
|
+
"activityDataAvailable": false,
|
144
|
+
"version": "0xhgaewiqb"
|
145
|
+
},
|
146
|
+
{
|
147
|
+
"workerId": "c-4pqikm-b8559688b-xk2jh",
|
148
|
+
"phase": "Pending",
|
149
|
+
"activity": 0,
|
150
|
+
"activityDataAvailable": false,
|
151
|
+
"version": "421h48qh95"
|
152
|
+
}
|
153
|
+
]
|
154
|
+
"""
|
155
|
+
self._status_trail.append(
|
156
|
+
{"timestamp": time.time(), "status": worker_list_response}
|
157
|
+
)
|
158
|
+
|
159
|
+
def check_for_debug(self, state_dir: str):
|
160
|
+
if CAPSULE_DEBUG:
|
161
|
+
debug_path = os.path.join(
|
162
|
+
state_dir, f"debug_capsule_workers_{self._capsule_id}_trail.json"
|
163
|
+
)
|
164
|
+
with open(debug_path, "w") as f:
|
165
|
+
json.dump(self._status_trail, f, indent=4)
|
166
|
+
|
167
|
+
status_path = os.path.join(
|
168
|
+
state_dir, f"debug_capsule_workers_{self._capsule_id}_status.json"
|
169
|
+
)
|
170
|
+
with open(status_path, "w") as f:
|
171
|
+
json.dump(self.current_version_deployment_status(), f, indent=4)
|
172
|
+
|
173
|
+
def report_current_status(self, logger):
|
174
|
+
if len(self._status_trail) == 0:
|
175
|
+
return
|
176
|
+
older_status = None
|
177
|
+
if len(self._status_trail) >= 2:
|
178
|
+
older_status = _capsule_worker_semantic_status(
|
179
|
+
self._status_trail[-2].get("status"),
|
180
|
+
self._end_state_capsule_version,
|
181
|
+
self._minimum_replicas,
|
182
|
+
)
|
183
|
+
current_status = self.current_version_deployment_status()
|
184
|
+
changes = _capsule_worker_status_diff(current_status, older_status)
|
185
|
+
if len(changes) > 0:
|
186
|
+
logger(*changes)
|
187
|
+
|
188
|
+
@property
|
189
|
+
def current_status(self) -> List[WorkerStatus]:
|
190
|
+
return self._status_trail[-1].get("status")
|
191
|
+
|
192
|
+
def current_version_deployment_status(self) -> CapsuleWorkerSemanticStatus:
|
193
|
+
return _capsule_worker_semantic_status(
|
194
|
+
self.current_status, self._end_state_capsule_version, self._minimum_replicas
|
195
|
+
)
|
196
|
+
|
197
|
+
@property
|
198
|
+
def is_terminal_state(self):
|
199
|
+
return any(
|
200
|
+
s is not None for s in [self.is_success_state, self.is_failure_state]
|
201
|
+
)
|
202
|
+
|
203
|
+
@property
|
204
|
+
def is_success_state(self):
|
205
|
+
status = self.current_version_deployment_status()
|
206
|
+
if self._deployment_mode == DEPLOYMENT_READY_CONDITIONS.ATLEAST_ONE_RUNNING:
|
207
|
+
return status["status"]["at_least_one_running"]
|
208
|
+
elif self._deployment_mode == DEPLOYMENT_READY_CONDITIONS.ALL_RUNNING:
|
209
|
+
return status["status"]["all_running"]
|
210
|
+
elif self._deployment_mode == DEPLOYMENT_READY_CONDITIONS.FULLY_FINISHED:
|
211
|
+
return (
|
212
|
+
status["status"]["current_info"]["running"] == self._minimum_replicas
|
213
|
+
and status["status"]["current_info"]["pending"] == 0
|
214
|
+
and status["status"]["current_info"]["crashlooping"] == 0
|
215
|
+
)
|
216
|
+
else:
|
217
|
+
raise ValueError(f"Unknown deployment mode: {self._deployment_mode}")
|
218
|
+
|
219
|
+
@property
|
220
|
+
def is_failure_state(self):
|
221
|
+
status = self.current_version_deployment_status()
|
222
|
+
return status["status"]["at_least_one_crashlooping"]
|
223
|
+
|
224
|
+
|
121
225
|
class CapsuleInput:
|
122
226
|
@classmethod
|
123
227
|
def construct_exec_command(cls, commands: list[str]):
|
@@ -220,6 +324,7 @@ class CapsuleInput:
|
|
220
324
|
],
|
221
325
|
"port": app_config.get_state("port"),
|
222
326
|
"displayName": app_config.get_state("name"),
|
327
|
+
"forceUpdate": app_config.get_state("force_upgrade", False),
|
223
328
|
}
|
224
329
|
|
225
330
|
|
@@ -245,6 +350,15 @@ class CapsuleApiException(Exception):
|
|
245
350
|
)
|
246
351
|
|
247
352
|
|
353
|
+
class CapsuleDeploymentException(Exception):
|
354
|
+
def __init__(self, capsule_id: str, message: str):
|
355
|
+
self.capsule_id = capsule_id
|
356
|
+
self.message = message
|
357
|
+
|
358
|
+
def __str__(self):
|
359
|
+
return f"CapsuleDeploymentException: {self.capsule_id} \n\n {self.message}"
|
360
|
+
|
361
|
+
|
248
362
|
class CapsuleApi:
|
249
363
|
def __init__(self, base_url: str, perimeter: str):
|
250
364
|
self._base_url = self._create_base_url(base_url, perimeter)
|
@@ -391,12 +505,18 @@ class CapsuleApi:
|
|
391
505
|
message="Capsule JSON decode failed",
|
392
506
|
)
|
393
507
|
|
394
|
-
def logs(
|
508
|
+
def logs(
|
509
|
+
self, capsule_id: str, worker_id: str, previous: bool = False
|
510
|
+
) -> List[str]:
|
395
511
|
_url = os.path.join(self._base_url, capsule_id, "workers", worker_id, "logs")
|
512
|
+
options = None
|
513
|
+
if previous:
|
514
|
+
options = {"previous": True}
|
396
515
|
response = self._wrapped_api_caller(
|
397
516
|
requests.get,
|
398
517
|
_url,
|
399
518
|
retryable_status_codes=[409], # todo : verify me
|
519
|
+
params=options,
|
400
520
|
)
|
401
521
|
try:
|
402
522
|
return response.json().get("logs", []) or []
|
@@ -411,9 +531,9 @@ class CapsuleApi:
|
|
411
531
|
|
412
532
|
|
413
533
|
def list_and_filter_capsules(
|
414
|
-
|
534
|
+
capsule_api: CapsuleApi, project, branch, name, tags, auth_type, capsule_id
|
415
535
|
):
|
416
|
-
capsules =
|
536
|
+
capsules = capsule_api.list()
|
417
537
|
|
418
538
|
def _tags_match(tags, key, value):
|
419
539
|
for t in tags:
|
@@ -473,11 +593,20 @@ class CapsuleDeployer:
|
|
473
593
|
base_url: str,
|
474
594
|
create_timeout: int = 60 * 5,
|
475
595
|
debug_dir: Optional[str] = None,
|
596
|
+
success_terminal_state_condition: str = DEPLOYMENT_READY_CONDITIONS.ATLEAST_ONE_RUNNING,
|
597
|
+
readiness_wait_time: int = 20,
|
476
598
|
):
|
477
599
|
self._app_config = app_config
|
478
600
|
self._capsule_api = CapsuleApi(base_url, app_config.get_state("perimeter"))
|
479
601
|
self._create_timeout = create_timeout
|
480
602
|
self._debug_dir = debug_dir
|
603
|
+
self._capsule_deploy_response = None
|
604
|
+
self._success_terminal_state_condition = success_terminal_state_condition
|
605
|
+
self._readiness_wait_time = readiness_wait_time
|
606
|
+
|
607
|
+
@property
|
608
|
+
def capsule_api(self):
|
609
|
+
return self._capsule_api
|
481
610
|
|
482
611
|
@property
|
483
612
|
def capsule_type(self):
|
@@ -498,41 +627,168 @@ class CapsuleDeployer:
|
|
498
627
|
self._app_config, CapsuleInput.from_app_config(self._app_config)
|
499
628
|
)
|
500
629
|
|
630
|
+
@property
|
631
|
+
def current_deployment_instance_version(self):
|
632
|
+
"""
|
633
|
+
The backend `create` call returns a version of the object; that version identifies the object deployed by this instance of the deployer.
|
634
|
+
"""
|
635
|
+
if self._capsule_deploy_response is None:
|
636
|
+
return None
|
637
|
+
return self._capsule_deploy_response.get("version", None)
|
638
|
+
|
501
639
|
def create(self):
|
502
640
|
capsule_response = self._capsule_api.create(self.create_input())
|
503
641
|
self.identifier = capsule_response.get("id")
|
642
|
+
self._capsule_deploy_response = capsule_response
|
504
643
|
return self.identifier
|
505
644
|
|
506
645
|
def get(self):
|
507
646
|
return self._capsule_api.get(self.identifier)
|
508
647
|
|
509
|
-
def
|
510
|
-
|
511
|
-
|
512
|
-
|
513
|
-
|
648
|
+
def get_workers(self):
|
649
|
+
return self._capsule_api.get_workers(self.identifier)
|
650
|
+
|
651
|
+
def _backend_version_mismatch_check(
|
652
|
+
self, capsule_response: dict, current_deployment_instance_version: str
|
653
|
+
):
|
654
|
+
"""
|
655
|
+
- `capsule_response.version` contains the version of the object present in the database
|
656
|
+
- `current_deployment_instance_version` contains the version of the object that was deployed by this instance of the deployer.
|
657
|
+
In the situation where the versions of the objects no longer match, it means that the current deployment process is not giving the user the
|
658
|
+
output that they desire.
|
659
|
+
"""
|
660
|
+
if capsule_response.get("version", None) != current_deployment_instance_version:
|
661
|
+
raise CapsuleDeploymentException(
|
662
|
+
self.identifier,
|
663
|
+
f"A capsule upgrade was triggered outside current deployment instance. Current deployment version was discarded. Current deployment version: {current_deployment_instance_version} and new version: {capsule_response.get('version', None)}",
|
664
|
+
)
|
665
|
+
|
666
|
+
def _monitor_worker_readiness(
|
667
|
+
self, workers_sm: "CapsuleWorkersStateMachine", logger=print
|
668
|
+
):
|
669
|
+
""" """
|
670
|
+
for i in range(self._readiness_wait_time):
|
671
|
+
time.sleep(1)
|
672
|
+
workers_response = self.get_workers()
|
673
|
+
workers_sm.add_status(workers_response)
|
674
|
+
workers_sm.report_current_status(logger)
|
675
|
+
|
676
|
+
def _extract_logs_from_crashlooping_worker(
|
677
|
+
self, workers_sm: "CapsuleWorkersStateMachine"
|
678
|
+
):
|
679
|
+
def _extract_worker_id_of_crashlooping_worker(
|
680
|
+
workers_status: List[WorkerStatus],
|
681
|
+
):
|
682
|
+
for worker in workers_status:
|
683
|
+
if worker["phase"] == "CrashLoopBackOff":
|
684
|
+
return worker["workerId"]
|
685
|
+
return None
|
686
|
+
|
687
|
+
worker_id = _extract_worker_id_of_crashlooping_worker(workers_sm.current_status)
|
688
|
+
if worker_id is None:
|
689
|
+
return None, None
|
690
|
+
logs = self.capsule_api.logs(self.identifier, worker_id, previous=True)
|
691
|
+
return logs, worker_id
|
692
|
+
|
693
|
+
def wait_for_terminal_state(
|
694
|
+
self,
|
695
|
+
logger=print,
|
696
|
+
):
|
697
|
+
""" """
|
698
|
+
state_machine = CapsuleStateMachine(
|
699
|
+
self.identifier, self.current_deployment_instance_version
|
700
|
+
)
|
701
|
+
min_replicas = self._app_config.get_state("replicas", {}).get("min", 1)
|
702
|
+
workers_state_machine = CapsuleWorkersStateMachine(
|
703
|
+
self.identifier,
|
704
|
+
self.current_deployment_instance_version,
|
705
|
+
deployment_mode=self._success_terminal_state_condition,
|
706
|
+
minimum_replicas=min_replicas,
|
514
707
|
)
|
515
708
|
self.status = state_machine
|
516
709
|
for i in range(self._create_timeout):
|
710
|
+
time.sleep(1)
|
517
711
|
capsule_response = self.get()
|
712
|
+
workers_response = self.get_workers()
|
713
|
+
|
714
|
+
# We first need to check if someone has not upgraded the capsule under the hood and
|
715
|
+
# the current deployment instance is invalid.
|
716
|
+
self._backend_version_mismatch_check(
|
717
|
+
capsule_response, self.current_deployment_instance_version
|
718
|
+
)
|
518
719
|
state_machine.add_status(capsule_response.get("status", {}))
|
519
|
-
|
720
|
+
workers_state_machine.add_status(workers_response)
|
520
721
|
state_machine.report_current_status(logger)
|
521
|
-
|
722
|
+
|
723
|
+
workers_state_machine.report_current_status(logger)
|
724
|
+
# Deployment readiness checks will determine what is the terminal state
|
725
|
+
# of the workerstate machine. If we detect a terminal state in the workers,
|
726
|
+
# then even if the capsule upgrade is still in progress we will end up crashing
|
727
|
+
# the deployment.
|
728
|
+
if (
|
729
|
+
not state_machine.update_in_progress
|
730
|
+
and workers_state_machine.is_terminal_state
|
731
|
+
) or (workers_state_machine.is_failure_state):
|
522
732
|
logger(
|
523
|
-
"💊 %s %s
|
733
|
+
"💊 %s deployment status: %s | worker states: [success :%s | failure :%s ] "
|
524
734
|
% (
|
525
|
-
self.capsule_type,
|
526
|
-
|
527
|
-
state_machine.
|
528
|
-
|
735
|
+
self.capsule_type.title(),
|
736
|
+
"in progress"
|
737
|
+
if state_machine.update_in_progress
|
738
|
+
else "completed",
|
739
|
+
workers_state_machine.is_success_state,
|
740
|
+
workers_state_machine.is_failure_state,
|
741
|
+
)
|
529
742
|
)
|
743
|
+
if workers_state_machine.is_success_state:
|
744
|
+
# HACK : monitor the workers for N seconds to make sure they are healthy
|
745
|
+
# this is a hack. Ideally we should implement a healthcheck as a first class citizen
|
746
|
+
# but it will take some time to do that so in the meanwhile a timeout set on the cli
|
747
|
+
# side will be really helpful.
|
748
|
+
self._monitor_worker_readiness(workers_state_machine, logger)
|
749
|
+
|
750
|
+
elif workers_state_machine.is_failure_state:
|
751
|
+
# hit the logs endpoint for the worker and get the logs
|
752
|
+
# Print those logs out on the terminal
|
753
|
+
# raise an exception that should be caught gracefully by the cli
|
754
|
+
logs, worker_id = self._extract_logs_from_crashlooping_worker(
|
755
|
+
workers_state_machine
|
756
|
+
)
|
757
|
+
if logs is not None:
|
758
|
+
# todo: It would be really odd if the logs are not present and we discover something is crashlooping.
|
759
|
+
# Handle that condition later
|
760
|
+
logger(
|
761
|
+
*(
|
762
|
+
[
|
763
|
+
f"💥 Worker ID ({worker_id}) is crashlooping. Please check the following logs for more information: "
|
764
|
+
]
|
765
|
+
+ ["\t" + l["message"] for l in logs]
|
766
|
+
)
|
767
|
+
)
|
768
|
+
raise CapsuleDeploymentException(
|
769
|
+
self.identifier,
|
770
|
+
f"Worker ID ({worker_id}) is crashlooping. Please check the logs for more information.",
|
771
|
+
)
|
772
|
+
|
773
|
+
if state_machine.ready_to_serve_traffic:
|
774
|
+
logger(
|
775
|
+
"💊 %s %s is ready to serve traffic on the URL: %s"
|
776
|
+
% (
|
777
|
+
self.capsule_type,
|
778
|
+
self.identifier,
|
779
|
+
state_machine.out_of_cluster_url,
|
780
|
+
),
|
781
|
+
)
|
782
|
+
|
530
783
|
break
|
784
|
+
|
531
785
|
if self._debug_dir:
|
532
786
|
state_machine.check_for_debug(self._debug_dir)
|
787
|
+
workers_state_machine.check_for_debug(self._debug_dir)
|
533
788
|
|
534
789
|
if not self.status.ready_to_serve_traffic:
|
535
|
-
raise
|
536
|
-
|
790
|
+
raise CapsuleDeploymentException(
|
791
|
+
self.identifier,
|
792
|
+
f"Capsule {self.identifier} failed to be ready to serve traffic",
|
537
793
|
)
|
538
794
|
return capsule_response
|
@@ -6,7 +6,15 @@ def build_config_from_options(options):
|
|
6
6
|
config = {}
|
7
7
|
|
8
8
|
# Set basic fields
|
9
|
-
for key in [
|
9
|
+
for key in [
|
10
|
+
"name",
|
11
|
+
"port",
|
12
|
+
"image",
|
13
|
+
"compute_pools",
|
14
|
+
"description",
|
15
|
+
"app_type",
|
16
|
+
"force_upgrade",
|
17
|
+
]:
|
10
18
|
if options.get(key):
|
11
19
|
config[key] = options[key]
|
12
20
|
|
@@ -38,6 +38,11 @@ properties:
|
|
38
38
|
type: string
|
39
39
|
description: The description of the app to deploy.
|
40
40
|
example: "This is a description of my app."
|
41
|
+
force_upgrade: # Only used in `deploy` command
|
42
|
+
allow_union: true
|
43
|
+
type: boolean
|
44
|
+
description: Whether to force upgrade the app even if it is currently being upgraded.
|
45
|
+
example: true
|
41
46
|
app_type: # Only used in `deploy` command
|
42
47
|
allow_union: true
|
43
48
|
type: string
|
outerbounds/apps/utils.py
CHANGED
@@ -5,6 +5,138 @@ import json
|
|
5
5
|
import requests
|
6
6
|
from metaflow._vendor import click
|
7
7
|
from .app_config import CAPSULE_DEBUG
|
8
|
+
import sys
|
9
|
+
import threading
|
10
|
+
import time
|
11
|
+
import logging
|
12
|
+
import itertools
|
13
|
+
|
14
|
+
from outerbounds._vendor.spinner import (
|
15
|
+
Spinners,
|
16
|
+
)
|
17
|
+
|
18
|
+
|
19
|
+
class MultiStepSpinner:
|
20
|
+
"""
|
21
|
+
A spinner that supports multi-step progress and configurable alignment.
|
22
|
+
|
23
|
+
Parameters
|
24
|
+
----------
|
25
|
+
spinner : Spinners
|
26
|
+
Which spinner frames/interval to use.
|
27
|
+
text : str
|
28
|
+
Static text to display beside the spinner.
|
29
|
+
color : str, optional
|
30
|
+
Click color name.
|
31
|
+
align : {'left','right'}
|
32
|
+
Whether to render the spinner to the left or right (default) of the text.
|
33
|
+
"""
|
34
|
+
|
35
|
+
def __init__(
|
36
|
+
self,
|
37
|
+
spinner: Spinners = Spinners.dots,
|
38
|
+
text: str = "",
|
39
|
+
color: str = None,
|
40
|
+
align: str = "right",
|
41
|
+
file=sys.stdout,
|
42
|
+
):
|
43
|
+
cfg = spinner.value
|
44
|
+
self.frames = cfg["frames"]
|
45
|
+
self.interval = cfg["interval"] / 1000.0
|
46
|
+
self.text = text
|
47
|
+
self.color = color
|
48
|
+
if align not in ("left", "right"):
|
49
|
+
raise ValueError("align must be 'left' or 'right'")
|
50
|
+
self.align = align
|
51
|
+
self._write_file = file
|
52
|
+
# precompute clear length: max frame width + space + text length
|
53
|
+
max_frame = max(self.frames, key=len)
|
54
|
+
self.clear_len = len(self.main_text) + len(max_frame) + 1
|
55
|
+
|
56
|
+
self._stop_evt = threading.Event()
|
57
|
+
self._pause_evt = threading.Event()
|
58
|
+
self._thread = None
|
59
|
+
self._write_lock = threading.Lock()
|
60
|
+
|
61
|
+
@property
|
62
|
+
def main_text(self):
|
63
|
+
# if self.text is a callable then call it
|
64
|
+
if callable(self.text):
|
65
|
+
return self.text()
|
66
|
+
return self.text
|
67
|
+
|
68
|
+
def _spin(self):
|
69
|
+
for frame in itertools.cycle(self.frames):
|
70
|
+
if self._stop_evt.is_set():
|
71
|
+
break
|
72
|
+
if self._pause_evt.is_set():
|
73
|
+
time.sleep(0.05)
|
74
|
+
continue
|
75
|
+
|
76
|
+
# ---- Core logging critical section ----
|
77
|
+
with self._write_lock:
|
78
|
+
symbol = click.style(frame, fg=self.color) if self.color else frame
|
79
|
+
if self.align == "left":
|
80
|
+
msg = f"{symbol} {self.main_text}"
|
81
|
+
else:
|
82
|
+
msg = f"{self.main_text} {symbol}"
|
83
|
+
|
84
|
+
click.echo(msg, nl=False, file=self._write_file)
|
85
|
+
click.echo("\r", nl=False, file=self._write_file)
|
86
|
+
self._write_file.flush()
|
87
|
+
# ---- End of critical section ----
|
88
|
+
time.sleep(self.interval)
|
89
|
+
# clear the line when done
|
90
|
+
self._clear_line()
|
91
|
+
|
92
|
+
def _clear_line(self):
|
93
|
+
with self._write_lock:
|
94
|
+
click.echo(" " * self.clear_len, nl=False, file=self._write_file)
|
95
|
+
click.echo("\r", nl=False, file=self._write_file)
|
96
|
+
self._write_file.flush()
|
97
|
+
|
98
|
+
def start(self):
|
99
|
+
if self._thread and self._thread.is_alive():
|
100
|
+
return
|
101
|
+
self._stop_evt.clear()
|
102
|
+
self._pause_evt.clear()
|
103
|
+
self._thread = threading.Thread(target=self._spin, daemon=True)
|
104
|
+
self._thread.start()
|
105
|
+
|
106
|
+
def stop(self):
|
107
|
+
self._stop_evt.set()
|
108
|
+
if self._thread:
|
109
|
+
self._thread.join()
|
110
|
+
|
111
|
+
def log(self, *messages: str):
|
112
|
+
"""Pause the spinner, emit a ✔ + message, then resume."""
|
113
|
+
self._pause_evt.set()
|
114
|
+
self._clear_line()
|
115
|
+
# ---- Core logging critical section ----
|
116
|
+
with self._write_lock:
|
117
|
+
self._write_file.flush()
|
118
|
+
for message in messages:
|
119
|
+
click.echo(f"{message}", file=self._write_file, nl=True)
|
120
|
+
self._write_file.flush()
|
121
|
+
# ---- End of critical section ----
|
122
|
+
self._pause_evt.clear()
|
123
|
+
|
124
|
+
def __enter__(self):
|
125
|
+
self.start()
|
126
|
+
return self
|
127
|
+
|
128
|
+
def __exit__(self, exc_type, exc, tb):
|
129
|
+
self.stop()
|
130
|
+
|
131
|
+
|
132
|
+
class SpinnerLogHandler(logging.Handler):
|
133
|
+
def __init__(self, spinner: MultiStepSpinner, *args, **kwargs):
|
134
|
+
super().__init__(*args, **kwargs)
|
135
|
+
self.spinner = spinner
|
136
|
+
|
137
|
+
def emit(self, record):
|
138
|
+
msg = self.format(record)
|
139
|
+
self.spinner.log(msg)
|
8
140
|
|
9
141
|
|
10
142
|
class MaximumRetriesExceeded(Exception):
|