outerbounds 0.3.182rc2__py3-none-any.whl → 0.3.183rc0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- outerbounds/apps/_state_machine.py +89 -0
- outerbounds/apps/app_cli.py +40 -20
- outerbounds/apps/capsule.py +93 -63
- outerbounds/apps/utils.py +10 -4
- {outerbounds-0.3.182rc2.dist-info → outerbounds-0.3.183rc0.dist-info}/METADATA +4 -4
- {outerbounds-0.3.182rc2.dist-info → outerbounds-0.3.183rc0.dist-info}/RECORD +8 -8
- {outerbounds-0.3.182rc2.dist-info → outerbounds-0.3.183rc0.dist-info}/WHEEL +0 -0
- {outerbounds-0.3.182rc2.dist-info → outerbounds-0.3.183rc0.dist-info}/entry_points.txt +0 -0
@@ -137,6 +137,19 @@ class _capsuleDeployerStateMachine:
|
|
137
137
|
from typing import TypedDict
|
138
138
|
|
139
139
|
|
140
|
+
class AccessInfo(TypedDict):
|
141
|
+
outOfClusterURL: str
|
142
|
+
inClusterURL: str
|
143
|
+
|
144
|
+
|
145
|
+
class CapsuleStatus(TypedDict):
|
146
|
+
availableReplicas: int
|
147
|
+
readyToServeTraffic: bool
|
148
|
+
accessInfo: AccessInfo
|
149
|
+
updateInProgress: bool
|
150
|
+
currentlyServedVersion: str
|
151
|
+
|
152
|
+
|
140
153
|
class WorkerStatus(TypedDict):
|
141
154
|
workerId: str
|
142
155
|
phase: str
|
@@ -196,6 +209,82 @@ class DEPLOYMENT_READY_CONDITIONS:
|
|
196
209
|
# `ASYNC` implies that the deployment will be assumed ready after the URL is minted and the worker statuses are not checked.
|
197
210
|
ASYNC = "async"
|
198
211
|
|
212
|
+
@classmethod
|
213
|
+
def check_failure_condition(
|
214
|
+
cls,
|
215
|
+
capsule_status: CapsuleStatus,
|
216
|
+
worker_semantic_status: "CapsuleWorkerSemanticStatus",
|
217
|
+
) -> bool:
|
218
|
+
"""
|
219
|
+
Check if the deployment has failed based on the current capsule and worker status.
|
220
|
+
"""
|
221
|
+
return worker_semantic_status["status"]["at_least_one_crashlooping"]
|
222
|
+
|
223
|
+
@classmethod
|
224
|
+
def check_readiness_condition(
|
225
|
+
cls,
|
226
|
+
capsule_status: CapsuleStatus,
|
227
|
+
worker_semantic_status: "CapsuleWorkerSemanticStatus",
|
228
|
+
readiness_condition: str,
|
229
|
+
) -> Tuple[bool, bool]:
|
230
|
+
"""
|
231
|
+
Check if the deployment readiness condition is satisfied based on current capsule and worker status.
|
232
|
+
|
233
|
+
This method evaluates whether a deployment has reached its desired ready state according to
|
234
|
+
the specified readiness condition. Different conditions have different criteria for what
|
235
|
+
constitutes a "ready" deployment.
|
236
|
+
|
237
|
+
Parameters
|
238
|
+
----------
|
239
|
+
capsule_status : CapsuleStatus
|
240
|
+
The current status of the capsule deployment, including update progress information.
|
241
|
+
worker_semantic_status : CapsuleWorkerSemanticStatus
|
242
|
+
Semantic status information about the workers, including counts and states.
|
243
|
+
readiness_condition : str
|
244
|
+
The readiness condition to evaluate. Must be one of the class constants:
|
245
|
+
- ATLEAST_ONE_RUNNING: At least one worker is running and update is not in progress
|
246
|
+
- ALL_RUNNING: All required workers are running and update is not in progress
|
247
|
+
- FULLY_FINISHED: All workers running with no pending/crashlooping workers and update is not in progress
|
248
|
+
- ASYNC: Deployment ready when update is no longer in progress
|
249
|
+
|
250
|
+
Returns
|
251
|
+
-------
|
252
|
+
Tuple[bool, bool]
|
253
|
+
A tuple containing:
|
254
|
+
- First element: Boolean indicating if the readiness condition is satisfied
|
255
|
+
- Second element: Boolean indicating if additional worker readiness checks
|
256
|
+
should be performed (False for ASYNC mode, True for all others)
|
257
|
+
|
258
|
+
Raises
|
259
|
+
------
|
260
|
+
ValueError
|
261
|
+
If an invalid readiness condition is provided.
|
262
|
+
"""
|
263
|
+
_worker_readiness_check = True
|
264
|
+
_readiness_condition_satisfied = False
|
265
|
+
if readiness_condition == cls.ATLEAST_ONE_RUNNING:
|
266
|
+
_readiness_condition_satisfied = (
|
267
|
+
worker_semantic_status["status"]["at_least_one_running"]
|
268
|
+
and not capsule_status["updateInProgress"]
|
269
|
+
)
|
270
|
+
elif readiness_condition == cls.ALL_RUNNING:
|
271
|
+
_readiness_condition_satisfied = (
|
272
|
+
worker_semantic_status["status"]["all_running"]
|
273
|
+
and not capsule_status["updateInProgress"]
|
274
|
+
)
|
275
|
+
elif readiness_condition == cls.FULLY_FINISHED:
|
276
|
+
_readiness_condition_satisfied = (
|
277
|
+
worker_semantic_status["status"]["fully_finished"]
|
278
|
+
and not capsule_status["updateInProgress"]
|
279
|
+
)
|
280
|
+
elif readiness_condition == cls.ASYNC:
|
281
|
+
_readiness_condition_satisfied = not capsule_status["updateInProgress"]
|
282
|
+
_worker_readiness_check = False
|
283
|
+
else:
|
284
|
+
raise ValueError(f"Invalid readiness condition: {readiness_condition}")
|
285
|
+
|
286
|
+
return _readiness_condition_satisfied, _worker_readiness_check
|
287
|
+
|
199
288
|
@classmethod
|
200
289
|
def docstring(cls):
|
201
290
|
return cls.__doc__
|
outerbounds/apps/app_cli.py
CHANGED
@@ -223,6 +223,7 @@ class ColorTheme:
|
|
223
223
|
LOADING_COLOR = "cyan"
|
224
224
|
BAD_COLOR = "red"
|
225
225
|
INFO_COLOR = "green"
|
226
|
+
DEBUG_COLOR = "yellow"
|
226
227
|
|
227
228
|
TL_HEADER_COLOR = "magenta"
|
228
229
|
ROW_COLOR = "bright_white"
|
@@ -809,9 +810,9 @@ def deploy(
|
|
809
810
|
ctx.obj.app_state_dir, app_config.get("name", "default")
|
810
811
|
)
|
811
812
|
|
812
|
-
def _non_spinner_logger(*msg):
|
813
|
+
def _non_spinner_logger(*msg, **kwargs):
|
813
814
|
for m in msg:
|
814
|
-
logger(m)
|
815
|
+
logger(m, **kwargs)
|
815
816
|
|
816
817
|
deploy_validations(
|
817
818
|
app_config,
|
@@ -847,14 +848,34 @@ def deploy(
|
|
847
848
|
|
848
849
|
app_config.set_state("perimeter", ctx.obj.perimeter)
|
849
850
|
|
851
|
+
capsule_spinner = None
|
852
|
+
capsule_logger = _non_spinner_logger
|
853
|
+
if not no_loader:
|
854
|
+
capsule_spinner = MultiStepSpinner(
|
855
|
+
text=lambda: _logger_styled(
|
856
|
+
"💊 Waiting for %s %s to be ready to serve traffic"
|
857
|
+
% (capsule.capsule_type.lower(), capsule.identifier),
|
858
|
+
timestamp=True,
|
859
|
+
),
|
860
|
+
color=ColorTheme.LOADING_COLOR,
|
861
|
+
)
|
862
|
+
capsule_logger = partial(_spinner_logger, capsule_spinner)
|
863
|
+
capsule_spinner.start()
|
864
|
+
|
865
|
+
_current_instance_debug_dir = os.path.join(
|
866
|
+
cache_dir, f"debug_deployment_instance_{time.time()}"
|
867
|
+
)
|
868
|
+
if CAPSULE_DEBUG:
|
869
|
+
os.makedirs(_current_instance_debug_dir, exist_ok=True)
|
850
870
|
# 2. Convert to the IR that the backend accepts
|
851
871
|
capsule = CapsuleDeployer(
|
852
872
|
app_config,
|
853
873
|
ctx.obj.api_url,
|
854
|
-
debug_dir=
|
874
|
+
debug_dir=_current_instance_debug_dir,
|
855
875
|
success_terminal_state_condition=readiness_condition,
|
856
876
|
create_timeout=max_wait_time,
|
857
877
|
readiness_wait_time=readiness_wait_time,
|
878
|
+
logger_fn=capsule_logger,
|
858
879
|
)
|
859
880
|
currently_present_capsules = list_and_filter_capsules(
|
860
881
|
capsule.capsule_api,
|
@@ -887,13 +908,13 @@ def deploy(
|
|
887
908
|
"If you wish to force upgrade, you can do so by providing the `--force-upgrade` flag."
|
888
909
|
)
|
889
910
|
raise AppConfigError(message)
|
890
|
-
|
911
|
+
capsule_logger(
|
891
912
|
f"🚀 {'' if not force_upgrade else 'Force'} Upgrading {capsule.capsule_type.lower()} `{capsule.name}`....",
|
892
913
|
color=ColorTheme.INFO_COLOR,
|
893
914
|
system_msg=True,
|
894
915
|
)
|
895
916
|
else:
|
896
|
-
|
917
|
+
capsule_logger(
|
897
918
|
f"🚀 Deploying {capsule.capsule_type.lower()} to the platform....",
|
898
919
|
color=ColorTheme.INFO_COLOR,
|
899
920
|
system_msg=True,
|
@@ -902,23 +923,9 @@ def deploy(
|
|
902
923
|
capsule.create()
|
903
924
|
_post_create_debug(capsule, cache_dir)
|
904
925
|
|
905
|
-
capsule_spinner = None
|
906
|
-
capsule_logger = _non_spinner_logger
|
907
|
-
if not no_loader:
|
908
|
-
capsule_spinner = MultiStepSpinner(
|
909
|
-
text=lambda: _logger_styled(
|
910
|
-
"💊 Waiting for %s %s to be ready to serve traffic"
|
911
|
-
% (capsule.capsule_type.lower(), capsule.identifier),
|
912
|
-
timestamp=True,
|
913
|
-
),
|
914
|
-
color=ColorTheme.LOADING_COLOR,
|
915
|
-
)
|
916
|
-
capsule_logger = partial(_spinner_logger, capsule_spinner)
|
917
|
-
capsule_spinner.start()
|
918
|
-
|
919
926
|
# We only get the `capsule_response` if the deployment has reached
|
920
927
|
# a successful terminal state.
|
921
|
-
final_status = capsule.wait_for_terminal_state(
|
928
|
+
final_status = capsule.wait_for_terminal_state()
|
922
929
|
if capsule_spinner:
|
923
930
|
capsule_spinner.stop()
|
924
931
|
|
@@ -928,6 +935,17 @@ def deploy(
|
|
928
935
|
system_msg=True,
|
929
936
|
)
|
930
937
|
|
938
|
+
if CAPSULE_DEBUG:
|
939
|
+
logger(
|
940
|
+
f"[debug] 💊 {capsule.capsule_type} {app_config.config['name']} ({capsule.identifier}) deployment status [on completion]: {final_status}",
|
941
|
+
color=ColorTheme.DEBUG_COLOR,
|
942
|
+
)
|
943
|
+
logger(
|
944
|
+
f"[debug] 💊 {capsule.capsule_type} {app_config.config['name']} ({capsule.identifier}) debug info saved to `{_current_instance_debug_dir}`",
|
945
|
+
color=ColorTheme.DEBUG_COLOR,
|
946
|
+
)
|
947
|
+
final_status["debug_dir"] = _current_instance_debug_dir
|
948
|
+
|
931
949
|
if status_file:
|
932
950
|
# Create the file if it doesn't exist
|
933
951
|
with open(status_file, "w") as f:
|
@@ -944,6 +962,8 @@ def deploy(
|
|
944
962
|
bad=True,
|
945
963
|
system_msg=True,
|
946
964
|
)
|
965
|
+
if CAPSULE_DEBUG:
|
966
|
+
raise e
|
947
967
|
exit(1)
|
948
968
|
|
949
969
|
|
outerbounds/apps/capsule.py
CHANGED
@@ -3,7 +3,9 @@ import json
|
|
3
3
|
import os
|
4
4
|
import pathlib
|
5
5
|
import requests
|
6
|
+
import sys
|
6
7
|
import time
|
8
|
+
from functools import partial
|
7
9
|
import shlex
|
8
10
|
from typing import Optional, List, Dict, Any, Tuple, Union
|
9
11
|
from .utils import TODOException, safe_requests_wrapper, MaximumRetriesExceeded
|
@@ -14,6 +16,7 @@ from ._state_machine import (
|
|
14
16
|
_capsule_worker_status_diff,
|
15
17
|
CapsuleWorkerSemanticStatus,
|
16
18
|
WorkerStatus,
|
19
|
+
CapsuleStatus,
|
17
20
|
DEPLOYMENT_READY_CONDITIONS,
|
18
21
|
)
|
19
22
|
|
@@ -60,7 +63,7 @@ class CapsuleStateMachine:
|
|
60
63
|
def get_status_trail(self):
|
61
64
|
return self._status_trail
|
62
65
|
|
63
|
-
def add_status(self, status:
|
66
|
+
def add_status(self, status: CapsuleStatus):
|
64
67
|
assert type(status) == dict, "TODO: Make this check somewhere else"
|
65
68
|
self._status_trail.append({"timestamp": time.time(), "status": status})
|
66
69
|
|
@@ -107,13 +110,10 @@ class CapsuleStateMachine:
|
|
107
110
|
def report_current_status(self, logger):
|
108
111
|
pass
|
109
112
|
|
110
|
-
def
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
)
|
115
|
-
with open(debug_path, "w") as f:
|
116
|
-
json.dump(self._status_trail, f, indent=4)
|
113
|
+
def save_debug_info(self, state_dir: str):
|
114
|
+
debug_path = os.path.join(state_dir, f"debug_capsule_{self._capsule_id}.json")
|
115
|
+
with open(debug_path, "w") as f:
|
116
|
+
json.dump(self._status_trail, f, indent=4)
|
117
117
|
|
118
118
|
|
119
119
|
class CapsuleWorkersStateMachine:
|
@@ -157,19 +157,18 @@ class CapsuleWorkersStateMachine:
|
|
157
157
|
{"timestamp": time.time(), "status": worker_list_response}
|
158
158
|
)
|
159
159
|
|
160
|
-
def
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
json.dump(self._status_trail, f, indent=4)
|
160
|
+
def save_debug_info(self, state_dir: str):
|
161
|
+
debug_path = os.path.join(
|
162
|
+
state_dir, f"debug_capsule_workers_{self._capsule_id}_trail.json"
|
163
|
+
)
|
164
|
+
with open(debug_path, "w") as f:
|
165
|
+
json.dump(self._status_trail, f, indent=4)
|
167
166
|
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
167
|
+
status_path = os.path.join(
|
168
|
+
state_dir, f"debug_capsule_workers_{self._capsule_id}_status.json"
|
169
|
+
)
|
170
|
+
with open(status_path, "w") as f:
|
171
|
+
json.dump(self.current_version_deployment_status(), f, indent=4)
|
173
172
|
|
174
173
|
def report_current_status(self, logger):
|
175
174
|
if len(self._status_trail) == 0:
|
@@ -196,29 +195,7 @@ class CapsuleWorkersStateMachine:
|
|
196
195
|
)
|
197
196
|
|
198
197
|
@property
|
199
|
-
def
|
200
|
-
return any(
|
201
|
-
s is not None for s in [self.is_success_state, self.is_failure_state]
|
202
|
-
)
|
203
|
-
|
204
|
-
@property
|
205
|
-
def is_success_state(self):
|
206
|
-
status = self.current_version_deployment_status()
|
207
|
-
if self._deployment_mode == DEPLOYMENT_READY_CONDITIONS.ATLEAST_ONE_RUNNING:
|
208
|
-
return status["status"]["at_least_one_running"]
|
209
|
-
elif self._deployment_mode == DEPLOYMENT_READY_CONDITIONS.ALL_RUNNING:
|
210
|
-
return status["status"]["all_running"]
|
211
|
-
elif self._deployment_mode == DEPLOYMENT_READY_CONDITIONS.FULLY_FINISHED:
|
212
|
-
return (
|
213
|
-
status["status"]["current_info"]["running"] == self._minimum_replicas
|
214
|
-
and status["status"]["current_info"]["pending"] == 0
|
215
|
-
and status["status"]["current_info"]["crashlooping"] == 0
|
216
|
-
)
|
217
|
-
else:
|
218
|
-
raise ValueError(f"Unknown deployment mode: {self._deployment_mode}")
|
219
|
-
|
220
|
-
@property
|
221
|
-
def is_failure_state(self):
|
198
|
+
def is_crashlooping(self) -> bool:
|
222
199
|
status = self.current_version_deployment_status()
|
223
200
|
return status["status"]["at_least_one_crashlooping"]
|
224
201
|
|
@@ -352,19 +329,24 @@ class CapsuleApiException(Exception):
|
|
352
329
|
|
353
330
|
|
354
331
|
class CapsuleDeploymentException(Exception):
|
355
|
-
def __init__(
|
332
|
+
def __init__(
|
333
|
+
self,
|
334
|
+
capsule_id: str,
|
335
|
+
message: str,
|
336
|
+
):
|
356
337
|
self.capsule_id = capsule_id
|
357
338
|
self.message = message
|
358
339
|
|
359
340
|
def __str__(self):
|
360
|
-
return f"CapsuleDeploymentException: {self.capsule_id}
|
341
|
+
return f"CapsuleDeploymentException: [{self.capsule_id}] :: {self.message}"
|
361
342
|
|
362
343
|
|
363
344
|
class CapsuleApi:
|
364
|
-
def __init__(self, base_url: str, perimeter: str):
|
345
|
+
def __init__(self, base_url: str, perimeter: str, logger_fn=None):
|
365
346
|
self._base_url = self._create_base_url(base_url, perimeter)
|
366
347
|
from metaflow.metaflow_config import SERVICE_HEADERS
|
367
348
|
|
349
|
+
self._logger_fn = logger_fn
|
368
350
|
self._request_headers = {
|
369
351
|
**{"Content-Type": "application/json", "Connection": "keep-alive"},
|
370
352
|
**(SERVICE_HEADERS or {}),
|
@@ -386,6 +368,7 @@ class CapsuleApi:
|
|
386
368
|
method_func,
|
387
369
|
*args,
|
388
370
|
headers=self._request_headers,
|
371
|
+
logger_fn=self._logger_fn,
|
389
372
|
**kwargs,
|
390
373
|
)
|
391
374
|
except MaximumRetriesExceeded as e:
|
@@ -600,10 +583,16 @@ class CapsuleDeployer:
|
|
600
583
|
debug_dir: Optional[str] = None,
|
601
584
|
success_terminal_state_condition: str = DEPLOYMENT_READY_CONDITIONS.ATLEAST_ONE_RUNNING,
|
602
585
|
readiness_wait_time: int = 20,
|
586
|
+
logger_fn=None,
|
603
587
|
):
|
604
588
|
self._app_config = app_config
|
605
|
-
self._capsule_api = CapsuleApi(
|
589
|
+
self._capsule_api = CapsuleApi(
|
590
|
+
base_url,
|
591
|
+
app_config.get_state("perimeter"),
|
592
|
+
logger_fn=logger_fn or partial(print, file=sys.stderr),
|
593
|
+
)
|
606
594
|
self._create_timeout = create_timeout
|
595
|
+
self._logger_fn = logger_fn
|
607
596
|
self._debug_dir = debug_dir
|
608
597
|
self._capsule_deploy_response = None
|
609
598
|
self._success_terminal_state_condition = success_terminal_state_condition
|
@@ -669,14 +658,19 @@ class CapsuleDeployer:
|
|
669
658
|
)
|
670
659
|
|
671
660
|
def _monitor_worker_readiness(
|
672
|
-
self,
|
661
|
+
self,
|
662
|
+
workers_sm: "CapsuleWorkersStateMachine",
|
673
663
|
):
|
674
|
-
""" """
|
664
|
+
"""returns True if the worker is crashlooping, False otherwise"""
|
665
|
+
logger = self._logger_fn or partial(print, file=sys.stderr)
|
675
666
|
for i in range(self._readiness_wait_time):
|
676
667
|
time.sleep(1)
|
677
668
|
workers_response = self.get_workers()
|
678
669
|
workers_sm.add_status(workers_response)
|
679
670
|
workers_sm.report_current_status(logger)
|
671
|
+
if workers_sm.is_crashlooping:
|
672
|
+
return True
|
673
|
+
return False
|
680
674
|
|
681
675
|
def _extract_logs_from_crashlooping_worker(
|
682
676
|
self, workers_sm: "CapsuleWorkersStateMachine"
|
@@ -697,13 +691,14 @@ class CapsuleDeployer:
|
|
697
691
|
|
698
692
|
def wait_for_terminal_state(
|
699
693
|
self,
|
700
|
-
logger=print,
|
701
694
|
):
|
702
695
|
""" """
|
696
|
+
logger = self._logger_fn or partial(print, file=sys.stderr)
|
703
697
|
state_machine = CapsuleStateMachine(
|
704
698
|
self.identifier, self.current_deployment_instance_version
|
705
699
|
)
|
706
|
-
min_replicas
|
700
|
+
# min_replicas will always be present
|
701
|
+
min_replicas = self._app_config.get_state("replicas", {}).get("min")
|
707
702
|
workers_state_machine = CapsuleWorkersStateMachine(
|
708
703
|
self.identifier,
|
709
704
|
self.current_deployment_instance_version,
|
@@ -730,10 +725,22 @@ class CapsuleDeployer:
|
|
730
725
|
# of the workerstate machine. If we detect a terminal state in the workers,
|
731
726
|
# then even if the capsule upgrade is still in progress we will end up crashing
|
732
727
|
# the deployment.
|
733
|
-
|
734
|
-
|
735
|
-
|
736
|
-
)
|
728
|
+
(
|
729
|
+
capsule_ready,
|
730
|
+
further_check_worker_readiness,
|
731
|
+
) = DEPLOYMENT_READY_CONDITIONS.check_readiness_condition(
|
732
|
+
state_machine.current_status,
|
733
|
+
workers_state_machine.current_version_deployment_status(),
|
734
|
+
self._success_terminal_state_condition,
|
735
|
+
)
|
736
|
+
|
737
|
+
failure_condition_satisfied = (
|
738
|
+
DEPLOYMENT_READY_CONDITIONS.check_failure_condition(
|
739
|
+
state_machine.current_status,
|
740
|
+
workers_state_machine.current_version_deployment_status(),
|
741
|
+
)
|
742
|
+
)
|
743
|
+
if capsule_ready or failure_condition_satisfied:
|
737
744
|
logger(
|
738
745
|
"💊 %s deployment status: %s | worker states: [success :%s | failure :%s ] "
|
739
746
|
% (
|
@@ -741,19 +748,31 @@ class CapsuleDeployer:
|
|
741
748
|
"in progress"
|
742
749
|
if state_machine.update_in_progress
|
743
750
|
else "completed",
|
744
|
-
|
745
|
-
|
751
|
+
capsule_ready,
|
752
|
+
failure_condition_satisfied,
|
746
753
|
)
|
747
754
|
)
|
748
|
-
|
755
|
+
_further_readiness_check_failed = False
|
756
|
+
if further_check_worker_readiness:
|
749
757
|
# HACK : monitor the workers for N seconds to make sure they are healthy
|
750
758
|
# this is a hack. Ideally we should implement a healthcheck as a first class citizen
|
751
759
|
# but it will take some time to do that so in the meanwhile a timeout set on the cli
|
752
760
|
# side will be really helpful.
|
753
|
-
|
761
|
+
logger(
|
762
|
+
"💊 running last minute readiness check for %s..."
|
763
|
+
% self.identifier
|
764
|
+
)
|
765
|
+
_further_readiness_check_failed = self._monitor_worker_readiness(
|
766
|
+
workers_state_machine
|
767
|
+
)
|
768
|
+
|
769
|
+
if CAPSULE_DEBUG:
|
770
|
+
logger(
|
771
|
+
f"[debug] 💊 {self.capsule_type} {self.identifier}: further_check_worker_readiness {_further_readiness_check_failed} | failure_condition_satisfied {failure_condition_satisfied}"
|
772
|
+
)
|
754
773
|
|
755
774
|
# We should still check for failure state and crash if we detect something in the readiness check
|
756
|
-
if
|
775
|
+
if failure_condition_satisfied or _further_readiness_check_failed:
|
757
776
|
# hit the logs endpoint for the worker and get the logs
|
758
777
|
# Print those logs out on the terminal
|
759
778
|
# raise an exception that should be caught gracefully by the cli
|
@@ -788,9 +807,13 @@ class CapsuleDeployer:
|
|
788
807
|
|
789
808
|
break
|
790
809
|
|
791
|
-
if self._debug_dir:
|
792
|
-
state_machine.
|
793
|
-
workers_state_machine.
|
810
|
+
if CAPSULE_DEBUG and self._debug_dir:
|
811
|
+
state_machine.save_debug_info(self._debug_dir)
|
812
|
+
workers_state_machine.save_debug_info(self._debug_dir)
|
813
|
+
if i % 3 == 0: # Every 3 seconds report the status
|
814
|
+
logger(
|
815
|
+
f"[debug] 💊 {self.capsule_type} {self.identifier} deployment status: {state_machine.current_status} | worker states: {workers_state_machine.current_status}"
|
816
|
+
)
|
794
817
|
|
795
818
|
if not self.status.ready_to_serve_traffic:
|
796
819
|
raise CapsuleDeploymentException(
|
@@ -798,6 +821,13 @@ class CapsuleDeployer:
|
|
798
821
|
f"Capsule {self.identifier} failed to be ready to serve traffic",
|
799
822
|
)
|
800
823
|
|
824
|
+
if CAPSULE_DEBUG and self._debug_dir:
|
825
|
+
state_machine.save_debug_info(self._debug_dir)
|
826
|
+
workers_state_machine.save_debug_info(self._debug_dir)
|
827
|
+
logger(
|
828
|
+
f"[debug] 💊 {self.capsule_type} {self.identifier} deployment status [on return]: {state_machine.current_status} | worker states: {workers_state_machine.current_status}"
|
829
|
+
)
|
830
|
+
|
801
831
|
return dict(
|
802
832
|
id=self.identifier,
|
803
833
|
auth_type=self.capsule_type,
|
outerbounds/apps/utils.py
CHANGED
@@ -176,6 +176,7 @@ def safe_requests_wrapper(
|
|
176
176
|
*args,
|
177
177
|
conn_error_retries=2,
|
178
178
|
retryable_status_codes=[409],
|
179
|
+
logger_fn=None,
|
179
180
|
**kwargs,
|
180
181
|
):
|
181
182
|
"""
|
@@ -206,10 +207,15 @@ def safe_requests_wrapper(
|
|
206
207
|
if response.status_code not in retryable_status_codes:
|
207
208
|
return response
|
208
209
|
if CAPSULE_DEBUG:
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
210
|
+
if logger_fn:
|
211
|
+
logger_fn(
|
212
|
+
f"[outerbounds-debug] safe_requests_wrapper: {response.url}[{requests_module_fn.__name__}] {response.status_code} {response.text}",
|
213
|
+
)
|
214
|
+
else:
|
215
|
+
print(
|
216
|
+
f"[outerbounds-debug] safe_requests_wrapper: {response.url}[{requests_module_fn.__name__}] {response.status_code} {response.text}",
|
217
|
+
file=sys.stderr,
|
218
|
+
)
|
213
219
|
_num_retries += 1
|
214
220
|
time.sleep((2 ** (_num_retries + 1)) + noise)
|
215
221
|
except requests.exceptions.ConnectionError:
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: outerbounds
|
3
|
-
Version: 0.3.
|
3
|
+
Version: 0.3.183rc0
|
4
4
|
Summary: More Data Science, Less Administration
|
5
5
|
License: Proprietary
|
6
6
|
Keywords: data science,machine learning,MLOps
|
@@ -28,9 +28,9 @@ Requires-Dist: google-auth (>=2.27.0,<3.0.0) ; extra == "gcp"
|
|
28
28
|
Requires-Dist: google-cloud-secret-manager (>=2.20.0,<3.0.0) ; extra == "gcp"
|
29
29
|
Requires-Dist: google-cloud-storage (>=2.14.0,<3.0.0) ; extra == "gcp"
|
30
30
|
Requires-Dist: metaflow-checkpoint (==0.2.1)
|
31
|
-
Requires-Dist: ob-metaflow (==2.15.
|
32
|
-
Requires-Dist: ob-metaflow-extensions (==1.1.
|
33
|
-
Requires-Dist: ob-metaflow-stubs (==6.0.3.
|
31
|
+
Requires-Dist: ob-metaflow (==2.15.18.1)
|
32
|
+
Requires-Dist: ob-metaflow-extensions (==1.1.171rc0)
|
33
|
+
Requires-Dist: ob-metaflow-stubs (==6.0.3.183rc0)
|
34
34
|
Requires-Dist: opentelemetry-distro (>=0.41b0) ; extra == "otel"
|
35
35
|
Requires-Dist: opentelemetry-exporter-otlp-proto-http (>=1.20.0) ; extra == "otel"
|
36
36
|
Requires-Dist: opentelemetry-instrumentation-requests (>=0.41b0) ; extra == "otel"
|
@@ -43,11 +43,11 @@ outerbounds/_vendor/yaml/scanner.py,sha256=ZcI8IngR56PaQ0m27WU2vxCqmDCuRjz-hr7pi
|
|
43
43
|
outerbounds/_vendor/yaml/serializer.py,sha256=8wFZRy9SsQSktF_f9OOroroqsh4qVUe53ry07P9UgCc,4368
|
44
44
|
outerbounds/_vendor/yaml/tokens.py,sha256=JBSu38wihGr4l73JwbfMA7Ks1-X84g8-NskTz7KwPmA,2578
|
45
45
|
outerbounds/apps/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
46
|
-
outerbounds/apps/_state_machine.py,sha256=
|
47
|
-
outerbounds/apps/app_cli.py,sha256=
|
46
|
+
outerbounds/apps/_state_machine.py,sha256=PaegyxSxNZxyLTxU9_kekd3MPM9sW76RZPkibeMTMfY,18314
|
47
|
+
outerbounds/apps/app_cli.py,sha256=n_NABDjdgY4ApgNvdQMmpJfPGzCwJxr_G0w6-5LZ85I,51940
|
48
48
|
outerbounds/apps/app_config.py,sha256=UHVK8JLIuW-OcGg5WxDm4QHeImPGtohD4KpJryZntC4,11307
|
49
49
|
outerbounds/apps/artifacts.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
50
|
-
outerbounds/apps/capsule.py,sha256=
|
50
|
+
outerbounds/apps/capsule.py,sha256=NC9ajD06y6U-COi-8Qw6k_N1ltbQAio2O_Xs2RTrAVA,32857
|
51
51
|
outerbounds/apps/cli_to_config.py,sha256=Thc5jXRxoU6Pr8kAVVOX-5Es5ha6y6Vh_GBzL__oI7Q,3299
|
52
52
|
outerbounds/apps/click_importer.py,sha256=nnkPOR6TKrtIpc3a5Fna1zVJoQqDZvUXlNA9CdiNKFc,995
|
53
53
|
outerbounds/apps/code_package/__init__.py,sha256=8McF7pgx8ghvjRnazp2Qktlxi9yYwNiwESSQrk-2oW8,68
|
@@ -59,7 +59,7 @@ outerbounds/apps/deployer.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,
|
|
59
59
|
outerbounds/apps/experimental/__init__.py,sha256=RUZBAyqFnX3pRQxTjNmS1-qpgQcc9xQGQD2yJh4MA_M,3349
|
60
60
|
outerbounds/apps/perimeters.py,sha256=1J1_-5legFPskv3HTRwQMpzTytE3TO8KRT2IvVOrWcQ,1584
|
61
61
|
outerbounds/apps/secrets.py,sha256=aWzcAayQEJghQgFP_qp9w6jyvan_hoL4_ceqZ0ZjLd4,6126
|
62
|
-
outerbounds/apps/utils.py,sha256=
|
62
|
+
outerbounds/apps/utils.py,sha256=C-4GLU5GHwwWHbW962Qac-wecvtdiBXezq0c8i9aJvs,7908
|
63
63
|
outerbounds/apps/validations.py,sha256=kR2eXckx0XJ4kUOOLkMRepbTh0INtL1Z8aV4-fZpfc8,678
|
64
64
|
outerbounds/cli_main.py,sha256=e9UMnPysmc7gbrimq2I4KfltggyU7pw59Cn9aEguVcU,74
|
65
65
|
outerbounds/command_groups/__init__.py,sha256=QPWtj5wDRTINDxVUL7XPqG3HoxHNvYOg08EnuSZB2Hc,21
|
@@ -78,7 +78,7 @@ outerbounds/utils/metaflowconfig.py,sha256=l2vJbgPkLISU-XPGZFaC8ZKmYFyJemlD6bwB-
|
|
78
78
|
outerbounds/utils/schema.py,sha256=lMUr9kNgn9wy-sO_t_Tlxmbt63yLeN4b0xQXbDUDj4A,2331
|
79
79
|
outerbounds/utils/utils.py,sha256=4Z8cszNob_8kDYCLNTrP-wWads_S_MdL3Uj3ju4mEsk,501
|
80
80
|
outerbounds/vendor.py,sha256=gRLRJNXtZBeUpPEog0LOeIsl6GosaFFbCxUvR4bW6IQ,5093
|
81
|
-
outerbounds-0.3.
|
82
|
-
outerbounds-0.3.
|
83
|
-
outerbounds-0.3.
|
84
|
-
outerbounds-0.3.
|
81
|
+
outerbounds-0.3.183rc0.dist-info/METADATA,sha256=wY0FgePmu807U8X1eBnglevz656zQgPuAjooGlbNpMU,1846
|
82
|
+
outerbounds-0.3.183rc0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
83
|
+
outerbounds-0.3.183rc0.dist-info/entry_points.txt,sha256=AP6rZg7y5SK9e9a9iVq0Fi9Q2KPjPZSwtZ6R98rLw-8,56
|
84
|
+
outerbounds-0.3.183rc0.dist-info/RECORD,,
|
File without changes
|
File without changes
|