outerbounds 0.3.178__py3-none-any.whl → 0.3.179rc0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,538 @@
1
+ import json
2
+ import os
3
+ import pathlib
4
+ import requests
5
+ import time
6
+ import shlex
7
+ from typing import Optional, List, Dict, Any, Tuple
8
+ from .utils import TODOException, safe_requests_wrapper, MaximumRetriesExceeded
9
+ from .app_config import AppConfig, CAPSULE_DEBUG, AuthType
10
+ from . import experimental
11
+
12
+
13
class CapsuleStateMachine:
    """
    Record the status payloads observed for one capsule and derive state from them.

    Since capsules are a relatively new concept, state transitions are inferred
    from the conditions and from the availability of certain fields in the
    status dictionary rather than from a dedicated state field.
    """

    CONDITIONS = ["Ready", "DeploymentReplicasAvailable", "IngressObjectReady"]

    def __init__(self, capsule_id: str):
        self._capsule_id = capsule_id
        # Chronological list of {"timestamp": <float>, "status": <dict>} entries.
        self._status_trail = []

    def is_completely_new_capsule(self):
        """Return True when the first recorded status was an empty dictionary.

        This is a heuristic and not fully tested: a freshly created capsule
        might report a completely empty status dictionary.

        Raises:
            AssertionError: if no status has been recorded yet.
        """
        assert (
            len(self._status_trail) > 0
        ), "status_trail cannot be none to infer if its a new capsule"
        return self._empty_status(self._status_trail[0].get("status"))

    def get_status_trail(self):
        """Return the full list of recorded status entries (not a copy)."""
        return self._status_trail

    @staticmethod
    def _empty_status(status):
        """Return True when ``status`` serializes to an empty JSON object."""
        return json.dumps(status) == "{}"

    @staticmethod
    def _parse_conditions(conditions):
        """Map each condition's ``type`` to its ``status`` value."""
        return {condition["type"]: condition["status"] for condition in conditions}

    def add_status(self, status: dict):
        """Append ``status`` to the trail, stamped with the current time.

        Raises:
            AssertionError: if ``status`` is not a dictionary.
        """
        # isinstance (rather than an exact type comparison) also accepts dict
        # subclasses such as OrderedDict.
        assert isinstance(status, dict), "TODO: Make this check somewhere else"
        self._status_trail.append({"timestamp": time.time(), "status": status})

    @staticmethod
    def _condition_change_emoji(previous_condition_status, current_condition_status):
        """Return an emoji describing how a condition moved between two polls."""
        if previous_condition_status == current_condition_status:
            # Unchanged: report the steady state.
            return "✅" if previous_condition_status == "True" else "❌"
        if previous_condition_status == "True" and current_condition_status == "False":
            # The condition degraded, so the arrow goes from green to red.
            return "🟢 --> 🔴"
        if previous_condition_status == "False" and current_condition_status == "True":
            return "🚀"
        # Any other change (for example to or from a third value).
        return "🟡"

    @property
    def current_status(self):
        """The most recently recorded status dictionary."""
        return self._status_trail[-1].get("status")

    @property
    def out_of_cluster_url(self):
        """``https://`` URL built from ``accessInfo.outOfClusterURL``, or None."""
        access_info = self.current_status.get("accessInfo", {}) or {}
        url = access_info.get("outOfClusterURL", None)
        return None if url is None else f"https://{url}"

    @property
    def in_cluster_url(self):
        """``https://`` URL built from ``accessInfo.inClusterURL``, or None."""
        access_info = self.current_status.get("accessInfo", {}) or {}
        url = access_info.get("inClusterURL", None)
        return None if url is None else f"https://{url}"

    @property
    def ready_to_serve_traffic(self):
        """True when the status flags readiness and at least one URL is known."""
        if not self.current_status.get("readyToServeTraffic", False):
            return False
        return any(
            url is not None for url in (self.out_of_cluster_url, self.in_cluster_url)
        )

    @property
    def available_replicas(self):
        """Value of ``availableReplicas`` in the latest status (0 if absent)."""
        return self.current_status.get("availableReplicas", 0)

    def report_current_status(self, logger):
        """Log a start message on the first transition from empty to non-empty status.

        Does nothing until at least two statuses have been recorded.
        """
        if len(self._status_trail) < 2:
            return
        previous_status = self._status_trail[-2].get("status")
        current_status = self._status_trail[-1].get("status")
        if self._empty_status(current_status):
            return

        if self._empty_status(previous_status):
            logger("💊 %s Deployment has started ... 🚀" % self._capsule_id)
            return

    def check_for_debug(self, state_dir: str):
        """Dump the status trail as JSON into ``state_dir`` when CAPSULE_DEBUG is set."""
        if CAPSULE_DEBUG:
            debug_path = os.path.join(
                state_dir, f"debug_capsule_{self._capsule_id}.json"
            )
            with open(debug_path, "w") as f:
                json.dump(self._status_trail, f, indent=4)
119
+
120
+
121
class CapsuleInput:
    """Helpers that translate an ``AppConfig`` into the capsule-create payload."""

    @classmethod
    def construct_exec_command(cls, commands: List[str]):
        """Wrap ``commands`` into a single-line ``bash -c`` entrypoint string.

        The commands are joined with newlines (after a leading
        ``set -eEuo pipefail``), base64-encoded, and decoded into
        ``./_ob_app_run.sh`` at container start, where the script is printed
        and then executed.
        """
        command_string = "\n".join(["set -eEuo pipefail", *commands])
        # First construct a base64 encoded string of the command.
        # One of the reasons we don't directly pass the command string to the backend with a `\n` join
        # is because the backend controller doesn't play nice when the command can be a multi-line string.
        # So we encode it to a base64 string and then decode it back to a command string at runtime to provide to
        # `bash -c`. The ideal thing to have done is to run "bash -c {shlex.quote(command_string)}" and call it a day
        # but the backend controller yields the following error:
        # `error parsing template: error converting YAML to JSON: yaml: line 111: mapping values are not allowed in this context`
        # So we go to great length to ensure the command is provided in base64 to avoid any issues with the backend controller.
        import base64

        encoded_command = base64.b64encode(command_string.encode()).decode()
        decode_cmd = f"echo {encoded_command} | base64 -d > ./_ob_app_run.sh"
        return (
            f"bash -c '{decode_cmd} && cat ./_ob_app_run.sh && bash ./_ob_app_run.sh'"
        )

    @classmethod
    def _marshal_environment_variables(cls, app_config: AppConfig):
        """Return the ``environment`` state as a list of ``{"name", "value"}`` dicts.

        Dict and list values are JSON-encoded; everything else goes through ``str``.
        """
        envs = app_config.get_state("environment", {})
        return [
            {
                "name": k,
                "value": json.dumps(v) if isinstance(v, (dict, list)) else str(v),
            }
            for k, v in envs.items()
        ]

    @classmethod
    def from_app_config(cls, app_config: AppConfig):
        """Build the capsule-create request body from ``app_config``.

        Optional sections (description, endpoint type, GPU, shared memory,
        scheduling config) are only included when the corresponding state is
        truthy.
        """
        resource_state = app_config.get_state("resources")
        replica_state = app_config.get_state("replicas", {})
        auth_state = app_config.get_state("auth")

        # Optional resource entries, appended after the mandatory ones.
        optional_resources = {}
        gpu_resource = resource_state.get("gpu")
        shared_memory = resource_state.get("shared_memory")
        if gpu_resource:
            optional_resources["gpu"] = gpu_resource
        if shared_memory:
            optional_resources["sharedMemory"] = shared_memory

        scheduling_config = {}
        compute_pools = app_config.get_state("compute_pools", None)
        if compute_pools:
            scheduling_config["schedulingConfig"] = {
                "computePools": [{"name": x} for x in compute_pools]
            }

        optional_info = {}
        description = app_config.get_state("description")
        app_type = app_config.get_state("app_type")
        if description:
            optional_info["description"] = description
        if app_type:
            optional_info["endpointType"] = app_type

        return {
            "perimeter": app_config.get_state("perimeter"),
            **optional_info,
            "codePackagePath": app_config.get_state("code_package_url"),
            "image": app_config.get_state("image"),
            "resourceIntegrations": [
                {"name": x} for x in app_config.get_state("secrets", [])
            ],
            "resourceConfig": {
                "cpu": str(resource_state.get("cpu")),
                "memory": str(resource_state.get("memory")),
                "ephemeralStorage": str(resource_state.get("disk")),
                **optional_resources,
            },
            "autoscalingConfig": {
                "minReplicas": replica_state.get("min", 1),
                "maxReplicas": replica_state.get("max", 1),
            },
            **scheduling_config,
            "containerStartupConfig": {
                "entrypoint": cls.construct_exec_command(
                    app_config.get_state("commands")
                )
            },
            "environmentVariables": cls._marshal_environment_variables(app_config),
            # "assets": [{"name": "startup-script.sh"}],
            "authConfig": {
                "authType": auth_state.get("type"),
                "publicToDeployment": auth_state.get("public"),
            },
            # Each tag is a dict; every key/value pair becomes its own entry.
            "tags": [
                dict(key=k, value=v)
                for tag in app_config.get_state("tags", [])
                for k, v in tag.items()
            ],
            "port": app_config.get_state("port"),
            "displayName": app_config.get_state("name"),
        }
224
+
225
+
226
class CapsuleApiException(Exception):
    """Error raised when a capsule API request fails.

    Attributes:
        url: URL that was requested.
        method: HTTP method name used for the request.
        status_code: HTTP status code of the response.
        text: Raw response body.
        message: Optional extra context appended to the string form.
    """

    def __init__(
        self,
        url: str,
        method: str,
        status_code: int,
        text: str,
        message: Optional[str] = None,
    ):
        # Forward everything to Exception so that ``args`` is populated; this
        # keeps repr() informative and lets the exception be pickled/unpickled.
        super().__init__(url, method, status_code, text, message)
        self.url = url
        self.method = method
        self.status_code = status_code
        self.text = text
        self.message = message

    def __str__(self):
        rendered = f"CapsuleApiException: {self.url} [{self.method}]: Status Code: {self.status_code} \n\n {self.text}"
        if self.message:
            rendered += f"\n\n {self.message}"
        return rendered
246
+
247
+
248
class CapsuleApi:
    """HTTP client for the capsules endpoints of a single perimeter."""

    def __init__(self, base_url: str, perimeter: str):
        self._base_url = self._create_base_url(base_url, perimeter)
        # Imported when the client is constructed rather than at module import.
        from metaflow.metaflow_config import SERVICE_HEADERS

        self._request_headers = {
            "Content-Type": "application/json",
            "Connection": "keep-alive",
            **(SERVICE_HEADERS or {}),
        }

    @staticmethod
    def _join_url(base: str, *segments: str) -> str:
        """Join URL path segments with forward slashes.

        Unlike ``os.path.join`` this is platform independent and never drops
        ``base`` when a segment starts with ``/``. Empty segments are skipped.
        """
        cleaned = [str(segment).strip("/") for segment in segments]
        return "/".join([base.rstrip("/")] + [segment for segment in cleaned if segment])

    @staticmethod
    def _create_base_url(base_url: str, perimeter: str):
        """Return ``<base_url>/v1/perimeters/<perimeter>/capsules``."""
        return CapsuleApi._join_url(
            base_url,
            "v1",
            "perimeters",
            perimeter,
            "capsules",
        )

    @staticmethod
    def _json_or_raise(response, url: str, method: str):
        """Decode the response body as JSON or raise ``CapsuleApiException``."""
        try:
            return response.json()
        # ValueError is the common base of json.JSONDecodeError and of the
        # decode errors requests raises (including when simplejson is used).
        except ValueError as e:
            raise CapsuleApiException(
                url,
                method,
                response.status_code,
                response.text,
                message="Capsule JSON decode failed",
            ) from e

    def _wrapped_api_caller(self, method_func, *args, **kwargs):
        """Call ``method_func`` through ``safe_requests_wrapper`` with the client headers.

        Raises:
            CapsuleApiException: when retries are exhausted or the response
                status code is 400 or above.
        """
        try:
            response = safe_requests_wrapper(
                method_func,
                *args,
                headers=self._request_headers,
                **kwargs,
            )
        except MaximumRetriesExceeded as e:
            raise CapsuleApiException(
                e.url,
                e.method,
                e.status_code,
                e.text,
                message=f"Maximum retries exceeded for {e.url} [{e.method}]",
            ) from e
        if response.status_code >= 400:
            raise CapsuleApiException(
                args[0],
                method_func.__name__,
                response.status_code,
                response.text,
            )
        return response

    def create(self, capsule_input: dict):
        """POST ``capsule_input`` as JSON and return the decoded response."""
        response = self._wrapped_api_caller(
            requests.post,
            self._base_url,
            data=json.dumps(capsule_input),
        )
        return self._json_or_raise(response, self._base_url, "post")

    def get(self, capsule_id: str):
        """Fetch one capsule and return the decoded response."""
        _url = self._join_url(self._base_url, capsule_id)
        response = self._wrapped_api_caller(
            requests.get,
            _url,
            retryable_status_codes=[409, 404],  # todo : verify me
            conn_error_retries=3,
        )
        return self._json_or_raise(response, _url, "get")

    def list(self):
        """Return the ``capsules`` list of the perimeter (empty list when null).

        Raises:
            CapsuleApiException: when the body is not JSON or lacks a
                ``capsules`` key.
        """
        response = self._wrapped_api_caller(
            requests.get,
            self._base_url,
            retryable_status_codes=[409],  # todo : verify me
            conn_error_retries=3,
        )
        response_json = self._json_or_raise(response, self._base_url, "get")
        if not isinstance(response_json, dict) or "capsules" not in response_json:
            raise CapsuleApiException(
                self._base_url,
                "get",
                response.status_code,
                response.text,
                message="Capsule list response is missing the 'capsules' key",
            )
        return response_json.get("capsules", []) or []

    def delete(self, capsule_id: str):
        """Delete a capsule; return True only when the response status is 200."""
        _url = self._join_url(self._base_url, capsule_id)
        response = self._wrapped_api_caller(
            requests.delete,
            _url,
            retryable_status_codes=[409],  # todo : verify me
        )
        # Statuses >= 400 have already been raised by _wrapped_api_caller.
        return response.status_code == 200

    def get_workers(self, capsule_id: str) -> List[Dict[str, Any]]:
        """Return the ``workers`` list of a capsule (empty list when absent or null)."""
        _url = self._join_url(self._base_url, capsule_id, "workers")
        response = self._wrapped_api_caller(
            requests.get,
            _url,
            retryable_status_codes=[409],  # todo : verify me
        )
        return self._json_or_raise(response, _url, "get").get("workers", []) or []

    def logs(self, capsule_id: str, worker_id: str) -> List[str]:
        """Return the ``logs`` list of one worker (empty list when absent or null)."""
        _url = self._join_url(self._base_url, capsule_id, "workers", worker_id, "logs")
        response = self._wrapped_api_caller(
            requests.get,
            _url,
            retryable_status_codes=[409],  # todo : verify me
        )
        return self._json_or_raise(response, _url, "get").get("logs", []) or []
411
+
412
+
413
def list_and_filter_capsules(
    api_url, perimeter, project, branch, name, tags, auth_type, capsule_id
):
    """List the capsules of ``perimeter`` and keep those matching every given filter.

    A filter whose value is falsy (``None``, empty string, empty list) is
    ignored. ``project`` and ``branch`` are matched against the capsule tags
    with keys ``"project"`` and ``"branch"``; ``tags`` is a list of
    ``{"key": ..., "value": ...}`` dicts that must all be present; ``name`` is
    matched against ``spec.displayName``; ``auth_type`` against
    ``spec.authConfig.authType``; ``capsule_id`` against ``id``.
    """
    capsules = CapsuleApi(api_url, perimeter).list()

    def _tags_match(capsule_tags, key, value):
        # True when some tag on the capsule carries exactly this key/value.
        return any(
            t.get("key") == key and t.get("value") == value for t in capsule_tags
        )

    def _all_tags_match(capsule_tags, tags_to_match):
        return all(
            _tags_match(capsule_tags, t["key"], t["value"]) for t in tags_to_match
        )

    def _matches(capsule):
        # `or {}` / `or []` also covers fields the API returns as JSON null.
        spec = capsule.get("spec") or {}
        set_tags = spec.get("tags") or []
        set_auth_type = (spec.get("authConfig") or {}).get("authType", None)

        if auth_type and set_auth_type != auth_type:
            return False
        if project and not _tags_match(set_tags, "project", project):
            return False
        if branch and not _tags_match(set_tags, "branch", branch):
            return False
        if name and spec.get("displayName", None) != name:
            return False
        if tags and not _all_tags_match(set_tags, tags):
            return False
        if capsule_id and capsule.get("id", None) != capsule_id:
            return False
        return True

    return [capsule for capsule in capsules if _matches(capsule)]
456
+
457
+
458
+ from collections import namedtuple
459
+
460
# Pairs a capsule's info payload with its list of workers.
CapsuleInfo = namedtuple("CapsuleInfo", ("info", "workers"))
461
+
462
+
463
class CapsuleDeployer:
    """Create a capsule from an ``AppConfig`` and wait for it to serve traffic."""

    # Assigned by wait_for_terminal_state(); holds the polled status history.
    status: CapsuleStateMachine

    # Capsule id returned by the API; stays None until create() has run.
    identifier = None

    # TODO: Current default timeout is very large of 5 minutes. Ideally we should have finished the deployed in less than 1 minutes.
    def __init__(
        self,
        app_config: AppConfig,
        base_url: str,
        create_timeout: int = 60 * 5,
        debug_dir: Optional[str] = None,
    ):
        """
        Args:
            app_config: Configuration the capsule is built from.
            base_url: Base URL handed to ``CapsuleApi``.
            create_timeout: Maximum number of status polls (roughly one per
                second) performed by ``wait_for_terminal_state``.
            debug_dir: Directory for status-trail debug dumps, if any.
        """
        self._app_config = app_config
        self._capsule_api = CapsuleApi(base_url, app_config.get_state("perimeter"))
        self._create_timeout = create_timeout
        self._debug_dir = debug_dir

    @property
    def capsule_type(self):
        """``"App"`` for browser auth, ``"Endpoint"`` for API auth.

        Raises:
            TODOException: for any other auth type.
        """
        auth_type = self._app_config.get_state("auth", {}).get("type", AuthType.default)
        if auth_type == AuthType.BROWSER:
            return "App"
        if auth_type == AuthType.API:
            return "Endpoint"
        raise TODOException(f"Unknown auth type: {auth_type}")

    @property
    def name(self):
        """The ``name`` state of the app config."""
        return self._app_config.get_state("name")

    def create_input(self):
        """Build the create payload and apply the experimental overrides."""
        return experimental.capsule_input_overrides(
            self._app_config, CapsuleInput.from_app_config(self._app_config)
        )

    def create(self):
        """Create the capsule, remember its id in ``identifier`` and return it."""
        capsule_response = self._capsule_api.create(self.create_input())
        self.identifier = capsule_response.get("id")
        return self.identifier

    def get(self):
        """Fetch the current API representation of the created capsule."""
        return self._capsule_api.get(self.identifier)

    def wait_for_terminal_state(self, logger=print):
        """Poll the capsule until it is ready to serve traffic.

        Polls up to ``create_timeout`` times, sleeping one second between
        polls, and returns the API response of the poll on which the capsule
        became ready.

        Raises:
            TODOException: if the capsule is not ready after the last poll.
        """
        state_machine = CapsuleStateMachine(self.identifier)
        logger(
            "💊 Waiting for %s %s to be ready to serve traffic"
            % (self.capsule_type.lower(), self.identifier)
        )
        self.status = state_machine
        for _ in range(self._create_timeout):
            capsule_response = self.get()
            # The API may report a null status; treat it like an empty one.
            state_machine.add_status(capsule_response.get("status") or {})
            # Dump before the readiness check so the final status is captured too.
            if self._debug_dir:
                state_machine.check_for_debug(self._debug_dir)
            state_machine.report_current_status(logger)
            if state_machine.ready_to_serve_traffic:
                logger(
                    "💊 %s %s is ready to serve traffic on the URL: %s"
                    % (
                        self.capsule_type,
                        self.identifier,
                        state_machine.out_of_cluster_url,
                    ),
                )
                return capsule_response
            # Only wait when another poll is actually needed.
            time.sleep(1)

        # Reached when every poll (or no poll at all) found the capsule not ready.
        raise TODOException(
            f"Capsule {self.identifier} failed to be ready to serve traffic"
        )
@@ -0,0 +1,91 @@
1
+ from . import experimental
2
+
3
+
4
def build_config_from_options(options):
    """Build an app configuration from CLI options.

    Only options that were actually provided end up in the returned dict.
    Most options are included when truthy; numeric health-check and replica
    options are included whenever they are not ``None`` so that an explicit
    ``0`` is preserved. The result is finally updated with the output of
    ``experimental.build_config_from_options``.
    """
    config = {}

    # Set basic fields
    for key in ("name", "port", "image", "compute_pools", "description", "app_type"):
        if options.get(key):
            config[key] = options[key]

    # Handle list fields
    for key in ("tags", "secrets"):
        if options.get(key):
            config[key] = list(options[key])

    # Build env dict from key-value pairs
    if options.get("envs"):
        env_dict = {}
        for env_item in options["envs"]:
            env_dict.update(env_item)
        config["environment"] = env_dict

    # Handle dependencies (only one type allowed): the first option that is
    # set wins, in the order listed here.
    dependency_sources = (
        ("dep_from_task", "from_task"),
        ("dep_from_run", "from_run"),
        ("dep_from_requirements", "from_requirements_file"),
        ("dep_from_pyproject", "from_pyproject_toml"),
    )
    # TODO: [FIX ME]: Get better CLI abstraction for pypi/conda dependencies
    for option_key, config_key in dependency_sources:
        if options.get(option_key):
            config["dependencies"] = {config_key: options[option_key]}
            break

    # Handle resources
    resources = {
        key: options[key]
        for key in ("cpu", "memory", "gpu", "storage")
        if options.get(key)
    }
    if resources:
        config["resources"] = resources

    # Handle health check options
    health_check = {}
    if options.get("health_check_enabled") is not None:
        health_check["enabled"] = options["health_check_enabled"]
    if options.get("health_check_path"):
        health_check["path"] = options["health_check_path"]
    if options.get("health_check_initial_delay") is not None:
        health_check["initial_delay_seconds"] = options["health_check_initial_delay"]
    if options.get("health_check_period") is not None:
        health_check["period_seconds"] = options["health_check_period"]
    if health_check:
        config["health_check"] = health_check

    # Handle package options
    package = {}
    if options.get("package_src_path"):
        package["src_path"] = options["package_src_path"]
    if options.get("package_suffixes"):
        package["suffixes"] = options["package_suffixes"]
    if package:
        config["package"] = package

    # Handle auth options
    auth = {}
    if options.get("auth_type"):
        auth["type"] = options["auth_type"]
    if options.get("auth_public"):
        auth["public"] = options["auth_public"]
    if auth:
        config["auth"] = auth

    # Handle replica options; `is not None` keeps an explicit 0 instead of
    # silently dropping it.
    replicas = {}
    if options.get("min_replicas") is not None:
        replicas["min"] = options["min_replicas"]
    if options.get("max_replicas") is not None:
        replicas["max"] = options["max_replicas"]
    if replicas:
        config["replicas"] = replicas

    config.update(experimental.build_config_from_options(options))

    return config
@@ -0,0 +1,3 @@
1
+ from .code_packager import CodePackager
2
+
3
+ __all__ = ["CodePackager"]