ob-metaflow-extensions 1.4.33__py2.py3-none-any.whl → 1.6.2__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. metaflow_extensions/outerbounds/plugins/__init__.py +8 -1
  2. metaflow_extensions/outerbounds/plugins/apps/core/__init__.py +8 -2
  3. metaflow_extensions/outerbounds/plugins/apps/core/_state_machine.py +6 -6
  4. metaflow_extensions/outerbounds/plugins/apps/core/app_config.py +1 -19
  5. metaflow_extensions/outerbounds/plugins/apps/core/app_deploy_decorator.py +333 -0
  6. metaflow_extensions/outerbounds/plugins/apps/core/capsule.py +150 -79
  7. metaflow_extensions/outerbounds/plugins/apps/core/config/__init__.py +4 -1
  8. metaflow_extensions/outerbounds/plugins/apps/core/config/cli_generator.py +4 -0
  9. metaflow_extensions/outerbounds/plugins/apps/core/config/config_utils.py +103 -5
  10. metaflow_extensions/outerbounds/plugins/apps/core/config/schema_export.py +12 -1
  11. metaflow_extensions/outerbounds/plugins/apps/core/config/typed_configs.py +100 -6
  12. metaflow_extensions/outerbounds/plugins/apps/core/config/typed_init_generator.py +141 -2
  13. metaflow_extensions/outerbounds/plugins/apps/core/config/unified_config.py +74 -37
  14. metaflow_extensions/outerbounds/plugins/apps/core/config_schema.yaml +6 -6
  15. metaflow_extensions/outerbounds/plugins/apps/core/dependencies.py +2 -2
  16. metaflow_extensions/outerbounds/plugins/apps/core/deployer.py +1102 -105
  17. metaflow_extensions/outerbounds/plugins/apps/core/exceptions.py +341 -0
  18. metaflow_extensions/outerbounds/plugins/apps/core/perimeters.py +42 -6
  19. metaflow_extensions/outerbounds/plugins/aws/assume_role_decorator.py +43 -3
  20. metaflow_extensions/outerbounds/plugins/fast_bakery/baker.py +10 -1
  21. metaflow_extensions/outerbounds/plugins/optuna/__init__.py +2 -1
  22. metaflow_extensions/outerbounds/plugins/snowflake/snowflake.py +37 -7
  23. metaflow_extensions/outerbounds/plugins/snowpark/snowpark.py +18 -8
  24. metaflow_extensions/outerbounds/plugins/snowpark/snowpark_cli.py +6 -0
  25. metaflow_extensions/outerbounds/plugins/snowpark/snowpark_client.py +39 -15
  26. metaflow_extensions/outerbounds/plugins/snowpark/snowpark_decorator.py +5 -2
  27. metaflow_extensions/outerbounds/plugins/snowpark/snowpark_job.py +2 -2
  28. metaflow_extensions/outerbounds/remote_config.py +20 -7
  29. metaflow_extensions/outerbounds/toplevel/apps/__init__.py +9 -0
  30. metaflow_extensions/outerbounds/toplevel/apps/exceptions.py +11 -0
  31. metaflow_extensions/outerbounds/toplevel/global_aliases_for_metaflow_package.py +1 -1
  32. metaflow_extensions/outerbounds/toplevel/ob_internal.py +1 -1
  33. {ob_metaflow_extensions-1.4.33.dist-info → ob_metaflow_extensions-1.6.2.dist-info}/METADATA +2 -2
  34. {ob_metaflow_extensions-1.4.33.dist-info → ob_metaflow_extensions-1.6.2.dist-info}/RECORD +36 -34
  35. metaflow_extensions/outerbounds/plugins/apps/app_deploy_decorator.py +0 -146
  36. metaflow_extensions/outerbounds/plugins/apps/core/app_cli.py +0 -1200
  37. {ob_metaflow_extensions-1.4.33.dist-info → ob_metaflow_extensions-1.6.2.dist-info}/WHEEL +0 -0
  38. {ob_metaflow_extensions-1.4.33.dist-info → ob_metaflow_extensions-1.6.2.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,341 @@
1
+ import json
2
+ from typing import Optional, List, Dict
3
+ from ._state_machine import LogLine
4
+
5
+
6
+ class OuterboundsBackendUnhealthyException(Exception):
7
+ """This exception is raised when the Outerbounds platform is unhealthy (5xx errors) or unreachable."""
8
+
9
+ def __init__(
10
+ self,
11
+ url: str,
12
+ method: str,
13
+ status_code: Optional[int] = None,
14
+ text: Optional[str] = None,
15
+ message: Optional[str] = None,
16
+ ):
17
+ self.url = url
18
+ self.method = method
19
+ self.status_code = status_code
20
+ self.text = text
21
+ self.message = message
22
+ super().__init__(self.message)
23
+
24
+
25
+ class OuterboundsForbiddenException(Exception):
26
+ """This exception is raised when access to an Outerbounds API is forbidden (HTTP 403)."""
27
+
28
+ def __init__(
29
+ self,
30
+ url: str,
31
+ method: str,
32
+ text: str,
33
+ ):
34
+ self.url = url
35
+ self.method = method
36
+ self.text = text
37
+ self.message = (
38
+ f"Access forbidden (HTTP 403) when calling {url}. "
39
+ "This typically means your credentials lack permission for this operation. "
40
+ "Please verify that your outerbounds / metaflow configuration has access to the "
41
+ "correct perimeter.\n"
42
+ "If you believe this is an error, contact your Outerbounds administrator."
43
+ )
44
+ super().__init__(self.message)
45
+
46
+
47
+ class OuterboundsConfigurationException(Exception):
48
+ """This exception is raised when Outerbounds configuration is missing or invalid."""
49
+
50
+ def __init__(self, missing_config: str):
51
+ self.missing_config = missing_config
52
+ self.message = (
53
+ f"Outerbounds configuration '{missing_config}' not found.\n\n"
54
+ "If running locally:\n"
55
+ " - Run: outerbounds configure <magic-string-from-outerbounds-ui>\n"
56
+ "If running remotely (e.g., in a Metaflow task):\n"
57
+ " - Ensure you have the Outerbounds distribution installed (pip install outerbounds)\n"
58
+ " and not the open-source Metaflow. The Outerbounds fork injects configuration\n"
59
+ " into remote tasks automatically which may not be present in open source metaflow.\n"
60
+ )
61
+ super().__init__(self.message)
62
+
63
+
64
+ class CapsuleApiException(Exception):
65
+ def __init__(
66
+ self,
67
+ url: str,
68
+ method: str,
69
+ status_code: int,
70
+ text: str,
71
+ message: Optional[str] = None,
72
+ ):
73
+ self.url = url
74
+ self.method = method
75
+ self.status_code = status_code
76
+ self.text = text
77
+ self.message = message
78
+
79
+ def __str__(self):
80
+ return (
81
+ f"CapsuleApiException: {self.url} [{self.method}]: Status Code: {self.status_code} \n\n {self.text}"
82
+ + (f"\n\n {self.message}" if self.message else "")
83
+ )
84
+
85
+
86
+ class CapsuleDeploymentException(Exception):
87
+ """Base exception for all capsule deployment failures."""
88
+
89
+ def __init__(
90
+ self,
91
+ capsule_id: str,
92
+ message: str,
93
+ ):
94
+ self.capsule_id = capsule_id
95
+ self.message = message
96
+ super().__init__(self.message)
97
+
98
+ def __str__(self):
99
+ return f"CapsuleDeploymentException: [{self.capsule_id}] :: {self.message}"
100
+
101
+
102
+ class CapsuleCrashLoopException(CapsuleDeploymentException):
103
+ """Raised when a worker enters CrashLoopBackOff or Failed state."""
104
+
105
+ def __init__(
106
+ self,
107
+ capsule_id: str,
108
+ worker_id: str,
109
+ logs: Optional[List[LogLine]] = None,
110
+ ):
111
+ self.worker_id = worker_id
112
+ self.logs = logs or []
113
+ message = f"Worker ID ({worker_id}) is crashlooping. Please check the logs for more information."
114
+ super().__init__(capsule_id, message)
115
+
116
+ def __str__(self):
117
+ return f"CapsuleCrashLoopException: [{self.capsule_id}] :: {self.message}"
118
+
119
+
120
+ class CapsuleReadinessException(CapsuleDeploymentException):
121
+ """Raised when capsule fails to meet readiness conditions within timeout."""
122
+
123
+ def __init__(
124
+ self,
125
+ capsule_id: str,
126
+ reason: Optional[str] = None,
127
+ ):
128
+ message = f"Capsule {capsule_id} failed to be ready to serve traffic"
129
+ if reason:
130
+ message += f": {reason}"
131
+ super().__init__(capsule_id, message)
132
+
133
+ def __str__(self):
134
+ return f"CapsuleReadinessException: [{self.capsule_id}] :: {self.message}"
135
+
136
+
137
+ class CapsuleConcurrentUpgradeException(CapsuleDeploymentException):
138
+ """Raised when a concurrent upgrade invalidates the current deployment."""
139
+
140
+ def __init__(
141
+ self,
142
+ capsule_id: str,
143
+ expected_version: str,
144
+ actual_version: str,
145
+ modified_by: Optional[str] = None,
146
+ modified_at: Optional[str] = None,
147
+ ):
148
+ self.expected_version = expected_version
149
+ self.actual_version = actual_version
150
+ self.modified_by = modified_by
151
+ self.modified_at = modified_at
152
+ message = (
153
+ f"A capsule upgrade was triggered outside current deployment instance. "
154
+ f"Expected version: {expected_version}, actual version: {actual_version}"
155
+ )
156
+ if modified_by:
157
+ message += f". Modified by: {modified_by}"
158
+ if modified_at:
159
+ message += f" at {modified_at}"
160
+ super().__init__(capsule_id, message)
161
+
162
+ def __str__(self):
163
+ return (
164
+ f"CapsuleConcurrentUpgradeException: [{self.capsule_id}] :: {self.message}"
165
+ )
166
+
167
+
168
+ class CapsuleDeletedDuringDeploymentException(CapsuleDeploymentException):
169
+ """Raised when a capsule is deleted while deployment is in progress."""
170
+
171
+ def __init__(self, capsule_id: str):
172
+ super().__init__(capsule_id, "Capsule was deleted during deployment")
173
+
174
+
175
+ class CodePackagingException(Exception):
176
+ """Exception raised when code packaging fails."""
177
+
178
+ pass
179
+
180
+
181
+ class AppNotFoundException(Exception):
182
+ pass
183
+
184
+
185
+ class AppCreationFailedException(Exception):
186
+ """Raised when app deployment submission fails due to an API error."""
187
+
188
+ def __init__(
189
+ self,
190
+ app_name: str,
191
+ status_code: int,
192
+ error_text: str,
193
+ ):
194
+ self.status_code = status_code
195
+ self.error_text = error_text
196
+ message = f"Failed to submit app deployment: HTTP {status_code} - {error_text}"
197
+ if status_code == 400:
198
+ # 400 = validation error; the app configuration is invalid and must be fixed.
199
+ message = "Invalid app deployment configuration submitted: "
200
+ try:
201
+ reason_for_failure = json.loads(error_text).get("message", error_text)
202
+ except json.JSONDecodeError:
203
+ reason_for_failure = error_text
204
+ message += reason_for_failure
205
+ message += (
206
+ "\n\nCheck your config file or CLI parameters if deploying via CLI, "
207
+ "or AppDeployer parameters if deploying programmatically."
208
+ )
209
+
210
+ self.message = message
211
+ super().__init__(message)
212
+
213
+ def __str__(self):
214
+ return f"AppCreationFailedException: {self.message}"
215
+
216
+
217
+ class AppDeploymentException(Exception):
218
+ """Base exception for all individual app deployment failures."""
219
+
220
+ def __init__(self, app_id: str, message: str):
221
+ self.app_id = app_id
222
+ self.message = message
223
+ self._deployed_app = None
224
+ super().__init__(self.message)
225
+
226
+ def __str__(self):
227
+ return f"AppDeploymentException: [{self.app_id}] :: {self.message}"
228
+
229
+ @property
230
+ def deployed_app(self):
231
+ from .deployer import DeployedApp
232
+
233
+ return DeployedApp._from_capsule_id(self.app_id)
234
+
235
+
236
+ class AppCrashLoopException(AppDeploymentException):
237
+ """Raised when an app worker enters CrashLoopBackOff or Failed state."""
238
+
239
+ def __init__(
240
+ self,
241
+ app_id: str,
242
+ worker_id: str,
243
+ logs: Optional[List] = None,
244
+ ):
245
+ self.worker_id = worker_id
246
+ self.logs = logs or []
247
+ message = f"Worker ({worker_id}) is crashlooping. Please check the logs for more information."
248
+ super().__init__(app_id, message)
249
+
250
+ def __str__(self):
251
+ return f"AppCrashLoopException: [{self.app_id}] :: {self.message}"
252
+
253
+
254
+ class AppReadinessException(AppDeploymentException):
255
+ """Raised when app fails to meet readiness conditions within timeout."""
256
+
257
+ def __init__(self, app_id: str, reason: Optional[str] = None):
258
+ message = f"App {app_id} failed to be ready to serve traffic"
259
+ if reason:
260
+ message += f": {reason}"
261
+ super().__init__(app_id, message)
262
+
263
+ def __str__(self):
264
+ return f"AppReadinessException: [{self.app_id}] :: {self.message}"
265
+
266
+
267
+ class AppUpgradeInProgressException(AppDeploymentException):
268
+ """Raised when attempting to deploy while another upgrade is already in progress."""
269
+
270
+ def __init__(
271
+ self,
272
+ app_id: str,
273
+ upgrader: Optional[str] = None,
274
+ ):
275
+ self.upgrader = upgrader
276
+ if upgrader:
277
+ message = (
278
+ f"App {app_id} is currently being upgraded by {upgrader}. "
279
+ "Use force_upgrade=True in AppDeployer to override."
280
+ )
281
+ else:
282
+ message = (
283
+ f"App {app_id} is currently being upgraded. "
284
+ "Use force_upgrade=True in AppDeployer to override."
285
+ )
286
+ super().__init__(app_id, message)
287
+
288
+ def __str__(self):
289
+ return f"AppUpgradeInProgressException: [{self.app_id}] :: {self.message}"
290
+
291
+
292
+ class AppConcurrentUpgradeException(AppDeploymentException):
293
+ """Raised when a concurrent upgrade invalidates the current deployment mid-flight."""
294
+
295
+ def __init__(
296
+ self,
297
+ app_id: str,
298
+ expected_version: str,
299
+ actual_version: str,
300
+ modified_by: Optional[str] = None,
301
+ modified_at: Optional[str] = None,
302
+ ):
303
+ self.expected_version = expected_version
304
+ self.actual_version = actual_version
305
+ self.modified_by = modified_by
306
+ self.modified_at = modified_at
307
+
308
+ modifier_info = ""
309
+ if modified_by:
310
+ modifier_info = f" by '{modified_by}'"
311
+ if modified_at:
312
+ modifier_info += f" at {modified_at}"
313
+
314
+ message = (
315
+ f"Another deployment was triggered{modifier_info} while this deployment was in progress.\n\n"
316
+ f"This deployment expected to be working with version '{expected_version}', but the app "
317
+ f"is now at version '{actual_version}'. The current deployment has been invalidated.\n\n"
318
+ "To avoid this in the future, you can either use a unique `name` for each deployment "
319
+ "or coordinate deployments to ensure concurrent upgrades to the same app don't overlap."
320
+ )
321
+ super().__init__(app_id, message)
322
+
323
+ def __str__(self):
324
+ return f"AppConcurrentUpgradeException: [{self.app_id}] :: {self.message}"
325
+
326
+
327
+ class AppDeletedDuringDeploymentException(AppDeploymentException):
328
+ """Raised when an app is deleted while deployment is in progress."""
329
+
330
+ def __init__(self, app_id: str):
331
+ message = (
332
+ f"App '{app_id}' was deleted while this deployment was in progress.\n\n"
333
+ "This can happen when another process or user deletes the app during deployment. "
334
+ "Since apps can be programmatically created and deleted, concurrent operations "
335
+ "may conflict. If you did not intend to delete this app, check for other processes "
336
+ "or users that may be managing deployments in this perimeter."
337
+ )
338
+ super().__init__(app_id, message)
339
+
340
+ def __str__(self):
341
+ return f"AppDeletedDuringDeploymentException: [{self.app_id}] :: {self.message}"
@@ -2,8 +2,15 @@ import os
2
2
  import json
3
3
  from typing import Tuple, Union
4
4
 
5
+ import requests
6
+ from .utils import safe_requests_wrapper
7
+ from .exceptions import OuterboundsConfigurationException
8
+
5
9
 
6
10
  class PerimeterExtractor:
11
+
12
+ config = None
13
+
7
14
  @classmethod
8
15
  def for_ob_cli(
9
16
  cls, config_dir: str, profile: str
@@ -40,7 +47,7 @@ class PerimeterExtractor:
40
47
  return perimeter, api_server # type: ignore
41
48
 
42
49
  @classmethod
43
- def during_metaflow_execution(cls) -> Union[Tuple[str, str], Tuple[None, None]]:
50
+ def during_programmatic_access(cls) -> Union[Tuple[str, str], Tuple[None, None]]:
44
51
  from metaflow.metaflow_config_funcs import init_config
45
52
 
46
53
  clean_url = (
@@ -55,9 +62,7 @@ class PerimeterExtractor:
55
62
  "OBP_PERIMETER", os.environ.get("OBP_PERIMETER", perimeter)
56
63
  )
57
64
  if perimeter is None:
58
- raise RuntimeError(
59
- "Perimeter not found in metaflow config or environment variables"
60
- )
65
+ raise OuterboundsConfigurationException("OBP_PERIMETER")
61
66
 
62
67
  api_server = config.get(
63
68
  "OBP_API_SERVER", os.environ.get("OBP_API_SERVER", api_server)
@@ -80,8 +85,39 @@ class PerimeterExtractor:
80
85
  api_server = integrations_url.rstrip("/integrations")
81
86
 
82
87
  if api_server is None:
88
+ raise OuterboundsConfigurationException("OBP_API_SERVER")
89
+
90
+ return perimeter, api_server
91
+
92
+ @classmethod
93
+ def config_during_programmatic_access(cls) -> dict:
94
+ #!HACK: Resolving remote configs is a PITA (all the variable piping we need to do via metaflow)
95
+ # So instead we will just derive the URL. We are in this situation because its a pain
96
+ # to load configurations at arbitrary points in the runtime.
97
+ if cls.config is not None:
98
+ return json.loads(json.dumps(cls.config)) # Return fresh copy
99
+ from metaflow.metaflow_config import SERVICE_HEADERS
100
+
101
+ perimeter, api_server = cls.during_programmatic_access()
102
+ response = safe_requests_wrapper(
103
+ requests.get,
104
+ f"{api_server}/v1/perimeters/{perimeter}/metaflowconfigs/default",
105
+ headers=SERVICE_HEADERS,
106
+ )
107
+ if response.status_code >= 400:
83
108
  raise RuntimeError(
84
- "API server not found in metaflow config or environment variables"
109
+ f"Server error: {response.text}. Please reach out to your Outerbounds support team."
85
110
  )
111
+ try:
112
+ remote_config = response.json()
86
113
 
87
- return perimeter, api_server
114
+ if not remote_config.get("config"):
115
+ raise json.JSONDecodeError
116
+ except json.JSONDecodeError:
117
+ raise RuntimeError(
118
+ "Exception retrieving remote outerbounds configuration. "
119
+ "Please reach out to Outerbounds support team with this stack trace."
120
+ )
121
+
122
+ cls.config = remote_config.get("config")
123
+ return json.loads(json.dumps(cls.config)) # Return fresh copy
@@ -25,10 +25,29 @@ class assume_role(FlowMutator):
25
25
  def end(self):
26
26
  from metaflow import get_aws_client
27
27
  client = get_aws_client("dynamodb") # Automatically uses the role in the flow decorator
28
+
29
+ You can also filter which steps should use the role:
30
+ @assume_role(role_arn="arn:aws:iam::123456789012:role/my-iam-role", steps=["start", "process"])
31
+ class MyFlow(FlowSpec):
32
+ @step
33
+ def start(self):
34
+ # user code in this step will use the assumed role
35
+ pass
36
+
37
+ @step
38
+ def process(self):
39
+ # user code in this step will use the assumed role
40
+ pass
41
+
42
+ @step
43
+ def end(self):
44
+ # user code in this step will NOT use the assumed role
45
+ pass
28
46
  """
29
47
 
30
48
  def init(self, *args, **kwargs):
31
49
  self.role_arn = kwargs.get("role_arn", None)
50
+ self.steps = kwargs.get("steps", None)
32
51
 
33
52
  if self.role_arn is None:
34
53
  raise ValueError(
@@ -40,6 +59,13 @@ class assume_role(FlowMutator):
40
59
  "`role_arn` must be a valid AWS IAM role ARN starting with 'arn:aws:iam::'"
41
60
  )
42
61
 
62
+ # Validate steps parameter
63
+ if self.steps is not None:
64
+ if not isinstance(self.steps, (list, tuple)):
65
+ raise ValueError("`steps` must be a list or tuple of step names")
66
+ if not all(isinstance(s, str) for s in self.steps):
67
+ raise ValueError("All step names in `steps` must be strings")
68
+
43
69
  def pre_mutate(self, mutable_flow: MutableFlow) -> None:
44
70
  """
45
71
  This method is called by Metaflow to apply the decorator to the flow.
@@ -49,6 +75,18 @@ class assume_role(FlowMutator):
49
75
  # Import environment decorator at runtime to avoid circular imports
50
76
  from metaflow import environment
51
77
 
78
+ # Validate that all specified steps exist in the flow
79
+ if self.steps is not None:
80
+ flow_step_names = {step_name for step_name, _ in mutable_flow.steps}
81
+ specified_steps = set(self.steps)
82
+ missing_steps = specified_steps - flow_step_names
83
+
84
+ if missing_steps:
85
+ raise ValueError(
86
+ f"Step(s) {sorted(missing_steps)} specified in `steps` parameter "
87
+ f"do not exist in the flow. Available steps: {sorted(flow_step_names)}"
88
+ )
89
+
52
90
  def _swap_environment_variables(step: MutableStep, role_arn: str) -> None:
53
91
  _step_has_env_set = True
54
92
  _env_kwargs = {OBP_ASSUME_ROLE_ARN_ENV_VAR: role_arn}
@@ -73,6 +111,8 @@ class assume_role(FlowMutator):
73
111
  def _setup_role_assumption(step: MutableStep) -> None:
74
112
  _swap_environment_variables(step, self.role_arn)
75
113
 
76
- # Apply the role assumption setup to all steps in the flow
77
- for _, step in mutable_flow.steps:
78
- _setup_role_assumption(step)
114
+ # Apply the role assumption setup to all steps in the flow (or filtered steps)
115
+ for step_name, step in mutable_flow.steps:
116
+ # If steps filter is specified, only apply to those steps
117
+ if self.steps is None or step_name in self.steps:
118
+ _setup_role_assumption(step)
@@ -27,6 +27,7 @@ def bake_image(
27
27
  conda_packages: Optional[Dict[str, str]] = None,
28
28
  base_image: Optional[str] = None,
29
29
  logger: Optional[Callable[[str], Any]] = None,
30
+ fast_bakery_url: Optional[str] = None,
30
31
  ) -> FastBakeryApiResponse:
31
32
  """
32
33
  Bakes a Docker image with the specified dependencies.
@@ -39,6 +40,7 @@ def bake_image(
39
40
  conda_packages: Dictionary of Conda packages and versions
40
41
  base_image: Base Docker image to use
41
42
  logger: Optional logger function to output progress
43
+ fast_bakery_url: Optional FB URL
42
44
 
43
45
  Returns:
44
46
  FastBakeryApiResponse: The response from the bakery service
@@ -50,6 +52,13 @@ def bake_image(
50
52
  if logger is None:
51
53
  logger = partial(print, file=sys.stderr)
52
54
 
55
+ if all([fast_bakery_url is None and FAST_BAKERY_URL is None]):
56
+ raise BakerException(
57
+ "Image Bakery endpoint missing. METAFLOW_FAST_BAKERY_URL environment/configuration variable not found."
58
+ )
59
+
60
+ fast_bakery_url = fast_bakery_url or FAST_BAKERY_URL
61
+
53
62
  # Thread lock for logging
54
63
  logger_lock = threading.Lock()
55
64
  images_baked = 0
@@ -63,7 +72,7 @@ def bake_image(
63
72
  base_image=None,
64
73
  ):
65
74
  try:
66
- bakery = FastBakery(url=FAST_BAKERY_URL)
75
+ bakery = FastBakery(url=fast_bakery_url)
67
76
  bakery._reset_payload()
68
77
  bakery.python_version(python)
69
78
  bakery.pypi_packages(pypi_packages)
@@ -19,7 +19,8 @@ def get_deployment_db_access_endpoint(name: str):
19
19
  from ..apps.core.perimeters import PerimeterExtractor
20
20
  from ..apps.core.capsule import CapsuleApi
21
21
 
22
- perimeter, cap_url = PerimeterExtractor.during_metaflow_execution()
22
+ perimeter, cap_url = PerimeterExtractor.during_programmatic_access()
23
+ # TODO: Find more natural way to get rid of CapsuleApi here.
23
24
  deployment = CapsuleApi(cap_url, perimeter).get_by_name(name)
24
25
  if not deployment:
25
26
  raise Exception(f"No app deployment found with name `{name}`")
@@ -83,22 +83,32 @@ def get_snowflake_token(user: str = "", role: str = "", integration: str = "") -
83
83
  return response.json()["token"]
84
84
 
85
85
 
86
- def connect(user: str = "", role: str = "", integration: str = "", **kwargs):
86
+ def get_oauth_connection_params(
87
+ user: str = "", role: str = "", integration: str = "", **kwargs
88
+ ) -> Dict:
87
89
  """
88
- Connect to snowflake using the token minted by Outerbounds
90
+ Get OAuth connection parameters for Snowflake authentication using Outerbounds integration.
91
+
92
+ This is a helper function that returns connection parameters dict that can be used
93
+ with both snowflake-connector-python and snowflake-snowpark-python.
94
+
89
95
  user: str
90
96
  The user name used to authenticate with snowflake
91
97
  role: str
92
- The role to request when connect with snowflake
98
+ The role to request when connecting with snowflake
93
99
  integration: str
94
- The name of the snowflake integration to use. If not set, an existing integration will be used provided that only one exists in the current perimeter. If integration is not set and more than one exists in the current perimeter, then we raise an exception.
100
+ The name of the snowflake integration to use. If not set, an existing integration
101
+ will be used provided that only one exists in the current perimeter.
95
102
  kwargs: dict
96
- Additional arguments to pass to the python snowflake connector
103
+ Additional arguments to include in the connection parameters
104
+
105
+ Returns:
106
+ Dict with connection parameters including OAuth token
97
107
  """
98
108
  # ensure password is not set
99
109
  if "password" in kwargs:
100
110
  raise OuterboundsSnowflakeConnectorException(
101
- "Password should not be set when using Outerbounds snowflake connector."
111
+ "Password should not be set when using Outerbounds OAuth authentication."
102
112
  )
103
113
 
104
114
  provisioner = SnowflakeIntegrationProvisioner(integration)
@@ -137,11 +147,31 @@ def connect(user: str = "", role: str = "", integration: str = "", **kwargs):
137
147
  kwargs["role"] = role
138
148
  kwargs["user"] = user
139
149
 
150
+ return kwargs
151
+
152
+
153
+ def connect(user: str = "", role: str = "", integration: str = "", **kwargs):
154
+ """
155
+ Connect to snowflake using the token minted by Outerbounds
156
+ user: str
157
+ The user name used to authenticate with snowflake
158
+ role: str
159
+ The role to request when connect with snowflake
160
+ integration: str
161
+ The name of the snowflake integration to use. If not set, an existing integration will be used provided that only one exists in the current perimeter. If integration is not set and more than one exists in the current perimeter, then we raise an exception.
162
+ kwargs: dict
163
+ Additional arguments to pass to the python snowflake connector
164
+ """
165
+ # Get OAuth connection params using the helper
166
+ connection_params = get_oauth_connection_params(
167
+ user=user, role=role, integration=integration, **kwargs
168
+ )
169
+
140
170
  # connect to snowflake
141
171
  try:
142
172
  from snowflake.connector import connect
143
173
 
144
- cn = connect(**kwargs)
174
+ cn = connect(**connection_params)
145
175
  return cn
146
176
  except ImportError as ie:
147
177
  raise OuterboundsSnowflakeConnectorException(
@@ -1,9 +1,11 @@
1
1
  import os
2
+ import re
2
3
  import shlex
3
4
  import atexit
4
5
  import json
5
6
  import math
6
7
  import time
8
+ import hashlib
7
9
 
8
10
  from metaflow import util
9
11
 
@@ -57,21 +59,29 @@ class Snowpark(object):
57
59
  atexit.register(lambda: self.job.kill() if hasattr(self, "job") else None)
58
60
 
59
61
  def _job_name(self, user, flow_name, run_id, step_name, task_id, retry_count):
60
- return "{user}-{flow_name}-{run_id}-{step_name}-{task_id}-{retry_count}".format(
61
- user=user,
62
- flow_name=flow_name,
63
- run_id=str(run_id) if run_id is not None else "",
64
- step_name=step_name,
65
- task_id=str(task_id) if task_id is not None else "",
66
- retry_count=str(retry_count) if retry_count is not None else "",
62
+ unique_str = (
63
+ "{user}-{flow_name}-{run_id}-{step_name}-{task_id}-{retry_count}".format(
64
+ user=user,
65
+ flow_name=flow_name,
66
+ run_id=str(run_id) if run_id is not None else "",
67
+ step_name=step_name,
68
+ task_id=str(task_id) if task_id is not None else "",
69
+ retry_count=str(retry_count) if retry_count is not None else "",
70
+ )
67
71
  )
72
+ unique_hash = hashlib.md5(unique_str.encode("utf-8")).hexdigest()[:8]
73
+ raw_prefix = f"{flow_name}-{step_name}"
74
+ safe_prefix = re.sub(r"[^a-z0-9]", "-", raw_prefix.lower())
75
+ safe_prefix = safe_prefix[:54]
76
+ safe_prefix = safe_prefix.lstrip("-")
77
+ return f"{safe_prefix}-{unique_hash}"
68
78
 
69
79
  def _command(self, environment, code_package_url, step_name, step_cmds, task_spec):
70
80
  mflog_expr = export_mflog_env_vars(
71
81
  datastore_type=self.datastore.TYPE,
72
82
  stdout_path=STDOUT_PATH,
73
83
  stderr_path=STDERR_PATH,
74
- **task_spec
84
+ **task_spec,
75
85
  )
76
86
  init_cmds = environment.get_package_commands(
77
87
  code_package_url, self.datastore.TYPE