ob-metaflow-extensions 1.1.151__py2.py3-none-any.whl → 1.6.2__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92)
  1. metaflow_extensions/outerbounds/__init__.py +1 -1
  2. metaflow_extensions/outerbounds/plugins/__init__.py +24 -3
  3. metaflow_extensions/outerbounds/plugins/apps/app_cli.py +0 -0
  4. metaflow_extensions/outerbounds/plugins/apps/core/__init__.py +16 -0
  5. metaflow_extensions/outerbounds/plugins/apps/core/_state_machine.py +506 -0
  6. metaflow_extensions/outerbounds/plugins/apps/core/_vendor/__init__.py +0 -0
  7. metaflow_extensions/outerbounds/plugins/apps/core/_vendor/spinner/__init__.py +4 -0
  8. metaflow_extensions/outerbounds/plugins/apps/core/_vendor/spinner/spinners.py +478 -0
  9. metaflow_extensions/outerbounds/plugins/apps/core/app_config.py +128 -0
  10. metaflow_extensions/outerbounds/plugins/apps/core/app_deploy_decorator.py +333 -0
  11. metaflow_extensions/outerbounds/plugins/apps/core/artifacts.py +0 -0
  12. metaflow_extensions/outerbounds/plugins/apps/core/capsule.py +1029 -0
  13. metaflow_extensions/outerbounds/plugins/apps/core/click_importer.py +24 -0
  14. metaflow_extensions/outerbounds/plugins/apps/core/code_package/__init__.py +3 -0
  15. metaflow_extensions/outerbounds/plugins/apps/core/code_package/code_packager.py +618 -0
  16. metaflow_extensions/outerbounds/plugins/apps/core/code_package/examples.py +125 -0
  17. metaflow_extensions/outerbounds/plugins/apps/core/config/__init__.py +15 -0
  18. metaflow_extensions/outerbounds/plugins/apps/core/config/cli_generator.py +165 -0
  19. metaflow_extensions/outerbounds/plugins/apps/core/config/config_utils.py +966 -0
  20. metaflow_extensions/outerbounds/plugins/apps/core/config/schema_export.py +299 -0
  21. metaflow_extensions/outerbounds/plugins/apps/core/config/typed_configs.py +233 -0
  22. metaflow_extensions/outerbounds/plugins/apps/core/config/typed_init_generator.py +537 -0
  23. metaflow_extensions/outerbounds/plugins/apps/core/config/unified_config.py +1125 -0
  24. metaflow_extensions/outerbounds/plugins/apps/core/config_schema.yaml +337 -0
  25. metaflow_extensions/outerbounds/plugins/apps/core/dependencies.py +115 -0
  26. metaflow_extensions/outerbounds/plugins/apps/core/deployer.py +1300 -0
  27. metaflow_extensions/outerbounds/plugins/apps/core/exceptions.py +341 -0
  28. metaflow_extensions/outerbounds/plugins/apps/core/experimental/__init__.py +89 -0
  29. metaflow_extensions/outerbounds/plugins/apps/core/perimeters.py +123 -0
  30. metaflow_extensions/outerbounds/plugins/apps/core/secrets.py +164 -0
  31. metaflow_extensions/outerbounds/plugins/apps/core/utils.py +233 -0
  32. metaflow_extensions/outerbounds/plugins/apps/core/validations.py +17 -0
  33. metaflow_extensions/outerbounds/plugins/aws/__init__.py +4 -0
  34. metaflow_extensions/outerbounds/plugins/aws/assume_role.py +3 -0
  35. metaflow_extensions/outerbounds/plugins/aws/assume_role_decorator.py +118 -0
  36. metaflow_extensions/outerbounds/plugins/checkpoint_datastores/coreweave.py +9 -77
  37. metaflow_extensions/outerbounds/plugins/checkpoint_datastores/external_chckpt.py +85 -0
  38. metaflow_extensions/outerbounds/plugins/checkpoint_datastores/nebius.py +7 -78
  39. metaflow_extensions/outerbounds/plugins/fast_bakery/baker.py +119 -0
  40. metaflow_extensions/outerbounds/plugins/fast_bakery/docker_environment.py +17 -3
  41. metaflow_extensions/outerbounds/plugins/fast_bakery/fast_bakery.py +1 -0
  42. metaflow_extensions/outerbounds/plugins/kubernetes/kubernetes_client.py +18 -44
  43. metaflow_extensions/outerbounds/plugins/kubernetes/pod_killer.py +374 -0
  44. metaflow_extensions/outerbounds/plugins/nim/card.py +1 -6
  45. metaflow_extensions/outerbounds/plugins/nim/{__init__.py → nim_decorator.py} +13 -49
  46. metaflow_extensions/outerbounds/plugins/nim/nim_manager.py +294 -233
  47. metaflow_extensions/outerbounds/plugins/nim/utils.py +36 -0
  48. metaflow_extensions/outerbounds/plugins/nvcf/constants.py +2 -2
  49. metaflow_extensions/outerbounds/plugins/nvct/nvct_decorator.py +32 -8
  50. metaflow_extensions/outerbounds/plugins/nvct/nvct_runner.py +1 -1
  51. metaflow_extensions/outerbounds/plugins/ollama/__init__.py +171 -16
  52. metaflow_extensions/outerbounds/plugins/ollama/constants.py +1 -0
  53. metaflow_extensions/outerbounds/plugins/ollama/exceptions.py +22 -0
  54. metaflow_extensions/outerbounds/plugins/ollama/ollama.py +1710 -114
  55. metaflow_extensions/outerbounds/plugins/ollama/status_card.py +292 -0
  56. metaflow_extensions/outerbounds/plugins/optuna/__init__.py +49 -0
  57. metaflow_extensions/outerbounds/plugins/profilers/simple_card_decorator.py +96 -0
  58. metaflow_extensions/outerbounds/plugins/s3_proxy/__init__.py +7 -0
  59. metaflow_extensions/outerbounds/plugins/s3_proxy/binary_caller.py +132 -0
  60. metaflow_extensions/outerbounds/plugins/s3_proxy/constants.py +11 -0
  61. metaflow_extensions/outerbounds/plugins/s3_proxy/exceptions.py +13 -0
  62. metaflow_extensions/outerbounds/plugins/s3_proxy/proxy_bootstrap.py +59 -0
  63. metaflow_extensions/outerbounds/plugins/s3_proxy/s3_proxy_api.py +93 -0
  64. metaflow_extensions/outerbounds/plugins/s3_proxy/s3_proxy_decorator.py +250 -0
  65. metaflow_extensions/outerbounds/plugins/s3_proxy/s3_proxy_manager.py +225 -0
  66. metaflow_extensions/outerbounds/plugins/snowflake/snowflake.py +37 -7
  67. metaflow_extensions/outerbounds/plugins/snowpark/snowpark.py +18 -8
  68. metaflow_extensions/outerbounds/plugins/snowpark/snowpark_cli.py +6 -0
  69. metaflow_extensions/outerbounds/plugins/snowpark/snowpark_client.py +45 -18
  70. metaflow_extensions/outerbounds/plugins/snowpark/snowpark_decorator.py +18 -9
  71. metaflow_extensions/outerbounds/plugins/snowpark/snowpark_job.py +10 -4
  72. metaflow_extensions/outerbounds/plugins/torchtune/__init__.py +163 -0
  73. metaflow_extensions/outerbounds/plugins/vllm/__init__.py +255 -0
  74. metaflow_extensions/outerbounds/plugins/vllm/constants.py +1 -0
  75. metaflow_extensions/outerbounds/plugins/vllm/exceptions.py +1 -0
  76. metaflow_extensions/outerbounds/plugins/vllm/status_card.py +352 -0
  77. metaflow_extensions/outerbounds/plugins/vllm/vllm_manager.py +621 -0
  78. metaflow_extensions/outerbounds/remote_config.py +46 -9
  79. metaflow_extensions/outerbounds/toplevel/apps/__init__.py +9 -0
  80. metaflow_extensions/outerbounds/toplevel/apps/exceptions.py +11 -0
  81. metaflow_extensions/outerbounds/toplevel/global_aliases_for_metaflow_package.py +86 -2
  82. metaflow_extensions/outerbounds/toplevel/ob_internal.py +4 -0
  83. metaflow_extensions/outerbounds/toplevel/plugins/optuna/__init__.py +1 -0
  84. metaflow_extensions/outerbounds/toplevel/plugins/torchtune/__init__.py +1 -0
  85. metaflow_extensions/outerbounds/toplevel/plugins/vllm/__init__.py +1 -0
  86. metaflow_extensions/outerbounds/toplevel/s3_proxy.py +88 -0
  87. {ob_metaflow_extensions-1.1.151.dist-info → ob_metaflow_extensions-1.6.2.dist-info}/METADATA +2 -2
  88. ob_metaflow_extensions-1.6.2.dist-info/RECORD +136 -0
  89. metaflow_extensions/outerbounds/plugins/nim/utilities.py +0 -5
  90. ob_metaflow_extensions-1.1.151.dist-info/RECORD +0 -74
  91. {ob_metaflow_extensions-1.1.151.dist-info → ob_metaflow_extensions-1.6.2.dist-info}/WHEEL +0 -0
  92. {ob_metaflow_extensions-1.1.151.dist-info → ob_metaflow_extensions-1.6.2.dist-info}/top_level.txt +0 -0
metaflow_extensions/outerbounds/plugins/s3_proxy/s3_proxy_api.py
@@ -0,0 +1,93 @@
+ import json
+ import time
+ from typing import Dict, Optional
+
+ from .exceptions import S3ProxyConfigException, S3ProxyApiException
+
+
+ class S3ProxyConfigResponse:
+     def __init__(self, data: Dict):
+         self.bucket_name = data.get("bucket_name")
+         self.endpoint_url = data.get("endpoint_url")
+         self.access_key_id = data.get("access_key_id")
+         self.secret_access_key = data.get("secret_access_key")
+         self.region = data.get("region")
+
+
+ class S3ProxyApiClient:
+     def __init__(self):
+         self.perimeter, self.integrations_url = self._get_api_configs()
+
+     def _get_api_configs(self):
+         from metaflow_extensions.outerbounds.remote_config import init_config
+         from os import environ
+
+         conf = init_config()
+         perimeter = conf.get("OBP_PERIMETER") or environ.get("OBP_PERIMETER", "")
+         integrations_url = conf.get("OBP_INTEGRATIONS_URL") or environ.get(
+             "OBP_INTEGRATIONS_URL", ""
+         )
+
+         if not perimeter:
+             raise S3ProxyConfigException(
+                 "No perimeter set. Please run the `outerbounds configure` command."
+             )
+
+         if not integrations_url:
+             raise S3ProxyConfigException(
+                 "No integrations URL set. Please contact your Outerbounds support team."
+             )
+
+         return perimeter, integrations_url
+
+     def fetch_s3_proxy_config(
+         self, integration_name: Optional[str] = None
+     ) -> S3ProxyConfigResponse:
+         url = f"{self.integrations_url}/s3proxy"
+
+         payload = {"perimeter_name": self.perimeter}
+         if integration_name:
+             payload["integration_name"] = integration_name
+
+         headers = {"Content-Type": "application/json"}
+
+         try:
+             from metaflow.metaflow_config import SERVICE_HEADERS
+
+             headers.update(SERVICE_HEADERS or {})
+         except ImportError:
+             pass
+
+         response = self._make_request(url, headers, payload)
+         return S3ProxyConfigResponse(response)
+
+     def _make_request(self, url: str, headers: Dict, payload: Dict) -> Dict:
+         from metaflow_extensions.outerbounds.plugins.secrets.secrets import (
+             _api_server_get,
+         )
+
+         retryable_status_codes = [409]
+         json_payload = json.dumps(payload)
+
+         for attempt in range(3):
+             response = _api_server_get(
+                 url, data=json_payload, headers=headers, conn_error_retries=5
+             )
+
+             if response.status_code not in retryable_status_codes:
+                 break
+
+             if attempt < 2:
+                 time.sleep(0.5 * (attempt + 1))
+
+         if response.status_code != 200:
+             error_msg = f"API request failed with status {response.status_code}"
+             try:
+                 error_data = response.json()
+                 if "message" in error_data:
+                     error_msg = error_data["message"]
+             except Exception:
+                 pass
+             raise S3ProxyApiException(error_msg)
+
+         return response.json()
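
The API client above resolves the perimeter and integrations URL from the local Outerbounds configuration, then fetches the external bucket's credentials, retrying only on HTTP 409. A minimal usage sketch, not part of the diff, assuming a configured Outerbounds environment and a hypothetical integration named "external-cache":

    from metaflow_extensions.outerbounds.plugins.s3_proxy.s3_proxy_api import (
        S3ProxyApiClient,
    )

    # Raises S3ProxyConfigException if no perimeter or integrations URL is configured.
    client = S3ProxyApiClient()
    # Raises S3ProxyApiException on a non-200 response after retries.
    cfg = client.fetch_s3_proxy_config(integration_name="external-cache")
    print(cfg.bucket_name, cfg.endpoint_url, cfg.region)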
metaflow_extensions/outerbounds/plugins/s3_proxy/s3_proxy_decorator.py
@@ -0,0 +1,250 @@
+ import os
+ import functools
+ import json
+ import signal
+ import time
+ from typing import Dict, List, Optional, Tuple
+
+ from metaflow import current
+ from metaflow.decorators import StepDecorator
+ from .exceptions import S3ProxyException
+ from .constants import S3_PROXY_WRITE_MODES
+ from collections import namedtuple
+
+ S3ProxyBinaryConfig = namedtuple(
+     "S3ProxyBinaryConfig", ["integration_name", "write_mode", "debug"]
+ )
+
+
+ def monkey_patch_environment(
+     environment, step_name_and_deco_attrs: Dict[str, S3ProxyBinaryConfig]
+ ):
+     wrapping_func = environment.bootstrap_commands
+
+     @functools.wraps(wrapping_func)
+     def wrapper(step_name, ds_type, *args):
+         base_bootstrap_cmd = wrapping_func(step_name, ds_type, *args)
+         additional_cmd = []
+
+         if step_name in step_name_and_deco_attrs:
+             integration_name = step_name_and_deco_attrs[step_name].integration_name
+             write_mode = step_name_and_deco_attrs[step_name].write_mode
+             debug = step_name_and_deco_attrs[step_name].debug
+             additional_cmd = [
+                 "echo 'Setting up the S3 proxy.'",
+                 f"python -m metaflow_extensions.outerbounds.plugins.s3_proxy.proxy_bootstrap bootstrap --integration-name {integration_name} --write-mode {write_mode} --debug {debug} --uc-proxy-cfg-write-path ./.uc_proxy_cfg_file --proxy-status-write-path ./.proxy_status_file",
+                 "export METAFLOW_S3_PROXY_USER_CODE_CONFIG=$(cat ./.uc_proxy_cfg_file)",
+                 "export METAFLOW_S3_PROXY_STATUS=$(cat ./.proxy_status_file)",
+                 "export METAFLOW_S3_PROXY_SETUP_SUCCESS=True",
+                 "flush_mflogs",
+             ]
+         return base_bootstrap_cmd + additional_cmd
+
+     environment.bootstrap_commands = wrapper
+
+
+ class S3ProxyDecorator(StepDecorator):
+     """
+     Set up an S3 proxy that caches objects in an external, S3-compatible bucket
+     for S3 read and write requests.
+
+     This decorator requires an integration in the Outerbounds platform that
+     points to an external bucket. It affects S3 operations performed via
+     Metaflow's `get_aws_client` and `S3` within a `@step`.
+
+     Read operations
+     ---------------
+     All read operations pass through the proxy. If an object does not already
+     exist in the external bucket, it is cached there. For example, if code reads
+     from buckets `FOO` and `BAR` using the `S3` interface, objects from both
+     buckets are cached in the external bucket.
+
+     During task execution, all S3-related read requests are routed through the
+     proxy:
+     - If the object is present in the external object store, the proxy
+       streams it directly from there without accessing the requested origin
+       bucket.
+     - If the object is not present in the external storage, the proxy
+       fetches it from the requested bucket, caches it in the external
+       storage, and streams the response from the origin bucket.
+
+     Warning
+     -------
+     All READ operations (e.g., GetObject, HeadObject) pass through the external
+     bucket regardless of the bucket specified in user code. Even
+     `S3(run=self)` and `S3(s3root="mybucketfoo")` requests go through the
+     external bucket cache.
+
+     Write operations
+     ----------------
+     Write behavior is controlled by the `write_mode` parameter, which determines
+     whether writes also persist objects in the cache.
+
+     `write_mode` values:
+     - `origin-and-cache`: objects are written both to the cache and to their
+       intended origin bucket.
+     - `origin`: objects are written only to their intended origin bucket.
+
+     Parameters
+     ----------
+     integration_name : str, optional
+         [Outerbounds integration name](https://docs.outerbounds.com/outerbounds/configuring-secrets/#integrations-view)
+         that holds the configuration for the external, S3-compatible object
+         storage bucket. If not specified, the only available S3 proxy
+         integration in the namespace is used (fails if multiple exist).
+     write_mode : str, optional
+         Controls whether writes also go to the external bucket.
+         - `origin` (default)
+         - `origin-and-cache`
+     debug : bool, optional
+         Enables debug logging for proxy operations.
+     """
+
+     name = "s3_proxy"
+     defaults = {
+         "integration_name": None,
+         "write_mode": None,
+         "debug": False,
+     }
+
+     _environment_patched = False
+
+     _proxy_status = None
+
+     @classmethod
+     def patch_environment(cls, flow, environment):
+         """
+         We need to patch the environment bootstrap commands so that
+         we can launch the S3 proxy before the step code executes.
+         We also want to ensure that the proxy bootstrap runs only
+         for the steps that carry the decorator, which is why we pass
+         down all the step names whose bootstrap commands change.
+         """
+         if cls._environment_patched:
+             return
+
+         steps_with_s3_proxy = [
+             step
+             for step in flow
+             if any(deco.name == "s3_proxy" for deco in step.decorators)
+         ]
+         if len(steps_with_s3_proxy) == 0:  # no decorated steps; nothing to patch
+             return
+
+         step_names_and_deco_attrs = {}
+         for s in steps_with_s3_proxy:
+             _decos = [x for x in s.decorators if x.name == "s3_proxy"]
+             deco = _decos[0]
+             step_names_and_deco_attrs[s.name] = S3ProxyBinaryConfig(
+                 integration_name=deco.attributes["integration_name"],
+                 write_mode=deco.attributes["write_mode"],
+                 debug=deco.attributes["debug"],
+             )
+
+         monkey_patch_environment(environment, step_names_and_deco_attrs)
+         cls._environment_patched = True
+
+     def step_init(self, flow, graph, step, decos, environment, flow_datastore, logger):
+         write_mode = self.attributes["write_mode"]
+         if write_mode and write_mode not in S3_PROXY_WRITE_MODES:
+             raise S3ProxyException(
+                 f"unexpected write_mode specified: {write_mode}. Allowed values are: {','.join(S3_PROXY_WRITE_MODES)}."
+             )
+
+         self.patch_environment(flow, environment)
+         if (
+             os.environ.get("METAFLOW_S3_PROXY_USER_CODE_CONFIG")
+             and os.environ.get("METAFLOW_S3_PROXY_STATUS")
+             and self.attributes["debug"]
+         ):
+             print("[@s3_proxy] S3 Proxy detected. Debug mode is enabled.")
+
+         if os.environ.get("METAFLOW_S3_PROXY_STATUS"):
+             proxy_status = json.loads(os.environ.get("METAFLOW_S3_PROXY_STATUS"))
+             self._proxy_status = proxy_status
+
+     def task_pre_step(
+         self,
+         step_name,
+         task_datastore,
+         metadata,
+         run_id,
+         task_id,
+         flow,
+         graph,
+         retry_count,
+         max_user_code_retries,
+         ubf_context,
+         inputs,
+     ):
+         """No-op: the proxy is set up by the environment bootstrap commands."""
+         pass
+
+     def task_finished(
+         self, step_name, flow, graph, is_task_ok, retry_count, max_retries
+     ):
+         if not self._proxy_status:
+             return
+
+         status = self._proxy_status
+         proxy_pid = status.get("proxy_pid")
+         config_path = status.get("config_path")
+         binary_path = status.get("binary_path")
+
+         # 1) Stop processes: try to terminate the process group for clean child shutdown
+         if proxy_pid:
+             try:
+                 pgid = os.getpgid(proxy_pid)
+                 os.killpg(pgid, signal.SIGTERM)
+                 time.sleep(1)
+             except Exception:
+                 # Fall back to killing the pid directly if pgid is unavailable
+                 try:
+                     os.kill(proxy_pid, signal.SIGTERM)
+                 except Exception:
+                     pass
+
+         # 2) Clear files based on status
+         for path in (config_path, binary_path):
+             try:
+                 if path and os.path.exists(path):
+                     os.remove(path)
+             except Exception:
+                 pass
+
+
+ class NebiusS3ProxyDecorator(S3ProxyDecorator):
+
+     __doc__ = (
+         """
+     `@nebius_s3_proxy` is a Nebius-specific S3 proxy decorator for routing S3 requests through a local proxy service.
+     It exists to make it easier for users to know that this decorator should only be used with
+     a Neo Cloud like Nebius. The underlying mechanics of the decorator are the same as `@s3_proxy`:\n
+     """
+         + S3ProxyDecorator.__doc__
+     )
+
+     name = "nebius_s3_proxy"
+     defaults = {
+         "integration_name": None,
+         "write_mode": None,
+         "debug": False,
+     }
+
+
+ class CoreWeaveS3ProxyDecorator(S3ProxyDecorator):
+     __doc__ = (
+         """
+     `@coreweave_s3_proxy` is a CoreWeave-specific S3 proxy decorator for routing S3 requests through a local proxy service.
+     It exists to make it easier for users to know that this decorator should only be used with
+     a Neo Cloud like CoreWeave. The underlying mechanics of the decorator are the same as `@s3_proxy`:\n
+     """
+         + S3ProxyDecorator.__doc__
+     )
+
+     name = "coreweave_s3_proxy"
+     defaults = {
+         "integration_name": None,
+         "write_mode": None,
+         "debug": False,
+     }
metaflow_extensions/outerbounds/plugins/s3_proxy/s3_proxy_manager.py
@@ -0,0 +1,225 @@
+ import os
+ import json
+ import gzip
+ import sys
+ import time
+ import threading
+ import subprocess
+ from pathlib import Path
+ from typing import Optional, Tuple
+
+ import requests
+
+ from .constants import (
+     S3_PROXY_BINARY_URLS,
+     DEFAULT_PROXY_PORT,
+     DEFAULT_PROXY_HOST,
+ )
+ from metaflow.metaflow_config import AWS_SECRETS_MANAGER_DEFAULT_REGION
+ from .s3_proxy_api import S3ProxyApiClient
+ from .exceptions import S3ProxyException
+
+
+ class S3ProxyManager:
+     def __init__(
+         self,
+         integration_name: Optional[str] = None,
+         write_mode: Optional[str] = None,
+         debug: bool = False,
+     ):
+         self.integration_name = integration_name
+         self.write_mode = write_mode
+         self.debug = debug
+         self.process = None
+         self.binary_path = None
+         self.config_path = None
+         self.api_client = S3ProxyApiClient()
+         self.proxy_config = None
+
+     def setup_proxy(self) -> Tuple[dict, int, str, str]:
+         try:
+             if self._is_running_in_kubernetes():
+                 config_data = self.api_client.fetch_s3_proxy_config(
+                     self.integration_name
+                 )
+                 self.binary_path = self._download_binary()
+                 self.config_path = self._write_config_file(config_data)
+                 # The proxy binary is now invoked before the Metaflow
+                 # step code even starts executing, which implies
+                 # a few important things:
+                 # 1. We start the actual proxy process via another Python file that safely ships logs to mflog.
+                 # 2. We pass back the right values to the Metaflow step process via env vars.
+                 # 3. Metaflow step code relies on env vars to decide if clients need the S3 proxy wired in.
+                 self.process = self._start_proxy_process()
+
+                 user_code_proxy_config = self._setup_proxy_config(config_data)
+
+                 return_tuple = (
+                     user_code_proxy_config,  # the config used within the Metaflow `step` code.
+                     self.process.pid,  # pid of the process that jump-starts, monitors, and ships the proxy's logs to MFLOG.
+                     self.config_path,  # path to the config derived from the integration; it holds the bucket path and name where external objects are stored.
+                     self.binary_path,  # path to the proxy binary.
+                 )
+                 # We return a tuple because these values need to be passed down to the Metaflow step process,
+                 # which handles their removal gracefully after the step is finished.
+                 return return_tuple
+
+             print(
+                 "[@s3_proxy] skipping s3-proxy set up because metaflow has not detected a Kubernetes environment"
+             )
+             raise S3ProxyException(
+                 "S3 proxy setup failed because metaflow has not detected a Kubernetes environment"
+             )
+         except Exception as e:
+             if self.debug:
+                 print(f"[@s3_proxy] Setup failed: {e}")
+             self.cleanup()
+             raise
+
+     def _is_running_in_kubernetes(self) -> bool:
+         """Check if running inside a Kubernetes pod by looking for the Kubernetes service account token."""
+         return (
+             os.path.exists("/var/run/secrets/kubernetes.io/serviceaccount/token")
+             and os.environ.get("KUBERNETES_SERVICE_HOST") is not None
+         )
+
+     def _download_binary(self) -> str:
+         binary_path = Path("/tmp/s3-proxy")
+         if binary_path.exists():
+             if self.debug:
+                 print("[@s3_proxy] Binary already exists, skipping download")
+             return str(binary_path.absolute())
+
+         try:
+             if self.debug:
+                 print("[@s3_proxy] Downloading binary...")
+
+             from platform import machine
+
+             arch = machine()
+             if arch not in S3_PROXY_BINARY_URLS:
+                 raise S3ProxyException(
+                     f"unsupported platform architecture: {arch}. Please reach out to your Outerbounds support team for more help."
+                 )
+
+             response = requests.get(S3_PROXY_BINARY_URLS[arch], stream=True, timeout=60)
+             response.raise_for_status()
+
+             with open(binary_path, "wb") as f:
+                 with gzip.GzipFile(fileobj=response.raw) as gz:
+                     f.write(gz.read())
+
+             binary_path.chmod(0o755)
+
+             if self.debug:
+                 print("[@s3_proxy] Binary downloaded successfully")
+
+             return str(binary_path.absolute())
+
+         except Exception as e:
+             if self.debug:
+                 print(f"[@s3_proxy] Binary download failed: {e}")
+             raise S3ProxyException(f"Failed to download S3 proxy binary: {e}")
+
+     def _write_config_file(self, config_data) -> str:
+         config_path = Path("/tmp/s3-proxy-config.json")
+
+         proxy_config = {
+             "bucketName": config_data.bucket_name,
+             "endpointUrl": config_data.endpoint_url,
+             "accessKeyId": config_data.access_key_id,
+             "accessKeySecret": config_data.secret_access_key,
+             "region": config_data.region,
+         }
+
+         config_path.write_text(json.dumps(proxy_config, indent=2))
+
+         if self.debug:
+             print(f"[@s3_proxy] Config written to {config_path}")
+
+         return str(config_path.absolute())
+
+     def _start_proxy_process(self) -> subprocess.Popen:
+         # This command jump-starts a process that then calls the proxy binary.
+         # We do it this way because all of this must run before even the `step`
+         # command is called, so we need a Python process that ships the logs of the
+         # proxy process to MFLOG instead of relying on print statements. This process
+         # must run independently because S3ProxyManager is called from proxy_bootstrap,
+         # which exits after jump-starting the proxy process.
+         cmd = [self.binary_path, "--bucket-config", self.config_path, "serve"]
+         _env = os.environ.copy()
+         _env["S3_PROXY_BINARY_COMMAND"] = " ".join(cmd)
+         if self.debug:
+             _env["S3_PROXY_BINARY_DEBUG"] = "True"
+         _cmd = [
+             sys.executable,
+             "-m",
+             "metaflow_extensions.outerbounds.plugins.s3_proxy.binary_caller",
+         ]
+         devnull = subprocess.DEVNULL
+         process = subprocess.Popen(
+             _cmd,
+             stdout=devnull,
+             stderr=devnull,
+             text=True,
+             start_new_session=True,
+             env=_env,
+         )
+         time.sleep(3)
+
+         if process.poll() is None:
+             if self.debug:
+                 print(f"[@s3_proxy] Proxy started successfully (pid: {process.pid})")
+
+             return process
+         else:
+             # stdout/stderr are routed to DEVNULL, so only the exit code is available here.
+             raise S3ProxyException(
+                 f"S3 proxy failed to start (exit code: {process.returncode})"
+             )
+
+     def _setup_proxy_config(self, config_data):
+         region = os.environ.get(
+             "METAFLOW_AWS_SECRETS_MANAGER_DEFAULT_REGION",
+             AWS_SECRETS_MANAGER_DEFAULT_REGION,
+         )
+
+         proxy_config = {
+             "endpoint_url": f"http://{DEFAULT_PROXY_HOST}:{DEFAULT_PROXY_PORT}",
+             "region": region,
+             "bucket_name": config_data.bucket_name,
+             "active": True,
+         }
+
+         if self.write_mode:
+             proxy_config["write_mode"] = self.write_mode
+
+         self.proxy_config = proxy_config
+         return proxy_config
+
+     def cleanup(self):
+         try:
+             from metaflow_extensions.outerbounds.toplevel.global_aliases_for_metaflow_package import (
+                 clear_s3_proxy_config,
+             )
+
+             clear_s3_proxy_config()
+
+             if self.process and self.process.poll() is None:
+                 self.process.terminate()
+                 self.process.wait(timeout=5)
+                 if self.debug:
+                     print("[@s3_proxy] Proxy process stopped")
+
+             from os import remove
+
+             remove(self.config_path)
+             remove(self.binary_path)
+
+         except Exception as e:
+             if self.debug:
+                 print(f"[@s3_proxy] Cleanup error: {e}")
+         finally:
+             self.proxy_config = None
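
The manager is driven by `proxy_bootstrap` rather than by user code, but its lifecycle reads roughly like this sketch (illustrative only; the integration name is a placeholder, and outside Kubernetes `setup_proxy` raises):

    from metaflow_extensions.outerbounds.plugins.s3_proxy.s3_proxy_manager import (
        S3ProxyManager,
    )

    manager = S3ProxyManager(integration_name="external-cache", write_mode="origin")
    user_cfg, pid, config_path, binary_path = manager.setup_proxy()
    # user_cfg          -> serialized into METAFLOW_S3_PROXY_USER_CODE_CONFIG for step code
    # pid, config_path,
    # binary_path       -> recorded in METAFLOW_S3_PROXY_STATUS so that task_finished
    #                      can SIGTERM the process group and delete the files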
metaflow_extensions/outerbounds/plugins/snowflake/snowflake.py
@@ -83,22 +83,32 @@ def get_snowflake_token(user: str = "", role: str = "", integration: str = "") -
      return response.json()["token"]


- def connect(user: str = "", role: str = "", integration: str = "", **kwargs):
+ def get_oauth_connection_params(
+     user: str = "", role: str = "", integration: str = "", **kwargs
+ ) -> Dict:
      """
-     Connect to snowflake using the token minted by Outerbounds
+     Get OAuth connection parameters for Snowflake authentication using an Outerbounds integration.
+
+     This is a helper function that returns a connection-parameters dict that can be used
+     with both snowflake-connector-python and snowflake-snowpark-python.
+
      user: str
          The user name used to authenticate with snowflake
      role: str
-         The role to request when connect with snowflake
+         The role to request when connecting with snowflake
      integration: str
-         The name of the snowflake integration to use. If not set, an existing integration will be used provided that only one exists in the current perimeter. If integration is not set and more than one exists in the current perimeter, then we raise an exception.
+         The name of the snowflake integration to use. If not set, an existing integration
+         will be used provided that only one exists in the current perimeter.
      kwargs: dict
-         Additional arguments to pass to the python snowflake connector
+         Additional arguments to include in the connection parameters
+
+     Returns:
+         Dict with connection parameters including the OAuth token
      """
      # ensure password is not set
      if "password" in kwargs:
          raise OuterboundsSnowflakeConnectorException(
-             "Password should not be set when using Outerbounds snowflake connector."
+             "Password should not be set when using Outerbounds OAuth authentication."
          )

      provisioner = SnowflakeIntegrationProvisioner(integration)
@@ -137,11 +147,31 @@ def connect(user: str = "", role: str = "", integration: str = "", **kwargs):
      kwargs["role"] = role
      kwargs["user"] = user

+     return kwargs
+
+
+ def connect(user: str = "", role: str = "", integration: str = "", **kwargs):
+     """
+     Connect to snowflake using the token minted by Outerbounds
+     user: str
+         The user name used to authenticate with snowflake
+     role: str
+         The role to request when connecting with snowflake
+     integration: str
+         The name of the snowflake integration to use. If not set, an existing integration will be used provided that only one exists in the current perimeter. If integration is not set and more than one exists in the current perimeter, then we raise an exception.
+     kwargs: dict
+         Additional arguments to pass to the python snowflake connector
+     """
+     # Get OAuth connection params using the helper
+     connection_params = get_oauth_connection_params(
+         user=user, role=role, integration=integration, **kwargs
+     )
+
      # connect to snowflake
      try:
          from snowflake.connector import connect

-         cn = connect(**kwargs)
+         cn = connect(**connection_params)
          return cn
      except ImportError as ie:
          raise OuterboundsSnowflakeConnectorException(
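
This refactor splits OAuth parameter assembly out of `connect` so the same dict can feed either Snowflake client library. A sketch of both paths (module path taken from the file list above; user, role, and argument values are placeholders):

    from metaflow_extensions.outerbounds.plugins.snowflake.snowflake import (
        connect,
        get_oauth_connection_params,
    )

    # Existing behavior, unchanged for callers of connect():
    cn = connect(user="ANALYST", role="REPORTING")

    # New: reuse the same OAuth parameters with snowflake-snowpark-python.
    from snowflake.snowpark import Session

    params = get_oauth_connection_params(user="ANALYST", role="REPORTING")
    session = Session.builder.configs(params).create()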
metaflow_extensions/outerbounds/plugins/snowpark/snowpark.py
@@ -1,9 +1,11 @@
  import os
+ import re
  import shlex
  import atexit
  import json
  import math
  import time
+ import hashlib

  from metaflow import util

@@ -57,21 +59,29 @@ class Snowpark(object):
          atexit.register(lambda: self.job.kill() if hasattr(self, "job") else None)

      def _job_name(self, user, flow_name, run_id, step_name, task_id, retry_count):
-         return "{user}-{flow_name}-{run_id}-{step_name}-{task_id}-{retry_count}".format(
-             user=user,
-             flow_name=flow_name,
-             run_id=str(run_id) if run_id is not None else "",
-             step_name=step_name,
-             task_id=str(task_id) if task_id is not None else "",
-             retry_count=str(retry_count) if retry_count is not None else "",
+         unique_str = (
+             "{user}-{flow_name}-{run_id}-{step_name}-{task_id}-{retry_count}".format(
+                 user=user,
+                 flow_name=flow_name,
+                 run_id=str(run_id) if run_id is not None else "",
+                 step_name=step_name,
+                 task_id=str(task_id) if task_id is not None else "",
+                 retry_count=str(retry_count) if retry_count is not None else "",
+             )
          )
+         unique_hash = hashlib.md5(unique_str.encode("utf-8")).hexdigest()[:8]
+         raw_prefix = f"{flow_name}-{step_name}"
+         safe_prefix = re.sub(r"[^a-z0-9]", "-", raw_prefix.lower())
+         safe_prefix = safe_prefix[:54]
+         safe_prefix = safe_prefix.lstrip("-")
+         return f"{safe_prefix}-{unique_hash}"

      def _command(self, environment, code_package_url, step_name, step_cmds, task_spec):
          mflog_expr = export_mflog_env_vars(
              datastore_type=self.datastore.TYPE,
              stdout_path=STDOUT_PATH,
              stderr_path=STDERR_PATH,
-             **task_spec
+             **task_spec,
          )
          init_cmds = environment.get_package_commands(
              code_package_url, self.datastore.TYPE
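
The `_job_name` rewrite bounds and sanitizes Snowpark job names: a lowercase alphanumeric `<flow>-<step>` prefix capped at 54 characters plus an 8-character MD5 digest of the fully qualified task identity keeps names readable, unique per task and retry, and at most 63 characters (the usual DNS-label limit). A standalone restatement of the scheme, with invented sample values:

    import hashlib
    import re

    def job_name(user, flow_name, run_id, step_name, task_id, retry_count):
        unique_str = f"{user}-{flow_name}-{run_id}-{step_name}-{task_id}-{retry_count}"
        unique_hash = hashlib.md5(unique_str.encode("utf-8")).hexdigest()[:8]
        safe_prefix = re.sub(r"[^a-z0-9]", "-", f"{flow_name}-{step_name}".lower())
        return f"{safe_prefix[:54].lstrip('-')}-{unique_hash}"

    print(job_name("meg", "TrainFlow", "42", "start", "7", "0"))
    # -> "trainflow-start-<8 hex chars>"; a retry changes retry_count, so the hash differs.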