ob-metaflow-extensions 1.4.17__py2.py3-none-any.whl → 1.4.18__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This release has been flagged as potentially problematic.

@@ -0,0 +1,135 @@
+ import sys
+ import os
+ import subprocess
+ from metaflow.mflog.mflog import decorate
+ from metaflow.mflog import TASK_LOG_SOURCE
+ from typing import Union, TextIO, BinaryIO, Callable, Optional
+ from queue import Queue, Empty
+ from concurrent.futures import ThreadPoolExecutor
+
+
+ def enqueue_output(file, queue):
+     # The proxy process is opened with text=False below, so readline yields
+     # bytes; the EOF sentinel must therefore be b"" rather than "".
+     for line in iter(file.readline, b""):
+         queue.put(line)
+     file.close()
+
+
+ def read_popen_pipes(p: subprocess.Popen):
+
+     with ThreadPoolExecutor(2) as pool:
+         q_stdout, q_stderr = Queue(), Queue()
+         pool.submit(enqueue_output, p.stdout, q_stdout)
+         pool.submit(enqueue_output, p.stderr, q_stderr)
+         while True:
+
+             if p.poll() is not None and q_stdout.empty() and q_stderr.empty():
+                 break
+
+             out_line = err_line = b""
+
+             try:
+                 out_line = q_stdout.get_nowait()
+             except Empty:
+                 pass
+             try:
+                 err_line = q_stderr.get_nowait()
+             except Empty:
+                 pass
+
+             yield (out_line, err_line)
+
+
+ class LogBroadcaster:
+     def __init__(
+         self,
+         process: subprocess.Popen,
+     ):
+         self._process = process
+         self._file_descriptors_and_parsers = []
+
+     def add_channel(
+         self, file_path: str, parser: Optional[Callable] = None
+     ):
+         self._file_descriptors_and_parsers.append((open(file_path, "a"), parser))
+
+     def _broadcast_lines(self, out_line, err_line):
+         for file_descriptor, parser in self._file_descriptors_and_parsers:
+             # Parse into locals so a line is not re-parsed when more than
+             # one channel is registered.
+             if out_line:
+                 parsed = parser(out_line) if parser else out_line
+                 print(parsed, file=file_descriptor, end="", flush=True)
+             if err_line:
+                 parsed = parser(err_line) if parser else err_line
+                 print(parsed, file=file_descriptor, end="", flush=True)
+
+     def publish_line(self, out_line, err_line):
+         self._broadcast_lines(out_line, err_line)
+
+     def broadcast_logs_to_files(self):
+         for out_line, err_line in read_popen_pipes(self._process):
+             self._broadcast_lines(out_line, err_line)
+
+         self._process.wait()
+
+         for file_descriptor, _ in self._file_descriptors_and_parsers:
+             file_descriptor.close()
+
+
+ def run_with_mflog_capture(command, debug=False):
+     """
+     Run a subprocess with proper mflog integration for stdout/stderr capture.
+     This mimics what bash_capture_logs does, but in Python.
+     """
+     # Get the log file paths from the environment variables
+     stdout_path = os.environ.get("MFLOG_STDOUT")
+     stderr_path = os.environ.get("MFLOG_STDERR")
+
+     if not stdout_path or not stderr_path:
+         # Fall back to a regular subprocess if the mflog env vars aren't set
+         return subprocess.run(command, check=True, shell=True).returncode
+
+     pipe = subprocess.PIPE if debug else subprocess.DEVNULL
+     # Start the subprocess with pipes
+     process = subprocess.Popen(
+         command,
+         shell=True,
+         stdout=pipe,
+         stderr=pipe,
+         text=False,  # Use bytes for proper mflog handling
+         bufsize=0,  # Unbuffered for real-time logging
+     )
+
+     broadcaster = LogBroadcaster(process)
+
+     broadcaster.add_channel(
+         stderr_path, lambda line: decorate(TASK_LOG_SOURCE, line).decode("utf-8")
+     )
+     broadcaster.publish_line(b"[S3 PROXY] Starting Fast S3 Proxy.....\n", b"")
+     broadcaster.broadcast_logs_to_files()
+
+     # Check the return code and raise if non-zero
+     if process.returncode != 0:
+         raise subprocess.CalledProcessError(process.returncode, command)
+
+     return process.returncode
+
+
+ if __name__ == "__main__":
+     s3_proxy_binary_path = os.environ.get("S3_PROXY_BINARY_COMMAND")
+     # The manager sets this variable to the literal string "True"; bool() on any
+     # non-empty string is truthy, so compare explicitly.
+     s3_proxy_debug = os.environ.get("S3_PROXY_BINARY_DEBUG") == "True"
+     if not s3_proxy_binary_path:
+         print("S3_PROXY_BINARY_COMMAND environment variable not set")
+         sys.exit(1)
+
+     try:
+         run_with_mflog_capture(s3_proxy_binary_path, debug=s3_proxy_debug)
+     except subprocess.CalledProcessError as e:
+         sys.exit(e.returncode)
+     except Exception as e:
+         print(f"Error running S3 proxy binary: {e}", file=sys.stderr)
+         sys.exit(1)
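For orientation, the pieces above compose as follows. A minimal sketch, assuming the module is importable as packaged; the `echo` command and the `/tmp/demo.log` path are illustrative only:

    import subprocess

    from metaflow_extensions.outerbounds.plugins.s3_proxy.binary_caller import LogBroadcaster

    # Spawn a process in binary mode, mirroring run_with_mflog_capture above
    proc = subprocess.Popen(
        "echo hello; echo oops >&2",
        shell=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=False,
        bufsize=0,
    )
    broadcaster = LogBroadcaster(proc)
    # The parser runs once per line; here it just decodes the raw bytes
    broadcaster.add_channel("/tmp/demo.log", parser=lambda line: line.decode("utf-8"))
    broadcaster.broadcast_logs_to_files()  # drains both pipes, waits, closes the files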
@@ -5,4 +5,7 @@ S3_PROXY_BINARY_URLS = {
  
  DEFAULT_PROXY_PORT = 8081
  DEFAULT_PROXY_HOST = "localhost"
- S3_PROXY_WRITE_MODES = ["origin-and-cache", "origin", "cache"]
+ S3_PROXY_WRITE_MODES = [
+     "origin-and-cache",
+     "origin",
+ ]
@@ -0,0 +1,59 @@
+ from .s3_proxy_manager import S3ProxyManager
+ from metaflow._vendor import click
+ from metaflow import JSONType
+ import json
+
+
+ @click.group()
+ def cli():
+     pass
+
+
+ @cli.command()
+ @click.option(
+     "--integration-name", type=str, help="The integration name", required=True
+ )
+ @click.option("--write-mode", type=str, help="The write mode")
+ @click.option("--debug", type=bool, help="The debug mode", default=False)
+ @click.option(
+     "--uc-proxy-cfg-write-path",
+     type=str,
+     help="The path to write the user code proxy config",
+     required=True,
+ )
+ @click.option(
+     "--proxy-status-write-path",
+     type=str,
+     help="The path to write the proxy status",
+     required=True,
+ )
+ def bootstrap(
+     integration_name,
+     write_mode,
+     debug,
+     uc_proxy_cfg_write_path,
+     proxy_status_write_path,
+ ):
+     manager = S3ProxyManager(
+         integration_name=integration_name,
+         write_mode=write_mode,
+         debug=debug,
+     )
+     user_code_proxy_config, proxy_pid, config_path, binary_path = manager.setup_proxy()
+     with open(uc_proxy_cfg_write_path, "w") as f:
+         f.write(json.dumps(user_code_proxy_config))
+     with open(proxy_status_write_path, "w") as f:
+         f.write(
+             json.dumps(
+                 {
+                     "proxy_pid": proxy_pid,
+                     "config_path": config_path,
+                     "binary_path": binary_path,
+                 }
+             )
+         )
+
+
+ if __name__ == "__main__":
+     print("[@s3_proxy] Jumpstarting the proxy....")
+     cli()
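For reference, this is the entry point that the decorator below wires into the environment bootstrap; the generated invocation looks like the following (the integration name is a placeholder):

    python -m metaflow_extensions.outerbounds.plugins.s3_proxy.proxy_bootstrap bootstrap \
        --integration-name my-integration \
        --write-mode origin \
        --debug False \
        --uc-proxy-cfg-write-path ./.uc_proxy_cfg_file \
        --proxy-status-write-path ./.proxy_status_file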
@@ -1,32 +1,103 @@
+ import os
  import functools
- from typing import Optional
+ import json
+ import signal
+ import time
+ from typing import Dict, List, Optional, Tuple
  
  from metaflow import current
  from metaflow.decorators import StepDecorator
-
- from .s3_proxy_manager import S3ProxyManager
  from .exceptions import S3ProxyException
  from .constants import S3_PROXY_WRITE_MODES
+ from collections import namedtuple
+
+ S3ProxyBinaryConfig = namedtuple(
+     "S3ProxyBinaryConfig", ["integration_name", "write_mode", "debug"]
+ )
+
+
+ def monkey_patch_environment(
+     environment, step_name_and_deco_attrs: Dict[str, S3ProxyBinaryConfig]
+ ):
+     wrapping_func = environment.bootstrap_commands
+
+     @functools.wraps(wrapping_func)
+     def wrapper(step_name, ds_type, *args):
+         base_bootstrap_cmd = wrapping_func(step_name, ds_type, *args)
+         additional_cmd = []
+
+         if step_name in step_name_and_deco_attrs:
+             integration_name = step_name_and_deco_attrs[step_name].integration_name
+             write_mode = step_name_and_deco_attrs[step_name].write_mode
+             debug = step_name_and_deco_attrs[step_name].debug
+             additional_cmd = [
+                 "echo 'Setting up the S3 proxy.'",
+                 f"python -m metaflow_extensions.outerbounds.plugins.s3_proxy.proxy_bootstrap bootstrap --integration-name {integration_name} --write-mode {write_mode} --debug {debug} --uc-proxy-cfg-write-path ./.uc_proxy_cfg_file --proxy-status-write-path ./.proxy_status_file",
+                 "export METAFLOW_S3_PROXY_USER_CODE_CONFIG=$(cat ./.uc_proxy_cfg_file)",
+                 "export METAFLOW_S3_PROXY_STATUS=$(cat ./.proxy_status_file)",
+                 "export METAFLOW_S3_PROXY_SETUP_SUCCESS=True",
+                 "flush_mflogs",
+             ]
+         return base_bootstrap_cmd + additional_cmd
+
+     environment.bootstrap_commands = wrapper
  
  
  class S3ProxyDecorator(StepDecorator):
      """
-     S3 Proxy decorator for routing S3 requests through a local proxy service.
+     Set up an S3 proxy that caches objects in an external, S3-compatible bucket
+     for S3 read and write requests.
+
+     This decorator requires an integration in the Outerbounds platform that
+     points to an external bucket. It affects S3 operations performed via
+     Metaflow's `get_aws_client` and `S3` within a `@step`.
+
+     Read operations
+     ---------------
+     All read operations pass through the proxy. If an object does not already
+     exist in the external bucket, it is cached there. For example, if code reads
+     from buckets `FOO` and `BAR` using the `S3` interface, objects from both
+     buckets are cached in the external bucket.
+
+     During task execution, all S3-related read requests are routed through the
+     proxy:
+     - If the object is present in the external object store, the proxy
+       streams it directly from there without accessing the requested origin
+       bucket.
+     - If the object is not present in the external storage, the proxy
+       fetches it from the requested bucket, caches it in the external
+       storage, and streams the response from the origin bucket.
+
+     Warning
+     -------
+     All READ operations (e.g., GetObject, HeadObject) pass through the external
+     bucket regardless of the bucket specified in user code. Even
+     `S3(run=self)` and `S3(s3root="mybucketfoo")` requests go through the
+     external bucket cache.
+
+     Write operations
+     ----------------
+     Write behavior is controlled by the `write_mode` parameter, which determines
+     whether writes also persist objects in the cache.
+
+     `write_mode` values:
+     - `origin-and-cache`: objects are written both to the cache and to their
+       intended origin bucket.
+     - `origin`: objects are written only to their intended origin bucket.
  
      Parameters
      ----------
      integration_name : str, optional
-         Name of the S3 proxy integration. If not specified, will use the only
-         available S3 proxy integration in the namespace (fails if multiple exist).
+         [Outerbounds integration name](https://docs.outerbounds.com/outerbounds/configuring-secrets/#integrations-view)
+         that holds the configuration for the external, S3-compatible object
+         storage bucket. If not specified, the only available S3 proxy
+         integration in the namespace is used (fails if multiple exist).
      write_mode : str, optional
-         The desired behavior during write operations to target (origin) S3 bucket.
-         allowed options are:
-         "origin-and-cache" -> write to both the target S3 bucket and local object
-         storage
-         "origin" -> only write to the target S3 bucket
-         "cache" -> only write to the object storage service used for caching
+         Controls whether writes also go to the external bucket.
+         - `origin` (default)
+         - `origin-and-cache`
      debug : bool, optional
-         Enable debug logging for proxy operations.
+         Enables debug logging for proxy operations.
      """
  
      name = "s3_proxy"
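Put together, a step using the decorator would look roughly like this. A hedged sketch: the flow, step, bucket, and `my-integration` integration are hypothetical, and it assumes the extension exposes `s3_proxy` through the usual `metaflow` import path for step decorators:

    from metaflow import FlowSpec, S3, kubernetes, s3_proxy, step


    class ProxiedFlow(FlowSpec):  # hypothetical flow
        @kubernetes  # the proxy is only set up on Kubernetes
        @s3_proxy(integration_name="my-integration", write_mode="origin")
        @step
        def start(self):
            # Reads are served from the external cache when the object is present
            with S3(s3root="s3://my-origin-bucket/data") as s3:
                print([obj.url for obj in s3.get_many(["a", "b"])])
            self.next(self.end)

        @step
        def end(self):
            pass


    if __name__ == "__main__":
        ProxiedFlow()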
@@ -36,6 +107,43 @@ class S3ProxyDecorator(StepDecorator):
          "debug": False,
      }
  
+     _environment_patched = False
+
+     _proxy_status = None
+
+     @classmethod
+     def patch_environment(cls, flow, environment):
+         """
+         We need to patch the environment bootstrap command so that
+         we can launch the s3 proxy before the step code execution.
+         We also want to ensure that we are running the proxy bootstrap
+         only for the steps that have the decorator set. This is why we pass down
+         all the step names whose bootstrap commands need to change.
+         """
+         if cls._environment_patched:
+             return
+
+         steps_with_s3_proxy = [
+             step
+             for step in flow
+             if any(deco.name == cls.name for deco in step.decorators)  # cls.name covers the subclasses too
+         ]
+         if len(steps_with_s3_proxy) == 0:  # nothing to patch
+             return
+
+         step_names_and_deco_attrs = {}
+         for s in steps_with_s3_proxy:
+             _decos = [x for x in s.decorators if x.name == cls.name]
+             deco = _decos[0]
+             step_names_and_deco_attrs[s.name] = S3ProxyBinaryConfig(
+                 integration_name=deco.attributes["integration_name"],
+                 write_mode=deco.attributes["write_mode"],
+                 debug=deco.attributes["debug"],
+             )
+
+         monkey_patch_environment(environment, step_names_and_deco_attrs)
+         cls._environment_patched = True
+
      def step_init(self, flow, graph, step, decos, environment, flow_datastore, logger):
          write_mode = self.attributes["write_mode"]
          if write_mode and write_mode not in S3_PROXY_WRITE_MODES:
@@ -43,13 +151,17 @@
                  f"unexpected write_mode specified: {write_mode}. Allowed values are: {','.join(S3_PROXY_WRITE_MODES)}."
              )
  
-         self.manager = S3ProxyManager(
-             integration_name=self.attributes["integration_name"],
-             write_mode=self.attributes["write_mode"],
-             debug=self.attributes["debug"],
-         )
+         self.patch_environment(flow, environment)
+         if (
+             os.environ.get("METAFLOW_S3_PROXY_USER_CODE_CONFIG")
+             and os.environ.get("METAFLOW_S3_PROXY_STATUS")
+             and self.attributes["debug"]
+         ):
+             print("[@s3_proxy] S3 Proxy detected. Debug mode is enabled.")
  
-         current._update_env({"s3_proxy": self.manager})
+         if os.environ.get("METAFLOW_S3_PROXY_STATUS"):
+             proxy_status = json.loads(os.environ.get("METAFLOW_S3_PROXY_STATUS"))
+             self._proxy_status = proxy_status
  
      def task_pre_step(
          self,
@@ -66,22 +178,51 @@
          inputs,
      ):
          """Setup S3 proxy before step execution"""
-         self.manager.setup_proxy()
+         pass
  
      def task_finished(
          self, step_name, flow, graph, is_task_ok, retry_count, max_retries
      ):
-         """Cleanup S3 proxy after step execution"""
-         if self.manager:
-             self.manager.cleanup()
+         if not self._proxy_status:
+             return
+
+         status = self._proxy_status
+         proxy_pid = status.get("proxy_pid")
+         config_path = status.get("config_path")
+         binary_path = status.get("binary_path")
+
+         # 1) Stop processes: try to terminate the process group for a clean child shutdown
+         if proxy_pid:
+             try:
+                 pgid = os.getpgid(proxy_pid)
+                 os.killpg(pgid, signal.SIGTERM)
+                 time.sleep(1)
+             except Exception:
+                 # Fall back to killing the pid directly if the pgid is unavailable
+                 try:
+                     os.kill(proxy_pid, signal.SIGTERM)
+                 except Exception:
+                     pass
+
+         # 2) Clear files based on status
+         for path in (config_path, binary_path):
+             try:
+                 if path and os.path.exists(path):
+                     os.remove(path)
+             except Exception:
+                 pass
  
  
  class NebiusS3ProxyDecorator(S3ProxyDecorator):
-     """
-     Nebius-specific S3 Proxy decorator for routing S3 requests through a local proxy service.
+
+     __doc__ = (
+         """
+     `@nebius_s3_proxy` is a Nebius-specific S3 Proxy decorator for routing S3 requests through a local proxy service.
      It exists to make it easier for users to know that this decorator should only be used with
-     a Neo Cloud like Nebius.
+     a Neo Cloud like Nebius. The underlying mechanics of the decorator are the same as `@s3_proxy`:\n
      """
+         + S3ProxyDecorator.__doc__
+     )
  
      name = "nebius_s3_proxy"
      defaults = {
@@ -92,11 +233,14 @@ class NebiusS3ProxyDecorator(S3ProxyDecorator):
  
  
  class CoreWeaveS3ProxyDecorator(S3ProxyDecorator):
-     """
-     CoreWeave-specific S3 Proxy decorator for routing S3 requests through a local proxy service.
+     __doc__ = (
+         """
+     `@coreweave_s3_proxy` is a CoreWeave-specific S3 Proxy decorator for routing S3 requests through a local proxy service.
      It exists to make it easier for users to know that this decorator should only be used with
-     a Neo Cloud like CoreWeave.
+     a Neo Cloud like CoreWeave. The underlying mechanics of the decorator are the same as `@s3_proxy`:\n
      """
+         + S3ProxyDecorator.__doc__
+     )
  
      name = "coreweave_s3_proxy"
      defaults = {
@@ -1,11 +1,12 @@
  import os
  import json
  import gzip
+ import sys
  import time
  import threading
  import subprocess
  from pathlib import Path
- from typing import Optional
+ from typing import Optional, Tuple
  
  import requests
  
@@ -35,7 +36,7 @@ class S3ProxyManager:
          self.api_client = S3ProxyApiClient()
          self.proxy_config = None
  
-     def setup_proxy(self) -> bool:
+     def setup_proxy(self) -> Tuple[dict, int, str, str]:
          try:
              if self._is_running_in_kubernetes():
                  config_data = self.api_client.fetch_s3_proxy_config(
@@ -43,14 +44,32 @@
                  )
                  self.binary_path = self._download_binary()
                  self.config_path = self._write_config_file(config_data)
+                 # The proxy binary is now launched before the metaflow code
+                 # execution even starts, which implies
+                 # a few important things:
+                 # 1. We start the actual proxy process via another python file that safely ships logs to mflog.
+                 # 2. We pass back the right values to the metaflow step process via env vars.
+                 # 3. Metaflow step code relies on env vars to decide if clients need to have the s3 proxy in them.
                  self.process = self._start_proxy_process()
-                 self._setup_proxy_config(config_data)
-                 return True
+
+                 user_code_proxy_config = self._setup_proxy_config(config_data)
+
+                 return_tuple = (
+                     user_code_proxy_config,  # the config used within the metaflow `step` code.
+                     self.process.pid,  # pid of the process that jumpstarts, monitors and ships the proxy's logs to mflog.
+                     self.config_path,  # path to the config derived from the integration; it holds the actual bucket path and name where external objects are stored.
+                     self.binary_path,  # path to the proxy binary.
+                 )
+                 # We return a tuple because these values need to be passed down to the metaflow step process,
+                 # which will handle their removal gracefully after the step is finished.
+                 return return_tuple
  
              print(
                  "[@s3_proxy] skipping s3-proxy set up because metaflow has not detected a Kubernetes environment"
             )
-             return False
+             raise S3ProxyException(
+                 "S3 proxy setup failed because metaflow has not detected a Kubernetes environment"
+             )
          except Exception as e:
              if self.debug:
                  print(f"[@s3_proxy] Setup failed: {e}")
@@ -121,21 +140,31 @@
          return str(config_path.absolute())
  
      def _start_proxy_process(self) -> subprocess.Popen:
+         # This command jump starts a process that will then call the proxy binary.
+         # The reason we do it this way is that all of this needs to run before
+         # even the `step` command is called, so we need a python process that ships the
+         # logs of the proxy process to mflog instead of relying on print statements. This
+         # process must run independently, since the S3ProxyManager gets called in the proxy
+         # bootstrap, which exits after jump starting the proxy process.
          cmd = [self.binary_path, "--bucket-config", self.config_path, "serve"]
-
+         _env = os.environ.copy()
+         _env["S3_PROXY_BINARY_COMMAND"] = " ".join(cmd)
          if self.debug:
-             print(f"[@s3_proxy] Starting proxy: {' '.join(cmd)}")
-
+             _env["S3_PROXY_BINARY_DEBUG"] = "True"
+         _cmd = [
+             sys.executable,
+             "-m",
+             "metaflow_extensions.outerbounds.plugins.s3_proxy.binary_caller",
+         ]
+         devnull = subprocess.DEVNULL
          process = subprocess.Popen(
-             cmd,
-             stdout=subprocess.PIPE,
-             stderr=subprocess.STDOUT,  # Redirect stderr to stdout
+             _cmd,
+             stdout=devnull,
+             stderr=devnull,
              text=True,
              start_new_session=True,
+             env=_env,
          )
-
-         self._setup_log_streaming(process)
-
          time.sleep(3)
  
          if process.poll() is None:
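The handoff to binary_caller happens entirely through the environment. Condensed to its essentials, the contract looks like this (the paths are illustrative):

    import os
    import subprocess
    import sys

    env = os.environ.copy()
    # binary_caller reads these two variables on startup
    env["S3_PROXY_BINARY_COMMAND"] = "/tmp/s3-proxy --bucket-config /tmp/cfg.json serve"  # hypothetical paths
    env["S3_PROXY_BINARY_DEBUG"] = "True"
    subprocess.Popen(
        [sys.executable, "-m", "metaflow_extensions.outerbounds.plugins.s3_proxy.binary_caller"],
        env=env,
        start_new_session=True,  # own process group, so task_finished can killpg the whole tree
    )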
@@ -149,30 +178,7 @@
              print(f"[@s3_proxy] Proxy failed to start - output: {stdout_data}")
          raise S3ProxyException(f"S3 proxy failed to start: {stdout_data}")
  
-     def _setup_log_streaming(self, process: subprocess.Popen):
-         def stream_logs():
-             try:
-                 # Read stdout line by line (stderr is redirected to stdout)
-                 while True:
-                     line = process.stdout.readline()
-                     if not line:
-                         # Process has ended
-                         break
-                     line = line.strip()
-                     if line and self.debug:
-                         print(f"[@s3_proxy] {line}")
-
-             except Exception as e:
-                 if self.debug:
-                     print(f"[@s3_proxy] Log streaming error: {e}")
-
-         log_thread = threading.Thread(target=stream_logs, daemon=True)
-         log_thread.start()
-
      def _setup_proxy_config(self, config_data):
-         from metaflow_extensions.outerbounds.toplevel.global_aliases_for_metaflow_package import (
-             set_s3_proxy_config,
-         )
          from metaflow.metaflow_config import AWS_SECRETS_MANAGER_DEFAULT_REGION
  
          region = os.environ.get(
@@ -190,11 +196,8 @@
          if self.write_mode:
              proxy_config["write_mode"] = self.write_mode
  
-         set_s3_proxy_config(proxy_config)
          self.proxy_config = proxy_config
-
-         if self.debug:
-             print("[@s3_proxy] Global S3 proxy configuration activated")
+         return proxy_config
  
      def cleanup(self):
          try:
@@ -24,9 +24,26 @@ def clear_s3_proxy_config():
  
  
  def get_s3_proxy_config():
+     global _S3_PROXY_CONFIG
+     if _S3_PROXY_CONFIG is None:
+         set_s3_proxy_config(get_s3_proxy_config_from_env())
      return _S3_PROXY_CONFIG
  
  
+ # TODO: Refactor out the _S3_PROXY_CONFIG global variable and instead use the function that
+ # extracts it from the environment variables.
+
+ import os
+ import json
+
+
+ def get_s3_proxy_config_from_env():
+     env_conf = os.environ.get("METAFLOW_S3_PROXY_USER_CODE_CONFIG")
+     if env_conf:
+         return json.loads(env_conf)
+     return None
+
+
  # Must match the signature of metaflow.plugins.aws.aws_client.get_aws_client
  # This function is called by the "userland" code inside tasks. Metaflow internals
  # will call the function in metaflow.plugins.aws.aws_client.get_aws_client directly.
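With the lazy getter, user code never has to call set_s3_proxy_config explicitly. A sketch of the behavior, assuming it runs where these helpers are importable; the single-key payload is a simplification of the real config assembled by _setup_proxy_config:

    import json
    import os

    os.environ["METAFLOW_S3_PROXY_USER_CODE_CONFIG"] = json.dumps({"write_mode": "origin"})
    # Resolved from the environment on first access, then cached in the module global
    assert get_s3_proxy_config() == {"write_mode": "origin"}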
@@ -55,9 +72,14 @@ def get_aws_client(
      if decorator_role_arn:
          role_arn = decorator_role_arn
  
-     if module == "s3" and _S3_PROXY_CONFIG is not None:
+     if module == "s3" and get_s3_proxy_config() is not None:
          return get_aws_client_with_s3_proxy(
-             module, with_error, role_arn, session_vars, client_params, _S3_PROXY_CONFIG
+             module,
+             with_error,
+             role_arn,
+             session_vars,
+             client_params,
+             get_s3_proxy_config(),
          )
  
      client = metaflow.plugins.aws.aws_client.get_aws_client(
@@ -97,12 +119,18 @@ def S3(*args, **kwargs):
          kwargs["role"] = USE_CSPR_ROLE_ARN_IF_SET
  
      # Check if S3 proxy is active using module variable (like CSPR)
-     if _S3_PROXY_CONFIG is not None:
-         return get_S3_with_s3_proxy(_S3_PROXY_CONFIG, *args, **kwargs)
+     if get_s3_proxy_config() is not None:
+         return get_S3_with_s3_proxy(get_s3_proxy_config(), *args, **kwargs)
  
      return metaflow.plugins.datatools.s3.S3(*args, **kwargs)
  
  
+ # Setting the S3 client docstring in order to ensure that
+ # stubs get generated properly.
+ import metaflow.plugins.datatools.s3
+
+ S3.__doc__ = metaflow.plugins.datatools.s3.S3.__doc__
+
  from .. import profilers
  from ..plugins.snowflake import Snowflake
  from ..plugins.checkpoint_datastores import nebius_checkpoints, coreweave_checkpoints
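From user code the patch is invisible: the aliased S3 behaves like metaflow.S3 and silently routes through the proxy whenever the env-derived config is present. A sketch (the bucket name is a placeholder):

    from metaflow import S3  # resolves to the wrapper above once the extension is installed

    with S3(s3root="s3://my-origin-bucket/data") as s3:
        obj = s3.get("some/key")  # served from the external cache when the object is there
        print(obj.url)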
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: ob-metaflow-extensions
- Version: 1.4.17
+ Version: 1.4.18
  Summary: Outerbounds Platform Extensions for Metaflow
  Author: Outerbounds, Inc.
  License: Commercial
@@ -88,11 +88,13 @@ metaflow_extensions/outerbounds/plugins/profilers/deco_injector.py,sha256=oI_C3c
  metaflow_extensions/outerbounds/plugins/profilers/gpu_profile_decorator.py,sha256=gDHQ2sMIp4NuZSzUspbSd8RGdFAoO5mgZAyFcZ2a51Y,2619
  metaflow_extensions/outerbounds/plugins/profilers/simple_card_decorator.py,sha256=4W9tLGCmkFx-4XYLa1xF6qMiaWOBYYFx_RclZDKej30,3259
  metaflow_extensions/outerbounds/plugins/s3_proxy/__init__.py,sha256=9Kw86B331pQJAzkfBMPIDoPrJsW0LVRHXBYikbcc2xk,204
- metaflow_extensions/outerbounds/plugins/s3_proxy/constants.py,sha256=Jjsd3cuo3IMi8rcKsUJx2PK188hMhFNyPTNKCFKfAQI,319
+ metaflow_extensions/outerbounds/plugins/s3_proxy/binary_caller.py,sha256=NxgyDF6KBH7VB2-Lqg9XvLjcHHVBeNJaTw66GXe6q5I,4253
+ metaflow_extensions/outerbounds/plugins/s3_proxy/constants.py,sha256=ugjVUv_C2JW5Dy6hAunaivxS4rlOvCMiwVCe8gyQ_FI,321
  metaflow_extensions/outerbounds/plugins/s3_proxy/exceptions.py,sha256=IkPqDvSeYQukNeu0aIVCmfQWTvUHsTs-qv7nvry2KjM,305
+ metaflow_extensions/outerbounds/plugins/s3_proxy/proxy_bootstrap.py,sha256=xtYoyydd-JV7l6YxR7UwrErXT9HvRSPGjJQAoV83rgE,1507
  metaflow_extensions/outerbounds/plugins/s3_proxy/s3_proxy_api.py,sha256=WjpprW0tCICLOihFywEtgJbCnx-OFmwuT_hR27ACl2A,3007
- metaflow_extensions/outerbounds/plugins/s3_proxy/s3_proxy_decorator.py,sha256=yqQryVjaUQ6Aq_SMI8IRHXwzPokkznHncLDpLSEcQeM,3285
- metaflow_extensions/outerbounds/plugins/s3_proxy/s3_proxy_manager.py,sha256=qmSypJXY-l7P2sI4mO6y-Rut5vGL2m1TjvGIXHUi6vs,7379
+ metaflow_extensions/outerbounds/plugins/s3_proxy/s3_proxy_decorator.py,sha256=kbsAAY8gnE6F9kSzKCliUhw3h-ol-_v8qdv8meC4JBA,9206
+ metaflow_extensions/outerbounds/plugins/s3_proxy/s3_proxy_manager.py,sha256=Grl7eLqH5iV0RPgSUl7v0BMRLkTvYpMiwKhPHeZqJ3M,8600
  metaflow_extensions/outerbounds/plugins/secrets/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  metaflow_extensions/outerbounds/plugins/secrets/secrets.py,sha256=3s98hO_twKkM22tKyDdcUjGQNfYpSXW_jLKISV9ju_U,8433
  metaflow_extensions/outerbounds/plugins/snowflake/__init__.py,sha256=RG4ixt3jwqcK1_tt0QxLcUbNmf7wWAMnZhBx-ZMGgLk,114
@@ -115,7 +117,7 @@ metaflow_extensions/outerbounds/plugins/vllm/vllm_manager.py,sha256=sp_TX2SrImJG
  metaflow_extensions/outerbounds/profilers/__init__.py,sha256=wa_jhnCBr82TBxoS0e8b6_6sLyZX0fdHicuGJZNTqKw,29
  metaflow_extensions/outerbounds/profilers/gpu.py,sha256=3Er8uKQzfm_082uadg4yn_D4Y-iSCgzUfFmguYxZsz4,27485
  metaflow_extensions/outerbounds/toplevel/__init__.py,sha256=qWUJSv_r5hXJ7jV_On4nEasKIfUCm6_UjkjXWA_A1Ts,90
- metaflow_extensions/outerbounds/toplevel/global_aliases_for_metaflow_package.py,sha256=StTRMBHjuxfxe-wQs8ikoAZc4xnhlceY0R4avaJ1Ps8,3823
+ metaflow_extensions/outerbounds/toplevel/global_aliases_for_metaflow_package.py,sha256=EQbWEatFfsJah7kEiUVrCC2rNTj2UeISJB9N7gtGitc,4546
  metaflow_extensions/outerbounds/toplevel/ob_internal.py,sha256=DXCaAtLzlE-bFIiVWEv-iV2JKIWsoSGaUeH4jIQZ9gs,193
  metaflow_extensions/outerbounds/toplevel/s3_proxy.py,sha256=zdqG7Z12cGuoYYCi2P4kqC3WsgL3xfdJGIb7ejecHH4,2862
  metaflow_extensions/outerbounds/toplevel/plugins/azure/__init__.py,sha256=WUuhz2YQfI4fz7nIcipwwWq781eaoHEk7n4GAn1npDg,63
@@ -126,7 +128,7 @@ metaflow_extensions/outerbounds/toplevel/plugins/optuna/__init__.py,sha256=6D1wL
  metaflow_extensions/outerbounds/toplevel/plugins/snowflake/__init__.py,sha256=LptpH-ziXHrednMYUjIaosS1SXD3sOtF_9_eRqd8SJw,50
  metaflow_extensions/outerbounds/toplevel/plugins/torchtune/__init__.py,sha256=uTVkdSk3xZ7hEKYfdlyVteWj5KeDwaM1hU9WT-_YKfI,50
  metaflow_extensions/outerbounds/toplevel/plugins/vllm/__init__.py,sha256=ekcgD3KVydf-a0xMI60P4uy6ePkSEoFHiGnDq1JM940,45
- ob_metaflow_extensions-1.4.17.dist-info/METADATA,sha256=NPN-hn-2Op0MGVkoboQuZa_wncU7TcAFQuzm6IDAlOA,519
- ob_metaflow_extensions-1.4.17.dist-info/WHEEL,sha256=bb2Ot9scclHKMOLDEHY6B2sicWOgugjFKaJsT7vwMQo,110
- ob_metaflow_extensions-1.4.17.dist-info/top_level.txt,sha256=NwG0ukwjygtanDETyp_BUdtYtqIA_lOjzFFh1TsnxvI,20
- ob_metaflow_extensions-1.4.17.dist-info/RECORD,,
+ ob_metaflow_extensions-1.4.18.dist-info/METADATA,sha256=B8raSpsbfBI7AcT8RmPVmuZSaXViuF7VV_qwPuri_Js,519
+ ob_metaflow_extensions-1.4.18.dist-info/WHEEL,sha256=bb2Ot9scclHKMOLDEHY6B2sicWOgugjFKaJsT7vwMQo,110
+ ob_metaflow_extensions-1.4.18.dist-info/top_level.txt,sha256=NwG0ukwjygtanDETyp_BUdtYtqIA_lOjzFFh1TsnxvI,20
+ ob_metaflow_extensions-1.4.18.dist-info/RECORD,,