ob-metaflow-extensions 1.1.142__py2.py3-none-any.whl → 1.4.33__py2.py3-none-any.whl
- metaflow_extensions/outerbounds/__init__.py +1 -1
- metaflow_extensions/outerbounds/plugins/__init__.py +26 -5
- metaflow_extensions/outerbounds/plugins/apps/app_cli.py +0 -0
- metaflow_extensions/outerbounds/plugins/apps/app_deploy_decorator.py +146 -0
- metaflow_extensions/outerbounds/plugins/apps/core/__init__.py +10 -0
- metaflow_extensions/outerbounds/plugins/apps/core/_state_machine.py +506 -0
- metaflow_extensions/outerbounds/plugins/apps/core/_vendor/__init__.py +0 -0
- metaflow_extensions/outerbounds/plugins/apps/core/_vendor/spinner/__init__.py +4 -0
- metaflow_extensions/outerbounds/plugins/apps/core/_vendor/spinner/spinners.py +478 -0
- metaflow_extensions/outerbounds/plugins/apps/core/app_cli.py +1200 -0
- metaflow_extensions/outerbounds/plugins/apps/core/app_config.py +146 -0
- metaflow_extensions/outerbounds/plugins/apps/core/artifacts.py +0 -0
- metaflow_extensions/outerbounds/plugins/apps/core/capsule.py +958 -0
- metaflow_extensions/outerbounds/plugins/apps/core/click_importer.py +24 -0
- metaflow_extensions/outerbounds/plugins/apps/core/code_package/__init__.py +3 -0
- metaflow_extensions/outerbounds/plugins/apps/core/code_package/code_packager.py +618 -0
- metaflow_extensions/outerbounds/plugins/apps/core/code_package/examples.py +125 -0
- metaflow_extensions/outerbounds/plugins/apps/core/config/__init__.py +12 -0
- metaflow_extensions/outerbounds/plugins/apps/core/config/cli_generator.py +161 -0
- metaflow_extensions/outerbounds/plugins/apps/core/config/config_utils.py +868 -0
- metaflow_extensions/outerbounds/plugins/apps/core/config/schema_export.py +288 -0
- metaflow_extensions/outerbounds/plugins/apps/core/config/typed_configs.py +139 -0
- metaflow_extensions/outerbounds/plugins/apps/core/config/typed_init_generator.py +398 -0
- metaflow_extensions/outerbounds/plugins/apps/core/config/unified_config.py +1088 -0
- metaflow_extensions/outerbounds/plugins/apps/core/config_schema.yaml +337 -0
- metaflow_extensions/outerbounds/plugins/apps/core/dependencies.py +115 -0
- metaflow_extensions/outerbounds/plugins/apps/core/deployer.py +303 -0
- metaflow_extensions/outerbounds/plugins/apps/core/experimental/__init__.py +89 -0
- metaflow_extensions/outerbounds/plugins/apps/core/perimeters.py +87 -0
- metaflow_extensions/outerbounds/plugins/apps/core/secrets.py +164 -0
- metaflow_extensions/outerbounds/plugins/apps/core/utils.py +233 -0
- metaflow_extensions/outerbounds/plugins/apps/core/validations.py +17 -0
- metaflow_extensions/outerbounds/plugins/aws/__init__.py +4 -0
- metaflow_extensions/outerbounds/plugins/aws/assume_role.py +3 -0
- metaflow_extensions/outerbounds/plugins/aws/assume_role_decorator.py +78 -0
- metaflow_extensions/outerbounds/plugins/checkpoint_datastores/__init__.py +2 -0
- metaflow_extensions/outerbounds/plugins/checkpoint_datastores/coreweave.py +71 -0
- metaflow_extensions/outerbounds/plugins/checkpoint_datastores/external_chckpt.py +85 -0
- metaflow_extensions/outerbounds/plugins/checkpoint_datastores/nebius.py +73 -0
- metaflow_extensions/outerbounds/plugins/fast_bakery/baker.py +110 -0
- metaflow_extensions/outerbounds/plugins/fast_bakery/docker_environment.py +17 -3
- metaflow_extensions/outerbounds/plugins/fast_bakery/fast_bakery.py +1 -0
- metaflow_extensions/outerbounds/plugins/kubernetes/kubernetes_client.py +18 -44
- metaflow_extensions/outerbounds/plugins/kubernetes/pod_killer.py +374 -0
- metaflow_extensions/outerbounds/plugins/nim/card.py +1 -6
- metaflow_extensions/outerbounds/plugins/nim/{__init__.py → nim_decorator.py} +13 -49
- metaflow_extensions/outerbounds/plugins/nim/nim_manager.py +294 -233
- metaflow_extensions/outerbounds/plugins/nim/utils.py +36 -0
- metaflow_extensions/outerbounds/plugins/nvcf/constants.py +2 -2
- metaflow_extensions/outerbounds/plugins/nvcf/nvcf.py +100 -19
- metaflow_extensions/outerbounds/plugins/nvcf/nvcf_decorator.py +6 -1
- metaflow_extensions/outerbounds/plugins/nvct/__init__.py +0 -0
- metaflow_extensions/outerbounds/plugins/nvct/exceptions.py +71 -0
- metaflow_extensions/outerbounds/plugins/nvct/nvct.py +131 -0
- metaflow_extensions/outerbounds/plugins/nvct/nvct_cli.py +289 -0
- metaflow_extensions/outerbounds/plugins/nvct/nvct_decorator.py +286 -0
- metaflow_extensions/outerbounds/plugins/nvct/nvct_runner.py +218 -0
- metaflow_extensions/outerbounds/plugins/nvct/utils.py +29 -0
- metaflow_extensions/outerbounds/plugins/ollama/__init__.py +171 -16
- metaflow_extensions/outerbounds/plugins/ollama/constants.py +1 -0
- metaflow_extensions/outerbounds/plugins/ollama/exceptions.py +22 -0
- metaflow_extensions/outerbounds/plugins/ollama/ollama.py +1710 -114
- metaflow_extensions/outerbounds/plugins/ollama/status_card.py +292 -0
- metaflow_extensions/outerbounds/plugins/optuna/__init__.py +48 -0
- metaflow_extensions/outerbounds/plugins/profilers/simple_card_decorator.py +96 -0
- metaflow_extensions/outerbounds/plugins/s3_proxy/__init__.py +7 -0
- metaflow_extensions/outerbounds/plugins/s3_proxy/binary_caller.py +132 -0
- metaflow_extensions/outerbounds/plugins/s3_proxy/constants.py +11 -0
- metaflow_extensions/outerbounds/plugins/s3_proxy/exceptions.py +13 -0
- metaflow_extensions/outerbounds/plugins/s3_proxy/proxy_bootstrap.py +59 -0
- metaflow_extensions/outerbounds/plugins/s3_proxy/s3_proxy_api.py +93 -0
- metaflow_extensions/outerbounds/plugins/s3_proxy/s3_proxy_decorator.py +250 -0
- metaflow_extensions/outerbounds/plugins/s3_proxy/s3_proxy_manager.py +225 -0
- metaflow_extensions/outerbounds/plugins/secrets/secrets.py +38 -2
- metaflow_extensions/outerbounds/plugins/snowflake/snowflake.py +44 -4
- metaflow_extensions/outerbounds/plugins/snowpark/snowpark_client.py +6 -3
- metaflow_extensions/outerbounds/plugins/snowpark/snowpark_decorator.py +13 -7
- metaflow_extensions/outerbounds/plugins/snowpark/snowpark_job.py +8 -2
- metaflow_extensions/outerbounds/plugins/torchtune/__init__.py +163 -0
- metaflow_extensions/outerbounds/plugins/vllm/__init__.py +255 -0
- metaflow_extensions/outerbounds/plugins/vllm/constants.py +1 -0
- metaflow_extensions/outerbounds/plugins/vllm/exceptions.py +1 -0
- metaflow_extensions/outerbounds/plugins/vllm/status_card.py +352 -0
- metaflow_extensions/outerbounds/plugins/vllm/vllm_manager.py +621 -0
- metaflow_extensions/outerbounds/remote_config.py +27 -3
- metaflow_extensions/outerbounds/toplevel/global_aliases_for_metaflow_package.py +87 -2
- metaflow_extensions/outerbounds/toplevel/ob_internal.py +4 -0
- metaflow_extensions/outerbounds/toplevel/plugins/optuna/__init__.py +1 -0
- metaflow_extensions/outerbounds/toplevel/plugins/torchtune/__init__.py +1 -0
- metaflow_extensions/outerbounds/toplevel/plugins/vllm/__init__.py +1 -0
- metaflow_extensions/outerbounds/toplevel/s3_proxy.py +88 -0
- {ob_metaflow_extensions-1.1.142.dist-info → ob_metaflow_extensions-1.4.33.dist-info}/METADATA +2 -2
- ob_metaflow_extensions-1.4.33.dist-info/RECORD +134 -0
- metaflow_extensions/outerbounds/plugins/nim/utilities.py +0 -5
- ob_metaflow_extensions-1.1.142.dist-info/RECORD +0 -64
- {ob_metaflow_extensions-1.1.142.dist-info → ob_metaflow_extensions-1.4.33.dist-info}/WHEEL +0 -0
- {ob_metaflow_extensions-1.1.142.dist-info → ob_metaflow_extensions-1.4.33.dist-info}/top_level.txt +0 -0
metaflow_extensions/outerbounds/plugins/ollama/status_card.py (new file)
@@ -0,0 +1,292 @@
+from metaflow.cards import Markdown, Table, VegaChart
+from metaflow.metaflow_current import current
+from datetime import datetime
+import threading
+import time
+
+from ..card_utilities.async_cards import CardRefresher
+
+
+class OllamaStatusCard(CardRefresher):
+    """
+    Real-time status card for Ollama system monitoring.
+    Shows circuit breaker state, server health, model status, and recent events.
+    """
+
+    CARD_ID = "ollama_status"
+
+    def __init__(self, refresh_interval=10):
+        self.refresh_interval = refresh_interval
+        self.status_data = {
+            "circuit_breaker": {
+                "state": "CLOSED",
+                "failure_count": 0,
+                "last_failure_time": None,
+                "last_open_time": None,
+            },
+            "server": {
+                "status": "Starting",
+                "uptime_start": None,
+                "restart_attempts": 0,
+                "last_health_check": None,
+                "health_status": "Unknown",
+            },
+            "models": {},  # model_name -> {status, pull_time, load_time, etc}
+            "performance": {
+                "install_time": None,
+                "server_startup_time": None,
+                "total_initialization_time": None,
+            },
+            "versions": {
+                "ollama_system": "Detecting...",
+                "ollama_python": "Detecting...",
+            },
+            "cache": {
+                "policy": "auto",
+                "model_status": {},  # model_name -> cache status
+            },
+            "events": [],  # Recent events log
+        }
+        self._lock = threading.Lock()
+        self._already_rendered = False
+
+    def update_status(self, category, data):
+        """Thread-safe method to update status data"""
+        with self._lock:
+            if category in self.status_data:
+                self.status_data[category].update(data)
+
+    def add_event(self, event_type, message, timestamp=None):
+        """Add an event to the timeline"""
+        if timestamp is None:
+            timestamp = datetime.now()
+
+        with self._lock:
+            self.status_data["events"].insert(
+                0,
+                {
+                    "type": event_type,  # 'info', 'warning', 'error', 'success'
+                    "message": message,
+                    "timestamp": timestamp,
+                },
+            )
+            # Keep only last 10 events
+            self.status_data["events"] = self.status_data["events"][:10]
+
+    def get_circuit_breaker_emoji(self, state):
+        """Get status emoji for circuit breaker state"""
+        emoji_map = {"CLOSED": "🟢", "OPEN": "🔴", "HALF_OPEN": "🟡"}
+        return emoji_map.get(state, "⚪")
+
+    def get_uptime_string(self, start_time):
+        """Calculate uptime string"""
+        if not start_time:
+            return "Not started"
+
+        uptime = datetime.now() - start_time
+        hours, remainder = divmod(int(uptime.total_seconds()), 3600)
+        minutes, seconds = divmod(remainder, 60)
+
+        if hours > 0:
+            return f"{hours}h {minutes}m {seconds}s"
+        elif minutes > 0:
+            return f"{minutes}m {seconds}s"
+        else:
+            return f"{seconds}s"
+
+    def on_startup(self, current_card):
+        """Initialize the card when monitoring starts"""
+        current_card.append(Markdown("# 🦙 `@ollama` Status Dashboard"))
+        current_card.append(Markdown("_Initializing Ollama system..._"))
+        current_card.refresh()
+
+    def render_card_fresh(self, current_card, data):
+        """Render the complete card with all status information"""
+        self._already_rendered = True
+        current_card.clear()
+
+        # Header with version information
+        current_card.append(Markdown("# 🦙 `@ollama` Status Dashboard"))
+
+        # Version information in header
+        versions = data.get("versions", {})
+        system_version = versions.get("ollama_system", "Unknown")
+        python_version = versions.get("ollama_python", "Unknown")
+        current_card.append(
+            Markdown(
+                f"**System:** `{system_version}` | **Python Client:** `{python_version}`"
+            )
+        )
+
+        # Cache policy information
+        cache_info = data.get("cache", {})
+        cache_policy = cache_info.get("policy", "auto")
+        current_card.append(Markdown(f"**Cache Policy:** `{cache_policy}`"))
+
+        current_card.append(
+            Markdown(f"_Last updated: {datetime.now().strftime('%H:%M:%S')}_")
+        )
+
+        # Circuit Breaker Status
+        cb_data = data["circuit_breaker"]
+        cb_emoji = self.get_circuit_breaker_emoji(cb_data["state"])
+        cb_status = f"{cb_emoji} **{cb_data['state']}**"
+        if cb_data["failure_count"] > 0:
+            cb_status += f" (failures: {cb_data['failure_count']})"
+
+        # Server Status
+        server_data = data["server"]
+        uptime = self.get_uptime_string(server_data.get("uptime_start"))
+        server_status = f"**{server_data['status']}**"
+        if server_data["restart_attempts"] > 0:
+            server_status += f" (restarts: {server_data['restart_attempts']})"
+
+        # Status Overview Table
+        status_table = [
+            ["Circuit Breaker", Markdown(cb_status)],
+            ["Server Status", Markdown(server_status)],
+            ["Server Uptime", Markdown(uptime)],
+            [
+                "Last Health Check",
+                Markdown(server_data.get("health_status", "Unknown")),
+            ],
+        ]
+
+        current_card.append(Markdown("## System Status"))
+        current_card.append(Table(status_table, headers=["Component", "Status"]))
+
+        # Models Status
+        if data["models"]:
+            current_card.append(Markdown("## Models"))
+            model_table = []
+            cache_model_status = cache_info.get("model_status", {})
+
+            for model_name, model_info in data["models"].items():
+                status = model_info.get("status", "Unknown")
+                pull_time = model_info.get("pull_time", "N/A")
+                if isinstance(pull_time, (int, float)):
+                    pull_time = f"{pull_time:.1f}s"
+
+                # Add cache status indicator
+                cache_status = cache_model_status.get(model_name, "unknown")
+                cache_emoji = {
+                    "exists": "💾",
+                    "missing": "❌",
+                    "error": "⚠️",
+                    "unknown": "❓",
+                }.get(cache_status, "❓")
+
+                # Get model metadata
+                size_formatted = model_info.get("size_formatted", "Unknown")
+                blob_count = model_info.get("blob_count", "Unknown")
+                if blob_count == 0:
+                    blob_count = "Unknown"
+
+                model_table.append(
+                    [
+                        f"{model_name} {cache_emoji}",
+                        status,
+                        pull_time,
+                        size_formatted,
+                        str(blob_count),
+                    ]
+                )
+
+            current_card.append(
+                Table(
+                    model_table,
+                    headers=["Model (Cache)", "Status", "Pull Time", "Size", "Blobs"],
+                )
+            )
+
+        # Performance Metrics
+        perf_data = data["performance"]
+        if any(v is not None for v in perf_data.values()):
+            current_card.append(Markdown("## Performance"))
+
+            # Separate initialization and shutdown metrics
+            init_metrics = []
+            shutdown_metrics = []
+            other_metrics = []
+
+            for metric, value in perf_data.items():
+                if value is not None:
+                    display_value = value
+                    if isinstance(value, (int, float)):
+                        display_value = f"{value:.1f}s"
+
+                    metric_display = metric.replace("_", " ").title()
+
+                    if "shutdown" in metric.lower():
+                        shutdown_metrics.append([metric_display, display_value])
+                    elif metric in [
+                        "install_time",
+                        "server_startup_time",
+                        "total_initialization_time",
+                    ]:
+                        init_metrics.append([metric_display, display_value])
+                    else:
+                        other_metrics.append([metric_display, display_value])
+
+            # Display metrics in organized sections
+            if init_metrics:
+                current_card.append(Markdown("### Initialization"))
+                current_card.append(Table(init_metrics, headers=["Metric", "Duration"]))
+
+            if shutdown_metrics:
+                current_card.append(Markdown("### Shutdown"))
+                current_card.append(
+                    Table(shutdown_metrics, headers=["Metric", "Value"])
+                )
+
+            if other_metrics:
+                current_card.append(Markdown("### Other"))
+                current_card.append(Table(other_metrics, headers=["Metric", "Value"]))
+
+        # Recent Events
+        if data["events"]:
+            current_card.append(Markdown("## Recent Events"))
+            events_table = []
+            for event in data["events"][:5]:  # Show last 5 events
+                timestamp = event["timestamp"].strftime("%H:%M:%S")
+                event_type = event["type"]
+                message = event["message"]
+
+                # Add emoji based on event type
+                type_emoji = {
+                    "info": "ℹ️",
+                    "success": "✅",
+                    "warning": "⚠️",
+                    "error": "❌",
+                }.get(event_type, "ℹ️")
+
+                events_table.append([timestamp, f"{type_emoji} {message}"])
+
+            current_card.append(Table(events_table, headers=["Time", "Event"]))
+
+        current_card.refresh()
+
+    def on_error(self, current_card, error_message):
+        """Handle errors in card rendering"""
+        if not self._already_rendered:
+            current_card.clear()
+            current_card.append(Markdown("# 🦙 `@ollama` Status Dashboard"))
+            current_card.append(Markdown(f"## ❌ Error: {str(error_message)}"))
+            current_card.refresh()
+
+    def on_update(self, current_card, data_object):
+        """Update the card with new data"""
+        with self._lock:
+            current_data = self.status_data.copy()
+
+        if not self._already_rendered:
+            self.render_card_fresh(current_card, current_data)
+        else:
+            # For frequent updates, we could implement incremental updates here
+            # For now, just re-render the whole card
+            self.render_card_fresh(current_card, current_data)
+
+    def sqlite_fetch_func(self, conn):
+        """Required by CardRefresher (which needs a refactor), but we use in-memory data instead"""
+        with self._lock:
+            return {"status": self.status_data}
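
For orientation, here is a minimal driver sketch. It is not part of the diff: it only exercises the public methods defined above (update_status, add_event), with placeholder version strings and model name; the actual wiring to the async card worker lives elsewhere in the ollama plugin.

# Illustrative only: exercises OllamaStatusCard's thread-safe update API.
from datetime import datetime

card = OllamaStatusCard(refresh_interval=10)

# A manager thread would typically push state transitions like these:
card.update_status("server", {"status": "Running", "uptime_start": datetime.now()})
card.update_status("versions", {"ollama_system": "0.x.y", "ollama_python": "0.x.y"})
card.update_status("models", {"llama3": {"status": "Ready", "pull_time": 42.5}})
card.add_event("success", "Model llama3 pulled and loaded")

# The async card worker then calls on_update(...) every refresh_interval
# seconds, which re-renders the whole card from status_data.
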
metaflow_extensions/outerbounds/plugins/optuna/__init__.py (new file)
@@ -0,0 +1,48 @@
+import os
+import json
+
+__mf_promote_submodules__ = ["plugins.optuna"]
+
+
+def auth():
+    from metaflow.metaflow_config_funcs import init_config
+
+    conf = init_config()
+    if conf:
+        headers = {"x-api-key": conf["METAFLOW_SERVICE_AUTH_KEY"]}
+    else:
+        headers = json.loads(os.environ["METAFLOW_SERVICE_HEADERS"])
+    return headers
+
+
+def get_deployment_db_access_endpoint(name: str):
+    from ..apps.core.perimeters import PerimeterExtractor
+    from ..apps.core.capsule import CapsuleApi
+
+    perimeter, cap_url = PerimeterExtractor.during_metaflow_execution()
+    deployment = CapsuleApi(cap_url, perimeter).get_by_name(name)
+    if not deployment:
+        raise Exception(f"No app deployment found with name `{name}`")
+
+    if (
+        "status" in deployment
+        and "accessInfo" in deployment["status"]
+        and "extraAccessUrls" in deployment["status"]["accessInfo"]
+    ):
+        for extra_url in deployment["status"]["accessInfo"]["extraAccessUrls"]:
+            if extra_url["name"] == "in_cluster_db_access":
+                db_url = extra_url["url"].replace("http://", "")
+                return db_url
+
+    raise Exception(f"No db access endpoint found for deployment `{name}`")
+
+
+def get_db_url(app_name: str):
+    """
+    Example usage:
+    >>> from metaflow.plugins.optuna import get_db_url
+    >>> s = optuna.create_study(..., storage=get_db_url("optuna-dashboard"))
+    """
+    mf_token = auth()["x-api-key"]
+    app_url = get_deployment_db_access_endpoint(app_name)
+    return f"postgresql://userspace_default:{mf_token}@{app_url}/userspace_default?sslmode=disable"
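
Expanding the docstring above into a minimal end-to-end sketch; "optuna-dashboard" is a placeholder deployment name and the objective function is illustrative:

import optuna
from metaflow.plugins.optuna import get_db_url

# get_db_url resolves the deployment's in-cluster Postgres endpoint and
# embeds the Metaflow auth token as the password.
storage = get_db_url("optuna-dashboard")

study = optuna.create_study(study_name="hpo", direction="minimize", storage=storage)
study.optimize(lambda trial: trial.suggest_float("x", -10, 10) ** 2, n_trials=20)
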
metaflow_extensions/outerbounds/plugins/profilers/simple_card_decorator.py (new file)
@@ -0,0 +1,96 @@
+from datetime import datetime
+from metaflow.decorators import StepDecorator
+from ..card_utilities.injector import CardDecoratorInjector
+
+
+class DynamicCardAppendDecorator(StepDecorator):
+    """
+    A simple decorator that demonstrates using CardDecoratorInjector
+    to inject a card and render simple markdown content.
+    """
+
+    name = "test_append_card"
+
+    defaults = {
+        "title": "Simple Card",
+        "message": "Hello from DynamicCardAppendDecorator!",
+        "show_timestamp": True,
+        "refresh_interval": 5,
+    }
+
+    CARD_ID = "simple_card"
+
+    def step_init(
+        self, flow, graph, step_name, decorators, environment, flow_datastore, logger
+    ):
+        """Initialize the decorator and inject the card."""
+        self.deco_injector = CardDecoratorInjector()
+        self.deco_injector.attach_card_decorator(
+            flow,
+            step_name,
+            self.CARD_ID,
+            "blank",
+            refresh_interval=self.attributes["refresh_interval"],
+        )
+
+    def task_decorate(
+        self, step_func, flow, graph, retry_count, max_user_code_retries, ubf_context
+    ):
+        """Decorate the step function to add card content."""
+        from metaflow import current
+        from metaflow.cards import Markdown
+
+        # Create the card content
+        title = self.attributes["title"]
+        message = self.attributes["message"]
+        show_timestamp = self.attributes["show_timestamp"]
+
+        # Add title to the card
+        current.card[self.CARD_ID].append(Markdown(f"# {title}"))
+
+        # Add message to the card
+        current.card[self.CARD_ID].append(Markdown(f"**Message:** {message}"))
+
+        # Add timestamp if requested
+        if show_timestamp:
+            timestamp = datetime.now().astimezone().strftime("%Y-%m-%d %H:%M:%S %z")
+            current.card[self.CARD_ID].append(Markdown(f"**Created at:** {timestamp}"))
+
+        # Add step information
+        current.card[self.CARD_ID].append(Markdown(f"**Step:** `{current.pathspec}`"))
+
+        # Add a simple divider
+        current.card[self.CARD_ID].append(Markdown("---"))
+
+        # Add some dynamic content that shows this is working
+        current.card[self.CARD_ID].append(
+            Markdown("**Status:** Card successfully injected! 🎉")
+        )
+
+        def wrapped_step_func():
+            """Execute the original step function."""
+            try:
+                # Before execution
+                current.card[self.CARD_ID].append(
+                    Markdown("**Execution:** Step started...")
+                )
+                current.card[self.CARD_ID].refresh()
+
+                # Execute the original step
+                step_func()
+
+                # After execution
+                current.card[self.CARD_ID].append(
+                    Markdown("**Execution:** Step completed successfully! ✅")
+                )
+                current.card[self.CARD_ID].refresh()
+
+            except Exception as e:
+                # Handle errors
+                current.card[self.CARD_ID].append(
+                    Markdown(f"**Error:** Step failed with error: `{str(e)}` ❌")
+                )
+                current.card[self.CARD_ID].refresh()
+                raise
+
+        return wrapped_step_func
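
Assuming the extension registers this decorator under its `name` attribute the way other Metaflow step decorators are registered (so it becomes importable from metaflow), a hypothetical flow using it might look like:

# Sketch only: the flow and the top-level import are assumptions based on
# how registered step decorators (e.g. @kubernetes) are normally exposed.
from metaflow import FlowSpec, step, test_append_card

class SimpleCardFlow(FlowSpec):

    @test_append_card(title="Demo", message="Injected card demo")
    @step
    def start(self):
        self.next(self.end)

    @step
    def end(self):
        pass

if __name__ == "__main__":
    SimpleCardFlow()
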
metaflow_extensions/outerbounds/plugins/s3_proxy/binary_caller.py (new file)
@@ -0,0 +1,132 @@
+import sys
+import os
+import subprocess
+from metaflow.mflog.mflog import decorate
+from metaflow.mflog import TASK_LOG_SOURCE
+from typing import Union, TextIO, BinaryIO, Callable, Optional
+from queue import Queue, Empty
+from concurrent.futures import ThreadPoolExecutor
+
+
+def enqueue_output(file, queue):
+    # The pipes below are opened in binary mode (text=False), so the
+    # readline sentinel must be b"", not "".
+    for line in iter(file.readline, b""):
+        queue.put(line)
+    file.close()
+
+
+def read_popen_pipes(p: subprocess.Popen):
+
+    with ThreadPoolExecutor(2) as pool:
+        q_stdout, q_stderr = Queue(), Queue()
+        # The pipes are None when the process was started with DEVNULL
+        # (debug=False), so only tail the ones that exist.
+        if p.stdout is not None:
+            pool.submit(enqueue_output, p.stdout, q_stdout)
+        if p.stderr is not None:
+            pool.submit(enqueue_output, p.stderr, q_stderr)
+        while True:
+
+            if p.poll() is not None and q_stdout.empty() and q_stderr.empty():
+                break
+
+            out_line = err_line = b""
+
+            try:
+                out_line = q_stdout.get_nowait()
+            except Empty:
+                pass
+            try:
+                err_line = q_stderr.get_nowait()
+            except Empty:
+                pass
+
+            yield (out_line, err_line)
+
+
+class LogBroadcaster:
+    def __init__(
+        self,
+        process: subprocess.Popen,
+    ):
+        self._process = process
+        self._file_descriptors_and_parsers = []
+
+    def add_channel(
+        self, file_path: str, parser: Optional[Callable[[str], str]] = None
+    ):
+        self._file_descriptors_and_parsers.append((open(file_path, "a"), parser))
+
+    def _broadcast_lines(
+        self, out_line: Union[str, bytes], err_line: Union[str, bytes]
+    ):
+        for file_descriptor, parser in self._file_descriptors_and_parsers:
+            if out_line:
+                if parser:
+                    out_line = parser(out_line)
+                print(out_line, file=file_descriptor, end="", flush=True)
+            if err_line:
+                if parser:
+                    err_line = parser(err_line)
+                print(err_line, file=file_descriptor, end="", flush=True)
+
+    def publish_line(self, out_line: Union[str, bytes], err_line: Union[str, bytes]):
+        self._broadcast_lines(out_line, err_line)
+
+    def broadcast_logs_to_files(self):
+        for out_line, err_line in read_popen_pipes(self._process):
+            self._broadcast_lines(out_line, err_line)
+
+        self._process.wait()
+
+        for file_descriptor, _ in self._file_descriptors_and_parsers:
+            file_descriptor.close()
+
+
+def run_with_mflog_capture(command, debug=False):
+    """
+    Run a subprocess with proper mflog integration for stdout/stderr capture.
+    This mimics what bash_capture_logs does, but in Python.
+    """
+    # Get the log file paths from environment variables
+    stdout_path = os.environ.get("MFLOG_STDOUT")
+    stderr_path = os.environ.get("MFLOG_STDERR")
+
+    if not stdout_path or not stderr_path:
+        # Fall back to a regular subprocess if the mflog env vars aren't set
+        return subprocess.run(command, check=True, shell=True)
+
+    pipe = subprocess.PIPE if debug else subprocess.DEVNULL
+    # Start the subprocess with pipes
+    process = subprocess.Popen(
+        command,
+        shell=True,
+        stdout=pipe,
+        stderr=pipe,
+        text=False,  # Use bytes for proper mflog handling
+        bufsize=0,  # Unbuffered for real-time logging
+    )
+
+    broadcaster = LogBroadcaster(process)
+
+    broadcaster.add_channel(
+        stderr_path, lambda line: decorate(TASK_LOG_SOURCE, line).decode("utf-8")
+    )
+    broadcaster.publish_line("[S3 PROXY] Starting Fast S3 Proxy.....\n", "")
+    broadcaster.broadcast_logs_to_files()
+
+    # Check the return code and raise if non-zero
+    if process.returncode != 0:
+        raise subprocess.CalledProcessError(process.returncode, command)
+
+    return process.returncode
+
+
+if __name__ == "__main__":
+    s3_proxy_binary_path = os.environ.get("S3_PROXY_BINARY_COMMAND")
+    s3_proxy_debug = bool(os.environ.get("S3_PROXY_BINARY_DEBUG", False))
+    if not s3_proxy_binary_path:
+        print("S3_PROXY_BINARY_COMMAND environment variable not set")
+        sys.exit(1)
+
+    try:
+        run_with_mflog_capture(s3_proxy_binary_path, debug=s3_proxy_debug)
+    except subprocess.CalledProcessError as e:
+        sys.exit(e.returncode)
+    except Exception as e:
+        print(f"Error running S3 proxy binary: {e}", file=sys.stderr)
+        sys.exit(1)
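
A minimal, self-contained sketch (the command and log path are placeholders, not from the package) of how LogBroadcaster fans a subprocess's output out to a log file:

import subprocess

proc = subprocess.Popen(
    "echo hello; echo oops >&2",
    shell=True,
    stdout=subprocess.PIPE,
    stderr=subprocess.PIPE,
    text=False,
    bufsize=0,
)
b = LogBroadcaster(proc)
# Decode the raw bytes to text before writing; /tmp/combined.log is a placeholder.
b.add_channel("/tmp/combined.log", lambda line: line.decode("utf-8"))
b.publish_line("starting demo\n", "")
b.broadcast_logs_to_files()  # blocks until the process exits, then closes the files
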
metaflow_extensions/outerbounds/plugins/s3_proxy/constants.py (new file)
@@ -0,0 +1,11 @@
+S3_PROXY_BINARY_URLS = {
+    "aarch64": "https://fast-s3-proxy.outerbounds.sh/linux-arm64/s3-proxy-0.1.1.gz",
+    "x86_64": "https://fast-s3-proxy.outerbounds.sh/linux-amd64/s3-proxy-0.1.1.gz",
+}
+
+DEFAULT_PROXY_PORT = 8081
+DEFAULT_PROXY_HOST = "localhost"
+S3_PROXY_WRITE_MODES = [
+    "origin-and-cache",
+    "origin",
+]
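
A small sketch of how these constants would likely be consumed; on Linux, platform.machine() returns strings such as "x86_64" or "aarch64", matching the dictionary keys above:

import platform

arch = platform.machine()
url = S3_PROXY_BINARY_URLS.get(arch)
if url is None:
    raise RuntimeError(f"No s3-proxy binary published for architecture {arch!r}")
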
metaflow_extensions/outerbounds/plugins/s3_proxy/exceptions.py (new file)
@@ -0,0 +1,13 @@
+from metaflow.exception import MetaflowException
+
+
+class S3ProxyException(MetaflowException):
+    headline = "S3 Proxy Error"
+
+
+class S3ProxyConfigException(S3ProxyException):
+    headline = "S3 Proxy Configuration Error"
+
+
+class S3ProxyApiException(S3ProxyException):
+    headline = "S3 Proxy API Error"
metaflow_extensions/outerbounds/plugins/s3_proxy/proxy_bootstrap.py (new file)
@@ -0,0 +1,59 @@
+from .s3_proxy_manager import S3ProxyManager
+from metaflow._vendor import click
+from metaflow import JSONType
+import json
+
+
+@click.group()
+def cli():
+    pass
+
+
+@cli.command()
+@click.option(
+    "--integration-name", type=str, help="The integration name", required=True
+)
+@click.option("--write-mode", type=str, help="The write mode")
+@click.option("--debug", type=bool, help="The debug mode", default=False)
+@click.option(
+    "--uc-proxy-cfg-write-path",
+    type=str,
+    help="The path to write the user code proxy config",
+    required=True,
+)
+@click.option(
+    "--proxy-status-write-path",
+    type=str,
+    help="The path to write the proxy status",
+    required=True,
+)
+def bootstrap(
+    integration_name,
+    write_mode,
+    debug,
+    uc_proxy_cfg_write_path,
+    proxy_status_write_path,
+):
+    manager = S3ProxyManager(
+        integration_name=integration_name,
+        write_mode=write_mode,
+        debug=debug,
+    )
+    user_code_proxy_config, proxy_pid, config_path, binary_path = manager.setup_proxy()
+    with open(uc_proxy_cfg_write_path, "w") as f:
+        f.write(json.dumps(user_code_proxy_config))
+    with open(proxy_status_write_path, "w") as f:
+        f.write(
+            json.dumps(
+                {
+                    "proxy_pid": proxy_pid,
+                    "config_path": config_path,
+                    "binary_path": binary_path,
+                }
+            )
+        )
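
Putting it together, the bootstrap entrypoint above can be invoked as a module. The sketch below is illustrative: the integration name and output paths are placeholders, while "origin-and-cache" comes from S3_PROXY_WRITE_MODES in constants.py.

import subprocess

subprocess.run(
    [
        "python", "-m",
        "metaflow_extensions.outerbounds.plugins.s3_proxy.proxy_bootstrap",
        "bootstrap",
        "--integration-name", "my-s3-integration",  # placeholder
        "--write-mode", "origin-and-cache",
        "--uc-proxy-cfg-write-path", "/tmp/uc_proxy_cfg.json",
        "--proxy-status-write-path", "/tmp/proxy_status.json",
    ],
    check=True,
)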