ob-metaflow-extensions 1.1.161rc1__py2.py3-none-any.whl → 1.1.162__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ob-metaflow-extensions might be problematic. Click here for more details.

@@ -0,0 +1,292 @@
1
+ from metaflow.cards import Markdown, Table, VegaChart
2
+ from metaflow.metaflow_current import current
3
+ from datetime import datetime
4
+ import threading
5
+ import time
6
+
7
+ from ..card_utilities.async_cards import CardRefresher
8
+
9
+
10
class OllamaStatusCard(CardRefresher):
    """
    Real-time status card for Ollama system monitoring.
    Shows circuit breaker state, server health, model status, and recent events.

    State is kept in memory in ``self.status_data`` and mutated through
    ``update_status`` / ``add_event`` under ``self._lock``. Rendering always
    works on a deep-copied snapshot, so a render never observes a
    half-applied update.
    """

    CARD_ID = "ollama_status"

    # How many events are retained in memory and how many of them are shown.
    MAX_EVENTS = 10
    MAX_EVENTS_SHOWN = 5

    _TITLE = "# 🦙 `@ollama` Status Dashboard"

    _CIRCUIT_BREAKER_EMOJI = {"CLOSED": "🟢", "OPEN": "🔴", "HALF_OPEN": "🟡"}
    _CACHE_EMOJI = {
        "exists": "💾",
        "missing": "❌",
        "error": "⚠️",
        "unknown": "❓",
    }
    _EVENT_EMOJI = {
        "info": "ℹ️",
        "success": "✅",
        "warning": "⚠️",
        "error": "❌",
    }
    # Performance metrics grouped under the "Initialization" heading.
    _INIT_METRICS = (
        "install_time",
        "server_startup_time",
        "total_initialization_time",
    )

    def __init__(self, refresh_interval=10):
        """
        Create the card with its default (pre-startup) status.

        Parameters
        ----------
        refresh_interval : int or float
            Stored on the instance as ``refresh_interval``; not otherwise
            used by this class (presumably read by the refresher machinery
            -- TODO confirm).
        """
        self.refresh_interval = refresh_interval
        self.status_data = {
            "circuit_breaker": {
                "state": "CLOSED",
                "failure_count": 0,
                "last_failure_time": None,
                "last_open_time": None,
            },
            "server": {
                "status": "Starting",
                "uptime_start": None,
                "restart_attempts": 0,
                "last_health_check": None,
                "health_status": "Unknown",
            },
            "models": {},  # model_name -> {status, pull_time, load_time, etc}
            "performance": {
                "install_time": None,
                "server_startup_time": None,
                "total_initialization_time": None,
            },
            "versions": {
                "ollama_system": "Detecting...",
                "ollama_python": "Detecting...",
            },
            "cache": {
                "policy": "auto",
                "model_status": {},  # model_name -> cache status
            },
            "events": [],  # Recent events log, newest first
        }
        self._lock = threading.Lock()
        self._already_rendered = False

    def _snapshot(self):
        """
        Return a deep copy of ``status_data`` taken under the lock.

        A shallow ``dict.copy()`` would still share the nested dicts and the
        events list with writer threads, so iterating them while rendering
        could race with ``update_status`` / ``add_event``.
        """
        import copy  # local import: keeps this block free of new file-level deps

        with self._lock:
            return copy.deepcopy(self.status_data)

    def update_status(self, category, data):
        """
        Thread-safe method to update status data.

        Merges ``data`` into ``status_data[category]`` with ``dict.update``.
        Unknown categories are ignored. The ``"events"`` category holds a
        list, not a dict, and must be changed through ``add_event`` instead.
        """
        with self._lock:
            if category in self.status_data:
                self.status_data[category].update(data)

    def add_event(self, event_type, message, timestamp=None):
        """
        Add an event to the timeline.

        Parameters
        ----------
        event_type : str
            One of 'info', 'warning', 'error', 'success'; any other value is
            rendered with the 'info' emoji.
        message : str
            Text shown in the events table.
        timestamp : datetime, optional
            Defaults to ``datetime.now()``. Rendering calls ``strftime`` on
            it, so it must be a datetime-like object.
        """
        if timestamp is None:
            timestamp = datetime.now()

        event = {
            "type": event_type,
            "message": message,
            "timestamp": timestamp,
        }
        with self._lock:
            events = self.status_data["events"]
            events.insert(0, event)
            # Keep only the most recent MAX_EVENTS entries.
            del events[self.MAX_EVENTS :]

    def get_circuit_breaker_emoji(self, state):
        """Get status emoji for circuit breaker state ("⚪" when unknown)."""
        return self._CIRCUIT_BREAKER_EMOJI.get(state, "⚪")

    def get_uptime_string(self, start_time):
        """
        Calculate a human-readable uptime string such as ``"1h 2m 3s"``.

        Returns ``"Not started"`` when ``start_time`` is falsy. "Now" is read
        in the timezone of ``start_time`` so naive and aware datetimes both
        work, and a start time in the future is reported as ``"0s"`` instead
        of a negative duration.
        """
        if not start_time:
            return "Not started"

        now = datetime.now(getattr(start_time, "tzinfo", None))
        elapsed = max(0, int((now - start_time).total_seconds()))
        hours, remainder = divmod(elapsed, 3600)
        minutes, seconds = divmod(remainder, 60)

        if hours > 0:
            return f"{hours}h {minutes}m {seconds}s"
        if minutes > 0:
            return f"{minutes}m {seconds}s"
        return f"{seconds}s"

    def on_startup(self, current_card):
        """Initialize the card when monitoring starts."""
        current_card.append(Markdown(self._TITLE))
        current_card.append(Markdown("_Initializing Ollama system..._"))
        current_card.refresh()

    def render_card_fresh(self, current_card, data):
        """
        Render the complete card with all status information.

        ``data`` must have the same layout as ``status_data``. The card is
        cleared, rebuilt section by section and refreshed.
        """
        current_card.clear()

        self._render_header(current_card, data)
        self._render_system_status(
            current_card, data["circuit_breaker"], data["server"]
        )
        self._render_models(current_card, data["models"], data.get("cache", {}))
        self._render_performance(current_card, data["performance"])
        self._render_events(current_card, data["events"])

        current_card.refresh()
        # Only mark the card as rendered once a full render has succeeded, so
        # that on_error can still report a failure of the very first render.
        self._already_rendered = True

    def _render_header(self, current_card, data):
        """Append the title, version line, cache policy and update time."""
        current_card.append(Markdown(self._TITLE))

        versions = data.get("versions", {})
        system_version = versions.get("ollama_system", "Unknown")
        python_version = versions.get("ollama_python", "Unknown")
        current_card.append(
            Markdown(
                f"**System:** `{system_version}` | **Python Client:** `{python_version}`"
            )
        )

        cache_policy = data.get("cache", {}).get("policy", "auto")
        current_card.append(Markdown(f"**Cache Policy:** `{cache_policy}`"))

        current_card.append(
            Markdown(f"_Last updated: {datetime.now().strftime('%H:%M:%S')}_")
        )

    def _render_system_status(self, current_card, cb_data, server_data):
        """Append the circuit breaker / server overview table."""
        cb_emoji = self.get_circuit_breaker_emoji(cb_data["state"])
        cb_status = f"{cb_emoji} **{cb_data['state']}**"
        if cb_data["failure_count"] > 0:
            cb_status += f" (failures: {cb_data['failure_count']})"

        uptime = self.get_uptime_string(server_data.get("uptime_start"))
        server_status = f"**{server_data['status']}**"
        if server_data["restart_attempts"] > 0:
            server_status += f" (restarts: {server_data['restart_attempts']})"

        status_table = [
            ["Circuit Breaker", Markdown(cb_status)],
            ["Server Status", Markdown(server_status)],
            ["Server Uptime", Markdown(uptime)],
            # NOTE(review): this row shows `health_status`, not the
            # `last_health_check` value -- confirm that is intended.
            [
                "Last Health Check",
                Markdown(server_data.get("health_status", "Unknown")),
            ],
        ]

        current_card.append(Markdown("## System Status"))
        current_card.append(Table(status_table, headers=["Component", "Status"]))

    def _render_models(self, current_card, models, cache_info):
        """Append the per-model table; nothing is added when there are no models."""
        if not models:
            return

        current_card.append(Markdown("## Models"))
        cache_model_status = cache_info.get("model_status", {})
        model_table = []

        for model_name, model_info in models.items():
            pull_time = model_info.get("pull_time", "N/A")
            if isinstance(pull_time, (int, float)):
                pull_time = f"{pull_time:.1f}s"

            cache_status = cache_model_status.get(model_name, "unknown")
            cache_emoji = self._CACHE_EMOJI.get(cache_status, "❓")

            blob_count = model_info.get("blob_count", "Unknown")
            if blob_count == 0:
                # A zero count is displayed the same way as a missing one.
                blob_count = "Unknown"

            model_table.append(
                [
                    f"{model_name} {cache_emoji}",
                    model_info.get("status", "Unknown"),
                    pull_time,
                    model_info.get("size_formatted", "Unknown"),
                    str(blob_count),
                ]
            )

        current_card.append(
            Table(
                model_table,
                headers=["Model (Cache)", "Status", "Pull Time", "Size", "Blobs"],
            )
        )

    def _render_performance(self, current_card, perf_data):
        """Append performance tables, grouped into init / shutdown / other."""
        recorded = {
            metric: value for metric, value in perf_data.items() if value is not None
        }
        if not recorded:
            return

        current_card.append(Markdown("## Performance"))

        init_metrics = []
        shutdown_metrics = []
        other_metrics = []

        for metric, value in recorded.items():
            # Numeric values are durations in seconds; anything else is shown as-is.
            display_value = value
            if isinstance(value, (int, float)):
                display_value = f"{value:.1f}s"
            row = [metric.replace("_", " ").title(), display_value]

            if "shutdown" in metric.lower():
                shutdown_metrics.append(row)
            elif metric in self._INIT_METRICS:
                init_metrics.append(row)
            else:
                other_metrics.append(row)

        if init_metrics:
            current_card.append(Markdown("### Initialization"))
            current_card.append(Table(init_metrics, headers=["Metric", "Duration"]))

        if shutdown_metrics:
            current_card.append(Markdown("### Shutdown"))
            current_card.append(Table(shutdown_metrics, headers=["Metric", "Value"]))

        if other_metrics:
            current_card.append(Markdown("### Other"))
            current_card.append(Table(other_metrics, headers=["Metric", "Value"]))

    def _render_events(self, current_card, events):
        """Append the table of the most recent events (newest first)."""
        if not events:
            return

        current_card.append(Markdown("## Recent Events"))
        events_table = []
        for event in events[: self.MAX_EVENTS_SHOWN]:
            timestamp = event["timestamp"].strftime("%H:%M:%S")
            type_emoji = self._EVENT_EMOJI.get(event["type"], "ℹ️")
            events_table.append([timestamp, f"{type_emoji} {event['message']}"])

        current_card.append(Table(events_table, headers=["Time", "Event"]))

    def on_error(self, current_card, error_message):
        """
        Handle errors in card rendering.

        The error replaces the card only while no full render has succeeded
        yet; afterwards the call is a no-op.
        """
        if not self._already_rendered:
            current_card.clear()
            current_card.append(Markdown(self._TITLE))
            current_card.append(Markdown(f"## ❌ Error: {str(error_message)}"))
            current_card.refresh()

    def on_update(self, current_card, data_object):
        """
        Update the card with new data.

        ``data_object`` is ignored: the card is re-rendered in full from a
        snapshot of the in-memory status. Incremental updates could be
        implemented here if full re-renders become too expensive.
        """
        self.render_card_fresh(current_card, self._snapshot())

    def sqlite_fetch_func(self, conn):
        """
        Required by CardRefresher (which needs a refactor), but we use in-memory data instead.

        ``conn`` is unused. Returns ``{"status": <snapshot>}``, where the
        snapshot is a deep copy, so the caller never holds a reference to
        state that other threads are mutating.
        """
        return {"status": self._snapshot()}
@@ -53,4 +53,3 @@ def S3(*args, **kwargs):
53
53
  from .. import profilers
54
54
  from ..plugins.snowflake import Snowflake
55
55
  from ..plugins.checkpoint_datastores import nebius_checkpoints, coreweave_checkpoints
56
- from . import ob_internal
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ob-metaflow-extensions
3
- Version: 1.1.161rc1
3
+ Version: 1.1.162
4
4
  Summary: Outerbounds Platform Extensions for Metaflow
5
5
  Author: Outerbounds, Inc.
6
6
  License: Commercial
@@ -17,7 +17,6 @@ metaflow_extensions/outerbounds/plugins/checkpoint_datastores/__init__.py,sha256
17
17
  metaflow_extensions/outerbounds/plugins/checkpoint_datastores/coreweave.py,sha256=_WzoOROFjoFa8TzsMNFp-r_1Zz7NUp-5ljn_kKlczXA,4534
18
18
  metaflow_extensions/outerbounds/plugins/checkpoint_datastores/nebius.py,sha256=zgqDLFewCeF5jqh-hUNKmC_OAjld09ln0bb8Lkeqapc,4659
19
19
  metaflow_extensions/outerbounds/plugins/fast_bakery/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
20
- metaflow_extensions/outerbounds/plugins/fast_bakery/baker.py,sha256=ShE5omFBr83wkvEhL_ptRFvDNMs6wefg4BjaafQjTcM,3602
21
20
  metaflow_extensions/outerbounds/plugins/fast_bakery/docker_environment.py,sha256=Tl520HdBteg-aDOM7mnnJJpdDCZc49BmFFmLUc_vTi8,15018
22
21
  metaflow_extensions/outerbounds/plugins/fast_bakery/fast_bakery.py,sha256=PE81ZB54OAMXkMGSB7JqgvgMg7N9kvoVclrWL-6jc2U,5626
23
22
  metaflow_extensions/outerbounds/plugins/fast_bakery/fast_bakery_cli.py,sha256=kqFyu2bJSnc9_9aYfBpz5xK6L6luWFZK_NMuh8f1eVk,1494
@@ -44,10 +43,11 @@ metaflow_extensions/outerbounds/plugins/nvct/nvct_cli.py,sha256=bB9AURhRep9PV_-b
44
43
  metaflow_extensions/outerbounds/plugins/nvct/nvct_decorator.py,sha256=HKCvYn1Jh8uwLXeUqPNhxgBatq3mXNG5YIUl-zjNlHE,9429
45
44
  metaflow_extensions/outerbounds/plugins/nvct/nvct_runner.py,sha256=8IPkdvuTZNIqgAAt75gVNn-ydr-Zz2sKC8UX_6pNEKI,7091
46
45
  metaflow_extensions/outerbounds/plugins/nvct/utils.py,sha256=U4_Fu8H94j_Bbox7mmMhNnlRhlYHqnK28R5w_TMWEFM,1029
47
- metaflow_extensions/outerbounds/plugins/ollama/__init__.py,sha256=vzh8sQEfwKRdx0fsGFJ-km4mwfi0vm2q1_vsZv-EMcc,3034
46
+ metaflow_extensions/outerbounds/plugins/ollama/__init__.py,sha256=4T8LQqAuh8flSMvYztw6-OPoDoAorcBWhC-vPuuQPbc,9234
48
47
  metaflow_extensions/outerbounds/plugins/ollama/constants.py,sha256=hxkTpWEJp1pKHwUcG4EE3-17M6x2CyeMfbeqgUzF9TA,28
49
48
  metaflow_extensions/outerbounds/plugins/ollama/exceptions.py,sha256=8Ss296_MGZl1wXAoDNwpH-hsPe6iYLe90Ji1pczNocU,668
50
- metaflow_extensions/outerbounds/plugins/ollama/ollama.py,sha256=oe-k1ISSMtUF2y3YpfmJhU_3yR7SP31PVilN5NPgKv0,31450
49
+ metaflow_extensions/outerbounds/plugins/ollama/ollama.py,sha256=C-6Hz8OxsJiB14AAxmunq3P4k7DrmVHsSOxE0xsP-nY,79780
50
+ metaflow_extensions/outerbounds/plugins/ollama/status_card.py,sha256=F5e4McDl28lhtjeUyInkl03bqjr1lgLxWoau8Q9xwBE,10994
51
51
  metaflow_extensions/outerbounds/plugins/profilers/deco_injector.py,sha256=oI_C3c64XBm7n88FILqHwn-Nnc5DeT_68I67lM9rXaI,2434
52
52
  metaflow_extensions/outerbounds/plugins/profilers/gpu_profile_decorator.py,sha256=gDHQ2sMIp4NuZSzUspbSd8RGdFAoO5mgZAyFcZ2a51Y,2619
53
53
  metaflow_extensions/outerbounds/plugins/secrets/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -67,15 +67,14 @@ metaflow_extensions/outerbounds/plugins/torchtune/__init__.py,sha256=TOXNeyhcgd8
67
67
  metaflow_extensions/outerbounds/profilers/__init__.py,sha256=wa_jhnCBr82TBxoS0e8b6_6sLyZX0fdHicuGJZNTqKw,29
68
68
  metaflow_extensions/outerbounds/profilers/gpu.py,sha256=3Er8uKQzfm_082uadg4yn_D4Y-iSCgzUfFmguYxZsz4,27485
69
69
  metaflow_extensions/outerbounds/toplevel/__init__.py,sha256=qWUJSv_r5hXJ7jV_On4nEasKIfUCm6_UjkjXWA_A1Ts,90
70
- metaflow_extensions/outerbounds/toplevel/global_aliases_for_metaflow_package.py,sha256=505fLAY4NkBCGtjpXlE0RgaKVpJ0jOUq1-Fq-EKzPew,2035
71
- metaflow_extensions/outerbounds/toplevel/ob_internal.py,sha256=53xM6d_UYT3uGFFA59UzxN23H5QMO5_F39pALpmGy04,51
70
+ metaflow_extensions/outerbounds/toplevel/global_aliases_for_metaflow_package.py,sha256=oCRFfwoLODDju2-6JU2h74x6O-GCyO7PRRKTZsj-73k,2009
72
71
  metaflow_extensions/outerbounds/toplevel/plugins/azure/__init__.py,sha256=WUuhz2YQfI4fz7nIcipwwWq781eaoHEk7n4GAn1npDg,63
73
72
  metaflow_extensions/outerbounds/toplevel/plugins/gcp/__init__.py,sha256=BbZiaH3uILlEZ6ntBLKeNyqn3If8nIXZFq_Apd7Dhco,70
74
73
  metaflow_extensions/outerbounds/toplevel/plugins/kubernetes/__init__.py,sha256=5zG8gShSj8m7rgF4xgWBZFuY3GDP5n1T0ktjRpGJLHA,69
75
74
  metaflow_extensions/outerbounds/toplevel/plugins/ollama/__init__.py,sha256=GRSz2zwqkvlmFS6bcfYD_CX6CMko9DHQokMaH1iBshA,47
76
75
  metaflow_extensions/outerbounds/toplevel/plugins/snowflake/__init__.py,sha256=LptpH-ziXHrednMYUjIaosS1SXD3sOtF_9_eRqd8SJw,50
77
76
  metaflow_extensions/outerbounds/toplevel/plugins/torchtune/__init__.py,sha256=uTVkdSk3xZ7hEKYfdlyVteWj5KeDwaM1hU9WT-_YKfI,50
78
- ob_metaflow_extensions-1.1.161rc1.dist-info/METADATA,sha256=QZ_Y0zbT95Qr57WQ4XnvhZUSXoOQy3x2Pd-KT73mnxo,524
79
- ob_metaflow_extensions-1.1.161rc1.dist-info/WHEEL,sha256=bb2Ot9scclHKMOLDEHY6B2sicWOgugjFKaJsT7vwMQo,110
80
- ob_metaflow_extensions-1.1.161rc1.dist-info/top_level.txt,sha256=NwG0ukwjygtanDETyp_BUdtYtqIA_lOjzFFh1TsnxvI,20
81
- ob_metaflow_extensions-1.1.161rc1.dist-info/RECORD,,
77
+ ob_metaflow_extensions-1.1.162.dist-info/METADATA,sha256=Mp8oyzEml9MbPiZMSwCBUHsB-Tfq8cMbraHKfBbfgjY,521
78
+ ob_metaflow_extensions-1.1.162.dist-info/WHEEL,sha256=bb2Ot9scclHKMOLDEHY6B2sicWOgugjFKaJsT7vwMQo,110
79
+ ob_metaflow_extensions-1.1.162.dist-info/top_level.txt,sha256=NwG0ukwjygtanDETyp_BUdtYtqIA_lOjzFFh1TsnxvI,20
80
+ ob_metaflow_extensions-1.1.162.dist-info/RECORD,,
@@ -1,110 +0,0 @@
1
- import threading
2
- import time
3
- import sys
4
- from typing import Dict, Optional, Any, Callable
5
- from functools import partial
6
- from metaflow.exception import MetaflowException
7
- from metaflow.metaflow_config import FAST_BAKERY_URL
8
-
9
- from .fast_bakery import FastBakery, FastBakeryApiResponse, FastBakeryException
10
- from .docker_environment import cache_request
11
-
12
- BAKERY_METAFILE = ".imagebakery-cache"
13
-
14
-
15
class BakerException(MetaflowException):
    """Error raised when baking an image fails."""

    headline = "Ran into an error while baking image"

    def __init__(self, msg):
        # Forward the message unchanged to the base exception.
        super().__init__(msg)
20
-
21
-
22
# Shared by every bake_image call. A lock created inside the function would be
# private to that single call and could never be contended, so it would not
# stop log lines of concurrent bakes from interleaving.
_BAKE_LOG_LOCK = threading.Lock()


def bake_image(
    cache_file_path: str,
    ref: Optional[str] = None,
    python: Optional[str] = None,
    pypi_packages: Optional[Dict[str, str]] = None,
    conda_packages: Optional[Dict[str, str]] = None,
    base_image: Optional[str] = None,
    logger: Optional[Callable[[str], Any]] = None,
) -> FastBakeryApiResponse:
    """
    Bakes a Docker image with the specified dependencies.

    The bake request is wrapped in ``cache_request(cache_file_path)``.
    Progress is reported through ``logger``.

    Args:
        cache_file_path: Path to the cache file
        ref: Reference identifier for this bake (for logging purposes)
        python: Python version to use
        pypi_packages: Dictionary of PyPI packages and versions
        conda_packages: Dictionary of Conda packages and versions
        base_image: Base Docker image to use
        logger: Optional logger function to output progress; defaults to
            printing on stderr

    Returns:
        FastBakeryApiResponse: The response from the bakery service

    Raises:
        BakerException: If the baking process fails; the original
            FastBakeryException is attached as ``__cause__``
    """
    # Default logger if none provided
    if logger is None:
        logger = partial(print, file=sys.stderr)

    @cache_request(cache_file_path)
    def _cached_bake(
        ref=None,
        python=None,
        pypi_packages=None,
        conda_packages=None,
        base_image=None,
    ):
        try:
            bakery = FastBakery(url=FAST_BAKERY_URL)
            bakery._reset_payload()
            bakery.python_version(python)
            bakery.pypi_packages(pypi_packages)
            bakery.conda_packages(conda_packages)
            bakery.base_image(base_image)
            # bakery.ignore_cache()

            # Emit the whole request summary as one uninterrupted group.
            with _BAKE_LOG_LOCK:
                logger(f"🍳 Baking [{ref}] ...")
                logger(f" 🐍 Python: {python}")

                if pypi_packages:
                    logger(" 📦 PyPI packages:")
                    for package, version in pypi_packages.items():
                        logger(f" 🔧 {package}: {version}")

                if conda_packages:
                    logger(" 📦 Conda packages:")
                    for package, version in conda_packages.items():
                        logger(f" 🔧 {package}: {version}")

                logger(f" 🏗️ Base image: {base_image}")

            start_time = time.time()
            res = bakery.bake()
            # TODO: Get actual bake time from bakery
            bake_time = time.time() - start_time

            with _BAKE_LOG_LOCK:
                logger(f"🏁 Baked [{ref}] in {bake_time:.2f} seconds!")
            return res
        except FastBakeryException as ex:
            # Chain explicitly so the service error stays visible as the cause.
            raise BakerException(f"Bake [{ref}] failed: {str(ex)}") from ex

    # Call the cached bake function with the provided parameters
    return _cached_bake(
        ref=ref,
        python=python,
        pypi_packages=pypi_packages,
        conda_packages=conda_packages,
        base_image=base_image,
    )
@@ -1 +0,0 @@
1
- from ..plugins.fast_bakery.baker import bake_image