ob-metaflow-extensions 1.1.162rc0__py2.py3-none-any.whl → 1.1.162rc1__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ob-metaflow-extensions might be problematic. Click here for more details.
- metaflow_extensions/outerbounds/plugins/ollama/__init__.py +145 -9
- metaflow_extensions/outerbounds/plugins/ollama/ollama.py +1154 -43
- metaflow_extensions/outerbounds/plugins/ollama/status_card.py +292 -0
- {ob_metaflow_extensions-1.1.162rc0.dist-info → ob_metaflow_extensions-1.1.162rc1.dist-info}/METADATA +1 -1
- {ob_metaflow_extensions-1.1.162rc0.dist-info → ob_metaflow_extensions-1.1.162rc1.dist-info}/RECORD +7 -6
- {ob_metaflow_extensions-1.1.162rc0.dist-info → ob_metaflow_extensions-1.1.162rc1.dist-info}/WHEEL +0 -0
- {ob_metaflow_extensions-1.1.162rc0.dist-info → ob_metaflow_extensions-1.1.162rc1.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,292 @@
|
|
|
1
|
+
import copy
import threading
import time
from datetime import datetime

from metaflow.cards import Markdown, Table, VegaChart
from metaflow.metaflow_current import current

from ..card_utilities.async_cards import CardRefresher
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class OllamaStatusCard(CardRefresher):
    """
    Real-time status card for Ollama system monitoring.

    Shows circuit breaker state, server health, model status, and recent events.

    State is pushed into the card with `update_status` and `add_event` and is
    kept in `self.status_data`, guarded by `self._lock`. The `on_startup`,
    `on_update` and `on_error` callbacks draw that state onto the card object
    they are handed (presumably invoked by the CardRefresher machinery --
    confirm against `card_utilities.async_cards`).
    """

    CARD_ID = "ollama_status"

    # How many events are retained in memory, and how many of those are drawn.
    MAX_EVENTS = 10
    DISPLAYED_EVENTS = 5

    _TITLE = "# 🦙 `@ollama` Status Dashboard"
    _DEFAULT_CIRCUIT_BREAKER_EMOJI = "⚪"
    _CIRCUIT_BREAKER_EMOJI = {"CLOSED": "🟢", "OPEN": "🔴", "HALF_OPEN": "🟡"}
    _DEFAULT_CACHE_EMOJI = "❓"
    _CACHE_EMOJI = {
        "exists": "💾",
        "missing": "❌",
        "error": "⚠️",
        "unknown": "❓",
    }
    _DEFAULT_EVENT_EMOJI = "ℹ️"
    _EVENT_EMOJI = {
        "info": "ℹ️",
        "success": "✅",
        "warning": "⚠️",
        "error": "❌",
    }
    # Performance keys that are grouped under the "Initialization" heading.
    _INIT_METRICS = (
        "install_time",
        "server_startup_time",
        "total_initialization_time",
    )

    def __init__(self, refresh_interval=10):
        """
        Create the card with its default (pre-startup) status.

        Parameters
        ----------
        refresh_interval : int, default 10
            Stored on the instance as `refresh_interval`; this class does not
            read it itself.
        """
        self.refresh_interval = refresh_interval
        self.status_data = {
            "circuit_breaker": {
                "state": "CLOSED",
                "failure_count": 0,
                "last_failure_time": None,
                "last_open_time": None,
            },
            "server": {
                "status": "Starting",
                "uptime_start": None,
                "restart_attempts": 0,
                "last_health_check": None,
                "health_status": "Unknown",
            },
            "models": {},  # model_name -> {status, pull_time, load_time, etc}
            "performance": {
                "install_time": None,
                "server_startup_time": None,
                "total_initialization_time": None,
            },
            "versions": {
                "ollama_system": "Detecting...",
                "ollama_python": "Detecting...",
            },
            "cache": {
                "policy": "auto",
                "model_status": {},  # model_name -> cache status
            },
            "events": [],  # Recent events log
        }
        self._lock = threading.Lock()
        self._already_rendered = False

    # ------------------------------------------------------------------
    # State mutation
    # ------------------------------------------------------------------

    def update_status(self, category, data):
        """
        Thread-safe method to merge `data` into one section of the status.

        Parameters
        ----------
        category : str
            Top-level key of `status_data` (e.g. "server", "models").
        data : dict
            Key/value pairs merged into that section with `dict.update`.

        Unknown categories are ignored. So is the list-valued "events"
        section, which must be fed through `add_event` instead; merging into
        it used to raise AttributeError.
        """
        with self._lock:
            section = self.status_data.get(category)
            if isinstance(section, dict):
                section.update(data)

    def add_event(self, event_type, message, timestamp=None):
        """
        Add an event to the front of the timeline.

        Parameters
        ----------
        event_type : str
            One of 'info', 'warning', 'error', 'success'; other values are
            rendered with the 'info' emoji.
        message : str
            Text shown in the "Recent Events" table.
        timestamp : datetime, optional
            Defaults to `datetime.now()`. It is formatted with `strftime`
            when the card is rendered.
        """
        if timestamp is None:
            timestamp = datetime.now()

        event = {
            "type": event_type,
            "message": message,
            "timestamp": timestamp,
        }
        with self._lock:
            events = self.status_data["events"]
            events.insert(0, event)
            # Newest first; drop everything beyond the retention limit.
            del events[self.MAX_EVENTS :]

    def _snapshot(self):
        """
        Return a deep copy of `status_data` taken under the lock.

        A shallow copy would still share the nested dicts with writers, so
        rendering could observe a section while it is being modified.
        """
        with self._lock:
            return copy.deepcopy(self.status_data)

    # ------------------------------------------------------------------
    # Formatting helpers
    # ------------------------------------------------------------------

    def get_circuit_breaker_emoji(self, state):
        """Get status emoji for circuit breaker state ("⚪" if unrecognised)."""
        return self._CIRCUIT_BREAKER_EMOJI.get(
            state, self._DEFAULT_CIRCUIT_BREAKER_EMOJI
        )

    def get_uptime_string(self, start_time):
        """
        Format the time elapsed since `start_time`.

        Parameters
        ----------
        start_time : datetime or None
            Moment the server came up. Naive and timezone-aware values are
            both accepted.

        Returns
        -------
        str
            "Not started" when `start_time` is falsy, otherwise "Xh Ym Zs",
            "Ym Zs" or "Zs" depending on magnitude. A start time in the
            future is reported as "0s".
        """
        if not start_time:
            return "Not started"

        # datetime.now(None) is naive local time and datetime.now(tz) is
        # aware, so the subtraction is valid for either kind of start_time.
        now = datetime.now(start_time.tzinfo)
        elapsed = max(0, int((now - start_time).total_seconds()))
        hours, remainder = divmod(elapsed, 3600)
        minutes, seconds = divmod(remainder, 60)

        if hours > 0:
            return f"{hours}h {minutes}m {seconds}s"
        if minutes > 0:
            return f"{minutes}m {seconds}s"
        return f"{seconds}s"

    # ------------------------------------------------------------------
    # Card callbacks
    # ------------------------------------------------------------------

    def on_startup(self, current_card):
        """Initialize the card with a title and an "initializing" placeholder."""
        current_card.append(Markdown(self._TITLE))
        current_card.append(Markdown("_Initializing Ollama system..._"))
        current_card.refresh()

    def render_card_fresh(self, current_card, data):
        """
        Render the complete card with all status information.

        Parameters
        ----------
        current_card
            Card object exposing `clear`, `append` and `refresh`.
        data : dict
            Status dictionary shaped like `status_data`.

        The card is cleared and rebuilt section by section. The
        `_already_rendered` flag is set only after the refresh succeeds, so
        that `on_error` can still replace the card with an error message if
        the very first render fails.
        """
        current_card.clear()

        self._render_header(current_card, data)
        self._render_system_status(current_card, data)
        self._render_models(current_card, data)
        self._render_performance(current_card, data)
        self._render_events(current_card, data)

        current_card.refresh()
        self._already_rendered = True

    def _render_header(self, current_card, data):
        """Append the title, version line, cache policy and update time."""
        current_card.append(Markdown(self._TITLE))

        versions = data.get("versions", {})
        system_version = versions.get("ollama_system", "Unknown")
        python_version = versions.get("ollama_python", "Unknown")
        current_card.append(
            Markdown(
                f"**System:** `{system_version}` | **Python Client:** `{python_version}`"
            )
        )

        cache_policy = data.get("cache", {}).get("policy", "auto")
        current_card.append(Markdown(f"**Cache Policy:** `{cache_policy}`"))

        current_card.append(
            Markdown(f"_Last updated: {datetime.now().strftime('%H:%M:%S')}_")
        )

    def _render_system_status(self, current_card, data):
        """Append the circuit breaker / server overview table."""
        cb_data = data["circuit_breaker"]
        cb_emoji = self.get_circuit_breaker_emoji(cb_data["state"])
        cb_status = f"{cb_emoji} **{cb_data['state']}**"
        if cb_data["failure_count"] > 0:
            cb_status += f" (failures: {cb_data['failure_count']})"

        server_data = data["server"]
        uptime = self.get_uptime_string(server_data.get("uptime_start"))
        server_status = f"**{server_data['status']}**"
        if server_data["restart_attempts"] > 0:
            server_status += f" (restarts: {server_data['restart_attempts']})"

        status_table = [
            ["Circuit Breaker", Markdown(cb_status)],
            ["Server Status", Markdown(server_status)],
            ["Server Uptime", Markdown(uptime)],
            [
                "Last Health Check",
                Markdown(server_data.get("health_status", "Unknown")),
            ],
        ]

        current_card.append(Markdown("## System Status"))
        current_card.append(Table(status_table, headers=["Component", "Status"]))

    def _render_models(self, current_card, data):
        """Append one table row per known model; nothing if there are none."""
        models = data["models"]
        if not models:
            return

        current_card.append(Markdown("## Models"))
        cache_model_status = data.get("cache", {}).get("model_status", {})

        model_table = []
        for model_name, model_info in models.items():
            pull_time = model_info.get("pull_time", "N/A")
            if isinstance(pull_time, (int, float)):
                pull_time = f"{pull_time:.1f}s"

            cache_status = cache_model_status.get(model_name, "unknown")
            cache_emoji = self._CACHE_EMOJI.get(
                cache_status, self._DEFAULT_CACHE_EMOJI
            )

            # A blob count of zero is displayed as "Unknown" rather than "0".
            blob_count = model_info.get("blob_count", "Unknown")
            if blob_count == 0:
                blob_count = "Unknown"

            model_table.append(
                [
                    f"{model_name} {cache_emoji}",
                    model_info.get("status", "Unknown"),
                    pull_time,
                    model_info.get("size_formatted", "Unknown"),
                    str(blob_count),
                ]
            )

        current_card.append(
            Table(
                model_table,
                headers=["Model (Cache)", "Status", "Pull Time", "Size", "Blobs"],
            )
        )

    def _render_performance(self, current_card, data):
        """Append performance metrics grouped into init / shutdown / other."""
        perf_data = data["performance"]
        if all(value is None for value in perf_data.values()):
            return

        current_card.append(Markdown("## Performance"))

        init_metrics = []
        shutdown_metrics = []
        other_metrics = []

        for metric, value in perf_data.items():
            if value is None:
                continue

            # Numeric values are formatted with an "s" suffix; anything else
            # is shown verbatim.
            display_value = value
            if isinstance(value, (int, float)):
                display_value = f"{value:.1f}s"

            row = [metric.replace("_", " ").title(), display_value]
            if "shutdown" in metric.lower():
                shutdown_metrics.append(row)
            elif metric in self._INIT_METRICS:
                init_metrics.append(row)
            else:
                other_metrics.append(row)

        if init_metrics:
            current_card.append(Markdown("### Initialization"))
            current_card.append(Table(init_metrics, headers=["Metric", "Duration"]))

        if shutdown_metrics:
            current_card.append(Markdown("### Shutdown"))
            current_card.append(Table(shutdown_metrics, headers=["Metric", "Value"]))

        if other_metrics:
            current_card.append(Markdown("### Other"))
            current_card.append(Table(other_metrics, headers=["Metric", "Value"]))

    def _render_events(self, current_card, data):
        """Append the most recent events (newest first); nothing if empty."""
        events = data["events"]
        if not events:
            return

        current_card.append(Markdown("## Recent Events"))
        events_table = []
        for event in events[: self.DISPLAYED_EVENTS]:
            timestamp = event["timestamp"].strftime("%H:%M:%S")
            type_emoji = self._EVENT_EMOJI.get(
                event["type"], self._DEFAULT_EVENT_EMOJI
            )
            events_table.append([timestamp, f"{type_emoji} {event['message']}"])

        current_card.append(Table(events_table, headers=["Time", "Event"]))

    def on_error(self, current_card, error_message):
        """
        Handle errors in card rendering.

        The error replaces the card only while no full render has succeeded
        yet; once a dashboard has been drawn it is left in place.
        """
        if not self._already_rendered:
            current_card.clear()
            current_card.append(Markdown(self._TITLE))
            current_card.append(Markdown(f"## ❌ Error: {str(error_message)}"))
            current_card.refresh()

    def on_update(self, current_card, data_object):
        """
        Update the card with new data.

        `data_object` is ignored: the card is always re-rendered in full from
        a snapshot of the in-memory `status_data`.
        """
        self.render_card_fresh(current_card, self._snapshot())

    def sqlite_fetch_func(self, conn):
        """
        Required by CardRefresher (which needs a refactor), but we use in-memory data instead.

        `conn` is unused. Returns `{"status": <snapshot>}` where the snapshot
        is a deep copy, so the caller never holds a reference to state that
        other threads are still mutating.
        """
        return {"status": self._snapshot()}
|
{ob_metaflow_extensions-1.1.162rc0.dist-info → ob_metaflow_extensions-1.1.162rc1.dist-info}/RECORD
RENAMED
|
@@ -44,10 +44,11 @@ metaflow_extensions/outerbounds/plugins/nvct/nvct_cli.py,sha256=bB9AURhRep9PV_-b
|
|
|
44
44
|
metaflow_extensions/outerbounds/plugins/nvct/nvct_decorator.py,sha256=HKCvYn1Jh8uwLXeUqPNhxgBatq3mXNG5YIUl-zjNlHE,9429
|
|
45
45
|
metaflow_extensions/outerbounds/plugins/nvct/nvct_runner.py,sha256=8IPkdvuTZNIqgAAt75gVNn-ydr-Zz2sKC8UX_6pNEKI,7091
|
|
46
46
|
metaflow_extensions/outerbounds/plugins/nvct/utils.py,sha256=U4_Fu8H94j_Bbox7mmMhNnlRhlYHqnK28R5w_TMWEFM,1029
|
|
47
|
-
metaflow_extensions/outerbounds/plugins/ollama/__init__.py,sha256=
|
|
47
|
+
metaflow_extensions/outerbounds/plugins/ollama/__init__.py,sha256=4T8LQqAuh8flSMvYztw6-OPoDoAorcBWhC-vPuuQPbc,9234
|
|
48
48
|
metaflow_extensions/outerbounds/plugins/ollama/constants.py,sha256=hxkTpWEJp1pKHwUcG4EE3-17M6x2CyeMfbeqgUzF9TA,28
|
|
49
49
|
metaflow_extensions/outerbounds/plugins/ollama/exceptions.py,sha256=8Ss296_MGZl1wXAoDNwpH-hsPe6iYLe90Ji1pczNocU,668
|
|
50
|
-
metaflow_extensions/outerbounds/plugins/ollama/ollama.py,sha256=
|
|
50
|
+
metaflow_extensions/outerbounds/plugins/ollama/ollama.py,sha256=C-6Hz8OxsJiB14AAxmunq3P4k7DrmVHsSOxE0xsP-nY,79780
|
|
51
|
+
metaflow_extensions/outerbounds/plugins/ollama/status_card.py,sha256=F5e4McDl28lhtjeUyInkl03bqjr1lgLxWoau8Q9xwBE,10994
|
|
51
52
|
metaflow_extensions/outerbounds/plugins/profilers/deco_injector.py,sha256=oI_C3c64XBm7n88FILqHwn-Nnc5DeT_68I67lM9rXaI,2434
|
|
52
53
|
metaflow_extensions/outerbounds/plugins/profilers/gpu_profile_decorator.py,sha256=gDHQ2sMIp4NuZSzUspbSd8RGdFAoO5mgZAyFcZ2a51Y,2619
|
|
53
54
|
metaflow_extensions/outerbounds/plugins/secrets/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -75,7 +76,7 @@ metaflow_extensions/outerbounds/toplevel/plugins/kubernetes/__init__.py,sha256=5
|
|
|
75
76
|
metaflow_extensions/outerbounds/toplevel/plugins/ollama/__init__.py,sha256=GRSz2zwqkvlmFS6bcfYD_CX6CMko9DHQokMaH1iBshA,47
|
|
76
77
|
metaflow_extensions/outerbounds/toplevel/plugins/snowflake/__init__.py,sha256=LptpH-ziXHrednMYUjIaosS1SXD3sOtF_9_eRqd8SJw,50
|
|
77
78
|
metaflow_extensions/outerbounds/toplevel/plugins/torchtune/__init__.py,sha256=uTVkdSk3xZ7hEKYfdlyVteWj5KeDwaM1hU9WT-_YKfI,50
|
|
78
|
-
ob_metaflow_extensions-1.1.
|
|
79
|
-
ob_metaflow_extensions-1.1.
|
|
80
|
-
ob_metaflow_extensions-1.1.
|
|
81
|
-
ob_metaflow_extensions-1.1.
|
|
79
|
+
ob_metaflow_extensions-1.1.162rc1.dist-info/METADATA,sha256=8mNkviY60oPvdh3eMcloqnEKBMoWPoWIaIeqIgzMBOQ,524
|
|
80
|
+
ob_metaflow_extensions-1.1.162rc1.dist-info/WHEEL,sha256=bb2Ot9scclHKMOLDEHY6B2sicWOgugjFKaJsT7vwMQo,110
|
|
81
|
+
ob_metaflow_extensions-1.1.162rc1.dist-info/top_level.txt,sha256=NwG0ukwjygtanDETyp_BUdtYtqIA_lOjzFFh1TsnxvI,20
|
|
82
|
+
ob_metaflow_extensions-1.1.162rc1.dist-info/RECORD,,
|
{ob_metaflow_extensions-1.1.162rc0.dist-info → ob_metaflow_extensions-1.1.162rc1.dist-info}/WHEEL
RENAMED
|
File without changes
|
|
File without changes
|