ob-metaflow-extensions 1.1.162rc0__py2.py3-none-any.whl → 1.1.162rc1__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of ob-metaflow-extensions might be problematic.

@@ -7,6 +7,9 @@ import os
 import functools
 import json
 import requests
+from enum import Enum
+import threading
+from datetime import datetime
 
 from .constants import OLLAMA_SUFFIX
 from .exceptions import (
@@ -22,6 +25,420 @@ class ProcessStatus:
     SUCCESSFUL = "SUCCESSFUL"
 
 
+class CircuitBreakerState(Enum):
+    CLOSED = "CLOSED"
+    OPEN = "OPEN"
+    HALF_OPEN = "HALF_OPEN"
+
+
+class CircuitBreaker:
+    def __init__(
+        self,
+        failure_threshold,
+        recovery_timeout,
+        reset_timeout,
+        debug=False,
+        status_card=None,
+    ):
+        self.failure_threshold = failure_threshold
+        self.recovery_timeout = recovery_timeout
+        self.reset_timeout = reset_timeout
+        self.state = CircuitBreakerState.CLOSED
+        self.failure_count = 0
+        self.last_failure_time = None
+        self.last_open_time = None
+        self.debug = debug
+        self.status_card = status_card
+        self.lock = threading.Lock()
+        self.request_count = 0  # Track total requests for pattern detection
+
+        if self.debug:
+            print(
+                f"[@ollama] CircuitBreaker initialized: threshold={failure_threshold}, recovery={recovery_timeout}, reset={reset_timeout}"
+            )
+
+    def _log_state_change(self, new_state):
+        if self.debug:
+            print(
+                f"[@ollama] Circuit Breaker state change: {self.state.value} -> {new_state.value}"
+            )
+        self.state = new_state
+        self._update_status_card()
+
+    def _update_status_card(self):
+        """Update the status card with current circuit breaker state"""
+        if self.status_card:
+            self.status_card.update_status(
+                "circuit_breaker",
+                {
+                    "state": self.state.value,
+                    "failure_count": self.failure_count,
+                    "last_failure_time": self.last_failure_time,
+                    "last_open_time": self.last_open_time,
+                },
+            )
+
+    def record_success(self):
+        with self.lock:
+            self.request_count += 1
+            if self.state == CircuitBreakerState.HALF_OPEN:
+                self._log_state_change(CircuitBreakerState.CLOSED)
+                self.failure_count = 0
+            elif self.state == CircuitBreakerState.OPEN:
+                # Allow transition to HALF_OPEN on success - server might have recovered
+                self._log_state_change(CircuitBreakerState.HALF_OPEN)
+                if self.debug:
+                    print(
+                        f"[@ollama] Success recorded while circuit OPEN. Transitioning to HALF_OPEN for testing."
+                    )
+            self.failure_count = 0
+            self.last_failure_time = None
+
+            # Log request count milestone for pattern detection
+            if self.debug and self.request_count % 100 == 0:
+                print(f"[@ollama] Request count: {self.request_count}")
+
+            self._update_status_card()
+
+    def record_failure(self):
+        with self.lock:
+            self.request_count += 1
+            self.failure_count += 1
+            self.last_failure_time = time.time()
+            if (
+                self.failure_count >= self.failure_threshold
+                and self.state == CircuitBreakerState.CLOSED
+            ):
+                self._log_state_change(CircuitBreakerState.OPEN)
+                self.last_open_time = time.time()
+            elif self.state == CircuitBreakerState.HALF_OPEN:
+                # If we fail while testing recovery, go back to OPEN
+                self._log_state_change(CircuitBreakerState.OPEN)
+                self.last_open_time = time.time()
+            if self.debug:
+                print(
+                    f"[@ollama] Failure recorded. Count: {self.failure_count}, State: {self.state.value}, Total requests: {self.request_count}"
+                )
+            self._update_status_card()
+
+    def should_attempt_reset(self):
+        """Check if we should attempt to reset/restart Ollama based on reset_timeout"""
+        with self.lock:
+            if self.state == CircuitBreakerState.OPEN and self.last_open_time:
+                elapsed_time = time.time() - self.last_open_time
+                return elapsed_time > self.reset_timeout
+            return False
+
+    def is_request_allowed(self):
+        with self.lock:
+            if self.state == CircuitBreakerState.OPEN:
+                elapsed_time = time.time() - self.last_open_time
+                if elapsed_time > self.recovery_timeout:
+                    self._log_state_change(CircuitBreakerState.HALF_OPEN)
+                    if self.debug:
+                        print(
+                            f"[@ollama] Circuit Breaker transitioning to HALF_OPEN after {elapsed_time:.1f}s."
+                        )
+                    return True  # Allow a single request to test recovery
+                else:
+                    if self.debug:
+                        print(
+                            f"[@ollama] Circuit Breaker is OPEN. Not allowing request. Time until HALF_OPEN: {self.recovery_timeout - elapsed_time:.1f}s"
+                        )
+                    return False
+            elif self.state == CircuitBreakerState.HALF_OPEN:
+                # In HALF_OPEN, be more restrictive - only allow one request at a time
+                if self.debug:
+                    print(
+                        f"[@ollama] Circuit Breaker is HALF_OPEN. Allowing request to test recovery."
+                    )
+                return True
+            else:  # CLOSED
+                return True
+
+    def get_status(self):
+        with self.lock:
+            status = {
+                "state": self.state.value,
+                "failure_count": self.failure_count,
+                "last_failure_time": self.last_failure_time,
+                "last_open_time": self.last_open_time,
+            }
+            if self.debug:
+                print(f"[@ollama] Circuit Breaker status: {status}")
+            return status
+
+
+class TimeoutCommand:
+    def __init__(self, command, timeout, debug=False, **kwargs):
+        self.command = command
+        self.timeout = timeout
+        self.debug = debug
+        self.input_data = kwargs.pop("input", None)  # Remove input from kwargs
+        self.kwargs = kwargs
+
+    def run(self):
+        if self.debug:
+            print(
+                f"[@ollama] Executing command with timeout {self.timeout}s: {' '.join(self.command)}"
+            )
+        try:
+            process = subprocess.Popen(
+                self.command,
+                stdout=subprocess.PIPE,
+                stderr=subprocess.PIPE,
+                text=True,
+                **self.kwargs,
+            )
+            stdout, stderr = process.communicate(
+                input=self.input_data, timeout=self.timeout
+            )
+            return process.returncode, stdout, stderr
+        except subprocess.TimeoutExpired:
+            if self.debug:
+                print(
+                    f"[@ollama] Command timed out after {self.timeout}s: {' '.join(self.command)}"
+                )
+            process.kill()
+            stdout, stderr = process.communicate()
+            return (
+                124,
+                stdout,
+                stderr,
+            )  # 124 is the standard exit code for `timeout` command
+        except Exception as e:
+            if self.debug:
+                print(
+                    f"[@ollama] Error executing command {' '.join(self.command)}: {e}"
+                )
+            return 1, "", str(e)
+
+
+class OllamaHealthChecker:
+    def __init__(self, ollama_url, circuit_breaker, ollama_manager, debug=False):
+        self.ollama_url = ollama_url
+        self.circuit_breaker = circuit_breaker
+        self.ollama_manager = ollama_manager
+        self.debug = debug
+        self._stop_event = threading.Event()
+        self._thread = None
+        self._interval = 30  # Check every 30 seconds (less aggressive)
+
+    def _check_health(self):
+        try:
+            health_timeout = self.ollama_manager.timeouts.get("health_check", 5)
+            if self.debug:
+                print(f"[@ollama] Health check: Pinging {self.ollama_url}/api/tags")
+            response = requests.get(
+                f"{self.ollama_url}/api/tags", timeout=health_timeout
+            )
+            if response.status_code == 200:
+                self.circuit_breaker.record_success()
+                self._update_server_health_status("Healthy")
+                if self.debug:
+                    print(
+                        f"[@ollama] Health check successful. Circuit state: {self.circuit_breaker.state.value}"
+                    )
+                return True
+            else:
+                if self.debug:
+                    print(
+                        f"[@ollama] Health check failed. Status code: {response.status_code}. Circuit state: {self.circuit_breaker.state.value}"
+                    )
+                self.circuit_breaker.record_failure()
+                self._update_server_health_status(
+                    f"Unhealthy (HTTP {response.status_code})"
+                )
+                return False
+        except requests.exceptions.RequestException as e:
+            if self.debug:
+                print(
+                    f"[@ollama] Health check exception: {e}. Circuit state: {self.circuit_breaker.state.value}"
+                )
+            self.circuit_breaker.record_failure()
+            self._update_server_health_status(f"Unhealthy ({str(e)[:50]})")
+            return False
+
+    def _update_server_health_status(self, status):
+        """Update server health status in the status card"""
+        if self.ollama_manager.status_card:
+            self.ollama_manager.status_card.update_status(
+                "server", {"health_status": status, "last_health_check": datetime.now()}
+            )
+
+    def _run_health_check_loop(self):
+        while not self._stop_event.is_set():
+            # Always perform health check to monitor server status
+            self._check_health()
+
+            # Check if we should attempt a restart based on reset_timeout
+            if self.circuit_breaker.should_attempt_reset():
+                try:
+                    if self.debug:
+                        print(
+                            "[@ollama] Circuit breaker reset timeout reached. Attempting restart..."
+                        )
+                    restart_success = self.ollama_manager._attempt_ollama_restart()
+                    if restart_success:
+                        if self.debug:
+                            print("[@ollama] Restart successful via health checker")
+                    else:
+                        if self.debug:
+                            print("[@ollama] Restart failed via health checker")
+                except Exception as e:
+                    if self.debug:
+                        print(
+                            f"[@ollama] Error during health checker restart attempt: {e}"
+                        )
+
+            self._stop_event.wait(self._interval)
+
+    def start(self):
+        if self._thread is None or not self._thread.is_alive():
+            self._stop_event.clear()
+            self._thread = threading.Thread(
+                target=self._run_health_check_loop, daemon=True
+            )
+            self._thread.start()
+            if self.debug:
+                print("[@ollama] OllamaHealthChecker started.")
+
+    def stop(self):
+        if self._thread and self._thread.is_alive():
+            self._stop_event.set()
+            self._thread.join(timeout=self._interval + 1)  # Wait for thread to finish
+            if self.debug:
+                print("[@ollama] OllamaHealthChecker stopped.")
+
+
+class OllamaRequestInterceptor:
+    def __init__(self, circuit_breaker, debug=False):
+        self.circuit_breaker = circuit_breaker
+        self.debug = debug
+        self.original_methods = {}
+        self._protection_installed = False
+
+    def install_protection(self):
+        """Install request protection by monkey-patching the ollama package"""
+        if self._protection_installed:
+            return
+
+        try:
+            import ollama  # Import the actual ollama package
+
+            # Store original methods
+            self.original_methods = {
+                "chat": getattr(ollama, "chat", None),
+                "generate": getattr(ollama, "generate", None),
+                "embeddings": getattr(ollama, "embeddings", None),
+            }
+
+            # Replace with protected versions
+            if hasattr(ollama, "chat"):
+                ollama.chat = self._protected_chat
+            if hasattr(ollama, "generate"):
+                ollama.generate = self._protected_generate
+            if hasattr(ollama, "embeddings"):
+                ollama.embeddings = self._protected_embeddings
+
+            self._protection_installed = True
+            if self.debug:
+                print(
+                    "[@ollama] Request protection installed on ollama package methods"
+                )
+
+        except ImportError:
+            if self.debug:
+                print(
+                    "[@ollama] Warning: Could not import ollama package for request protection"
+                )
+        except Exception as e:
+            if self.debug:
+                print(f"[@ollama] Error installing request protection: {e}")
+
+    def remove_protection(self):
+        """Remove request protection by restoring original methods"""
+        if not self._protection_installed:
+            return
+
+        try:
+            import ollama
+
+            # Restore original methods
+            for method_name, original_method in self.original_methods.items():
+                if original_method is not None and hasattr(ollama, method_name):
+                    setattr(ollama, method_name, original_method)
+
+            self._protection_installed = False
+            if self.debug:
+                print("[@ollama] Request protection removed")
+
+        except Exception as e:
+            if self.debug:
+                print(f"[@ollama] Error removing request protection: {e}")
+
+    def _protected_chat(self, *args, **kwargs):
+        if not self.circuit_breaker.is_request_allowed():
+            raise RuntimeError(
+                f"Ollama server is currently unavailable. Circuit Breaker is {self.circuit_breaker.state.value}. "
+                "Please wait or check Ollama server status. "
+                f"Current status: {self.circuit_breaker.get_status()}"
+            )
+        try:
+            if self.debug:
+                # Debug: log model being used in request
+                model_name = kwargs.get("model", "unknown")
+                if args and isinstance(args[0], dict) and "model" in args[0]:
+                    model_name = args[0]["model"]
+                print(f"[@ollama] DEBUG: Making chat request with model: {model_name}")
+                print(f"[@ollama] DEBUG: Request args: {args}")
+                print(f"[@ollama] DEBUG: Request kwargs keys: {list(kwargs.keys())}")
+
+            result = self.original_methods["chat"](*args, **kwargs)
+            self.circuit_breaker.record_success()
+            return result
+        except Exception as e:
+            if self.debug:
+                print(f"[@ollama] Protected chat call failed: {e}")
+                print(f"[@ollama] DEBUG: Exception type: {type(e)}")
+            self.circuit_breaker.record_failure()
+            raise
+
+    def _protected_generate(self, *args, **kwargs):
+        if not self.circuit_breaker.is_request_allowed():
+            raise RuntimeError(
+                f"Ollama server is currently unavailable. Circuit Breaker is {self.circuit_breaker.state.value}. "
+                "Please wait or check Ollama server status. "
+                f"Current status: {self.circuit_breaker.get_status()}"
+            )
+        try:
+            result = self.original_methods["generate"](*args, **kwargs)
+            self.circuit_breaker.record_success()
+            return result
+        except Exception as e:
+            if self.debug:
+                print(f"[@ollama] Protected generate call failed: {e}")
+            self.circuit_breaker.record_failure()
+            raise
+
+    def _protected_embeddings(self, *args, **kwargs):
+        if not self.circuit_breaker.is_request_allowed():
+            raise RuntimeError(
+                f"Ollama server is currently unavailable. Circuit Breaker is {self.circuit_breaker.state.value}. "
+                "Please wait or check Ollama server status. "
+                f"Current status: {self.circuit_breaker.get_status()}"
+            )
+        try:
+            result = self.original_methods["embeddings"](*args, **kwargs)
+            self.circuit_breaker.record_success()
+            return result
+        except Exception as e:
+            if self.debug:
+                print(f"[@ollama] Protected embeddings call failed: {e}")
+            self.circuit_breaker.record_failure()
+            raise
+
+
 class OllamaManager:
     """
     A process manager for Ollama runtimes.
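
The CircuitBreaker added above is a conventional three-state breaker: CLOSED trips to OPEN after failure_threshold failures, OPEN becomes HALF_OPEN once recovery_timeout elapses, and a successful probe in HALF_OPEN closes it again. A minimal sketch of that state machine, assuming the class exactly as diffed and timeouts shortened for illustration:

    import time

    cb = CircuitBreaker(failure_threshold=3, recovery_timeout=1, reset_timeout=5)
    for _ in range(3):
        cb.record_failure()                  # third failure trips the breaker
    assert cb.state == CircuitBreakerState.OPEN
    assert not cb.is_request_allowed()       # requests rejected while OPEN
    time.sleep(1.1)                          # exceed recovery_timeout
    assert cb.is_request_allowed()           # one probe allowed; state is now HALF_OPEN
    cb.record_success()                      # probe succeeded, breaker closes
    assert cb.state == CircuitBreakerState.CLOSED

OllamaRequestInterceptor then routes the ollama package's module-level entry points through that breaker. A hypothetical wiring (ollama.chat is the real client call; the model name is a placeholder):

    interceptor = OllamaRequestInterceptor(cb, debug=True)
    interceptor.install_protection()  # monkey-patches ollama.chat/generate/embeddings
    try:
        import ollama
        ollama.chat(model="llama3.2:1b", messages=[{"role": "user", "content": "hi"}])
    finally:
        interceptor.remove_protection()  # restore the original methods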
@@ -35,8 +452,12 @@ class OllamaManager:
         flow_datastore_backend=None,
         remote_storage_root=None,
         force_pull=False,
-        skip_push_check=False,
+        cache_update_policy="auto",
+        force_cache_update=False,
         debug=False,
+        circuit_breaker_config=None,
+        timeout_config=None,
+        status_card=None,
     ):
         self.models = {}
         self.processes = {}
@@ -52,19 +473,66 @@ class OllamaManager:
                 "Can not determine the storage root, as both flow_datastore_backend and remote_storage_root arguments of OllamaManager are None."
             )
         self.force_pull = force_pull
-        self.skip_push_check = skip_push_check
+
+        # New cache logic
+        self.cache_update_policy = cache_update_policy
+        if force_cache_update:  # Simple override
+            self.cache_update_policy = "force"
+        self.cache_status = {}  # Track cache status per model
+
         self.debug = debug
         self.stats = {}
         self.storage_info = {}
         self.ollama_url = "http://localhost:11434"  # Ollama API base URL
+        self.status_card = status_card
+        self.initialization_start = time.time()
 
         if backend != "local":
             raise ValueError(
                 "OllamaManager only supports the 'local' backend at this time."
             )
 
+        # Validate and set up circuit breaker config
+        if circuit_breaker_config is None:
+            circuit_breaker_config = {
+                "failure_threshold": 3,
+                "recovery_timeout": 30,  # Reduced from 60s - faster testing
+                "reset_timeout": 60,  # Reduced from 300s - faster restart
+            }
+
+        # Set up timeout configuration
+        if timeout_config is None:
+            timeout_config = {
+                "pull": 600,  # 10 minutes for model pulls
+                "stop": 30,  # 30 seconds for model stops
+                "health_check": 5,  # 5 seconds for health checks
+                "install": 60,  # 1 minute for Ollama installation
+                "server_startup": 300,  # 5 minutes for server startup
+            }
+        self.timeouts = timeout_config
+
+        # Initialize Circuit Breaker and Health Checker
+        self.circuit_breaker = CircuitBreaker(
+            failure_threshold=circuit_breaker_config.get("failure_threshold", 3),
+            recovery_timeout=circuit_breaker_config.get("recovery_timeout", 30),
+            reset_timeout=circuit_breaker_config.get("reset_timeout", 60),
+            debug=self.debug,
+            status_card=self.status_card,
+        )
+        self.health_checker = OllamaHealthChecker(
+            self.ollama_url, self.circuit_breaker, self, self.debug
+        )
+
+        self._log_event("info", "Starting Ollama initialization")
         self._timeit(self._install_ollama, "install_ollama")
         self._timeit(self._launch_server, "launch_server")
+        self.health_checker.start()
+
+        # Collect version information
+        self._collect_version_info()
+
+        # Initialize cache status display
+        self._update_cache_status()
 
         # Pull models concurrently
         with ThreadPoolExecutor() as executor:
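
Taken together, the constructor now exposes the cache policy and the breaker/timeout tuning that replace the old skip_push_check flag. A hypothetical invocation, assuming (as the surrounding code implies) that the leading parameters name the models list and the backend; all values are placeholders, and the defaults shown match the dictionaries above:

    manager = OllamaManager(
        models=["llama3.2:1b"],                       # assumed leading parameter
        backend="local",                              # the only supported backend
        remote_storage_root="s3://my-bucket/ollama",  # placeholder cache root
        cache_update_policy="auto",                   # "auto" | "force" | "never"
        circuit_breaker_config={
            "failure_threshold": 3,   # failures before the breaker opens
            "recovery_timeout": 30,   # seconds before a HALF_OPEN probe
            "reset_timeout": 60,      # seconds before a restart attempt
        },
        timeout_config={"pull": 600, "stop": 30, "health_check": 5},
        debug=True,
    )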
@@ -75,21 +543,214 @@ class OllamaManager:
             except Exception as e:
                 raise RuntimeError(f"Error pulling one or more models. {e}") from e
 
+        # Update final cache status
+        self._update_cache_status()
+
         # Run models as background processes.
         for m in models:
             f = functools.partial(self._run_model, m)
             self._timeit(f, f"model_{m.lower()}")
 
+        # Record total initialization time
+        total_init_time = time.time() - self.initialization_start
+        self._update_performance("total_initialization_time", total_init_time)
+        self._log_event(
+            "success", f"Ollama initialization completed in {total_init_time:.1f}s"
+        )
+
+    def _collect_version_info(self):
+        """Collect version information for Ollama system and Python client"""
+        version_info = {}
+
+        # Get Ollama system version
+        try:
+            result = subprocess.run(
+                ["ollama", "--version"], capture_output=True, text=True, timeout=10
+            )
+            if result.returncode == 0:
+                # Extract version from output - handle different formats
+                version_line = result.stdout.strip()
+                # Common formats: "ollama version 0.1.0", "0.1.0", "v0.1.0"
+                if "version" in version_line.lower():
+                    # Extract everything after "version"
+                    parts = version_line.lower().split("version")
+                    if len(parts) > 1:
+                        version_info["ollama_system"] = parts[1].strip()
+                    else:
+                        version_info["ollama_system"] = version_line
+                elif version_line.startswith("v"):
+                    version_info["ollama_system"] = version_line[
+                        1:
+                    ]  # Remove 'v' prefix
+                else:
+                    version_info["ollama_system"] = version_line
+            else:
+                version_info["ollama_system"] = "Unknown"
+        except Exception as e:
+            version_info["ollama_system"] = "Error detecting"
+            if self.debug:
+                print(f"[@ollama] Error getting system version: {e}")
+
+        # Get Python ollama client version
+        try:
+            import ollama
+
+            if hasattr(ollama, "__version__"):
+                version_info["ollama_python"] = ollama.__version__
+            else:
+                # Try alternative methods to get version
+                try:
+                    import pkg_resources
+
+                    version_info["ollama_python"] = pkg_resources.get_distribution(
+                        "ollama"
+                    ).version
+                except:
+                    try:
+                        # Try importlib.metadata (Python 3.8+)
+                        from importlib import metadata
+
+                        version_info["ollama_python"] = metadata.version("ollama")
+                    except:
+                        version_info["ollama_python"] = "Unknown"
+        except ImportError:
+            version_info["ollama_python"] = "Not installed"
+        except Exception as e:
+            version_info["ollama_python"] = "Error detecting"
+            if self.debug:
+                print(f"[@ollama] Error getting Python client version: {e}")
+
+        # Update status card with version info
+        if self.status_card:
+            self.status_card.update_status("versions", version_info)
+        self._log_event(
+            "info",
+            f"Versions: System {version_info.get('ollama_system', 'Unknown')}, Python {version_info.get('ollama_python', 'Unknown')}",
+        )
+
+    def _check_cache_exists(self, m):
+        """Check if cache exists for the given model"""
+        if self.local_datastore:
+            # Local datastore - no remote cache
+            return False
+
+        if m not in self.storage_info:
+            # Storage not set up yet
+            return False
+
+        try:
+            from metaflow import S3
+            from metaflow.plugins.datatools.s3.s3 import MetaflowS3NotFound
+
+            with S3() as s3:
+                # Check if manifest exists in remote storage
+                manifest_exists = s3.get(self.storage_info[m]["manifest_remote"]).exists
+
+                if manifest_exists:
+                    self.cache_status[m] = "exists"
+                    self._update_cache_status()
+                    return True
+                else:
+                    self.cache_status[m] = "missing"
+                    self._update_cache_status()
+                    return False
+
+        except Exception as e:
+            if self.debug:
+                print(f"[@ollama {m}] Error checking cache existence: {e}")
+            self.cache_status[m] = "error"
+            self._update_cache_status()
+            return False
+
+    def _should_update_cache(self, m):
+        """Determine if we should update cache for this model based on policy"""
+        if self.cache_update_policy == "never":
+            return False
+        elif self.cache_update_policy == "force":
+            return True
+        elif self.cache_update_policy == "auto":
+            # Only update if cache doesn't exist
+            cache_exists = self._check_cache_exists(m)
+            return not cache_exists
+        else:
+            # Unknown policy, default to auto behavior
+            cache_exists = self._check_cache_exists(m)
+            return not cache_exists
+
+    def _log_event(self, event_type, message):
+        """Log an event to the status card"""
+        if self.status_card:
+            self.status_card.add_event(event_type, message)
+        if self.debug:
+            print(f"[@ollama] {event_type.upper()}: {message}")
+
+    def _update_server_status(self, status, **kwargs):
+        """Update server status in the status card"""
+        if self.status_card:
+            update_data = {"status": status}
+            update_data.update(kwargs)
+            self.status_card.update_status("server", update_data)
+
+    def _update_model_status(self, model_name, **kwargs):
+        """Update model status in the status card"""
+        if self.status_card:
+            current_models = self.status_card.status_data.get("models", {})
+            if model_name not in current_models:
+                current_models[model_name] = {}
+            current_models[model_name].update(kwargs)
+            self.status_card.update_status("models", current_models)
+
+    def _update_performance(self, metric, value):
+        """Update performance metrics in the status card"""
+        if self.status_card:
+            self.status_card.update_status("performance", {metric: value})
+
+    def _update_circuit_breaker_status(self):
+        """Update circuit breaker status in the status card"""
+        if self.status_card:
+            cb_status = self.circuit_breaker.get_status()
+            self.status_card.update_status("circuit_breaker", cb_status)
+
+    def _update_cache_status(self):
+        """Update cache status in the status card"""
+        if self.status_card:
+            self.status_card.update_status(
+                "cache",
+                {
+                    "policy": self.cache_update_policy,
+                    "model_status": self.cache_status.copy(),
+                },
+            )
+
     def _timeit(self, f, name):
         t0 = time.time()
         f()
         tf = time.time()
-        self.stats[name] = {"process_runtime": tf - t0}
+        duration = tf - t0
+        self.stats[name] = {"process_runtime": duration}
+
+        # Update performance metrics for status card
+        if name == "install_ollama":
+            self._update_performance("install_time", duration)
+        elif name == "launch_server":
+            self._update_performance("server_startup_time", duration)
+
+    def _is_port_open(self, host, port, timeout=1):
+        """Check if a TCP port is open on a given host."""
+        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
+            sock.settimeout(timeout)
+            try:
+                sock.connect((host, port))
+                return True
+            except socket.error:
+                return False
 
     def _install_ollama(self, max_retries=3):
+        self._log_event("info", "Checking for existing Ollama installation")
         try:
             result = subprocess.run(["which", "ollama"], capture_output=True, text=True)
             if result.returncode == 0:
+                self._log_event("success", "Ollama is already installed")
                 print("[@ollama] Ollama is already installed.")
                 return
         except Exception as e:
@@ -100,6 +761,7 @@ class OllamaManager:
                 "On macOS, please install Ollama manually from https://ollama.com/download."
             )
 
+        self._log_event("info", "Installing Ollama...")
        if self.debug:
            print("[@ollama] Installing Ollama...")
        env = os.environ.copy()
@@ -109,7 +771,7 @@ class OllamaManager:
            try:
                install_cmd = ["curl", "-fsSL", "https://ollama.com/install.sh"]
                curl_proc = subprocess.run(
-                    install_cmd, capture_output=True, text=True, env=env
+                    install_cmd, capture_output=True, text=True, env=env, timeout=120
                )
                if curl_proc.returncode != 0:
                    raise RuntimeError(
@@ -121,38 +783,40 @@ class OllamaManager:
                    capture_output=True,
                    text=True,
                    env=env,
+                    timeout=self.timeouts.get("install", 60),
                )
                if sh_proc.returncode != 0:
                    raise RuntimeError(
                        f"Ollama installation script failed: stdout: {sh_proc.stdout}, stderr: {sh_proc.stderr}"
                    )
+                self._log_event("success", "Ollama installation completed successfully")
                if self.debug:
                    print("[@ollama] Ollama installed successfully.")
                break
            except Exception as e:
+                self._log_event(
+                    "warning", f"Installation attempt {attempt+1} failed: {str(e)}"
+                )
                if self.debug:
                    print(f"[@ollama] Installation attempt {attempt+1} failed: {e}")
                if attempt < max_retries - 1:
                    time.sleep(5)
                else:
+                    self._log_event(
+                        "error",
+                        f"Ollama installation failed after {max_retries} attempts",
+                    )
                    raise RuntimeError(
                        f"Error installing Ollama after {max_retries} attempts: {e}"
                    ) from e
 
-    def _is_port_open(self, host, port, timeout=1):
-        """Check if a TCP port is open on a given host."""
-        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
-            sock.settimeout(timeout)
-            try:
-                sock.connect((host, port))
-                return True
-            except socket.error:
-                return False
-
    def _launch_server(self):
        """
        Start the Ollama server process and ensure it's running.
        """
+        self._update_server_status("Starting")
+        self._log_event("info", "Starting Ollama server...")
+
        try:
            print("[@ollama] Starting Ollama server...")
            process = subprocess.Popen(
@@ -181,6 +845,24 @@ class OllamaManager:
                    print("[@ollama] Waiting for server to be ready...")
                elif retries % 3 == 0:
                    print(f"[@ollama] Still waiting... ({retries + 1}/{max_retries})")
+
+                # Check if process terminated unexpectedly during startup
+                returncode = process.poll()
+                if returncode is not None:
+                    # Process exited, get error details but don't call communicate() which can hang
+                    error_details = f"Return code: {returncode}"
+                    self.processes[process.pid]["properties"][
+                        "error_details"
+                    ] = error_details
+                    self.processes[process.pid]["status"] = ProcessStatus.FAILED
+                    self._update_server_status("Failed", error_details=error_details)
+                    self._log_event(
+                        "error", f"Ollama server failed to start: {error_details}"
+                    )
+                    raise RuntimeError(
+                        f"Ollama server failed to start. {error_details}"
+                    )
+
                time.sleep(5)
                retries += 1
 
@@ -192,25 +874,34 @@ class OllamaManager:
                    "error_details"
                ] = error_details
                self.processes[process.pid]["status"] = ProcessStatus.FAILED
+                self._update_server_status("Failed", error_details=error_details)
+                self._log_event("error", f"Server startup timeout: {error_details}")
                raise RuntimeError(f"Ollama server failed to start. {error_details}")
 
-            # Check if process terminated unexpectedly
+            # Final check if process terminated unexpectedly
            returncode = process.poll()
            if returncode is not None:
-                stdout, stderr = process.communicate()
-                error_details = f"Return code: {returncode}, Error: {stderr}"
+                error_details = f"Return code: {returncode}"
                self.processes[process.pid]["properties"][
                    "error_details"
                ] = error_details
                self.processes[process.pid]["status"] = ProcessStatus.FAILED
+                self._update_server_status("Failed", error_details=error_details)
+                self._log_event(
+                    "error", f"Server process died unexpectedly: {error_details}"
+                )
                raise RuntimeError(f"Ollama server failed to start. {error_details}")
 
+            self._update_server_status("Running", uptime_start=datetime.now())
+            self._log_event("success", "Ollama server is ready and listening")
            print("[@ollama] Server is ready.")
 
        except Exception as e:
            if "process" in locals() and process.pid in self.processes:
                self.processes[process.pid]["status"] = ProcessStatus.FAILED
                self.processes[process.pid]["properties"]["error_details"] = str(e)
+            self._update_server_status("Failed", error_details=str(e))
+            self._log_event("error", f"Error starting Ollama server: {str(e)}")
            raise RuntimeError(f"Error starting Ollama server: {e}") from e
 
    def _setup_storage(self, m):
@@ -436,6 +1127,9 @@ class OllamaManager:
        Verify model is available using Ollama API
        """
        try:
+            if self.debug:
+                print(f"[@ollama] DEBUG: Verifying model availability for: {m}")
+
            response = requests.post(
                f"{self.ollama_url}/api/show", json={"model": m}, timeout=10
            )
@@ -445,10 +1139,30 @@ class OllamaManager:
            if self.debug:
                if available:
                    print(f"[@ollama {m}] ✓ Model is available via API.")
+                    # Also list all available models for debugging
+                    try:
+                        tags_response = requests.get(
+                            f"{self.ollama_url}/api/tags", timeout=10
+                        )
+                        if tags_response.status_code == 200:
+                            models = tags_response.json().get("models", [])
+                            model_names = [
+                                model.get("name", "unknown") for model in models
+                            ]
+                            print(
+                                f"[@ollama] DEBUG: All available models: {model_names}"
+                            )
+                    except Exception as e:
+                        print(f"[@ollama] DEBUG: Could not list models: {e}")
                else:
                    print(
                        f"[@ollama {m}] ✗ Model not available via API (status: {response.status_code})."
                    )
+                    try:
+                        error_detail = response.text
+                        print(f"[@ollama] DEBUG: Error response: {error_detail}")
+                    except:
+                        pass
 
            return available
@@ -525,8 +1239,44 @@ class OllamaManager:
        """
        Pull/setup a model, using cache when possible.
        """
+        self._update_model_status(m, status="Setting up storage")
+        self._log_event("info", f"Setting up model {m}")
+        pull_start_time = time.time()
+
        self._setup_storage(m)
 
+        # Check cache existence and inform user about cache strategy
+        cache_exists = self._check_cache_exists(m)
+        will_update_cache = self._should_update_cache(m)
+
+        if cache_exists:
+            if will_update_cache:
+                self._log_event(
+                    "info",
+                    f"Cache exists for {m}, but will be updated due to {self.cache_update_policy} policy",
+                )
+                print(
+                    f"[@ollama {m}] Cache exists but will be updated ({self.cache_update_policy} policy)"
+                )
+            else:
+                self._log_event("info", f"Using existing cache for {m}")
+                print(f"[@ollama {m}] Using existing cache")
+        else:
+            if will_update_cache:
+                self._log_event(
+                    "info",
+                    f"No cache found for {m}, will populate after successful setup",
+                )
+                print(f"[@ollama {m}] No cache found, will populate cache after setup")
+            else:
+                self._log_event(
+                    "info",
+                    f"No cache found for {m}, but cache updates disabled ({self.cache_update_policy} policy)",
+                )
+                print(
+                    f"[@ollama {m}] No cache found, cache updates disabled ({self.cache_update_policy} policy)"
+                )
+
        # Try to fetch manifest from cache first
        manifest = None
        try:
@@ -539,31 +1289,53 @@ class OllamaManager:
        # If we don't have a cached manifest or force_pull is True, pull the model
        if self.force_pull or not manifest:
            try:
+                self._update_model_status(m, status="Downloading")
+                self._log_event("info", f"Downloading model {m}...")
                print(f"[@ollama {m}] Not using cache. Downloading model {m}...")
                result = subprocess.run(
-                    ["ollama", "pull", m], capture_output=True, text=True
+                    ["ollama", "pull", m],
+                    capture_output=True,
+                    text=True,
+                    timeout=self.timeouts.get("pull", 600),
                )
                if result.returncode != 0:
+                    self._update_model_status(m, status="Failed")
+                    self._log_event(
+                        "error", f"Failed to pull model {m}: {result.stderr}"
+                    )
                    raise RuntimeError(
                        f"Failed to pull model {m}: stdout: {result.stdout}, stderr: {result.stderr}"
                    )
+                pull_time = time.time() - pull_start_time
+                self._update_model_status(m, status="Downloaded", pull_time=pull_time)
+                self._log_event("success", f"Model {m} downloaded in {pull_time:.1f}s")
                print(f"[@ollama {m}] Model downloaded successfully.")
            except Exception as e:
+                self._update_model_status(m, status="Failed")
+                self._log_event("error", f"Error pulling model {m}: {str(e)}")
                raise RuntimeError(f"Error pulling Ollama model {m}: {e}") from e
        else:
            # We have a cached manifest, try to fetch the blobs
            try:
+                self._update_model_status(m, status="Loading from cache")
+                self._log_event("info", f"Loading model {m} from cache")
                self._fetch_blobs(m)
                print(f"[@ollama {m}] Using cached model.")
 
                # Register the cached model with Ollama
                if not self._verify_model_available(m):
                    if not self._register_cached_model_with_ollama(m):
+                        self._update_model_status(m, status="Failed")
+                        self._log_event("error", f"Failed to register cached model {m}")
                        raise RuntimeError(
                            f"Failed to register cached model {m} with Ollama"
                        )
 
-                # self.skip_push_check = True
+                pull_time = time.time() - pull_start_time
+                self._update_model_status(m, status="Cached", pull_time=pull_time)
+                self._log_event(
+                    "success", f"Model {m} loaded from cache in {pull_time:.1f}s"
+                )
 
            except (EmptyOllamaBlobCacheException, Exception) as e:
                if self.debug:
@@ -572,25 +1344,62 @@ class OllamaManager:
 
                # Fallback to pulling the model
                try:
+                    self._update_model_status(m, status="Downloading (fallback)")
+                    self._log_event(
+                        "warning", f"Cache failed for {m}, downloading as fallback"
+                    )
                    result = subprocess.run(
-                        ["ollama", "pull", m], capture_output=True, text=True
+                        ["ollama", "pull", m],
+                        capture_output=True,
+                        text=True,
+                        timeout=self.timeouts.get("pull", 600),
                    )
                    if result.returncode != 0:
+                        self._update_model_status(m, status="Failed")
+                        self._log_event("error", f"Fallback pull failed for model {m}")
                        raise RuntimeError(
                            f"Failed to pull model {m}: stdout: {result.stdout}, stderr: {result.stderr}"
                        )
+                    pull_time = time.time() - pull_start_time
+                    self._update_model_status(
+                        m, status="Downloaded (fallback)", pull_time=pull_time
+                    )
+                    self._log_event(
+                        "success",
+                        f"Model {m} downloaded via fallback in {pull_time:.1f}s",
+                    )
                    print(f"[@ollama {m}] Model downloaded successfully (fallback).")
                except Exception as pull_e:
+                    self._update_model_status(m, status="Failed")
+                    self._log_event(
+                        "error",
+                        f"Fallback download failed for model {m}: {str(pull_e)}",
+                    )
                    raise RuntimeError(
                        f"Error pulling Ollama model {m} as fallback: {pull_e}"
                    ) from pull_e
 
        # Final verification that the model is available
        if not self._verify_model_available(m):
+            self._update_model_status(m, status="Failed")
+            self._log_event("error", f"Model {m} verification failed")
            raise RuntimeError(f"Model {m} is not available to Ollama after setup")
 
+        # Collect model metadata (size and blob count)
+        metadata = self._collect_model_metadata(m)
+        self._update_model_status(
+            m,
+            status="Ready",
+            size_formatted=metadata["size_formatted"],
+            blob_count=metadata["blob_count"],
+        )
+        self._log_event("success", f"Model {m} setup complete and verified")
        if self.debug:
            print(f"[@ollama {m}] Model setup complete and verified.")
+            if metadata["size_formatted"] != "Unknown":
+                print(
+                    f"[@ollama {m}] Model size: {metadata['size_formatted']}, Blobs: {metadata['blob_count']}"
+                )
 
    def _run_model(self, m):
        """
@@ -598,9 +1407,13 @@ class OllamaManager:
        """
        process = None
        try:
+            self._update_model_status(m, status="Starting process")
+            self._log_event("info", f"Starting model process for {m}")
            if self.debug:
                print(f"[@ollama {m}] Starting model process...")
 
+            # For `ollama run`, we want it to stay running, so no timeout on Popen.
+            # The health checker will detect if it becomes unresponsive.
            process = subprocess.Popen(
                ["ollama", "run", m],
                stdout=subprocess.PIPE,
@@ -616,80 +1429,202 @@ class OllamaManager:
            if self.debug:
                print(f"[@ollama {m}] Model process PID: {process.pid}.")
 
+            # We don't want to wait here indefinitely. Just check if it failed immediately.
+            # The health checker will monitor long-term responsiveness.
            try:
-                process.wait(timeout=1)
-            except subprocess.TimeoutExpired:
-                pass
-
-            returncode = process.poll()
-            if returncode is not None:
-                stdout, stderr = process.communicate()
-                if returncode == 0:
-                    self.processes[process.pid]["status"] = ProcessStatus.SUCCESSFUL
-                    if self.debug:
-                        print(
-                            f"[@ollama {m}] Process {process.pid} exited successfully."
-                        )
-                else:
+                process.wait(timeout=1)  # Check if it exited immediately
+                returncode = process.poll()
+                if (
+                    returncode is not None and returncode != 0
+                ):  # If it exited immediately with an error
+                    stdout, stderr = process.communicate()
                    error_details = f"Return code: {returncode}, Error: {stderr}"
                    self.processes[process.pid]["properties"][
                        "error_details"
                    ] = error_details
                    self.processes[process.pid]["status"] = ProcessStatus.FAILED
+                    self._update_model_status(m, status="Failed")
+                    self._log_event(
+                        "error",
+                        f"Model {m} process failed immediately: {error_details}",
+                    )
+                    if self.debug:
+                        print(
+                            f"[@ollama {m}] Process {process.pid} failed immediately: {error_details}."
+                        )
+                    raise RuntimeError(
+                        f"Ollama model {m} failed to start immediately: {error_details}"
+                    )
+                elif returncode == 0:
+                    # This case should ideally not happen for a long-running model
                    if self.debug:
                        print(
-                            f"[@ollama {m}] Process {process.pid} failed: {error_details}."
+                            f"[@ollama {m}] Process {process.pid} exited immediately with success. This might be unexpected for a model process."
                        )
+                    self.processes[process.pid]["status"] = ProcessStatus.SUCCESSFUL
+
+            except subprocess.TimeoutExpired:
+                # This is the expected case: process is running and hasn't exited
+                self._update_model_status(m, status="Running")
+                self._log_event("success", f"Model {m} process started successfully")
+                if self.debug:
+                    print(
+                        f"[@ollama {m}] Model process {process.pid} is running in background."
+                    )
+                pass  # Process is still running, which is good
+
        except Exception as e:
            if process and process.pid in self.processes:
                self.processes[process.pid]["status"] = ProcessStatus.FAILED
                self.processes[process.pid]["properties"]["error_details"] = str(e)
+            self._update_model_status(m, status="Failed")
+            self._log_event("error", f"Error running model {m}: {str(e)}")
            raise RuntimeError(f"Error running Ollama model {m}: {e}") from e
 
    def terminate_models(self, skip_push_check=None):
        """
        Terminate all processes gracefully and update cache.
        """
+        shutdown_start_time = time.time()
+        self._log_event("info", "Starting Ollama shutdown sequence")
        print("[@ollama] Shutting down models...")
 
+        # Stop the health checker first
+        self.health_checker.stop()
+
+        # Handle backward compatibility for skip_push_check parameter
        if skip_push_check is not None:
-            assert isinstance(
-                skip_push_check, bool
-            ), "skip_push_check passed to terminate_models must be a bool if specified."
-            self.skip_push_check = skip_push_check
+            # Legacy parameter provided
+            if skip_push_check:
+                self.cache_update_policy = "never"
+                self._log_event(
+                    "warning",
+                    "Using legacy skip_push_check=True, setting cache policy to 'never'",
+                )
+            else:
+                self.cache_update_policy = "force"
+                self._log_event(
+                    "warning",
+                    "Using legacy skip_push_check=False, setting cache policy to 'force'",
+                )
 
+        # Shutdown models
+        model_shutdown_results = {}
        for pid, process_info in list(self.processes.items()):
            if process_info["properties"].get("type") == "model":
                model_name = process_info["properties"].get("model")
+                model_shutdown_start = time.time()
+                shutdown_cause = "graceful"
 
+                self._update_model_status(model_name, status="Stopping")
+                self._log_event("info", f"Stopping model {model_name}")
                if self.debug:
                    print(f"[@ollama {model_name}] Stopping model process...")
 
                try:
                    result = subprocess.run(
-                        ["ollama", "stop", model_name], capture_output=True, text=True
+                        ["ollama", "stop", model_name],
+                        capture_output=True,
+                        text=True,
+                        timeout=self.timeouts.get("stop", 30),
                    )
                    if result.returncode == 0:
                        process_info["status"] = ProcessStatus.SUCCESSFUL
+                        self._update_model_status(model_name, status="Stopped")
+                        self._log_event(
+                            "success", f"Model {model_name} stopped gracefully"
+                        )
                        if self.debug:
                            print(f"[@ollama {model_name}] Stopped successfully.")
                    else:
                        process_info["status"] = ProcessStatus.FAILED
+                        shutdown_cause = "force_kill"
+                        self._update_model_status(model_name, status="Force stopped")
+                        self._log_event(
+                            "warning",
+                            f"Model {model_name} stop command failed, killing process",
+                        )
                        if self.debug:
                            print(
-                                f"[@ollama {model_name}] Stop failed: {result.stderr}"
+                                f"[@ollama {model_name}] Stop failed: {result.stderr}. Attempting to kill process directly."
+                            )
+                        # Fallback: if 'ollama stop' fails, try to kill the process directly
+                        try:
+                            process_info["p"].terminate()
+                            process_info["p"].wait(timeout=5)
+                            if process_info["p"].poll() is None:
+                                process_info["p"].kill()
+                                process_info["p"].wait()
+                            process_info["status"] = ProcessStatus.SUCCESSFUL
+                            self._update_model_status(model_name, status="Killed")
+                            self._log_event(
+                                "warning", f"Model {model_name} process killed directly"
                            )
+                            if self.debug:
+                                print(
+                                    f"[@ollama {model_name}] Process killed directly."
+                                )
+                        except Exception as kill_e:
+                            process_info["status"] = ProcessStatus.FAILED
+                            shutdown_cause = "failed"
+                            self._update_model_status(
+                                model_name, status="Failed to stop"
+                            )
+                            self._log_event(
+                                "error",
+                                f"Model {model_name} failed to stop: {str(kill_e)}",
+                            )
+                            print(
+                                f"[@ollama {model_name}] Error killing process directly: {kill_e}"
+                            )
+
                except Exception as e:
                    process_info["status"] = ProcessStatus.FAILED
+                    shutdown_cause = "failed"
+                    self._update_model_status(model_name, status="Failed to stop")
+                    self._log_event(
+                        "error", f"Model {model_name} shutdown error: {str(e)}"
+                    )
                    print(f"[@ollama {model_name}] Error stopping: {e}")
 
-                # Update cache if needed
-                if not self.skip_push_check:
+                # Record model shutdown timing
+                model_shutdown_time = time.time() - model_shutdown_start
+                model_shutdown_results[model_name] = {
+                    "shutdown_time": model_shutdown_time,
+                    "shutdown_cause": shutdown_cause,
+                }
+
+                # Smart cache update logic
+                should_update = self._should_update_cache(model_name)
+                if should_update:
+                    self._log_event(
+                        "info",
+                        f"Updating cache for {model_name} ({self.cache_update_policy} policy)",
+                    )
                    self._update_model_cache(model_name)
+                else:
+                    cache_reason = f"policy is '{self.cache_update_policy}'"
+                    if (
+                        self.cache_update_policy == "auto"
+                        and self.cache_status.get(model_name) == "exists"
+                    ):
+                        cache_reason = "cache already exists"
+                    self._log_event(
+                        "info",
+                        f"Skipping cache update for {model_name} ({cache_reason})",
+                    )
+                    if self.debug:
+                        print(
+                            f"[@ollama {model_name}] Skipping cache update: {cache_reason}"
+                        )
 
        # Stop the API server
+        server_shutdown_cause = "graceful"
+        server_shutdown_start = time.time()
        for pid, process_info in list(self.processes.items()):
            if process_info["properties"].get("type") == "api-server":
+                self._update_server_status("Stopping")
+                self._log_event("info", "Stopping Ollama API server")
                if self.debug:
                    print(f"[@ollama] Stopping API server process PID {pid}.")
@@ -699,6 +1634,11 @@ class OllamaManager:
                try:
                    process.wait(timeout=5)
                except subprocess.TimeoutExpired:
+                    server_shutdown_cause = "force_kill"
+                    self._log_event(
+                        "warning",
+                        "API server did not terminate gracefully, killing...",
+                    )
                    print(
                        f"[@ollama] API server PID {pid} did not terminate, killing..."
                    )
@@ -706,12 +1646,40 @@ class OllamaManager:
                    process.wait()
 
                process_info["status"] = ProcessStatus.SUCCESSFUL
+                self._update_server_status("Stopped")
+                self._log_event(
+                    "success", f"API server stopped ({server_shutdown_cause})"
+                )
                if self.debug:
                    print(f"[@ollama] API server terminated successfully.")
            except Exception as e:
                process_info["status"] = ProcessStatus.FAILED
+                server_shutdown_cause = "failed"
+                self._update_server_status("Failed to stop")
+                self._log_event("error", f"API server shutdown error: {str(e)}")
                print(f"[@ollama] Warning: Error terminating API server: {e}")
 
+        # Record total shutdown time and performance metrics
+        total_shutdown_time = time.time() - shutdown_start_time
+        server_shutdown_time = time.time() - server_shutdown_start
+
+        # Update performance metrics
+        self._update_performance("server_shutdown_time", server_shutdown_time)
+        self._update_performance("total_shutdown_time", total_shutdown_time)
+        self._update_performance("shutdown_cause", server_shutdown_cause)
+
+        # Log individual model shutdown times
+        for model_name, results in model_shutdown_results.items():
+            self._update_performance(
+                f"{model_name}_shutdown_time", results["shutdown_time"]
+            )
+            self._update_performance(
+                f"{model_name}_shutdown_cause", results["shutdown_cause"]
+            )
+
+        self._log_event(
+            "success", f"Ollama shutdown completed in {total_shutdown_time:.1f}s"
+        )
        print("[@ollama] All models stopped.")
 
        # Show performance summary
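
terminate_models keeps accepting the removed skip_push_check argument and maps it onto the new cache policy, so existing callers do not break. A sketch of the equivalence, assuming a manager constructed as above:

    manager.terminate_models()  # uses the cache_update_policy chosen at init
    # Legacy mappings applied by the shim above:
    #   terminate_models(skip_push_check=True)  -> cache_update_policy = "never"
    #   terminate_models(skip_push_check=False) -> cache_update_policy = "force"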
@@ -811,3 +1779,146 @@ class OllamaManager:
        else:
            self.local_datastore = True
        return None
+
+    def _attempt_ollama_restart(self):
+        """Attempt to restart Ollama when circuit breaker suggests it"""
+        try:
+            print("[@ollama] Attempting Ollama restart due to circuit breaker...")
+
+            # Stop existing server processes
+            server_stopped = False
+            for pid, process_info in list(self.processes.items()):
+                if process_info["properties"].get("type") == "api-server":
+                    process = process_info["p"]
+                    try:
+                        process.terminate()
+                        process.wait(timeout=10)
+                        if process.poll() is None:
+                            process.kill()
+                            process.wait()
+                        process_info["status"] = ProcessStatus.SUCCESSFUL
+                        server_stopped = True
+                        if self.debug:
+                            print(
+                                f"[@ollama] Stopped server process {pid} during restart"
+                            )
+                    except Exception as e:
+                        if self.debug:
+                            print(
+                                f"[@ollama] Error stopping server {pid} during restart: {e}"
+                            )
+
+            if not server_stopped:
+                if self.debug:
+                    print("[@ollama] No server process found to stop during restart")
+
+            # Small delay to ensure cleanup
+            time.sleep(2)
+
+            # Restart server
+            self._launch_server()
+
+            # Verify health with multiple attempts
+            health_attempts = 3
+            for attempt in range(health_attempts):
+                if self._verify_server_health():
+                    print("[@ollama] Restart successful")
+                    return True
+                else:
+                    if attempt < health_attempts - 1:
+                        if self.debug:
+                            print(
+                                f"[@ollama] Health check failed, attempt {attempt + 1}/{health_attempts}"
+                            )
+                        time.sleep(5)
+
+            print(
+                "[@ollama] Restart failed - server not healthy after multiple attempts"
+            )
+            return False
+
+        except Exception as e:
+            print(f"[@ollama] Restart failed: {e}")
+            return False
+
+    def _verify_server_health(self):
+        """Quick health check for server availability"""
+        try:
+            response = requests.get(
+                f"{self.ollama_url}/api/tags",
+                timeout=self.timeouts.get("health_check", 5),
+            )
+            return response.status_code == 200
+        except Exception:
+            return False
+
+    def _collect_model_metadata(self, m):
+        """
+        Collect model metadata including size and blob count from manifest and API
+        """
+        metadata = {"size_bytes": None, "size_formatted": "Unknown", "blob_count": 0}
+
+        try:
+            # First try to get info from manifest (works for cached models)
+            manifest = self._fetch_manifest(m)
+            if manifest and "layers" in manifest:
+                metadata["blob_count"] = len(manifest["layers"])
+
+                # Calculate total size from manifest layers
+                total_size = 0
+                for layer in manifest["layers"]:
+                    if "size" in layer:
+                        total_size += layer["size"]
+
+                if total_size > 0:
+                    metadata["size_bytes"] = total_size
+                    metadata["size_formatted"] = self._format_bytes(total_size)
+
+            # Try to get more detailed info from Ollama API if available
+            try:
+                response = requests.post(
+                    f"{self.ollama_url}/api/show", json={"model": m}, timeout=10
+                )
+                if response.status_code == 200:
+                    model_info = response.json()
+
+                    # Extract size if available in the response
+                    if (
+                        "details" in model_info
+                        and "parameter_size" in model_info["details"]
+                    ):
+                        # Sometimes the API returns parameter size info
+                        param_size = model_info["details"]["parameter_size"]
+                        if self.debug:
+                            print(f"[@ollama {m}] Parameter size: {param_size}")
+
+                    # If we get model_info but didn't have manifest info, try to get layer count
+                    if metadata["blob_count"] == 0 and "details" in model_info:
+                        details = model_info["details"]
+                        if "families" in details or "family" in details:
+                            # API response structure varies, estimate blob count
+                            metadata["blob_count"] = "API"
+
+            except Exception as api_e:
+                if self.debug:
+                    print(f"[@ollama {m}] Could not get API metadata: {api_e}")
+
+        except Exception as e:
+            if self.debug:
+                print(f"[@ollama {m}] Error collecting model metadata: {e}")
+
+        return metadata
+
+    def _format_bytes(self, bytes_count):
+        """Format bytes into human-readable string"""
+        if bytes_count is None:
+            return "Unknown"
+
+        for unit in ["B", "KB", "MB", "GB", "TB"]:
+            if bytes_count < 1024.0:
+                if unit == "B":
+                    return f"{int(bytes_count)} {unit}"
+                else:
+                    return f"{bytes_count:.1f} {unit}"
+            bytes_count /= 1024.0
+        return f"{bytes_count:.1f} PB"
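
_format_bytes divides by 1024 per unit step and keeps one decimal for everything above bytes. Expected outputs, assuming the method as added above (values checked by hand; mgr stands for any OllamaManager instance):

    assert mgr._format_bytes(None) == "Unknown"
    assert mgr._format_bytes(512) == "512 B"           # bytes stay integral
    assert mgr._format_bytes(2048) == "2.0 KB"         # 2048 / 1024
    assert mgr._format_bytes(4661211136) == "4.3 GB"   # 4661211136 / 1024**3 is about 4.34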