invarlock 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (132)
  1. invarlock/__init__.py +33 -0
  2. invarlock/__main__.py +10 -0
  3. invarlock/_data/runtime/profiles/ci_cpu.yaml +15 -0
  4. invarlock/_data/runtime/profiles/release.yaml +23 -0
  5. invarlock/_data/runtime/tiers.yaml +76 -0
  6. invarlock/adapters/__init__.py +102 -0
  7. invarlock/adapters/_capabilities.py +45 -0
  8. invarlock/adapters/auto.py +99 -0
  9. invarlock/adapters/base.py +530 -0
  10. invarlock/adapters/base_types.py +85 -0
  11. invarlock/adapters/hf_bert.py +852 -0
  12. invarlock/adapters/hf_gpt2.py +403 -0
  13. invarlock/adapters/hf_llama.py +485 -0
  14. invarlock/adapters/hf_mixin.py +383 -0
  15. invarlock/adapters/hf_onnx.py +112 -0
  16. invarlock/adapters/hf_t5.py +137 -0
  17. invarlock/adapters/py.typed +1 -0
  18. invarlock/assurance/__init__.py +43 -0
  19. invarlock/cli/__init__.py +8 -0
  20. invarlock/cli/__main__.py +8 -0
  21. invarlock/cli/_evidence.py +25 -0
  22. invarlock/cli/_json.py +75 -0
  23. invarlock/cli/adapter_auto.py +162 -0
  24. invarlock/cli/app.py +287 -0
  25. invarlock/cli/commands/__init__.py +26 -0
  26. invarlock/cli/commands/certify.py +403 -0
  27. invarlock/cli/commands/doctor.py +1358 -0
  28. invarlock/cli/commands/explain_gates.py +151 -0
  29. invarlock/cli/commands/export_html.py +100 -0
  30. invarlock/cli/commands/plugins.py +1331 -0
  31. invarlock/cli/commands/report.py +354 -0
  32. invarlock/cli/commands/run.py +4146 -0
  33. invarlock/cli/commands/verify.py +1040 -0
  34. invarlock/cli/config.py +396 -0
  35. invarlock/cli/constants.py +68 -0
  36. invarlock/cli/device.py +92 -0
  37. invarlock/cli/doctor_helpers.py +74 -0
  38. invarlock/cli/errors.py +6 -0
  39. invarlock/cli/overhead_utils.py +60 -0
  40. invarlock/cli/provenance.py +66 -0
  41. invarlock/cli/utils.py +41 -0
  42. invarlock/config.py +56 -0
  43. invarlock/core/__init__.py +62 -0
  44. invarlock/core/abi.py +15 -0
  45. invarlock/core/api.py +274 -0
  46. invarlock/core/auto_tuning.py +317 -0
  47. invarlock/core/bootstrap.py +226 -0
  48. invarlock/core/checkpoint.py +221 -0
  49. invarlock/core/contracts.py +73 -0
  50. invarlock/core/error_utils.py +64 -0
  51. invarlock/core/events.py +298 -0
  52. invarlock/core/exceptions.py +95 -0
  53. invarlock/core/registry.py +481 -0
  54. invarlock/core/retry.py +146 -0
  55. invarlock/core/runner.py +2041 -0
  56. invarlock/core/types.py +154 -0
  57. invarlock/edits/__init__.py +12 -0
  58. invarlock/edits/_edit_utils.py +249 -0
  59. invarlock/edits/_external_utils.py +268 -0
  60. invarlock/edits/noop.py +47 -0
  61. invarlock/edits/py.typed +1 -0
  62. invarlock/edits/quant_rtn.py +801 -0
  63. invarlock/edits/registry.py +166 -0
  64. invarlock/eval/__init__.py +23 -0
  65. invarlock/eval/bench.py +1207 -0
  66. invarlock/eval/bootstrap.py +50 -0
  67. invarlock/eval/data.py +2052 -0
  68. invarlock/eval/metrics.py +2167 -0
  69. invarlock/eval/primary_metric.py +767 -0
  70. invarlock/eval/probes/__init__.py +24 -0
  71. invarlock/eval/probes/fft.py +139 -0
  72. invarlock/eval/probes/mi.py +213 -0
  73. invarlock/eval/probes/post_attention.py +323 -0
  74. invarlock/eval/providers/base.py +67 -0
  75. invarlock/eval/providers/seq2seq.py +111 -0
  76. invarlock/eval/providers/text_lm.py +113 -0
  77. invarlock/eval/providers/vision_text.py +93 -0
  78. invarlock/eval/py.typed +1 -0
  79. invarlock/guards/__init__.py +18 -0
  80. invarlock/guards/_contracts.py +9 -0
  81. invarlock/guards/invariants.py +640 -0
  82. invarlock/guards/policies.py +805 -0
  83. invarlock/guards/py.typed +1 -0
  84. invarlock/guards/rmt.py +2097 -0
  85. invarlock/guards/spectral.py +1419 -0
  86. invarlock/guards/tier_config.py +354 -0
  87. invarlock/guards/variance.py +3298 -0
  88. invarlock/guards_ref/__init__.py +15 -0
  89. invarlock/guards_ref/rmt_ref.py +40 -0
  90. invarlock/guards_ref/spectral_ref.py +135 -0
  91. invarlock/guards_ref/variance_ref.py +60 -0
  92. invarlock/model_profile.py +353 -0
  93. invarlock/model_utils.py +221 -0
  94. invarlock/observability/__init__.py +10 -0
  95. invarlock/observability/alerting.py +535 -0
  96. invarlock/observability/core.py +546 -0
  97. invarlock/observability/exporters.py +565 -0
  98. invarlock/observability/health.py +588 -0
  99. invarlock/observability/metrics.py +457 -0
  100. invarlock/observability/py.typed +1 -0
  101. invarlock/observability/utils.py +553 -0
  102. invarlock/plugins/__init__.py +12 -0
  103. invarlock/plugins/hello_guard.py +33 -0
  104. invarlock/plugins/hf_awq_adapter.py +82 -0
  105. invarlock/plugins/hf_bnb_adapter.py +79 -0
  106. invarlock/plugins/hf_gptq_adapter.py +78 -0
  107. invarlock/plugins/py.typed +1 -0
  108. invarlock/py.typed +1 -0
  109. invarlock/reporting/__init__.py +7 -0
  110. invarlock/reporting/certificate.py +3221 -0
  111. invarlock/reporting/certificate_schema.py +244 -0
  112. invarlock/reporting/dataset_hashing.py +215 -0
  113. invarlock/reporting/guards_analysis.py +948 -0
  114. invarlock/reporting/html.py +32 -0
  115. invarlock/reporting/normalizer.py +235 -0
  116. invarlock/reporting/policy_utils.py +517 -0
  117. invarlock/reporting/primary_metric_utils.py +265 -0
  118. invarlock/reporting/render.py +1442 -0
  119. invarlock/reporting/report.py +903 -0
  120. invarlock/reporting/report_types.py +278 -0
  121. invarlock/reporting/utils.py +175 -0
  122. invarlock/reporting/validate.py +631 -0
  123. invarlock/security.py +176 -0
  124. invarlock/sparsity_utils.py +323 -0
  125. invarlock/utils/__init__.py +150 -0
  126. invarlock/utils/digest.py +45 -0
  127. invarlock-0.2.0.dist-info/METADATA +586 -0
  128. invarlock-0.2.0.dist-info/RECORD +132 -0
  129. invarlock-0.2.0.dist-info/WHEEL +5 -0
  130. invarlock-0.2.0.dist-info/entry_points.txt +20 -0
  131. invarlock-0.2.0.dist-info/licenses/LICENSE +201 -0
  132. invarlock-0.2.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,588 @@
1
+ """
2
+ Health checking and status monitoring.
3
+ """
4
+
5
+ import logging
6
+ import time
7
+ import traceback
8
+ from collections.abc import Callable
9
+ from dataclasses import dataclass
10
+ from enum import Enum
11
+ from typing import Any
12
+
13
+ import psutil
14
+ import torch
15
+
16
+
17
class HealthStatus(Enum):
    """Severity levels that a health check can report.

    Each member's value is the lowercase string emitted when a status is
    serialized via ``.value``.
    """

    # The component passed its check.
    HEALTHY = "healthy"
    # The component is usable but a check raised a concern.
    WARNING = "warning"
    # The component failed its check or crossed a hard threshold.
    CRITICAL = "critical"
    # No result is available for the component.
    UNKNOWN = "unknown"
24
+
25
+
26
@dataclass
class ComponentHealth:
    """Outcome of one health check for a single named component."""

    # Identifier of the component that was checked.
    name: str
    # Severity reported by the check.
    status: HealthStatus
    # Human-readable one-line summary of the outcome.
    message: str
    # Check-specific supporting data.
    details: dict[str, Any]
    # Time the result was produced; the checks in this module pass time.time().
    timestamp: float

    @property
    def healthy(self) -> bool:
        """Return ``True`` only when ``status`` equals ``HealthStatus.HEALTHY``."""
        is_ok = self.status == HealthStatus.HEALTHY
        return is_ok

    def to_dict(self) -> dict[str, Any]:
        """Return this result as a plain dictionary.

        The status is emitted as its string value and the derived ``healthy``
        flag is included alongside the stored fields. ``details`` is passed
        through by reference, not copied.
        """
        return dict(
            name=self.name,
            status=self.status.value,
            message=self.message,
            details=self.details,
            timestamp=self.timestamp,
            healthy=self.healthy,
        )
51
+
52
+
53
class HealthChecker:
    """Registry of named health checks with a cache of their latest results.

    A check is a zero-argument callable returning a ``ComponentHealth``.
    Results of executed checks are remembered in ``last_results``;
    ``get_overall_status`` and ``get_summary`` only read that cache and never
    run a check themselves.
    """

    def __init__(self):
        """Create empty registries and install the default system checks."""
        self.logger = logging.getLogger(__name__)
        # name -> callable producing a fresh ComponentHealth
        self.health_checks: dict[str, Callable[[], ComponentHealth]] = {}
        # name -> most recent result recorded by check_component()
        self.last_results: dict[str, ComponentHealth] = {}

        self._register_default_checks()

    def register_check(self, name: str, check_func: Callable[[], ComponentHealth]):
        """Store ``check_func`` under ``name``, replacing any previous entry."""
        self.health_checks[name] = check_func
        self.logger.info(f"Registered health check: {name}")

    def check_component(self, name: str) -> ComponentHealth:
        """Run the check registered as ``name`` and return its result.

        An unregistered name yields an ``UNKNOWN`` result that is not cached.
        If the check raises, the exception is converted into a ``CRITICAL``
        result carrying the error text and traceback. Results of registered
        checks (successful or not) are cached in ``last_results``.
        """
        if name in self.health_checks:
            try:
                outcome = self.health_checks[name]()
            except Exception as exc:
                outcome = ComponentHealth(
                    name=name,
                    status=HealthStatus.CRITICAL,
                    message=f"Health check failed: {str(exc)}",
                    details={"error": str(exc), "traceback": traceback.format_exc()},
                    timestamp=time.time(),
                )
            self.last_results[name] = outcome
            return outcome

        return ComponentHealth(
            name=name,
            status=HealthStatus.UNKNOWN,
            message=f"No health check registered for {name}",
            details={},
            timestamp=time.time(),
        )

    def check_all(self) -> dict[str, ComponentHealth]:
        """Run every registered check and return the results keyed by name."""
        return {name: self.check_component(name) for name in self.health_checks}

    def get_overall_status(self) -> HealthStatus:
        """Fold the cached results into a single status.

        ``UNKNOWN`` when nothing has been cached; otherwise the worst of
        ``CRITICAL`` / ``WARNING`` if present, ``HEALTHY`` when every cached
        result is healthy, and ``UNKNOWN`` in any remaining case.
        """
        if not self.last_results:
            return HealthStatus.UNKNOWN

        seen = [outcome.status for outcome in self.last_results.values()]

        # Worst level wins, so probe in decreasing severity.
        for level in (HealthStatus.CRITICAL, HealthStatus.WARNING):
            if level in seen:
                return level

        if all(status == HealthStatus.HEALTHY for status in seen):
            return HealthStatus.HEALTHY
        return HealthStatus.UNKNOWN

    def get_summary(self) -> dict[str, Any]:
        """Return a dictionary describing the cached health state.

        Includes the overall status, the number of registered checks, a count
        of cached results per status value, the newest result timestamp
        (``0`` when nothing is cached) and each cached result as a dict.
        """
        overall = self.get_overall_status()

        tally = {level.value: 0 for level in HealthStatus}
        for outcome in self.last_results.values():
            tally[outcome.status.value] += 1

        if self.last_results:
            newest = max([outcome.timestamp for outcome in self.last_results.values()])
        else:
            newest = 0

        return {
            "overall_status": overall.value,
            "total_components": len(self.health_checks),
            "status_counts": tally,
            "last_check": newest,
            "components": {
                name: outcome.to_dict() for name, outcome in self.last_results.items()
            },
        }

    def _register_default_checks(self):
        """Register the built-in memory, cpu, disk, gpu and pytorch checks.

        Each check wraps its work in ``try``/``except`` and reports a failure
        as a ``ComponentHealth`` instead of raising.
        """
        bytes_per_gib = 1024**3

        def grade(value, warn_above, critical_above):
            """Map a percentage onto a status using strict ``>`` thresholds."""
            if value > critical_above:
                return HealthStatus.CRITICAL
            if value > warn_above:
                return HealthStatus.WARNING
            return HealthStatus.HEALTHY

        def check_memory():
            """Report system memory usage (warning above 80%, critical above 90%)."""
            try:
                vm = psutil.virtual_memory()
                used_pct = vm.percent

                level = grade(used_pct, 80, 90)
                text = {
                    HealthStatus.CRITICAL: f"Critical memory usage: {used_pct:.1f}%",
                    HealthStatus.WARNING: f"High memory usage: {used_pct:.1f}%",
                    HealthStatus.HEALTHY: f"Memory usage normal: {used_pct:.1f}%",
                }[level]

                return ComponentHealth(
                    name="memory",
                    status=level,
                    message=text,
                    details={
                        "percent": used_pct,
                        "available_gb": vm.available / bytes_per_gib,
                        "used_gb": vm.used / bytes_per_gib,
                        "total_gb": vm.total / bytes_per_gib,
                    },
                    timestamp=time.time(),
                )
            except Exception as exc:
                return ComponentHealth(
                    name="memory",
                    status=HealthStatus.CRITICAL,
                    message=f"Failed to check memory: {exc}",
                    details={"error": str(exc)},
                    timestamp=time.time(),
                )

        def check_cpu():
            """Report CPU usage (warning above 85%, critical above 95%).

            Sampling uses a one-second interval, so this check blocks for
            about that long.
            """
            try:
                busy_pct = psutil.cpu_percent(interval=1)

                level = grade(busy_pct, 85, 95)
                text = {
                    HealthStatus.CRITICAL: f"Critical CPU usage: {busy_pct:.1f}%",
                    HealthStatus.WARNING: f"High CPU usage: {busy_pct:.1f}%",
                    HealthStatus.HEALTHY: f"CPU usage normal: {busy_pct:.1f}%",
                }[level]

                return ComponentHealth(
                    name="cpu",
                    status=level,
                    message=text,
                    details={
                        "percent": busy_pct,
                        "core_count": psutil.cpu_count(),
                        "load_avg": psutil.getloadavg()
                        if hasattr(psutil, "getloadavg")
                        else None,
                    },
                    timestamp=time.time(),
                )
            except Exception as exc:
                return ComponentHealth(
                    name="cpu",
                    status=HealthStatus.CRITICAL,
                    message=f"Failed to check CPU: {exc}",
                    details={"error": str(exc)},
                    timestamp=time.time(),
                )

        def check_disk():
            """Report usage of the "/" filesystem (warning above 85%, critical above 95%)."""
            try:
                usage = psutil.disk_usage("/")
                used_pct = (usage.used / usage.total) * 100

                level = grade(used_pct, 85, 95)
                text = {
                    HealthStatus.CRITICAL: f"Critical disk usage: {used_pct:.1f}%",
                    HealthStatus.WARNING: f"High disk usage: {used_pct:.1f}%",
                    HealthStatus.HEALTHY: f"Disk usage normal: {used_pct:.1f}%",
                }[level]

                return ComponentHealth(
                    name="disk",
                    status=level,
                    message=text,
                    details={
                        "percent": used_pct,
                        "free_gb": usage.free / bytes_per_gib,
                        "used_gb": usage.used / bytes_per_gib,
                        "total_gb": usage.total / bytes_per_gib,
                    },
                    timestamp=time.time(),
                )
            except Exception as exc:
                return ComponentHealth(
                    name="disk",
                    status=HealthStatus.CRITICAL,
                    message=f"Failed to check disk: {exc}",
                    details={"error": str(exc)},
                    timestamp=time.time(),
                )

        def check_gpu():
            """Report CUDA memory usage across devices.

            A machine without CUDA is reported as healthy (CPU-only mode).
            The status is driven by the highest per-device allocation
            percentage (warning above 85%, critical above 95%). A failure to
            query the GPU is reported as a warning, not as critical.
            """
            try:
                if not torch.cuda.is_available():
                    return ComponentHealth(
                        name="gpu",
                        status=HealthStatus.HEALTHY,
                        message="GPU not available (CPU-only mode)",
                        details={"cuda_available": False},
                        timestamp=time.time(),
                    )

                device_total = torch.cuda.device_count()
                per_device = {}
                peak_pct = 0

                for idx in range(device_total):
                    props = torch.cuda.get_device_properties(idx)
                    stats = torch.cuda.memory_stats(idx)

                    in_use = stats.get("allocated_bytes.all.current", 0)
                    capacity = props.total_memory
                    share = (in_use / capacity) * 100
                    peak_pct = max(peak_pct, share)

                    per_device[f"gpu_{idx}"] = {
                        "name": props.name,
                        "memory_allocated_gb": in_use / bytes_per_gib,
                        "memory_total_gb": capacity / bytes_per_gib,
                        "memory_percent": share,
                    }

                level = grade(peak_pct, 85, 95)
                text = {
                    HealthStatus.CRITICAL: f"Critical GPU memory usage: {peak_pct:.1f}%",
                    HealthStatus.WARNING: f"High GPU memory usage: {peak_pct:.1f}%",
                    HealthStatus.HEALTHY: (
                        f"GPU status normal: {peak_pct:.1f}% memory used"
                    ),
                }[level]

                return ComponentHealth(
                    name="gpu",
                    status=level,
                    message=text,
                    details={
                        "cuda_available": True,
                        "device_count": device_total,
                        "max_memory_percent": peak_pct,
                        "devices": per_device,
                    },
                    timestamp=time.time(),
                )
            except Exception as exc:
                return ComponentHealth(
                    name="gpu",
                    status=HealthStatus.WARNING,
                    message=f"Failed to check GPU: {exc}",
                    details={"error": str(exc)},
                    timestamp=time.time(),
                )

        def check_pytorch():
            """Run a tiny matrix multiply and report PyTorch/CUDA/MPS details."""
            try:
                # Smoke test: a small matmul exercises basic tensor functionality.
                probe = torch.randn(10, 10)
                torch.mm(probe, probe.t())

                info = {
                    "version": torch.__version__,
                    "cuda_available": torch.cuda.is_available(),
                    "cuda_version": torch.version.cuda
                    if torch.cuda.is_available()
                    else None,
                    "device_count": torch.cuda.device_count()
                    if torch.cuda.is_available()
                    else 0,
                }

                # The MPS key is only added when the backend exists and is usable.
                if hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
                    info["mps_available"] = True

                return ComponentHealth(
                    name="pytorch",
                    status=HealthStatus.HEALTHY,
                    message="PyTorch working correctly",
                    details=info,
                    timestamp=time.time(),
                )
            except Exception as exc:
                return ComponentHealth(
                    name="pytorch",
                    status=HealthStatus.CRITICAL,
                    message=f"PyTorch check failed: {exc}",
                    details={"error": str(exc)},
                    timestamp=time.time(),
                )

        # Registration order determines the order in which check_all() runs them.
        for check_name, check_func in (
            ("memory", check_memory),
            ("cpu", check_cpu),
            ("disk", check_disk),
            ("gpu", check_gpu),
            ("pytorch", check_pytorch),
        ):
            self.register_check(check_name, check_func)
361
+
362
+
363
class InvarLockHealthChecker(HealthChecker):
    """Health checker that adds adapter, guard and dependency checks.

    The default system checks from ``HealthChecker`` are registered first,
    followed by the InvarLock-specific ones.
    """

    def __init__(self):
        """Install the base checks, then the InvarLock-specific checks."""
        super().__init__()
        self._register_invarlock_checks()

    def _register_invarlock_checks(self):
        """Register the ``adapters``, ``guards`` and ``dependencies`` checks."""

        def check_adapters():
            """Try to construct each known adapter and report which ones work.

            Critical when none can be constructed (or the import fails),
            warning when only some can, healthy when all can.
            """
            try:
                from invarlock.adapters import (
                    HF_BERT_Adapter,
                    HF_GPT2_Adapter,
                    HF_LLaMA_Adapter,
                )

                adapters = {
                    "hf_gpt2": HF_GPT2_Adapter,
                    "hf_llama": HF_LLaMA_Adapter,
                    "hf_bert": HF_BERT_Adapter,
                }

                working = []
                broken = []

                for label, factory in adapters.items():
                    try:
                        factory()
                    except Exception as exc:
                        broken.append({"name": label, "error": str(exc)})
                    else:
                        working.append(label)

                if working and not broken:
                    level = HealthStatus.HEALTHY
                    text = f"All adapters available: {working}"
                elif working:
                    level = HealthStatus.WARNING
                    text = f"Some adapters failed: {[f['name'] for f in broken]}"
                else:
                    level = HealthStatus.CRITICAL
                    text = "No adapters available"

                return ComponentHealth(
                    name="adapters",
                    status=level,
                    message=text,
                    details={
                        "available": working,
                        "failed": broken,
                        "total_adapters": len(adapters),
                    },
                    timestamp=time.time(),
                )
            except Exception as exc:
                return ComponentHealth(
                    name="adapters",
                    status=HealthStatus.CRITICAL,
                    message=f"Failed to check adapters: {exc}",
                    details={"error": str(exc)},
                    timestamp=time.time(),
                )

        def check_guards():
            """Try to construct each guard and report which ones work.

            The variance guard is constructed with the "balanced" variance
            policy; the other guards are constructed without arguments.
            """
            try:
                from invarlock.guards import (
                    InvariantsGuard,
                    RMTGuard,
                    SpectralGuard,
                    VarianceGuard,
                )

                guards = {
                    "spectral": SpectralGuard,
                    "rmt": RMTGuard,
                    "invariants": InvariantsGuard,
                    "variance": VarianceGuard,
                }

                working = []
                broken = []

                for label, factory in guards.items():
                    try:
                        if label != "variance":
                            factory()
                        else:
                            # Variance guard needs a policy
                            from invarlock.guards.policies import get_variance_policy

                            factory(get_variance_policy("balanced"))
                    except Exception as exc:
                        broken.append({"name": label, "error": str(exc)})
                    else:
                        working.append(label)

                if working and not broken:
                    level = HealthStatus.HEALTHY
                    text = f"All guards available: {working}"
                elif working:
                    level = HealthStatus.WARNING
                    text = f"Some guards failed: {[f['name'] for f in broken]}"
                else:
                    level = HealthStatus.CRITICAL
                    text = "No guards available"

                return ComponentHealth(
                    name="guards",
                    status=level,
                    message=text,
                    details={
                        "available": working,
                        "failed": broken,
                        "total_guards": len(guards),
                    },
                    timestamp=time.time(),
                )
            except Exception as exc:
                return ComponentHealth(
                    name="guards",
                    status=HealthStatus.CRITICAL,
                    message=f"Failed to check guards: {exc}",
                    details={"error": str(exc)},
                    timestamp=time.time(),
                )

        def check_dependencies():
            """Import each required package and report the ones that are missing.

            A missing ``torch`` is critical; any other missing package is
            reported as a warning.
            """
            try:
                # display name -> importable module name
                dependencies = {
                    "torch": "torch",
                    "transformers": "transformers",
                    "numpy": "numpy",
                    "psutil": "psutil",
                }

                present = []
                absent = []

                for label, module_name in dependencies.items():
                    try:
                        __import__(module_name)
                    except ImportError:
                        absent.append(label)
                    else:
                        present.append(label)

                if not absent:
                    level = HealthStatus.HEALTHY
                    text = "All dependencies available"
                elif "torch" in absent:
                    level = HealthStatus.CRITICAL
                    text = f"Critical dependencies missing: {absent}"
                else:
                    level = HealthStatus.WARNING
                    text = f"Optional dependencies missing: {absent}"

                return ComponentHealth(
                    name="dependencies",
                    status=level,
                    message=text,
                    details={
                        "available": present,
                        "missing": absent,
                        "total_checked": len(dependencies),
                    },
                    timestamp=time.time(),
                )
            except Exception as exc:
                return ComponentHealth(
                    name="dependencies",
                    status=HealthStatus.CRITICAL,
                    message=f"Failed to check dependencies: {exc}",
                    details={"error": str(exc)},
                    timestamp=time.time(),
                )

        # Same registration order as the definitions above.
        for check_name, check_func in (
            ("adapters", check_adapters),
            ("guards", check_guards),
            ("dependencies", check_dependencies),
        ):
            self.register_check(check_name, check_func)
550
+
551
+
552
def create_health_endpoint():
    """Create the building blocks of a simple HTTP health endpoint.

    Returns:
        ``(HTTPServer, HealthHandler)``: the standard-library server class
        and a request-handler class bound to a fresh
        ``InvarLockHealthChecker``. The caller instantiates and runs the
        server. ``(None, None)`` is returned if an ``ImportError`` is raised
        during setup.

    The handler answers ``GET /health`` with the JSON health summary, using
    HTTP 200 when the overall status is "healthy" or "warning" and 503
    otherwise. Any other path receives an empty 404 response.
    """
    try:
        import json
        from http.server import BaseHTTPRequestHandler, HTTPServer

        health_checker = InvarLockHealthChecker()

        class HealthHandler(BaseHTTPRequestHandler):
            """Request handler serving the health summary at ``/health``."""

            def do_GET(self):
                if self.path != "/health":
                    self.send_response(404)
                    self.end_headers()
                    return

                # get_summary() only reports results cached by earlier check
                # runs. Nothing else can reach this closure-local checker, so
                # the checks must be executed here; otherwise the summary is
                # always empty ("unknown") and every request is answered 503.
                health_checker.check_all()
                health_summary = health_checker.get_summary()

                # Serialize before writing the status line so a serialization
                # error cannot leave a half-written response on the wire.
                payload = json.dumps(health_summary, indent=2).encode()

                # Warnings still count as "up"; anything else is unavailable.
                if health_summary["overall_status"] in ("healthy", "warning"):
                    self.send_response(200)
                else:
                    self.send_response(503)  # Service unavailable

                self.send_header("Content-type", "application/json")
                self.end_headers()
                self.wfile.write(payload)

            def log_message(self, format, *args):
                # Suppress default logging
                pass

        return HTTPServer, HealthHandler
    except ImportError:
        return None, None