invarlock 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (132) hide show
  1. invarlock/__init__.py +33 -0
  2. invarlock/__main__.py +10 -0
  3. invarlock/_data/runtime/profiles/ci_cpu.yaml +15 -0
  4. invarlock/_data/runtime/profiles/release.yaml +23 -0
  5. invarlock/_data/runtime/tiers.yaml +76 -0
  6. invarlock/adapters/__init__.py +102 -0
  7. invarlock/adapters/_capabilities.py +45 -0
  8. invarlock/adapters/auto.py +99 -0
  9. invarlock/adapters/base.py +530 -0
  10. invarlock/adapters/base_types.py +85 -0
  11. invarlock/adapters/hf_bert.py +852 -0
  12. invarlock/adapters/hf_gpt2.py +403 -0
  13. invarlock/adapters/hf_llama.py +485 -0
  14. invarlock/adapters/hf_mixin.py +383 -0
  15. invarlock/adapters/hf_onnx.py +112 -0
  16. invarlock/adapters/hf_t5.py +137 -0
  17. invarlock/adapters/py.typed +1 -0
  18. invarlock/assurance/__init__.py +43 -0
  19. invarlock/cli/__init__.py +8 -0
  20. invarlock/cli/__main__.py +8 -0
  21. invarlock/cli/_evidence.py +25 -0
  22. invarlock/cli/_json.py +75 -0
  23. invarlock/cli/adapter_auto.py +162 -0
  24. invarlock/cli/app.py +287 -0
  25. invarlock/cli/commands/__init__.py +26 -0
  26. invarlock/cli/commands/certify.py +403 -0
  27. invarlock/cli/commands/doctor.py +1358 -0
  28. invarlock/cli/commands/explain_gates.py +151 -0
  29. invarlock/cli/commands/export_html.py +100 -0
  30. invarlock/cli/commands/plugins.py +1331 -0
  31. invarlock/cli/commands/report.py +354 -0
  32. invarlock/cli/commands/run.py +4146 -0
  33. invarlock/cli/commands/verify.py +1040 -0
  34. invarlock/cli/config.py +396 -0
  35. invarlock/cli/constants.py +68 -0
  36. invarlock/cli/device.py +92 -0
  37. invarlock/cli/doctor_helpers.py +74 -0
  38. invarlock/cli/errors.py +6 -0
  39. invarlock/cli/overhead_utils.py +60 -0
  40. invarlock/cli/provenance.py +66 -0
  41. invarlock/cli/utils.py +41 -0
  42. invarlock/config.py +56 -0
  43. invarlock/core/__init__.py +62 -0
  44. invarlock/core/abi.py +15 -0
  45. invarlock/core/api.py +274 -0
  46. invarlock/core/auto_tuning.py +317 -0
  47. invarlock/core/bootstrap.py +226 -0
  48. invarlock/core/checkpoint.py +221 -0
  49. invarlock/core/contracts.py +73 -0
  50. invarlock/core/error_utils.py +64 -0
  51. invarlock/core/events.py +298 -0
  52. invarlock/core/exceptions.py +95 -0
  53. invarlock/core/registry.py +481 -0
  54. invarlock/core/retry.py +146 -0
  55. invarlock/core/runner.py +2041 -0
  56. invarlock/core/types.py +154 -0
  57. invarlock/edits/__init__.py +12 -0
  58. invarlock/edits/_edit_utils.py +249 -0
  59. invarlock/edits/_external_utils.py +268 -0
  60. invarlock/edits/noop.py +47 -0
  61. invarlock/edits/py.typed +1 -0
  62. invarlock/edits/quant_rtn.py +801 -0
  63. invarlock/edits/registry.py +166 -0
  64. invarlock/eval/__init__.py +23 -0
  65. invarlock/eval/bench.py +1207 -0
  66. invarlock/eval/bootstrap.py +50 -0
  67. invarlock/eval/data.py +2052 -0
  68. invarlock/eval/metrics.py +2167 -0
  69. invarlock/eval/primary_metric.py +767 -0
  70. invarlock/eval/probes/__init__.py +24 -0
  71. invarlock/eval/probes/fft.py +139 -0
  72. invarlock/eval/probes/mi.py +213 -0
  73. invarlock/eval/probes/post_attention.py +323 -0
  74. invarlock/eval/providers/base.py +67 -0
  75. invarlock/eval/providers/seq2seq.py +111 -0
  76. invarlock/eval/providers/text_lm.py +113 -0
  77. invarlock/eval/providers/vision_text.py +93 -0
  78. invarlock/eval/py.typed +1 -0
  79. invarlock/guards/__init__.py +18 -0
  80. invarlock/guards/_contracts.py +9 -0
  81. invarlock/guards/invariants.py +640 -0
  82. invarlock/guards/policies.py +805 -0
  83. invarlock/guards/py.typed +1 -0
  84. invarlock/guards/rmt.py +2097 -0
  85. invarlock/guards/spectral.py +1419 -0
  86. invarlock/guards/tier_config.py +354 -0
  87. invarlock/guards/variance.py +3298 -0
  88. invarlock/guards_ref/__init__.py +15 -0
  89. invarlock/guards_ref/rmt_ref.py +40 -0
  90. invarlock/guards_ref/spectral_ref.py +135 -0
  91. invarlock/guards_ref/variance_ref.py +60 -0
  92. invarlock/model_profile.py +353 -0
  93. invarlock/model_utils.py +221 -0
  94. invarlock/observability/__init__.py +10 -0
  95. invarlock/observability/alerting.py +535 -0
  96. invarlock/observability/core.py +546 -0
  97. invarlock/observability/exporters.py +565 -0
  98. invarlock/observability/health.py +588 -0
  99. invarlock/observability/metrics.py +457 -0
  100. invarlock/observability/py.typed +1 -0
  101. invarlock/observability/utils.py +553 -0
  102. invarlock/plugins/__init__.py +12 -0
  103. invarlock/plugins/hello_guard.py +33 -0
  104. invarlock/plugins/hf_awq_adapter.py +82 -0
  105. invarlock/plugins/hf_bnb_adapter.py +79 -0
  106. invarlock/plugins/hf_gptq_adapter.py +78 -0
  107. invarlock/plugins/py.typed +1 -0
  108. invarlock/py.typed +1 -0
  109. invarlock/reporting/__init__.py +7 -0
  110. invarlock/reporting/certificate.py +3221 -0
  111. invarlock/reporting/certificate_schema.py +244 -0
  112. invarlock/reporting/dataset_hashing.py +215 -0
  113. invarlock/reporting/guards_analysis.py +948 -0
  114. invarlock/reporting/html.py +32 -0
  115. invarlock/reporting/normalizer.py +235 -0
  116. invarlock/reporting/policy_utils.py +517 -0
  117. invarlock/reporting/primary_metric_utils.py +265 -0
  118. invarlock/reporting/render.py +1442 -0
  119. invarlock/reporting/report.py +903 -0
  120. invarlock/reporting/report_types.py +278 -0
  121. invarlock/reporting/utils.py +175 -0
  122. invarlock/reporting/validate.py +631 -0
  123. invarlock/security.py +176 -0
  124. invarlock/sparsity_utils.py +323 -0
  125. invarlock/utils/__init__.py +150 -0
  126. invarlock/utils/digest.py +45 -0
  127. invarlock-0.2.0.dist-info/METADATA +586 -0
  128. invarlock-0.2.0.dist-info/RECORD +132 -0
  129. invarlock-0.2.0.dist-info/WHEEL +5 -0
  130. invarlock-0.2.0.dist-info/entry_points.txt +20 -0
  131. invarlock-0.2.0.dist-info/licenses/LICENSE +201 -0
  132. invarlock-0.2.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,221 @@
1
+ """
2
+ InvarLock Core Checkpoint System
3
+ ===========================
4
+
5
+ Checkpoint and rollback functionality for safe model editing.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import os
11
+ import shutil
12
+ from contextlib import contextmanager
13
+ from typing import Any
14
+
15
+ from .types import GuardOutcome
16
+
17
+
18
def _use_chunked_snapshot() -> bool:
    """True when the ``INVARLOCK_SNAPSHOT_MODE`` env var selects chunked mode."""
    mode = os.environ.get("INVARLOCK_SNAPSHOT_MODE", "bytes")
    return mode.lower() == "chunked"
21
+
22
+
23
class PolicyCheckpoint:
    """
    Checkpoint manager for policy-based rollback decisions.

    Captures a snapshot of the model up front and can later restore it when
    guard outcomes (or the active policy) call for a rollback.
    """

    def __init__(self, model: Any, adapter: Any, policy: Any):
        """
        Initialize checkpoint.

        Args:
            model: Model to checkpoint
            adapter: ModelAdapter for model operations
            policy: Policy configuration
        """
        self.model = model
        self.adapter = adapter
        self.policy = policy
        self.checkpoint_data: dict[str, Any] | None = None
        self.rollback_performed = False

    def create_checkpoint(self) -> None:
        """Capture the current model state as the rollback target."""
        use_chunked = _use_chunked_snapshot() and hasattr(
            self.adapter, "snapshot_chunked"
        )
        if use_chunked:
            # Disk-backed snapshot: only a directory path is kept in memory.
            self.checkpoint_data = {
                "mode": "chunked",
                "path": self.adapter.snapshot_chunked(self.model),
            }
        else:
            # In-memory snapshot: the adapter serializes the model to bytes.
            self.checkpoint_data = {
                "mode": "bytes",
                "blob": self.adapter.snapshot(self.model),
            }

    def should_rollback(self, outcomes: list[GuardOutcome]) -> tuple[bool, str]:
        """
        Decide whether a rollback is warranted from guard outcomes.

        Args:
            outcomes: List of guard outcomes

        Returns:
            (should_rollback, reason) tuple; reason is "" when no rollback.
        """
        actions = [getattr(outcome, "action", None) for outcome in outcomes]
        # Abort requests take precedence over plain rollback requests.
        if "abort" in actions:
            return True, "guard_abort"
        if "rollback" in actions:
            return True, "guard_rollback"
        # With auto-rollback enabled, any failed guard triggers a rollback.
        if getattr(self.policy, "enable_auto_rollback", False):
            if any(not getattr(outcome, "passed", True) for outcome in outcomes):
                return True, "auto_rollback"
        return False, ""

    def rollback(self, reason: str) -> bool:
        """
        Restore the model from the stored checkpoint.

        Args:
            reason: Reason for rollback (informational)

        Returns:
            True if rollback was successful
        """
        data = self.checkpoint_data
        if data is None:
            return False
        try:
            if data.get("mode", "bytes") == "chunked":
                path = data.get("path")
                if not path or not hasattr(self.adapter, "restore_chunked"):
                    return False
                self.adapter.restore_chunked(self.model, path)
            else:
                self.adapter.restore(self.model, data.get("blob"))
        except Exception:
            # Best-effort contract: failures surface as a False return value.
            return False
        self.rollback_performed = True
        return True

    def cleanup(self) -> None:
        """Release checkpoint resources (removes chunked snapshot dirs)."""
        data = self.checkpoint_data
        if data and data.get("mode") == "chunked":
            path = data.get("path")
            if path and os.path.isdir(path):
                shutil.rmtree(path, ignore_errors=True)
        self.checkpoint_data = None
120
+
121
+
122
@contextmanager
def create_policy_checkpoint(model: Any, adapter: Any, policy: Any):
    """
    Context manager wrapping :class:`PolicyCheckpoint`.

    A snapshot is taken on entry; checkpoint resources are always released
    on exit, even when the body raises.

    Args:
        model: Model to checkpoint
        adapter: ModelAdapter for operations
        policy: Policy configuration

    Yields:
        PolicyCheckpoint instance
    """
    cp = PolicyCheckpoint(model, adapter, policy)
    cp.create_checkpoint()
    try:
        yield cp
    finally:
        cp.cleanup()
142
+
143
+
144
class CheckpointManager:
    """
    Registry of model snapshots keyed by generated checkpoint IDs,
    used during pipeline execution.
    """

    def __init__(self):
        """Initialize an empty checkpoint registry."""
        self.checkpoints: dict[str, dict[str, Any]] = {}
        self.next_id = 1

    def create_checkpoint(self, model: Any, adapter: Any) -> str:
        """
        Snapshot ``model`` and register it under a fresh ID.

        Args:
            model: Model to checkpoint
            adapter: ModelAdapter for serialization

        Returns:
            Checkpoint ID

        Raises:
            RuntimeError: if the adapter fails to produce a snapshot.
        """
        checkpoint_id = f"checkpoint_{self.next_id}"
        self.next_id += 1

        try:
            use_chunked = _use_chunked_snapshot() and hasattr(
                adapter, "snapshot_chunked"
            )
            if use_chunked:
                data: dict[str, Any] = {
                    "mode": "chunked",
                    "path": adapter.snapshot_chunked(model),
                }
            else:
                data = {"mode": "bytes", "blob": adapter.snapshot(model)}
        except Exception as e:
            raise RuntimeError(f"Failed to create checkpoint: {e}") from e
        self.checkpoints[checkpoint_id] = data
        return checkpoint_id

    def restore_checkpoint(self, model: Any, adapter: Any, checkpoint_id: str) -> bool:
        """
        Restore ``model`` from a previously created checkpoint.

        Args:
            model: Model to restore
            adapter: ModelAdapter for deserialization
            checkpoint_id: ID of checkpoint to restore

        Returns:
            True if restoration was successful
        """
        data = self.checkpoints.get(checkpoint_id)
        if data is None:
            return False

        try:
            if data.get("mode", "bytes") == "chunked":
                restore = getattr(adapter, "restore_chunked", None)
                if restore is None:
                    return False
                restore(model, data.get("path"))
            else:
                adapter.restore(model, data.get("blob"))
        except Exception:
            # Best-effort contract: failures surface as a False return value.
            return False
        return True

    def cleanup(self) -> None:
        """Remove all checkpoints (and any chunked snapshot directories)."""
        chunk_paths = [
            data.get("path")
            for data in self.checkpoints.values()
            if data.get("mode") == "chunked"
        ]
        for path in chunk_paths:
            if path and os.path.isdir(path):
                shutil.rmtree(path, ignore_errors=True)
        self.checkpoints.clear()
        self.next_id = 1
219
+
220
+
221
+ __all__ = ["PolicyCheckpoint", "create_policy_checkpoint", "CheckpointManager"]
@@ -0,0 +1,73 @@
1
+ """
2
+ InvarLock Contracts
3
+ ===============
4
+
5
+ Lightweight runtime assertions for monotonic behaviour of guard/edit operations.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import torch
11
+
12
+
13
def enforce_relative_spectral_cap(
    weight: torch.Tensor, baseline_sigma: float | torch.Tensor, cap_ratio: float
) -> torch.Tensor:
    """Clamp the spectral norm of ``weight`` to ``cap_ratio * baseline_sigma``.

    Args:
        weight: Weight tensor to rescale in place.
        baseline_sigma: Reference spectral norm (scalar or 0-d tensor).
        cap_ratio: Maximum allowed ratio vs. the baseline.

    Returns:
        ``weight`` (possibly rescaled in place). Returned unchanged when the
        baseline is non-positive or non-finite.
    """
    baseline_value = float(baseline_sigma)
    # Reject NaN, +/-inf and non-positive baselines with one chained comparison
    # (NaN fails every comparison), instead of allocating a throwaway
    # ``torch.tensor`` just to call ``torch.isfinite`` on a Python float.
    if not (0.0 < baseline_value < float("inf")):
        return weight
    with torch.no_grad():
        sigma = _spectral_norm(weight)
        limit = baseline_value * cap_ratio
        if sigma > limit and sigma > 0:
            # In-place rescale so the largest singular value lands on the cap.
            weight.mul_(limit / sigma)
    return weight
26
+
27
+
28
def enforce_weight_energy_bound(
    approx: torch.Tensor, exact: torch.Tensor, max_relative_error: float
) -> torch.Tensor:
    """Select ``approx`` when its relative error vs. ``exact`` is acceptable.

    Falls back to ``exact`` otherwise. The denominator is clamped to avoid
    division by zero for an all-zero ``exact`` tensor.
    """
    scale = torch.norm(exact).clamp_min(1e-12)
    relative_error = torch.norm(approx - exact) / scale
    return approx if relative_error <= max_relative_error else exact
37
+
38
+
39
def rmt_correction_is_monotone(
    corrected_sigma: float,
    baseline_sigma: float,
    max_ratio: float,
    deadband: float,
) -> bool:
    """
    Validate monotonicity for RMT correction.

    ``corrected_sigma`` must stay within ``baseline_sigma * (1 + deadband)``
    and must not exceed ``max_ratio``. Invalid inputs (negative corrected
    value, non-positive baseline or ratio) fail validation.
    """
    inputs_valid = corrected_sigma >= 0 and baseline_sigma > 0 and max_ratio > 0
    if not inputs_valid:
        return False
    if corrected_sigma > max_ratio:
        return False
    allowed_ceiling = baseline_sigma * (1.0 + deadband)
    return corrected_sigma <= allowed_ceiling
56
+
57
+
58
def _spectral_norm(weight: torch.Tensor) -> float:
    """Return the largest singular value of ``weight``.

    Tensors that are not 2-D are flattened to ``(dim0, -1)`` first.
    """
    matrix = weight if weight.ndim == 2 else weight.view(weight.shape[0], -1)
    try:
        singular_values = torch.linalg.svdvals(matrix)
    except RuntimeError:
        # Fall back through CPU when the tensor's device cannot run SVD.
        singular_values = torch.linalg.svdvals(matrix.cpu()).to(matrix.device)
    return float(singular_values.max().item())
67
+
68
+
69
+ __all__ = [
70
+ "enforce_relative_spectral_cap",
71
+ "enforce_weight_energy_bound",
72
+ "rmt_correction_is_monotone",
73
+ ]
@@ -0,0 +1,64 @@
1
+ from __future__ import annotations
2
+
3
+ from collections.abc import Callable
4
+ from contextlib import ContextDecorator
5
+ from dataclasses import dataclass
6
+ from typing import Any, Generic, Literal, TypeVar
7
+
8
+ from .exceptions import InvarlockError
9
+
10
+ T = TypeVar("T", bound=InvarlockError)
11
+
12
+
13
+ ContextFn = Callable[[BaseException], dict[str, Any] | None]
14
+
15
+
16
@dataclass
class _WrapErrors(ContextDecorator, Generic[T]):  # noqa: UP046
    """Context manager/decorator that translates foreign exceptions.

    Any exception escaping the guarded block that is not already an
    ``InvarlockError`` is re-raised as ``target_exc`` (chained via
    ``raise ... from``), carrying ``code``/``message`` and optional context
    computed by ``context_fn``.
    """

    target_exc: type[T]
    code: str
    message: str
    context_fn: ContextFn | None = None

    def __enter__(self) -> _WrapErrors:  # pragma: no cover - trivial
        return self

    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc: BaseException | None,
        tb: Any,
    ) -> Literal[False]:
        # Nothing raised, or the error already belongs to the InvarLock
        # hierarchy: let it propagate untouched (no double-wrapping).
        if exc is None or isinstance(exc, InvarlockError):
            return False
        details = self.context_fn(exc) if self.context_fn is not None else None
        raise self.target_exc(
            code=self.code, message=self.message, details=details
        ) from exc
41
+
42
+
43
def wrap_errors(  # noqa: UP047
    target_exc: type[T],
    code: str,
    message: str,
    context_fn: ContextFn | None = None,
) -> _WrapErrors[T]:
    """Build a context manager/decorator that wraps arbitrary exceptions.

    Context-manager form::

        with wrap_errors(AdapterError, "E202", "ADAPTER-LOAD-FAILED", ctx):
            risky()

    Decorator form::

        @wrap_errors(ValidationError, "E301", "VALIDATION-FAILED")
        def f(...): ...
    """
    return _WrapErrors(
        target_exc=target_exc,
        code=code,
        message=message,
        context_fn=context_fn,
    )
62
+
63
+
64
+ __all__ = ["wrap_errors"]
@@ -0,0 +1,298 @@
1
+ """
2
+ InvarLock Event Logger
3
+ ==================
4
+
5
+ JSONL event logging for pipeline execution tracking.
6
+ Provides structured logging for analysis and debugging.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import json
12
+ import time
13
+ from collections.abc import Mapping, Sequence
14
+ from datetime import datetime
15
+ from pathlib import Path
16
+ from typing import Any, TextIO
17
+
18
+ from .types import LogLevel
19
+
20
+ __all__ = ["EventLogger"]
21
+
22
+
23
class EventLogger:
    """
    JSONL event logger for InvarLock pipeline execution.

    Logs structured events (one JSON object per line) to a file for
    analysis, debugging, and audit trails. Values under sensitive-looking
    keys are redacted and oversized strings are truncated before writing.

    NOTE(review): no locking is visible in this class, so concurrent use
    relies on the underlying file object's write semantics — confirm
    thread-safety expectations with callers before depending on them.
    """

    # Key substrings whose values are replaced with "<redacted>" during
    # sanitization (matched case-insensitively, as substrings of the key).
    SENSITIVE_KEYWORDS: Sequence[str] = (
        "token",
        "secret",
        "password",
        "passphrase",
        "api_key",
        "credential",
        "auth",
        "email",
    )

    def __init__(
        self,
        log_path: Path,
        auto_flush: bool = True,
        *,
        run_id: str | None = None,
        redact_keywords: Sequence[str] | None = None,
        max_string_length: int = 512,
    ):
        """
        Initialize event logger.

        Args:
            log_path: Path to JSONL log file (parent dirs are created;
                the file is opened in append mode)
            auto_flush: Whether to flush after each write
            run_id: Optional run identifier to include in every log entry
            redact_keywords: Iterable of keywords whose values should be
                redacted; defaults to ``SENSITIVE_KEYWORDS``
            max_string_length: Maximum length for logged strings before
                truncation

        Raises:
            OSError: if the log file cannot be opened.
        """
        self.log_path = Path(log_path)
        self.auto_flush = auto_flush
        self._file: TextIO | None = None
        self._session_id = self._generate_session_id()
        self._run_id = run_id
        # Lower-cased once here; all matching below is case-insensitive.
        self._redact_keywords = tuple(
            keyword.lower() for keyword in (redact_keywords or self.SENSITIVE_KEYWORDS)
        )
        # Honor caller-provided limit; clamp to a small positive minimum
        self._max_string_length = max(1, int(max_string_length))

        # Ensure parent directory exists
        self.log_path.parent.mkdir(parents=True, exist_ok=True)

        # Open file for writing
        self._open_log_file()

        # Log session start
        session_start_payload = {
            "session_id": self._session_id,
            "log_path": str(self.log_path),
        }
        if self._run_id:
            session_start_payload["run_id"] = self._run_id
        self.log("logger", "session_start", LogLevel.INFO, session_start_payload)

    def _generate_session_id(self) -> str:
        """Generate unique session ID (second-resolution Unix timestamp)."""
        return f"session_{int(time.time())}"

    def _open_log_file(self) -> None:
        """Open the log file for appending; wrap failures in ``OSError``."""
        try:
            self._file = open(self.log_path, "a", encoding="utf-8")
        except Exception as e:  # pragma: no cover - defensive guard
            raise OSError(f"Failed to open log file {self.log_path}: {e}") from e

    def log(
        self,
        component: str,
        operation: str,
        level: LogLevel,
        data: dict[str, Any] | None = None,
    ) -> None:
        """
        Log an event as one JSONL line.

        Silently does nothing when the file is closed; serialization
        failures are reported on stderr rather than raised.

        Args:
            component: Component generating the event (e.g., "runner", "edit", "guard")
            operation: Operation being performed (e.g., "start", "complete", "error")
            level: Log level
            data: Optional additional data (sanitized before writing)
        """
        if not self._file:
            return

        event: dict[str, Any] = {
            "timestamp": datetime.now().isoformat(),
            "session_id": self._session_id,
            "component": component,
            "operation": operation,
            "level": level.value,
        }

        if self._run_id:
            event["run_id"] = self._run_id

        # Note: falsy data (None or {}) is omitted entirely.
        if data:
            event["data"] = self._sanitize_data(data)

        try:
            json_line = json.dumps(event, default=self._json_serializer)
            self._file.write(json_line + "\n")

            if self.auto_flush:
                self._file.flush()

        except Exception as e:  # pragma: no cover - fallback to stderr
            import sys

            print(f"Event logging failed: {e}", file=sys.stderr)

    def _sanitize_data(self, data: dict[str, Any]) -> dict[str, Any]:
        """
        Sanitize data for JSON serialization.

        Removes non-serializable objects and large data structures,
        and redacts potential secrets. Check order matters: key-based
        redaction first, then string handling (value-based redaction and
        truncation), then containers, then scalars.
        """

        def sanitize_value(key: str | None, value: Any) -> Any:
            key_lower = key.lower() if isinstance(key, str) else ""
            # Sensitive key name wins regardless of the value's type.
            if key_lower and any(word in key_lower for word in self._redact_keywords):
                return "<redacted>"

            if isinstance(value, str):
                # A sensitive word inside the value itself also redacts it.
                if any(word in value.lower() for word in self._redact_keywords):
                    return "<redacted>"
                if len(value) > self._max_string_length:
                    return f"<str len={len(value)}>"
                return value

            if isinstance(value, Mapping):
                return {
                    inner_key: sanitize_value(str(inner_key), inner_value)
                    for inner_key, inner_value in value.items()
                }

            # Common numeric array-like
            if hasattr(value, "tolist"):
                try:
                    return value.tolist()
                except Exception:
                    pass

            # isinstance with a union type requires Python 3.10+.
            if isinstance(value, set | frozenset):
                try:
                    return list(value)
                except Exception:
                    pass

            # List items inherit the parent key, so a list under a sensitive
            # key was already redacted above.
            if isinstance(value, Sequence) and not isinstance(
                value, str | bytes | bytearray
            ):
                return [sanitize_value(key, item) for item in value]

            if isinstance(value, bytes):
                return f"<bytes len={len(value)}>"
            # Preserve JSON-native scalars; coerce others to a placeholder string
            if value is None or isinstance(value, bool | int | float):
                return value
            return f"<{type(value).__name__}>"

        return {key: sanitize_value(key, value) for key, value in data.items()}

    def _json_serializer(self, obj: Any) -> Any:
        """Custom JSON serializer for common non-serializable types.

        Used as ``json.dumps(default=...)``: only called for objects the
        encoder cannot handle natively. Falls back to ``str(obj)``.
        """
        if hasattr(obj, "tolist"):  # numpy arrays
            return obj.tolist()
        if hasattr(obj, "__dict__"):  # custom objects
            return str(obj)
        if isinstance(obj, set | frozenset):
            return list(obj)
        if isinstance(obj, bytes):
            return obj.decode("utf-8", errors="replace")
        return str(obj)

    def log_error(
        self,
        component: str,
        operation: str,
        error: Exception,
        context: dict[str, Any] | None = None,
    ) -> None:
        """
        Convenience method for logging errors.

        Args:
            component: Component where error occurred
            operation: Operation that failed
            error: The exception that occurred
            context: Optional additional context
        """
        error_data: dict[str, Any] = {
            "error_type": type(error).__name__,
            "error_message": str(error),
        }

        if context:
            error_data["context"] = context

        self.log(component, operation, LogLevel.ERROR, error_data)

    def log_metric(
        self, component: str, metric_name: str, value: float, unit: str | None = None
    ) -> None:
        """
        Convenience method for logging metrics.

        Args:
            component: Component reporting the metric
            metric_name: Name of the metric
            value: Metric value
            unit: Optional unit (e.g., "seconds", "MB", "ratio")
        """
        metric_data = {"metric": metric_name, "value": value}

        if unit:
            metric_data["unit"] = unit

        self.log(component, "metric", LogLevel.INFO, metric_data)

    def log_checkpoint(
        self, component: str, checkpoint_id: str, operation: str
    ) -> None:
        """
        Convenience method for logging checkpoint operations.

        Args:
            component: Component handling the checkpoint
            checkpoint_id: Unique checkpoint identifier
            operation: Checkpoint operation ("create", "restore", "delete")
        """
        checkpoint_data = {
            "checkpoint_id": checkpoint_id,
            "checkpoint_operation": operation,
        }

        self.log(component, "checkpoint", LogLevel.INFO, checkpoint_data)

    def close(self) -> None:
        """Close the log file (writes a session_end event first). Idempotent."""
        if self._file:
            session_end_payload = {"session_id": self._session_id}
            if self._run_id:
                session_end_payload["run_id"] = self._run_id
            self.log("logger", "session_end", LogLevel.INFO, session_end_payload)

            try:
                self._file.close()
            except Exception:  # pragma: no cover - best-effort cleanup
                pass
            finally:
                # Always drop the handle so later log() calls become no-ops.
                self._file = None

    def __enter__(self):
        """Context manager entry."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Context manager exit."""
        self.close()

    def __del__(self):
        """Destructor - ensure file is closed.

        The hasattr guard protects against partially-constructed instances
        (e.g. when __init__ raised before _file was assigned).
        """
        if hasattr(self, "_file"):
            self.close()