crprotocol 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (153) hide show
  1. crp/__init__.py +126 -0
  2. crp/__main__.py +8 -0
  3. crp/_typing.py +27 -0
  4. crp/_version.py +5 -0
  5. crp/adapters.py +31 -0
  6. crp/advanced/__init__.py +40 -0
  7. crp/advanced/auto_ingest.py +400 -0
  8. crp/advanced/cqs.py +235 -0
  9. crp/advanced/cross_window.py +477 -0
  10. crp/advanced/curator.py +265 -0
  11. crp/advanced/feedback.py +146 -0
  12. crp/advanced/hierarchical.py +211 -0
  13. crp/advanced/meta_learning.py +401 -0
  14. crp/advanced/parallel.py +98 -0
  15. crp/advanced/review_cycle.py +329 -0
  16. crp/advanced/scale_mode.py +129 -0
  17. crp/advanced/source_grounding.py +207 -0
  18. crp/ckf/__init__.py +35 -0
  19. crp/ckf/community.py +377 -0
  20. crp/ckf/fabric.py +445 -0
  21. crp/ckf/gc.py +175 -0
  22. crp/ckf/graph_walk.py +87 -0
  23. crp/ckf/merge.py +133 -0
  24. crp/ckf/pattern_query.py +122 -0
  25. crp/ckf/pubsub.py +128 -0
  26. crp/ckf/semantic.py +207 -0
  27. crp/cli/__init__.py +7 -0
  28. crp/cli/main.py +329 -0
  29. crp/cli/sidecar.py +929 -0
  30. crp/cli/startup.py +272 -0
  31. crp/continuation/__init__.py +103 -0
  32. crp/continuation/completion.py +348 -0
  33. crp/continuation/degradation.py +157 -0
  34. crp/continuation/document_map.py +160 -0
  35. crp/continuation/flow.py +109 -0
  36. crp/continuation/gap.py +419 -0
  37. crp/continuation/manager.py +484 -0
  38. crp/continuation/quality_monitor.py +179 -0
  39. crp/continuation/stitch.py +419 -0
  40. crp/continuation/trigger.py +142 -0
  41. crp/continuation/voice.py +157 -0
  42. crp/core/__init__.py +69 -0
  43. crp/core/batch.py +77 -0
  44. crp/core/circuit_breaker.py +116 -0
  45. crp/core/config.py +377 -0
  46. crp/core/context_tools.py +540 -0
  47. crp/core/dispatch_router.py +3977 -0
  48. crp/core/errors.py +128 -0
  49. crp/core/extraction_facade.py +384 -0
  50. crp/core/facilitator.py +713 -0
  51. crp/core/idempotency.py +215 -0
  52. crp/core/orchestrator.py +1435 -0
  53. crp/core/relay_strategies.py +613 -0
  54. crp/core/security_manager.py +140 -0
  55. crp/core/session.py +134 -0
  56. crp/core/task_intent.py +36 -0
  57. crp/core/window.py +363 -0
  58. crp/envelope/__init__.py +30 -0
  59. crp/envelope/builder.py +288 -0
  60. crp/envelope/decomposer.py +236 -0
  61. crp/envelope/formatter.py +168 -0
  62. crp/envelope/packer.py +211 -0
  63. crp/envelope/reranker.py +209 -0
  64. crp/envelope/scoring.py +310 -0
  65. crp/extraction/__init__.py +45 -0
  66. crp/extraction/complexity.py +96 -0
  67. crp/extraction/contradiction.py +132 -0
  68. crp/extraction/pipeline.py +360 -0
  69. crp/extraction/quality_gate.py +237 -0
  70. crp/extraction/stage1_regex.py +173 -0
  71. crp/extraction/stage2_statistical.py +244 -0
  72. crp/extraction/stage3_gliner.py +210 -0
  73. crp/extraction/stage4_uie.py +183 -0
  74. crp/extraction/stage5_discourse.py +175 -0
  75. crp/extraction/stage6_llm.py +178 -0
  76. crp/extraction/structured_output.py +219 -0
  77. crp/extraction/types.py +299 -0
  78. crp/license_guard.py +722 -0
  79. crp/observability/__init__.py +30 -0
  80. crp/observability/audit.py +118 -0
  81. crp/observability/events.py +233 -0
  82. crp/observability/metrics.py +264 -0
  83. crp/observability/quality.py +135 -0
  84. crp/observability/structured_logging.py +81 -0
  85. crp/observability/telemetry.py +117 -0
  86. crp/provenance/__init__.py +314 -0
  87. crp/provenance/_embeddings.py +97 -0
  88. crp/provenance/_types.py +378 -0
  89. crp/provenance/attribution_scorer.py +252 -0
  90. crp/provenance/claim_detector.py +229 -0
  91. crp/provenance/contradiction_detector.py +243 -0
  92. crp/provenance/distortion_detector.py +397 -0
  93. crp/provenance/entailment_verifier.py +358 -0
  94. crp/provenance/fabrication_detector.py +203 -0
  95. crp/provenance/hallucination_scorer.py +320 -0
  96. crp/provenance/omission_analyzer.py +106 -0
  97. crp/provenance/provenance_chain.py +205 -0
  98. crp/provenance/report_generator.py +440 -0
  99. crp/providers/__init__.py +43 -0
  100. crp/providers/anthropic.py +270 -0
  101. crp/providers/base.py +135 -0
  102. crp/providers/custom.py +63 -0
  103. crp/providers/diagnostic.py +251 -0
  104. crp/providers/llamacpp.py +224 -0
  105. crp/providers/manager.py +139 -0
  106. crp/providers/ollama.py +243 -0
  107. crp/providers/openai.py +628 -0
  108. crp/providers/tokenizers.py +48 -0
  109. crp/py.typed +0 -0
  110. crp/resources/__init__.py +53 -0
  111. crp/resources/adaptive_allocator.py +525 -0
  112. crp/resources/cost_model.py +388 -0
  113. crp/resources/overhead_manager.py +217 -0
  114. crp/resources/resource_manager.py +262 -0
  115. crp/schemas/__init__.py +20 -0
  116. crp/schemas/cost-estimate.json +33 -0
  117. crp/schemas/crp-error.json +43 -0
  118. crp/schemas/envelope-preview.json +40 -0
  119. crp/schemas/persisted-state-header.json +27 -0
  120. crp/schemas/quality-report.json +94 -0
  121. crp/schemas/session-handle.json +33 -0
  122. crp/schemas/session-status.json +57 -0
  123. crp/schemas/stream-event.json +18 -0
  124. crp/schemas/task-intent.json +42 -0
  125. crp/security/__init__.py +93 -0
  126. crp/security/audit_trail.py +392 -0
  127. crp/security/binding.py +192 -0
  128. crp/security/compliance.py +813 -0
  129. crp/security/consent.py +593 -0
  130. crp/security/embedding_defense.py +161 -0
  131. crp/security/encryption.py +202 -0
  132. crp/security/injection.py +335 -0
  133. crp/security/integrity.py +267 -0
  134. crp/security/privacy.py +662 -0
  135. crp/security/quarantine.py +249 -0
  136. crp/security/rbac.py +221 -0
  137. crp/security/validation.py +164 -0
  138. crp/state/__init__.py +31 -0
  139. crp/state/cold_storage.py +258 -0
  140. crp/state/compaction.py +263 -0
  141. crp/state/critical_state.py +104 -0
  142. crp/state/event_log.py +313 -0
  143. crp/state/fact.py +189 -0
  144. crp/state/serialization.py +189 -0
  145. crp/state/session_cleanup.py +77 -0
  146. crp/state/snapshot.py +290 -0
  147. crp/state/warm_store.py +346 -0
  148. crprotocol-2.0.0.dist-info/METADATA +1295 -0
  149. crprotocol-2.0.0.dist-info/RECORD +153 -0
  150. crprotocol-2.0.0.dist-info/WHEEL +4 -0
  151. crprotocol-2.0.0.dist-info/entry_points.txt +2 -0
  152. crprotocol-2.0.0.dist-info/licenses/LICENSE.md +170 -0
  153. crprotocol-2.0.0.dist-info/licenses/NOTICE +18 -0
@@ -0,0 +1,251 @@
1
+ # Copyright © 2025 Constantinos Vidiniotis. All rights reserved.
2
+ # Licensed under Elastic License 2.0 — see LICENSE.md for details.
3
+ """ProviderDiagnostic — 6-step health check (§05, §06 §26.3).
4
+
5
+ Executed at init() and on demand via diagnose(). Each step produces
6
+ a diagnostic code string indicating pass/fail/warning.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import time
12
+ from dataclasses import dataclass, field
13
+ from enum import Enum
14
+ from typing import Any
15
+
16
+ from crp.providers.base import LLMProvider
17
+
18
+
19
class DiagnosticCode(str, Enum):
    """Result codes emitted by the six provider diagnostic steps.

    The ``str`` mix-in makes every member compare equal to its plain
    string value, so codes can be serialised or compared without ``.value``.
    Members are grouped by the diagnostic step that produces them.
    """

    # --- Step 1: endpoint availability ---------------------------------
    ENDPOINT_REACHABLE = "endpoint_reachable"
    ENDPOINT_UNREACHABLE = "endpoint_unreachable"
    ENDPOINT_TIMEOUT = "endpoint_timeout"

    # --- Step 2: authentication ----------------------------------------
    AUTH_VALID = "auth_valid"
    AUTH_INVALID = "auth_invalid"
    AUTH_EXPIRED = "auth_expired"

    # --- Step 3: capability probing ------------------------------------
    CAPABILITIES_DETERMINED = "capabilities_determined"
    CAPABILITIES_UNAVAILABLE = "capabilities_unavailable"

    # --- Step 4: tokenizer validation ----------------------------------
    TOKENIZER_ACCURATE = "tokenizer_accurate"
    TOKENIZER_MISMATCH = "tokenizer_mismatch"
    TOKENIZER_UNAVAILABLE = "tokenizer_unavailable"

    # --- Step 5: inference test ----------------------------------------
    INFERENCE_WORKING = "inference_working"
    INFERENCE_SLOW = "inference_slow"
    RATE_LIMITED = "rate_limited"

    # --- Step 6: context window check ----------------------------------
    CONTEXT_WINDOW_VALID = "context_window_valid"
    CONTEXT_WINDOW_MISMATCH = "context_window_mismatch"
49
+
50
+
51
@dataclass
class DiagnosticResult:
    """Outcome of one diagnostic step.

    Attributes:
        step: 1-based position of the step in the diagnostic sequence.
        name: Short step identifier (e.g. ``"endpoint"``, ``"auth"``).
        code: The ``DiagnosticCode`` the step produced.
        latency_ms: Wall-clock time the step took, in milliseconds.
        details: Free-form extra data (measurements or an ``"error"`` text).
    """

    step: int
    name: str
    code: DiagnosticCode
    latency_ms: float = 0.0
    details: dict[str, Any] = field(default_factory=dict)

    @property
    def passed(self) -> bool:
        """True when ``code`` is one of the module-level passing codes."""
        is_passing = self.code in _PASSING_CODES
        return is_passing
64
+
65
+
66
# The one successful outcome of each of the six steps. Any other code
# (including warning-style ones such as INFERENCE_SLOW) is not in this set,
# so DiagnosticResult.passed is False for it.
_PASSING_CODES = frozenset(
    (
        DiagnosticCode.ENDPOINT_REACHABLE,
        DiagnosticCode.AUTH_VALID,
        DiagnosticCode.CAPABILITIES_DETERMINED,
        DiagnosticCode.TOKENIZER_ACCURATE,
        DiagnosticCode.INFERENCE_WORKING,
        DiagnosticCode.CONTEXT_WINDOW_VALID,
    )
)
74
+
75
+
76
@dataclass
class DiagnosticReport:
    """Aggregated outcome of a full diagnostic run.

    Attributes:
        provider_name: Name of the provider that was diagnosed.
        results: Per-step results, in the order the steps were executed.
        total_latency_ms: Wall-clock duration of the whole run, in ms.
    """

    provider_name: str
    results: list[DiagnosticResult] = field(default_factory=list)
    total_latency_ms: float = 0.0

    @property
    def all_passed(self) -> bool:
        """True when every recorded step passed (also True with no results)."""
        for step_result in self.results:
            if not step_result.passed:
                return False
        return True

    @property
    def summary(self) -> dict[str, str]:
        """Map each step name to the string value of its diagnostic code."""
        overview: dict[str, str] = {}
        for step_result in self.results:
            overview[step_result.name] = step_result.code.value
        return overview
91
+
92
+
93
class ProviderDiagnostic:
    """Execute the 6-step diagnostic sequence on an LLM provider.

    Each step is isolated: it catches its own exceptions and always returns
    a ``DiagnosticResult``, so one failing step never prevents later steps
    from running.
    """

    # Sample texts of strictly increasing length for tokenizer validation (Step 4).
    _SAMPLE_SHORT = "Hello world"
    _SAMPLE_MEDIUM = "The quick brown fox jumps over the lazy dog. " * 10
    _SAMPLE_LONG = "CRP context relay protocol " * 200

    # Step 5: a generation slower than this (milliseconds) is reported as slow.
    _SLOW_INFERENCE_MS = 30_000

    def diagnose(self, provider: LLMProvider) -> DiagnosticReport:
        """Run all 6 diagnostic steps in order and return the full report.

        Args:
            provider: The provider to probe.

        Returns:
            A ``DiagnosticReport`` holding one result per step plus the total
            wall-clock duration of the run.
        """
        report = DiagnosticReport(provider_name=provider.model_name)
        start = time.monotonic()

        steps = (
            self._step1_endpoint,
            self._step2_auth,
            self._step3_capabilities,
            self._step4_tokenizer,
            self._step5_inference,
            self._step6_context_window,
        )
        for run_step in steps:
            report.results.append(run_step(provider))

        report.total_latency_ms = self._elapsed_ms(start)
        return report

    @staticmethod
    def _elapsed_ms(t0: float) -> float:
        """Return the milliseconds elapsed since the monotonic timestamp *t0*."""
        return (time.monotonic() - t0) * 1000

    # ------------------------------------------------------------------
    # Individual diagnostic steps
    # ------------------------------------------------------------------

    def _step1_endpoint(self, provider: LLMProvider) -> DiagnosticResult:
        """Step 1: endpoint availability — verify provider is reachable."""
        t0 = time.monotonic()
        try:
            # For CustomProvider this will always pass; cloud providers
            # will override with actual HTTP ping in their adapters.
            provider.context_window_size()
        except TimeoutError:
            return DiagnosticResult(
                step=1,
                name="endpoint",
                code=DiagnosticCode.ENDPOINT_TIMEOUT,
                latency_ms=self._elapsed_ms(t0),
            )
        except Exception as exc:
            return DiagnosticResult(
                step=1,
                name="endpoint",
                code=DiagnosticCode.ENDPOINT_UNREACHABLE,
                latency_ms=self._elapsed_ms(t0),
                details={"error": str(exc)},
            )
        return DiagnosticResult(
            step=1,
            name="endpoint",
            code=DiagnosticCode.ENDPOINT_REACHABLE,
            latency_ms=self._elapsed_ms(t0),
        )

    def _step2_auth(self, provider: LLMProvider) -> DiagnosticResult:
        """Step 2: authentication — verify API key/token accepted."""
        # Phase 1: CustomProvider has no auth; cloud adapters will override.
        # Until then this step unconditionally reports AUTH_VALID.
        return DiagnosticResult(step=2, name="auth", code=DiagnosticCode.AUTH_VALID)

    def _step3_capabilities(self, provider: LLMProvider) -> DiagnosticResult:
        """Step 3: capability probing — query context window and output limit."""
        t0 = time.monotonic()
        try:
            ctx = provider.context_window_size()
            latency = self._elapsed_ms(t0)
            return DiagnosticResult(
                step=3,
                name="capabilities",
                code=DiagnosticCode.CAPABILITIES_DETERMINED,
                latency_ms=latency,
                details={"context_window": ctx, "max_output": provider.max_output_tokens},
            )
        except Exception as exc:
            return DiagnosticResult(
                step=3,
                name="capabilities",
                code=DiagnosticCode.CAPABILITIES_UNAVAILABLE,
                latency_ms=self._elapsed_ms(t0),
                details={"error": str(exc)},
            )

    def _step4_tokenizer(self, provider: LLMProvider) -> DiagnosticResult:
        """Step 4: tokenizer validation — count tokens on 3 sample texts."""
        t0 = time.monotonic()
        try:
            counts = [
                provider.count_tokens(sample)
                for sample in (self._SAMPLE_SHORT, self._SAMPLE_MEDIUM, self._SAMPLE_LONG)
            ]
        except Exception as exc:
            return DiagnosticResult(
                step=4,
                name="tokenizer",
                code=DiagnosticCode.TOKENIZER_UNAVAILABLE,
                latency_ms=self._elapsed_ms(t0),
                details={"error": str(exc)},
            )

        short, medium, long_ = counts
        # Sanity: all counts must be positive and grow with the text length.
        plausible = all(c > 0 for c in counts) and long_ > medium > short
        code = (
            DiagnosticCode.TOKENIZER_ACCURATE
            if plausible
            else DiagnosticCode.TOKENIZER_MISMATCH
        )
        return DiagnosticResult(
            step=4,
            name="tokenizer",
            code=code,
            latency_ms=self._elapsed_ms(t0),
            details={"sample_counts": counts},
        )

    def _step5_inference(self, provider: LLMProvider) -> DiagnosticResult:
        """Step 5: inference test — minimal generation request."""
        t0 = time.monotonic()
        try:
            output, finish_reason = provider.generate_chat(
                [{"role": "user", "content": "count: 1"}]
            )
            latency = self._elapsed_ms(t0)
            if latency > self._SLOW_INFERENCE_MS:
                code = DiagnosticCode.INFERENCE_SLOW
            else:
                code = DiagnosticCode.INFERENCE_WORKING
            return DiagnosticResult(
                step=5,
                name="inference",
                code=code,
                latency_ms=latency,
                details={"output_length": len(output), "finish_reason": finish_reason},
            )
        except Exception as exc:
            err_str = str(exc).lower()
            if "rate" in err_str or "429" in err_str:
                code = DiagnosticCode.RATE_LIMITED
            else:
                # NOTE(review): there is no dedicated failure code for this
                # step, so a non-rate-limit exception is deliberately reported
                # as working; the error text is preserved in ``details``.
                code = DiagnosticCode.INFERENCE_WORKING  # Can't test, assume working
            return DiagnosticResult(
                step=5,
                name="inference",
                code=code,
                latency_ms=self._elapsed_ms(t0),
                details={"error": str(exc)},
            )

    def _step6_context_window(self, provider: LLMProvider) -> DiagnosticResult:
        """Step 6: context window test — check the reported size is plausible.

        Builds a probe text sized relative to the reported context window and
        checks that the provider's own token count for it lands between 50%
        and 100% of the window (exclusive).
        """
        t0 = time.monotonic()
        try:
            ctx = provider.context_window_size()
            # One "word " repetition is 5 characters: roughly 1 token for a
            # real tokenizer and 1.25 tokens for a chars/4 heuristic. Using
            # 3/5 of the window as the repetition count therefore yields a
            # ratio of ~0.60 or ~0.75, both inside the accepted band. (The
            # previous ``ctx // 5`` produced ~0.20-0.25, so this step could
            # never pass.)
            test_text = "word " * (ctx * 3 // 5)
            token_count = provider.count_tokens(test_text)
            ratio = token_count / ctx if ctx > 0 else 0
            if 0.5 < ratio < 1.0:
                code = DiagnosticCode.CONTEXT_WINDOW_VALID
            else:
                code = DiagnosticCode.CONTEXT_WINDOW_MISMATCH
            return DiagnosticResult(
                step=6,
                name="context_window",
                code=code,
                latency_ms=self._elapsed_ms(t0),
                details={"context_window": ctx, "test_tokens": token_count, "ratio": ratio},
            )
        except Exception as exc:
            return DiagnosticResult(
                step=6,
                name="context_window",
                code=DiagnosticCode.CONTEXT_WINDOW_MISMATCH,
                latency_ms=self._elapsed_ms(t0),
                details={"error": str(exc)},
            )
@@ -0,0 +1,224 @@
1
+ # Copyright © 2025 Constantinos Vidiniotis. All rights reserved.
2
+ # Licensed under Elastic License 2.0 — see LICENSE.md for details.
3
+ """llama.cpp adapter — local model inference via llama-cpp-python or HTTP (§6.1).
4
+
5
+ Supports two modes:
6
+ 1. **Python binding**: ``pip install llama-cpp-python`` (in-process inference).
7
+ 2. **HTTP server**: ``llama-server --model model.gguf --port 8080``
8
+ (talks to llama.cpp's OpenAI-compatible endpoint).
9
+
10
+ Usage (Python binding)::
11
+
12
+ from crp.providers.llamacpp import LlamaCppAdapter
13
+
14
+ provider = LlamaCppAdapter(model_path="/models/llama3-8b.gguf")
15
+ output, reason = provider.generate_chat([
16
+ {"role": "user", "content": "Hello!"},
17
+ ])
18
+
19
+ Usage (HTTP server)::
20
+
21
+ provider = LlamaCppAdapter(server_url="http://localhost:8080")
22
+ """
23
+
24
+ from __future__ import annotations
25
+
26
+ import json
27
+ import logging
28
+ import os
29
+ import random
30
+ import time
31
+ import urllib.request
32
+ import urllib.error
33
+ from typing import Any
34
+
35
+ from crp.providers.base import LLMProvider
36
+
37
+ logger = logging.getLogger("crp.providers.llamacpp")
38
+
39
+
40
class LlamaCppAdapter(LLMProvider):
    """llama.cpp adapter — local inference or HTTP server.

    Generation never raises for backend failures: both modes log the error
    and return ``("", "error")`` instead.

    Args:
        model_path: Path to a GGUF model file (Python binding mode).
        server_url: Base URL for llama.cpp's HTTP server (e.g. "http://localhost:8080").
            If provided, *model_path* is ignored.
        context_size: Context window size in tokens (default: 4096).
        max_tokens: Max output tokens per generation (default: 2048).
        n_gpu_layers: GPU layers for Python binding (default: -1 = all).
        n_threads: CPU threads for Python binding (default: os.cpu_count()).

    Raises:
        ValueError: If neither *model_path* nor *server_url* is given.
    """

    def __init__(
        self,
        *,
        model_path: str | None = None,
        server_url: str | None = None,
        context_size: int = 4_096,
        max_tokens: int = 2_048,
        n_gpu_layers: int = -1,
        n_threads: int | None = None,
    ) -> None:
        if not model_path and not server_url:
            raise ValueError(
                "LlamaCppAdapter requires either model_path (Python binding) "
                "or server_url (HTTP mode)."
            )

        self._context_size = context_size
        self._max_tokens = max_tokens
        self._server_url = server_url
        self._llama = None  # Lazy-loaded Python binding
        self._model_path = model_path
        self._n_gpu_layers = n_gpu_layers
        # Fall back to 4 threads when the CPU count cannot be determined.
        self._n_threads = n_threads or (os.cpu_count() or 4)

        if server_url:
            logger.debug("LlamaCppAdapter: HTTP mode → %s", server_url)
        else:
            logger.debug("LlamaCppAdapter: Python binding → %s", model_path)

    # -- lazy init --------------------------------------------------------

    def _ensure_model(self) -> Any:
        """Lazy-load the llama-cpp-python model on first use.

        Returns:
            The cached ``Llama`` instance, or ``None`` in HTTP mode.

        Raises:
            ImportError: If binding mode is used without ``llama-cpp-python``.
        """
        if self._llama is not None:
            return self._llama

        if self._server_url:
            return None  # HTTP mode — no local model

        try:
            from llama_cpp import Llama
        except ImportError:
            raise ImportError(
                "Python binding mode requires 'llama-cpp-python'. "
                "Install with: pip install llama-cpp-python"
            ) from None

        self._llama = Llama(  # type: ignore[assignment]
            model_path=self._model_path,  # type: ignore[arg-type]
            n_ctx=self._context_size,
            n_gpu_layers=self._n_gpu_layers,
            n_threads=self._n_threads,
            verbose=False,
        )
        return self._llama

    # -- LLMProvider interface --------------------------------------------

    def generate_chat(
        self, messages: list[dict[str, str]], **kwargs: Any
    ) -> tuple[str, str]:
        """Generate via Python binding or HTTP server.

        Args:
            messages: Chat messages (``role``/``content`` dicts).
            **kwargs: Extra generation options forwarded to the backend;
                ``max_tokens`` overrides the adapter default.

        Returns:
            ``(text, finish_reason)`` where *finish_reason* is ``"stop"``,
            ``"length"`` or ``"error"``.
        """
        max_tokens = kwargs.pop("max_tokens", self._max_tokens)

        if self._server_url:
            return self._generate_http(messages, max_tokens, **kwargs)
        return self._generate_binding(messages, max_tokens, **kwargs)

    def count_tokens(self, text: str) -> int:
        """Count tokens via llama.cpp's tokenizer or heuristic fallback.

        The heuristic is one token per four characters, never less than 1.
        """
        if self._server_url:
            # HTTP mode: heuristic (no local tokenizer available)
            return max(1, len(text) // 4)

        model = self._ensure_model()
        try:
            return len(model.tokenize(text.encode("utf-8")))
        except Exception as exc:
            # Best-effort: keep working with the heuristic, but leave a trace.
            logger.debug("llama.cpp tokenizer failed, using heuristic: %s", exc)
            return max(1, len(text) // 4)

    def context_window_size(self) -> int:
        """Return the configured context window size in tokens."""
        return self._context_size

    @property
    def max_output_tokens(self) -> int | None:
        """Configured maximum number of output tokens per generation."""
        return self._max_tokens

    @property
    def model_name(self) -> str:
        """Model file name when a path was given, else ``llamacpp@<server_url>``."""
        if self._model_path:
            return os.path.basename(self._model_path)
        return f"llamacpp@{self._server_url}"

    # -- private ----------------------------------------------------------

    @staticmethod
    def _parse_choice(response: Any) -> tuple[str, str]:
        """Extract ``(text, finish_reason)`` from a chat-completion response.

        Only ``"length"`` (the generation hit the token limit) is reported
        distinctly; every other finish reason is normalised to ``"stop"``.
        """
        choice = response["choices"][0]
        text = choice["message"]["content"] or ""
        reason = choice.get("finish_reason", "stop")
        if reason != "length":
            reason = "stop"
        return (text, reason)

    def _generate_binding(
        self, messages: list[dict[str, str]], max_tokens: int, **kwargs: Any
    ) -> tuple[str, str]:
        """Generate using llama-cpp-python's create_chat_completion."""
        model = self._ensure_model()
        try:
            result = model.create_chat_completion(
                messages=messages,
                max_tokens=max_tokens,
                **kwargs,
            )
            return self._parse_choice(result)
        except Exception as exc:
            logger.error("llama.cpp binding error: %s", exc)
            return ("", "error")

    def _generate_http(
        self, messages: list[dict[str, str]], max_tokens: int, **kwargs: Any
    ) -> tuple[str, str]:
        """Generate via llama.cpp's OpenAI-compatible HTTP endpoint with retry (§audit H5).

        Transient failures (connection errors, timeouts, HTTP 408/429/5xx)
        are retried with exponential backoff plus jitter. Other HTTP errors
        and malformed responses are not retried, since repeating the same
        request cannot succeed.
        """
        url = f"{self._server_url.rstrip('/')}/v1/chat/completions"  # type: ignore[union-attr]
        payload = json.dumps({
            "messages": messages,
            "max_tokens": max_tokens,
            **kwargs,
        }).encode("utf-8")
        headers = {
            "Content-Type": "application/json",
            "Connection": "keep-alive",
        }

        max_retries = 4
        base_delay = 2.0

        for attempt in range(max_retries):
            req = urllib.request.Request(
                url, data=payload, headers=headers, method="POST",
            )

            try:
                with urllib.request.urlopen(req, timeout=120) as resp:
                    data = json.loads(resp.read())
                return self._parse_choice(data)
            except urllib.error.HTTPError as exc:
                # The server answered; only some statuses are worth retrying.
                retryable = exc.code in (408, 429) or exc.code >= 500
                failure: Exception = exc
            except OSError as exc:
                # URLError, ConnectionError and TimeoutError are all OSErrors.
                retryable = True
                failure = exc
            except Exception as exc:
                logger.error("llama.cpp HTTP error (non-retryable): %s", exc)
                return ("", "error")

            if not retryable:
                logger.error("llama.cpp HTTP error (non-retryable): %s", failure)
                return ("", "error")
            if attempt >= max_retries - 1:
                logger.error(
                    "llama.cpp HTTP error after %d retries: %s", max_retries, failure,
                )
                return ("", "error")

            delay = base_delay * (2 ** attempt) + random.uniform(0, 0.5)
            logger.warning(
                "llama.cpp HTTP request failed (attempt %d/%d), retrying in %.1fs: %s",
                attempt + 1, max_retries, delay, failure,
            )
            time.sleep(delay)

        # Only reachable if max_retries is ever set below 1.
        return ("", "error")
@@ -0,0 +1,139 @@
1
+ # Copyright © 2025 Constantinos Vidiniotis. All rights reserved.
2
+ # Licensed under Elastic License 2.0 — see LICENSE.md for details.
3
+ """LLMProviderManager — multi-provider routing with fallback chain (§05).
4
+
5
+ Supports:
6
+ - Primary provider selection
7
+ - Fallback chain: if primary fails, try registered providers in order
8
+ - Provider registration and retrieval by name
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import logging
14
+ from collections.abc import Generator
15
+
16
+ from crp.core.errors import ProviderError
17
+ from crp.providers.base import LLMProvider
18
+
19
+ logger = logging.getLogger("crp.providers.manager")
20
+
21
+
22
class LLMProviderManager:
    """Routes requests to one or more LLM providers with fallback.

    Providers are keyed by their ``model_name``. The primary is always tried
    first, then the registered fallbacks in registration order.

    Usage::

        mgr = LLMProviderManager(primary_provider)
        mgr.register(fallback_provider)

        # Generate with automatic fallback
        output, reason, used = mgr.generate_with_fallback(messages)
    """

    def __init__(self, provider: LLMProvider) -> None:
        self._primary = provider
        self._providers: dict[str, LLMProvider] = {provider.model_name: provider}
        self._fallback_order: list[str] = []  # model names in fallback priority

    @property
    def primary(self) -> LLMProvider:
        """The provider passed to the constructor."""
        return self._primary

    @property
    def provider_count(self) -> int:
        """Number of distinct registered provider names (primary included)."""
        return len(self._providers)

    @property
    def provider_names(self) -> list[str]:
        """Registered provider names, in registration order."""
        return list(self._providers)

    def register(self, provider: LLMProvider) -> None:
        """Register an additional provider for fallback routing.

        Registering a name again replaces the stored provider but keeps its
        existing position in the fallback order, so it is never tried twice.
        A provider sharing the primary's name replaces the entry used for
        routing without being added to the fallback order.
        """
        name = provider.model_name
        self._providers[name] = provider
        if name != self._primary.model_name and name not in self._fallback_order:
            self._fallback_order.append(name)
        logger.info("Provider registered: %s (total: %d)", name, len(self._providers))

    def get(self, name: str | None = None) -> LLMProvider:
        """Get a provider by name, or the primary if *name* is None.

        Raises:
            ProviderError: If *name* is not registered.
        """
        if name is None:
            return self._primary
        provider = self._providers.get(name)
        if provider is None:
            raise ProviderError(
                f"Provider '{name}' not registered",
                available=list(self._providers.keys()),
            )
        return provider

    def generate_with_fallback(
        self,
        messages: list[dict[str, str]],
        **kwargs: object,
    ) -> tuple[str, str, str]:
        """Generate using primary, fall back to registered providers on failure.

        A provider counts as failed when ``generate_chat`` raises or when it
        returns the finish reason ``"error"`` (adapters such as the llama.cpp
        one report failures that way instead of raising).

        Returns:
            (output_text, finish_reason, provider_name) — the provider that
            successfully generated is identified in provider_name. If no
            provider succeeds but at least one returned an ``"error"``
            result, the first such result is returned unchanged.

        Raises:
            ProviderError: If ALL providers fail by raising.
        """
        primary_name = self._primary.model_name
        providers_to_try = [primary_name, *self._fallback_order]
        last_error: Exception | None = None
        first_error_result: tuple[str, str, str] | None = None

        for name in providers_to_try:
            provider = self._providers.get(name)
            if provider is None:
                continue
            try:
                output, finish_reason = provider.generate_chat(messages, **kwargs)
            except Exception as exc:
                logger.warning("Provider '%s' failed: %s", name, exc)
                last_error = exc
                continue

            if finish_reason == "error":
                # Soft failure: remember it, but give the fallbacks a chance.
                logger.warning("Provider '%s' returned an error result", name)
                if first_error_result is None:
                    first_error_result = (output, finish_reason, name)
                continue

            if name != primary_name:
                logger.warning(
                    "Primary provider failed, fell back to '%s'", name,
                )
            return output, finish_reason, name

        if first_error_result is not None:
            # Hand the error result back to the caller, as before, rather
            # than raising.
            return first_error_result

        raise ProviderError(
            f"All {len(providers_to_try)} providers failed",
            available=list(self._providers.keys()),
        ) from last_error

    def generate_with_tools_fallback(
        self,
        messages: list[dict[str, object]],
        tools: list[dict[str, object]],
        **kwargs: object,
    ) -> tuple[str, str, list[dict[str, object]] | None, dict[str, object] | None, str]:
        """Generate with tools using primary, fall back on failure.

        Providers whose ``supports_tools()`` is false are skipped.

        Returns:
            (output_text, finish_reason, tool_calls, raw_message, provider_name)

        Raises:
            ProviderError: If no tool-capable provider completes the request.
        """
        primary_name = self._primary.model_name
        providers_to_try = [primary_name, *self._fallback_order]
        last_error: Exception | None = None

        for name in providers_to_try:
            provider = self._providers.get(name)
            if provider is None or not provider.supports_tools():
                continue
            try:
                result = provider.generate_chat_with_tools(messages, tools, **kwargs)
            except Exception as exc:
                logger.warning("Provider '%s' tools failed: %s", name, exc)
                last_error = exc
                continue

            if name != primary_name:
                logger.warning("Tools fallback to '%s'", name)
            return (*result, name)

        raise ProviderError(
            "All providers failed for tool-mediated dispatch",
            available=list(self._providers.keys()),
        ) from last_error