crprotocol 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (153) hide show
  1. crp/__init__.py +126 -0
  2. crp/__main__.py +8 -0
  3. crp/_typing.py +27 -0
  4. crp/_version.py +5 -0
  5. crp/adapters.py +31 -0
  6. crp/advanced/__init__.py +40 -0
  7. crp/advanced/auto_ingest.py +400 -0
  8. crp/advanced/cqs.py +235 -0
  9. crp/advanced/cross_window.py +477 -0
  10. crp/advanced/curator.py +265 -0
  11. crp/advanced/feedback.py +146 -0
  12. crp/advanced/hierarchical.py +211 -0
  13. crp/advanced/meta_learning.py +401 -0
  14. crp/advanced/parallel.py +98 -0
  15. crp/advanced/review_cycle.py +329 -0
  16. crp/advanced/scale_mode.py +129 -0
  17. crp/advanced/source_grounding.py +207 -0
  18. crp/ckf/__init__.py +35 -0
  19. crp/ckf/community.py +377 -0
  20. crp/ckf/fabric.py +445 -0
  21. crp/ckf/gc.py +175 -0
  22. crp/ckf/graph_walk.py +87 -0
  23. crp/ckf/merge.py +133 -0
  24. crp/ckf/pattern_query.py +122 -0
  25. crp/ckf/pubsub.py +128 -0
  26. crp/ckf/semantic.py +207 -0
  27. crp/cli/__init__.py +7 -0
  28. crp/cli/main.py +329 -0
  29. crp/cli/sidecar.py +929 -0
  30. crp/cli/startup.py +272 -0
  31. crp/continuation/__init__.py +103 -0
  32. crp/continuation/completion.py +348 -0
  33. crp/continuation/degradation.py +157 -0
  34. crp/continuation/document_map.py +160 -0
  35. crp/continuation/flow.py +109 -0
  36. crp/continuation/gap.py +419 -0
  37. crp/continuation/manager.py +484 -0
  38. crp/continuation/quality_monitor.py +179 -0
  39. crp/continuation/stitch.py +419 -0
  40. crp/continuation/trigger.py +142 -0
  41. crp/continuation/voice.py +157 -0
  42. crp/core/__init__.py +69 -0
  43. crp/core/batch.py +77 -0
  44. crp/core/circuit_breaker.py +116 -0
  45. crp/core/config.py +377 -0
  46. crp/core/context_tools.py +540 -0
  47. crp/core/dispatch_router.py +3977 -0
  48. crp/core/errors.py +128 -0
  49. crp/core/extraction_facade.py +384 -0
  50. crp/core/facilitator.py +713 -0
  51. crp/core/idempotency.py +215 -0
  52. crp/core/orchestrator.py +1435 -0
  53. crp/core/relay_strategies.py +613 -0
  54. crp/core/security_manager.py +140 -0
  55. crp/core/session.py +134 -0
  56. crp/core/task_intent.py +36 -0
  57. crp/core/window.py +363 -0
  58. crp/envelope/__init__.py +30 -0
  59. crp/envelope/builder.py +288 -0
  60. crp/envelope/decomposer.py +236 -0
  61. crp/envelope/formatter.py +168 -0
  62. crp/envelope/packer.py +211 -0
  63. crp/envelope/reranker.py +209 -0
  64. crp/envelope/scoring.py +310 -0
  65. crp/extraction/__init__.py +45 -0
  66. crp/extraction/complexity.py +96 -0
  67. crp/extraction/contradiction.py +132 -0
  68. crp/extraction/pipeline.py +360 -0
  69. crp/extraction/quality_gate.py +237 -0
  70. crp/extraction/stage1_regex.py +173 -0
  71. crp/extraction/stage2_statistical.py +244 -0
  72. crp/extraction/stage3_gliner.py +210 -0
  73. crp/extraction/stage4_uie.py +183 -0
  74. crp/extraction/stage5_discourse.py +175 -0
  75. crp/extraction/stage6_llm.py +178 -0
  76. crp/extraction/structured_output.py +219 -0
  77. crp/extraction/types.py +299 -0
  78. crp/license_guard.py +722 -0
  79. crp/observability/__init__.py +30 -0
  80. crp/observability/audit.py +118 -0
  81. crp/observability/events.py +233 -0
  82. crp/observability/metrics.py +264 -0
  83. crp/observability/quality.py +135 -0
  84. crp/observability/structured_logging.py +81 -0
  85. crp/observability/telemetry.py +117 -0
  86. crp/provenance/__init__.py +314 -0
  87. crp/provenance/_embeddings.py +97 -0
  88. crp/provenance/_types.py +378 -0
  89. crp/provenance/attribution_scorer.py +252 -0
  90. crp/provenance/claim_detector.py +229 -0
  91. crp/provenance/contradiction_detector.py +243 -0
  92. crp/provenance/distortion_detector.py +397 -0
  93. crp/provenance/entailment_verifier.py +358 -0
  94. crp/provenance/fabrication_detector.py +203 -0
  95. crp/provenance/hallucination_scorer.py +320 -0
  96. crp/provenance/omission_analyzer.py +106 -0
  97. crp/provenance/provenance_chain.py +205 -0
  98. crp/provenance/report_generator.py +440 -0
  99. crp/providers/__init__.py +43 -0
  100. crp/providers/anthropic.py +270 -0
  101. crp/providers/base.py +135 -0
  102. crp/providers/custom.py +63 -0
  103. crp/providers/diagnostic.py +251 -0
  104. crp/providers/llamacpp.py +224 -0
  105. crp/providers/manager.py +139 -0
  106. crp/providers/ollama.py +243 -0
  107. crp/providers/openai.py +628 -0
  108. crp/providers/tokenizers.py +48 -0
  109. crp/py.typed +0 -0
  110. crp/resources/__init__.py +53 -0
  111. crp/resources/adaptive_allocator.py +525 -0
  112. crp/resources/cost_model.py +388 -0
  113. crp/resources/overhead_manager.py +217 -0
  114. crp/resources/resource_manager.py +262 -0
  115. crp/schemas/__init__.py +20 -0
  116. crp/schemas/cost-estimate.json +33 -0
  117. crp/schemas/crp-error.json +43 -0
  118. crp/schemas/envelope-preview.json +40 -0
  119. crp/schemas/persisted-state-header.json +27 -0
  120. crp/schemas/quality-report.json +94 -0
  121. crp/schemas/session-handle.json +33 -0
  122. crp/schemas/session-status.json +57 -0
  123. crp/schemas/stream-event.json +18 -0
  124. crp/schemas/task-intent.json +42 -0
  125. crp/security/__init__.py +93 -0
  126. crp/security/audit_trail.py +392 -0
  127. crp/security/binding.py +192 -0
  128. crp/security/compliance.py +813 -0
  129. crp/security/consent.py +593 -0
  130. crp/security/embedding_defense.py +161 -0
  131. crp/security/encryption.py +202 -0
  132. crp/security/injection.py +335 -0
  133. crp/security/integrity.py +267 -0
  134. crp/security/privacy.py +662 -0
  135. crp/security/quarantine.py +249 -0
  136. crp/security/rbac.py +221 -0
  137. crp/security/validation.py +164 -0
  138. crp/state/__init__.py +31 -0
  139. crp/state/cold_storage.py +258 -0
  140. crp/state/compaction.py +263 -0
  141. crp/state/critical_state.py +104 -0
  142. crp/state/event_log.py +313 -0
  143. crp/state/fact.py +189 -0
  144. crp/state/serialization.py +189 -0
  145. crp/state/session_cleanup.py +77 -0
  146. crp/state/snapshot.py +290 -0
  147. crp/state/warm_store.py +346 -0
  148. crprotocol-2.0.0.dist-info/METADATA +1295 -0
  149. crprotocol-2.0.0.dist-info/RECORD +153 -0
  150. crprotocol-2.0.0.dist-info/WHEEL +4 -0
  151. crprotocol-2.0.0.dist-info/entry_points.txt +2 -0
  152. crprotocol-2.0.0.dist-info/licenses/LICENSE.md +170 -0
  153. crprotocol-2.0.0.dist-info/licenses/NOTICE +18 -0
@@ -0,0 +1,628 @@
1
+ # Copyright © 2025 Constantinos Vidiniotis. All rights reserved.
2
+ # Licensed under Elastic License 2.0 — see LICENSE.md for details.
3
+ """OpenAI adapter — GPT-4o, GPT-4, GPT-3.5-turbo, o1/o3 families (§6.1).
4
+
5
+ Requires ``openai>=1.0`` (``pip install crprotocol[full]``).
6
+
7
+ Usage::
8
+
9
+ from crp.providers.openai import OpenAIAdapter
10
+
11
+ provider = OpenAIAdapter(model="gpt-4o")
12
+ output, reason = provider.generate_chat([
13
+ {"role": "system", "content": "You are helpful."},
14
+ {"role": "user", "content": "Hello!"},
15
+ ])
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ import json
21
+ import logging
22
+ import os
23
+ import random
24
+ import time
25
+ import urllib.request
26
+ import urllib.error
27
+ from typing import Any
28
+
29
+ from crp.providers.base import LLMProvider
30
+
31
+ logger = logging.getLogger("crp.providers.openai")
32
+
33
# Model → context window (tokens). Updated as of 2025-Q2.
# Primary table: exact OpenAI model names.
# _resolve_model_capabilities() checks this table first with a plain
# ``model in _MODEL_CONTEXT`` test, so a key only matches the complete,
# case-sensitive model string (no prefix or snapshot-suffix matching).
_MODEL_CONTEXT: dict[str, int] = {
    "gpt-4o": 128_000,
    "gpt-4o-mini": 128_000,
    "gpt-4-turbo": 128_000,
    "gpt-4": 8_192,
    "gpt-3.5-turbo": 16_385,
    "o1": 200_000,
    "o1-mini": 128_000,
    "o1-preview": 128_000,
    "o3": 200_000,
    "o3-mini": 200_000,
    "o4-mini": 200_000,
}
48
+
49
# Model → max output tokens (hard cap set by OpenAI).
# Read by _resolve_model_capabilities() as ``_MODEL_MAX_OUTPUT.get(model, 4_096)``
# after an exact hit in _MODEL_CONTEXT, so a model present there but missing
# here resolves to a 4_096-token output cap.
_MODEL_MAX_OUTPUT: dict[str, int] = {
    "gpt-4o": 16_384,
    "gpt-4o-mini": 16_384,
    "gpt-4-turbo": 4_096,
    "gpt-4": 8_192,
    "gpt-3.5-turbo": 4_096,
    "o1": 100_000,
    "o1-mini": 65_536,
    "o1-preview": 32_768,
    "o3": 100_000,
    "o3-mini": 100_000,
    "o4-mini": 100_000,
}
63
+
64
# ── Model family table (prefix-matched) ─────────────────────────────
# Used when the model name is NOT in the primary OpenAI table.
# This covers open-source models served via OpenAI-compatible APIs
# (LM Studio, vLLM, llama.cpp server, Ollama OpenAI compat, TGI, etc.).
# Ordered longest-prefix-first to ensure specific matches win.
#
# Matching is done by _resolve_model_capabilities() as
# ``model.lower().startswith(prefix)`` while walking this list top to bottom;
# the first hit is returned. Consequently:
#   * prefixes must be written in lower case, and
#   * within a family, a more specific prefix (e.g. "qwen2.5") has to appear
#     before a shorter one it extends (e.g. "qwen2", "qwen").
_MODEL_FAMILY_CONTEXT: list[tuple[str, int, int]] = [
    # (prefix, context_window, max_output_tokens)
    # Qwen family
    ("qwen3", 40_960, 8_192),
    ("qwen2.5", 128_000, 8_192),
    ("qwen2", 128_000, 8_192),
    ("qwen", 32_768, 4_096),
    # LLaMA family
    ("llama-3.3", 128_000, 4_096),
    ("llama-3.2", 128_000, 4_096),
    ("llama-3.1", 128_000, 4_096),
    ("llama3.3", 128_000, 4_096),
    ("llama3.2", 128_000, 4_096),
    ("llama3.1", 128_000, 4_096),
    ("llama-3", 8_192, 4_096),
    ("llama3", 8_192, 4_096),
    ("llama-2", 4_096, 4_096),
    ("llama2", 4_096, 4_096),
    ("codellama", 16_384, 4_096),
    # Gemma family
    ("gemma-3-27b", 128_000, 8_192),
    ("gemma-3", 32_768, 4_096),  # Smaller gemma-3 variants
    ("gemma3", 128_000, 8_192),
    ("gemma-2", 8_192, 4_096),
    ("gemma2", 8_192, 4_096),
    ("gemma", 8_192, 4_096),
    # Mistral family
    ("mistral-large", 128_000, 8_192),
    ("mistral-medium", 32_768, 4_096),
    ("mistral-small", 32_768, 4_096),
    ("mixtral", 32_768, 4_096),
    ("mistral", 32_768, 4_096),
    # Phi family
    ("phi-4", 16_384, 4_096),
    ("phi4", 16_384, 4_096),
    ("phi-3", 128_000, 4_096),
    ("phi3", 128_000, 4_096),
    # DeepSeek family
    ("deepseek-r1", 128_000, 16_384),
    ("deepseek-v3", 128_000, 16_384),
    ("deepseek-v2", 128_000, 8_192),
    ("deepseek-coder", 128_000, 8_192),
    ("deepseek", 128_000, 8_192),
    # Command-R family
    ("command-r-plus", 128_000, 4_096),
    ("command-r", 128_000, 4_096),
    # RWKV family
    ("rwkv", 100_000, 4_096),
    # Yi family
    ("yi-", 200_000, 4_096),
    # InternLM family
    ("internlm", 256_000, 8_192),
    # Anthropic (when proxied through OpenAI-compat)
    ("claude-3.5", 200_000, 8_192),
    ("claude-3", 200_000, 4_096),
    ("claude", 200_000, 4_096),
    # Youtu (WASA native)
    ("youtu", 128_000, 16_384),
]
128
+
129
+
130
def _resolve_model_capabilities(
    model: str,
    base_url: str | None = None,
) -> tuple[int, int]:
    """Work out ``(context_window, max_output_tokens)`` for *model*.

    The steps below are tried in order and the first one that produces an
    answer wins:

    1. Exact, case-sensitive key in the primary OpenAI table.
    2. First family prefix that the lower-cased model name starts with.
    3. Server-side probe — attempted only when *base_url* is truthy.
    4. Conservative fallback of ``(8_192, 4_096)`` — deliberately NOT 128K.

    Args:
        model: Model identifier as it will be sent to the API.
        base_url: Optional server URL handed to the probe in step 3.

    Returns:
        A ``(context_window, max_output_tokens)`` tuple.
    """
    # Step 1: exact match (OpenAI models)
    try:
        known_ctx = _MODEL_CONTEXT[model]
    except KeyError:
        pass
    else:
        return (known_ctx, _MODEL_MAX_OUTPUT.get(model, 4_096))

    # Step 2: first family whose prefix starts the lower-cased name
    name = model.lower()
    family = next(
        (entry for entry in _MODEL_FAMILY_CONTEXT if name.startswith(entry[0])),
        None,
    )
    if family is not None:
        family_prefix, family_ctx, family_out = family
        logger.info(
            "Model '%s' matched family '%s': ctx=%d, max_out=%d",
            model, family_prefix, family_ctx, family_out,
        )
        return (family_ctx, family_out)

    # Step 3: ask the server (non-OpenAI servers may expose metadata)
    probe_result = _probe_server_model_info(model, base_url) if base_url else None
    if probe_result:
        return probe_result

    # Step 4: small defaults — assuming 128K would be dangerous for small models
    logger.warning(
        "Model '%s' not in any known table. Using conservative defaults "
        "(ctx=8192, max_out=4096). Override with context_size= parameter "
        "or add model to _MODEL_FAMILY_CONTEXT.",
        model,
    )
    return (8_192, 4_096)
172
+
173
+
174
def _positive_int(value: Any) -> int | None:
    """Return *value* when it is a positive ``int`` (``bool`` excluded), else ``None``."""
    if isinstance(value, int) and not isinstance(value, bool) and value > 0:
        return value
    return None


def _fetch_json(request: urllib.request.Request, timeout: float = 5) -> Any:
    """Send *request* and decode the response body as JSON."""
    with urllib.request.urlopen(request, timeout=timeout) as resp:
        return json.loads(resp.read())


def _probe_server_model_info(
    model: str,
    base_url: str,
) -> tuple[int, int] | None:
    """Probe the server for model metadata (best effort).

    Tries, in order:

    1. ``GET {root}/v1/models/{model}`` — reads ``max_model_len``,
       ``context_length`` or ``max_context_length`` from the JSON body, and
       ``max_output_tokens`` when it is a positive integer.
    2. ``POST {root}/api/show`` (Ollama-compatible) — reads
       ``context_length`` / ``num_ctx`` from ``model_info``, falling back to
       any ``<architecture>.context_length`` key.

    ``root`` is *base_url* without its trailing slash and without a trailing
    ``/v1`` segment. The same *base_url* is what callers hand to the OpenAI
    client, where it normally already ends in ``/v1``; without stripping it
    the probes would hit ``/v1/v1/models/...`` and ``/v1/api/show``.

    When the server gives no usable output cap, ``min(ctx // 4, 16_384)`` is
    used. Every failure (network error, non-JSON body, unexpected shape) is
    logged at debug level and swallowed.

    Args:
        model: Model identifier; percent-encoded before being put in the URL
            path (``/`` and ``:`` are kept as-is).
        base_url: Server URL, with or without a trailing ``/v1``.

    Returns:
        ``(context_window, max_output_tokens)`` or ``None`` if neither probe
        produced a positive integer context length.
    """
    # Function-scope import: the module top level only imports
    # urllib.request / urllib.error.
    from urllib.parse import quote

    root = base_url.rstrip("/")
    if root.endswith("/v1"):
        root = root[: -len("/v1")]

    # Attempt 1: /v1/models/{model} (vLLM, TGI expose max_model_len here)
    try:
        req = urllib.request.Request(
            f"{root}/v1/models/{quote(model, safe='/:')}",
            headers={"Accept": "application/json"},
            method="GET",
        )
        data = _fetch_json(req)
        ctx = _positive_int(
            data.get("max_model_len")
            or data.get("context_length")
            or data.get("max_context_length")
        )
        if ctx is not None:
            # Only trust a server-provided cap when it is a positive int;
            # a null/str value must not leak out as the max output size.
            max_out = _positive_int(data.get("max_output_tokens")) or min(ctx // 4, 16_384)
            logger.info(
                "Server probe found model '%s': ctx=%d, max_out=%d",
                model, ctx, max_out,
            )
            return (ctx, max_out)
    except Exception:
        logger.debug("Server probe /v1/models/%s failed (expected for non-vLLM servers)", model)

    # Attempt 2: Ollama-compatible /api/show
    try:
        req = urllib.request.Request(
            f"{root}/api/show",
            data=json.dumps({"name": model}).encode("utf-8"),
            headers={"Content-Type": "application/json"},
            method="POST",
        )
        data = _fetch_json(req)
        info = data.get("model_info") or {}
        raw_ctx = info.get("context_length") or info.get("num_ctx")
        if not raw_ctx:
            # Ollama namespaces the value by architecture,
            # e.g. "llama.context_length".
            raw_ctx = next(
                (value for key, value in info.items() if key.endswith(".context_length")),
                None,
            )
        ctx = _positive_int(raw_ctx)
        if ctx is not None:
            max_out = min(ctx // 4, 16_384)
            logger.info(
                "Ollama probe found model '%s': ctx=%d, max_out=%d",
                model, ctx, max_out,
            )
            return (ctx, max_out)
    except Exception:
        logger.debug("Ollama probe /api/show failed for '%s' (expected for non-Ollama servers)", model)

    return None
233
+
234
+
235
+ def _require_openai():
236
+ """Import openai with a friendly error."""
237
+ try:
238
+ import openai
239
+ return openai
240
+ except ImportError:
241
+ raise ImportError(
242
+ "OpenAI adapter requires the 'openai' package. "
243
+ "Install with: pip install crprotocol[full]"
244
+ ) from None
245
+
246
+
247
+ def _require_tiktoken():
248
+ """Import tiktoken with a friendly error."""
249
+ try:
250
+ import tiktoken
251
+ return tiktoken
252
+ except ImportError:
253
+ return None # Fall back to heuristic
254
+
255
+
256
class OpenAIAdapter(LLMProvider):
    """OpenAI chat completions adapter.

    Works with OpenAI API and any OpenAI-compatible server (LM Studio,
    vLLM, llama.cpp server, Ollama OpenAI compat, TGI, etc.).

    Model capabilities are auto-discovered in four steps (first hit wins):
        1. Exact match against known OpenAI models
        2. Prefix match against known open-source model families
        3. Server-side probing (for vLLM, Ollama-compat endpoints)
        4. Conservative fallback (8K context) — safe for unknown models

    Discovery is skipped entirely when both ``context_size`` and
    ``max_tokens`` are given, so no probe request is sent in that case.

    Args:
        model: Model name (e.g. "gpt-4o", "qwen3-4b", "llama3.1").
        api_key: API key. Defaults to ``OPENAI_API_KEY`` env var.
        base_url: Override API base URL (for LM Studio, vLLM, etc.).
        context_size: Override auto-discovered context window (tokens).
        max_tokens: Override auto-discovered max output tokens per request.
        timeout: HTTP timeout in seconds (default: 120).

    Raises:
        ImportError: If the ``openai`` package is not installed.
        ValueError: If no API key is passed and ``OPENAI_API_KEY`` is unset.
    """

    # Retry config: up to 4 attempts in total, exponential backoff with jitter
    _MAX_RETRIES = 4
    _BASE_DELAY = 2.0  # seconds (generous for local inference servers)

    # Thinking model prefixes — models that produce reasoning_content
    _THINKING_PREFIXES = ("qwen3", "deepseek-r1", "o1", "o3", "o4")

    # OpenAI reasoning families reject ``max_tokens`` and expect the output
    # budget as ``max_completion_tokens`` instead.
    _COMPLETION_LIMIT_PREFIXES = ("o1", "o3", "o4")

    # OpenAI pricing per 1K tokens (USD) as (input, output) — updated 2025-Q2.
    _PRICING_PER_1K: dict[str, tuple[float, float]] = {
        "gpt-4o": (0.0025, 0.010),
        "gpt-4o-mini": (0.00015, 0.0006),
        "gpt-4-turbo": (0.010, 0.030),
        "gpt-4": (0.030, 0.060),
        "gpt-3.5-turbo": (0.0005, 0.0015),
        "o1": (0.015, 0.060),
        "o1-mini": (0.003, 0.012),
        "o3": (0.015, 0.060),
        "o3-mini": (0.0011, 0.0044),
        "o4-mini": (0.0011, 0.0044),
    }

    def __init__(
        self,
        model: str = "gpt-4o",
        *,
        api_key: str | None = None,
        base_url: str | None = None,
        context_size: int | None = None,
        max_tokens: int | None = None,
        timeout: float = 120.0,
    ) -> None:
        openai = _require_openai()

        self._model = model

        # Reasoning/thinking content from the last generate call.
        # Reset at the start of every call; None if no reasoning was present.
        self.last_reasoning_content: str | None = None

        # Validate credentials before any capability probing so a missing
        # key fails fast instead of after network round-trips.
        key = api_key or os.environ.get("OPENAI_API_KEY")
        if not key:
            raise ValueError(
                "No API key provided. Pass api_key= or set OPENAI_API_KEY."
            )

        # ── Auto-discover model capabilities ────────────────────
        # User-explicit overrides always win; when both are supplied there
        # is nothing left to discover, so skip the lookup (and its probe).
        if context_size and max_tokens:
            self._context_size = context_size
            self._max_tokens = max_tokens
        else:
            resolved_ctx, resolved_max = _resolve_model_capabilities(model, base_url)
            self._context_size = context_size or resolved_ctx
            self._max_tokens = max_tokens or resolved_max

        # Build the client
        client_kwargs: dict[str, Any] = {"api_key": key, "timeout": timeout}
        if base_url:
            client_kwargs["base_url"] = base_url
        self._client = openai.OpenAI(**client_kwargs)

        # Tokenizer (optional — tiktoken for accurate counts)
        tiktoken = _require_tiktoken()
        self._encoding = None
        if tiktoken:
            try:
                self._encoding = tiktoken.encoding_for_model(model)
            except KeyError:
                # Unknown model name: use the generic cl100k_base encoding.
                self._encoding = tiktoken.get_encoding("cl100k_base")

        logger.info(
            "OpenAIAdapter initialized: model=%s, ctx=%d, max_out=%d (auto-discovered=%s)",
            model, self._context_size, self._max_tokens,
            "no" if context_size or max_tokens else "yes",
        )

    # -- Internal helpers ---------------------------------------------------

    @staticmethod
    def _is_retryable(exc: Exception) -> bool:
        """Check if an exception is transient and worth retrying."""
        exc_type = type(exc).__name__
        # Rate limit (429) or server error (500/502/503)
        if hasattr(exc, "status_code"):
            return exc.status_code in (429, 500, 502, 503)
        # openai.RateLimitError, openai.APIConnectionError, etc.
        if exc_type in ("RateLimitError", "APIConnectionError", "APITimeoutError",
                        "InternalServerError", "APIStatusError"):
            return True
        # Connection-level transients (includes Channel Error from LM Studio)
        if isinstance(exc, (ConnectionError, TimeoutError, OSError)):
            return True
        # Catch-all: retry anything whose message mentions "channel",
        # "connection", "reset" or "closed"
        exc_msg = str(exc).lower()
        if any(kw in exc_msg for kw in ("channel", "connection", "reset", "closed")):
            return True
        return False

    def _backoff_delay(self, attempt: int) -> float:
        """Return the sleep time before retry number ``attempt + 1``."""
        return self._BASE_DELAY * (2 ** attempt) + random.uniform(0, 0.5)

    def _limit_param(self) -> str:
        """Name of the request field that carries the output-token budget."""
        if self._model.lower().startswith(self._COMPLETION_LIMIT_PREFIXES):
            return "max_completion_tokens"
        return "max_tokens"

    def _request_params(
        self, messages: Any, kwargs: dict[str, Any], **extra: Any
    ) -> dict[str, Any]:
        """Assemble request parameters.

        ``max_tokens`` is popped from *kwargs* (defaulting to the adapter's
        configured cap) and sent under the field the model accepts. Remaining
        caller *kwargs* are applied last so they override everything else.
        """
        limit = kwargs.pop("max_tokens", self._max_tokens)
        params: dict[str, Any] = {
            "model": self._model,
            "messages": messages,
            self._limit_param(): limit,
        }
        params.update(extra)
        params.update(kwargs)
        return params

    def _finalize_choice(self, choice: Any) -> tuple[str, str]:
        """Turn a completion choice into ``(text, finish_reason)``.

        Records ``reasoning_content`` (if any) on ``last_reasoning_content``
        and maps the finish reason to the CRP convention: ``"length"`` stays
        ``"length"``, everything else (stop, end_turn, content_filter, ...)
        becomes ``"stop"``.
        """
        text = choice.message.content or ""
        reason = choice.finish_reason or "stop"

        # Models like Qwen3, DeepSeek-R1, o1 put reasoning in a separate field.
        reasoning = getattr(choice.message, "reasoning_content", None)
        self.last_reasoning_content = reasoning or None
        if reasoning and not text:
            # Model exhausted its budget on reasoning — no final content.
            # Report "length" so the continuation engine knows the budget
            # ran out (not that the model finished). Reasoning is kept for
            # inspection but NOT used as output.
            reason = "length"
            logger.info(
                "Thinking model: all tokens spent on reasoning "
                "(%d chars), no content produced. Returning "
                "finish_reason=length so continuation proceeds.",
                len(reasoning),
            )
        elif reasoning:
            # Both present — callers get the clean content only.
            logger.debug(
                "Thinking model: reasoning=%d chars, content=%d chars",
                len(reasoning), len(text),
            )

        return (text, "length" if reason == "length" else "stop")

    @staticmethod
    def _parse_tool_arguments(raw_args: Any) -> Any:
        """Decode JSON-string tool arguments; wrap undecodable input as ``{"raw": ...}``."""
        try:
            return json.loads(raw_args) if isinstance(raw_args, str) else raw_args
        except (json.JSONDecodeError, TypeError):
            return {"raw": raw_args}

    # -- LLMProvider interface --------------------------------------------

    def generate_chat(
        self, messages: list[dict[str, str]], **kwargs: Any
    ) -> tuple[str, str]:
        """Call OpenAI chat completions API with retry on transient failures.

        Handles "thinking" models (Qwen3, DeepSeek-R1, o1, etc.) that
        split output into reasoning_content + content fields. The final
        content is returned; the reasoning is stored on
        ``last_reasoning_content``.

        Args:
            messages: Chat messages in OpenAI format.
            **kwargs: Extra request parameters; ``max_tokens`` overrides the
                adapter's configured output cap for this call.

        Returns:
            ``(output_text, finish_reason)`` where finish_reason is
            ``"stop"``, ``"length"`` or ``"error"`` (any failure; the
            exception is logged, not raised).
        """
        params = self._request_params(messages, kwargs)
        self.last_reasoning_content = None  # never expose a previous call's value

        last_exc: Exception | None = None
        for attempt in range(self._MAX_RETRIES):
            try:
                response = self._client.chat.completions.create(**params)
                return self._finalize_choice(response.choices[0])
            except Exception as exc:
                last_exc = exc
                if attempt < self._MAX_RETRIES - 1 and self._is_retryable(exc):
                    delay = self._backoff_delay(attempt)
                    logger.warning(
                        "OpenAI transient error (attempt %d/%d), retrying in %.1fs: %s",
                        attempt + 1, self._MAX_RETRIES, delay, type(exc).__name__,
                    )
                    time.sleep(delay)
                else:
                    logger.error("OpenAI API error: %s", type(exc).__name__)
                    return ("", "error")

        # Only reachable when _MAX_RETRIES is configured as 0 or less.
        logger.error("OpenAI API failed after %d retries: %s", self._MAX_RETRIES, type(last_exc).__name__)
        return ("", "error")

    def count_tokens(self, text: str) -> int:
        """Count tokens using tiktoken (exact) or fallback heuristic (~4 chars/token, min 1)."""
        if self._encoding is not None:
            return len(self._encoding.encode(text))
        return max(1, len(text) // 4)

    def context_window_size(self) -> int:
        """Return the context window (tokens) in effect for this adapter."""
        return self._context_size

    @property
    def max_output_tokens(self) -> int | None:
        """Max output tokens used per request unless overridden per call."""
        return self._max_tokens

    @property
    def model_name(self) -> str:
        """Model name this adapter was constructed with."""
        return self._model

    @property
    def is_thinking_model(self) -> bool:
        """Detect if the current model is a thinking/reasoning model."""
        return self._model.lower().startswith(self._THINKING_PREFIXES)

    def cost_per_1k_tokens(self) -> tuple[float, float]:
        """OpenAI pricing per 1K tokens (USD) — updated 2025-Q2.

        Returns ``(input, output)``; ``(0.0, 0.0)`` for models not in the
        pricing table.
        """
        return self._PRICING_PER_1K.get(self._model, (0.0, 0.0))

    # ── Tool-mediated dispatch (§20) ──────────────────────────────────

    def supports_tools(self) -> bool:
        """OpenAI and compatible servers support function/tool calling."""
        return True

    def generate_chat_with_tools(
        self,
        messages: list[dict[str, object]],
        tools: list[dict[str, object]],
        **kwargs: Any,
    ) -> tuple[str, str, list[dict[str, object]] | None, dict[str, object] | None]:
        """Generate with OpenAI tool/function calling.

        Returns (text, finish_reason, tool_calls, raw_assistant_message).
        When the model wants to call tools, finish_reason="tool_calls" and
        the tool_calls list contains structured call requests (arguments
        JSON-decoded where possible). The raw_assistant_message is the full
        message dict for appending to conversation history (required by the
        OpenAI tool protocol); there, arguments stay JSON strings.

        Without tool calls the result is ``(text, reason, None, None)`` with
        the same finish-reason mapping as ``generate_chat``. Any failure
        yields ``("", "error", None, None)``.
        """
        params = self._request_params(
            messages,
            kwargs,
            tools=tools,
            tool_choice=kwargs.pop("tool_choice", "auto"),
        )
        self.last_reasoning_content = None  # never expose a previous call's value

        last_exc: Exception | None = None
        for attempt in range(self._MAX_RETRIES):
            try:
                response = self._client.chat.completions.create(**params)
                choice = response.choices[0]
                raw_tool_calls = choice.message.tool_calls

                if not raw_tool_calls:
                    # No tool calls — normal completion (same as generate_chat)
                    text, reason = self._finalize_choice(choice)
                    return (text, reason, None, None)

                text = choice.message.content or ""
                # Keep the reasoning in sync on the tool-call path as well.
                reasoning = getattr(choice.message, "reasoning_content", None)
                self.last_reasoning_content = reasoning or None

                tool_calls_out: list[dict[str, object]] = [
                    {
                        "id": tc.id,
                        "type": "function",
                        "function": {
                            "name": tc.function.name,
                            "arguments": self._parse_tool_arguments(tc.function.arguments),
                        },
                    }
                    for tc in raw_tool_calls
                ]

                # Build raw assistant message for conversation history
                raw_msg: dict[str, Any] = {
                    "role": "assistant",
                    "content": text or None,
                    "tool_calls": [
                        {
                            "id": tc.id,
                            "type": "function",
                            "function": {
                                "name": tc.function.name,
                                "arguments": (
                                    tc.function.arguments
                                    if isinstance(tc.function.arguments, str)
                                    else json.dumps(tc.function.arguments)
                                ),
                            },
                        }
                        for tc in raw_tool_calls
                    ],
                }

                logger.info(
                    "Tool calls requested: %d calls [%s]",
                    len(tool_calls_out),
                    ", ".join(tc["function"]["name"] for tc in tool_calls_out),
                )
                return (text, "tool_calls", tool_calls_out, raw_msg)

            except Exception as exc:
                last_exc = exc
                if attempt < self._MAX_RETRIES - 1 and self._is_retryable(exc):
                    delay = self._backoff_delay(attempt)
                    logger.warning(
                        "OpenAI tool call transient error (attempt %d/%d), retrying in %.1fs: %s",
                        attempt + 1, self._MAX_RETRIES, delay, exc,
                    )
                    time.sleep(delay)
                else:
                    logger.error("OpenAI tool call API error: %s", exc)
                    return ("", "error", None, None)

        # Only reachable when _MAX_RETRIES is configured as 0 or less.
        logger.error("OpenAI tool call failed after %d retries: %s", self._MAX_RETRIES, last_exc)
        return ("", "error", None, None)

    def generate_chat_stream(
        self,
        messages: list[dict[str, str]],
        **kwargs: Any,
    ):
        """Stream token chunks from OpenAI.

        Yields individual content deltas. The generator's return value
        (``StopIteration.value``) is the finish reason: ``"stop"``,
        ``"length"`` or ``"error"``. No retry is attempted; an exception is
        logged and ends the stream with ``"error"``. The underlying stream
        is closed when iteration ends, including when the consumer stops
        early.
        """
        params = self._request_params(messages, kwargs, stream=True)

        finish_reason = "stop"
        stream = None
        try:
            stream = self._client.chat.completions.create(**params)
            for chunk in stream:
                if not chunk.choices:
                    continue
                first = chunk.choices[0]
                delta = first.delta
                if delta and delta.content:
                    yield delta.content
                if first.finish_reason:
                    finish_reason = "length" if first.finish_reason == "length" else "stop"
        except Exception as exc:
            logger.error("OpenAI streaming error: %s", exc)
            finish_reason = "error"
        finally:
            # Release the HTTP response even if the consumer abandoned the
            # generator mid-stream; not every compatible client has close().
            closer = getattr(stream, "close", None)
            if callable(closer):
                try:
                    closer()
                except Exception:
                    logger.debug("Closing OpenAI stream failed", exc_info=True)

        return finish_reason
@@ -0,0 +1,48 @@
1
+ # Copyright © 2025 Constantinos Vidiniotis. All rights reserved.
2
+ # Licensed under Elastic License 2.0 — see LICENSE.md for details.
3
+ """Per-provider tokenizer reconciliation (§06 §6.4).
4
+
5
+ Three-layer hierarchy:
6
+ Layer 1: Model-specific tokenizer (best — 100% accuracy)
7
+ Layer 2: Provider API token counting (good — 99%)
8
+ Layer 3: Character-to-token fallback (acceptable — 70-80%)
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ from crp.providers.base import LLMProvider
14
+
15
+
16
class TokenizerRegistry:
    """Resolve a token count for a given provider.

    Phase 1 implementation: counting is delegated to
    ``LLMProvider.count_tokens()``, which each adapter implements with its
    own tokenizer. This class only adds the Layer 3 character heuristic for
    providers whose ``count_tokens()`` raises. Nothing is cached here.
    """

    _CHARS_PER_TOKEN = 4  # Layer 3 heuristic

    def count_tokens(self, text: str, provider: LLMProvider) -> int:
        """Return the token count of *text* for *provider*.

        Layer 1/2: whatever ``provider.count_tokens(text)`` returns.
        Layer 3:   chars/4 estimate if that call raises any ``Exception``.
        """
        try:
            provider_count = provider.count_tokens(text)
        except Exception:
            return self._fallback_count(text)
        else:
            return provider_count

    def _fallback_count(self, text: str) -> int:
        """Layer 3: estimate ~4 characters per token, never less than 1."""
        estimate = len(text) // self._CHARS_PER_TOKEN
        return estimate if estimate > 1 else 1

    def validate_roundtrip(self, text: str, provider: LLMProvider) -> bool:
        """Best-effort consistency check of the provider's tokenizer.

        Counts *text* twice via ``provider.count_tokens()`` and reports
        whether both counts agree. No decode step is performed here.
        Returns ``False`` if anything raises.
        """
        try:
            counts = [provider.count_tokens(text) for _ in range(2)]
            return counts[0] == counts[1]
        except Exception:
            return False
crp/py.typed ADDED
File without changes