debugerai 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- debugai/__init__.py +51 -0
- debugai/agents/__init__.py +43 -0
- debugai/agents/base.py +192 -0
- debugai/agents/builtin.py +246 -0
- debugai/agents/registry.py +31 -0
- debugai/agents/types.py +108 -0
- debugai/analyze.py +142 -0
- debugai/calibration.py +198 -0
- debugai/cli.py +171 -0
- debugai/config.py +134 -0
- debugai/detectors.py +206 -0
- debugai/diagnosis.py +64 -0
- debugai/explainer.py +105 -0
- debugai/integrations/__init__.py +5 -0
- debugai/integrations/langchain.py +109 -0
- debugai/judge.py +171 -0
- debugai/metrics.py +139 -0
- debugai/models.py +92 -0
- debugai/providers.py +179 -0
- debugai/schema.py +66 -0
- debugai/sdk.py +1271 -0
- debugai/signals.py +399 -0
- debugai/thresholds.json +15 -0
- debugai/thresholds.py +44 -0
- debugai/tracing.py +283 -0
- debugerai-0.2.0.dist-info/METADATA +535 -0
- debugerai-0.2.0.dist-info/RECORD +31 -0
- debugerai-0.2.0.dist-info/WHEEL +5 -0
- debugerai-0.2.0.dist-info/entry_points.txt +2 -0
- debugerai-0.2.0.dist-info/licenses/LICENSE +21 -0
- debugerai-0.2.0.dist-info/top_level.txt +1 -0
debugai/metrics.py
ADDED
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
"""Per-model metrics ledger — lightweight thread-safe counters for tokens,
|
|
2
|
+
cost, latency, requests, and failures.
|
|
3
|
+
|
|
4
|
+
import debugai
|
|
5
|
+
debugai.metrics.snapshot() # full dict
|
|
6
|
+
debugai.metrics.by_model # per-model breakdown
|
|
7
|
+
debugai.metrics.reset() # clear all counters
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import threading
|
|
13
|
+
from dataclasses import dataclass, field
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@dataclass
class _ModelStats:
    """Running totals for a single bucket of requests.

    Keeps plain integer/float counters plus the raw latency samples from
    which percentiles are derived on demand. The class does no locking of
    its own.
    """

    requests: int = 0            # every recorded request, cached or not
    prompt_tokens: int = 0
    completion_tokens: int = 0
    total_tokens: int = 0        # prompt + completion, accumulated per record
    cost_usd: float = 0.0        # re-rounded to 8 decimals on every update
    failures: int = 0
    cache_hits: int = 0
    cache_misses: int = 0
    # Latency samples in milliseconds; cache hits are never appended.
    _latencies: list[float] = field(default_factory=list)

    def record(self, prompt: int, completion: int, cost: float,
               latency_ms: float, failed: bool, from_cache: bool = False) -> None:
        """Fold one finished request into the counters.

        Args:
            prompt: Prompt token count for the request.
            completion: Completion token count for the request.
            cost: Cost of the request in USD.
            latency_ms: Observed latency; stored only when ``from_cache`` is
                false.
            failed: Whether the request counts as a failure.
            from_cache: Whether the response was served from cache.
        """
        self.requests += 1
        self.prompt_tokens += prompt
        self.completion_tokens += completion
        self.total_tokens += prompt + completion
        self.cost_usd = round(self.cost_usd + cost, 8)

        if failed:
            self.failures += 1

        if from_cache:
            # Cached responses count as hits and contribute no latency sample.
            self.cache_hits += 1
            return

        self.cache_misses += 1
        self._latencies.append(latency_ms)

    def _pct(self, p: float) -> float:
        """Return the latency at fraction ``p`` (e.g. ``0.95``), rounded to 2 dp.

        The sample is picked at index ``int(count * p)`` of the sorted
        latencies, clamped to the valid index range. Returns ``0.0`` when no
        latency has been recorded.
        """
        ordered = sorted(self._latencies)
        if not ordered:
            return 0.0

        top = len(ordered) - 1
        rank = int(len(ordered) * p)
        if rank > top:
            rank = top
        if rank < 0:
            rank = 0
        return round(ordered[rank], 2)

    def to_dict(self) -> dict:
        """Return the counters and p50/p95 latencies as a plain dict."""
        report: dict = {
            name: getattr(self, name)
            for name in ("requests", "prompt_tokens",
                         "completion_tokens", "total_tokens")
        }
        report["cost_usd"] = round(self.cost_usd, 6)
        report["failures"] = self.failures
        report["cache_hits"] = self.cache_hits
        report["cache_misses"] = self.cache_misses
        report["latency_p50_ms"] = self._pct(0.50)
        report["latency_p95_ms"] = self._pct(0.95)
        return report
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
class MetricsLedger:
    """Per-model and global request counters guarded by a single lock.

    Every public method and property takes the lock, so writes from the SDK
    worker and reads from other threads do not interleave.
    """

    def __init__(self) -> None:
        self._lock = threading.Lock()
        self._models: dict[str, _ModelStats] = {}
        self._global = _ModelStats()

    # ── Recording (called by background worker) ─────────────────────────────
    def record(self, model: str, prompt_tokens: int, completion_tokens: int,
               cost_usd: float, latency_ms: float, failed: bool,
               from_cache: bool = False) -> None:
        """Add one request to both the ``model`` bucket and the global bucket.

        A bucket for ``model`` is created the first time that name is seen.
        """
        with self._lock:
            per_model = self._models.get(model)
            if per_model is None:
                per_model = self._models[model] = _ModelStats()
            # Per-model bucket first, then the global aggregate.
            for bucket in (per_model, self._global):
                bucket.record(prompt_tokens, completion_tokens,
                              cost_usd, latency_ms, failed, from_cache)

    # ── Read properties (safe from any thread) ──────────────────────────────
    @property
    def requests(self) -> int:
        """Total number of recorded requests across all models."""
        with self._lock:
            value = self._global.requests
        return value

    @property
    def failures(self) -> int:
        """Total number of recorded failures across all models."""
        with self._lock:
            value = self._global.failures
        return value

    @property
    def total_tokens(self) -> int:
        """Total prompt + completion tokens across all models."""
        with self._lock:
            value = self._global.total_tokens
        return value

    @property
    def cost_usd(self) -> float:
        """Accumulated cost in USD across all models."""
        with self._lock:
            value = self._global.cost_usd
        return value

    @property
    def latency_p50(self) -> float:
        """Global p50 latency in milliseconds (``0.0`` with no samples)."""
        with self._lock:
            value = self._global._pct(0.50)
        return value

    @property
    def latency_p95(self) -> float:
        """Global p95 latency in milliseconds (``0.0`` with no samples)."""
        with self._lock:
            value = self._global._pct(0.95)
        return value

    def _per_model_locked(self) -> dict[str, dict]:
        """Build the model → stats-dict mapping; caller must hold the lock."""
        return {name: stats.to_dict() for name, stats in self._models.items()}

    @property
    def by_model(self) -> dict[str, dict]:
        """Fresh dict mapping each model name to its stats dict."""
        with self._lock:
            return self._per_model_locked()

    def snapshot(self) -> dict:
        """Return the global stats dict with a ``"by_model"`` breakdown added."""
        with self._lock:
            report = self._global.to_dict()
            report["by_model"] = self._per_model_locked()
        return report

    def reset(self) -> None:
        """Drop every per-model bucket and start a fresh global bucket."""
        with self._lock:
            self._global = _ModelStats()
            self._models.clear()


# Module-level singleton — `import debugai; debugai.metrics`
metrics = MetricsLedger()
|
debugai/models.py
ADDED
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
"""Lazy-loaded small ML models (Architecture §8.2).
|
|
2
|
+
|
|
3
|
+
These are NOT LLMs — they are tiny, fast, task-specific models that run on CPU:
|
|
4
|
+
- sentence-transformers/all-MiniLM-L6-v2 (embeddings, ~80MB)
|
|
5
|
+
- spaCy en_core_web_sm (NER, ~12MB)
|
|
6
|
+
- cross-encoder/nli-deberta-v3-base           (NLI, ~500MB; nli-MiniLM2-L6-H768, ~120MB, when DEBUGAI_LITE is set)
|
|
7
|
+
|
|
8
|
+
Each loader is a cached singleton so model weights load once per process. If a
|
|
9
|
+
model is unavailable, the loader returns ``None`` and signal computations fall
|
|
10
|
+
back to their deterministic pure-Python methods (per the doc's layered design).
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
import functools
|
|
16
|
+
import logging
|
|
17
|
+
import os
|
|
18
|
+
|
|
19
|
+
log = logging.getLogger("debugai.models")

EMBED_MODEL = "all-MiniLM-L6-v2"
SPACY_MODEL = "en_core_web_sm"

# Spellings that explicitly turn an environment flag OFF. Without this list,
# `bool(os.environ.get(...))` would treat "0" or "false" as enabled, because
# any non-empty string is truthy.
_FALSY_ENV_VALUES = frozenset({"", "0", "false", "no", "off"})


def _env_flag(name: str) -> bool:
    """Return whether the environment variable ``name`` switches a flag on.

    Unset, empty, or one of ``0`` / ``false`` / ``no`` / ``off`` (any case,
    surrounding whitespace ignored) means off; every other value means on.
    """
    raw = os.environ.get(name, "")
    return raw.strip().lower() not in _FALSY_ENV_VALUES


# Three NLI modes (set via env var):
#
#   default          → cross-encoder/nli-deberta-v3-base   local  (~500 MB RAM)
#                      Most accurate. Best for self-hosted VPS with ≥1 GB RAM.
#
#   DEBUGAI_LITE=1   → cross-encoder/nli-MiniLM2-L6-H768  local  (~120 MB RAM)
#                      Fits in free-tier PaaS (512 MB RAM). Slightly more
#                      false-positive contradictions but good enough.
#
#   DEBUGAI_NLI_API=1 → HuggingFace Inference API          zero local RAM
#                      Sends (premise, hypothesis) to api-inference.huggingface.co.
#                      Set HF_TOKEN for higher rate limits (free account works).
#                      Best choice for Render / Railway free tier.
#
# Both flags are read once, at import time.
_LITE = _env_flag("DEBUGAI_LITE")
_NLI_API = _env_flag("DEBUGAI_NLI_API")
NLI_MODEL = ("cross-encoder/nli-MiniLM2-L6-H768" if _LITE
             else "cross-encoder/nli-deberta-v3-base")
NLI_HF_MODEL_ID = "cross-encoder/nli-deberta-v3-base"   # used by the API path
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
@functools.lru_cache(maxsize=1)
def embedder():
    """Load the sentence-embedding model once and reuse it.

    Returns:
        A ``SentenceTransformer`` for ``EMBED_MODEL``, or ``None`` when the
        import or the model load raises. The return value is cached, so a
        ``None`` result is also reused for later calls.
    """
    try:
        from sentence_transformers import SentenceTransformer

        log.info("loading embedding model %s", EMBED_MODEL)
        model = SentenceTransformer(EMBED_MODEL)
    except Exception as exc:  # pragma: no cover - environment dependent
        # Deliberately broad: any failure degrades to the fallback path.
        log.warning("embedder unavailable (%s); using token-overlap fallback", exc)
        model = None
    return model
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
@functools.lru_cache(maxsize=1)
def nli_model():
    """Return the NLI backend chosen at import time, loading it at most once.

    Returns:
        * a ``_HFNLISentinel`` when ``_NLI_API`` is set; per the original
          docstring, ``compute_contradiction()`` uses it to call the
          HuggingFace Inference API instead of a local model;
        * otherwise a ``CrossEncoder`` for ``NLI_MODEL``;
        * ``None`` when the import or the model load raises.

        The return value is cached, including a ``None`` result.
    """
    if not _NLI_API:
        try:
            from sentence_transformers import CrossEncoder

            log.info("loading NLI model %s", NLI_MODEL)
            return CrossEncoder(NLI_MODEL)
        except Exception as exc:  # pragma: no cover - environment dependent
            # Deliberately broad: any failure degrades to "no NLI model".
            log.warning("NLI model unavailable (%s); contradiction set to 0.0", exc)
            return None
    return _HFNLISentinel()
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
class _HFNLISentinel:
    """Placeholder handed out by ``nli_model()`` in HF-API mode.

    It carries no model; it only exposes the ``is_hf_api`` marker. Per the
    original docstring, ``compute_contradiction()`` checks for it and calls
    the HF Inference API instead of a local model.
    """

    # Class-level marker shared by every instance.
    is_hf_api = True
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
@functools.lru_cache(maxsize=1)
def ner():
    """Load the spaCy pipeline named by ``SPACY_MODEL`` once and reuse it.

    Returns:
        The loaded spaCy pipeline, or ``None`` when importing spaCy or
        loading the model raises. The return value is cached, so a ``None``
        result is also reused for later calls.
    """
    try:
        import spacy

        log.info("loading spaCy model %s", SPACY_MODEL)
        pipeline = spacy.load(SPACY_MODEL)
    except Exception as exc:  # pragma: no cover - environment dependent
        # Deliberately broad: any failure degrades to the regex fallback.
        log.warning("spaCy model unavailable (%s); using regex NER fallback", exc)
        pipeline = None
    return pipeline
|
debugai/providers.py
ADDED
|
@@ -0,0 +1,179 @@
|
|
|
1
|
+
"""Provider routing table — maps model name prefixes to (base_url, api_key_env,
|
|
2
|
+
adapter_class). Called by ``debugai.completion()`` / ``debugai.acompletion()``.
|
|
3
|
+
|
|
4
|
+
The key architectural insight: most modern providers speak the OpenAI REST API spec
|
|
5
|
+
(same endpoint shape, same response format). A single ``_OpenAICompatAdapter`` covers:
|
|
6
|
+
- Google Gemini (via Google's official OpenAI-compat endpoint)
|
|
7
|
+
- Ollama + any local model (Qwen, Llama, Phi, DeepSeek…)
|
|
8
|
+
- Groq, Together AI, Mistral AI, OpenRouter, Azure OpenAI, LM Studio, vLLM
|
|
9
|
+
- Any custom server that accepts POST /v1/chat/completions
|
|
10
|
+
|
|
11
|
+
Anthropic and Cohere are the exceptions: each uses its own native adapter (different API shape).
|
|
12
|
+
|
|
13
|
+
All routing is prefix-based on the model name. Users can extend or override the table:
|
|
14
|
+
from debugai import register_provider
|
|
15
|
+
register_provider(matches=lambda m: m.startswith("my-"), adapter=MyAdapter,
|
|
16
|
+
client_factory=lambda cfg: MyClient(...))
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
from __future__ import annotations
|
|
20
|
+
|
|
21
|
+
import os
|
|
22
|
+
from dataclasses import dataclass
|
|
23
|
+
from typing import TYPE_CHECKING, Any, Callable
|
|
24
|
+
|
|
25
|
+
if TYPE_CHECKING:
|
|
26
|
+
from debugai.config import DebugAIConfig
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@dataclass(frozen=True)
class ProviderRoute:
    """One immutable row of the provider routing table.

    A route ties a model-name prefix to the endpoint, the API-key
    environment variable, and the adapter kind used for matching models.
    """

    # Model-name prefix this route matches, e.g. "gemini-" or "ollama/".
    prefix: str
    # Human-readable provider name, e.g. "Google Gemini".
    name: str
    # Endpoint override; None means "use the SDK default".
    base_url: str | None
    # Name of the env var holding the API key; None means no key (local).
    api_key_env: str | None
    # Adapter kind: "openai" | "openai_compat" | "anthropic" | "cohere" | "ollama".
    adapter: str
    # Free-form setup hint for users.
    notes: str = ""
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
# ---------------------------------------------------------------------------
|
|
40
|
+
# The routing table. Checked in order — first prefix match wins.
|
|
41
|
+
# ---------------------------------------------------------------------------
|
|
42
|
+
PROVIDER_ROUTES: list[ProviderRoute] = [
    # ── OpenAI ──────────────────────────────────────────────────────────────
    *(
        ProviderRoute(prefix=openai_prefix, name="OpenAI", base_url=None,
                      api_key_env="OPENAI_API_KEY", adapter="openai")
        for openai_prefix in ("gpt-", "o1-", "o3-", "o4-", "text-")
    ),

    # ── Anthropic ───────────────────────────────────────────────────────────
    ProviderRoute(prefix="claude-", name="Anthropic", base_url=None,
                  api_key_env="ANTHROPIC_API_KEY", adapter="anthropic"),

    # ── Google Gemini (OpenAI-compat endpoint) ──────────────────────────────
    *(
        ProviderRoute(prefix=gemini_prefix, name="Google Gemini",
                      base_url="https://generativelanguage.googleapis.com/v1beta/openai/",
                      api_key_env="GEMINI_API_KEY", adapter="openai_compat")
        for gemini_prefix in ("gemini-", "google/")
    ),

    # ── Groq (fast inference, OpenAI-compat) ────────────────────────────────
    ProviderRoute(prefix="groq/", name="Groq",
                  base_url="https://api.groq.com/openai/v1",
                  api_key_env="GROQ_API_KEY", adapter="openai_compat"),

    # ── Together AI (OpenAI-compat) ─────────────────────────────────────────
    ProviderRoute(prefix="together/", name="Together AI",
                  base_url="https://api.together.xyz/v1",
                  api_key_env="TOGETHER_API_KEY", adapter="openai_compat"),

    # ── Mistral AI (OpenAI-compat) ──────────────────────────────────────────
    ProviderRoute(prefix="mistral/", name="Mistral AI",
                  base_url="https://api.mistral.ai/v1",
                  api_key_env="MISTRAL_API_KEY", adapter="openai_compat"),

    # ── OpenRouter (multi-provider proxy, OpenAI-compat) ────────────────────
    ProviderRoute(prefix="openrouter/", name="OpenRouter",
                  base_url="https://openrouter.ai/api/v1",
                  api_key_env="OPENROUTER_API_KEY", adapter="openai_compat"),

    # ── Azure OpenAI (OpenAI-compat + endpoint from env) ────────────────────
    ProviderRoute(prefix="azure/", name="Azure OpenAI", base_url=None,
                  api_key_env="AZURE_OPENAI_API_KEY", adapter="openai_compat",
                  notes="Set AZURE_OPENAI_ENDPOINT. Model name: 'azure/<deployment-name>'"),

    # ── Cohere (native adapter) ─────────────────────────────────────────────
    *(
        ProviderRoute(prefix=cohere_prefix, name="Cohere", base_url=None,
                      api_key_env="COHERE_API_KEY", adapter="cohere")
        for cohere_prefix in ("cohere/", "command-")
    ),

    # ── Ollama — local models via the Ollama OpenAI-compat server ───────────
    # Any model served by Ollama: qwen2.5, llama3, phi3, deepseek-coder, codellama…
    # Default: http://localhost:11434/v1  (override with OLLAMA_BASE_URL or config.ollama_base_url)
    ProviderRoute(prefix="ollama/", name="Ollama (local)", base_url=None,
                  api_key_env=None, adapter="ollama",
                  notes="Requires Ollama running. Set OLLAMA_BASE_URL if not localhost:11434."),
    # Common local model families also route to Ollama when no prefix given:
    *(
        ProviderRoute(prefix=family_prefix, name=f"Ollama ({family_label})",
                      base_url=None, api_key_env=None, adapter="ollama")
        for family_prefix, family_label in (
            ("qwen", "Qwen"),
            ("llama", "Llama"),
            ("phi", "Phi"),
            ("deepseek", "DeepSeek"),
            ("codellama", "CodeLlama"),
            ("gemma", "Gemma"),
            ("mixtral", "Mixtral"),
            ("vicuna", "Vicuna"),
        )
    ),
]
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def route_for(model: str) -> ProviderRoute | None:
    """Look up the route whose prefix starts ``model``, ignoring case.

    ``PROVIDER_ROUTES`` is scanned in order and the first match is returned.

    Args:
        model: Model name as supplied by the caller.

    Returns:
        The first matching ``ProviderRoute``, or ``None`` when no prefix
        matches.
    """
    needle = model.lower()
    candidates = (
        candidate
        for candidate in PROVIDER_ROUTES
        if needle.startswith(candidate.prefix.lower())
    )
    # next() stops at the first hit, preserving table order.
    return next(candidates, None)
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def _get_adapter_map():
    """Build the adapter-kind → adapter-class mapping.

    The import of ``debugai.sdk`` happens inside the function so that it does
    not run while this module is being loaded (avoids a circular import).

    Returns:
        A new dict keyed by adapter kind. ``"openai_compat"`` and
        ``"ollama"`` share the same adapter class.
    """
    from debugai.sdk import (
        _AnthropicAdapter, _CohereAdapter,
        _OpenAIAdapter, _OpenAICompatAdapter,
    )

    compat = _OpenAICompatAdapter
    return dict(
        openai=_OpenAIAdapter,
        anthropic=_AnthropicAdapter,
        openai_compat=compat,
        ollama=compat,
        cohere=_CohereAdapter,
    )
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
# Lazily evaluated so there's no circular import at load time.
|
|
119
|
+
class _AdapterMapProxy(dict):
    """Adapter-kind → adapter-class dict that fills itself on first read.

    The built-in adapters come from ``_get_adapter_map()``, which is called
    only when the mapping is first read. Every read path triggers the load:
    lookup, ``get``, ``in``, ``len``, iteration, and ``keys`` / ``values`` /
    ``items``. This keeps membership tests and iteration from seeing an
    empty dict before the first lookup.

    Entries stored before the first read are kept: the built-ins only fill
    keys that are still missing.
    """

    _loaded = False

    def _ensure(self):
        """Merge the built-in adapters into the dict, once."""
        if self._loaded:
            return
        # setdefault, not update: an adapter registered before the first
        # read must win over the built-in one for the same key.
        for kind, adapter_cls in _get_adapter_map().items():
            super().setdefault(kind, adapter_cls)
        self._loaded = True

    def get(self, key, default=None):
        self._ensure()
        return super().get(key, default)

    def __getitem__(self, key):
        self._ensure()
        return super().__getitem__(key)

    def __contains__(self, key):
        self._ensure()
        return super().__contains__(key)

    def __iter__(self):
        self._ensure()
        return super().__iter__()

    def __len__(self):
        self._ensure()
        return super().__len__()

    def keys(self):
        self._ensure()
        return super().keys()

    def values(self):
        self._ensure()
        return super().values()

    def items(self):
        self._ensure()
        return super().items()


_ADAPTER_MAP = _AdapterMapProxy()
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
def make_client(route: ProviderRoute, config: "DebugAIConfig") -> Any:
    """Build the provider client for ``route``.

    All OpenAI-compatible providers are instantiated through the OpenAI SDK,
    so no per-provider package is needed for them. Provider SDKs are imported
    lazily, inside the branch that needs them.

    Args:
        route: Routing-table entry; ``adapter`` selects the branch, and
            ``prefix``, ``base_url`` and ``api_key_env`` parameterise it.
        config: SDK configuration. Only the optional ``ollama_base_url``
            attribute is read here.

    Returns:
        An ``Anthropic``, ``cohere.ClientV2`` or ``OpenAI`` client instance.

    Raises:
        ImportError: The selected provider's SDK is not installed. For
            Cohere the message names the missing package and the original
            error is attached as the cause.
        ValueError: ``route.adapter`` is not a known adapter kind.
    """
    adapter = route.adapter

    if adapter == "anthropic":
        from anthropic import Anthropic
        return Anthropic(timeout=60.0, max_retries=2)

    if adapter == "cohere":
        # Only the import is guarded, so an ImportError raised while
        # constructing the client is not misreported as a missing package.
        try:
            import cohere  # optional dependency
        except ImportError as err:
            raise ImportError(
                "Cohere models require the 'cohere' package: pip install cohere"
            ) from err
        return cohere.ClientV2(
            api_key=os.environ.get(route.api_key_env or "COHERE_API_KEY", ""))

    if adapter == "ollama":
        from openai import OpenAI
        # Precedence: config attribute, then environment, then local default.
        base_url = (getattr(config, "ollama_base_url", None)
                    or os.environ.get("OLLAMA_BASE_URL")
                    or "http://localhost:11434/v1")
        return OpenAI(base_url=base_url, api_key="ollama", timeout=120.0)

    if adapter in ("openai_compat", "openai"):
        from openai import OpenAI
        base_url = route.base_url

        if adapter == "openai_compat" and route.prefix.startswith("azure/"):
            # NOTE(review): with AZURE_OPENAI_ENDPOINT unset this yields the
            # relative URL "/openai"; behaviour kept as-is.
            base_url = (os.environ.get("AZURE_OPENAI_ENDPOINT", "")
                        .rstrip("/") + "/openai")

        if route.api_key_env:
            api_key = os.environ.get(route.api_key_env, "")
        elif adapter == "openai_compat":
            # Key-less compat routes still pass a non-empty placeholder key.
            api_key = "no-key-needed"
        else:
            api_key = ""

        return OpenAI(
            base_url=base_url,
            api_key=api_key,
            timeout=60.0,
            max_retries=2,
        )

    raise ValueError(f"Unknown adapter type {route.adapter!r} in routing table.")
|
debugai/schema.py
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
"""SDK capture schema (Architecture §3).
|
|
2
|
+
|
|
3
|
+
The unified payload every integration level produces. Only the Core IO group is
|
|
4
|
+
strictly required; retrieval and runtime groups unlock RAG-specific and
|
|
5
|
+
capacity signals respectively.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from dataclasses import dataclass, field
|
|
11
|
+
from typing import Any
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@dataclass
class CaptureRecord:
    """One captured LLM request: prompt, context, output and metadata.

    Field groups (§3.1):
      Core IO     — the only required inputs (``user_prompt``, ``llm_output``).
      Retrieval   — RAG-specific signals.
      Metadata    — pipeline configuration.
      Runtime     — auto-captured metrics.

    Raises:
        ValueError: On construction, when ``user_prompt`` is empty/falsy or
            ``llm_output`` is ``None``.
    """

    # --- Core IO (required) ---
    user_prompt: str
    llm_output: str
    system_prompt: str = ""
    expected_output: str | None = None

    # --- Retrieval context (RAG) ---
    retrieved_chunks: list[str] = field(default_factory=list)
    similarity_scores: list[float] = field(default_factory=list)
    retrieval_query: str | None = None
    chunk_sources: list[dict[str, Any]] = field(default_factory=list)

    # --- Pipeline metadata ---
    model_name: str | None = None
    temperature: float | None = None
    max_tokens: int | None = None
    tool_calls: list[dict[str, Any]] = field(default_factory=list)

    # --- Runtime metrics ---
    latency_ms: int | None = None
    token_usage: dict[str, int] = field(default_factory=dict)
    timestamp: str | None = None
    error_code: str | None = None

    # --- Optional capacity hints (used by ratio signals) ---
    context_window: int | None = None  # model's max context window in tokens

    def __post_init__(self) -> None:
        """Validate the two Core IO fields, prompt first."""
        prompt_missing = not self.user_prompt
        if prompt_missing:
            raise ValueError("user_prompt is required (Core IO)")

        # An empty-string output is accepted; only None is rejected.
        output_missing = self.llm_output is None
        if output_missing:
            raise ValueError("llm_output is required (Core IO)")

    @property
    def context_text(self) -> str:
        """All retrieved chunks joined with newlines (empty string if none)."""
        chunks = self.retrieved_chunks
        return "\n".join(chunks)

    @property
    def full_prompt(self) -> str:
        """System and user prompt joined by a newline, outer whitespace stripped."""
        combined = "\n".join((self.system_prompt, self.user_prompt))
        return combined.strip()
|