rag_debugger-1.0.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,77 @@
1
+ import uuid
2
+ from contextlib import asynccontextmanager
3
+ from .emitter import configure, stop_worker
4
+ from .decorators import rag_trace
5
+ from .context import set_trace_id, set_query_id, reset_context
6
+ from .context import _trace_id, _query_id
7
+
8
+ __version__ = "1.0.0"
9
+
10
+ _initialized = False
11
+
12
+
13
def init(dashboard_url: str = "http://localhost:7777") -> None:
    """Call once at application startup.

    Only records the dashboard URL; the background worker starts lazily on
    the first ``emit()`` call, so this is safe to call at import time or
    before any event loop is running.
    """
    global _initialized
    configure(dashboard_url)
    _initialized = True
23
+
24
+
25
def new_trace(
    trace_id: str | None = None,
    query_id: str | None = None,
) -> None:
    """Explicitly set trace/query IDs (optional — auto-generated if not called)."""
    # Only pin the IDs the caller actually provided; unset ones keep
    # their auto-generation behavior.
    for value, setter in ((trace_id, set_trace_id), (query_id, set_query_id)):
        if value:
            setter(value)
34
+
35
+
36
+ class _TraceHandle:
37
+ """Lightweight handle returned by the ``trace()`` context manager."""
38
+ __slots__ = ("trace_id", "query_id")
39
+
40
+ def __init__(self, trace_id: str, query_id: str) -> None:
41
+ self.trace_id = trace_id
42
+ self.query_id = query_id
43
+
44
+
45
@asynccontextmanager
async def trace(
    trace_id: str | None = None,
    query_id: str | None = None,
):
    """Scope events to an explicit trace via an async context manager.

    Usage::

        async with rag_debugger.trace(trace_id="req-123") as t:
            print(t.trace_id)
            result = await my_rag_pipeline(query)
        # Context is automatically restored after the block

    Nested ``trace()`` contexts work correctly — exiting an inner block
    restores the outer one.
    """
    tid = trace_id if trace_id else str(uuid.uuid4())
    qid = query_id if query_id else str(uuid.uuid4())

    # ContextVar tokens remember whatever values were active before,
    # which is what makes nesting restore correctly.
    tokens = (_trace_id.set(tid), _query_id.set(qid))
    try:
        yield _TraceHandle(tid, qid)
    finally:
        _trace_id.reset(tokens[0])
        _query_id.reset(tokens[1])
75
+
76
+
77
+ __all__ = ["init", "rag_trace", "new_trace", "reset_context", "trace", "stop_worker", "__version__"]
File without changes
@@ -0,0 +1,80 @@
1
+ try:
2
+ from langchain_core.callbacks import BaseCallbackHandler
3
+ from langchain_core.outputs import LLMResult
4
+ except ImportError:
5
+ raise ImportError(
6
+ "LangChain adapter requires langchain-core. "
7
+ "Install with: pip install rag-debugger[langchain]"
8
+ )
9
+
10
+ import asyncio
11
+ import time
12
+ import uuid
13
+ from ..context import get_or_create_trace_id, get_or_create_query_id
14
+ from ..emitter import emit
15
+
16
+
17
class RAGDebuggerCallback(BaseCallbackHandler):
    """
    LangChain callback handler.
    Usage:
        from rag_debugger.adapters.langchain import RAGDebuggerCallback
        handler = RAGDebuggerCallback()
        chain.invoke({"query": "..."}, config={"callbacks": [handler]})
    """

    def __init__(self) -> None:
        self._retriever_start: float = 0
        self._llm_start: float = 0
        self._query_text: str = ""
        # Strong references to in-flight emit tasks. asyncio keeps only a
        # weak reference to tasks, so a fire-and-forget create_task() result
        # can be garbage-collected before it runs, silently dropping events.
        self._pending_tasks: set = set()

    def _schedule_emit(self, event: dict) -> None:
        """Schedule emit() on the running loop; no-op when no loop is active."""
        try:
            loop = asyncio.get_running_loop()
        except RuntimeError:
            return  # No running loop — skip
        task = loop.create_task(emit(event))
        self._pending_tasks.add(task)
        task.add_done_callback(self._pending_tasks.discard)

    def on_retriever_start(self, serialized, query, **kwargs) -> None:
        self._retriever_start = time.time()
        self._query_text = query

    def on_retriever_end(self, documents, **kwargs) -> None:
        duration = (time.time() - self._retriever_start) * 1000
        chunks = [
            {
                "chunk_id": str(i),
                "text": doc.page_content[:1000],  # truncate to dashboard limit
                "cosine_score": doc.metadata.get("score", 0.0),
                "final_rank": i,
                "metadata": doc.metadata,
            }
            for i, doc in enumerate(documents)
        ]
        self._schedule_emit({
            "id": str(uuid.uuid4()),
            "trace_id": get_or_create_trace_id(),
            "query_id": get_or_create_query_id(),
            "stage": "retrieve",
            "ts_start": self._retriever_start,
            "duration_ms": duration,
            "query_text": self._query_text,
            "chunks": chunks,
        })

    def on_llm_start(self, serialized, prompts, **kwargs) -> None:
        self._llm_start = time.time()

    def on_llm_end(self, response: LLMResult, **kwargs) -> None:
        duration = (time.time() - self._llm_start) * 1000
        answer = response.generations[0][0].text if response.generations else ""
        self._schedule_emit({
            "id": str(uuid.uuid4()),
            "trace_id": get_or_create_trace_id(),
            "query_id": get_or_create_query_id(),
            "stage": "generate",
            "ts_start": self._llm_start,
            "duration_ms": duration,
            "generated_answer": answer,
        })
@@ -0,0 +1,105 @@
1
+ """LlamaIndex observer adapter for RAG Debugger SDK."""
2
+
3
+ try:
4
+ from llama_index.core.callbacks import CallbackManager, CBEventType, LlamaDebugHandler
5
+ from llama_index.core.callbacks.base_handler import BaseCallbackHandler
6
+ except ImportError:
7
+ raise ImportError(
8
+ "LlamaIndex adapter requires llama-index-core. "
9
+ "Install with: pip install rag-debugger[llamaindex]"
10
+ )
11
+
12
+ import asyncio
13
+ import time
14
+ import uuid
15
+ from typing import Any, Dict, List, Optional
16
+ from ..context import get_or_create_trace_id, get_or_create_query_id
17
+ from ..emitter import emit
18
+
19
+
20
+ class RAGDebuggerLlamaIndex(BaseCallbackHandler):
21
+ """
22
+ LlamaIndex callback handler for RAG Debugger.
23
+ Usage:
24
+ from rag_debugger.adapters.llamaindex import RAGDebuggerLlamaIndex
25
+ handler = RAGDebuggerLlamaIndex()
26
+ callback_manager = CallbackManager([handler])
27
+ index = VectorStoreIndex.from_documents(docs, callback_manager=callback_manager)
28
+ """
29
+
30
+ def __init__(self) -> None:
31
+ super().__init__([], [])
32
+ self._event_starts: Dict[str, float] = {}
33
+
34
+ def on_event_start(
35
+ self,
36
+ event_type: CBEventType,
37
+ payload: Optional[Dict[str, Any]] = None,
38
+ event_id: str = "",
39
+ **kwargs,
40
+ ) -> str:
41
+ self._event_starts[event_id] = time.time()
42
+ return event_id
43
+
44
+ def on_event_end(
45
+ self,
46
+ event_type: CBEventType,
47
+ payload: Optional[Dict[str, Any]] = None,
48
+ event_id: str = "",
49
+ **kwargs,
50
+ ) -> None:
51
+ start_time = self._event_starts.pop(event_id, time.time())
52
+ duration = (time.time() - start_time) * 1000
53
+
54
+ stage = self._map_event_type(event_type)
55
+ if stage is None:
56
+ return
57
+
58
+ event = {
59
+ "id": str(uuid.uuid4()),
60
+ "trace_id": get_or_create_trace_id(),
61
+ "query_id": get_or_create_query_id(),
62
+ "stage": stage,
63
+ "ts_start": start_time,
64
+ "duration_ms": duration,
65
+ }
66
+
67
+ if payload:
68
+ if stage == "retrieve" and "nodes" in payload:
69
+ event["chunks"] = [
70
+ {
71
+ "chunk_id": str(i),
72
+ "text": str(getattr(n, "text", ""))[:1000],
73
+ "cosine_score": float(getattr(n, "score", 0.0)),
74
+ "final_rank": i,
75
+ }
76
+ for i, n in enumerate(payload["nodes"])
77
+ ]
78
+ elif stage == "generate" and "response" in payload:
79
+ event["generated_answer"] = str(payload["response"])
80
+
81
+ try:
82
+ loop = asyncio.get_running_loop()
83
+ loop.create_task(emit(event))
84
+ except RuntimeError:
85
+ pass
86
+
87
+ def start_trace(self, trace_id: Optional[str] = None) -> None:
88
+ pass
89
+
90
+ def end_trace(
91
+ self,
92
+ trace_id: Optional[str] = None,
93
+ trace_map: Optional[Dict[str, List[str]]] = None,
94
+ ) -> None:
95
+ pass
96
+
97
+ @staticmethod
98
+ def _map_event_type(event_type: CBEventType) -> Optional[str]:
99
+ mapping = {
100
+ CBEventType.EMBEDDING: "embed",
101
+ CBEventType.RETRIEVE: "retrieve",
102
+ CBEventType.RERANKING: "rerank",
103
+ CBEventType.LLM: "generate",
104
+ }
105
+ return mapping.get(event_type)
@@ -0,0 +1,112 @@
1
+ """OpenAI wrapper adapter for RAG Debugger SDK."""
2
+
3
+ try:
4
+ import openai
5
+ except ImportError:
6
+ raise ImportError(
7
+ "OpenAI adapter requires openai. "
8
+ "Install with: pip install rag-debugger[openai]"
9
+ )
10
+
11
+ import asyncio
12
+ import time
13
+ import uuid
14
+ from typing import Any
15
+ from ..context import get_or_create_trace_id, get_or_create_query_id
16
+ from ..emitter import emit
17
+
18
+
19
+ class RAGDebuggerOpenAI:
20
+ """
21
+ Wrapper around OpenAI client that auto-instruments embedding and completion calls.
22
+ Usage:
23
+ from rag_debugger.adapters.openai import RAGDebuggerOpenAI
24
+ client = RAGDebuggerOpenAI(openai.AsyncOpenAI())
25
+ embeddings = await client.embed("hello world")
26
+ response = await client.complete("hello world", system="You are helpful")
27
+ """
28
+
29
+ def __init__(self, client: Any) -> None:
30
+ self._client = client
31
+
32
+ async def embed(self, text: str, model: str = "text-embedding-3-small") -> list[float]:
33
+ ts_start = time.time()
34
+ try:
35
+ response = await self._client.embeddings.create(
36
+ input=text,
37
+ model=model,
38
+ )
39
+ vector = response.data[0].embedding
40
+ duration = (time.time() - ts_start) * 1000
41
+
42
+ await emit({
43
+ "id": str(uuid.uuid4()),
44
+ "trace_id": get_or_create_trace_id(),
45
+ "query_id": get_or_create_query_id(),
46
+ "stage": "embed",
47
+ "ts_start": ts_start,
48
+ "duration_ms": duration,
49
+ "query_text": text[:500],
50
+ "query_vector": vector[:1536],
51
+ })
52
+
53
+ return vector
54
+ except Exception as e:
55
+ duration = (time.time() - ts_start) * 1000
56
+ await emit({
57
+ "id": str(uuid.uuid4()),
58
+ "trace_id": get_or_create_trace_id(),
59
+ "query_id": get_or_create_query_id(),
60
+ "stage": "embed",
61
+ "ts_start": ts_start,
62
+ "duration_ms": duration,
63
+ "query_text": text[:500],
64
+ "error": str(e),
65
+ })
66
+ raise
67
+
68
+ async def complete(
69
+ self,
70
+ prompt: str,
71
+ system: str = "You are a helpful assistant.",
72
+ model: str = "gpt-4o-mini",
73
+ **kwargs,
74
+ ) -> str:
75
+ ts_start = time.time()
76
+ try:
77
+ response = await self._client.chat.completions.create(
78
+ model=model,
79
+ messages=[
80
+ {"role": "system", "content": system},
81
+ {"role": "user", "content": prompt},
82
+ ],
83
+ **kwargs,
84
+ )
85
+ answer = response.choices[0].message.content or ""
86
+ duration = (time.time() - ts_start) * 1000
87
+
88
+ await emit({
89
+ "id": str(uuid.uuid4()),
90
+ "trace_id": get_or_create_trace_id(),
91
+ "query_id": get_or_create_query_id(),
92
+ "stage": "generate",
93
+ "ts_start": ts_start,
94
+ "duration_ms": duration,
95
+ "query_text": prompt[:500],
96
+ "generated_answer": answer,
97
+ })
98
+
99
+ return answer
100
+ except Exception as e:
101
+ duration = (time.time() - ts_start) * 1000
102
+ await emit({
103
+ "id": str(uuid.uuid4()),
104
+ "trace_id": get_or_create_trace_id(),
105
+ "query_id": get_or_create_query_id(),
106
+ "stage": "generate",
107
+ "ts_start": ts_start,
108
+ "duration_ms": duration,
109
+ "query_text": prompt[:500],
110
+ "error": str(e),
111
+ })
112
+ raise
@@ -0,0 +1,34 @@
1
+ from contextvars import ContextVar
2
+ import uuid
3
+
4
+ _trace_id: ContextVar[str] = ContextVar("rag_trace_id", default="")
5
+ _query_id: ContextVar[str] = ContextVar("rag_query_id", default="")
6
+
7
+
8
+ def get_or_create_trace_id() -> str:
9
+ tid = _trace_id.get()
10
+ if not tid:
11
+ tid = str(uuid.uuid4())
12
+ _trace_id.set(tid)
13
+ return tid
14
+
15
+
16
+ def get_or_create_query_id() -> str:
17
+ qid = _query_id.get()
18
+ if not qid:
19
+ qid = str(uuid.uuid4())
20
+ _query_id.set(qid)
21
+ return qid
22
+
23
+
24
+ def set_trace_id(tid: str) -> None:
25
+ _trace_id.set(tid)
26
+
27
+
28
+ def set_query_id(qid: str) -> None:
29
+ _query_id.set(qid)
30
+
31
+
32
+ def reset_context() -> None:
33
+ _trace_id.set("")
34
+ _query_id.set("")
@@ -0,0 +1,180 @@
1
+ import asyncio
2
+ import time
3
+ import uuid
4
+ from collections import OrderedDict
5
+ from functools import wraps
6
+ from typing import Literal
7
+ from .context import get_or_create_trace_id, get_or_create_query_id
8
+ from .emitter import emit
9
+
10
+ RAGStage = Literal["embed", "retrieve", "rerank", "generate"]
11
+
12
+ # Track stages per query for session_complete calculation.
13
+ # OrderedDict for FIFO eviction when cap is exceeded (BUG 1 fix).
14
+ _query_stages: OrderedDict[str, list] = OrderedDict()
15
+ _STAGES_CAP = 500
16
+ _STAGES_EVICT = 100
17
+
18
+ MAX_VECTOR_DIMS = 4096 # Safety cap — no real model exceeds this
19
+
20
+
21
+ def _enforce_stages_cap() -> None:
22
+ """Evict oldest entries if _query_stages exceeds the safety cap."""
23
+ if len(_query_stages) > _STAGES_CAP:
24
+ for _ in range(_STAGES_EVICT):
25
+ if _query_stages:
26
+ _query_stages.popitem(last=False)
27
+
28
+
29
def rag_trace(stage: RAGStage):
    """
    Decorator for any RAG pipeline function.
    Works with both async and sync functions.
    Auto-generates trace_id and query_id via ContextVar.
    Emits session_complete after 'generate' stage.

    Sync function support is best-effort. If the decorated sync function
    is called inside an async framework (FastAPI, Django async views, etc.),
    use ``async def`` with ``await`` instead.
    """
    def decorator(func):
        @wraps(func)
        async def async_wrapper(*args, **kwargs):
            trace_id = get_or_create_trace_id()
            query_id = get_or_create_query_id()
            ts_start = time.time()

            event = {
                "id": str(uuid.uuid4()),
                "trace_id": trace_id,
                "query_id": query_id,
                "stage": stage,
                "ts_start": ts_start,
            }

            # Capture query text from first string argument
            if args and isinstance(args[0], str):
                event["query_text"] = args[0][:500]  # truncate

            try:
                result = await func(*args, **kwargs)
            except Exception as e:
                event["duration_ms"] = (time.time() - ts_start) * 1000
                event["error"] = str(e)
                _query_stages.pop(query_id, None)  # BUG 1: clean up on error
                await emit(event)
                raise

            event["duration_ms"] = (time.time() - ts_start) * 1000
            event["output"] = _safe_serialize(result, stage)
            _enrich_event(event, result, stage)
            await emit(event)
            _track_stage(query_id, stage, event)

            # Emit session_complete after generate
            if stage == "generate":
                await _emit_session_complete(query_id, trace_id, event, result)

            return result

        @wraps(func)
        def sync_wrapper(*args, **kwargs):
            # BUG 2 fix, simplified: get_running_loop() raises RuntimeError
            # iff no loop is running, so no is_running() check and no fragile
            # string-matching on our own exception message is needed.
            try:
                asyncio.get_running_loop()
            except RuntimeError:
                # No running loop — safe to drive the async wrapper ourselves.
                return asyncio.run(async_wrapper(*args, **kwargs))
            raise RuntimeError(
                "rag_trace: cannot use a sync function inside a running "
                "async event loop. Use 'async def' with 'await' instead."
            )

        return async_wrapper if asyncio.iscoroutinefunction(func) else sync_wrapper
    return decorator
99
+
100
+
101
+ def _enrich_event(event: dict, result, stage: str) -> None:
102
+ """Add stage-specific output fields."""
103
+ if stage == "embed" and isinstance(result, list):
104
+ event["query_vector"] = result[:MAX_VECTOR_DIMS]
105
+ event.setdefault("metadata", {})["vector_dims"] = len(result)
106
+ elif stage in ("retrieve", "rerank") and isinstance(result, list):
107
+ event["chunks"] = [_to_chunk_dict(c, i) for i, c in enumerate(result)]
108
+ elif stage == "generate" and isinstance(result, str):
109
+ event["generated_answer"] = result
110
+
111
+
112
+ def _to_chunk_dict(chunk, rank: int) -> dict:
113
+ # BUG 6: Check for LangChain Document (and similar objects with page_content)
114
+ if hasattr(chunk, "page_content"):
115
+ metadata = dict(chunk.metadata) if hasattr(chunk, "metadata") and chunk.metadata else {}
116
+ return {
117
+ "chunk_id": getattr(chunk, "id", None) or str(rank),
118
+ "text": str(chunk.page_content)[:1000],
119
+ "cosine_score": float(metadata.get("score", metadata.get("relevance_score", 0.0))),
120
+ "rerank_score": metadata.get("rerank_score"),
121
+ "final_rank": rank,
122
+ "metadata": metadata,
123
+ }
124
+ if isinstance(chunk, dict):
125
+ return {
126
+ "chunk_id": chunk.get("id", str(rank)),
127
+ "text": str(chunk.get("text", chunk.get("page_content", "")))[:1000],
128
+ "cosine_score": float(chunk.get("score", chunk.get("cosine_score", 0.0))),
129
+ "rerank_score": chunk.get("rerank_score"),
130
+ "final_rank": rank,
131
+ "metadata": chunk.get("metadata", {}),
132
+ }
133
+ return {
134
+ "chunk_id": str(rank),
135
+ "text": str(chunk)[:500],
136
+ "cosine_score": 0.0,
137
+ "final_rank": rank,
138
+ }
139
+
140
+
141
+ def _safe_serialize(value, stage: str):
142
+ if stage == "embed":
143
+ return None # vectors sent separately
144
+ try:
145
+ import json
146
+ json.dumps(value)
147
+ return value
148
+ except Exception:
149
+ return str(value)[:200]
150
+
151
+
152
def _track_stage(query_id: str, stage: str, event: dict) -> None:
    """Record a completed stage for later session_complete aggregation."""
    record = {
        "stage": stage,
        "duration_ms": event.get("duration_ms", 0),
    }
    _query_stages.setdefault(query_id, []).append(record)
    _enforce_stages_cap()
160
+
161
+
162
async def _emit_session_complete(
    query_id: str, trace_id: str, gen_event: dict, answer
) -> None:
    """Emit the per-query summary event once the generate stage finishes.

    Pops (and thereby frees) the tracked stage list for this query.
    """
    stages = _query_stages.pop(query_id, [])
    summary = {
        "id": str(uuid.uuid4()),
        "trace_id": trace_id,
        "query_id": query_id,
        "stage": "session_complete",
        "ts_start": time.time(),
        "duration_ms": sum(entry["duration_ms"] for entry in stages),
        "query_text": gen_event.get("query_text"),
        "generated_answer": str(answer) if answer else None,
        "metadata": {
            "stage_count": len(stages),
            "has_error": False,
        },
    }
    await emit(summary)
@@ -0,0 +1,72 @@
1
+ import asyncio
2
+ import sys
3
+ import httpx
4
+ from .scrubber import scrub_event
5
+
6
+ _queue: asyncio.Queue | None = None
7
+ _dashboard_url: str = "http://localhost:7777"
8
+ _worker_task: asyncio.Task | None = None
9
+ _init_lock: asyncio.Lock | None = None
10
+
11
+ # BUG 3: Track dropped events and warn periodically
12
+ _drop_count: int = 0
13
+
14
+
15
+ def configure(dashboard_url: str) -> None:
16
+ global _dashboard_url
17
+ _dashboard_url = dashboard_url.rstrip("/")
18
+
19
+
20
+ async def _emit_worker() -> None:
21
+ """Background worker that drains the queue and POSTs events."""
22
+ async with httpx.AsyncClient(timeout=10.0) as client:
23
+ while True:
24
+ try:
25
+ event = await asyncio.wait_for(_queue.get(), timeout=1.0)
26
+ for attempt in range(3):
27
+ try:
28
+ await client.post(f"{_dashboard_url}/events", json=event)
29
+ break
30
+ except Exception:
31
+ await asyncio.sleep(0.5 * (2 ** attempt))
32
+ _queue.task_done()
33
+ except asyncio.TimeoutError:
34
+ continue
35
+ except asyncio.CancelledError:
36
+ break
37
+
38
+
39
+ async def _ensure_worker_started() -> None:
40
+ """Lazily initialize queue and worker on first emit (BUG 4 fix)."""
41
+ global _queue, _worker_task, _init_lock
42
+ if _queue is not None:
43
+ return # already started
44
+ if _init_lock is None:
45
+ _init_lock = asyncio.Lock()
46
+ async with _init_lock:
47
+ if _queue is None: # double-check after acquiring lock
48
+ _queue = asyncio.Queue(maxsize=1000)
49
+ _worker_task = asyncio.create_task(_emit_worker())
50
+
51
+
52
+ async def stop_worker() -> None:
53
+ if _worker_task:
54
+ await _queue.join()
55
+ _worker_task.cancel()
56
+
57
+
58
+ async def emit(event: dict) -> None:
59
+ """Non-blocking enqueue. Warns on drops instead of silently losing events."""
60
+ global _drop_count
61
+ await _ensure_worker_started()
62
+ scrubbed = scrub_event(event)
63
+ try:
64
+ _queue.put_nowait(scrubbed)
65
+ except asyncio.QueueFull:
66
+ _drop_count += 1
67
+ if _drop_count == 1 or _drop_count % 50 == 0:
68
+ print(
69
+ f"[rag-debugger] WARNING: event dropped (total dropped: {_drop_count})"
70
+ f" — is the server running at {_dashboard_url}?",
71
+ file=sys.stderr,
72
+ )
rag_debugger/models.py ADDED
@@ -0,0 +1,27 @@
1
+ from pydantic import BaseModel, Field
2
+ from typing import Literal
3
+ import uuid
4
+
5
+
6
+ class ChunkScore(BaseModel):
7
+ chunk_id: str
8
+ text: str
9
+ cosine_score: float
10
+ rerank_score: float | None = None
11
+ final_rank: int
12
+ metadata: dict = Field(default_factory=dict)
13
+
14
+
15
+ class RAGEvent(BaseModel):
16
+ id: str = Field(default_factory=lambda: str(uuid.uuid4()))
17
+ trace_id: str
18
+ query_id: str
19
+ stage: Literal["embed", "retrieve", "rerank", "generate", "session_complete"]
20
+ ts_start: float
21
+ duration_ms: float | None = None
22
+ query_text: str | None = None
23
+ query_vector: list[float] | None = None
24
+ chunks: list[ChunkScore] | None = None
25
+ generated_answer: str | None = None
26
+ error: str | None = None
27
+ metadata: dict = Field(default_factory=dict)
rag_debugger/py.typed ADDED
File without changes
@@ -0,0 +1,29 @@
1
+ """PII scrubber — redacts emails, phone numbers, SSNs from event payloads."""
2
+ import re
3
+
4
# (pattern, replacement) pairs applied in order to every string value.
_PATTERNS = [
    # Email addresses. Fixed character class: the original `[A-Z|a-z]`
    # also matched a literal '|' inside the TLD.
    (re.compile(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b'), "[EMAIL]"),
    # US-style phone numbers: 3-3-4 digits with optional '-' or '.' separators.
    (re.compile(r'\b\d{3}[-.]?\d{3}[-.]?\d{4}\b'), "[PHONE]"),
    # US Social Security numbers: 3-2-4 digits, dashed.
    (re.compile(r'\b\d{3}-\d{2}-\d{4}\b'), "[SSN]"),
    # OpenAI-style secret keys.
    (re.compile(r'\bsk-[a-zA-Z0-9]{20,}\b'), "[API_KEY]"),
]


def scrub(value):
    """Recursively scrub PII from strings, dicts, and lists.

    Non-container, non-string values pass through unchanged.
    """
    if isinstance(value, str):
        if not value:
            return value
        for pattern, replacement in _PATTERNS:
            value = pattern.sub(replacement, value)
        return value
    if isinstance(value, dict):
        return {k: scrub(v) for k, v in value.items()}
    if isinstance(value, list):
        return [scrub(item) for item in value]
    return value


def scrub_event(event: dict) -> dict:
    """Scrub PII from all string fields recursively in an event dict."""
    return scrub(event)
@@ -0,0 +1,174 @@
1
+ Metadata-Version: 2.4
2
+ Name: rag-debugger
3
+ Version: 1.0.0
4
+ Summary: Real-time debugging SDK for RAG pipelines
5
+ Project-URL: Homepage, https://github.com/ChanduBobbili/rag-debugger
6
+ Project-URL: Repository, https://github.com/ChanduBobbili/rag-debugger
7
+ Project-URL: Issues, https://github.com/ChanduBobbili/rag-debugger/issues
8
+ Author: Chandu Bobbili
9
+ License: MIT
10
+ Keywords: debugging,llm,observability,rag,tracing
11
+ Classifier: Development Status :: 3 - Alpha
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: License :: OSI Approved :: MIT License
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.10
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Programming Language :: Python :: 3.13
19
+ Classifier: Topic :: Software Development :: Debuggers
20
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
21
+ Requires-Python: >=3.10
22
+ Requires-Dist: httpx>=0.24.0
23
+ Requires-Dist: pydantic>=2.0.0
24
+ Provides-Extra: all
25
+ Requires-Dist: langchain-core>=0.1.0; extra == 'all'
26
+ Requires-Dist: llama-index-core>=0.10.0; extra == 'all'
27
+ Requires-Dist: openai>=1.0.0; extra == 'all'
28
+ Provides-Extra: langchain
29
+ Requires-Dist: langchain-core>=0.1.0; extra == 'langchain'
30
+ Provides-Extra: llamaindex
31
+ Requires-Dist: llama-index-core>=0.10.0; extra == 'llamaindex'
32
+ Provides-Extra: openai
33
+ Requires-Dist: openai>=1.0.0; extra == 'openai'
34
+ Description-Content-Type: text/markdown
35
+
36
+ # RAG Debugger SDK 🔍
37
+
38
+ [![PyPI version](https://img.shields.io/pypi/v/rag-debugger.svg)](https://pypi.org/project/rag-debugger/)
39
+ [![Python](https://img.shields.io/pypi/pyversions/rag-debugger.svg)](https://pypi.org/project/rag-debugger/)
40
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
41
+
42
+ **One-line decorator to debug your RAG pipelines in real time.**
43
+
44
+ Instrument any Python RAG pipeline with `@rag_trace` — captures inputs, outputs, timing, and errors for every stage (embed → retrieve → rerank → generate) and streams them to the [RAG Debugger Dashboard](https://github.com/ChanduBobbili/rag-debugger).
45
+
46
+ ## Features
47
+
48
+ - 🔗 **One decorator** — `@rag_trace("retrieve")` on your existing functions
49
+ - ⚡ **Non-blocking** — async background worker, never slows your pipeline
50
+ - 🧵 **Auto-correlation** — `trace_id` / `query_id` via `ContextVar` (no manual threading)
51
+ - 🔒 **PII scrubbing** — emails, phone numbers, SSNs, API keys automatically redacted
52
+ - 🔌 **Framework adapters** — LangChain, LlamaIndex, and OpenAI out of the box
53
+ - 🛡️ **Safe** — errors in the SDK never crash your application
54
+
55
+ ## Installation
56
+
57
+ ```bash
58
+ pip install rag-debugger
59
+ ```
60
+
61
+ With framework adapters:
62
+
63
+ ```bash
64
+ pip install rag-debugger[langchain] # LangChain
65
+ pip install rag-debugger[llamaindex] # LlamaIndex
66
+ pip install rag-debugger[openai] # OpenAI
67
+ pip install rag-debugger[all] # All adapters
68
+ ```
69
+
70
+ ## Quick Start
71
+
72
+ ```python
73
+ from rag_debugger import init, rag_trace
74
+
75
+ # 1. Point to your RAG Debugger server
76
+ init(dashboard_url="http://localhost:7777")
77
+
78
+ # 2. Decorate your pipeline functions
79
+ @rag_trace("embed")
80
+ async def embed_query(query: str) -> list[float]:
81
+ return await my_embedder.embed(query)
82
+
83
+ @rag_trace("retrieve")
84
+ async def retrieve_chunks(vector: list[float], k: int = 10):
85
+ return await vector_store.query(vector, k)
86
+
87
+ @rag_trace("rerank")
88
+ async def rerank(query: str, chunks: list) -> list:
89
+ return await reranker.rerank(query, chunks)
90
+
91
+ @rag_trace("generate")
92
+ async def generate(query: str, context: str) -> str:
93
+ return await llm.complete(query, context)
94
+
95
+ # 3. Call your pipeline — traces appear in the dashboard
96
+ answer = await generate(query, context)
97
+ ```
98
+
99
+ The decorator automatically:
100
+
101
+ - Generates `trace_id` and `query_id` per request
102
+ - Captures function inputs and outputs
103
+ - Measures `duration_ms` for each stage
104
+ - Emits a `session_complete` summary after the generate stage
105
+ - Scrubs PII before sending
106
+
107
+ ## Framework Adapters
108
+
109
+ ### LangChain
110
+
111
+ ```python
112
+ from rag_debugger.adapters.langchain import RAGDebuggerCallback
113
+
114
+ handler = RAGDebuggerCallback()
115
+ chain.invoke({"query": "..."}, config={"callbacks": [handler]})
116
+ ```
117
+
118
+ ### LlamaIndex
119
+
120
+ ```python
121
+ from rag_debugger.adapters.llamaindex import RAGDebuggerLlamaIndex
122
+ from llama_index.core.callbacks import CallbackManager
123
+
124
+ handler = RAGDebuggerLlamaIndex()
125
+ callback_manager = CallbackManager([handler])
126
+ index = VectorStoreIndex.from_documents(docs, callback_manager=callback_manager)
127
+ ```
128
+
129
+ ### OpenAI
130
+
131
+ ```python
132
+ from rag_debugger.adapters.openai import RAGDebuggerOpenAI
133
+
134
+ client = RAGDebuggerOpenAI(openai.AsyncOpenAI())
135
+ embedding = await client.embed("What is RAG?")
136
+ response = await client.complete("Explain RAG")
137
+ ```
138
+
139
+ ## Advanced Usage
140
+
141
+ ### Explicit Trace Control
142
+
143
+ ```python
144
+ from rag_debugger import new_trace, reset_context
145
+
146
+ # Group events under a custom trace
147
+ new_trace(trace_id="my-trace-123", query_id="q-001")
148
+ await embed_query("What is RAG?")
149
+ await retrieve_chunks(vector)
150
+
151
+ # Reset for the next request
152
+ reset_context()
153
+ ```
154
+
155
+ ### Async Context Manager
156
+
157
+ ```python
158
+ import rag_debugger
159
+
160
+ async with rag_debugger.trace(trace_id="req-123") as t:
161
+ print(t.trace_id)
162
+ result = await my_rag_pipeline(query)
163
+ # Context is automatically restored after the block
164
+ ```
165
+
166
+ ## Documentation
167
+
168
+ - [Full SDK Documentation](https://github.com/ChanduBobbili/rag-debugger/blob/main/docs/SDK.md)
169
+ - [Server Documentation](https://github.com/ChanduBobbili/rag-debugger/blob/main/docs/SERVER.md)
170
+ - [Dashboard Documentation](https://github.com/ChanduBobbili/rag-debugger/blob/main/docs/DASHBOARD.md)
171
+
172
+ ## License
173
+
174
+ MIT — see [LICENSE](https://github.com/ChanduBobbili/rag-debugger/blob/main/LICENSE) for details.
@@ -0,0 +1,14 @@
1
+ rag_debugger/__init__.py,sha256=F0s6eeTEECWRfCETcGqAiyi8gjgfsuPcg4HmIEiWShs,2191
2
+ rag_debugger/context.py,sha256=n3GN4WZsq69GQAkrbr8OvpSX-y2strLr8wDBr6isTJ8,691
3
+ rag_debugger/decorators.py,sha256=0O3Ub7Sh1o5Gg0tS2CJHfipfO7RKhVSnY2HCT0Wn0L4,6510
4
+ rag_debugger/emitter.py,sha256=zAXo_PyVJRHx5aY9giIL8oXIssPUOykZgSlFmQs77_s,2341
5
+ rag_debugger/models.py,sha256=lI6cy9QfwaA69kXa8__fz-ZtDJpzfRqcSVlc3fTmOxI,777
6
+ rag_debugger/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
+ rag_debugger/scrubber.py,sha256=e32i_k5X6KbRluiHxZhKvE3ei-38yF8JLChcWUMAZ0k,973
8
+ rag_debugger/adapters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
+ rag_debugger/adapters/langchain.py,sha256=TKLpfvST1tcACjA9ZF4_r098xWPIn7A9n3lbxEkk1Wc,2783
10
+ rag_debugger/adapters/llamaindex.py,sha256=Afs4COAZ_JoN0j9TpYlrmHS7Gic6oOrb65mwpKqbSCg,3319
11
+ rag_debugger/adapters/openai.py,sha256=fzCSi-qbeqLhjC5zfb63BzfXjlYMA5fL9KZGhQs-Dmg,3682
12
+ rag_debugger-1.0.0.dist-info/METADATA,sha256=8udIDBvuyYk-3-D7b0Dkd59Zejt82PB037OCn_N1tp8,5725
13
+ rag_debugger-1.0.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
14
+ rag_debugger-1.0.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.29.0
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any