ragdebug 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ragdebug-0.1.0/PKG-INFO +68 -0
- ragdebug-0.1.0/README.md +53 -0
- ragdebug-0.1.0/pyproject.toml +29 -0
- ragdebug-0.1.0/ragdebug/__init__.py +89 -0
- ragdebug-0.1.0/ragdebug/cli.py +159 -0
- ragdebug-0.1.0/ragdebug/client.py +177 -0
- ragdebug-0.1.0/ragdebug/config.py +33 -0
- ragdebug-0.1.0/ragdebug/debugger.py +25 -0
- ragdebug-0.1.0/ragdebug/evaluator.py +61 -0
- ragdebug-0.1.0/ragdebug/models.py +70 -0
- ragdebug-0.1.0/ragdebug/platform/__init__.py +1 -0
- ragdebug-0.1.0/ragdebug/platform/docker-compose.yml +79 -0
- ragdebug-0.1.0/ragdebug/prompt.py +95 -0
- ragdebug-0.1.0/ragdebug/spans.py +63 -0
- ragdebug-0.1.0/ragdebug/tracer.py +118 -0
- ragdebug-0.1.0/ragdebug.egg-info/PKG-INFO +68 -0
- ragdebug-0.1.0/ragdebug.egg-info/SOURCES.txt +20 -0
- ragdebug-0.1.0/ragdebug.egg-info/dependency_links.txt +1 -0
- ragdebug-0.1.0/ragdebug.egg-info/entry_points.txt +2 -0
- ragdebug-0.1.0/ragdebug.egg-info/requires.txt +10 -0
- ragdebug-0.1.0/ragdebug.egg-info/top_level.txt +1 -0
- ragdebug-0.1.0/setup.cfg +4 -0
ragdebug-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: ragdebug
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Observability SDK for RAG pipelines — auto-trace every stage of your RAG app.
|
|
5
|
+
Requires-Python: >=3.10
|
|
6
|
+
Description-Content-Type: text/markdown
|
|
7
|
+
Requires-Dist: httpx>=0.27
|
|
8
|
+
Requires-Dist: pydantic>=2.0
|
|
9
|
+
Requires-Dist: click>=8.0
|
|
10
|
+
Provides-Extra: test
|
|
11
|
+
Requires-Dist: pytest>=8.0; extra == "test"
|
|
12
|
+
Requires-Dist: pytest-asyncio>=0.24; extra == "test"
|
|
13
|
+
Provides-Extra: tiktoken
|
|
14
|
+
Requires-Dist: tiktoken>=0.7; extra == "tiktoken"
|
|
15
|
+
|
|
16
|
+
# RAG Debugger SDK
|
|
17
|
+
|
|
18
|
+
**Observability & Debugging Platform for RAG Pipelines**
|
|
19
|
+
|
|
20
|
+
`ragdebug` allows you to auto-trace every stage of your RAG application and start a comprehensive local dashboard for analysis with a single command.
|
|
21
|
+
|
|
22
|
+
## Quickstart
|
|
23
|
+
|
|
24
|
+
1. Install the SDK with the CLI power-ups:
|
|
25
|
+
```bash
|
|
26
|
+
pip install ragdebug
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
2. Start the local RAG Debugger platform:
|
|
30
|
+
```bash
|
|
31
|
+
ragdebug up
|
|
32
|
+
```
|
|
33
|
+
*Note: This requires Docker Desktop to be running.*
|
|
34
|
+
|
|
35
|
+
3. Open the dashboard at [http://localhost:8000](http://localhost:8000)
|
|
36
|
+
|
|
37
|
+
## Integrating Tracing
|
|
38
|
+
|
|
39
|
+
Add the `@trace` decorator to your RAG pipeline functions:
|
|
40
|
+
|
|
41
|
+
```python
|
|
42
|
+
from ragdebug import init, trace, Prompt
|
|
43
|
+
|
|
44
|
+
init(project="my-project")
|
|
45
|
+
|
|
46
|
+
test_prompt = Prompt(name="QA Prompt", template="Context: {{context}}\n\nQ: {{query}}")
|
|
47
|
+
|
|
48
|
+
@trace(name="qa_pipeline")
|
|
49
|
+
def answer_question(query: str):
|
|
50
|
+
# Your RAG logic here
|
|
51
|
+
return "Answer"
|
|
52
|
+
|
|
53
|
+
answer_question("What is RAG?")
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
## Advanced CLI Usage
|
|
57
|
+
|
|
58
|
+
| Command | Description |
|
|
59
|
+
|---|---|
|
|
60
|
+
| `ragdebug up` | Start all 4 containers (mongo, postgres, redis, platform) |
|
|
61
|
+
| `ragdebug up --build` | Force rebuild the platform image |
|
|
62
|
+
| `ragdebug down` | Stop all containers |
|
|
63
|
+
| `ragdebug status` | Show container status + API health check |
|
|
64
|
+
| `ragdebug logs` | Stream container logs |
|
|
65
|
+
|
|
66
|
+
## Documentation
|
|
67
|
+
|
|
68
|
+
For full documentation on Prompts, Evaluations, and more, check the UI dashboard after running `ragdebug up`.
|
ragdebug-0.1.0/README.md
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
# RAG Debugger SDK
|
|
2
|
+
|
|
3
|
+
**Observability & Debugging Platform for RAG Pipelines**
|
|
4
|
+
|
|
5
|
+
`ragdebug` allows you to auto-trace every stage of your RAG application and start a comprehensive local dashboard for analysis with a single command.
|
|
6
|
+
|
|
7
|
+
## Quickstart
|
|
8
|
+
|
|
9
|
+
1. Install the SDK with the CLI power-ups:
|
|
10
|
+
```bash
|
|
11
|
+
pip install ragdebug
|
|
12
|
+
```
|
|
13
|
+
|
|
14
|
+
2. Start the local RAG Debugger platform:
|
|
15
|
+
```bash
|
|
16
|
+
ragdebug up
|
|
17
|
+
```
|
|
18
|
+
*Note: This requires Docker Desktop to be running.*
|
|
19
|
+
|
|
20
|
+
3. Open the dashboard at [http://localhost:8000](http://localhost:8000)
|
|
21
|
+
|
|
22
|
+
## Integrating Tracing
|
|
23
|
+
|
|
24
|
+
Add the `@trace` decorator to your RAG pipeline functions:
|
|
25
|
+
|
|
26
|
+
```python
|
|
27
|
+
from ragdebug import init, trace, Prompt
|
|
28
|
+
|
|
29
|
+
init(project="my-project")
|
|
30
|
+
|
|
31
|
+
test_prompt = Prompt(name="QA Prompt", template="Context: {{context}}\n\nQ: {{query}}")
|
|
32
|
+
|
|
33
|
+
@trace(name="qa_pipeline")
|
|
34
|
+
def answer_question(query: str):
|
|
35
|
+
# Your RAG logic here
|
|
36
|
+
return "Answer"
|
|
37
|
+
|
|
38
|
+
answer_question("What is RAG?")
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
## Advanced CLI Usage
|
|
42
|
+
|
|
43
|
+
| Command | Description |
|
|
44
|
+
|---|---|
|
|
45
|
+
| `ragdebug up` | Start all 4 containers (mongo, postgres, redis, platform) |
|
|
46
|
+
| `ragdebug up --build` | Force rebuild the platform image |
|
|
47
|
+
| `ragdebug down` | Stop all containers |
|
|
48
|
+
| `ragdebug status` | Show container status + API health check |
|
|
49
|
+
| `ragdebug logs` | Stream container logs |
|
|
50
|
+
|
|
51
|
+
## Documentation
|
|
52
|
+
|
|
53
|
+
For full documentation on Prompts, Evaluations, and more, check the UI dashboard after running `ragdebug up`.
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=68.0", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "ragdebug"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Observability SDK for RAG pipelines — auto-trace every stage of your RAG app."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.10"
|
|
11
|
+
dependencies = [
|
|
12
|
+
"httpx>=0.27",
|
|
13
|
+
"pydantic>=2.0",
|
|
14
|
+
"click>=8.0",
|
|
15
|
+
]
|
|
16
|
+
|
|
17
|
+
[project.optional-dependencies]
|
|
18
|
+
test = ["pytest>=8.0", "pytest-asyncio>=0.24"]
|
|
19
|
+
tiktoken = ["tiktoken>=0.7"]
|
|
20
|
+
|
|
21
|
+
[project.scripts]
|
|
22
|
+
ragdebug = "ragdebug.cli:cli"
|
|
23
|
+
|
|
24
|
+
[tool.setuptools.packages.find]
|
|
25
|
+
include = ["ragdebug*"]
|
|
26
|
+
|
|
27
|
+
[tool.setuptools.package-data]
|
|
28
|
+
"ragdebug.platform" = ["*.yml", "*.yaml"]
|
|
29
|
+
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
"""
|
|
2
|
+
ragdebug — Observability SDK for RAG pipelines.
|
|
3
|
+
|
|
4
|
+
Usage:
|
|
5
|
+
from ragdebug import init, trace, span
|
|
6
|
+
|
|
7
|
+
init(api_key="rdb_xxx", project="my-rag-app")
|
|
8
|
+
|
|
9
|
+
@trace
|
|
10
|
+
def rag_pipeline(query: str) -> str:
|
|
11
|
+
with span("embedding"):
|
|
12
|
+
vector = embed(query)
|
|
13
|
+
with span("retrieval"):
|
|
14
|
+
docs = search(vector)
|
|
15
|
+
with span("llm_call"):
|
|
16
|
+
response = llm(docs, query)
|
|
17
|
+
return response
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
import logging
|
|
21
|
+
|
|
22
|
+
from ragdebug.config import set_config
|
|
23
|
+
from ragdebug.tracer import trace
|
|
24
|
+
from ragdebug.spans import span
|
|
25
|
+
from ragdebug.prompt import Prompt
|
|
26
|
+
from ragdebug.evaluator import evaluate
|
|
27
|
+
from ragdebug.debugger import RAGDebugger
|
|
28
|
+
|
|
29
|
+
logger = logging.getLogger("ragdebug")
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _check_backend(endpoint: str) -> None:
    """Non-blocking health check — warns if the backend is unreachable.

    Uses ``print`` (not the logger) in the unreachable case so the hint
    reaches the user even when logging is not configured.
    """
    try:
        import httpx

        resp = httpx.get(f"{endpoint}/health", timeout=2.0)
        if resp.status_code != 200:
            logger.warning(
                f"⚠ ragdebug backend returned status {resp.status_code}. "
                f"Dashboard may not be working."
            )
        else:
            logger.debug(f"Backend reachable at {endpoint}")
    except Exception:
        # Best-effort: any failure (including httpx missing) just prints a hint.
        print(
            f"\n⚠ ragdebug backend not reachable at {endpoint}\n"
            f"  Run 'ragdebug up' to start the platform.\n"
            f"  Or set enabled=False to silence this warning.\n"
        )
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def init(
    api_key: str = "",
    project: str = "",
    endpoint: str = "http://localhost:8000",
    enabled: bool = True,
    debug: bool = False,
    flush_interval: float = 5.0,
    batch_size: int = 10,
) -> None:
    """
    Initialize the ragdebug SDK.

    Args:
        api_key: API key for authentication (from the RAG Debugger dashboard).
        project: Project identifier (e.g. "my-rag-app").
        endpoint: RAG Debugger platform URL (default: localhost).
        enabled: Set to False to disable tracing without removing decorators.
        debug: Enable debug logging.
        flush_interval: Seconds between flush cycles (default: 5).
        batch_size: Max traces per flush batch (default: 10).
    """
    settings = dict(
        api_key=api_key,
        project=project,
        endpoint=endpoint,
        enabled=enabled,
        debug=debug,
        flush_interval=flush_interval,
        batch_size=batch_size,
    )
    set_config(**settings)

    # Only probe the backend when tracing is actually on.
    if enabled:
        _check_backend(endpoint)
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
# Public API surface. RAGDebugger is imported above for programmatic use,
# so it is exported here as well (previously missing from __all__).
__all__ = ["init", "trace", "span", "Prompt", "evaluate", "RAGDebugger"]
__version__ = "0.1.0"
|
|
89
|
+
|
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
"""
|
|
2
|
+
ragdebug CLI — manage the self-hosted platform via Docker Compose.
|
|
3
|
+
|
|
4
|
+
Usage:
|
|
5
|
+
ragdebug up Start the platform (MongoDB, PostgreSQL, Redis, Backend+Dashboard)
|
|
6
|
+
ragdebug down Stop the platform
|
|
7
|
+
ragdebug status Show container status and health check
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import importlib.resources
|
|
13
|
+
import shutil
|
|
14
|
+
import subprocess
|
|
15
|
+
import sys
|
|
16
|
+
|
|
17
|
+
import click
|
|
18
|
+
import httpx
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def _get_compose_file() -> str:
    """Resolve the path to the bundled docker-compose.yml.

    The original implementation exited the ``importlib.resources.as_file``
    context before returning, which invalidates the path when the package
    is imported from a zip (the temporary extraction is deleted on exit).
    We instead keep the context alive for the process lifetime via an
    ``ExitStack`` closed at interpreter exit.
    """
    import atexit
    from contextlib import ExitStack

    ref = importlib.resources.files("ragdebug") / "platform" / "docker-compose.yml"
    stack = ExitStack()
    # Close (and clean up any temp file) only when the process exits.
    atexit.register(stack.close)
    path = stack.enter_context(importlib.resources.as_file(ref))
    return str(path)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _docker_available() -> bool:
    """Check if Docker and Docker Compose are available."""
    # Docker binary on PATH?
    if shutil.which("docker") is None:
        click.secho("✗ Docker not found. Please install Docker Desktop:", fg="red")
        click.echo(" https://docs.docker.com/get-docker/")
        return False

    # Compose v2 plugin responds to `docker compose version`?
    probe = subprocess.run(
        ["docker", "compose", "version"],
        capture_output=True, text=True,
    )
    if probe.returncode == 0:
        return True

    click.secho("✗ Docker Compose not found. Please install Docker Desktop.", fg="red")
    return False
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def _run_compose(args: list[str], compose_file: str) -> int:
    """Run a docker compose command with the bundled compose file."""
    base = ["docker", "compose", "-f", compose_file, "-p", "ragdebug"]
    cmd = [*base, *args]
    # Echo the full command so users can reproduce it by hand.
    click.secho(f" → {' '.join(cmd)}", fg="bright_black")
    return subprocess.run(cmd).returncode
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
@click.group()
@click.version_option(package_name="ragdebug")
def cli():
    """ragdebug — Observability & Debugging Platform for RAG Pipelines.

    Root command group; subcommands (up/down/status/logs) manage the
    self-hosted Docker Compose platform.
    """
    pass
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
@cli.command()
@click.option("--build", is_flag=True, help="Force rebuild the platform image.")
@click.option(
    "--detach/--no-detach", default=True,
    help="Run containers in the background (default: detach)."
)
def up(build: bool, detach: bool):
    """Start the ragdebug platform (databases + backend + dashboard)."""
    if not _docker_available():
        sys.exit(1)

    compose_file = _get_compose_file()
    click.secho("\n🚀 Starting ragdebug platform...\n", fg="cyan", bold=True)

    # Translate CLI flags into compose arguments.
    compose_args = ["up"]
    if detach:
        compose_args.append("-d")
    if build:
        compose_args.append("--build")

    rc = _run_compose(compose_args, compose_file)

    # In detached mode print where everything lives once startup kicked off.
    if rc == 0 and detach:
        click.echo()
        click.secho("✓ Platform is starting up!", fg="green", bold=True)
        click.echo()
        click.echo(" Dashboard: http://localhost:8000")
        click.echo(" API: http://localhost:8000/api/v1/")
        click.echo(" Health: http://localhost:8000/health")
        click.echo()
        click.secho(" Run 'ragdebug status' to check readiness.", fg="bright_black")
        click.secho(" Run 'ragdebug down' to stop.\n", fg="bright_black")

    sys.exit(rc)
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
@cli.command()
@click.option(
    "--volumes", is_flag=True,
    help="Also remove persistent data volumes (MongoDB, PostgreSQL)."
)
def down(volumes: bool):
    """Stop the ragdebug platform."""
    if not _docker_available():
        sys.exit(1)

    compose_file = _get_compose_file()
    click.secho("\n⏹ Stopping ragdebug platform...\n", fg="yellow", bold=True)

    # -v also destroys the named volumes (data loss is opt-in).
    compose_args = ["down"] + (["-v"] if volumes else [])

    rc = _run_compose(compose_args, compose_file)

    if rc == 0:
        suffix = " All data volumes removed." if volumes else ""
        click.secho("✓ Platform stopped." + suffix, fg="green", bold=True)
        click.echo()

    sys.exit(rc)
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
@cli.command()
def status():
    """Show platform container status and health check."""
    if not _docker_available():
        sys.exit(1)

    compose_file = _get_compose_file()
    click.secho("\n📊 ragdebug platform status\n", fg="cyan", bold=True)

    _run_compose(["ps"], compose_file)
    click.echo()

    # Probe the backend /health endpoint directly.
    try:
        payload = httpx.get("http://localhost:8000/health", timeout=3.0).json()
    except Exception:
        click.secho(" API Health: ✗ Backend not reachable", fg="red")
    else:
        click.secho(
            f" API Health: ✓ {payload.get('status', 'ok')} (v{payload.get('version', '?')})",
            fg="green",
        )

    click.echo()
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
@cli.command()
def logs():
    """Stream logs from the platform containers.

    Blocks in the foreground until interrupted (Ctrl-C).
    """
    if not _docker_available():
        sys.exit(1)

    compose_file = _get_compose_file()
    # Follow mode (-f) with the last 100 lines of history per container.
    _run_compose(["logs", "-f", "--tail", "100"], compose_file)
|
|
@@ -0,0 +1,177 @@
|
|
|
1
|
+
"""
|
|
2
|
+
HTTP client — batches traces and sends them asynchronously.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
import atexit
|
|
8
|
+
import logging
|
|
9
|
+
import queue
|
|
10
|
+
import threading
|
|
11
|
+
from typing import Any
|
|
12
|
+
|
|
13
|
+
import httpx
|
|
14
|
+
|
|
15
|
+
from ragdebug.config import get_config
|
|
16
|
+
|
|
17
|
+
logger = logging.getLogger("ragdebug")
|
|
18
|
+
|
|
19
|
+
_queue: queue.Queue[dict[str, Any]] = queue.Queue()
|
|
20
|
+
_flush_thread: threading.Thread | None = None
|
|
21
|
+
_shutdown_event = threading.Event()
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def _flush_loop() -> None:
    """Background thread that flushes batched traces.

    Blocks up to ``flush_interval`` waiting for a first trace, then drains
    the queue up to ``batch_size`` items and hands the batch to _send_batch.
    """
    cfg = get_config()
    while not _shutdown_event.is_set():
        try:
            first = _queue.get(timeout=cfg.flush_interval)
        except queue.Empty:
            # Nothing arrived this cycle — go back to waiting.
            continue

        pending = [first]
        while len(pending) < cfg.batch_size:
            try:
                pending.append(_queue.get_nowait())
            except queue.Empty:
                break

        _send_batch(pending)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def _send_batch(batch: list[dict]) -> None:
    """Send a batch of traces to the platform API.

    Each trace is still POSTed individually (the backend endpoint accepts
    one trace per request), but a single httpx.Client is reused for the
    whole batch so TCP connections are not re-established per trace.
    All failures are swallowed (logged only in debug mode) — SDK errors
    must never crash the host application.
    """
    cfg = get_config()
    url = f"{cfg.endpoint}/api/v1/traces"
    headers = {"Authorization": f"Bearer {cfg.api_key}"}

    try:
        with httpx.Client(timeout=10.0) as client:
            for payload in batch:
                try:
                    resp = client.post(url, json=payload, headers=headers)
                    if cfg.debug:
                        logger.debug(f"Trace sent: {resp.status_code}")
                except Exception as exc:
                    if cfg.debug:
                        logger.warning(f"Failed to send trace: {exc}")
                    # Silently swallow — SDK errors must never crash the host app
    except Exception as exc:
        # Client construction itself failed; keep the no-crash guarantee.
        if cfg.debug:
            logger.warning(f"Failed to send batch: {exc}")
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def _sync_prompt(
    project_id: str,
    name: str,
    template: str,
    template_hash: str,
    variables: list[str],
    metadata: dict[str, Any],
) -> int | None:
    """Synchronously sync a prompt with the backend and return its ID.

    Returns None when the SDK is disabled or the request fails.
    """
    cfg = get_config()
    if not cfg.enabled:
        return None

    body = {
        "project_id": project_id,
        "name": name,
        "template": template,
        "template_hash": template_hash,
        "variables": variables,
        "metadata": metadata,
    }

    try:
        resp = httpx.post(
            f"{cfg.endpoint}/api/v1/prompts/sync",
            json=body,
            headers={"Authorization": f"Bearer {cfg.api_key}"},
            timeout=5.0,
        )
        resp.raise_for_status()
        return resp.json().get("id")
    except Exception as exc:
        if cfg.debug:
            logger.warning(f"Failed to sync prompt: {exc}")
        return None
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def _pull_active_prompt(project_id: str, name: str) -> dict[str, Any] | None:
    """Synchronously pull the active prompt version from the backend.

    The prompt name is URL-escaped before being placed in the path, and
    project_id is passed via ``params`` so both survive spaces and special
    characters (the original string interpolation produced broken URLs for
    names like "QA Prompt").

    Returns None when the SDK is disabled or the request fails.
    """
    from urllib.parse import quote

    cfg = get_config()
    if not cfg.enabled:
        return None

    url = f"{cfg.endpoint}/api/v1/prompts/{quote(name, safe='')}/active"
    headers = {"Authorization": f"Bearer {cfg.api_key}"}

    try:
        resp = httpx.get(
            url,
            headers=headers,
            params={"project_id": project_id},
            timeout=5.0,
        )
        resp.raise_for_status()
        return resp.json()
    except Exception as exc:
        if cfg.debug:
            logger.warning(f"Failed to pull active prompt: {exc}")
        return None
|
|
114
|
+
|
|
115
|
+
def _trigger_eval(
    project_id: str,
    name: str,
    prompt_version_id: int | None,
    test_cases: list[dict[str, Any]],
) -> dict[str, Any] | None:
    """Synchronously trigger an evaluation run on the backend.

    Returns the created evaluation record, or None on failure/disabled SDK.
    """
    cfg = get_config()
    if not cfg.enabled:
        return None

    body = {
        "project_id": project_id,
        "name": name,
        "prompt_version_id": prompt_version_id,
        "test_cases": test_cases,
    }

    try:
        resp = httpx.post(
            f"{cfg.endpoint}/api/v1/evaluations",
            json=body,
            headers={"Authorization": f"Bearer {cfg.api_key}"},
            timeout=10.0,
        )
        resp.raise_for_status()
        return resp.json()
    except Exception as exc:
        if cfg.debug:
            logger.warning(f"Failed to trigger evaluation: {exc}")
        return None
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def enqueue_trace(trace_dict: dict[str, Any]) -> None:
    """Add a trace to the outgoing queue.

    No-op when the SDK is disabled; otherwise queues the payload and makes
    sure the background flush thread is running.
    """
    if not get_config().enabled:
        return

    _queue.put(trace_dict)
    _ensure_flush_thread()
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
def _ensure_flush_thread() -> None:
    """Lazily start the background flush thread (daemon, one per process)."""
    global _flush_thread
    current = _flush_thread
    if current is not None and current.is_alive():
        return
    _flush_thread = threading.Thread(
        target=_flush_loop, daemon=True, name="ragdebug-flush"
    )
    _flush_thread.start()
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
def shutdown() -> None:
    """Flush remaining traces and stop the background thread.

    Registered via atexit; safe to call multiple times.
    """
    _shutdown_event.set()
    # Drain whatever is still queued and push it out synchronously.
    leftovers: list[dict] = []
    while True:
        try:
            leftovers.append(_queue.get_nowait())
        except queue.Empty:
            break
    if leftovers:
        _send_batch(leftovers)
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
atexit.register(shutdown)
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
"""
|
|
2
|
+
SDK configuration.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
from dataclasses import dataclass, field
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@dataclass
class SDKConfig:
    """Global SDK configuration — set via ragdebug.init().

    A single mutable instance (``_config`` below) is shared process-wide;
    it is updated in place by set_config() and read by the client/tracer
    modules through get_config().
    """
    # Bearer token sent in the Authorization header by the HTTP client.
    api_key: str = ""
    # Project identifier attached to outgoing traces/prompts/evaluations.
    project: str = ""
    # Base URL of the RAG Debugger platform backend.
    endpoint: str = "http://localhost:8000"
    flush_interval: float = 5.0  # seconds
    batch_size: int = 10
    # When False, enqueue/sync/eval calls become no-ops.
    enabled: bool = True
    # When True, the client logs send failures at WARNING level.
    debug: bool = False


# Singleton instance
_config = SDKConfig()
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def get_config() -> SDKConfig:
    """Return the process-wide SDKConfig singleton (shared, mutable)."""
    return _config
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def set_config(**kwargs) -> None:
    """Apply keyword overrides to the global config; unknown keys are ignored."""
    for key, value in kwargs.items():
        if not hasattr(_config, key):
            continue
        setattr(_config, key, value)
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
"""
|
|
2
|
+
RAGDebugger wrapper for programmatic trace control.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from typing import Any, Optional
|
|
6
|
+
from ragdebug.config import set_config
|
|
7
|
+
from ragdebug.tracer import trace
|
|
8
|
+
from ragdebug.spans import span
|
|
9
|
+
|
|
10
|
+
class RAGDebugger:
    """
    Programmatic entry point for the RAG Debugger SDK.

    Instantiating this class updates the global SDK configuration, so the
    module-level @trace / span helpers pick up the same settings.
    """

    def __init__(
        self,
        api_url: str = "http://localhost:8000",
        project_id: str = "default",
        api_key: str = "default_key",
    ):
        set_config(endpoint=api_url, project=project_id, api_key=api_key)
        self.project_id = project_id
        self.api_key = api_key

    def trace(self, query: str):
        """Context manager/decorator for a trace."""
        # The query string doubles as the trace name.
        return trace(name=query)

    def span(self, name: str, span_type: str = "general"):
        """Context manager for a span."""
        return span(name=name, span_type=span_type)
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Evaluation trigger from the SDK.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
from ragdebug.client import _trigger_eval
|
|
10
|
+
from ragdebug.config import get_config
|
|
11
|
+
from ragdebug.prompt import Prompt
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def evaluate(
    name: str,
    test_cases: list[dict[str, Any]],
    prompt: Prompt | None = None,
    project_id: str | None = None,
) -> dict[str, Any] | None:
    """
    Trigger an automated evaluation run on the RAG Debugger platform.

    Args:
        name: Name for this evaluation run (e.g., "Nightly CI Eval")
        test_cases: List of dictionaries, each containing at least 'input_query'
            (or its alias 'query') and optionally 'expected_output'.
        prompt: An optional Prompt instance. If provided, the evaluation will be
            linked to this prompt's active version.
        project_id: Override the default project ID.

    Returns:
        The evaluation run definition including its ID and status, or None
        when the SDK is disabled or the request failed.

    Raises:
        ValueError: If a test case has neither 'input_query' nor 'query'.
    """
    cfg = get_config()
    pid = project_id or cfg.project

    prompt_version_id = getattr(prompt, "version_id", None) if prompt else None

    # Normalize test cases WITHOUT mutating the caller's dicts — the previous
    # implementation popped 'query' in place, surprising callers that reuse
    # their test-case lists.
    formatted_cases: list[dict[str, Any]] = []
    for tc in test_cases:
        if "input_query" in tc:
            query = tc["input_query"]
        elif "query" in tc:
            query = tc["query"]
        else:
            raise ValueError("Each test case must contain an 'input_query' key.")

        # Only forward the fields the backend expects.
        formatted_cases.append({
            "input_query": query,
            "expected_output": tc.get("expected_output", ""),
        })

    return _trigger_eval(
        project_id=pid,
        name=name,
        prompt_version_id=prompt_version_id,
        test_cases=formatted_cases,
    )
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Lightweight data models for trace payloads (SDK-side).
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
from dataclasses import dataclass, field
|
|
8
|
+
from datetime import datetime, timezone
|
|
9
|
+
from typing import Any
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@dataclass
class SpanData:
    """A single timed stage within a trace; may contain nested child spans."""
    span_id: str
    name: str
    span_type: str = "general"
    start_time_ms: float = 0
    duration_ms: float = 0
    input: dict = field(default_factory=dict)
    output: dict = field(default_factory=dict)
    metadata: dict = field(default_factory=dict)
    status: str = "success"
    children: list[SpanData] = field(default_factory=list)

    def set_input(self, **kwargs):
        """Merge keyword arguments into the span's input payload."""
        for key, value in kwargs.items():
            self.input[key] = value

    def set_output(self, **kwargs):
        """Merge keyword arguments into the span's output payload."""
        for key, value in kwargs.items():
            self.output[key] = value

    def to_dict(self) -> dict:
        """Serialize this span (and its children, recursively) to a plain dict."""
        payload = {
            "span_id": self.span_id,
            "name": self.name,
            "span_type": self.span_type,
            "start_time_ms": self.start_time_ms,
            "duration_ms": self.duration_ms,
            "input": self.input,
            "output": self.output,
            "metadata": self.metadata,
            "status": self.status,
        }
        payload["children"] = [child.to_dict() for child in self.children]
        return payload
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
@dataclass
class TraceData:
    """Full trace payload to be sent to the platform."""
    project_id: str = ""
    prompt_version_id: int | None = None
    input: dict[str, Any] = field(default_factory=dict)
    output: dict[str, Any] = field(default_factory=dict)
    spans: list[SpanData] = field(default_factory=list)
    duration_ms: float = 0
    status: str = "success"
    metadata: dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> dict:
        """Serialize the trace; prompt_version_id is included only when set."""
        payload = {
            "project_id": self.project_id,
            "input": self.input,
            "output": self.output,
            "spans": [s.to_dict() for s in self.spans],
            "duration_ms": self.duration_ms,
            "status": self.status,
            "metadata": self.metadata,
        }
        if self.prompt_version_id is None:
            return payload
        payload["prompt_version_id"] = self.prompt_version_id
        return payload
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# Platform data package — contains docker-compose.yml for ragdebug CLI.
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
# ---------------------------------------------------------
|
|
2
|
+
# ragdebug platform — Docker Compose for self-hosted setup
|
|
3
|
+
# Launched via: ragdebug up
|
|
4
|
+
# ---------------------------------------------------------
|
|
5
|
+
#
|
|
6
|
+
# Services:
|
|
7
|
+
# platform — FastAPI backend + bundled React dashboard
|
|
8
|
+
# mongodb — Trace & span storage
|
|
9
|
+
# postgres — Projects, prompts, evaluations, users
|
|
10
|
+
# redis — Caching & task queue
|
|
11
|
+
# ---------------------------------------------------------
|
|
12
|
+
|
|
13
|
+
services:
|
|
14
|
+
mongodb:
|
|
15
|
+
image: mongo:7
|
|
16
|
+
container_name: ragdebug-mongo
|
|
17
|
+
restart: unless-stopped
|
|
18
|
+
ports:
|
|
19
|
+
- "27017:27017"
|
|
20
|
+
environment:
|
|
21
|
+
MONGO_INITDB_ROOT_USERNAME: ragdebug
|
|
22
|
+
MONGO_INITDB_ROOT_PASSWORD: ragdebug_secret
|
|
23
|
+
volumes:
|
|
24
|
+
- ragdebug_mongo_data:/data/db
|
|
25
|
+
|
|
26
|
+
postgres:
|
|
27
|
+
image: postgres:16-alpine
|
|
28
|
+
container_name: ragdebug-postgres
|
|
29
|
+
restart: unless-stopped
|
|
30
|
+
ports:
|
|
31
|
+
- "5432:5432"
|
|
32
|
+
environment:
|
|
33
|
+
POSTGRES_USER: ragdebug
|
|
34
|
+
POSTGRES_PASSWORD: ragdebug_secret
|
|
35
|
+
POSTGRES_DB: ragdebug
|
|
36
|
+
volumes:
|
|
37
|
+
- ragdebug_pg_data:/var/lib/postgresql/data
|
|
38
|
+
|
|
39
|
+
redis:
|
|
40
|
+
image: redis:7-alpine
|
|
41
|
+
container_name: ragdebug-redis
|
|
42
|
+
restart: unless-stopped
|
|
43
|
+
ports:
|
|
44
|
+
- "6379:6379"
|
|
45
|
+
|
|
46
|
+
platform:
|
|
47
|
+
image: ragdebug/platform:latest
|
|
48
|
+
container_name: ragdebug-platform
|
|
49
|
+
restart: unless-stopped
|
|
50
|
+
ports:
|
|
51
|
+
- "8000:8000"
|
|
52
|
+
environment:
|
|
53
|
+
# Database connections (use Docker service names)
|
|
54
|
+
MONGO_USER: ragdebug
|
|
55
|
+
MONGO_PASSWORD: ragdebug_secret
|
|
56
|
+
MONGO_HOST: mongodb
|
|
57
|
+
MONGO_PORT: "27017"
|
|
58
|
+
MONGO_DB: ragdebug
|
|
59
|
+
PG_USER: ragdebug
|
|
60
|
+
PG_PASSWORD: ragdebug_secret
|
|
61
|
+
PG_HOST: postgres
|
|
62
|
+
PG_PORT: "5432"
|
|
63
|
+
PG_DB: ragdebug
|
|
64
|
+
REDIS_HOST: redis
|
|
65
|
+
REDIS_PORT: "6379"
|
|
66
|
+
REDIS_URL: redis://redis:6379/0
|
|
67
|
+
# App
|
|
68
|
+
APP_ENV: production
|
|
69
|
+
APP_DEBUG: "false"
|
|
70
|
+
CORS_ORIGINS: "http://localhost:8000"
|
|
71
|
+
JWT_SECRET: ragdebug-local-dev-secret
|
|
72
|
+
depends_on:
|
|
73
|
+
- mongodb
|
|
74
|
+
- postgres
|
|
75
|
+
- redis
|
|
76
|
+
|
|
77
|
+
volumes:
|
|
78
|
+
ragdebug_mongo_data:
|
|
79
|
+
ragdebug_pg_data:
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Prompt management via the SDK.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
import hashlib
|
|
8
|
+
import json
|
|
9
|
+
import threading
|
|
10
|
+
from typing import Any
|
|
11
|
+
|
|
12
|
+
from ragdebug.client import _sync_prompt, _pull_active_prompt
|
|
13
|
+
|
|
14
|
+
# Thread-local storage to associate physical traces with the prompt executed
|
|
15
|
+
_prompt_context = threading.local()
|
|
16
|
+
|
|
17
|
+
def get_current_prompt_context() -> int | None:
|
|
18
|
+
"""Retrieve the active prompt_version_id for the current thread."""
|
|
19
|
+
return getattr(_prompt_context, "prompt_version_id", None)
|
|
20
|
+
|
|
21
|
+
def set_current_prompt_context(version_id: int | None) -> None:
    """Bind *version_id* as the active prompt version for the calling thread."""
    setattr(_prompt_context, "prompt_version_id", version_id)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class Prompt:
    """A prompt template that automatically syncs to the RAG Debugger platform.

    On construction the template is registered with the backend (via
    ``_sync_prompt``) and the resulting version id is stored on the instance,
    so traces executed afterwards can be linked to this exact prompt text.
    """

    def __init__(
        self,
        name: str,
        template: str,
        project_id: str | None = None,
        metadata: dict[str, Any] | None = None,
    ):
        self.name = name
        self.template = template
        self.project_id = project_id or "default"
        self.metadata = metadata or {}

        # Extract {{variable}} placeholders.  dict.fromkeys de-duplicates
        # while preserving first-occurrence order, so the variable list is
        # deterministic (list(set(...)) gave an arbitrary, run-dependent
        # order because of hash randomization).
        import re
        self.variables = list(dict.fromkeys(re.findall(r"\{\{(\w+)\}\}", template)))

        # SHA-256 of the raw template text; lets the backend detect whether
        # the template changed and a new version must be created.
        self.template_hash = hashlib.sha256(template.encode("utf-8")).hexdigest()

        # Register with the platform immediately so version_id is usable.
        self.version_id = self._sync()

    def _sync(self) -> int | None:
        """Sync this prompt with the platform and return its version ID."""
        return _sync_prompt(
            project_id=self.project_id,
            name=self.name,
            template=self.template,
            template_hash=self.template_hash,
            variables=self.variables,
            metadata=self.metadata,
        )

    def format(self, **kwargs: Any) -> str:
        """Render the template, replacing each ``{{key}}`` with ``str(value)``.

        Side effect: binds this prompt's version_id to the thread-local
        prompt context, so any @trace executed afterwards on this thread
        links its trace to this prompt version.
        """
        rendered = self.template
        for key, value in kwargs.items():
            rendered = rendered.replace(f"{{{{{key}}}}}", str(value))

        # Set the prompt context before returning so subsequent traces on
        # this thread can attribute themselves to this version.
        set_current_prompt_context(self.version_id)
        return rendered

    @classmethod
    def pull(cls, name: str, project_id: str = "default") -> "Prompt":
        """Pull the currently active version of a prompt from the platform.

        Raises:
            ValueError: if no active version exists for *name* in *project_id*.
        """
        data = _pull_active_prompt(project_id, name)
        if not data:
            raise ValueError(f"No active prompt found for '{name}' in project '{project_id}'")

        # Build the instance without calling __init__ so we do not re-sync:
        # the platform already assigned this version its id.
        p = cls.__new__(cls)
        p.name = data["name"]
        p.template = data["template"]
        p.project_id = data["project_id"]
        p.metadata = data.get("metadata", {})
        p.variables = data.get("variables", [])
        p.template_hash = None  # Skip hash check
        p.version_id = data["id"]
        return p
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Span context manager — captures timing and I/O for a pipeline stage.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
import time
|
|
8
|
+
import uuid
|
|
9
|
+
from contextlib import contextmanager
|
|
10
|
+
from typing import Any, Generator
|
|
11
|
+
|
|
12
|
+
from ragdebug.models import SpanData
|
|
13
|
+
|
|
14
|
+
# Thread-local span stack (supports nested spans)
|
|
15
|
+
import threading
|
|
16
|
+
|
|
17
|
+
_span_stack: threading.local = threading.local()
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def _get_stack() -> list[SpanData]:
    """Return the calling thread's span stack, creating it on first use."""
    stack = getattr(_span_stack, "stack", None)
    if stack is None:
        stack = []
        _span_stack.stack = stack
    return stack
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
@contextmanager
def span(name: str, span_type: str = "general") -> Generator[SpanData, None, None]:
    """Record a named span: timing, error capture, and parent/child nesting."""
    stack = _get_stack()
    current = SpanData(
        span_id=f"span_{uuid.uuid4().hex[:8]}",
        name=name,
        span_type=span_type,
    )

    # Stash the wall-clock start on the span object itself so the
    # finally-block below can compute the duration.
    current._abs_start = time.perf_counter()  # type: ignore[attr-defined]
    stack.append(current)

    try:
        yield current
    except Exception as exc:
        current.status = "error"
        current.metadata["error"] = str(exc)
        raise
    finally:
        elapsed = time.perf_counter() - current._abs_start  # type: ignore[attr-defined]
        current.duration_ms = round(elapsed * 1000, 2)
        stack.pop()

        if stack:
            # Still inside an enclosing span: attach as its child.
            stack[-1].children.append(current)
        else:
            # Top of the stack: hand the finished span to the active trace.
            from ragdebug.tracer import _register_span
            _register_span(current)
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def reset_spans() -> None:
    """Drop every span recorded on the calling thread (fresh empty stack)."""
    setattr(_span_stack, "stack", [])
|
|
63
|
+
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
"""
|
|
2
|
+
@trace decorator — wraps a RAG pipeline function to capture the full trace.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
import asyncio
|
|
8
|
+
import functools
|
|
9
|
+
import inspect
|
|
10
|
+
import time
|
|
11
|
+
import uuid
|
|
12
|
+
from typing import Any, Callable
|
|
13
|
+
|
|
14
|
+
from ragdebug.client import enqueue_trace
|
|
15
|
+
from ragdebug.config import get_config
|
|
16
|
+
from ragdebug.models import SpanData, TraceData
|
|
17
|
+
from ragdebug.prompt import get_current_prompt_context
|
|
18
|
+
|
|
19
|
+
# Thread-local to hold the current trace's spans
|
|
20
|
+
import threading
|
|
21
|
+
|
|
22
|
+
_trace_local = threading.local()
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def _get_trace_spans() -> list[SpanData]:
    """Return the calling thread's top-level span list, creating it lazily."""
    spans = getattr(_trace_local, "spans", None)
    if spans is None:
        spans = []
        _trace_local.spans = spans
    return spans
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def _register_span(span_data: SpanData) -> None:
    """Attach a finished top-level span to the current trace's span list."""
    _get_trace_spans().append(span_data)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class TraceContext:
    """Collects one RAG pipeline execution into a TraceData record.

    Usable three ways: as a context manager (``with trace(name=...) as ctx``),
    as a decorator factory (``@trace(name=...)``), or via the bare ``@trace``
    decorator — the latter two route through ``__call__``.
    """

    def __init__(self, name: str, cfg):
        # cfg is the SDK configuration returned by get_config(); it must
        # expose .project and .enabled.  (The previous annotation named
        # SDKConfig, which is never imported in this module — removed.)
        self.name = name
        self.cfg = cfg
        self.start_time = 0.0
        self.trace_id = str(uuid.uuid4())
        # Kept for API compatibility; spans are actually collected on the
        # thread-local _trace_local.spans, not on this attribute.
        self.spans = []
        self.input_data = {}
        self.output_data = {}
        self.status = "success"

    def __enter__(self):
        # Fresh per-thread span list so a new trace never inherits spans
        # left over from a previous one on the same thread.
        _trace_local.spans = []
        self.start_time = time.perf_counter()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        elapsed_ms = round((time.perf_counter() - self.start_time) * 1000, 2)
        if exc_type:
            self.status = "error"

        trace_data = TraceData(
            project_id=self.cfg.project,
            prompt_version_id=get_current_prompt_context(),
            input=self.input_data or {"query": self.name},
            output=self.output_data,
            # getattr-guard: __exit__ running on a thread where __enter__
            # never initialized the list should not raise AttributeError.
            spans=list(getattr(_trace_local, "spans", [])),
            duration_ms=elapsed_ms,
            status=self.status,
            metadata={"error": str(exc_val)} if exc_val else {},
        )
        enqueue_trace(trace_data.to_dict())
        _trace_local.spans = []
        # Returning None lets any in-flight exception propagate.

    def span(self, name: str, span_type: str = "general"):
        """Open a child span inside this trace (delegates to ragdebug.spans)."""
        from ragdebug.spans import span
        return span(name=name, span_type=span_type)

    def set_input(self, **kwargs):
        """Merge keyword arguments into the trace's recorded input."""
        self.input_data.update(kwargs)

    def set_output(self, **kwargs):
        """Merge keyword arguments into the trace's recorded output."""
        self.output_data.update(kwargs)

    def __call__(self, func: Callable):
        """Wrap *func* so each invocation runs inside a fresh TraceContext."""
        trace_name = self.name or func.__name__
        if asyncio.iscoroutinefunction(func):
            @functools.wraps(func)
            async def async_wrapper(*args, **kwargs):
                if not self.cfg.enabled:
                    return await func(*args, **kwargs)
                with TraceContext(trace_name, self.cfg) as ctx:
                    # Best-effort query capture: first positional argument,
                    # falling back to a "query" keyword argument.
                    query = str(args[0]) if args else str(kwargs.get("query", ""))
                    ctx.set_input(query=query)
                    res = await func(*args, **kwargs)
                    ctx.set_output(response=str(res))
                    return res
            return async_wrapper
        else:
            @functools.wraps(func)
            def sync_wrapper(*args, **kwargs):
                if not self.cfg.enabled:
                    return func(*args, **kwargs)
                with TraceContext(trace_name, self.cfg) as ctx:
                    query = str(args[0]) if args else str(kwargs.get("query", ""))
                    ctx.set_input(query=query)
                    res = func(*args, **kwargs)
                    ctx.set_output(response=str(res))
                    return res
            return sync_wrapper
|
|
105
|
+
|
|
106
|
+
def trace(func: Callable | None = None, *, name: str | None = None):
    """
    Decorator OR Context Manager to capture a full RAG pipeline trace.

    ``@trace`` decorates directly; ``trace(name=...)`` returns a
    TraceContext usable either as ``with trace(name=...)`` or as
    ``@trace(name=...)`` (TraceContext implements __call__).
    """
    cfg = get_config()
    if func is not None:
        # Bare @trace usage: decorate immediately.
        return TraceContext(name or func.__name__, cfg)(func)
    # No function given: hand back the context/decorator object.
    return TraceContext(name or "manual_trace", cfg)
|
|
118
|
+
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: ragdebug
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Observability SDK for RAG pipelines — auto-trace every stage of your RAG app.
|
|
5
|
+
Requires-Python: >=3.10
|
|
6
|
+
Description-Content-Type: text/markdown
|
|
7
|
+
Requires-Dist: httpx>=0.27
|
|
8
|
+
Requires-Dist: pydantic>=2.0
|
|
9
|
+
Requires-Dist: click>=8.0
|
|
10
|
+
Provides-Extra: test
|
|
11
|
+
Requires-Dist: pytest>=8.0; extra == "test"
|
|
12
|
+
Requires-Dist: pytest-asyncio>=0.24; extra == "test"
|
|
13
|
+
Provides-Extra: tiktoken
|
|
14
|
+
Requires-Dist: tiktoken>=0.7; extra == "tiktoken"
|
|
15
|
+
|
|
16
|
+
# RAG Debugger SDK
|
|
17
|
+
|
|
18
|
+
**Observability & Debugging Platform for RAG Pipelines**
|
|
19
|
+
|
|
20
|
+
`ragdebug` allows you to auto-trace every stage of your RAG application and start a comprehensive local dashboard for analysis with a single command.
|
|
21
|
+
|
|
22
|
+
## Quickstart
|
|
23
|
+
|
|
24
|
+
1. Install the SDK with the CLI power-ups:
|
|
25
|
+
```bash
|
|
26
|
+
pip install ragdebug
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
2. Start the local RAG Debugger platform:
|
|
30
|
+
```bash
|
|
31
|
+
ragdebug up
|
|
32
|
+
```
|
|
33
|
+
*Note: This requires Docker Desktop to be running.*
|
|
34
|
+
|
|
35
|
+
3. Open the dashboard at [http://localhost:8000](http://localhost:8000)
|
|
36
|
+
|
|
37
|
+
## Integrating Tracing
|
|
38
|
+
|
|
39
|
+
Add the `@trace` decorator to your RAG pipeline functions:
|
|
40
|
+
|
|
41
|
+
```python
|
|
42
|
+
from ragdebug import init, trace, Prompt
|
|
43
|
+
|
|
44
|
+
init(project="my-project")
|
|
45
|
+
|
|
46
|
+
test_prompt = Prompt(name="QA Prompt", template="Context: {{context}}\n\nQ: {{query}}")
|
|
47
|
+
|
|
48
|
+
@trace(name="qa_pipeline")
|
|
49
|
+
def answer_question(query: str):
|
|
50
|
+
# Your RAG logic here
|
|
51
|
+
return "Answer"
|
|
52
|
+
|
|
53
|
+
answer_question("What is RAG?")
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
## Advanced CLI Usage
|
|
57
|
+
|
|
58
|
+
| Command | Description |
|
|
59
|
+
|---|---|
|
|
60
|
+
| `ragdebug up` | Start all 4 containers (mongo, postgres, redis, platform) |
|
|
61
|
+
| `ragdebug up --build` | Force rebuild the platform image |
|
|
62
|
+
| `ragdebug down` | Stop all containers |
|
|
63
|
+
| `ragdebug status` | Show container status + API health check |
|
|
64
|
+
| `ragdebug logs` | Stream container logs |
|
|
65
|
+
|
|
66
|
+
## Documentation
|
|
67
|
+
|
|
68
|
+
For full documentation on Prompts, Evaluations, and more, check the UI dashboard after running `ragdebug up`.
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
README.md
|
|
2
|
+
pyproject.toml
|
|
3
|
+
ragdebug/__init__.py
|
|
4
|
+
ragdebug/cli.py
|
|
5
|
+
ragdebug/client.py
|
|
6
|
+
ragdebug/config.py
|
|
7
|
+
ragdebug/debugger.py
|
|
8
|
+
ragdebug/evaluator.py
|
|
9
|
+
ragdebug/models.py
|
|
10
|
+
ragdebug/prompt.py
|
|
11
|
+
ragdebug/spans.py
|
|
12
|
+
ragdebug/tracer.py
|
|
13
|
+
ragdebug.egg-info/PKG-INFO
|
|
14
|
+
ragdebug.egg-info/SOURCES.txt
|
|
15
|
+
ragdebug.egg-info/dependency_links.txt
|
|
16
|
+
ragdebug.egg-info/entry_points.txt
|
|
17
|
+
ragdebug.egg-info/requires.txt
|
|
18
|
+
ragdebug.egg-info/top_level.txt
|
|
19
|
+
ragdebug/platform/__init__.py
|
|
20
|
+
ragdebug/platform/docker-compose.yml
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
ragdebug
|
ragdebug-0.1.0/setup.cfg
ADDED