ragobserve 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ragobserve-0.2.0/PKG-INFO +192 -0
- ragobserve-0.2.0/README.md +163 -0
- ragobserve-0.2.0/pyproject.toml +50 -0
- ragobserve-0.2.0/ragobserve/__init__.py +58 -0
- ragobserve-0.2.0/ragobserve/_diag.py +32 -0
- ragobserve-0.2.0/ragobserve/adapters/__init__.py +23 -0
- ragobserve-0.2.0/ragobserve/adapters/langchain.py +365 -0
- ragobserve-0.2.0/ragobserve/adapters/llamaindex.py +430 -0
- ragobserve-0.2.0/ragobserve/adapters/vectordb.py +317 -0
- ragobserve-0.2.0/ragobserve/cli.py +62 -0
- ragobserve-0.2.0/ragobserve/client.py +118 -0
- ragobserve-0.2.0/ragobserve/events.py +120 -0
- ragobserve-0.2.0/ragobserve/server/__init__.py +0 -0
- ragobserve-0.2.0/ragobserve/server/api.py +181 -0
- ragobserve-0.2.0/ragobserve/server/app.py +55 -0
- ragobserve-0.2.0/ragobserve/server/db.py +499 -0
- ragobserve-0.2.0/ragobserve/server/llm.py +234 -0
- ragobserve-0.2.0/ragobserve/server/metrics.py +105 -0
- ragobserve-0.2.0/ragobserve/server/pricing.py +87 -0
- ragobserve-0.2.0/ragobserve/server/static/app.js +86 -0
- ragobserve-0.2.0/ragobserve/server/static/charts.js +118 -0
- ragobserve-0.2.0/ragobserve/server/static/style.css +169 -0
- ragobserve-0.2.0/ragobserve/server/templates/base.html +29 -0
- ragobserve-0.2.0/ragobserve/server/templates/chunks.html +49 -0
- ragobserve-0.2.0/ragobserve/server/templates/dashboard.html +29 -0
- ragobserve-0.2.0/ragobserve/server/templates/generations.html +126 -0
- ragobserve-0.2.0/ragobserve/server/templates/metrics.html +50 -0
- ragobserve-0.2.0/ragobserve/server/templates/trace_detail.html +227 -0
- ragobserve-0.2.0/ragobserve/server/templates/traces.html +42 -0
- ragobserve-0.2.0/ragobserve/storage.py +60 -0
- ragobserve-0.2.0/ragobserve/tracing.py +201 -0
- ragobserve-0.2.0/ragobserve.egg-info/PKG-INFO +192 -0
- ragobserve-0.2.0/ragobserve.egg-info/SOURCES.txt +42 -0
- ragobserve-0.2.0/ragobserve.egg-info/dependency_links.txt +1 -0
- ragobserve-0.2.0/ragobserve.egg-info/entry_points.txt +2 -0
- ragobserve-0.2.0/ragobserve.egg-info/requires.txt +18 -0
- ragobserve-0.2.0/ragobserve.egg-info/top_level.txt +1 -0
- ragobserve-0.2.0/setup.cfg +4 -0
- ragobserve-0.2.0/tests/test_adapter_dispatch.py +201 -0
- ragobserve-0.2.0/tests/test_adapters.py +90 -0
- ragobserve-0.2.0/tests/test_api.py +82 -0
- ragobserve-0.2.0/tests/test_events.py +30 -0
- ragobserve-0.2.0/tests/test_metrics.py +45 -0
- ragobserve-0.2.0/tests/test_sdk.py +80 -0
|
@@ -0,0 +1,192 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: ragobserve
|
|
3
|
+
Version: 0.2.0
|
|
4
|
+
Summary: RAGObserve: local-first observability, debugging and evaluation for RAG systems. The MLflow for RAG.
|
|
5
|
+
Author-email: Pranesh <praneshmadhan646@gmail.com>
|
|
6
|
+
License: Apache-2.0
|
|
7
|
+
Project-URL: Homepage, https://github.com/Pranesh-2005/ragobserve
|
|
8
|
+
Keywords: rag,observability,tracing,retrieval,llm,evaluation
|
|
9
|
+
Classifier: Development Status :: 3 - Alpha
|
|
10
|
+
Classifier: Intended Audience :: Developers
|
|
11
|
+
Classifier: Programming Language :: Python :: 3
|
|
12
|
+
Classifier: Topic :: Software Development :: Debuggers
|
|
13
|
+
Requires-Python: >=3.9
|
|
14
|
+
Description-Content-Type: text/markdown
|
|
15
|
+
Requires-Dist: fastapi>=0.100
|
|
16
|
+
Requires-Dist: uvicorn>=0.23
|
|
17
|
+
Requires-Dist: jinja2>=3.1
|
|
18
|
+
Requires-Dist: pydantic>=2.0
|
|
19
|
+
Requires-Dist: httpx>=0.24
|
|
20
|
+
Provides-Extra: langchain
|
|
21
|
+
Requires-Dist: langchain-core>=0.1; extra == "langchain"
|
|
22
|
+
Provides-Extra: llamaindex
|
|
23
|
+
Requires-Dist: llama-index-core>=0.10; extra == "llamaindex"
|
|
24
|
+
Provides-Extra: llm
|
|
25
|
+
Requires-Dist: anthropic>=0.40; extra == "llm"
|
|
26
|
+
Requires-Dist: openai>=1.0; extra == "llm"
|
|
27
|
+
Provides-Extra: dev
|
|
28
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
29
|
+
|
|
30
|
+
# RAGObserve
|
|
31
|
+
|
|
32
|
+
**Local-first observability, debugging and evaluation for RAG systems. The MLflow for RAG.**
|
|
33
|
+
|
|
34
|
+
Unlike general LLM observability tools, RAGObserve focuses on the *retrieval lifecycle*:
|
|
35
|
+
|
|
36
|
+
```
|
|
37
|
+
documents → chunking → embedding → indexing → retrieval → fusion
|
|
38
|
+
→ reranking → context assembly → generation → grounding
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
It is framework-agnostic (a universal RAG event model, not LangChain hooks), provider-agnostic, vector-DB-agnostic, and stores everything in a single local SQLite file inside a hidden `./.ragobserve/` folder (like `.git`) — no servers, no accounts.
|
|
42
|
+
|
|
43
|
+
## Install
|
|
44
|
+
|
|
45
|
+
```bash
|
|
46
|
+
pip install ragobserve # or: uv tool install ragobserve
|
|
47
|
+
pip install ragobserve[langchain] # optional LangChain auto-instrumentation
|
|
48
|
+
pip install ragobserve[llamaindex] # optional LlamaIndex auto-instrumentation
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
## Quickstart
|
|
52
|
+
|
|
53
|
+
Instrument your RAG code (writes to a hidden `./.ragobserve/ragobserve.db`, no server needed):
|
|
54
|
+
|
|
55
|
+
```python
|
|
56
|
+
import ragobserve
|
|
57
|
+
|
|
58
|
+
ragobserve.init(project="contract-rag")
|
|
59
|
+
# or point at a running server:
|
|
60
|
+
# ragobserve.init(project="contract-rag", tracking_uri="http://localhost:5601")
|
|
61
|
+
|
|
62
|
+
with ragobserve.trace("query", query=question):
|
|
63
|
+
ragobserve.log_retrieval(question, results, retriever="qdrant", duration_ms=23)
|
|
64
|
+
ragobserve.log_rerank(before, after, model="bge-reranker")
|
|
65
|
+
ragobserve.log_context(final_prompt, system_prompt=sys, chunks=top_chunks, context_window=8192)
|
|
66
|
+
ragobserve.log_generation(model="gpt-4o", prompt=final_prompt, response=answer, cost=0.002)
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
Decorator and nesting also work:
|
|
70
|
+
|
|
71
|
+
```python
|
|
72
|
+
@ragobserve.trace
|
|
73
|
+
def retrieve(query): ...
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
Then explore:
|
|
77
|
+
|
|
78
|
+
```bash
|
|
79
|
+
ragobserve ui # http://127.0.0.1:5601
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
## Dashboard
|
|
83
|
+
|
|
84
|
+
- **Query Explorer** — every query with latency, cost, retriever, model, chunk count
|
|
85
|
+
- **Trace waterfall** — the full pipeline per query, stage by stage
|
|
86
|
+
- **Retrieval Explorer** — retrieved chunks with scores, ranks, metadata
|
|
87
|
+
- **Hybrid Search Explorer** — BM25 vs vector vs fused results
|
|
88
|
+
- **Reranker Analytics** — before/after with rank shifts and Kendall's τ
|
|
89
|
+
- **Context Builder Viewer** — exactly what was sent to the model, DevTools-style
|
|
90
|
+
- **Chunk Explorer** — most retrieved / never retrieved (dead) / duplicate chunks
|
|
91
|
+
- **Metrics** — Precision@k, Recall@k, MRR, nDCG over logged ground truth, plus chunk utilization
|
|
92
|
+
- **Generations & cost** — Langfuse-style cost tracing: per-model / per-day token & $ breakdowns, charts, and the context that produced each generation. Costs are auto-backfilled from a built-in price book when you don't pass `cost=`.
|
|
93
|
+
|
|
94
|
+
## LLM generation & live replay
|
|
95
|
+
|
|
96
|
+
RAGObserve ships a zero-SDK, httpx-based provider layer covering **11 providers** — Anthropic, OpenAI, Gemini, Groq, OpenRouter, Together, Mistral, DeepSeek, Fireworks, Perplexity, Ollama. From any trace's **Generation** / **Context** view you can *replay* the captured context against a live provider (when its API key is set) and the new generation is logged back into the trace with its cost.
|
|
97
|
+
|
|
98
|
+
```bash
|
|
99
|
+
ragobserve providers # list providers and which have keys configured
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
## Framework adapters
|
|
103
|
+
|
|
104
|
+
Full pipeline — ingest *and* query — is captured.
|
|
105
|
+
|
|
106
|
+
### LangChain
|
|
107
|
+
|
|
108
|
+
```python
|
|
109
|
+
from ragobserve.adapters import (
|
|
110
|
+
RagObserveCallbackHandler,
|
|
111
|
+
instrument_loader, instrument_splitter, instrument_embeddings,
|
|
112
|
+
)
|
|
113
|
+
|
|
114
|
+
# query-time: retrieval + generation (+ model, token usage, cost) via the handler
|
|
115
|
+
chain.invoke(q, config={"callbacks": [RagObserveCallbackHandler()]})
|
|
116
|
+
|
|
117
|
+
# ingest-time: loaders/splitters/embeddings emit no callbacks, so wrap them
|
|
118
|
+
loader = instrument_loader(PyPDFLoader("contract.pdf")) # → ingestion event
|
|
119
|
+
splitter = instrument_splitter(RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=50))
|
|
120
|
+
emb = instrument_embeddings(OpenAIEmbeddings()) # real Embeddings subclass — FAISS-safe
|
|
121
|
+
|
|
122
|
+
docs = loader.load()
|
|
123
|
+
chunks = splitter.split_documents(docs) # → chunking event (split_documents/split_text/create_documents/transform_documents)
|
|
124
|
+
FAISS.from_documents(chunks, emb) # embed_documents → embedding event
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
`instrument_embeddings` returns a true `Embeddings` subclass, so vector stores that `isinstance`-check it (FAISS, etc.) keep working; async `aembed_*` is covered via the base class. The callback handler reads token usage from both `llm_output` and chat-message `usage_metadata`. For reranking, `instrument_compressor(CrossEncoderReranker(...))` returns a real `BaseDocumentCompressor` subclass (so `ContextualCompressionRetriever` still validates it) and logs before/after on `compress_documents` — the one query-time RAG step LangChain fires no callback for. The handler also emits **context_assembly** automatically (the prompt sent to the model is the assembled context — no manual `log_context` needed).
|
|
128
|
+
|
|
129
|
+
If a framework version moves an API the adapters hook, the wrappers emit a `RagObserveWarning` ("…not captured (version drift?)") instead of silently logging nothing.
|
|
130
|
+
|
|
131
|
+
### LlamaIndex
|
|
132
|
+
|
|
133
|
+
```python
|
|
134
|
+
from ragobserve.adapters.llamaindex import register
|
|
135
|
+
register() # ONE call instruments the global dispatcher — ingest + query
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
Hooks LlamaIndex's instrumentation dispatcher, so it captures every stage with no code changes:
|
|
139
|
+
|
|
140
|
+
- **embedding** (`EmbeddingEndEvent`, incl. sparse) — model + dimensions
|
|
141
|
+
- **chunking** — derived from the ingest embedding batch (LlamaIndex emits no node-parsing event)
|
|
142
|
+
- **retrieval** (`RetrievalEndEvent`) — at the retriever layer, so **all 80+ vector stores** (Chroma/Pinecone/Qdrant/Milvus/Weaviate/…) are covered transitively
|
|
143
|
+
- **reranking** — `StructuredLLMRerank` fires `ReRankEndEvent` automatically; most rerankers (`SentenceTransformerRerank`, Cohere, `LLMRerank`) emit **no** event, so wrap them: `instrument_postprocessor(SentenceTransformerRerank(...))` → logs before/after, model, top_n
|
|
144
|
+
- **context_assembly** (`GetResponseStartEvent`) — the exact context handed to the LLM during synthesis
|
|
145
|
+
- **generation** (`LLMChat/CompletionEndEvent`) — model, prompt/response, tokens → **cost**
|
|
146
|
+
- **boundaries** — query engines (`QueryStart/End`) and chat engines (`StreamChat*`, `AgentChatWithStep*`, incl. streamed deltas), de-duplicated against the LLM events
|
|
147
|
+
|
|
148
|
+
| Stage | LangChain | LlamaIndex |
|
|
149
|
+
|---|---|---|
|
|
150
|
+
| ingestion | `instrument_loader` | (via pipeline) |
|
|
151
|
+
| chunking | `instrument_splitter` | auto |
|
|
152
|
+
| embedding | `instrument_embeddings` | auto |
|
|
153
|
+
| retrieval | auto (callback) | auto |
|
|
154
|
+
| reranking | `instrument_compressor` (or `log_rerank`) | auto for `StructuredLLMRerank`; otherwise `instrument_postprocessor` |
|
|
155
|
+
| context assembly | auto (handler) | auto |
|
|
156
|
+
| generation + cost | auto | auto |
|
|
157
|
+
| query / chat boundary | auto (chain) | auto |
|
|
158
|
+
|
|
159
|
+
## Vector database integrations
|
|
160
|
+
|
|
161
|
+
Wrap a live client once; every query is logged as a retrieval event automatically — no manual `log_retrieval` calls. Duck-typed, so importing these never requires the DB package installed.
|
|
162
|
+
|
|
163
|
+
```python
|
|
164
|
+
import ragobserve
|
|
165
|
+
ragobserve.init(project="my-rag")
|
|
166
|
+
|
|
167
|
+
col = ragobserve.instrument_chroma(chroma_collection) # .query
|
|
168
|
+
idx = ragobserve.instrument_pinecone(pinecone_index) # .query
|
|
169
|
+
qc = ragobserve.instrument_qdrant(qdrant_client) # .search / .query_points
|
|
170
|
+
wv = ragobserve.instrument_weaviate(weaviate_collection) # .query.near_vector/near_text/hybrid/bm25
|
|
171
|
+
mv = ragobserve.instrument_milvus(milvus_collection) # .search (ORM + MilvusClient)
|
|
172
|
+
|
|
173
|
+
# pgvector has no client to proxy — run your SQL, pass the rows:
|
|
174
|
+
rows = cur.fetchall() # ORDER BY embedding <=> %s LIMIT k
|
|
175
|
+
ragobserve.log_pgvector(query, rows)
|
|
176
|
+
```
|
|
177
|
+
|
|
178
|
+
RAGObserve is vector-DB-agnostic: the `retriever` label is free-text, so **any** store works (FAISS, Elasticsearch, OpenSearch, pgvector, …) even without a dedicated wrapper — just pass results to `ragobserve.log_retrieval(query, results, retriever="...")`.
|
|
179
|
+
|
|
180
|
+
## Try the demo
|
|
181
|
+
|
|
182
|
+
```bash
|
|
183
|
+
python examples/demo_rag.py # run from a clone of the GitHub repo — examples/ is not shipped in the PyPI package
|
|
184
|
+
ragobserve ui
|
|
185
|
+
```
|
|
186
|
+
|
|
187
|
+
## Development
|
|
188
|
+
|
|
189
|
+
```bash
|
|
190
|
+
pip install -e .[dev]
|
|
191
|
+
pytest
|
|
192
|
+
```
|
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
# RAGObserve
|
|
2
|
+
|
|
3
|
+
**Local-first observability, debugging and evaluation for RAG systems. The MLflow for RAG.**
|
|
4
|
+
|
|
5
|
+
Unlike general LLM observability tools, RAGObserve focuses on the *retrieval lifecycle*:
|
|
6
|
+
|
|
7
|
+
```
|
|
8
|
+
documents → chunking → embedding → indexing → retrieval → fusion
|
|
9
|
+
→ reranking → context assembly → generation → grounding
|
|
10
|
+
```
|
|
11
|
+
|
|
12
|
+
It is framework-agnostic (a universal RAG event model, not LangChain hooks), provider-agnostic, vector-DB-agnostic, and stores everything in a single local SQLite file inside a hidden `./.ragobserve/` folder (like `.git`) — no servers, no accounts.
|
|
13
|
+
|
|
14
|
+
## Install
|
|
15
|
+
|
|
16
|
+
```bash
|
|
17
|
+
pip install ragobserve # or: uv tool install ragobserve
|
|
18
|
+
pip install ragobserve[langchain] # optional LangChain auto-instrumentation
|
|
19
|
+
pip install ragobserve[llamaindex] # optional LlamaIndex auto-instrumentation
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
## Quickstart
|
|
23
|
+
|
|
24
|
+
Instrument your RAG code (writes to a hidden `./.ragobserve/ragobserve.db`, no server needed):
|
|
25
|
+
|
|
26
|
+
```python
|
|
27
|
+
import ragobserve
|
|
28
|
+
|
|
29
|
+
ragobserve.init(project="contract-rag")
|
|
30
|
+
# or point at a running server:
|
|
31
|
+
# ragobserve.init(project="contract-rag", tracking_uri="http://localhost:5601")
|
|
32
|
+
|
|
33
|
+
with ragobserve.trace("query", query=question):
|
|
34
|
+
ragobserve.log_retrieval(question, results, retriever="qdrant", duration_ms=23)
|
|
35
|
+
ragobserve.log_rerank(before, after, model="bge-reranker")
|
|
36
|
+
ragobserve.log_context(final_prompt, system_prompt=sys, chunks=top_chunks, context_window=8192)
|
|
37
|
+
ragobserve.log_generation(model="gpt-4o", prompt=final_prompt, response=answer, cost=0.002)
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
Decorator and nesting also work:
|
|
41
|
+
|
|
42
|
+
```python
|
|
43
|
+
@ragobserve.trace
|
|
44
|
+
def retrieve(query): ...
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
Then explore:
|
|
48
|
+
|
|
49
|
+
```bash
|
|
50
|
+
ragobserve ui # http://127.0.0.1:5601
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
## Dashboard
|
|
54
|
+
|
|
55
|
+
- **Query Explorer** — every query with latency, cost, retriever, model, chunk count
|
|
56
|
+
- **Trace waterfall** — the full pipeline per query, stage by stage
|
|
57
|
+
- **Retrieval Explorer** — retrieved chunks with scores, ranks, metadata
|
|
58
|
+
- **Hybrid Search Explorer** — BM25 vs vector vs fused results
|
|
59
|
+
- **Reranker Analytics** — before/after with rank shifts and Kendall's τ
|
|
60
|
+
- **Context Builder Viewer** — exactly what was sent to the model, DevTools-style
|
|
61
|
+
- **Chunk Explorer** — most retrieved / never retrieved (dead) / duplicate chunks
|
|
62
|
+
- **Metrics** — Precision@k, Recall@k, MRR, nDCG over logged ground truth, plus chunk utilization
|
|
63
|
+
- **Generations & cost** — Langfuse-style cost tracing: per-model / per-day token & $ breakdowns, charts, and the context that produced each generation. Costs are auto-backfilled from a built-in price book when you don't pass `cost=`.
|
|
64
|
+
|
|
65
|
+
## LLM generation & live replay
|
|
66
|
+
|
|
67
|
+
RAGObserve ships a zero-SDK, httpx-based provider layer covering **11 providers** — Anthropic, OpenAI, Gemini, Groq, OpenRouter, Together, Mistral, DeepSeek, Fireworks, Perplexity, Ollama. From any trace's **Generation** / **Context** view you can *replay* the captured context against a live provider (when its API key is set) and the new generation is logged back into the trace with its cost.
|
|
68
|
+
|
|
69
|
+
```bash
|
|
70
|
+
ragobserve providers # list providers and which have keys configured
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
## Framework adapters
|
|
74
|
+
|
|
75
|
+
Full pipeline — ingest *and* query — is captured.
|
|
76
|
+
|
|
77
|
+
### LangChain
|
|
78
|
+
|
|
79
|
+
```python
|
|
80
|
+
from ragobserve.adapters import (
|
|
81
|
+
RagObserveCallbackHandler,
|
|
82
|
+
instrument_loader, instrument_splitter, instrument_embeddings,
|
|
83
|
+
)
|
|
84
|
+
|
|
85
|
+
# query-time: retrieval + generation (+ model, token usage, cost) via the handler
|
|
86
|
+
chain.invoke(q, config={"callbacks": [RagObserveCallbackHandler()]})
|
|
87
|
+
|
|
88
|
+
# ingest-time: loaders/splitters/embeddings emit no callbacks, so wrap them
|
|
89
|
+
loader = instrument_loader(PyPDFLoader("contract.pdf")) # → ingestion event
|
|
90
|
+
splitter = instrument_splitter(RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=50))
|
|
91
|
+
emb = instrument_embeddings(OpenAIEmbeddings()) # real Embeddings subclass — FAISS-safe
|
|
92
|
+
|
|
93
|
+
docs = loader.load()
|
|
94
|
+
chunks = splitter.split_documents(docs) # → chunking event (split_documents/split_text/create_documents/transform_documents)
|
|
95
|
+
FAISS.from_documents(chunks, emb) # embed_documents → embedding event
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
`instrument_embeddings` returns a true `Embeddings` subclass, so vector stores that `isinstance`-check it (FAISS, etc.) keep working; async `aembed_*` is covered via the base class. The callback handler reads token usage from both `llm_output` and chat-message `usage_metadata`. For reranking, `instrument_compressor(CrossEncoderReranker(...))` returns a real `BaseDocumentCompressor` subclass (so `ContextualCompressionRetriever` still validates it) and logs before/after on `compress_documents` — the one query-time RAG step LangChain fires no callback for. The handler also emits **context_assembly** automatically (the prompt sent to the model is the assembled context — no manual `log_context` needed).
|
|
99
|
+
|
|
100
|
+
If a framework version moves an API the adapters hook, the wrappers emit a `RagObserveWarning` ("…not captured (version drift?)") instead of silently logging nothing.
|
|
101
|
+
|
|
102
|
+
### LlamaIndex
|
|
103
|
+
|
|
104
|
+
```python
|
|
105
|
+
from ragobserve.adapters.llamaindex import register
|
|
106
|
+
register() # ONE call instruments the global dispatcher — ingest + query
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
Hooks LlamaIndex's instrumentation dispatcher, so it captures every stage with no code changes:
|
|
110
|
+
|
|
111
|
+
- **embedding** (`EmbeddingEndEvent`, incl. sparse) — model + dimensions
|
|
112
|
+
- **chunking** — derived from the ingest embedding batch (LlamaIndex emits no node-parsing event)
|
|
113
|
+
- **retrieval** (`RetrievalEndEvent`) — at the retriever layer, so **all 80+ vector stores** (Chroma/Pinecone/Qdrant/Milvus/Weaviate/…) are covered transitively
|
|
114
|
+
- **reranking** — `StructuredLLMRerank` fires `ReRankEndEvent` automatically; most rerankers (`SentenceTransformerRerank`, Cohere, `LLMRerank`) emit **no** event, so wrap them: `instrument_postprocessor(SentenceTransformerRerank(...))` → logs before/after, model, top_n
|
|
115
|
+
- **context_assembly** (`GetResponseStartEvent`) — the exact context handed to the LLM during synthesis
|
|
116
|
+
- **generation** (`LLMChat/CompletionEndEvent`) — model, prompt/response, tokens → **cost**
|
|
117
|
+
- **boundaries** — query engines (`QueryStart/End`) and chat engines (`StreamChat*`, `AgentChatWithStep*`, incl. streamed deltas), de-duplicated against the LLM events
|
|
118
|
+
|
|
119
|
+
| Stage | LangChain | LlamaIndex |
|
|
120
|
+
|---|---|---|
|
|
121
|
+
| ingestion | `instrument_loader` | (via pipeline) |
|
|
122
|
+
| chunking | `instrument_splitter` | auto |
|
|
123
|
+
| embedding | `instrument_embeddings` | auto |
|
|
124
|
+
| retrieval | auto (callback) | auto |
|
|
125
|
+
| reranking | `instrument_compressor` (or `log_rerank`) | auto for `StructuredLLMRerank`; otherwise `instrument_postprocessor` |
|
|
126
|
+
| context assembly | auto (handler) | auto |
|
|
127
|
+
| generation + cost | auto | auto |
|
|
128
|
+
| query / chat boundary | auto (chain) | auto |
|
|
129
|
+
|
|
130
|
+
## Vector database integrations
|
|
131
|
+
|
|
132
|
+
Wrap a live client once; every query is logged as a retrieval event automatically — no manual `log_retrieval` calls. Duck-typed, so importing these never requires the DB package installed.
|
|
133
|
+
|
|
134
|
+
```python
|
|
135
|
+
import ragobserve
|
|
136
|
+
ragobserve.init(project="my-rag")
|
|
137
|
+
|
|
138
|
+
col = ragobserve.instrument_chroma(chroma_collection) # .query
|
|
139
|
+
idx = ragobserve.instrument_pinecone(pinecone_index) # .query
|
|
140
|
+
qc = ragobserve.instrument_qdrant(qdrant_client) # .search / .query_points
|
|
141
|
+
wv = ragobserve.instrument_weaviate(weaviate_collection) # .query.near_vector/near_text/hybrid/bm25
|
|
142
|
+
mv = ragobserve.instrument_milvus(milvus_collection) # .search (ORM + MilvusClient)
|
|
143
|
+
|
|
144
|
+
# pgvector has no client to proxy — run your SQL, pass the rows:
|
|
145
|
+
rows = cur.fetchall() # ORDER BY embedding <=> %s LIMIT k
|
|
146
|
+
ragobserve.log_pgvector(query, rows)
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
RAGObserve is vector-DB-agnostic: the `retriever` label is free-text, so **any** store works (FAISS, Elasticsearch, OpenSearch, pgvector, …) even without a dedicated wrapper — just pass results to `ragobserve.log_retrieval(query, results, retriever="...")`.
|
|
150
|
+
|
|
151
|
+
## Try the demo
|
|
152
|
+
|
|
153
|
+
```bash
|
|
154
|
+
python examples/demo_rag.py # run from a clone of the GitHub repo — examples/ is not shipped in the PyPI package
|
|
155
|
+
ragobserve ui
|
|
156
|
+
```
|
|
157
|
+
|
|
158
|
+
## Development
|
|
159
|
+
|
|
160
|
+
```bash
|
|
161
|
+
pip install -e .[dev]
|
|
162
|
+
pytest
|
|
163
|
+
```
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=68"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "ragobserve"
|
|
7
|
+
version = "0.2.0"
|
|
8
|
+
description = "RAGObserve: local-first observability, debugging and evaluation for RAG systems. The MLflow for RAG."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.9"
|
|
11
|
+
license = { text = "Apache-2.0" }
|
|
12
|
+
authors = [{ name = "Pranesh", email = "praneshmadhan646@gmail.com" }]
|
|
13
|
+
keywords = ["rag", "observability", "tracing", "retrieval", "llm", "evaluation"]
|
|
14
|
+
classifiers = [
|
|
15
|
+
"Development Status :: 3 - Alpha",
|
|
16
|
+
"Intended Audience :: Developers",
|
|
17
|
+
"Programming Language :: Python :: 3",
|
|
18
|
+
"Topic :: Software Development :: Debuggers",
|
|
19
|
+
]
|
|
20
|
+
dependencies = [
|
|
21
|
+
"fastapi>=0.100",
|
|
22
|
+
"uvicorn>=0.23",
|
|
23
|
+
"jinja2>=3.1",
|
|
24
|
+
"pydantic>=2.0",
|
|
25
|
+
"httpx>=0.24",
|
|
26
|
+
]
|
|
27
|
+
|
|
28
|
+
[project.optional-dependencies]
|
|
29
|
+
langchain = ["langchain-core>=0.1"]
|
|
30
|
+
llamaindex = ["llama-index-core>=0.10"]
|
|
31
|
+
# Live generation replay works out of the box via httpx (OpenAI-compatible
|
|
32
|
+
# providers + Anthropic + Ollama), so no extra deps are strictly required.
|
|
33
|
+
# Install these only if you prefer the official vendor SDKs elsewhere.
|
|
34
|
+
llm = ["anthropic>=0.40", "openai>=1.0"]
|
|
35
|
+
dev = ["pytest>=7.0"]
|
|
36
|
+
|
|
37
|
+
[project.scripts]
|
|
38
|
+
ragobserve = "ragobserve.cli:main"
|
|
39
|
+
|
|
40
|
+
[project.urls]
|
|
41
|
+
Homepage = "https://github.com/Pranesh-2005/ragobserve"
|
|
42
|
+
|
|
43
|
+
[tool.pytest.ini_options]
|
|
44
|
+
testpaths = ["tests"]
|
|
45
|
+
|
|
46
|
+
[tool.setuptools.packages.find]
|
|
47
|
+
include = ["ragobserve*"]
|
|
48
|
+
|
|
49
|
+
[tool.setuptools.package-data]
|
|
50
|
+
"ragobserve.server" = ["templates/*.html", "static/*.css", "static/*.js"]
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
"""RAGObserve — local-first observability for RAG systems.
|
|
2
|
+
|
|
3
|
+
Quickstart::
|
|
4
|
+
|
|
5
|
+
import ragobserve
|
|
6
|
+
ragobserve.init(project="contract-rag") # local ./.ragobserve/ragobserve.db
|
|
7
|
+
# or: ragobserve.init(project="contract-rag", tracking_uri="http://localhost:5601")
|
|
8
|
+
|
|
9
|
+
with ragobserve.trace("query", query="What is the notice period?"):
|
|
10
|
+
ragobserve.log_retrieval(query, results, retriever="qdrant")
|
|
11
|
+
ragobserve.log_rerank(before, after, model="bge-reranker")
|
|
12
|
+
ragobserve.log_context(final_prompt, system_prompt=sys, chunks=chunks)
|
|
13
|
+
ragobserve.log_generation(model="gpt-4o", response=answer, cost=0.002)
|
|
14
|
+
|
|
15
|
+
Then ``ragobserve ui`` to explore the dashboard.
|
|
16
|
+
"""
|
|
17
|
+
from .adapters.langchain import (
|
|
18
|
+
instrument_compressor,
|
|
19
|
+
instrument_embeddings,
|
|
20
|
+
instrument_loader,
|
|
21
|
+
instrument_splitter,
|
|
22
|
+
)
|
|
23
|
+
from .adapters.vectordb import (
|
|
24
|
+
instrument_chroma,
|
|
25
|
+
instrument_milvus,
|
|
26
|
+
instrument_pinecone,
|
|
27
|
+
instrument_qdrant,
|
|
28
|
+
instrument_weaviate,
|
|
29
|
+
log_pgvector,
|
|
30
|
+
)
|
|
31
|
+
from .client import flush, get_client, init
|
|
32
|
+
from .events import Chunk, RagEvent, Stage
|
|
33
|
+
from .tracing import (
|
|
34
|
+
current_trace_id,
|
|
35
|
+
log_chunks,
|
|
36
|
+
log_context,
|
|
37
|
+
log_embedding,
|
|
38
|
+
log_fusion,
|
|
39
|
+
log_generation,
|
|
40
|
+
log_ground_truth,
|
|
41
|
+
log_ingestion,
|
|
42
|
+
log_rerank,
|
|
43
|
+
log_retrieval,
|
|
44
|
+
trace,
|
|
45
|
+
)
|
|
46
|
+
|
|
47
|
+
__version__ = "0.2.0"
|
|
48
|
+
|
|
49
|
+
__all__ = [
|
|
50
|
+
"init", "flush", "get_client", "trace", "current_trace_id",
|
|
51
|
+
"log_ingestion", "log_chunks", "log_embedding", "log_retrieval", "log_fusion",
|
|
52
|
+
"log_rerank", "log_context", "log_generation", "log_ground_truth",
|
|
53
|
+
"instrument_chroma", "instrument_pinecone", "instrument_qdrant",
|
|
54
|
+
"instrument_weaviate", "instrument_milvus", "log_pgvector",
|
|
55
|
+
"instrument_splitter", "instrument_embeddings", "instrument_loader",
|
|
56
|
+
"instrument_compressor",
|
|
57
|
+
"RagEvent", "Chunk", "Stage", "__version__",
|
|
58
|
+
]
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
"""Diagnostics for the framework adapters.
|
|
2
|
+
|
|
3
|
+
Adapters hook into LangChain / LlamaIndex internals (callback signatures,
|
|
4
|
+
instrumentation event names, expected methods). Those move between framework
|
|
5
|
+
versions, and when they do the failure is silent — a stage just stops being
|
|
6
|
+
captured. These helpers turn that silence into a visible ``RagObserveWarning`` so
|
|
7
|
+
version drift is noticed instead of producing empty dashboards.
|
|
8
|
+
"""
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import warnings
|
|
12
|
+
from typing import Iterable
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class RagObserveWarning(UserWarning):
    """Emitted when an adapter can't hook something it expected to."""


def warn(message: str) -> None:
    """Emit ``message`` as a ``RagObserveWarning`` prefixed with ``[ragobserve]``.

    Args:
        message: Human-readable description of what could not be hooked.
    """
    # stacklevel=3 skips this helper and its immediate caller, so the warning
    # is attributed to the frame that invoked that caller.
    warnings.warn(f"[ragobserve] {message}", RagObserveWarning, stacklevel=3)


def require_methods(obj: object, methods: Iterable[str], what: str) -> None:
    """Warn if ``obj`` is missing every one of ``methods`` (so the wrapper would
    silently capture nothing). ``methods`` is treated as "at least one must
    exist".

    Args:
        obj: The object an adapter is about to wrap.
        methods: Candidate method names; any iterable is accepted, including
            one-shot iterators and generators.
        what: Short label for the stage/wrapper, used as the message prefix.
    """
    # Materialize once: ``methods`` is typed as Iterable, so it may be a
    # one-shot iterator. Iterating it for the check and again for the message
    # would otherwise report an empty list of expected methods.
    candidates = list(methods)
    if any(callable(getattr(obj, name, None)) for name in candidates):
        return
    warn(
        f"{what}: {type(obj).__name__} has none of {candidates} — "
        f"that stage will not be captured (framework version drift?)"
    )
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
"""Framework + vector-DB adapters."""
|
|
2
|
+
from .langchain import (
|
|
3
|
+
RagObserveCallbackHandler,
|
|
4
|
+
instrument_compressor,
|
|
5
|
+
instrument_embeddings,
|
|
6
|
+
instrument_loader,
|
|
7
|
+
instrument_splitter,
|
|
8
|
+
)
|
|
9
|
+
from .vectordb import (
|
|
10
|
+
instrument_chroma,
|
|
11
|
+
instrument_milvus,
|
|
12
|
+
instrument_pinecone,
|
|
13
|
+
instrument_qdrant,
|
|
14
|
+
instrument_weaviate,
|
|
15
|
+
log_pgvector,
|
|
16
|
+
)
|
|
17
|
+
|
|
18
|
+
__all__ = [
|
|
19
|
+
"instrument_chroma", "instrument_pinecone", "instrument_qdrant",
|
|
20
|
+
"instrument_weaviate", "instrument_milvus", "log_pgvector",
|
|
21
|
+
"instrument_splitter", "instrument_embeddings", "instrument_loader",
|
|
22
|
+
"instrument_compressor", "RagObserveCallbackHandler",
|
|
23
|
+
]
|