ragobserve 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. ragobserve-0.2.0/PKG-INFO +192 -0
  2. ragobserve-0.2.0/README.md +163 -0
  3. ragobserve-0.2.0/pyproject.toml +50 -0
  4. ragobserve-0.2.0/ragobserve/__init__.py +58 -0
  5. ragobserve-0.2.0/ragobserve/_diag.py +32 -0
  6. ragobserve-0.2.0/ragobserve/adapters/__init__.py +23 -0
  7. ragobserve-0.2.0/ragobserve/adapters/langchain.py +365 -0
  8. ragobserve-0.2.0/ragobserve/adapters/llamaindex.py +430 -0
  9. ragobserve-0.2.0/ragobserve/adapters/vectordb.py +317 -0
  10. ragobserve-0.2.0/ragobserve/cli.py +62 -0
  11. ragobserve-0.2.0/ragobserve/client.py +118 -0
  12. ragobserve-0.2.0/ragobserve/events.py +120 -0
  13. ragobserve-0.2.0/ragobserve/server/__init__.py +0 -0
  14. ragobserve-0.2.0/ragobserve/server/api.py +181 -0
  15. ragobserve-0.2.0/ragobserve/server/app.py +55 -0
  16. ragobserve-0.2.0/ragobserve/server/db.py +499 -0
  17. ragobserve-0.2.0/ragobserve/server/llm.py +234 -0
  18. ragobserve-0.2.0/ragobserve/server/metrics.py +105 -0
  19. ragobserve-0.2.0/ragobserve/server/pricing.py +87 -0
  20. ragobserve-0.2.0/ragobserve/server/static/app.js +86 -0
  21. ragobserve-0.2.0/ragobserve/server/static/charts.js +118 -0
  22. ragobserve-0.2.0/ragobserve/server/static/style.css +169 -0
  23. ragobserve-0.2.0/ragobserve/server/templates/base.html +29 -0
  24. ragobserve-0.2.0/ragobserve/server/templates/chunks.html +49 -0
  25. ragobserve-0.2.0/ragobserve/server/templates/dashboard.html +29 -0
  26. ragobserve-0.2.0/ragobserve/server/templates/generations.html +126 -0
  27. ragobserve-0.2.0/ragobserve/server/templates/metrics.html +50 -0
  28. ragobserve-0.2.0/ragobserve/server/templates/trace_detail.html +227 -0
  29. ragobserve-0.2.0/ragobserve/server/templates/traces.html +42 -0
  30. ragobserve-0.2.0/ragobserve/storage.py +60 -0
  31. ragobserve-0.2.0/ragobserve/tracing.py +201 -0
  32. ragobserve-0.2.0/ragobserve.egg-info/PKG-INFO +192 -0
  33. ragobserve-0.2.0/ragobserve.egg-info/SOURCES.txt +42 -0
  34. ragobserve-0.2.0/ragobserve.egg-info/dependency_links.txt +1 -0
  35. ragobserve-0.2.0/ragobserve.egg-info/entry_points.txt +2 -0
  36. ragobserve-0.2.0/ragobserve.egg-info/requires.txt +18 -0
  37. ragobserve-0.2.0/ragobserve.egg-info/top_level.txt +1 -0
  38. ragobserve-0.2.0/setup.cfg +4 -0
  39. ragobserve-0.2.0/tests/test_adapter_dispatch.py +201 -0
  40. ragobserve-0.2.0/tests/test_adapters.py +90 -0
  41. ragobserve-0.2.0/tests/test_api.py +82 -0
  42. ragobserve-0.2.0/tests/test_events.py +30 -0
  43. ragobserve-0.2.0/tests/test_metrics.py +45 -0
  44. ragobserve-0.2.0/tests/test_sdk.py +80 -0
@@ -0,0 +1,192 @@
1
+ Metadata-Version: 2.4
2
+ Name: ragobserve
3
+ Version: 0.2.0
4
+ Summary: RAGObserve: local-first observability, debugging and evaluation for RAG systems. The MLflow for RAG.
5
+ Author-email: Pranesh <praneshmadhan646@gmail.com>
6
+ License: Apache-2.0
7
+ Project-URL: Homepage, https://github.com/Pranesh-2005/ragobserve
8
+ Keywords: rag,observability,tracing,retrieval,llm,evaluation
9
+ Classifier: Development Status :: 3 - Alpha
10
+ Classifier: Intended Audience :: Developers
11
+ Classifier: Programming Language :: Python :: 3
12
+ Classifier: Topic :: Software Development :: Debuggers
13
+ Requires-Python: >=3.9
14
+ Description-Content-Type: text/markdown
15
+ Requires-Dist: fastapi>=0.100
16
+ Requires-Dist: uvicorn>=0.23
17
+ Requires-Dist: jinja2>=3.1
18
+ Requires-Dist: pydantic>=2.0
19
+ Requires-Dist: httpx>=0.24
20
+ Provides-Extra: langchain
21
+ Requires-Dist: langchain-core>=0.1; extra == "langchain"
22
+ Provides-Extra: llamaindex
23
+ Requires-Dist: llama-index-core>=0.10; extra == "llamaindex"
24
+ Provides-Extra: llm
25
+ Requires-Dist: anthropic>=0.40; extra == "llm"
26
+ Requires-Dist: openai>=1.0; extra == "llm"
27
+ Provides-Extra: dev
28
+ Requires-Dist: pytest>=7.0; extra == "dev"
29
+
30
+ # RAGObserve
31
+
32
+ **Local-first observability, debugging and evaluation for RAG systems. The MLflow for RAG.**
33
+
34
+ Unlike general LLM observability tools, RAGObserve focuses on the *retrieval lifecycle*:
35
+
36
+ ```
37
+ documents → chunking → embedding → indexing → retrieval → fusion
38
+ → reranking → context assembly → generation → grounding
39
+ ```
40
+
41
+ It is framework-agnostic (a universal RAG event model, not LangChain hooks), provider-agnostic, vector-DB-agnostic, and stores everything in a single local SQLite file inside a hidden `./.ragobserve/` folder (like `.git`) — no servers, no accounts.
42
+
43
+ ## Install
44
+
45
+ ```bash
46
+ pip install ragobserve # or: uv tool install ragobserve
47
+ pip install ragobserve[langchain] # optional LangChain auto-instrumentation
48
+ pip install ragobserve[llamaindex] # optional LlamaIndex auto-instrumentation
49
+ ```
50
+
51
+ ## Quickstart
52
+
53
+ Instrument your RAG code (writes to a hidden `./.ragobserve/ragobserve.db`, no server needed):
54
+
55
+ ```python
56
+ import ragobserve
57
+
58
+ ragobserve.init(project="contract-rag")
59
+ # or point at a running server:
60
+ # ragobserve.init(project="contract-rag", tracking_uri="http://localhost:5601")
61
+
62
+ with ragobserve.trace("query", query=question):
63
+ ragobserve.log_retrieval(question, results, retriever="qdrant", duration_ms=23)
64
+ ragobserve.log_rerank(before, after, model="bge-reranker")
65
+ ragobserve.log_context(final_prompt, system_prompt=sys, chunks=top_chunks, context_window=8192)
66
+ ragobserve.log_generation(model="gpt-4o", prompt=final_prompt, response=answer, cost=0.002)
67
+ ```
68
+
69
+ Decorator and nesting also work:
70
+
71
+ ```python
72
+ @ragobserve.trace
73
+ def retrieve(query): ...
74
+ ```
75
+
76
+ Then explore:
77
+
78
+ ```bash
79
+ ragobserve ui # http://127.0.0.1:5601
80
+ ```
81
+
82
+ ## Dashboard
83
+
84
+ - **Query Explorer** — every query with latency, cost, retriever, model, chunk count
85
+ - **Trace waterfall** — the full pipeline per query, stage by stage
86
+ - **Retrieval Explorer** — retrieved chunks with scores, ranks, metadata
87
+ - **Hybrid Search Explorer** — BM25 vs vector vs fused results
88
+ - **Reranker Analytics** — before/after with rank shifts and Kendall's τ
89
+ - **Context Builder Viewer** — exactly what was sent to the model, DevTools-style
90
+ - **Chunk Explorer** — most retrieved / never retrieved (dead) / duplicate chunks
91
+ - **Metrics** — Precision@k, Recall@k, MRR, nDCG over logged ground truth, plus chunk utilization
92
+ - **Generations & cost** — Langfuse-style cost tracing: per-model / per-day token & $ breakdowns, charts, and the context that produced each generation. Costs are auto-backfilled from a built-in price book when you don't pass `cost=`.
93
+
94
+ ## LLM generation & live replay
95
+
96
+ RAGObserve ships a zero-SDK, httpx-based provider layer covering **11 providers** — Anthropic, OpenAI, Gemini, Groq, OpenRouter, Together, Mistral, DeepSeek, Fireworks, Perplexity, Ollama. From any trace's **Generation** / **Context** view you can *replay* the captured context against a live provider (when its API key is set) and the new generation is logged back into the trace with its cost.
97
+
98
+ ```bash
99
+ ragobserve providers # list providers and which have keys configured
100
+ ```
101
+
102
+ ## Framework adapters
103
+
104
+ Full pipeline — ingest *and* query — is captured.
105
+
106
+ ### LangChain
107
+
108
+ ```python
109
+ from ragobserve.adapters import (
110
+ RagObserveCallbackHandler,
111
+ instrument_loader, instrument_splitter, instrument_embeddings,
112
+ )
113
+
114
+ # query-time: retrieval + generation (+ model, token usage, cost) via the handler
115
+ chain.invoke(q, config={"callbacks": [RagObserveCallbackHandler()]})
116
+
117
+ # ingest-time: loaders/splitters/embeddings emit no callbacks, so wrap them
118
+ loader = instrument_loader(PyPDFLoader("contract.pdf")) # → ingestion event
119
+ splitter = instrument_splitter(RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=50))
120
+ emb = instrument_embeddings(OpenAIEmbeddings()) # real Embeddings subclass — FAISS-safe
121
+
122
+ docs = loader.load()
123
+ chunks = splitter.split_documents(docs) # → chunking event (split_documents/split_text/create_documents/transform_documents)
124
+ FAISS.from_documents(chunks, emb) # embed_documents → embedding event
125
+ ```
126
+
127
+ `instrument_embeddings` returns a true `Embeddings` subclass, so vector stores that `isinstance`-check it (FAISS, etc.) keep working; async `aembed_*` is covered via the base class. The callback handler reads token usage from both `llm_output` and chat-message `usage_metadata`. For reranking, `instrument_compressor(CrossEncoderReranker(...))` returns a real `BaseDocumentCompressor` subclass (so `ContextualCompressionRetriever` still validates it) and logs before/after on `compress_documents` — the one RAG step LangChain fires no callback for. The handler also emits **context_assembly** automatically (the prompt sent to the model is the assembled context — no manual `log_context` needed).
128
+
129
+ If a framework version moves an API the adapters hook, the wrappers emit a `RagObserveWarning` ("…not captured (version drift?)") instead of silently logging nothing.
130
+
131
+ ### LlamaIndex
132
+
133
+ ```python
134
+ from ragobserve.adapters.llamaindex import register
135
+ register() # ONE call instruments the global dispatcher — ingest + query
136
+ ```
137
+
138
+ Hooks LlamaIndex's instrumentation dispatcher, so it captures the stages below with no code changes (the one exception is rerankers that emit no event, which need a wrapper — see **reranking**):
139
+
140
+ - **embedding** (`EmbeddingEndEvent`, incl. sparse) — model + dimensions
141
+ - **chunking** — derived from the ingest embedding batch (LlamaIndex emits no node-parsing event)
142
+ - **retrieval** (`RetrievalEndEvent`) — at the retriever layer, so **all 80+ vector stores** (Chroma/Pinecone/Qdrant/Milvus/Weaviate/…) are covered transitively
143
+ - **reranking** — `StructuredLLMRerank` fires `ReRankEndEvent` automatically; most rerankers (`SentenceTransformerRerank`, Cohere, `LLMRerank`) emit **no** event, so wrap them: `instrument_postprocessor(SentenceTransformerRerank(...))` → logs before/after, model, top_n
144
+ - **context_assembly** (`GetResponseStartEvent`) — the exact context handed to the LLM during synthesis
145
+ - **generation** (`LLMChat/CompletionEndEvent`) — model, prompt/response, tokens → **cost**
146
+ - **boundaries** — query engines (`QueryStart/End`) and chat engines (`StreamChat*`, `AgentChatWithStep*`, incl. streamed deltas), de-duplicated against the LLM events
147
+
148
+ | Stage | LangChain | LlamaIndex |
149
+ |---|---|---|
150
+ | ingestion | `instrument_loader` | (via pipeline) |
151
+ | chunking | `instrument_splitter` | auto |
152
+ | embedding | `instrument_embeddings` | auto |
153
+ | retrieval | auto (callback) | auto |
154
+ | reranking | `instrument_compressor` (or `log_rerank`) | auto (`StructuredLLMRerank`) or `instrument_postprocessor` |
155
+ | context assembly | auto (handler) | auto |
156
+ | generation + cost | auto | auto |
157
+ | query / chat boundary | auto (chain) | auto |
158
+
159
+ ## Vector database integrations
160
+
161
+ Wrap a live client once; every query is logged as a retrieval event automatically — no manual `log_retrieval` calls. Duck-typed, so importing these never requires the DB package installed.
162
+
163
+ ```python
164
+ import ragobserve
165
+ ragobserve.init(project="my-rag")
166
+
167
+ col = ragobserve.instrument_chroma(chroma_collection) # .query
168
+ idx = ragobserve.instrument_pinecone(pinecone_index) # .query
169
+ qc = ragobserve.instrument_qdrant(qdrant_client) # .search / .query_points
170
+ wv = ragobserve.instrument_weaviate(weaviate_collection) # .query.near_vector/near_text/hybrid/bm25
171
+ mv = ragobserve.instrument_milvus(milvus_collection) # .search (ORM + MilvusClient)
172
+
173
+ # pgvector has no client to proxy — run your SQL, pass the rows:
174
+ rows = cur.fetchall() # ORDER BY embedding <=> %s LIMIT k
175
+ ragobserve.log_pgvector(query, rows)
176
+ ```
177
+
178
+ RAGObserve is vector-DB-agnostic: the `retriever` label is free-text, so **any** store works (FAISS, Elasticsearch, OpenSearch, pgvector, …) even without a dedicated wrapper — just pass results to `ragobserve.log_retrieval(query, results, retriever="...")`.
179
+
180
+ ## Try the demo
181
+
182
+ ```bash
183
+ python examples/demo_rag.py
184
+ ragobserve ui
185
+ ```
186
+
187
+ ## Development
188
+
189
+ ```bash
190
+ pip install -e .[dev]
191
+ pytest
192
+ ```
@@ -0,0 +1,163 @@
1
+ # RAGObserve
2
+
3
+ **Local-first observability, debugging and evaluation for RAG systems. The MLflow for RAG.**
4
+
5
+ Unlike general LLM observability tools, RAGObserve focuses on the *retrieval lifecycle*:
6
+
7
+ ```
8
+ documents → chunking → embedding → indexing → retrieval → fusion
9
+ → reranking → context assembly → generation → grounding
10
+ ```
11
+
12
+ It is framework-agnostic (a universal RAG event model, not LangChain hooks), provider-agnostic, vector-DB-agnostic, and stores everything in a single local SQLite file inside a hidden `./.ragobserve/` folder (like `.git`) — no servers, no accounts.
13
+
14
+ ## Install
15
+
16
+ ```bash
17
+ pip install ragobserve # or: uv tool install ragobserve
18
+ pip install ragobserve[langchain] # optional LangChain auto-instrumentation
19
+ pip install ragobserve[llamaindex] # optional LlamaIndex auto-instrumentation
20
+ ```
21
+
22
+ ## Quickstart
23
+
24
+ Instrument your RAG code (writes to a hidden `./.ragobserve/ragobserve.db`, no server needed):
25
+
26
+ ```python
27
+ import ragobserve
28
+
29
+ ragobserve.init(project="contract-rag")
30
+ # or point at a running server:
31
+ # ragobserve.init(project="contract-rag", tracking_uri="http://localhost:5601")
32
+
33
+ with ragobserve.trace("query", query=question):
34
+ ragobserve.log_retrieval(question, results, retriever="qdrant", duration_ms=23)
35
+ ragobserve.log_rerank(before, after, model="bge-reranker")
36
+ ragobserve.log_context(final_prompt, system_prompt=sys, chunks=top_chunks, context_window=8192)
37
+ ragobserve.log_generation(model="gpt-4o", prompt=final_prompt, response=answer, cost=0.002)
38
+ ```
39
+
40
+ Decorator and nesting also work:
41
+
42
+ ```python
43
+ @ragobserve.trace
44
+ def retrieve(query): ...
45
+ ```
46
+
47
+ Then explore:
48
+
49
+ ```bash
50
+ ragobserve ui # http://127.0.0.1:5601
51
+ ```
52
+
53
+ ## Dashboard
54
+
55
+ - **Query Explorer** — every query with latency, cost, retriever, model, chunk count
56
+ - **Trace waterfall** — the full pipeline per query, stage by stage
57
+ - **Retrieval Explorer** — retrieved chunks with scores, ranks, metadata
58
+ - **Hybrid Search Explorer** — BM25 vs vector vs fused results
59
+ - **Reranker Analytics** — before/after with rank shifts and Kendall's τ
60
+ - **Context Builder Viewer** — exactly what was sent to the model, DevTools-style
61
+ - **Chunk Explorer** — most retrieved / never retrieved (dead) / duplicate chunks
62
+ - **Metrics** — Precision@k, Recall@k, MRR, nDCG over logged ground truth, plus chunk utilization
63
+ - **Generations & cost** — Langfuse-style cost tracing: per-model / per-day token & $ breakdowns, charts, and the context that produced each generation. Costs are auto-backfilled from a built-in price book when you don't pass `cost=`.
64
+
65
+ ## LLM generation & live replay
66
+
67
+ RAGObserve ships a zero-SDK, httpx-based provider layer covering **11 providers** — Anthropic, OpenAI, Gemini, Groq, OpenRouter, Together, Mistral, DeepSeek, Fireworks, Perplexity, Ollama. From any trace's **Generation** / **Context** view you can *replay* the captured context against a live provider (when its API key is set) and the new generation is logged back into the trace with its cost.
68
+
69
+ ```bash
70
+ ragobserve providers # list providers and which have keys configured
71
+ ```
72
+
73
+ ## Framework adapters
74
+
75
+ Full pipeline — ingest *and* query — is captured.
76
+
77
+ ### LangChain
78
+
79
+ ```python
80
+ from ragobserve.adapters import (
81
+ RagObserveCallbackHandler,
82
+ instrument_loader, instrument_splitter, instrument_embeddings,
83
+ )
84
+
85
+ # query-time: retrieval + generation (+ model, token usage, cost) via the handler
86
+ chain.invoke(q, config={"callbacks": [RagObserveCallbackHandler()]})
87
+
88
+ # ingest-time: loaders/splitters/embeddings emit no callbacks, so wrap them
89
+ loader = instrument_loader(PyPDFLoader("contract.pdf")) # → ingestion event
90
+ splitter = instrument_splitter(RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=50))
91
+ emb = instrument_embeddings(OpenAIEmbeddings()) # real Embeddings subclass — FAISS-safe
92
+
93
+ docs = loader.load()
94
+ chunks = splitter.split_documents(docs) # → chunking event (split_documents/split_text/create_documents/transform_documents)
95
+ FAISS.from_documents(chunks, emb) # embed_documents → embedding event
96
+ ```
97
+
98
+ `instrument_embeddings` returns a true `Embeddings` subclass, so vector stores that `isinstance`-check it (FAISS, etc.) keep working; async `aembed_*` is covered via the base class. The callback handler reads token usage from both `llm_output` and chat-message `usage_metadata`. For reranking, `instrument_compressor(CrossEncoderReranker(...))` returns a real `BaseDocumentCompressor` subclass (so `ContextualCompressionRetriever` still validates it) and logs before/after on `compress_documents` — the one RAG step LangChain fires no callback for. The handler also emits **context_assembly** automatically (the prompt sent to the model is the assembled context — no manual `log_context` needed).
99
+
100
+ If a framework version moves an API the adapters hook, the wrappers emit a `RagObserveWarning` ("…not captured (version drift?)") instead of silently logging nothing.
101
+
102
+ ### LlamaIndex
103
+
104
+ ```python
105
+ from ragobserve.adapters.llamaindex import register
106
+ register() # ONE call instruments the global dispatcher — ingest + query
107
+ ```
108
+
109
+ Hooks LlamaIndex's instrumentation dispatcher, so it captures the stages below with no code changes (the one exception is rerankers that emit no event, which need a wrapper — see **reranking**):
110
+
111
+ - **embedding** (`EmbeddingEndEvent`, incl. sparse) — model + dimensions
112
+ - **chunking** — derived from the ingest embedding batch (LlamaIndex emits no node-parsing event)
113
+ - **retrieval** (`RetrievalEndEvent`) — at the retriever layer, so **all 80+ vector stores** (Chroma/Pinecone/Qdrant/Milvus/Weaviate/…) are covered transitively
114
+ - **reranking** — `StructuredLLMRerank` fires `ReRankEndEvent` automatically; most rerankers (`SentenceTransformerRerank`, Cohere, `LLMRerank`) emit **no** event, so wrap them: `instrument_postprocessor(SentenceTransformerRerank(...))` → logs before/after, model, top_n
115
+ - **context_assembly** (`GetResponseStartEvent`) — the exact context handed to the LLM during synthesis
116
+ - **generation** (`LLMChat/CompletionEndEvent`) — model, prompt/response, tokens → **cost**
117
+ - **boundaries** — query engines (`QueryStart/End`) and chat engines (`StreamChat*`, `AgentChatWithStep*`, incl. streamed deltas), de-duplicated against the LLM events
118
+
119
+ | Stage | LangChain | LlamaIndex |
120
+ |---|---|---|
121
+ | ingestion | `instrument_loader` | (via pipeline) |
122
+ | chunking | `instrument_splitter` | auto |
123
+ | embedding | `instrument_embeddings` | auto |
124
+ | retrieval | auto (callback) | auto |
125
+ | reranking | `instrument_compressor` (or `log_rerank`) | auto (`StructuredLLMRerank`) or `instrument_postprocessor` |
126
+ | context assembly | auto (handler) | auto |
127
+ | generation + cost | auto | auto |
128
+ | query / chat boundary | auto (chain) | auto |
129
+
130
+ ## Vector database integrations
131
+
132
+ Wrap a live client once; every query is logged as a retrieval event automatically — no manual `log_retrieval` calls. Duck-typed, so importing these never requires the DB package installed.
133
+
134
+ ```python
135
+ import ragobserve
136
+ ragobserve.init(project="my-rag")
137
+
138
+ col = ragobserve.instrument_chroma(chroma_collection) # .query
139
+ idx = ragobserve.instrument_pinecone(pinecone_index) # .query
140
+ qc = ragobserve.instrument_qdrant(qdrant_client) # .search / .query_points
141
+ wv = ragobserve.instrument_weaviate(weaviate_collection) # .query.near_vector/near_text/hybrid/bm25
142
+ mv = ragobserve.instrument_milvus(milvus_collection) # .search (ORM + MilvusClient)
143
+
144
+ # pgvector has no client to proxy — run your SQL, pass the rows:
145
+ rows = cur.fetchall() # ORDER BY embedding <=> %s LIMIT k
146
+ ragobserve.log_pgvector(query, rows)
147
+ ```
148
+
149
+ RAGObserve is vector-DB-agnostic: the `retriever` label is free-text, so **any** store works (FAISS, Elasticsearch, OpenSearch, pgvector, …) even without a dedicated wrapper — just pass results to `ragobserve.log_retrieval(query, results, retriever="...")`.
150
+
151
+ ## Try the demo
152
+
153
+ ```bash
154
+ python examples/demo_rag.py
155
+ ragobserve ui
156
+ ```
157
+
158
+ ## Development
159
+
160
+ ```bash
161
+ pip install -e .[dev]
162
+ pytest
163
+ ```
@@ -0,0 +1,50 @@
1
+ [build-system]
2
+ requires = ["setuptools>=68"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "ragobserve"
7
+ version = "0.2.0"
8
+ description = "RAGObserve: local-first observability, debugging and evaluation for RAG systems. The MLflow for RAG."
9
+ readme = "README.md"
10
+ requires-python = ">=3.9"
11
+ license = { text = "Apache-2.0" }
12
+ authors = [{ name = "Pranesh", email = "praneshmadhan646@gmail.com" }]
13
+ keywords = ["rag", "observability", "tracing", "retrieval", "llm", "evaluation"]
14
+ classifiers = [
15
+ "Development Status :: 3 - Alpha",
16
+ "Intended Audience :: Developers",
17
+ "Programming Language :: Python :: 3",
18
+ "Topic :: Software Development :: Debuggers",
19
+ ]
20
+ dependencies = [
21
+ "fastapi>=0.100",
22
+ "uvicorn>=0.23",
23
+ "jinja2>=3.1",
24
+ "pydantic>=2.0",
25
+ "httpx>=0.24",
26
+ ]
27
+
28
+ [project.optional-dependencies]
29
+ langchain = ["langchain-core>=0.1"]
30
+ llamaindex = ["llama-index-core>=0.10"]
31
+ # Live generation replay works out of the box via httpx (OpenAI-compatible
32
+ # providers + Anthropic + Ollama), so no extra deps are strictly required.
33
+ # Install these only if you prefer the official vendor SDKs elsewhere.
34
+ llm = ["anthropic>=0.40", "openai>=1.0"]
35
+ dev = ["pytest>=7.0"]
36
+
37
+ [project.scripts]
38
+ ragobserve = "ragobserve.cli:main"
39
+
40
+ [project.urls]
41
+ Homepage = "https://github.com/Pranesh-2005/ragobserve"
42
+
43
+ [tool.pytest.ini_options]
44
+ testpaths = ["tests"]
45
+
46
+ [tool.setuptools.packages.find]
47
+ include = ["ragobserve*"]
48
+
49
+ [tool.setuptools.package-data]
50
+ "ragobserve.server" = ["templates/*.html", "static/*.css", "static/*.js"]
@@ -0,0 +1,58 @@
1
+ """RAGObserve — local-first observability for RAG systems.
2
+
3
+ Quickstart::
4
+
5
+ import ragobserve
6
+ ragobserve.init(project="contract-rag") # local ./.ragobserve/ragobserve.db
7
+ # or: ragobserve.init(project="contract-rag", tracking_uri="http://localhost:5601")
8
+
9
+ with ragobserve.trace("query", query="What is the notice period?"):
10
+ ragobserve.log_retrieval(query, results, retriever="qdrant")
11
+ ragobserve.log_rerank(before, after, model="bge-reranker")
12
+ ragobserve.log_context(final_prompt, system_prompt=sys, chunks=chunks)
13
+ ragobserve.log_generation(model="gpt-4o", response=answer, cost=0.002)
14
+
15
+ Then ``ragobserve ui`` to explore the dashboard.
16
+ """
17
+ from .adapters.langchain import (
18
+ instrument_compressor,
19
+ instrument_embeddings,
20
+ instrument_loader,
21
+ instrument_splitter,
22
+ )
23
+ from .adapters.vectordb import (
24
+ instrument_chroma,
25
+ instrument_milvus,
26
+ instrument_pinecone,
27
+ instrument_qdrant,
28
+ instrument_weaviate,
29
+ log_pgvector,
30
+ )
31
+ from .client import flush, get_client, init
32
+ from .events import Chunk, RagEvent, Stage
33
+ from .tracing import (
34
+ current_trace_id,
35
+ log_chunks,
36
+ log_context,
37
+ log_embedding,
38
+ log_fusion,
39
+ log_generation,
40
+ log_ground_truth,
41
+ log_ingestion,
42
+ log_rerank,
43
+ log_retrieval,
44
+ trace,
45
+ )
46
+
47
# Release version string of the package.
__version__ = "0.2.0"

# Names re-exported at the package root, listed in the order of the modules
# they are imported from above.
__all__ = [
    # .client
    "init",
    "flush",
    "get_client",
    # .tracing
    "trace",
    "current_trace_id",
    "log_ingestion",
    "log_chunks",
    "log_embedding",
    "log_retrieval",
    "log_fusion",
    "log_rerank",
    "log_context",
    "log_generation",
    "log_ground_truth",
    # .adapters.vectordb
    "instrument_chroma",
    "instrument_pinecone",
    "instrument_qdrant",
    "instrument_weaviate",
    "instrument_milvus",
    "log_pgvector",
    # .adapters.langchain
    "instrument_splitter",
    "instrument_embeddings",
    "instrument_loader",
    "instrument_compressor",
    # .events
    "RagEvent",
    "Chunk",
    "Stage",
    # package metadata
    "__version__",
]
@@ -0,0 +1,32 @@
1
+ """Diagnostics for the framework adapters.
2
+
3
+ Adapters hook into LangChain / LlamaIndex internals (callback signatures,
4
+ instrumentation event names, expected methods). Those move between framework
5
+ versions, and when they do the failure is silent — a stage just stops being
6
+ captured. These helpers turn that silence into a visible ``RagObserveWarning`` so
7
+ version drift is noticed instead of producing empty dashboards.
8
+ """
9
+ from __future__ import annotations
10
+
11
+ import warnings
12
+ from typing import Iterable
13
+
14
+
15
class RagObserveWarning(UserWarning):
    """Warning category used when an adapter cannot hook an API it expected."""
17
+
18
+
19
def warn(message: str) -> None:
    """Issue ``message`` as a ``RagObserveWarning`` with a ``[ragobserve]`` prefix.

    Args:
        message: Human-readable description of what could not be hooked.
    """
    prefixed = f"[ragobserve] {message}"
    # stacklevel=3 attributes the warning two frames above this function
    # (the caller of whatever helper invoked ``warn``), not to this module.
    warnings.warn(prefixed, category=RagObserveWarning, stacklevel=3)
21
+
22
+
23
def require_methods(obj: object, methods: Iterable[str], what: str) -> None:
    """Warn when ``obj`` exposes none of the expected ``methods``.

    A wrapper only captures a stage if the wrapped object still has at least
    one of the methods it hooks. When none is present the wrapper would
    silently capture nothing, so a warning is issued through ``warn`` instead.

    Args:
        obj: The object about to be wrapped.
        methods: Candidate method names; at least one must be a callable
            attribute of ``obj``. Any iterable is accepted, including a
            one-shot iterator; a bare string is treated as a single name.
        what: Label for the wrapper/stage, used as the message prefix.
    """
    # Materialise once: the names are needed both for the presence check and
    # for the message. A one-shot iterator would be exhausted by the first
    # pass, leaving the warning with an empty (misleading) method list.
    names = [methods] if isinstance(methods, str) else list(methods)
    if any(callable(getattr(obj, name, None)) for name in names):
        return
    warn(
        f"{what}: {type(obj).__name__} has none of {names} — "
        f"that stage will not be captured (framework version drift?)"
    )
@@ -0,0 +1,23 @@
1
+ """Framework + vector-DB adapters."""
2
+ from .langchain import (
3
+ RagObserveCallbackHandler,
4
+ instrument_compressor,
5
+ instrument_embeddings,
6
+ instrument_loader,
7
+ instrument_splitter,
8
+ )
9
+ from .vectordb import (
10
+ instrument_chroma,
11
+ instrument_milvus,
12
+ instrument_pinecone,
13
+ instrument_qdrant,
14
+ instrument_weaviate,
15
+ log_pgvector,
16
+ )
17
+
18
# Names re-exported by the adapters package, grouped by the module they are
# imported from above.
__all__ = [
    # .vectordb
    "instrument_chroma",
    "instrument_pinecone",
    "instrument_qdrant",
    "instrument_weaviate",
    "instrument_milvus",
    "log_pgvector",
    # .langchain
    "instrument_splitter",
    "instrument_embeddings",
    "instrument_loader",
    "instrument_compressor",
    "RagObserveCallbackHandler",
]