ragobserve 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ragobserve/__init__.py ADDED
@@ -0,0 +1,58 @@
1
+ """RAGObserve — local-first observability for RAG systems.
2
+
3
+ Quickstart::
4
+
5
+ import ragobserve
6
+ ragobserve.init(project="contract-rag") # local ./ragobserve.db
7
+ # or: ragobserve.init(project="contract-rag", tracking_uri="http://localhost:5601")
8
+
9
+ with ragobserve.trace("query", query="What is the notice period?"):
10
+ ragobserve.log_retrieval(query, results, retriever="qdrant")
11
+ ragobserve.log_rerank(before, after, model="bge-reranker")
12
+ ragobserve.log_context(final_prompt, system_prompt=sys, chunks=chunks)
13
+ ragobserve.log_generation(model="gpt-4o", response=answer, cost=0.002)
14
+
15
+ Then ``ragobserve ui`` to explore the dashboard.
16
+ """
17
+ from .adapters.langchain import (
18
+ instrument_compressor,
19
+ instrument_embeddings,
20
+ instrument_loader,
21
+ instrument_splitter,
22
+ )
23
+ from .adapters.vectordb import (
24
+ instrument_chroma,
25
+ instrument_milvus,
26
+ instrument_pinecone,
27
+ instrument_qdrant,
28
+ instrument_weaviate,
29
+ log_pgvector,
30
+ )
31
+ from .client import flush, get_client, init
32
+ from .events import Chunk, RagEvent, Stage
33
+ from .tracing import (
34
+ current_trace_id,
35
+ log_chunks,
36
+ log_context,
37
+ log_embedding,
38
+ log_fusion,
39
+ log_generation,
40
+ log_ground_truth,
41
+ log_ingestion,
42
+ log_rerank,
43
+ log_retrieval,
44
+ trace,
45
+ )
46
+
47
__version__ = "0.2.0"

# Names re-exported at package level (what ``from ragobserve import *`` binds).
__all__ = [
    # client + trace context
    "init",
    "flush",
    "get_client",
    "trace",
    "current_trace_id",
    # manual stage loggers
    "log_ingestion",
    "log_chunks",
    "log_embedding",
    "log_retrieval",
    "log_fusion",
    "log_rerank",
    "log_context",
    "log_generation",
    "log_ground_truth",
    # vector-DB adapters
    "instrument_chroma",
    "instrument_pinecone",
    "instrument_qdrant",
    "instrument_weaviate",
    "instrument_milvus",
    "log_pgvector",
    # LangChain adapters
    "instrument_splitter",
    "instrument_embeddings",
    "instrument_loader",
    "instrument_compressor",
    # event model + metadata
    "RagEvent",
    "Chunk",
    "Stage",
    "__version__",
]
ragobserve/_diag.py ADDED
@@ -0,0 +1,32 @@
1
+ """Diagnostics for the framework adapters.
2
+
3
+ Adapters hook into LangChain / LlamaIndex internals (callback signatures,
4
+ instrumentation event names, expected methods). Those move between framework
5
+ versions, and when they do the failure is silent — a stage just stops being
6
+ captured. These helpers turn that silence into a visible ``RagObserveWarning`` so
7
+ version drift is noticed instead of producing empty dashboards.
8
+ """
9
+ from __future__ import annotations
10
+
11
+ import warnings
12
+ from typing import Iterable
13
+
14
+
15
class RagObserveWarning(UserWarning):
    """Emitted when an adapter can't hook something it expected to."""


def warn(message: str) -> None:
    """Emit ``message`` as a ``RagObserveWarning`` prefixed with ``[ragobserve]``.

    ``stacklevel=3`` attributes the warning to the caller of whichever helper
    invoked ``warn`` (for ``require_methods``: the function that called
    ``require_methods``) rather than to this module.
    """
    warnings.warn(f"[ragobserve] {message}", RagObserveWarning, stacklevel=3)


def require_methods(obj: object, methods: Iterable[str], what: str) -> None:
    """Warn if ``obj`` is missing every one of ``methods`` (so the wrapper would
    silently capture nothing). ``methods`` is treated as "at least one must
    exist".

    Args:
        obj: The object about to be wrapped.
        methods: Candidate method names; any iterable, including one-shot
            iterators and generators.
        what: Label of the calling adapter, used as the message prefix.
    """
    # Materialise once: ``methods`` may be a one-shot iterator, and it is needed
    # both for the lookup and for the warning text. Iterating it twice would
    # report "has none of []".
    expected = list(methods)
    if any(callable(getattr(obj, name, None)) for name in expected):
        return
    warn(
        f"{what}: {type(obj).__name__} has none of {expected} — "
        f"that stage will not be captured (framework version drift?)"
    )
@@ -0,0 +1,23 @@
1
+ """Framework + vector-DB adapters."""
2
+ from .langchain import (
3
+ RagObserveCallbackHandler,
4
+ instrument_compressor,
5
+ instrument_embeddings,
6
+ instrument_loader,
7
+ instrument_splitter,
8
+ )
9
+ from .vectordb import (
10
+ instrument_chroma,
11
+ instrument_milvus,
12
+ instrument_pinecone,
13
+ instrument_qdrant,
14
+ instrument_weaviate,
15
+ log_pgvector,
16
+ )
17
+
18
# Names re-exported by the adapters package.
__all__ = [
    # vector-DB adapters
    "instrument_chroma",
    "instrument_pinecone",
    "instrument_qdrant",
    "instrument_weaviate",
    "instrument_milvus",
    "log_pgvector",
    # LangChain adapters
    "instrument_splitter",
    "instrument_embeddings",
    "instrument_loader",
    "instrument_compressor",
    "RagObserveCallbackHandler",
]
@@ -0,0 +1,365 @@
1
+ """LangChain adapter: a callback handler that converts LangChain run events
2
+ into the universal RAGObserve event model.
3
+
4
+ Usage::
5
+
6
+ from ragobserve.adapters.langchain import RagObserveCallbackHandler
7
+ chain.invoke(question, config={"callbacks": [RagObserveCallbackHandler()]})
8
+
9
+ Requires ``pip install ragobserve[langchain]``. The translation helpers below
10
+ are pure (dict in -> event dict out) so they are testable without LangChain.
11
+ """
12
+ from __future__ import annotations
13
+
14
+ import time
15
+ from typing import Any, Dict, List, Optional
16
+
17
+ from .. import client as _client
18
+ from .._diag import require_methods
19
+ from ..events import RagEvent, Stage, estimate_tokens, normalize_result
20
+ from .vectordb import _Proxy
21
+
22
+ try:
23
+ from langchain_core.callbacks import BaseCallbackHandler # type: ignore
24
+ except ImportError: # pragma: no cover - exercised only without the extra
25
+ BaseCallbackHandler = object
26
+
27
+ try:
28
+ from langchain_core.documents.compressor import BaseDocumentCompressor as _BaseCompressor # type: ignore
29
+ except ImportError: # pragma: no cover
30
+ _BaseCompressor = None
31
+
32
+
33
+ # --------------------------------------------------------------- pure mapping
34
+
35
def retrieval_event(query: str, documents: List[Any], trace_id: str,
                    parent_span_id: Optional[str], project: str,
                    start_time: float, retriever: Optional[str] = None) -> Dict[str, Any]:
    """Build a finished retrieval event (as a plain dict) from retrieved documents.

    Each document is passed through ``normalize_result``; a 1-based ``rank``
    and a ``source`` (taken from the result's ``metadata``) are filled in only
    when the normalized result does not already carry them.

    Returns:
        ``RagEvent(...).finish().model_dump()`` for a ``Stage.RETRIEVAL`` event
        named after ``retriever`` (or ``"langchain.retriever"`` when unset).
    """
    hits = [normalize_result(doc) for doc in documents]
    for position, hit in enumerate(hits, start=1):
        hit.setdefault("rank", position)
        metadata = hit.get("metadata") or {}
        hit.setdefault("source", metadata.get("source"))

    attributes = {
        "query": query,
        "results": hits,
        "top_k": len(hits),
        "retriever": retriever,
    }
    event = RagEvent(
        trace_id=trace_id,
        parent_span_id=parent_span_id,
        project=project,
        stage=Stage.RETRIEVAL.value,
        name=retriever or "langchain.retriever",
        start_time=start_time,
        attributes=attributes,
    )
    finished = event.finish()
    return finished.model_dump()
50
+
51
+
52
def generation_event(prompts: List[str], response_text: str, model: Optional[str],
                     trace_id: str, parent_span_id: Optional[str], project: str,
                     start_time: float, token_usage: Optional[Dict[str, Any]] = None,
                     status: str = "ok") -> Dict[str, Any]:
    """Build a finished generation event (as a plain dict) for one LLM call.

    ``prompts`` are joined with a blank line into a single ``prompt``
    attribute; token counts are read from the ``prompt_tokens`` /
    ``completion_tokens`` keys of ``token_usage`` and are ``None`` when absent.

    Returns:
        ``RagEvent(...).finish(status).model_dump()`` for a
        ``Stage.GENERATION`` event named after ``model`` (or
        ``"langchain.llm"`` when unset).
    """
    counts = token_usage or {}
    attributes = {
        "model": model,
        "prompt": "\n\n".join(prompts),
        "response": response_text,
        "input_tokens": counts.get("prompt_tokens"),
        "output_tokens": counts.get("completion_tokens"),
    }
    event = RagEvent(
        trace_id=trace_id,
        parent_span_id=parent_span_id,
        project=project,
        stage=Stage.GENERATION.value,
        name=model or "langchain.llm",
        start_time=start_time,
        attributes=attributes,
    )
    finished = event.finish(status)
    return finished.model_dump()
70
+
71
+
72
+ # ------------------------------------------------------------------- handler
73
+
74
class RagObserveCallbackHandler(BaseCallbackHandler):
    """Maps on_chain_* to trace boundaries, on_retriever_* to retrieval
    events and on_llm_* to generation events.

    The first chain run that has no parent (while no other root is active)
    becomes the "root": it allocates a fresh trace id, and when that run ends
    or errors a ``langchain.chain`` event is logged for the whole trace.
    Retriever and LLM runs are tracked per ``run_id`` in ``_starts`` /
    ``_queries`` and logged when they finish.

    Args:
        project: Project name recorded on every event; defaults to
            ``client.get_project()``.

    Raises:
        ImportError: If ``langchain_core`` could not be imported.
    """

    def __init__(self, project: Optional[str] = None):
        if BaseCallbackHandler is object:
            raise ImportError(
                "LangChain is not installed. Run: pip install ragobserve[langchain]"
            )
        self.project = project or _client.get_project()
        self._trace_id: Optional[str] = None
        self._root_run: Optional[str] = None
        # Per-run bookkeeping, keyed by str(run_id); entries are removed when
        # the run ends or errors.
        self._starts: Dict[str, float] = {}
        self._queries: Dict[str, str] = {}
        self._trace_start: Optional[float] = None
        self._query: Optional[str] = None

    # -- chain = trace boundary -------------------------------------------
    def on_chain_start(self, serialized, inputs, *, run_id, parent_run_id=None, **kw):
        """Open a new trace when a root chain starts and remember its query."""
        if parent_run_id is not None or self._root_run is not None:
            return
        self._root_run = str(run_id)
        self._trace_id = RagEvent().trace_id
        self._trace_start = time.time()
        # Reset before extracting: otherwise a run whose inputs carry no
        # recognisable query would be logged with the previous trace's query.
        self._query = None
        if isinstance(inputs, dict):
            for key in ("question", "query", "input"):
                if isinstance(inputs.get(key), str):
                    self._query = inputs[key]
                    break
        elif isinstance(inputs, str):
            self._query = inputs

    def _root_event(self, **extra: Any) -> RagEvent:
        """Build the (unfinished) trace-level ``langchain.chain`` event."""
        return RagEvent(
            trace_id=self._trace_id, project=self.project,
            stage=Stage.OTHER.value, name="langchain.chain",
            start_time=self._trace_start or time.time(),
            attributes={"query": self._query, **extra},
        )

    def on_chain_end(self, outputs, *, run_id, parent_run_id=None, **kw):
        """Log the trace-level event when the root chain finishes."""
        if str(run_id) != self._root_run:
            return
        _client.get_client().log_event(self._root_event().finish().model_dump())
        self._root_run = None

    def on_chain_error(self, error, *, run_id, parent_run_id=None, **kw):
        """Log the trace-level event with status ``error`` when the root chain fails."""
        if str(run_id) != self._root_run:
            return
        ev = self._root_event(error=repr(error))
        _client.get_client().log_event(ev.finish("error").model_dump())
        self._root_run = None

    def _ensure_trace(self) -> str:
        """Return the current trace id, allocating one if no chain opened a trace."""
        if self._trace_id is None:
            self._trace_id = RagEvent().trace_id
        return self._trace_id

    # -- retriever ----------------------------------------------------------
    def on_retriever_start(self, serialized, query, *, run_id, parent_run_id=None, **kw):
        """Record the start time and query of a retriever run."""
        rid = str(run_id)
        self._starts[rid] = time.time()
        self._queries[rid] = query

    def on_retriever_end(self, documents, *, run_id, parent_run_id=None, **kw):
        """Log a retrieval event for the finished retriever run."""
        rid = str(run_id)
        ev = retrieval_event(
            query=self._queries.pop(rid, ""),
            documents=list(documents or []),
            trace_id=self._ensure_trace(),
            parent_span_id=None,
            project=self.project,
            start_time=self._starts.pop(rid, time.time()),
        )
        _client.get_client().log_event(ev)

    def on_retriever_error(self, error, *, run_id, parent_run_id=None, **kw):
        """Drop the bookkeeping of a failed retriever run.

        Without this, the ``_starts`` / ``_queries`` entries written by
        ``on_retriever_start`` would never be removed for failed runs and a
        long-lived handler would grow without bound. No event is emitted,
        matching the previous behaviour for retriever failures.
        """
        rid = str(run_id)
        self._starts.pop(rid, None)
        self._queries.pop(rid, None)

    # -- llm ------------------------------------------------------------------
    def _log_context(self, final_prompt: str) -> None:
        """The prompt sent to the model is exactly the assembled context, so
        emit a context_assembly event — no manual ``log_context`` needed."""
        ev = RagEvent(
            trace_id=self._ensure_trace(), project=self.project,
            stage=Stage.CONTEXT_ASSEMBLY.value, name="langchain.prompt",
            attributes={"final_prompt": final_prompt, "query": None,
                        "system_prompt": None, "chunks": [],
                        "token_count": estimate_tokens(final_prompt),
                        "context_window": None},
        )
        _client.get_client().log_event(ev.finish().model_dump())

    def on_llm_start(self, serialized, prompts, *, run_id, parent_run_id=None, **kw):
        """Record the start of a completion-model call and log its prompt as context."""
        rid = str(run_id)
        self._starts[rid] = time.time()
        self._queries[rid] = "\n\n".join(prompts)
        self._log_context(self._queries[rid])

    def on_chat_model_start(self, serialized, messages, *, run_id, parent_run_id=None, **kw):
        """Record the start of a chat-model call; messages are flattened to
        ``"<type>: <content>"`` lines and logged as context."""
        rid = str(run_id)
        self._starts[rid] = time.time()
        flat = [
            f"{getattr(m, 'type', 'msg')}: {getattr(m, 'content', m)}"
            for batch in messages
            for m in batch
        ]
        self._queries[rid] = "\n".join(flat)
        self._log_context(self._queries[rid])

    def on_llm_end(self, response, *, run_id, parent_run_id=None, **kw):
        """Log a generation event using the first generation of ``response``.

        Model name and token usage are looked up in ``llm_output`` first, then
        in the message's ``response_metadata`` / ``usage_metadata``. Whatever
        could be extracted before a lookup failed is still logged.
        """
        rid = str(run_id)
        text = ""
        model = None
        usage = None
        try:
            out = getattr(response, "llm_output", None) or {}
            model = out.get("model_name") or out.get("model")
            usage = out.get("token_usage") or out.get("usage")
            gen = response.generations[0][0]
            msg = getattr(gen, "message", None)
            text = getattr(gen, "text", "") or getattr(msg, "content", "")
            # chat models put usage on the message, not llm_output
            meta = getattr(msg, "response_metadata", None) or {}
            model = model or meta.get("model_name") or meta.get("model")
            usage = usage or meta.get("token_usage")
            um = getattr(msg, "usage_metadata", None)
            if not usage and um:
                usage = {"prompt_tokens": um.get("input_tokens"),
                         "completion_tokens": um.get("output_tokens")}
        except (AttributeError, IndexError, TypeError):
            # TypeError covers ``generations`` being None / not subscriptable;
            # an odd response shape must not break the user's chain.
            pass
        ev = generation_event(
            prompts=[self._queries.pop(rid, "")], response_text=text, model=model,
            trace_id=self._ensure_trace(), parent_span_id=None, project=self.project,
            start_time=self._starts.pop(rid, time.time()), token_usage=usage,
        )
        _client.get_client().log_event(ev)

    def on_llm_error(self, error, *, run_id, parent_run_id=None, **kw):
        """Log a generation event with status ``error``; the response text is ``repr(error)``."""
        rid = str(run_id)
        ev = generation_event(
            prompts=[self._queries.pop(rid, "")], response_text=repr(error), model=None,
            trace_id=self._ensure_trace(), parent_span_id=None, project=self.project,
            start_time=self._starts.pop(rid, time.time()), status="error",
        )
        _client.get_client().log_event(ev)
217
+
218
+
219
+ # ----------------------------------------------------- ingest-time instrumenting
220
+ # LangChain text splitters and embeddings emit no callbacks (they are plain
221
+ # batch calls), so the callback handler above can never see them. These thin
222
+ # proxies wrap the objects and log a chunking / embedding event per call.
223
+
224
class _SplitterProxy(_Proxy):
    """Proxy around a text splitter that logs a chunking event after each
    ``split_*`` / ``create_documents`` / ``transform_documents`` call."""

    def _emit(self, docs):
        """Log ``docs`` as chunks (best effort) and hand them back unchanged."""
        from ..tracing import log_chunks

        try:
            splitter = self._target
            chunks = list(docs or [])
            log_chunks(
                chunks,
                strategy=type(splitter).__name__,
                chunk_size=getattr(splitter, "_chunk_size", None),
                overlap=getattr(splitter, "_chunk_overlap", None),
            )
        except Exception:
            # Logging is best effort: never let it break the caller's split.
            pass
        return docs

    def split_documents(self, *args, **kwargs):
        produced = self._target.split_documents(*args, **kwargs)
        return self._emit(produced)

    def split_text(self, *args, **kwargs):
        produced = self._target.split_text(*args, **kwargs)
        return self._emit(produced)

    def create_documents(self, *args, **kwargs):
        produced = self._target.create_documents(*args, **kwargs)
        return self._emit(produced)

    def transform_documents(self, *args, **kwargs):
        produced = self._target.transform_documents(*args, **kwargs)
        return self._emit(produced)
249
+
250
+
251
def instrument_splitter(splitter: Any) -> Any:
    """Return a proxy for a LangChain ``TextSplitter`` whose ``split_documents``,
    ``split_text``, ``create_documents`` and ``transform_documents`` calls
    auto-log a chunking event.

    A warning is raised through ``require_methods`` when ``splitter`` exposes
    none of those methods.
    """
    hooked = ["split_documents", "split_text", "create_documents",
              "transform_documents"]
    require_methods(splitter, hooked, "instrument_splitter")
    proxy = _SplitterProxy(splitter, "langchain")
    return proxy
257
+
258
+
259
class _LoaderProxy(_Proxy):
    """Proxy around a document loader that logs an ingestion event after
    ``load`` / ``load_and_split``."""

    def _emit(self, docs):
        """Materialise ``docs`` into a list, log an ingestion event (best
        effort) and return the list."""
        from ..tracing import log_ingestion

        loaded = list(docs or [])
        try:
            sources = []
            for doc in loaded:
                origin = (getattr(doc, "metadata", {}) or {}).get("source")
                if origin:
                    sources.append(origin)
            # Only the first 50 non-empty sources are recorded.
            log_ingestion(count=len(loaded), sources=sources[:50])
        except Exception:
            # Logging is best effort: never let it break the caller's load.
            pass
        return loaded

    def load(self, *args, **kwargs):
        fetched = self._target.load(*args, **kwargs)
        return self._emit(fetched)

    def load_and_split(self, *args, **kwargs):
        fetched = self._target.load_and_split(*args, **kwargs)
        return self._emit(fetched)
276
+
277
+
278
def instrument_loader(loader: Any) -> Any:
    """Return a proxy for a LangChain ``BaseLoader`` whose ``load`` and
    ``load_and_split`` calls auto-log an ingestion event (document count +
    sources). ``lazy_load`` is not wrapped and passes through untouched
    (streaming iterator).

    A warning is raised through ``require_methods`` when ``loader`` exposes
    neither method.
    """
    hooked = ["load", "load_and_split"]
    require_methods(loader, hooked, "instrument_loader")
    proxy = _LoaderProxy(loader, "langchain")
    return proxy
284
+
285
+
286
+ # Rerankers are ``BaseDocumentCompressor``s and ``compress_documents`` fires no
287
+ # callback, so the handler can't see reranking. Wrap the compressor in a real
288
+ # subclass (so ``ContextualCompressionRetriever`` still validates it) that logs
289
+ # a reranking event with before/after order.
290
if _BaseCompressor is not None:

    class _LoggedCompressor(_BaseCompressor):  # type: ignore[misc, valid-type]
        # The wrapped compressor; declared as a field so it can be passed as
        # ``_LoggedCompressor(target=...)``.
        target: Any = None
        model_config = {"arbitrary_types_allowed": True}

        def compress_documents(self, documents, query, callbacks=None):
            """Delegate to the wrapped compressor, then log a reranking event
            (best effort) with the documents before and after."""
            from ..tracing import log_rerank

            before = list(documents or [])
            reranked = self.target.compress_documents(before, query, callbacks)
            after = list(reranked)
            try:
                wrapped = self.target
                inner = getattr(wrapped, "model", None)
                # Prefer an explicit model name; otherwise fall back to the
                # class name of the inner model, or of the compressor itself.
                label = getattr(inner, "model_name", None) or getattr(inner, "model", None)
                if not label:
                    label = type(wrapped if inner is None else inner).__name__
                log_rerank(before, after, model=label,
                           top_n=getattr(wrapped, "top_n", None))
            except Exception:
                # Logging is best effort: never let it break reranking.
                pass
            return after
310
+
311
+
312
def instrument_compressor(compressor: Any) -> Any:
    """Return a logging wrapper for a LangChain reranker /
    ``BaseDocumentCompressor``: each ``compress_documents`` call auto-logs a
    reranking event (before/after, model, top_n). Pass the returned object to
    ``ContextualCompressionRetriever``.

    Raises:
        ImportError: If LangChain's ``BaseDocumentCompressor`` could not be
            imported.
    """
    langchain_missing = _BaseCompressor is None
    if langchain_missing:
        raise ImportError("LangChain is not installed. Run: pip install ragobserve[langchain]")
    require_methods(compressor, ["compress_documents"], "instrument_compressor")
    wrapper = _LoggedCompressor(target=compressor)
    return wrapper
320
+
321
+
322
def instrument_embeddings(embeddings: Any) -> Any:
    """Wrap a LangChain ``Embeddings`` so ``embed_documents`` auto-logs an
    embedding event. ``embed_query`` passes straight through (query embeds are
    already implied by the retrieval event).

    Returns a real ``Embeddings`` subclass — not a generic proxy — so callers
    that ``isinstance``-check the object (e.g. FAISS / vector stores) keep
    working. Any other attribute/method delegates to the wrapped object.

    When the wrapped object provides ``aembed_documents`` / ``aembed_query``,
    the wrapper forwards to them directly (logging ``aembed_documents`` like
    the sync path). Without these overrides the methods inherited from the
    ``Embeddings`` base class would shadow ``__getattr__`` and the target's own
    async implementation would never be called.
    """
    require_methods(embeddings, ["embed_documents"], "instrument_embeddings")
    try:
        from langchain_core.embeddings import Embeddings as _Base
    except Exception:  # langchain not installed -> degrade to a plain proxy
        _Base = object

    target = embeddings

    def _record(texts, vecs, duration_ms):
        """Log one embedding event; failures are swallowed (best effort)."""
        from ..tracing import log_embedding

        try:
            log_embedding(
                model=(getattr(target, "model", None)
                       or getattr(target, "model_name", None)
                       or type(target).__name__),
                input_count=len(texts) if texts is not None else (len(vecs) if vecs else 0),
                dimensions=len(vecs[0]) if vecs else None,
                duration_ms=duration_ms,
            )
        except Exception:
            # Logging must never break the caller's embedding call.
            pass

    class _LoggedEmbeddings(_Base):  # type: ignore[misc, valid-type]
        def embed_documents(self, texts, *a, **k):
            # perf_counter is monotonic, so the duration cannot be skewed by
            # wall-clock adjustments the way time.time() deltas can.
            t0 = time.perf_counter()
            vecs = target.embed_documents(texts, *a, **k)
            _record(texts, vecs, (time.perf_counter() - t0) * 1000.0)
            return vecs

        def embed_query(self, text, *a, **k):
            return target.embed_query(text, *a, **k)

        # Defined only when the target has them, so attribute lookup for
        # targets without async support behaves exactly as before.
        if callable(getattr(target, "aembed_documents", None)):

            async def aembed_documents(self, texts, *a, **k):
                t0 = time.perf_counter()
                vecs = await target.aembed_documents(texts, *a, **k)
                _record(texts, vecs, (time.perf_counter() - t0) * 1000.0)
                return vecs

        if callable(getattr(target, "aembed_query", None)):

            async def aembed_query(self, text, *a, **k):
                return await target.aembed_query(text, *a, **k)

        def __getattr__(self, name):
            # Only reached for names not found on the wrapper itself.
            return getattr(target, name)

    return _LoggedEmbeddings()