ragpeek 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. ragpeek-0.1.0/.github/workflows/ci.yaml +50 -0
  2. ragpeek-0.1.0/.gitignore +221 -0
  3. ragpeek-0.1.0/.python-version +1 -0
  4. ragpeek-0.1.0/CHANGELOG.md +33 -0
  5. ragpeek-0.1.0/LICENSE +21 -0
  6. ragpeek-0.1.0/PKG-INFO +311 -0
  7. ragpeek-0.1.0/README.md +280 -0
  8. ragpeek-0.1.0/examples/async_rag.py +148 -0
  9. ragpeek-0.1.0/examples/corpus.py +70 -0
  10. ragpeek-0.1.0/examples/data/jupiter.txt +5 -0
  11. ragpeek-0.1.0/examples/data/saturn.txt +5 -0
  12. ragpeek-0.1.0/examples/data/terrestrial_planets.txt +7 -0
  13. ragpeek-0.1.0/examples/simple_rag.py +46 -0
  14. ragpeek-0.1.0/pyproject.toml +52 -0
  15. ragpeek-0.1.0/ragpeek/__init__.py +22 -0
  16. ragpeek-0.1.0/ragpeek/__main__.py +3 -0
  17. ragpeek-0.1.0/ragpeek/analyzers/__init__.py +56 -0
  18. ragpeek-0.1.0/ragpeek/analyzers/context.py +213 -0
  19. ragpeek-0.1.0/ragpeek/analyzers/generation.py +55 -0
  20. ragpeek-0.1.0/ragpeek/analyzers/retrieval.py +89 -0
  21. ragpeek-0.1.0/ragpeek/cli.py +210 -0
  22. ragpeek-0.1.0/ragpeek/collector.py +60 -0
  23. ragpeek-0.1.0/ragpeek/config.py +23 -0
  24. ragpeek-0.1.0/ragpeek/decorators.py +146 -0
  25. ragpeek-0.1.0/ragpeek/logging.py +103 -0
  26. ragpeek-0.1.0/ragpeek/py.typed +0 -0
  27. ragpeek-0.1.0/ragpeek/renderers/__init__.py +0 -0
  28. ragpeek-0.1.0/ragpeek/renderers/html.py +178 -0
  29. ragpeek-0.1.0/ragpeek/renderers/terminal.py +140 -0
  30. ragpeek-0.1.0/ragpeek/serialization.py +107 -0
  31. ragpeek-0.1.0/ragpeek/session.py +133 -0
  32. ragpeek-0.1.0/tests/__init__.py +0 -0
  33. ragpeek-0.1.0/tests/conftest.py +59 -0
  34. ragpeek-0.1.0/tests/fixtures/sample_session.json +43 -0
  35. ragpeek-0.1.0/tests/test_analyzers.py +294 -0
  36. ragpeek-0.1.0/tests/test_cli.py +158 -0
  37. ragpeek-0.1.0/tests/test_decorators.py +215 -0
  38. ragpeek-0.1.0/tests/test_renderers.py +91 -0
  39. ragpeek-0.1.0/tests/test_session.py +107 -0
  40. ragpeek-0.1.0/uv.lock +3272 -0
@@ -0,0 +1,50 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ pull_request:
7
+ branches: [main]
8
+
9
+ jobs:
10
+ lint:
11
+ runs-on: ubuntu-latest
12
+
13
+ steps:
14
+ - uses: actions/checkout@v4
15
+ env:
16
+ FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true
17
+
18
+ - name: Install uv
19
+ uses: astral-sh/setup-uv@v5
20
+ env:
21
+ FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true
22
+
23
+ - name: Ruff lint
24
+ run: uvx ruff check .
25
+
26
+ test:
27
+ runs-on: ubuntu-latest
28
+
29
+ strategy:
30
+ matrix:
31
+ python-version: ["3.10", "3.11", "3.12", "3.13"]
32
+
33
+ steps:
34
+ - uses: actions/checkout@v4
35
+ env:
36
+ FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true
37
+
38
+ - name: Install uv
39
+ uses: astral-sh/setup-uv@v5
40
+ env:
41
+ FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true
42
+
43
+ - name: Set up Python ${{ matrix.python-version }}
44
+ run: uv python install ${{ matrix.python-version }}
45
+
46
+ - name: Install dependencies
47
+ run: uv sync --all-extras
48
+
49
+ - name: Run tests
50
+ run: uv run pytest tests/ -v
@@ -0,0 +1,221 @@
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[codz]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py.cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # ragpeek example output
55
+ async_report.html
56
+
57
+ # Translations
58
+ *.mo
59
+ *.pot
60
+
61
+ # Django stuff:
62
+ *.log
63
+ local_settings.py
64
+ db.sqlite3
65
+ db.sqlite3-journal
66
+
67
+ # Flask stuff:
68
+ instance/
69
+ .webassets-cache
70
+
71
+ # Scrapy stuff:
72
+ .scrapy
73
+
74
+ # Sphinx documentation
75
+ docs/_build/
76
+
77
+ # PyBuilder
78
+ .pybuilder/
79
+ target/
80
+
81
+ # Jupyter Notebook
82
+ .ipynb_checkpoints
83
+
84
+ # IPython
85
+ profile_default/
86
+ ipython_config.py
87
+
88
+ # pyenv
89
+ # For a library or package, you might want to ignore these files since the code is
90
+ # intended to run in multiple environments; otherwise, check them in:
91
+ # .python-version
92
+
93
+ # pipenv
94
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
95
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
96
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
97
+ # install all needed dependencies.
98
+ # Pipfile.lock
99
+
100
+ # UV
101
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
102
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
103
+ # commonly ignored for libraries.
104
+ # uv.lock
105
+
106
+ # poetry
107
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
108
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
109
+ # commonly ignored for libraries.
110
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
111
+ # poetry.lock
112
+ # poetry.toml
113
+
114
+ # pdm
115
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
116
+ # pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
117
+ # https://pdm-project.org/en/latest/usage/project/#working-with-version-control
118
+ # pdm.lock
119
+ # pdm.toml
120
+ .pdm-python
121
+ .pdm-build/
122
+
123
+ # pixi
124
+ # Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
125
+ # pixi.lock
126
+ # Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
127
+ # in the .venv directory. It is recommended not to include this directory in version control.
128
+ .pixi
129
+
130
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
131
+ __pypackages__/
132
+
133
+ # Celery stuff
134
+ celerybeat-schedule
135
+ celerybeat.pid
136
+
137
+ # Redis
138
+ *.rdb
139
+ *.aof
140
+ *.pid
141
+
142
+ # RabbitMQ
143
+ mnesia/
144
+ rabbitmq/
145
+ rabbitmq-data/
146
+
147
+ # ActiveMQ
148
+ activemq-data/
149
+
150
+ # SageMath parsed files
151
+ *.sage.py
152
+
153
+ # Environments
154
+ .env
155
+ .envrc
156
+ .venv
157
+ env/
158
+ venv/
159
+ ENV/
160
+ env.bak/
161
+ venv.bak/
162
+
163
+ # Spyder project settings
164
+ .spyderproject
165
+ .spyproject
166
+
167
+ # Rope project settings
168
+ .ropeproject
169
+
170
+ # mkdocs documentation
171
+ /site
172
+
173
+ # mypy
174
+ .mypy_cache/
175
+ .dmypy.json
176
+ dmypy.json
177
+
178
+ # Pyre type checker
179
+ .pyre/
180
+
181
+ # pytype static type analyzer
182
+ .pytype/
183
+
184
+ # Cython debug symbols
185
+ cython_debug/
186
+
187
+ # PyCharm
188
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
189
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
190
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
191
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
192
+ # .idea/
193
+
194
+ # Abstra
195
+ # Abstra is an AI-powered process automation framework.
196
+ # Ignore directories containing user credentials, local state, and settings.
197
+ # Learn more at https://abstra.io/docs
198
+ .abstra/
199
+
200
+ # Visual Studio Code
201
+ # Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
202
+ # that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
203
+ # and can be added to the global gitignore or merged into this file. However, if you prefer,
204
+ # you could uncomment the following to ignore the entire vscode folder
205
+ # .vscode/
206
+ # Temporary file for partial code execution
207
+ tempCodeRunnerFile.py
208
+
209
+ # Ruff stuff:
210
+ .ruff_cache/
211
+
212
+ # PyPI configuration file
213
+ .pypirc
214
+
215
+ # Marimo
216
+ marimo/_static/
217
+ marimo/_lsp/
218
+ __marimo__/
219
+
220
+ # Streamlit
221
+ .streamlit/secrets.toml
@@ -0,0 +1 @@
1
+ 3.12.3
@@ -0,0 +1,33 @@
1
+ # Changelog
2
+
3
+ All notable changes to this project are documented here. The format is based on
4
+ [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres
5
+ to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
6
+
7
+ ## [Unreleased]
8
+
9
+ ## [0.1.0] - 2026-06-22
10
+
11
+ Initial release.
12
+
13
+ ### Added
14
+
15
+ - `@trace` decorator that instruments sync **and** async RAG pipelines, with
16
+ `log_retrieval`, `log_generation`, and `link_retrieval_to_generation`. The active
17
+ session rides a `contextvars.ContextVar`, so concurrent traces stay isolated.
18
+ - Retrieval, context, and generation analyzers that produce within-set,
19
+ calibration-aware **signals** (low-relevance padding, sharp rank-1 precision, flat
20
+ distribution, k mismatch, rank disagreement, low context utilisation, hedging
21
+ language) rather than absolute verdicts.
22
+ - Terminal and HTML trace renderers; `serialize_trace` / `deserialize_trace`.
23
+ - `ragpeek` command line:
24
+ - `ragpeek demo` — ask a question, retrieve over a built-in corpus with real
25
+ embeddings, generate via a local Ollama server if available, and render the
26
+ trace.
27
+ - `ragpeek <trace.json>` — view and diagnose a saved trace.
28
+ - `TracerConfig` for tuning thresholds; `py.typed` so downstream type checkers use
29
+ the inline type hints.
30
+ - Optional extras: `semantic` (embedding-based context analysis) and `examples`.
31
+
32
+ [Unreleased]: https://github.com/meutsabdahal/ragpeek/compare/v0.1.0...HEAD
33
+ [0.1.0]: https://github.com/meutsabdahal/ragpeek/releases/tag/v0.1.0
ragpeek-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Utsab Dahal
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
ragpeek-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,311 @@
1
+ Metadata-Version: 2.4
2
+ Name: ragpeek
3
+ Version: 0.1.0
4
+ Summary: A lightweight debugger for RAG pipelines
5
+ Project-URL: Homepage, https://github.com/meutsabdahal/ragpeek
6
+ Project-URL: Repository, https://github.com/meutsabdahal/ragpeek
7
+ Project-URL: Issues, https://github.com/meutsabdahal/ragpeek/issues
8
+ Author: Utsab Dahal
9
+ License: MIT
10
+ License-File: LICENSE
11
+ Keywords: debugging,developer-tools,llm,observability,rag,retrieval-augmented-generation
12
+ Classifier: Development Status :: 3 - Alpha
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: License :: OSI Approved :: MIT License
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Python :: 3.10
17
+ Classifier: Programming Language :: Python :: 3.11
18
+ Classifier: Programming Language :: Python :: 3.12
19
+ Classifier: Programming Language :: Python :: 3.13
20
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
21
+ Classifier: Topic :: Software Development :: Debuggers
22
+ Requires-Python: >=3.10
23
+ Requires-Dist: rich>=13.0
24
+ Provides-Extra: examples
25
+ Requires-Dist: chromadb>=1.5.9; extra == 'examples'
26
+ Requires-Dist: httpx>=0.27; extra == 'examples'
27
+ Provides-Extra: semantic
28
+ Requires-Dist: scikit-learn>=1.3; extra == 'semantic'
29
+ Requires-Dist: sentence-transformers>=3.0; extra == 'semantic'
30
+ Description-Content-Type: text/markdown
31
+
32
+ # ragpeek
33
+
34
+ [![CI](https://github.com/meutsabdahal/ragpeek/actions/workflows/ci.yaml/badge.svg)](https://github.com/meutsabdahal/ragpeek/actions/workflows/ci.yaml)
35
+
36
+ **A lightweight debugger for RAG pipelines.**
37
+
38
+ When a RAG pipeline returns a bad answer, the usual move is to print the retrieved
39
+ chunks and squint at them. ragpeek replaces the squinting: wrap your pipeline in one
40
+ decorator and it shows you, per query, what was retrieved, the score of every chunk,
41
+ the exact prompt sent to the model, and a plain-English read on where things went
42
+ sideways — retrieval, context ranking, or generation.
43
+
44
+ Ask a question in one command — no code (output depends on your question and the LLM):
45
+
46
+ ```
47
+ $ ragpeek demo
48
+ Question> How hot is Venus?
49
+
50
+ Retrieval k=4/4
51
+ ✓ 0.77 Venus is the hottest planet, with surface temperatures…
52
+ ✗ 0.39 Mercury is the smallest planet and the closest to the Sun.
53
+ ✗ 0.34 Neptune is the most distant planet from the Sun…
54
+ ✗ 0.31 Mars hosts Olympus Mons, the tallest volcano…
55
+
56
+ ⚠ 3 of 4 chunks sit in the lower half of this result's score range
57
+ (top 0.77, bottom 0.31) — possible low-relevance padding.
58
+ ✓ Sharp rank-1 separation (0.77 vs 0.39): the retriever cleanly
59
+ separates the top match — a precision signal.
60
+
61
+ Generation model=llama3.2
62
+ Venus's average surface temperature is around 465 °C…
63
+ ✓ Generation looks healthy - no obvious signals.
64
+ ```
65
+
66
+ > **Score convention:** ragpeek assumes **higher scores mean more relevant** chunks.
67
+ > If your vector store returns distances, convert them to similarities first — see
68
+ > [Works with any vector store](#works-with-any-vector-store).
69
+
70
+ ---
71
+
72
+ ## Install
73
+
74
+ ```bash
75
+ pip install ragpeek
76
+ ```
77
+
78
+ The default install is lightweight — only [`rich`](https://github.com/Textualize/rich)
79
+ at runtime. For the embedding-based context analyzer (and `ragpeek demo`, which
80
+ retrieves with real embeddings), add the `semantic` extra:
81
+
82
+ ```bash
83
+ pip install "ragpeek[semantic]"
84
+ ```
85
+
86
+ Requires Python 3.10+. On first semantic run, ragpeek downloads a small embedding
87
+ model (~80MB) once. `ragpeek demo` also generates an answer if a local
88
+ [Ollama](https://ollama.com) server is running; without one it shows retrieval only.
89
+
90
+ **From source:**
91
+
92
+ ```bash
93
+ git clone https://github.com/meutsabdahal/ragpeek
94
+ cd ragpeek
95
+ uv sync --group dev # create the env + install dev deps
96
+ uv run pytest tests/ -v
97
+ ```
98
+
99
+ ---
100
+
101
+ ## Command line
102
+
103
+ Once installed, `ragpeek` is a command:
104
+
105
+ ```bash
106
+ ragpeek demo # prompts for a question, then retrieves + answers + traces it
107
+ ragpeek demo "How hot is Venus?" # or pass the question directly
108
+ ragpeek demo --model mistral # choose the Ollama model (default: llama3.2)
109
+ ragpeek demo --html report.html # also save a shareable HTML report
110
+ ragpeek path/to/trace.json # view a saved trace (from @trace(output=...) / serialize_trace)
111
+ ragpeek # help
112
+ ```
113
+
114
+ `ragpeek demo` retrieves over a small built-in corpus with real embeddings (needs the
115
+ `semantic` extra) and answers via a local Ollama server if one is running. Running
116
+ from a source checkout instead of an install? Prefix with `uv run`:
117
+
118
+ ```bash
119
+ uv run ragpeek demo "How hot is Venus?"
120
+ uv run ragpeek demo --html report.html # also save an HTML report
121
+ uv run ragpeek tests/fixtures/sample_session.json # view a saved trace
122
+ uv run ragpeek # help
123
+ ```
124
+
125
+ ---
126
+
127
+ ## Instrument your pipeline
128
+
129
+ Tracing your own pipeline is two imports and two log calls — ragpeek never
130
+ monkey-patches your stack, so it works with any retriever and any model.
131
+
132
+ ```python
133
+ from ragpeek import trace, log_retrieval, log_generation
134
+
135
+ @trace
136
+ def answer_question(query: str) -> str:
137
+ docs, scores = retriever.search(query, k=5)
138
+ log_retrieval(query=query, chunks=docs, scores=scores)
139
+
140
+ prompt = build_prompt(docs, query)
141
+ response = llm.generate(prompt)
142
+ log_generation(prompt=prompt, response=response, model="llama3.2")
143
+
144
+ return response
145
+ ```
146
+
147
+ Call the function exactly as before — the trace prints automatically:
148
+
149
+ ```python
150
+ answer_question("Which is the largest planet in the Solar System?")
151
+ ```
152
+
153
+ Async pipelines work the same way; the active session follows your coroutines
154
+ through every `await` (it rides a `contextvars.ContextVar`), so concurrent
155
+ queries never cross-contaminate:
156
+
157
+ ```python
158
+ @trace
159
+ async def answer(query: str) -> str:
160
+ docs, scores = await retriever.asearch(query, k=5)
161
+ log_retrieval(query=query, chunks=docs, scores=scores)
162
+
163
+ response = await llm.acomplete(build_prompt(docs, query))
164
+ log_generation(prompt=build_prompt(docs, query), response=response, model="llama3.2")
165
+ return response
166
+ ```
167
+
168
+ ---
169
+
170
+ ## Configuration
171
+
172
+ Pass a `TracerConfig` to tune thresholds, or flip decorator flags for common cases:
173
+
174
+ ```python
175
+ from ragpeek import trace, TracerConfig
176
+
177
+ config = TracerConfig(
178
+ score_gap_threshold=0.3, # rank-1→rank-2 gap that reads as precision
179
+ semantic=True, # embedding-based context analysis
180
+ show_prompt=False, # hide the full prompt in terminal output
181
+ # min_score_threshold=0.6, # opt-in absolute floor — only set once you've
182
+ # # calibrated a cutoff for your own embedder
183
+ )
184
+
185
+ @trace(config=config)
186
+ def answer(query: str) -> str:
187
+ ...
188
+ ```
189
+
190
+ ```python
191
+ @trace(semantic=False) # skip the embedding model (faster, no download)
192
+ @trace(output="report.html") # save a shareable HTML report
193
+ @trace(render=False) # don't print — just populate session.analysis_report
194
+ ```
195
+
196
+ With `render=False` the analyzers still run; grab the finalized session and hand it
197
+ to downstream tooling with `serialize_trace(...)` (and `deserialize_trace(...)` to
198
+ read it back, e.g. `ragpeek trace.json`).
199
+
200
+ ---
201
+
202
+ ## Works with any vector store
203
+
204
+ `log_retrieval` takes similarity **scores** (higher = better). Most stores return
205
+ those directly; some return distances you convert first.
206
+
207
+ ```python
208
+ # ChromaDB (cosine space): distance ∈ [0, 2] → similarity = 1 - distance
209
+ results = collection.query(query_texts=[query], n_results=5)
210
+ log_retrieval(query=query,
211
+ chunks=results["documents"][0],
212
+ scores=[1.0 - d for d in results["distances"][0]])
213
+
214
+ # FAISS IndexFlatL2 with normalized vectors: similarity = 1 - d² / 2
215
+ distances, indices = index.search(query_embedding, k=5)
216
+ log_retrieval(query=query,
217
+ chunks=[corpus[i] for i in indices[0]],
218
+ scores=[1.0 - (d ** 2) / 2 for d in distances[0].tolist()])
219
+
220
+ # Qdrant (cosine): .score is already a similarity — use it as-is
221
+ results = client.search("docs", query_vector=embedding, limit=5)
222
+ log_retrieval(query=query,
223
+ chunks=[r.payload["text"] for r in results],
224
+ scores=[r.score for r in results])
225
+ ```
226
+
227
+ > **Note on scores:** ragpeek assumes higher score = more relevant. There is
228
+ > no single distance→similarity formula — convert per metric:
229
+ >
230
+ > | Store returns | Correct conversion |
231
+ > |---|---|
232
+ > | Cosine distance (∈ [0, 2]) | `score = 1.0 - distance` (exact) |
233
+ > | L2 / Euclidean, normalized vectors | `score = 1.0 - distance ** 2 / 2` (exact) |
234
+ > | L2 / Euclidean, un-normalized | `score = 1.0 / (1.0 + distance)` (monotonic squash) |
235
+ > | Inner product / dot product | already a similarity — use as-is (negate if returned as a distance) |
236
+ >
237
+ > `score = 1.0 - distance` is **only** correct for cosine distance; using it on
238
+ > raw L2 distances silently produces wrong (often negative) similarities.
239
+
240
+ Need a non-default retrieval→generation association? Keep the returned span objects
241
+ and pair them explicitly:
242
+
243
+ ```python
244
+ from ragpeek import trace, log_retrieval, log_generation, link_retrieval_to_generation
245
+
246
+ @trace(render=False)
247
+ def answer(query: str) -> str:
248
+ retrieval = log_retrieval(query=query, chunks=["chunk"], scores=[0.9])
249
+ response = llm.complete(query)
250
+ generation = log_generation(prompt=query, response=response, model="llama3.2")
251
+ link_retrieval_to_generation(retrieval, generation)
252
+ return response
253
+ ```
254
+
255
+ ---
256
+
257
+ ## What it surfaces
258
+
259
+ These are **signals to calibrate**, not verdicts. Scores are read within each
260
+ result set, so they don't assume an absolute scale — tune thresholds to your
261
+ own embedder.
262
+
263
+ | Signal | What it means |
264
+ |---|---|
265
+ | Within-set padding | Most chunks fall in the lower half of *this result's* score range (relative, not an absolute cutoff) |
266
+ | Sharp rank-1 separation | The retriever cleanly separates the top match — a **precision** signal, not noise |
267
+ | Flat distribution | Scores barely differ — the retriever can't discriminate (query too vague / chunks too broad) |
268
+ | k mismatch | Retriever returned fewer chunks than requested |
269
+ | Rank disagreement | The answer aligns with a chunk the retriever didn't rank first — a reranking signal |
270
+ | Low context utilisation | The response is semantically dissimilar to every retrieved chunk |
271
+ | Hedging language | Phrase-level signal the model may be answering from training weights, not context |
272
+
273
+ ---
274
+
275
+ ## How it works
276
+
277
+ 1. `@trace` wraps your function and opens a `TraceSession`.
278
+ 2. The session id lives in a `contextvars.ContextVar`, so it propagates through both
279
+ sync and async code without you threading anything through your call stack.
280
+ 3. `log_retrieval()` and `log_generation()` read that `ContextVar` and append spans
281
+ to the active session.
282
+ 4. When your function returns, three analyzers run over the collected spans:
283
+ - **Retrieval**: within-set score distribution, low-relevance padding, rank-1 precision, k mismatch.
284
+ - **Context**: chunk↔response similarity and the rank-disagreement (reranking) signal.
285
+ - **Generation**: hedging language and response-length anomalies.
286
+ 5. The terminal renderer prints the trace; the HTML renderer saves a shareable report.
287
+
288
+ The embedding model runs entirely on your machine — your data never leaves it.
289
+
290
+ ---
291
+
292
+ ## Limitations
293
+
294
+ - **Explicit, not magic.** You call `log_retrieval` / `log_generation` yourself —
295
+ ragpeek doesn't patch framework internals. That's three lines of instrumentation
296
+ per pipeline, traded for working with any stack.
297
+ - **Signals, not truth.** Retrieval signals are computed *within* each result set and
298
+ assume higher = better, but they can't know your embedder's absolute scale. Treat
299
+ every diagnosis as a prompt to calibrate, and convert distances to similarities
300
+ per metric (table above) before calling `log_retrieval`.
301
+
302
+ ---
303
+
304
+ ## Contributing
305
+
306
+ Issues and PRs welcome. If a vector-store integration doesn't work or a diagnosis
307
+ looks wrong, open an issue with a minimal reproduction.
308
+
309
+ ## License
310
+
311
+ MIT