khazad 0.1.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- khazad-0.1.2/.gitignore +207 -0
- khazad-0.1.2/CHANGELOG.md +55 -0
- khazad-0.1.2/CLAUDE.md +255 -0
- khazad-0.1.2/CONTRIBUTING.md +81 -0
- khazad-0.1.2/LICENSE +21 -0
- khazad-0.1.2/PKG-INFO +443 -0
- khazad-0.1.2/README.md +412 -0
- khazad-0.1.2/docs/_static/flow.svg +2 -0
- khazad-0.1.2/docs/_static/logo-dark.png +0 -0
- khazad-0.1.2/docs/_static/logo-light.png +0 -0
- khazad-0.1.2/docs/_static/run_example.png +0 -0
- khazad-0.1.2/examples/anthropic.py +31 -0
- khazad-0.1.2/examples/anthropic_scope.py +46 -0
- khazad-0.1.2/examples/anthropic_system.py +50 -0
- khazad-0.1.2/examples/azure_openai.py +47 -0
- khazad-0.1.2/examples/azure_openai_entra_stream.py +52 -0
- khazad-0.1.2/examples/gemini.py +21 -0
- khazad-0.1.2/examples/gemini_hosts.py +37 -0
- khazad-0.1.2/examples/gemini_stop.py +43 -0
- khazad-0.1.2/examples/openai.py +30 -0
- khazad-0.1.2/examples/openai_ollama.py +32 -0
- khazad-0.1.2/khazad/__init__.py +109 -0
- khazad-0.1.2/khazad/_models.py +81 -0
- khazad-0.1.2/khazad/_transport.py +290 -0
- khazad-0.1.2/khazad/adapters/__init__.py +0 -0
- khazad-0.1.2/khazad/adapters/embedders/__init__.py +0 -0
- khazad-0.1.2/khazad/adapters/embedders/huggingface.py +41 -0
- khazad-0.1.2/khazad/adapters/embedders/openai.py +50 -0
- khazad-0.1.2/khazad/adapters/parsers/__init__.py +0 -0
- khazad-0.1.2/khazad/adapters/parsers/anthropic.py +122 -0
- khazad-0.1.2/khazad/adapters/parsers/gemini.py +50 -0
- khazad-0.1.2/khazad/adapters/parsers/openai.py +125 -0
- khazad-0.1.2/khazad/adapters/parsers/openai_responses.py +168 -0
- khazad-0.1.2/khazad/adapters/redis/__init__.py +0 -0
- khazad-0.1.2/khazad/adapters/redis/store.py +142 -0
- khazad-0.1.2/khazad/khazad.py +290 -0
- khazad-0.1.2/khazad/ports/__init__.py +7 -0
- khazad-0.1.2/khazad/ports/embedder.py +18 -0
- khazad-0.1.2/khazad/ports/parser.py +91 -0
- khazad-0.1.2/khazad/ports/store.py +50 -0
- khazad-0.1.2/khazad/py.typed +0 -0
- khazad-0.1.2/pyproject.toml +117 -0
- khazad-0.1.2/tests/__init__.py +0 -0
- khazad-0.1.2/tests/conftest.py +309 -0
- khazad-0.1.2/tests/integration/__init__.py +0 -0
- khazad-0.1.2/tests/integration/test_end_to_end.py +204 -0
- khazad-0.1.2/tests/integration/test_interceptor.py +347 -0
- khazad-0.1.2/tests/integration/test_provider_anthropic.py +109 -0
- khazad-0.1.2/tests/integration/test_provider_azure_openai.py +97 -0
- khazad-0.1.2/tests/integration/test_provider_gemini.py +87 -0
- khazad-0.1.2/tests/integration/test_provider_openai.py +117 -0
- khazad-0.1.2/tests/integration/test_provider_openai_compat.py +118 -0
- khazad-0.1.2/tests/integration/test_redis_store.py +128 -0
- khazad-0.1.2/tests/integration/test_streaming.py +279 -0
- khazad-0.1.2/tests/stress/__init__.py +0 -0
- khazad-0.1.2/tests/stress/test_concurrent.py +137 -0
- khazad-0.1.2/tests/unit/__init__.py +0 -0
- khazad-0.1.2/tests/unit/test_config.py +108 -0
- khazad-0.1.2/tests/unit/test_engine.py +287 -0
- khazad-0.1.2/tests/unit/test_models.py +69 -0
- khazad-0.1.2/tests/unit/test_parsers/__init__.py +0 -0
- khazad-0.1.2/tests/unit/test_parsers/test_anthropic_parser.py +157 -0
- khazad-0.1.2/tests/unit/test_parsers/test_gemini_parser.py +94 -0
- khazad-0.1.2/tests/unit/test_parsers/test_openai_parser.py +191 -0
- khazad-0.1.2/tests/unit/test_parsers/test_openai_responses_parser.py +163 -0
- khazad-0.1.2/uv.lock +2514 -0
khazad-0.1.2/.gitignore
ADDED
|
@@ -0,0 +1,207 @@
|
|
|
1
|
+
# Byte-compiled / optimized / DLL files
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[codz]
|
|
4
|
+
*$py.class
|
|
5
|
+
|
|
6
|
+
# C extensions
|
|
7
|
+
*.so
|
|
8
|
+
|
|
9
|
+
# Distribution / packaging
|
|
10
|
+
.Python
|
|
11
|
+
build/
|
|
12
|
+
develop-eggs/
|
|
13
|
+
dist/
|
|
14
|
+
downloads/
|
|
15
|
+
eggs/
|
|
16
|
+
.eggs/
|
|
17
|
+
lib/
|
|
18
|
+
lib64/
|
|
19
|
+
parts/
|
|
20
|
+
sdist/
|
|
21
|
+
var/
|
|
22
|
+
wheels/
|
|
23
|
+
share/python-wheels/
|
|
24
|
+
*.egg-info/
|
|
25
|
+
.installed.cfg
|
|
26
|
+
*.egg
|
|
27
|
+
MANIFEST
|
|
28
|
+
|
|
29
|
+
# PyInstaller
|
|
30
|
+
# Usually these files are written by a python script from a template
|
|
31
|
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
|
32
|
+
*.manifest
|
|
33
|
+
*.spec
|
|
34
|
+
|
|
35
|
+
# Installer logs
|
|
36
|
+
pip-log.txt
|
|
37
|
+
pip-delete-this-directory.txt
|
|
38
|
+
|
|
39
|
+
# Unit test / coverage reports
|
|
40
|
+
htmlcov/
|
|
41
|
+
.tox/
|
|
42
|
+
.nox/
|
|
43
|
+
.coverage
|
|
44
|
+
.coverage.*
|
|
45
|
+
.cache
|
|
46
|
+
nosetests.xml
|
|
47
|
+
coverage.xml
|
|
48
|
+
*.cover
|
|
49
|
+
*.py.cover
|
|
50
|
+
.hypothesis/
|
|
51
|
+
.pytest_cache/
|
|
52
|
+
cover/
|
|
53
|
+
|
|
54
|
+
# Translations
|
|
55
|
+
*.mo
|
|
56
|
+
*.pot
|
|
57
|
+
|
|
58
|
+
# Django stuff:
|
|
59
|
+
*.log
|
|
60
|
+
local_settings.py
|
|
61
|
+
db.sqlite3
|
|
62
|
+
db.sqlite3-journal
|
|
63
|
+
|
|
64
|
+
# Flask stuff:
|
|
65
|
+
instance/
|
|
66
|
+
.webassets-cache
|
|
67
|
+
|
|
68
|
+
# Scrapy stuff:
|
|
69
|
+
.scrapy
|
|
70
|
+
|
|
71
|
+
# Sphinx documentation
|
|
72
|
+
docs/_build/
|
|
73
|
+
|
|
74
|
+
# PyBuilder
|
|
75
|
+
.pybuilder/
|
|
76
|
+
target/
|
|
77
|
+
|
|
78
|
+
# Jupyter Notebook
|
|
79
|
+
.ipynb_checkpoints
|
|
80
|
+
|
|
81
|
+
# IPython
|
|
82
|
+
profile_default/
|
|
83
|
+
ipython_config.py
|
|
84
|
+
|
|
85
|
+
# pyenv
|
|
86
|
+
# For a library or package, you might want to ignore these files since the code is
|
|
87
|
+
# intended to run in multiple environments; otherwise, check them in:
|
|
88
|
+
# .python-version
|
|
89
|
+
|
|
90
|
+
# pipenv
|
|
91
|
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
|
92
|
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
|
93
|
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
|
94
|
+
# install all needed dependencies.
|
|
95
|
+
#Pipfile.lock
|
|
96
|
+
|
|
97
|
+
# UV
|
|
98
|
+
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
|
|
99
|
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
|
100
|
+
# commonly ignored for libraries.
|
|
101
|
+
#uv.lock
|
|
102
|
+
|
|
103
|
+
# poetry
|
|
104
|
+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
|
105
|
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
|
106
|
+
# commonly ignored for libraries.
|
|
107
|
+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
|
108
|
+
#poetry.lock
|
|
109
|
+
#poetry.toml
|
|
110
|
+
|
|
111
|
+
# pdm
|
|
112
|
+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
|
113
|
+
# pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
|
|
114
|
+
# https://pdm-project.org/en/latest/usage/project/#working-with-version-control
|
|
115
|
+
#pdm.lock
|
|
116
|
+
#pdm.toml
|
|
117
|
+
.pdm-python
|
|
118
|
+
.pdm-build/
|
|
119
|
+
|
|
120
|
+
# pixi
|
|
121
|
+
# Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
|
|
122
|
+
#pixi.lock
|
|
123
|
+
# Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
|
|
124
|
+
# in the .venv directory. It is recommended not to include this directory in version control.
|
|
125
|
+
.pixi
|
|
126
|
+
|
|
127
|
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
|
128
|
+
__pypackages__/
|
|
129
|
+
|
|
130
|
+
# Celery stuff
|
|
131
|
+
celerybeat-schedule
|
|
132
|
+
celerybeat.pid
|
|
133
|
+
|
|
134
|
+
# SageMath parsed files
|
|
135
|
+
*.sage.py
|
|
136
|
+
|
|
137
|
+
# Environments
|
|
138
|
+
.env
|
|
139
|
+
.envrc
|
|
140
|
+
.venv
|
|
141
|
+
env/
|
|
142
|
+
venv/
|
|
143
|
+
ENV/
|
|
144
|
+
env.bak/
|
|
145
|
+
venv.bak/
|
|
146
|
+
|
|
147
|
+
# Spyder project settings
|
|
148
|
+
.spyderproject
|
|
149
|
+
.spyproject
|
|
150
|
+
|
|
151
|
+
# Rope project settings
|
|
152
|
+
.ropeproject
|
|
153
|
+
|
|
154
|
+
# mkdocs documentation
|
|
155
|
+
/site
|
|
156
|
+
|
|
157
|
+
# mypy
|
|
158
|
+
.mypy_cache/
|
|
159
|
+
.dmypy.json
|
|
160
|
+
dmypy.json
|
|
161
|
+
|
|
162
|
+
# Pyre type checker
|
|
163
|
+
.pyre/
|
|
164
|
+
|
|
165
|
+
# pytype static type analyzer
|
|
166
|
+
.pytype/
|
|
167
|
+
|
|
168
|
+
# Cython debug symbols
|
|
169
|
+
cython_debug/
|
|
170
|
+
|
|
171
|
+
# PyCharm
|
|
172
|
+
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
|
173
|
+
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
|
174
|
+
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
|
175
|
+
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
|
176
|
+
#.idea/
|
|
177
|
+
|
|
178
|
+
# Abstra
|
|
179
|
+
# Abstra is an AI-powered process automation framework.
|
|
180
|
+
# Ignore directories containing user credentials, local state, and settings.
|
|
181
|
+
# Learn more at https://abstra.io/docs
|
|
182
|
+
.abstra/
|
|
183
|
+
|
|
184
|
+
# Visual Studio Code
|
|
185
|
+
# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
|
|
186
|
+
# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
|
|
187
|
+
# and can be added to the global gitignore or merged into this file. However, if you prefer,
|
|
188
|
+
# you could uncomment the following to ignore the entire vscode folder
|
|
189
|
+
.vscode/
|
|
190
|
+
|
|
191
|
+
# Ruff stuff:
|
|
192
|
+
.ruff_cache/
|
|
193
|
+
|
|
194
|
+
# PyPI configuration file
|
|
195
|
+
.pypirc
|
|
196
|
+
|
|
197
|
+
# Cursor
|
|
198
|
+
# Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
|
|
199
|
+
# exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
|
|
200
|
+
# refer to https://docs.cursor.com/context/ignore-files
|
|
201
|
+
.cursorignore
|
|
202
|
+
.cursorindexingignore
|
|
203
|
+
|
|
204
|
+
# Marimo
|
|
205
|
+
marimo/_static/
|
|
206
|
+
marimo/_lsp/
|
|
207
|
+
__marimo__/
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to this project will be documented in this file.
|
|
4
|
+
|
|
5
|
+
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
|
|
6
|
+
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
|
+
|
|
8
|
+
## [0.1.2] - 2026-06-29
|
|
9
|
+
|
|
10
|
+
### Added
|
|
11
|
+
|
|
12
|
+
- `examples` dependency group in `pyproject.toml` (`openai`, `anthropic`, `google-genai`, `azure-identity`) so the provider example scripts run with `uv run --group examples`.
|
|
13
|
+
- Expanded the per-provider example scripts.
|
|
14
|
+
|
|
15
|
+
### Changed
|
|
16
|
+
|
|
17
|
+
- README images now use absolute `raw.githubusercontent.com` URLs so they render on the PyPI project page.
|
|
18
|
+
|
|
19
|
+
## [0.1.1] - 2026-06-24
|
|
20
|
+
|
|
21
|
+
### Changed
|
|
22
|
+
|
|
23
|
+
- Replaced the `shared_models` parameter with `cache_scope` (a `CacheScope` enum — `MODEL` by default, `HOST` to opt in) to control cache partitioning.
|
|
24
|
+
- Rewrote the README and added a flow diagram illustrating the request lifecycle.
|
|
25
|
+
|
|
26
|
+
### Added
|
|
27
|
+
|
|
28
|
+
- Standalone example scripts for each supported provider, including streaming usage.
|
|
29
|
+
|
|
30
|
+
### Fixed
|
|
31
|
+
|
|
32
|
+
- Streaming cache misses are now correctly tee'd, reconstructed into canonical JSON at stream end, and cached.
|
|
33
|
+
|
|
34
|
+
## [0.1.0] - 2026-06-13
|
|
35
|
+
|
|
36
|
+
First public release.
|
|
37
|
+
|
|
38
|
+
### Added
|
|
39
|
+
|
|
40
|
+
- Transparent semantic cache for LLM API calls via `httpx` transport patching — zero changes to application code.
|
|
41
|
+
- Module-level singleton API (`khazad.init()` / `stop()` / `get_stats()` / `flush()` / `is_active()`) and explicit `Khazad` class with the same surface.
|
|
42
|
+
- Redis 8 Vector Sets backend (`VADD` / `VSIM`), one vector set per `(provider host, model)` scope so different models never cross-serve.
|
|
43
|
+
- Provider parsers: OpenAI Chat Completions (incl. Azure OpenAI and any OpenAI-compatible proxy), OpenAI Responses API, Anthropic Messages, Google Gemini `generateContent`.
|
|
44
|
+
- Conversation-aware matching: the full message list (system, user, assistant) is embedded, not just the last user turn.
|
|
45
|
+
- Streaming support both ways:
|
|
46
|
+
- cache hits replay as SSE streams for sync and async clients;
|
|
47
|
+
- streaming cache misses are tee'd to the client with no added latency, reconstructed into canonical JSON at stream end, and cached (aborted streams are never cached).
|
|
48
|
+
- Embedding backends: HuggingFace `sentence-transformers` (default, local) and OpenAI Embeddings (optional extra `khazad[openai-embeddings]`).
|
|
49
|
+
- Configurable similarity threshold, TTL with automatic pruning of orphaned vectors, Redis key namespace, log level.
|
|
50
|
+
- Thread-safe hit/miss statistics (`total_requests`, `cache_hits`, `cache_misses`, `hit_rate`, `avg_hit_similarity`).
|
|
51
|
+
- `hosts` opt-in allowlist (exact hosts and `*.` wildcard subdomains) — restricts interception to explicitly listed endpoints.
|
|
52
|
+
|
|
53
|
+
[0.1.2]: https://github.com/GuglielmoCerri/khazad/releases/tag/v0.1.2
|
|
54
|
+
[0.1.1]: https://github.com/GuglielmoCerri/khazad/releases/tag/v0.1.1
|
|
55
|
+
[0.1.0]: https://github.com/GuglielmoCerri/khazad/releases/tag/v0.1.0
|
khazad-0.1.2/CLAUDE.md
ADDED
|
@@ -0,0 +1,255 @@
|
|
|
1
|
+
# CLAUDE.md - Khazad Project Context
|
|
2
|
+
|
|
3
|
+
## Frequently Used Commands
|
|
4
|
+
|
|
5
|
+
```bash
|
|
6
|
+
# Setup
|
|
7
|
+
uv sync --group dev # Install all dependencies (creates .venv automatically)
|
|
8
|
+
|
|
9
|
+
# Testing (no Redis or API keys needed — fakes and mock transports)
|
|
10
|
+
uv run python -m pytest tests/ -q # Full suite
|
|
11
|
+
uv run python -m pytest tests/unit/ -q # Unit tests only
|
|
12
|
+
uv run python -m pytest -m "not stress" # Skip stress tests
|
|
13
|
+
|
|
14
|
+
# Lint / format
|
|
15
|
+
uv run python -m ruff check . --fix # Lint with auto-fix
|
|
16
|
+
uv run python -m ruff format . # Format code
|
|
17
|
+
|
|
18
|
+
# Quick smoke test (requires Redis 8 + endpoint credentials)
|
|
19
|
+
uv run --group examples python -P examples/azure_openai.py
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
## Important Architectural Patterns
|
|
23
|
+
|
|
24
|
+
### Single Entry Point — `Khazad` Class
|
|
25
|
+
|
|
26
|
+
Everything goes through one class. There is no separate engine, config model, or orchestrator.
|
|
27
|
+
`Khazad` owns the embedder, vector store, parsers, stats, and cache logic directly.
|
|
28
|
+
|
|
29
|
+
```python
|
|
30
|
+
from khazad import Khazad
|
|
31
|
+
|
|
32
|
+
cache = Khazad(redis_url="redis://localhost:6379", threshold=0.92)
|
|
33
|
+
# ... all LLM HTTP traffic is now cached ...
|
|
34
|
+
cache.stop()
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
A module-level singleton API (`khazad.init()` / `khazad.stop()`) wraps `Khazad` for convenience.
|
|
38
|
+
|
|
39
|
+
### Request lifecycle: prepare → lookup → store
|
|
40
|
+
|
|
41
|
+
The transport calls `Khazad.prepare(request)` exactly once per request. It returns a
|
|
42
|
+
`PreparedRequest` (parser, prompt, scope, stream flag) or `None` for pass-through.
|
|
43
|
+
The request body is JSON-parsed **once**; the embedding is computed lazily and memoized
|
|
44
|
+
on the `PreparedRequest`, so a miss that later stores its response never re-embeds.
|
|
45
|
+
|
|
46
|
+
- `prepare(request) -> PreparedRequest | None` — parser matching + body parsing
|
|
47
|
+
- `lookup(prepared) -> CacheHit | None` — embed, VSIM search, stats
|
|
48
|
+
- `store(prepared, response_bytes)` — reuses the memoized embedding
|
|
49
|
+
|
|
50
|
+
`prepare()` also applies the opt-in `hosts=[...]` allowlist (exact match or `*.suffix`
|
|
51
|
+
wildcard, case-insensitive) — non-allowed hosts pass through untouched.
|
|
52
|
+
|
|
53
|
+
A temperature-based gate (`cache_only_deterministic`) was evaluated and deliberately
|
|
54
|
+
**rejected**: GPT-5-family and o-series models hard-reject any `temperature` other than
|
|
55
|
+
the default 1.0 (400 error), so gating on `temperature=0` would make flagship models
|
|
56
|
+
permanently uncacheable. Do not reintroduce it.
|
|
57
|
+
|
|
58
|
+
Unparseable, unmatched, or non-allowlisted requests are **not counted** in stats.
|
|
59
|
+
|
|
60
|
+
### Cache scope: host + model
|
|
61
|
+
|
|
62
|
+
`scope = f"{host}/{model or 'default'}"`. Each scope gets its own Redis vector set,
|
|
63
|
+
so the same prompt sent to `gpt-4o` and `gpt-4o-mini` can never cross-serve.
|
|
64
|
+
The prompt text embedded is the **full conversation** (`role: text` lines, including
|
|
65
|
+
system), not just the last user message — prevents multi-turn collisions.
|
|
66
|
+
|
|
67
|
+
The opt-in `cache_scope` parameter (a `CacheScope` enum — `MODEL` by default, `HOST`
|
|
68
|
+
to opt in; keyword-only on `Khazad`, also on `khazad.init`) controls this. Pass
|
|
69
|
+
`cache_scope=CacheScope.HOST` (or the string `"host"`) to collapse the scope to `host`
|
|
70
|
+
only, so every model/deployment on the same provider shares one vector set. The host
|
|
71
|
+
always stays in the scope, so different providers (Azure OpenAI vs Gemini) remain
|
|
72
|
+
isolated and a response is never replayed to a client expecting a different wire
|
|
73
|
+
format. Use it only for format-compatible pools (e.g. several Azure OpenAI
|
|
74
|
+
deployments, or `gpt-4o` + `gpt-4o-mini`).
|
|
75
|
+
|
|
76
|
+
### Hexagonal Architecture (Ports & Adapters)
|
|
77
|
+
|
|
78
|
+
- **Ports** (`khazad/ports/`) — abstract interfaces: `Embedder`, `ProviderParser`, `VectorStore`
|
|
79
|
+
- **Adapters** (`khazad/adapters/`) — concrete implementations (Redis, HuggingFace, OpenAI, parsers)
|
|
80
|
+
- `ProviderParser` is an ABC with shared concrete helpers (`build_response`, `_sse`,
|
|
81
|
+
`_iter_sse_payloads`, `_flatten_text`) — subclasses implement `can_handle` and
|
|
82
|
+
`parse_request`, and optionally override `stream_chunks` / `response_from_stream`.
|
|
83
|
+
- There is **no Azure parser** — Azure OpenAI is covered by `OpenAIParser`'s
|
|
84
|
+
path-suffix matching (`/chat/completions`).
|
|
85
|
+
|
|
86
|
+
### httpx Transport Monkey-Patching
|
|
87
|
+
|
|
88
|
+
Khazad intercepts LLM traffic by patching `httpx.Client.__init__` and `httpx.AsyncClient.__init__`
|
|
89
|
+
to wrap their transports (`khazad/_transport.py`, `install(cache)` / `uninstall()`).
|
|
90
|
+
|
|
91
|
+
`install()` is **idempotent for the original-init capture**: only the first call records the
|
|
92
|
+
pristine `httpx.*Client.__init__` references. Subsequent calls swap the active cache without
|
|
93
|
+
overwriting the originals, so `uninstall()` always restores real httpx.
|
|
94
|
+
|
|
95
|
+
Transports check `cache.is_active()` on every request — clients created while the patch
|
|
96
|
+
was installed stop serving from cache immediately after `stop()`.
|
|
97
|
+
|
|
98
|
+
### Streaming
|
|
99
|
+
|
|
100
|
+
- **Hit**: `parser.stream_chunks(cached_json)` is a *sync* generator of SSE frames;
|
|
101
|
+
`_ReplayStream` implements both `SyncByteStream` and `AsyncByteStream`, so sync and
|
|
102
|
+
async clients both replay correctly.
|
|
103
|
+
- **Miss**: the upstream SSE body is tee'd through `_SyncTeeStream` / `_AsyncTeeStream`
|
|
104
|
+
with zero added latency. The collected bytes are passed to `parser.response_from_stream(sse)`
|
|
105
|
+
when the stream ends — on natural exhaustion **or** on `close()`/`aclose()`. The latter
|
|
106
|
+
matters because SDKs (e.g. the OpenAI client) break their read loop on the terminal SSE
|
|
107
|
+
sentinel and close the response without driving the byte stream to EOF, so caching on
|
|
108
|
+
natural exhaustion alone would never fire. `response_from_stream` reconstructs the
|
|
109
|
+
**canonical JSON response** only when the capture is complete (OpenAI Chat requires the
|
|
110
|
+
`[DONE]` sentinel, Anthropic requires `message_stop`, Responses requires
|
|
111
|
+
`response.completed`); a partial/aborted stream reconstructs to `None` and is never cached.
|
|
112
|
+
- Compressed SSE bodies (`content-encoding != identity`) are passed through uncached.
|
|
113
|
+
- Gemini streaming (`:streamGenerateContent`) is not matched at all — pass-through.
|
|
114
|
+
|
|
115
|
+
### Redis adapter (`khazad/adapters/redis/store.py`)
|
|
116
|
+
|
|
117
|
+
- One vector set per scope: `{namespace}:vset:{scope}`; bodies at `{namespace}:resp:{key}`.
|
|
118
|
+
- `store()` pipelines `VADD` + `SET ex=ttl` (single round-trip).
|
|
119
|
+
- VSIM workaround: redis-py's `parse_vsim_result` callback misparses RESP3 dict responses
|
|
120
|
+
when the WITHSCORES option flag isn't propagated (found in 8.0.0b2, callback unchanged
|
|
121
|
+
in 8.0.0 GA — dependency is now `redis>=8.0.0,<9`). `search()` issues a raw
|
|
122
|
+
`execute_command("VSIM", ...)` and `_parse_vsim_response` handles both RESP3 dict and
|
|
123
|
+
RESP2 flat-list shapes.
|
|
124
|
+
- TTL: only the response body expires. `Khazad.lookup` prunes the orphaned vector
|
|
125
|
+
(`store.delete(scope, key)`) when the body is gone, then counts a miss.
|
|
126
|
+
|
|
127
|
+
### Testing with Dependency Injection
|
|
128
|
+
|
|
129
|
+
For tests, `Khazad` accepts `_vector_store` and `_embedder_instance` keyword args (both or
|
|
130
|
+
neither) to bypass Redis and real embedding models:
|
|
131
|
+
|
|
132
|
+
```python
|
|
133
|
+
cache = Khazad(
|
|
134
|
+
threshold=0.99,
|
|
135
|
+
_vector_store=InMemoryVectorStore(),
|
|
136
|
+
_embedder_instance=FakeEmbedder(),
|
|
137
|
+
)
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
This skips Redis connection and transport patching entirely (tests call
|
|
141
|
+
`install(cache)` / `uninstall()` themselves).
|
|
142
|
+
|
|
143
|
+
## Critical Rules
|
|
144
|
+
|
|
145
|
+
### Git Operations
|
|
146
|
+
|
|
147
|
+
**CRITICAL**: NEVER use `git push` or attempt to push to remote repositories. The user will handle all git push operations.
|
|
148
|
+
|
|
149
|
+
### Code Quality
|
|
150
|
+
|
|
151
|
+
**IMPORTANT**: Always run `uv run python -m ruff check . --fix && uv run python -m ruff format .` before committing.
|
|
152
|
+
|
|
153
|
+
### README.md Maintenance
|
|
154
|
+
|
|
155
|
+
**IMPORTANT**: DO NOT modify README.md unless explicitly requested.
|
|
156
|
+
|
|
157
|
+
### No Pydantic
|
|
158
|
+
|
|
159
|
+
The project deliberately removed `pydantic` as a dependency. Validation is done inline in `Khazad.__init__`.
|
|
160
|
+
Do not reintroduce pydantic.
|
|
161
|
+
|
|
162
|
+
### No Separate Engine Class
|
|
163
|
+
|
|
164
|
+
All cache logic (lookup, store, stats, key generation) lives in the `Khazad` class.
|
|
165
|
+
Do not create a separate `CacheEngine` or orchestrator class.
|
|
166
|
+
|
|
167
|
+
### Python 3.10 Compatibility
|
|
168
|
+
|
|
169
|
+
`requires-python = ">=3.10"` (ruff target py310). No 3.11+ stdlib APIs (`tomllib`,
|
|
170
|
+
`StrEnum`, `asyncio.timeout`, exception groups). Every module starts with
|
|
171
|
+
`from __future__ import annotations`. Dev venv is pinned to 3.13 via `.python-version`.
|
|
172
|
+
|
|
173
|
+
## Testing Notes
|
|
174
|
+
|
|
175
|
+
- **Unit tests** use `FakeEmbedder` and `InMemoryVectorStore` from `tests/conftest.py` — no external services needed
|
|
176
|
+
- **Integration tests** use `httpx.MockTransport` to simulate LLM APIs — no real API keys needed
|
|
177
|
+
- **Redis store tests** (`test_redis_store.py`) mock the redis-py client with plain `Mock`/`MagicMock` (the store is sync — never use `AsyncMock` there)
|
|
178
|
+
- **Stress tests** are marked with `@pytest.mark.stress`
|
|
179
|
+
- `pytest-asyncio` runs in auto mode (`asyncio_mode = "auto"`)
|
|
180
|
+
- `conftest.py` provides fixtures for all provider request/response bodies
|
|
181
|
+
- `FakeEmbedder` hashes with sha256, which is deterministic across processes (the built-in `hash()` is salted per process, so it is not)
|
|
182
|
+
|
|
183
|
+
## Project Structure
|
|
184
|
+
|
|
185
|
+
```text
|
|
186
|
+
khazad/
|
|
187
|
+
├── __init__.py # Public API + module-level singleton (init/stop/get_stats/flush)
|
|
188
|
+
├── khazad.py # Khazad class + PreparedRequest — all cache logic
|
|
189
|
+
├── _models.py # Domain models: ParsedRequest, CacheHit, Stats
|
|
190
|
+
├── _transport.py # httpx patch, cached transports, tee/replay streams
|
|
191
|
+
├── ports/ # Abstract interfaces (Hexagonal Architecture boundaries)
|
|
192
|
+
│ ├── embedder.py # Embedder ABC (embed, dimension)
|
|
193
|
+
│ ├── parser.py # ProviderParser ABC + shared SSE/text helpers
|
|
194
|
+
│ └── store.py # VectorStore ABC (scope-aware search/store/delete)
|
|
195
|
+
└── adapters/ # Concrete implementations
|
|
196
|
+
├── embedders/
|
|
197
|
+
│ ├── huggingface.py # HuggingFaceEmbedder (sentence-transformers, free)
|
|
198
|
+
│ └── openai.py # OpenAIEmbedder (OpenAI API, paid)
|
|
199
|
+
├── parsers/
|
|
200
|
+
│ ├── openai.py # OpenAI Chat Completions (+ Azure, proxies)
|
|
201
|
+
│ ├── openai_responses.py # OpenAI Responses API
|
|
202
|
+
│ ├── anthropic.py # Anthropic Messages
|
|
203
|
+
│ └── gemini.py # Google Gemini
|
|
204
|
+
└── redis/
|
|
205
|
+
└── store.py # RedisVectorStore (Redis 8 Vector Sets)
|
|
206
|
+
|
|
207
|
+
tests/
|
|
208
|
+
├── conftest.py # FakeEmbedder, InMemoryVectorStore, provider fixtures
|
|
209
|
+
├── unit/ # Pure logic tests (no I/O)
|
|
210
|
+
│ ├── test_config.py # Khazad init validation
|
|
211
|
+
│ ├── test_engine.py # prepare/lookup/store, scoping, stats, embedding reuse
|
|
212
|
+
│ ├── test_models.py # Stats, CacheHit
|
|
213
|
+
│ └── test_parsers/ # Per-provider parser tests (incl. SSE round-trips)
|
|
214
|
+
├── integration/ # Full lifecycle with mock transports
|
|
215
|
+
│ ├── test_end_to_end.py
|
|
216
|
+
│ ├── test_interceptor.py
|
|
217
|
+
│ ├── test_redis_store.py # Mocked redis-py client
|
|
218
|
+
│ ├── test_streaming.py # SSE capture + replay, sync & async
|
|
219
|
+
│ ├── test_provider_openai.py # OpenAI Chat + Responses
|
|
220
|
+
│ ├── test_provider_azure_openai.py # Azure deployments
|
|
221
|
+
│ ├── test_provider_openai_compat.py # LiteLLM, vLLM, Ollama
|
|
222
|
+
│ ├── test_provider_anthropic.py # Anthropic Claude
|
|
223
|
+
│ └── test_provider_gemini.py # Google Gemini
|
|
224
|
+
└── stress/ # Concurrent access, thread safety
|
|
225
|
+
└── test_concurrent.py
|
|
226
|
+
|
|
227
|
+
examples/
|
|
228
|
+
└── azure_openai.py # Smoke test against a real endpoint
|
|
229
|
+
|
|
230
|
+
docs/
|
|
231
|
+
└── _static/ # Logos (logo-light.png / logo-dark.png are transparent)
|
|
232
|
+
```
|
|
233
|
+
|
|
234
|
+
## Supported Providers
|
|
235
|
+
|
|
236
|
+
| Provider | Parser | URL pattern matched |
|
|
237
|
+
|---|---|---|
|
|
238
|
+
| OpenAI Chat | `OpenAIParser` | any path ending `/chat/completions` |
|
|
239
|
+
| OpenAI Responses | `OpenAIResponsesParser` | any path ending `/responses` |
|
|
240
|
+
| Azure OpenAI | (covered by `OpenAIParser`) | any path ending `/chat/completions` |
|
|
241
|
+
| OpenAI-compat proxies | (covered by `OpenAIParser`) | any path ending `/chat/completions` |
|
|
242
|
+
| Anthropic | `AnthropicParser` | `api.anthropic.com/v1/messages` |
|
|
243
|
+
| Google Gemini | `GeminiParser` | `generativelanguage.googleapis.com/*:generateContent` |
|
|
244
|
+
|
|
245
|
+
## Cache Flow
|
|
246
|
+
|
|
247
|
+
```text
|
|
248
|
+
Request → prepare(request)
|
|
249
|
+
None → pass-through to real API (not counted in stats)
|
|
250
|
+
PreparedRequest → embed(conversation) → VSIM in scope {host}/{model}
|
|
251
|
+
HIT → replay cached JSON (or synthesize SSE stream)
|
|
252
|
+
MISS → forward to API
|
|
253
|
+
non-streaming 200 → store JSON body
|
|
254
|
+
SSE 200 → tee stream to client, reconstruct JSON at end, store
|
|
255
|
+
```
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
# Contributing to Khazad
|
|
2
|
+
|
|
3
|
+
Thanks for considering a contribution! This document covers everything you need to get a change merged.
|
|
4
|
+
|
|
5
|
+
## Development setup
|
|
6
|
+
|
|
7
|
+
Requirements: Python >= 3.10 and [uv](https://docs.astral.sh/uv/).
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
git clone https://github.com/GuglielmoCerri/khazad.git
|
|
11
|
+
cd khazad
|
|
12
|
+
uv sync --group dev
|
|
13
|
+
```
|
|
14
|
+
|
|
15
|
+
`uv sync` creates `.venv` and installs the project in editable mode — nothing else to do.
|
|
16
|
+
|
|
17
|
+
A running Redis is **not** required for the test suite (fakes and mock transports are used everywhere). For manual end-to-end testing against a real instance:
|
|
18
|
+
|
|
19
|
+
```bash
|
|
20
|
+
docker run -d --name redis8 -p 6379:6379 redis:8
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
## Running tests
|
|
24
|
+
|
|
25
|
+
```bash
|
|
26
|
+
uv run python -m pytest tests/ -q # full suite
|
|
27
|
+
uv run python -m pytest tests/unit/ -q # unit tests only
|
|
28
|
+
uv run python -m pytest -m "not stress" # skip stress tests
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
The suite must be green before any PR. New behavior needs new tests:
|
|
32
|
+
|
|
33
|
+
- **Parsers** → `tests/unit/test_parsers/` (include an SSE round-trip test if the parser supports streaming: `stream_chunks` → `response_from_stream` must preserve content)
|
|
34
|
+
- **Cache logic** → `tests/unit/test_engine.py`
|
|
35
|
+
- **Transport / interception** → `tests/integration/`
|
|
36
|
+
|
|
37
|
+
## Lint and format
|
|
38
|
+
|
|
39
|
+
```bash
|
|
40
|
+
uv run python -m ruff check . --fix
|
|
41
|
+
uv run python -m ruff format .
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
CI rejects unformatted code. Configuration lives in `pyproject.toml` (line length 99, target py310).
|
|
45
|
+
|
|
46
|
+
## Architecture ground rules
|
|
47
|
+
|
|
48
|
+
Read `CLAUDE.md` for the full picture. The non-negotiables:
|
|
49
|
+
|
|
50
|
+
1. **One entry point.** All cache logic lives in the `Khazad` class — do not introduce a separate engine, orchestrator, or config object.
|
|
51
|
+
2. **No pydantic.** Validation is inline in `Khazad.__init__`. Plain dataclasses for models.
|
|
52
|
+
3. **Ports & Adapters.** New providers implement `ProviderParser` (`khazad/ports/parser.py`); new storage backends implement `VectorStore`; new embedders implement `Embedder`. Adapters never import other adapters.
|
|
53
|
+
4. **Parse once.** A request body is JSON-parsed exactly once (`parse_request`); the embedding is computed at most once per request. Don't add code paths that re-parse or re-embed.
|
|
54
|
+
5. **The cache stores canonical JSON only.** Streamed responses must be reconstructed via `response_from_stream` before storing — never cache raw SSE bytes.
|
|
55
|
+
6. **Python 3.10 compatibility.** No 3.11+ stdlib APIs (`tomllib`, `StrEnum`, `asyncio.timeout`, exception groups...). Use `from __future__ import annotations` in every module.
|
|
56
|
+
|
|
57
|
+
## Adding a new provider parser
|
|
58
|
+
|
|
59
|
+
1. Create `khazad/adapters/parsers/<provider>.py` implementing `ProviderParser`:
|
|
60
|
+
- `can_handle(url)` — match by URL path suffix when the API is host-agnostic (proxies!), by host only when the schema is unique to one vendor.
|
|
61
|
+
- `parse_request(request)` — return a `ParsedRequest(prompt, model, stream)`. The prompt must include the **full conversation** (`role: text` lines). Raise `ValueError` for bodies you can't understand.
|
|
62
|
+
- Override `stream_chunks` / `response_from_stream` only if the provider streams over SSE.
|
|
63
|
+
2. Register it in the `_parsers` list in `khazad/khazad.py`.
|
|
64
|
+
3. Add request/response fixtures in `tests/conftest.py`, unit tests in `tests/unit/test_parsers/`, and an interception test in `tests/integration/`.
|
|
65
|
+
4. Document the URL pattern in the README "Supported Providers" table.
|
|
66
|
+
|
|
67
|
+
## Pull requests
|
|
68
|
+
|
|
69
|
+
- Branch from `main`; one logical change per PR.
|
|
70
|
+
- Subject line in imperative mood ("Add Mistral parser", not "Added...").
|
|
71
|
+
- Explain *why* in the body if it isn't obvious from the diff.
|
|
72
|
+
- Update `CHANGELOG.md` under `[Unreleased]`.
|
|
73
|
+
- Don't bump the version — maintainers handle releases.
|
|
74
|
+
|
|
75
|
+
## Reporting bugs
|
|
76
|
+
|
|
77
|
+
Open an issue with: Python version, khazad version, the SDK and provider you're calling, a minimal reproduction, and (if relevant) `log_level="DEBUG"` output. For suspected cache-correctness issues, include the two prompts involved and your `threshold`.
|
|
78
|
+
|
|
79
|
+
## Security
|
|
80
|
+
|
|
81
|
+
Don't open public issues for security problems — see the contact in `pyproject.toml`.
|
khazad-0.1.2/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Guglielmo Cerri
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|