khazad 0.1.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66) hide show
  1. khazad-0.1.2/.gitignore +207 -0
  2. khazad-0.1.2/CHANGELOG.md +55 -0
  3. khazad-0.1.2/CLAUDE.md +255 -0
  4. khazad-0.1.2/CONTRIBUTING.md +81 -0
  5. khazad-0.1.2/LICENSE +21 -0
  6. khazad-0.1.2/PKG-INFO +443 -0
  7. khazad-0.1.2/README.md +412 -0
  8. khazad-0.1.2/docs/_static/flow.svg +2 -0
  9. khazad-0.1.2/docs/_static/logo-dark.png +0 -0
  10. khazad-0.1.2/docs/_static/logo-light.png +0 -0
  11. khazad-0.1.2/docs/_static/run_example.png +0 -0
  12. khazad-0.1.2/examples/anthropic.py +31 -0
  13. khazad-0.1.2/examples/anthropic_scope.py +46 -0
  14. khazad-0.1.2/examples/anthropic_system.py +50 -0
  15. khazad-0.1.2/examples/azure_openai.py +47 -0
  16. khazad-0.1.2/examples/azure_openai_entra_stream.py +52 -0
  17. khazad-0.1.2/examples/gemini.py +21 -0
  18. khazad-0.1.2/examples/gemini_hosts.py +37 -0
  19. khazad-0.1.2/examples/gemini_stop.py +43 -0
  20. khazad-0.1.2/examples/openai.py +30 -0
  21. khazad-0.1.2/examples/openai_ollama.py +32 -0
  22. khazad-0.1.2/khazad/__init__.py +109 -0
  23. khazad-0.1.2/khazad/_models.py +81 -0
  24. khazad-0.1.2/khazad/_transport.py +290 -0
  25. khazad-0.1.2/khazad/adapters/__init__.py +0 -0
  26. khazad-0.1.2/khazad/adapters/embedders/__init__.py +0 -0
  27. khazad-0.1.2/khazad/adapters/embedders/huggingface.py +41 -0
  28. khazad-0.1.2/khazad/adapters/embedders/openai.py +50 -0
  29. khazad-0.1.2/khazad/adapters/parsers/__init__.py +0 -0
  30. khazad-0.1.2/khazad/adapters/parsers/anthropic.py +122 -0
  31. khazad-0.1.2/khazad/adapters/parsers/gemini.py +50 -0
  32. khazad-0.1.2/khazad/adapters/parsers/openai.py +125 -0
  33. khazad-0.1.2/khazad/adapters/parsers/openai_responses.py +168 -0
  34. khazad-0.1.2/khazad/adapters/redis/__init__.py +0 -0
  35. khazad-0.1.2/khazad/adapters/redis/store.py +142 -0
  36. khazad-0.1.2/khazad/khazad.py +290 -0
  37. khazad-0.1.2/khazad/ports/__init__.py +7 -0
  38. khazad-0.1.2/khazad/ports/embedder.py +18 -0
  39. khazad-0.1.2/khazad/ports/parser.py +91 -0
  40. khazad-0.1.2/khazad/ports/store.py +50 -0
  41. khazad-0.1.2/khazad/py.typed +0 -0
  42. khazad-0.1.2/pyproject.toml +117 -0
  43. khazad-0.1.2/tests/__init__.py +0 -0
  44. khazad-0.1.2/tests/conftest.py +309 -0
  45. khazad-0.1.2/tests/integration/__init__.py +0 -0
  46. khazad-0.1.2/tests/integration/test_end_to_end.py +204 -0
  47. khazad-0.1.2/tests/integration/test_interceptor.py +347 -0
  48. khazad-0.1.2/tests/integration/test_provider_anthropic.py +109 -0
  49. khazad-0.1.2/tests/integration/test_provider_azure_openai.py +97 -0
  50. khazad-0.1.2/tests/integration/test_provider_gemini.py +87 -0
  51. khazad-0.1.2/tests/integration/test_provider_openai.py +117 -0
  52. khazad-0.1.2/tests/integration/test_provider_openai_compat.py +118 -0
  53. khazad-0.1.2/tests/integration/test_redis_store.py +128 -0
  54. khazad-0.1.2/tests/integration/test_streaming.py +279 -0
  55. khazad-0.1.2/tests/stress/__init__.py +0 -0
  56. khazad-0.1.2/tests/stress/test_concurrent.py +137 -0
  57. khazad-0.1.2/tests/unit/__init__.py +0 -0
  58. khazad-0.1.2/tests/unit/test_config.py +108 -0
  59. khazad-0.1.2/tests/unit/test_engine.py +287 -0
  60. khazad-0.1.2/tests/unit/test_models.py +69 -0
  61. khazad-0.1.2/tests/unit/test_parsers/__init__.py +0 -0
  62. khazad-0.1.2/tests/unit/test_parsers/test_anthropic_parser.py +157 -0
  63. khazad-0.1.2/tests/unit/test_parsers/test_gemini_parser.py +94 -0
  64. khazad-0.1.2/tests/unit/test_parsers/test_openai_parser.py +191 -0
  65. khazad-0.1.2/tests/unit/test_parsers/test_openai_responses_parser.py +163 -0
  66. khazad-0.1.2/uv.lock +2514 -0
@@ -0,0 +1,207 @@
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[codz]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py.cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # UV
98
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ #uv.lock
102
+
103
+ # poetry
104
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
105
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
106
+ # commonly ignored for libraries.
107
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
108
+ #poetry.lock
109
+ #poetry.toml
110
+
111
+ # pdm
112
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
113
+ # pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
114
+ # https://pdm-project.org/en/latest/usage/project/#working-with-version-control
115
+ #pdm.lock
116
+ #pdm.toml
117
+ .pdm-python
118
+ .pdm-build/
119
+
120
+ # pixi
121
+ # Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
122
+ #pixi.lock
123
+ # Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
124
+ # in the .venv directory. It is recommended not to include this directory in version control.
125
+ .pixi
126
+
127
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
128
+ __pypackages__/
129
+
130
+ # Celery stuff
131
+ celerybeat-schedule
132
+ celerybeat.pid
133
+
134
+ # SageMath parsed files
135
+ *.sage.py
136
+
137
+ # Environments
138
+ .env
139
+ .envrc
140
+ .venv
141
+ env/
142
+ venv/
143
+ ENV/
144
+ env.bak/
145
+ venv.bak/
146
+
147
+ # Spyder project settings
148
+ .spyderproject
149
+ .spyproject
150
+
151
+ # Rope project settings
152
+ .ropeproject
153
+
154
+ # mkdocs documentation
155
+ /site
156
+
157
+ # mypy
158
+ .mypy_cache/
159
+ .dmypy.json
160
+ dmypy.json
161
+
162
+ # Pyre type checker
163
+ .pyre/
164
+
165
+ # pytype static type analyzer
166
+ .pytype/
167
+
168
+ # Cython debug symbols
169
+ cython_debug/
170
+
171
+ # PyCharm
172
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
173
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
174
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
175
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
176
+ #.idea/
177
+
178
+ # Abstra
179
+ # Abstra is an AI-powered process automation framework.
180
+ # Ignore directories containing user credentials, local state, and settings.
181
+ # Learn more at https://abstra.io/docs
182
+ .abstra/
183
+
184
+ # Visual Studio Code
185
+ # Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
186
+ # that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
187
+ # and can be added to the global gitignore or merged into this file. However, if you prefer,
188
+ # you can ignore the entire vscode folder, as the following line does:
189
+ .vscode/
190
+
191
+ # Ruff stuff:
192
+ .ruff_cache/
193
+
194
+ # PyPI configuration file
195
+ .pypirc
196
+
197
+ # Cursor
198
+ # Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
199
+ # exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
200
+ # refer to https://docs.cursor.com/context/ignore-files
201
+ .cursorignore
202
+ .cursorindexingignore
203
+
204
+ # Marimo
205
+ marimo/_static/
206
+ marimo/_lsp/
207
+ __marimo__/
@@ -0,0 +1,55 @@
1
+ # Changelog
2
+
3
+ All notable changes to this project will be documented in this file.
4
+
5
+ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
6
+ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
+
8
+ ## [0.1.2] - 2026-06-29
9
+
10
+ ### Added
11
+
12
+ - `examples` dependency group in `pyproject.toml` (`openai`, `anthropic`, `google-genai`, `azure-identity`) so the provider example scripts run with `uv run --group examples`.
13
+ - Expanded the per-provider example scripts.
14
+
15
+ ### Changed
16
+
17
+ - README images now use absolute `raw.githubusercontent.com` URLs so they render on the PyPI project page.
18
+
19
+ ## [0.1.1] - 2026-06-24
20
+
21
+ ### Changed
22
+
23
+ - Replaced the `shared_models` parameter with `cache_scope` (a `CacheScope` enum — `MODEL` by default, `HOST` to opt in) to control cache partitioning.
24
+ - Rewrote the README and added a flow diagram illustrating the request lifecycle.
25
+
26
+ ### Added
27
+
28
+ - Standalone example scripts for each supported provider, including streaming usage.
29
+
30
+ ### Fixed
31
+
32
+ - Streaming cache misses are now correctly tee'd, reconstructed into canonical JSON at stream end, and cached.
33
+
34
+ ## [0.1.0] - 2026-06-13
35
+
36
+ First public release.
37
+
38
+ ### Added
39
+
40
+ - Transparent semantic cache for LLM API calls via `httpx` transport patching — zero changes to application code.
41
+ - Module-level singleton API (`khazad.init()` / `stop()` / `get_stats()` / `flush()` / `is_active()`) and explicit `Khazad` class with the same surface.
42
+ - Redis 8 Vector Sets backend (`VADD` / `VSIM`), one vector set per `(provider host, model)` scope so different models never cross-serve.
43
+ - Provider parsers: OpenAI Chat Completions (incl. Azure OpenAI and any OpenAI-compatible proxy), OpenAI Responses API, Anthropic Messages, Google Gemini `generateContent`.
44
+ - Conversation-aware matching: the full message list (system, user, assistant) is embedded, not just the last user turn.
45
+ - Streaming support both ways:
46
+ - cache hits replay as SSE streams for sync and async clients;
47
+ - streaming cache misses are tee'd to the client with no added latency, reconstructed into canonical JSON at stream end, and cached (aborted streams are never cached).
48
+ - Embedding backends: HuggingFace `sentence-transformers` (default, local) and OpenAI Embeddings (optional extra `khazad[openai-embeddings]`).
49
+ - Configurable similarity threshold, TTL with automatic pruning of orphaned vectors, Redis key namespace, log level.
50
+ - Thread-safe hit/miss statistics (`total_requests`, `cache_hits`, `cache_misses`, `hit_rate`, `avg_hit_similarity`).
51
+ - `hosts` opt-in allowlist (exact hosts and `*.` wildcard subdomains) — restricts interception to explicitly listed endpoints.
52
+
53
+ [0.1.2]: https://github.com/GuglielmoCerri/khazad/releases/tag/v0.1.2
54
+ [0.1.1]: https://github.com/GuglielmoCerri/khazad/releases/tag/v0.1.1
55
+ [0.1.0]: https://github.com/GuglielmoCerri/khazad/releases/tag/v0.1.0
khazad-0.1.2/CLAUDE.md ADDED
@@ -0,0 +1,255 @@
1
+ # CLAUDE.md - Khazad Project Context
2
+
3
+ ## Frequently Used Commands
4
+
5
+ ```bash
6
+ # Setup
7
+ uv sync --group dev # Install all dependencies (creates .venv automatically)
8
+
9
+ # Testing (no Redis or API keys needed — fakes and mock transports)
10
+ uv run python -m pytest tests/ -q # Full suite
11
+ uv run python -m pytest tests/unit/ -q # Unit tests only
12
+ uv run python -m pytest -m "not stress" # Skip stress tests
13
+
14
+ # Lint / format
15
+ uv run python -m ruff check . --fix # Lint with auto-fix
16
+ uv run python -m ruff format . # Format code
17
+
18
+ # Quick smoke test (requires Redis 8 + endpoint credentials)
19
+ uv run --group examples python -P examples/azure_openai.py
20
+ ```
21
+
22
+ ## Important Architectural Patterns
23
+
24
+ ### Single Entry Point — `Khazad` Class
25
+
26
+ Everything goes through one class. There is no separate engine, config model, or orchestrator.
27
+ `Khazad` owns the embedder, vector store, parsers, stats, and cache logic directly.
28
+
29
+ ```python
30
+ from khazad import Khazad
31
+
32
+ cache = Khazad(redis_url="redis://localhost:6379", threshold=0.92)
33
+ # ... all LLM HTTP traffic is now cached ...
34
+ cache.stop()
35
+ ```
36
+
37
+ A module-level singleton API (`khazad.init()` / `khazad.stop()`) wraps `Khazad` for convenience.
38
+
39
+ ### Request lifecycle: prepare → lookup → store
40
+
41
+ The transport calls `Khazad.prepare(request)` exactly once per request. It returns a
42
+ `PreparedRequest` (parser, prompt, scope, stream flag) or `None` for pass-through.
43
+ The request body is JSON-parsed **once**; the embedding is computed lazily and memoized
44
+ on the `PreparedRequest`, so a miss that later stores its response never re-embeds.
45
+
46
+ - `prepare(request) -> PreparedRequest | None` — parser matching + body parsing
47
+ - `lookup(prepared) -> CacheHit | None` — embed, VSIM search, stats
48
+ - `store(prepared, response_bytes)` — reuses the memoized embedding
49
+
50
+ `prepare()` also applies the opt-in `hosts=[...]` allowlist (exact match or `*.suffix`
51
+ wildcard, case-insensitive) — non-allowed hosts pass through untouched.
52
+
53
+ A temperature-based gate (`cache_only_deterministic`) was evaluated and deliberately
54
+ **rejected**: GPT-5-family and o-series models hard-reject any `temperature` other than
55
+ the default 1.0 (400 error), so gating on `temperature=0` would make flagship models
56
+ permanently uncacheable. Do not reintroduce it.
57
+
58
+ Unparseable, unmatched, or non-allowlisted requests are **not counted** in stats.
59
+
60
+ ### Cache scope: host + model
61
+
62
+ `scope = f"{host}/{model or 'default'}"`. Each scope gets its own Redis vector set,
63
+ so the same prompt sent to `gpt-4o` and `gpt-4o-mini` can never cross-serve.
64
+ The prompt text embedded is the **full conversation** (`role: text` lines, including
65
+ system), not just the last user message — prevents multi-turn collisions.
66
+
67
+ The opt-in `cache_scope` parameter (a `CacheScope` enum — `MODEL` by default, `HOST`
68
+ to opt in; keyword-only on `Khazad`, also on `khazad.init`) controls this. Pass
69
+ `cache_scope=CacheScope.HOST` (or the string `"host"`) to collapse the scope to `host`
70
+ only, so every model/deployment on the same provider shares one vector set. The host
71
+ always stays in the scope, so different providers (Azure OpenAI vs Gemini) remain
72
+ isolated and a response is never replayed to a client expecting a different wire
73
+ format. Use it only for format-compatible pools (e.g. several Azure OpenAI
74
+ deployments, or `gpt-4o` + `gpt-4o-mini`).
75
+
76
+ ### Hexagonal Architecture (Ports & Adapters)
77
+
78
+ - **Ports** (`khazad/ports/`) — abstract interfaces: `Embedder`, `ProviderParser`, `VectorStore`
79
+ - **Adapters** (`khazad/adapters/`) — concrete implementations (Redis, HuggingFace, OpenAI, parsers)
80
+ - `ProviderParser` is an ABC with shared concrete helpers (`build_response`, `_sse`,
81
+ `_iter_sse_payloads`, `_flatten_text`) — subclasses implement `can_handle` and
82
+ `parse_request`, and optionally override `stream_chunks` / `response_from_stream`.
83
+ - There is **no Azure parser** — Azure OpenAI is covered by `OpenAIParser`'s
84
+ path-suffix matching (`/chat/completions`).
85
+
86
+ ### httpx Transport Monkey-Patching
87
+
88
+ Khazad intercepts LLM traffic by patching `httpx.Client.__init__` and `httpx.AsyncClient.__init__`
89
+ to wrap their transports (`khazad/_transport.py`, `install(cache)` / `uninstall()`).
90
+
91
+ `install()` is **idempotent for the original-init capture**: only the first call records the
92
+ pristine `httpx.*Client.__init__` references. Subsequent calls swap the active cache without
93
+ overwriting the originals, so `uninstall()` always restores real httpx.
94
+
95
+ Transports check `cache.is_active()` on every request — clients created while the patch
96
+ was installed stop serving from cache immediately after `stop()`.
97
+
98
+ ### Streaming
99
+
100
+ - **Hit**: `parser.stream_chunks(cached_json)` is a *sync* generator of SSE frames;
101
+ `_ReplayStream` implements both `SyncByteStream` and `AsyncByteStream`, so sync and
102
+ async clients both replay correctly.
103
+ - **Miss**: the upstream SSE body is tee'd through `_SyncTeeStream` / `_AsyncTeeStream`
104
+ with zero added latency. The collected bytes are passed to `parser.response_from_stream(sse)`
105
+ when the stream ends — on natural exhaustion **or** on `close()`/`aclose()`. The latter
106
+ matters because SDKs (e.g. the OpenAI client) break their read loop on the terminal SSE
107
+ sentinel and close the response without driving the byte stream to EOF, so caching on
108
+ natural exhaustion alone would never fire. `response_from_stream` reconstructs the
109
+ **canonical JSON response** only when the capture is complete (OpenAI Chat requires the
110
+ `[DONE]` sentinel, Anthropic requires `message_stop`, Responses requires
111
+ `response.completed`); a partial/aborted stream reconstructs to `None` and is never cached.
112
+ - Compressed SSE bodies (`content-encoding != identity`) are passed through uncached.
113
+ - Gemini streaming (`:streamGenerateContent`) is not matched at all — pass-through.
114
+
115
+ ### Redis adapter (`khazad/adapters/redis/store.py`)
116
+
117
+ - One vector set per scope: `{namespace}:vset:{scope}`; bodies at `{namespace}:resp:{key}`.
118
+ - `store()` pipelines `VADD` + `SET ex=ttl` (single round-trip).
119
+ - VSIM workaround: redis-py's `parse_vsim_result` callback misparses RESP3 dict responses
120
+ when the WITHSCORES option flag isn't propagated (found in 8.0.0b2, callback unchanged
121
+ in 8.0.0 GA — dependency is now `redis>=8.0.0,<9`). `search()` issues a raw
122
+ `execute_command("VSIM", ...)` and `_parse_vsim_response` handles both RESP3 dict and
123
+ RESP2 flat-list shapes.
124
+ - TTL: only the response body expires. `Khazad.lookup` prunes the orphaned vector
125
+ (`store.delete(scope, key)`) when the body is gone, then counts a miss.
126
+
127
+ ### Testing with Dependency Injection
128
+
129
+ For tests, `Khazad` accepts `_vector_store` and `_embedder_instance` keyword args (both or
130
+ neither) to bypass Redis and real embedding models:
131
+
132
+ ```python
133
+ cache = Khazad(
134
+ threshold=0.99,
135
+ _vector_store=InMemoryVectorStore(),
136
+ _embedder_instance=FakeEmbedder(),
137
+ )
138
+ ```
139
+
140
+ This skips the Redis connection and transport patching entirely (tests call
141
+ `install(cache)` / `uninstall()` themselves).
142
+
143
+ ## Critical Rules
144
+
145
+ ### Git Operations
146
+
147
+ **CRITICAL**: NEVER use `git push` or attempt to push to remote repositories. The user will handle all git push operations.
148
+
149
+ ### Code Quality
150
+
151
+ **IMPORTANT**: Always run `uv run python -m ruff check . --fix && uv run python -m ruff format .` before committing.
152
+
153
+ ### README.md Maintenance
154
+
155
+ **IMPORTANT**: DO NOT modify README.md unless explicitly requested.
156
+
157
+ ### No Pydantic
158
+
159
+ The project deliberately removed `pydantic` as a dependency. Validation is done inline in `Khazad.__init__`.
160
+ Do not reintroduce pydantic.
161
+
162
+ ### No Separate Engine Class
163
+
164
+ All cache logic (lookup, store, stats, key generation) lives in the `Khazad` class.
165
+ Do not create a separate `CacheEngine` or orchestrator class.
166
+
167
+ ### Python 3.10 Compatibility
168
+
169
+ `requires-python = ">=3.10"` (ruff target py310). No 3.11+ stdlib APIs (`tomllib`,
170
+ `StrEnum`, `asyncio.timeout`, exception groups). Every module starts with
171
+ `from __future__ import annotations`. Dev venv is pinned to 3.13 via `.python-version`.
172
+
173
+ ## Testing Notes
174
+
175
+ - **Unit tests** use `FakeEmbedder` and `InMemoryVectorStore` from `tests/conftest.py` — no external services needed
176
+ - **Integration tests** use `httpx.MockTransport` to simulate LLM APIs — no real API keys needed
177
+ - **Redis store tests** (`test_redis_store.py`) mock the redis-py client with plain `Mock`/`MagicMock` (the store is sync — never use `AsyncMock` there)
178
+ - **Stress tests** are marked with `@pytest.mark.stress`
179
+ - `pytest-asyncio` runs in auto mode (`asyncio_mode = "auto"`)
180
+ - `conftest.py` provides fixtures for all provider request/response bodies
181
+ - `FakeEmbedder` hashes with sha256, which is deterministic across processes (unlike the built-in `hash()`, which is salted per process)
182
+
183
+ ## Project Structure
184
+
185
+ ```text
186
+ khazad/
187
+ ├── __init__.py # Public API + module-level singleton (init/stop/get_stats/flush)
188
+ ├── khazad.py # Khazad class + PreparedRequest — all cache logic
189
+ ├── _models.py # Domain models: ParsedRequest, CacheHit, Stats
190
+ ├── _transport.py # httpx patch, cached transports, tee/replay streams
191
+ ├── ports/ # Abstract interfaces (Hexagonal Architecture boundaries)
192
+ │ ├── embedder.py # Embedder ABC (embed, dimension)
193
+ │ ├── parser.py # ProviderParser ABC + shared SSE/text helpers
194
+ │ └── store.py # VectorStore ABC (scope-aware search/store/delete)
195
+ └── adapters/ # Concrete implementations
196
+ ├── embedders/
197
+ │ ├── huggingface.py # HuggingFaceEmbedder (sentence-transformers, free)
198
+ │ └── openai.py # OpenAIEmbedder (OpenAI API, paid)
199
+ ├── parsers/
200
+ │ ├── openai.py # OpenAI Chat Completions (+ Azure, proxies)
201
+ │ ├── openai_responses.py # OpenAI Responses API
202
+ │ ├── anthropic.py # Anthropic Messages
203
+ │ └── gemini.py # Google Gemini
204
+ └── redis/
205
+ └── store.py # RedisVectorStore (Redis 8 Vector Sets)
206
+
207
+ tests/
208
+ ├── conftest.py # FakeEmbedder, InMemoryVectorStore, provider fixtures
209
+ ├── unit/ # Pure logic tests (no I/O)
210
+ │ ├── test_config.py # Khazad init validation
211
+ │ ├── test_engine.py # prepare/lookup/store, scoping, stats, embedding reuse
212
+ │ ├── test_models.py # Stats, CacheHit
213
+ │ └── test_parsers/ # Per-provider parser tests (incl. SSE round-trips)
214
+ ├── integration/ # Full lifecycle with mock transports
215
+ │ ├── test_end_to_end.py
216
+ │ ├── test_interceptor.py
217
+ │ ├── test_redis_store.py # Mocked redis-py client
218
+ │ ├── test_streaming.py # SSE capture + replay, sync & async
219
+ │ ├── test_provider_openai.py # OpenAI Chat + Responses
220
+ │ ├── test_provider_azure_openai.py # Azure deployments
221
+ │ ├── test_provider_openai_compat.py # LiteLLM, vLLM, Ollama
222
+ │ ├── test_provider_anthropic.py # Anthropic Claude
223
+ │ └── test_provider_gemini.py # Google Gemini
224
+ └── stress/ # Concurrent access, thread safety
225
+ └── test_concurrent.py
226
+
227
+ examples/
228
+ └── *.py # One script per provider/feature; azure_openai.py doubles as the smoke test against a real endpoint
229
+
230
+ docs/
231
+ └── _static/ # Logos (logo-light.png / logo-dark.png are transparent)
232
+ ```
233
+
234
+ ## Supported Providers
235
+
236
+ | Provider | Parser | URL pattern matched |
237
+ |---|---|---|
238
+ | OpenAI Chat | `OpenAIParser` | any path ending `/chat/completions` |
239
+ | OpenAI Responses | `OpenAIResponsesParser` | any path ending `/responses` |
240
+ | Azure OpenAI | (covered by `OpenAIParser`) | any path ending `/chat/completions` |
241
+ | OpenAI-compat proxies | (covered by `OpenAIParser`) | any path ending `/chat/completions` |
242
+ | Anthropic | `AnthropicParser` | `api.anthropic.com/v1/messages` |
243
+ | Google Gemini | `GeminiParser` | `generativelanguage.googleapis.com/*:generateContent` |
244
+
245
+ ## Cache Flow
246
+
247
+ ```text
248
+ Request → prepare(request)
249
+ None → pass-through to real API (not counted in stats)
250
+ PreparedRequest → embed(conversation) → VSIM in scope {host}/{model}
251
+ HIT → replay cached JSON (or synthesize SSE stream)
252
+ MISS → forward to API
253
+ non-streaming 200 → store JSON body
254
+ SSE 200 → tee stream to client, reconstruct JSON at end, store
255
+ ```
@@ -0,0 +1,81 @@
1
+ # Contributing to Khazad
2
+
3
+ Thanks for considering a contribution! This document covers everything you need to get a change merged.
4
+
5
+ ## Development setup
6
+
7
+ Requirements: Python >= 3.10 and [uv](https://docs.astral.sh/uv/).
8
+
9
+ ```bash
10
+ git clone https://github.com/GuglielmoCerri/khazad.git
11
+ cd khazad
12
+ uv sync --group dev
13
+ ```
14
+
15
+ `uv sync` creates `.venv` and installs the project in editable mode — nothing else to do.
16
+
17
+ A running Redis is **not** required for the test suite (fakes and mock transports are used everywhere). For manual end-to-end testing against a real instance:
18
+
19
+ ```bash
20
+ docker run -d --name redis8 -p 6379:6379 redis:8
21
+ ```
22
+
23
+ ## Running tests
24
+
25
+ ```bash
26
+ uv run python -m pytest tests/ -q # full suite
27
+ uv run python -m pytest tests/unit/ -q # unit tests only
28
+ uv run python -m pytest -m "not stress" # skip stress tests
29
+ ```
30
+
31
+ The suite must be green before any PR. New behavior needs new tests:
32
+
33
+ - **Parsers** → `tests/unit/test_parsers/` (include an SSE round-trip test if the parser supports streaming: `stream_chunks` → `response_from_stream` must preserve content)
34
+ - **Cache logic** → `tests/unit/test_engine.py`
35
+ - **Transport / interception** → `tests/integration/`
36
+
37
+ ## Lint and format
38
+
39
+ ```bash
40
+ uv run python -m ruff check . --fix
41
+ uv run python -m ruff format .
42
+ ```
43
+
44
+ CI rejects unformatted code. Configuration lives in `pyproject.toml` (line length 99, target py310).
45
+
46
+ ## Architecture ground rules
47
+
48
+ Read `CLAUDE.md` for the full picture. The non-negotiables:
49
+
50
+ 1. **One entry point.** All cache logic lives in the `Khazad` class — do not introduce a separate engine, orchestrator, or config object.
51
+ 2. **No pydantic.** Validation is inline in `Khazad.__init__`. Plain dataclasses for models.
52
+ 3. **Ports & Adapters.** New providers implement `ProviderParser` (`khazad/ports/parser.py`); new storage backends implement `VectorStore`; new embedders implement `Embedder`. Adapters never import other adapters.
53
+ 4. **Parse once.** A request body is JSON-parsed exactly once (`parse_request`); the embedding is computed at most once per request. Don't add code paths that re-parse or re-embed.
54
+ 5. **The cache stores canonical JSON only.** Streamed responses must be reconstructed via `response_from_stream` before storing — never cache raw SSE bytes.
55
+ 6. **Python 3.10 compatibility.** No 3.11+ stdlib APIs (`tomllib`, `StrEnum`, `asyncio.timeout`, exception groups...). Use `from __future__ import annotations` in every module.
56
+
57
+ ## Adding a new provider parser
58
+
59
+ 1. Create `khazad/adapters/parsers/<provider>.py` implementing `ProviderParser`:
60
+ - `can_handle(url)` — match by URL path suffix when the API is host-agnostic (proxies!), by host only when the schema is unique to one vendor.
61
+ - `parse_request(request)` — return a `ParsedRequest(prompt, model, stream)`. The prompt must include the **full conversation** (`role: text` lines). Raise `ValueError` for bodies you can't understand.
62
+ - Override `stream_chunks` / `response_from_stream` only if the provider streams over SSE.
63
+ 2. Register it in the `_parsers` list in `khazad/khazad.py`.
64
+ 3. Add request/response fixtures in `tests/conftest.py`, unit tests in `tests/unit/test_parsers/`, and an interception test in `tests/integration/`.
65
+ 4. Document the URL pattern in the README "Supported Providers" table.
66
+
67
+ ## Pull requests
68
+
69
+ - Branch from `main`; one logical change per PR.
70
+ - Subject line in imperative mood ("Add Mistral parser", not "Added...").
71
+ - Explain *why* in the body if it isn't obvious from the diff.
72
+ - Update `CHANGELOG.md` under `[Unreleased]` (add that section at the top if it doesn't exist yet).
73
+ - Don't bump the version — maintainers handle releases.
74
+
75
+ ## Reporting bugs
76
+
77
+ Open an issue with: Python version, khazad version, the SDK and provider you're calling, a minimal reproduction, and (if relevant) `log_level="DEBUG"` output. For suspected cache-correctness issues, include the two prompts involved and your `threshold`.
78
+
79
+ ## Security
80
+
81
+ Don't open public issues for security problems — see the contact in `pyproject.toml`.
khazad-0.1.2/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Guglielmo Cerri
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.