bijotel 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bijotel-1.0.0/.audit_tmp/01_STATE.md +536 -0
- bijotel-1.0.0/.audit_tmp/03_GAPS.md +104 -0
- bijotel-1.0.0/.audit_tmp/04_NEXT.md +92 -0
- bijotel-1.0.0/.dockerignore +36 -0
- bijotel-1.0.0/.env.bak.20260520-aig +1 -0
- bijotel-1.0.0/.env.example +8 -0
- bijotel-1.0.0/.env.example.bak.20260520-aig +4 -0
- bijotel-1.0.0/.github/workflows/ci.yml +25 -0
- bijotel-1.0.0/.gitignore +23 -0
- bijotel-1.0.0/CHANGELOG.md +925 -0
- bijotel-1.0.0/Dockerfile +82 -0
- bijotel-1.0.0/LICENSE +21 -0
- bijotel-1.0.0/PKG-INFO +614 -0
- bijotel-1.0.0/README.md +552 -0
- bijotel-1.0.0/docker-compose.yml +41 -0
- bijotel-1.0.0/pyproject.toml +101 -0
- bijotel-1.0.0/requirements-lock.txt +66 -0
- bijotel-1.0.0/ruff.toml +6 -0
- bijotel-1.0.0/scripts/e2e_smoke.py +233 -0
- bijotel-1.0.0/scripts/e2e_smoke.py.bak.20260520-aig +229 -0
- bijotel-1.0.0/scripts/f1_analyze.py +116 -0
- bijotel-1.0.0/scripts/f1_capture.py +103 -0
- bijotel-1.0.0/scripts/f1_capture.py.bak.20260520-aig +99 -0
- bijotel-1.0.0/scripts/f2_capture_chain.py +92 -0
- bijotel-1.0.0/scripts/f2_capture_chain.py.bak.20260520-aig +88 -0
- bijotel-1.0.0/scripts/f3_capture_cas.py +164 -0
- bijotel-1.0.0/scripts/f3_capture_cas.py.bak.20260520-aig +160 -0
- bijotel-1.0.0/scripts/f4_capture_policy.py +176 -0
- bijotel-1.0.0/scripts/f4_capture_policy.py.bak.20260520-aig +172 -0
- bijotel-1.0.0/scripts/f5_capture_decorator.py +175 -0
- bijotel-1.0.0/scripts/gena_deploy/DEPLOY_PLAN.md +336 -0
- bijotel-1.0.0/scripts/gena_deploy/capture_baseline.py +305 -0
- bijotel-1.0.0/scripts/gena_deploy/compare_baseline.py +268 -0
- bijotel-1.0.0/scripts/gena_deploy/compose_patch.yaml +38 -0
- bijotel-1.0.0/scripts/gena_deploy/patch_compose.py +120 -0
- bijotel-1.0.0/scripts/gena_deploy/patch_runner.py +84 -0
- bijotel-1.0.0/scripts/gena_deploy/requirements_addition.txt +14 -0
- bijotel-1.0.0/scripts/gena_deploy/runner_hook.py.snippet +49 -0
- bijotel-1.0.0/src/bijotel/__init__.py +105 -0
- bijotel-1.0.0/src/bijotel/adapters/__init__.py +12 -0
- bijotel-1.0.0/src/bijotel/adapters/anthropic_adapter.py +118 -0
- bijotel-1.0.0/src/bijotel/adapters/base.py +123 -0
- bijotel-1.0.0/src/bijotel/adapters/openai_adapter.py +135 -0
- bijotel-1.0.0/src/bijotel/adapters/openai_extractors.py +112 -0
- bijotel-1.0.0/src/bijotel/api/__init__.py +31 -0
- bijotel-1.0.0/src/bijotel/api/app.py +118 -0
- bijotel-1.0.0/src/bijotel/cli/__init__.py +11 -0
- bijotel-1.0.0/src/bijotel/cli/commands.py +531 -0
- bijotel-1.0.0/src/bijotel/cli/main.py +185 -0
- bijotel-1.0.0/src/bijotel/core/__init__.py +1 -0
- bijotel-1.0.0/src/bijotel/core/init.py +59 -0
- bijotel-1.0.0/src/bijotel/decorators/__init__.py +18 -0
- bijotel-1.0.0/src/bijotel/decorators/extractors.py +104 -0
- bijotel-1.0.0/src/bijotel/decorators/trace_genai.py +272 -0
- bijotel-1.0.0/src/bijotel/exporters/__init__.py +1 -0
- bijotel-1.0.0/src/bijotel/layers/__init__.py +71 -0
- bijotel-1.0.0/src/bijotel/layers/ast_safety.py +677 -0
- bijotel-1.0.0/src/bijotel/layers/containment.py +233 -0
- bijotel-1.0.0/src/bijotel/layers/fingerprint.py +481 -0
- bijotel-1.0.0/src/bijotel/layers/misalignment.py +424 -0
- bijotel-1.0.0/src/bijotel/layers/routing.py +490 -0
- bijotel-1.0.0/src/bijotel/policy/__init__.py +30 -0
- bijotel-1.0.0/src/bijotel/policy/decision.py +69 -0
- bijotel-1.0.0/src/bijotel/policy/engine.py +42 -0
- bijotel-1.0.0/src/bijotel/policy/guard.py +131 -0
- bijotel-1.0.0/src/bijotel/policy/prices.py +45 -0
- bijotel-1.0.0/src/bijotel/policy/prompt_patterns.py +101 -0
- bijotel-1.0.0/src/bijotel/policy/rules.py +536 -0
- bijotel-1.0.0/src/bijotel/processors/__init__.py +18 -0
- bijotel-1.0.0/src/bijotel/processors/canonical.py +127 -0
- bijotel-1.0.0/src/bijotel/processors/cas.py +204 -0
- bijotel-1.0.0/src/bijotel/processors/dag.py +239 -0
- bijotel-1.0.0/src/bijotel/processors/export.py +228 -0
- bijotel-1.0.0/src/bijotel/processors/hmac_chain.py +291 -0
- bijotel-1.0.0/src/bijotel/regression/__init__.py +20 -0
- bijotel-1.0.0/src/bijotel/regression/baseline.py +197 -0
- bijotel-1.0.0/src/bijotel/regression/detector.py +251 -0
- bijotel-1.0.0/tests/__init__.py +0 -0
- bijotel-1.0.0/tests/conftest.py +18 -0
- bijotel-1.0.0/tests/test_anthropic_adapter.py +142 -0
- bijotel-1.0.0/tests/test_ast_safety.py +381 -0
- bijotel-1.0.0/tests/test_canonical.py +206 -0
- bijotel-1.0.0/tests/test_cas.py +129 -0
- bijotel-1.0.0/tests/test_cli.py +286 -0
- bijotel-1.0.0/tests/test_cli_cost_calc.py +137 -0
- bijotel-1.0.0/tests/test_cli_export.py +198 -0
- bijotel-1.0.0/tests/test_cli_helpers.py +47 -0
- bijotel-1.0.0/tests/test_compliance_rules.py +130 -0
- bijotel-1.0.0/tests/test_containment.py +193 -0
- bijotel-1.0.0/tests/test_core_init.py +42 -0
- bijotel-1.0.0/tests/test_dag.py +110 -0
- bijotel-1.0.0/tests/test_decorators_extractors.py +80 -0
- bijotel-1.0.0/tests/test_decorators_trace_genai.py +272 -0
- bijotel-1.0.0/tests/test_fingerprint.py +372 -0
- bijotel-1.0.0/tests/test_hardening.py +314 -0
- bijotel-1.0.0/tests/test_hmac_chain.py +286 -0
- bijotel-1.0.0/tests/test_misalignment.py +202 -0
- bijotel-1.0.0/tests/test_openai_adapter.py +324 -0
- bijotel-1.0.0/tests/test_policy_decision.py +48 -0
- bijotel-1.0.0/tests/test_policy_engine.py +53 -0
- bijotel-1.0.0/tests/test_policy_guard.py +132 -0
- bijotel-1.0.0/tests/test_policy_rate_limit.py +149 -0
- bijotel-1.0.0/tests/test_policy_rules.py +144 -0
- bijotel-1.0.0/tests/test_processors_export.py +192 -0
- bijotel-1.0.0/tests/test_prompt_pattern_deny.py +267 -0
- bijotel-1.0.0/tests/test_provider_base.py +83 -0
- bijotel-1.0.0/tests/test_regression.py +378 -0
- bijotel-1.0.0/tests/test_routing.py +272 -0
- bijotel-1.0.0/tests/test_serve.py +217 -0
- bijotel-1.0.0/tests/test_smoke.py +17 -0
- bijotel-1.0.0/tests/test_trace_genai_provider_integration.py +177 -0
|
@@ -0,0 +1,536 @@
|
|
|
1
|
+
# DOC 01 — STATE | BIJOTEL Audit T+7d | 2026-05-17
|
|
2
|
+
|
|
3
|
+
Generated-at (UTC): 2026-05-17T07:30:41Z
|
|
4
|
+
Repo: C:\Users\User\Desktop\AGENTY 2026\BIJUTERII S3\BIJUTERII IMPLEMENT\BIJOTEL
|
|
5
|
+
HEAD: 0bffb6cc32ef1dc6d383090024a904578412d9be (F11, v0.5.0)
|
|
6
|
+
Working tree: clean (`git status -s -uno` empty)
|
|
7
|
+
Interpreter: .venv/Scripts/python — Python 3.12.10
|
|
8
|
+
|
|
9
|
+
All numbers below are empirical. Each subsection carries a `Method:` line with
|
|
10
|
+
the exact command. Language is restricted to "is" / "unknown". Claimed-vs-actual
|
|
11
|
+
mismatches are marked **REFRAMING**.
|
|
12
|
+
|
|
13
|
+
---
|
|
14
|
+
|
|
15
|
+
## A. CODEBASE INVENTORY
|
|
16
|
+
|
|
17
|
+
**Method:** `find src -name "*.py" -exec wc -l {} \;`,
|
|
18
|
+
`find tests -name "*.py" -exec wc -l {} \;`,
|
|
19
|
+
project total: `find . -name "*.py" -not -path "./.venv/*" -not -path "./.git/*" | xargs wc -l`.
|
|
20
|
+
Module purpose: first docstring line via `head -1 src/bijotel/<f>.py`.
|
|
21
|
+
|
|
22
|
+
### LOC totals
|
|
23
|
+
|
|
24
|
+
| Scope | LOC | Method |
|
|
25
|
+
|---|---|---|
|
|
26
|
+
| `src/` (all .py) | 3625 | `find src -name "*.py" -exec cat {} \; | wc -l` |
|
|
27
|
+
| `tests/` (all .py) | 3807 | `find tests -name "*.py" -exec cat {} \; | wc -l` |
|
|
28
|
+
| Whole project (.py, excl .venv/.git; incl scripts/) | 13826 | `find . -name "*.py" ... | xargs wc -l | tail -1` |
|
|
29
|
+
|
|
30
|
+
Note: project total (13826) includes `src/` + `tests/` + `scripts/` (F0–F5
|
|
31
|
+
capture scripts + `gena_deploy/`). `src/`+`tests/` alone = 7432.
|
|
32
|
+
|
|
33
|
+
### src/bijotel — LOC per module, grouped by subpackage
|
|
34
|
+
|
|
35
|
+
Top-level
|
|
36
|
+
| File | LOC | Purpose (from docstring) |
|
|
37
|
+
|---|---|---|
|
|
38
|
+
| `__init__.py` | 59 | Package root; re-exports public API; `__version__ = "0.5.0"` |
|
|
39
|
+
|
|
40
|
+
adapters (F7/F9)
|
|
41
|
+
| File | LOC | Purpose |
|
|
42
|
+
|---|---|---|
|
|
43
|
+
| `__init__.py` | 12 | Subpackage exports (Provider, ProviderResponse, AnthropicAdapter, OpenAIAdapter) |
|
|
44
|
+
| `base.py` | 123 | Provider Protocol — base contract for LLM provider adapters (F7) |
|
|
45
|
+
| `anthropic_adapter.py` | 109 | AnthropicAdapter — Provider implementation for Anthropic Claude (F7) |
|
|
46
|
+
| `openai_adapter.py` | 135 | OpenAIAdapter — Provider implementation for OpenAI (F9, v0.4.0) |
|
|
47
|
+
| `openai_extractors.py` | 112 | Request/response extractors for OpenAI SDK (F9) |
|
|
48
|
+
|
|
49
|
+
cli (F6)
|
|
50
|
+
| File | LOC | Purpose |
|
|
51
|
+
|---|---|---|
|
|
52
|
+
| `__init__.py` | 11 | CLI subpackage marker |
|
|
53
|
+
| `main.py` | 152 | BIJOTEL CLI entry point (argparse dispatch) |
|
|
54
|
+
| `commands.py` | 480 | CLI subcommand handlers (verify/inspect/stats/list/export/verify-export/regression) |
|
|
55
|
+
|
|
56
|
+
core
|
|
57
|
+
| File | LOC | Purpose |
|
|
58
|
+
|---|---|---|
|
|
59
|
+
| `__init__.py` | 1 | Core subpackage marker |
|
|
60
|
+
| `init.py` | 59 | Core initialization: TracerProvider + ConsoleExporter for F1 schema discovery |
|
|
61
|
+
|
|
62
|
+
decorators (F5) — *not in the prompt's subpackage list but present in code*
|
|
63
|
+
| File | LOC | Purpose |
|
|
64
|
+
|---|---|---|
|
|
65
|
+
| `__init__.py` | 18 | Exports trace_genai, wrap |
|
|
66
|
+
| `extractors.py` | 104 | Default request/response extractors for common LLM call patterns |
|
|
67
|
+
| `trace_genai.py` | 272 | @trace_genai decorator + wrap() runtime alternative |
|
|
68
|
+
|
|
69
|
+
exporters
|
|
70
|
+
| File | LOC | Purpose |
|
|
71
|
+
|---|---|---|
|
|
72
|
+
| `__init__.py` | 1 | Empty namespace marker (no implementation; export logic lives in processors/export.py) |
|
|
73
|
+
|
|
74
|
+
processors (F2/F3/F8)
|
|
75
|
+
| File | LOC | Purpose |
|
|
76
|
+
|---|---|---|
|
|
77
|
+
| `__init__.py` | 15 | Exports export_chain, verify_export |
|
|
78
|
+
| `canonical.py` | 127 | Canonicalization: span -> deterministic JSON bytes via JCS (RFC 8785) |
|
|
79
|
+
| `cas.py` | 139 | CasSpanProcessor: content-addressable storage, dedup on input-only body |
|
|
80
|
+
| `hmac_chain.py` | 202 | HmacChainSpanProcessor: tamper-evident audit chain over GenAI spans |
|
|
81
|
+
| `export.py` | 228 | Portable signed JSON export of HMAC chain (F8) |
|
|
82
|
+
|
|
83
|
+
policy (F4/F8/F11)
|
|
84
|
+
| File | LOC | Purpose |
|
|
85
|
+
|---|---|---|
|
|
86
|
+
| `__init__.py` | 24 | Exports Decision, PolicyDeniedError, PolicyEngine, guard, 5 rule factories |
|
|
87
|
+
| `decision.py` | 69 | Decision: 3-state (allow/warn/deny) result of policy rule evaluation |
|
|
88
|
+
| `engine.py` | 42 | PolicyEngine: evaluate list of rules against a request |
|
|
89
|
+
| `guard.py` | 131 | guard(): decorator wrapping callable with policy gate |
|
|
90
|
+
| `prices.py` | 45 | Anthropic price snapshot, USD per 1k tokens |
|
|
91
|
+
| `prompt_patterns.py` | 101 | Default prompt patterns for jailbreak / prompt-injection detection (F11) |
|
|
92
|
+
| `rules.py` | 386 | Built-in policy rules; each rule is callable (request_dict) -> Decision |
|
|
93
|
+
|
|
94
|
+
regression (F12)
|
|
95
|
+
| File | LOC | Purpose |
|
|
96
|
+
|---|---|---|
|
|
97
|
+
| `__init__.py` | 20 | Exports RegressionDetector, Anomaly, AnomalyMethod, DimensionStats, compute_baseline |
|
|
98
|
+
| `baseline.py` | 197 | Rolling baseline aggregation for regression detection (F12) |
|
|
99
|
+
| `detector.py` | 251 | Anomaly detection via z-score + IQR thresholds (F12, Bijuteria #16) |
|
|
100
|
+
|
|
101
|
+
### Feature-to-code cross-check (CHANGELOG v0.1.0–v0.5.0)
|
|
102
|
+
|
|
103
|
+
**Method:** map each CHANGELOG feature ID to source paths confirmed present by
|
|
104
|
+
`find src`.
|
|
105
|
+
|
|
106
|
+
| Feature | CHANGELOG ref | Code present | Evidence path |
|
|
107
|
+
|---|---|---|---|
|
|
108
|
+
| F0 skeleton | v0.1.0 | yes | package tree + `__init__.py` |
|
|
109
|
+
| F1 TracerProvider/semconv | v0.1.0 | yes | `core/init.py` |
|
|
110
|
+
| F2 HMAC chain | v0.1.0 | yes | `processors/hmac_chain.py`, `processors/canonical.py` |
|
|
111
|
+
| F3 CAS | v0.1.0 | yes | `processors/cas.py` |
|
|
112
|
+
| F4 Policy gate | v0.1.0 | yes | `policy/decision.py`, `engine.py`, `guard.py`, `rules.py`, `prices.py` |
|
|
113
|
+
| F5 @trace_genai/wrap | v0.1.0 | yes | `decorators/trace_genai.py`, `decorators/extractors.py` |
|
|
114
|
+
| F6 CLI | v0.1.0 | yes | `cli/main.py`, `cli/commands.py` |
|
|
115
|
+
| F7 Provider Protocol + AnthropicAdapter | v0.1.0 | yes | `adapters/base.py`, `adapters/anthropic_adapter.py` |
|
|
116
|
+
| F8 Portable export + rate_limit rule | v0.2.0 | yes | `processors/export.py`, `rate_limit_calls_per_minute` in `policy/rules.py` |
|
|
117
|
+
| F12 Regression Detection | v0.3.0 | yes | `regression/baseline.py`, `regression/detector.py` |
|
|
118
|
+
| F9 OpenAIAdapter | v0.4.0 | yes | `adapters/openai_adapter.py`, `adapters/openai_extractors.py` |
|
|
119
|
+
| F11 prompt_pattern_deny | v0.5.0 | yes | `policy/prompt_patterns.py`, `prompt_pattern_deny` in `policy/rules.py` |
|
|
120
|
+
|
|
121
|
+
All 12 claimed features (F0–F9, F11, F12) have corresponding code present. No
|
|
122
|
+
feature is claimed-but-missing. Note: feature numbering is non-contiguous in the
|
|
123
|
+
CHANGELOG itself (F10 is never referenced; F12 ships in v0.3.0 before F9 in
|
|
124
|
+
v0.4.0 and F11 in v0.5.0 — the F-numbers are not chronological). This is a
|
|
125
|
+
CHANGELOG numbering style, not a code gap.
|
|
126
|
+
|
|
127
|
+
**REFRAMING (prompt scope vs reality):** The audit prompt enumerates
|
|
128
|
+
subpackages "adapters, cli, core, exporters, processors, policy, regression,
|
|
129
|
+
plus top-level". Reality adds an 8th subpackage `decorators/` (3 files, 394 LOC,
|
|
130
|
+
houses F5 `@trace_genai`/`wrap`). `exporters/` is an empty namespace marker
|
|
131
|
+
(1 LOC, no implementation) — F8 export logic actually lives in
|
|
132
|
+
`processors/export.py`, not `exporters/`.
|
|
133
|
+
|
|
134
|
+
---
|
|
135
|
+
|
|
136
|
+
## B. TESTS
|
|
137
|
+
|
|
138
|
+
**Method (collection):** `.venv/Scripts/python -m pytest --co -q | tail -5`.
|
|
139
|
+
**Method (run):** `.venv/Scripts/python -m pytest -q`.
|
|
140
|
+
**Method (coverage):** `.venv/Scripts/python -m pytest --cov=bijotel --cov-report=term-missing -q`.
|
|
141
|
+
|
|
142
|
+
### Totals (raw evidence)
|
|
143
|
+
|
|
144
|
+
- Collected: `211 tests collected in 0.36s`
|
|
145
|
+
- Run result: `209 passed, 2 skipped in 12.93s`
|
|
146
|
+
(a second independent run: `209 passed, 2 skipped in 12.63s`;
|
|
147
|
+
coverage run: `209 passed, 2 skipped in 14.61s`)
|
|
148
|
+
- Wall time: ~12.6–14.6 s depending on coverage instrumentation
|
|
149
|
+
(non-coverage runs ~12.6–12.9 s).
|
|
150
|
+
|
|
151
|
+
CHANGELOG v0.5.0 claim "209 passed, 2 skipped" — **VERIFIED, matches exactly.**
|
|
152
|
+
Collected count is 211 (= 209 passed + 2 skipped, consistent).
|
|
153
|
+
|
|
154
|
+
### Per-file test count
|
|
155
|
+
|
|
156
|
+
**Method:** `pytest --co -q | grep "::" | sed 's/::.*//' | sort | uniq -c | sort -rn`.
|
|
157
|
+
|
|
158
|
+
| Test file | Count |
|
|
159
|
+
|---|---|
|
|
160
|
+
| test_openai_adapter.py | 18 |
|
|
161
|
+
| test_regression.py | 17 |
|
|
162
|
+
| test_prompt_pattern_deny.py | 16 |
|
|
163
|
+
| test_cli.py | 16 |
|
|
164
|
+
| test_policy_rules.py | 13 |
|
|
165
|
+
| test_processors_export.py | 12 |
|
|
166
|
+
| test_hmac_chain.py | 12 |
|
|
167
|
+
| test_canonical.py | 12 |
|
|
168
|
+
| test_decorators_trace_genai.py | 10 |
|
|
169
|
+
| test_policy_rate_limit.py | 9 |
|
|
170
|
+
| test_cli_cost_calc.py | 9 |
|
|
171
|
+
| test_anthropic_adapter.py | 9 |
|
|
172
|
+
| test_cli_export.py | 8 |
|
|
173
|
+
| test_cli_helpers.py | 7 |
|
|
174
|
+
| test_provider_base.py | 6 |
|
|
175
|
+
| test_policy_decision.py | 6 |
|
|
176
|
+
| test_decorators_extractors.py | 6 |
|
|
177
|
+
| test_cas.py | 6 |
|
|
178
|
+
| test_trace_genai_provider_integration.py | 5 |
|
|
179
|
+
| test_policy_guard.py | 5 |
|
|
180
|
+
| test_policy_engine.py | 4 |
|
|
181
|
+
| test_core_init.py | 3 |
|
|
182
|
+
| test_smoke.py | 2 |
|
|
183
|
+
| **Sum** | **211** |
|
|
184
|
+
|
|
185
|
+
### Coverage
|
|
186
|
+
|
|
187
|
+
**Method:** `.venv/Scripts/python -m pytest --cov=bijotel --cov-report=term-missing -q` (term-missing TOTAL line).
|
|
188
|
+
|
|
189
|
+
Raw evidence (exact TOTAL line):
|
|
190
|
+
```
|
|
191
|
+
TOTAL 1340 74 94%
|
|
192
|
+
```
|
|
193
|
+
|
|
194
|
+
- Overall coverage: **94%** (1340 statements, 74 missed).
|
|
195
|
+
- Modules <90%: none. Module flagged below the CHANGELOG's 92% figure: `regression/baseline.py` = **91%**
|
|
196
|
+
(90 stmts, 8 miss; missing 83, 87, 93, 96, 140, 160-161, 171).
|
|
197
|
+
- Lowest-but-≥90% cluster: `cli/commands.py` 90%, `regression/detector.py` 93%,
|
|
198
|
+
`adapters/anthropic_adapter.py` 93%, `processors/export.py` 93%,
|
|
199
|
+
`decorators/trace_genai.py` 93%.
|
|
200
|
+
- `policy/prices.py` 92%, all other modules ≥94%; 16 modules at 100%.
|
|
201
|
+
|
|
202
|
+
CHANGELOG v0.5.0 claim "coverage maintained ≥92%" — actual TOTAL is **94%**,
|
|
203
|
+
which satisfies "≥92%" at the package level. **However**, one module
|
|
204
|
+
(`regression/baseline.py`) is at 91%, below 92%. The CHANGELOG sentence is true
|
|
205
|
+
as a package-level statement, false if read as a per-module floor.
|
|
206
|
+
|
|
207
|
+
**REFRAMING (CHANGELOG v0.3.0 claim vs current):** v0.3.0 entry states
|
|
208
|
+
"Coverage maintained at 94% overall (regression module: 91% baseline.py, 91%
|
|
209
|
+
detector.py)". Current measured: overall 94% (matches), `baseline.py` 91%
|
|
210
|
+
(matches), but `detector.py` is now **93%**, not 91% — improved since v0.3.0,
|
|
211
|
+
CHANGELOG not updated. Direction is favorable (drift is an improvement, not a
|
|
212
|
+
regression).
|
|
213
|
+
|
|
214
|
+
### Skipped tests (2) — identities + reasons
|
|
215
|
+
|
|
216
|
+
**Method:** `.venv/Scripts/python -m pytest -q -rs`.
|
|
217
|
+
|
|
218
|
+
| Test | File:line | Skip reason (verbatim from `@pytest.mark.skipif`) |
|
|
219
|
+
|---|---|---|
|
|
220
|
+
| `test_adapter_complete_smoke_real_call` | tests/test_anthropic_adapter.py:127 (marker @123) | "ANTHROPIC_API_KEY not set — skipping smoke" |
|
|
221
|
+
| `test_openai_adapter_complete_smoke_real_call` | tests/test_openai_adapter.py:306 (marker @302) | "OPENAI_API_KEY not set — skipping real OpenAI smoke" |
|
|
222
|
+
|
|
223
|
+
Both skips are conditional on absence of provider API keys (live-network smoke
|
|
224
|
+
tests). They are expected skips in a no-credentials environment, not failures.
|
|
225
|
+
A third conditional skip exists in `tests/test_cli.py:271`
|
|
226
|
+
(`pytest.skip` if the `bijotel` console-script is not found in the venv) — it
|
|
227
|
+
did NOT trigger here (the entry point is installed), so the count is 2.
|
|
228
|
+
|
|
229
|
+
### Test classification (unit / integration / smoke)
|
|
230
|
+
|
|
231
|
+
**Method:** read test files; classify by whether a test exercises a single
|
|
232
|
+
unit in-process (unit), composes multiple components / spawns a subprocess
|
|
233
|
+
(integration), or makes a real network LLM call (smoke).
|
|
234
|
+
|
|
235
|
+
- **Smoke (real external API): 2** — `test_adapter_complete_smoke_real_call`,
|
|
236
|
+
`test_openai_adapter_complete_smoke_real_call`. Both skipped without keys.
|
|
237
|
+
- **Integration (multi-component / subprocess, in-process or local): ~7**
|
|
238
|
+
- `tests/test_trace_genai_provider_integration.py` (5) — decorator + provider
|
|
239
|
+
adapter end-to-end through OTel.
|
|
240
|
+
- `tests/test_prompt_pattern_deny.py::test_prompt_pattern_deny_integration_with_policy_engine` (1)
|
|
241
|
+
— rule composed into `PolicyEngine`.
|
|
242
|
+
- `tests/test_cli.py::test_cli_entry_point_via_subprocess` (1) — spawns the
|
|
243
|
+
real `bijotel` binary via `subprocess.run`.
|
|
244
|
+
- **Unit: remaining ~202** — single-module, in-process assertions
|
|
245
|
+
(canonical, cas, hmac_chain, policy rules/decision/engine/guard, extractors,
|
|
246
|
+
regression baseline/detector, CLI command handlers invoked as
|
|
247
|
+
`main(argv)` directly, etc.). `tests/test_smoke.py` (2) is named "smoke" but
|
|
248
|
+
is an import/packaging unit check (no network), counted as unit.
|
|
249
|
+
|
|
250
|
+
---
|
|
251
|
+
|
|
252
|
+
## C. CI/CD
|
|
253
|
+
|
|
254
|
+
**Method:** read `.github/workflows/ci.yml`; `gh run list --limit 10`.
|
|
255
|
+
|
|
256
|
+
### Workflows
|
|
257
|
+
|
|
258
|
+
Only one workflow file: `.github/workflows/ci.yml` (job name `test`).
|
|
259
|
+
|
|
260
|
+
Triggers (from yaml):
|
|
261
|
+
- `push` to branch `main`
|
|
262
|
+
- `pull_request` targeting `main`
|
|
263
|
+
- No `schedule` / cron. No manual `workflow_dispatch`.
|
|
264
|
+
|
|
265
|
+
Pipeline steps: checkout@v4 → setup-python@v5 (Python **3.11**) →
|
|
266
|
+
`pip install -e ".[dev,anthropic]"` → `ruff check .` → `pytest -v`.
|
|
267
|
+
Note: CI installs `[dev,anthropic]` but NOT `[openai]`; CI runs on Python 3.11
|
|
268
|
+
whereas this audit's local venv is Python 3.12.10.
|
|
269
|
+
|
|
270
|
+
### Last 10 CI runs
|
|
271
|
+
|
|
272
|
+
**Method:** `gh run list --limit 10` (account octavuntila-prog).
|
|
273
|
+
|
|
274
|
+
| Date (UTC) | Trigger | Branch | Conclusion | Duration | Commit subject |
|
|
275
|
+
|---|---|---|---|---|---|
|
|
276
|
+
| 2026-05-14T09:22:09Z | push | main | success | 30s | F11 (v0.5.0) prompt_pattern_deny |
|
|
277
|
+
| 2026-05-11T10:47:59Z | push | main | success | 33s | F9 (v0.4.0) OpenAIAdapter |
|
|
278
|
+
| 2026-05-10T12:49:52Z | push | main | success | 23s | F12 (v0.3.0) Regression Detection |
|
|
279
|
+
| 2026-05-10T11:46:04Z | push | main | success | 22s | fix(test) rename loop var (ruff B007) |
|
|
280
|
+
| 2026-05-10T11:45:28Z | push | main | **failure** | 21s | v0.2.1 cost fix + docs + coverage |
|
|
281
|
+
| 2026-05-10T11:16:24Z | push | main | success | 29s | F8 export + rate_limit (v0.2.0) |
|
|
282
|
+
| 2026-05-10T10:20:45Z | push | main | success | 19s | docs: CHANGELOG for v0.1.0 |
|
|
283
|
+
| 2026-05-10T10:01:08Z | push | main | success | 29s | F7 Provider Protocol + AnthropicAdapter |
|
|
284
|
+
| 2026-05-10T07:59:48Z | push | main | success | 26s | validation: e2e smoke test |
|
|
285
|
+
| 2026-05-07T21:57:34Z | push | main | success | 30s | F5 @trace_genai + wrap |
|
|
286
|
+
|
|
287
|
+
- Pass/fail: **9 success / 1 failure** over last 10 → **pass rate 90.0%**.
|
|
288
|
+
- All 10 triggered by `push` to `main` (0 pull_request runs in the window).
|
|
289
|
+
- Mean duration: (30+33+23+22+21+29+19+29+26+30)/10 = **26.2 s**.
|
|
290
|
+
- The single failure (v0.2.1, 2026-05-10T11:45:28Z) is followed by an
|
|
291
|
+
immediate fix run (`fix(test): rename unused loop var i -> _i (ruff B007)`,
|
|
292
|
+
+36 s by the run timestamps in the table above, success) — the failure was a ruff B007 lint error, remediated in the
|
|
293
|
+
next push. Current HEAD (F11) CI is green.
|
|
294
|
+
|
|
295
|
+
---
|
|
296
|
+
|
|
297
|
+
## D. VERSIONS
|
|
298
|
+
|
|
299
|
+
**Method:** `git log --oneline -30`, `git tag -l`,
|
|
300
|
+
`git for-each-ref --format='%(refname:short) %(objectname:short) %(*objectname:short)' refs/tags`,
|
|
301
|
+
`git rev-list -n 1 v0.5.0`, `git show --no-patch --format=... v0.5.0`.
|
|
302
|
+
|
|
303
|
+
### Tags
|
|
304
|
+
|
|
305
|
+
`git tag -l`: v0.1.0, v0.2.0, v0.2.1, v0.3.0, v0.4.0, v0.5.0 (6 tags).
|
|
306
|
+
All six are **annotated** tags (tagger: Octavian Untilă).
|
|
307
|
+
|
|
308
|
+
### Tag → commit mapping (dereferenced)
|
|
309
|
+
|
|
310
|
+
| Tag | Tag-object SHA | Target commit | Commit subject |
|
|
311
|
+
|---|---|---|---|
|
|
312
|
+
| v0.1.0 | 04e3894 | 29e655d | docs: CHANGELOG for v0.1.0 |
|
|
313
|
+
| v0.2.0 | ef3606d | 4855e6b | F8 Portable export + rate_limit (v0.2.0) |
|
|
314
|
+
| v0.2.1 | d465d01 | cc16d45 | v0.2.1 cost field fix + README + CLI coverage |
|
|
315
|
+
| v0.3.0 | e2238c0 | af2dbca | F12 Regression Detection (Bijuteria #16) |
|
|
316
|
+
| v0.4.0 | 5f4416b | 889b4ab | F9 OpenAIAdapter |
|
|
317
|
+
| v0.5.0 | a99a99d | **0bffb6c** | F11 prompt_pattern_deny |
|
|
318
|
+
|
|
319
|
+
### v0.5.0 → F11 commit verification
|
|
320
|
+
|
|
321
|
+
- `git rev-list -n 1 v0.5.0` → `0bffb6cc32ef1dc6d383090024a904578412d9be`.
|
|
322
|
+
- `git show --no-patch --format` on v0.5.0 → `commit=0bffb6c...`,
|
|
323
|
+
`subject=F11 (v0.5.0): prompt_pattern_deny rule`.
|
|
324
|
+
- **VERIFIED: v0.5.0 resolves to commit `0bffb6c` = the F11 commit.** Match.
|
|
325
|
+
|
|
326
|
+
**REFRAMING (audit-method nuance, NOT a repo defect):** The exact command in
|
|
327
|
+
the audit brief, `git for-each-ref --format='%(refname:short) %(objectname:short)' refs/tags`,
|
|
328
|
+
prints `v0.5.0 a99a99d`. `a99a99d` is the **annotated-tag object** SHA, not the
|
|
329
|
+
commit. Reading that line literally would falsely conclude "v0.5.0 does NOT
|
|
330
|
+
point at 0bffb6c". The correct resolution requires dereferencing with
|
|
331
|
+
`%(*objectname:short)` (or `git rev-list -n 1`), which yields `0bffb6c`. Claimed
|
|
332
|
+
target (0bffb6c) and actual target (0bffb6c) **agree**. No tag drift.
|
|
333
|
+
|
|
334
|
+
### Version-string consistency
|
|
335
|
+
|
|
336
|
+
| Source | Value | Method |
|
|
337
|
+
|---|---|---|
|
|
338
|
+
| `pyproject.toml` `[project].version` | 0.5.0 | read line 3 |
|
|
339
|
+
| `src/bijotel/__init__.py` `__version__` | 0.5.0 | read line 31 |
|
|
340
|
+
| `tests/test_smoke.py` assertion | `== "0.5.0"` | read line 8 |
|
|
341
|
+
| CHANGELOG latest entry | `[0.5.0] — 2026-05-14` | read line 8 |
|
|
342
|
+
| Latest git tag | v0.5.0 | `git tag -l` |
|
|
343
|
+
|
|
344
|
+
All five agree on `0.5.0`. No version drift. CHANGELOG version entries
|
|
345
|
+
([0.1.0]..[0.5.0]) map 1:1 onto the six git tags — no missing tag, no
|
|
346
|
+
untagged CHANGELOG entry, no extra tag.
|
|
347
|
+
|
|
348
|
+
---
|
|
349
|
+
|
|
350
|
+
## E. DEPENDENCIES
|
|
351
|
+
|
|
352
|
+
**Method:** read `pyproject.toml`; `.venv/Scripts/python -m pip_audit --version`
|
|
353
|
+
(unavailable); `.venv/Scripts/python -m pip check`;
|
|
354
|
+
`.venv/Scripts/python -m pip list --outdated`. No installs performed
|
|
355
|
+
(read-only constraint honored).
|
|
356
|
+
|
|
357
|
+
### Declared dependencies (pyproject.toml)
|
|
358
|
+
|
|
359
|
+
Required (`[project].dependencies`) — all use `>=` (lower bound only; no exact pins, no upper cap):
|
|
360
|
+
- `opentelemetry-api>=1.27.0`
|
|
361
|
+
- `opentelemetry-sdk>=1.27.0`
|
|
362
|
+
- `opentelemetry-semantic-conventions>=0.48b0`
|
|
363
|
+
- `rfc8785>=0.1.4`
|
|
364
|
+
|
|
365
|
+
Optional extras:
|
|
366
|
+
- `[anthropic]`: `anthropic>=0.40.0`, `opentelemetry-instrumentation-anthropic>=0.40.0`
|
|
367
|
+
- `[openai]`: `openai>=1.0`
|
|
368
|
+
- `[all]`: union of `[anthropic]` + `[openai]`
|
|
369
|
+
- `[dev]`: `pytest>=8.0`, `ruff>=0.6.0`, `python-dotenv>=1.0.0`
|
|
370
|
+
|
|
371
|
+
### Pinned vs unpinned
|
|
372
|
+
|
|
373
|
+
100% of declared deps use `>=` (minimum-version) constraints. **Zero `==`
|
|
374
|
+
pins** and **zero upper bounds** anywhere in `pyproject.toml`. There is no
|
|
375
|
+
lockfile (no `requirements.txt`, no `uv.lock`/`poetry.lock` in repo root).
|
|
376
|
+
|
|
377
|
+
### Audit tooling results
|
|
378
|
+
|
|
379
|
+
- `pip_audit`: NOT installed in venv (`No module named pip_audit`); install
|
|
380
|
+
disallowed by audit constraint. Known-CVE scan: **unknown** (tool absent).
|
|
381
|
+
- `pip check`: **"No broken requirements found."** (dependency graph
|
|
382
|
+
internally consistent).
|
|
383
|
+
- `pip list --outdated` (installed venv vs latest PyPI):
|
|
384
|
+
|
|
385
|
+
| Package | Installed | Latest | Type |
|
|
386
|
+
|---|---|---|---|
|
|
387
|
+
| anthropic | 0.100.0 | 0.102.0 | wheel |
|
|
388
|
+
| coverage | 7.13.5 | 7.14.0 | wheel |
|
|
389
|
+
| idna | 3.13 | 3.15 | wheel |
|
|
390
|
+
| importlib_metadata | 8.7.1 | 9.0.0 | wheel |
|
|
391
|
+
| openai | 2.36.0 | 2.37.0 | wheel |
|
|
392
|
+
| ruff | 0.15.12 | 0.15.13 | wheel |
|
|
393
|
+
|
|
394
|
+
Six packages are behind latest; all are minor/patch deltas. None is a core
|
|
395
|
+
runtime dep of bijotel with a security advisory known to this audit (CVE
|
|
396
|
+
verification is **unknown** without pip_audit).
|
|
397
|
+
|
|
398
|
+
### Risk flags
|
|
399
|
+
|
|
400
|
+
- The four **core runtime deps are unpinned with no upper bound**
|
|
401
|
+
(`opentelemetry-*`, `rfc8785`). OpenTelemetry has historically shipped
|
|
402
|
+
breaking changes in `0.x` semantic-conventions (`>=0.48b0` is a beta
|
|
403
|
+
pre-release spec); an unbounded `>=` allows a future incompatible OTel
|
|
404
|
+
semconv release to be resolved at install time. This is the single
|
|
405
|
+
highest-risk dependency posture finding. `rfc8785>=0.1.4` is also a `0.x`
|
|
406
|
+
package (no API-stability guarantee) consumed for canonicalization that the
|
|
407
|
+
HMAC chain integrity depends on.
|
|
408
|
+
- No lockfile means CI (which does a fresh `pip install -e ".[dev,anthropic]"`
|
|
409
|
+
on every push) is not reproducible across time — the green/red history in
|
|
410
|
+
section C is against floating dependency versions.
|
|
411
|
+
|
|
412
|
+
---
|
|
413
|
+
|
|
414
|
+
## F. PUBLIC API
|
|
415
|
+
|
|
416
|
+
**Method:** read `src/bijotel/__init__.py` `__all__` and
|
|
417
|
+
`src/bijotel/policy/__init__.py` `__all__`; per symbol
|
|
418
|
+
`grep -c -w "<sym>" README.md` plus plain `grep -n` verification for
|
|
419
|
+
zero-hit symbols; `git show v0.4.0:src/bijotel/__init__.py` vs current.
|
|
420
|
+
|
|
421
|
+
### Exported symbols and README documentation status
|
|
422
|
+
|
|
423
|
+
`bijotel.__all__` has 25 entries (24 symbols + `__version__`).
|
|
424
|
+
`bijotel.policy.__all__` has 9 entries (all re-exported at top level).
|
|
425
|
+
|
|
426
|
+
| Symbol | Exported from | In README? |
|
|
427
|
+
|---|---|---|
|
|
428
|
+
| `__version__` | bijotel | n/a (dunder, not user-facing API) |
|
|
429
|
+
| Anomaly | bijotel ← regression | **NO** |
|
|
430
|
+
| AnomalyMethod | bijotel ← regression | yes |
|
|
431
|
+
| AnthropicAdapter | bijotel ← adapters | yes |
|
|
432
|
+
| Decision | bijotel ← policy | yes |
|
|
433
|
+
| DimensionStats | bijotel ← regression | **NO** |
|
|
434
|
+
| OpenAIAdapter | bijotel ← adapters | yes |
|
|
435
|
+
| PolicyDeniedError | bijotel ← policy | yes |
|
|
436
|
+
| PolicyEngine | bijotel ← policy | yes |
|
|
437
|
+
| Provider | bijotel ← adapters | yes |
|
|
438
|
+
| ProviderResponse | bijotel ← adapters | yes |
|
|
439
|
+
| RegressionDetector | bijotel ← regression | yes |
|
|
440
|
+
| compute_baseline | bijotel ← regression | **NO** |
|
|
441
|
+
| cost_per_call_max | bijotel ← policy | yes |
|
|
442
|
+
| daily_token_budget | bijotel ← policy | yes |
|
|
443
|
+
| export_chain | bijotel ← processors | yes |
|
|
444
|
+
| guard | bijotel ← policy | yes |
|
|
445
|
+
| init | bijotel ← core | yes |
|
|
446
|
+
| model_allowlist | bijotel ← policy | yes |
|
|
447
|
+
| prompt_pattern_deny | bijotel ← policy | yes |
|
|
448
|
+
| rate_limit_calls_per_minute | bijotel ← policy | yes |
|
|
449
|
+
| shutdown | bijotel ← core | yes |
|
|
450
|
+
| trace_genai | bijotel ← decorators | yes |
|
|
451
|
+
| verify_export | bijotel ← processors | yes |
|
|
452
|
+
| wrap | bijotel ← decorators | yes |
|
|
453
|
+
|
|
454
|
+
`bijotel.policy.__all__` (Decision, PolicyDeniedError, PolicyEngine,
|
|
455
|
+
cost_per_call_max, daily_token_budget, guard, model_allowlist,
|
|
456
|
+
prompt_pattern_deny, rate_limit_calls_per_minute) — all 9 are documented in
|
|
457
|
+
README (same symbols, re-exported at top level).
|
|
458
|
+
|
|
459
|
+
### Undocumented exports — flagged
|
|
460
|
+
|
|
461
|
+
**Method:** `grep -n "<sym>" README.md` (plain, not word-bounded) confirms zero
|
|
462
|
+
standalone occurrences.
|
|
463
|
+
|
|
464
|
+
- **`Anomaly`** — README contains `AnomalyMethod` but NO standalone `Anomaly`
|
|
465
|
+
dataclass mention. The public `Anomaly` record type (single detection record
|
|
466
|
+
returned by `RegressionDetector`) is undocumented.
|
|
467
|
+
- **`DimensionStats`** — zero occurrences in README. Undocumented (it is the
|
|
468
|
+
return type of `compute_baseline()` and is in `__all__`).
|
|
469
|
+
- **`compute_baseline`** — zero occurrences in README. Undocumented public
|
|
470
|
+
function.
|
|
471
|
+
|
|
472
|
+
3 of 24 user-facing exports (all in the F12 regression surface) are absent from
|
|
473
|
+
README. README documents `RegressionDetector`/`AnomalyMethod`/`bijotel
|
|
474
|
+
regression` CLI but omits the `Anomaly`, `DimensionStats`, `compute_baseline`
|
|
475
|
+
symbols it also exports.
|
|
476
|
+
|
|
477
|
+
**REFRAMING (CHANGELOG v0.2.1 vs reality):** The v0.2.1 CHANGELOG entry claims
|
|
478
|
+
"README sections added for 6 previously-undocumented public API exports"
|
|
479
|
+
and frames documentation as caught up. As of current HEAD, **3 exported symbols
|
|
480
|
+
(`Anomaly`, `DimensionStats`, `compute_baseline`, all added later in F12/v0.3.0)
|
|
481
|
+
remain undocumented in README**. The "all public exports documented" posture
|
|
482
|
+
implied by v0.2.1 does not hold at v0.5.0; the F12 export surface regressed
|
|
483
|
+
documentation parity.
|
|
484
|
+
|
|
485
|
+
### Breaking-change / `__all__` growth across releases
|
|
486
|
+
|
|
487
|
+
**Method:** `git show v0.4.0:src/bijotel/__init__.py` diffed against working
|
|
488
|
+
tree `src/bijotel/__init__.py`.
|
|
489
|
+
|
|
490
|
+
- v0.4.0 `__all__`: 24 entries. v0.5.0 `__all__`: 25 entries.
|
|
491
|
+
- Sole delta: **`prompt_pattern_deny` added** (top-level + `policy.__all__`).
|
|
492
|
+
This is an **additive** change — no symbol removed, no signature change to
|
|
493
|
+
existing exports. CHANGELOG v0.5.0 "Changed" explicitly documents this
|
|
494
|
+
addition.
|
|
495
|
+
- Therefore v0.4.0 → v0.5.0 is backward-compatible at the `__all__` boundary;
|
|
496
|
+
the minor-version bump (0.4.0 → 0.5.0) is correct under SemVer. **No
|
|
497
|
+
undocumented breaking change found** in the v0.4.0 → v0.5.0 transition.
|
|
498
|
+
- Earlier growth per CHANGELOG (each documented in its release's "Changed"):
|
|
499
|
+
v0.2.0 +`export_chain`/`verify_export`/`rate_limit_calls_per_minute`;
|
|
500
|
+
v0.3.0 +`RegressionDetector`/`Anomaly`/`AnomalyMethod`/`DimensionStats`/`compute_baseline`;
|
|
501
|
+
v0.4.0 +`OpenAIAdapter`; v0.5.0 +`prompt_pattern_deny`. All `__all__` changes
|
|
502
|
+
across the tagged history are additive — no export was ever removed or
|
|
503
|
+
renamed across v0.1.0→v0.5.0, so there is no silent breaking change in the
|
|
504
|
+
public surface.
|
|
505
|
+
|
|
506
|
+
---
|
|
507
|
+
|
|
508
|
+
## SUMMARY OF REFRAMINGS
|
|
509
|
+
|
|
510
|
+
1. **Subpackage scope** — prompt's subpackage list omits the real
|
|
511
|
+
`decorators/` subpackage (394 LOC, F5); `exporters/` is an empty 1-LOC
|
|
512
|
+
marker; the F8 export code lives in `processors/export.py`.
|
|
513
|
+
2. **Coverage per-module floor** — CHANGELOG "coverage maintained ≥92%" holds
|
|
514
|
+
at package level (94%) but `regression/baseline.py` is 91% (<92%).
|
|
515
|
+
3. **CHANGELOG v0.3.0 detector.py coverage** — claimed 91%, actual 93%
|
|
516
|
+
(improved, CHANGELOG stale; favorable drift).
|
|
517
|
+
4. **v0.5.0 tag resolution method** — the brief's `for-each-ref` command prints
|
|
518
|
+
the annotated-tag object SHA `a99a99d`, not the commit; correct dereferenced
|
|
519
|
+
target is `0bffb6c` (= F11). Tag is correct; the audit *method* needs
|
|
520
|
+
dereferencing. No actual drift.
|
|
521
|
+
5. **Undocumented exports vs v0.2.1 "docs caught up" framing** — `Anomaly`,
|
|
522
|
+
`DimensionStats`, `compute_baseline` (F12 surface) are in `__all__` but
|
|
523
|
+
absent from README; v0.2.1's documentation-parity posture does not hold at
|
|
524
|
+
v0.5.0.
|
|
525
|
+
|
|
526
|
+
## VERIFIED CRITICAL NUMBERS (raw)
|
|
527
|
+
|
|
528
|
+
- pytest total: `209 passed, 2 skipped in 12.93s` (collected `211 tests collected`)
|
|
529
|
+
- coverage TOTAL: `TOTAL 1340 74 94%`
|
|
530
|
+
- CI pass rate (last 10): 9/10 = 90.0%; mean duration 26.2 s; all push-triggered
|
|
531
|
+
- v0.5.0 tag → commit `0bffb6c` (F11) — VERIFIED via `git rev-list -n 1 v0.5.0`
|
|
532
|
+
- LOC: src/ = 3625, tests/ = 3807, whole project (.py, excl .venv/.git) = 13826
|
|
533
|
+
- 2 skips: ANTHROPIC_API_KEY missing (test_anthropic_adapter.py:127),
|
|
534
|
+
OPENAI_API_KEY missing (test_openai_adapter.py:306)
|
|
535
|
+
|
|
536
|
+
— end DOC 01 —
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
# DOC 03 — GAPS | BIJOTEL Production-Readiness Audit T+7d | 2026-05-17
|
|
2
|
+
|
|
3
|
+
Generated-at (UTC): 2026-05-17T07:40:00Z
|
|
4
|
+
Scope: what is missing for BIJOTEL to be production-ready for a *different* consumer (ARA-class: on-demand + concurrent + multi-provider + policy-enforcing). GENA empirical behavior is in DOC 02; codebase state in DOC 01.
|
|
5
|
+
Method note: every gap below carries how it was established. No speculative language — "is" or "unknown" only.
|
|
6
|
+
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
## CENTRAL NARRATIVE
|
|
10
|
+
|
|
11
|
+
BIJOTEL is **empirically excellent as a passive low-concurrency Anthropic observer** (DOC 02: 0/2776 chain mismatches, exact in-window dual-observer match, zero added cost, flat memory over ~166 h). It is **not yet proven production-ready for an ARA-class consumer**, for seven concrete reasons documented below. The single most important fact: **the artifact running in production is `bijotel 0.0.1`, not the audited v0.5.0** — "shipped" never became "deployed."
|
|
12
|
+
|
|
13
|
+
---
|
|
14
|
+
|
|
15
|
+
## A. KNOWN BUGS / FIXMES / TODOs
|
|
16
|
+
|
|
17
|
+
Method: `Grep "TODO|FIXME|XXX|HACK|NotImplementedError|deferred to F"` over `src/`.
|
|
18
|
+
|
|
19
|
+
- **Zero** dangling code markers in `src/` — no TODO/FIXME/XXX/HACK/NotImplementedError. Positive finding: the codebase carries no acknowledged in-code debt.
|
|
20
|
+
- All deferrals (streaming, tool-use, vision, multi-provider) live **only** in CHANGELOG "Known Limitations" / README prose — **not surfaced in code** (no `warnings.warn`, no guard on the streaming path). A caller using an unsupported path gets silent partial behavior, not a signal.
|
|
21
|
+
- Severity: LOW (no code-level bug markers); the doc-only-deferral is a documentation/ergonomics gap, not a bug.
|
|
22
|
+
|
|
23
|
+
## B. MISSING FEATURES PROMISED
|
|
24
|
+
|
|
25
|
+
Method: README Roadmap vs DOC 01 code-presence verification vs DOC 02 deployed-version check.
|
|
26
|
+
|
|
27
|
+
- **CRITICAL — deploy gap:** README/CHANGELOG mark F0–F12 (including F11, which shipped last, in v0.5.0) as done; DOC 01 confirms code present for all; **DOC 02 confirms production runs 0.0.1**. F8 (export), F9 (OpenAIAdapter), F11 (prompt_pattern_deny), F12 (regression) **exist in repo + wheel but were never deployed**. The implicit "v0.5.0 in production 7 d" premise is false.
|
|
28
|
+
- Documented-deferred (still open since v0.1.0, honestly listed, not silently missing): streaming responses, tool-use handling, vision/multimodal, concrete adapters beyond Anthropic/OpenAI (Gemini/Bedrock/Mistral), `registry.py` adapter lookup.
|
|
29
|
+
|
|
30
|
+
## C. DOCUMENTATION GAPS
|
|
31
|
+
|
|
32
|
+
Method: DOC 01 README cross-check; new-developer read-through.
|
|
33
|
+
|
|
34
|
+
- 3 public exports undocumented in README: `Anomaly`, `DimensionStats`, `compute_baseline` (F12 surface in `__all__`, added in v0.3.0). The v0.2.1 CHANGELOG "docs caught up" posture therefore no longer holds at v0.5.0.
|
|
35
|
+
- No deployment / version-pinning guidance anywhere. Production silently remained on 0.0.1 while the repository advanced to v0.5.0, and **nothing in the project documents how a host installs/upgrades the wheel** (the `/opt/substrate-v2/` wheel-install pattern is undocumented).
|
|
36
|
+
- New-dev sticking points (where a fresh integrator gets stuck): (i) SpanProcessor failure semantics — what happens if `on_end` raises? (undocumented); (ii) shared-db multi-process safety (undocumented, see section F); (iii) HMAC secret rotation (no procedure).
|
|
37
|
+
|
|
38
|
+
## D. TEST GAPS
|
|
39
|
+
|
|
40
|
+
Method: DOC 01 coverage run + `Grep` test inventory.
|
|
41
|
+
|
|
42
|
+
- Package coverage 94%, but `regression/baseline.py` = **91% (below the CHANGELOG-claimed ≥92% floor)**.
|
|
43
|
+
- **Zero** tests for: streaming responses; concurrent / multi-thread writes; **multi-process writers on one shared chain.db** (the exact GENA topology — 4 processes, 1 db — is untested); SpanProcessor exception injection; sqlite-locked / disk-full error paths.
|
|
44
|
+
- No real-traffic integration test; no stress test (≥1000 calls/min). GENA peak ≈ 432/day ≈ 0.3/min, so **neither tests nor production have ever exercised concurrency**.
|
|
45
|
+
- 2 skipped tests = API-key-gated smoke (expected, benign).
|
|
46
|
+
|
|
47
|
+
## E. SECURITY POSTURE
|
|
48
|
+
|
|
49
|
+
Method: code read (`hmac_chain.py`) + GENA `stat`/`printenv` (read-only).
|
|
50
|
+
|
|
51
|
+
- HMAC secret: min-16-byte enforced (`ValueError`); deployed via container env `BIJOTEL_HMAC_SECRET` (32 bytes). Exposed to `printenv` / `docker inspect` (standard); **no rotation mechanism** in the library.
|
|
52
|
+
- **chain.db is mode 644 (world-readable)**, root:root, ~33 MB, on a shared docker volume. `canonical_body` BLOB holds full span payloads (prompts/responses). Any user/container with volume access can read audit content. The library does **not** set restrictive (0600) permissions on db creation.
|
|
53
|
+
- No secrets-in-logs issue: the processor logs nothing (`on_end` is silent).
|
|
54
|
+
- **Dependency CVE exposure: UNKNOWN** — `pip-audit` not installed and installs were disallowed for this read-only audit. Core deps (`opentelemetry-*`, `rfc8785`) are unpinned `>=` with no upper bound and **no lockfile** → supply-chain drift risk + unaudited CVE surface.
|
|
55
|
+
|
|
56
|
+
## F. OPERATIONAL READINESS
|
|
57
|
+
|
|
58
|
+
Method: code read of `HmacChainSpanProcessor.on_end` + DOC 02 empirical contiguity.
|
|
59
|
+
|
|
60
|
+
- **CRITICAL (latent) — no crash isolation.** `on_end()` has **no try/except**. A sqlite failure (lock/disk/permission) or canonicalization error raises into the OTel span-processor path. In GENA this never triggered (low rate, healthy disk). For a higher-volume consumer an `on_end` exception can disrupt the host's LLM call path. No defensive boundary, no degraded mode. Empirically un-hit ≠ safe.
|
|
61
|
+
- **CRITICAL (latent) — multi-writer race.** One shared chain.db is written by 4 container processes; concurrency is guarded **only by a per-process `threading.Lock`**. The `SELECT prev_hash → INSERT` sequence is **not atomic across processes**; SQLite serializes commits but the stale-read window permits a chain fork. **Empirically NOT manifested in GENA** (DOC 02: seq 1..2776 contiguous, `verify_chain`=VALID, 0/100 manual HMAC mismatch) because GENA's cron-driven workload is effectively serialized and low-rate. An ARA-class on-demand concurrent pattern is precisely the trigger. Unguarded + untested = unproven.
|
|
62
|
+
- **No backup / DR.** Single SQLite file on one docker volume; no replication, rotation, or backup. Tamper-evident ≠ durable — disk loss = total audit-trail loss. No documented recovery.
|
|
63
|
+
- **No self-observability.** The library emits no health signal; "is the chain advancing / intact?" is answerable only by an external query (as this audit did). No heartbeat/metric.
|
|
64
|
+
|
|
65
|
+
## G. CROSS-CONSUMER GENERALIZATION
|
|
66
|
+
|
|
67
|
+
Method: GENA profile from DOC 02; ARA profile from task brief.
|
|
68
|
+
|
|
69
|
+
| Dimension | GENA (validated in prod) | ARA (assumed target) |
|
|
70
|
+
|---|---|---|
|
|
71
|
+
| Trigger | cron-driven, serialized | on-demand |
|
|
72
|
+
| Concurrency | ~0.3 calls/min, effectively 1 writer at a time | potentially concurrent, multi-writer |
|
|
73
|
+
| Provider | Anthropic only | multi-provider |
|
|
74
|
+
| Policy | unwired (observer-only, 0 denies ever) | likely enforcing |
|
|
75
|
+
| Streaming | not used | unknown |
|
|
76
|
+
|
|
77
|
+
**Tested (in prod):** Anthropic request/response extraction (2776/2776), passive chain integrity at low rate, flat memory, zero added cost.
|
|
78
|
+
**Untested anywhere:** concurrent writers, live policy enforcement, OpenAI adapter in prod (0.0.1 lacks it), streaming, crash recovery, multi-provider, high rate.
|
|
79
|
+
**Honest verdict:** BIJOTEL is proven *only* for the passive / low-concurrency / Anthropic / observer pattern. Every ARA-distinct dimension is unproven.
|
|
80
|
+
|
|
81
|
+
## H. PYPI READINESS
|
|
82
|
+
|
|
83
|
+
Method: local `python -m build` + wheel METADATA inspection + `pip check`.
|
|
84
|
+
|
|
85
|
+
- Wheel builds clean: `bijotel-0.5.0-py3-none-any.whl`. METADATA valid (Metadata-Version 2.4, `Description-Content-Type: text/markdown` → README renders on PyPI, `Requires-Python: >=3.11`, deps + 4 extras correct). `pip check` clean.
|
|
86
|
+
- Gaps: **zero `Classifier`** (no Development Status / Python 3.11–3.12 / `License :: OSI Approved :: MIT License` / Topic / Intended Audience), **zero `Project-URL`** (no Homepage/Repo/Changelog/Issues), no `Keywords`. Not install-blocking; poor public listing + no discoverability.
|
|
87
|
+
- `twine` absent from toolchain → no pre-publish validation step in CI.
|
|
88
|
+
- Verdict: technically publishable, not polished. Metadata completeness ≈ 30-min fix.
|
|
89
|
+
|
|
90
|
+
---
|
|
91
|
+
|
|
92
|
+
## GAP SEVERITY ROLLUP
|
|
93
|
+
|
|
94
|
+
| # (section-item gap ID, not feature phase) | Gap | Severity | Blocks ARA? |
|
|
95
|
+
|---|---|---|---|
|
|
96
|
+
| F1 | `on_end` no crash isolation | CRITICAL (latent) | YES |
|
|
97
|
+
| F2 | Multi-writer race on shared db | CRITICAL (latent) | YES (if concurrent) |
|
|
98
|
+
| B1 | Prod = 0.0.1, not v0.5.0; no deploy pipeline/alarm | CRITICAL (process) | YES |
|
|
99
|
+
| D1 | Zero concurrency/streaming/crash tests | HIGH | YES |
|
|
100
|
+
| E1 | Deps unpinned, no lockfile, CVE status unknown | HIGH | YES |
|
|
101
|
+
| F3 | No backup/DR for audit chain | MEDIUM | depends on retention need |
|
|
102
|
+
| E2 | chain.db world-readable (644) | MEDIUM | depends on threat model |
|
|
103
|
+
| C1 | Undocumented exports + no deploy docs | LOW | NO |
|
|
104
|
+
| H1 | Thin PyPI metadata | LOW | NO (publish-only) |
|