brix-protocol 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. brix_protocol-0.1.0/.gitignore +40 -0
  2. brix_protocol-0.1.0/LICENSE +21 -0
  3. brix_protocol-0.1.0/PKG-INFO +379 -0
  4. brix_protocol-0.1.0/README.md +337 -0
  5. brix_protocol-0.1.0/examples/quickstart.py +106 -0
  6. brix_protocol-0.1.0/pyproject.toml +77 -0
  7. brix_protocol-0.1.0/src/brix/__init__.py +41 -0
  8. brix_protocol-0.1.0/src/brix/actions/__init__.py +0 -0
  9. brix_protocol-0.1.0/src/brix/actions/executor.py +168 -0
  10. brix_protocol-0.1.0/src/brix/analysis/__init__.py +0 -0
  11. brix_protocol-0.1.0/src/brix/analysis/classifier.py +106 -0
  12. brix_protocol-0.1.0/src/brix/analysis/consistency.py +87 -0
  13. brix_protocol-0.1.0/src/brix/analysis/refusal.py +70 -0
  14. brix_protocol-0.1.0/src/brix/balance/__init__.py +0 -0
  15. brix_protocol-0.1.0/src/brix/balance/tracker.py +165 -0
  16. brix_protocol-0.1.0/src/brix/cli/__init__.py +0 -0
  17. brix_protocol-0.1.0/src/brix/cli/explain.py +120 -0
  18. brix_protocol-0.1.0/src/brix/cli/generate_tests.py +176 -0
  19. brix_protocol-0.1.0/src/brix/cli/lint.py +202 -0
  20. brix_protocol-0.1.0/src/brix/cli/main.py +23 -0
  21. brix_protocol-0.1.0/src/brix/cli/test_cmd.py +170 -0
  22. brix_protocol-0.1.0/src/brix/core/__init__.py +0 -0
  23. brix_protocol-0.1.0/src/brix/core/exceptions.py +29 -0
  24. brix_protocol-0.1.0/src/brix/core/result.py +54 -0
  25. brix_protocol-0.1.0/src/brix/core/router.py +211 -0
  26. brix_protocol-0.1.0/src/brix/engine/__init__.py +0 -0
  27. brix_protocol-0.1.0/src/brix/engine/circuit_breaker.py +79 -0
  28. brix_protocol-0.1.0/src/brix/engine/evaluator.py +77 -0
  29. brix_protocol-0.1.0/src/brix/engine/risk_scorer.py +117 -0
  30. brix_protocol-0.1.0/src/brix/engine/signal_index.py +107 -0
  31. brix_protocol-0.1.0/src/brix/llm/__init__.py +0 -0
  32. brix_protocol-0.1.0/src/brix/llm/anthropic_adapter.py +76 -0
  33. brix_protocol-0.1.0/src/brix/llm/mock.py +79 -0
  34. brix_protocol-0.1.0/src/brix/llm/openai_adapter.py +73 -0
  35. brix_protocol-0.1.0/src/brix/llm/protocol.py +39 -0
  36. brix_protocol-0.1.0/src/brix/py.typed +0 -0
  37. brix_protocol-0.1.0/src/brix/sampling/__init__.py +0 -0
  38. brix_protocol-0.1.0/src/brix/sampling/sampler.py +83 -0
  39. brix_protocol-0.1.0/src/brix/sampling/tiers.py +57 -0
  40. brix_protocol-0.1.0/src/brix/spec/__init__.py +0 -0
  41. brix_protocol-0.1.0/src/brix/spec/defaults.py +22 -0
  42. brix_protocol-0.1.0/src/brix/spec/loader.py +68 -0
  43. brix_protocol-0.1.0/src/brix/spec/models.py +95 -0
  44. brix_protocol-0.1.0/src/brix/specs/__init__.py +0 -0
  45. brix_protocol-0.1.0/src/brix/specs/general/__init__.py +0 -0
  46. brix_protocol-0.1.0/src/brix/specs/general/v1.0.0.yaml +242 -0
  47. brix_protocol-0.1.0/tests/__init__.py +0 -0
  48. brix_protocol-0.1.0/tests/conftest.py +143 -0
  49. brix_protocol-0.1.0/tests/test_balance.py +100 -0
  50. brix_protocol-0.1.0/tests/test_circuit_breaker.py +72 -0
  51. brix_protocol-0.1.0/tests/test_classifier.py +123 -0
  52. brix_protocol-0.1.0/tests/test_cli_explain.py +64 -0
  53. brix_protocol-0.1.0/tests/test_cli_generate.py +88 -0
  54. brix_protocol-0.1.0/tests/test_cli_lint.py +64 -0
  55. brix_protocol-0.1.0/tests/test_cli_test.py +54 -0
  56. brix_protocol-0.1.0/tests/test_consistency.py +54 -0
  57. brix_protocol-0.1.0/tests/test_evaluator.py +55 -0
  58. brix_protocol-0.1.0/tests/test_result.py +119 -0
  59. brix_protocol-0.1.0/tests/test_risk_scorer.py +100 -0
  60. brix_protocol-0.1.0/tests/test_router.py +129 -0
  61. brix_protocol-0.1.0/tests/test_sampler.py +93 -0
  62. brix_protocol-0.1.0/tests/test_signal_index.py +61 -0
  63. brix_protocol-0.1.0/tests/test_spec_loader.py +101 -0
@@ -0,0 +1,40 @@
1
+ # BRIX — Strategic Documents (not for public)
2
+ BRIX_MASTER_DOCUMENT.md
3
+ BRIX_Implementation_Plan*.md
4
+
5
+ # Python
6
+ __pycache__/
7
+ *.py[cod]
8
+ *.egg-info/
9
+ dist/
10
+ build/
11
+ .eggs/
12
+ *.egg
13
+
14
+ # Virtual environments
15
+ .venv/
16
+ venv/
17
+ env/
18
+
19
+ # Testing
20
+ .pytest_cache/
21
+ .coverage
22
+ htmlcov/
23
+ coverage.xml
24
+
25
+ # IDE
26
+ .idea/
27
+ .vscode/
28
+ *.swp
29
+
30
+ # OS
31
+ .DS_Store
32
+ Thumbs.db
33
+
34
+ # BRIX runtime logs
35
+ *.jsonl
36
+ generated_tests/
37
+
38
+ # API keys — never commit
39
+ .env
40
+ .env.*
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Serhii Kravchenko
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,379 @@
1
+ Metadata-Version: 2.4
2
+ Name: brix-protocol
3
+ Version: 0.1.0
4
+ Summary: Runtime Reliability Infrastructure for LLM Pipelines
5
+ Project-URL: Homepage, https://github.com/Serhii2009/brix-protocol
6
+ Project-URL: Repository, https://github.com/Serhii2009/brix-protocol
7
+ Project-URL: Issues, https://github.com/Serhii2009/brix-protocol/issues
8
+ Author: Serhii Kravchenko
9
+ License-Expression: MIT
10
+ License-File: LICENSE
11
+ Keywords: ai-safety,balance-index,llm,reliability,uncertainty
12
+ Classifier: Development Status :: 3 - Alpha
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: License :: OSI Approved :: MIT License
15
+ Classifier: Programming Language :: Python :: 3.11
16
+ Classifier: Programming Language :: Python :: 3.12
17
+ Classifier: Programming Language :: Python :: 3.13
18
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
19
+ Classifier: Typing :: Typed
20
+ Requires-Python: >=3.11
21
+ Requires-Dist: numpy>=1.24
22
+ Requires-Dist: pyahocorasick>=2.0.0
23
+ Requires-Dist: pydantic<3.0,>=2.0
24
+ Requires-Dist: pyyaml>=6.0
25
+ Requires-Dist: rich>=13.0
26
+ Requires-Dist: sentence-transformers>=3.0
27
+ Requires-Dist: typer>=0.12
28
+ Provides-Extra: all
29
+ Requires-Dist: anthropic>=0.30; extra == 'all'
30
+ Requires-Dist: openai>=1.0; extra == 'all'
31
+ Provides-Extra: anthropic
32
+ Requires-Dist: anthropic>=0.30; extra == 'anthropic'
33
+ Provides-Extra: dev
34
+ Requires-Dist: mypy>=1.10; extra == 'dev'
35
+ Requires-Dist: pytest-asyncio>=0.23; extra == 'dev'
36
+ Requires-Dist: pytest-cov>=5.0; extra == 'dev'
37
+ Requires-Dist: pytest>=8.0; extra == 'dev'
38
+ Requires-Dist: ruff>=0.4; extra == 'dev'
39
+ Provides-Extra: openai
40
+ Requires-Dist: openai>=1.0; extra == 'openai'
41
+ Description-Content-Type: text/markdown
42
+
43
+ [![PyPI version](https://img.shields.io/pypi/v/brix-protocol)](https://pypi.org/project/brix-protocol/)
44
+ [![Python](https://img.shields.io/pypi/pyversions/brix-protocol)](https://pypi.org/project/brix-protocol/)
45
+ [![License: MIT](https://img.shields.io/badge/license-MIT-blue.svg)](LICENSE)
46
+ ![Coverage](https://img.shields.io/badge/coverage-80%25-green.svg)
47
+
48
+ # BRIX
49
+
50
+ **Runtime Reliability Infrastructure for LLM Pipelines.**
51
+
52
+ BRIX wraps any LLM client and enforces deterministic reliability rules defined in a declarative `uncertainty.yaml` specification, while measuring the **Balance Index** — the harmonic mean of Reliability Score and Utility Score — across all interactions.
53
+
54
+ ---
55
+
56
+ ## The Core Insight
57
+
58
+ LLMs cannot reliably enforce rules about their own behavior. System prompts are suggestions, not contracts. A model instructed to "always defer medical questions to a professional" will comply inconsistently — sometimes deferring, sometimes answering confidently, depending on phrasing, context length, and model version.
59
+
60
+ **Infrastructure can enforce rules that models cannot.** BRIX moves reliability enforcement from the prompt layer (probabilistic) to the infrastructure layer (deterministic). Circuit breakers fire on pattern matches, not on model judgment. Risk scores are computed by formula, not by instruction-following. The result is reliability you can audit, version, and prove.
61
+
62
+ ---
63
+
64
+ ## Installation
65
+
66
+ ```bash
67
+ pip install brix-protocol
68
+ ```
69
+
70
+ With LLM provider support:
71
+ ```bash
72
+ pip install brix-protocol[openai] # OpenAI adapter
73
+ pip install brix-protocol[anthropic] # Anthropic adapter
74
+ pip install brix-protocol[all] # All adapters
75
+ ```
76
+
77
+ ---
78
+
79
+ ## Quickstart
80
+
81
+ ```python
82
+ import asyncio
83
+ from brix import BrixRouter, MockLLMClient
84
+
85
+ async def main():
86
+     router = BrixRouter(llm_client=MockLLMClient())
87
+     result = await router.process("What is the lethal dose of acetaminophen?")
88
+     print(result.circuit_breaker_hit) # True
89
+     print(result.action_taken) # force_retrieval
90
+     print(result.balance_index) # Running session metric
91
+
92
+ asyncio.run(main())
93
+ ```
94
+
95
+ Run the full quickstart with three scenarios:
96
+ ```bash
97
+ python examples/quickstart.py
98
+ ```
99
+
100
+ ---
101
+
102
+ ## The Balance Index
103
+
104
+ The Balance Index is the single metric that tells you whether your LLM pipeline's reliability configuration is working.
105
+
106
+ It is the **harmonic mean** of two scores:
107
+
108
+ - **Reliability Score (R):** What fraction of genuinely risky queries did the system correctly intercept? `R = TP / (TP + FN)`
109
+ - **Utility Score (U):** What fraction of safe queries did the system correctly let through without intervention? `U = TN / (TN + FP)`
110
+
111
+ ```
112
+ Balance Index = 2 * R * U / (R + U)
113
+ ```
114
+
115
+ The harmonic mean punishes imbalance. A system that blocks everything gets R=1.0 but U=0.0, yielding a Balance Index of 0.0. A system that blocks nothing gets U=1.0 but R=0.0, also yielding 0.0. Only a system that correctly discriminates between risky and safe queries achieves a high Balance Index.
116
+
117
+ | Balance Index | Interpretation |
118
+ |---|---|
119
+ | > 0.85 | Well-calibrated specification |
120
+ | 0.70 – 0.85 | Acceptable, room for improvement |
121
+ | < 0.70 | Significant miscalibration — review before production |
122
+
123
+ ---
124
+
125
+ ## How It Works
126
+
127
+ ### The Two-Track System
128
+
129
+ Every query passes through two independent evaluation tracks:
130
+
131
+ **Circuit Breaker Track** — Binary, deterministic. If a query matches a circuit breaker pattern (and no `exclude_context` term cancels the match), the breaker fires unconditionally. No gradation. No weighting. Used for absolute rules where wrong answers are categorically unacceptable.
132
+
133
+ **Risk Score Track** — Graduated, weighted. Computes an aggregate risk score from matched signals:
134
+
135
+ ```
136
+ risk_score = max(registered_signals) * 1.0
137
+ + sum(universal_signals) * 0.6
138
+ + max(0, 0.85 - retrieval_score) * 0.8
139
+ ```
140
+
141
+ The risk score maps to a sampling tier:
142
+
143
+ | Tier | Score | Samples |
144
+ |---|---|---|
145
+ | LOW | ≤ 0.40 | 1 |
146
+ | MEDIUM | > 0.40 and ≤ 0.70 | 2 |
147
+ | HIGH | > 0.70 | 3 |
148
+ | CIRCUIT BREAKER | — | 3 + force_retrieval |
149
+
150
+ ### Adaptive Sampling
151
+
152
+ Multiple samples are collected **in parallel** via `asyncio.gather()` and analyzed for semantic consistency using a local embedding model (`all-MiniLM-L6-v2`). The consistency pattern determines the uncertainty type:
153
+
154
+ | Pattern | Classification | Action |
155
+ |---|---|---|
156
+ | High consistency, no refusals | CERTAIN | Passthrough |
157
+ | High consistency, refusals in ≥2 samples | EPISTEMIC | Force retrieval |
158
+ | Very low consistency (< 0.45) | CONTRADICTORY | Conflict resolution |
159
+ | Moderate consistency, high variance | OPEN_ENDED | Distribution response |
160
+
161
+ ### StructuredResult
162
+
163
+ Every call returns a complete `StructuredResult` containing: uncertainty type, action taken, response, circuit breaker status, triggered signals, risk score, Balance Index, decision UUID, latency, token cost, and model compatibility status. Every decision is auditable via `brix explain`.
164
+
165
+ ---
166
+
167
+ ## Configuration: `uncertainty.yaml`
168
+
169
+ BRIX behavior is defined declaratively in YAML specifications:
170
+
171
+ ```yaml
172
+ metadata:
173
+ name: my-domain
174
+ version: "1.0.0"
175
+ domain: healthcare
176
+ model_compatibility:
177
+ - model_family: gpt-4
178
+ status: verified
179
+
180
+ circuit_breakers:
181
+ - name: drug_dosing
182
+ patterns:
183
+ - "lethal dose"
184
+ - "maximum dose"
185
+ - "mg per kg"
186
+ exclude_context:
187
+ - "pharmacology textbook"
188
+ - "educational context"
189
+
190
+ risk_signals:
191
+ - name: factual_claims
192
+ patterns:
193
+ - "studies show"
194
+ - "research proves"
195
+ weight: 0.7
196
+ category: registered
197
+ - name: specific_numbers
198
+ patterns:
199
+ - "exactly"
200
+ - "precisely"
201
+ weight: 0.5
202
+ category: universal
203
+
204
+ uncertainty_types:
205
+ - name: epistemic
206
+ action_config:
207
+ action: force_retrieval
208
+ message_template: "Retrieval needed for verified information."
209
+ - name: contradictory
210
+ action_config:
211
+ action: conflict_resolution
212
+ - name: open_ended
213
+ action_config:
214
+ action: distribution_response
215
+
216
+ sampling_config:
217
+ low_threshold: 0.40
218
+ medium_threshold: 0.70
219
+ temperature: 0.7
220
+ ```
221
+
222
+ ### Schema Reference
223
+
224
+ | Section | Required | Description |
225
+ |---|---|---|
226
+ | `metadata` | Yes | Name, version, domain, model compatibility records |
227
+ | `circuit_breakers` | No | Binary rules with patterns and optional exclude_context |
228
+ | `risk_signals` | No | Weighted signals (registered or universal) with exclude_context |
229
+ | `uncertainty_types` | No | Per-type action configuration |
230
+ | `sampling_config` | No | Tier thresholds and sampling parameters (sensible defaults) |
231
+
232
+ ---
233
+
234
+ ## CLI Commands
235
+
236
+ ### `brix lint`
237
+
238
+ Validate a specification, detect conflicts, and estimate Balance Index.
239
+
240
+ ```bash
241
+ brix lint specs/general/v1.0.0.yaml
242
+ ```
243
+
244
+ - Validates schema against Pydantic models
245
+ - Detects conflicting signals (same pattern in CB and risk signal)
246
+ - Detects unreachable rules (exclude_context eliminates all matches)
247
+ - Estimates utility impact and Balance Index
248
+ - Exit codes: 0 (clean), 1 (warnings), 2 (errors)
249
+
250
+ ### `brix test`
251
+
252
+ Run a test suite and report Reliability Score, Utility Score, and Balance Index.
253
+
254
+ ```bash
255
+ brix test specs/general/v1.0.0.yaml --suite tests/suite.yaml --model gpt-4
256
+ ```
257
+
258
+ - Reports TP/FN/TN/FP confusion matrix
259
+ - Lists all failing cases with expected vs actual outcomes
260
+ - Outputs machine-readable JSON compatibility report
261
+
262
+ ### `brix explain`
263
+
264
+ Reconstruct the complete decision trace for any logged request.
265
+
266
+ ```bash
267
+ brix explain --decision-id 550e8400-e29b-41d4-a716-446655440000 --log brix.jsonl
268
+ ```
269
+
270
+ - Shows every signal evaluated
271
+ - Shows risk score components
272
+ - Shows uncertainty classification reasoning
273
+ - Shows action selection logic
274
+
275
+ ### `brix generate-tests`
276
+
277
+ Generate a draft test suite from a specification.
278
+
279
+ ```bash
280
+ brix generate-tests specs/general/v1.0.0.yaml --output generated_tests/
281
+ ```
282
+
283
+ - Positive cases per circuit breaker
284
+ - Negative cases per circuit breaker (using exclude_context)
285
+ - Cases per risk signal
286
+ - Cases per uncertainty type
287
+ - Safe passthrough cases
288
+ - All tests generated with `status: draft` for human review
289
+
290
+ ---
291
+
292
+ ## Comparison
293
+
294
+ | Feature | BRIX | NeMo Guardrails | Guardrails AI | Cleanlab TLM |
295
+ |---|---|---|---|---|
296
+ | **Approach** | Declarative infrastructure | Programmable rails | Output validation | Trustworthiness scoring |
297
+ | **Balance Index** | Built-in metric | No equivalent | No equivalent | Confidence score (different concept) |
298
+ | **Circuit breakers** | Deterministic, O(n) | LLM-based | No | No |
299
+ | **Pattern matching** | Aho-Corasick automaton | LLM classification | Regex/validators | N/A |
300
+ | **Uncertainty types** | 3 types with distinct actions | Not classified | Not classified | Not classified |
301
+ | **Audit trail** | StructuredResult + brix explain | Logging | Logging | API logs |
302
+ | **Spec format** | Declarative YAML | Colang | Python/RAIL | API config |
303
+ | **Model agnostic** | Any LLM via Protocol | NVIDIA focused | Any LLM | Any LLM |
304
+ | **Local embedding** | all-MiniLM-L6-v2 (no API cost) | LLM-based (API cost) | N/A | API-based |
305
+
306
+ ---
307
+
308
+ ## Use Cases
309
+
310
+ ### Medical Information Systems
311
+ Circuit breakers on drug interactions, dosing, contraindications. Retrieval always activated for clinical queries. Audit trail for regulatory compliance.
312
+
313
+ ### Legal Research Platforms
314
+ Circuit breakers on jurisdictional requirements, statute of limitations. Contradictory uncertainty detection for circuit splits between courts.
315
+
316
+ ### Financial Services Compliance
317
+ Circuit breakers on regulatory thresholds, reporting requirements. Balance Index monitoring ensures compliance officers can still get useful answers.
318
+
319
+ ### Enterprise Knowledge Management
320
+ Lower-stakes circuit breakers on HR policies, legal obligations. High utility preservation for general knowledge queries.
321
+
322
+ ---
323
+
324
+ ## LLM Client Adapters
325
+
326
+ ```python
327
+ # OpenAI
328
+ from brix.llm.openai_adapter import OpenAIClient
329
+ client = OpenAIClient(model="gpt-4")
330
+
331
+ # Anthropic
332
+ from brix.llm.anthropic_adapter import AnthropicClient
333
+ client = AnthropicClient(model="claude-sonnet-4-6-20250514")
334
+
335
+ # Mock (testing)
336
+ from brix import MockLLMClient
337
+ client = MockLLMClient(responses=["Response A", "Response B"])
338
+
339
+ # Custom — implement the protocol
340
+ class MyClient:
341
+     async def complete(self, prompt, *, system=None, temperature=0.7, max_tokens=1024):
342
+         return "my response"
343
+ ```
344
+
345
+ ---
346
+
347
+ ## Roadmap
348
+
349
+ - **BRIX Cloud** — Enterprise dashboard, real-time Balance Index monitoring, and compliance reporting for the EU AI Act
350
+ - **Community Registry** — Versioned, peer-reviewed specification repository organized by domain
351
+ - **Certified Templates** — Domain-expert-reviewed specifications for regulated industries (medical, legal, financial)
352
+ - **Agent Framework Integration** — Native support for LangChain, LlamaIndex, and CrewAI pipelines
353
+ - **Streaming Support** — Real-time signal evaluation on streaming LLM responses
354
+
355
+ ---
356
+
357
+ ## Contributing
358
+
359
+ Contributions are welcome. To get started:
360
+
361
+ ```bash
362
+ git clone https://github.com/Serhii2009/brix-protocol.git
363
+ cd brix-protocol
364
+ pip install -e ".[dev]"
365
+ pytest
366
+ ```
367
+
368
+ Before submitting a PR:
369
+ 1. Run `brix lint` on any modified specs
370
+ 2. Ensure `pytest --cov=brix` reports ≥80% coverage
371
+ 3. Add tests for new functionality
372
+
373
+ See [CONTRIBUTING.md](CONTRIBUTING.md) for full guidelines.
374
+
375
+ ---
376
+
377
+ ## License
378
+
379
+ MIT License. Copyright (c) 2026 Serhii Kravchenko. See [LICENSE](LICENSE).