brix-protocol 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- brix/__init__.py +41 -0
- brix/actions/__init__.py +0 -0
- brix/actions/executor.py +168 -0
- brix/analysis/__init__.py +0 -0
- brix/analysis/classifier.py +106 -0
- brix/analysis/consistency.py +87 -0
- brix/analysis/refusal.py +70 -0
- brix/balance/__init__.py +0 -0
- brix/balance/tracker.py +165 -0
- brix/cli/__init__.py +0 -0
- brix/cli/explain.py +120 -0
- brix/cli/generate_tests.py +176 -0
- brix/cli/lint.py +202 -0
- brix/cli/main.py +23 -0
- brix/cli/test_cmd.py +170 -0
- brix/core/__init__.py +0 -0
- brix/core/exceptions.py +29 -0
- brix/core/result.py +54 -0
- brix/core/router.py +211 -0
- brix/engine/__init__.py +0 -0
- brix/engine/circuit_breaker.py +79 -0
- brix/engine/evaluator.py +77 -0
- brix/engine/risk_scorer.py +117 -0
- brix/engine/signal_index.py +107 -0
- brix/llm/__init__.py +0 -0
- brix/llm/anthropic_adapter.py +76 -0
- brix/llm/mock.py +79 -0
- brix/llm/openai_adapter.py +73 -0
- brix/llm/protocol.py +39 -0
- brix/py.typed +0 -0
- brix/sampling/__init__.py +0 -0
- brix/sampling/sampler.py +83 -0
- brix/sampling/tiers.py +57 -0
- brix/spec/__init__.py +0 -0
- brix/spec/defaults.py +22 -0
- brix/spec/loader.py +68 -0
- brix/spec/models.py +95 -0
- brix/specs/__init__.py +0 -0
- brix/specs/general/__init__.py +0 -0
- brix/specs/general/v1.0.0.yaml +242 -0
- brix_protocol-0.1.0.dist-info/METADATA +379 -0
- brix_protocol-0.1.0.dist-info/RECORD +45 -0
- brix_protocol-0.1.0.dist-info/WHEEL +4 -0
- brix_protocol-0.1.0.dist-info/entry_points.txt +2 -0
- brix_protocol-0.1.0.dist-info/licenses/LICENSE +21 -0
brix/__init__.py
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
"""BRIX — Runtime Reliability Infrastructure for LLM Pipelines.
|
|
2
|
+
|
|
3
|
+
BRIX wraps any LLM client and enforces deterministic reliability rules
|
|
4
|
+
defined in a declarative uncertainty.yaml specification, while measuring
|
|
5
|
+
the Balance Index across all interactions.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from brix.core.exceptions import (
|
|
9
|
+
BrixError,
|
|
10
|
+
CircuitBreakerError,
|
|
11
|
+
ClassifierError,
|
|
12
|
+
RegistryError,
|
|
13
|
+
SamplerError,
|
|
14
|
+
SpecValidationError,
|
|
15
|
+
)
|
|
16
|
+
from brix.core.result import ActionTaken, StructuredResult, UncertaintyType
|
|
17
|
+
from brix.core.router import BrixRouter
|
|
18
|
+
from brix.llm.mock import MockLLMClient
|
|
19
|
+
from brix.llm.protocol import LLMClient
|
|
20
|
+
from brix.spec.loader import load_spec, load_spec_from_dict
|
|
21
|
+
from brix.spec.models import SpecModel
|
|
22
|
+
|
|
23
|
+
# Public package API — kept alphabetically sorted (classes/exceptions first,
# then callables); mirrors exactly the names imported above.
__all__ = [
    "ActionTaken",
    "BrixError",
    "BrixRouter",
    "CircuitBreakerError",
    "ClassifierError",
    "LLMClient",
    "MockLLMClient",
    "RegistryError",
    "SamplerError",
    "SpecModel",
    "SpecValidationError",
    "StructuredResult",
    "UncertaintyType",
    "load_spec",
    "load_spec_from_dict",
]

# Package version; keep in sync with the distribution metadata.
__version__ = "0.1.0"
|
brix/actions/__init__.py
ADDED
|
File without changes
|
brix/actions/executor.py
ADDED
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
"""Action executor — dispatches response strategies per uncertainty type.
|
|
2
|
+
|
|
3
|
+
Each uncertainty type produces a meaningfully different response:
|
|
4
|
+
EPISTEMIC → force retrieval augmentation
|
|
5
|
+
CONTRADICTORY → explicit conflict resolution
|
|
6
|
+
OPEN_ENDED → distribution of outcomes
|
|
7
|
+
CERTAIN → passthrough (no intervention)
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
from dataclasses import dataclass
|
|
13
|
+
|
|
14
|
+
from brix.core.result import ActionTaken, UncertaintyType
|
|
15
|
+
from brix.llm.protocol import LLMClient
|
|
16
|
+
from brix.spec.models import SpecModel, UncertaintyTypeDef
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@dataclass(frozen=True, slots=True)
class ActionResult:
    """Result of action execution.

    Immutable value object returned by ActionExecutor.execute().
    """

    # Which intervention strategy was applied (NONE for certain passthrough).
    action_taken: ActionTaken
    # Final response text to deliver to the caller.
    response: str
    # True whenever any intervention (retrieval/conflict/distribution) occurred.
    intervention_necessary: bool
    # Rough extra token cost of the additional samples (chars beyond the
    # first sample, divided by 4).
    cost_tokens_extra: int
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class ActionExecutor:
    """Dispatches a distinct response strategy per uncertainty type.

    The point is that each uncertainty type yields a meaningfully
    different response, not merely a different label on the same output.
    """

    def __init__(self, spec: SpecModel, llm_client: LLMClient) -> None:
        self._llm = llm_client
        # Index the spec's type definitions by name for O(1) template lookup.
        self._type_configs: dict[str, UncertaintyTypeDef] = {
            type_def.name: type_def for type_def in spec.uncertainty_types
        }

    async def execute(
        self,
        uncertainty_type: UncertaintyType,
        samples: list[str],
        query: str,
        force_retrieval: bool = False,
    ) -> ActionResult:
        """Run the action matching the classified uncertainty type.

        Args:
            uncertainty_type: The classified uncertainty type.
            samples: Collected response samples.
            query: Original user query.
            force_retrieval: Whether to force retrieval (from CB hit).

        Returns:
            ActionResult with the final response and action metadata.
        """
        # Certain + no forced retrieval → pure passthrough of the first sample.
        if not force_retrieval and uncertainty_type == UncertaintyType.CERTAIN:
            first = samples[0] if samples else ""
            return ActionResult(
                action_taken=ActionTaken.NONE,
                response=first,
                intervention_necessary=False,
                cost_tokens_extra=0,
            )

        # A circuit-breaker hit always escalates to the epistemic path.
        if force_retrieval or uncertainty_type == UncertaintyType.EPISTEMIC:
            return await self._handle_epistemic(samples, query)

        if uncertainty_type == UncertaintyType.CONTRADICTORY:
            return await self._handle_contradictory(samples, query)

        if uncertainty_type == UncertaintyType.OPEN_ENDED:
            return await self._handle_open_ended(samples, query)

        # Any unrecognized type gets the conservative epistemic treatment.
        return await self._handle_epistemic(samples, query)

    def _template_for(self, type_name: str) -> str:
        """Return the spec-configured message template for a type, or ''."""
        type_def = self._type_configs.get(type_name)
        return type_def.action_config.message_template if type_def else ""

    async def _handle_epistemic(
        self, samples: list[str], query: str
    ) -> ActionResult:
        """Epistemic uncertainty — signal that retrieval augmentation is needed."""
        template = self._template_for("epistemic")
        sample_summary = samples[0] if samples else "No response available."
        response = (
            f"{template.strip()}\n\n"
            f"Based on initial analysis: {sample_summary}\n\n"
            f"[RETRIEVAL_NEEDED] This response requires verification through "
            f"retrieval augmentation. The query '{query}' touches on knowledge "
            f"that may not be reliably represented in the model's training data."
        )

        return ActionResult(
            action_taken=ActionTaken.FORCE_RETRIEVAL,
            response=response,
            intervention_necessary=True,
            cost_tokens_extra=self._estimate_extra_tokens(samples),
        )

    async def _handle_contradictory(
        self, samples: list[str], query: str
    ) -> ActionResult:
        """Contradictory uncertainty — surface the conflict explicitly."""
        template = self._template_for("contradictory")
        # Enumerate each divergent sample as a numbered position.
        conflicts = "\n\n".join(
            f"Position {position}: {sample.strip()}"
            for position, sample in enumerate(samples, 1)
        )
        response = (
            f"{template.strip()}\n\n"
            f"Multiple responses to '{query}' produced conflicting information:\n\n"
            f"{conflicts}\n\n"
            f"[CONFLICT_DETECTED] These positions contain material differences "
            f"that require resolution. The correct answer may depend on specific "
            f"context, jurisdiction, or conditions not specified in the query."
        )

        return ActionResult(
            action_taken=ActionTaken.CONFLICT_RESOLUTION,
            response=response,
            intervention_necessary=True,
            cost_tokens_extra=self._estimate_extra_tokens(samples),
        )

    async def _handle_open_ended(
        self, samples: list[str], query: str
    ) -> ActionResult:
        """Open-ended uncertainty — present the distribution of outcomes."""
        template = self._template_for("open_ended")
        # Enumerate each varied sample as a numbered perspective.
        perspectives = "\n\n".join(
            f"Perspective {position}: {sample.strip()}"
            for position, sample in enumerate(samples, 1)
        )
        response = (
            f"{template.strip()}\n\n"
            f"The query '{query}' has multiple valid answers:\n\n"
            f"{perspectives}\n\n"
            f"[MULTIPLE_PERSPECTIVES] This question does not have a single "
            f"deterministically correct answer. The above perspectives represent "
            f"the distribution of plausible responses."
        )

        return ActionResult(
            action_taken=ActionTaken.DISTRIBUTION_RESPONSE,
            response=response,
            intervention_necessary=True,
            cost_tokens_extra=self._estimate_extra_tokens(samples),
        )

    @staticmethod
    def _estimate_extra_tokens(samples: list[str]) -> int:
        """Rough token cost of the samples beyond the first (~4 chars/token)."""
        extra_samples = samples[1:]
        if not extra_samples:
            return 0
        return sum(len(sample) for sample in extra_samples) // 4
|
|
File without changes
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
"""Uncertainty type classifier using semantic consistency and refusal signals.
|
|
2
|
+
|
|
3
|
+
Classification thresholds:
|
|
4
|
+
- consistency > 0.90, no refusal signals → CERTAIN
|
|
5
|
+
- consistency > 0.90, refusal in ≥2 samples → EPISTEMIC
|
|
6
|
+
- consistency < 0.45 → CONTRADICTORY
|
|
7
|
+
- 0.45 ≤ consistency < 0.70, variance > 0.15 → OPEN_ENDED
|
|
8
|
+
- all other cases → EPISTEMIC (safe fallback)
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
from dataclasses import dataclass
|
|
14
|
+
|
|
15
|
+
from brix.analysis.consistency import ConsistencyResult, SemanticConsistencyAnalyzer
|
|
16
|
+
from brix.analysis.refusal import count_refusals
|
|
17
|
+
from brix.core.result import UncertaintyType
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@dataclass(frozen=True, slots=True)
class ClassificationResult:
    """Result of uncertainty type classification."""

    # The determined type (CERTAIN / EPISTEMIC / CONTRADICTORY / OPEN_ENDED).
    uncertainty_type: UncertaintyType
    # Which threshold rule fired, e.g. "single_sample", "low_consistency",
    # "fallback" — useful for explaining the decision.
    subtype: str
    # Mean pairwise semantic similarity across samples (1.0 for ≤1 sample).
    mean_consistency: float
    # Variance of the pairwise similarities (0.0 for ≤1 sample).
    variance: float
    # Number of samples containing refusal language.
    refusal_count: int
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class UncertaintyClassifier:
    """Classifies uncertainty type from response samples.

    Combines embedding-based similarity (SemanticConsistencyAnalyzer) with
    refusal-detection signals and applies fixed thresholds, so identical
    inputs always yield the identical classification.
    """

    def __init__(self, analyzer: SemanticConsistencyAnalyzer) -> None:
        self._analyzer = analyzer

    def classify(self, samples: list[str]) -> ClassificationResult:
        """Classify the uncertainty type from collected samples.

        Args:
            samples: List of LLM response texts.

        Returns:
            ClassificationResult with the determined uncertainty type.
        """
        # With zero or one sample there is nothing to compare — treat as a
        # low-risk passthrough.
        if len(samples) <= 1:
            return ClassificationResult(
                uncertainty_type=UncertaintyType.CERTAIN,
                subtype="single_sample",
                mean_consistency=1.0,
                variance=0.0,
                refusal_count=0,
            )

        consistency_result = self._analyzer.analyze(samples)
        refusals = count_refusals(samples)

        classified_type, subtype_label = self._apply_thresholds(
            consistency_result, refusals
        )

        return ClassificationResult(
            uncertainty_type=classified_type,
            subtype=subtype_label,
            mean_consistency=consistency_result.mean_similarity,
            variance=consistency_result.variance,
            refusal_count=refusals,
        )

    def _apply_thresholds(
        self,
        consistency: ConsistencyResult,
        refusal_count: int,
    ) -> tuple[UncertaintyType, str]:
        """Map consistency/refusal measurements to an uncertainty type.

        Thresholds:
            sim > 0.90, no refusals        → CERTAIN
            sim > 0.90, ≥2 refusals        → EPISTEMIC
            sim < 0.45                     → CONTRADICTORY
            0.45 ≤ sim < 0.70, var > 0.15  → OPEN_ENDED
            anything else                  → EPISTEMIC (safe fallback)

        Returns:
            Tuple of (UncertaintyType, subtype_string).
        """
        sim = consistency.mean_similarity
        var = consistency.variance

        if sim > 0.90:
            # Highly consistent answers: certain unless the model itself
            # repeatedly refused or deferred.
            if refusal_count == 0:
                return UncertaintyType.CERTAIN, "high_consistency_no_refusal"
            if refusal_count >= 2:
                return UncertaintyType.EPISTEMIC, "high_consistency_with_refusals"
        elif sim < 0.45:
            return UncertaintyType.CONTRADICTORY, "low_consistency"
        elif sim < 0.70 and var > 0.15:
            return UncertaintyType.OPEN_ENDED, "moderate_consistency_high_variance"

        # Everything else (including exactly one refusal at high consistency)
        # defaults to epistemic, the conservative choice.
        return UncertaintyType.EPISTEMIC, "fallback"
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
"""Semantic consistency analyzer using sentence-transformers.
|
|
2
|
+
|
|
3
|
+
Uses the all-MiniLM-L6-v2 model to compute pairwise cosine similarity
|
|
4
|
+
between collected response samples. The model is loaded ONCE at
|
|
5
|
+
initialization and never reloaded per request.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from dataclasses import dataclass
|
|
11
|
+
from typing import Any
|
|
12
|
+
|
|
13
|
+
import numpy as np
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@dataclass(frozen=True, slots=True)
|
|
17
|
+
class ConsistencyResult:
|
|
18
|
+
"""Result of semantic consistency analysis."""
|
|
19
|
+
|
|
20
|
+
mean_similarity: float
|
|
21
|
+
variance: float
|
|
22
|
+
pairwise_similarities: list[float]
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class SemanticConsistencyAnalyzer:
|
|
26
|
+
"""Computes pairwise semantic similarity between response samples.
|
|
27
|
+
|
|
28
|
+
The sentence-transformers model is loaded exactly once during __init__
|
|
29
|
+
and reused for all subsequent calls. No per-request model loading.
|
|
30
|
+
"""
|
|
31
|
+
|
|
32
|
+
def __init__(self, model_name: str = "all-MiniLM-L6-v2") -> None:
|
|
33
|
+
from sentence_transformers import SentenceTransformer
|
|
34
|
+
|
|
35
|
+
self._model: Any = SentenceTransformer(model_name)
|
|
36
|
+
|
|
37
|
+
def analyze(self, samples: list[str]) -> ConsistencyResult:
|
|
38
|
+
"""Compute pairwise cosine similarity between all samples.
|
|
39
|
+
|
|
40
|
+
Args:
|
|
41
|
+
samples: List of response texts (must have at least 2).
|
|
42
|
+
|
|
43
|
+
Returns:
|
|
44
|
+
ConsistencyResult with mean similarity, variance, and all
|
|
45
|
+
pairwise similarity values.
|
|
46
|
+
"""
|
|
47
|
+
if len(samples) < 2:
|
|
48
|
+
return ConsistencyResult(
|
|
49
|
+
mean_similarity=1.0,
|
|
50
|
+
variance=0.0,
|
|
51
|
+
pairwise_similarities=[1.0],
|
|
52
|
+
)
|
|
53
|
+
|
|
54
|
+
# Encode all samples at once (batch operation)
|
|
55
|
+
embeddings = self._model.encode(samples, convert_to_numpy=True)
|
|
56
|
+
embeddings = np.array(embeddings)
|
|
57
|
+
|
|
58
|
+
# Normalize for cosine similarity
|
|
59
|
+
norms = np.linalg.norm(embeddings, axis=1, keepdims=True)
|
|
60
|
+
norms = np.where(norms == 0, 1.0, norms)
|
|
61
|
+
normalized = embeddings / norms
|
|
62
|
+
|
|
63
|
+
# Compute pairwise cosine similarity matrix
|
|
64
|
+
sim_matrix = np.dot(normalized, normalized.T)
|
|
65
|
+
|
|
66
|
+
# Extract upper triangle (excluding diagonal)
|
|
67
|
+
n = len(samples)
|
|
68
|
+
pairwise: list[float] = []
|
|
69
|
+
for i in range(n):
|
|
70
|
+
for j in range(i + 1, n):
|
|
71
|
+
pairwise.append(float(sim_matrix[i, j]))
|
|
72
|
+
|
|
73
|
+
if not pairwise:
|
|
74
|
+
return ConsistencyResult(
|
|
75
|
+
mean_similarity=1.0,
|
|
76
|
+
variance=0.0,
|
|
77
|
+
pairwise_similarities=[1.0],
|
|
78
|
+
)
|
|
79
|
+
|
|
80
|
+
mean_sim = float(np.mean(pairwise))
|
|
81
|
+
var_sim = float(np.var(pairwise))
|
|
82
|
+
|
|
83
|
+
return ConsistencyResult(
|
|
84
|
+
mean_similarity=mean_sim,
|
|
85
|
+
variance=var_sim,
|
|
86
|
+
pairwise_similarities=pairwise,
|
|
87
|
+
)
|
brix/analysis/refusal.py
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
"""Refusal detection heuristics for LLM response samples.
|
|
2
|
+
|
|
3
|
+
Detects common patterns indicating the model is refusing to answer,
|
|
4
|
+
acknowledging limitations, or deferring to human expertise.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
# Common refusal/deferral phrases found in LLM responses
|
|
10
|
+
# Lowercase substrings that signal refusal, deferral, or capability
# disclaimers in an LLM response; matching is case-insensitive.
_REFUSAL_PATTERNS: list[str] = [
    "i cannot",
    "i can't",
    "i'm not able to",
    "i am not able to",
    "i'm unable to",
    "i am unable to",
    "i don't have",
    "i do not have",
    "i shouldn't",
    "i should not",
    "consult a professional",
    "consult a doctor",
    "consult a lawyer",
    "consult an attorney",
    "consult a financial advisor",
    "seek professional",
    "seek medical",
    "seek legal",
    "not qualified to",
    "beyond my capabilities",
    "outside my expertise",
    "i'm not a doctor",
    "i'm not a lawyer",
    "i'm not a financial",
    "not medical advice",
    "not legal advice",
    "not financial advice",
    "please consult",
    "please seek",
    "i must decline",
    "i have to decline",
    "i apologize, but i cannot",
    "as an ai",
    "as a language model",
]


def detect_refusal(text: str) -> bool:
    """Check whether a single response contains refusal language.

    Args:
        text: A single LLM response text.

    Returns:
        True if any known refusal pattern occurs in the text
        (case-insensitive substring match).
    """
    haystack = text.lower()
    for pattern in _REFUSAL_PATTERNS:
        if pattern in haystack:
            return True
    return False


def count_refusals(samples: list[str]) -> int:
    """Count how many samples contain refusal language.

    Args:
        samples: List of LLM response texts.

    Returns:
        Number of samples for which detect_refusal() fires.
    """
    total = 0
    for sample in samples:
        if detect_refusal(sample):
            total += 1
    return total
|
brix/balance/__init__.py
ADDED
|
File without changes
|
brix/balance/tracker.py
ADDED
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
"""Balance Index tracker — TP/FN/TN/FP counters and harmonic mean computation.
|
|
2
|
+
|
|
3
|
+
The Balance Index is defined as:
|
|
4
|
+
Balance Index = 2 * (R * U) / (R + U)
|
|
5
|
+
where R = TP / (TP + FN), U = TN / (TN + FP)
|
|
6
|
+
|
|
7
|
+
Supports both heuristic defaults and explicit feedback from the caller.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
from dataclasses import dataclass, field
|
|
13
|
+
from uuid import UUID
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@dataclass
|
|
17
|
+
class BalanceState:
|
|
18
|
+
"""Current state of the balance tracker counters."""
|
|
19
|
+
|
|
20
|
+
tp: int = 0 # Risky query correctly intercepted
|
|
21
|
+
fn: int = 0 # Risky query passed without intervention
|
|
22
|
+
tn: int = 0 # Safe query correctly passed
|
|
23
|
+
fp: int = 0 # Safe query incorrectly intercepted
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class BalanceTracker:
|
|
27
|
+
"""Tracks reliability and utility metrics across a session.
|
|
28
|
+
|
|
29
|
+
Computes the running Balance Index after every request. Supports
|
|
30
|
+
both heuristic auto-classification and explicit feedback via
|
|
31
|
+
the feedback() method.
|
|
32
|
+
"""
|
|
33
|
+
|
|
34
|
+
def __init__(self) -> None:
|
|
35
|
+
self._state = BalanceState()
|
|
36
|
+
self._pending: dict[UUID, _PendingDecision] = {}
|
|
37
|
+
|
|
38
|
+
@property
|
|
39
|
+
def state(self) -> BalanceState:
|
|
40
|
+
"""Current counter state."""
|
|
41
|
+
return self._state
|
|
42
|
+
|
|
43
|
+
def record_decision(
|
|
44
|
+
self,
|
|
45
|
+
decision_id: UUID,
|
|
46
|
+
intervention_necessary: bool,
|
|
47
|
+
circuit_breaker_hit: bool,
|
|
48
|
+
risk_score: float,
|
|
49
|
+
) -> tuple[bool, bool, float]:
|
|
50
|
+
"""Record a decision using heuristic classification.
|
|
51
|
+
|
|
52
|
+
Heuristic rules:
|
|
53
|
+
- CB hit + intervention → TP (assumed risky, correctly caught)
|
|
54
|
+
- High risk (>0.70) + intervention → TP
|
|
55
|
+
- Medium risk + intervention → TP (conservative)
|
|
56
|
+
- Low risk + no intervention → TN (safe, correctly passed)
|
|
57
|
+
- Low risk + intervention → FP (safe, incorrectly caught)
|
|
58
|
+
- High risk + no intervention → FN (risky, missed)
|
|
59
|
+
|
|
60
|
+
Also stores the decision for possible later feedback override.
|
|
61
|
+
|
|
62
|
+
Args:
|
|
63
|
+
decision_id: Unique ID for this decision.
|
|
64
|
+
intervention_necessary: Whether intervention was applied.
|
|
65
|
+
circuit_breaker_hit: Whether a CB fired.
|
|
66
|
+
risk_score: Computed risk score.
|
|
67
|
+
|
|
68
|
+
Returns:
|
|
69
|
+
Tuple of (reliability_signal, utility_signal, balance_index).
|
|
70
|
+
"""
|
|
71
|
+
is_risky = circuit_breaker_hit or risk_score > 0.40
|
|
72
|
+
|
|
73
|
+
if is_risky and intervention_necessary:
|
|
74
|
+
self._state.tp += 1
|
|
75
|
+
reliability_signal = True
|
|
76
|
+
utility_signal = True
|
|
77
|
+
elif is_risky and not intervention_necessary:
|
|
78
|
+
self._state.fn += 1
|
|
79
|
+
reliability_signal = False
|
|
80
|
+
utility_signal = True
|
|
81
|
+
elif not is_risky and not intervention_necessary:
|
|
82
|
+
self._state.tn += 1
|
|
83
|
+
reliability_signal = True
|
|
84
|
+
utility_signal = True
|
|
85
|
+
else: # not risky, but intervention happened
|
|
86
|
+
self._state.fp += 1
|
|
87
|
+
reliability_signal = True
|
|
88
|
+
utility_signal = False
|
|
89
|
+
|
|
90
|
+
# Store for potential feedback override
|
|
91
|
+
self._pending[decision_id] = _PendingDecision(
|
|
92
|
+
is_risky=is_risky,
|
|
93
|
+
intervention_applied=intervention_necessary,
|
|
94
|
+
)
|
|
95
|
+
|
|
96
|
+
balance = self.compute_balance_index()
|
|
97
|
+
return reliability_signal, utility_signal, balance
|
|
98
|
+
|
|
99
|
+
def feedback(self, decision_id: UUID, was_intervention_necessary: bool) -> None:
|
|
100
|
+
"""Provide ground-truth feedback to correct heuristic classification.
|
|
101
|
+
|
|
102
|
+
Args:
|
|
103
|
+
decision_id: The decision to correct.
|
|
104
|
+
was_intervention_necessary: True if intervention was actually needed.
|
|
105
|
+
"""
|
|
106
|
+
pending = self._pending.pop(decision_id, None)
|
|
107
|
+
if pending is None:
|
|
108
|
+
return
|
|
109
|
+
|
|
110
|
+
# Reverse the heuristic classification
|
|
111
|
+
if pending.is_risky and pending.intervention_applied:
|
|
112
|
+
self._state.tp -= 1
|
|
113
|
+
elif pending.is_risky and not pending.intervention_applied:
|
|
114
|
+
self._state.fn -= 1
|
|
115
|
+
elif not pending.is_risky and not pending.intervention_applied:
|
|
116
|
+
self._state.tn -= 1
|
|
117
|
+
else:
|
|
118
|
+
self._state.fp -= 1
|
|
119
|
+
|
|
120
|
+
# Apply ground-truth classification
|
|
121
|
+
actually_risky = was_intervention_necessary
|
|
122
|
+
if actually_risky and pending.intervention_applied:
|
|
123
|
+
self._state.tp += 1
|
|
124
|
+
elif actually_risky and not pending.intervention_applied:
|
|
125
|
+
self._state.fn += 1
|
|
126
|
+
elif not actually_risky and not pending.intervention_applied:
|
|
127
|
+
self._state.tn += 1
|
|
128
|
+
else:
|
|
129
|
+
self._state.fp += 1
|
|
130
|
+
|
|
131
|
+
def compute_balance_index(self) -> float:
|
|
132
|
+
"""Compute the current Balance Index.
|
|
133
|
+
|
|
134
|
+
Balance Index = 2 * R * U / (R + U)
|
|
135
|
+
where R = TP / (TP + FN), U = TN / (TN + FP)
|
|
136
|
+
|
|
137
|
+
Returns 0.0 if insufficient data to compute.
|
|
138
|
+
"""
|
|
139
|
+
r = self._reliability_score()
|
|
140
|
+
u = self._utility_score()
|
|
141
|
+
if r + u == 0:
|
|
142
|
+
return 0.0
|
|
143
|
+
return 2.0 * r * u / (r + u)
|
|
144
|
+
|
|
145
|
+
def _reliability_score(self) -> float:
|
|
146
|
+
"""R = TP / (TP + FN). Returns 0.0 if no relevant data."""
|
|
147
|
+
total = self._state.tp + self._state.fn
|
|
148
|
+
if total == 0:
|
|
149
|
+
return 0.0
|
|
150
|
+
return self._state.tp / total
|
|
151
|
+
|
|
152
|
+
def _utility_score(self) -> float:
|
|
153
|
+
"""U = TN / (TN + FP). Returns 0.0 if no relevant data."""
|
|
154
|
+
total = self._state.tn + self._state.fp
|
|
155
|
+
if total == 0:
|
|
156
|
+
return 0.0
|
|
157
|
+
return self._state.tn / total
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
@dataclass(frozen=True, slots=True)
|
|
161
|
+
class _PendingDecision:
|
|
162
|
+
"""Internal record of a decision awaiting potential feedback."""
|
|
163
|
+
|
|
164
|
+
is_risky: bool
|
|
165
|
+
intervention_applied: bool
|
brix/cli/__init__.py
ADDED
|
File without changes
|