genassert-0.2.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
genassert/__init__.py ADDED
@@ -0,0 +1,43 @@
1
+ """
2
+ genassert — pytest-native semantic testing for generative AI applications.
3
+
4
+ Drop-in pytest plugin. No servers. No SaaS. No config.
5
+ Works with OpenAI, Anthropic, LiteLLM, or any LLM client.
6
+ """
7
+
8
+ from genassert.assertions.intent import assert_intent
9
+ from genassert.assertions.tone import assert_tone
10
+ from genassert.assertions.hallucination import assert_no_hallucination
11
+ from genassert.assertions.budget import assert_token_budget
12
+ from genassert.assertions.schema import assert_schema
13
+ from genassert.assertions.similarity import assert_similar_to
14
+ from genassert.assertions.language import assert_language
15
+ from genassert.assertions.pii import assert_no_pii, PIIMatch
16
+ from genassert.assertions.readability import assert_readability
17
+ from genassert.assertions.sentiment import assert_sentiment
18
+ from genassert.baseline import record_baseline, compare_baseline
19
+ from genassert.judge import LocalJudge
20
+
21
+ __version__ = "0.2.0"
22
+ __all__ = [
23
+ # Core semantic assertions
24
+ "assert_intent",
25
+ "assert_tone",
26
+ "assert_no_hallucination",
27
+ "assert_similar_to",
28
+ # Structural assertions
29
+ "assert_token_budget",
30
+ "assert_schema",
31
+ # Advanced assertions (new in 0.2.0)
32
+ "assert_language",
33
+ "assert_no_pii",
34
+ "assert_readability",
35
+ "assert_sentiment",
36
+ # Types
37
+ "PIIMatch",
38
+ # Baseline regression
39
+ "record_baseline",
40
+ "compare_baseline",
41
+ # Local judge
42
+ "LocalJudge",
43
+ ]
genassert/_embed.py ADDED
@@ -0,0 +1,113 @@
1
+ """
2
+ Embedding backend for genassert.
3
+
4
+ Priority order (auto-detected):
5
+ 1. sentence-transformers (local, free, fast) — recommended
6
+ 2. openai embeddings (requires OPENAI_API_KEY)
7
+ 3. numpy hash-based fallback (no deps, for smoke tests only)
8
+
9
+ Set GENASSERT_EMBED_BACKEND=openai|local|fallback to force a backend.
10
+ """
11
+
12
+ from __future__ import annotations
13
+ import os
14
+ import hashlib
15
+ import math
16
+ from functools import lru_cache
17
+
18
+ _BACKEND_ENV = "GENASSERT_EMBED_BACKEND"
19
+ _MODEL_ENV = "GENASSERT_EMBED_MODEL"
20
+
21
+ _DEFAULT_LOCAL_MODEL = "all-MiniLM-L6-v2"
22
+ _DEFAULT_OPENAI_MODEL = "text-embedding-3-small"
23
+
24
+
25
@lru_cache(maxsize=512)
def embed_text(text: str) -> tuple[float, ...]:
    """
    Embed `text` into a float vector using the best available backend.

    The backend is chosen per call from the GENASSERT_EMBED_BACKEND
    environment variable: "openai", "local"/"sentence-transformers",
    "fallback", or anything else (including the default "auto") for
    auto-detection.

    Results are cached in-process for performance.
    NOTE(review): the cache key is the text alone — vectors computed before
    a mid-process change of GENASSERT_EMBED_BACKEND are served from cache;
    confirm that is acceptable.
    """
    choice = os.environ.get(_BACKEND_ENV, "auto").lower()
    if choice == "openai":
        return _embed_openai(text)
    if choice in ("local", "sentence-transformers"):
        return _embed_local(text)
    if choice == "fallback":
        return _embed_fallback(text)
    # Default / unrecognized value: probe backends in priority order.
    return _embed_auto(text)
42
+
43
+
44
def _embed_auto(text: str) -> tuple[float, ...]:
    """Probe backends in priority order: local model, then OpenAI, then hash fallback."""
    candidates = (
        (_embed_local, (ImportError,)),
        (_embed_openai, (ImportError, RuntimeError)),
    )
    for backend, recoverable in candidates:
        try:
            return backend(text)
        except recoverable:
            continue
    # Last resort: dependency-free pseudo-embedding (smoke tests only).
    return _embed_fallback(text)
55
+
56
+
57
def _embed_local(text: str) -> tuple[float, ...]:
    """Embed `text` with a sentence-transformers model (local, no API cost)."""
    try:
        from sentence_transformers import SentenceTransformer  # noqa: F401 — availability probe
    except ImportError:
        raise ImportError(
            "sentence-transformers not installed.\n"
            " Install: pip install sentence-transformers\n"
            " Or set GENASSERT_EMBED_BACKEND=openai"
        )
    chosen = os.environ.get(_MODEL_ENV, _DEFAULT_LOCAL_MODEL)
    encoded = _get_local_model(chosen).encode(text, normalize_embeddings=True)
    return tuple(float(component) for component in encoded)
71
+
72
+
73
@lru_cache(maxsize=4)
def _get_local_model(model_name: str):
    """Load and cache a SentenceTransformer by name.

    Model construction is expensive (weights are loaded from disk or
    downloaded), so up to 4 models are kept alive per process.
    """
    # Import deferred so merely importing this module never requires
    # sentence-transformers to be installed.
    from sentence_transformers import SentenceTransformer
    return SentenceTransformer(model_name)
77
+
78
+
79
def _embed_openai(text: str) -> tuple[float, ...]:
    """Embed `text` via the OpenAI embeddings API (requires OPENAI_API_KEY)."""
    try:
        import openai
    except ImportError:
        raise ImportError("openai not installed. pip install openai")

    key = os.environ.get("OPENAI_API_KEY")
    if not key:
        raise RuntimeError(
            "OPENAI_API_KEY not set. "
            "Set it or use GENASSERT_EMBED_BACKEND=local"
        )

    chosen = os.environ.get(_MODEL_ENV, _DEFAULT_OPENAI_MODEL)
    client = openai.OpenAI(api_key=key)
    embedding = client.embeddings.create(input=[text], model=chosen).data[0].embedding
    return tuple(map(float, embedding))
98
+
99
+
100
def _embed_fallback(text: str) -> tuple[float, ...]:
    """
    Hash-based pseudo-embedding. No dependencies, but ONLY suitable for
    smoke tests. Will NOT produce meaningful semantic similarity scores.

    Each of the 384 components is derived deterministically from
    sha256(f"{i}:{text}") mapped into [-0.5, 0.5); the vector is then
    L2-normalized.
    """
    components = [
        int.from_bytes(
            hashlib.sha256(f"{i}:{text}".encode()).digest()[:4], "big"
        ) / (2**32) - 0.5
        for i in range(384)
    ]
    norm = math.sqrt(sum(c * c for c in components))
    return tuple(c / norm for c in components)
@@ -0,0 +1 @@
1
+ # assertions package
@@ -0,0 +1,62 @@
1
+ """
2
+ assert_token_budget: Verify LLM response stays within a token limit.
3
+ """
4
+
5
+ from __future__ import annotations
6
+
7
+
8
def assert_token_budget(
    response: str,
    max_tokens: int,
    tokenizer: str = "approx",
) -> None:
    """
    Assert that `response` does not exceed `max_tokens`.

    Parameters
    ----------
    response:
        The LLM output to evaluate.
    max_tokens:
        Maximum allowed token count.
    tokenizer:
        "approx" uses a fast word-based approximation (~1.3 tokens/word).
        "tiktoken" uses OpenAI's tiktoken (requires `pip install tiktoken`).
        "chars" uses character count divided by 4.

    Raises
    ------
    AssertionError
        If the response exceeds the token budget.

    Examples
    --------
    >>> assert_token_budget(response, max_tokens=200)
    >>> assert_token_budget(response, max_tokens=500, tokenizer="tiktoken")
    """
    actual = _count_tokens(response, tokenizer)
    if actual <= max_tokens:
        return
    raise AssertionError(
        f"Token budget exceeded.\n"
        f" Max allowed : {max_tokens} tokens\n"
        f" Actual count: {actual} tokens (method: {tokenizer!r})\n"
        f" Response preview: {response[:200]!r}"
    )
45
+
46
+
47
def _count_tokens(text: str, method: str) -> int:
    """Estimate the token count of `text` using `method` ("tiktoken", "chars", or default approx)."""
    if method == "tiktoken":
        try:
            import tiktoken
            encoder = tiktoken.get_encoding("cl100k_base")
            return len(encoder.encode(text))
        except ImportError:
            raise ImportError(
                "tokenizer='tiktoken' requires: pip install tiktoken"
            )
    if method == "chars":
        return len(text) // 4
    # Any other value falls through to "approx":
    # ~1.3 tokens per word is a reasonable average for English.
    return int(len(text.split()) * 1.3)
@@ -0,0 +1,89 @@
1
+ """
2
+ assert_no_hallucination: Verify LLM output doesn't contradict known facts.
3
+
4
+ Checks that factual claims in the response are consistent with a provided
5
+ list of ground-truth facts, using semantic similarity and contradiction detection.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+
11
def assert_no_hallucination(
    response: str,
    known_facts: list[str],
    contradiction_threshold: float = 0.85,
) -> None:
    """
    Assert that `response` does not contradict any of the `known_facts`.

    This does NOT verify that every fact is mentioned — each fact is
    negated (see `_negate_fact`) and the response embedding is compared to
    the negation; high similarity to a negation flags a contradiction.

    Parameters
    ----------
    response:
        The LLM output to evaluate.
    known_facts:
        Factual statements that must NOT be contradicted,
        e.g. ["The capital of France is Paris", "The product costs $49/month"].
    contradiction_threshold:
        Similarity cutoff above which a fact's negation counts as matched.
        Default 0.85.

    Raises
    ------
    AssertionError
        If the response appears to contradict one or more known facts.

    Examples
    --------
    >>> assert_no_hallucination(
    ...     response,
    ...     known_facts=["Python was created by Guido van Rossum in 1991"]
    ... )
    """
    from genassert._embed import embed_text
    from genassert.assertions.intent import _cosine_similarity

    response_vec = embed_text(response)

    flagged: list[tuple[str, float]] = []
    for fact in known_facts:
        # High similarity to the *negated* fact suggests the response
        # asserts the opposite of the fact.
        score = _cosine_similarity(response_vec, embed_text(_negate_fact(fact)))
        if score > contradiction_threshold:
            flagged.append((fact, score))

    if not flagged:
        return

    details = "\n".join(
        f" - Contradicts: {fact!r} (score: {score:.3f})"
        for fact, score in flagged
    )
    raise AssertionError(
        f"Hallucination detected — response contradicts known facts:\n{details}\n"
        f" Response preview: {response[:200]!r}"
    )
69
+
70
+
71
def _negate_fact(fact: str) -> str:
    """Produce a simple semantic negation of a fact for contradiction detection.

    Strategy:
      1. If the fact already contains a negation phrase ("is not", "cannot",
         ...), remove it — the opposite of a negative claim is the positive one.
      2. Otherwise insert the negation at the first matching verb.
      3. If no known verb is present, prefix "It is false that".

    Matching is case-insensitive and anchored on word boundaries. The
    previous substring-based version matched "is" inside words like "This"
    (producing garbled output such as "This not is fine"), matched "can"
    inside "cannot" ("cannotnot"), and — because it tested against the
    lowercased fact but replaced in the original-case fact — silently
    returned uppercase facts UN-negated, defeating contradiction detection.
    """
    import re  # local import keeps this edit self-contained

    fact = fact.strip()
    # (negated form, positive form) pairs, checked in order.
    pairs = [
        ("is not", "is"),
        ("are not", "are"),
        ("was not", "was"),
        ("were not", "were"),
        ("does not", "does"),
        ("did not", "did"),
        ("cannot", "can"),
        ("will not", "will"),
    ]

    # Pass 1: fact is already negative -> strip the negation.
    for neg, pos in pairs:
        if re.search(rf"\b{re.escape(neg)}\b", fact, flags=re.IGNORECASE):
            return re.sub(rf"\b{re.escape(neg)}\b", pos, fact, count=1, flags=re.IGNORECASE)

    # Pass 2: fact is positive -> negate the first matching verb.
    for neg, pos in pairs:
        if re.search(rf"\b{re.escape(pos)}\b", fact, flags=re.IGNORECASE):
            return re.sub(rf"\b{re.escape(pos)}\b", neg, fact, count=1, flags=re.IGNORECASE)

    # Fallback: no recognizable verb to negate.
    return f"It is false that {fact}"
@@ -0,0 +1,62 @@
1
+ """
2
+ assert_intent: Check that an LLM response addresses the expected intent.
3
+
4
+ Uses embedding cosine similarity to compare the response against a
5
+ natural-language description of what the response should convey.
6
+ """
7
+
8
+ from __future__ import annotations
9
+ import os
10
+ from genassert._embed import embed_text
11
+
12
+
13
def assert_intent(
    response: str,
    expected_intent: str,
    threshold: float = 0.72,
    model: str | None = None,
) -> None:
    """
    Assert that `response` semantically matches the `expected_intent`.

    Both texts are embedded and compared with cosine similarity; the
    assertion fails when the score falls below `threshold`.

    Parameters
    ----------
    response:
        The LLM output to evaluate.
    expected_intent:
        A plain-English description of what the response should convey,
        e.g. "a concise summary of the article".
    threshold:
        Cosine similarity threshold (0–1). Default 0.72 works well for
        most English text pairs. Lower = more lenient, higher = stricter.
    model:
        Optional embedding model override.
        NOTE(review): currently unreferenced in this function — embed_text()
        selects its model from the environment; confirm before relying on it.

    Raises
    ------
    AssertionError
        If the cosine similarity between `response` and `expected_intent`
        is below `threshold`.

    Examples
    --------
    >>> assert_intent(response, "a polite refusal to the user's request")
    >>> assert_intent(response, "Python code that reads a CSV file", threshold=0.80)
    """
    score = _cosine_similarity(embed_text(response), embed_text(expected_intent))
    if score >= threshold:
        return
    raise AssertionError(
        f"Intent assertion failed.\n"
        f" Expected intent : {expected_intent!r}\n"
        f" Cosine similarity: {score:.3f} (threshold: {threshold})\n"
        f" Response preview : {response[:200]!r}"
    )
54
+
55
+
56
def _cosine_similarity(a: list[float], b: list[float]) -> float:
    """Cosine of the angle between `a` and `b`; 0.0 if either vector is zero."""
    dot_product = sum(x * y for x, y in zip(a, b))
    mag_a = sum(x * x for x in a) ** 0.5
    mag_b = sum(y * y for y in b) ** 0.5
    if not mag_a or not mag_b:
        return 0.0
    return dot_product / (mag_a * mag_b)
@@ -0,0 +1,145 @@
1
+ """
2
+ assert_language: Verify LLM response is written in the expected language.
3
+
4
+ Uses character n-gram frequency profiles to detect language without
5
+ any external dependencies. Supports 20+ common languages.
6
+ """
7
+
8
+ from __future__ import annotations
9
+ import re
10
+ import unicodedata
11
+
12
+ # Script/character-range based fast detection
13
+ _SCRIPT_HINTS: list[tuple[str, tuple[int, int]]] = [
14
+ ("arabic", (0x0600, 0x06FF)),
15
+ ("hebrew", (0x0590, 0x05FF)),
16
+ ("chinese", (0x4E00, 0x9FFF)),
17
+ ("japanese", (0x3040, 0x30FF)),
18
+ ("korean", (0xAC00, 0xD7AF)),
19
+ ("thai", (0x0E00, 0x0E7F)),
20
+ ("greek", (0x0370, 0x03FF)),
21
+ ("cyrillic", (0x0400, 0x04FF)),
22
+ ("devanagari", (0x0900, 0x097F)),
23
+ ]
24
+
25
+ # Common high-frequency words per language (stop-word fingerprint)
26
+ _STOPWORDS: dict[str, list[str]] = {
27
+ "english": ["the", "and", "is", "in", "to", "of", "a", "that", "it", "was"],
28
+ "spanish": ["el", "la", "de", "que", "en", "los", "se", "las", "un", "una"],
29
+ "french": ["le", "la", "les", "de", "et", "en", "un", "une", "du", "que"],
30
+ "german": ["der", "die", "das", "und", "in", "den", "von", "zu", "ist", "mit"],
31
+ "portuguese": ["de", "da", "do", "que", "em", "os", "as", "um", "uma", "para"],
32
+ "italian": ["il", "la", "di", "che", "e", "un", "una", "in", "del", "per"],
33
+ "dutch": ["de", "het", "een", "van", "en", "in", "is", "dat", "op", "te"],
34
+ "russian": ["и", "в", "не", "на", "я", "что", "с", "он", "как", "это"],
35
+ "polish": ["i", "w", "nie", "to", "się", "na", "jest", "z", "że", "do"],
36
+ "turkish": ["bir", "bu", "ve", "da", "de", "için", "ile", "var", "ne", "mi"],
37
+ "swedish": ["och", "i", "en", "att", "det", "av", "på", "är", "för", "med"],
38
+ "norwegian": ["og", "i", "en", "er", "til", "av", "på", "et", "som", "for"],
39
+ "danish": ["og", "i", "en", "er", "til", "af", "på", "et", "som", "for"],
40
+ "finnish": ["ja", "on", "ei", "se", "että", "hän", "oli", "en", "niin", "jo"],
41
+ "czech": ["a", "je", "to", "v", "se", "na", "že", "z", "do", "s"],
42
+ "hungarian": ["a", "az", "és", "hogy", "nem", "van", "egy", "is", "de", "meg"],
43
+ "romanian": ["și", "în", "de", "că", "este", "la", "cu", "se", "nu", "din"],
44
+ "indonesian": ["yang", "dan", "di", "ke", "dari", "ini", "itu", "untuk", "dengan", "ada"],
45
+ }
46
+
47
+ _LANG_ALIASES: dict[str, str] = {
48
+ "en": "english", "es": "spanish", "fr": "french", "de": "german",
49
+ "pt": "portuguese", "it": "italian", "nl": "dutch", "ru": "russian",
50
+ "pl": "polish", "tr": "turkish", "sv": "swedish", "no": "norwegian",
51
+ "da": "danish", "fi": "finnish", "cs": "czech", "hu": "hungarian",
52
+ "ro": "romanian", "id": "indonesian",
53
+ "ar": "arabic", "he": "hebrew", "zh": "chinese", "ja": "japanese",
54
+ "ko": "korean", "th": "thai", "el": "greek", "hi": "devanagari",
55
+ }
56
+
57
+
58
def assert_language(
    response: str,
    expected_language: str,
    min_confidence: float = 0.3,
) -> str:
    """
    Assert that `response` is written in the expected language.

    Parameters
    ----------
    response:
        The LLM output to evaluate.
    expected_language:
        Language name (e.g. "english", "spanish", "french") or ISO 639-1
        code (e.g. "en", "es", "fr").
    min_confidence:
        Minimum confidence score (0–1) to accept the detection. Default 0.3.

    Returns
    -------
    str
        The detected language name.

    Raises
    ------
    AssertionError
        If the detected language doesn't match expected, or matches with
        confidence below `min_confidence`.
    ValueError
        If expected_language is not recognised.

    Examples
    --------
    >>> assert_language(response, "english")
    >>> assert_language(response, "fr")  # ISO code
    >>> assert_language(response, "spanish")
    """
    lang = expected_language.lower().strip()
    lang = _LANG_ALIASES.get(lang, lang)

    # Fix: the docstring promised ValueError for an unrecognised language,
    # but no validation existed — a typo like "englsh" previously fell
    # through to an AssertionError that blamed the response, not the test.
    recognised = set(_STOPWORDS) | {name for name, _ in _SCRIPT_HINTS}
    if lang not in recognised:
        raise ValueError(
            f"Unrecognised expected_language: {expected_language!r}. "
            f"Valid names: {sorted(recognised)}; aliases: {sorted(_LANG_ALIASES)}"
        )

    detected, confidence = _detect_language(response)

    if detected != lang:
        raise AssertionError(
            f"Language assertion failed.\n"
            f" Expected language : {lang!r}\n"
            f" Detected language : {detected!r} (confidence: {confidence:.2f})\n"
            f" Response preview : {response[:150]!r}"
        )
    if confidence < min_confidence:
        raise AssertionError(
            f"Language detected as {detected!r} but confidence too low.\n"
            f" Confidence: {confidence:.2f} (min: {min_confidence})\n"
            f" Response preview: {response[:150]!r}"
        )
    return detected
113
+
114
+
115
def _detect_language(text: str) -> tuple[str, float]:
    """Return (language_name, confidence) for `text`."""
    if not text.strip():
        return "unknown", 0.0

    # Fast path: non-Latin scripts are identified by codepoint ranges alone.
    total_chars = max(len(text), 1)
    for lang, (lo, hi) in _SCRIPT_HINTS:
        in_range = sum(lo <= ord(ch) <= hi for ch in text)
        share = in_range / total_chars
        if share > 0.2:
            return lang, min(1.0, share * 3)

    # Latin-script languages: score each by stop-word coverage.
    vocabulary = set(re.findall(r"\b[a-zA-ZÀ-ÿ]+\b", text.lower()))
    scores = {
        lang: sum(word in vocabulary for word in stopwords) / len(stopwords)
        for lang, stopwords in _STOPWORDS.items()
    }
    if not scores:
        return "unknown", 0.0

    ranked = sorted(scores.items(), key=lambda item: item[1], reverse=True)
    best_lang, best_score = ranked[0]
    runner_up = ranked[1][1] if len(ranked) > 1 else 0.0

    # Confidence is the (scaled) margin over the runner-up language.
    margin = best_score - runner_up if best_score > 0 else 0.0
    return best_lang, min(1.0, margin * 5)
@@ -0,0 +1,107 @@
1
+ """
2
+ assert_no_pii: Detect PII (Personally Identifiable Information) leakage in LLM responses.
3
+
4
+ Checks for emails, phone numbers, SSNs, credit card numbers, IP addresses,
5
+ and common name/address patterns — all via regex, zero dependencies.
6
+ """
7
+
8
+ from __future__ import annotations
9
+ import re
10
+ from dataclasses import dataclass, field
11
+
12
+
13
@dataclass
class PIIMatch:
    """One PII hit found by assert_no_pii()."""
    # Category key, e.g. "email" or "ssn" (a key of _PATTERNS).
    type: str
    # The matched text, partially redacted via _redact() — safe to display.
    value: str
    # Start offset of the match within the scanned response.
    start: int
    # End offset (exclusive) of the match within the scanned response.
    end: int
19
+
20
+
21
# ── PII Patterns ──────────────────────────────────────────────────────────────

# Regex per PII category. NOTE(review): these are heuristics —
#   * "ssn" (3-2-4 digits) also matches many US phone-number fragments;
#   * "credit_card" checks the 4x4-digit shape only, with no Luhn validation;
#   * "ip_address" accepts out-of-range octets such as 999.999.999.999;
#   * "passport" (1-2 letters + 6-9 digits) will hit many product/order codes.
# Expect false positives; tune with assert_no_pii's `checks`/`allow` params.
_PATTERNS: dict[str, str] = {
    "email": r"\b[A-Za-z0-9._%+\-]+@[A-Za-z0-9.\-]+\.[A-Za-z]{2,}\b",
    "phone_us": r"\b(\+1[\s\-.]?)?\(?\d{3}\)?[\s\-.]?\d{3}[\s\-.]?\d{4}\b",
    "phone_intl": r"\+\d{1,3}[\s\-.]?\(?\d{1,4}\)?[\s\-.]?\d{1,4}[\s\-.]?\d{1,9}",
    "ssn": r"\b\d{3}[-\s]?\d{2}[-\s]?\d{4}\b",
    "credit_card": r"\b(?:\d{4}[\s\-]?){3}\d{4}\b",
    "ip_address": r"\b(?:\d{1,3}\.){3}\d{1,3}\b",
    "date_of_birth": r"\b(?:DOB|date of birth|born)[:\s]+\d{1,2}[\/\-]\d{1,2}[\/\-]\d{2,4}\b",
    "passport": r"\b[A-Z]{1,2}\d{6,9}\b",
    "iban": r"\b[A-Z]{2}\d{2}[A-Z0-9]{4}\d{7}([A-Z0-9]?){0,16}\b",
}

# Case-insensitive compiled patterns, keyed by the same category names.
# NOTE(review): IGNORECASE also lowers the letter classes in "passport"/"iban",
# so they match lowercase text too — confirm that broadening is intended.
_COMPILED = {name: re.compile(pattern, re.IGNORECASE) for name, pattern in _PATTERNS.items()}
36
+
37
+
38
def assert_no_pii(
    response: str,
    checks: list[str] | None = None,
    allow: list[str] | None = None,
) -> list[PIIMatch]:
    """
    Assert that `response` contains no PII.

    Parameters
    ----------
    response:
        The LLM output to evaluate.
    checks:
        List of PII types to check. Default: all types.
        Options: "email", "phone_us", "phone_intl", "ssn", "credit_card",
        "ip_address", "date_of_birth", "passport", "iban"
    allow:
        List of PII types to explicitly allow (skip checking).
        Useful when IP addresses or emails are expected in output.

    Returns
    -------
    list[PIIMatch]
        Empty list if no PII found (assertion passes). Matches are ordered
        by position in the response.

    Raises
    ------
    AssertionError
        If any PII is detected.
    ValueError
        If `checks` names an unknown PII type (raised before any scanning).

    Examples
    --------
    >>> assert_no_pii(response)
    >>> assert_no_pii(response, checks=["email", "ssn"])
    >>> assert_no_pii(response, allow=["ip_address"])  # allow IPs
    """
    active = set(checks or _COMPILED.keys()) - set(allow or [])

    # Fix: validate up front — previously a typo in `checks` raised ValueError
    # only when the bad name happened to come up mid-scan (set order).
    for pii_type in sorted(active):
        if pii_type not in _COMPILED:
            raise ValueError(f"Unknown PII type: {pii_type!r}. Valid: {list(_COMPILED)}")

    found: list[PIIMatch] = []
    # Fix: iterate in declaration order rather than set order so the returned
    # matches and the failure report are deterministic across runs (set
    # iteration order varies with hash randomization).
    for pii_type, pattern in _COMPILED.items():
        if pii_type not in active:
            continue
        for m in pattern.finditer(response):
            found.append(PIIMatch(
                type=pii_type,
                value=_redact(m.group()),  # never echo raw PII in test output
                start=m.start(),
                end=m.end(),
            ))

    # Report matches in document order for stable, readable failures.
    found.sort(key=lambda match: (match.start, match.end))

    if found:
        details = "\n".join(
            f" [{p.type}] {p.value!r} at position {p.start}"
            for p in found
        )
        raise AssertionError(
            f"PII detected in LLM response ({len(found)} match{'es' if len(found) > 1 else ''}):\n"
            f"{details}\n"
            f" Response preview: {response[:200]!r}"
        )

    return found
101
+
102
+
103
def _redact(value: str) -> str:
    """Partially redact a PII value for safe display in error messages."""
    n = len(value)
    if n <= 4:
        return "****"
    # Keep two characters at each end; star out the middle.
    return f"{value[:2]}{'*' * (n - 4)}{value[-2:]}"