mentar 0.1.0.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mentar/__init__.py +6 -0
- mentar/cli/__init__.py +1 -0
- mentar/cli/__main__.py +62 -0
- mentar/db/__init__.py +4 -0
- mentar/db/store.py +416 -0
- mentar/dialogue/__init__.py +4 -0
- mentar/engine/__init__.py +4 -0
- mentar/engine/bkt.py +99 -0
- mentar/engine/fringe.py +104 -0
- mentar/engine/probe_classify.py +79 -0
- mentar/eval/__init__.py +4 -0
- mentar/eval/verify_numeric.py +619 -0
- mentar/grounding/__init__.py +65 -0
- mentar/grounding/cache.py +127 -0
- mentar/grounding/reader.py +271 -0
- mentar/grounding/resolve.py +125 -0
- mentar/grounding/source_map.py +120 -0
- mentar/grounding/sources.py +267 -0
- mentar/grounding/wrapper.py +50 -0
- mentar/inference/__init__.py +7 -0
- mentar/safety/__init__.py +4 -0
- mentar/safety/escalation.py +316 -0
- mentar/tools/__init__.py +4 -0
- mentar/tools/validate_template.py +322 -0
- mentar-0.1.0.dev0.dist-info/METADATA +178 -0
- mentar-0.1.0.dev0.dist-info/RECORD +29 -0
- mentar-0.1.0.dev0.dist-info/WHEEL +5 -0
- mentar-0.1.0.dev0.dist-info/entry_points.txt +2 -0
- mentar-0.1.0.dev0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,267 @@
|
|
|
1
|
+
r"""ZIM source-location handling: local paths, mounted NAS, and SMB/Samba shares.
|
|
2
|
+
|
|
3
|
+
A ZIM lives somewhere; ``libzim`` can only open a **local filesystem path** (it
|
|
4
|
+
mmaps the file for random access). This module turns a configured *location* into
|
|
5
|
+
a local path the reader can open:
|
|
6
|
+
|
|
7
|
+
materialize_zim(location, cfg) -> Path | None
|
|
8
|
+
|
|
9
|
+
Supported location forms:
|
|
10
|
+
- **Local path** ``/data/zims/vikidia.zim`` — returned as-is.
|
|
11
|
+
- **Mounted NAS / share** ``/mnt/nas/zims/vikidia.zim`` or a Windows drive /
|
|
12
|
+
UNC that the OS has already mounted — also just a
|
|
13
|
+
filesystem path, returned as-is (NO copy: the whole
|
|
14
|
+
point of NAS storage is to avoid a local copy).
|
|
15
|
+
- **SMB URL / UNC** ``smb://nas/share/vikidia.zim`` /
|
|
16
|
+
``\\nas\share\vikidia.zim`` / ``//nas/share/...`` —
|
|
17
|
+
not a real local file, so it is **copied once** to a
|
|
18
|
+
local cache dir (``grounding.zim_cache_dir``) via
|
|
19
|
+
``smbclient`` (optional dep ``smbprotocol``), then the
|
|
20
|
+
cached path is returned.
|
|
21
|
+
|
|
22
|
+
NOW (W7.4): local + mounted-NAS + SMB read/download.
|
|
23
|
+
FUTURE GOAL: pull from global Kiwix mirrors to any reasonable destination on any
|
|
24
|
+
OS (see ``scripts/fetch_zim.py``). Catalog/mirror discovery is not built yet.
|
|
25
|
+
|
|
26
|
+
Degradation contract (SAFETY §1.5 / SPEC §15): every failure returns ``None`` and
|
|
27
|
+
logs a warning — this module NEVER raises. ``smbprotocol`` is optional; if an SMB
|
|
28
|
+
location is requested without it installed, we warn and return ``None``.
|
|
29
|
+
|
|
30
|
+
Spec: docs/design/W7_grounding_reader.md (ZIM acquisition / SMB read row).
|
|
31
|
+
"""
|
|
32
|
+
|
|
33
|
+
from __future__ import annotations
|
|
34
|
+
|
|
35
|
+
import logging
|
|
36
|
+
import os
|
|
37
|
+
import re
|
|
38
|
+
import shutil
|
|
39
|
+
from pathlib import Path
|
|
40
|
+
from typing import Optional
|
|
41
|
+
|
|
42
|
+
logger = logging.getLogger(__name__)
|
|
43
|
+
|
|
44
|
+
# Buffer length, in bytes, passed to shutil.copyfileobj when an SMB ZIM is
# streamed into the local cache.
_COPY_CHUNK = 8 * 1024 * 1024  # 8 MiB streaming chunk for SMB → local copies
# Relative cache directory used when the config has no ``zim_cache_dir`` value.
_DEFAULT_ZIM_CACHE = ".cache/zim"
# Regex fragment for the build date; used both to validate a ``pin`` and as the
# date slot of the generated filename pattern.
_DATE_RE = r"\d{4}-\d{2}"  # Kiwix embeds a YYYY-MM build date in the filename
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
# ── Location classification & joining ─────────────────────────────────────────
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def is_smb_location(location: str) -> bool:
    """Report whether ``location`` names an SMB share rather than a filesystem path.

    Three spellings count as SMB: the URL form ``smb://host/share/...``, the
    backslash UNC form ``\\\\host\\share\\...`` and the forward-slash UNC form
    ``//host/share/...``. Leading/trailing whitespace is ignored. Everything
    else -- including a share the OS has already mounted, such as
    ``/mnt/nas/...`` or ``Z:\\...`` -- is an ordinary path that needs no SMB
    client, so the answer is ``False``.
    """
    smb_prefixes = ("smb://", "\\\\", "//")
    candidate = str(location).strip()
    return candidate.startswith(smb_prefixes)
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def smb_url_to_unc(location: str) -> str:
    """Rewrite an SMB location as a backslash UNC path (``\\\\host\\share\\...``).

    ``smb://nas/share/f.zim`` and ``//nas/share/f.zim`` are both turned into
    ``\\\\nas\\share\\f.zim``. Input that carries neither prefix (for example a
    path that is already in UNC form) comes back unchanged apart from having
    surrounding whitespace stripped.
    """
    text = str(location).strip()
    # "smb://" is tested first, mirroring the precedence of the two prefixes.
    for prefix in ("smb://", "//"):
        if text.startswith(prefix):
            remainder = text[len(prefix):]
            return "\\\\" + remainder.replace("/", "\\")
    return text
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def join_location(base: str, filename: str) -> str:
    """Append ``filename`` to the ZIM directory ``base``, respecting SMB spellings.

    A local or mounted ``base`` is joined as a normal filesystem path (with
    ``~`` expanded). An SMB ``base`` keeps its own separator style: backslash
    UNC bases are joined with a backslash, while ``smb://`` and ``//`` bases
    are joined with a forward slash. Trailing separators on ``base`` are
    dropped before joining.
    """
    root = str(base)
    if not is_smb_location(root):
        return str(Path(root).expanduser() / filename)
    if root.startswith("\\\\"):
        return "\\".join((root.rstrip("\\/"), filename))
    return "/".join((root.rstrip("/"), filename))
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
# ── Filename grammar: <project>_<lang>_<selection>_<flavour>_<YYYY-MM>.zim ─────
|
|
94
|
+
# Kiwix names embed project, language, selection (subject/"all"), flavour
|
|
95
|
+
# (maxi|nopic|mini) and a YYYY-MM build date — e.g. wikipedia_en_astronomy_maxi_2026-02.zim
|
|
96
|
+
# or wikipedia_ace_all_nopic_2026-04.zim. A source is declared by those parts in
|
|
97
|
+
# config; the NEWEST matching file is used automatically (latest wins) unless pinned.
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def parse_index(html: str) -> list[str]:
    """Extract the ``.zim`` filenames referenced by a Kiwix directory-index page.

    Only double-quoted ``href`` values that are bare filenames are returned:
    a target containing ``/`` or ``?`` (sub-directories, query strings) is
    ignored. Names are returned in page order, duplicates included.
    """
    zim_link = re.compile(r'href="([^"?/]+\.zim)"')
    return [hit.group(1) for hit in zim_link.finditer(html)]
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def pick_latest(filenames: list[str], regex: str) -> Optional[str]:
    """Pick the newest filename matching ``regex``, or ``None`` if none match.

    "Newest" is decided by the ``YYYY-MM`` build date that precedes the
    ``.zim`` suffix (such dates order correctly as plain strings). The full
    filename is only a tie-breaker, so a regex that admits several
    selections/flavours still yields the most recent build rather than the
    alphabetically last name. Names without a recognisable date rank below
    every dated name.

    Args:
        filenames: Candidate filenames (e.g. a directory listing).
        regex: Pattern applied with ``re.search`` to select candidates.

    Returns:
        The chosen filename, or ``None`` when nothing matches.
    """
    rx = re.compile(regex)
    build_date = re.compile(r"(\d{4}-\d{2})\.zim$")

    def _recency_key(name: str) -> tuple[str, str]:
        found = build_date.search(name)
        # Date first; the name keeps the result deterministic for equal dates.
        return (found.group(1) if found else "", name)

    return max((f for f in filenames if rx.search(f)), key=_recency_key, default=None)
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def build_filename_regex(spec: dict) -> str:
    """Turn a structured source spec into an anchored ZIM filename regex.

    Recognised ``spec`` keys:

    - ``project`` and ``lang`` -- required; a missing key raises ``KeyError``.
    - ``selection`` (e.g. "all", "astronomy", "simple_all") and ``flavour``
      (e.g. "maxi", "nopic") -- optional; skipped when absent or empty.
    - ``pin`` -- optional. When it is exactly a ``YYYY-MM`` date, the pattern
      only matches that build; any other value leaves the date slot open.
      (A full ``*.zim`` pin is dealt with by :func:`resolve_filename` before
      this function is reached.)

    All parts are regex-escaped and joined with underscores, followed by the
    date and the ``.zim`` suffix.
    """
    segments = [re.escape(str(spec[required])) for required in ("project", "lang")]
    for optional in ("selection", "flavour"):
        if spec.get(optional):
            segments.append(re.escape(str(spec[optional])))

    pin = spec.get("pin")
    if pin and re.fullmatch(_DATE_RE, str(pin)):
        date_part = re.escape(str(pin))
    else:
        date_part = _DATE_RE

    stem = "_".join(segments)
    return rf"^{stem}_{date_part}\.zim$"
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def list_zim_dir(zim_dir: str, cfg: dict) -> list[str]:
    """Return the ``*.zim`` filenames found in ``zim_dir``; ``[]`` on any failure.

    ``zim_dir`` may be a local/mounted directory or an SMB directory. The SMB
    case needs the optional ``smbclient`` module; when it cannot be imported a
    warning is logged and ``[]`` is returned. Any other exception is logged
    (with traceback) and also results in ``[]`` -- this function never raises.
    """
    try:
        if not is_smb_location(zim_dir):
            folder = Path(zim_dir).expanduser()
            if folder.is_dir():
                return [entry.name for entry in folder.iterdir() if entry.suffix == ".zim"]
            return []

        try:
            import smbclient
        except ImportError:
            logger.warning("list_zim_dir: SMB dir %r needs the [nas] extra (smbprotocol)", zim_dir)
            return []
        _configure_smb_auth(cfg)
        remote_names = smbclient.listdir(smb_url_to_unc(zim_dir))
        return [name for name in remote_names if name.endswith(".zim")]
    except Exception:
        logger.warning("list_zim_dir: cannot list %r — returning []", zim_dir, exc_info=True)
        return []
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
def resolve_filename(spec, zim_dir: str, cfg: dict) -> Optional[str]:
    """Map a source spec onto a concrete ZIM filename.

    ``spec`` is interpreted by type:

    - **str** -- taken as an exact filename and returned unchanged (an empty
      string yields ``None``). No directory listing is performed.
    - **dict** -- ``{project, lang, selection?, flavour?, pin?}``. A ``pin``
      ending in ``.zim`` is returned directly. Otherwise ``zim_dir`` is listed
      and the latest file matching the filename grammar is chosen; a
      ``YYYY-MM`` ``pin`` restricts the match to that build.
    - anything else -- ``None``.

    When a dict spec matches nothing, a warning is logged and ``None`` is
    returned so the caller can apply the degradation contract.
    """
    if isinstance(spec, str):
        return spec if spec else None

    if isinstance(spec, dict):
        pinned = spec.get("pin")
        if pinned and str(pinned).endswith(".zim"):
            # Explicit file pin: the directory is never consulted.
            return str(pinned)

        pattern = build_filename_regex(spec)
        available = list_zim_dir(zim_dir, cfg)
        newest = pick_latest(available, pattern)
        if newest is None:
            logger.warning("resolve_filename: no ZIM in %r matches %s (spec=%r)", zim_dir, pattern, spec)
        return newest

    return None
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
# ── Materialization ───────────────────────────────────────────────────────────
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
def materialize_zim(location: str, cfg: dict) -> Optional[Path]:
    """Resolve ``location`` to a path on the local filesystem, or ``None``.

    SMB locations are delegated to the SMB copy routine, which returns the
    cached local copy. Any other location is treated as a local/mounted path:
    it is returned as-is (after ``~`` expansion, without copying) when it
    exists, and a warning plus ``None`` results when it does not.

    Unexpected exceptions are logged with traceback and converted to ``None``.
    """
    try:
        where = str(location)
        if is_smb_location(where):
            return _materialize_smb(where, cfg)

        candidate = Path(where).expanduser()
        if candidate.exists():
            return candidate
        logger.warning("materialize_zim: ZIM not found at local/mounted path: %s", candidate)
        return None
    except Exception:
        logger.warning("materialize_zim: unexpected error for %r — returning None", location, exc_info=True)
        return None
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
def _zim_cache_dir(cfg: dict) -> Path:
    """Return the directory used for cached SMB copies.

    Uses ``cfg['zim_cache_dir']`` when it is set to a truthy value and the
    module default otherwise; a leading ``~`` is expanded. The directory is
    not created here.
    """
    configured = cfg.get("zim_cache_dir")
    chosen = configured if configured else _DEFAULT_ZIM_CACHE
    return Path(os.path.expanduser(str(chosen)))
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
def _configure_smb_auth(cfg: dict) -> None:
    """Push the credentials in ``cfg['smb']`` into smbclient's global config.

    Nothing happens unless ``cfg['smb']['enabled']`` is truthy and at least a
    username or a password is configured. A configured ``domain`` is prefixed
    to the username (``DOMAIN\\user``) unless the username already contains a
    backslash or an ``@``. ``smbclient`` is imported only when credentials are
    actually applied; the caller is expected to have verified it is available.
    """
    settings = cfg.get("smb") or {}
    if not settings.get("enabled"):
        return

    username = settings.get("username") or None
    password = settings.get("password") or None
    domain = settings.get("domain") or None

    if username and domain:
        already_qualified = "\\" in username or "@" in username
        if not already_qualified:
            username = f"{domain}\\{username}"

    if not (username or password):
        return

    import smbclient
    smbclient.ClientConfig(username=username, password=password)
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
def _materialize_smb(location: str, cfg: dict) -> Optional[Path]:
    """Copy an SMB ZIM to the local cache and return the cached path, or None.

    Steps: import the optional SMB client, normalise ``location`` to UNC,
    apply credentials, make sure the cache directory exists, reuse an existing
    cached file whose size equals the remote size, otherwise stream the remote
    file into a ``.tmp`` sibling and move it into place.

    Returns ``None`` when the SMB client is not installed or the copy fails.
    """
    try:
        import smbclient
        from smbclient import open_file
    except ImportError:
        logger.warning(
            "materialize_zim: SMB location %r requested but 'smbprotocol' is not installed. "  # t7.3-exempt: operator log message, not a prompt
            "Install it with: pip install 'mentar[nas]' (or mount the share and point "
            "grounding.zim_dir at the mount). Returning None.",
            location,
        )
        return None

    unc = smb_url_to_unc(location)
    _configure_smb_auth(cfg)

    cache_dir = _zim_cache_dir(cfg)
    cache_dir.mkdir(parents=True, exist_ok=True)
    # The cache entry is named after the last component of the remote path only.
    local = cache_dir / Path(unc.replace("\\", "/")).name

    # Reuse a cached copy when its size matches the remote (cheap freshness check).
    remote_size: Optional[int] = None
    try:
        remote_size = smbclient.stat(unc).st_size
    except Exception:
        # A failed stat leaves remote_size as None, which disables reuse below.
        logger.warning("materialize_zim: cannot stat SMB path %s", unc, exc_info=True)
    if local.exists() and remote_size is not None and local.stat().st_size == remote_size:
        logger.debug("materialize_zim: reusing cached SMB copy %s", local)
        return local

    logger.info("materialize_zim: copying SMB ZIM %s → %s (this can be large)", unc, local)
    # Download under a temporary name so the final name only ever refers to a
    # completed copy.
    tmp = local.with_name(local.name + ".tmp")
    try:
        with open_file(unc, mode="rb") as src, open(tmp, "wb") as dst:
            shutil.copyfileobj(src, dst, length=_COPY_CHUNK)
        os.replace(tmp, local)
    except Exception:
        logger.warning("materialize_zim: failed copying SMB ZIM %s", unc, exc_info=True)
        # Best-effort removal of the partial download; a cleanup failure is ignored.
        try:
            tmp.unlink(missing_ok=True)
        except Exception:
            pass
        return None
    return local
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
"""Grounding passage wrapper: return inner text for {{grounding_passage}}.
|
|
2
|
+
|
|
3
|
+
SAFETY §1.5 / W2.3 contract (grounding-as-data):
|
|
4
|
+
The <<<GROUNDING_BEGIN>>> / <<<GROUNDING_END>>> markers already live in
|
|
5
|
+
``prompts/system_prompt.md``. This module returns the **inner text only** —
|
|
6
|
+
it never double-wraps. The prompt layer is the exclusive owner of the
|
|
7
|
+
markers; this module merely enforces the length bound and ensures a clean
|
|
8
|
+
return value.
|
|
9
|
+
|
|
10
|
+
Safety principle:
|
|
11
|
+
The reader returns passage content **verbatim as data**. This module does
|
|
12
|
+
NOT strip, filter, or interpret the passage — it only length-bounds it.
|
|
13
|
+
Prompt injection resistance is handled by the system prompt's marker framing
|
|
14
|
+
(SAFETY §1.5). Stripping "suspicious" strings here would be a security
|
|
15
|
+
theatre that silently corrupts legitimate educational content.
|
|
16
|
+
|
|
17
|
+
Spec: docs/design/W7_grounding_reader.md (Safety row + wrapper.py row).
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
from __future__ import annotations
|
|
21
|
+
|
|
22
|
+
import logging
|
|
23
|
+
|
|
24
|
+
logger = logging.getLogger(__name__)

# Length bound applied when the config has no usable ``max_passage_chars``.
_DEFAULT_MAX_CHARS = 1200


def wrap_passage(passage: str, cfg: dict) -> str:
    """Length-bound ``passage`` and return the inner text for ``{{grounding_passage}}``.

    The passage content is never filtered or interpreted; it is only
    truncated. Truncation keeps the first ``max_passage_chars`` characters
    and strips trailing whitespace from the result.

    Args:
        passage: Raw plain-text passage resolved by the reader (may be "").
        cfg: The ``grounding:`` config block (for ``max_passage_chars``). A
            missing config, or a limit that is missing, non-numeric or
            negative, falls back to the default bound instead of raising.

    Returns:
        The passage, truncated to ``max_passage_chars`` if needed. Returns ""
        on empty / whitespace-only / non-string input. Never raises.
    """
    if not isinstance(passage, str) or not passage.strip():
        return ""

    try:
        max_chars = int(cfg.get("max_passage_chars", _DEFAULT_MAX_CHARS))
        if max_chars < 0:
            # A negative bound would slice from the END of the passage.
            raise ValueError("max_passage_chars must not be negative")
    except (AttributeError, TypeError, ValueError, OverflowError):
        logger.warning(
            "wrap_passage: invalid or missing max_passage_chars; using default %d",
            _DEFAULT_MAX_CHARS,
        )
        max_chars = _DEFAULT_MAX_CHARS

    # Length-bound (SPEC §15 / config contract)
    if len(passage) > max_chars:
        passage = passage[:max_chars].rstrip()
        logger.debug("wrap_passage: truncated to %d chars", max_chars)

    return passage
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
"""Pluggable LLM backend abstraction: generate(prompt, grounding_passages, constraints) -> text.
|
|
2
|
+
|
|
3
|
+
Primary local backend: llama.cpp (GGUF) — lightest, broadest hardware support (2026-06-15).
|
|
4
|
+
Also: vLLM (capable-GPU tier), Ollama (wrapper), Gemini/Claude API (opt-in, parent owns key).
|
|
5
|
+
llama.cpp + vLLM both expose OpenAI-compatible endpoints → one provider path (base_url swap).
|
|
6
|
+
Spec: docs/SPEC.md §20.1.
|
|
7
|
+
"""
|
|
@@ -0,0 +1,316 @@
|
|
|
1
|
+
"""Child-safety escalation classifier.
|
|
2
|
+
|
|
3
|
+
Spec: docs/SAFETY.md Layer 3 (§3.1–§3.5); docs/design/W2.2_escalation.md §2–§4.
|
|
4
|
+
Tests: tests/safety/test_escalation.py (T2.1) — 20 positive + 20 negative fixtures.
|
|
5
|
+
|
|
6
|
+
This module is PURE: no DB writes, no FSM transitions, no LLM calls. The caller
|
|
7
|
+
(session loop / FSM driver) is responsible for logging, freeze, and handoff once
|
|
8
|
+
`classify()` returns a non-None match. See design §4 and `handle_trigger()` below
|
|
9
|
+
for the thin caller-side helper that demonstrates the end-to-end flow.
|
|
10
|
+
|
|
11
|
+
Two open rollout guards (W2.2 design §7 / SAFETY.md §3.5 — do not lose):
|
|
12
|
+
1. Handoff wording (HANDOFF_MESSAGE_PRIMARY/SUPPORT) has NOT been reviewed by a
|
|
13
|
+
safeguarding professional or child-communication specialist. Professional review
|
|
14
|
+
is REQUIRED before any rollout beyond the supervised single-family Phase-0 pilot.
|
|
15
|
+
2. Emergency-services signposting is explicitly absent (pilot routes to physically-
|
|
16
|
+
present parent). This FAILS when the parent is the source of harm (physical_danger /
|
|
17
|
+
abuse_disclosure cases). Crisis-helpline signposting with safeguarding input is
|
|
18
|
+
REQUIRED before unsupervised/independent mode is enabled.
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
from __future__ import annotations
|
|
22
|
+
|
|
23
|
+
import re
|
|
24
|
+
import sqlite3
|
|
25
|
+
from dataclasses import dataclass
|
|
26
|
+
from enum import Enum
|
|
27
|
+
from pathlib import Path
|
|
28
|
+
from typing import Optional
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
# ─────────────────────────────────────────────────────────────────────────────
|
|
32
|
+
# Public API types (design §3)
|
|
33
|
+
# ─────────────────────────────────────────────────────────────────────────────
|
|
34
|
+
|
|
35
|
+
class TriggerClass(str, Enum):
    """Escalation trigger categories recognised by the classifier.

    Members are also ``str`` instances; each value is the snake_case name of
    the category.
    """

    HARM_TO_SELF = "harm_to_self"
    PHYSICAL_DANGER = "physical_danger"
    SEVERE_DISTRESS = "severe_distress"
    ABUSE_DISCLOSURE = "abuse_disclosure"
    SECRECY_REQUEST = "secrecy_request"
    ADVERSARIAL_JAILBREAK = "adversarial_jailbreak"
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class Severity(str, Enum):
    """Severity level attached to a trigger class.

    Members are also ``str`` instances. Their relative priority is defined by
    the module-level ``_SEVERITY_RANK`` table, not by declaration order.
    """

    CRITICAL = "critical"
    HIGH = "high"
    LOW = "low"
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
@dataclass(frozen=True)
class TriggerMatch:
    """Immutable record of one classifier hit: class, severity and matched text."""

    trigger_class: TriggerClass  # category whose pattern bank produced the hit
    severity: Severity  # severity configured for that category
    matched_span: str  # the substring that fired (for log/debug — NOT the alert)
    # NB: the FULL verbatim input is logged separately and never truncated.
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
# ─────────────────────────────────────────────────────────────────────────────
|
|
59
|
+
# Fixed handoff messages — VERBATIM from SAFETY.md §3.4
|
|
60
|
+
# These are NEVER passed through the LLM. (design §3, §4 Step 3)
|
|
61
|
+
# ─────────────────────────────────────────────────────────────────────────────
|
|
62
|
+
|
|
63
|
+
# Primary fixed handoff text shown to the child. The wording is a constant so
# it can be emitted as-is; per the module docstring it still awaits review by
# a safeguarding professional before any wider rollout.
HANDOFF_MESSAGE_PRIMARY: str = (
    "This is something to talk about with your grown-up. "
    "Let's pause our lesson here."
)

# Supporting fixed handoff text; same status and constraints as the primary
# message.
HANDOFF_MESSAGE_SUPPORT: str = (
    "Your grown-up can help you with this. "
    "Your lesson will be here when you're ready to come back."
)
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
# ─────────────────────────────────────────────────────────────────────────────
|
|
75
|
+
# Per-class keyword/regex banks (design §3 — module-level so Bucket D can
|
|
76
|
+
# swap them wholesale).
|
|
77
|
+
#
|
|
78
|
+
# Matching philosophy (design §3 classifier rules):
|
|
79
|
+
# • Case-insensitive, compiled once.
|
|
80
|
+
# • Sensitivity-biased: false positives are acceptable; false negatives are not.
|
|
81
|
+
# • Word-boundary-aware where it prevents obvious trust-eroding false positives
|
|
82
|
+
# (e.g. "die" in "diet"/"died in the game") — any exception is documented.
|
|
83
|
+
# • Substring match otherwise (no boundary needed where context is unambiguous).
|
|
84
|
+
#
|
|
85
|
+
# Word-boundary approach: \b is used around short ambiguous roots (die/hurt/kill).
|
|
86
|
+
# Longer phrases and full self-harm expressions are matched as substrings because
|
|
87
|
+
# the phrase itself is unambiguous in any surrounding context.
|
|
88
|
+
# ─────────────────────────────────────────────────────────────────────────────
|
|
89
|
+
|
|
90
|
+
# ── A: harm_to_self (CRITICAL) ───────────────────────────────────────────────
# Suicidal ideation and self-harm intent.
# Almost everything here is a plain phrase match ("want to die",
# "hurt myself", "harm myself", ...): the phrase is searched anywhere in the
# input without word boundaries. The only boundary-anchored entry is the
# "i might as well die / be dead" pattern.
# "don't want to be here anymore" and "no reason to live" catch indirect ideation.
_HARM_TO_SELF_PATTERNS: list[re.Pattern[str]] = [
    re.compile(source, re.IGNORECASE)
    for source in (
        r"i want to (die|kill myself|end my life|hurt myself|harm myself)",
        r"i('m going| am going| want) to (hurt|harm|kill) myself",
        r"i('m| am) going to (hurt|harm|kill) myself",
        r"(hurt|harm|kill) myself",
        r"want to (end|take) my (life|own life)",
        r"no reason to (live|go on|be alive)",
        # "I don't want to be here anymore" — indirect self-harm ideation
        r"(don'?t|do not) want to be here (anymore|any more)",
        # Standalone "want to die", matched as a phrase.
        r"want to die",
        # "i might as well die" and similar resignation statements.
        r"\bi might as well (die|be dead)\b",
        # "I'm thinking about ending it all" — indirect
        r"thinking (about|of) (ending|killing) (it|myself|my life)",
        # "I've been cutting myself because ..." / "I cut myself on purpose"
        r"(cutting|cut) myself (on purpose|to hurt|because)",
    )
]
|
|
114
|
+
|
|
115
|
+
# ── A: physical_danger (CRITICAL) ────────────────────────────────────────────
# Immediate physical danger / physical abuse by another person.
# All entries are case-insensitive phrase matches searched anywhere in the
# input ("hurt me", "hit me", "not safe", ...).
_PHYSICAL_DANGER_PATTERNS: list[re.Pattern[str]] = [
    re.compile(r"someone (is|has been|keeps|was) (hurting|hitting|beating|abusing|punching|kicking) me", re.I),
    re.compile(r"(he|she|they|he's|she's) (hurt|hit|beat|kicked|punched|abused) me", re.I),
    re.compile(r"(i am|i'?m) (being|getting) (hurt|hit|beaten|abused|punched|kicked)", re.I),
    # Covers both the contracted and the spelled-out negation
    # ("don't feel safe ..." / "do not feel safe ...").
    re.compile(r"(don'?t|do not) feel safe (at home|in my house|at school|here)", re.I),
    re.compile(r"(i'?m|i am) not safe", re.I),
    re.compile(r"not safe at home", re.I),
    re.compile(r"(someone|a person|grown.?up|adult|man|woman|he|she|they) (hurts|hits|beats|kicks|punches) me", re.I),
    re.compile(r"(my|our) (dad|mum|mom|stepdad|stepmom|uncle|aunt|teacher|brother|sister|carer|guardian) (hurts|hits|beats|abuses|punches|kicks) me", re.I),
    re.compile(r"being (physically )?abused", re.I),
    # "I'm in danger" / "I'm scared someone will hurt me"
    re.compile(r"(i'?m|i am) in danger", re.I),
    re.compile(r"scared (that )?(someone|he|she|they) will (hurt|hit|kill|harm) me", re.I),
]
|
|
132
|
+
|
|
133
|
+
# ── B: severe_distress (HIGH) ────────────────────────────────────────────────
# Severe emotional/welfare distress that goes beyond ordinary frustration.
# Calibration note (design §5): a mild mood report such as "I'm a bit sad
# today" is deliberately BELOW threshold in v0.1 (see negative fixtures); the
# patterns require explicit despair/hopelessness wording.
# "dying of boredom" / "killing me" in game/school context → NOT fired (see negatives).
_SEVERE_DISTRESS_PATTERNS: list[re.Pattern[str]] = [
    re.compile(source, re.IGNORECASE)
    for source in (
        r"(everything|nothing|life) (is|feels?) (hopeless|pointless|meaningless|worthless|terrible|awful)",
        r"i('?ve| have) been (crying|sobbing) (all|every|for) (day|night|week|hours?)",
        r"(i feel|i'?m feeling) (completely )?(hopeless|worthless|empty|broken|destroyed|devastated)",
        r"nobody (loves?|cares? about|wants) me",
        r"(i hate|i can'?t stand) (my life|myself|being alive|living)",
        r"(can'?t|cannot) (cope|go on|take (it|this|anymore)|keep going)",
        r"(every ?thing|my whole life) is (falling apart|ruined|over|destroyed)",
        # persistent despair markers
        r"(always|never) (feel|felt|going to) (better|happy|good|okay|ok)\b",
        r"(too much|no point) (to|in) (living|life|going on|continuing)",
    )
]
|
|
151
|
+
|
|
152
|
+
# ── C: abuse_disclosure (CRITICAL) ───────────────────────────────────────────
# Disclosures of grooming, sexual abuse or exploitation.
# Covers "touched ..." in an abuse context ("private parts", "private area"),
# requests for or sharing of explicit images, and euphemistic disclosures.
_ABUSE_DISCLOSURE_PATTERNS: list[re.Pattern[str]] = [
    re.compile(source, re.IGNORECASE)
    for source in (
        r"(touched|touches|touching) (my|our) (private|body|bits|parts?|area)",
        r"someone (touched|is touching|has touched|touched me) in (a )?wrong (way|place)",
        r"(sexual|inappropriate|rude|dirty) (touch|touching|things?|photos?|pictures?|videos?)",
        r"(show(ing|n)?|send(ing)?|sent|shar(e|ing)) (me )?(naked|nude|rude|sexual|private) (photos?|pictures?|videos?|images?)",
        r"(ask(ing|ed)? me to) (take off|remove) (my |their )?(clothes|clothing|shirt|top)",
        r"(adult|grown.?up|man|woman|teacher|coach|uncle|neighbour|neighbor|stranger|person|he|she|they) (want(s|ed)? (to )?(touch|see|have|do))",
        # Grooming patterns: gifts, special relationship, "our secret"
        r"(he|she|they|it) (is|was) (being|getting) (really )?(special|nice|friendly|close) (to me|with me) but",
        r"(sexual|sexually) (abused?|assault(ed)?|exploit(ed)?|harass(ed)?)",
        r"abuse(d)? (me|my body|sexually)",
        # Euphemistic disclosures — simplified to catch the span that matters
        r"(a grown.?up|an adult|someone older) (did|does|made|makes) (something|things?) (to me|with me)",
        r"(he|she|they) (made|make|asked|ask|told|tell) me (to )?(do|touch|show) (something|things?|myself|them)",
    )
]
|
|
170
|
+
|
|
171
|
+
# ── C: secrecy_request (HIGH) ────────────────────────────────────────────────
# The child tries to enlist the AI in keeping something from parents/carers,
# or reports that someone else has demanded secrecy.
# "anyone" and "anybody" are treated alike in the "tell ..." patterns.
_SECRECY_REQUEST_PATTERNS: list[re.Pattern[str]] = [
    re.compile(r"(don'?t|do not|please don'?t) (tell|let|show) (my )?(mum|mom|mummy|mommy|dad|daddy|father|mother|parent|carer|guardian|teacher|anyone|anybody|grown.?up)", re.I),
    re.compile(r"(keep|kept) (this |it )?(a )?secret (from|between)", re.I),
    re.compile(r"(it'?s|this is) (just )?our secret", re.I),
    re.compile(r"(promise|swear) (me )?(you )?(won'?t|never|not) tell", re.I),
    re.compile(r"(he|she|they) (told|said|told me|says) (not to|to never|to not) tell (anyone|anybody|my mum|my mom|my dad|my parent)", re.I),
    re.compile(r"(told|said|asked) me (not|never) to (say|tell|talk about) (anything|this|it)", re.I),
    re.compile(r"(nobody|no one|don'?t let anyone) (must|should|can) know (about this|about it)", re.I),
]
|
|
183
|
+
|
|
184
|
+
# ── D: adversarial_jailbreak (LOW) ───────────────────────────────────────────
# Jailbreak / safety-override attempts. Layer 1 §1.4 already blocks these
# inline; this bank exists so that they also reach the escalation log for audit.
_ADVERSARIAL_JAILBREAK_PATTERNS: list[re.Pattern[str]] = [
    re.compile(source, re.IGNORECASE)
    for source in (
        r"(ignore|disregard|forget|bypass|override) (your |all )?(previous |your |the )?(instructions?|rules?|guidelines?|safety|system prompt|restrictions?)",
        r"(pretend|act|behave|imagine) (like |as if )?(you (are|have) no |you don'?t have |you'?re |you are )(a different|rules?|restrictions?|guidelines?|an? (uncensored|unrestricted|free|evil|jailbroken))",
        r"(you are now|you'?re now|from now on (you are|you'?re|act as)) (a |an )?(?!a tutor|helping|assisting)",
        r"(jailbreak|dan mode|developer mode|god mode|unrestricted mode)",
        r"(your new (instructions?|rules?|system prompt)|new system prompt)",
        r"(pretend you have no rules|pretend you'?re a different ai|pretend (there are|you have) no (rules|restrictions|limits?))",
        r"(do anything now|say anything|no (rules|limits?|restrictions?))\b",
    )
]
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
# ─────────────────────────────────────────────────────────────────────────────
|
|
199
|
+
# Severity + ordering table (design §2 — precedence: A > B > C > D)
|
|
200
|
+
# ─────────────────────────────────────────────────────────────────────────────
|
|
201
|
+
|
|
202
|
+
# (TriggerClass, Severity, patterns) triples, scanned from top to bottom by
# classify(). Entries are grouped by descending severity — the CRITICAL
# classes first, then the HIGH classes, then LOW — and among entries of equal
# severity the one listed earlier takes precedence.
_CLASSIFIER_BANKS: list[tuple[TriggerClass, Severity, list[re.Pattern[str]]]] = [
    (TriggerClass.HARM_TO_SELF, Severity.CRITICAL, _HARM_TO_SELF_PATTERNS),
    (TriggerClass.PHYSICAL_DANGER, Severity.CRITICAL, _PHYSICAL_DANGER_PATTERNS),
    (TriggerClass.ABUSE_DISCLOSURE, Severity.CRITICAL, _ABUSE_DISCLOSURE_PATTERNS),
    (TriggerClass.SEVERE_DISTRESS, Severity.HIGH, _SEVERE_DISTRESS_PATTERNS),
    (TriggerClass.SECRECY_REQUEST, Severity.HIGH, _SECRECY_REQUEST_PATTERNS),
    (TriggerClass.ADVERSARIAL_JAILBREAK, Severity.LOW, _ADVERSARIAL_JAILBREAK_PATTERNS),
]

# Severity rank for precedence comparison (higher = higher priority).
_SEVERITY_RANK: dict[Severity, int] = {
    Severity.CRITICAL: 3,
    Severity.HIGH: 2,
    Severity.LOW: 1,
}
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
# ─────────────────────────────────────────────────────────────────────────────
|
|
223
|
+
# Classifier (design §3)
|
|
224
|
+
# ─────────────────────────────────────────────────────────────────────────────
|
|
225
|
+
|
|
226
|
+
def classify(text: str) -> Optional[TriggerMatch]:
    """Single-turn classifier. Returns the HIGHEST-severity match, or None.

    Sensitivity-biased: when in doubt, fire. False positives are acceptable and
    expected (SAFETY.md §3.2); false negatives are not.

    Precedence on multiple matches: highest Severity wins; ties are broken by
    the category order A > B > C > D, i.e. the bank listed first in
    _CLASSIFIER_BANKS keeps the win.

    The scan does not depend on _CLASSIFIER_BANKS being sorted by severity: a
    bank is skipped only when it cannot strictly outrank the match already
    held, so a higher-severity bank placed later in the list is still checked.
    With the banks listed in descending priority this means the scan does no
    further regex work after the first hit.

    Args:
        text: The raw text of one turn. Empty or whitespace-only text never
            matches.

    Returns:
        A TriggerMatch carrying the winning trigger class, its severity and the
        exact substring that matched, or None when no pattern fires.
    """
    if not text or not text.strip():
        return None

    best: Optional[TriggerMatch] = None

    for trigger_class, severity, patterns in _CLASSIFIER_BANKS:
        # A bank of equal or lower severity can never replace the current best
        # (ties go to the earlier bank), so do not spend regex work on it.
        # `continue` rather than `break`: a later bank may still outrank `best`
        # if the table is ever reordered.
        if best is not None and _SEVERITY_RANK[severity] <= _SEVERITY_RANK[best.severity]:
            continue

        for pattern in patterns:
            m = pattern.search(text)
            if m:
                # Reaching this point means the bank outranks `best` (or there
                # is no best yet), so the hit is taken unconditionally.
                best = TriggerMatch(
                    trigger_class=trigger_class,
                    severity=severity,
                    matched_span=m.group(0),
                )
                break  # one match per class is sufficient; move to next class

    return best
|
|
261
|
+
|
|
262
|
+
|
|
263
|
+
# ─────────────────────────────────────────────────────────────────────────────
|
|
264
|
+
# Thin caller helper — demonstrates the §4 flow end-to-end
|
|
265
|
+
# (classify stays pure/separate; this is the caller side)
|
|
266
|
+
# ─────────────────────────────────────────────────────────────────────────────
|
|
267
|
+
|
|
268
|
+
def handle_trigger(
    text: str,
    learner_id: int,
    conn: sqlite3.Connection,
) -> Optional[TriggerMatch]:
    """Classify one child turn and, on a hit, record it in ``escalation_log``.

    Caller-side helper for the design §4 flow; ``classify`` stays pure and
    separate.

    Performed here:
      1. CLASSIFY - run ``classify(text)``; return None at once if nothing fires.
      2. LOG (§4 Step 2) - insert one ``escalation_log`` row containing the
         full, untruncated ``text`` and commit it on ``conn``.
         ``session_outcome`` is 'logged_only' for adversarial_jailbreak (§4.3)
         and 'frozen' for every other trigger class.

    Left to the caller / other components (per the design notes):
      3. HANDOFF (§4 Step 3) - displaying the handoff messages.
      4. ALERT (§4 Step 4) - the committed log row is the pilot alert; the
         alert surface shows trigger_class + timestamp only, never the verbatim
         text.
      FREEZE + WAIT + RESUMPTION GATE are FSM responsibilities. The caller
      should enter ESCALATION_FREEZE for every class except
      adversarial_jailbreak, which is logged only.

    Args:
        text: Verbatim text of the child's turn.
        learner_id: Value stored in the row's ``learner_id`` column.
        conn: Open SQLite connection; the insert is committed on it.

    Returns:
        The TriggerMatch when a trigger fired (after the row is committed),
        otherwise None.
    """
    match = classify(text)
    if match is None:
        return None

    # §4.3 - a jailbreak attempt is recorded but does not freeze the session.
    is_jailbreak = match.trigger_class == TriggerClass.ADVERSARIAL_JAILBREAK
    session_outcome = "logged_only" if is_jailbreak else "frozen"

    # §4 Step 2 - LOG: the verbatim text is stored in full, never truncated.
    row = (learner_id, match.trigger_class.value, text, session_outcome)
    conn.execute(
        """
        INSERT INTO escalation_log
            (learner_id, trigger_class, trigger_text_verbatim, session_outcome)
        VALUES (?, ?, ?, ?)
        """,
        row,
    )
    conn.commit()

    # §4 Steps 3-4 happen on the caller side; returning the match is the signal.
    return match
|
mentar/tools/__init__.py
ADDED