dos-kernel 0.22.0__py3-none-win_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dos/__init__.py +261 -0
- dos/_bin/dos-hook.exe +0 -0
- dos/_filelock.py +255 -0
- dos/_job_policy.py +97 -0
- dos/_tree.py +145 -0
- dos/admission.py +433 -0
- dos/answer_shape.py +299 -0
- dos/arbiter.py +859 -0
- dos/archive_lock.py +266 -0
- dos/arg_provenance.py +814 -0
- dos/attest.py +472 -0
- dos/breaker.py +311 -0
- dos/churn.py +226 -0
- dos/claim_extract.py +229 -0
- dos/claim_ttl.py +150 -0
- dos/cli.py +8721 -0
- dos/commit_audit.py +666 -0
- dos/completion.py +466 -0
- dos/concurrency_class.py +154 -0
- dos/config.py +1380 -0
- dos/config_lint.py +464 -0
- dos/cooldown.py +390 -0
- dos/coverage.py +387 -0
- dos/dangling_intent.py +287 -0
- dos/data_class.py +397 -0
- dos/decisions.py +1274 -0
- dos/decisions_tui.py +251 -0
- dos/dispatch_top.py +740 -0
- dos/dispatch_top_tui.py +116 -0
- dos/drivers/__init__.py +40 -0
- dos/drivers/ci_status.py +630 -0
- dos/drivers/citation_resolve.py +703 -0
- dos/drivers/decision_stop.py +98 -0
- dos/drivers/export_file.py +173 -0
- dos/drivers/export_otlp.py +275 -0
- dos/drivers/export_statsd.py +242 -0
- dos/drivers/hook_dialects.py +391 -0
- dos/drivers/job.py +47 -0
- dos/drivers/llm_judge.py +360 -0
- dos/drivers/memory_recall.py +1231 -0
- dos/drivers/notify_slack.py +373 -0
- dos/drivers/notify_webhook.py +251 -0
- dos/drivers/operator_judge.py +114 -0
- dos/drivers/os_acceptance.py +228 -0
- dos/drivers/paste_log.py +132 -0
- dos/drivers/plan_scope.py +133 -0
- dos/drivers/self_improve.py +375 -0
- dos/drivers/similarity_judge.py +249 -0
- dos/drivers/state_diff.py +274 -0
- dos/drivers/supervisor.py +347 -0
- dos/drivers/watchdog.py +363 -0
- dos/drivers/workshop.py +160 -0
- dos/durable_schema.py +344 -0
- dos/effect_witness.py +393 -0
- dos/efficiency.py +318 -0
- dos/enforce.py +414 -0
- dos/enumerate.py +776 -0
- dos/env_print.py +378 -0
- dos/event_severity.py +258 -0
- dos/evidence.py +692 -0
- dos/exec_capability.py +256 -0
- dos/export_cursor.py +143 -0
- dos/exporter.py +320 -0
- dos/firing_label.py +353 -0
- dos/fleet_roll.py +226 -0
- dos/gate_classify.py +827 -0
- dos/gh4_coverage.py +179 -0
- dos/git_delta.py +122 -0
- dos/guard.py +215 -0
- dos/health.py +552 -0
- dos/help_summary.py +519 -0
- dos/home.py +934 -0
- dos/hook_binary.py +194 -0
- dos/hook_dialect.py +271 -0
- dos/hook_exit.py +191 -0
- dos/hook_install.py +437 -0
- dos/id_alloc.py +304 -0
- dos/improve.py +499 -0
- dos/intent_ledger.py +635 -0
- dos/interpret.py +176 -0
- dos/intervention.py +769 -0
- dos/intervention_eval.py +371 -0
- dos/journal_delta.py +308 -0
- dos/judge_eval.py +328 -0
- dos/judges.py +366 -0
- dos/lane_infer.py +127 -0
- dos/lane_journal.py +1001 -0
- dos/lane_lease.py +952 -0
- dos/lane_overlap.py +228 -0
- dos/lease_health.py +282 -0
- dos/lifecycle.py +211 -0
- dos/liveness.py +352 -0
- dos/lock_modes.py +185 -0
- dos/log_source.py +395 -0
- dos/loop_decide.py +1746 -0
- dos/marker_gate.py +254 -0
- dos/marker_sensor.py +396 -0
- dos/noop_streak.py +280 -0
- dos/notify.py +479 -0
- dos/observe.py +175 -0
- dos/oracle.py +1661 -0
- dos/overlap_eval.py +214 -0
- dos/overlap_policy.py +342 -0
- dos/packet_sidecar.py +267 -0
- dos/phase_shipped.py +1985 -0
- dos/pick_priority.py +225 -0
- dos/pickable.py +369 -0
- dos/picker_oracle.py +1037 -0
- dos/plan_board.py +513 -0
- dos/plan_board_tui.py +113 -0
- dos/plan_source.py +455 -0
- dos/posttool_sensor.py +528 -0
- dos/precursor_gate.py +499 -0
- dos/precursor_gate_eval.py +239 -0
- dos/preflight.py +825 -0
- dos/pretool_sensor.py +490 -0
- dos/proc_delta.py +181 -0
- dos/productivity.py +296 -0
- dos/provider_limit.py +242 -0
- dos/py.typed +4 -0
- dos/reason_morphology.py +299 -0
- dos/reasons.py +449 -0
- dos/reconcile.py +173 -0
- dos/recurring_wedge.py +206 -0
- dos/render.py +393 -0
- dos/result_state.py +468 -0
- dos/resume.py +578 -0
- dos/resume_evidence.py +293 -0
- dos/retention.py +344 -0
- dos/reward.py +372 -0
- dos/rewind.py +587 -0
- dos/rewind_evidence.py +168 -0
- dos/rewind_tokens.py +252 -0
- dos/run_id.py +342 -0
- dos/scope.py +520 -0
- dos/scope_source.py +382 -0
- dos/scout.py +982 -0
- dos/self_modify.py +209 -0
- dos/sibling_scan.py +569 -0
- dos/skills/EXAMPLES.md +584 -0
- dos/skills/dos-class-cycle/SKILL.md +107 -0
- dos/skills/dos-dispatch/SKILL.md +177 -0
- dos/skills/dos-dispatch-loop/SKILL.md +254 -0
- dos/skills/dos-goal-gate/SKILL.md +269 -0
- dos/skills/dos-next-up/SKILL.md +231 -0
- dos/skills/dos-promote/SKILL.md +114 -0
- dos/skills/dos-replan/SKILL.md +159 -0
- dos/skills/dos-replan-loop/SKILL.md +114 -0
- dos/skills/dos-self-improve/SKILL.md +213 -0
- dos/skills/dos-supervise-loop/SKILL.md +180 -0
- dos/skills/dos-unstick/SKILL.md +108 -0
- dos/skills/dos-witness-claim/SKILL.md +251 -0
- dos/stamp.py +1002 -0
- dos/state_health.py +387 -0
- dos/status.py +114 -0
- dos/stop_policy.py +334 -0
- dos/supervise.py +1014 -0
- dos/testwitness.py +392 -0
- dos/timeline.py +1027 -0
- dos/tokens.py +485 -0
- dos/tool_stream.py +393 -0
- dos/tool_stream_eval.py +226 -0
- dos/trace.py +524 -0
- dos/verdict.py +140 -0
- dos/verdict_cli.py +189 -0
- dos/verdict_journal.py +497 -0
- dos/verdict_rollup.py +217 -0
- dos/verdicts.py +181 -0
- dos/wedge_reason.py +282 -0
- dos_kernel-0.22.0.dist-info/METADATA +859 -0
- dos_kernel-0.22.0.dist-info/RECORD +178 -0
- dos_kernel-0.22.0.dist-info/WHEEL +5 -0
- dos_kernel-0.22.0.dist-info/entry_points.txt +39 -0
- dos_kernel-0.22.0.dist-info/licenses/LICENSE +21 -0
- dos_kernel-0.22.0.dist-info/top_level.txt +2 -0
- dos_mcp/__init__.py +52 -0
- dos_mcp/py.typed +2 -0
- dos_mcp/server.py +779 -0
dos/answer_shape.py
ADDED
|
@@ -0,0 +1,299 @@
|
|
|
1
|
+
"""`answer_shape` — the "is this output an ANSWER, or a non-answer?" verdict (docs/156 §4).
|
|
2
|
+
|
|
3
|
+
The picker/grounding-boundary closure of the **grounded-but-not-an-answer** gap that
|
|
4
|
+
the first real third-party DOS adoption surfaced (docs/156, a third-party grounded-RAG
|
|
5
|
+
app; the `project-dos-grounded-rag-adoption` recall). There, the numeric grounding gate
|
|
6
|
+
worked perfectly — every shipped *number* was witnessed — and the app still shipped, as
|
|
7
|
+
its "answer" to question q_025, a **5,780-char leaked chain-of-thought log** with
|
|
8
|
+
`refused=False`. The gate guarded the *facts*; nothing guarded that the output was an
|
|
9
|
+
*answer*. "Never shipped a wrong number" was literally true and badly misleading.
|
|
10
|
+
|
|
11
|
+
This is the missing leaf primitive docs/156 §4 named (build order Phase 2): a pure,
|
|
12
|
+
domain-free verdict an assembly policy can require *before* it ships an output —
|
|
13
|
+
|
|
14
|
+
ship ⟺ grounded AND answer_shape ≠ NON_ANSWER
|
|
15
|
+
|
|
16
|
+
so a structurally-disqualified output (an empty stub, a process/CoT log, a bare
|
|
17
|
+
refusal pasted as content) is caught even when every claim inside it grounds.
|
|
18
|
+
|
|
19
|
+
⚠ THE HONESTY BOUNDARY — read this before extending. This verdict judges **shape**,
|
|
20
|
+
never **correctness or relevance.** It answers the *mechanically-checkable* question
|
|
21
|
+
"is this output the KIND of thing that could be an answer, or is it structurally a
|
|
22
|
+
non-answer?" — NOT the semantic question "is this a GOOD / CORRECT / RELEVANT answer to
|
|
23
|
+
the question?". That second question is the Tier-3 gestalt the kernel deliberately
|
|
24
|
+
ABSTAINS on (docs/212/213/215, the `project-dos-non-coding-domains-world-witness-axis`
|
|
25
|
+
arc; the `project-dos-wall-presence-not-goal` W2/W3 gap): it has no independent witness,
|
|
26
|
+
so it belongs to a JUDGE (advisory, fail-to-abstain) or a HUMAN, never to a deterministic
|
|
27
|
+
oracle. `ANSWER_SHAPED` therefore means *"shaped like an answer,"* and explicitly NOT
|
|
28
|
+
*"a right answer"* — confusing the two would be the consistency-is-not-grounding trap
|
|
29
|
+
(`feedback-consistency-is-not-grounding`) one level up. On anything it cannot decide from
|
|
30
|
+
shape alone, this verdict returns INDETERMINATE — the abstain floor — never a false
|
|
31
|
+
`ANSWER_SHAPED`.
|
|
32
|
+
|
|
33
|
+
So where does it sit on the witness ladder (docs/192)? It is a **W2-presence-class**
|
|
34
|
+
check on the OUTPUT itself: "an answer-shaped artifact is present," the same altitude as
|
|
35
|
+
`verify()`'s file-path rung ("a real commit touched the path") — useful and sound for
|
|
36
|
+
what it claims, and pointedly NOT a W3 goal-witness. It is also *advisory*: it REPORTS a
|
|
37
|
+
shape; the consumer (an assembly policy) decides whether to withhold. PDP, not PEP.
|
|
38
|
+
|
|
39
|
+
The three states (mutually exclusive):
|
|
40
|
+
|
|
41
|
+
* ``ANSWER_SHAPED`` — passes the structural floor: non-empty, at or above the
|
|
42
|
+
viability length, and matches no disqualifying marker. Shaped
|
|
43
|
+
like an answer. (NOT a claim of correctness — see the boundary.)
|
|
44
|
+
* ``NON_ANSWER`` — structurally disqualified: empty/whitespace-only, below the
|
|
45
|
+
viability floor, OR matches a declared non-answer marker (a
|
|
46
|
+
process/CoT-log signature, a bare-refusal signature, a stub).
|
|
47
|
+
The q_025 catch. The dangerous case a grounding gate misses.
|
|
48
|
+
* ``INDETERMINATE`` — no policy supplied, or the text is non-trivial but the policy
|
|
49
|
+
cannot disqualify it on shape — the abstain floor. The semantic
|
|
50
|
+
"is it a good answer?" residue goes here, to a JUDGE / HUMAN.
|
|
51
|
+
|
|
52
|
+
The markers are **policy, not hardcode.** docs/156 §5 specifically criticised the host's
|
|
53
|
+
finance-shaped `_TOOL_LEAK` / `strip_cot` regex pile as the wrong thing to lift into the
|
|
54
|
+
kernel. So this module ships a *generic* default policy (the obvious cross-domain
|
|
55
|
+
signatures — a fenced reasoning block, "let me think", a tool-call dump, a bare "I
|
|
56
|
+
cannot") and lets a host DECLARE its own `AnswerShapePolicy` (the closed-enum-as-data /
|
|
57
|
+
policy-injection pattern used across the kernel: `dos.reasons`, `dos.stamp`,
|
|
58
|
+
`overlap_policy`). The kernel carries the *fold + the floor*; the host carries the
|
|
59
|
+
*signatures*.
|
|
60
|
+
|
|
61
|
+
⚓ Pure; the candidate text + the policy are handed in at the caller boundary (the
|
|
62
|
+
drafted answer, the declared markers). No I/O, no model call, no regex compilation at
|
|
63
|
+
import. Returns a verdict; NEVER raises (a bad pattern degrades to "not matched", never
|
|
64
|
+
an exception — the fail-safe direction is to NOT over-disqualify, the dual of
|
|
65
|
+
`run_judge`'s fail-to-abstain).
|
|
66
|
+
"""
|
|
67
|
+
|
|
68
|
+
from __future__ import annotations
|
|
69
|
+
|
|
70
|
+
import enum
|
|
71
|
+
import re
|
|
72
|
+
from dataclasses import dataclass
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
class AnswerShape(str, enum.Enum):
    """Three-way verdict on the *shape* of an output (docs/156 §4).

    Members are ``str``-valued, so each one compares equal to its own token and can
    be emitted as a ``--json`` value without a lookup table. The states are not
    symmetric: only ``NON_ANSWER`` is a positive disqualification. ``ANSWER_SHAPED``
    means "no structural disqualifier was found" (it is NOT a claim that the answer
    is correct), and ``INDETERMINATE`` is the abstain floor.
    """

    # Shaped like an answer: no disqualifier fired (NOT "correct").
    ANSWER_SHAPED = "ANSWER_SHAPED"
    # Structurally disqualified: empty, too short, or a marker hit.
    NON_ANSWER = "NON_ANSWER"
    # Shape alone cannot decide: abstain to a JUDGE/HUMAN.
    INDETERMINATE = "INDETERMINATE"

    def __str__(self) -> str:  # pragma: no cover - trivial
        """Render as the bare token rather than ``AnswerShape.X``."""
        return self.value

    @property
    def is_disqualified(self) -> bool:
        """True only for ``NON_ANSWER`` — the output was positively ruled out."""
        return self is type(self).NON_ANSWER

    @property
    def is_shippable(self) -> bool:
        """True only for ``ANSWER_SHAPED`` — shape grounds permit shipping.

        ``INDETERMINATE`` is deliberately neither shippable nor disqualified: it
        means shape could not decide and the question belongs to a JUDGE/HUMAN. A
        consumer that ships on ``INDETERMINATE`` has skipped that residual question
        rather than answered it.
        """
        return self is type(self).ANSWER_SHAPED
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
@dataclass(frozen=True)
class AnswerShapePolicy:
    """Host-declared shape rules, carried as immutable DATA (docs/156 §5).

    The kernel supplies the decision fold; a host supplies the signatures by
    constructing one of these. All pattern strings are regexes applied with a
    case-insensitive ``search`` (a hit anywhere in the text counts). A pattern that
    does not compile is treated as "not matched" at match time — the fail-safe is to
    UNDER-disqualify, so a broken host rule degrades toward ``ANSWER_SHAPED`` rather
    than crashing the verdict.

    Fields:
        min_viable_chars: length floor below which a non-empty output is considered
            too small to be an answer (a bare "0", an ack token, a truncated stub).
            The default of 1 effectively turns the length test off, leaving only
            empty/whitespace-only output disqualified. Length catches the stub end
            of the failure spectrum; long leaked process logs are the markers' job.
        non_answer_patterns: the closed set of disqualifying signatures — e.g. a
            process/CoT-log tell, a bare refusal, a tool-call dump. Any hit yields
            ``NON_ANSWER``.
        answer_markers: optional positive signatures (e.g. a structured "Answer:"
            prefix or a required citation token). When non-empty, a non-trivial text
            matching NONE of them is ``INDETERMINATE`` instead of ``ANSWER_SHAPED``:
            the host is opting into positive-evidence-required. When empty (the
            default), absence of a disqualifier is sufficient for ``ANSWER_SHAPED``.
    """

    # Viability floor in characters; 1 means "non-empty is enough".
    min_viable_chars: int = 1
    # Regexes whose match disqualifies the output.
    non_answer_patterns: tuple[str, ...] = ()
    # Regexes of which at least one must match, when any are declared.
    answer_markers: tuple[str, ...] = ()
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
# Domain-free default signatures. Each entry is a structural tell that the output is
# a process artifact, a refusal, or a stub rather than a delivered answer. This is
# intentionally NOT a domain-shaped (e.g. finance-specific) regex pile — docs/156 §5's
# lesson. A host with domain-specific leaks (the RAG app's tool-leak markers) declares
# its own policy on top of these.
GENERIC_NON_ANSWER_PATTERNS: tuple[str, ...] = (
    # Leaked reasoning block — the opening tag alone is enough.
    r"<thinking>",
    r"</thinking>",
    r"<scratchpad>",
    # Narrated chain-of-thought.
    r"\blet me (?:think|reason)\b",
    # An enumerated process log presented as the answer.
    r"\bstep 1:\s",
    # A bare refusal pasted as content.
    r"\bi cannot\b.*\b(?:answer|help|comply|provide)\b",
    r"\bi'?m (?:sorry|unable)\b.*\b(?:cannot|can't|unable)\b",
    # A raw tool-call dump.
    r"^\s*(?:tool_call|function_call|tool_use)\b",
    # A stack trace shipped as the answer.
    r"\btraceback \(most recent call last\)",
)
|
|
155
|
+
|
|
156
|
+
# The kernel's default policy: the generic signatures only, with the length floor at
# 1 (so only empty/whitespace-only output fails on length) and no positive answer
# markers required.
GENERIC_ANSWER_SHAPE_POLICY = AnswerShapePolicy(
    non_answer_patterns=GENERIC_NON_ANSWER_PATTERNS,
    min_viable_chars=1,
)
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
@dataclass(frozen=True)
class AnswerShapeVerdict:
    """Immutable result of a shape classification, echoing its inputs for legibility.

    Fields:
        state: the typed `AnswerShape` verdict.
        length: character count of the candidate text.
        matched: the disqualifying pattern that fired, or ``""`` when none did.
        reason: a one-line, operator-facing explanation.

    The echoed fields let a surfaced verdict explain itself without the caller
    having to re-derive why it was reached.
    """

    state: AnswerShape
    length: int
    matched: str
    reason: str

    @property
    def is_disqualified(self) -> bool:
        """Delegate to ``state.is_disqualified``."""
        return self.state.is_disqualified

    @property
    def is_shippable(self) -> bool:
        """Delegate to ``state.is_shippable``."""
        return self.state.is_shippable

    def to_dict(self) -> dict:
        """Return a plain-dict view: the four fields plus both derived booleans.

        ``state`` is emitted as its string value. Key order is state, length,
        matched, is_shippable, is_disqualified, reason.
        """
        shape = self.state
        payload = {
            "state": shape.value,
            "length": self.length,
            "matched": self.matched,
        }
        payload["is_shippable"] = shape.is_shippable
        payload["is_disqualified"] = shape.is_disqualified
        payload["reason"] = self.reason
        return payload
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
def _safe_search(pattern: str, text: str) -> bool:
|
|
197
|
+
"""Case-insensitive `re.search`, fail-safe to False on a bad pattern.
|
|
198
|
+
|
|
199
|
+
A host-declared regex that does not compile must NOT crash the verdict — the
|
|
200
|
+
fail-safe direction is to UNDER-disqualify (treat it as "not matched"), the dual of
|
|
201
|
+
`run_judge`'s fail-to-abstain. A broken disqualifier therefore degrades a possible
|
|
202
|
+
`NON_ANSWER` toward `ANSWER_SHAPED`/`INDETERMINATE`, never toward an exception.
|
|
203
|
+
"""
|
|
204
|
+
try:
|
|
205
|
+
return re.search(pattern, text, re.IGNORECASE | re.MULTILINE) is not None
|
|
206
|
+
except re.error:
|
|
207
|
+
return False
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
def classify(
    text: "str | None",
    *,
    policy: "AnswerShapePolicy | None" = GENERIC_ANSWER_SHAPE_POLICY,
) -> AnswerShapeVerdict:
    """Classify an output's SHAPE: ANSWER_SHAPED / NON_ANSWER / INDETERMINATE. PURE.

    Args:
        text: the candidate output (a drafted answer), gathered at the boundary.
            ``None`` is treated as the empty string.
        policy: the declared shape rules; the generic default if omitted, or
            ``None`` to force INDETERMINATE (no rules → cannot judge shape → abstain).

    The decision order (first match wins):

      1. ``policy is None``                  → INDETERMINATE (no rules; abstain).
      2. ``text`` empty / whitespace-only    → NON_ANSWER (nothing was delivered).
      3. stripped length < the viability
         floor ``max(1, min_viable_chars)``  → NON_ANSWER (a stub / ack token).
      4. a ``non_answer_patterns`` hit       → NON_ANSWER (a process/CoT-log /
                                               bare-refusal / tool-dump signature —
                                               the q_025 catch).
      5. ``answer_markers`` non-empty AND
         none matched                        → INDETERMINATE (the strict host
                                               required a positive marker; abstain).
      6. otherwise                           → ANSWER_SHAPED (no disqualifier — NOT
                                               a claim of correctness).

    Note that the floor in step 3 is compared against the whitespace-STRIPPED
    length, while the verdict's ``length`` field reports the raw length. Patterns
    are matched against the raw (unstripped) text.

    Returns:
        An `AnswerShapeVerdict`. A malformed policy does not raise here: a
        ``min_viable_chars`` that cannot be coerced to an int falls back to a floor
        of 1, and a missing pattern collection is treated as empty. Both fall back in
        the UNDER-disqualify direction. Failures inside an individual pattern are
        left to `_safe_search`.

    Remember the boundary: a `NON_ANSWER` is a sound structural disqualification; an
    `ANSWER_SHAPED` is only "shape is fine", and the semantic correctness/relevance
    question is for a JUDGE/HUMAN (INDETERMINATE is where shape honestly cannot
    decide).
    """
    if policy is None:
        return AnswerShapeVerdict(
            state=AnswerShape.INDETERMINATE,
            length=len(text or ""),
            matched="",
            reason="no answer-shape policy supplied — cannot judge shape; abstain "
                   "(the semantic 'is it an answer?' question goes to a JUDGE/HUMAN)",
        )

    raw = text or ""
    stripped = raw.strip()
    n = len(raw)

    if not stripped:
        return AnswerShapeVerdict(
            state=AnswerShape.NON_ANSWER,
            length=n,
            matched="",
            reason="empty / whitespace-only output — nothing was delivered (NON_ANSWER)",
        )

    try:
        floor = max(1, int(policy.min_viable_chars))
    except (TypeError, ValueError, OverflowError):
        # A malformed floor (None, a non-numeric string, NaN/inf) must not crash the
        # verdict. Fall back to "non-empty is enough", the under-disqualify direction.
        floor = 1

    if len(stripped) < floor:
        return AnswerShapeVerdict(
            state=AnswerShape.NON_ANSWER,
            length=n,
            matched="",
            reason=(f"output is {len(stripped)} non-space chars, below the viability "
                    f"floor of {policy.min_viable_chars} — a stub/ack token, not an "
                    f"answer (NON_ANSWER)"),
        )

    # `or ()` tolerates a policy whose pattern collection was left as None.
    for pat in policy.non_answer_patterns or ():
        if _safe_search(pat, raw):
            return AnswerShapeVerdict(
                state=AnswerShape.NON_ANSWER,
                length=n,
                matched=pat,
                reason=(f"output matched the non-answer signature {pat!r} — a "
                        f"process/CoT-log, bare refusal, or tool dump pasted as the "
                        f"answer (the grounded-but-not-an-answer catch, docs/156 §4)"),
            )

    if policy.answer_markers:
        if not any(_safe_search(m, raw) for m in policy.answer_markers):
            return AnswerShapeVerdict(
                state=AnswerShape.INDETERMINATE,
                length=n,
                matched="",
                reason=("no disqualifier fired, but the policy requires a positive "
                        "answer marker and none matched — abstain on shape (route the "
                        "semantic question to a JUDGE/HUMAN)"),
            )

    return AnswerShapeVerdict(
        state=AnswerShape.ANSWER_SHAPED,
        length=n,
        matched="",
        reason=("no structural disqualifier — shaped like an answer (NOT a claim of "
                "correctness or relevance; that is a JUDGE/HUMAN question)"),
    )
|