cortex-loop 0.1.0a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cortex/__init__.py +7 -0
- cortex/adapters.py +339 -0
- cortex/blocklist.py +51 -0
- cortex/challenges.py +210 -0
- cortex/cli.py +7 -0
- cortex/core.py +601 -0
- cortex/core_helpers.py +190 -0
- cortex/data/identity_preamble.md +5 -0
- cortex/data/layer1_part_a.md +65 -0
- cortex/data/layer1_part_b.md +17 -0
- cortex/executive.py +295 -0
- cortex/foundation.py +185 -0
- cortex/genome.py +348 -0
- cortex/graveyard.py +226 -0
- cortex/hooks/__init__.py +27 -0
- cortex/hooks/_shared.py +167 -0
- cortex/hooks/post_tool_use.py +13 -0
- cortex/hooks/pre_tool_use.py +13 -0
- cortex/hooks/session_start.py +13 -0
- cortex/hooks/stop.py +13 -0
- cortex/invariants.py +258 -0
- cortex/packs.py +118 -0
- cortex/repomap.py +6 -0
- cortex/requirements.py +497 -0
- cortex/retry.py +312 -0
- cortex/stop_contract.py +217 -0
- cortex/stop_payload.py +122 -0
- cortex/stop_policy.py +100 -0
- cortex/stop_runtime.py +400 -0
- cortex/stop_signals.py +75 -0
- cortex/store.py +793 -0
- cortex/templates/__init__.py +10 -0
- cortex/utils.py +58 -0
- cortex_loop-0.1.0a1.dist-info/METADATA +121 -0
- cortex_loop-0.1.0a1.dist-info/RECORD +52 -0
- cortex_loop-0.1.0a1.dist-info/WHEEL +5 -0
- cortex_loop-0.1.0a1.dist-info/entry_points.txt +3 -0
- cortex_loop-0.1.0a1.dist-info/licenses/LICENSE +21 -0
- cortex_loop-0.1.0a1.dist-info/top_level.txt +3 -0
- cortex_ops_cli/__init__.py +3 -0
- cortex_ops_cli/_adapter_validation.py +119 -0
- cortex_ops_cli/_check_report.py +454 -0
- cortex_ops_cli/_check_report_output.py +270 -0
- cortex_ops_cli/_openai_bridge_probe.py +241 -0
- cortex_ops_cli/_openai_bridge_protocol.py +469 -0
- cortex_ops_cli/_runtime_profile_templates.py +341 -0
- cortex_ops_cli/_runtime_profiles.py +445 -0
- cortex_ops_cli/gemini_hooks.py +301 -0
- cortex_ops_cli/main.py +911 -0
- cortex_ops_cli/openai_app_server_bridge.py +375 -0
- cortex_repomap/__init__.py +1 -0
- cortex_repomap/engine.py +1201 -0
cortex/requirements.py
ADDED
|
@@ -0,0 +1,497 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
import shlex
|
|
5
|
+
from collections.abc import Mapping
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Any, NamedTuple
|
|
8
|
+
|
|
9
|
+
from .utils import _as_string_list, _normalize_repo_relative_path, _unique_list
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class RequirementAuditEvaluation(NamedTuple):
    """Result of evaluating the ``requirement_audit`` section of a payload."""

    # Trimmed summary of the validation result; None when no audit was supplied.
    report: dict[str, Any] | None
    # Full validation result; None when no audit was supplied.
    details: dict[str, Any] | None
    # True when a supplied audit did not validate as ok.
    gap: bool
    # True when an audit was required but the payload carried none.
    missing: bool
    # Structured gap descriptions (see ``_diagnostic`` for the entry shape).
    diagnostics: list[dict[str, Any]]
    # Human-readable warning messages.
    warnings: list[str]
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class TruthClaimsEvaluation(NamedTuple):
    """Result of evaluating the ``truth_claims`` section of a payload."""

    # Per-claim verification report; None when no truth_claims were supplied.
    report: dict[str, Any] | None
    # True when at least one claim was contradicted or the payload was malformed.
    gap: bool
    # Structured gap descriptions (see ``_diagnostic`` for the entry shape).
    diagnostics: list[dict[str, Any]]
    # Human-readable warning messages.
    warnings: list[str]
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def evaluate_requirement_audit_payload(
    payload: Any,
    *,
    require_requirement_audit: bool,
    require_evidence_for_passed_requirement: bool,
    required_requirement_ids: list[str],
    root: Path,
    witness: Mapping[str, list[str]] | None = None,
) -> RequirementAuditEvaluation:
    """Evaluate a ``requirement_audit`` payload and summarize the outcome.

    Args:
        payload: The raw ``requirement_audit`` value, or None when absent.
        require_requirement_audit: When True, an absent payload is reported
            as ``missing`` with a diagnostic and a warning.
        require_evidence_for_passed_requirement: Forwarded to
            ``validate_requirement_audit``.
        required_requirement_ids: Forwarded to ``validate_requirement_audit``.
        root: Forwarded to ``validate_requirement_audit``.
        witness: Forwarded to ``validate_requirement_audit``.

    Returns:
        A ``RequirementAuditEvaluation``. ``gap`` is True whenever the
        validation result is not ok; ``missing`` is only True for an absent
        payload that was required.
    """
    if payload is None:
        if not require_requirement_audit:
            # Nothing supplied and nothing demanded: a clean, empty result.
            return RequirementAuditEvaluation(
                report=None,
                details=None,
                gap=False,
                missing=False,
                diagnostics=[],
                warnings=[],
            )
        return RequirementAuditEvaluation(
            report=None,
            details=None,
            gap=False,
            missing=True,
            diagnostics=[_diagnostic([], ["requirement_audit with items covering required requirements"], "No requirement_audit was provided for a required requirement gate.", "comprehension_gap", "far")],
            warnings=[
                "No requirement_audit provided in Stop payload. Include requirement_audit to prove "
                "prompt requirement coverage with evidence."
            ],
        )

    details = validate_requirement_audit(
        payload,
        require_evidence_for_passed_requirement=require_evidence_for_passed_requirement,
        required_requirement_ids=required_requirement_ids,
        root=root,
        witness=witness,
    )
    warnings: list[str] = []
    gap = not details["ok"]
    if gap:
        gap_messages = list(details.get("errors", []))
        if not gap_messages:
            # The audit can be not-ok with no validation errors: items that
            # are reported as "fail" with a gap description are well-formed.
            # Say so instead of emitting a warning with an empty tail.
            failed = int(details.get("fail_count") or 0)
            gap_messages = [f"{failed} requirement(s) marked fail"]
        warnings.append("Requirement audit reported gaps: " + "; ".join(gap_messages))
    warnings.extend(f"Requirement audit note: {note}" for note in details.get("warnings", []))
    return RequirementAuditEvaluation(
        report=minimal_requirement_audit_report(details),
        details=details,
        gap=gap,
        missing=False,
        # Copy each entry so callers cannot mutate the dicts held in details.
        diagnostics=[dict(item) for item in details.get("diagnostics", [])],
        warnings=warnings,
    )
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def evaluate_truth_claims_payload(
    payload: Any,
    *,
    root: Path,
    witness: Mapping[str, list[str]] | None = None,
    observed_modified_files: list[str] | None = None,
    modified_files_error: str | None = None,
) -> TruthClaimsEvaluation:
    """Check ``truth_claims`` (modified files, tests run) against observations.

    Args:
        payload: The raw ``truth_claims`` value, or None when absent.
        root: Repository root; resolved and used to normalize claimed paths.
        witness: Session observations; only its ``"commands"`` entry is read
            here, to verify ``tests_ran`` claims.
        observed_modified_files: Files observed as changed. None means the
            observation is unavailable, which makes file claims uncheckable
            rather than contradicted.
        modified_files_error: Optional reason text used in the warning when
            ``observed_modified_files`` is None.

    Returns:
        A ``TruthClaimsEvaluation``. ``gap`` is True when the payload has an
        invalid shape or when at least one claim was not observed. Claims that
        cannot be checked only produce warnings.
    """
    if payload is None:
        return TruthClaimsEvaluation(report=None, gap=False, diagnostics=[], warnings=[])

    if not isinstance(payload, Mapping):
        shape_diagnostics = [_diagnostic(["truth_claims payload with invalid shape"], ["truth_claims object"], "Truth claims used an invalid payload shape.", "comprehension_gap", "far")]
        report = {
            "ok": False,
            "modified_files_claimed": [],
            "modified_files_verified": [],
            "modified_files_unverified": [],
            "modified_files_uncheckable": [],
            "tests_ran_claimed": [],
            "tests_ran_verified": [],
            "tests_ran_unverified": [],
            "tests_ran_uncheckable": [],
            # Keep the report schema identical to the well-formed path so
            # consumers can read report["diagnostics"] unconditionally.
            "diagnostics": shape_diagnostics,
            "errors": ["Invalid truth_claims format; expected an object."],
            "warnings": [],
        }
        return TruthClaimsEvaluation(
            report=report,
            gap=True,
            diagnostics=shape_diagnostics,
            warnings=["Truth claims reported gaps: Invalid truth_claims format; expected an object."],
        )

    modified_claims = _normalize_modified_file_claims(payload.get("modified_files"), root=root.resolve())
    tests_ran_claims = _unique_list(
        [normalized for value in _as_string_list(payload.get("tests_ran")) if (normalized := _normalize_command(value))]
    )
    report_warnings: list[str] = []
    errors: list[str] = []
    diagnostics: list[dict[str, Any]] = []

    modified_verified: list[str] = []
    modified_unverified: list[str] = []
    modified_uncheckable: list[str] = []
    tests_ran_verified: list[str] = []
    tests_ran_unverified: list[str] = []
    tests_ran_uncheckable: list[str] = []

    if not modified_claims and not tests_ran_claims:
        report_warnings.append(
            "truth_claims provided with no supported claims; expected modified_files and/or tests_ran."
        )

    if modified_claims:
        changed_files = set(_as_string_list(observed_modified_files)) if observed_modified_files is not None else None
        if changed_files is None:
            # No observation to compare against: neither confirm nor refute.
            modified_uncheckable = list(modified_claims)
            error_reason = str(modified_files_error or "").strip()
            report_warnings.append(
                "truth_claims.modified_files are uncheckable: "
                + (error_reason or "session-scoped modified-files evidence unavailable")
            )
        else:
            for claimed_path in modified_claims:
                if claimed_path in changed_files:
                    modified_verified.append(claimed_path)
                else:
                    modified_unverified.append(claimed_path)
            if modified_unverified:
                errors.append(
                    "truth_claims.modified_files not observed in repository changes: "
                    + ", ".join(modified_unverified)
                )
                diagnostics.append(_diagnostic(modified_unverified, modified_claims, "Some claimed modified files were not observed in the session-scoped file delta.", "execution_gap", "moderate"))

    if tests_ran_claims:
        observed_commands = _unique_list(
            [
                normalized
                for value in _as_string_list((witness or {}).get("commands"))
                if (normalized := _normalize_command(value))
            ]
        )
        if not observed_commands:
            # No command events were witnessed, so nothing can be compared.
            tests_ran_uncheckable = list(tests_ran_claims)
            report_warnings.append("truth_claims.tests_ran are uncheckable: no observed command events in session")
        else:
            for claim in tests_ran_claims:
                if any(_command_claim_matches(claim, observed) for observed in observed_commands):
                    tests_ran_verified.append(claim)
                else:
                    tests_ran_unverified.append(claim)
            if tests_ran_unverified:
                errors.append(
                    "truth_claims.tests_ran not witnessed in session events: " + ", ".join(tests_ran_unverified)
                )
                diagnostics.append(_diagnostic(tests_ran_verified, tests_ran_claims, "Some claimed test commands were not witnessed in session events.", "execution_gap", "moderate"))

    report = {
        "ok": len(errors) == 0,
        "modified_files_claimed": modified_claims,
        "modified_files_verified": modified_verified,
        "modified_files_unverified": modified_unverified,
        "modified_files_uncheckable": modified_uncheckable,
        "tests_ran_claimed": tests_ran_claims,
        "tests_ran_verified": tests_ran_verified,
        "tests_ran_unverified": tests_ran_unverified,
        "tests_ran_uncheckable": tests_ran_uncheckable,
        "diagnostics": diagnostics,
        "errors": errors,
        "warnings": report_warnings,
    }
    warnings: list[str] = []
    if errors:
        warnings.append("Truth claims reported gaps: " + "; ".join(errors))
    warnings.extend(f"Truth claims note: {warning}" for warning in report_warnings)
    return TruthClaimsEvaluation(
        report=report,
        gap=bool(errors),
        diagnostics=diagnostics,
        warnings=warnings,
    )
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
def minimal_requirement_audit_report(report: Mapping[str, Any]) -> dict[str, Any]:
    """Project a full audit validation result onto its summary fields.

    Keeps ``ok``, ``errors``, ``missing_required_ids``, the three counters and
    the mapping-shaped ``diagnostics`` entries; everything else in *report*
    (for example ``warnings``) is left out.
    """

    def _strings(key: str) -> list[str]:
        return [str(entry) for entry in _as_string_list(report.get(key))]

    def _count(key: str) -> int:
        # A missing or falsy counter is reported as 0.
        return int(report.get(key) or 0)

    return {
        "ok": bool(report.get("ok")),
        "errors": _strings("errors"),
        "missing_required_ids": _strings("missing_required_ids"),
        "item_count": _count("item_count"),
        "pass_count": _count("pass_count"),
        "fail_count": _count("fail_count"),
        "diagnostics": [
            dict(entry)
            for entry in report.get("diagnostics", [])
            if isinstance(entry, Mapping)
        ],
    }
|
|
214
|
+
|
|
215
|
+
|
|
216
|
+
def _diagnostic(
|
|
217
|
+
evidence_found: list[str],
|
|
218
|
+
evidence_expected: list[str],
|
|
219
|
+
gap_description: str,
|
|
220
|
+
gap_characterization: str,
|
|
221
|
+
distance_signal: str,
|
|
222
|
+
) -> dict[str, Any]:
|
|
223
|
+
return {
|
|
224
|
+
"evidence_found": [str(item) for item in evidence_found if str(item).strip()],
|
|
225
|
+
"evidence_expected": [str(item) for item in evidence_expected if str(item).strip()],
|
|
226
|
+
"gap_description": gap_description,
|
|
227
|
+
"gap_characterization": gap_characterization,
|
|
228
|
+
"distance_signal": distance_signal,
|
|
229
|
+
}
|
|
230
|
+
|
|
231
|
+
|
|
232
|
+
def evaluate_evidence_reference(
    reference: str,
    *,
    root: Path,
    witness: Mapping[str, list[str]] | None = None,
) -> dict[str, str]:
    """Classify and verify one evidence reference against session observations.

    Reads the ``"commands"`` and ``"tools"`` entries of *witness* (commands
    are normalized, tool names lower-cased) and delegates to
    ``_evaluate_evidence_reference``, returning its result dict.
    """
    session = witness or {}
    commands_seen = [_normalize_command(raw) for raw in _as_string_list(session.get("commands"))]
    tools_seen = {name.lower() for name in _as_string_list(session.get("tools"))}
    return _evaluate_evidence_reference(
        reference,
        root=root,
        observed_commands=commands_seen,
        observed_tools=tools_seen,
    )
|
|
246
|
+
|
|
247
|
+
|
|
248
|
+
def validate_requirement_audit(
    payload: Any,
    *,
    require_evidence_for_passed_requirement: bool,
    required_requirement_ids: list[str],
    root: Path,
    witness: Mapping[str, list[str]] | None = None,
) -> dict[str, Any]:
    """Validate a ``requirement_audit`` payload item by item.

    Args:
        payload: Expected to be a mapping with an ``items`` list and an
            optional ``completeness_verdict``.
        require_evidence_for_passed_requirement: When True, a ``pass`` item
            with no evidence entries is an error.
        required_requirement_ids: Ids that must appear among the items.
        root: Base directory for resolving path-like evidence references.
        witness: Session observations; its ``"commands"`` and ``"tools"``
            entries are used to verify evidence references.

    Returns:
        A dict with ``ok``, ``item_count``, ``pass_count``, ``fail_count``,
        ``missing_required_ids``, ``diagnostics``, ``warnings`` and
        ``errors``. ``ok`` is True only when there are no errors and no item
        has status ``fail``.
    """
    errors: list[str] = []
    warnings: list[str] = []
    diagnostics: list[dict[str, Any]] = []

    def record_gap(
        message: str,
        found: list[str],
        expected: list[str],
        description: str,
        characterization: str,
        distance: str,
    ) -> None:
        # Every error is paired with exactly one structured diagnostic.
        errors.append(message)
        diagnostics.append(_diagnostic(found, expected, description, characterization, distance))

    session = witness or {}
    observed_commands = [_normalize_command(raw) for raw in _as_string_list(session.get("commands"))]
    observed_tools = {name.lower() for name in _as_string_list(session.get("tools"))}

    if not isinstance(payload, Mapping):
        return {
            "ok": False,
            "item_count": 0,
            "pass_count": 0,
            "fail_count": 0,
            "missing_required_ids": list(required_requirement_ids),
            "diagnostics": [_diagnostic(["requirement_audit payload with invalid shape"], ["requirement_audit object with items"], "Requirement audit used an invalid payload shape.", "comprehension_gap", "far")],
            "warnings": [],
            "errors": ["Invalid requirement_audit format; expected an object."],
        }

    # Anything other than a list is treated the same as an empty item list.
    items = payload.get("items") if isinstance(payload.get("items"), list) else []
    if not items:
        record_gap(
            "requirement_audit.items must be a non-empty list.",
            [],
            ["requirement_audit.items with at least one requirement entry"],
            "Requirement audit did not provide any requirement items.",
            "comprehension_gap",
            "far",
        )

    seen_ids: set[str] = set()
    passed = 0
    failed = 0
    for position, entry in enumerate(items):
        if not isinstance(entry, Mapping):
            record_gap(
                f"requirement_audit.items[{position}] must be an object.",
                [f"items[{position}]={type(entry).__name__}"],
                [f"items[{position}] as requirement object"],
                "Requirement audit item used an invalid shape.",
                "comprehension_gap",
                "far",
            )
            continue

        requirement_id = str(entry.get("id") or "").strip()
        if not requirement_id:
            record_gap(
                f"requirement_audit.items[{position}] is missing a non-empty id.",
                [f"items[{position}] without id"],
                [f"items[{position}].id"],
                "Requirement audit item is missing its requirement id.",
                "comprehension_gap",
                "far",
            )
            continue

        if requirement_id in seen_ids:
            # A duplicate is flagged but still evaluated and counted below.
            record_gap(
                f"requirement_audit.items[{position}] has duplicate id '{requirement_id}'.",
                [requirement_id],
                ["unique requirement ids"],
                f"Requirement '{requirement_id}' was reported more than once.",
                "comprehension_gap",
                "moderate",
            )
        seen_ids.add(requirement_id)

        status = str(entry.get("status") or "").strip().lower()
        if status == "fail":
            failed += 1
            if not str(entry.get("gap") or "").strip():
                record_gap(
                    f"requirement '{requirement_id}' is fail but has no gap description.",
                    [f"requirement '{requirement_id}' marked fail"],
                    [f"gap description for requirement '{requirement_id}'"],
                    f"Requirement '{requirement_id}' failed without describing the gap.",
                    "execution_gap",
                    "close",
                )
            continue

        if status != "pass":
            record_gap(
                f"requirement '{requirement_id}' has invalid status '{status}' (expected pass|fail).",
                [f"requirement '{requirement_id}' status={status or 'missing'}"],
                ["status=pass|fail"],
                f"Requirement '{requirement_id}' used an invalid status value.",
                "comprehension_gap",
                "far",
            )
            continue

        passed += 1
        evidence = _as_string_list(entry.get("evidence"))
        if require_evidence_for_passed_requirement and not evidence:
            record_gap(
                f"requirement '{requirement_id}' is pass but has no evidence.",
                [f"requirement '{requirement_id}' marked pass without evidence"],
                [f"verified evidence for requirement '{requirement_id}'"],
                f"Requirement '{requirement_id}' was marked pass without evidence.",
                "execution_gap",
                "close",
            )
        for evidence_ref in evidence:
            check = _evaluate_evidence_reference(
                evidence_ref,
                root=root,
                observed_commands=observed_commands,
                observed_tools=observed_tools,
            )
            outcome = check["status"]
            if outcome == "unverified":
                record_gap(
                    f"requirement '{requirement_id}' evidence is unverified: {check['reason']}",
                    [evidence_ref],
                    [f"verified evidence for requirement '{requirement_id}'"],
                    f"Requirement '{requirement_id}' evidence could not be verified.",
                    "execution_gap",
                    "moderate",
                )
            elif outcome == "uncheckable":
                # Uncheckable evidence is only a note, never an error.
                warnings.append(f"requirement '{requirement_id}' evidence is uncheckable: {check['reason']}")

    missing_required_ids: list[str] = []
    if required_requirement_ids:
        missing_required_ids = [rid for rid in required_requirement_ids if rid not in seen_ids]
        if missing_required_ids:
            record_gap(
                "requirement_audit missing required ids: " + ", ".join(missing_required_ids),
                sorted(seen_ids),
                required_requirement_ids,
                "Requirement audit omitted required requirement ids from the session contract.",
                "comprehension_gap",
                "moderate",
            )

    # The computed verdict is fixed before the declared verdict is inspected,
    # so a verdict error cannot influence what the verdict is compared to.
    expected_verdict = "pass" if (items and failed == 0 and not errors) else "fail"
    declared = payload.get("completeness_verdict")
    if declared is not None:
        normalized_verdict = str(declared).strip().lower()
        if normalized_verdict not in {"pass", "fail"}:
            record_gap(
                "requirement_audit.completeness_verdict must be 'pass' or 'fail'.",
                [f"completeness_verdict={normalized_verdict}"],
                ["completeness_verdict=pass|fail"],
                "Requirement audit used an invalid completeness verdict value.",
                "comprehension_gap",
                "far",
            )
        elif normalized_verdict != expected_verdict:
            record_gap(
                f"requirement_audit.completeness_verdict={normalized_verdict} "
                f"does not match computed verdict={expected_verdict}.",
                [f"completeness_verdict={normalized_verdict}"],
                [f"completeness_verdict={expected_verdict}"],
                "Requirement audit completeness verdict did not match the computed result.",
                "execution_gap",
                "close",
            )

    return {
        "ok": len(errors) == 0 and failed == 0,
        "item_count": len(items),
        "pass_count": passed,
        "fail_count": failed,
        "missing_required_ids": missing_required_ids,
        "diagnostics": diagnostics,
        "warnings": warnings,
        "errors": errors,
    }
|
|
356
|
+
|
|
357
|
+
|
|
358
|
+
def _evaluate_evidence_reference(
    reference: str,
    *,
    root: Path,
    observed_commands: list[str],
    observed_tools: set[str],
) -> dict[str, str]:
    """Verify one evidence reference and report ``kind``/``status``/``reason``.

    ``status`` is ``"verified"``, ``"unverified"`` or ``"uncheckable"``.
    Paths are verified by existence on disk; tools and commands are compared
    with the observed sets and are uncheckable when nothing was observed.
    Anything else is a note and always uncheckable.
    """

    def verdict(kind_name: str, status: str, reason: str) -> dict[str, str]:
        return {"kind": kind_name, "status": status, "reason": reason}

    kind, claim = _classify_evidence_reference(reference, root=root)

    if kind == "path":
        resolved = _evidence_reference_path(reference, root)
        if resolved is not None and resolved.exists():
            return verdict("path", "verified", "path exists")
        return verdict("path", "unverified", f"path does not exist: {resolved}")

    if kind == "tool":
        if not observed_tools:
            return verdict("tool", "uncheckable", "no observed tool events in session")
        if claim in observed_tools:
            return verdict("tool", "verified", f"tool observed: {claim}")
        return verdict("tool", "unverified", f"tool not observed: {claim}")

    if kind == "command":
        if not observed_commands:
            return verdict("command", "uncheckable", "no observed command events in session")
        witnessed = bool(claim) and any(_command_claim_matches(claim, seen) for seen in observed_commands)
        if witnessed:
            return verdict("command", "verified", "command matched session witness")
        return verdict("command", "unverified", "command not witnessed in session events")

    return verdict("note", "uncheckable", "reference is non-verifiable note text")
|
|
399
|
+
|
|
400
|
+
|
|
401
|
+
def _classify_evidence_reference(reference: str, *, root: Path) -> tuple[str, str]:
    """Return ``(kind, claim)`` for an evidence reference.

    Checks run in priority order: an explicit ``tool:`` prefix, an explicit
    ``cmd:`` prefix, text that looks like a known command, then text that
    parses as a path. Everything else is ``("note", "")``.
    """
    stripped = str(reference).strip()
    lowered = stripped.lower()

    if lowered.startswith("tool:"):
        # Tool names are compared case-insensitively, so keep the lowered form.
        return "tool", lowered.partition(":")[2].strip()

    if lowered.startswith("cmd:"):
        return "command", _normalize_command(stripped.partition(":")[2].strip())

    if _looks_like_command(stripped):
        return "command", _normalize_command(stripped)

    if _evidence_reference_path(stripped, root) is None:
        return "note", ""
    return "path", stripped
|
|
413
|
+
|
|
414
|
+
|
|
415
|
+
def _evidence_reference_path(reference: str, root: Path) -> Path | None:
|
|
416
|
+
text = str(reference).strip()
|
|
417
|
+
if not text or text.startswith(("http://", "https://")):
|
|
418
|
+
return None
|
|
419
|
+
|
|
420
|
+
path_text = text.split("#", 1)[0].strip()
|
|
421
|
+
if " " in path_text:
|
|
422
|
+
first_token = path_text.split(None, 1)[0].strip()
|
|
423
|
+
if first_token and (
|
|
424
|
+
any(sep in first_token for sep in ("/", "\\"))
|
|
425
|
+
or first_token.startswith((".", "~"))
|
|
426
|
+
):
|
|
427
|
+
path_text = first_token
|
|
428
|
+
path_text = re.sub(r":\d+(?::\d+|-\d+)?$", "", path_text).strip()
|
|
429
|
+
path_text = path_text.rstrip(".,;:")
|
|
430
|
+
if not path_text:
|
|
431
|
+
return None
|
|
432
|
+
if not any(sep in path_text for sep in ("/", "\\")) and not path_text.startswith((".", "~")):
|
|
433
|
+
return None
|
|
434
|
+
|
|
435
|
+
path = Path(path_text).expanduser()
|
|
436
|
+
if not path.is_absolute():
|
|
437
|
+
path = root / path
|
|
438
|
+
return path.resolve()
|
|
439
|
+
|
|
440
|
+
|
|
441
|
+
def _looks_like_command(text: str) -> bool:
|
|
442
|
+
return bool(
|
|
443
|
+
re.search(
|
|
444
|
+
r"\b(pytest|npm|pnpm|yarn|npx|ruff|mypy|go test|cargo test|python\s+-m)\b",
|
|
445
|
+
text.lower(),
|
|
446
|
+
)
|
|
447
|
+
)
|
|
448
|
+
|
|
449
|
+
|
|
450
|
+
def _normalize_command(text: str) -> str:
|
|
451
|
+
value = text.strip().strip("`").lower()
|
|
452
|
+
value = re.sub(r"[.,;:!?]+$", "", value).strip()
|
|
453
|
+
value = re.sub(r"\s+", " ", value)
|
|
454
|
+
value = re.sub(r"[:\-]\s*(ok|pass|passed|success|succeeded)$", "", value).strip()
|
|
455
|
+
return value
|
|
456
|
+
|
|
457
|
+
|
|
458
|
+
def _command_claim_matches(claim: str, observed: str) -> bool:
    """Return True when the claimed command is a token-prefix of the observed one.

    Both commands are tokenized with ``_command_tokens``; an empty token
    sequence on either side never matches.
    """
    wanted = _command_tokens(claim)
    seen = _command_tokens(observed)
    if not (wanted and seen):
        return False
    # Exact equality is the prefix case with equal length, and a claim longer
    # than the observed command can never equal a slice of it.
    return seen[: len(wanted)] == wanted
|
|
466
|
+
|
|
467
|
+
|
|
468
|
+
def _command_tokens(command: str) -> tuple[str, ...]:
    """Tokenize a command, unwrapping common launcher prefixes.

    The command is normalized, split shell-style (falling back to whitespace
    splitting when quoting is unbalanced), and then simplified:
    ``bash|sh -lc <cmd>`` is replaced by the tokens of ``<cmd>``, leading
    ``uv run`` / ``poetry run`` / ``pipenv run`` are removed, and
    ``python -m pytest`` becomes ``pytest``.
    """
    normalized = _normalize_command(command)
    if not normalized:
        return ()

    try:
        pieces = shlex.split(normalized)
    except ValueError:
        pieces = normalized.split()

    tokens = [piece.strip().lower() for piece in pieces if piece.strip()]
    if not tokens:
        return ()

    shells = {"bash", "sh"}
    runners = {("uv", "run"), ("poetry", "run"), ("pipenv", "run")}
    interpreters = {"python", "python3", "py"}

    while True:
        if len(tokens) >= 3 and tokens[0] in shells and tokens[1] == "-lc":
            # The wrapped command is re-parsed from scratch.
            return _command_tokens(" ".join(tokens[2:]))
        if tuple(tokens[:2]) in runners:
            tokens = tokens[2:]
            continue
        if tokens[1:3] == ["-m", "pytest"] and tokens[0] in interpreters:
            tokens = ["pytest", *tokens[3:]]
            continue
        break

    return tuple(tokens)
|
|
492
|
+
|
|
493
|
+
|
|
494
|
+
def _normalize_modified_file_claims(value: Any, *, root: Path) -> list[str]:
    """Normalize claimed modified files relative to *root*, without duplicates.

    Entries whose normalized form is falsy are dropped.
    """
    candidates = (
        _normalize_repo_relative_path(raw, root=root)
        for raw in _as_string_list(value)
    )
    return _unique_list([candidate for candidate in candidates if candidate])
|