ai-collab-open-system 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.aict/START_HERE.md +127 -0
- package/.aict/WORKSPACE_MANIFEST.json +91 -0
- package/.aict/acceptance/EXAMPLE.synthetic.md +49 -0
- package/.aict/acceptance/FAILURE_MODES.md +40 -0
- package/.aict/acceptance/PROMPT.md +47 -0
- package/.aict/acceptance/README.md +44 -0
- package/.aict/acceptance/TEMPLATE.md +57 -0
- package/.aict/adapters/SHARED_CORE_CONTRACT.md +106 -0
- package/.aict/adapters/claude-code/ADAPTER.md +28 -0
- package/.aict/adapters/cline/ADAPTER.md +28 -0
- package/.aict/adapters/codex/ADAPTER.md +28 -0
- package/.aict/adapters/copilot/ADAPTER.md +28 -0
- package/.aict/adapters/cursor/ADAPTER.md +28 -0
- package/.aict/adapters/windsurf/ADAPTER.md +28 -0
- package/.aict/context/EXAMPLE.synthetic.md +53 -0
- package/.aict/context/FAILURE_MODES.md +40 -0
- package/.aict/context/PROMPT.md +47 -0
- package/.aict/context/README.md +44 -0
- package/.aict/context/TEMPLATE.md +63 -0
- package/.aict/cookbook/README.md +8 -0
- package/.aict/cookbook/bridge-to-a-second-family.md +103 -0
- package/.aict/cookbook/connect-a-tool.md +67 -0
- package/.aict/cookbook/review-a-half-product.md +79 -0
- package/.aict/cookbook/run-a-first-loop.md +81 -0
- package/.aict/examples/README.md +21 -0
- package/.aict/examples/ai-coding-long-task/CASE.md +161 -0
- package/.aict/examples/ai-coding-long-task/artifacts/acceptance-card.md +36 -0
- package/.aict/examples/ai-coding-long-task/artifacts/context-package.md +30 -0
- package/.aict/examples/ai-coding-long-task/artifacts/execution-prompt.md +30 -0
- package/.aict/examples/ai-coding-long-task/artifacts/first-ai-output.md +109 -0
- package/.aict/examples/ai-coding-long-task/artifacts/guard-review.md +40 -0
- package/.aict/examples/ai-coding-long-task/artifacts/handoff-note.md +28 -0
- package/.aict/examples/ai-coding-long-task/artifacts/harvest-seed.md +28 -0
- package/.aict/examples/ai-coding-long-task/artifacts/revised-output.md +62 -0
- package/.aict/examples/content-production-harvest/CASE.md +87 -0
- package/.aict/examples/content-production-harvest/artifacts/acceptance-card.md +28 -0
- package/.aict/examples/content-production-harvest/artifacts/context-package.md +28 -0
- package/.aict/examples/content-production-harvest/artifacts/execution-prompt.md +30 -0
- package/.aict/examples/content-production-harvest/artifacts/guard-review.md +28 -0
- package/.aict/examples/content-production-harvest/artifacts/handoff-note.md +28 -0
- package/.aict/examples/content-production-harvest/artifacts/harvest-seed.md +28 -0
- package/.aict/examples/multi-tool-collaboration/CASE.md +87 -0
- package/.aict/examples/multi-tool-collaboration/artifacts/acceptance-card.md +28 -0
- package/.aict/examples/multi-tool-collaboration/artifacts/context-package.md +28 -0
- package/.aict/examples/multi-tool-collaboration/artifacts/execution-prompt.md +30 -0
- package/.aict/examples/multi-tool-collaboration/artifacts/guard-review.md +28 -0
- package/.aict/examples/multi-tool-collaboration/artifacts/handoff-note.md +28 -0
- package/.aict/examples/multi-tool-collaboration/artifacts/harvest-seed.md +28 -0
- package/.aict/examples/personal-judgment-growth-assistant/CASE.md +87 -0
- package/.aict/examples/personal-judgment-growth-assistant/artifacts/acceptance-card.md +28 -0
- package/.aict/examples/personal-judgment-growth-assistant/artifacts/context-package.md +28 -0
- package/.aict/examples/personal-judgment-growth-assistant/artifacts/execution-prompt.md +30 -0
- package/.aict/examples/personal-judgment-growth-assistant/artifacts/guard-review.md +28 -0
- package/.aict/examples/personal-judgment-growth-assistant/artifacts/handoff-note.md +28 -0
- package/.aict/examples/personal-judgment-growth-assistant/artifacts/harvest-seed.md +28 -0
- package/.aict/examples/research-knowledge-synthesis/CASE.md +87 -0
- package/.aict/examples/research-knowledge-synthesis/artifacts/acceptance-card.md +28 -0
- package/.aict/examples/research-knowledge-synthesis/artifacts/context-package.md +28 -0
- package/.aict/examples/research-knowledge-synthesis/artifacts/execution-prompt.md +30 -0
- package/.aict/examples/research-knowledge-synthesis/artifacts/guard-review.md +28 -0
- package/.aict/examples/research-knowledge-synthesis/artifacts/handoff-note.md +28 -0
- package/.aict/examples/research-knowledge-synthesis/artifacts/harvest-seed.md +28 -0
- package/.aict/guard/EXAMPLE.synthetic.md +51 -0
- package/.aict/guard/FAILURE_MODES.md +40 -0
- package/.aict/guard/PROMPT.md +47 -0
- package/.aict/guard/README.md +44 -0
- package/.aict/guard/TEMPLATE.md +60 -0
- package/.aict/handoff/EXAMPLE.synthetic.md +51 -0
- package/.aict/handoff/FAILURE_MODES.md +40 -0
- package/.aict/handoff/PROMPT.md +47 -0
- package/.aict/handoff/README.md +44 -0
- package/.aict/handoff/TEMPLATE.md +60 -0
- package/.aict/harvest/EXAMPLE.synthetic.md +51 -0
- package/.aict/harvest/FAILURE_MODES.md +40 -0
- package/.aict/harvest/PROMPT.md +47 -0
- package/.aict/harvest/README.md +44 -0
- package/.aict/harvest/TEMPLATE.md +60 -0
- package/.aict/mechanisms/README.md +34 -0
- package/.aict/mechanisms/anti-drift-partner/EXAMPLE.synthetic.md +46 -0
- package/.aict/mechanisms/anti-drift-partner/FAILURE_MODES.md +25 -0
- package/.aict/mechanisms/anti-drift-partner/PROMPT.md +75 -0
- package/.aict/mechanisms/anti-drift-partner/README.md +82 -0
- package/.aict/mechanisms/anti-drift-partner/TEMPLATE.md +74 -0
- package/.aict/mechanisms/blind-spot-scan/EXAMPLE.synthetic.md +39 -0
- package/.aict/mechanisms/blind-spot-scan/FAILURE_MODES.md +25 -0
- package/.aict/mechanisms/blind-spot-scan/PROMPT.md +72 -0
- package/.aict/mechanisms/blind-spot-scan/README.md +79 -0
- package/.aict/mechanisms/blind-spot-scan/TEMPLATE.md +70 -0
- package/.aict/mechanisms/collaboration-coach/EXAMPLE.synthetic.md +40 -0
- package/.aict/mechanisms/collaboration-coach/FAILURE_MODES.md +25 -0
- package/.aict/mechanisms/collaboration-coach/PROMPT.md +72 -0
- package/.aict/mechanisms/collaboration-coach/README.md +79 -0
- package/.aict/mechanisms/collaboration-coach/TEMPLATE.md +61 -0
- package/.aict/mechanisms/do-not-handle-yet/EXAMPLE.synthetic.md +15 -0
- package/.aict/mechanisms/do-not-handle-yet/FAILURE_MODES.md +16 -0
- package/.aict/mechanisms/do-not-handle-yet/PROMPT.md +41 -0
- package/.aict/mechanisms/do-not-handle-yet/README.md +30 -0
- package/.aict/mechanisms/do-not-handle-yet/TEMPLATE.md +38 -0
- package/.aict/mechanisms/dual-guard/EXAMPLE.synthetic.md +54 -0
- package/.aict/mechanisms/dual-guard/FAILURE_MODES.md +25 -0
- package/.aict/mechanisms/dual-guard/PROMPT.md +76 -0
- package/.aict/mechanisms/dual-guard/README.md +81 -0
- package/.aict/mechanisms/dual-guard/TEMPLATE.md +73 -0
- package/.aict/mechanisms/feedback-absorption-ledger/EXAMPLE.synthetic.md +49 -0
- package/.aict/mechanisms/feedback-absorption-ledger/FAILURE_MODES.md +25 -0
- package/.aict/mechanisms/feedback-absorption-ledger/PROMPT.md +74 -0
- package/.aict/mechanisms/feedback-absorption-ledger/README.md +81 -0
- package/.aict/mechanisms/feedback-absorption-ledger/TEMPLATE.md +69 -0
- package/.aict/mechanisms/half-product-review/EXAMPLE.synthetic.md +15 -0
- package/.aict/mechanisms/half-product-review/FAILURE_MODES.md +16 -0
- package/.aict/mechanisms/half-product-review/PROMPT.md +41 -0
- package/.aict/mechanisms/half-product-review/README.md +30 -0
- package/.aict/mechanisms/half-product-review/TEMPLATE.md +38 -0
- package/.aict/mechanisms/handoff-abc/EXAMPLE.synthetic.md +47 -0
- package/.aict/mechanisms/handoff-abc/FAILURE_MODES.md +25 -0
- package/.aict/mechanisms/handoff-abc/PROMPT.md +75 -0
- package/.aict/mechanisms/handoff-abc/README.md +82 -0
- package/.aict/mechanisms/handoff-abc/TEMPLATE.md +60 -0
- package/.aict/mechanisms/harvest-and-erc/EXAMPLE.synthetic.md +43 -0
- package/.aict/mechanisms/harvest-and-erc/FAILURE_MODES.md +25 -0
- package/.aict/mechanisms/harvest-and-erc/PROMPT.md +74 -0
- package/.aict/mechanisms/harvest-and-erc/README.md +81 -0
- package/.aict/mechanisms/harvest-and-erc/TEMPLATE.md +60 -0
- package/.aict/mechanisms/honest-calibration/EXAMPLE.synthetic.md +43 -0
- package/.aict/mechanisms/honest-calibration/FAILURE_MODES.md +25 -0
- package/.aict/mechanisms/honest-calibration/PROMPT.md +74 -0
- package/.aict/mechanisms/honest-calibration/README.md +81 -0
- package/.aict/mechanisms/honest-calibration/TEMPLATE.md +66 -0
- package/.aict/mechanisms/one-click-dispatch/EXAMPLE.synthetic.md +15 -0
- package/.aict/mechanisms/one-click-dispatch/FAILURE_MODES.md +16 -0
- package/.aict/mechanisms/one-click-dispatch/PROMPT.md +41 -0
- package/.aict/mechanisms/one-click-dispatch/README.md +30 -0
- package/.aict/mechanisms/one-click-dispatch/TEMPLATE.md +38 -0
- package/.aict/mechanisms/plain-language-first-screen/EXAMPLE.synthetic.md +15 -0
- package/.aict/mechanisms/plain-language-first-screen/FAILURE_MODES.md +16 -0
- package/.aict/mechanisms/plain-language-first-screen/PROMPT.md +41 -0
- package/.aict/mechanisms/plain-language-first-screen/README.md +30 -0
- package/.aict/mechanisms/plain-language-first-screen/TEMPLATE.md +38 -0
- package/.aict/mechanisms/root-cause-brake/EXAMPLE.synthetic.md +55 -0
- package/.aict/mechanisms/root-cause-brake/FAILURE_MODES.md +25 -0
- package/.aict/mechanisms/root-cause-brake/PROMPT.md +73 -0
- package/.aict/mechanisms/root-cause-brake/README.md +79 -0
- package/.aict/mechanisms/root-cause-brake/TEMPLATE.md +74 -0
- package/.aict/mechanisms/scout-review-controller/EXAMPLE.synthetic.md +15 -0
- package/.aict/mechanisms/scout-review-controller/FAILURE_MODES.md +16 -0
- package/.aict/mechanisms/scout-review-controller/PROMPT.md +41 -0
- package/.aict/mechanisms/scout-review-controller/README.md +30 -0
- package/.aict/mechanisms/scout-review-controller/TEMPLATE.md +38 -0
- package/.aict/mechanisms/single-tool-guard/EXAMPLE.synthetic.md +54 -0
- package/.aict/mechanisms/single-tool-guard/FAILURE_MODES.md +25 -0
- package/.aict/mechanisms/single-tool-guard/PROMPT.md +76 -0
- package/.aict/mechanisms/single-tool-guard/README.md +83 -0
- package/.aict/mechanisms/single-tool-guard/TEMPLATE.md +75 -0
- package/.aict/mechanisms/task-splitting/EXAMPLE.synthetic.md +53 -0
- package/.aict/mechanisms/task-splitting/FAILURE_MODES.md +25 -0
- package/.aict/mechanisms/task-splitting/PROMPT.md +72 -0
- package/.aict/mechanisms/task-splitting/README.md +79 -0
- package/.aict/mechanisms/task-splitting/TEMPLATE.md +76 -0
- package/.aict/modes/README.md +11 -0
- package/.aict/modes/execute.md +31 -0
- package/.aict/modes/handoff.md +29 -0
- package/.aict/modes/harvest.md +30 -0
- package/.aict/modes/review.md +28 -0
- package/.aict/modes/shape.md +34 -0
- package/.aict/privacy/COMMERCIAL_BOUNDARY.md +34 -0
- package/.aict/privacy/PRIVACY.md +36 -0
- package/.aict/privacy/REDACTION_CHECKLIST.md +12 -0
- package/.aict/profile/CANDIDATES.md +44 -0
- package/.aict/profile/EXAMPLE.synthetic.md +49 -0
- package/.aict/profile/FAILURE_MODES.md +40 -0
- package/.aict/profile/PROMPT.md +47 -0
- package/.aict/profile/README.md +44 -0
- package/.aict/profile/TEMPLATE.md +57 -0
- package/.aict/prompts/acceptance-definition.md +109 -0
- package/.aict/prompts/guard-review.md +116 -0
- package/.aict/prompts/handoff-generation.md +110 -0
- package/.aict/prompts/harvest-extraction.md +110 -0
- package/.aict/prompts/mode-switching.md +66 -0
- package/.aict/prompts/profile-creation.md +66 -0
- package/.aict/prompts/profile-refinement.md +66 -0
- package/.aict/prompts/project-context-packaging.md +113 -0
- package/.aict/prompts/red-team-challenge.md +106 -0
- package/.aict/prompts/rule-update-proposal.md +114 -0
- package/.aict/prompts/workflow-reset.md +109 -0
- package/.aict/roles/README.md +18 -0
- package/.aict/roles/executor.md +34 -0
- package/.aict/roles/harvester.md +33 -0
- package/.aict/roles/owner-controller.md +38 -0
- package/.aict/roles/scout.md +33 -0
- package/.aict/roles/supervisor.md +34 -0
- package/.aict/roles/system-guardian.md +34 -0
- package/.aict/skills/acceptance/SKILL.md +43 -0
- package/.aict/skills/context/SKILL.md +44 -0
- package/.aict/skills/evidence-pack/SKILL.md +42 -0
- package/.aict/skills/guard/SKILL.md +46 -0
- package/.aict/skills/handoff/SKILL.md +44 -0
- package/.aict/skills/harvest/SKILL.md +44 -0
- package/.aict/skills/mode-switch/SKILL.md +42 -0
- package/.aict/skills/profile/SKILL.md +42 -0
- package/.aict/skills/red-team/SKILL.md +42 -0
- package/.aict/skills/single-tool-guard/SKILL.md +42 -0
- package/.aict/state/CURRENT_STATE.md +13 -0
- package/.aict/state/DECISIONS.md +7 -0
- package/.aict/state/TASK_LOG.md +7 -0
- package/.aict/state/evidence.jsonl +2 -0
- package/.aict/state/learning-ledger.jsonl +1 -0
- package/.aict/state/receipts.jsonl +1 -0
- package/.aict/state/runs.jsonl +1 -0
- package/.aict/state/tasks.jsonl +1 -0
- package/.aict/walkthroughs/10-minute-your-task.md +107 -0
- package/.aict/walkthroughs/10-minute.md +43 -0
- package/.aict/walkthroughs/30-minute.md +22 -0
- package/.aict/walkthroughs/60-minute.md +27 -0
- package/.aict/walkthroughs/synthetic-loop-transcript.md +43 -0
- package/CHANGELOG.md +23 -0
- package/CODE_OF_CONDUCT.md +20 -0
- package/CONTRIBUTING.md +30 -0
- package/KNOWN_LIMITATIONS.md +54 -0
- package/LICENSE +199 -0
- package/PRODUCT_CONTRACT.md +446 -0
- package/README.md +245 -0
- package/RELEASE_CHECKLIST.md +78 -0
- package/SECURITY.md +56 -0
- package/START_HERE.md +89 -0
- package/bin/ai-collab.js +2 -0
- package/docs/DOGFOOD.md +85 -0
- package/docs/FEEDBACK.md +61 -0
- package/docs/FIRST_EXPERIENCE_SPEC.md +32 -0
- package/docs/FREE_VS_PAID.md +53 -0
- package/docs/PUBLIC_BOUNDARY.md +36 -0
- package/docs/PUBLIC_MAPPING.md +178 -0
- package/docs/RELEASE_PRIORITY.md +23 -0
- package/docs/WHY_THIS_EXISTS.md +36 -0
- package/docs/open-system/00-start-here.md +60 -0
- package/docs/open-system/01-ai-collaboration-os.md +33 -0
- package/docs/open-system/02-six-layer-architecture.md +45 -0
- package/docs/open-system/03-role-system.md +33 -0
- package/docs/open-system/04-core-mechanisms.md +34 -0
- package/docs/open-system/05-failure-patterns.md +31 -0
- package/docs/open-system/06-how-to-adapt-to-your-workflow.md +31 -0
- package/package.json +69 -0
- package/privacy-manifest.json +78 -0
- package/privacy-scan.local.json.example +18 -0
- package/scripts/lib/forbidden-in-pack.js +55 -0
- package/scripts/pack-check.js +154 -0
- package/scripts/privacy-scan.js +487 -0
- package/scripts/validate-contract.js +160 -0
- package/src/adapters.js +590 -0
- package/src/bootstrap.js +1184 -0
- package/src/catalog.js +2723 -0
- package/src/cli.js +2899 -0
- package/src/dialogue.js +470 -0
- package/src/i18n.js +1034 -0
- package/src/ledger.js +2011 -0
- package/src/render.js +1381 -0
- package/src/sendmodel.js +452 -0
- package/src/validate.js +1307 -0
- package/src/workspace.js +1679 -0
- package/tests/contract.test.js +8514 -0
package/src/validate.js
ADDED
|
@@ -0,0 +1,1307 @@
|
|
|
1
|
+
import { existsSync, readFileSync, readdirSync, statSync } from "node:fs";
|
|
2
|
+
import path from "node:path";
|
|
3
|
+
import {
|
|
4
|
+
mechanismDefinitions,
|
|
5
|
+
requiredAdapterIds,
|
|
6
|
+
requiredCaseIds,
|
|
7
|
+
requiredMechanismIds,
|
|
8
|
+
requiredPromptFiles,
|
|
9
|
+
requiredSkillIds,
|
|
10
|
+
requiredWorkspaceDirs
|
|
11
|
+
} from "./catalog.js";
|
|
12
|
+
import {
|
|
13
|
+
parseLedgerFile,
|
|
14
|
+
ledgerPath,
|
|
15
|
+
TASK_STATUSES,
|
|
16
|
+
GUARD_LEVELS,
|
|
17
|
+
RECEIPT_VERDICTS,
|
|
18
|
+
REVIEW_MODES,
|
|
19
|
+
doneRequiresEvidence,
|
|
20
|
+
ownedEvidenceIds,
|
|
21
|
+
ownedRerunEvidenceIds,
|
|
22
|
+
ownedCrossFamilyGuardEvidenceIds,
|
|
23
|
+
guardLevelVerdictError,
|
|
24
|
+
guardLevelRank,
|
|
25
|
+
computeReceiptGuardLevel,
|
|
26
|
+
ownerAcceptanceError,
|
|
27
|
+
receiptStatusFor,
|
|
28
|
+
specialEvidenceStructureError,
|
|
29
|
+
rerunRunReconcileError,
|
|
30
|
+
learningRecordError,
|
|
31
|
+
EVIDENCE_KIND_RERUN,
|
|
32
|
+
RECEIPT_STATUSES
|
|
33
|
+
} from "./ledger.js";
|
|
34
|
+
|
|
35
|
+
// Predicate for the rerun evidence rows that the L4 reconciliation read-check
// (2c) inspects: the row must be an object of kind EVIDENCE_KIND_RERUN whose
// runId is a string with at least one non-whitespace character. A rerun row
// without a runId is still a valid generic rerun — it just cannot reach L4 —
// so it is deliberately answered with `false` here rather than flagged.
function isRerunWithRunId(record) {
  // null, undefined and primitives can never be evidence rows.
  if (record == null || typeof record !== "object") return false;
  if (record.kind !== EVIDENCE_KIND_RERUN) return false;

  const { runId } = record;
  return typeof runId === "string" && runId.trim() !== "";
}
|
|
47
|
+
|
|
48
|
+
// Synchronously load `file` and hand back its contents decoded as UTF-8 text.
// Any filesystem error (missing file, directory, permissions) propagates.
function read(file) {
  const text = readFileSync(file, { encoding: "utf8" });
  return text;
}
|
|
51
|
+
|
|
52
|
+
// Report whether anything (file or directory) exists at `root` joined with the
// given path segments.
function exists(root, ...parts) {
  const target = path.join(root, ...parts);
  return existsSync(target);
}
|
|
55
|
+
|
|
56
|
+
// Read a file that must exist at `root`/`...parts`.
//
// Returns the file's text when the path exists and is not a directory.
// Otherwise records `missing file <path relative to root>` in `errors` and
// returns the empty string so callers can keep running their content checks.
function requireFile(errors, root, ...parts) {
  const target = path.join(root, ...parts);
  const isReadableEntry = existsSync(target) && !statSync(target).isDirectory();

  if (isReadableEntry) {
    return read(target);
  }

  errors.push(`missing file ${path.relative(root, target)}`);
  return "";
}
|
|
64
|
+
|
|
65
|
+
// Assert that `root`/`...parts` exists and is a directory. When it is absent,
// or is something other than a directory, `missing directory <relative path>`
// is appended to `errors`. Nothing is returned.
function requireDir(errors, root, ...parts) {
  const target = path.join(root, ...parts);
  const isDirectory = existsSync(target) && statSync(target).isDirectory();
  if (isDirectory) return;

  errors.push(`missing directory ${path.relative(root, target)}`);
}
|
|
71
|
+
|
|
72
|
+
// Check that every entry of `phrases` occurs in `content`, ignoring case.
// Each phrase is compiled as a regular-expression source (not a literal
// string), so pattern syntax inside a phrase is honoured. For every phrase
// with no match, `<label> missing <phrase>` is appended to `errors`.
function includesAll(errors, label, content, phrases) {
  for (const phrase of phrases) {
    const pattern = new RegExp(phrase, "i");
    if (pattern.test(content)) continue;
    errors.push(`${label} missing ${phrase}`);
  }
}
|
|
79
|
+
|
|
80
|
+
// --- Deep-validation helpers ----------------------------------------------
|
|
81
|
+
//
|
|
82
|
+
// These power the structural depth checks (P2): they look past "the file
|
|
83
|
+
// exists and contains keyword X" into "this file actually carries the
|
|
84
|
+
// substance a real workspace would". Each helper is intentionally cheap and
|
|
85
|
+
// deterministic so a degraded workspace fails loudly with a pointable reason.
|
|
86
|
+
|
|
87
|
+
// Lookup table from mechanism id to its catalog definition, built once at
// module load. If two definitions share an id, the later one wins.
const MECHANISM_BY_ID = new Map();
for (const mechanism of mechanismDefinitions) {
  MECHANISM_BY_ID.set(mechanism.id, mechanism);
}
|
|
88
|
+
|
|
89
|
+
// README headings that a "deepened" mechanism must keep.
//
// A mechanism counts as deepened when its catalog entry carries the 9-element
// shape (antiTrigger + inputsDetailed + outputShape + passBar + rejectBar +
// misuse). Its rendered README is then expected to expose one structural
// anchor per element; losing one is a regression. Lighter mechanisms that only
// have Purpose/When/Input/Process/Package files are not held to this list.
// NOTE(review): the anchors appear to be listed in the same order as the
// catalog fields named above — confirm against the README renderer.
const DEEP_MECHANISM_README_ANCHORS = [
  "## When not to use",
  "## Input materials",
  "## Output shape",
  "## Pass bar",
  "## Reject bar",
  "## Common misuse"
];
|
|
102
|
+
|
|
103
|
+
// True when the catalog entry for `mechanismId` has a truthy value for every
// deepened-shape field. Unknown ids, and entries missing (or blanking) any one
// of the fields, yield false.
function isDeepMechanism(mechanismId) {
  const entry = MECHANISM_BY_ID.get(mechanismId);
  if (!entry) return false;

  const deepFields = ["antiTrigger", "inputsDetailed", "outputShape", "passBar", "rejectBar", "misuse"];
  return deepFields.every((field) => Boolean(entry[field]));
}
|
|
115
|
+
|
|
116
|
+
// Quiet counterpart of requireFile: returns the UTF-8 text of the file at
// `root`/`...parts`, or null when the path is absent or is a directory.
// Nothing is recorded anywhere — the caller decides whether null is an error.
function readIfPresent(root, ...parts) {
  const target = path.join(root, ...parts);
  const isReadableEntry = existsSync(target) && !statSync(target).isDirectory();
  return isReadableEntry ? read(target) : null;
}
|
|
121
|
+
|
|
122
|
+
// Count the lines of `content` that begin with a ``` fence marker. Opening and
// closing markers are counted individually, so one complete fenced block
// contributes 2. Fences that are indented or appear mid-line are not counted.
function countFences(content) {
  const fenceStarts = Array.from(content.matchAll(/^```/gm));
  return fenceStarts.length;
}
|
|
125
|
+
|
|
126
|
+
// Split `content` on "\n" and keep the lines that contain something other than
// whitespace. Kept lines are returned exactly as they appeared (untrimmed).
function nonEmptyLines(content) {
  const kept = [];
  for (const line of content.split("\n")) {
    if (line.trim() !== "") kept.push(line);
  }
  return kept;
}
|
|
129
|
+
|
|
130
|
+
// Extract the "substance" of an artifact: its trimmed, non-empty lines minus
// the fixed scaffold every artifact shares — any line starting with "#" (the
// title and section headings), fence markers, and the boilerplate trailer
// sentence "This artifact makes the case runnable and reviewable.". What is
// left distinguishes a genuinely filled-in artifact from a hollowed-out
// template that merely kept its headings.
function substanceLines(content) {
  const boilerplateTrailer = /^This artifact makes the case runnable and reviewable\./i;
  const kept = [];

  for (const rawLine of content.split("\n")) {
    const line = rawLine.trim();
    if (line.length === 0) continue;
    if (line.startsWith("#") || line.startsWith("```")) continue;
    if (boilerplateTrailer.test(line)) continue;
    kept.push(line);
  }

  return kept;
}
|
|
143
|
+
|
|
144
|
+
// Level-2 headings, lower-cased, in document order — used both to detect
// duplicated stacked sections inside one file and to compare across cases.
//
// Only lines of the form `## <text>` count: the marker must be followed by
// horizontal whitespace and heading text on the SAME line. The separator class
// deliberately excludes line terminators so that an empty `##` line cannot
// swallow the newline and report the following line as a heading. Deeper
// headings (`###` and beyond) are not matched.
function level2Headings(content) {
  // Whitespace other than line terminators.
  const headingPattern = /^##[^\S\r\n\u2028\u2029]+(.+?)[^\S\r\n\u2028\u2029]*$/gm;
  const headings = [];
  for (const match of content.matchAll(headingPattern)) {
    headings.push(match[1].trim().toLowerCase());
  }
  return headings;
}
|
|
153
|
+
|
|
154
|
+
// Reduce a CASE.md body to a normalized signature for spotting boilerplate
// that was copied across cases. Fenced code blocks (the per-case unique text)
// are dropped, the remainder is lower-cased, every run of characters outside
// a-z / 0-9 becomes a single space, and only tokens longer than three
// characters are kept, joined by single spaces. Genuinely different cases
// share headings but differ in prose; a copy-paste clone collapses to (nearly)
// the same signature.
function caseBodySignature(content) {
  const withoutFences = content.replace(/```[\s\S]*?```/g, " ");
  const normalized = withoutFences.toLowerCase().replace(/[^a-z0-9]+/g, " ");

  const significantTokens = [];
  for (const token of normalized.split(/\s+/)) {
    if (token.length > 3) significantTokens.push(token);
  }

  return significantTokens.join(" ").trim();
}
|
|
168
|
+
|
|
169
|
+
// ===========================================================================
|
|
170
|
+
// Deep structural validation (P2), refactored into one named sub-function per
|
|
171
|
+
// numbered block. Unlike the ledger checks (which return error arrays), the
|
|
172
|
+
// deep blocks interleave `tick()` calls inside their loops and write to BOTH
|
|
173
|
+
// `errors` and `warnings`, so each block keeps the original passthrough shape:
|
|
174
|
+
// it receives the shared `errors` / `warnings` / `tick` and performs its own
|
|
175
|
+
// ticks at the EXACT points the inline block did. The split is mechanical
|
|
176
|
+
// (extract-method) — every pushed string, every tick, and their order are
|
|
177
|
+
// byte-for-byte the pre-refactor behavior; the deepValidate orchestrator just
|
|
178
|
+
// calls the blocks in sequence and threads the values they share (the parsed
|
|
179
|
+
// manifest, the flagship artifact bodies, the case-dir list). Each is exported
|
|
180
|
+
// so a unit test can exercise one structural block in isolation too.
|
|
181
|
+
// ===========================================================================
|
|
182
|
+
|
|
183
|
+
// (1) Manifest really exists, parses, and its declared files/dirs are real.
//
// Ticks exactly once, then validates WORKSPACE_MANIFEST.json under `workspace`:
//   - the file is present and is valid JSON whose top-level value is an object
//     (a manifest of `null`, a number, a string or an array is rejected
//     instead of silently skipping every check);
//   - each required top-level field is defined;
//   - every declared workspace directory exists on disk as a directory;
//   - every listed mechanism / prompt / skill / adapter / synthetic case has
//     its anchor file on disk.
// List entries that are not non-empty strings are reported as errors rather
// than being passed to path.join, which would throw.
//
// Problems are appended to `errors` in document order. Returns the parsed
// manifest so check (2) can reuse it without a second parse, or null when the
// manifest is missing, unparsable, or not an object.
export function deepCheckManifest(workspace, errors, tick) {
  tick();

  const manifestRaw = readIfPresent(workspace, "WORKSPACE_MANIFEST.json");
  if (manifestRaw === null) {
    errors.push("manifest WORKSPACE_MANIFEST.json is missing");
    return null;
  }

  let manifest;
  try {
    manifest = JSON.parse(manifestRaw);
  } catch (parseError) {
    errors.push(`manifest WORKSPACE_MANIFEST.json is not valid JSON (${parseError.message})`);
    return null;
  }

  if (manifest === null || typeof manifest !== "object" || Array.isArray(manifest)) {
    errors.push("manifest WORKSPACE_MANIFEST.json is not a JSON object");
    return null;
  }

  for (const field of ["name", "workspaceDirs", "layers", "mechanisms", "prompts", "skills", "adapters", "syntheticCases"]) {
    if (manifest[field] === undefined) errors.push(`manifest missing field "${field}"`);
  }

  // Guards the path lookups below: only non-empty strings are usable as path
  // segments. Anything else is reported and skipped.
  const isUsableEntry = (field, entry) => {
    if (typeof entry === "string" && entry.trim().length > 0) return true;
    errors.push(`manifest.${field} contains an invalid entry ${JSON.stringify(entry)} (expected a non-empty string)`);
    return false;
  };

  // Every directory the manifest declares must actually exist on disk.
  if (Array.isArray(manifest.workspaceDirs)) {
    for (const dir of manifest.workspaceDirs) {
      if (!isUsableEntry("workspaceDirs", dir)) continue;
      if (!exists(workspace, dir) || !statSync(path.join(workspace, dir)).isDirectory()) {
        errors.push(`manifest declares directory "${dir}" but it is missing on disk`);
      }
    }
  }

  // Manifest-listed mechanism / prompt / skill / adapter / case assets exist.
  // `leaf` is the anchor file inside the per-id directory; prompts are listed
  // by file name directly, so they have no leaf.
  const assetKinds = [
    { field: "mechanisms", label: "mechanism", dir: "mechanisms", leaf: "README.md" },
    { field: "prompts", label: "prompt", dir: "prompts", leaf: null },
    { field: "skills", label: "skill", dir: "skills", leaf: "SKILL.md" },
    { field: "adapters", label: "adapter", dir: "adapters", leaf: "ADAPTER.md" },
    { field: "syntheticCases", label: "case", dir: "examples", leaf: "CASE.md" }
  ];
  for (const { field, label, dir, leaf } of assetKinds) {
    if (!Array.isArray(manifest[field])) continue;
    for (const id of manifest[field]) {
      if (!isUsableEntry(field, id)) continue;
      const parts = leaf === null ? [dir, id] : [dir, id, leaf];
      if (!exists(workspace, ...parts)) {
        errors.push(`manifest lists ${label} "${id}" but ${parts.join("/")} is missing`);
      }
    }
  }

  return manifest;
}
|
|
254
|
+
|
|
255
|
+
// (2) Declared workspaceDirs == the actual governance dir set under .aict/.
//
// Ticks once. When `manifest` is usable (non-null with an array
// `workspaceDirs`) it compares three sets and appends every mismatch to
// `errors`, in this order:
//   a. directories present in `workspace` but not declared — except
//      walkthroughs/, which is generated but intentionally not a manifest dir
//      and is therefore never reported as extra;
//   b. declared directories with no matching directory entry on disk;
//   c. canonical generator dirs (requiredWorkspaceDirs) the manifest omits,
//      which catches a manifest hand-edited away from the generator.
// With an unusable manifest only the tick happens.
export function deepCheckWorkspaceDirs(workspace, manifest, errors, tick) {
  tick();
  if (!manifest || !Array.isArray(manifest.workspaceDirs)) return;

  const declared = new Set(manifest.workspaceDirs);
  const exemptFromManifest = new Set(["walkthroughs"]);

  // Directory names directly under the workspace, in listing order.
  const onDisk = new Set();
  for (const entry of readdirSync(workspace, { withFileTypes: true })) {
    if (entry.isDirectory()) onDisk.add(entry.name);
  }

  for (const dir of onDisk) {
    if (declared.has(dir) || exemptFromManifest.has(dir)) continue;
    errors.push(`directory "${dir}" exists under .aict/ but is not declared in manifest.workspaceDirs`);
  }

  for (const dir of declared) {
    if (onDisk.has(dir)) continue;
    errors.push(`manifest.workspaceDirs declares "${dir}" but no such directory exists`);
  }

  for (const dir of requiredWorkspaceDirs) {
    if (declared.has(dir)) continue;
    errors.push(`manifest.workspaceDirs is missing canonical dir "${dir}"`);
  }
}
|
|
285
|
+
|
|
286
|
+
// (3) Mechanism schema completeness: 5 files each, and deepened mechanisms
// keep their 9-element README structure anchors.
//
// Walks requiredMechanismIds, ticking once per mechanism. For each deepened
// mechanism whose README is readable, every entry of
// DEEP_MECHANISM_README_ANCHORS must appear in the README text; each absent
// anchor is appended to `errors`. A missing README is skipped silently here
// because requireFile in the base pass already reports it, and non-deepened
// mechanisms have no anchor requirement.
export function deepCheckMechanismSchema(workspace, errors, tick) {
  for (const mechanismId of requiredMechanismIds) {
    tick();

    const readmeBody = readIfPresent(workspace, "mechanisms", mechanismId, "README.md");
    if (readmeBody === null || !isDeepMechanism(mechanismId)) continue;

    const lostAnchors = DEEP_MECHANISM_README_ANCHORS.filter((anchor) => !readmeBody.includes(anchor));
    for (const anchor of lostAnchors) {
      errors.push(`mechanisms/${mechanismId}/README.md lost deepened structure anchor "${anchor}"`);
    }
  }
}
|
|
306
|
+
|
|
307
|
+
// (4) Flagship lab completeness: the 11-step artifact chain is present, the
// causal-chain trio exists, and guard-review really cites first-ai-output
// line numbers (not just mentions the filename).
//
// Two ticks, in this order:
//   1. presence — the flagship CASE.md and each of the eight load-bearing
//      artifacts must exist; every absence is appended to `errors`. (The 11
//      "rungs" of the loop are the case file, these 8 artifacts and the case's
//      two narrative proof surfaces, raw-input/baseline; only the load-bearing
//      artifacts are asserted directly.)
//   2. causal chain — when guard-review.md is readable it must mention
//      first-ai-output.md and cite at least one "line N" / "lines N-M"
//      reference (both errors if not); losing the specific "lines 27-30"
//      citation of the onKeyDown stub only adds a warning.
//
// Returns { flagshipId, firstAi, guard, revised }: the flagship id plus the
// three load-bearing artifact bodies (null where unreadable) so the depth
// block (5) can reuse them without re-reading.
export function deepCheckFlagship(workspace, errors, warnings, tick) {
  tick();

  const flagshipId = "ai-coding-long-task";
  const artifactsDir = path.join(workspace, "examples", flagshipId, "artifacts");
  const requiredArtifacts = [
    "context-package.md",
    "acceptance-card.md",
    "execution-prompt.md",
    "first-ai-output.md",
    "guard-review.md",
    "revised-output.md",
    "handoff-note.md",
    "harvest-seed.md"
  ];

  if (!exists(workspace, "examples", flagshipId, "CASE.md")) {
    errors.push(`flagship examples/${flagshipId}/CASE.md is missing`);
  }
  requiredArtifacts
    .filter((artifact) => !existsSync(path.join(artifactsDir, artifact)))
    .forEach((artifact) => {
      errors.push(`flagship artifact examples/${flagshipId}/artifacts/${artifact} is missing`);
    });

  const loadArtifact = (fileName) => readIfPresent(workspace, "examples", flagshipId, "artifacts", fileName);
  const firstAi = loadArtifact("first-ai-output.md");
  const guard = loadArtifact("guard-review.md");
  const revised = loadArtifact("revised-output.md");

  // Causal link: the review has to name the file it reviewed AND quote a line
  // range from it, proving it read the code rather than name-dropping it.
  tick();
  if (guard !== null) {
    const namesFirstOutput = /first-ai-output\.md/i.test(guard);
    if (!namesFirstOutput) {
      errors.push(`flagship guard-review.md does not reference first-ai-output.md (causal chain broken)`);
    }

    const lineRefs = guard.match(/\blines?\s+\d+(?:\s*-\s*\d+)?/gi);
    if (lineRefs === null) {
      errors.push(`flagship guard-review.md cites no line numbers from first-ai-output.md (cannot prove it reviewed the code)`);
    }

    const citesStubRange = /\blines?\s+27\s*-\s*30\b/i.test(guard);
    if (!citesStubRange) {
      warnings.push(`flagship guard-review.md no longer cites the onKeyDown stub at lines 27-30 (causal-chain anchor weakened)`);
    }
  }

  return { flagshipId, firstAi, guard, revised };
}
|
|
363
|
+
|
|
364
|
+
// (5) Minimum artifact depth: the causal-chain artifacts must carry real
|
|
365
|
+
// structural substance (code fences, a verdict, evidence), not just a
|
|
366
|
+
// template header + one sentence. Then every ordinary case artifact must
|
|
367
|
+
// clear a minimum substance floor. Returns caseDirs (computed here, reused
|
|
368
|
+
// by check 9). Performs the three flagship-depth ticks plus one tick per
|
|
369
|
+
// ordinary case artifact, in the original order.
|
|
370
|
+
/**
 * Check (5): the flagship causal-chain artifacts must carry structural
 * substance, and every `.md` artifact of every example case must clear a
 * minimum substance floor.
 *
 * Ticks three times for the flagship bodies (whether or not they are null),
 * then once per `.md` file found under `examples/<case>/artifacts`.
 *
 * @param {string} workspace - Root directory of the workspace being validated.
 * @param {string|null} firstAi - Body of first-ai-output.md, or null to skip.
 * @param {string|null} guard - Body of guard-review.md, or null to skip.
 * @param {string|null} revised - Body of revised-output.md, or null to skip.
 * @param {string[]} errors - Collector that findings are appended to.
 * @param {() => void} tick - Invoked once per check performed.
 * @returns {string[]} Names of the directories directly under `examples`
 *   (empty when `examples` does not exist).
 */
export function deepCheckArtifactDepth(workspace, firstAi, guard, revised, errors, tick) {
  tick();
  if (firstAi !== null) {
    if (countFences(firstAi) < 2) {
      errors.push(`flagship first-ai-output.md has no fenced code block (boilerplate, not a runnable artifact)`);
    }
    if (!/completion claim/i.test(firstAi)) {
      errors.push(`flagship first-ai-output.md is missing the completion claim it is supposed to expose`);
    }
    // Measure once and reuse the count for both the check and the message.
    const firstAiSubstance = substanceLines(firstAi).length;
    if (firstAiSubstance < 12) {
      errors.push(`flagship first-ai-output.md is too thin (${firstAiSubstance} substance lines; looks like boilerplate)`);
    }
  }

  tick();
  if (guard !== null) {
    if (!/verdict/i.test(guard)) {
      errors.push(`flagship guard-review.md has no verdict (a review without a verdict is boilerplate)`);
    }
    if (!/evidence/i.test(guard)) {
      errors.push(`flagship guard-review.md cites no evidence section`);
    }
    const guardSubstance = substanceLines(guard).length;
    if (guardSubstance < 8) {
      errors.push(`flagship guard-review.md is too thin (${guardSubstance} substance lines; looks like boilerplate)`);
    }
  }

  tick();
  if (revised !== null) {
    if (countFences(revised) < 2) {
      errors.push(`flagship revised-output.md has no fenced code block (the fix is not actually shown)`);
    }
    // Both the arrow-key handling and the moveTask call must be visible.
    if (!/Arrow(?:Up|Down)/.test(revised) || !/moveTask/.test(revised)) {
      errors.push(`flagship revised-output.md does not show the keyboard reorder fix (ArrowUp/Down -> moveTask)`);
    }
  }

  // Every ordinary case artifact must clear a minimum substance floor so a
  // case cannot be hollowed out into the bare scaffold.
  const caseDirs = exists(workspace, "examples")
    ? readdirSync(path.join(workspace, "examples"), { withFileTypes: true })
      .filter((entry) => entry.isDirectory())
      .map((entry) => entry.name)
    : [];
  for (const caseId of caseDirs) {
    const artifactsDir = path.join(workspace, "examples", caseId, "artifacts");
    if (!existsSync(artifactsDir)) continue;
    for (const artifact of readdirSync(artifactsDir).filter((file) => file.endsWith(".md"))) {
      tick();
      const substance = substanceLines(read(path.join(artifactsDir, artifact)));
      // Floor of 2: headings only, or a single stub sentence, falls under it.
      if (substance.length < 2) {
        errors.push(`examples/${caseId}/artifacts/${artifact} is boilerplate-only (${substance.length} substance lines)`);
      }
    }
  }

  return caseDirs;
}
|
|
432
|
+
|
|
433
|
+
// (6) Cookbook recipes must carry the 8-element shape AND a real copy-paste
|
|
434
|
+
// block (a fenced code block), not just a prose outline. (Ticks once per
|
|
435
|
+
// recipe, matching the inline loop.)
|
|
436
|
+
/**
 * Check (6): each listed cookbook recipe must mention all eight operational
 * elements (case-insensitively) and contain a fenced code block.
 *
 * Ticks once per recipe. A recipe whose body is null is skipped here; the
 * base pass is responsible for reporting the missing file.
 *
 * @param {string} workspace - Root directory of the workspace being validated.
 * @param {string[]} errors - Collector that findings are appended to.
 * @param {() => void} tick - Invoked once per recipe.
 * @returns {void}
 */
export function deepCheckCookbook(workspace, errors, tick) {
  const recipeFiles = ["run-a-first-loop.md", "connect-a-tool.md", "review-a-half-product.md"];
  // Compile each element's matcher once instead of once per recipe.
  const requiredElements = [
    "When to use",
    "Prerequisites",
    "Steps",
    "Copy-paste block",
    "Expected output",
    "Failure handling",
    "Privacy note",
    "Next step"
  ].map((label) => ({ label, matcher: new RegExp(label, "i") }));

  for (const recipe of recipeFiles) {
    tick();
    const body = readIfPresent(workspace, "cookbook", recipe);
    if (body !== null) {
      for (const { label, matcher } of requiredElements) {
        if (matcher.test(body)) continue;
        errors.push(`cookbook/${recipe} missing operational element "${label}"`);
      }
      const fenceCount = countFences(body);
      if (fenceCount < 2) {
        errors.push(`cookbook/${recipe} has no copy-paste fenced block (empty outline, not a do-it recipe)`);
      }
    }
  }
}
|
|
462
|
+
|
|
463
|
+
// (8 entry) Entry path consistency: the generated START_HERE first screen must
|
|
464
|
+
// point at the real flagship loop surfaces the workspace ships, so a reader
|
|
465
|
+
// who follows it does not hit a dead link.
|
|
466
|
+
/**
 * Check (8 entry): START_HERE.md must not point the reader at loop surfaces
 * the workspace does not ship, and its "Handoff:" preview line must not
 * describe keyboard work as pending or missing.
 *
 * The CASE.md link text is built from `flagshipId`, so the text searched for
 * in START_HERE always matches the path tested on disk.
 *
 * @param {string} workspace - Root directory of the workspace being validated.
 * @param {string} flagshipId - Directory name of the flagship example case.
 * @param {string[]} errors - Collector that findings are appended to.
 * @param {() => void} tick - Invoked exactly once.
 * @returns {void}
 */
export function deepCheckEntryPath(workspace, flagshipId, errors, tick) {
  tick();
  const startHere = readIfPresent(workspace, "START_HERE.md");
  if (startHere === null) return;

  // [link text as it appears in START_HERE, path that must exist on disk]
  const previewTargets = [
    ["walkthroughs/10-minute.md", path.join(workspace, "walkthroughs", "10-minute.md")],
    [`examples/${flagshipId}/CASE.md`, path.join(workspace, "examples", flagshipId, "CASE.md")]
  ];
  for (const [label, target] of previewTargets) {
    // Escape the label so "." and friends match literally.
    const mention = new RegExp(label.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"), "i");
    if (mention.test(startHere) && !existsSync(target)) {
      errors.push(`START_HERE points to "${label}" but that path does not exist in the workspace`);
    }
  }

  // The handoff preview must reflect the accepted/post-revised state, not a
  // stale "keyboard test pending" that contradicts revised-output.md.
  const handoffLine = (startHere.match(/^Handoff:.*$/m) ?? [])[0] ?? "";
  if (handoffLine && /keyboard[^.\n]*\b(pending|missing)\b|\b(pending|missing)\b[^.\n]*keyboard/i.test(handoffLine)) {
    errors.push(`START_HERE "Handoff:" preview still describes keyboard work as pending/missing (contradicts the accepted revised output)`);
  }
}
|
|
487
|
+
|
|
488
|
+
// (9) Duplicate-boilerplate detection: no level-2 heading repeats inside a
|
|
489
|
+
// single CASE.md, and no two cases collapse to the same body signature
|
|
490
|
+
// (the "same boilerplate copied N times" failure). Ticks once per case in
|
|
491
|
+
// the per-case loop, then one final tick for the cross-case comparison.
|
|
492
|
+
/**
 * Check (9): flag stacked or copied boilerplate in example cases.
 *
 * Within one CASE.md, no level-2 heading may occur more than once. Across
 * cases, no two CASE.md files may share the same non-empty body signature
 * (pairs whose first signature is empty are not compared).
 *
 * Ticks once per entry in `caseDirs`, then once more for the cross-case pass.
 *
 * @param {string} workspace - Root directory of the workspace being validated.
 * @param {string[]} caseDirs - Case directory names under `examples`.
 * @param {string[]} errors - Collector that findings are appended to.
 * @param {() => void} tick - Invoked once per check performed.
 * @returns {void}
 */
export function deepCheckDuplicateBoilerplate(workspace, caseDirs, errors, tick) {
  const signatures = [];

  for (const caseId of caseDirs) {
    tick();
    const body = readIfPresent(workspace, "examples", caseId, "CASE.md");
    if (body === null) continue;

    // Tally headings; Map iteration keeps first-occurrence order for reports.
    const tally = new Map();
    for (const heading of level2Headings(body)) {
      const previous = tally.get(heading) ?? 0;
      tally.set(heading, previous + 1);
    }
    tally.forEach((count, heading) => {
      if (count < 2) return;
      errors.push(`examples/${caseId}/CASE.md duplicates level-2 heading "## ${heading}" (${count}x; stacked boilerplate)`);
    });

    signatures.push({ caseId, signature: caseBodySignature(body) });
  }

  // Cross-case pass: every unordered pair is compared exactly once.
  tick();
  signatures.forEach((first, index) => {
    for (let next = index + 1; next < signatures.length; next += 1) {
      const second = signatures[next];
      const sameNonEmptyBody = first.signature.length > 0 && first.signature === second.signature;
      if (sameNonEmptyBody) {
        errors.push(`examples/${first.caseId}/CASE.md and examples/${second.caseId}/CASE.md are identical boilerplate copies`);
      }
    }
  });
}
|
|
525
|
+
|
|
526
|
+
// Deep structural validation orchestrator (P2). Thin sequencer: runs each
|
|
527
|
+
// numbered block (above) in the EXACT original order, threading the values the
|
|
528
|
+
// blocks share (parsed manifest -> dir check; flagship artifact bodies -> depth
|
|
529
|
+
// check; case-dir list -> duplicate check). Tick order/count and the
|
|
530
|
+
// errors/warnings contents are unchanged from the pre-refactor inline body.
|
|
531
|
+
/**
 * Deep structural validation orchestrator. Runs each numbered check in a
 * fixed order and threads through the values they share: the parsed manifest
 * feeds the directory check, the flagship artifact bodies feed the depth
 * check, and the case-directory list feeds the duplicate check.
 *
 * @param {string} workspace - Root directory of the workspace being validated.
 * @param {string[]} errors - Collector passed to every check.
 * @param {string[]} warnings - Collector passed to the flagship check.
 * @param {{deepChecks: number}} counters - `deepChecks` is incremented once
 *   per tick issued by any check.
 * @returns {void}
 */
function deepValidate(workspace, errors, warnings, counters) {
  function tick() {
    counters.deepChecks += 1;
  }

  const manifest = deepCheckManifest(workspace, errors, tick); // (1)
  deepCheckWorkspaceDirs(workspace, manifest, errors, tick); // (2)
  deepCheckMechanismSchema(workspace, errors, tick); // (3)
  const flagship = deepCheckFlagship(workspace, errors, warnings, tick); // (4)
  const { flagshipId, firstAi, guard, revised } = flagship;
  const caseDirs = deepCheckArtifactDepth(workspace, firstAi, guard, revised, errors, tick); // (5)
  deepCheckCookbook(workspace, errors, tick); // (6)
  deepCheckEntryPath(workspace, flagshipId, errors, tick); // (8 entry)
  deepCheckDuplicateBoilerplate(workspace, caseDirs, errors, tick); // (9)

  // (7) PUBLIC_MAPPING coverage is deliberately absent. docs/PUBLIC_MAPPING.md
  // lives in the repo, not inside the .aict user workspace inspected here, so
  // it is covered by the contract test layer instead; see the blind-spots
  // note in the task report.

  // (8) Run-layer ledger integrity. The five JSONL ledgers under state/ are
  // the live substance of the run loop, so a degraded ledger (corrupt line,
  // orphaned evidence, illegal status, broken reference, a "done" task or an
  // accepted receipt with no evidence) must fail loudly with a pointable
  // reason. Reads go through the same ledger.js parser the CLI writes with,
  // so the on-disk shape cannot drift between writer and reader.
  validateLedgers(workspace, errors, tick);
}
|
|
564
|
+
|
|
565
|
+
// ===========================================================================
|
|
566
|
+
// Run-layer ledger validation — refactored into one named sub-function per
|
|
567
|
+
// numbered integrity check. Each `check*` function:
|
|
568
|
+
// - receives a parsed-ledger context `ctx` (the five record arrays plus the
|
|
569
|
+
// derived id Sets), reads only what it needs, and
|
|
570
|
+
// - RETURNS an array of error strings (never mutates shared state).
|
|
571
|
+
// The error strings are byte-for-byte identical to the pre-refactor inline
|
|
572
|
+
// blocks — this is a behavior-preserving split, NOT a wording change. The
|
|
573
|
+
// `validateLedgers` orchestrator calls them in the original order, ticks once
|
|
574
|
+
// per check exactly as before, and appends each returned array to `errors`.
|
|
575
|
+
// Exporting them lets a unit test feed one check a hand-built ledger directly
|
|
576
|
+
// (no whole-workspace round-trip), which is what closes the thin-coverage gaps
|
|
577
|
+
// the mutation tests surfaced.
|
|
578
|
+
// ===========================================================================
|
|
579
|
+
|
|
580
|
+
// Build the parsed-ledger context once. Checks 1 (bad JSONL) and 1b (per-ledger
|
|
581
|
+
// id integrity) are folded in here because they BOTH run during the parse pass
|
|
582
|
+
// in the original (1 emits parse errors per file; 1b walks each file's records),
|
|
583
|
+
// and they produce the parse `errors` the orchestrator ticks for. The returned
|
|
584
|
+
// object carries the parse/id errors (already ordered file-by-file) plus every
|
|
585
|
+
// derived value the later checks read, so no check re-parses or re-derives.
|
|
586
|
+
//
|
|
587
|
+
// Exported so a unit test can build the same context (from a temp state dir it
|
|
588
|
+
// populated with hand-crafted .jsonl rows) and feed an individual check
|
|
589
|
+
// directly — the precise, fast path the thin-coverage tests use instead of a
|
|
590
|
+
// whole validateWorkspace round-trip.
|
|
591
|
+
/**
 * Parse the five run-layer ledgers once and return everything the individual
 * ledger checks read, so no check re-parses or re-derives.
 *
 * Two passes run over the ledgers in a fixed key order:
 *  - (1) parse errors: every error reported by parseLedgerFile becomes a
 *    `ledger <file>:<line> <reason>` string; errors tagged kind "type" use
 *    the parser's message verbatim, all others are wrapped as invalid JSON.
 *  - (1b) id integrity: each record needs a non-empty string id, unique
 *    within its own ledger.
 *
 * @param {string} stateDir - Directory holding the ledger files.
 * @returns {object} `LEDGER_KEYS`, `parseErrorsByKey`, `idErrorsByKey`, the
 *   five record lists (`tasks`, `evidence`, `receipts`, `runs`, `learning`),
 *   and the derived Sets `taskIds`, `evidenceIds` and `tasksWithEvidence`.
 */
export function buildLedgerContext(stateDir) {
  const LEDGER_KEYS = ["tasks", "evidence", "runs", "receipts", "learning"];
  const parsed = {};
  const parseErrorsByKey = {};
  const idErrorsByKey = {};

  // Pass (1): parse every ledger and translate parser errors into pointable
  // file:line messages, kept per ledger so callers can report file by file.
  LEDGER_KEYS.forEach((key) => {
    const ledgerFile = ledgerPath(stateDir, key);
    const { records, errors: problems } = parseLedgerFile(ledgerFile);
    const messages = [];
    for (const problem of problems) {
      const reason = problem.kind === "type"
        ? problem.message
        : `is not valid JSON (${problem.message})`;
      messages.push(`ledger ${path.basename(ledgerFile)}:${problem.line} ${reason}`);
    }
    parseErrorsByKey[key] = messages;
    parsed[key] = records;
  });

  // Pass (1b): per-ledger id integrity. The Sets built below collapse
  // duplicate ids into one entry, so duplicates and blank ids are reported
  // here instead.
  LEDGER_KEYS.forEach((key) => {
    const ledgerFile = ledgerPath(stateDir, key);
    const knownIds = new Set();
    const messages = [];
    for (const { id } of parsed[key]) {
      const usable = typeof id === "string" && id.length > 0;
      if (!usable) {
        messages.push(`ledger ${path.basename(ledgerFile)} has a record with a missing or non-string id`);
        continue;
      }
      if (knownIds.has(id)) {
        messages.push(`ledger ${path.basename(ledgerFile)} has duplicate id "${id}"`);
      }
      knownIds.add(id);
    }
    idErrorsByKey[key] = messages;
  });

  // The runs ledger is load-bearing for the L4 gate: a rerun only counts
  // toward L4 if it reconciles against a run recorded there.
  const { tasks, evidence, receipts, runs, learning } = parsed;

  return {
    LEDGER_KEYS,
    parseErrorsByKey,
    idErrorsByKey,
    tasks,
    evidence,
    receipts,
    runs,
    learning,
    taskIds: new Set(tasks.map((task) => task.id)),
    evidenceIds: new Set(evidence.map((item) => item.id)),
    // Task ids that at least one evidence row points at.
    tasksWithEvidence: new Set(evidence.map((item) => item.taskId))
  };
}
|
|
662
|
+
|
|
663
|
+
// (3) Illegal task status: task.status must be in the enum.
|
|
664
|
+
/**
 * Check (3): every task's `status` must be one of TASK_STATUSES.
 *
 * @param {{tasks: object[]}} ctx - Parsed-ledger context; only `tasks` is read.
 * @returns {string[]} One message per task with an illegal status, in ledger order.
 */
export function checkTaskStatusEnum(ctx) {
  const problems = [];
  for (const { id, status } of ctx.tasks) {
    if (TASK_STATUSES.includes(status)) continue;
    const label = id ?? "(no id)";
    problems.push(`ledger tasks.jsonl task ${label} has illegal status "${status}" (allowed: ${TASK_STATUSES.join(", ")})`);
  }
  return problems;
}
|
|
673
|
+
|
|
674
|
+
// (5) A done task must have evidence. A task marked done with no evidence row
|
|
675
|
+
// pointing at it is exactly the "thin done" the system exists to catch.
|
|
676
|
+
// Uses the SAME doneRequiresEvidence predicate the CLI writer (task update)
|
|
677
|
+
// applies, so the write-time check and this read-time check cannot drift.
|
|
678
|
+
/**
 * Check (5): a task whose status requires evidence (as decided by
 * doneRequiresEvidence) must have at least one evidence row pointing at it.
 *
 * A task without an id is reported as "(no id)", matching the other ledger
 * checks, instead of the literal text "undefined".
 *
 * @param {{tasks: object[], tasksWithEvidence: Set<string>}} ctx - Parsed-ledger
 *   context; `tasks` and `tasksWithEvidence` are read.
 * @returns {string[]} One message per unsupported task, in ledger order.
 */
export function checkDoneRequiresEvidence(ctx) {
  const errors = [];
  for (const task of ctx.tasks) {
    if (doneRequiresEvidence(task.status) && !ctx.tasksWithEvidence.has(task.id)) {
      errors.push(`ledger tasks.jsonl task ${task.id ?? "(no id)"} is "done" but has no evidence (only blocked/partial/unverified may have none)`);
    }
  }
  return errors;
}
|
|
687
|
+
|
|
688
|
+
// (2) Orphan evidence: evidence.taskId must reference an existing task.
|
|
689
|
+
/**
 * Check (2): every evidence row's `taskId` must be a known task id.
 *
 * @param {{evidence: object[], taskIds: Set<string>}} ctx - Parsed-ledger
 *   context; `evidence` and `taskIds` are read.
 * @returns {string[]} One message per orphaned evidence row, in ledger order.
 */
export function checkOrphanEvidence(ctx) {
  const problems = [];
  for (const { id, taskId } of ctx.evidence) {
    const taskKnown = ctx.taskIds.has(taskId);
    if (taskKnown) continue;
    problems.push(`ledger evidence.jsonl evidence ${id ?? "(no id)"} references unknown task "${taskId}" (orphan)`);
  }
  return problems;
}
|
|
698
|
+
|
|
699
|
+
// (2b) Special-evidence structure (P2 structure gate): a load-bearing kind
|
|
700
|
+
// (cross_family_guard / rerun) must carry its required structured fields,
|
|
701
|
+
// not just the right label. Uses the SAME specialEvidenceStructureError
|
|
702
|
+
// predicate the CLI writer applies at evidence-add time, so a hand-planted
|
|
703
|
+
// empty-shell special row (e.g. a cross_family_guard with no
|
|
704
|
+
// reviewer/family/ref, or a rerun with no command/exitCode) is caught
|
|
705
|
+
// read-time exactly as the writer refuses it write-time — even if no
|
|
706
|
+
// receipt cites it yet. Generic kinds return null here and are unaffected.
|
|
707
|
+
/**
 * Check (2b): run specialEvidenceStructureError over every evidence row and
 * report each truthy result, prefixed with the row's id.
 *
 * @param {{evidence: object[]}} ctx - Parsed-ledger context; only `evidence` is read.
 * @returns {string[]} One message per structurally invalid row, in ledger order.
 */
export function checkSpecialEvidenceStructure(ctx) {
  const problems = [];
  for (const row of ctx.evidence) {
    const defect = specialEvidenceStructureError(row);
    if (!defect) continue; // a falsy result means nothing to report
    problems.push(`ledger evidence.jsonl evidence ${row.id ?? "(no id)"}: ${defect}`);
  }
  return problems;
}
|
|
717
|
+
|
|
718
|
+
// (2c) Rerun-run reconciliation (A1 L4 reconciliation, read side): a rerun row
|
|
719
|
+
// that carries a runId MUST reconcile against a recorded run in runs.jsonl
|
|
720
|
+
// (same task, finished, executed:true, matching exitCode + command + output hash), using the SAME
|
|
721
|
+
// rerunRunReconcileError the CLI applies at evidence-add time. This catches a
|
|
722
|
+
// hand-edited jsonl that bolts a runId onto a rerun whose exitCode/command
|
|
723
|
+
// disagree with the recorded run (the red-team "runs=1 but rerun says 0"
|
|
724
|
+
// forgery), even before any receipt cites it. A rerun with NO runId is NOT
|
|
725
|
+
// flagged here (it is a valid generic rerun that simply cannot reach L4 — the
|
|
726
|
+
// L4 gate is enforced where a receipt claims L4, via ownedRerunEvidenceIds);
|
|
727
|
+
// only a PRESENT-but-broken runId is an integrity error. Structurally-
|
|
728
|
+
// incomplete rerun rows are already reported by (2b); skip them so the
|
|
729
|
+
// reconcile check does not double-report.
|
|
730
|
+
/**
 * Check (2c): a rerun evidence row that carries a runId must reconcile against
 * the recorded runs, as judged by rerunRunReconcileError.
 *
 * Rows that isRerunWithRunId rejects are ignored. Rows for which
 * specialEvidenceStructureError returns anything other than null are skipped
 * as well, because check (2b) already reports them.
 *
 * @param {{evidence: object[], runs: object[]}} ctx - Parsed-ledger context;
 *   `evidence` and `runs` are read.
 * @returns {string[]} One message per rerun row that fails to reconcile.
 */
export function checkRerunRunReconcile(ctx) {
  const problems = [];
  for (const row of ctx.evidence) {
    const eligible = isRerunWithRunId(row) && specialEvidenceStructureError(row) === null;
    if (!eligible) continue;
    const mismatch = rerunRunReconcileError(row, ctx.runs);
    if (!mismatch) continue;
    problems.push(`ledger evidence.jsonl evidence ${row.id ?? "(no id)"}: ${mismatch}`);
  }
  return problems;
}
|
|
742
|
+
|
|
743
|
+
// (4) Broken reference: every id in receipt.evidenceIds must exist in evidence.
|
|
744
|
+
/**
 * Check (4): every id listed in a receipt's `evidenceIds` must be a known
 * evidence id. A receipt whose `evidenceIds` is not an array contributes
 * nothing.
 *
 * @param {{receipts: object[], evidenceIds: Set<string>}} ctx - Parsed-ledger
 *   context; `receipts` and `evidenceIds` are read.
 * @returns {string[]} One message per broken reference, in ledger order.
 */
export function checkReceiptEvidenceRefs(ctx) {
  const problems = [];
  for (const receipt of ctx.receipts) {
    if (!Array.isArray(receipt.evidenceIds)) continue;
    const label = receipt.id ?? "(no id)";
    receipt.evidenceIds
      .filter((cited) => !ctx.evidenceIds.has(cited))
      .forEach((cited) => {
        problems.push(`ledger receipts.jsonl receipt ${label} references unknown evidence "${cited}" (broken reference)`);
      });
  }
  return problems;
}
|
|
756
|
+
|
|
757
|
+
// (4b) Receipt task reference: receipt.taskId must point at an existing task.
|
|
758
|
+
// A receipt for a task that does not exist is a dangling receipt — and it
|
|
759
|
+
// is also the entry point the cross-task check (4c) needs, since "does
|
|
760
|
+
// this evidence belong to the receipt's task" is meaningless if the task
|
|
761
|
+
// itself is unknown. Same rule the CLI writer enforces at receipt create.
|
|
762
|
+
/**
 * Check (4b): every receipt's `taskId` must be a known task id.
 *
 * @param {{receipts: object[], taskIds: Set<string>}} ctx - Parsed-ledger
 *   context; `receipts` and `taskIds` are read.
 * @returns {string[]} One message per dangling receipt, in ledger order.
 */
export function checkReceiptTaskRef(ctx) {
  const problems = [];
  for (const { id, taskId } of ctx.receipts) {
    if (ctx.taskIds.has(taskId)) continue;
    problems.push(`ledger receipts.jsonl receipt ${id ?? "(no id)"} references unknown task "${taskId}"`);
  }
  return problems;
}
|
|
771
|
+
|
|
772
|
+
// (4c) Cross-task evidence: every evidence id a receipt cites must belong to
|
|
773
|
+
// the receipt's OWN task. Citing another task's evidence is the back door
|
|
774
|
+
// that lets a task with no evidence of its own be written "accepted" by
|
|
775
|
+
// borrowing someone else's proof. Uses the SAME ownedEvidenceIds filter
|
|
776
|
+
// the CLI writer applies, so the write-time guard and this read-time check
|
|
777
|
+
// cannot drift. Unknown ids are already reported by (4); here we flag only
|
|
778
|
+
// ids that resolve to a real evidence row owned by a DIFFERENT task.
|
|
779
|
+
/**
 * Check (4c): an evidence id cited by a receipt must be among the ids that
 * ownedEvidenceIds attributes to the receipt's own task. Ids that are not
 * known evidence at all are left to check (4); only known ids outside the
 * owned set are reported here.
 *
 * @param {{receipts: object[], evidence: object[], evidenceIds: Set<string>}} ctx
 *   Parsed-ledger context; `receipts`, `evidence` and `evidenceIds` are read.
 * @returns {string[]} One message per cross-task citation, in ledger order.
 */
export function checkReceiptCrossTaskEvidence(ctx) {
  const problems = [];
  for (const receipt of ctx.receipts) {
    const cited = Array.isArray(receipt.evidenceIds) ? receipt.evidenceIds : [];
    const ownedByTask = new Set(ownedEvidenceIds(cited, receipt.taskId, ctx.evidence));
    const borrowed = cited.filter((id) => ctx.evidenceIds.has(id) && !ownedByTask.has(id));
    for (const id of borrowed) {
      problems.push(`ledger receipts.jsonl receipt ${receipt.id ?? "(no id)"} cites evidence "${id}" that belongs to another task (not task "${receipt.taskId}")`);
    }
  }
  return problems;
}
|
|
792
|
+
|
|
793
|
+
// (4d) rerunEvidenceIds reference integrity (G2): rerunEvidenceIds is a real
|
|
794
|
+
// cited-evidence list (the writer rejects unknown/foreign ids in it at
|
|
795
|
+
// receipt-create time exactly like evidenceIds), but read-time it was only
|
|
796
|
+
// consulted inside the L4-pass boolean — so a hand-planted receipt with a
|
|
797
|
+
// bad rerunEvidenceIds (an id that does not exist, or one owned by another
|
|
798
|
+
// task) slipped past the global reference checks unless it happened to be
|
|
799
|
+
// an L4 pass. This check covers ALL receipts, mirroring (4) + (4c) for the
|
|
800
|
+
// plain evidenceIds list: an unknown id is a broken reference; a known id
|
|
801
|
+
// owned by a different task is a cross-task citation. Uses the SAME
|
|
802
|
+
// ownedEvidenceIds ownership predicate the writer applies, so write-time
|
|
803
|
+
// and read-time cannot drift. (The L4 kind/structure requirement is a
|
|
804
|
+
// separate, stronger gate handled by check 8 via ownedRerunEvidenceIds.)
|
|
805
|
+
/**
 * Check (4d): reference integrity for a receipt's `rerunEvidenceIds`, applied
 * to every receipt. An id that is not known evidence is a broken reference;
 * a known id outside the set ownedEvidenceIds attributes to the receipt's
 * task is a cross-task citation. Receipts with no rerun ids (or a non-array
 * value) are skipped before any ownership lookup.
 *
 * @param {{receipts: object[], evidence: object[], evidenceIds: Set<string>}} ctx
 *   Parsed-ledger context; `receipts`, `evidence` and `evidenceIds` are read.
 * @returns {string[]} One message per bad rerun reference, in ledger order.
 */
export function checkRerunEvidenceIdRefs(ctx) {
  const problems = [];
  for (const receipt of ctx.receipts) {
    const { rerunEvidenceIds } = receipt;
    if (!Array.isArray(rerunEvidenceIds) || rerunEvidenceIds.length === 0) continue;

    const ownedByTask = new Set(ownedEvidenceIds(rerunEvidenceIds, receipt.taskId, ctx.evidence));
    for (const id of rerunEvidenceIds) {
      if (!ctx.evidenceIds.has(id)) {
        problems.push(`ledger receipts.jsonl receipt ${receipt.id ?? "(no id)"} references unknown rerun evidence "${id}" (broken reference)`);
        continue;
      }
      if (!ownedByTask.has(id)) {
        problems.push(`ledger receipts.jsonl receipt ${receipt.id ?? "(no id)"} cites rerun evidence "${id}" that belongs to another task (not task "${receipt.taskId}")`);
      }
    }
  }
  return problems;
}
|
|
821
|
+
|
|
822
|
+
// (6) Accepted receipt must cite SAME-TASK evidence: an accepted verdict with no
|
|
823
|
+
// evidence that belongs to its own task is an unsupported acceptance — whether
|
|
824
|
+
// the evidenceIds list is empty OR it only cites another task's evidence.
|
|
825
|
+
// Counting via ownedEvidenceIds (not raw length) closes the cross-task back
|
|
826
|
+
// door at the status level too, and matches `receipt accept`, which also keys
|
|
827
|
+
// on owned regular evidence — so the two never disagree on what backs an
|
|
828
|
+
// acceptance. (A clean pass needs a cited cross_family_guard row to reach L3+,
|
|
829
|
+
// so a legitimately-accepted pass always has same-task evidenceIds.)
|
|
830
|
+
/**
 * Check (6): a receipt with status "accepted" must cite at least one evidence
 * id that ownedEvidenceIds attributes to the receipt's own task. An empty
 * list and a list that only cites other tasks' evidence are both reported.
 *
 * @param {{receipts: object[], evidence: object[]}} ctx - Parsed-ledger
 *   context; `receipts` and `evidence` are read.
 * @returns {string[]} One message per unsupported acceptance, in ledger order.
 */
export function checkAcceptedReceiptHasEvidence(ctx) {
  const problems = [];
  const accepted = (receipt) => receipt.status === "accepted";
  for (const receipt of ctx.receipts) {
    if (!accepted(receipt)) continue;
    const cited = Array.isArray(receipt.evidenceIds) ? receipt.evidenceIds : [];
    const backing = ownedEvidenceIds(cited, receipt.taskId, ctx.evidence);
    if (backing.length > 0) continue;
    problems.push(`ledger receipts.jsonl receipt ${receipt.id ?? "(no id)"} is "accepted" but cites no evidence`);
  }
  return problems;
}
|
|
842
|
+
|
|
843
|
+
// (7) Guard level present + valid (P2): every receipt must carry a guardLevel
|
|
844
|
+
// in the enum. The level grades the evidence the guard saw and is what the
|
|
845
|
+
// verdict-consistency check (8) bounds the verdict against, so a missing or
|
|
846
|
+
// bogus level is rejected here first.
|
|
847
|
+
/**
 * Check (7): every receipt's `guardLevel` must be one of GUARD_LEVELS. A
 * missing level is reported the same way as an illegal one.
 *
 * @param {{receipts: object[]}} ctx - Parsed-ledger context; only `receipts` is read.
 * @returns {string[]} One message per offending receipt, in ledger order.
 */
export function checkGuardLevelEnum(ctx) {
  const problems = [];
  for (const { id, guardLevel } of ctx.receipts) {
    if (GUARD_LEVELS.includes(guardLevel)) continue;
    problems.push(`ledger receipts.jsonl receipt ${id ?? "(no id)"} has missing/illegal guardLevel "${guardLevel}" (allowed: ${GUARD_LEVELS.join(", ")})`);
  }
  return problems;
}
|
|
856
|
+
|
|
857
|
+
// (8) Verdict x guardLevel consistency (P2 core): the verdict a receipt carries
|
|
858
|
+
// must be one its guard level can back. Uses the SAME guardLevelVerdictError
|
|
859
|
+
// predicate the CLI writer applies, so a hand-planted row (an L0 "pass", an
|
|
860
|
+
// L2 "pass", a pass below L3, or an L4 "pass" with no rerun output) is
|
|
861
|
+
// caught read-time exactly as the writer refuses it write-time. hasRerun is
|
|
862
|
+
// computed from rerun ids that actually belong to this task (a rerun id
|
|
863
|
+
// borrowed from another task does not satisfy the L4 requirement). Receipts
|
|
864
|
+
// whose guardLevel is already invalid are skipped (reported by check 7).
|
|
865
|
+
/**
 * Check (8): a receipt's verdict must be one its guard level can back, as
 * judged by guardLevelVerdictError.
 *
 * guardLevelVerdictError receives, in order: the guard level, the verdict,
 * whether ownedRerunEvidenceIds returned any id for this task, whether
 * ownedCrossFamilyGuardEvidenceIds returned any id for this task, and whether
 * the receipt listed any rerun ids at all. Receipts whose guardLevel is not
 * in GUARD_LEVELS are skipped (check 7 reports them).
 *
 * @param {{receipts: object[], evidence: object[], runs: object[]}} ctx
 *   Parsed-ledger context; `receipts`, `evidence` and `runs` are read.
 * @returns {string[]} One message per inconsistent receipt, in ledger order.
 */
export function checkVerdictGuardLevelConsistency(ctx) {
  const asList = (value) => (Array.isArray(value) ? value : []);
  const problems = [];

  for (const receipt of ctx.receipts) {
    const { guardLevel, verdict, taskId } = receipt;
    if (!GUARD_LEVELS.includes(guardLevel)) continue;

    // Rerun ids count only when they belong to this task and reconcile
    // against the recorded runs (hence ctx.runs is passed along).
    const citedReruns = asList(receipt.rerunEvidenceIds);
    const backingReruns = ownedRerunEvidenceIds(citedReruns, taskId, ctx.evidence, ctx.runs);

    // Cross-family guard rows are looked up among the plain evidenceIds.
    const citedEvidence = asList(receipt.evidenceIds);
    const backingCrossFamily = ownedCrossFamilyGuardEvidenceIds(citedEvidence, taskId, ctx.evidence);

    const mismatch = guardLevelVerdictError(
      guardLevel,
      verdict,
      backingReruns.length > 0,
      backingCrossFamily.length > 0,
      citedReruns.length > 0
    );
    if (!mismatch) continue;
    problems.push(`ledger receipts.jsonl receipt ${receipt.id ?? "(no id)"}: ${mismatch}`);
  }

  return problems;
}
|
|
893
|
+
|
|
894
|
+
// (8b) Review-mode validity (A1): a receipt's reviewMode, when present, must be
|
|
895
|
+
// a legal REVIEW_MODES value. The reviewMode is the load-bearing input to
|
|
896
|
+
// the level computation, so a bogus mode (a typo, or a made-up "binding")
|
|
897
|
+
// must be caught before check 8c trusts it. reviewMode is OPTIONAL on a row
|
|
898
|
+
// (a pre-A1 receipt has none; the computation infers it), so absence is
|
|
899
|
+
// fine — only a present-but-illegal value is flagged.
|
|
900
|
+
/**
 * Check (8b): a receipt's `reviewMode`, when it is anything other than
 * undefined, must be one of REVIEW_MODES. An absent reviewMode is accepted.
 *
 * @param {{receipts: object[]}} ctx - Parsed-ledger context; only `receipts` is read.
 * @returns {string[]} One message per receipt with an illegal mode, in ledger order.
 */
export function checkReviewModeEnum(ctx) {
  const problems = [];
  for (const { id, reviewMode } of ctx.receipts) {
    const acceptable = reviewMode === undefined || REVIEW_MODES.includes(reviewMode);
    if (acceptable) continue;
    problems.push(`ledger receipts.jsonl receipt ${id ?? "(no id)"} has illegal reviewMode "${reviewMode}" (allowed: ${REVIEW_MODES.join(", ")})`);
  }
  return problems;
}
|
|
909
|
+
|
|
910
|
+
// (8c) Computed-level integrity (A1 CORE, read side).
//      A guard level is derived from the review method plus the evidence; it is
//      never self-asserted. A hand-edited receipts.jsonl could store a level
//      higher than that derivation supports (e.g. "L4" with no rerun output, or
//      "L3" with reviewMode "same_family_subagent"). This check re-derives the
//      level through computeReceiptGuardLevel — the same shared helper the
//      writer side uses, so both agree on what the evidence backs — and reports
//      every receipt whose stored level OUTRANKS the derived one.
//
//      A stored level at or below the derived level is accepted: under-claiming
//      is allowed, only over-claiming is the silent-green danger. Rows whose
//      guardLevel or reviewMode is not a legal enum value are skipped here
//      because the enum checks already report them.
//
//      ctx.receipts / ctx.evidence / ctx.runs: parsed ledger rows.
//      Returns an array of error strings.
export function checkReceiptComputedLevel(ctx) {
  const problems = [];
  for (const row of ctx.receipts) {
    const levelKnown = GUARD_LEVELS.includes(row.guardLevel);
    const modeUsable = row.reviewMode === undefined || REVIEW_MODES.includes(row.reviewMode);
    if (!levelKnown || !modeUsable) continue;

    const recomputed = computeReceiptGuardLevel(row, ctx.evidence, ctx.runs);
    const overClaims = guardLevelRank(row.guardLevel) > guardLevelRank(recomputed.level);
    if (!overClaims) continue;

    const label = row.id ?? "(no id)";
    problems.push(`ledger receipts.jsonl receipt ${label} claims guard level "${row.guardLevel}" but the review method + evidence only support "${recomputed.level}" (${recomputed.reason}); the level is computed, not self-asserted`);
  }
  return problems;
}
|
|
934
|
+
|
|
935
|
+
// (8d) Family-honesty marker integrity (A1/C1).
//      When the re-derived level rests on a SELF-DECLARED cross-family claim
//      (the computation reports familyUnverified), the stored row must carry
//      familyUnverified: true, so a hand-edit cannot strip the "unverified"
//      mark and make the row read like a hard pass. The reverse also holds: a
//      row the computation does not flag must not carry the marker, so a
//      receipt cannot advertise a caveat it has not earned.
//
//      Only rows whose stored level equals the re-derived level are examined.
//      A row whose level differs is left alone here: an over-claim is already
//      reported by check 8c, and flagging its marker as well would be noise.
//      Rows with an illegal guardLevel or reviewMode are skipped too.
//
//      ctx.receipts / ctx.evidence / ctx.runs: parsed ledger rows.
//      Returns an array of error strings.
export function checkFamilyUnverifiedMarker(ctx) {
  const problems = [];
  for (const row of ctx.receipts) {
    const levelKnown = GUARD_LEVELS.includes(row.guardLevel);
    const modeUsable = row.reviewMode === undefined || REVIEW_MODES.includes(row.reviewMode);
    if (!levelKnown || !modeUsable) continue;

    // Same shared re-computation as 8c: one source for the family-verification truth.
    const recomputed = computeReceiptGuardLevel(row, ctx.evidence, ctx.runs);
    if (row.guardLevel !== recomputed.level) continue;

    const markerExpected = Boolean(recomputed.familyUnverified);
    const markerPresent = row.familyUnverified === true; // only a literal true counts
    if (markerExpected === markerPresent) continue;

    const label = row.id ?? "(no id)";
    if (markerExpected) {
      problems.push(`ledger receipts.jsonl receipt ${label} is a self-declared cross-family level (${recomputed.level}) but is missing the familyUnverified: true marker (the cross-family family is unverified and must be marked so)`);
    } else {
      problems.push(`ledger receipts.jsonl receipt ${label} carries familyUnverified: true but its computed level (${recomputed.level}) is not an unverified cross-family level (the marker is unwarranted)`);
    }
  }
  return problems;
}
|
|
962
|
+
|
|
963
|
+
// (9) Owner acceptance integrity (P2).
//     An "accepted" pass_with_risk receipt must carry the owner-acceptance
//     marker (ownerAccepted: true): a risk receipt exists because a human
//     accepted the named residual risk, so an accepted one without the owner
//     mark is an unsupported acceptance. The decision is delegated to
//     ownerAcceptanceError, the same predicate the CLI accept path uses; any
//     message it returns is prefixed with the ledger file name.
//
//     ctx.receipts: parsed receipt rows. Returns an array of error strings.
export function checkOwnerAcceptanceMarker(ctx) {
  const problems = [];
  for (const row of ctx.receipts) {
    const complaint = ownerAcceptanceError(row);
    if (!complaint) continue; // a falsy result means the row is fine
    problems.push(`ledger receipts.jsonl ${complaint}`);
  }
  return problems;
}
|
|
978
|
+
|
|
979
|
+
// (10) Receipt status reverse-consistency (P2 evidence-gate, REJECT follow-up).
//      A receipt's status is not a free-form label: it is DERIVED from
//      (verdict, owned evidence, ownerAccepted) by receiptStatusFor, the same
//      rule the writer applies. Without this check a hand-planted row could
//      carry a status that contradicts its own verdict (verdict "reject" stored
//      as "accepted", or "pass_with_risk" stored as "accepted" with no owner
//      sign-off) and pass as long as it cited some evidence.
//
//      Two steps per row:
//        (a) the status must be a member of RECEIPT_STATUSES;
//        (b) the status is re-derived and compared, so the stored value can
//            claim neither more nor less than the rule grants.
//
//      ctx.receipts / ctx.evidence: parsed ledger rows.
//      Returns an array of error strings.
export function checkReceiptStatusReverse(ctx) {
  const problems = [];
  for (const row of ctx.receipts) {
    const label = row.id ?? "(no id)";

    // (a) An unknown status is reported and not reverse-computed: there is
    // nothing meaningful to compare it against.
    if (!RECEIPT_STATUSES.includes(row.status)) {
      problems.push(`ledger receipts.jsonl receipt ${label} has missing/illegal status "${row.status}" (allowed: ${RECEIPT_STATUSES.join(", ")})`);
      continue;
    }

    // (b) A verdict outside the enum is already reported by check 8's
    // predicate, and receiptStatusFor treats any non-accepting verdict as
    // "rejected"; skipping such rows avoids a confusing double report.
    if (!RECEIPT_VERDICTS.includes(row.verdict)) continue;

    // Re-derive from the evidence this task actually owns — the same basis the
    // CLI writer and `receipt accept` use, so the three never disagree. (Under
    // the L4 rule a clean pass always cites a cross_family_guard row in
    // evidenceIds, so a top-level pass re-derives to "accepted", never to a
    // contradictory "pending".)
    const citedIds = Array.isArray(row.evidenceIds) ? row.evidenceIds : [];
    const owned = ownedEvidenceIds(citedIds, row.taskId, ctx.evidence);
    const ownerSigned = row.ownerAccepted === true;
    const expected = receiptStatusFor(row.verdict, owned, ownerSigned);
    if (row.status === expected) continue;

    problems.push(`ledger receipts.jsonl receipt ${label} has status "${row.status}" but verdict "${row.verdict}" with ${owned.length} own-task evidence and ownerAccepted=${ownerSigned} computes to "${expected}" (status contradicts the rule)`);
  }
  return problems;
}
|
|
1015
|
+
|
|
1016
|
+
// (11) Learning-ledger record shape (P4).
//      Every learning row must carry a legal type (harvest/profile), non-empty
//      content, and a legal status (proposed/confirmed/edited/dropped). The
//      verdict comes from learningRecordError, the same predicate the CLI
//      writer (learning add / confirm / edit / drop) applies, so a row the
//      writer would refuse is also flagged at read time. That catches a
//      hand-edited ledger that drifted off the contract (bogus type, typo'd
//      status, emptied content) before it feeds a malformed preference into
//      status recall. (P1 covered id integrity only; this is the P4
//      type/status/content contract.)
//
//      ctx.learning: parsed learning rows. Returns an array of error strings.
export function checkLearningRecordShape(ctx) {
  const problems = [];
  for (const entry of ctx.learning) {
    const complaint = learningRecordError(entry);
    if (!complaint) continue; // a falsy result means the row is well-formed
    const label = entry.id ?? "(no id)";
    problems.push(`ledger learning-ledger.jsonl learning ${label}: ${complaint}`);
  }
  return problems;
}
|
|
1035
|
+
|
|
1036
|
+
// (12) Orphan learning row.
//      A learning row may be unbound (no taskId) — that is a legitimate
//      cross-task lesson and is skipped. A taskId that IS present must name a
//      known task; otherwise the row is a dangling binding, the same standard
//      the evidence orphan check (check 2) holds.
//
//      ctx.learning: parsed learning rows; ctx.taskIds: collection of known
//      task ids queried with .has(). Returns an array of error strings.
export function checkOrphanLearning(ctx) {
  const problems = [];
  for (const entry of ctx.learning) {
    const boundTo = entry.taskId;
    if (boundTo === undefined) continue; // unbound row: nothing to resolve
    if (ctx.taskIds.has(boundTo)) continue;
    const label = entry.id ?? "(no id)";
    problems.push(`ledger learning-ledger.jsonl learning ${label} references unknown task "${boundTo}" (orphan)`);
  }
  return problems;
}
|
|
1050
|
+
|
|
1051
|
+
// Run-layer ledger validation, called from deepValidate so that its findings
// land in the caller's `errors` list (the CLI check and the contract validator
// both keep failing on a degraded ledger with no interface change).
//
// This function is a thin orchestrator. It builds the parsed-ledger context
// once from <workspace>/state, then:
//   1. replays the parse errors (check 1), one tick per ledger file;
//   2. replays the id-integrity errors (check 1b), one tick per ledger file;
//   3. runs every remaining numbered check in its established order, one tick
//      per check, appending whatever errors the check returns.
// The numbered labels are historical identifiers, not the execution order; the
// sequence below is the order that determines tick count and error order.
//
// workspace: workspace root path. errors: array mutated in place.
// tick: zero-argument callback invoked once per performed check.
function validateLedgers(workspace, errors, tick) {
  const ctx = buildLedgerContext(path.join(workspace, "state"));

  const append = (found) => {
    for (const message of found) errors.push(message);
  };

  // (1) bad JSONL, then (1b) per-ledger id integrity: a full pass over all
  // ledger files for the first table, then a full pass for the second.
  for (const errorsByKey of [ctx.parseErrorsByKey, ctx.idErrorsByKey]) {
    for (const key of ctx.LEDGER_KEYS) {
      tick();
      append(errorsByKey[key]);
    }
  }

  const orderedChecks = [
    checkTaskStatusEnum, // (3)
    checkDoneRequiresEvidence, // (5)
    checkOrphanEvidence, // (2)
    checkSpecialEvidenceStructure, // (2b)
    checkRerunRunReconcile, // (2c)
    checkReceiptEvidenceRefs, // (4)
    checkReceiptTaskRef, // (4b)
    checkReceiptCrossTaskEvidence, // (4c)
    checkRerunEvidenceIdRefs, // (4d)
    checkAcceptedReceiptHasEvidence, // (6)
    checkGuardLevelEnum, // (7)
    checkVerdictGuardLevelConsistency, // (8)
    checkReviewModeEnum, // (8b)
    checkReceiptComputedLevel, // (8c)
    checkFamilyUnverifiedMarker, // (8d)
    checkOwnerAcceptanceMarker, // (9)
    checkReceiptStatusReverse, // (10)
    checkLearningRecordShape, // (11)
    checkOrphanLearning // (12)
  ];
  for (const runCheck of orderedChecks) {
    tick();
    append(runCheck(ctx));
  }
}
|
|
1104
|
+
|
|
1105
|
+
// Validate a workspace directory: required directories and files, required
// anchor phrases inside key documents, placeholder-text bans, and finally the
// deep structural pass (deepValidate).
//
// Returns { ok, errors, warnings, checks, deepChecks } where `ok` is true only
// when no error was recorded, `checks` counts every performed check including
// the deep ones, and `deepChecks` is the deep-pass share of that count.
export function validateWorkspace(workspace) {
  const errors = [];
  const warnings = [];
  let checks = 0;

  // The five documents every layer and every mechanism ships.
  const kitFiles = ["README.md", "PROMPT.md", "TEMPLATE.md", "EXAMPLE.synthetic.md", "FAILURE_MODES.md"];
  const placeholderPattern = /TBD|TODO|placeholder/i;

  // Presence-only requirement for a list of files in one directory; each file
  // counts as one check.
  const requirePresence = (dir, names) => {
    for (const name of names) {
      requireFile(errors, workspace, dir, name);
      checks += 1;
    }
  };

  // Workspace root and the entry document.
  requireDir(errors, workspace);
  const entryDoc = requireFile(errors, workspace, "START_HERE.md");
  checks += 1;
  includesAll(errors, "START_HERE.md", entryDoc, ["10-minute path", "30-minute path", "60-minute path", "guard", "handoff", "harvest"]);
  const firstScreen = entryDoc.slice(0, 1200);
  if (/doctor/i.test(firstScreen)) {
    errors.push("START_HERE first screen must not lead with doctor");
  }

  for (const dir of requiredWorkspaceDirs) {
    requireDir(errors, workspace, dir);
    checks += 1;
  }

  // Each layer: all five kit files present, and their combined text carries
  // every section heading and every supported tool name.
  for (const layer of ["profile", "context", "acceptance", "guard", "handoff", "harvest"]) {
    requirePresence(layer, kitFiles);
    const layerText = kitFiles
      .map((name) => {
        if (!exists(workspace, layer, name)) return "";
        return read(path.join(workspace, layer, name));
      })
      .join("\n");
    includesAll(errors, layer, layerText, [
      "Purpose",
      "When to use",
      "Input shape",
      "Output shape",
      "Copy-paste prompt",
      "Blank template",
      "Filled synthetic example",
      "Common failure modes",
      "Claude Code",
      "Codex",
      "Cursor",
      "Windsurf",
      "Copilot",
      "Cline"
    ]);
  }

  // Profile candidate buffer (P0-5): a proposed preference has to pass through
  // profile/CANDIDATES.md before it can graduate into the long-term profile.
  // Requiring the file and its four states makes a degraded workspace that
  // drops the buffer (letting unreviewed guesses edit the profile) fail loudly.
  const candidateBuffer = requireFile(errors, workspace, "profile", "CANDIDATES.md");
  checks += 1;
  includesAll(errors, "profile/CANDIDATES.md", candidateBuffer, [
    "State machine",
    "proposed",
    "confirmed",
    "edited",
    "dropped"
  ]);

  for (const mechanism of requiredMechanismIds) {
    for (const name of kitFiles) {
      const body = requireFile(errors, workspace, "mechanisms", mechanism, name);
      checks += 1;
      const label = `${mechanism}/${name}`;
      includesAll(errors, label, body, ["AI Collaboration Open System", "local-first", "public-safe"]);
      if (placeholderPattern.test(body)) errors.push(`${label} contains placeholder text`);
    }
  }

  requirePresence("roles", ["README.md", "owner-controller.md", "executor.md", "system-guardian.md", "scout.md", "harvester.md"]);
  requirePresence("modes", ["README.md", "execute.md", "review.md", "handoff.md", "harvest.md"]);
  requirePresence("cookbook", ["README.md", "run-a-first-loop.md", "connect-a-tool.md", "review-a-half-product.md"]);
  requirePresence("state", ["CURRENT_STATE.md", "TASK_LOG.md", "DECISIONS.md"]);

  // P1 run-layer ledgers must exist. Only presence is asserted here (content
  // integrity belongs to the deep ledger pass), so a workspace that dropped a
  // ledger fails the base check too.
  requirePresence("state", ["tasks.jsonl", "evidence.jsonl", "runs.jsonl", "receipts.jsonl", "learning-ledger.jsonl"]);

  for (const promptFile of requiredPromptFiles) {
    const body = requireFile(errors, workspace, "prompts", promptFile);
    checks += 1;
    includesAll(errors, promptFile, body, ["Copy-paste prompt", "Expected output"]);
    if (placeholderPattern.test(body)) errors.push(`${promptFile} contains placeholder text`);
  }

  for (const skill of requiredSkillIds) {
    const body = requireFile(errors, workspace, "skills", skill, "SKILL.md");
    checks += 1;
    includesAll(errors, `${skill} skill`, body, ["name:", "When to use", "Output", "Safety"]);
  }

  // Adapters: the shared contract first (not counted as a check), then each tool adapter.
  const sharedContract = requireFile(errors, workspace, "adapters", "SHARED_CORE_CONTRACT.md");
  includesAll(errors, "shared core", sharedContract, ["Profile", "Context", "Acceptance", "Guard", "Handoff", "Harvest"]);
  for (const adapter of requiredAdapterIds) {
    const body = requireFile(errors, workspace, "adapters", adapter, "ADAPTER.md");
    checks += 1;
    includesAll(errors, `${adapter} adapter`, body, ["SHARED_CORE_CONTRACT.md", "profile", "context", "acceptance", "guard", "handoff", "harvest"]);
  }

  // Synthetic cases: directory present, CASE.md carries every section, and all
  // six artifacts exist.
  let caseDirs = [];
  if (exists(workspace, "examples")) {
    caseDirs = readdirSync(path.join(workspace, "examples"), { withFileTypes: true })
      .filter((entry) => entry.isDirectory())
      .map((entry) => entry.name);
  }
  for (const caseId of requiredCaseIds) {
    if (!caseDirs.includes(caseId)) errors.push(`missing synthetic case ${caseId}`);
    const caseDoc = requireFile(errors, workspace, "examples", caseId, "CASE.md");
    checks += 1;
    includesAll(errors, caseId, caseDoc, [
      "Confusing raw input",
      "Likely single-agent failure",
      "AI Collaboration OS process",
      "Context package",
      "Acceptance card",
      "Handoff note",
      "Harvest seed",
      "Before/after comparison",
      "Messy starting point",
      "Workspace setup",
      "Profile/context",
      "Acceptance",
      "Execution prompt",
      "Guard review",
      "Handoff",
      "Harvest",
      "What changes compared with a single raw AI chat"
    ]);
    for (const artifact of ["context-package.md", "acceptance-card.md", "execution-prompt.md", "guard-review.md", "handoff-note.md", "harvest-seed.md"]) {
      requireFile(errors, workspace, "examples", caseId, "artifacts", artifact);
      checks += 1;
    }
  }

  requirePresence("privacy", ["PRIVACY.md", "COMMERCIAL_BOUNDARY.md", "REDACTION_CHECKLIST.md"]);

  for (const walkthrough of ["10-minute-your-task.md", "10-minute.md", "30-minute.md", "60-minute.md", "synthetic-loop-transcript.md"]) {
    const body = requireFile(errors, workspace, "walkthroughs", walkthrough);
    checks += 1;
    includesAll(errors, walkthrough, body, ["Goal", "Expected"]);
  }

  // The real first-run walkthrough (10-minute-your-task.md) is the main path
  // that init/guide/help point users to, so it is held to a higher bar than the
  // generic "Goal/Expected" floor: its loop must carry the hardened evidence
  // chain (P0-4) and the profile-candidate buffer (P0-5), not a thin "report
  // what you did" + "drop it into the profile". If the walkthrough is gutted
  // back to the soft version, these anchors disappear and validation fails.
  const firstRunDoc = requireFile(errors, workspace, "walkthroughs", "10-minute-your-task.md");
  checks += 1;
  includesAll(errors, "10-minute-your-task.md", firstRunDoc, [
    "Evidence Pack", // Step 2 must produce a structured evidence pack
    // The Evidence Pack keeps six concrete segments; each one is what makes the
    // Step 3 re-check checkable, so dropping one silently re-softens the loop.
    "Changed files / diff", // segment 1: what changed
    "Commands run", // segment 2: how it was verified
    "Command output summary", // segment 3: real output, not paraphrase
    "exit code", // segment 4: exit codes (0 = passed)
    "Acceptance mapping", // segment 5: AC -> PASS/FAIL/NOT-VERIFIED
    "Not verified", // segment 6: what could not be proven
    "INSUFFICIENT_EVIDENCE", // Step 3 reviewer verdict when evidence is absent/thin
    "REJECT", // Step 3 verdict when an evidence-grounded hard defect exists
    "acceptance", // re-check maps evidence to acceptance criteria
    "CANDIDATES\\.md", // Step 4 buffers profile candidates instead of dropping them in
    "proposed", // ...via the proposed/confirmed/edited/dropped state machine
    "confirmed",
    "dropped"
  ]);
  // Guard the specific regression P0-5 fixed: candidates must not be dropped
  // straight into the long-term profile directory.
  const dropsIntoProfile = /drop it into\s+`?\.\.\/profile\/`?[^C]/i.test(firstRunDoc);
  if (dropsIntoProfile) {
    errors.push("10-minute-your-task.md still drops profile candidates straight into ../profile/ (must buffer in CANDIDATES.md first)");
  }

  // Deep structural validation (P2): goes past presence + keyword into the
  // substance and integrity of the workspace. Failures join the same `errors`
  // list, so CLI `check` and the contract validator keep failing on a degraded
  // workspace without an interface change; advisory findings go to `warnings`.
  const counters = { deepChecks: 0 };
  deepValidate(workspace, errors, warnings, counters);
  checks += counters.deepChecks;

  const ok = errors.length === 0;
  return { ok, errors, warnings, checks, deepChecks: counters.deepChecks };
}
|