@fenglimg/fabric-cli 2.0.0-rc.22 → 2.0.0-rc.25

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -22,6 +22,39 @@ try {
22
22
  // install integration tests, not silently swallow).
23
23
  const { renderBanner, readFabricLanguage } = require("./lib/banner-i18n.cjs");
24
24
 
25
+ // v2.0.0-rc.24 TASK-04: shared cite-line parser (CJS twin of
26
+ // packages/shared/src/cite-line-parser.ts, byte-shipped via installHookLibs).
27
+ // Provides `parseCiteLine(raw)` → { cite_ids, cite_tags, cite_commitments }.
28
+ // Hook runtime has no node_modules access; the twin is hand-synced and
29
+ // behavior-parity-tested against the TS source.
30
+ let citeLineParser = null;
31
+ try {
32
+ citeLineParser = require("./lib/cite-line-parser.cjs");
33
+ } catch {
34
+ // Helper module missing — degrade silently (e.g. mid-upgrade where the
35
+ // hook script lands before the lib is copied). There is no in-file regex
36
+ // fallback: in degraded mode parseKbLine returns empty cite_ids,
37
+ // cite_tags and cite_commitments arrays.
38
+ citeLineParser = null;
39
+ }
40
+
41
+ // v2.0.0-rc.24 TASK-05: L1 enforcement layer — soft Stop hook reminder for
42
+ // [recalled] cites of decision/pitfall types that arrived without operator
43
+ // contract or skip:<reason>. Reads .fabric/agents.meta.json (via
44
+ // lib/cite-contract-reminder.cjs#readKnowledgeTypeMap) to type-route cite
45
+ // ids per B6 lock; emits one
46
+ // ⚠ KB: <id> cited as [recalled] but missing contract; add → edit:<glob>
47
+ // or → skip:<reason> next turn
48
+ // line to stderr per offending id. Non-blocking, never throws.
49
+ let citeContractReminder = null;
50
+ try {
51
+ citeContractReminder = require("./lib/cite-contract-reminder.cjs");
52
+ } catch {
53
+ // Helper module missing — soft reminder simply doesn't fire. Audit-side
54
+ // doctor (TASK-08) still catches contract violations at the next run.
55
+ citeContractReminder = null;
56
+ }
57
+
25
58
  // CONSTANTS — duplicated from packages/server/src/services/_shared.ts.
26
59
  // DRY violation accepted: this hook script runs in user repos WITHOUT
27
60
  // node_modules access, so it cannot import from @fenglimg/fabric-server.
@@ -1005,93 +1038,45 @@ function tryReadStdinJson() {
1005
1038
  }
1006
1039
 
1007
1040
  /**
1008
- * v2.0.0-rc.20 TASK-03: parse the raw text that follows the `KB:` prefix on
1009
- * the first non-empty line of an assistant turn. Returns parsed cite ids and
1010
- * the per-id tag enum vocabulary (planned/recalled/chained-from/dismissed/
1011
- * none). Never throws; best-effort tolerant parser.
1041
+ * v2.0.0-rc.20 TASK-03 v2.0.0-rc.24 TASK-04: legacy shim signature for
1042
+ * parsing the raw text that follows the `KB:` prefix on the first non-empty
1043
+ * line of an assistant turn. As of rc.24 the implementation delegates to the
1044
+ * shared `parseCiteLine` (inline-shipped via lib/cite-line-parser.cjs) to
1045
+ * eliminate per-client regex drift.
1012
1046
  *
1013
- * Vocabulary contract (mirrored in
1014
- * packages/shared/src/schemas/event-ledger.ts → assistantTurnObservedEventSchema):
1015
- * - "none" ids=[], tags=["none"]
1016
- * - "KP-001" ids=["KP-001"], tags=[]
1017
- * - "KP-001, KT-DEC-0009 (review)" → ids=["KP-001","KT-DEC-0009"], tags=["review"]
1018
- * - "KP-001 [recalled][chained-from KP-002]"
1019
- * ids=["KP-001"], tags=["recalled","chained-from"]
1020
- * - "dismissed:<reason>" → ids=[], tags=["dismissed"]
1021
- * (kb_line_raw preserves the full "dismissed:<reason>" verbatim;
1022
- * the parsed `cite_tags` only carries the enum value "dismissed".)
1047
+ * Contract (rc.24 strict mode — superset of rc.20):
1048
+ * - Sentinel `none` (incl. `[no-relevant]` / `[not-applicable]` tail)
1049
+ * cite_ids=[], cite_tags=["none"], cite_commitments=[]
1050
+ * - `KT-DEC-0001 [planned]` cite_ids=["KT-DEC-0001"], cite_tags=["planned"],
1051
+ * cite_commitments=[{operators:[], skip_reason:null}]
1052
+ * - `KT-DEC-0001 [recalled] edit:foo.ts` cite_commitments=[{operators:
1053
+ * [{kind:"edit", target:"foo.ts"}], skip_reason:null}]
1054
+ * - `KT-DEC-0001 [recalled] → skip:sequencing` → cite_commitments=[{operators:
1055
+ * [], skip_reason:"sequencing"}]
1056
+ * - Id form is now strict `K[TP]-[A-Z]+-\d+` (rc.20 lax form `KP-001`
1057
+ * without letter-prefix is rejected — see TASK-03 schema).
1023
1058
  *
1024
- * Tags are filtered to the Zod enum set
1025
- * { planned, recalled, chained-from, dismissed, none } before being returned
1026
- * arbitrary parenthetical/bracket text outside the enum is dropped (silently)
1027
- * so the emitted event always round-trips through the schema.
1059
+ * Argument is the post-`KB:` substring (matches the rc.20 call site). Returns
1060
+ * { cite_ids, cite_tags, cite_commitments }; cite_commitments was added in
1061
+ * rc.24 and is always present (empty array when no cite-line found).
1062
+ *
1063
+ * Never throws.
1028
1064
  */
1029
1065
  function parseKbLine(raw) {
1030
- const result = { cite_ids: [], cite_tags: [] };
1031
- if (typeof raw !== "string") return result;
1032
- const trimmed = raw.trim();
1033
- if (trimmed.length === 0) return result;
1034
-
1035
- // dismissed:<reason> → tag="dismissed", no ids.
1036
- if (/^dismissed:/i.test(trimmed)) {
1037
- result.cite_tags.push("dismissed");
1038
- return result;
1066
+ // Compose the full `KB: <raw>` line because the shared parser anchors on
1067
+ // the `KB:` prefix. Handles the legacy `none` / `<sentinel>` inputs naturally
1068
+ // because parseCiteLine's SENTINEL_RE matches the composed line.
1069
+ if (typeof raw !== "string") {
1070
+ return { cite_ids: [], cite_tags: [], cite_commitments: [] };
1039
1071
  }
1040
- // bare "none" tag="none".
1041
- if (/^none$/i.test(trimmed)) {
1042
- result.cite_tags.push("none");
1043
- return result;
1072
+ const composed = `KB: ${raw}`;
1073
+ if (citeLineParser && typeof citeLineParser.parseCiteLine === "function") {
1074
+ return citeLineParser.parseCiteLine(composed);
1044
1075
  }
1045
-
1046
- // Allowed tag enum (matches assistantTurnObservedEventSchema.cite_tags z.enum).
1047
- const ALLOWED_TAGS = new Set(["planned", "recalled", "chained-from", "dismissed", "none"]);
1048
- const tagSet = new Set();
1049
-
1050
- // Extract bracketed tags: `[recalled]`, `[chained-from KP-002]`, etc.
1051
- // We keep only the leading enum token (split on whitespace) so trailing
1052
- // ref-ids inside the bracket are discarded — they're not part of the tag
1053
- // vocabulary.
1054
- const bracketRegex = /\[([^\]]+)\]/g;
1055
- let bracketMatch;
1056
- let stripped = trimmed;
1057
- while ((bracketMatch = bracketRegex.exec(trimmed)) !== null) {
1058
- const inner = bracketMatch[1].trim();
1059
- if (inner.length === 0) continue;
1060
- const head = inner.split(/\s+/)[0].toLowerCase();
1061
- if (ALLOWED_TAGS.has(head)) tagSet.add(head);
1062
- }
1063
- stripped = stripped.replace(bracketRegex, " ");
1064
-
1065
- // Extract parenthetical tags: `(review)`, `(planned)`, etc. Only enum
1066
- // members are retained.
1067
- const parenRegex = /\(([^)]+)\)/g;
1068
- let parenMatch;
1069
- while ((parenMatch = parenRegex.exec(trimmed)) !== null) {
1070
- const inner = parenMatch[1].trim().toLowerCase();
1071
- if (inner.length === 0) continue;
1072
- if (ALLOWED_TAGS.has(inner)) tagSet.add(inner);
1073
- }
1074
- stripped = stripped.replace(parenRegex, " ");
1075
-
1076
- // Remaining content: comma-separated ids, possibly with stray whitespace.
1077
- // We split on comma, trim, and keep tokens that look like ref-ids
1078
- // (uppercase letter prefix). This filters out leftover english words and
1079
- // is permissive enough to allow custom id schemes (KP-, KT-, KD-, etc.).
1080
- const parts = stripped.split(",");
1081
- for (const partRaw of parts) {
1082
- const part = partRaw.trim();
1083
- if (part.length === 0) continue;
1084
- // Take the leading token (whitespace-bounded) so "KT-DEC-0009 garbage"
1085
- // still yields "KT-DEC-0009".
1086
- const token = part.split(/\s+/)[0];
1087
- if (/^[A-Z][A-Z0-9-]+$/.test(token)) {
1088
- result.cite_ids.push(token);
1089
- }
1090
- }
1091
-
1092
- // Materialise tagSet → array (preserves insertion order via Set semantics).
1093
- for (const t of tagSet) result.cite_tags.push(t);
1094
- return result;
1076
+ // Degraded fallback: lib missing (e.g. partial install). Emit empty result
1077
+ // so downstream consumers see the cite-line as unobservable rather than
1078
+ // mis-parsed. The Stop-hook contract is best-effort, never blocking.
1079
+ return { cite_ids: [], cite_tags: [], cite_commitments: [] };
1095
1080
  }
1096
1081
 
1097
1082
  /**
@@ -1186,6 +1171,13 @@ function extractAndWriteAssistantTurnsBestEffort(cwd, stdinPayload) {
1186
1171
  kb_line_raw: turn.kb_line_raw,
1187
1172
  cite_ids: Array.isArray(turn.cite_ids) ? turn.cite_ids : [],
1188
1173
  cite_tags: Array.isArray(turn.cite_tags) ? turn.cite_tags : [],
1174
+ // rc.24 TASK-04: cite_commitments parallel array (assistantTurn
1175
+ // ObservedEventSchema gained this slot in rc.24 TASK-01). Empty
1176
+ // array for legacy turns or when the parser lib is unavailable —
1177
+ // the schema defaults `.default([])` so omitting it would also be
1178
+ // valid, but emitting an explicit `[]` keeps the on-disk shape
1179
+ // uniform across rc.24+ events.
1180
+ cite_commitments: Array.isArray(turn.cite_commitments) ? turn.cite_commitments : [],
1189
1181
  turn_id: `${sessionId}-${turn.envelope_index}`,
1190
1182
  envelope_index: turn.envelope_index,
1191
1183
  timestamp: new Date().toISOString(),
@@ -1280,6 +1272,11 @@ function summarizeTranscript(transcriptPath) {
1280
1272
  let kbLineRaw = null;
1281
1273
  let citeIds = [];
1282
1274
  let citeTags = [];
1275
+ // rc.24 TASK-04: parallel `cite_commitments` array, populated by the
1276
+ // shared cite-line parser. One entry per non-sentinel cite (index-aligned
1277
+ // with cite_ids). Sentinel `KB: none` contributes a `cite_tags=["none"]`
1278
+ // entry but no commitment — matches the parseCiteLine index contract.
1279
+ let citeCommitments = [];
1283
1280
  if (typeof firstText === "string" && firstText.length > 0) {
1284
1281
  // First non-empty line.
1285
1282
  const linesOfText = firstText.split(/\r?\n/);
@@ -1291,19 +1288,23 @@ function summarizeTranscript(transcriptPath) {
1291
1288
  }
1292
1289
  }
1293
1290
  if (firstNonEmpty.length > 0) {
1294
- // KB: none (case-insensitive on the literal `none`).
1295
- const noneMatch = firstNonEmpty.match(/^KB:\s*none\s*$/i);
1296
- const kbMatch = firstNonEmpty.match(/^KB:\s+(.+)$/);
1297
- if (noneMatch) {
1291
+ // rc.24 TASK-04: route the FULL `KB: ...` line to the shared parser.
1292
+ // parseCiteLine handles sentinels (`KB: none [<reason>]`) AND full
1293
+ // cite form including contract tail (`KB: KT-DEC-0001 [recalled] →
1294
+ // edit:foo.ts`) uniformly. The sentinel's `[<reason>]` tail stays in
1295
+ // `kb_line_raw` for doctor's downstream histogram parse; cite_tags
1296
+ // still emits the bare `none` token (schema enum-bound).
1297
+ if (/^KB:\s*/i.test(firstNonEmpty)) {
1298
1298
  kbLineRaw = firstNonEmpty;
1299
- const parsed = parseKbLine("none");
1300
- citeIds = parsed.cite_ids;
1301
- citeTags = parsed.cite_tags;
1302
- } else if (kbMatch) {
1303
- kbLineRaw = firstNonEmpty;
1304
- const parsed = parseKbLine(kbMatch[1]);
1305
- citeIds = parsed.cite_ids;
1306
- citeTags = parsed.cite_tags;
1299
+ if (citeLineParser && typeof citeLineParser.parseCiteLine === "function") {
1300
+ const parsed = citeLineParser.parseCiteLine(firstNonEmpty);
1301
+ citeIds = parsed.cite_ids;
1302
+ citeTags = parsed.cite_tags;
1303
+ citeCommitments = parsed.cite_commitments;
1304
+ }
1305
+ // Degraded mode (lib missing) → keep kbLineRaw but emit empty
1306
+ // arrays; doctor downstream treats this as "turn observed, parse
1307
+ // unavailable" without crashing.
1307
1308
  }
1308
1309
  }
1309
1310
  }
@@ -1312,6 +1313,7 @@ function summarizeTranscript(transcriptPath) {
1312
1313
  kb_line_raw: kbLineRaw,
1313
1314
  cite_ids: citeIds,
1314
1315
  cite_tags: citeTags,
1316
+ cite_commitments: citeCommitments,
1315
1317
  });
1316
1318
  }
1317
1319
 
@@ -1356,6 +1358,50 @@ function summarizeTranscript(transcriptPath) {
1356
1358
  return out;
1357
1359
  }
1358
1360
 
1361
/**
 * v2.0.0-rc.24 TASK-05: emit the soft L1 reminder to stderr when assistant
 * turns cited a decision/pitfall id with [recalled] but supplied neither an
 * operator contract nor a skip:<reason>.
 *
 * Flow: summarise the transcript named by `stdinPayload.transcript_path`,
 * build the id → knowledge_type map for `cwd` through the reminder lib, ask
 * the lib for the reminder lines, then write each line (newline-terminated)
 * to the sink.
 *
 * The reminders go to stderr (the hook contract: stdout is structured banner
 * JSON consumed by the harness; stderr is free-text system message that
 * surfaces back to the model on the next turn in cc / codex / cursor).
 *
 * @param {string} cwd - workspace root handed to the lib's readKnowledgeTypeMap.
 * @param {object|null} stdinPayload - hook stdin payload; only
 *   `transcript_path` is read here.
 * @param {{write: Function}} [stderr] - sink override (used by tests);
 *   defaults to `process.stderr`.
 * @returns {string[]} the reminder lines that were actually written. A line
 *   whose write threw is omitted, so the return value reflects what reached
 *   the sink. Empty when the lib is missing, the payload is unusable, no
 *   turns or types are available, or anything throws.
 *
 * Non-blocking — never throws.
 */
function emitCiteContractRemindersBestEffort(cwd, stdinPayload, stderr) {
  // Lib failed to load at module init → the soft reminder simply does not fire.
  if (citeContractReminder === null) return [];
  if (stdinPayload === null || typeof stdinPayload !== "object") return [];

  try {
    const transcript = summarizeTranscript(stdinPayload.transcript_path);
    const turns = transcript.assistant_turns;
    if (!Array.isArray(turns) || turns.length === 0) return [];

    const idTypeMap = citeContractReminder.readKnowledgeTypeMap(cwd);
    if (!(idTypeMap instanceof Map) || idTypeMap.size === 0) return [];

    const reminders = citeContractReminder.formatContractMissingReminders({
      assistant_turns: turns,
      idTypeMap,
    });
    if (!Array.isArray(reminders) || reminders.length === 0) return [];

    const sink = stderr || process.stderr;
    const emitted = [];
    for (const line of reminders) {
      try {
        sink.write(`${line}\n`);
        // Record only after a successful write so callers observe exactly
        // what was surfaced.
        emitted.push(line);
      } catch {
        // Sink write failure must not abort emission of remaining reminders.
      }
    }
    return emitted;
  } catch {
    // Outer guard — never throw. Hook continues silently.
    return [];
  }
}
1404
+
1359
1405
  /**
1360
1406
  * v2.0.0-rc.7 T5: writeSessionDigestBestEffort — non-blocking digest fan-out.
1361
1407
  * Called from main() before the existing decide() flow. Failure is silently
@@ -1409,6 +1455,16 @@ function main(env, stdio) {
1409
1455
  // the hook's other I/O).
1410
1456
  extractAndWriteAssistantTurnsBestEffort(cwd, stdinPayload);
1411
1457
 
1458
+ // v2.0.0-rc.24 TASK-05: L1 soft reminder layer. Surfaces ⚠ KB:<id> lines
1459
+ // to stderr when decision/pitfall cites arrived with [recalled] tag but
1460
+ // empty contract. Non-blocking, never throws; doctor (TASK-08) catches
1461
+ // any contract violation the model ignored.
1462
+ emitCiteContractRemindersBestEffort(
1463
+ cwd,
1464
+ stdinPayload,
1465
+ stdio && stdio.stderr,
1466
+ );
1467
+
1412
1468
  const events = readLedger(cwd);
1413
1469
  let pendingStats;
1414
1470
  try {
@@ -1622,6 +1678,10 @@ module.exports = {
1622
1678
  parseKbLine,
1623
1679
  detectClient,
1624
1680
  extractAndWriteAssistantTurnsBestEffort,
1681
+ // v2.0.0-rc.24 TASK-05: L1 soft reminder helpers (exported for unit testing
1682
+ // of the contract-missing emission contract). The lib module itself is
1683
+ // also exported indirectly via the reminder helper.
1684
+ emitCiteContractRemindersBestEffort,
1625
1685
  CONSTANTS: {
1626
1686
  FABRIC_DIR,
1627
1687
  EVENT_LEDGER_FILE,
@@ -0,0 +1,173 @@
1
+ // v2.0.0-rc.24 TASK-05: L1 Stop hook soft reminder for missing cite contract.
2
+ //
3
+ // Reads `.fabric/agents.meta.json` to build a stable_id → knowledge_type lookup
4
+ // map, then scans summarised assistant turns (cite_ids + cite_tags +
5
+ // cite_commitments parallel arrays produced by lib/cite-line-parser.cjs) for
6
+ // turns that cited a decision-class or pitfall-class id with [recalled] tag
7
+ // but no operator commitment and no skip:<reason>.
8
+ //
9
+ // Emits one reminder line per offending id (deduplicated across the turn
10
+ // summary). Non-blocking — caller writes the lines to stderr; failure to
11
+ // load the meta file or absence of offenders means zero output.
12
+ //
13
+ // Reminder template (rc.24 lock B2 / L1 enforcement layer):
14
+ // ⚠ KB: <id> cited as [recalled] but missing contract; add → edit:<glob>
15
+ // or → skip:<reason> next turn
16
+ //
17
+ // Type filter rationale: only `decision` and `pitfall` types are contract-
18
+ // required per rc.24 design lock B6 (idTypeMap routing). `model`,
19
+ // `guideline`, `process` use reference-cite or LLM-judge (deferred to rc.25+)
20
+ // and are intentionally skipped here to avoid false-positive nudges.
21
+ //
22
+ // agents.meta.json schema note: `description.knowledge_type` values are
23
+ // SINGULAR (`decision`, `pitfall`, `model`, `guideline`, `process`) per
24
+ // packages/shared/src/schemas/agents-meta.ts. The reminder filter normalises
25
+ // any plural input defensively but the canonical contract is singular.
26
+ //
27
+ // Reading happens once per hook invocation (caller passes the projectRoot;
28
+ // the lib does the fs read internally). The map is small (<200 entries in
29
+ // typical corpora) so caching beyond the per-invocation scope is unnecessary.
30
+
31
+ const { existsSync, readFileSync } = require("node:fs");
32
+ const { join } = require("node:path");
33
+
34
// Path segments of the agents metadata file, relative to the project root:
// <projectRoot>/.fabric/agents.meta.json.
const FABRIC_DIR = ".fabric";
const AGENTS_META_FILE = "agents.meta.json";

// knowledge_type values whose [recalled] cites must carry a contract
// commitment (operator or skip reason). The singular spellings are the
// canonical ones; the plural spellings are listed too so that a future
// schema switch to plurals would not silently disable the filter.
const CONTRACT_REQUIRED_TYPES = new Set(["decision", "decisions", "pitfall", "pitfalls"]);
48
+
49
/**
 * Load <projectRoot>/.fabric/agents.meta.json and index it as
 * Map<stable_id, knowledge_type>.
 *
 * Each key of the file's top-level `nodes` object is taken as the id, and
 * the value recorded for it is `nodes[id].description.knowledge_type`.
 * Entries whose node or description is not an object, or whose
 * knowledge_type is not a non-empty string, are left out.
 *
 * Never throws — a bad projectRoot, a missing or unreadable file, malformed
 * JSON, or a missing/non-object `nodes` key all produce an empty Map, which
 * turns the caller's downstream filter into a no-op.
 *
 * @param {string} projectRoot - workspace root
 * @returns {Map<string, string>} stable_id → knowledge_type, stored verbatim
 */
function readKnowledgeTypeMap(projectRoot) {
  const typeById = new Map();
  if (typeof projectRoot !== "string" || projectRoot.length === 0) return typeById;

  const metaPath = join(projectRoot, FABRIC_DIR, AGENTS_META_FILE);
  if (!existsSync(metaPath)) return typeById;

  // Read and decode in one guarded step; either failure means "no map".
  let meta;
  try {
    meta = JSON.parse(readFileSync(metaPath, "utf8"));
  } catch {
    return typeById;
  }

  const isObjectLike = (value) => value !== null && typeof value === "object";
  if (!isObjectLike(meta) || !isObjectLike(meta.nodes)) return typeById;

  for (const [stableId, node] of Object.entries(meta.nodes)) {
    if (!isObjectLike(node) || !isObjectLike(node.description)) continue;
    const knowledgeType = node.description.knowledge_type;
    if (typeof knowledgeType === "string" && knowledgeType.length > 0) {
      typeById.set(stableId, knowledgeType);
    }
  }

  return typeById;
}
95
+
96
/**
 * Scan parsed assistant turns for cites that should carry a contract but
 * don't, and return the reminder lines to emit.
 *
 * An id cited at index i of a turn is flagged when all of these hold:
 *   1. the turn's cite_tags includes "recalled" (turn-level check: cite_tags
 *      is not index-aligned with cite_ids, so the tag is read per turn);
 *   2. cite_commitments[i] is an object with no operators and no non-empty
 *      string skip_reason;
 *   3. idTypeMap.get(cite_ids[i]) is one of CONTRACT_REQUIRED_TYPES.
 *
 * cite_ids and cite_commitments are treated as index-aligned parallel
 * arrays. An id with no commitment object at its index is ignored (nothing
 * to judge). Sentinel turns (cite_ids=[]) contribute nothing because the id
 * loop has zero iterations.
 *
 * Resolution: turns are processed in array order, and a later cite of the
 * same id that DOES carry a contract clears an earlier flag. The reminder
 * asks the model to add the contract "next turn"; without this, an id
 * flagged once would be reported again on every later scan of the same
 * turns even after the model complied.
 *
 * Offenders are deduplicated by id across the whole turn array; several
 * turns citing the same id yield ONE reminder line. Lines are ordered by
 * when each id (most recently) became an offender.
 *
 * @param {Object} args
 * @param {Array<{cite_ids: string[], cite_tags: string[], cite_commitments: Array<{operators: Array<unknown>, skip_reason: string|null}>}>} args.assistant_turns
 * @param {Map<string, string>} args.idTypeMap - stable_id → knowledge_type
 * @returns {string[]} reminder lines (empty when no offenders)
 */
function formatContractMissingReminders({ assistant_turns, idTypeMap }) {
  if (!Array.isArray(assistant_turns) || assistant_turns.length === 0) return [];
  if (!(idTypeMap instanceof Map) || idTypeMap.size === 0) return [];

  // Ids whose latest judged cite is still missing a contract. A Set keeps
  // insertion order, which gives the output a stable ordering.
  const offenders = new Set();

  for (const turn of assistant_turns) {
    if (turn === null || typeof turn !== "object") continue;
    const citeIds = Array.isArray(turn.cite_ids) ? turn.cite_ids : [];
    const citeTags = Array.isArray(turn.cite_tags) ? turn.cite_tags : [];
    const commitments = Array.isArray(turn.cite_commitments) ? turn.cite_commitments : [];

    const turnIsRecalled = citeTags.includes("recalled");

    for (let i = 0; i < citeIds.length; i += 1) {
      const id = citeIds[i];
      if (typeof id !== "string" || id.length === 0) continue;

      // Unknown ids resolve to undefined, which is never in the set.
      if (!CONTRACT_REQUIRED_TYPES.has(idTypeMap.get(id))) continue;

      const commitment = commitments[i];
      if (commitment === null || typeof commitment !== "object") continue;

      const hasOperators = Array.isArray(commitment.operators) && commitment.operators.length > 0;
      const hasSkipReason =
        typeof commitment.skip_reason === "string" && commitment.skip_reason.length > 0;

      if (hasOperators || hasSkipReason) {
        // Contract supplied (in this or a later turn) → the reminder is moot.
        offenders.delete(id);
      } else if (turnIsRecalled) {
        offenders.add(id);
      }
    }
  }

  return Array.from(
    offenders,
    (id) =>
      `⚠ KB: ${id} cited as [recalled] but missing contract; add \`→ edit:<glob>\` or \`→ skip:<reason>\` next turn`,
  );
}
168
+
169
+ module.exports = {
170
+ readKnowledgeTypeMap,
171
+ formatContractMissingReminders,
172
+ CONTRACT_REQUIRED_TYPES,
173
+ };
@@ -0,0 +1,118 @@
1
+ // v2.0.0-rc.24 TASK-04: CJS twin of packages/shared/src/cite-line-parser.ts.
2
+ //
3
+ // Hook runtime has NO node_modules access, so the shared TS module cannot be
4
+ // imported. This file is a hand-authored CJS mirror; behavioral parity is
5
+ // asserted by packages/cli/__tests__/cite-line-parser-parity.test.ts which
6
+ // runs both implementations against the same corpus and asserts identical
7
+ // output. Any drift between this file and ../../shared/src/cite-line-parser.ts
8
+ // MUST be reflected in BOTH files plus the parity-test corpus, otherwise the
9
+ // parity test fails and blocks the commit.
10
+ //
11
+ // Why a hand-authored twin (not transpile-at-install or string-template inject)?
12
+ // - tsup/esbuild are CLI build-time deps, NOT install-time deps; bundling
13
+ // them into the install pipeline grows the user-facing footprint.
14
+ // - The parser is small (≤150 LOC), pure (zero deps), and rarely changes —
15
+ // hand-syncing is cheaper than introducing transpile machinery.
16
+ // - The existing `installHookLibs` pipeline auto-copies every `.cjs` under
17
+ // templates/hooks/lib/ to each client's hooks/lib/ dir, so this file
18
+ // auto-ships to cc/codex/cursor with no install pipeline change.
19
+ //
20
+ // Vocabulary contract (mirrored 1:1 with the TS source):
21
+ // - cite_tags enum: planned | recalled | chained-from | dismissed | none
22
+ // - operator kinds: edit | not_edit | require | forbid
23
+ // (source token `!edit:` → schema kind `not_edit`)
24
+ // - skip:<reason> captures everything after the first colon, so
25
+ // `skip:other:non-codifiable` yields skip_reason="other:non-codifiable".
26
+ // - Index contract: cite_commitments[i] ↔ cite_ids[i]. Sentinel `KB: none`
27
+ // contributes a "none" cite_tag only — no id, no commitment.
28
+
29
// Strict knowledge id: `KT-` or `KP-`, an uppercase letter segment, then digits.
const ID_RE = /^K[TP]-[A-Z]+-\d+$/;

// Sentinel line: `KB: none`, optionally followed by one `[...]` tail.
// Case-insensitive; the `\b` keeps words like `nonesuch` from matching.
const SENTINEL_RE = /^KB:\s*none\b\s*(?:\[[^\]]*\])?\s*$/i;

// Full cite line. Capture groups:
//   1 = id, 2 = optional `( ... )` text, 3 = optional `[ ... ]` tag text,
//   4 = optional contract tail following the `→` arrow.
const FULL_RE =
  /^KB:\s+(K[TP]-[A-Z]+-\d+)(?:\s+\(([^)]*)\))?(?:\s+\[([^\]]+)\])?(?:\s+→\s*(.+))?\s*$/;

// The cite_tags enum vocabulary; anything outside it collapses to "none".
const ALLOWED_TAGS = new Set(["planned", "recalled", "chained-from", "dismissed", "none"]);
41
+
42
/**
 * Reduce the text captured inside a `[...]` tag to a cite_tags enum value.
 *
 * Only the head token counts: the text is trimmed, cut at the first run of
 * whitespace and/or colons (so `chained-from KT-DEC-0001` and
 * `dismissed:scope-mismatch` keep just their leading word) and lowercased.
 *
 * @param {string|undefined} rawTag - bracket contents, or a falsy value
 *   when the cite line carried no tag.
 * @returns {string} the head token when it is in ALLOWED_TAGS, else "none".
 */
function parseTag(rawTag) {
  if (!rawTag) return "none";
  const [headToken] = rawTag.trim().split(/[\s:]+/);
  const candidate = headToken.toLowerCase();
  if (ALLOWED_TAGS.has(candidate)) return candidate;
  return "none";
}
49
+
50
/**
 * Parse the contract tail of a cite line (the text after the `→` arrow)
 * into operator commitments and an optional skip reason.
 *
 * The tail is split on whitespace and each token is classified,
 * case-insensitively on the keyword:
 *   - `skip:<reason>`  → skip_reason; the first one wins and the reason keeps
 *                        any further colons (`skip:other:x` → "other:x")
 *   - `!edit:<target>` → operator kind "not_edit"
 *   - `edit:` / `require:` / `forbid:<target>` → operator with that
 *                        (lowercased) kind
 *   - anything else    → dropped, for forward compatibility
 *
 * @param {string|undefined} tail - contract tail, or a falsy value when absent.
 * @returns {{operators: Array<{kind: string, target: string}>, skip_reason: string|null}}
 */
function parseContractTail(tail) {
  const commitment = { operators: [], skip_reason: null };
  if (!tail) return commitment;

  for (const token of tail.trim().split(/\s+/)) {
    // An all-whitespace tail splits into one empty token.
    if (token.length === 0) continue;

    const skip = /^skip:(.+)$/i.exec(token);
    if (skip !== null) {
      if (commitment.skip_reason === null) {
        commitment.skip_reason = skip[1];
      }
      continue;
    }

    // Checked before the generic operators: `!edit:` maps to "not_edit".
    const negatedEdit = /^!edit:(.+)$/i.exec(token);
    if (negatedEdit !== null) {
      commitment.operators.push({ kind: "not_edit", target: negatedEdit[1] });
      continue;
    }

    const operator = /^(edit|require|forbid):(.+)$/i.exec(token);
    if (operator !== null) {
      const [, kind, target] = operator;
      commitment.operators.push({ kind: kind.toLowerCase(), target });
    }
  }

  return commitment;
}
78
+
79
/**
 * Classify one line of text as a sentinel, a full cite, or neither.
 *
 * @param {string} line - a single line (surrounding whitespace is ignored).
 * @returns {{id: string|null, tag: string, commitment: object|null}|null}
 *   - sentinel (`KB: none ...`) → { id: null, tag: "none", commitment: null }
 *   - full cite                 → { id, tag, commitment } with the tag taken
 *                                 from the bracket text and the commitment
 *                                 from the contract tail
 *   - blank or unrecognised     → null
 */
function parseLine(line) {
  const text = line.trim();
  if (text.length === 0) return null;

  if (SENTINEL_RE.test(text)) {
    return { id: null, tag: "none", commitment: null };
  }

  const cite = FULL_RE.exec(text);
  if (cite === null) return null;

  // Group 2 (the parenthetical) is matched but not consumed.
  const [, id, , bracketText, contractTail] = cite;
  if (!ID_RE.test(id)) return null;

  return {
    id,
    tag: parseTag(bracketText),
    commitment: parseContractTail(contractTail),
  };
}
97
+
98
/**
 * Parse one or more newline-separated `KB:` cite lines into the structured
 * arrays used by the assistant_turn_observed event-ledger fields.
 *
 * The input is split on LF / CRLF and every line goes through parseLine;
 * lines it does not recognise (blank lines, interleaved prose) are skipped.
 * For each recognised line:
 *   - its tag is always appended to cite_tags;
 *   - its id is appended to cite_ids when it has one;
 *   - its commitment is appended to cite_commitments when it has one.
 * A sentinel line therefore adds a tag only, which keeps cite_commitments[i]
 * aligned with cite_ids[i].
 *
 * @param {string} raw - text containing zero or more cite lines.
 * @returns {{cite_ids: string[], cite_tags: string[], cite_commitments: Array<object>}}
 *   All arrays are empty for non-string input. Never throws.
 */
function parseCiteLine(raw) {
  const parsedCites = { cite_ids: [], cite_tags: [], cite_commitments: [] };
  if (typeof raw !== "string") return parsedCites;

  raw.split(/\r?\n/).forEach((line) => {
    const entry = parseLine(line);
    if (!entry) return;

    parsedCites.cite_tags.push(entry.tag);
    if (entry.id !== null) {
      parsedCites.cite_ids.push(entry.id);
    }
    if (entry.commitment !== null) {
      parsedCites.cite_commitments.push(entry.commitment);
    }
  });

  return parsedCites;
}
117
+
118
+ module.exports = { parseCiteLine };