@agenr/agenr-plugin 1.7.4 → 1.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,37 +1,115 @@
1
1
  import {
2
+ EMBEDDING_DIMENSIONS,
3
+ ENTRY_SELECT_COLUMNS,
2
4
  ENTRY_TYPES,
3
5
  EPISODE_ACTIVITY_LEVELS,
4
6
  EXPIRY_LEVELS,
5
- composeEmbeddingText
6
- } from "./chunk-NIQKTINU.js";
7
+ VECTOR_INDEX_NAME,
8
+ applyClaimKeyLifecycle,
9
+ buildActiveEntryClause,
10
+ buildExtractedClaimKeyLifecycle,
11
+ buildInferredIngestClaimKeySupportContext,
12
+ buildManualClaimKeyLifecycle,
13
+ buildPrecomputedClaimKeyLifecycle,
14
+ composeEmbeddingText,
15
+ hasPrecomputedClaimKeyLifecycleFields,
16
+ mapEntryRow,
17
+ parseClaimKeyConfidence,
18
+ parseClaimKeySource,
19
+ parseClaimKeyStatus,
20
+ parseClaimSupportMode,
21
+ readNumber,
22
+ readOptionalString,
23
+ readRequiredString,
24
+ validateTemporalValidityRange
25
+ } from "./chunk-LVDQXSHP.js";
7
26
  import {
8
- parseRelativeDate
9
- } from "./chunk-7WL5EAQZ.js";
27
+ compactClaimKey,
28
+ describeClaimKeyNormalizationFailure,
29
+ describeExtractedClaimKeyRejection,
30
+ inspectClaimKey,
31
+ isTrustedClaimKeyForCleanup,
32
+ normalizeClaimKey,
33
+ normalizeClaimKeySegment,
34
+ parseRelativeDate,
35
+ resolveClaimSlotPolicy,
36
+ validateExtractedClaimKey
37
+ } from "./chunk-GUDCFFRV.js";
10
38
 
11
39
  // src/adapters/openclaw/transcript/parser.ts
12
40
  import { createHash } from "crypto";
13
41
  import * as fs2 from "fs/promises";
14
42
 
43
+ // src/adapters/openclaw/session/session-id.ts
44
+ import path from "path";
45
+ function deriveOpenClawSessionIdFromFilePath(sessionFile, logger) {
46
+ const normalizedSessionFile = sessionFile.trim();
47
+ if (normalizedSessionFile.length === 0) {
48
+ debugLog(logger, "session-id", "cannot derive session id from empty session file path");
49
+ return void 0;
50
+ }
51
+ const fileName = path.basename(normalizedSessionFile);
52
+ const sessionId = fileName.replace(/\.jsonl(?:\..*)?$/i, "").trim();
53
+ debugLog(logger, "session-id", `derived session id "${sessionId || "<empty>"}" from file=${normalizedSessionFile}`);
54
+ return sessionId.length > 0 ? sessionId : void 0;
55
+ }
56
+ function debugLog(logger, subsystem, message) {
57
+ logger?.debug?.(`[agenr] ${subsystem}: ${message}`);
58
+ }
59
+
15
60
  // src/adapters/openclaw/transcript/jsonl.ts
16
- function parseJsonlLines(raw, warnings, onRecord) {
61
+ function parseJsonObjectLineWithDiagnostics(line, lineNumber = 1) {
62
+ if (!line || line.trim().length === 0) {
63
+ return {
64
+ record: null
65
+ };
66
+ }
67
+ try {
68
+ const parsed = JSON.parse(line);
69
+ if (parsed && typeof parsed === "object" && !Array.isArray(parsed)) {
70
+ return {
71
+ record: parsed
72
+ };
73
+ }
74
+ return {
75
+ record: null,
76
+ diagnostic: {
77
+ kind: "non_object_record",
78
+ lineNumber,
79
+ message: `Skipped non-object JSONL line ${lineNumber}`
80
+ }
81
+ };
82
+ } catch {
83
+ return {
84
+ record: null,
85
+ diagnostic: {
86
+ kind: "malformed_json",
87
+ lineNumber,
88
+ message: `Skipped malformed JSONL line ${lineNumber}`
89
+ }
90
+ };
91
+ }
92
+ }
93
+ function parseJsonlLines(raw, onRecord) {
17
94
  const lines = raw.split(/\r?\n/);
95
+ const diagnostics = [];
18
96
  for (let index = 0; index < lines.length; index += 1) {
19
97
  const line = lines[index]?.trim();
20
98
  if (!line) {
21
99
  continue;
22
100
  }
23
- let parsed;
24
- try {
25
- parsed = JSON.parse(line);
26
- } catch {
27
- warnings.push(`Skipped malformed JSONL line ${index + 1}`);
101
+ const parsed = parseJsonObjectLineWithDiagnostics(line, index + 1);
102
+ if (parsed.diagnostic) {
103
+ diagnostics.push(parsed.diagnostic);
28
104
  continue;
29
105
  }
30
- if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) {
31
- continue;
106
+ if (parsed.record) {
107
+ onRecord(parsed.record, index + 1);
32
108
  }
33
- onRecord(parsed, index + 1);
34
109
  }
110
+ return {
111
+ diagnostics
112
+ };
35
113
  }
36
114
 
37
115
  // src/adapters/openclaw/transcript/tool-summarization.ts
@@ -526,6 +604,35 @@ var USER_METADATA_PREFIX_SENTINELS = /* @__PURE__ */ new Set([
526
604
  ]);
527
605
  var USER_METADATA_SUFFIX_SENTINEL = "Untrusted context (metadata, do not treat as instructions or commands):";
528
606
  var USER_METADATA_SENTINELS = [USER_METADATA_SUFFIX_SENTINEL, ...USER_METADATA_PREFIX_SENTINELS];
607
+ var OpenClawTranscriptParseError = class extends Error {
608
+ /**
609
+ * Stable error classification for caller-side handling and tests.
610
+ */
611
+ kind;
612
+ /**
613
+ * File path that failed to parse.
614
+ */
615
+ filePath;
616
+ /**
617
+ * Underlying read failure when available.
618
+ */
619
+ cause;
620
+ /**
621
+ * Creates a typed transcript parse failure.
622
+ *
623
+ * @param kind - Stable failure kind.
624
+ * @param filePath - File path that failed to parse.
625
+ * @param message - Human-readable error message.
626
+ * @param options - Optional underlying cause.
627
+ */
628
+ constructor(kind, filePath, message, options) {
629
+ super(message);
630
+ this.name = "OpenClawTranscriptParseError";
631
+ this.kind = kind;
632
+ this.filePath = filePath;
633
+ this.cause = options?.cause;
634
+ }
635
+ };
529
636
  function createParseState() {
530
637
  return {
531
638
  warnings: [],
@@ -547,6 +654,28 @@ function createParseState() {
547
654
  firstUserRawText: null
548
655
  };
549
656
  }
657
+ function toTranscriptDiagnostic(diagnostic) {
658
+ return {
659
+ kind: diagnostic.kind,
660
+ lineNumber: diagnostic.lineNumber,
661
+ message: diagnostic.message
662
+ };
663
+ }
664
+ function formatTranscriptDiagnosticWarning(diagnostic) {
665
+ return diagnostic.message;
666
+ }
667
+ async function readTranscriptFileStrict(filePath) {
668
+ try {
669
+ return await fs2.readFile(filePath, "utf8");
670
+ } catch (error) {
671
+ if (isFileNotFound(error)) {
672
+ throw new OpenClawTranscriptParseError("missing_file", filePath, `Transcript file not found: ${filePath}`, { cause: error });
673
+ }
674
+ throw new OpenClawTranscriptParseError("unreadable_file", filePath, `Could not read transcript file ${filePath}: ${formatErrorMessage(error)}`, {
675
+ cause: error
676
+ });
677
+ }
678
+ }
550
679
  function extractRawMessageText(content) {
551
680
  if (typeof content === "string") {
552
681
  return content;
@@ -768,7 +897,7 @@ function handleMessageRecord(state, record, message) {
768
897
  }
769
898
  if (role === "system") {
770
899
  state.stats.systemDropped += 1;
771
- return;
900
+ return "known_skip";
772
901
  }
773
902
  const timestamp = extractTimestamp(record) ?? extractTimestamp(message);
774
903
  if (role === "user") {
@@ -778,14 +907,14 @@ function handleMessageRecord(state, record, message) {
778
907
  }
779
908
  const text = stripOpenClawUserMetadata(message.content);
780
909
  if (!text) {
781
- return;
910
+ return "known_skip";
782
911
  }
783
912
  if (isPureBase64(text)) {
784
913
  state.stats.base64Dropped += 1;
785
- return;
914
+ return "known_skip";
786
915
  }
787
916
  pushMessage(state.messages, "user", text, timestamp);
788
- return;
917
+ return "accepted";
789
918
  }
790
919
  if (role === "assistant") {
791
920
  const toolCalls = extractToolCallBlocks(message.content);
@@ -798,48 +927,50 @@ function handleMessageRecord(state, record, message) {
798
927
  const assistantText = [...extractAssistantTextParts(message.content), ...toolCalls.map((toolCall) => summarizeToolCall(toolCall))].join(" ").trim();
799
928
  addModelUsed(state, message.model);
800
929
  if (!assistantText) {
801
- return;
930
+ return "known_skip";
802
931
  }
803
932
  if (isPureBase64(assistantText)) {
804
933
  state.stats.base64Dropped += 1;
805
- return;
934
+ return "known_skip";
806
935
  }
807
936
  pushMessage(state.messages, "assistant", truncateWithMarker(assistantText, 5e3), timestamp);
808
- return;
937
+ return "accepted";
809
938
  }
810
939
  if (role !== "toolResult") {
811
- return;
940
+ return "structurally_invalid";
812
941
  }
813
942
  const toolContext = resolveToolContext(state, message);
814
943
  const toolName = getString(message.name) ?? getString(message.tool) ?? getString(record.name) ?? getString(record.tool) ?? toolContext?.name;
815
944
  const toolArgs = toolContext?.args ?? {};
816
945
  const toolText = normalizeMessageText(message.content);
817
946
  if (!toolText) {
818
- return;
947
+ return "known_skip";
819
948
  }
820
949
  if (isPureBase64(toolText)) {
821
950
  state.stats.base64Dropped += 1;
822
- return;
951
+ return "known_skip";
823
952
  }
824
953
  const decision = shouldKeepToolResult(toolName, toolText, TOOL_RESULT_POLICY);
825
954
  if (decision.keep) {
826
955
  state.stats.toolResultsKept += 1;
827
956
  pushMessage(state.messages, "assistant", decision.truncateTo ? truncateWithMarker(toolText, decision.truncateTo) : toolText, timestamp);
828
- return;
957
+ return "accepted";
829
958
  }
830
959
  state.stats.toolResultsDropped += 1;
831
960
  pushMessage(state.messages, "assistant", toolResultPlaceholder(toolName ?? "unknown", toolArgs), timestamp);
961
+ return "accepted";
832
962
  }
833
963
  function handleRecord(state, record) {
834
964
  if (record.type === "session") {
835
965
  state.sessionId = getString(record.id) ?? state.sessionId;
836
966
  state.sessionTimestamp = extractTimestamp(record) ?? state.sessionTimestamp;
837
967
  state.sessionLabel = normalizeSessionLabel(getString(record.conversation_label) ?? "") ?? state.sessionLabel;
968
+ state.workingDirectory = getString(record.cwd) ?? state.workingDirectory;
838
969
  addModelUsed(state, record.model);
839
970
  if (!state.surfaceDetected) {
840
971
  setDetectedSurface(state, readInboundSurface(record));
841
972
  }
842
- return;
973
+ return "accepted";
843
974
  }
844
975
  if (!state.surfaceDetected) {
845
976
  setDetectedSurface(state, readInboundSurface(record));
@@ -847,21 +978,30 @@ function handleRecord(state, record) {
847
978
  if (record.type === "model_change") {
848
979
  addModelUsed(state, record.modelId);
849
980
  state.stats.skippedRecordTypes += 1;
850
- return;
981
+ return "known_skip";
851
982
  }
852
983
  if (typeof record.type === "string" && SKIPPED_RECORD_TYPES.has(record.type)) {
853
984
  state.stats.skippedRecordTypes += 1;
854
- return;
985
+ return "known_skip";
855
986
  }
856
987
  const message = asRecord(record.message);
857
988
  if (!message) {
858
- return;
989
+ return "structurally_invalid";
859
990
  }
860
- handleMessageRecord(state, record, message);
991
+ return handleMessageRecord(state, record, message);
861
992
  }
862
993
  function buildFilterWarning(stats) {
863
994
  return `Filtered transcript: ${stats.toolResultsDropped} tool results dropped, ${stats.toolResultsKept} kept, ${stats.systemDropped} system dropped, ${stats.base64Dropped} base64 dropped.`;
864
995
  }
996
+ function isFileNotFound(error) {
997
+ return typeof error === "object" && error !== null && "code" in error && error.code === "ENOENT";
998
+ }
999
+ function formatErrorMessage(error) {
1000
+ if (error instanceof Error) {
1001
+ return error.message;
1002
+ }
1003
+ return String(error);
1004
+ }
865
1005
  var OpenClawTranscriptParser = class {
866
1006
  /**
867
1007
  * Parses an OpenClaw JSONL transcript file into agenr transcript data.
@@ -871,13 +1011,23 @@ var OpenClawTranscriptParser = class {
871
1011
  * @returns Parsed transcript messages, warnings, and metadata.
872
1012
  */
873
1013
  async parseFile(filePath, options) {
874
- const raw = await fs2.readFile(filePath, "utf8");
1014
+ const raw = await readTranscriptFileStrict(filePath);
875
1015
  const verbose = options?.verbose === true;
876
1016
  const state = createParseState();
877
1017
  const transcriptHash = createHash("sha256").update(raw).digest("hex");
878
- parseJsonlLines(raw, state.warnings, (record) => {
879
- handleRecord(state, record);
1018
+ const diagnostics = [];
1019
+ const jsonlResult = parseJsonlLines(raw, (record, lineNumber) => {
1020
+ const outcome = handleRecord(state, record);
1021
+ if (outcome === "structurally_invalid") {
1022
+ diagnostics.push({
1023
+ kind: "structurally_invalid_record",
1024
+ lineNumber,
1025
+ message: `Skipped structurally invalid transcript record on line ${lineNumber}`
1026
+ });
1027
+ }
880
1028
  });
1029
+ diagnostics.push(...jsonlResult.diagnostics.map(toTranscriptDiagnostic));
1030
+ state.warnings.push(...diagnostics.map(formatTranscriptDiagnosticWarning));
881
1031
  if (!state.surfaceDetected && state.firstUserRawText) {
882
1032
  setDetectedSurface(state, inferSurfaceFromContent(state.firstUserRawText));
883
1033
  }
@@ -887,6 +1037,7 @@ var OpenClawTranscriptParser = class {
887
1037
  }
888
1038
  const startedAt = state.sessionTimestamp ?? state.messages[0]?.timestamp ?? fallbackTimestamp;
889
1039
  const endedAt = state.messages[state.messages.length - 1]?.timestamp ?? state.sessionTimestamp ?? fallbackTimestamp;
1040
+ const stableSessionId = state.sessionId ?? deriveOpenClawSessionIdFromFilePath(filePath);
890
1041
  return {
891
1042
  messages: state.messages,
892
1043
  warnings: state.warnings,
@@ -899,515 +1050,1446 @@ var OpenClawTranscriptParser = class {
899
1050
  transcriptHash,
900
1051
  modelsUsed: state.modelsUsed.length > 0 ? state.modelsUsed : void 0,
901
1052
  reconstructedSurface: state.detectedSurface,
902
- surfaceReconstructionSource: state.surfaceDetected ? "reconstructed" : "none"
1053
+ surfaceReconstructionSource: state.surfaceDetected ? "reconstructed" : "none",
1054
+ sourceIdentity: stableSessionId ? `openclaw-session:${stableSessionId}` : void 0,
1055
+ sourceIdentityKind: stableSessionId ? "openclaw_session" : void 0,
1056
+ workingDirectory: state.workingDirectory
903
1057
  }
904
1058
  };
905
1059
  }
906
1060
  };
907
1061
  var openClawTranscriptParser = new OpenClawTranscriptParser();
908
1062
 
909
- // src/core/claim-key.ts
910
- var UNKNOWN_SEGMENT = "unknown";
911
- var SELF_REFERENTIAL_ENTITIES = /* @__PURE__ */ new Set(["i", "me", "myself", "the_user", "user", "we", "our_team", "the_project", "this_project"]);
912
- var GENERIC_ENTITIES = /* @__PURE__ */ new Set([
1063
+ // src/adapters/db/openclaw-repository.ts
1064
+ var ZERO_VECTOR = JSON.stringify(Array.from({ length: EMBEDDING_DIMENSIONS }, () => 0));
1065
+ function createOpenClawRepository(executor, options = {}) {
1066
+ return {
1067
+ listCoreEntries: async (limit) => listCoreEntries(executor, limit),
1068
+ findEntryBySubject: async (subject) => findEntryBySubject(executor, subject),
1069
+ findMostRecentEntry: async () => findMostRecentEntry(executor),
1070
+ getEntryTrace: async (entryId) => getEntryTrace(executor, entryId, options.claimSlotPolicyConfig),
1071
+ getMemoryStatusSnapshot: async () => getMemoryStatusSnapshot(executor),
1072
+ probeVectorAvailability: async () => probeVectorAvailability(executor)
1073
+ };
1074
+ }
1075
+ async function listCoreEntries(executor, limit) {
1076
+ if (limit <= 0) {
1077
+ return [];
1078
+ }
1079
+ const result = await executor.execute({
1080
+ sql: `
1081
+ SELECT
1082
+ ${ENTRY_SELECT_COLUMNS}
1083
+ FROM entries
1084
+ WHERE ${buildActiveEntryClause()}
1085
+ AND expiry = 'core'
1086
+ ORDER BY importance DESC, created_at DESC
1087
+ LIMIT ?
1088
+ `,
1089
+ args: [limit]
1090
+ });
1091
+ return result.rows.map((row) => mapEntryRow(row));
1092
+ }
1093
+ async function findEntryBySubject(executor, subject) {
1094
+ const normalizedSubject = subject.trim();
1095
+ if (normalizedSubject.length === 0) {
1096
+ return null;
1097
+ }
1098
+ const result = await executor.execute({
1099
+ sql: `
1100
+ SELECT
1101
+ ${ENTRY_SELECT_COLUMNS},
1102
+ CASE
1103
+ WHEN lower(subject) = lower(?) THEN 0
1104
+ WHEN lower(subject) LIKE lower(?) THEN 1
1105
+ ELSE 2
1106
+ END AS match_rank
1107
+ FROM entries
1108
+ WHERE lower(subject) = lower(?)
1109
+ OR lower(subject) LIKE lower(?)
1110
+ ORDER BY match_rank ASC, created_at DESC
1111
+ LIMIT 1
1112
+ `,
1113
+ args: [normalizedSubject, `%${normalizedSubject}%`, normalizedSubject, `%${normalizedSubject}%`]
1114
+ });
1115
+ const row = result.rows[0];
1116
+ return row ? mapEntryRow(row) : null;
1117
+ }
1118
+ async function findMostRecentEntry(executor) {
1119
+ const result = await executor.execute({
1120
+ sql: `
1121
+ SELECT
1122
+ ${ENTRY_SELECT_COLUMNS}
1123
+ FROM entries
1124
+ ORDER BY created_at DESC
1125
+ LIMIT 1
1126
+ `
1127
+ });
1128
+ const row = result.rows[0];
1129
+ return row ? mapEntryRow(row) : null;
1130
+ }
1131
+ async function getEntryTrace(executor, entryId, claimSlotPolicyConfig) {
1132
+ const entry = await getEntryByIdIncludingInactive(executor, entryId);
1133
+ if (!entry) {
1134
+ return null;
1135
+ }
1136
+ const [supersededBy, supersedes, claimFamily, recallEvents] = await Promise.all([
1137
+ entry.superseded_by ? getEntryByIdIncludingInactive(executor, entry.superseded_by) : Promise.resolve(null),
1138
+ listSupersededEntries(executor, entry.id),
1139
+ entry.claim_key ? getClaimFamily(executor, entry.claim_key, claimSlotPolicyConfig) : Promise.resolve(void 0),
1140
+ listRecallEvents(executor, entry.id)
1141
+ ]);
1142
+ return {
1143
+ entry,
1144
+ ...supersededBy ? { supersededBy } : {},
1145
+ supersedes,
1146
+ ...claimFamily ? { claimFamily } : {},
1147
+ recallEvents
1148
+ };
1149
+ }
1150
+ async function getMemoryStatusSnapshot(executor) {
1151
+ const result = await executor.execute({
1152
+ sql: `
1153
+ SELECT
1154
+ COUNT(*) AS active_entries,
1155
+ SUM(CASE WHEN expiry = 'core' THEN 1 ELSE 0 END) AS core_entries,
1156
+ COUNT(DISTINCT source_file) AS source_files
1157
+ FROM entries
1158
+ WHERE ${buildActiveEntryClause()}
1159
+ `
1160
+ });
1161
+ const row = result.rows[0];
1162
+ if (!row) {
1163
+ return {
1164
+ activeEntries: 0,
1165
+ coreEntries: 0,
1166
+ sourceFiles: 0
1167
+ };
1168
+ }
1169
+ return {
1170
+ activeEntries: readNumber(row, "active_entries", 0),
1171
+ coreEntries: readNumber(row, "core_entries", 0),
1172
+ sourceFiles: readNumber(row, "source_files", 0)
1173
+ };
1174
+ }
1175
+ async function probeVectorAvailability(executor) {
1176
+ try {
1177
+ await executor.execute({
1178
+ sql: `
1179
+ SELECT COUNT(*) AS matches
1180
+ FROM vector_top_k('${VECTOR_INDEX_NAME}', vector32(?), ?) AS matches
1181
+ `,
1182
+ args: [ZERO_VECTOR, 1]
1183
+ });
1184
+ return true;
1185
+ } catch {
1186
+ return false;
1187
+ }
1188
+ }
1189
+ async function getEntryByIdIncludingInactive(executor, entryId) {
1190
+ const normalizedId = entryId.trim();
1191
+ if (normalizedId.length === 0) {
1192
+ return null;
1193
+ }
1194
+ const result = await executor.execute({
1195
+ sql: `
1196
+ SELECT
1197
+ ${ENTRY_SELECT_COLUMNS}
1198
+ FROM entries
1199
+ WHERE id = ?
1200
+ LIMIT 1
1201
+ `,
1202
+ args: [normalizedId]
1203
+ });
1204
+ const row = result.rows[0];
1205
+ return row ? mapEntryRow(row) : null;
1206
+ }
1207
+ async function listSupersededEntries(executor, entryId) {
1208
+ const result = await executor.execute({
1209
+ sql: `
1210
+ SELECT
1211
+ ${ENTRY_SELECT_COLUMNS}
1212
+ FROM entries
1213
+ WHERE superseded_by = ?
1214
+ ORDER BY created_at DESC
1215
+ `,
1216
+ args: [entryId]
1217
+ });
1218
+ return result.rows.map((row) => mapEntryRow(row));
1219
+ }
1220
+ async function getClaimFamily(executor, claimKey, claimSlotPolicyConfig) {
1221
+ const normalizedClaimKey = claimKey.trim();
1222
+ if (normalizedClaimKey.length === 0) {
1223
+ return void 0;
1224
+ }
1225
+ const result = await executor.execute({
1226
+ sql: `
1227
+ SELECT
1228
+ ${ENTRY_SELECT_COLUMNS}
1229
+ FROM entries
1230
+ WHERE claim_key = ?
1231
+ ORDER BY created_at ASC, id ASC
1232
+ `,
1233
+ args: [normalizedClaimKey]
1234
+ });
1235
+ const entries = result.rows.map((row) => mapEntryRow(row));
1236
+ const slotPolicy = resolveClaimSlotPolicy(normalizedClaimKey, claimSlotPolicyConfig);
1237
+ return {
1238
+ claimKey: normalizedClaimKey,
1239
+ slotPolicy: slotPolicy.policy,
1240
+ slotPolicyReason: slotPolicy.reason,
1241
+ entries
1242
+ };
1243
+ }
1244
+ async function listRecallEvents(executor, entryId) {
1245
+ const result = await executor.execute({
1246
+ sql: `
1247
+ SELECT
1248
+ query,
1249
+ session_key,
1250
+ recalled_at
1251
+ FROM recall_events
1252
+ WHERE entry_id = ?
1253
+ ORDER BY recalled_at DESC
1254
+ LIMIT 10
1255
+ `,
1256
+ args: [entryId]
1257
+ });
1258
+ return result.rows.map((row) => ({
1259
+ query: readOptionalString(row, "query"),
1260
+ sessionKey: readOptionalString(row, "session_key"),
1261
+ recalledAt: readRequiredString(row, "recalled_at")
1262
+ }));
1263
+ }
1264
+
1265
+ // src/core/store/pipeline.ts
1266
+ import { randomUUID } from "crypto";
1267
+
1268
+ // src/core/supersession.ts
1269
+ function validateSupersessionRules(oldEntry, newEntry) {
1270
+ if (oldEntry.type !== newEntry.type) {
1271
+ return {
1272
+ ok: false,
1273
+ reason: "type_mismatch"
1274
+ };
1275
+ }
1276
+ if (oldEntry.type === "milestone") {
1277
+ return {
1278
+ ok: false,
1279
+ reason: "milestone"
1280
+ };
1281
+ }
1282
+ if (oldEntry.expiry === "core") {
1283
+ return {
1284
+ ok: false,
1285
+ reason: "core_expiry"
1286
+ };
1287
+ }
1288
+ return {
1289
+ ok: true
1290
+ };
1291
+ }
1292
+ function describeSupersessionRuleFailure(reason) {
1293
+ switch (reason) {
1294
+ case "type_mismatch":
1295
+ return "Supersession requires both entries to have the same type.";
1296
+ case "milestone":
1297
+ return "Milestone entries are never superseded automatically.";
1298
+ case "core_expiry":
1299
+ return "Core-expiry entries are never superseded automatically.";
1300
+ }
1301
+ }
1302
+
1303
+ // src/core/claim-key-entity-family.ts
1304
+ var ENTITY_FAMILY_GROUNDING_STOP_TOKENS = /* @__PURE__ */ new Set([
1305
+ "a",
1306
+ "an",
1307
+ "and",
1308
+ "are",
1309
+ "as",
1310
+ "at",
1311
+ "be",
1312
+ "by",
1313
+ "for",
1314
+ "from",
1315
+ "in",
1316
+ "into",
1317
+ "is",
1318
+ "it",
1319
+ "of",
1320
+ "on",
1321
+ "or",
1322
+ "that",
1323
+ "the",
1324
+ "their",
1325
+ "this",
1326
+ "to",
1327
+ "was",
1328
+ "with"
1329
+ ]);
1330
+ var MAX_ATTRIBUTE_BUCKET_SIZE = 12;
1331
+ var MAX_EVIDENCE_VALUES = 6;
1332
+ var CANONICAL_SELECTION_MARGIN = 3;
1333
+ var SINGLETON_ALIAS_MAX_FAMILY_SIZE = 2;
1334
+ var SINGLETON_ALIAS_MIN_DOMINANT_TRUSTED_COUNT = 3;
1335
+ var SINGLETON_ALIAS_MIN_CONFIDENCE_DELTA = 0.05;
1336
+ var SINGLETON_ALIAS_SCOPE_TOKENS = /* @__PURE__ */ new Set([
1337
+ "agent",
913
1338
  "app",
914
- "company",
915
- "config",
916
- "data",
1339
+ "branch",
1340
+ "build",
1341
+ "cluster",
1342
+ "daemon",
917
1343
  "device",
918
- "entity",
1344
+ "env",
919
1345
  "environment",
920
- "item",
921
- "organization",
922
- "person",
923
- "place",
1346
+ "gateway",
1347
+ "host",
1348
+ "machine",
1349
+ "node",
1350
+ "plugin",
924
1351
  "project",
1352
+ "repo",
1353
+ "repository",
1354
+ "server",
925
1355
  "service",
926
- "setting",
1356
+ "session",
927
1357
  "system",
928
- "team",
929
- "thing",
930
- "user",
931
1358
  "workspace"
932
1359
  ]);
933
- var GENERIC_ATTRIBUTES = /* @__PURE__ */ new Set(["info", "details", "config", "stuff", "thing", "data"]);
934
- var COMPACTION_RELATION_TOKENS = /* @__PURE__ */ new Set([
935
- "after",
936
- "before",
937
- "depend",
938
- "depends",
939
- "follows",
940
- "follow",
941
- "keep",
942
- "keeps",
943
- "maintain",
944
- "maintains",
945
- "need",
946
- "needs",
947
- "precede",
948
- "precedes",
949
- "preserve",
950
- "preserves",
951
- "require",
952
- "required",
953
- "requires",
954
- "retain",
955
- "retains"
956
- ]);
957
- var COMPACTION_BREAK_TOKENS = /* @__PURE__ */ new Set(["about", "across", "and", "between", "during", "for", "from", "into", "onto", "or", "to", "with"]);
958
- var COMPACTION_WEAK_LEADING_TOKENS = /* @__PURE__ */ new Set(["actual", "authoritative", "canonical", "concrete", "current", "durable", "existing", "real"]);
959
- var ACTION_CONDITION_TOKENS = /* @__PURE__ */ new Set(["activate", "activation", "apply", "fire", "launch", "run", "start", "trigger"]);
960
- var TRAILING_OBJECT_COMPACTION_PREPOSITIONS = /* @__PURE__ */ new Set(["about", "for", "from", "into", "onto", "to", "with"]);
961
- var TRAILING_OBJECT_TRANSFER_HEADS = /* @__PURE__ */ new Set([
962
- "access",
963
- "boundary",
964
- "condition",
965
- "contract",
966
- "guide",
967
- "path",
968
- "policy",
969
- "preference",
970
- "process",
971
- "rule",
972
- "schedule",
973
- "support",
974
- "surface",
975
- "window",
976
- "workflow"
977
- ]);
978
- var STABLE_ATTRIBUTE_HEADS = /* @__PURE__ */ new Set([
979
- "access",
980
- "boundary",
981
- "condition",
982
- "contract",
983
- "default",
984
- "dependency",
985
- "guide",
986
- "mode",
987
- "order",
988
- "path",
989
- "policy",
990
- "preference",
991
- "preservation",
992
- "process",
993
- "requirement",
994
- "rule",
995
- "schedule",
996
- "setting",
997
- "status",
998
- "strategy",
999
- "support",
1000
- "surface",
1001
- "timezone",
1002
- "truth",
1003
- "version",
1004
- "window",
1005
- "workflow"
1006
- ]);
1007
- function normalizeClaimKeySegment(value) {
1008
- return value.trim().toLowerCase().replace(/[^a-z0-9]+/g, "_").replace(/_+/g, "_").replace(/^_+|_+$/g, "");
1009
- }
1010
- function normalizeClaimKey(value) {
1011
- const trimmed = value.trim();
1012
- if (trimmed.length === 0) {
1013
- return { ok: false, reason: "empty" };
1360
+ function detectClaimKeyEntityFamilyCandidates(entries) {
1361
+ const profiles = buildTrustedClaimKeyEntityProfiles(entries);
1362
+ if (profiles.size < 2) {
1363
+ return [];
1014
1364
  }
1015
- const slashCount = Array.from(trimmed).filter((character) => character === "/").length;
1016
- if (slashCount === 0) {
1017
- return { ok: false, reason: "missing_separator" };
1365
+ const pairSupport = buildPairSupport(profiles);
1366
+ if (pairSupport.length === 0) {
1367
+ return [];
1018
1368
  }
1019
- if (slashCount !== 1) {
1020
- return { ok: false, reason: "too_many_segments" };
1369
+ const adjacency = /* @__PURE__ */ new Map();
1370
+ for (const support of pairSupport) {
1371
+ const [leftEntity, rightEntity] = support.entityPrefixes;
1372
+ getOrCreateSet(adjacency, leftEntity).add(rightEntity);
1373
+ getOrCreateSet(adjacency, rightEntity).add(leftEntity);
1021
1374
  }
1022
- const [rawEntity = "", rawAttribute = ""] = trimmed.split("/");
1023
- const entity = normalizeClaimKeySegment(rawEntity);
1024
- if (entity.length === 0) {
1025
- return { ok: false, reason: "empty_entity" };
1375
+ const visited = /* @__PURE__ */ new Set();
1376
+ const families = [];
1377
+ for (const entityPrefix of adjacency.keys()) {
1378
+ if (visited.has(entityPrefix)) {
1379
+ continue;
1380
+ }
1381
+ const component = collectConnectedEntityComponent(entityPrefix, adjacency, visited);
1382
+ if (component.length < 2) {
1383
+ continue;
1384
+ }
1385
+ const componentSet = new Set(component);
1386
+ const componentSupport = pairSupport.filter((support) => {
1387
+ const [leftEntity, rightEntity] = support.entityPrefixes;
1388
+ return componentSet.has(leftEntity) && componentSet.has(rightEntity);
1389
+ });
1390
+ const canonicalSelection = selectCanonicalEntityPrefix(component, componentSupport, profiles);
1391
+ const canonicalEntityPrefix = canonicalSelection.canonicalEntityPrefix;
1392
+ const autoConverge = canonicalEntityPrefix !== null && component.filter((entity) => entity !== canonicalEntityPrefix).every((entity) => {
1393
+ const support = findPairSupport(componentSupport, canonicalEntityPrefix, entity);
1394
+ return support?.autoSafe === true;
1395
+ });
1396
+ const componentProfiles = component.map((entity) => profiles.get(entity)).filter((profile) => Boolean(profile));
1397
+ const entryIds = normalizeStringArray(componentProfiles.flatMap((profile) => [...profile.entryIds]));
1398
+ const claimKeys = normalizeStringArray(componentProfiles.flatMap((profile) => [...profile.claimKeys]));
1399
+ const confidence = componentSupport.length > 0 ? Math.max(...componentSupport.map((support) => support.confidence)) : 0.75;
1400
+ families.push({
1401
+ entityPrefixes: [...component].sort((left, right) => left.localeCompare(right)),
1402
+ entryIds,
1403
+ claimKeys,
1404
+ canonicalEntityPrefix,
1405
+ canonicalSelectionReasons: canonicalSelection.reasons,
1406
+ confidence,
1407
+ autoConverge,
1408
+ unresolvedReason: canonicalSelection.unresolvedReason ?? (autoConverge ? null : "Entity-family evidence is strong enough to stage, but not every alias mapping is low-risk."),
1409
+ pairSupport: componentSupport.sort((left, right) => {
1410
+ const leftKey = left.entityPrefixes.join("::");
1411
+ const rightKey = right.entityPrefixes.join("::");
1412
+ return leftKey.localeCompare(rightKey);
1413
+ })
1414
+ });
1026
1415
  }
1027
- const attribute = normalizeClaimKeySegment(rawAttribute);
1028
- if (attribute.length === 0) {
1029
- return { ok: false, reason: "empty_attribute" };
1416
+ return families.sort((left, right) => {
1417
+ if (right.confidence !== left.confidence) {
1418
+ return right.confidence - left.confidence;
1419
+ }
1420
+ const leftKey = left.entityPrefixes.join("::");
1421
+ const rightKey = right.entityPrefixes.join("::");
1422
+ return leftKey.localeCompare(rightKey);
1423
+ });
1424
+ }
1425
+ function summarizeClaimKeyEntityPrefixStats(observations) {
1426
+ const counts = /* @__PURE__ */ new Map();
1427
+ for (const observation of observations) {
1428
+ const rawClaimKey = observation.claim_key?.trim();
1429
+ if (!rawClaimKey) {
1430
+ continue;
1431
+ }
1432
+ const inspection = inspectClaimKey(rawClaimKey);
1433
+ if (!inspection.normalized) {
1434
+ continue;
1435
+ }
1436
+ const entityPrefix = inspection.normalized.entity;
1437
+ const existing = counts.get(entityPrefix) ?? {
1438
+ entityPrefix,
1439
+ activeEntryCount: 0,
1440
+ trustedEntryCount: 0,
1441
+ tentativeEntryCount: 0,
1442
+ unresolvedEntryCount: 0,
1443
+ legacyEntryCount: 0,
1444
+ deterministicRepairEntryCount: 0,
1445
+ manualEntryCount: 0,
1446
+ modelEntryCount: 0,
1447
+ jsonRetryEntryCount: 0,
1448
+ surgeonFamilyReuseEntryCount: 0
1449
+ };
1450
+ existing.activeEntryCount += 1;
1451
+ switch (observation.claim_key_status) {
1452
+ case "trusted":
1453
+ existing.trustedEntryCount += 1;
1454
+ break;
1455
+ case "tentative":
1456
+ existing.tentativeEntryCount += 1;
1457
+ break;
1458
+ case "unresolved":
1459
+ existing.unresolvedEntryCount += 1;
1460
+ break;
1461
+ default:
1462
+ existing.legacyEntryCount += 1;
1463
+ break;
1464
+ }
1465
+ switch (observation.claim_key_source) {
1466
+ case "deterministic_repair":
1467
+ existing.deterministicRepairEntryCount += 1;
1468
+ break;
1469
+ case "manual":
1470
+ existing.manualEntryCount += 1;
1471
+ break;
1472
+ case "model":
1473
+ existing.modelEntryCount += 1;
1474
+ break;
1475
+ case "json_retry":
1476
+ existing.jsonRetryEntryCount += 1;
1477
+ break;
1478
+ case "surgeon_family_reuse":
1479
+ existing.surgeonFamilyReuseEntryCount += 1;
1480
+ break;
1481
+ default:
1482
+ break;
1483
+ }
1484
+ counts.set(entityPrefix, existing);
1030
1485
  }
1031
- if (entity === UNKNOWN_SEGMENT && attribute === UNKNOWN_SEGMENT) {
1032
- return { ok: false, reason: "unknown_pair" };
1486
+ return [...counts.values()].sort((left, right) => {
1487
+ if (right.activeEntryCount !== left.activeEntryCount) {
1488
+ return right.activeEntryCount - left.activeEntryCount;
1489
+ }
1490
+ if (right.trustedEntryCount !== left.trustedEntryCount) {
1491
+ return right.trustedEntryCount - left.trustedEntryCount;
1492
+ }
1493
+ return left.entityPrefix.localeCompare(right.entityPrefix);
1494
+ });
1495
+ }
1496
+ function detectClaimKeySingletonAliasCandidates(observations) {
1497
+ return detectClaimKeySingletonAliasCandidatesFromStats(summarizeClaimKeyEntityPrefixStats(observations));
1498
+ }
1499
+ function detectClaimKeySingletonAliasCandidatesFromStats(stats) {
1500
+ const candidatesByAlias = /* @__PURE__ */ new Map();
1501
+ const dominantFamilies = stats.filter((profile) => profile.trustedEntryCount >= SINGLETON_ALIAS_MIN_DOMINANT_TRUSTED_COUNT);
1502
+ const aliasFamilies = stats.filter((profile) => {
1503
+ return profile.activeEntryCount > 0 && profile.activeEntryCount <= SINGLETON_ALIAS_MAX_FAMILY_SIZE && profile.trustedEntryCount < profile.activeEntryCount && buildLowTrustEntryCount(profile) >= 1;
1504
+ });
1505
+ for (const aliasProfile of aliasFamilies) {
1506
+ for (const dominantProfile of dominantFamilies) {
1507
+ if (aliasProfile.entityPrefix === dominantProfile.entityPrefix || dominantProfile.activeEntryCount <= aliasProfile.activeEntryCount) {
1508
+ continue;
1509
+ }
1510
+ const candidate = evaluateSingletonAliasCandidate(aliasProfile, dominantProfile);
1511
+ if (!candidate) {
1512
+ continue;
1513
+ }
1514
+ const existing = candidatesByAlias.get(aliasProfile.entityPrefix) ?? [];
1515
+ existing.push(candidate);
1516
+ candidatesByAlias.set(aliasProfile.entityPrefix, existing);
1517
+ }
1033
1518
  }
1034
- return {
1035
- ok: true,
1036
- value: {
1037
- claimKey: `${entity}/${attribute}`,
1038
- entity,
1039
- attribute
1519
+ return [...candidatesByAlias.values()].flatMap(selectBestSingletonAliasCandidate).sort((left, right) => right.confidence - left.confidence || left.aliasEntityPrefix.localeCompare(right.aliasEntityPrefix));
1520
+ }
1521
+ function buildTrustedClaimKeyEntityProfiles(entries) {
1522
+ const profiles = /* @__PURE__ */ new Map();
1523
+ for (const entry of entries) {
1524
+ const rawClaimKey = entry.claim_key?.trim();
1525
+ if (!rawClaimKey) {
1526
+ continue;
1527
+ }
1528
+ const inspection = inspectClaimKey(rawClaimKey);
1529
+ if (!inspection.canonical || !inspection.normalized || inspection.suspectReasons.length > 0) {
1530
+ continue;
1531
+ }
1532
+ const entityPrefix = inspection.normalized.entity;
1533
+ const attribute = inspection.normalized.attribute;
1534
+ const profile = getOrCreateProfile(profiles, entityPrefix);
1535
+ profile.entryIds.add(entry.id);
1536
+ profile.claimKeys.add(inspection.normalized.claimKey);
1537
+ profile.attributeSet.add(attribute);
1538
+ const [attributeHead = attribute] = attribute.split("_");
1539
+ if (attributeHead) {
1540
+ profile.attributeHeadSet.add(attributeHead);
1541
+ }
1542
+ for (const tag of entry.tags) {
1543
+ const normalizedTag = normalizeClaimKeySegment(tag);
1544
+ if (normalizedTag) {
1545
+ profile.tags.add(normalizedTag);
1546
+ }
1547
+ }
1548
+ for (const token of tokenizeGrounding(entry.source_context)) {
1549
+ profile.sourceContextTokens.add(token);
1040
1550
  }
1551
+ for (const token of tokenizeGrounding(entry.subject)) {
1552
+ profile.subjectTokens.add(token);
1553
+ }
1554
+ profile.entryCount += 1;
1555
+ profile.totalQualityScore += entry.quality_score;
1556
+ }
1557
+ return profiles;
1558
+ }
1559
+ function getOrCreateProfile(profiles, entityPrefix) {
1560
+ const existing = profiles.get(entityPrefix);
1561
+ if (existing) {
1562
+ return existing;
1563
+ }
1564
+ const tokenList = entityPrefix.split("_").filter((token) => token.length > 0);
1565
+ const created = {
1566
+ entityPrefix,
1567
+ entryIds: /* @__PURE__ */ new Set(),
1568
+ claimKeys: /* @__PURE__ */ new Set(),
1569
+ attributeSet: /* @__PURE__ */ new Set(),
1570
+ attributeHeadSet: /* @__PURE__ */ new Set(),
1571
+ tags: /* @__PURE__ */ new Set(),
1572
+ sourceContextTokens: /* @__PURE__ */ new Set(),
1573
+ subjectTokens: /* @__PURE__ */ new Set(),
1574
+ entryCount: 0,
1575
+ totalQualityScore: 0,
1576
+ tokenList,
1577
+ sortedTokenSignature: [...tokenList].sort().join("_"),
1578
+ compactSignature: tokenList.join("")
1041
1579
  };
1580
+ profiles.set(entityPrefix, created);
1581
+ return created;
1582
+ }
1583
+ function buildPairSupport(profiles) {
1584
+ const candidatePairs = /* @__PURE__ */ new Set();
1585
+ const attributeBuckets = /* @__PURE__ */ new Map();
1586
+ for (const profile of profiles.values()) {
1587
+ for (const attribute of profile.attributeSet) {
1588
+ const bucket = attributeBuckets.get(attribute);
1589
+ if (bucket) {
1590
+ bucket.push(profile.entityPrefix);
1591
+ } else {
1592
+ attributeBuckets.set(attribute, [profile.entityPrefix]);
1593
+ }
1594
+ }
1595
+ }
1596
+ for (const entities of attributeBuckets.values()) {
1597
+ const normalizedEntities = normalizeStringArray(entities);
1598
+ if (normalizedEntities.length < 2 || normalizedEntities.length > MAX_ATTRIBUTE_BUCKET_SIZE) {
1599
+ continue;
1600
+ }
1601
+ for (let index = 0; index < normalizedEntities.length; index += 1) {
1602
+ const leftEntity = normalizedEntities[index];
1603
+ if (!leftEntity) {
1604
+ continue;
1605
+ }
1606
+ for (let peerIndex = index + 1; peerIndex < normalizedEntities.length; peerIndex += 1) {
1607
+ const rightEntity = normalizedEntities[peerIndex];
1608
+ if (!rightEntity) {
1609
+ continue;
1610
+ }
1611
+ candidatePairs.add(buildPairKey(leftEntity, rightEntity));
1612
+ }
1613
+ }
1614
+ }
1615
+ const support = [];
1616
+ for (const pairKey of candidatePairs) {
1617
+ const [leftEntity = "", rightEntity = ""] = pairKey.split("::");
1618
+ const leftProfile = profiles.get(leftEntity);
1619
+ const rightProfile = profiles.get(rightEntity);
1620
+ if (!leftProfile || !rightProfile) {
1621
+ continue;
1622
+ }
1623
+ const pairSupport = evaluateEntityFamilyPairSupport(leftProfile, rightProfile);
1624
+ if (pairSupport) {
1625
+ support.push(pairSupport);
1626
+ }
1627
+ }
1628
+ return support;
1042
1629
  }
1043
- function compactClaimKey(claimKey) {
1044
- const normalized = normalizeClaimKey(claimKey);
1045
- if (!normalized.ok) {
1630
+ function evaluateEntityFamilyPairSupport(leftProfile, rightProfile) {
1631
+ const sharedAttributes = intersectSets(leftProfile.attributeSet, rightProfile.attributeSet);
1632
+ if (sharedAttributes.length === 0) {
1046
1633
  return null;
1047
1634
  }
1048
- let attributeTokens = normalized.value.attribute.split("_").filter((token) => token.length > 0);
1049
- const entityTokens = normalized.value.entity.split("_").filter((token) => token.length > 0);
1050
- const reasons = [];
1051
- if (entityTokens.length > 0 && startsWithTokens(attributeTokens, entityTokens) && attributeTokens.length > entityTokens.length) {
1052
- attributeTokens = attributeTokens.slice(entityTokens.length);
1053
- reasons.push("removed duplicated entity prefix from attribute");
1054
- }
1055
- if (entityTokens.length > 0 && attributeTokens.length > entityTokens.length + 1 && endsWithTokens(attributeTokens, entityTokens) && TRAILING_OBJECT_COMPACTION_PREPOSITIONS.has(attributeTokens[attributeTokens.length - entityTokens.length - 1] ?? "")) {
1056
- attributeTokens = attributeTokens.slice(0, attributeTokens.length - entityTokens.length - 1);
1057
- reasons.push("removed duplicated entity suffix from attribute");
1058
- }
1059
- const sourceOfTruthCompaction = compactSourceOfTruthAttribute(attributeTokens);
1060
- if (sourceOfTruthCompaction) {
1061
- attributeTokens = sourceOfTruthCompaction.attributeTokens;
1062
- reasons.push(sourceOfTruthCompaction.reason);
1063
- } else {
1064
- const relationCompaction = compactRelationAttribute(attributeTokens);
1065
- if (relationCompaction) {
1066
- attributeTokens = relationCompaction.attributeTokens;
1067
- reasons.push(relationCompaction.reason);
1068
- } else {
1069
- const trailingObjectCompaction = compactTrailingObjectAttribute(attributeTokens);
1070
- if (trailingObjectCompaction) {
1071
- attributeTokens = trailingObjectCompaction.attributeTokens;
1072
- reasons.push(trailingObjectCompaction.reason);
1073
- }
1635
+ const sharedAttributeHeads = intersectSets(leftProfile.attributeHeadSet, rightProfile.attributeHeadSet);
1636
+ const sharedTags = intersectSets(leftProfile.tags, rightProfile.tags);
1637
+ const sharedSourceContextTokens = intersectSets(leftProfile.sourceContextTokens, rightProfile.sourceContextTokens);
1638
+ const sharedSubjectTokens = intersectSets(leftProfile.subjectTokens, rightProfile.subjectTokens);
1639
+ const lexicalRelation = evaluateEntityLexicalRelation(leftProfile, rightProfile);
1640
+ const groundingScore = (sharedTags.length > 0 ? 1 : 0) + (sharedSourceContextTokens.length >= 3 ? 1 : 0) + (sharedSubjectTokens.length >= 2 ? 1 : 0);
1641
+ const groundingAnchorCount = (sharedTags.length > 0 ? 1 : 0) + (sharedSourceContextTokens.length >= 3 ? 1 : 0);
1642
+ const qualifies = sharedAttributes.length >= 3 || sharedAttributes.length >= 2 && (lexicalRelation.kind !== null || groundingAnchorCount >= 1) || sharedAttributes.length === 1 && lexicalRelation.kind !== null && groundingAnchorCount >= 1;
1643
+ if (!qualifies) {
1644
+ return null;
1645
+ }
1646
+ const evidence = [
1647
+ {
1648
+ kind: "shared_attribute_overlap",
1649
+ detail: `Shared attributes: ${sharedAttributes.slice(0, MAX_EVIDENCE_VALUES).join(", ")}`
1074
1650
  }
1651
+ ];
1652
+ if (sharedAttributeHeads.length >= 2) {
1653
+ evidence.push({
1654
+ kind: "shared_attribute_head_overlap",
1655
+ detail: `Shared attribute families: ${sharedAttributeHeads.slice(0, MAX_EVIDENCE_VALUES).join(", ")}`
1656
+ });
1075
1657
  }
1076
- const attribute = attributeTokens.join("_");
1077
- if (attribute.length === 0) {
1078
- return {
1079
- claimKey: normalized.value.claimKey,
1080
- entity: normalized.value.entity,
1081
- attribute: normalized.value.attribute,
1082
- compactedFrom: null,
1083
- reason: null
1084
- };
1658
+ if (sharedTags.length > 0) {
1659
+ evidence.push({
1660
+ kind: "shared_tag_grounding",
1661
+ detail: `Shared tags: ${sharedTags.slice(0, MAX_EVIDENCE_VALUES).join(", ")}`
1662
+ });
1663
+ }
1664
+ if (sharedSourceContextTokens.length >= 2) {
1665
+ evidence.push({
1666
+ kind: "shared_source_context_grounding",
1667
+ detail: `Shared source-context tokens: ${sharedSourceContextTokens.slice(0, MAX_EVIDENCE_VALUES).join(", ")}`
1668
+ });
1669
+ }
1670
+ if (sharedSubjectTokens.length >= 2) {
1671
+ evidence.push({
1672
+ kind: "shared_subject_grounding",
1673
+ detail: `Shared subject tokens: ${sharedSubjectTokens.slice(0, MAX_EVIDENCE_VALUES).join(", ")}`
1674
+ });
1675
+ }
1676
+ if (lexicalRelation.kind && lexicalRelation.detail) {
1677
+ evidence.push({
1678
+ kind: lexicalRelation.kind,
1679
+ detail: lexicalRelation.detail
1680
+ });
1085
1681
  }
1086
- const compactedClaimKey = `${normalized.value.entity}/${attribute}`;
1682
+ const confidence = Math.min(
1683
+ 0.98,
1684
+ 0.48 + Math.min(sharedAttributes.length, 3) * 0.12 + Math.min(groundingScore, 3) * 0.08 + lexicalRelation.strengthScore * 0.05
1685
+ );
1087
1686
  return {
1088
- claimKey: compactedClaimKey,
1089
- entity: normalized.value.entity,
1090
- attribute,
1091
- compactedFrom: compactedClaimKey !== normalized.value.claimKey ? normalized.value.claimKey : null,
1092
- reason: reasons.length > 0 ? joinCompactionReasons(reasons) : null
1687
+ entityPrefixes: [leftProfile.entityPrefix, rightProfile.entityPrefix],
1688
+ supportingEntryIds: normalizeStringArray([...leftProfile.entryIds, ...rightProfile.entryIds]),
1689
+ sharedAttributes,
1690
+ confidence,
1691
+ autoSafe: lexicalRelation.autoSafe && (sharedAttributes.length >= 2 || sharedAttributes.length === 1 && groundingAnchorCount >= 1 && groundingScore >= 2),
1692
+ preferredCanonicalEntityPrefix: lexicalRelation.preferredCanonicalEntityPrefix,
1693
+ evidence
1093
1694
  };
1094
1695
  }
1095
- function validateExtractedClaimKey(claimKey) {
1096
- if (SELF_REFERENTIAL_ENTITIES.has(claimKey.entity)) {
1696
+ function evaluateEntityLexicalRelation(leftProfile, rightProfile) {
1697
+ const leftTokens = leftProfile.tokenList;
1698
+ const rightTokens = rightProfile.tokenList;
1699
+ if (leftProfile.compactSignature === rightProfile.compactSignature && leftProfile.entityPrefix !== rightProfile.entityPrefix) {
1700
+ const preferredCanonicalEntityPrefix = leftTokens.length === rightTokens.length ? null : leftTokens.length > rightTokens.length ? leftProfile.entityPrefix : rightProfile.entityPrefix;
1097
1701
  return {
1098
- ok: false,
1099
- reason: "self_referential_entity",
1100
- value: claimKey
1702
+ kind: "lexical_separator_variant",
1703
+ detail: preferredCanonicalEntityPrefix === null ? "Entity prefixes collapse to the same compact lexical form." : `Entity prefixes collapse to the same compact lexical form; "${preferredCanonicalEntityPrefix}" preserves clearer token boundaries.`,
1704
+ autoSafe: true,
1705
+ preferredCanonicalEntityPrefix,
1706
+ strengthScore: 3
1101
1707
  };
1102
1708
  }
1103
- if (GENERIC_ATTRIBUTES.has(claimKey.attribute)) {
1709
+ if (leftProfile.sortedTokenSignature.length > 0 && leftProfile.sortedTokenSignature === rightProfile.sortedTokenSignature && leftProfile.entityPrefix !== rightProfile.entityPrefix) {
1104
1710
  return {
1105
- ok: false,
1106
- reason: "generic_attribute",
1107
- value: claimKey
1711
+ kind: "lexical_token_reordering",
1712
+ detail: "Entity prefixes use the same lexical tokens in a different order.",
1713
+ autoSafe: true,
1714
+ preferredCanonicalEntityPrefix: null,
1715
+ strengthScore: 2
1108
1716
  };
1109
1717
  }
1110
- if (isValueShapedAttribute(claimKey.attribute)) {
1718
+ const leftInitialism = buildInitialism(leftTokens);
1719
+ const rightInitialism = buildInitialism(rightTokens);
1720
+ if (leftInitialism.length >= 2 && leftInitialism === rightProfile.entityPrefix) {
1111
1721
  return {
1112
- ok: false,
1113
- reason: "value_shaped_attribute",
1114
- value: claimKey
1722
+ kind: "lexical_initialism_expansion",
1723
+ detail: `Entity prefix "${rightProfile.entityPrefix}" matches the initialism of "${leftProfile.entityPrefix}".`,
1724
+ autoSafe: false,
1725
+ preferredCanonicalEntityPrefix: leftProfile.entityPrefix,
1726
+ strengthScore: 1
1727
+ };
1728
+ }
1729
+ if (rightInitialism.length >= 2 && rightInitialism === leftProfile.entityPrefix) {
1730
+ return {
1731
+ kind: "lexical_initialism_expansion",
1732
+ detail: `Entity prefix "${leftProfile.entityPrefix}" matches the initialism of "${rightProfile.entityPrefix}".`,
1733
+ autoSafe: false,
1734
+ preferredCanonicalEntityPrefix: rightProfile.entityPrefix,
1735
+ strengthScore: 1
1736
+ };
1737
+ }
1738
+ if (isTokenSubset(leftTokens, rightTokens)) {
1739
+ return {
1740
+ kind: "lexical_token_subset",
1741
+ detail: `"${leftProfile.entityPrefix}" is a lexical subset of "${rightProfile.entityPrefix}".`,
1742
+ autoSafe: false,
1743
+ preferredCanonicalEntityPrefix: rightProfile.entityPrefix,
1744
+ strengthScore: 1
1745
+ };
1746
+ }
1747
+ if (isTokenSubset(rightTokens, leftTokens)) {
1748
+ return {
1749
+ kind: "lexical_token_subset",
1750
+ detail: `"${rightProfile.entityPrefix}" is a lexical subset of "${leftProfile.entityPrefix}".`,
1751
+ autoSafe: false,
1752
+ preferredCanonicalEntityPrefix: leftProfile.entityPrefix,
1753
+ strengthScore: 1
1115
1754
  };
1116
1755
  }
1117
1756
  return {
1118
- ok: true,
1119
- value: claimKey
1757
+ kind: null,
1758
+ detail: null,
1759
+ autoSafe: false,
1760
+ preferredCanonicalEntityPrefix: null,
1761
+ strengthScore: 0
1120
1762
  };
1121
1763
  }
1122
- function inspectClaimKey(value) {
1123
- const rawClaimKey = value.trim();
1124
- const normalized = normalizeClaimKey(rawClaimKey);
1125
- if (!normalized.ok) {
1764
+ function selectCanonicalEntityPrefix(entityPrefixes, pairSupport, profiles) {
1765
+ const scoreByEntity = /* @__PURE__ */ new Map();
1766
+ const reasonsByEntity = /* @__PURE__ */ new Map();
1767
+ for (const entityPrefix of entityPrefixes) {
1768
+ const profile = profiles.get(entityPrefix);
1769
+ if (!profile) {
1770
+ continue;
1771
+ }
1772
+ let score = Math.min(profile.attributeSet.size, 6) * 2 + Math.min(profile.entryCount, 6) + Math.round(profile.totalQualityScore / Math.max(profile.entryCount, 1));
1773
+ const reasons = [];
1774
+ if (profile.attributeSet.size >= 2) {
1775
+ reasons.push("broader trusted attribute coverage");
1776
+ }
1777
+ for (const support of pairSupport) {
1778
+ if (support.preferredCanonicalEntityPrefix === entityPrefix) {
1779
+ score += 4;
1780
+ } else if (support.preferredCanonicalEntityPrefix !== null) {
1781
+ score -= 2;
1782
+ }
1783
+ }
1784
+ const formSpecificity = scoreEntityFormSpecificity(entityPrefix, entityPrefixes, profiles);
1785
+ score += formSpecificity.score;
1786
+ if (formSpecificity.reason) {
1787
+ reasons.push(formSpecificity.reason);
1788
+ }
1789
+ const lexicalVotes2 = pairSupport.filter((support) => support.preferredCanonicalEntityPrefix === entityPrefix).length;
1790
+ if (lexicalVotes2 > 0) {
1791
+ reasons.push(`lexical alias evidence prefers "${entityPrefix}"`);
1792
+ }
1793
+ scoreByEntity.set(entityPrefix, score);
1794
+ reasonsByEntity.set(entityPrefix, normalizeStringArray(reasons));
1795
+ }
1796
+ const ranked = [...scoreByEntity.entries()].sort((left, right) => right[1] - left[1] || left[0].localeCompare(right[0]));
1797
+ const [bestCandidate, secondCandidate] = ranked;
1798
+ if (!bestCandidate) {
1126
1799
  return {
1127
- rawClaimKey,
1128
- canonical: false,
1129
- normalizationFailure: normalized.reason,
1130
- suspectReasons: []
1800
+ canonicalEntityPrefix: null,
1801
+ reasons: [],
1802
+ unresolvedReason: "No canonical entity prefix could be selected from the detected family."
1131
1803
  };
1132
1804
  }
1133
- const suspectReasons = /* @__PURE__ */ new Set();
1134
- const validation = validateExtractedClaimKey(normalized.value);
1135
- if (!validation.ok) {
1136
- suspectReasons.add(validation.reason);
1805
+ const [bestEntityPrefix, bestScore] = bestCandidate;
1806
+ const secondScore = secondCandidate?.[1] ?? Number.NEGATIVE_INFINITY;
1807
+ const bestProfile = profiles.get(bestEntityPrefix);
1808
+ if (!bestProfile) {
1809
+ return {
1810
+ canonicalEntityPrefix: null,
1811
+ reasons: [],
1812
+ unresolvedReason: "No canonical entity prefix could be selected from the detected family."
1813
+ };
1137
1814
  }
1138
- if (GENERIC_ENTITIES.has(normalized.value.entity)) {
1139
- suspectReasons.add("generic_entity");
1815
+ const directPeerSupport = entityPrefixes.filter((entityPrefix) => entityPrefix !== bestEntityPrefix).map((entityPrefix) => findPairSupport(pairSupport, bestEntityPrefix, entityPrefix));
1816
+ const hasDirectSupportToAllPeers = directPeerSupport.every((support) => support !== null);
1817
+ const hasLexicalSupportToAllPeers = directPeerSupport.every((support) => support?.evidence.some((evidence) => evidence.kind.startsWith("lexical_")) === true);
1818
+ const lexicalVotes = pairSupport.filter((support) => support.preferredCanonicalEntityPrefix === bestEntityPrefix).length;
1819
+ if (!hasDirectSupportToAllPeers || !hasLexicalSupportToAllPeers || lexicalVotes === 0 || bestScore - secondScore < CANONICAL_SELECTION_MARGIN) {
1820
+ return {
1821
+ canonicalEntityPrefix: null,
1822
+ reasons: [],
1823
+ unresolvedReason: "Multiple plausible canonical entity prefixes remain after conservative scoring."
1824
+ };
1140
1825
  }
1141
1826
  return {
1142
- rawClaimKey,
1143
- canonical: normalized.value.claimKey === rawClaimKey,
1144
- normalized: normalized.value,
1145
- suspectReasons: [...suspectReasons]
1827
+ canonicalEntityPrefix: bestEntityPrefix,
1828
+ reasons: reasonsByEntity.get(bestEntityPrefix) ?? [],
1829
+ unresolvedReason: null
1146
1830
  };
1147
1831
  }
1148
- function isTrustedClaimKeyForCleanup(value) {
1149
- const inspection = inspectClaimKey(value);
1150
- return Boolean(inspection.canonical && inspection.normalized && inspection.suspectReasons.length === 0);
1151
- }
1152
- function describeClaimKeyNormalizationFailure(reason) {
1153
- switch (reason) {
1154
- case "empty":
1155
- return "claim key was empty";
1156
- case "missing_separator":
1157
- return "claim key must contain exactly one '/'";
1158
- case "too_many_segments":
1159
- return "claim key must contain exactly one '/'";
1160
- case "empty_entity":
1161
- return "claim key entity was empty after normalization";
1162
- case "empty_attribute":
1163
- return "claim key attribute was empty after normalization";
1164
- case "unknown_pair":
1165
- return 'claim key "unknown/unknown" is not allowed';
1166
- }
1167
- }
1168
- function describeExtractedClaimKeyRejection(reason, claimKey) {
1169
- switch (reason) {
1170
- case "self_referential_entity":
1171
- return `entity "${claimKey.entity}" is self-referential`;
1172
- case "generic_attribute":
1173
- return `attribute "${claimKey.attribute}" is too generic`;
1174
- case "value_shaped_attribute":
1175
- return `attribute "${claimKey.attribute}" looks value-shaped`;
1832
+ function scoreEntityFormSpecificity(entityPrefix, entityPrefixes, profiles) {
1833
+ const profile = profiles.get(entityPrefix);
1834
+ if (!profile) {
1835
+ return { score: 0, reason: null };
1836
+ }
1837
+ let score = 0;
1838
+ let reason = null;
1839
+ const compactPeers = entityPrefixes.filter((peerEntityPrefix) => peerEntityPrefix !== entityPrefix).map((peerEntityPrefix) => profiles.get(peerEntityPrefix)).filter((peerProfile) => Boolean(peerProfile)).filter((peerProfile) => peerProfile.compactSignature === profile.compactSignature);
1840
+ if (profile.tokenList.length >= 2 && compactPeers.some((peerProfile) => peerProfile.tokenList.length < profile.tokenList.length)) {
1841
+ score += 2;
1842
+ reason = "less abbreviated lexical form";
1843
+ }
1844
+ if (entityPrefix.length <= 3 && compactPeers.length === 0) {
1845
+ score -= 1;
1846
+ }
1847
+ return { score, reason };
1848
+ }
1849
+ function collectConnectedEntityComponent(startingEntityPrefix, adjacency, visited) {
1850
+ const queue = [startingEntityPrefix];
1851
+ const component = [];
1852
+ visited.add(startingEntityPrefix);
1853
+ while (queue.length > 0) {
1854
+ const entityPrefix = queue.shift();
1855
+ if (!entityPrefix) {
1856
+ continue;
1857
+ }
1858
+ component.push(entityPrefix);
1859
+ const peers = adjacency.get(entityPrefix);
1860
+ if (!peers) {
1861
+ continue;
1862
+ }
1863
+ for (const peer of peers) {
1864
+ if (visited.has(peer)) {
1865
+ continue;
1866
+ }
1867
+ visited.add(peer);
1868
+ queue.push(peer);
1869
+ }
1176
1870
  }
1871
+ return component;
1177
1872
  }
1178
- function describeClaimKeySuspicion(reason, claimKey) {
1179
- switch (reason) {
1180
- case "generic_entity":
1181
- return `entity "${claimKey.entity}" is too generic`;
1182
- case "self_referential_entity":
1183
- case "generic_attribute":
1184
- case "value_shaped_attribute":
1185
- return describeExtractedClaimKeyRejection(reason, claimKey);
1873
+ function findPairSupport(pairSupport, leftEntityPrefix, rightEntityPrefix) {
1874
+ for (const support of pairSupport) {
1875
+ const [leftEntity, rightEntity] = support.entityPrefixes;
1876
+ if (leftEntity === leftEntityPrefix && rightEntity === rightEntityPrefix || leftEntity === rightEntityPrefix && rightEntity === leftEntityPrefix) {
1877
+ return support;
1878
+ }
1186
1879
  }
1880
+ return null;
1187
1881
  }
1188
- function isValueShapedAttribute(attribute) {
1189
- return /^\d+(?:_\d+)*$/u.test(attribute) || /^v\d+(?:_\d+)*$/u.test(attribute);
1190
- }
1191
- function compactSourceOfTruthAttribute(attributeTokens) {
1192
- const sourceOfTruthIndex = findSourceOfTruthPhraseIndex(attributeTokens);
1193
- if (sourceOfTruthIndex === -1) {
1882
+ function evaluateSingletonAliasCandidate(aliasProfile, dominantProfile) {
1883
+ const lexicalRelation = evaluateSingletonAliasLexicalRelation(aliasProfile.entityPrefix, dominantProfile.entityPrefix);
1884
+ if (!lexicalRelation.kind || !lexicalRelation.detail || lexicalRelation.scopeLike) {
1194
1885
  return null;
1195
1886
  }
1196
- const normalizedPhrase = ["source", "of", "truth"];
1197
- if (attributeTokens.length === normalizedPhrase.length && startsWithTokens(attributeTokens, normalizedPhrase)) {
1887
+ const dominantTrustedCount = dominantProfile.trustedEntryCount;
1888
+ if (dominantTrustedCount < SINGLETON_ALIAS_MIN_DOMINANT_TRUSTED_COUNT) {
1198
1889
  return null;
1199
1890
  }
1200
- const before = attributeTokens.slice(0, sourceOfTruthIndex);
1201
- const after = attributeTokens.slice(sourceOfTruthIndex + normalizedPhrase.length);
1202
- const leadingAllowed = before.every((token) => COMPACTION_WEAK_LEADING_TOKENS.has(token));
1203
- const hasMixedStableFamily = before.some((token) => STABLE_ATTRIBUTE_HEADS.has(token)) || after.some((token) => STABLE_ATTRIBUTE_HEADS.has(token));
1204
- const hasConjunctionNoise = before.includes("and") || before.includes("or") || after.includes("and") || after.includes("or");
1205
- if (!leadingAllowed || hasMixedStableFamily || hasConjunctionNoise) {
1891
+ const aliasLowTrustCount = buildLowTrustEntryCount(aliasProfile);
1892
+ if (aliasLowTrustCount === 0) {
1206
1893
  return null;
1207
1894
  }
1895
+ const evidence = [
1896
+ {
1897
+ kind: "singleton_family_size",
1898
+ detail: `"${aliasProfile.entityPrefix}" has ${aliasProfile.activeEntryCount} active keyed ${pluralize(aliasProfile.activeEntryCount, "entry")}.`
1899
+ },
1900
+ {
1901
+ kind: "dominant_trusted_family",
1902
+ detail: `"${dominantProfile.entityPrefix}" already has ${dominantTrustedCount} trusted ${pluralize(dominantTrustedCount, "entry")}.`
1903
+ },
1904
+ {
1905
+ kind: "low_trust_creation_path",
1906
+ detail: describeLowTrustAliasFamily(aliasProfile)
1907
+ },
1908
+ {
1909
+ kind: lexicalRelation.kind,
1910
+ detail: lexicalRelation.detail
1911
+ }
1912
+ ];
1913
+ const confidence = Math.min(
1914
+ 0.98,
1915
+ 0.58 + Math.min(dominantTrustedCount, 6) * 0.05 + Math.min(aliasLowTrustCount, 2) * 0.05 + Math.min(dominantProfile.activeEntryCount - aliasProfile.activeEntryCount, 6) * 0.02 + lexicalRelation.strengthScore * 0.08
1916
+ );
1208
1917
  return {
1209
- attributeTokens: normalizedPhrase,
1210
- reason: "collapsed source-of-truth phrasing into the stable canonical slot"
1918
+ aliasEntityPrefix: aliasProfile.entityPrefix,
1919
+ dominantEntityPrefix: dominantProfile.entityPrefix,
1920
+ aliasFamilySize: aliasProfile.activeEntryCount,
1921
+ dominantFamilySize: dominantProfile.activeEntryCount,
1922
+ dominantTrustedCount,
1923
+ aliasLowTrustCount,
1924
+ confidence,
1925
+ canonicalReuseSafe: lexicalRelation.canonicalReuseSafe && aliasProfile.activeEntryCount === 1 && aliasLowTrustCount === aliasProfile.activeEntryCount && dominantTrustedCount >= SINGLETON_ALIAS_MIN_DOMINANT_TRUSTED_COUNT,
1926
+ evidence
1211
1927
  };
1212
1928
  }
1213
- function compactRelationAttribute(attributeTokens) {
1214
- const relationIndex = attributeTokens.findIndex((token) => COMPACTION_RELATION_TOKENS.has(token));
1215
- if (relationIndex === -1) {
1216
- return null;
1929
+ function selectBestSingletonAliasCandidate(candidates) {
1930
+ const ranked = [...candidates].sort(
1931
+ (left, right) => right.confidence - left.confidence || left.dominantEntityPrefix.localeCompare(right.dominantEntityPrefix)
1932
+ );
1933
+ const [best, runnerUp] = ranked;
1934
+ if (!best) {
1935
+ return [];
1217
1936
  }
1218
- const relation = attributeTokens[relationIndex] ?? "";
1219
- const left = attributeTokens.slice(0, relationIndex);
1220
- const right = attributeTokens.slice(relationIndex + 1);
1221
- if (left.length === 0 && right.length === 0) {
1222
- return null;
1937
+ if (runnerUp && best.confidence - runnerUp.confidence < SINGLETON_ALIAS_MIN_CONFIDENCE_DELTA) {
1938
+ return [];
1223
1939
  }
1224
- if (isRequirementRelation(relation)) {
1225
- const conditionAction = extractConditionAction(right);
1226
- if (conditionAction) {
1227
- return {
1228
- attributeTokens: [conditionAction, "condition"],
1229
- reason: `collapsed a sentence-like ${conditionAction} requirement into a stable condition slot`
1230
- };
1231
- }
1232
- const requirementFocus = extractCompactionFocus(right, 2) ?? extractCompactionFocus(left, 2);
1233
- if (!requirementFocus) {
1234
- return null;
1235
- }
1940
+ return [best];
1941
+ }
1942
+ function evaluateSingletonAliasLexicalRelation(aliasEntityPrefix, dominantEntityPrefix) {
1943
+ const aliasTokens = aliasEntityPrefix.split("_").filter((token) => token.length > 0);
1944
+ const dominantTokens = dominantEntityPrefix.split("_").filter((token) => token.length > 0);
1945
+ const aliasCompactSignature = aliasTokens.join("");
1946
+ const dominantCompactSignature = dominantTokens.join("");
1947
+ if (aliasCompactSignature === dominantCompactSignature && aliasEntityPrefix !== dominantEntityPrefix) {
1236
1948
  return {
1237
- attributeTokens: [...requirementFocus, "requirement"],
1238
- reason: "collapsed a sentence-like requirement phrase into a stable requirement slot"
1949
+ kind: "lexical_separator_variant",
1950
+ detail: `Entity prefixes "${aliasEntityPrefix}" and "${dominantEntityPrefix}" collapse to the same compact lexical form.`,
1951
+ canonicalReuseSafe: true,
1952
+ scopeLike: false,
1953
+ strengthScore: 3
1239
1954
  };
1240
1955
  }
1241
- if (isOrderingRelation(relation)) {
1242
- const orderingFocus = extractCompactionFocus(right, 2) ?? extractCompactionFocus(left, 2);
1243
- if (!orderingFocus) {
1244
- return null;
1245
- }
1956
+ if (!isTokenSubset(dominantTokens, aliasTokens)) {
1246
1957
  return {
1247
- attributeTokens: [...orderingFocus, "order"],
1248
- reason: "collapsed a sentence-like ordering phrase into a stable order slot"
1958
+ kind: null,
1959
+ detail: null,
1960
+ canonicalReuseSafe: false,
1961
+ scopeLike: false,
1962
+ strengthScore: 0
1249
1963
  };
1250
1964
  }
1251
- if (isPreservationRelation(relation)) {
1252
- const preservationFocus = extractCompactionFocus(right, 2) ?? extractCompactionFocus(left, 2);
1253
- if (!preservationFocus) {
1254
- return null;
1255
- }
1965
+ const dominantTokenSet = new Set(dominantTokens);
1966
+ const addedTokens = aliasTokens.filter((token) => !dominantTokenSet.has(token));
1967
+ const scopeLike = addedTokens.length !== 1 || addedTokens.some((token) => SINGLETON_ALIAS_SCOPE_TOKENS.has(token));
1968
+ if (scopeLike) {
1256
1969
  return {
1257
- attributeTokens: [...preservationFocus, "preservation"],
1258
- reason: "collapsed a sentence-like preservation phrase into a stable preservation slot"
1970
+ kind: null,
1971
+ detail: null,
1972
+ canonicalReuseSafe: false,
1973
+ scopeLike: true,
1974
+ strengthScore: 0
1259
1975
  };
1260
1976
  }
1261
- return null;
1977
+ return {
1978
+ kind: "lexical_token_subset",
1979
+ detail: `"${aliasEntityPrefix}" extends "${dominantEntityPrefix}" by the added token "${addedTokens[0]}".`,
1980
+ canonicalReuseSafe: true,
1981
+ scopeLike: false,
1982
+ strengthScore: 2
1983
+ };
1262
1984
  }
1263
- function compactTrailingObjectAttribute(attributeTokens) {
1264
- const prepositionIndex = attributeTokens.findIndex((token) => TRAILING_OBJECT_COMPACTION_PREPOSITIONS.has(token));
1265
- if (prepositionIndex <= 0 || prepositionIndex >= attributeTokens.length - 1) {
1266
- return null;
1985
+ function buildLowTrustEntryCount(profile) {
1986
+ const deterministicOnlyCount = Math.max(0, profile.deterministicRepairEntryCount - profile.tentativeEntryCount);
1987
+ return profile.tentativeEntryCount + profile.unresolvedEntryCount + deterministicOnlyCount;
1988
+ }
1989
/**
 * Builds a human-readable sentence explaining why an alias family is low-trust.
 *
 * Each non-zero counter (deterministic repair, tentative, unresolved) contributes
 * one fragment, in that order. When every counter is zero a generic
 * "not fully trusted yet" sentence is returned instead.
 *
 * @param {{entityPrefix: string, deterministicRepairEntryCount: number, tentativeEntryCount: number, unresolvedEntryCount: number}} profile
 * @returns {string} The explanatory sentence.
 */
function describeLowTrustAliasFamily(profile) {
  const counters = [
    [profile.deterministicRepairEntryCount, "deterministic repair"],
    [profile.tentativeEntryCount, "tentative"],
    [profile.unresolvedEntryCount, "unresolved"]
  ];
  const reasons = counters
    .filter(([count]) => count > 0)
    .map(([count, label]) => `${count} ${label} ${pluralize(count, "entry")}`);
  if (reasons.length === 0) {
    return `"${profile.entityPrefix}" is not fully trusted yet.`;
  }
  return `"${profile.entityPrefix}" is low-trust because it has ${reasons.join(", ")}.`;
}
2005
/**
 * Joins the first character of every token into an initialism.
 *
 * @param {string[]} tokens - Tokens to abbreviate.
 * @returns {string} The initialism, or "" when fewer than two tokens are given.
 */
function buildInitialism(tokens) {
  // A single token (or none) has no meaningful initialism.
  if (tokens.length < 2) {
    return "";
  }
  let initialism = "";
  for (const token of tokens) {
    // Empty tokens contribute nothing.
    initialism += token[0] ?? "";
  }
  return initialism;
}
1290
- function findSourceOfTruthPhraseIndex(tokens) {
1291
- for (let index = 0; index <= tokens.length - 3; index += 1) {
1292
- if (tokens[index] === "source" && tokens[index + 1] === "of" && tokens[index + 2] === "truth") {
1293
- return index;
1294
- }
2011
/**
 * Tests whether `subsetTokens` is a non-empty list that is strictly shorter than
 * `supersetTokens` and whose every token also appears in `supersetTokens`.
 *
 * @param {string[]} subsetTokens - Candidate subset.
 * @param {string[]} supersetTokens - Candidate superset.
 * @returns {boolean} True only when both the length and membership checks pass.
 */
function isTokenSubset(subsetTokens, supersetTokens) {
  const subsetSize = subsetTokens.length;
  if (subsetSize === 0) {
    return false;
  }
  // Equal or greater length can never be a proper subset by this definition.
  if (subsetSize >= supersetTokens.length) {
    return false;
  }
  const known = new Set(supersetTokens);
  for (const token of subsetTokens) {
    if (!known.has(token)) {
      return false;
    }
  }
  return true;
}
1298
- function extractConditionAction(tokens) {
1299
- for (let index = tokens.length - 1; index >= 0; index -= 1) {
1300
- const token = tokens[index];
1301
- if (token && ACTION_CONDITION_TOKENS.has(token)) {
1302
- return token;
1303
- }
2018
/**
 * Splits a free-form value into grounding tokens.
 *
 * The value is run through `normalizeClaimKeySegment`, split on "_", and then
 * tokens shorter than two characters or listed in
 * `ENTITY_FAMILY_GROUNDING_STOP_TOKENS` are dropped.
 *
 * @param {string | null | undefined} value - Text to tokenize.
 * @returns {string[]} Remaining tokens; empty when the value or its normalized form is falsy.
 */
function tokenizeGrounding(value) {
  const normalized = value ? normalizeClaimKeySegment(value) : "";
  if (!normalized) {
    return [];
  }
  const tokens = [];
  for (const token of normalized.split("_")) {
    if (token.length >= 2 && !ENTITY_FAMILY_GROUNDING_STOP_TOKENS.has(token)) {
      tokens.push(token);
    }
  }
  return tokens;
}
1307
- function extractCompactionFocus(tokens, limit) {
1308
- const compactable = trimWeakLeadingTokens(tokens).filter((token) => token.length > 0);
1309
- const segments = splitTokensOnBreaks(compactable).filter((segment) => segment.length > 0);
1310
- const preferredSegment = segments[0];
1311
- if (!preferredSegment || preferredSegment.length === 0) {
1312
- return null;
2028
/**
 * Returns the values present in both sets, sorted with `localeCompare`.
 *
 * @param {Set<string>} left
 * @param {Set<string>} right
 * @returns {string[]} Sorted shared values.
 */
function intersectSets(left, right) {
  // Walk the smaller set and probe the larger one.
  const leftIsSmaller = left.size <= right.size;
  const small = leftIsSmaller ? left : right;
  const large = leftIsSmaller ? right : left;
  return [...small]
    .filter((value) => large.has(value))
    .sort((first, second) => first.localeCompare(second));
}
1316
- function extractStableHeadCore(tokens, limit) {
1317
- const compactable = trimWeakLeadingTokens(tokens).filter((token) => token.length > 0);
1318
- const head = compactable[compactable.length - 1];
1319
- if (!head || !STABLE_ATTRIBUTE_HEADS.has(head)) {
1320
- return null;
2038
/**
 * Trims every string, drops the ones that become empty, and removes duplicates
 * while keeping first-occurrence order.
 *
 * @param {string[]} values
 * @returns {string[]} Unique, trimmed, non-empty strings.
 */
function normalizeStringArray(values) {
  const unique = new Set();
  for (const raw of values) {
    const trimmed = raw.trim();
    if (trimmed.length > 0) {
      unique.add(trimmed);
    }
  }
  return [...unique];
}
2041
/**
 * Builds an order-independent key for a pair of entity prefixes by placing the
 * `localeCompare`-smaller prefix first and joining the two with "::".
 *
 * @param {string} leftEntityPrefix
 * @param {string} rightEntityPrefix
 * @returns {string} The pair key.
 */
function buildPairKey(leftEntityPrefix, rightEntityPrefix) {
  // Ties keep the argument order, which is what a stable sort of the pair would do.
  if (leftEntityPrefix.localeCompare(rightEntityPrefix) <= 0) {
    return `${leftEntityPrefix}::${rightEntityPrefix}`;
  }
  return `${rightEntityPrefix}::${leftEntityPrefix}`;
}
2044
/**
 * Returns the Set stored under `key`, inserting a new empty Set first when the
 * map has no truthy value for that key.
 *
 * @param {Map<any, Set<any>>} map - Map that is mutated on a miss.
 * @param {any} key
 * @returns {Set<any>} The existing or newly created Set.
 */
function getOrCreateSet(map, key) {
  let bucket = map.get(key);
  if (!bucket) {
    bucket = /* @__PURE__ */ new Set();
    map.set(key, bucket);
  }
  return bucket;
}
1324
- function splitTokensOnBreaks(tokens) {
1325
- const segments = [];
1326
- let current = [];
1327
- for (const token of tokens) {
1328
- if (COMPACTION_BREAK_TOKENS.has(token)) {
1329
- if (current.length > 0) {
1330
- segments.push(current);
1331
- current = [];
1332
- }
2053
/**
 * Returns `noun` in the grammatical number that matches `count`.
 *
 * A count of exactly 1 yields the noun unchanged. Otherwise a noun ending in a
 * lowercase consonant followed by "y" is pluralized as "...ies" (so "entry"
 * becomes "entries" rather than "entrys"); every other noun gets a plain "s".
 *
 * @param {number} count - Quantity being described.
 * @param {string} noun - Singular noun.
 * @returns {string} Singular or plural form of the noun.
 */
function pluralize(count, noun) {
  if (count === 1) {
    return noun;
  }
  // consonant + "y" => "ies"; vowel + "y" (e.g. "key") keeps the regular "s".
  if (/[b-df-hj-np-tv-z]y$/u.test(noun)) {
    return `${noun.slice(0, -1)}ies`;
  }
  return `${noun}s`;
}
2056
+
2057
+ // src/core/claim-key-support.ts
2058
// Upper bound on the number of "_"-separated attribute tokens accepted by the
// compactness and stable-slot checks in this module.
var MAX_AUTO_APPLY_ATTRIBUTE_TOKENS = 4;

// Function words that are ignored when tokenizing grounding text and attributes.
var GROUNDING_STOP_TOKENS = /* @__PURE__ */ new Set([
  "a", "an", "and", "are", "as", "at", "be", "by", "for", "from",
  "how", "in", "into", "is", "it", "of", "on", "or", "our", "that",
  "the", "their", "this", "to", "we", "with"
]);

// Attribute tokens whose presence marks a slot as not compact enough.
var AWKWARD_AUTO_APPLY_ATTRIBUTE_TOKENS = /* @__PURE__ */ new Set([
  "to",
  "for",
  "from",
  "with",
  "about",
  "into",
  "onto",
  "between",
  "during"
]);

// Attribute tokens paired with the policy/default wording template.
var POLICY_TEMPLATE_ATTRIBUTE_TOKENS = /* @__PURE__ */ new Set([
  "policy",
  "default",
  "workflow",
  "process",
  "strategy",
  "guardrail",
  "rule",
  "boundary"
]);

// Attribute tokens paired with the authoritative/source-of-truth wording template.
var AUTHORITATIVE_TEMPLATE_ATTRIBUTE_TOKENS = /* @__PURE__ */ new Set([
  "source",
  "truth",
  "guide",
  "runbook",
  "reference"
]);

// Attribute tokens paired with the architecture wording template.
var ARCHITECTURE_TEMPLATE_ATTRIBUTE_TOKENS = /* @__PURE__ */ new Set([
  "adapter", "boundary", "architecture", "backend", "storage",
  "model", "support", "contract", "interface", "surface"
]);

// Final attribute tokens ("heads") that qualify a slot as a stable family slot.
var STABLE_FAMILY_SLOT_ATTRIBUTE_HEADS = /* @__PURE__ */ new Set([
  "access", "boundary", "condition", "contract", "dependency",
  "mode", "owner", "order", "path", "policy",
  "preference", "preservation", "process", "requirement", "role",
  "rule", "schedule", "sequencing", "setting", "status",
  "strategy", "support", "surface", "timezone", "version",
  "window", "workflow", "workspace"
]);
2132
/**
 * Builds the trusted claim-key support seed from stored entries.
 *
 * Entries whose trimmed `claim_key` is empty, fails `isTrustedClaimKeyForCleanup`,
 * or cannot be normalized by `inspectClaimKey` are ignored. Claim keys are then
 * ranked by usage count (desc), highest importance (desc), latest `created_at`
 * (desc) and finally the key itself (asc); seed entries are emitted in that key
 * order, newest first within a key, with ties broken by id.
 *
 * NOTE(review): only the first entry seen for a given raw claim key is added to
 * the seed; later entries with the same key only update the ranking stats.
 * NOTE(review): ranking stats are keyed by the trimmed raw key while seed entries
 * carry the normalized key, so the final lookup only matches when the two are
 * equal. Confirm both behaviours are intended.
 *
 * @param {Iterable<object>} entries - Stored entries (reads `id`, `claim_key`, `importance`,
 *   `created_at`, `type`, `tags`, `source_context`, `subject`).
 * @returns {{entries: object[]}} The ordered seed.
 */
function buildTrustedClaimKeySupportSeed(entries) {
  const statsByRawKey = /* @__PURE__ */ new Map();
  const seedCandidates = [];

  for (const entry of entries) {
    const rawKey = entry.claim_key?.trim();
    if (!rawKey || !isTrustedClaimKeyForCleanup(rawKey)) {
      continue;
    }
    const { normalized } = inspectClaimKey(rawKey);
    if (!normalized) {
      continue;
    }

    const stats = statsByRawKey.get(rawKey);
    if (stats) {
      // Repeat sighting: fold into the ranking stats only.
      stats.count += 1;
      stats.maxImportance = Math.max(stats.maxImportance, entry.importance);
      if (stats.latestCreatedAt.localeCompare(entry.created_at) < 0) {
        stats.latestCreatedAt = entry.created_at;
      }
      continue;
    }

    statsByRawKey.set(rawKey, {
      count: 1,
      maxImportance: entry.importance,
      latestCreatedAt: entry.created_at
    });
    seedCandidates.push({
      id: entry.id,
      claimKey: normalized.claimKey,
      entity: normalized.entity,
      attribute: normalized.attribute,
      type: entry.type,
      tags: normalizeGroundingTags(entry.tags),
      sourceContextTokens: tokenizeGroundingText(entry.source_context),
      subjectTokens: tokenizeGroundingText(entry.subject),
      createdAt: entry.created_at
    });
  }

  // Rank keys: most used, then most important, then most recent, then alphabetical.
  const rankClaimKeys = ([leftKey, leftStats], [rightKey, rightStats]) => {
    const byCount = rightStats.count - leftStats.count;
    if (byCount !== 0) {
      return byCount;
    }
    const byImportance = rightStats.maxImportance - leftStats.maxImportance;
    if (byImportance !== 0) {
      return byImportance;
    }
    const byRecency = rightStats.latestCreatedAt.localeCompare(leftStats.latestCreatedAt);
    if (byRecency !== 0) {
      return byRecency;
    }
    return leftKey.localeCompare(rightKey);
  };
  // Within one key: newest first, id as the deterministic tie-break.
  const rankSeedEntries = (left, right) => {
    const byRecency = right.createdAt.localeCompare(left.createdAt);
    return byRecency !== 0 ? byRecency : left.id.localeCompare(right.id);
  };

  const orderedEntries = [];
  for (const [rankedKey] of Array.from(statsByRawKey.entries()).sort(rankClaimKeys)) {
    const matching = seedCandidates.filter((candidate) => candidate.claimKey === rankedKey);
    orderedEntries.push(...matching.sort(rankSeedEntries));
  }
  return { entries: orderedEntries };
}
1342
- function trimWeakLeadingTokens(tokens) {
1343
- let start = 0;
1344
- while (start < tokens.length && COMPACTION_WEAK_LEADING_TOKENS.has(tokens[start] ?? "")) {
1345
- start += 1;
1346
- }
1347
- return tokens.slice(start);
2196
/**
 * Builds a support seed from bare example claim keys.
 *
 * Keys that cannot be normalized, or whose normalized form fails
 * `isTrustedClaimKeyForCleanup`, are skipped. Each surviving key becomes a
 * synthetic seed entry with id `example:<1-based position in the input>`, no
 * tags or tokens, and the Unix epoch as its creation timestamp.
 *
 * @param {string[]} claimKeys - Example claim keys.
 * @returns {{entries: object[]}} Seed containing the synthetic entries.
 */
function buildClaimKeySupportSeedFromExamples(claimKeys) {
  const entries = [];
  claimKeys.forEach((claimKey, index) => {
    const { normalized } = inspectClaimKey(claimKey);
    if (!normalized || !isTrustedClaimKeyForCleanup(normalized.claimKey)) {
      return;
    }
    entries.push({
      // The id keeps the original input position even when earlier keys were skipped.
      id: `example:${index + 1}`,
      claimKey: normalized.claimKey,
      entity: normalized.entity,
      attribute: normalized.attribute,
      tags: [],
      sourceContextTokens: [],
      subjectTokens: [],
      createdAt: "1970-01-01T00:00:00.000Z"
    });
  });
  return { entries };
}
1349
- function joinCompactionReasons(reasons) {
1350
- if (reasons.length <= 1) {
1351
- return reasons[0] ?? "";
2217
/**
 * Evaluates how well a proposed claim key for `entry` is supported by the
 * trusted seed and by the entry's own text.
 *
 * Trusted entries other than `entry` itself are split into "exact" (same claim
 * key) and "family" (same entity, different claim key) groups. An entry in
 * either group is "grounded" when it shares at least one tag or source-context
 * token with `entry`. Exact reuse is trusted when a grounded exact entry exists
 * or every exact entry is a synthetic `example:` entry; family reuse is trusted
 * when a grounded family entry exists or any family entry is an `example:` entry.
 *
 * @param {object} entry - Candidate entry (reads `id`, `tags`, `source_context`, plus whatever
 *   the lexical and template helpers read).
 * @param {string} targetClaimKey - Proposed claim key.
 * @param {{entries: object[]}} trustedHints - Seed of trusted entries.
 * @returns {object} Evaluation record; the empty evaluation when the key cannot be normalized.
 */
function evaluateClaimKeySupport(entry, targetClaimKey, trustedHints) {
  const { normalized } = inspectClaimKey(targetClaimKey);
  if (!normalized) {
    return createEmptyClaimKeySupportEvaluation();
  }
  const { claimKey: targetKey, entity: targetEntity, attribute: targetAttribute } = normalized;

  const entryTagSet = new Set(normalizeGroundingTags(entry.tags));
  const entrySourceTokens = new Set(tokenizeGroundingText(entry.source_context));
  const overlapWith = (trustedEntry) => inspectGroundingOverlap(entryTagSet, entrySourceTokens, trustedEntry);
  const isGrounded = (trustedEntry) => {
    const overlap = overlapWith(trustedEntry);
    return overlap.tagGrounding || overlap.sourceContextGrounding;
  };
  const isExample = (candidate) => candidate.id.startsWith("example:");

  // Partition the seed into exact-key matches and same-entity siblings, skipping the entry itself.
  const exactReuseEntries = [];
  const familyReuseEntries = [];
  for (const trustedEntry of trustedHints.entries) {
    if (entry.id && trustedEntry.id === entry.id) {
      continue;
    }
    if (trustedEntry.claimKey === targetKey) {
      exactReuseEntries.push(trustedEntry);
    } else if (trustedEntry.entity === targetEntity) {
      familyReuseEntries.push(trustedEntry);
    }
  }
  const relevantEntries = [...exactReuseEntries, ...familyReuseEntries];

  const groundedExactReuseEntries = exactReuseEntries.filter(isGrounded);
  const groundedFamilyReuseEntries = familyReuseEntries.filter(isGrounded);
  const groundedExactCount = groundedExactReuseEntries.length;
  const groundedFamilyCount = groundedFamilyReuseEntries.length;
  const familyCount = familyReuseEntries.length;

  const tagGrounding = relevantEntries.some((trustedEntry) => overlapWith(trustedEntry).tagGrounding);
  const sourceContextGrounding = relevantEntries.some((trustedEntry) => overlapWith(trustedEntry).sourceContextGrounding);
  const localGrounding = tagGrounding || sourceContextGrounding;

  const lexicalAlignment = inspectCandidateLexicalAlignment(entry, targetEntity, targetAttribute);
  const templateSupport = matchesConservativeTemplateSupport(entry, targetAttribute);
  const stableSlotSupport = matchesStableFamilySlotSupport(targetAttribute);

  const hasExampleFamilySibling = familyReuseEntries.some(isExample);
  const trustedExactReuse = exactReuseEntries.length > 0 && (groundedExactCount > 0 || exactReuseEntries.every(isExample));
  const trustedEntityFamilyReuse = groundedFamilyCount > 0 || hasExampleFamilySibling;

  // Example-only families count as grounded for promotion purposes.
  let promotionGroundedFamilyCount = 0;
  if (groundedFamilyCount > 0) {
    promotionGroundedFamilyCount = groundedFamilyCount;
  } else if (hasExampleFamilySibling) {
    promotionGroundedFamilyCount = familyCount;
  }
  const promotionSupport = resolveClaimKeyPromotionSupport({
    exactReuseCount: trustedExactReuse ? Math.max(1, groundedExactCount) : 0,
    familyReuseCount: familyCount,
    groundedFamilyReuseCount: promotionGroundedFamilyCount,
    localGrounding: localGrounding || hasExampleFamilySibling,
    templateSupport,
    stableSlotSupport,
    lexicalAlignment
  });

  const supportedProposal = lexicalAlignment.any && (templateSupport || stableSlotSupport || trustedExactReuse || trustedEntityFamilyReuse || localGrounding);

  // Keep the label of every [condition, label] pair whose condition holds, in order.
  const labelsWhere = (pairs) => pairs.filter(([holds]) => holds).map(([, label]) => label);

  const supportEvidence = labelsWhere([
    [trustedExactReuse, "trusted_exact_reuse"],
    [trustedEntityFamilyReuse, "trusted_entity_family_reuse"],
    [tagGrounding, "tag_grounding"],
    [sourceContextGrounding, "source_context_grounding"],
    [lexicalAlignment.entity, "entity_lexical_alignment"],
    [lexicalAlignment.attribute, "attribute_lexical_alignment"],
    [lexicalAlignment.strongEntityAttribute, "strong_entity_attribute_lexical_alignment"],
    [templateSupport, "template_support"],
    [stableSlotSupport, "stable_slot_support"],
    [promotionSupport.relaxedStableSlotFamilyGate, "single_grounded_family_sibling"]
  ]);

  const exactRationaleCount = Math.max(1, groundedExactCount);
  const familyRationaleCount = Math.max(1, groundedFamilyCount || familyCount);
  // The weaker "clear lexical alignment" wording is only used when the strong one does not apply.
  let lexicalFragment = null;
  if (lexicalAlignment.attribute) {
    lexicalFragment = lexicalAlignment.strongEntityAttribute ? null : "clear lexical alignment to the proposed slot";
  } else if (lexicalAlignment.entity) {
    lexicalFragment = "clear lexical alignment to the proposed entity";
  }
  const rationaleFragments = labelsWhere([
    [trustedExactReuse, `trusted exact reuse from ${exactRationaleCount} matching entr${exactRationaleCount === 1 ? "y" : "ies"}`],
    [trustedEntityFamilyReuse, `trusted ${targetEntity} family reuse from ${familyRationaleCount} supporting entr${familyRationaleCount === 1 ? "y" : "ies"}`],
    [tagGrounding, "overlapping tags with trusted corpus entries"],
    [sourceContextGrounding, "overlapping source_context with trusted corpus entries"],
    [lexicalAlignment.strongEntityAttribute, "strong entity and slot lexical alignment"],
    [lexicalFragment !== null, lexicalFragment],
    [templateSupport, "a conservative policy/default/source-of-truth template match"],
    [stableSlotSupport, "a stable compact slot head in a well-established entity family"],
    [promotionSupport.relaxedStableSlotFamilyGate, "one grounded family sibling cleared the stable-slot family gate"]
  ]);

  const idOf = (candidate) => candidate.id;
  return {
    autoApplyClass: promotionSupport.autoApplyClass,
    supportedProposal,
    trustedExactReuse,
    trustedEntityFamilyReuse,
    tagGrounding,
    sourceContextGrounding,
    localGrounding,
    entityLexicalAlignment: lexicalAlignment.entity,
    attributeLexicalAlignment: lexicalAlignment.attribute,
    strongEntityAttributeLexicalAlignment: lexicalAlignment.strongEntityAttribute,
    lexicalAlignment: lexicalAlignment.any,
    templateSupport,
    stableSlotSupport,
    familyReuseCount: familyCount,
    groundedFamilyReuseCount: groundedFamilyCount,
    relaxedStableSlotFamilyGate: promotionSupport.relaxedStableSlotFamilyGate,
    supportingEntryIds: normalizeStringArray2([
      ...groundedExactReuseEntries.map(idOf),
      ...groundedFamilyReuseEntries.map(idOf),
      ...familyReuseEntries.filter(isExample).map(idOf)
    ]),
    supportEvidence,
    rationaleFragments
  };
}
1355
- function isRequirementRelation(token) {
1356
- return token === "depend" || token === "depends" || token === "need" || token === "needs" || token === "required" || token === "require" || token === "requires";
2312
/**
 * Creates the "no support" evaluation record: a null auto-apply class, every
 * boolean signal false, both counters zero and fresh empty arrays.
 *
 * @returns {object} A new evaluation object; callers may mutate it freely.
 */
function createEmptyClaimKeySupportEvaluation() {
  const signalNames = [
    "supportedProposal",
    "trustedExactReuse",
    "trustedEntityFamilyReuse",
    "tagGrounding",
    "sourceContextGrounding",
    "localGrounding",
    "entityLexicalAlignment",
    "attributeLexicalAlignment",
    "strongEntityAttributeLexicalAlignment",
    "lexicalAlignment",
    "templateSupport",
    "stableSlotSupport"
  ];
  const clearedSignals = Object.fromEntries(signalNames.map((name) => [name, false]));
  return {
    autoApplyClass: null,
    ...clearedSignals,
    familyReuseCount: 0,
    groundedFamilyReuseCount: 0,
    relaxedStableSlotFamilyGate: false,
    supportingEntryIds: [],
    supportEvidence: [],
    rationaleFragments: []
  };
}
1358
- function isOrderingRelation(token) {
1359
- return token === "after" || token === "before" || token === "follow" || token === "follows" || token === "precede" || token === "precedes";
2335
/**
 * Compacts a claim key and reports whether the resulting attribute is compact
 * enough to be applied automatically.
 *
 * An attribute qualifies when it has between 1 and `MAX_AUTO_APPLY_ATTRIBUTE_TOKENS`
 * "_"-separated tokens and none of them is in `AWKWARD_AUTO_APPLY_ATTRIBUTE_TOKENS`.
 * Compaction provenance from an earlier pass (`prior`) is carried forward, and
 * the two reasons are joined with " and " when both passes supplied one.
 *
 * @param {string} claimKey - Claim key to evaluate.
 * @param {{priorCompactedFrom?: string | null, priorCompactionReason?: string | null}} [prior]
 * @returns {{claimKey: string, compactedFrom: string | null, compactionReason: string | null,
 *   compactEnoughForAutoApply: boolean, blockerReason: string | null}}
 */
function evaluateClaimKeyCompactness(claimKey, prior) {
  const compacted = compactClaimKey(claimKey);
  if (!compacted) {
    return {
      claimKey,
      compactedFrom: null,
      compactionReason: null,
      compactEnoughForAutoApply: false,
      blockerReason: "invalid_claim_key"
    };
  }

  const attributeTokens = compacted.attribute.split("_").filter((token) => token.length > 0);
  const tokenCount = attributeTokens.length;
  const withinTokenBudget = tokenCount > 0 && tokenCount <= MAX_AUTO_APPLY_ATTRIBUTE_TOKENS;
  const compactEnoughForAutoApply = withinTokenBudget && !attributeTokens.some((token) => AWKWARD_AUTO_APPLY_ATTRIBUTE_TOKENS.has(token));

  const priorReason = prior?.priorCompactionReason;
  let compactionReason;
  if (compacted.reason && priorReason) {
    // Both passes compacted something: keep the earlier reason first.
    compactionReason = `${priorReason} and ${compacted.reason}`;
  } else {
    compactionReason = compacted.reason ?? priorReason ?? null;
  }

  return {
    claimKey: compacted.claimKey,
    compactedFrom: compacted.compactedFrom ?? prior?.priorCompactedFrom ?? null,
    compactionReason,
    compactEnoughForAutoApply,
    blockerReason: compactEnoughForAutoApply ? null : "non_compact_canonical_slot"
  };
}
1361
- function isPreservationRelation(token) {
1362
- return token === "keep" || token === "keeps" || token === "maintain" || token === "maintains" || token === "preserve" || token === "preserves" || token === "retain" || token === "retains";
2358
/**
 * Normalizes tags for grounding comparisons: each tag goes through
 * `normalizeClaimKeySegment`, empty results are dropped and duplicates removed.
 *
 * @param {string[] | null | undefined} tags - Raw tags; a nullish value is treated as no tags.
 * @returns {string[]} Unique normalized tags in first-seen order.
 */
function normalizeGroundingTags(tags) {
  const normalizedTags = [];
  for (const tag of tags ?? []) {
    const normalizedTag = normalizeClaimKeySegment(tag);
    if (normalizedTag.length > 0) {
      normalizedTags.push(normalizedTag);
    }
  }
  return normalizeStringArray2(normalizedTags);
}
1364
- function startsWithTokens(tokens, prefix) {
1365
- return prefix.every((token, index) => tokens[index] === token);
2361
/**
 * Tokenizes free text for grounding comparisons.
 *
 * The text is split on runs of non-alphanumeric ASCII characters, each piece is
 * passed through `normalizeClaimKeySegment`, and pieces of two characters or
 * fewer or listed in `GROUNDING_STOP_TOKENS` are discarded before de-duplication.
 *
 * @param {string | null | undefined} value - Text to tokenize.
 * @returns {string[]} Unique tokens in first-seen order; empty for falsy input.
 */
function tokenizeGroundingText(value) {
  if (!value) {
    return [];
  }
  const tokens = [];
  for (const fragment of value.split(/[^a-zA-Z0-9]+/u)) {
    const token = normalizeClaimKeySegment(fragment);
    if (token.length > 2 && !GROUNDING_STOP_TOKENS.has(token)) {
      tokens.push(token);
    }
  }
  return normalizeStringArray2(tokens);
}
1367
- function endsWithTokens(tokens, suffix) {
1368
- return suffix.every((token, index) => tokens[tokens.length - suffix.length + index] === token);
2369
/**
 * Collects the entry's own lexical tokens: grounding tokens from its subject,
 * content and source context, followed by its normalized tags, de-duplicated in
 * that order.
 *
 * @param {{subject?: string, content?: string, source_context?: string, tags?: string[]}} entry
 * @returns {string[]} Unique tokens.
 */
function buildEntryLocalLexicalTokens(entry) {
  const textTokens = [entry.subject, entry.content, entry.source_context].flatMap((text) => tokenizeGroundingText(text));
  const tagTokens = normalizeGroundingTags(entry.tags);
  return normalizeStringArray2(textTokens.concat(tagTokens));
}
1370
-
1371
- // src/core/store/pipeline.ts
1372
- import { randomUUID } from "crypto";
1373
-
1374
- // src/core/supersession.ts
1375
- function validateSupersessionRules(oldEntry, newEntry) {
1376
- if (oldEntry.type !== newEntry.type) {
2377
/**
 * Picks the auto-apply class (if any) justified by the collected support signals.
 *
 * Rules are tried in priority order and the first match wins:
 *  1. exact reuse plus attribute alignment or template support;
 *  2. template support with local grounding, at least one family sibling and
 *     attribute or entity alignment;
 *  3. stable slot with local grounding, a grounded sibling, attribute alignment
 *     and either two or more siblings or the relaxed single-sibling gate;
 *  4. local grounding with a grounded sibling and strong entity+attribute alignment.
 *
 * @param {object} input - Signals: `exactReuseCount`, `familyReuseCount`,
 *   `groundedFamilyReuseCount`, `localGrounding`, `templateSupport`,
 *   `stableSlotSupport`, `lexicalAlignment`.
 * @returns {{autoApplyClass: string | null, relaxedStableSlotFamilyGate: *}}
 */
function resolveClaimKeyPromotionSupport(input) {
  const alignment = input.lexicalAlignment;
  const decision = (autoApplyClass, relaxedStableSlotFamilyGate) => ({ autoApplyClass, relaxedStableSlotFamilyGate });

  const exactReuseApplies = input.exactReuseCount > 0 && (alignment.attribute || input.templateSupport);
  if (exactReuseApplies) {
    return decision("trusted_exact_reuse_grounded", false);
  }

  const familyTemplateApplies = input.templateSupport && input.localGrounding && input.familyReuseCount > 0 && (alignment.attribute || alignment.entity);
  if (familyTemplateApplies) {
    return decision("trusted_family_template_grounded", false);
  }

  // The relaxed gate lets a single grounded sibling through, but only with strong alignment.
  const relaxedStableSlotFamilyGate = input.stableSlotSupport && input.localGrounding && input.groundedFamilyReuseCount > 0 && input.familyReuseCount === 1 && alignment.strongEntityAttribute;
  const stableSlotApplies = input.stableSlotSupport && input.localGrounding && input.groundedFamilyReuseCount > 0 && (input.familyReuseCount >= 2 || relaxedStableSlotFamilyGate) && alignment.attribute;
  if (stableSlotApplies) {
    return decision("trusted_family_stable_slot", relaxedStableSlotFamilyGate);
  }

  const groundedAlignmentApplies = input.localGrounding && input.groundedFamilyReuseCount > 0 && alignment.strongEntityAttribute;
  if (groundedAlignmentApplies) {
    return decision("trusted_family_grounded_alignment", false);
  }

  return decision(null, false);
}
2408
/**
 * Reports whether a trusted entry shares any tag or any source-context token
 * with the candidate entry.
 *
 * @param {Set<string>} entryTagSet - Candidate entry's normalized tags.
 * @param {Set<string>} entrySourceTokens - Candidate entry's source-context tokens.
 * @param {{tags: string[], sourceContextTokens: string[]}} trustedEntry
 * @returns {{tagGrounding: boolean, sourceContextGrounding: boolean}}
 */
function inspectGroundingOverlap(entryTagSet, entrySourceTokens, trustedEntry) {
  const { tags, sourceContextTokens } = trustedEntry;
  const sharedTagCount = countSetOverlap(entryTagSet, tags);
  const sharedSourceTokenCount = countSetOverlap(entrySourceTokens, sourceContextTokens);
  return {
    tagGrounding: sharedTagCount > 0,
    sourceContextGrounding: sharedSourceTokenCount > 0
  };
}
2414
/**
 * Measures how many tokens of a proposed entity and attribute appear in the
 * entry's own lexical tokens.
 *
 * Attribute tokens listed in `GROUNDING_STOP_TOKENS` are ignored. Attribute
 * alignment is "strong" when at least min(attribute token count, 2) attribute
 * tokens are matched, and `strongEntityAttribute` additionally requires an
 * entity match.
 *
 * @param {object} entry - Candidate entry.
 * @param {string} entity - "_"-separated entity segment.
 * @param {string} attribute - "_"-separated attribute segment.
 * @returns {{entity: boolean, attribute: boolean, any: boolean, strongEntityAttribute: boolean,
 *   entityOverlapCount: number, attributeOverlapCount: number}}
 */
function inspectCandidateLexicalAlignment(entry, entity, attribute) {
  const splitSegment = (segment) => segment.split("_").filter((token) => token.length > 0);
  const lexicalTokens = new Set(buildEntryLocalLexicalTokens(entry));
  const entityTokens = splitSegment(entity);
  const attributeTokens = splitSegment(attribute).filter((token) => !GROUNDING_STOP_TOKENS.has(token));

  const entityOverlapCount = countSetOverlap(lexicalTokens, entityTokens);
  const attributeOverlapCount = countSetOverlap(lexicalTokens, attributeTokens);
  const entityMatched = entityOverlapCount > 0;
  const attributeMatched = attributeOverlapCount > 0;
  // One-token attributes need that token; longer ones need at least two matches.
  const requiredAttributeMatches = Math.min(attributeTokens.length, 2);
  const attributeStronglyMatched = attributeTokens.length > 0 && attributeOverlapCount >= requiredAttributeMatches;

  return {
    entity: entityMatched,
    attribute: attributeMatched,
    any: entityMatched || attributeMatched,
    strongEntityAttribute: entityMatched && attributeStronglyMatched,
    entityOverlapCount,
    attributeOverlapCount
  };
}
2432
/**
 * Checks whether the entry's wording matches one of three conservative templates
 * (authoritative source, policy/default, architecture) AND the proposed attribute
 * contains a token associated with that same template.
 *
 * The wording check runs on the lower-cased subject and content joined by a newline.
 *
 * @param {{subject: string, content: string}} entry
 * @param {string} attribute - "_"-separated attribute segment.
 * @returns {boolean} True when any template matches both the text and the attribute.
 */
function matchesConservativeTemplateSupport(entry, attribute) {
  const attributeTokens = new Set(attribute.split("_").filter((token) => token.length > 0));
  const subjectText = entry.subject.toLowerCase();
  const contentText = entry.content.toLowerCase();
  const combinedText = `${subjectText}\n${contentText}`;

  // Each template pairs a wording pattern with the attribute tokens it may vouch for.
  const templates = [
    [
      /\b(authoritative|source of truth|source of record|canonical guide|canonical reference|primary guide|runbook)\b/u,
      AUTHORITATIVE_TEMPLATE_ATTRIBUTE_TOKENS
    ],
    [
      /\b(should|must|should stay|must stay|always|never|default(?:s)? to|default(?:s)?|policy|guardrail|required|preference|prefers?)\b/u,
      POLICY_TEMPLATE_ATTRIBUTE_TOKENS
    ],
    [
      /\b(uses|supports|backed by|architecture|boundary|workflow|process|pipeline|adapter|layer|contract|interface|surface)\b/u,
      ARCHITECTURE_TEMPLATE_ATTRIBUTE_TOKENS
    ]
  ];
  return templates.some(([wording, templateTokens]) => wording.test(combinedText) && intersects(attributeTokens, templateTokens));
}
2453
/**
 * Checks whether an attribute is a short slot name ending in a recognised stable head.
 *
 * The attribute must have between 1 and `MAX_AUTO_APPLY_ATTRIBUTE_TOKENS`
 * non-empty "_"-separated tokens, and its final token must be in
 * `STABLE_FAMILY_SLOT_ATTRIBUTE_HEADS`.
 *
 * @param {string} attribute - "_"-separated attribute segment.
 * @returns {boolean}
 */
function matchesStableFamilySlotSupport(attribute) {
  const tokens = attribute.split("_").filter((token) => token.length > 0);
  const tokenCount = tokens.length;
  const withinBudget = tokenCount !== 0 && tokenCount <= MAX_AUTO_APPLY_ATTRIBUTE_TOKENS;
  if (!withinBudget) {
    return false;
  }
  const head = tokens[tokenCount - 1];
  return typeof head === "string" && STABLE_FAMILY_SLOT_ATTRIBUTE_HEADS.has(head);
}
2461
/**
 * Counts how many values yielded by `right` are members of the set `left`.
 * Repeated values in `right` are counted each time they occur.
 *
 * @param {Set<any>} left - Membership set.
 * @param {Iterable<any>} right - Values to probe.
 * @returns {number} Number of hits.
 */
function countSetOverlap(left, right) {
  return [...right].filter((value) => left.has(value)).length;
}
2470
/**
 * Tells whether any value yielded by `left` is a member of the set `right`.
 *
 * @param {Iterable<any>} left - Values to probe.
 * @param {Set<any>} right - Membership set.
 * @returns {boolean} True on the first shared value, false when there is none.
 */
function intersects(left, right) {
  const iterator = left[Symbol.iterator]();
  for (let step = iterator.next(); !step.done; step = iterator.next()) {
    if (right.has(step.value)) {
      return true;
    }
  }
  return false;
}
1398
- function describeSupersessionRuleFailure(reason) {
1399
- switch (reason) {
1400
- case "type_mismatch":
1401
- return "Supersession requires both entries to have the same type.";
1402
- case "milestone":
1403
- return "Milestone entries are never superseded automatically.";
1404
- case "core_expiry":
1405
- return "Core-expiry entries are never superseded automatically.";
2478
/**
 * Removes falsy values and duplicates from `values`, keeping the first
 * occurrence of each remaining value in its original order. Values are not trimmed.
 *
 * @param {Iterable<string | null | undefined>} values
 * @returns {string[]} Unique truthy values.
 */
function normalizeStringArray2(values) {
  // A Set iterates in insertion order, so de-duplicating first preserves first-seen order.
  const distinct = /* @__PURE__ */ new Set(values);
  return [...distinct].filter((value) => Boolean(value));
}
1408
2490
 
1409
2491
  // src/core/store/claim-extraction.ts
1410
- var SELF_REFERENTIAL_ENTITIES2 = /* @__PURE__ */ new Set(["i", "me", "the_user", "myself", "user", "we", "our_team", "the_project", "this_project"]);
2492
+ var SELF_REFERENTIAL_ENTITIES = /* @__PURE__ */ new Set(["i", "me", "the_user", "myself", "user", "we", "our_team", "the_project", "this_project"]);
1411
2493
  var USER_REFERENTIAL_ENTITIES = /* @__PURE__ */ new Set(["i", "me", "myself", "the_user", "user"]);
1412
2494
  var PROJECT_REFERENTIAL_ENTITIES = /* @__PURE__ */ new Set(["the_project", "this_project"]);
1413
2495
  var DETERMINISTIC_ATTRIBUTE_HEADS = /* @__PURE__ */ new Set([
@@ -1443,7 +2525,20 @@ var DETERMINISTIC_ATTRIBUTE_HEADS = /* @__PURE__ */ new Set([
1443
2525
  ]);
1444
2526
  var MAX_ENTITY_HINTS = 12;
1445
2527
  var MAX_CLAIM_KEY_EXAMPLES = 8;
2528
+ var MAX_SUPPORT_CLAIM_KEY_EXAMPLES = 128;
1446
2529
  var DEFAULT_REPAIR_CONFIDENCE = 0.86;
2530
+ var HIGH_CONFIDENCE_BACKFILL_THRESHOLD = 0.92;
2531
+ var SUPPORTED_INGEST_AUTO_APPLY_THRESHOLD = 0.72;
2532
+ var COMPACTED_SUPPORTED_INGEST_AUTO_APPLY_THRESHOLD = 0.74;
2533
+ var PROPOSAL_CONFIDENCE_THRESHOLD = 0.75;
2534
+ var SUPPORTED_PROPOSAL_CONFIDENCE_THRESHOLD = 0.65;
2535
/**
 * Applies a claim-extraction result to an entry.
 *
 * A lifecycle is built from the extraction result and the entry's inferred
 * ingest support context; when one is produced it is applied to the entry,
 * otherwise the entry is left untouched.
 *
 * @param {object} entry - Entry passed to `applyClaimKeyLifecycle`.
 * @param {object} extracted - Result of claim-key extraction.
 * @returns {void}
 */
function applyClaimExtractionResultToEntry(entry, extracted) {
  const supportContext = buildInferredIngestClaimKeySupportContext(entry);
  const lifecycle = buildExtractedClaimKeyLifecycle(extracted, supportContext);
  if (lifecycle) {
    applyClaimKeyLifecycle(entry, lifecycle);
  }
}
1447
2542
  async function previewClaimKeyExtraction(entry, llm, config, options = {}) {
1448
2543
  if (!config.enabled || !config.eligibleTypes.includes(entry.type)) {
1449
2544
  return null;
@@ -1487,30 +2582,170 @@ async function previewClaimKeyExtraction(entry, llm, config, options = {}) {
1487
2582
  options.onPreviewOutcome?.(buildPreviewOutcome("rejected_candidate", attempt));
1488
2583
  return tryDeterministicClaimKeyRepair(entry, normalizedHints);
1489
2584
  }
1490
- async function extractClaimKey(entry, llm, config, options = {}) {
1491
- const preview = await previewClaimKeyExtraction(entry, llm, config, options);
1492
- if (!preview) {
1493
- return null;
2585
+ async function extractClaimKeyDecision(entry, llm, config, options = {}) {
2586
+ if (!config.enabled || !config.eligibleTypes.includes(entry.type)) {
2587
+ return {
2588
+ result: null,
2589
+ diagnostic: {
2590
+ outcome: "ineligible_type",
2591
+ confidence: null,
2592
+ path: null,
2593
+ warning: null,
2594
+ suggestedClaimKey: null,
2595
+ reviewable: false,
2596
+ supportEvidence: [],
2597
+ rationale: "entry type is not eligible for claim-key extraction"
2598
+ }
2599
+ };
2600
+ }
2601
+ const normalizedHints = normalizeClaimExtractionHints(options.hints ?? {});
2602
+ let attempt;
2603
+ try {
2604
+ attempt = await attemptClaimExtraction(entry, normalizedHints, llm);
2605
+ } catch (error) {
2606
+ const repaired2 = tryDeterministicClaimKeyRepair(entry, normalizedHints);
2607
+ if (repaired2) {
2608
+ return finalizeDeterministicRepairDecision(repaired2, options.entityPrefixStats);
2609
+ }
2610
+ const warning = formatClaimExtractionError(error);
2611
+ options.onWarning?.(`Claim extraction failed for "${entry.subject}": ${warning}`);
2612
+ return {
2613
+ result: null,
2614
+ diagnostic: {
2615
+ outcome: "extraction_failure",
2616
+ confidence: null,
2617
+ path: null,
2618
+ warning,
2619
+ suggestedClaimKey: null,
2620
+ reviewable: false,
2621
+ supportEvidence: [],
2622
+ rationale: "claim extraction failed before a safe candidate could be produced"
2623
+ }
2624
+ };
2625
+ }
2626
+ if (attempt.response.no_claim === true) {
2627
+ return {
2628
+ result: null,
2629
+ diagnostic: {
2630
+ outcome: "no_claim",
2631
+ confidence: normalizeConfidence(attempt.response.confidence),
2632
+ path: attempt.path,
2633
+ warning: null,
2634
+ suggestedClaimKey: null,
2635
+ reviewable: false,
2636
+ supportEvidence: [],
2637
+ rationale: "model explicitly returned no_claim"
2638
+ }
2639
+ };
1494
2640
  }
1495
- if (preview.path === "deterministic_repair" || preview.confidence >= config.confidenceThreshold) {
1496
- return preview;
2641
+ const warnings = [];
2642
+ const candidate = buildClaimExtractionCandidate(entry, attempt.response, normalizedHints, (warning) => {
2643
+ warnings.push(warning);
2644
+ options.onWarning?.(warning);
2645
+ });
2646
+ if (!candidate) {
2647
+ const repaired2 = tryDeterministicClaimKeyRepair(entry, normalizedHints);
2648
+ if (repaired2) {
2649
+ return finalizeDeterministicRepairDecision(repaired2, options.entityPrefixStats);
2650
+ }
2651
+ return {
2652
+ result: null,
2653
+ diagnostic: {
2654
+ outcome: "rejected_candidate",
2655
+ confidence: normalizeConfidence(attempt.response.confidence),
2656
+ path: attempt.path,
2657
+ warning: warnings[0] ?? null,
2658
+ suggestedClaimKey: null,
2659
+ reviewable: false,
2660
+ supportEvidence: [],
2661
+ rationale: "model proposed a structurally unsafe or non-canonical claim key"
2662
+ }
2663
+ };
1497
2664
  }
1498
- const deterministicRepair = tryDeterministicClaimKeyRepair(entry, normalizeClaimExtractionHints(options.hints ?? {}));
1499
- if (deterministicRepair) {
1500
- return deterministicRepair;
2665
+ const result = toClaimExtractionResult(candidate, attempt.path);
2666
+ if (result.confidence >= config.confidenceThreshold) {
2667
+ return {
2668
+ result,
2669
+ diagnostic: buildAcceptedDiagnostic(result, result.confidence >= config.confidenceThreshold ? "candidate met the ingest confidence threshold" : null)
2670
+ };
1501
2671
  }
1502
- return null;
2672
+ const support = evaluateClaimKeySupport(
2673
+ {
2674
+ subject: entry.subject,
2675
+ content: entry.content,
2676
+ type: entry.type,
2677
+ tags: entry.tags,
2678
+ source_context: entry.source_context
2679
+ },
2680
+ result.claimKey ?? "",
2681
+ buildClaimKeySupportSeedFromExamples(options.supportClaimKeys ?? [])
2682
+ );
2683
+ const compactness = evaluateClaimKeyCompactness(result.claimKey ?? "", {
2684
+ priorCompactedFrom: result.compactedFrom ?? null,
2685
+ priorCompactionReason: result.compactionReason ?? null
2686
+ });
2687
+ const autoApplyThreshold = support.autoApplyClass !== null && compactness.compactedFrom ? COMPACTED_SUPPORTED_INGEST_AUTO_APPLY_THRESHOLD : support.autoApplyClass !== null ? SUPPORTED_INGEST_AUTO_APPLY_THRESHOLD : HIGH_CONFIDENCE_BACKFILL_THRESHOLD;
2688
+ const proposalThreshold = support.supportedProposal ? SUPPORTED_PROPOSAL_CONFIDENCE_THRESHOLD : PROPOSAL_CONFIDENCE_THRESHOLD;
2689
+ if (compactness.claimKey !== result.claimKey) {
2690
+ result.claimKey = compactness.claimKey;
2691
+ result.compactedFrom = compactness.compactedFrom;
2692
+ result.compactionReason = compactness.compactionReason;
2693
+ }
2694
+ if (result.confidence >= autoApplyThreshold && compactness.compactEnoughForAutoApply) {
2695
+ result.acceptanceRationale = support.autoApplyClass !== null ? `accepted below the default threshold via ${describeSupportPromotionClass(support)}` : "accepted as a high-confidence preview";
2696
+ return {
2697
+ result,
2698
+ diagnostic: buildAcceptedDiagnostic(
2699
+ result,
2700
+ support.autoApplyClass !== null ? `supported near-miss candidate cleared the conservative auto-apply threshold via ${describeSupportPromotionClass(support)}` : `candidate cleared the conservative high-confidence threshold of ${autoApplyThreshold.toFixed(2)}`
2701
+ )
2702
+ };
2703
+ }
2704
+ const repaired = tryDeterministicClaimKeyRepair(entry, normalizedHints);
2705
+ if (repaired && (!result.claimKey || repaired.claimKey === result.claimKey)) {
2706
+ return finalizeDeterministicRepairDecision(repaired, options.entityPrefixStats);
2707
+ }
2708
+ if (result.confidence >= proposalThreshold) {
2709
+ return {
2710
+ result: null,
2711
+ diagnostic: {
2712
+ outcome: "low_confidence_candidate",
2713
+ confidence: result.confidence,
2714
+ path: result.path,
2715
+ warning: warnings[0] ?? null,
2716
+ suggestedClaimKey: result.claimKey,
2717
+ reviewable: true,
2718
+ supportEvidence: support.supportEvidence,
2719
+ rationale: support.rationaleFragments.length > 0 ? `candidate stayed below the auto-apply threshold but has structured support from ${support.rationaleFragments.join(", ")}` : `candidate stayed below the auto-apply threshold of ${autoApplyThreshold.toFixed(2)}`
2720
+ }
2721
+ };
2722
+ }
2723
+ return {
2724
+ result: null,
2725
+ diagnostic: {
2726
+ outcome: "low_confidence_candidate",
2727
+ confidence: result.confidence,
2728
+ path: result.path,
2729
+ warning: warnings[0] ?? null,
2730
+ suggestedClaimKey: result.claimKey,
2731
+ reviewable: false,
2732
+ supportEvidence: support.supportEvidence,
2733
+ rationale: "candidate stayed below both the conservative auto-apply and review thresholds"
2734
+ }
2735
+ };
1503
2736
  }
1504
2737
  async function getEntityHints(db) {
1505
2738
  return db.getDistinctClaimKeyPrefixes();
1506
2739
  }
1507
- async function runBatchClaimExtraction(results, ports, config, _concurrency = 10, onWarning) {
2740
+ async function runBatchClaimExtraction(results, ports, config, _concurrency = 10, onWarning, onDiagnostic) {
1508
2741
  if (!config.enabled) {
1509
2742
  return /* @__PURE__ */ new Map();
1510
2743
  }
1511
2744
  const hintState = await loadClaimExtractionHintState(ports.db);
1512
2745
  const llm = ports.createLlm();
1513
2746
  const extractedEntries = /* @__PURE__ */ new Map();
2747
+ const diagnostics = /* @__PURE__ */ new Map();
2748
+ const retryEntries = [];
1514
2749
  for (const result of results) {
1515
2750
  for (const entry of result.entries) {
1516
2751
  if (entry.claim_key) {
@@ -1518,33 +2753,89 @@ async function runBatchClaimExtraction(results, ports, config, _concurrency = 10
1518
2753
  continue;
1519
2754
  }
1520
2755
  if (!config.eligibleTypes.includes(entry.type)) {
2756
+ diagnostics.set(entry, {
2757
+ outcome: "ineligible_type",
2758
+ confidence: null,
2759
+ path: null,
2760
+ warning: null,
2761
+ suggestedClaimKey: null,
2762
+ reviewable: false,
2763
+ supportEvidence: [],
2764
+ rationale: "entry type is not eligible for claim-key extraction"
2765
+ });
1521
2766
  continue;
1522
2767
  }
1523
- try {
1524
- const extracted = await extractClaimKey(
1525
- {
1526
- type: entry.type,
1527
- subject: entry.subject,
1528
- content: entry.content
1529
- },
1530
- llm,
1531
- config,
1532
- {
1533
- hints: buildEntryHints(hintState, entry),
1534
- onWarning
1535
- }
1536
- );
1537
- if (extracted?.claimKey) {
1538
- entry.claim_key = extracted.claimKey;
1539
- recordClaimKeyHint(hintState, extracted.claimKey);
1540
- extractedEntries.set(entry, extracted);
1541
- }
1542
- } catch {
2768
+ const decision = await extractBatchClaimKeyDecision(entry, llm, config, hintState, onWarning);
2769
+ diagnostics.set(entry, decision.diagnostic);
2770
+ if (decision.result?.claimKey) {
2771
+ applyClaimExtractionResultToEntry(entry, decision.result);
2772
+ recordClaimKeyHint(hintState, decision.result.claimKey);
2773
+ extractedEntries.set(entry, decision.result);
2774
+ continue;
2775
+ }
2776
+ retryEntries.push(entry);
2777
+ }
2778
+ }
2779
+ if (retryEntries.length > 0 && extractedEntries.size > 0) {
2780
+ for (const entry of retryEntries) {
2781
+ if (entry.claim_key) {
2782
+ continue;
2783
+ }
2784
+ const decision = await extractBatchClaimKeyDecision(entry, llm, config, hintState, onWarning);
2785
+ diagnostics.set(entry, decision.diagnostic);
2786
+ if (!decision.result?.claimKey) {
2787
+ continue;
2788
+ }
2789
+ applyClaimExtractionResultToEntry(entry, decision.result);
2790
+ recordClaimKeyHint(hintState, decision.result.claimKey);
2791
+ extractedEntries.set(entry, decision.result);
2792
+ }
2793
+ }
2794
+ for (const result of results) {
2795
+ for (const entry of result.entries) {
2796
+ const diagnostic = diagnostics.get(entry);
2797
+ if (diagnostic) {
2798
+ onDiagnostic?.(entry, diagnostic);
1543
2799
  }
1544
2800
  }
1545
2801
  }
1546
2802
  return extractedEntries;
1547
2803
  }
2804
+ async function extractBatchClaimKeyDecision(entry, llm, config, hintState, onWarning) {
2805
+ try {
2806
+ return await extractClaimKeyDecision(
2807
+ {
2808
+ type: entry.type,
2809
+ subject: entry.subject,
2810
+ content: entry.content,
2811
+ tags: entry.tags,
2812
+ source_context: entry.source_context
2813
+ },
2814
+ llm,
2815
+ config,
2816
+ {
2817
+ hints: buildEntryHints(hintState, entry),
2818
+ onWarning,
2819
+ supportClaimKeys: [...hintState.supportClaimKeys],
2820
+ entityPrefixStats: hintState.entityPrefixStats
2821
+ }
2822
+ );
2823
+ } catch {
2824
+ return {
2825
+ result: null,
2826
+ diagnostic: {
2827
+ outcome: "extraction_failure",
2828
+ confidence: null,
2829
+ path: null,
2830
+ warning: "claim extraction failed unexpectedly",
2831
+ suggestedClaimKey: null,
2832
+ reviewable: false,
2833
+ supportEvidence: [],
2834
+ rationale: "claim extraction failed unexpectedly"
2835
+ }
2836
+ };
2837
+ }
2838
+ }
1548
2839
  function buildClaimExtractionSystemPrompt(hints, promptMode) {
1549
2840
  const metadataHints = [hints.userEntity ? `user_id=${hints.userEntity}` : null, hints.projectEntity ? `project=${hints.projectEntity}` : null].filter(
1550
2841
  (value) => value !== null
@@ -1595,6 +2886,9 @@ function buildClaimExtractionSystemPrompt(hints, promptMode) {
1595
2886
  '- "Agenr keeps pure logic in src/core and adapters outside it so future hosts can plug in cleanly." -> agenr/core_adapter_boundary',
1596
2887
  '- "The before-prompt-build hook only triggers after a real agent turn or message." -> before_prompt_build_hook/trigger_condition',
1597
2888
  '- "Durable memory preserves context across sessions." -> durable_memory/context_preservation',
2889
+ '- "SQLite in this environment supports window functions." -> sqlite/window_function_support',
2890
+ '- "Meeting-recorder transcripts need manual cleanup before durable ingest." -> meeting_recorder/transcript_cleanup_workflow',
2891
+ '- "Reflection synthesis can hallucinate when it summarizes from partial notes." -> reflection_synthesis/hallucination_risk',
1598
2892
  "",
1599
2893
  "Negative examples:",
1600
2894
  "- Bad: jim/america_chicago -> Good: jim/timezone",
@@ -1675,6 +2969,127 @@ function buildClaimExtractionCandidate(entry, response, hints, onWarning) {
1675
2969
  compactionReason: compactedClaimKey.reason
1676
2970
  };
1677
2971
  }
2972
+ function toClaimExtractionResult(candidate, path4) {
2973
+ return {
2974
+ claimKey: candidate.claimKey,
2975
+ confidence: candidate.confidence,
2976
+ rawEntity: candidate.rawEntity,
2977
+ rawAttribute: candidate.rawAttribute,
2978
+ path: path4,
2979
+ ...candidate.compactedFrom ? {
2980
+ compactedFrom: candidate.compactedFrom,
2981
+ compactionReason: candidate.compactionReason
2982
+ } : {}
2983
+ };
2984
+ }
2985
+ function buildAcceptedDiagnostic(result, rationale) {
2986
+ return {
2987
+ outcome: "accepted",
2988
+ confidence: result.confidence,
2989
+ path: result.path,
2990
+ warning: null,
2991
+ suggestedClaimKey: result.claimKey,
2992
+ reviewable: false,
2993
+ supportEvidence: [],
2994
+ rationale
2995
+ };
2996
+ }
2997
+ function finalizeDeterministicRepairDecision(repaired, entityPrefixStats) {
2998
+ const aliasCandidate = findSingletonAliasReuseCandidate(repaired, entityPrefixStats);
2999
+ if (!aliasCandidate) {
3000
+ return {
3001
+ result: repaired,
3002
+ diagnostic: buildAcceptedDiagnostic(repaired, "deterministic possessive-slot repair recovered the missing claim key")
3003
+ };
3004
+ }
3005
+ if (aliasCandidate.canonicalReuseSafe) {
3006
+ const reusedResult = rewriteClaimKeyEntityPrefix(repaired, aliasCandidate.dominantEntityPrefix);
3007
+ reusedResult.acceptanceRationale = `reused dominant entity family "${aliasCandidate.dominantEntityPrefix}" instead of minting singleton alias "${aliasCandidate.aliasEntityPrefix}"`;
3008
+ return {
3009
+ result: reusedResult,
3010
+ diagnostic: buildAcceptedDiagnostic(
3011
+ reusedResult,
3012
+ `deterministic repair reused dominant family "${aliasCandidate.dominantEntityPrefix}" instead of new singleton alias "${aliasCandidate.aliasEntityPrefix}"`
3013
+ )
3014
+ };
3015
+ }
3016
+ const suggestedClaimKey = rewriteClaimKeyEntityPrefix(repaired, aliasCandidate.dominantEntityPrefix).claimKey;
3017
+ return {
3018
+ result: null,
3019
+ diagnostic: {
3020
+ outcome: "low_confidence_candidate",
3021
+ confidence: repaired.confidence,
3022
+ path: repaired.path,
3023
+ warning: null,
3024
+ suggestedClaimKey,
3025
+ reviewable: true,
3026
+ supportEvidence: aliasCandidate.evidence.map((evidence) => evidence.kind),
3027
+ rationale: `deterministic repair would create singleton alias "${aliasCandidate.aliasEntityPrefix}" next to dominant trusted family "${aliasCandidate.dominantEntityPrefix}", so the new namespace was staged for review`
3028
+ }
3029
+ };
3030
+ }
3031
+ function findSingletonAliasReuseCandidate(repaired, entityPrefixStats) {
3032
+ const claimKey = repaired.claimKey;
3033
+ if (!claimKey || !entityPrefixStats || entityPrefixStats.length === 0) {
3034
+ return null;
3035
+ }
3036
+ const [entityPrefix = ""] = claimKey.split("/", 1);
3037
+ if (!entityPrefix) {
3038
+ return null;
3039
+ }
3040
+ const augmentedStats = summarizeAugmentedEntityPrefixStats(entityPrefixStats, entityPrefix);
3041
+ return detectClaimKeySingletonAliasCandidatesFromStats(augmentedStats).find((candidate) => candidate.aliasEntityPrefix === entityPrefix) ?? null;
3042
+ }
3043
+ function summarizeAugmentedEntityPrefixStats(entityPrefixStats, entityPrefix) {
3044
+ const existing = entityPrefixStats.find((profile) => profile.entityPrefix === entityPrefix);
3045
+ if (existing) {
3046
+ return entityPrefixStats;
3047
+ }
3048
+ return [
3049
+ ...entityPrefixStats,
3050
+ {
3051
+ entityPrefix,
3052
+ activeEntryCount: 1,
3053
+ trustedEntryCount: 0,
3054
+ tentativeEntryCount: 1,
3055
+ unresolvedEntryCount: 0,
3056
+ legacyEntryCount: 0,
3057
+ deterministicRepairEntryCount: 1,
3058
+ manualEntryCount: 0,
3059
+ modelEntryCount: 0,
3060
+ jsonRetryEntryCount: 0,
3061
+ surgeonFamilyReuseEntryCount: 0
3062
+ }
3063
+ ];
3064
+ }
3065
+ function rewriteClaimKeyEntityPrefix(result, entityPrefix) {
3066
+ const claimKey = result.claimKey;
3067
+ if (!claimKey) {
3068
+ return result;
3069
+ }
3070
+ const [, attribute = ""] = claimKey.split("/", 2);
3071
+ return {
3072
+ ...result,
3073
+ claimKey: `${entityPrefix}/${attribute}`
3074
+ };
3075
+ }
3076
+ function formatClaimExtractionError(error) {
3077
+ return error instanceof Error ? error.message : String(error);
3078
+ }
3079
+ function describeSupportPromotionClass(support) {
3080
+ switch (support.autoApplyClass) {
3081
+ case "trusted_exact_reuse_grounded":
3082
+ return "trusted exact-key reuse with local grounding";
3083
+ case "trusted_family_template_grounded":
3084
+ return "trusted family reuse plus grounded template support";
3085
+ case "trusted_family_stable_slot":
3086
+ return "trusted family reuse plus a stable compact slot";
3087
+ case "trusted_family_grounded_alignment":
3088
+ return "trusted family reuse plus grounded dual lexical alignment";
3089
+ default:
3090
+ return "structural support";
3091
+ }
3092
+ }
1678
3093
  function tryDeterministicClaimKeyRepair(entry, hints) {
1679
3094
  const repaired = parsePossessiveClaim(entry.subject) ?? parsePossessiveStatement(entry.content);
1680
3095
  if (!repaired) {
@@ -1702,24 +3117,38 @@ function tryDeterministicClaimKeyRepair(entry, hints) {
1702
3117
  };
1703
3118
  }
1704
3119
  async function loadClaimExtractionHintState(db) {
1705
- const [entityHintResult, claimKeyExampleResult] = await Promise.allSettled([getEntityHints(db), getClaimKeyExamples(db)]);
3120
+ const [entityHintResult, promptClaimKeyExampleResult, supportClaimKeyExampleResult, entityPrefixStatsResult] = await Promise.allSettled([
3121
+ getEntityHints(db),
3122
+ getClaimKeyExamples(db, MAX_CLAIM_KEY_EXAMPLES),
3123
+ getClaimKeyExamples(db, MAX_SUPPORT_CLAIM_KEY_EXAMPLES),
3124
+ getClaimKeyEntityPrefixStats(db)
3125
+ ]);
1706
3126
  return createHintState({
1707
3127
  entityHints: entityHintResult.status === "fulfilled" ? entityHintResult.value : [],
1708
- claimKeyExamples: claimKeyExampleResult.status === "fulfilled" ? claimKeyExampleResult.value : []
3128
+ claimKeyExamples: promptClaimKeyExampleResult.status === "fulfilled" ? promptClaimKeyExampleResult.value : [],
3129
+ supportClaimKeys: supportClaimKeyExampleResult.status === "fulfilled" ? supportClaimKeyExampleResult.value : [],
3130
+ entityPrefixStats: entityPrefixStatsResult.status === "fulfilled" ? entityPrefixStatsResult.value : []
1709
3131
  });
1710
3132
  }
1711
- async function getClaimKeyExamples(db) {
3133
+ async function getClaimKeyExamples(db, limit) {
1712
3134
  if (typeof db.getClaimKeyExamples !== "function") {
1713
3135
  return [];
1714
3136
  }
1715
- return db.getClaimKeyExamples(MAX_CLAIM_KEY_EXAMPLES);
3137
+ return db.getClaimKeyExamples(limit);
3138
+ }
3139
+ async function getClaimKeyEntityPrefixStats(db) {
3140
+ if (typeof db.getClaimKeyEntityPrefixStats !== "function") {
3141
+ return [];
3142
+ }
3143
+ return db.getClaimKeyEntityPrefixStats();
1716
3144
  }
1717
3145
  function createHintState(input) {
1718
3146
  const claimKeyExamples = normalizeClaimKeyExamples(input.claimKeyExamples ?? []);
3147
+ const supportClaimKeys = normalizeSupportClaimKeys(input.supportClaimKeys ?? []);
1719
3148
  const entityHints = limitUnique(
1720
3149
  [
1721
3150
  ...normalizeEntityHints(input.entityHints ?? []),
1722
- ...claimKeyExamples.flatMap((claimKey) => {
3151
+ ...supportClaimKeys.flatMap((claimKey) => {
1723
3152
  const normalizedClaimKey = normalizeClaimKey(claimKey);
1724
3153
  return normalizedClaimKey.ok ? [normalizedClaimKey.value.entity] : [];
1725
3154
  })
@@ -1728,7 +3157,9 @@ function createHintState(input) {
1728
3157
  );
1729
3158
  return {
1730
3159
  entityHints,
1731
- claimKeyExamples
3160
+ claimKeyExamples,
3161
+ supportClaimKeys,
3162
+ entityPrefixStats: input.entityPrefixStats ?? []
1732
3163
  };
1733
3164
  }
1734
3165
  function buildEntryHints(state, entry) {
@@ -1747,6 +3178,7 @@ function recordClaimKeyHint(state, claimKey) {
1747
3178
  return;
1748
3179
  }
1749
3180
  state.claimKeyExamples = prependUnique(state.claimKeyExamples, normalizedClaimKey.value.claimKey, MAX_CLAIM_KEY_EXAMPLES);
3181
+ state.supportClaimKeys = prependUnique(state.supportClaimKeys, normalizedClaimKey.value.claimKey, MAX_SUPPORT_CLAIM_KEY_EXAMPLES);
1750
3182
  state.entityHints = prependUnique(state.entityHints, normalizedClaimKey.value.entity, MAX_ENTITY_HINTS);
1751
3183
  }
1752
3184
  function normalizeClaimExtractionHints(hints) {
@@ -1789,7 +3221,7 @@ function normalizeEntity(value, hints) {
1789
3221
  if (normalizedValue.length === 0) {
1790
3222
  return "";
1791
3223
  }
1792
- if (!SELF_REFERENTIAL_ENTITIES2.has(normalizedValue)) {
3224
+ if (!SELF_REFERENTIAL_ENTITIES.has(normalizedValue)) {
1793
3225
  return normalizedValue;
1794
3226
  }
1795
3227
  if (USER_REFERENTIAL_ENTITIES.has(normalizedValue) && hints.userEntity) {
@@ -1814,7 +3246,7 @@ function normalizeEntity(value, hints) {
1814
3246
  }
1815
3247
  function normalizeEntityHints(entityHints) {
1816
3248
  return limitUnique(
1817
- entityHints.map((entityHint) => normalizeClaimKeySegment(entityHint)).filter((entityHint) => entityHint.length > 0 && !SELF_REFERENTIAL_ENTITIES2.has(entityHint)),
3249
+ entityHints.map((entityHint) => normalizeClaimKeySegment(entityHint)).filter((entityHint) => entityHint.length > 0 && !SELF_REFERENTIAL_ENTITIES.has(entityHint)),
1818
3250
  MAX_ENTITY_HINTS
1819
3251
  );
1820
3252
  }
@@ -1827,12 +3259,21 @@ function normalizeClaimKeyExamples(claimKeyExamples) {
1827
3259
  MAX_CLAIM_KEY_EXAMPLES
1828
3260
  );
1829
3261
  }
3262
+ function normalizeSupportClaimKeys(claimKeys) {
3263
+ return limitUnique(
3264
+ claimKeys.flatMap((claimKey) => {
3265
+ const normalizedClaimKey = normalizeClaimKey(claimKey);
3266
+ return normalizedClaimKey.ok ? [normalizedClaimKey.value.claimKey] : [];
3267
+ }),
3268
+ MAX_SUPPORT_CLAIM_KEY_EXAMPLES
3269
+ );
3270
+ }
1830
3271
  function normalizeMetadataEntity(value) {
1831
3272
  if (typeof value !== "string") {
1832
3273
  return void 0;
1833
3274
  }
1834
3275
  const normalized = normalizeClaimKeySegment(value);
1835
- if (normalized.length === 0 || SELF_REFERENTIAL_ENTITIES2.has(normalized) || !/[a-z]/u.test(normalized)) {
3276
+ if (normalized.length === 0 || SELF_REFERENTIAL_ENTITIES.has(normalized) || !/[a-z]/u.test(normalized)) {
1836
3277
  return void 0;
1837
3278
  }
1838
3279
  return normalized;
@@ -1949,13 +3390,9 @@ function validateEntriesWithIndexes(inputs) {
1949
3390
  rejectedInputIndexes.push(index);
1950
3391
  continue;
1951
3392
  }
1952
- if (input.valid_from !== void 0 && !isIsoTimestamp(input.valid_from)) {
1953
- errors.push(`Entry ${index} has an invalid valid_from timestamp.`);
1954
- rejectedInputIndexes.push(index);
1955
- continue;
1956
- }
1957
- if (input.valid_to !== void 0 && !isIsoTimestamp(input.valid_to)) {
1958
- errors.push(`Entry ${index} has an invalid valid_to timestamp.`);
3393
+ const temporalValidity = validateTemporalValidityRange(input.valid_from, input.valid_to);
3394
+ if (!temporalValidity.ok) {
3395
+ errors.push(`Entry ${index} ${temporalValidity.message}`);
1959
3396
  rejectedInputIndexes.push(index);
1960
3397
  continue;
1961
3398
  }
@@ -1974,6 +3411,42 @@ function validateEntriesWithIndexes(inputs) {
1974
3411
  }
1975
3412
  }
1976
3413
  }
3414
+ const claimKeyRaw = normalizedClaimKey ? normalizeOptionalString(input.claim_key_raw) : void 0;
3415
+ const claimKeyStatus = normalizedClaimKey ? normalizeClaimKeyStatus(input.claim_key_status, index, warnings) : void 0;
3416
+ const claimKeySource = normalizedClaimKey ? normalizeClaimKeySource(input.claim_key_source, index, warnings) : void 0;
3417
+ const claimKeyConfidence = normalizedClaimKey ? normalizeClaimKeyConfidence(input.claim_key_confidence, index, warnings) : void 0;
3418
+ const claimKeyRationale = normalizedClaimKey ? normalizeOptionalString(input.claim_key_rationale) : void 0;
3419
+ const claimSupportSourceKind = normalizedClaimKey ? normalizeOptionalString(input.claim_support_source_kind) : void 0;
3420
+ const claimSupportLocator = normalizedClaimKey ? normalizeOptionalString(input.claim_support_locator) : void 0;
3421
+ const claimSupportObservedAt = normalizedClaimKey && input.claim_support_observed_at !== void 0 ? normalizeClaimSupportObservedAt(input.claim_support_observed_at, index, warnings) : void 0;
3422
+ const claimSupportMode = normalizedClaimKey && input.claim_support_mode !== void 0 ? normalizeClaimSupportMode(input.claim_support_mode, index, warnings) : void 0;
3423
+ const hasPrecomputedLifecycleFields = hasPrecomputedClaimKeyLifecycleFields(input);
3424
+ const resolvedPrecomputedLifecycle = normalizedClaimKey && hasPrecomputedLifecycleFields ? buildPrecomputedClaimKeyLifecycle({
3425
+ claim_key: normalizedClaimKey,
3426
+ claim_key_raw: claimKeyRaw,
3427
+ claim_key_status: claimKeyStatus,
3428
+ claim_key_source: claimKeySource,
3429
+ claim_key_confidence: claimKeyConfidence,
3430
+ claim_key_rationale: claimKeyRationale,
3431
+ claim_support_source_kind: claimSupportSourceKind,
3432
+ claim_support_locator: claimSupportLocator,
3433
+ claim_support_observed_at: claimSupportObservedAt,
3434
+ claim_support_mode: claimSupportMode
3435
+ }) : void 0;
3436
+ if (hasPrecomputedLifecycleFields) {
3437
+ if (!normalizedClaimKey) {
3438
+ errors.push(`Entry ${index} provided claim-key lifecycle metadata without a valid claim key.`);
3439
+ rejectedInputIndexes.push(index);
3440
+ continue;
3441
+ }
3442
+ if (!resolvedPrecomputedLifecycle) {
3443
+ errors.push(
3444
+ `Entry ${index} provided partial or invalid claim-key lifecycle metadata. Complete bundles require claim_key_status, claim_key_source, claim_key_confidence, and claim_key_rationale.`
3445
+ );
3446
+ rejectedInputIndexes.push(index);
3447
+ continue;
3448
+ }
3449
+ }
1977
3450
  valid.push({
1978
3451
  inputIndex: index,
1979
3452
  input: {
@@ -1990,8 +3463,17 @@ function validateEntriesWithIndexes(inputs) {
1990
3463
  created_at: normalizeOptionalString(input.created_at),
1991
3464
  supersedes: normalizeOptionalString(input.supersedes),
1992
3465
  claim_key: normalizedClaimKey,
1993
- valid_from: normalizeOptionalString(input.valid_from),
1994
- valid_to: normalizeOptionalString(input.valid_to)
3466
+ claim_key_raw: resolvedPrecomputedLifecycle?.claim_key_raw ?? claimKeyRaw,
3467
+ claim_key_status: resolvedPrecomputedLifecycle?.claim_key_status,
3468
+ claim_key_source: resolvedPrecomputedLifecycle?.claim_key_source,
3469
+ claim_key_confidence: resolvedPrecomputedLifecycle?.claim_key_confidence,
3470
+ claim_key_rationale: resolvedPrecomputedLifecycle?.claim_key_rationale,
3471
+ claim_support_source_kind: resolvedPrecomputedLifecycle?.claim_support_source_kind ?? claimSupportSourceKind,
3472
+ claim_support_locator: resolvedPrecomputedLifecycle?.claim_support_locator ?? claimSupportLocator,
3473
+ claim_support_observed_at: resolvedPrecomputedLifecycle?.claim_support_observed_at ?? claimSupportObservedAt,
3474
+ claim_support_mode: resolvedPrecomputedLifecycle?.claim_support_mode ?? claimSupportMode,
3475
+ valid_from: temporalValidity.value.validFrom,
3476
+ valid_to: temporalValidity.value.validTo
1995
3477
  }
1996
3478
  });
1997
3479
  }
@@ -2016,6 +3498,56 @@ function normalizeOptionalString(value) {
2016
3498
  const normalized = value?.trim();
2017
3499
  return normalized && normalized.length > 0 ? normalized : void 0;
2018
3500
  }
3501
+ function normalizeClaimSupportObservedAt(value, index, warnings) {
3502
+ const normalized = normalizeOptionalString(value);
3503
+ if (!normalized) {
3504
+ return void 0;
3505
+ }
3506
+ if (!isIsoTimestamp(normalized)) {
3507
+ warnings.push(`Entry ${index} provided invalid claim_support_observed_at ${JSON.stringify(value)} and it was dropped.`);
3508
+ return void 0;
3509
+ }
3510
+ return normalized;
3511
+ }
3512
+ function normalizeClaimKeyStatus(value, index, warnings) {
3513
+ const parsed = parseClaimKeyStatus(value);
3514
+ if (parsed) {
3515
+ return parsed;
3516
+ }
3517
+ if (value !== void 0) {
3518
+ warnings.push(`Entry ${index} provided invalid claim_key_status ${JSON.stringify(value)} and it was dropped.`);
3519
+ }
3520
+ return void 0;
3521
+ }
3522
+ function normalizeClaimKeySource(value, index, warnings) {
3523
+ const parsed = parseClaimKeySource(value);
3524
+ if (parsed) {
3525
+ return parsed;
3526
+ }
3527
+ if (value !== void 0) {
3528
+ warnings.push(`Entry ${index} provided invalid claim_key_source ${JSON.stringify(value)} and it was dropped.`);
3529
+ }
3530
+ return void 0;
3531
+ }
3532
+ function normalizeClaimKeyConfidence(value, index, warnings) {
3533
+ if (value === void 0) {
3534
+ return void 0;
3535
+ }
3536
+ const parsed = parseClaimKeyConfidence(value);
3537
+ if (parsed !== void 0) {
3538
+ return parsed;
3539
+ }
3540
+ warnings.push(`Entry ${index} provided invalid claim_key_confidence ${JSON.stringify(value)} and it was dropped.`);
3541
+ return void 0;
3542
+ }
3543
+ function normalizeClaimSupportMode(value, index, warnings) {
3544
+ const parsed = parseClaimSupportMode(value);
3545
+ if (parsed) {
3546
+ return parsed;
3547
+ }
3548
+ warnings.push(`Entry ${index} provided invalid claim_support_mode ${JSON.stringify(value)} and it was dropped.`);
3549
+ return void 0;
3550
+ }
2019
3551
  function areValidTags(value) {
2020
3552
  return Array.isArray(value) && value.every((tag) => typeof tag === "string");
2021
3553
  }
@@ -2035,7 +3567,7 @@ function isIsoTimestamp(value) {
2035
3567
 
2036
3568
  // src/core/store/pipeline.ts
2037
3569
  var AUTO_SUPERSESSION_MIN_EXTRACTED_CONFIDENCE = 0.9;
2038
- var AUTO_SUPERSESSION_ELIGIBLE_PATHS = /* @__PURE__ */ new Set(["model", "json_retry"]);
3570
+ var AUTO_SUPERSESSION_ELIGIBLE_SOURCES = /* @__PURE__ */ new Set(["model", "json_retry"]);
2039
3571
  async function storeEntriesDetailed(inputs, db, embedding, options = {}) {
2040
3572
  if (inputs.length === 0) {
2041
3573
  return { stored: 0, skipped: 0, rejected: 0, details: [] };
@@ -2069,6 +3601,7 @@ async function storeEntriesDetailed(inputs, db, embedding, options = {}) {
2069
3601
  }
2070
3602
  const pendingEntries = plan.pendingEntries;
2071
3603
  const extractedClaimKeys = await maybeExtractClaimKeys(pendingEntries, options);
3604
+ applyExtractedClaimKeyMetadata(pendingEntries, extractedClaimKeys);
2072
3605
  const embeddings = await resolvePendingEmbeddings(inputs, pendingEntries, embedding, options.precomputedEmbeddings);
2073
3606
  await persistEntries(db, pendingEntries, embeddings, extractedClaimKeys, options.claimExtraction?.config, options.onWarning);
2074
3607
  return {
@@ -2150,6 +3683,7 @@ async function persistEntries(db, preparedEntries, embeddings, extractedClaimKey
2150
3683
  }
2151
3684
  function buildEntry(preparedEntry, embedding) {
2152
3685
  const now = (/* @__PURE__ */ new Date()).toISOString();
3686
+ const acceptedClaimKey = preparedEntry.claimKey;
2153
3687
  return {
2154
3688
  id: randomUUID(),
2155
3689
  type: preparedEntry.input.type,
@@ -2169,7 +3703,16 @@ function buildEntry(preparedEntry, embedding) {
2169
3703
  recall_count: 0,
2170
3704
  valid_from: preparedEntry.input.valid_from,
2171
3705
  valid_to: preparedEntry.input.valid_to,
2172
- claim_key: preparedEntry.input.claim_key,
3706
+ claim_key: acceptedClaimKey?.claim_key ?? preparedEntry.input.claim_key,
3707
+ claim_key_raw: acceptedClaimKey?.claim_key_raw,
3708
+ claim_key_status: acceptedClaimKey?.claim_key_status,
3709
+ claim_key_source: acceptedClaimKey?.claim_key_source,
3710
+ claim_key_confidence: acceptedClaimKey?.claim_key_confidence,
3711
+ claim_key_rationale: acceptedClaimKey?.claim_key_rationale,
3712
+ claim_support_source_kind: acceptedClaimKey?.claim_support_source_kind,
3713
+ claim_support_locator: acceptedClaimKey?.claim_support_locator,
3714
+ claim_support_observed_at: acceptedClaimKey?.claim_support_observed_at,
3715
+ claim_support_mode: acceptedClaimKey?.claim_support_mode,
2173
3716
  retired: false,
2174
3717
  created_at: preparedEntry.input.created_at ?? now,
2175
3718
  updated_at: now
@@ -2193,7 +3736,13 @@ async function maybeExtractClaimKeys(preparedEntries, options) {
2193
3736
  },
2194
3737
  claimExtraction.config,
2195
3738
  1,
2196
- options.onWarning
3739
+ options.onWarning,
3740
+ (entry, diagnostic) => {
3741
+ const preparedEntry = preparedEntries.find((candidate) => candidate.input === entry);
3742
+ if (preparedEntry) {
3743
+ options.onClaimExtractionDiagnostic?.(preparedEntry.inputIndex, diagnostic);
3744
+ }
3745
+ }
2197
3746
  );
2198
3747
  const extractedClaimKeys = /* @__PURE__ */ new Map();
2199
3748
  for (const preparedEntry of preparedEntries) {
@@ -2212,12 +3761,26 @@ async function maybeExtractClaimKeys(preparedEntries, options) {
2212
3761
  function hasTransactionSupport(db) {
2213
3762
  return typeof db.withTransaction === "function";
2214
3763
  }
3764
/**
 * Gives every prepared entry that has no `claimKey` yet an accepted claim-key lifecycle.
 *
 * For each such entry the lifecycle is taken from
 * `buildPrecomputedClaimKeyLifecycle(entry.input)`. When that yields a nullish value
 * and an extraction result exists under the entry's `inputIndex`, it is built with
 * `buildExtractedClaimKeyLifecycle` instead. A resolved lifecycle is stored on the
 * entry and applied to its input via `applyClaimKeyLifecycle`; entries with no
 * lifecycle are left untouched.
 *
 * @param {Array} preparedEntries - Prepared entries; mutated in place.
 * @param {Map} extractedClaimKeys - Extraction results keyed by `inputIndex`.
 */
function applyExtractedClaimKeyMetadata(preparedEntries, extractedClaimKeys) {
  for (const entry of preparedEntries) {
    if (!entry.claimKey) {
      const extracted = extractedClaimKeys.get(entry.inputIndex);
      let lifecycle = buildPrecomputedClaimKeyLifecycle(entry.input);
      if (lifecycle === null || lifecycle === void 0) {
        lifecycle = extracted
          ? buildExtractedClaimKeyLifecycle(extracted, buildInferredIngestClaimKeySupportContext(entry.input))
          : void 0;
      }
      if (lifecycle) {
        entry.claimKey = lifecycle;
        applyClaimKeyLifecycle(entry.input, lifecycle);
      }
    }
  }
}
2215
3778
  async function planAutoSupersession(db, preparedEntries, extractedClaimKeys, claimExtractionConfig) {
2216
3779
  const plans = /* @__PURE__ */ new Map();
2217
3780
  const preparedEntriesByClaimKey = groupPreparedEntriesByClaimKey(preparedEntries);
2218
3781
  const siblingCache = /* @__PURE__ */ new Map();
2219
3782
  for (const preparedEntry of preparedEntries) {
2220
- const claimKey = preparedEntry.input.claim_key;
3783
+ const claimKey = preparedEntry.claimKey?.claim_key ?? preparedEntry.input.claim_key;
2221
3784
  if (!claimKey || preparedEntry.input.supersedes) {
2222
3785
  continue;
2223
3786
  }
@@ -2244,10 +3807,10 @@ async function planAutoSupersession(db, preparedEntries, extractedClaimKeys, cla
2244
3807
  if (!sibling) {
2245
3808
  continue;
2246
3809
  }
2247
- if (!isAutoSupersessionEligible(preparedEntry, extractedClaimKeys, claimExtractionConfig)) {
3810
+ if (!isAutoSupersessionEligible(preparedEntry.claimKey, claimExtractionConfig)) {
2248
3811
  plans.set(preparedEntry.inputIndex, {
2249
3812
  kind: "skip",
2250
- warning: buildAutoSupersessionEligibilityWarning(preparedEntry, extractedClaimKeys.get(preparedEntry.inputIndex))
3813
+ warning: buildAutoSupersessionEligibilityWarning(preparedEntry)
2251
3814
  });
2252
3815
  continue;
2253
3816
  }
@@ -2272,7 +3835,7 @@ async function planAutoSupersession(db, preparedEntries, extractedClaimKeys, cla
2272
3835
  function groupPreparedEntriesByClaimKey(preparedEntries) {
2273
3836
  const grouped = /* @__PURE__ */ new Map();
2274
3837
  for (const preparedEntry of preparedEntries) {
2275
- const claimKey = preparedEntry.input.claim_key;
3838
+ const claimKey = preparedEntry.claimKey?.claim_key ?? preparedEntry.input.claim_key;
2276
3839
  if (!claimKey) {
2277
3840
  continue;
2278
3841
  }
@@ -2291,28 +3854,31 @@ async function getClaimKeySiblings(db, cache, claimKey) {
2291
3854
  cache.set(claimKey, siblings);
2292
3855
  return siblings;
2293
3856
  }
2294
/**
 * Decides whether an accepted claim key may drive automatic supersession.
 *
 * @param {object|undefined} claimKey - Accepted claim-key lifecycle of the entry
 *   (`claim_key_status`, `claim_key_source`, `claim_key_confidence` are read).
 * @param {object|undefined} claimExtractionConfig - Extraction config; only
 *   `confidenceThreshold` is read.
 * @returns {boolean} `true` for trusted manual keys, and for trusted keys from an
 *   eligible source whose confidence reaches the stricter of the configured
 *   threshold and `AUTO_SUPERSESSION_MIN_EXTRACTED_CONFIDENCE`.
 */
function isAutoSupersessionEligible(claimKey, claimExtractionConfig) {
  // Anything that is not explicitly trusted never auto-links, regardless of source.
  if (!claimKey || claimKey.claim_key_status !== "trusted") {
    return false;
  }
  if (claimKey.claim_key_source === "manual") {
    return true;
  }
  if (!AUTO_SUPERSESSION_ELIGIBLE_SOURCES.has(claimKey.claim_key_source) || !claimExtractionConfig) {
    return false;
  }
  const requiredConfidence = Math.max(
    claimExtractionConfig.confidenceThreshold,
    AUTO_SUPERSESSION_MIN_EXTRACTED_CONFIDENCE
  );
  return claimKey.claim_key_confidence >= requiredConfidence;
}
2307
/**
 * Builds the warning emitted when an entry was stored but auto-supersession was skipped
 * because its claim key was not eligible.
 *
 * @param {object} preparedEntry - Prepared entry; `claimKey`, `input.subject` and
 *   `input.claim_key` are read.
 * @returns {string} Human-readable warning text.
 */
function buildAutoSupersessionEligibilityWarning(preparedEntry) {
  const acceptedClaimKey = preparedEntry.claimKey;
  const claimKey = acceptedClaimKey?.claim_key ?? preparedEntry.input.claim_key ?? "(missing)";
  const prefix = `Stored entry "${preparedEntry.input.subject}" with claim_key "${claimKey}" but skipped auto-supersession because`;
  if (!acceptedClaimKey) {
    return `${prefix} the claim-key provenance was not explicit or a tracked high-confidence extraction.`;
  }
  if (acceptedClaimKey.claim_key_source === "manual") {
    return `${prefix} the claim-key provenance was not eligible for automatic linking.`;
  }
  // A warning builder must not throw: a lifecycle without a numeric confidence
  // is reported as "unknown" instead of failing on `.toFixed`.
  const rawConfidence = acceptedClaimKey.claim_key_confidence;
  const confidence = typeof rawConfidence === "number" ? rawConfidence.toFixed(2) : "unknown";
  const policy = `Only explicit/manual claim keys or model-extracted keys at ${AUTO_SUPERSESSION_MIN_EXTRACTED_CONFIDENCE.toFixed(2)}+ auto-link.`;
  if (acceptedClaimKey.claim_key_status !== "trusted") {
    return `${prefix} the accepted claim key is ${acceptedClaimKey.claim_key_status} from ${acceptedClaimKey.claim_key_source} at confidence ${confidence}. ${policy}`;
  }
  return `${prefix} the extracted claim key came from ${acceptedClaimKey.claim_key_source} at confidence ${confidence}. ${policy}`;
}
2317
3883
  function buildAutoSupersessionRuleWarning(preparedEntry, sibling, reason) {
2318
3884
  if (reason === "type_mismatch") {
@@ -2332,7 +3898,7 @@ async function buildStorePlan(inputs, db) {
2332
3898
  inputIndex,
2333
3899
  contentHash: computeContentHash(input.content, input.source_file),
2334
3900
  normContentHash: computeNormContentHash(input.content),
2335
- claimKeySource: input.claim_key ? "manual" : void 0
3901
+ claimKey: buildManualAcceptedClaimKey(inputs[inputIndex], input)
2336
3902
  }));
2337
3903
  const afterBatchContentHash = dedupePreparedEntries(preparedEntries, "contentHash", "content_hash", details);
2338
3904
  const existingHashes = await db.findExistingHashes(afterBatchContentHash.map((entry) => entry.contentHash));
@@ -2388,6 +3954,31 @@ function formatPipelineError(error) {
2388
3954
  function sortStoreDetails(details) {
2389
3955
  return [...details].sort((left, right) => left.inputIndex - right.inputIndex);
2390
3956
  }
3957
/**
 * Builds the accepted claim-key lifecycle for an input that already carries a claim key.
 *
 * @param {object|undefined} rawInput - Input as supplied by the caller.
 * @param {object} normalizedInput - The same input after normalization.
 * @returns The lifecycle from `buildPrecomputedClaimKeyLifecycle` when it yields one,
 *   otherwise one built by `buildManualClaimKeyLifecycle`; `undefined` when the
 *   normalized input has no `claim_key`.
 * @throws {Error} When the raw input carries lifecycle fields (per
 *   `hasPrecomputedClaimKeyLifecycleFields`) but no precomputed lifecycle could be built.
 */
function buildManualAcceptedClaimKey(rawInput, normalizedInput) {
  const { claim_key: canonicalClaimKey } = normalizedInput;
  if (!canonicalClaimKey) {
    return void 0;
  }
  const precomputed = buildPrecomputedClaimKeyLifecycle(normalizedInput);
  if (precomputed) {
    return precomputed;
  }
  // Lifecycle fields were supplied but did not form a usable bundle: refuse
  // rather than silently downgrading to a manual lifecycle.
  const hasPartialLifecycle = rawInput && hasPrecomputedClaimKeyLifecycleFields(rawInput);
  if (hasPartialLifecycle) {
    throw new Error("Store inputs with claim-key lifecycle metadata must provide a complete valid lifecycle bundle.");
  }
  const rawClaimKey = normalizedInput.claim_key_raw ?? normalizeOptionalString2(rawInput?.claim_key);
  return buildManualClaimKeyLifecycle({
    claimKey: canonicalClaimKey,
    rawClaimKey,
    supportSourceKind: normalizedInput.claim_support_source_kind,
    supportLocator: normalizedInput.claim_support_locator,
    supportObservedAt: normalizedInput.claim_support_observed_at,
    supportMode: normalizedInput.claim_support_mode
  });
}
3978
/**
 * Trims an optional string and collapses empty results to `undefined`.
 *
 * @param {unknown} value - Candidate value; may come from unvalidated raw input.
 * @returns {string|undefined} The trimmed string, or `undefined` when `value` is not
 *   a string or is blank after trimming.
 */
function normalizeOptionalString2(value) {
  // Non-string values (numbers, objects, ...) have no `.trim`; treat them as absent
  // instead of throwing a TypeError.
  if (typeof value !== "string") {
    return void 0;
  }
  const normalized = value.trim();
  return normalized.length > 0 ? normalized : void 0;
}
2391
3982
 
2392
3983
  // src/core/episode/summary-prompt.ts
2393
3984
  var EPISODE_SUMMARY_SYSTEM_PROMPT = [
@@ -2505,7 +4096,7 @@ async function generateEpisodeSummary(transcript, llm) {
2505
4096
  }
2506
4097
 
2507
4098
  // src/app/episode-ingest/service/preflight.ts
2508
- import path from "path";
4099
+ import path2 from "path";
2509
4100
 
2510
4101
  // src/core/episode/transcript-render.ts
2511
4102
  var MIN_EPISODE_MESSAGES = 4;
@@ -2836,9 +4427,9 @@ function resolveSessionMeta(filePath, parsedSessionId, registryMeta, reconstruct
2836
4427
  };
2837
4428
  }
2838
4429
  function deriveAgentIdFromPath(filePath) {
2839
- const resolved = path.resolve(filePath);
2840
- const parent = path.basename(path.dirname(resolved));
2841
- const grandparent = path.basename(path.dirname(path.dirname(resolved)));
4430
+ const resolved = path2.resolve(filePath);
4431
+ const parent = path2.basename(path2.dirname(resolved));
4432
+ const grandparent = path2.basename(path2.dirname(path2.dirname(resolved)));
2842
4433
  if (parent !== "sessions") {
2843
4434
  return null;
2844
4435
  }
@@ -3166,50 +4757,59 @@ function resolveRecentCutoff(recent, now) {
3166
4757
  return cutoff;
3167
4758
  }
3168
4759
 
3169
- // src/adapters/openclaw/session/session-id.ts
3170
- import path2 from "path";
3171
- function deriveOpenClawSessionIdFromFilePath(sessionFile, logger) {
3172
- const normalizedSessionFile = sessionFile.trim();
3173
- if (normalizedSessionFile.length === 0) {
3174
- debugLog(logger, "session-id", "cannot derive session id from empty session file path");
3175
- return void 0;
3176
- }
3177
- const fileName = path2.basename(normalizedSessionFile);
3178
- const sessionId = fileName.replace(/\.jsonl(?:\..*)?$/i, "").trim();
3179
- debugLog(logger, "session-id", `derived session id "${sessionId || "<empty>"}" from file=${normalizedSessionFile}`);
3180
- return sessionId.length > 0 ? sessionId : void 0;
3181
- }
3182
- function debugLog(logger, subsystem, message) {
3183
- logger?.debug?.(`[agenr] ${subsystem}: ${message}`);
3184
- }
3185
-
3186
4760
  // src/adapters/openclaw/session/sessions-store-reader.ts
3187
4761
  import * as fs3 from "fs/promises";
3188
4762
  import path3 from "path";
3189
4763
  async function readOpenClawSessionsStore(sessionsDir, logger) {
3190
- const normalizedSessionsDir = sessionsDir.trim();
3191
- if (normalizedSessionsDir.length === 0) {
4764
+ if (sessionsDir.trim().length === 0) {
3192
4765
  debugLog2(logger, "sessions-store-reader", "skipping sessions.json read because sessionsDir is empty");
3193
4766
  return [];
3194
4767
  }
4768
+ const result = await readOpenClawSessionsStoreWithDiagnostics(sessionsDir);
4769
+ for (const diagnostic of result.diagnostics) {
4770
+ debugLog2(logger, "sessions-store-reader", diagnostic.message);
4771
+ }
4772
+ if (result.diagnostics.length === 0) {
4773
+ debugLog2(
4774
+ logger,
4775
+ "sessions-store-reader",
4776
+ `loaded sessions.json entries=${result.entries.length} path=${path3.join(path3.resolve(sessionsDir.trim()), "sessions.json")}`
4777
+ );
4778
+ }
4779
+ return result.entries;
4780
+ }
4781
+ async function readOpenClawSessionsStoreWithDiagnostics(sessionsDir) {
4782
+ const normalizedSessionsDir = sessionsDir.trim();
4783
+ if (normalizedSessionsDir.length === 0) {
4784
+ return {
4785
+ entries: [],
4786
+ diagnostics: []
4787
+ };
4788
+ }
3195
4789
  const resolvedSessionsDir = path3.resolve(normalizedSessionsDir);
3196
4790
  const sessionsJsonPath = path3.join(resolvedSessionsDir, "sessions.json");
3197
4791
  try {
3198
4792
  const raw = await fs3.readFile(sessionsJsonPath, "utf8");
3199
4793
  const parsed = JSON.parse(raw);
3200
4794
  if (!isRecord(parsed)) {
3201
- debugLog2(logger, "sessions-store-reader", `sessions.json did not contain an object: path=${sessionsJsonPath}`);
3202
- return [];
4795
+ return {
4796
+ entries: [],
4797
+ diagnostics: [
4798
+ {
4799
+ kind: "structurally_invalid_file",
4800
+ message: `sessions.json did not contain an object: path=${sessionsJsonPath}`,
4801
+ path: sessionsJsonPath
4802
+ }
4803
+ ]
4804
+ };
3203
4805
  }
3204
4806
  const entries = [];
3205
4807
  for (const [sessionKey, value] of Object.entries(parsed)) {
3206
4808
  const normalizedSessionKey = sessionKey.trim();
3207
4809
  if (normalizedSessionKey.length === 0) {
3208
- debugLog2(logger, "sessions-store-reader", `skipping blank session key in ${sessionsJsonPath}`);
3209
4810
  continue;
3210
4811
  }
3211
4812
  if (!isRecord(value)) {
3212
- debugLog2(logger, "sessions-store-reader", `skipping non-object entry for key=${normalizedSessionKey}`);
3213
4813
  continue;
3214
4814
  }
3215
4815
  const sessionId = asTrimmedString(value["sessionId"]);
@@ -3229,26 +4829,52 @@ async function readOpenClawSessionsStore(sessionsDir, logger) {
3229
4829
  ...updatedAt !== void 0 ? { updatedAt } : {}
3230
4830
  });
3231
4831
  }
3232
- debugLog2(logger, "sessions-store-reader", `loaded sessions.json entries=${entries.length} path=${sessionsJsonPath}`);
3233
- return entries;
4832
+ return {
4833
+ entries,
4834
+ diagnostics: []
4835
+ };
3234
4836
  } catch (error) {
3235
- if (isFileNotFound(error)) {
3236
- debugLog2(logger, "sessions-store-reader", `sessions.json missing at ${sessionsJsonPath}`);
3237
- return [];
4837
+ if (isFileNotFound2(error)) {
4838
+ return {
4839
+ entries: [],
4840
+ diagnostics: [
4841
+ {
4842
+ kind: "missing_file",
4843
+ message: `sessions.json missing at ${sessionsJsonPath}`,
4844
+ path: sessionsJsonPath
4845
+ }
4846
+ ]
4847
+ };
3238
4848
  }
3239
4849
  if (error instanceof SyntaxError) {
3240
- debugLog2(logger, "sessions-store-reader", `sessions.json parse failed at ${sessionsJsonPath}: ${error.message}`);
3241
- return [];
4850
+ return {
4851
+ entries: [],
4852
+ diagnostics: [
4853
+ {
4854
+ kind: "malformed_json",
4855
+ message: `sessions.json parse failed at ${sessionsJsonPath}: ${error.message}`,
4856
+ path: sessionsJsonPath
4857
+ }
4858
+ ]
4859
+ };
3242
4860
  }
3243
- debugLog2(logger, "sessions-store-reader", `sessions.json read failed at ${sessionsJsonPath}: ${formatErrorMessage(error)}`);
3244
- return [];
4861
+ return {
4862
+ entries: [],
4863
+ diagnostics: [
4864
+ {
4865
+ kind: "unreadable_file",
4866
+ message: `sessions.json read failed at ${sessionsJsonPath}: ${formatErrorMessage2(error)}`,
4867
+ path: sessionsJsonPath
4868
+ }
4869
+ ]
4870
+ };
3245
4871
  }
3246
4872
  }
3247
4873
  function resolveSessionStorePath(candidatePath, sessionsDir) {
3248
4874
  return path3.isAbsolute(candidatePath) ? path3.resolve(candidatePath) : path3.resolve(sessionsDir, candidatePath);
3249
4875
  }
3250
4876
  function isRecord(value) {
3251
- return typeof value === "object" && value !== null;
4877
+ return typeof value === "object" && value !== null && !Array.isArray(value);
3252
4878
  }
3253
4879
  function asTrimmedString(value) {
3254
4880
  return typeof value === "string" && value.trim().length > 0 ? value.trim() : void 0;
@@ -3259,10 +4885,10 @@ function asFiniteNumber(value) {
3259
4885
  function debugLog2(logger, subsystem, message) {
3260
4886
  logger?.debug?.(`[agenr] ${subsystem}: ${message}`);
3261
4887
  }
3262
/**
 * Checks whether a caught value is an error object whose `code` is `"ENOENT"`.
 *
 * @param {unknown} error - Value caught from a failed file-system call.
 * @returns {boolean} `true` when `error` is a non-null object with `code === "ENOENT"`.
 */
function isFileNotFound2(error) {
  if (typeof error !== "object" || error === null) {
    return false;
  }
  return "code" in error && error.code === "ENOENT";
}
3265
- function formatErrorMessage(error) {
4891
+ function formatErrorMessage2(error) {
3266
4892
  if (error instanceof Error) {
3267
4893
  return error.message;
3268
4894
  }
@@ -3297,26 +4923,32 @@ function parseTuiSessionKey(sessionKey) {
3297
4923
  }
3298
4924
 
3299
4925
  export {
3300
- normalizeClaimKeySegment,
3301
- normalizeClaimKey,
3302
- compactClaimKey,
3303
- inspectClaimKey,
3304
- isTrustedClaimKeyForCleanup,
3305
- describeClaimKeyNormalizationFailure,
3306
- describeClaimKeySuspicion,
4926
+ detectClaimKeyEntityFamilyCandidates,
4927
+ detectClaimKeySingletonAliasCandidates,
4928
+ buildTrustedClaimKeySupportSeed,
4929
+ evaluateClaimKeySupport,
4930
+ evaluateClaimKeyCompactness,
4931
+ normalizeGroundingTags,
4932
+ tokenizeGroundingText,
4933
+ buildEntryLocalLexicalTokens,
4934
+ applyClaimExtractionResultToEntry,
3307
4935
  previewClaimKeyExtraction,
3308
4936
  runBatchClaimExtraction,
3309
4937
  validateSupersessionRules,
3310
4938
  describeSupersessionRuleFailure,
4939
+ computeContentHash,
4940
+ computeNormContentHash,
4941
+ validateEntriesWithIndexes,
3311
4942
  storeEntriesDetailed,
4943
+ deriveOpenClawSessionIdFromFilePath,
3312
4944
  OpenClawTranscriptParser,
3313
4945
  openClawTranscriptParser,
3314
- deriveOpenClawSessionIdFromFilePath,
3315
4946
  readOpenClawSessionsStore,
3316
4947
  parseTuiSessionKey,
3317
4948
  backfillEpisodeEmbeddings,
3318
4949
  prepareEpisodeIngest,
3319
4950
  ingestEpisodeTranscript,
3320
4951
  executeEpisodeIngestPlan,
3321
- createEpisodeIngestPlan
4952
+ createEpisodeIngestPlan,
4953
+ createOpenClawRepository
3322
4954
  };