@agenr/agenr-plugin 1.7.4 → 1.8.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/chunk-GUDCFFRV.js +1517 -0
- package/dist/chunk-LVDQXSHP.js +5122 -0
- package/dist/{chunk-NXCCTZ4G.js → chunk-O45JQ6O3.js} +2212 -544
- package/dist/index.js +334 -261
- package/openclaw.plugin.json +31 -8
- package/package.json +2 -2
- package/dist/chunk-7WL5EAQZ.js +0 -758
- package/dist/chunk-IZDGXMTQ.js +0 -839
- package/dist/chunk-NIQKTINU.js +0 -2545
|
@@ -1,37 +1,115 @@
|
|
|
1
1
|
import {
|
|
2
|
+
EMBEDDING_DIMENSIONS,
|
|
3
|
+
ENTRY_SELECT_COLUMNS,
|
|
2
4
|
ENTRY_TYPES,
|
|
3
5
|
EPISODE_ACTIVITY_LEVELS,
|
|
4
6
|
EXPIRY_LEVELS,
|
|
5
|
-
|
|
6
|
-
|
|
7
|
+
VECTOR_INDEX_NAME,
|
|
8
|
+
applyClaimKeyLifecycle,
|
|
9
|
+
buildActiveEntryClause,
|
|
10
|
+
buildExtractedClaimKeyLifecycle,
|
|
11
|
+
buildInferredIngestClaimKeySupportContext,
|
|
12
|
+
buildManualClaimKeyLifecycle,
|
|
13
|
+
buildPrecomputedClaimKeyLifecycle,
|
|
14
|
+
composeEmbeddingText,
|
|
15
|
+
hasPrecomputedClaimKeyLifecycleFields,
|
|
16
|
+
mapEntryRow,
|
|
17
|
+
parseClaimKeyConfidence,
|
|
18
|
+
parseClaimKeySource,
|
|
19
|
+
parseClaimKeyStatus,
|
|
20
|
+
parseClaimSupportMode,
|
|
21
|
+
readNumber,
|
|
22
|
+
readOptionalString,
|
|
23
|
+
readRequiredString,
|
|
24
|
+
validateTemporalValidityRange
|
|
25
|
+
} from "./chunk-LVDQXSHP.js";
|
|
7
26
|
import {
|
|
8
|
-
|
|
9
|
-
|
|
27
|
+
compactClaimKey,
|
|
28
|
+
describeClaimKeyNormalizationFailure,
|
|
29
|
+
describeExtractedClaimKeyRejection,
|
|
30
|
+
inspectClaimKey,
|
|
31
|
+
isTrustedClaimKeyForCleanup,
|
|
32
|
+
normalizeClaimKey,
|
|
33
|
+
normalizeClaimKeySegment,
|
|
34
|
+
parseRelativeDate,
|
|
35
|
+
resolveClaimSlotPolicy,
|
|
36
|
+
validateExtractedClaimKey
|
|
37
|
+
} from "./chunk-GUDCFFRV.js";
|
|
10
38
|
|
|
11
39
|
// src/adapters/openclaw/transcript/parser.ts
|
|
12
40
|
import { createHash } from "crypto";
|
|
13
41
|
import * as fs2 from "fs/promises";
|
|
14
42
|
|
|
43
|
+
// src/adapters/openclaw/session/session-id.ts
|
|
44
|
+
import path from "path";
|
|
45
|
+
/**
 * Derives an OpenClaw session id from a session transcript file path.
 *
 * The id is the file's base name with a trailing `.jsonl` extension removed,
 * together with anything that follows that extension (for example
 * `abc.jsonl.bak` yields `abc`). The extension match is case-insensitive.
 *
 * @param sessionFile - Session file path; surrounding whitespace is ignored.
 * @param logger - Optional logger forwarded to `debugLog` for trace output.
 * @returns The derived session id, or `undefined` when the path is blank or
 *   nothing is left after stripping the extension.
 */
function deriveOpenClawSessionIdFromFilePath(sessionFile, logger) {
  const normalizedSessionFile = sessionFile.trim();
  if (normalizedSessionFile.length === 0) {
    debugLog(logger, "session-id", "cannot derive session id from empty session file path");
    return void 0;
  }
  const fileName = path.basename(normalizedSessionFile);
  // Drop ".jsonl" plus any trailing suffix segments such as ".jsonl.reset.<stamp>".
  const sessionId = fileName.replace(/\.jsonl(?:\..*)?$/i, "").trim();
  debugLog(logger, "session-id", `derived session id "${sessionId || "<empty>"}" from file=${normalizedSessionFile}`);
  return sessionId.length > 0 ? sessionId : void 0;
}
|
|
56
|
+
/**
 * Emits a debug message prefixed with `[agenr] <subsystem>: `.
 *
 * Does nothing when `logger` is missing or has no `debug` method.
 *
 * @param logger - Optional logger object exposing `debug(message)`.
 * @param subsystem - Short label identifying the emitting subsystem.
 * @param message - Message text appended after the prefix.
 */
function debugLog(logger, subsystem, message) {
  logger?.debug?.(`[agenr] ${subsystem}: ${message}`);
}
|
|
59
|
+
|
|
15
60
|
// src/adapters/openclaw/transcript/jsonl.ts
|
|
16
|
-
function
|
|
61
|
+
/**
 * Parses one JSONL line and reports why it was skipped when it is unusable.
 *
 * @param line - Raw line text.
 * @param lineNumber - 1-based line number used in diagnostics (defaults to 1).
 * @returns `{ record }` holding the parsed object; `{ record: null }` for an
 *   empty or whitespace-only line; or `{ record: null, diagnostic }` where
 *   `diagnostic.kind` is `"non_object_record"` (valid JSON that is not a
 *   plain object, e.g. an array, number or `null`) or `"malformed_json"`.
 */
function parseJsonObjectLineWithDiagnostics(line, lineNumber = 1) {
  // Blank lines are not an error: no record and no diagnostic.
  if (!line || line.trim().length === 0) {
    return {
      record: null
    };
  }
  try {
    const parsed = JSON.parse(line);
    if (parsed && typeof parsed === "object" && !Array.isArray(parsed)) {
      return {
        record: parsed
      };
    }
    return {
      record: null,
      diagnostic: {
        kind: "non_object_record",
        lineNumber,
        message: `Skipped non-object JSONL line ${lineNumber}`
      }
    };
  } catch {
    return {
      record: null,
      diagnostic: {
        kind: "malformed_json",
        lineNumber,
        message: `Skipped malformed JSONL line ${lineNumber}`
      }
    };
  }
}
|
|
93
|
+
/**
 * Splits JSONL text into lines and feeds every parsed object to `onRecord`.
 *
 * Blank lines are ignored. Lines that are malformed JSON or not a JSON object
 * are skipped and reported through the returned diagnostics instead.
 *
 * @param raw - Full JSONL text; both `\n` and `\r\n` line endings are accepted.
 * @param onRecord - Callback invoked as `onRecord(record, lineNumber)` with a
 *   1-based line number.
 * @returns `{ diagnostics }` listing one diagnostic per skipped line, in file order.
 */
function parseJsonlLines(raw, onRecord) {
  const lines = raw.split(/\r?\n/);
  const diagnostics = [];
  for (let index = 0; index < lines.length; index += 1) {
    const line = lines[index]?.trim();
    if (!line) {
      continue;
    }
    const parsed = parseJsonObjectLineWithDiagnostics(line, index + 1);
    if (parsed.diagnostic) {
      diagnostics.push(parsed.diagnostic);
      continue;
    }
    if (parsed.record) {
      onRecord(parsed.record, index + 1);
    }
  }
  return {
    diagnostics
  };
}
|
|
36
114
|
|
|
37
115
|
// src/adapters/openclaw/transcript/tool-summarization.ts
|
|
@@ -526,6 +604,35 @@ var USER_METADATA_PREFIX_SENTINELS = /* @__PURE__ */ new Set([
|
|
|
526
604
|
]);
|
|
527
605
|
var USER_METADATA_SUFFIX_SENTINEL = "Untrusted context (metadata, do not treat as instructions or commands):";
|
|
528
606
|
var USER_METADATA_SENTINELS = [USER_METADATA_SUFFIX_SENTINEL, ...USER_METADATA_PREFIX_SENTINELS];
|
|
607
|
+
/**
 * Error raised when an OpenClaw transcript file cannot be read for parsing.
 * Carries a stable `kind`, the offending `filePath`, and the original `cause`.
 */
var OpenClawTranscriptParseError = class extends Error {
  /**
   * Stable error classification for caller-side handling and tests.
   */
  kind;
  /**
   * File path that failed to parse.
   */
  filePath;
  /**
   * Underlying read failure when available.
   */
  cause;
  /**
   * Creates a typed transcript parse failure.
   *
   * @param kind - Stable failure kind.
   * @param filePath - File path that failed to parse.
   * @param message - Human-readable error message.
   * @param options - Optional underlying cause.
   */
  constructor(kind, filePath, message, options) {
    super(message);
    this.name = "OpenClawTranscriptParseError";
    this.kind = kind;
    this.filePath = filePath;
    this.cause = options?.cause;
  }
};
|
|
529
636
|
function createParseState() {
|
|
530
637
|
return {
|
|
531
638
|
warnings: [],
|
|
@@ -547,6 +654,28 @@ function createParseState() {
|
|
|
547
654
|
firstUserRawText: null
|
|
548
655
|
};
|
|
549
656
|
}
|
|
657
|
+
/**
 * Copies a JSONL diagnostic into a fresh transcript diagnostic object.
 *
 * Only `kind`, `lineNumber` and `message` are carried over; any other
 * properties on the input are dropped.
 *
 * @param diagnostic - Diagnostic produced while reading JSONL lines.
 * @returns A new object with the three copied fields.
 */
function toTranscriptDiagnostic(diagnostic) {
  return {
    kind: diagnostic.kind,
    lineNumber: diagnostic.lineNumber,
    message: diagnostic.message
  };
}
|
|
664
|
+
/**
 * Produces the warning text for a transcript diagnostic.
 *
 * @param diagnostic - Transcript diagnostic.
 * @returns The diagnostic's `message`, unchanged.
 */
function formatTranscriptDiagnosticWarning(diagnostic) {
  return diagnostic.message;
}
|
|
667
|
+
/**
 * Reads a transcript file as UTF-8 text, converting read failures into
 * `OpenClawTranscriptParseError`.
 *
 * @param filePath - Path of the transcript file to read.
 * @returns The file contents as a string.
 * @throws OpenClawTranscriptParseError with kind `"missing_file"` when the
 *   read fails with `ENOENT`, otherwise kind `"unreadable_file"`. The
 *   original error is attached as `cause` in both cases.
 */
async function readTranscriptFileStrict(filePath) {
  try {
    return await fs2.readFile(filePath, "utf8");
  } catch (error) {
    if (isFileNotFound(error)) {
      throw new OpenClawTranscriptParseError("missing_file", filePath, `Transcript file not found: ${filePath}`, { cause: error });
    }
    throw new OpenClawTranscriptParseError("unreadable_file", filePath, `Could not read transcript file ${filePath}: ${formatErrorMessage(error)}`, {
      cause: error
    });
  }
}
|
|
550
679
|
function extractRawMessageText(content) {
|
|
551
680
|
if (typeof content === "string") {
|
|
552
681
|
return content;
|
|
@@ -768,7 +897,7 @@ function handleMessageRecord(state, record, message) {
|
|
|
768
897
|
}
|
|
769
898
|
if (role === "system") {
|
|
770
899
|
state.stats.systemDropped += 1;
|
|
771
|
-
return;
|
|
900
|
+
return "known_skip";
|
|
772
901
|
}
|
|
773
902
|
const timestamp = extractTimestamp(record) ?? extractTimestamp(message);
|
|
774
903
|
if (role === "user") {
|
|
@@ -778,14 +907,14 @@ function handleMessageRecord(state, record, message) {
|
|
|
778
907
|
}
|
|
779
908
|
const text = stripOpenClawUserMetadata(message.content);
|
|
780
909
|
if (!text) {
|
|
781
|
-
return;
|
|
910
|
+
return "known_skip";
|
|
782
911
|
}
|
|
783
912
|
if (isPureBase64(text)) {
|
|
784
913
|
state.stats.base64Dropped += 1;
|
|
785
|
-
return;
|
|
914
|
+
return "known_skip";
|
|
786
915
|
}
|
|
787
916
|
pushMessage(state.messages, "user", text, timestamp);
|
|
788
|
-
return;
|
|
917
|
+
return "accepted";
|
|
789
918
|
}
|
|
790
919
|
if (role === "assistant") {
|
|
791
920
|
const toolCalls = extractToolCallBlocks(message.content);
|
|
@@ -798,48 +927,50 @@ function handleMessageRecord(state, record, message) {
|
|
|
798
927
|
const assistantText = [...extractAssistantTextParts(message.content), ...toolCalls.map((toolCall) => summarizeToolCall(toolCall))].join(" ").trim();
|
|
799
928
|
addModelUsed(state, message.model);
|
|
800
929
|
if (!assistantText) {
|
|
801
|
-
return;
|
|
930
|
+
return "known_skip";
|
|
802
931
|
}
|
|
803
932
|
if (isPureBase64(assistantText)) {
|
|
804
933
|
state.stats.base64Dropped += 1;
|
|
805
|
-
return;
|
|
934
|
+
return "known_skip";
|
|
806
935
|
}
|
|
807
936
|
pushMessage(state.messages, "assistant", truncateWithMarker(assistantText, 5e3), timestamp);
|
|
808
|
-
return;
|
|
937
|
+
return "accepted";
|
|
809
938
|
}
|
|
810
939
|
if (role !== "toolResult") {
|
|
811
|
-
return;
|
|
940
|
+
return "structurally_invalid";
|
|
812
941
|
}
|
|
813
942
|
const toolContext = resolveToolContext(state, message);
|
|
814
943
|
const toolName = getString(message.name) ?? getString(message.tool) ?? getString(record.name) ?? getString(record.tool) ?? toolContext?.name;
|
|
815
944
|
const toolArgs = toolContext?.args ?? {};
|
|
816
945
|
const toolText = normalizeMessageText(message.content);
|
|
817
946
|
if (!toolText) {
|
|
818
|
-
return;
|
|
947
|
+
return "known_skip";
|
|
819
948
|
}
|
|
820
949
|
if (isPureBase64(toolText)) {
|
|
821
950
|
state.stats.base64Dropped += 1;
|
|
822
|
-
return;
|
|
951
|
+
return "known_skip";
|
|
823
952
|
}
|
|
824
953
|
const decision = shouldKeepToolResult(toolName, toolText, TOOL_RESULT_POLICY);
|
|
825
954
|
if (decision.keep) {
|
|
826
955
|
state.stats.toolResultsKept += 1;
|
|
827
956
|
pushMessage(state.messages, "assistant", decision.truncateTo ? truncateWithMarker(toolText, decision.truncateTo) : toolText, timestamp);
|
|
828
|
-
return;
|
|
957
|
+
return "accepted";
|
|
829
958
|
}
|
|
830
959
|
state.stats.toolResultsDropped += 1;
|
|
831
960
|
pushMessage(state.messages, "assistant", toolResultPlaceholder(toolName ?? "unknown", toolArgs), timestamp);
|
|
961
|
+
return "accepted";
|
|
832
962
|
}
|
|
833
963
|
function handleRecord(state, record) {
|
|
834
964
|
if (record.type === "session") {
|
|
835
965
|
state.sessionId = getString(record.id) ?? state.sessionId;
|
|
836
966
|
state.sessionTimestamp = extractTimestamp(record) ?? state.sessionTimestamp;
|
|
837
967
|
state.sessionLabel = normalizeSessionLabel(getString(record.conversation_label) ?? "") ?? state.sessionLabel;
|
|
968
|
+
state.workingDirectory = getString(record.cwd) ?? state.workingDirectory;
|
|
838
969
|
addModelUsed(state, record.model);
|
|
839
970
|
if (!state.surfaceDetected) {
|
|
840
971
|
setDetectedSurface(state, readInboundSurface(record));
|
|
841
972
|
}
|
|
842
|
-
return;
|
|
973
|
+
return "accepted";
|
|
843
974
|
}
|
|
844
975
|
if (!state.surfaceDetected) {
|
|
845
976
|
setDetectedSurface(state, readInboundSurface(record));
|
|
@@ -847,21 +978,30 @@ function handleRecord(state, record) {
|
|
|
847
978
|
if (record.type === "model_change") {
|
|
848
979
|
addModelUsed(state, record.modelId);
|
|
849
980
|
state.stats.skippedRecordTypes += 1;
|
|
850
|
-
return;
|
|
981
|
+
return "known_skip";
|
|
851
982
|
}
|
|
852
983
|
if (typeof record.type === "string" && SKIPPED_RECORD_TYPES.has(record.type)) {
|
|
853
984
|
state.stats.skippedRecordTypes += 1;
|
|
854
|
-
return;
|
|
985
|
+
return "known_skip";
|
|
855
986
|
}
|
|
856
987
|
const message = asRecord(record.message);
|
|
857
988
|
if (!message) {
|
|
858
|
-
return;
|
|
989
|
+
return "structurally_invalid";
|
|
859
990
|
}
|
|
860
|
-
handleMessageRecord(state, record, message);
|
|
991
|
+
return handleMessageRecord(state, record, message);
|
|
861
992
|
}
|
|
862
993
|
/**
 * Builds the one-line summary of what transcript filtering dropped or kept.
 *
 * @param stats - Counters with `toolResultsDropped`, `toolResultsKept`,
 *   `systemDropped` and `base64Dropped`.
 * @returns The formatted summary sentence.
 */
function buildFilterWarning(stats) {
  return `Filtered transcript: ${stats.toolResultsDropped} tool results dropped, ${stats.toolResultsKept} kept, ${stats.systemDropped} system dropped, ${stats.base64Dropped} base64 dropped.`;
}
|
|
996
|
+
/**
 * Tells whether a caught value is a "file not found" error.
 *
 * @param error - Any caught value.
 * @returns `true` only for a non-null object whose `code` is `"ENOENT"`.
 */
function isFileNotFound(error) {
  return typeof error === "object" && error !== null && "code" in error && error.code === "ENOENT";
}
|
|
999
|
+
/**
 * Converts a caught value into display text.
 *
 * @param error - Any caught value.
 * @returns `error.message` for `Error` instances, otherwise `String(error)`.
 */
function formatErrorMessage(error) {
  if (error instanceof Error) {
    return error.message;
  }
  return String(error);
}
|
|
865
1005
|
var OpenClawTranscriptParser = class {
|
|
866
1006
|
/**
|
|
867
1007
|
* Parses an OpenClaw JSONL transcript file into agenr transcript data.
|
|
@@ -871,13 +1011,23 @@ var OpenClawTranscriptParser = class {
|
|
|
871
1011
|
* @returns Parsed transcript messages, warnings, and metadata.
|
|
872
1012
|
*/
|
|
873
1013
|
async parseFile(filePath, options) {
|
|
874
|
-
const raw = await
|
|
1014
|
+
const raw = await readTranscriptFileStrict(filePath);
|
|
875
1015
|
const verbose = options?.verbose === true;
|
|
876
1016
|
const state = createParseState();
|
|
877
1017
|
const transcriptHash = createHash("sha256").update(raw).digest("hex");
|
|
878
|
-
|
|
879
|
-
|
|
1018
|
+
const diagnostics = [];
|
|
1019
|
+
const jsonlResult = parseJsonlLines(raw, (record, lineNumber) => {
|
|
1020
|
+
const outcome = handleRecord(state, record);
|
|
1021
|
+
if (outcome === "structurally_invalid") {
|
|
1022
|
+
diagnostics.push({
|
|
1023
|
+
kind: "structurally_invalid_record",
|
|
1024
|
+
lineNumber,
|
|
1025
|
+
message: `Skipped structurally invalid transcript record on line ${lineNumber}`
|
|
1026
|
+
});
|
|
1027
|
+
}
|
|
880
1028
|
});
|
|
1029
|
+
diagnostics.push(...jsonlResult.diagnostics.map(toTranscriptDiagnostic));
|
|
1030
|
+
state.warnings.push(...diagnostics.map(formatTranscriptDiagnosticWarning));
|
|
881
1031
|
if (!state.surfaceDetected && state.firstUserRawText) {
|
|
882
1032
|
setDetectedSurface(state, inferSurfaceFromContent(state.firstUserRawText));
|
|
883
1033
|
}
|
|
@@ -887,6 +1037,7 @@ var OpenClawTranscriptParser = class {
|
|
|
887
1037
|
}
|
|
888
1038
|
const startedAt = state.sessionTimestamp ?? state.messages[0]?.timestamp ?? fallbackTimestamp;
|
|
889
1039
|
const endedAt = state.messages[state.messages.length - 1]?.timestamp ?? state.sessionTimestamp ?? fallbackTimestamp;
|
|
1040
|
+
const stableSessionId = state.sessionId ?? deriveOpenClawSessionIdFromFilePath(filePath);
|
|
890
1041
|
return {
|
|
891
1042
|
messages: state.messages,
|
|
892
1043
|
warnings: state.warnings,
|
|
@@ -899,515 +1050,1446 @@ var OpenClawTranscriptParser = class {
|
|
|
899
1050
|
transcriptHash,
|
|
900
1051
|
modelsUsed: state.modelsUsed.length > 0 ? state.modelsUsed : void 0,
|
|
901
1052
|
reconstructedSurface: state.detectedSurface,
|
|
902
|
-
surfaceReconstructionSource: state.surfaceDetected ? "reconstructed" : "none"
|
|
1053
|
+
surfaceReconstructionSource: state.surfaceDetected ? "reconstructed" : "none",
|
|
1054
|
+
sourceIdentity: stableSessionId ? `openclaw-session:${stableSessionId}` : void 0,
|
|
1055
|
+
sourceIdentityKind: stableSessionId ? "openclaw_session" : void 0,
|
|
1056
|
+
workingDirectory: state.workingDirectory
|
|
903
1057
|
}
|
|
904
1058
|
};
|
|
905
1059
|
}
|
|
906
1060
|
};
|
|
907
1061
|
// Module-level OpenClawTranscriptParser instance, created once when this module loads.
var openClawTranscriptParser = new OpenClawTranscriptParser();
|
|
908
1062
|
|
|
909
|
-
// src/
|
|
910
|
-
var
|
|
911
|
-
|
|
912
|
-
|
|
1063
|
+
// src/adapters/db/openclaw-repository.ts
|
|
1064
|
+
// JSON text of an all-zero array with EMBEDDING_DIMENSIONS elements; bound as the query vector by probeVectorAvailability.
var ZERO_VECTOR = JSON.stringify(Array.from({ length: EMBEDDING_DIMENSIONS }, () => 0));
|
|
1065
|
+
/**
 * Creates a repository object whose methods run the OpenClaw read queries
 * against a fixed executor.
 *
 * Each method delegates to the same-named module function, passing `executor`
 * as the first argument.
 *
 * @param executor - Query executor handed to every delegated call.
 * @param options - Optional settings; `claimSlotPolicyConfig` is forwarded to
 *   `getEntryTrace`.
 * @returns An object exposing `listCoreEntries`, `findEntryBySubject`,
 *   `findMostRecentEntry`, `getEntryTrace`, `getMemoryStatusSnapshot` and
 *   `probeVectorAvailability`.
 */
function createOpenClawRepository(executor, options = {}) {
  return {
    listCoreEntries: async (limit) => listCoreEntries(executor, limit),
    findEntryBySubject: async (subject) => findEntryBySubject(executor, subject),
    findMostRecentEntry: async () => findMostRecentEntry(executor),
    getEntryTrace: async (entryId) => getEntryTrace(executor, entryId, options.claimSlotPolicyConfig),
    getMemoryStatusSnapshot: async () => getMemoryStatusSnapshot(executor),
    probeVectorAvailability: async () => probeVectorAvailability(executor)
  };
}
|
|
1075
|
+
/**
 * Lists active entries whose expiry is `'core'`, ordered by importance and
 * then by creation time, both descending.
 *
 * @param executor - Query executor exposing `execute({ sql, args })`.
 * @param limit - Maximum number of rows; values `<= 0` return `[]` without querying.
 * @returns The matching rows mapped through `mapEntryRow`.
 */
async function listCoreEntries(executor, limit) {
  if (limit <= 0) {
    return [];
  }
  const result = await executor.execute({
    sql: `
      SELECT
        ${ENTRY_SELECT_COLUMNS}
      FROM entries
      WHERE ${buildActiveEntryClause()}
        AND expiry = 'core'
      ORDER BY importance DESC, created_at DESC
      LIMIT ?
    `,
    args: [limit]
  });
  return result.rows.map((row) => mapEntryRow(row));
}
|
|
1093
|
+
/**
 * Finds the best entry for a subject: a case-insensitive exact match wins
 * over a case-insensitive substring match, and ties go to the newest entry.
 *
 * The subject is matched literally. `%`, `_` and `\` in the input are escaped
 * before being placed in the LIKE pattern, so they cannot act as wildcards.
 *
 * @param executor - Query executor exposing `execute({ sql, args })`.
 * @param subject - Subject text; surrounding whitespace is ignored.
 * @returns The best-matching row mapped through `mapEntryRow`, or `null` when
 *   the subject is blank or nothing matches.
 */
async function findEntryBySubject(executor, subject) {
  const normalizedSubject = subject.trim();
  if (normalizedSubject.length === 0) {
    return null;
  }
  // Prefix LIKE metacharacters with the escape character named in the ESCAPE clause.
  const containsPattern = `%${normalizedSubject.replace(/[\\%_]/g, "\\$&")}%`;
  const result = await executor.execute({
    sql: `
      SELECT
        ${ENTRY_SELECT_COLUMNS},
        CASE
          WHEN lower(subject) = lower(?) THEN 0
          WHEN lower(subject) LIKE lower(?) ESCAPE '\\' THEN 1
          ELSE 2
        END AS match_rank
      FROM entries
      WHERE lower(subject) = lower(?)
         OR lower(subject) LIKE lower(?) ESCAPE '\\'
      ORDER BY match_rank ASC, created_at DESC
      LIMIT 1
    `,
    args: [normalizedSubject, containsPattern, normalizedSubject, containsPattern]
  });
  const row = result.rows[0];
  return row ? mapEntryRow(row) : null;
}
|
|
1118
|
+
/**
 * Fetches the entry with the latest `created_at`.
 *
 * The query has no WHERE clause, so every row in `entries` is considered.
 *
 * @param executor - Query executor exposing `execute({ sql })`.
 * @returns The newest row mapped through `mapEntryRow`, or `null` when the
 *   table yields no rows.
 */
async function findMostRecentEntry(executor) {
  const result = await executor.execute({
    sql: `
      SELECT
        ${ENTRY_SELECT_COLUMNS}
      FROM entries
      ORDER BY created_at DESC
      LIMIT 1
    `
  });
  const row = result.rows[0];
  return row ? mapEntryRow(row) : null;
}
|
|
1131
|
+
/**
 * Assembles a trace for one entry: the entry itself, its supersession links,
 * its claim-key family, and its recent recall events.
 *
 * The four related lookups run concurrently once the entry has been found.
 *
 * @param executor - Query executor passed to every lookup.
 * @param entryId - Id of the entry to trace (inactive entries included).
 * @param claimSlotPolicyConfig - Optional policy config forwarded to `getClaimFamily`.
 * @returns `null` when the entry does not exist; otherwise an object with
 *   `entry`, `supersedes` and `recallEvents`, plus `supersededBy` and
 *   `claimFamily` only when those lookups produced a value.
 */
async function getEntryTrace(executor, entryId, claimSlotPolicyConfig) {
  const entry = await getEntryByIdIncludingInactive(executor, entryId);
  if (!entry) {
    return null;
  }
  const [supersededBy, supersedes, claimFamily, recallEvents] = await Promise.all([
    entry.superseded_by ? getEntryByIdIncludingInactive(executor, entry.superseded_by) : Promise.resolve(null),
    listSupersededEntries(executor, entry.id),
    entry.claim_key ? getClaimFamily(executor, entry.claim_key, claimSlotPolicyConfig) : Promise.resolve(void 0),
    listRecallEvents(executor, entry.id)
  ]);
  return {
    entry,
    // Optional keys are omitted entirely rather than set to null/undefined.
    ...supersededBy ? { supersededBy } : {},
    supersedes,
    ...claimFamily ? { claimFamily } : {},
    recallEvents
  };
}
|
|
1150
|
+
/**
 * Summarises the active entries: total count, how many have expiry `'core'`,
 * and how many distinct `source_file` values they reference.
 *
 * @param executor - Query executor exposing `execute({ sql })`.
 * @returns `{ activeEntries, coreEntries, sourceFiles }`; each value is read
 *   with `readNumber` using `0` as the fallback, and all three are `0` when
 *   the query yields no row.
 */
async function getMemoryStatusSnapshot(executor) {
  const result = await executor.execute({
    sql: `
      SELECT
        COUNT(*) AS active_entries,
        SUM(CASE WHEN expiry = 'core' THEN 1 ELSE 0 END) AS core_entries,
        COUNT(DISTINCT source_file) AS source_files
      FROM entries
      WHERE ${buildActiveEntryClause()}
    `
  });
  const row = result.rows[0];
  if (!row) {
    return {
      activeEntries: 0,
      coreEntries: 0,
      sourceFiles: 0
    };
  }
  return {
    activeEntries: readNumber(row, "active_entries", 0),
    coreEntries: readNumber(row, "core_entries", 0),
    sourceFiles: readNumber(row, "source_files", 0)
  };
}
|
|
1175
|
+
/**
 * Checks whether vector search works by running a one-row `vector_top_k`
 * query against the index named by `VECTOR_INDEX_NAME`, using `ZERO_VECTOR`
 * as the query vector.
 *
 * Any failure of the probe query is treated as "not available"; the error is
 * intentionally not propagated.
 *
 * @param executor - Query executor exposing `execute({ sql, args })`.
 * @returns `true` when the probe query succeeds, `false` when it throws.
 */
async function probeVectorAvailability(executor) {
  try {
    await executor.execute({
      sql: `
        SELECT COUNT(*) AS matches
        FROM vector_top_k('${VECTOR_INDEX_NAME}', vector32(?), ?) AS matches
      `,
      args: [ZERO_VECTOR, 1]
    });
    return true;
  } catch {
    return false;
  }
}
|
|
1189
|
+
/**
 * Loads one entry by id. The query filters on `id` only, so entries are
 * returned regardless of any active/inactive state.
 *
 * @param executor - Query executor exposing `execute({ sql, args })`.
 * @param entryId - Entry id; surrounding whitespace is ignored.
 * @returns The row mapped through `mapEntryRow`, or `null` when the id is
 *   blank or no row matches.
 */
async function getEntryByIdIncludingInactive(executor, entryId) {
  const normalizedId = entryId.trim();
  if (normalizedId.length === 0) {
    return null;
  }
  const result = await executor.execute({
    sql: `
      SELECT
        ${ENTRY_SELECT_COLUMNS}
      FROM entries
      WHERE id = ?
      LIMIT 1
    `,
    args: [normalizedId]
  });
  const row = result.rows[0];
  return row ? mapEntryRow(row) : null;
}
|
|
1207
|
+
/**
 * Lists the entries whose `superseded_by` points at the given entry, newest first.
 *
 * @param executor - Query executor exposing `execute({ sql, args })`.
 * @param entryId - Id of the superseding entry; bound as-is without trimming.
 * @returns The matching rows mapped through `mapEntryRow`.
 */
async function listSupersededEntries(executor, entryId) {
  const result = await executor.execute({
    sql: `
      SELECT
        ${ENTRY_SELECT_COLUMNS}
      FROM entries
      WHERE superseded_by = ?
      ORDER BY created_at DESC
    `,
    args: [entryId]
  });
  return result.rows.map((row) => mapEntryRow(row));
}
|
|
1220
|
+
/**
 * Loads every entry sharing a claim key, oldest first, together with the
 * slot policy resolved for that key.
 *
 * @param executor - Query executor exposing `execute({ sql, args })`.
 * @param claimKey - Claim key; surrounding whitespace is ignored.
 * @param claimSlotPolicyConfig - Optional config forwarded to `resolveClaimSlotPolicy`.
 * @returns `undefined` for a blank key; otherwise `{ claimKey, slotPolicy,
 *   slotPolicyReason, entries }` where `entries` is ordered by `created_at`
 *   and then `id`, both ascending.
 */
async function getClaimFamily(executor, claimKey, claimSlotPolicyConfig) {
  const normalizedClaimKey = claimKey.trim();
  if (normalizedClaimKey.length === 0) {
    return void 0;
  }
  const result = await executor.execute({
    sql: `
      SELECT
        ${ENTRY_SELECT_COLUMNS}
      FROM entries
      WHERE claim_key = ?
      ORDER BY created_at ASC, id ASC
    `,
    args: [normalizedClaimKey]
  });
  const entries = result.rows.map((row) => mapEntryRow(row));
  const slotPolicy = resolveClaimSlotPolicy(normalizedClaimKey, claimSlotPolicyConfig);
  return {
    claimKey: normalizedClaimKey,
    slotPolicy: slotPolicy.policy,
    slotPolicyReason: slotPolicy.reason,
    entries
  };
}
|
|
1244
|
+
/**
 * Lists up to ten of the most recent recall events recorded for an entry.
 *
 * @param executor - Query executor exposing `execute({ sql, args })`.
 * @param entryId - Id of the entry whose recall events are wanted.
 * @returns Objects of the form `{ query, sessionKey, recalledAt }`, newest
 *   first. `query` and `sessionKey` are read with `readOptionalString`,
 *   `recalledAt` with `readRequiredString`.
 */
async function listRecallEvents(executor, entryId) {
  const result = await executor.execute({
    sql: `
      SELECT
        query,
        session_key,
        recalled_at
      FROM recall_events
      WHERE entry_id = ?
      ORDER BY recalled_at DESC
      LIMIT 10
    `,
    args: [entryId]
  });
  return result.rows.map((row) => ({
    query: readOptionalString(row, "query"),
    sessionKey: readOptionalString(row, "session_key"),
    recalledAt: readRequiredString(row, "recalled_at")
  }));
}
|
|
1264
|
+
|
|
1265
|
+
// src/core/store/pipeline.ts
|
|
1266
|
+
import { randomUUID } from "crypto";
|
|
1267
|
+
|
|
1268
|
+
// src/core/supersession.ts
|
|
1269
|
+
/**
 * Decides whether `newEntry` may supersede `oldEntry`.
 *
 * Rules are checked in order and the first failure is reported:
 * 1. both entries must have the same `type` (`"type_mismatch"`);
 * 2. the old entry must not be a milestone (`"milestone"`);
 * 3. the old entry must not have `expiry === "core"` (`"core_expiry"`).
 *
 * @param oldEntry - Entry that would be superseded; `type` and `expiry` are read.
 * @param newEntry - Entry that would supersede it; only `type` is read.
 * @returns `{ ok: true }` when allowed, otherwise `{ ok: false, reason }`.
 */
function validateSupersessionRules(oldEntry, newEntry) {
  if (oldEntry.type !== newEntry.type) {
    return {
      ok: false,
      reason: "type_mismatch"
    };
  }
  if (oldEntry.type === "milestone") {
    return {
      ok: false,
      reason: "milestone"
    };
  }
  if (oldEntry.expiry === "core") {
    return {
      ok: false,
      reason: "core_expiry"
    };
  }
  return {
    ok: true
  };
}
|
|
1292
|
+
/**
 * Translates a supersession rule failure reason into a readable sentence.
 *
 * @param reason - One of `"type_mismatch"`, `"milestone"` or `"core_expiry"`.
 * @returns The matching sentence; `undefined` for any other value, since the
 *   switch has no default branch.
 */
function describeSupersessionRuleFailure(reason) {
  switch (reason) {
    case "type_mismatch":
      return "Supersession requires both entries to have the same type.";
    case "milestone":
      return "Milestone entries are never superseded automatically.";
    case "core_expiry":
      return "Core-expiry entries are never superseded automatically.";
  }
}
|
|
1302
|
+
|
|
1303
|
+
// src/core/claim-key-entity-family.ts
|
|
1304
|
+
// Common English function words.
// NOTE(review): presumably these are ignored when grounding entity families;
// the code that consumes this set is outside this excerpt — confirm.
var ENTITY_FAMILY_GROUNDING_STOP_TOKENS = /* @__PURE__ */ new Set([
  "a",
  "an",
  "and",
  "are",
  "as",
  "at",
  "be",
  "by",
  "for",
  "from",
  "in",
  "into",
  "is",
  "it",
  "of",
  "on",
  "or",
  "that",
  "the",
  "their",
  "this",
  "to",
  "was",
  "with"
]);
|
|
1330
|
+
// Numeric tuning constants for claim-key entity-family detection.
// NOTE(review): the code that reads them is outside this excerpt, so their
// exact roles are inferred from the names only — confirm before changing.
var MAX_ATTRIBUTE_BUCKET_SIZE = 12;
var MAX_EVIDENCE_VALUES = 6;
var CANONICAL_SELECTION_MARGIN = 3;
var SINGLETON_ALIAS_MAX_FAMILY_SIZE = 2;
var SINGLETON_ALIAS_MIN_DOMINANT_TRUSTED_COUNT = 3;
var SINGLETON_ALIAS_MIN_CONFIDENCE_DELTA = 0.05;
|
|
1336
|
+
// Scope-style nouns (agent, host, repo, service, ...).
// NOTE(review): presumably used when judging singleton entity aliases; the
// consuming code is outside this excerpt — confirm.
var SINGLETON_ALIAS_SCOPE_TOKENS = /* @__PURE__ */ new Set([
  "agent",
  "app",
  "branch",
  "build",
  "cluster",
  "daemon",
  "device",
  "env",
  "environment",
  "gateway",
  "host",
  "machine",
  "node",
  "plugin",
  "project",
  "repo",
  "repository",
  "server",
  "service",
  "session",
  "system",
  "workspace"
]);
|
|
933
|
-
|
|
934
|
-
|
|
935
|
-
|
|
936
|
-
|
|
937
|
-
"depend",
|
|
938
|
-
"depends",
|
|
939
|
-
"follows",
|
|
940
|
-
"follow",
|
|
941
|
-
"keep",
|
|
942
|
-
"keeps",
|
|
943
|
-
"maintain",
|
|
944
|
-
"maintains",
|
|
945
|
-
"need",
|
|
946
|
-
"needs",
|
|
947
|
-
"precede",
|
|
948
|
-
"precedes",
|
|
949
|
-
"preserve",
|
|
950
|
-
"preserves",
|
|
951
|
-
"require",
|
|
952
|
-
"required",
|
|
953
|
-
"requires",
|
|
954
|
-
"retain",
|
|
955
|
-
"retains"
|
|
956
|
-
]);
|
|
957
|
-
var COMPACTION_BREAK_TOKENS = /* @__PURE__ */ new Set(["about", "across", "and", "between", "during", "for", "from", "into", "onto", "or", "to", "with"]);
|
|
958
|
-
var COMPACTION_WEAK_LEADING_TOKENS = /* @__PURE__ */ new Set(["actual", "authoritative", "canonical", "concrete", "current", "durable", "existing", "real"]);
|
|
959
|
-
var ACTION_CONDITION_TOKENS = /* @__PURE__ */ new Set(["activate", "activation", "apply", "fire", "launch", "run", "start", "trigger"]);
|
|
960
|
-
var TRAILING_OBJECT_COMPACTION_PREPOSITIONS = /* @__PURE__ */ new Set(["about", "for", "from", "into", "onto", "to", "with"]);
|
|
961
|
-
var TRAILING_OBJECT_TRANSFER_HEADS = /* @__PURE__ */ new Set([
|
|
962
|
-
"access",
|
|
963
|
-
"boundary",
|
|
964
|
-
"condition",
|
|
965
|
-
"contract",
|
|
966
|
-
"guide",
|
|
967
|
-
"path",
|
|
968
|
-
"policy",
|
|
969
|
-
"preference",
|
|
970
|
-
"process",
|
|
971
|
-
"rule",
|
|
972
|
-
"schedule",
|
|
973
|
-
"support",
|
|
974
|
-
"surface",
|
|
975
|
-
"window",
|
|
976
|
-
"workflow"
|
|
977
|
-
]);
|
|
978
|
-
var STABLE_ATTRIBUTE_HEADS = /* @__PURE__ */ new Set([
|
|
979
|
-
"access",
|
|
980
|
-
"boundary",
|
|
981
|
-
"condition",
|
|
982
|
-
"contract",
|
|
983
|
-
"default",
|
|
984
|
-
"dependency",
|
|
985
|
-
"guide",
|
|
986
|
-
"mode",
|
|
987
|
-
"order",
|
|
988
|
-
"path",
|
|
989
|
-
"policy",
|
|
990
|
-
"preference",
|
|
991
|
-
"preservation",
|
|
992
|
-
"process",
|
|
993
|
-
"requirement",
|
|
994
|
-
"rule",
|
|
995
|
-
"schedule",
|
|
996
|
-
"setting",
|
|
997
|
-
"status",
|
|
998
|
-
"strategy",
|
|
999
|
-
"support",
|
|
1000
|
-
"surface",
|
|
1001
|
-
"timezone",
|
|
1002
|
-
"truth",
|
|
1003
|
-
"version",
|
|
1004
|
-
"window",
|
|
1005
|
-
"workflow"
|
|
1006
|
-
]);
|
|
1007
|
-
function normalizeClaimKeySegment(value) {
|
|
1008
|
-
return value.trim().toLowerCase().replace(/[^a-z0-9]+/g, "_").replace(/_+/g, "_").replace(/^_+|_+$/g, "");
|
|
1009
|
-
}
|
|
1010
|
-
function normalizeClaimKey(value) {
|
|
1011
|
-
const trimmed = value.trim();
|
|
1012
|
-
if (trimmed.length === 0) {
|
|
1013
|
-
return { ok: false, reason: "empty" };
|
|
1360
|
+
/**
 * Groups claim-key entity prefixes into candidate "families" of prefixes that
 * are linked by pairwise support.
 *
 * Steps:
 * 1. Build per-entity profiles from `entries`; stop if fewer than two exist.
 * 2. Build pairwise support between profiles; stop if there is none.
 * 3. Treat each supported pair as an undirected edge and walk the connected
 *    components of the resulting graph.
 * 4. For every component with at least two prefixes, pick a canonical prefix
 *    and emit one family record.
 *
 * @param entries - Entries handed to `buildTrustedClaimKeyEntityProfiles`.
 * @returns Family records sorted by `confidence` descending, then by their
 *   joined entity prefixes. Each record has `entityPrefixes` (sorted),
 *   `entryIds`, `claimKeys`, `canonicalEntityPrefix`,
 *   `canonicalSelectionReasons`, `confidence`, `autoConverge`,
 *   `unresolvedReason` and `pairSupport` (sorted by joined prefixes).
 */
function detectClaimKeyEntityFamilyCandidates(entries) {
  const profiles = buildTrustedClaimKeyEntityProfiles(entries);
  if (profiles.size < 2) {
    return [];
  }
  const pairSupport = buildPairSupport(profiles);
  if (pairSupport.length === 0) {
    return [];
  }
  // Undirected adjacency: every supported pair links both prefixes to each other.
  const adjacency = /* @__PURE__ */ new Map();
  for (const support of pairSupport) {
    const [leftEntity, rightEntity] = support.entityPrefixes;
    getOrCreateSet(adjacency, leftEntity).add(rightEntity);
    getOrCreateSet(adjacency, rightEntity).add(leftEntity);
  }
  const visited = /* @__PURE__ */ new Set();
  const families = [];
  for (const entityPrefix of adjacency.keys()) {
    if (visited.has(entityPrefix)) {
      continue;
    }
    const component = collectConnectedEntityComponent(entityPrefix, adjacency, visited);
    if (component.length < 2) {
      continue;
    }
    const componentSet = new Set(component);
    // Keep only the support records whose two prefixes both sit in this component.
    const componentSupport = pairSupport.filter((support) => {
      const [leftEntity, rightEntity] = support.entityPrefixes;
      return componentSet.has(leftEntity) && componentSet.has(rightEntity);
    });
    const canonicalSelection = selectCanonicalEntityPrefix(component, componentSupport, profiles);
    const canonicalEntityPrefix = canonicalSelection.canonicalEntityPrefix;
    // Auto-converge only when a canonical prefix exists and every other
    // prefix has direct support to it that is flagged autoSafe.
    const autoConverge = canonicalEntityPrefix !== null && component.filter((entity) => entity !== canonicalEntityPrefix).every((entity) => {
      const support = findPairSupport(componentSupport, canonicalEntityPrefix, entity);
      return support?.autoSafe === true;
    });
    const componentProfiles = component.map((entity) => profiles.get(entity)).filter((profile) => Boolean(profile));
    const entryIds = normalizeStringArray(componentProfiles.flatMap((profile) => [...profile.entryIds]));
    const claimKeys = normalizeStringArray(componentProfiles.flatMap((profile) => [...profile.claimKeys]));
    // The family takes the strongest pairwise confidence; 0.75 applies only
    // if the component somehow has no support records.
    const confidence = componentSupport.length > 0 ? Math.max(...componentSupport.map((support) => support.confidence)) : 0.75;
    families.push({
      entityPrefixes: [...component].sort((left, right) => left.localeCompare(right)),
      entryIds,
      claimKeys,
      canonicalEntityPrefix,
      canonicalSelectionReasons: canonicalSelection.reasons,
      confidence,
      autoConverge,
      unresolvedReason: canonicalSelection.unresolvedReason ?? (autoConverge ? null : "Entity-family evidence is strong enough to stage, but not every alias mapping is low-risk."),
      pairSupport: componentSupport.sort((left, right) => {
        const leftKey = left.entityPrefixes.join("::");
        const rightKey = right.entityPrefixes.join("::");
        return leftKey.localeCompare(rightKey);
      })
    });
  }
  return families.sort((left, right) => {
    if (right.confidence !== left.confidence) {
      return right.confidence - left.confidence;
    }
    const leftKey = left.entityPrefixes.join("::");
    const rightKey = right.entityPrefixes.join("::");
    return leftKey.localeCompare(rightKey);
  });
}
|
|
1425
|
+
/**
 * Aggregates per-entity-prefix counters over a list of claim-key observations.
 *
 * Observations with a blank `claim_key`, or whose key `inspectClaimKey` cannot
 * normalize, are ignored. Every remaining observation bumps `activeEntryCount`
 * plus one status counter (unknown statuses count as "legacy") and, when the
 * source is recognized, one source counter.
 *
 * @param {Iterable<object>} observations Records exposing `claim_key`,
 *   `claim_key_status` and `claim_key_source`.
 * @returns {object[]} One stats record per entity prefix, ordered by active
 *   count (desc), then trusted count (desc), then entity prefix (asc).
 */
function summarizeClaimKeyEntityPrefixStats(observations) {
  // Lookup tables replace the two switch statements; Map avoids accidental
  // hits on Object.prototype keys such as "constructor".
  const statusCounterField = new Map([
    ["trusted", "trustedEntryCount"],
    ["tentative", "tentativeEntryCount"],
    ["unresolved", "unresolvedEntryCount"]
  ]);
  const sourceCounterField = new Map([
    ["deterministic_repair", "deterministicRepairEntryCount"],
    ["manual", "manualEntryCount"],
    ["model", "modelEntryCount"],
    ["json_retry", "jsonRetryEntryCount"],
    ["surgeon_family_reuse", "surgeonFamilyReuseEntryCount"]
  ]);
  const statsByEntity = new Map();
  for (const observation of observations) {
    const trimmedKey = observation.claim_key?.trim();
    if (!trimmedKey) {
      continue;
    }
    const normalized = inspectClaimKey(trimmedKey).normalized;
    if (!normalized) {
      continue;
    }
    const entityPrefix = normalized.entity;
    let stats = statsByEntity.get(entityPrefix);
    if (stats == null) {
      stats = {
        entityPrefix,
        activeEntryCount: 0,
        trustedEntryCount: 0,
        tentativeEntryCount: 0,
        unresolvedEntryCount: 0,
        legacyEntryCount: 0,
        deterministicRepairEntryCount: 0,
        manualEntryCount: 0,
        modelEntryCount: 0,
        jsonRetryEntryCount: 0,
        surgeonFamilyReuseEntryCount: 0
      };
      statsByEntity.set(entityPrefix, stats);
    }
    stats.activeEntryCount += 1;
    // Any status outside the three known values is tallied as legacy.
    const statusField = statusCounterField.get(observation.claim_key_status) ?? "legacyEntryCount";
    stats[statusField] += 1;
    // Unrecognized sources are simply not tallied.
    const sourceField = sourceCounterField.get(observation.claim_key_source);
    if (sourceField !== undefined) {
      stats[sourceField] += 1;
    }
  }
  return [...statsByEntity.values()].sort(
    (left, right) => right.activeEntryCount - left.activeEntryCount || right.trustedEntryCount - left.trustedEntryCount || left.entityPrefix.localeCompare(right.entityPrefix)
  );
}
|
|
1496
|
+
/**
 * Convenience entry point: summarizes raw observations into per-entity stats
 * and runs singleton-alias detection over those stats.
 *
 * @param {Iterable<object>} observations Claim-key observations.
 * @returns {object[]} Whatever `detectClaimKeySingletonAliasCandidatesFromStats` returns.
 */
function detectClaimKeySingletonAliasCandidates(observations) {
  const entityPrefixStats = summarizeClaimKeyEntityPrefixStats(observations);
  return detectClaimKeySingletonAliasCandidatesFromStats(entityPrefixStats);
}
|
|
1499
|
+
/**
 * Pairs small, not-fully-trusted entity families ("aliases") with larger
 * trusted families ("dominants") and keeps at most one candidate per alias.
 *
 * @param {object[]} stats Per-entity stats records (see
 *   `summarizeClaimKeyEntityPrefixStats`).
 * @returns {object[]} Selected candidates, ordered by confidence (desc) and
 *   then alias entity prefix (asc).
 */
function detectClaimKeySingletonAliasCandidatesFromStats(stats) {
  const dominantFamilies = stats.filter((profile) => profile.trustedEntryCount >= SINGLETON_ALIAS_MIN_DOMINANT_TRUSTED_COUNT);
  const aliasFamilies = stats.filter((profile) => {
    const withinSizeLimit = profile.activeEntryCount > 0 && profile.activeEntryCount <= SINGLETON_ALIAS_MAX_FAMILY_SIZE;
    return withinSizeLimit && profile.trustedEntryCount < profile.activeEntryCount && buildLowTrustEntryCount(profile) >= 1;
  });
  const candidateGroups = /* @__PURE__ */ new Map();
  for (const aliasProfile of aliasFamilies) {
    const candidates = dominantFamilies.filter((dominantProfile) => {
      // A dominant must be a different family and strictly larger than the alias.
      const sameFamily = dominantProfile.entityPrefix === aliasProfile.entityPrefix;
      const notLarger = dominantProfile.activeEntryCount <= aliasProfile.activeEntryCount;
      return !sameFamily && !notLarger;
    }).map((dominantProfile) => evaluateSingletonAliasCandidate(aliasProfile, dominantProfile)).filter((candidate) => Boolean(candidate));
    if (candidates.length === 0) {
      continue;
    }
    const group = candidateGroups.get(aliasProfile.entityPrefix);
    if (group) {
      group.push(...candidates);
    } else {
      candidateGroups.set(aliasProfile.entityPrefix, candidates);
    }
  }
  const selected = [...candidateGroups.values()].flatMap((group) => selectBestSingletonAliasCandidate(group));
  return selected.sort((left, right) => {
    const confidenceGap = right.confidence - left.confidence;
    if (confidenceGap) {
      return confidenceGap;
    }
    return left.aliasEntityPrefix.localeCompare(right.aliasEntityPrefix);
  });
}
|
|
1521
|
+
/**
 * Builds one profile per entity prefix from entries whose claim key is
 * canonical, normalizable and free of suspect reasons (per `inspectClaimKey`).
 *
 * For each accepted entry the profile accumulates the entry id, normalized
 * claim key, attribute, attribute head (text before the first "_"),
 * normalized tags, grounding tokens of `source_context` and `subject`, the
 * entry count and the summed `quality_score`.
 *
 * @param {Iterable<object>} entries Entries exposing `id`, `claim_key`, `tags`,
 *   `source_context`, `subject` and `quality_score`.
 * @returns {Map<string, object>} Profiles keyed by entity prefix.
 */
function buildTrustedClaimKeyEntityProfiles(entries) {
  const addAll = (target, values) => {
    for (const value of values) {
      target.add(value);
    }
  };
  const profilesByEntity = /* @__PURE__ */ new Map();
  for (const entry of entries) {
    const trimmedKey = entry.claim_key?.trim();
    if (!trimmedKey) {
      continue;
    }
    const inspection = inspectClaimKey(trimmedKey);
    if (!inspection.canonical || !inspection.normalized) {
      continue;
    }
    if (inspection.suspectReasons.length > 0) {
      continue;
    }
    const { entity, attribute, claimKey } = inspection.normalized;
    const profile = getOrCreateProfile(profilesByEntity, entity);
    profile.entryIds.add(entry.id);
    profile.claimKeys.add(claimKey);
    profile.attributeSet.add(attribute);
    const [leadingSegment] = attribute.split("_");
    const attributeHead = leadingSegment === undefined ? attribute : leadingSegment;
    // An attribute that begins with "_" yields an empty head, which is skipped.
    if (attributeHead) {
      profile.attributeHeadSet.add(attributeHead);
    }
    for (const rawTag of entry.tags) {
      const tag = normalizeClaimKeySegment(rawTag);
      if (tag) {
        profile.tags.add(tag);
      }
    }
    addAll(profile.sourceContextTokens, tokenizeGrounding(entry.source_context));
    addAll(profile.subjectTokens, tokenizeGrounding(entry.subject));
    profile.entryCount += 1;
    profile.totalQualityScore += entry.quality_score;
  }
  return profilesByEntity;
}
|
|
1559
|
+
/**
 * Returns the profile stored under `entityPrefix`, creating and registering an
 * empty one when none exists yet.
 *
 * A new profile carries empty sets/counters plus three lexical views of the
 * prefix: its non-empty "_"-separated tokens, those tokens sorted and joined
 * with "_", and those tokens concatenated without a separator.
 *
 * @param {Map<string, object>} profiles Registry that is read and, on a miss, mutated.
 * @param {string} entityPrefix Entity prefix to look up.
 * @returns {object} The existing or newly created profile.
 */
function getOrCreateProfile(profiles, entityPrefix) {
  let profile = profiles.get(entityPrefix);
  if (!profile) {
    const tokenList = entityPrefix.split("_").filter(Boolean);
    profile = {
      entityPrefix,
      entryIds: /* @__PURE__ */ new Set(),
      claimKeys: /* @__PURE__ */ new Set(),
      attributeSet: /* @__PURE__ */ new Set(),
      attributeHeadSet: /* @__PURE__ */ new Set(),
      tags: /* @__PURE__ */ new Set(),
      sourceContextTokens: /* @__PURE__ */ new Set(),
      subjectTokens: /* @__PURE__ */ new Set(),
      entryCount: 0,
      totalQualityScore: 0,
      tokenList,
      sortedTokenSignature: tokenList.slice().sort().join("_"),
      compactSignature: tokenList.join("")
    };
    profiles.set(entityPrefix, profile);
  }
  return profile;
}
|
|
1583
|
+
/**
 * Finds entity-prefix pairs that share at least one attribute and evaluates
 * each pair once for alias evidence.
 *
 * Attributes shared by fewer than two, or more than
 * `MAX_ATTRIBUTE_BUCKET_SIZE`, entities contribute no pairs.
 *
 * @param {Map<string, object>} profiles Profiles keyed by entity prefix.
 * @returns {object[]} Non-null results of `evaluateEntityFamilyPairSupport`.
 */
function buildPairSupport(profiles) {
  // Step 1: bucket entity prefixes by the attributes they expose.
  const entitiesByAttribute = /* @__PURE__ */ new Map();
  for (const profile of profiles.values()) {
    for (const attribute of profile.attributeSet) {
      if (!entitiesByAttribute.has(attribute)) {
        entitiesByAttribute.set(attribute, []);
      }
      entitiesByAttribute.get(attribute).push(profile.entityPrefix);
    }
  }
  // Step 2: enumerate unordered pairs inside each bucket; the Set de-duplicates
  // pairs that co-occur under several attributes.
  const pairKeys = /* @__PURE__ */ new Set();
  for (const bucket of entitiesByAttribute.values()) {
    const members = normalizeStringArray(bucket);
    const tooSmall = members.length < 2;
    const tooLarge = members.length > MAX_ATTRIBUTE_BUCKET_SIZE;
    if (tooSmall || tooLarge) {
      continue;
    }
    members.forEach((leftEntity, position) => {
      if (!leftEntity) {
        return;
      }
      for (const rightEntity of members.slice(position + 1)) {
        if (rightEntity) {
          pairKeys.add(buildPairKey(leftEntity, rightEntity));
        }
      }
    });
  }
  // Step 3: score every candidate pair and keep those with usable evidence.
  return [...pairKeys].flatMap((pairKey) => {
    // Pair keys are split on "::" to recover the two entity prefixes.
    const [leftEntity = "", rightEntity = ""] = pairKey.split("::");
    const leftProfile = profiles.get(leftEntity);
    const rightProfile = profiles.get(rightEntity);
    if (!leftProfile || !rightProfile) {
      return [];
    }
    const pairSupport = evaluateEntityFamilyPairSupport(leftProfile, rightProfile);
    return pairSupport ? [pairSupport] : [];
  });
}
|
|
1043
|
-
function
|
|
1044
|
-
const
|
|
1045
|
-
if (
|
|
1630
|
+
/**
 * Decides whether two entity profiles have enough overlap to be treated as
 * members of the same entity family and, if so, describes the evidence.
 *
 * A pair qualifies with 3+ shared attributes; with 2 shared attributes plus a
 * lexical relation or a grounding anchor (shared tag, or 3+ shared
 * source-context tokens); or with exactly 1 shared attribute plus both.
 *
 * @param {object} leftProfile Profile as built by `getOrCreateProfile`.
 * @param {object} rightProfile Profile as built by `getOrCreateProfile`.
 * @returns {object|null} Pair-support record, or `null` when the pair shares
 *   no attribute or does not qualify.
 */
function evaluateEntityFamilyPairSupport(leftProfile, rightProfile) {
  const sharedAttributes = intersectSets(leftProfile.attributeSet, rightProfile.attributeSet);
  if (sharedAttributes.length === 0) {
    return null;
  }
  const sharedAttributeHeads = intersectSets(leftProfile.attributeHeadSet, rightProfile.attributeHeadSet);
  const sharedTags = intersectSets(leftProfile.tags, rightProfile.tags);
  const sharedSourceContextTokens = intersectSets(leftProfile.sourceContextTokens, rightProfile.sourceContextTokens);
  const sharedSubjectTokens = intersectSets(leftProfile.subjectTokens, rightProfile.subjectTokens);
  const lexicalRelation = evaluateEntityLexicalRelation(leftProfile, rightProfile);

  // Anchors are the two stronger grounding signals; subject overlap only
  // contributes to the overall grounding score.
  const tagAnchor = sharedTags.length > 0 ? 1 : 0;
  const contextAnchor = sharedSourceContextTokens.length >= 3 ? 1 : 0;
  const subjectSignal = sharedSubjectTokens.length >= 2 ? 1 : 0;
  const groundingAnchorCount = tagAnchor + contextAnchor;
  const groundingScore = groundingAnchorCount + subjectSignal;

  const sharedAttributeCount = sharedAttributes.length;
  const hasLexicalRelation = lexicalRelation.kind !== null;
  const hasGroundingAnchor = groundingAnchorCount >= 1;
  let qualifies;
  if (sharedAttributeCount >= 3) {
    qualifies = true;
  } else if (sharedAttributeCount >= 2) {
    qualifies = hasLexicalRelation || hasGroundingAnchor;
  } else {
    qualifies = sharedAttributeCount === 1 && hasLexicalRelation && hasGroundingAnchor;
  }
  if (!qualifies) {
    return null;
  }

  const preview = (values) => values.slice(0, MAX_EVIDENCE_VALUES).join(", ");
  const evidence = [];
  const addEvidence = (kind, detail) => {
    evidence.push({ kind, detail });
  };
  addEvidence("shared_attribute_overlap", `Shared attributes: ${preview(sharedAttributes)}`);
  if (sharedAttributeHeads.length >= 2) {
    addEvidence("shared_attribute_head_overlap", `Shared attribute families: ${preview(sharedAttributeHeads)}`);
  }
  if (sharedTags.length > 0) {
    addEvidence("shared_tag_grounding", `Shared tags: ${preview(sharedTags)}`);
  }
  // Note: evidence is reported from 2 shared tokens, while the anchor above needs 3.
  if (sharedSourceContextTokens.length >= 2) {
    addEvidence("shared_source_context_grounding", `Shared source-context tokens: ${preview(sharedSourceContextTokens)}`);
  }
  if (sharedSubjectTokens.length >= 2) {
    addEvidence("shared_subject_grounding", `Shared subject tokens: ${preview(sharedSubjectTokens)}`);
  }
  if (lexicalRelation.kind && lexicalRelation.detail) {
    addEvidence(lexicalRelation.kind, lexicalRelation.detail);
  }

  // Terms are summed left to right in the same order as before so the
  // floating-point result is unchanged; the total is capped at 0.98.
  const attributeTerm = Math.min(sharedAttributeCount, 3) * 0.12;
  const groundingTerm = Math.min(groundingScore, 3) * 0.08;
  const lexicalTerm = lexicalRelation.strengthScore * 0.05;
  const confidence = Math.min(0.98, 0.48 + attributeTerm + groundingTerm + lexicalTerm);

  const hasAutoSafeOverlap = sharedAttributeCount >= 2 || sharedAttributeCount === 1 && groundingAnchorCount >= 1 && groundingScore >= 2;
  return {
    entityPrefixes: [leftProfile.entityPrefix, rightProfile.entityPrefix],
    supportingEntryIds: normalizeStringArray([...leftProfile.entryIds, ...rightProfile.entryIds]),
    sharedAttributes,
    confidence,
    autoSafe: lexicalRelation.autoSafe && hasAutoSafeOverlap,
    preferredCanonicalEntityPrefix: lexicalRelation.preferredCanonicalEntityPrefix,
    evidence
  };
}
|
|
1095
|
-
function
|
|
1096
|
-
|
|
1696
|
+
/**
 * Classifies the lexical relationship between two entity prefixes.
 *
 * Checks run in priority order and the first hit wins:
 *   1. separator variant  - same tokens once separators are removed
 *   2. token reordering   - same tokens in a different order
 *   3. initialism         - one prefix equals the initialism of the other
 *   4. token subset       - one prefix's tokens are a subset of the other's
 * Only the first two are marked `autoSafe`.
 *
 * @param {object} leftProfile Profile with `entityPrefix`, `tokenList`,
 *   `compactSignature` and `sortedTokenSignature`.
 * @param {object} rightProfile Same shape as `leftProfile`.
 * @returns {{kind: string|null, detail: string|null, autoSafe: boolean,
 *   preferredCanonicalEntityPrefix: string|null, strengthScore: number}}
 */
function evaluateEntityLexicalRelation(leftProfile, rightProfile) {
  const relation = (kind, detail, autoSafe, preferredCanonicalEntityPrefix, strengthScore) => ({
    kind,
    detail,
    autoSafe,
    preferredCanonicalEntityPrefix,
    strengthScore
  });
  // `expanded` is the long form whose initialism equals the `abbreviated` prefix.
  const initialismRelation = (expanded, abbreviated) => relation(
    "lexical_initialism_expansion",
    `Entity prefix "${abbreviated.entityPrefix}" matches the initialism of "${expanded.entityPrefix}".`,
    false,
    expanded.entityPrefix,
    1
  );
  const subsetRelation = (subset, superset) => relation(
    "lexical_token_subset",
    `"${subset.entityPrefix}" is a lexical subset of "${superset.entityPrefix}".`,
    false,
    superset.entityPrefix,
    1
  );

  const leftTokens = leftProfile.tokenList;
  const rightTokens = rightProfile.tokenList;
  const prefixesDiffer = leftProfile.entityPrefix !== rightProfile.entityPrefix;

  if (leftProfile.compactSignature === rightProfile.compactSignature && prefixesDiffer) {
    // Prefer whichever spelling keeps more token boundaries; no preference on a tie.
    let preferredCanonicalEntityPrefix = null;
    if (leftTokens.length > rightTokens.length) {
      preferredCanonicalEntityPrefix = leftProfile.entityPrefix;
    } else if (leftTokens.length < rightTokens.length) {
      preferredCanonicalEntityPrefix = rightProfile.entityPrefix;
    }
    const detail = preferredCanonicalEntityPrefix === null ? "Entity prefixes collapse to the same compact lexical form." : `Entity prefixes collapse to the same compact lexical form; "${preferredCanonicalEntityPrefix}" preserves clearer token boundaries.`;
    return relation("lexical_separator_variant", detail, true, preferredCanonicalEntityPrefix, 3);
  }

  const sameTokenMultiset = leftProfile.sortedTokenSignature.length > 0 && leftProfile.sortedTokenSignature === rightProfile.sortedTokenSignature;
  if (sameTokenMultiset && prefixesDiffer) {
    return relation("lexical_token_reordering", "Entity prefixes use the same lexical tokens in a different order.", true, null, 2);
  }

  const leftInitialism = buildInitialism(leftTokens);
  const rightInitialism = buildInitialism(rightTokens);
  if (leftInitialism.length >= 2 && leftInitialism === rightProfile.entityPrefix) {
    return initialismRelation(leftProfile, rightProfile);
  }
  if (rightInitialism.length >= 2 && rightInitialism === leftProfile.entityPrefix) {
    return initialismRelation(rightProfile, leftProfile);
  }

  if (isTokenSubset(leftTokens, rightTokens)) {
    return subsetRelation(leftProfile, rightProfile);
  }
  if (isTokenSubset(rightTokens, leftTokens)) {
    return subsetRelation(rightProfile, leftProfile);
  }

  return relation(null, null, false, null, 0);
}
|
|
1122
|
-
function
|
|
1123
|
-
const
|
|
1124
|
-
const
|
|
1125
|
-
|
|
1764
|
+
/**
 * Picks the canonical entity prefix for a family, or reports why none can be
 * chosen safely.
 *
 * Each profiled candidate is scored from attribute coverage, entry count,
 * rounded mean quality, lexical votes across `pairSupport` (+4 per pair that
 * prefers it, -2 per pair that prefers some other prefix) and form
 * specificity. The top candidate is accepted only if it has direct, lexically
 * backed support to every peer, at least one lexical vote, and leads the
 * runner-up by at least `CANONICAL_SELECTION_MARGIN`.
 *
 * @param {string[]} entityPrefixes Members of the family.
 * @param {object[]} pairSupport Pair-support records for the family.
 * @param {Map<string, object>} profiles Profiles keyed by entity prefix.
 * @returns {{canonicalEntityPrefix: string|null, reasons: string[],
 *   unresolvedReason: string|null}}
 */
function selectCanonicalEntityPrefix(entityPrefixes, pairSupport, profiles) {
  const unresolved = (unresolvedReason) => ({
    canonicalEntityPrefix: null,
    reasons: [],
    unresolvedReason
  });
  const noCandidateReason = "No canonical entity prefix could be selected from the detected family.";
  const countVotesFor = (candidate) => pairSupport.filter((support) => support.preferredCanonicalEntityPrefix === candidate).length;

  const scoreboard = /* @__PURE__ */ new Map();
  for (const candidate of entityPrefixes) {
    const profile = profiles.get(candidate);
    if (!profile) {
      continue;
    }
    const votesFor = countVotesFor(candidate);
    // Pairs with a non-null preference for a different prefix count against this one.
    const votesAgainst = pairSupport.filter((support) => support.preferredCanonicalEntityPrefix !== candidate && support.preferredCanonicalEntityPrefix !== null).length;
    const meanQuality = Math.round(profile.totalQualityScore / Math.max(profile.entryCount, 1));
    const formSpecificity = scoreEntityFormSpecificity(candidate, entityPrefixes, profiles);
    const score = Math.min(profile.attributeSet.size, 6) * 2 + Math.min(profile.entryCount, 6) + meanQuality + votesFor * 4 - votesAgainst * 2 + formSpecificity.score;

    const reasons = [];
    if (profile.attributeSet.size >= 2) {
      reasons.push("broader trusted attribute coverage");
    }
    if (formSpecificity.reason) {
      reasons.push(formSpecificity.reason);
    }
    if (votesFor > 0) {
      reasons.push(`lexical alias evidence prefers "${candidate}"`);
    }
    scoreboard.set(candidate, { score, reasons: normalizeStringArray(reasons) });
  }

  const ranked = [...scoreboard.entries()].sort(
    ([leftName, leftEntry], [rightName, rightEntry]) => rightEntry.score - leftEntry.score || leftName.localeCompare(rightName)
  );
  if (ranked.length === 0) {
    return unresolved(noCandidateReason);
  }
  const [bestEntityPrefix, bestEntry] = ranked[0];
  const secondScore = ranked.length > 1 ? ranked[1][1].score : Number.NEGATIVE_INFINITY;
  if (!profiles.get(bestEntityPrefix)) {
    return unresolved(noCandidateReason);
  }

  const peerSupport = entityPrefixes.filter((peer) => peer !== bestEntityPrefix).map((peer) => findPairSupport(pairSupport, bestEntityPrefix, peer));
  const supportsEveryPeer = peerSupport.every((support) => support !== null);
  const lexicallySupportsEveryPeer = peerSupport.every((support) => support?.evidence.some((item) => item.kind.startsWith("lexical_")) === true);
  const isClearWinner = bestEntry.score - secondScore >= CANONICAL_SELECTION_MARGIN;
  const accepted = supportsEveryPeer && lexicallySupportsEveryPeer && countVotesFor(bestEntityPrefix) !== 0 && isClearWinner;
  if (!accepted) {
    return unresolved("Multiple plausible canonical entity prefixes remain after conservative scoring.");
  }
  return {
    canonicalEntityPrefix: bestEntityPrefix,
    reasons: bestEntry.reasons,
    unresolvedReason: null
  };
}
|
|
1148
|
-
function
|
|
1149
|
-
const
|
|
1150
|
-
|
|
1151
|
-
}
|
|
1152
|
-
|
|
1153
|
-
|
|
1154
|
-
|
|
1155
|
-
|
|
1156
|
-
|
|
1157
|
-
|
|
1158
|
-
|
|
1159
|
-
|
|
1160
|
-
|
|
1161
|
-
|
|
1162
|
-
|
|
1163
|
-
|
|
1164
|
-
|
|
1165
|
-
|
|
1166
|
-
|
|
1167
|
-
|
|
1168
|
-
|
|
1169
|
-
|
|
1170
|
-
|
|
1171
|
-
|
|
1172
|
-
|
|
1173
|
-
|
|
1174
|
-
|
|
1175
|
-
|
|
1832
|
+
/**
 * Adjusts a candidate's canonical score based on how its spelling compares
 * with sibling prefixes that share the same compact (separator-free) form.
 *
 * +2 when the candidate has 2+ tokens and some such sibling has fewer tokens;
 * -1 when the candidate is at most 3 characters long and has no such sibling.
 *
 * @param {string} entityPrefix Candidate being scored.
 * @param {string[]} entityPrefixes All prefixes in the family.
 * @param {Map<string, object>} profiles Profiles keyed by entity prefix.
 * @returns {{score: number, reason: string|null}} Zero/`null` when the
 *   candidate has no profile.
 */
function scoreEntityFormSpecificity(entityPrefix, entityPrefixes, profiles) {
  const profile = profiles.get(entityPrefix);
  if (!profile) {
    return { score: 0, reason: null };
  }
  // Siblings whose tokens concatenate to the same string as the candidate's.
  const compactPeers = [];
  for (const peerEntityPrefix of entityPrefixes) {
    if (peerEntityPrefix === entityPrefix) {
      continue;
    }
    const peerProfile = profiles.get(peerEntityPrefix);
    if (peerProfile && peerProfile.compactSignature === profile.compactSignature) {
      compactPeers.push(peerProfile);
    }
  }
  const tokenCount = profile.tokenList.length;
  const keepsMoreBoundaries = tokenCount >= 2 && compactPeers.some((peerProfile) => peerProfile.tokenList.length < tokenCount);
  const isBareShortForm = entityPrefix.length <= 3 && compactPeers.length === 0;
  return {
    score: (keepsMoreBoundaries ? 2 : 0) - (isBareShortForm ? 1 : 0),
    reason: keepsMoreBoundaries ? "less abbreviated lexical form" : null
  };
}
|
|
1849
|
+
/**
 * Collects, in breadth-first order, every entity prefix reachable from
 * `startingEntityPrefix` through `adjacency`.
 *
 * The queue is consumed with a read cursor instead of `Array.prototype.shift()`,
 * so dequeuing does not re-index the array on every step; traversal order and
 * the returned component are unchanged.
 *
 * @param {string} startingEntityPrefix Node the traversal starts from.
 * @param {Map<string, Iterable<string>>} adjacency Neighbours per entity prefix.
 * @param {Set<string>} visited Shared visited set; every discovered node is
 *   added to it (mutated in place) so callers can skip it later.
 * @returns {string[]} Reachable entity prefixes, starting node first. Falsy
 *   prefixes (for example "") are marked visited but not reported or expanded.
 */
function collectConnectedEntityComponent(startingEntityPrefix, adjacency, visited) {
  const queue = [startingEntityPrefix];
  const component = [];
  visited.add(startingEntityPrefix);
  // `queue` grows while we iterate; `head` is the index of the next node to expand.
  for (let head = 0; head < queue.length; head += 1) {
    const entityPrefix = queue[head];
    if (!entityPrefix) {
      continue;
    }
    component.push(entityPrefix);
    const peers = adjacency.get(entityPrefix);
    if (!peers) {
      continue;
    }
    for (const peer of peers) {
      // Mark on discovery so a node is never queued twice.
      if (!visited.has(peer)) {
        visited.add(peer);
        queue.push(peer);
      }
    }
  }
  return component;
}
|
|
1178
|
-
function
|
|
1179
|
-
|
|
1180
|
-
|
|
1181
|
-
|
|
1182
|
-
|
|
1183
|
-
|
|
1184
|
-
case "value_shaped_attribute":
|
|
1185
|
-
return describeExtractedClaimKeyRejection(reason, claimKey);
|
|
1873
|
+
/**
 * Looks up the pair-support record that links two entity prefixes, ignoring
 * the order in which the record stores them.
 *
 * @param {object[]} pairSupport Records exposing a two-element `entityPrefixes`.
 * @param {string} leftEntityPrefix One side of the pair.
 * @param {string} rightEntityPrefix The other side of the pair.
 * @returns {object|null} The first matching record, or `null` when none matches.
 */
function findPairSupport(pairSupport, leftEntityPrefix, rightEntityPrefix) {
  const match = pairSupport.find((support) => {
    const [first, second] = support.entityPrefixes;
    const sameOrder = first === leftEntityPrefix && second === rightEntityPrefix;
    const swappedOrder = first === rightEntityPrefix && second === leftEntityPrefix;
    return sameOrder || swappedOrder;
  });
  return match ?? null;
}
|
|
1188
|
-
function
|
|
1189
|
-
|
|
1190
|
-
|
|
1191
|
-
function compactSourceOfTruthAttribute(attributeTokens) {
|
|
1192
|
-
const sourceOfTruthIndex = findSourceOfTruthPhraseIndex(attributeTokens);
|
|
1193
|
-
if (sourceOfTruthIndex === -1) {
|
|
1882
|
+
/**
 * Evaluates whether a small, low-trust entity family looks like an alias of a
 * dominant trusted family, and if so builds the candidate record.
 *
 * @param {object} aliasProfile Stats record of the suspected alias family.
 * @param {object} dominantProfile Stats record of the suspected canonical family.
 * @returns {object|null} Candidate with sizes, confidence, evidence and a
 *   `canonicalReuseSafe` flag; `null` when there is no usable lexical relation
 *   (or it is scope-like), the dominant family has too few trusted entries, or
 *   the alias family has no low-trust entries.
 */
function evaluateSingletonAliasCandidate(aliasProfile, dominantProfile) {
  const lexicalRelation = evaluateSingletonAliasLexicalRelation(aliasProfile.entityPrefix, dominantProfile.entityPrefix);
  const hasUsableRelation = lexicalRelation.kind && lexicalRelation.detail && !lexicalRelation.scopeLike;
  if (!hasUsableRelation) {
    return null;
  }
  const dominantTrustedCount = dominantProfile.trustedEntryCount;
  if (dominantTrustedCount < SINGLETON_ALIAS_MIN_DOMINANT_TRUSTED_COUNT) {
    return null;
  }
  const aliasLowTrustCount = buildLowTrustEntryCount(aliasProfile);
  if (aliasLowTrustCount === 0) {
    return null;
  }

  const aliasFamilySize = aliasProfile.activeEntryCount;
  const dominantFamilySize = dominantProfile.activeEntryCount;
  const aliasEntryNoun = pluralize(aliasFamilySize, "entry");
  const trustedEntryNoun = pluralize(dominantTrustedCount, "entry");
  const evidence = [
    {
      kind: "singleton_family_size",
      detail: `"${aliasProfile.entityPrefix}" has ${aliasFamilySize} active keyed ${aliasEntryNoun}.`
    },
    {
      kind: "dominant_trusted_family",
      detail: `"${dominantProfile.entityPrefix}" already has ${dominantTrustedCount} trusted ${trustedEntryNoun}.`
    },
    {
      kind: "low_trust_creation_path",
      detail: describeLowTrustAliasFamily(aliasProfile)
    },
    {
      kind: lexicalRelation.kind,
      detail: lexicalRelation.detail
    }
  ];

  // Terms are summed left to right in the same order as before so the
  // floating-point result is unchanged; the total is capped at 0.98.
  const trustedTerm = Math.min(dominantTrustedCount, 6) * 0.05;
  const lowTrustTerm = Math.min(aliasLowTrustCount, 2) * 0.05;
  const sizeGapTerm = Math.min(dominantFamilySize - aliasFamilySize, 6) * 0.02;
  const lexicalTerm = lexicalRelation.strengthScore * 0.08;
  const confidence = Math.min(0.98, 0.58 + trustedTerm + lowTrustTerm + sizeGapTerm + lexicalTerm);

  return {
    aliasEntityPrefix: aliasProfile.entityPrefix,
    dominantEntityPrefix: dominantProfile.entityPrefix,
    aliasFamilySize,
    dominantFamilySize,
    dominantTrustedCount,
    aliasLowTrustCount,
    confidence,
    // Reuse is flagged safe only for a true singleton whose entries are all low-trust.
    canonicalReuseSafe: lexicalRelation.canonicalReuseSafe && aliasFamilySize === 1 && aliasLowTrustCount === aliasFamilySize && dominantTrustedCount >= SINGLETON_ALIAS_MIN_DOMINANT_TRUSTED_COUNT,
    evidence
  };
}
|
|
1213
|
-
function
|
|
1214
|
-
const
|
|
1215
|
-
|
|
1216
|
-
|
|
1929
|
+
/**
 * Chooses the single best dominant-family candidate for one alias.
 *
 * Candidates are ranked by confidence (desc), then dominant entity prefix
 * (asc). The winner is dropped when a runner-up trails it by less than
 * `SINGLETON_ALIAS_MIN_CONFIDENCE_DELTA`, i.e. when the choice is ambiguous.
 *
 * @param {Iterable<object>} candidates Candidates for the same alias; not mutated.
 * @returns {object[]} A one-element array with the winner, or an empty array.
 */
function selectBestSingletonAliasCandidate(candidates) {
  const ranked = Array.from(candidates).sort((left, right) => {
    const confidenceGap = right.confidence - left.confidence;
    if (confidenceGap) {
      return confidenceGap;
    }
    return left.dominantEntityPrefix.localeCompare(right.dominantEntityPrefix);
  });
  const best = ranked[0];
  const runnerUp = ranked[1];
  if (!best) {
    return [];
  }
  const isAmbiguous = Boolean(runnerUp) && best.confidence - runnerUp.confidence < SINGLETON_ALIAS_MIN_CONFIDENCE_DELTA;
  return isAmbiguous ? [] : [best];
}
|
|
1942
|
+
/**
 * Classifies how an alias entity prefix relates lexically to a dominant one.
 *
 * Outcomes, in priority order:
 *   - separator variant: both prefixes concatenate to the same token string;
 *   - no relation: the dominant tokens are not a subset of the alias tokens;
 *   - scope-like: the alias adds anything other than exactly one token, or the
 *     added token is listed in `SINGLETON_ALIAS_SCOPE_TOKENS` (reported with
 *     `kind: null` and `scopeLike: true`);
 *   - token subset: the alias extends the dominant prefix by one plain token.
 *
 * @param {string} aliasEntityPrefix Suspected alias prefix.
 * @param {string} dominantEntityPrefix Suspected canonical prefix.
 * @returns {{kind: string|null, detail: string|null, canonicalReuseSafe: boolean,
 *   scopeLike: boolean, strengthScore: number}}
 */
function evaluateSingletonAliasLexicalRelation(aliasEntityPrefix, dominantEntityPrefix) {
  const tokenize = (entityPrefix) => entityPrefix.split("_").filter((token) => token.length > 0);
  const noRelation = (scopeLike) => ({
    kind: null,
    detail: null,
    canonicalReuseSafe: false,
    scopeLike,
    strengthScore: 0
  });

  const aliasTokens = tokenize(aliasEntityPrefix);
  const dominantTokens = tokenize(dominantEntityPrefix);
  const sameCompactForm = aliasTokens.join("") === dominantTokens.join("");
  if (sameCompactForm && aliasEntityPrefix !== dominantEntityPrefix) {
    return {
      kind: "lexical_separator_variant",
      detail: `Entity prefixes "${aliasEntityPrefix}" and "${dominantEntityPrefix}" collapse to the same compact lexical form.`,
      canonicalReuseSafe: true,
      scopeLike: false,
      strengthScore: 3
    };
  }
  if (!isTokenSubset(dominantTokens, aliasTokens)) {
    return noRelation(false);
  }
  // Tokens the alias carries beyond those of the dominant prefix.
  const dominantTokenSet = new Set(dominantTokens);
  const addedTokens = aliasTokens.filter((token) => !dominantTokenSet.has(token));
  const addsExactlyOneToken = addedTokens.length === 1;
  const addsScopeToken = addedTokens.some((token) => SINGLETON_ALIAS_SCOPE_TOKENS.has(token));
  if (!addsExactlyOneToken || addsScopeToken) {
    return noRelation(true);
  }
  return {
    kind: "lexical_token_subset",
    detail: `"${aliasEntityPrefix}" extends "${dominantEntityPrefix}" by the added token "${addedTokens[0]}".`,
    canonicalReuseSafe: true,
    scopeLike: false,
    strengthScore: 2
  };
}
|
|
1263
|
-
function
|
|
1264
|
-
const
|
|
1265
|
-
|
|
1266
|
-
|
|
1985
|
+
/**
 * Counts the low-trust entries described by an alias-family profile.
 *
 * The total is tentative + unresolved entries, plus the deterministic-repair entries
 * in excess of the tentative count (never negative).
 *
 * @param {{deterministicRepairEntryCount: number, tentativeEntryCount: number, unresolvedEntryCount: number}} profile
 * @returns {number}
 */
function buildLowTrustEntryCount(profile) {
  const { deterministicRepairEntryCount, tentativeEntryCount, unresolvedEntryCount } = profile;
  const deterministicOnly = Math.max(0, deterministicRepairEntryCount - tentativeEntryCount);
  return tentativeEntryCount + unresolvedEntryCount + deterministicOnly;
}
|
|
1989
|
+
/**
 * Builds a one-sentence explanation of why an alias family is considered low-trust.
 *
 * Each non-zero counter (deterministic repair, tentative, unresolved — in that order)
 * contributes a "<count> <label> <noun>" fragment, with the noun produced by
 * `pluralize(count, "entry")`. With no non-zero counters a generic sentence is returned.
 *
 * @param {{entityPrefix: string, deterministicRepairEntryCount: number, tentativeEntryCount: number, unresolvedEntryCount: number}} profile
 * @returns {string}
 */
function describeLowTrustAliasFamily(profile) {
  const signals = [
    [profile.deterministicRepairEntryCount, "deterministic repair"],
    [profile.tentativeEntryCount, "tentative"],
    [profile.unresolvedEntryCount, "unresolved"]
  ];
  const reasons = signals
    .filter(([count]) => count > 0)
    .map(([count, label]) => `${count} ${label} ${pluralize(count, "entry")}`);
  if (reasons.length === 0) {
    return `"${profile.entityPrefix}" is not fully trusted yet.`;
  }
  return `"${profile.entityPrefix}" is low-trust because it has ${reasons.join(", ")}.`;
}
|
|
2005
|
+
/**
 * Concatenates the first character of every token.
 *
 * @param {string[]} tokens
 * @returns {string} The initialism, or "" when fewer than two tokens are given.
 */
function buildInitialism(tokens) {
  if (tokens.length < 2) {
    return "";
  }
  let initials = "";
  for (const token of tokens) {
    // An empty token has no first character and contributes nothing.
    initials += token[0] ?? "";
  }
  return initials;
}
|
|
1290
|
-
function
|
|
1291
|
-
|
|
1292
|
-
|
|
1293
|
-
return index;
|
|
1294
|
-
}
|
|
2011
|
+
/**
 * Tests whether `subsetTokens` is a non-empty, strictly shorter list whose every
 * token also occurs in `supersetTokens`.
 *
 * @param {string[]} subsetTokens
 * @param {string[]} supersetTokens
 * @returns {boolean}
 */
function isTokenSubset(subsetTokens, supersetTokens) {
  const subsetSize = subsetTokens.length;
  if (subsetSize === 0 || subsetSize >= supersetTokens.length) {
    return false;
  }
  const known = new Set(supersetTokens);
  for (const token of subsetTokens) {
    if (!known.has(token)) {
      return false;
    }
  }
  return true;
}
|
|
1298
|
-
function
|
|
1299
|
-
|
|
1300
|
-
|
|
1301
|
-
if (token && ACTION_CONDITION_TOKENS.has(token)) {
|
|
1302
|
-
return token;
|
|
1303
|
-
}
|
|
2018
|
+
/**
 * Splits a free-form value into grounding tokens.
 *
 * The value is passed through `normalizeClaimKeySegment`, split on "_", and tokens
 * shorter than two characters or present in ENTITY_FAMILY_GROUNDING_STOP_TOKENS are
 * discarded.
 *
 * @param {string|null|undefined} value
 * @returns {string[]} Tokens in their original order; `[]` for empty input or when
 *   normalization yields nothing.
 */
function tokenizeGrounding(value) {
  const normalized = value ? normalizeClaimKeySegment(value) : "";
  if (!normalized) {
    return [];
  }
  const kept = [];
  for (const token of normalized.split("_")) {
    if (token.length >= 2 && !ENTITY_FAMILY_GROUNDING_STOP_TOKENS.has(token)) {
      kept.push(token);
    }
  }
  return kept;
}
|
|
1307
|
-
function
|
|
1308
|
-
const
|
|
1309
|
-
const
|
|
1310
|
-
const
|
|
1311
|
-
|
|
1312
|
-
|
|
2028
|
+
/**
 * Returns the values common to both sets as an array sorted with `localeCompare`.
 *
 * The smaller set is iterated and probed against the larger one.
 *
 * @param {Set<string>} left
 * @param {Set<string>} right
 * @returns {string[]}
 */
function intersectSets(left, right) {
  const leftIsSmaller = left.size <= right.size;
  const probe = leftIsSmaller ? left : right;
  const lookup = leftIsSmaller ? right : left;
  const shared = [...probe].filter((value) => lookup.has(value));
  shared.sort((a, b) => a.localeCompare(b));
  return shared;
}
|
|
1316
|
-
function
|
|
1317
|
-
|
|
1318
|
-
|
|
1319
|
-
|
|
1320
|
-
|
|
2038
|
+
/**
 * Trims every string, drops the ones that end up empty, and removes duplicates while
 * keeping first-occurrence order.
 *
 * @param {string[]} values
 * @returns {string[]}
 */
function normalizeStringArray(values) {
  const unique = new Set();
  for (const value of values) {
    const trimmed = value.trim();
    if (trimmed.length > 0) {
      unique.add(trimmed);
    }
  }
  return [...unique];
}
|
|
2041
|
+
/**
 * Builds an order-independent key for a pair of entity prefixes: the two prefixes in
 * `localeCompare` order, joined by "::".
 *
 * @param {string} leftEntityPrefix
 * @param {string} rightEntityPrefix
 * @returns {string}
 */
function buildPairKey(leftEntityPrefix, rightEntityPrefix) {
  const alreadyOrdered = leftEntityPrefix.localeCompare(rightEntityPrefix) <= 0;
  return alreadyOrdered
    ? `${leftEntityPrefix}::${rightEntityPrefix}`
    : `${rightEntityPrefix}::${leftEntityPrefix}`;
}
|
|
2044
|
+
/**
 * Returns the Set stored under `key`, creating and registering an empty one when the
 * map has no (truthy) value for that key yet.
 *
 * @param {Map<any, Set<any>>} map
 * @param {any} key
 * @returns {Set<any>}
 */
function getOrCreateSet(map, key) {
  let bucket = map.get(key);
  if (!bucket) {
    bucket = /* @__PURE__ */ new Set();
    map.set(key, bucket);
  }
  return bucket;
}
|
|
1324
|
-
function
|
|
1325
|
-
|
|
1326
|
-
|
|
1327
|
-
|
|
1328
|
-
|
|
1329
|
-
|
|
1330
|
-
|
|
1331
|
-
|
|
1332
|
-
|
|
2053
|
+
/**
 * Returns `noun` unchanged when `count` is exactly 1, otherwise its plural form.
 *
 * Plurals are formed by appending "s", except for nouns ending in a consonant
 * followed by "y" (e.g. "entry"), which become "...ies". Previously "entry" was
 * rendered as "entrys" in user-facing messages.
 *
 * @param {number} count
 * @param {string} noun - Lower-case singular noun.
 * @returns {string}
 */
function pluralize(count, noun) {
  if (count === 1) {
    return noun;
  }
  // consonant + "y" -> "ies" ("entry" -> "entries"); vowel + "y" keeps the plain "s" ("key" -> "keys").
  if (/[^aeiou]y$/u.test(noun)) {
    return `${noun.slice(0, -1)}ies`;
  }
  return `${noun}s`;
}
|
|
2056
|
+
|
|
2057
|
+
// src/core/claim-key-support.ts
|
|
2058
|
+
// Upper bound on "_"-separated attribute tokens accepted by evaluateClaimKeyCompactness
// and matchesStableFamilySlotSupport.
var MAX_AUTO_APPLY_ATTRIBUTE_TOKENS = 4;

// Tokens ignored by tokenizeGroundingText and by the attribute side of
// inspectCandidateLexicalAlignment.
var GROUNDING_STOP_TOKENS = /* @__PURE__ */ new Set([
  "a", "an", "and", "are", "as", "at", "be", "by",
  "for", "from", "how", "in", "into", "is", "it",
  "of", "on", "or", "our",
  "that", "the", "their", "this", "to",
  "we", "with"
]);

// Attribute tokens that make evaluateClaimKeyCompactness reject a slot for auto-apply.
var AWKWARD_AUTO_APPLY_ATTRIBUTE_TOKENS = /* @__PURE__ */ new Set([
  "to", "for", "from", "with", "about", "into", "onto", "between", "during"
]);

// Attribute-token vocabularies paired with the three text patterns in
// matchesConservativeTemplateSupport (policy, authoritative, architecture).
var POLICY_TEMPLATE_ATTRIBUTE_TOKENS = /* @__PURE__ */ new Set([
  "policy", "default", "workflow", "process", "strategy", "guardrail", "rule", "boundary"
]);
var AUTHORITATIVE_TEMPLATE_ATTRIBUTE_TOKENS = /* @__PURE__ */ new Set([
  "source", "truth", "guide", "runbook", "reference"
]);
var ARCHITECTURE_TEMPLATE_ATTRIBUTE_TOKENS = /* @__PURE__ */ new Set([
  "adapter", "boundary", "architecture", "backend", "storage",
  "model", "support", "contract", "interface", "surface"
]);

// Final attribute tokens ("heads") accepted by matchesStableFamilySlotSupport.
var STABLE_FAMILY_SLOT_ATTRIBUTE_HEADS = /* @__PURE__ */ new Set([
  "access", "boundary", "condition", "contract", "dependency",
  "mode", "owner", "order", "path", "policy",
  "preference", "preservation", "process", "requirement", "role",
  "rule", "schedule", "sequencing", "setting", "status",
  "strategy", "support", "surface", "timezone", "version",
  "window", "workflow", "workspace"
]);
|
|
2132
|
+
/**
 * Builds the trusted-corpus seed used when evaluating claim-key support.
 *
 * Entries are kept only when their trimmed `claim_key` passes
 * `isTrustedClaimKeyForCleanup` and `inspectClaimKey` yields a normalized form.
 * Per claim key the function tracks occurrence count, maximum importance and latest
 * `created_at`; claim keys are then ordered by count (desc), max importance (desc),
 * latest `created_at` (desc) and finally the key itself (asc).
 *
 * Trusted entries are bucketed by normalized claim key in a Map so the final ordering
 * is a single lookup per key rather than a scan of every trusted entry per key.
 *
 * NOTE(review): only the first entry seen for each claim key is recorded as a trusted
 * entry; later duplicates only update the statistics. This matches the previous
 * behaviour — confirm it is intentional, since the per-key `createdAt` sort below
 * suggests several entries per key were expected.
 *
 * @param {Iterable<object>} entries - Stored entries exposing `id`, `claim_key`,
 *   `importance`, `created_at`, `type`, `tags`, `source_context` and `subject`.
 * @returns {{entries: object[]}} Trusted entries grouped in claim-key order.
 */
function buildTrustedClaimKeySupportSeed(entries) {
  const claimKeyStats = /* @__PURE__ */ new Map();
  const trustedEntriesByClaimKey = /* @__PURE__ */ new Map();
  for (const entry of entries) {
    const claimKey = entry.claim_key?.trim();
    if (!claimKey || !isTrustedClaimKeyForCleanup(claimKey)) {
      continue;
    }
    const inspection = inspectClaimKey(claimKey);
    if (!inspection.normalized) {
      continue;
    }
    const existing = claimKeyStats.get(claimKey);
    if (existing) {
      existing.count += 1;
      existing.maxImportance = Math.max(existing.maxImportance, entry.importance);
      existing.latestCreatedAt = existing.latestCreatedAt.localeCompare(entry.created_at) >= 0 ? existing.latestCreatedAt : entry.created_at;
      continue;
    }
    claimKeyStats.set(claimKey, {
      count: 1,
      maxImportance: entry.importance,
      latestCreatedAt: entry.created_at
    });
    const trustedEntry = {
      id: entry.id,
      claimKey: inspection.normalized.claimKey,
      entity: inspection.normalized.entity,
      attribute: inspection.normalized.attribute,
      type: entry.type,
      tags: normalizeGroundingTags(entry.tags),
      sourceContextTokens: tokenizeGroundingText(entry.source_context),
      subjectTokens: tokenizeGroundingText(entry.subject),
      createdAt: entry.created_at
    };
    // Bucket by the normalized key: that is the key the ordering step looks entries up by.
    const bucket = trustedEntriesByClaimKey.get(trustedEntry.claimKey);
    if (bucket) {
      bucket.push(trustedEntry);
    } else {
      trustedEntriesByClaimKey.set(trustedEntry.claimKey, [trustedEntry]);
    }
  }
  const orderedClaimKeys = [...claimKeyStats.entries()].sort((left, right) => {
    const countDelta = right[1].count - left[1].count;
    if (countDelta !== 0) {
      return countDelta;
    }
    const importanceDelta = right[1].maxImportance - left[1].maxImportance;
    if (importanceDelta !== 0) {
      return importanceDelta;
    }
    const createdAtDelta = right[1].latestCreatedAt.localeCompare(left[1].latestCreatedAt);
    if (createdAtDelta !== 0) {
      return createdAtDelta;
    }
    return left[0].localeCompare(right[0]);
  }).map(([claimKey]) => claimKey);
  const orderedEntries = orderedClaimKeys.flatMap(
    (claimKey) => (trustedEntriesByClaimKey.get(claimKey) ?? []).sort((left, right) => {
      // Newest first within a claim key, then by id for a deterministic order.
      const createdAtDelta = right.createdAt.localeCompare(left.createdAt);
      if (createdAtDelta !== 0) {
        return createdAtDelta;
      }
      return left.id.localeCompare(right.id);
    })
  );
  return {
    entries: orderedEntries
  };
}
|
|
1342
|
-
function
|
|
1343
|
-
|
|
1344
|
-
|
|
1345
|
-
|
|
1346
|
-
|
|
1347
|
-
|
|
2196
|
+
/**
 * Builds a support seed from bare example claim keys.
 *
 * Keys that `inspectClaimKey` cannot normalize, or whose normalized form fails
 * `isTrustedClaimKeyForCleanup`, are skipped. Accepted keys become synthetic entries
 * with id `example:<1-based position in the input>`, no tags/tokens and a fixed
 * epoch `createdAt`.
 *
 * @param {string[]} claimKeys
 * @returns {{entries: object[]}}
 */
function buildClaimKeySupportSeedFromExamples(claimKeys) {
  const entries = [];
  claimKeys.forEach((claimKey, position) => {
    const normalized = inspectClaimKey(claimKey).normalized;
    if (!normalized) {
      return;
    }
    if (!isTrustedClaimKeyForCleanup(normalized.claimKey)) {
      return;
    }
    entries.push({
      // Ids keep the input position, so skipped keys leave gaps in the numbering.
      id: `example:${position + 1}`,
      claimKey: normalized.claimKey,
      entity: normalized.entity,
      attribute: normalized.attribute,
      tags: [],
      sourceContextTokens: [],
      subjectTokens: [],
      createdAt: "1970-01-01T00:00:00.000Z"
    });
  });
  return { entries };
}
|
|
1349
|
-
function
|
|
1350
|
-
|
|
1351
|
-
|
|
2217
|
+
/**
 * Evaluates how well the trusted corpus and the entry's own text support assigning
 * `targetClaimKey` to `entry`.
 *
 * Trusted hint entries are relevant when they share the target's normalized claim key
 * ("exact reuse") or only its entity ("family reuse"); the entry itself (same id) is
 * excluded. Each relevant entry is checked once for tag / source-context overlap with
 * the candidate. Those facts, together with lexical alignment, template support and
 * stable-slot support, are passed to `resolveClaimKeyPromotionSupport` to pick an
 * auto-apply class. Synthetic entries (id starting with "example:") count as trusted
 * reuse even without grounding overlap.
 *
 * @param {object} entry - Candidate entry (`id`, `tags`, `source_context`, plus the
 *   text fields read by the lexical/template helpers).
 * @param {string} targetClaimKey - Claim key being proposed.
 * @param {{entries: object[]}} trustedHints - Seed of trusted entries.
 * @returns {object} Support evaluation; the empty evaluation when the key cannot be
 *   normalized.
 */
function evaluateClaimKeySupport(entry, targetClaimKey, trustedHints) {
  const { normalized } = inspectClaimKey(targetClaimKey);
  if (!normalized) {
    return createEmptyClaimKeySupportEvaluation();
  }
  const isExample = (candidate) => candidate.id.startsWith("example:");
  const idsOf = (candidates) => candidates.map((candidate) => candidate.id);
  const entryTagSet = new Set(normalizeGroundingTags(entry.tags));
  const entrySourceTokens = new Set(tokenizeGroundingText(entry.source_context));

  // Single pass: partition relevant trusted entries and record grounding overlap.
  const exactReuseEntries = [];
  const familyReuseEntries = [];
  const groundedExactReuseEntries = [];
  const groundedFamilyReuseEntries = [];
  let tagGrounding = false;
  let sourceContextGrounding = false;
  for (const trustedEntry of trustedHints.entries) {
    if (entry.id && trustedEntry.id === entry.id) {
      continue;
    }
    const isExactMatch = trustedEntry.claimKey === normalized.claimKey;
    if (!isExactMatch && trustedEntry.entity !== normalized.entity) {
      continue;
    }
    const grounding = inspectGroundingOverlap(entryTagSet, entrySourceTokens, trustedEntry);
    if (grounding.tagGrounding) {
      tagGrounding = true;
    }
    if (grounding.sourceContextGrounding) {
      sourceContextGrounding = true;
    }
    const isGrounded = grounding.tagGrounding || grounding.sourceContextGrounding;
    if (isExactMatch) {
      exactReuseEntries.push(trustedEntry);
      if (isGrounded) {
        groundedExactReuseEntries.push(trustedEntry);
      }
    } else {
      familyReuseEntries.push(trustedEntry);
      if (isGrounded) {
        groundedFamilyReuseEntries.push(trustedEntry);
      }
    }
  }

  const exampleFamilyEntries = familyReuseEntries.filter(isExample);
  const hasExampleFamilySibling = exampleFamilyEntries.length > 0;
  const localGrounding = tagGrounding || sourceContextGrounding;
  const lexicalAlignment = inspectCandidateLexicalAlignment(entry, normalized.entity, normalized.attribute);
  const templateSupport = matchesConservativeTemplateSupport(entry, normalized.attribute);
  const stableSlotSupport = matchesStableFamilySlotSupport(normalized.attribute);
  const trustedExactReuse = exactReuseEntries.length > 0 && (groundedExactReuseEntries.length > 0 || exactReuseEntries.every(isExample));
  const trustedEntityFamilyReuse = groundedFamilyReuseEntries.length > 0 || hasExampleFamilySibling;

  let groundedFamilyReuseCountForPromotion = 0;
  if (groundedFamilyReuseEntries.length > 0) {
    groundedFamilyReuseCountForPromotion = groundedFamilyReuseEntries.length;
  } else if (hasExampleFamilySibling) {
    groundedFamilyReuseCountForPromotion = familyReuseEntries.length;
  }
  const promotionSupport = resolveClaimKeyPromotionSupport({
    exactReuseCount: trustedExactReuse ? Math.max(1, groundedExactReuseEntries.length) : 0,
    familyReuseCount: familyReuseEntries.length,
    groundedFamilyReuseCount: groundedFamilyReuseCountForPromotion,
    localGrounding: localGrounding || hasExampleFamilySibling,
    templateSupport,
    stableSlotSupport,
    lexicalAlignment
  });
  const supportedProposal = lexicalAlignment.any && (templateSupport || stableSlotSupport || trustedExactReuse || trustedEntityFamilyReuse || localGrounding);

  const supportEvidence = [
    [trustedExactReuse, "trusted_exact_reuse"],
    [trustedEntityFamilyReuse, "trusted_entity_family_reuse"],
    [tagGrounding, "tag_grounding"],
    [sourceContextGrounding, "source_context_grounding"],
    [lexicalAlignment.entity, "entity_lexical_alignment"],
    [lexicalAlignment.attribute, "attribute_lexical_alignment"],
    [lexicalAlignment.strongEntityAttribute, "strong_entity_attribute_lexical_alignment"],
    [templateSupport, "template_support"],
    [stableSlotSupport, "stable_slot_support"],
    [promotionSupport.relaxedStableSlotFamilyGate, "single_grounded_family_sibling"]
  ].filter(([present]) => present).map(([, label]) => label);

  const exactReuseLabelCount = Math.max(1, groundedExactReuseEntries.length);
  const familyReuseLabelCount = Math.max(1, groundedFamilyReuseEntries.length || familyReuseEntries.length);
  const rationaleFragments = [];
  if (trustedExactReuse) {
    rationaleFragments.push(`trusted exact reuse from ${exactReuseLabelCount} matching entr${exactReuseLabelCount === 1 ? "y" : "ies"}`);
  }
  if (trustedEntityFamilyReuse) {
    rationaleFragments.push(`trusted ${normalized.entity} family reuse from ${familyReuseLabelCount} supporting entr${familyReuseLabelCount === 1 ? "y" : "ies"}`);
  }
  if (tagGrounding) {
    rationaleFragments.push("overlapping tags with trusted corpus entries");
  }
  if (sourceContextGrounding) {
    rationaleFragments.push("overlapping source_context with trusted corpus entries");
  }
  if (lexicalAlignment.strongEntityAttribute) {
    rationaleFragments.push("strong entity and slot lexical alignment");
  }
  if (lexicalAlignment.attribute) {
    // The strong-alignment fragment above already covers the slot; do not repeat it.
    if (!lexicalAlignment.strongEntityAttribute) {
      rationaleFragments.push("clear lexical alignment to the proposed slot");
    }
  } else if (lexicalAlignment.entity) {
    rationaleFragments.push("clear lexical alignment to the proposed entity");
  }
  if (templateSupport) {
    rationaleFragments.push("a conservative policy/default/source-of-truth template match");
  }
  if (stableSlotSupport) {
    rationaleFragments.push("a stable compact slot head in a well-established entity family");
  }
  if (promotionSupport.relaxedStableSlotFamilyGate) {
    rationaleFragments.push("one grounded family sibling cleared the stable-slot family gate");
  }

  return {
    autoApplyClass: promotionSupport.autoApplyClass,
    supportedProposal,
    trustedExactReuse,
    trustedEntityFamilyReuse,
    tagGrounding,
    sourceContextGrounding,
    localGrounding,
    entityLexicalAlignment: lexicalAlignment.entity,
    attributeLexicalAlignment: lexicalAlignment.attribute,
    strongEntityAttributeLexicalAlignment: lexicalAlignment.strongEntityAttribute,
    lexicalAlignment: lexicalAlignment.any,
    templateSupport,
    stableSlotSupport,
    familyReuseCount: familyReuseEntries.length,
    groundedFamilyReuseCount: groundedFamilyReuseEntries.length,
    relaxedStableSlotFamilyGate: promotionSupport.relaxedStableSlotFamilyGate,
    supportingEntryIds: normalizeStringArray2([
      ...idsOf(groundedExactReuseEntries),
      ...idsOf(groundedFamilyReuseEntries),
      ...idsOf(exampleFamilyEntries)
    ]),
    supportEvidence,
    rationaleFragments
  };
}
|
|
1355
|
-
function
|
|
1356
|
-
return
|
|
2312
|
+
/**
 * Creates the "no support" evaluation: no auto-apply class, every flag false, all
 * counts zero and fresh empty evidence arrays.
 *
 * @returns {object} A new object on every call, in the same key order as the
 *   evaluation returned by evaluateClaimKeySupport.
 */
function createEmptyClaimKeySupportEvaluation() {
  const unsetFlags = Object.fromEntries(
    [
      "supportedProposal",
      "trustedExactReuse",
      "trustedEntityFamilyReuse",
      "tagGrounding",
      "sourceContextGrounding",
      "localGrounding",
      "entityLexicalAlignment",
      "attributeLexicalAlignment",
      "strongEntityAttributeLexicalAlignment",
      "lexicalAlignment",
      "templateSupport",
      "stableSlotSupport"
    ].map((flag) => [flag, false])
  );
  return {
    autoApplyClass: null,
    ...unsetFlags,
    familyReuseCount: 0,
    groundedFamilyReuseCount: 0,
    relaxedStableSlotFamilyGate: false,
    supportingEntryIds: [],
    supportEvidence: [],
    rationaleFragments: []
  };
}
|
|
1358
|
-
function
|
|
1359
|
-
|
|
2335
|
+
/**
 * Compacts a claim key via `compactClaimKey` and reports whether the resulting slot
 * is compact enough for auto-apply.
 *
 * A slot qualifies when its attribute has between 1 and
 * MAX_AUTO_APPLY_ATTRIBUTE_TOKENS "_"-separated tokens and none of them is listed in
 * AWKWARD_AUTO_APPLY_ATTRIBUTE_TOKENS. Provenance from an earlier compaction pass
 * (`prior`) is carried forward and merged with this pass's reason.
 *
 * @param {string} claimKey
 * @param {{priorCompactedFrom?: string|null, priorCompactionReason?: string|null}} [prior]
 * @returns {{claimKey: string, compactedFrom: string|null, compactionReason: string|null, compactEnoughForAutoApply: boolean, blockerReason: string|null}}
 */
function evaluateClaimKeyCompactness(claimKey, prior) {
  const compacted = compactClaimKey(claimKey);
  if (!compacted) {
    return {
      claimKey,
      compactedFrom: null,
      compactionReason: null,
      compactEnoughForAutoApply: false,
      blockerReason: "invalid_claim_key"
    };
  }

  const slotTokens = compacted.attribute.split("_").filter((token) => token.length > 0);
  const withinTokenBudget = slotTokens.length > 0 && slotTokens.length <= MAX_AUTO_APPLY_ATTRIBUTE_TOKENS;
  const compactEnoughForAutoApply = withinTokenBudget && slotTokens.every((token) => !AWKWARD_AUTO_APPLY_ATTRIBUTE_TOKENS.has(token));

  const priorReason = prior?.priorCompactionReason;
  let compactionReason;
  if (compacted.reason && priorReason) {
    // Both passes changed the key: keep the history in order.
    compactionReason = `${priorReason} and ${compacted.reason}`;
  } else {
    compactionReason = compacted.reason ?? priorReason ?? null;
  }

  return {
    claimKey: compacted.claimKey,
    compactedFrom: compacted.compactedFrom ?? prior?.priorCompactedFrom ?? null,
    compactionReason,
    compactEnoughForAutoApply,
    blockerReason: compactEnoughForAutoApply ? null : "non_compact_canonical_slot"
  };
}
|
|
1361
|
-
function
|
|
1362
|
-
return
|
|
2358
|
+
/**
 * Normalizes entry tags for grounding comparisons: each tag goes through
 * `normalizeClaimKeySegment`, empty results are dropped and duplicates removed
 * (via `normalizeStringArray2`).
 *
 * @param {string[]|null|undefined} tags
 * @returns {string[]}
 */
function normalizeGroundingTags(tags) {
  const segments = [];
  for (const tag of tags ?? []) {
    const segment = normalizeClaimKeySegment(tag);
    if (segment.length > 0) {
      segments.push(segment);
    }
  }
  return normalizeStringArray2(segments);
}
|
|
1364
|
-
function
|
|
1365
|
-
|
|
2361
|
+
/**
 * Tokenizes free text for grounding comparisons.
 *
 * The text is split on runs of non-alphanumeric ASCII characters; each piece is
 * normalized with `normalizeClaimKeySegment`, kept only when longer than two
 * characters and absent from GROUNDING_STOP_TOKENS, and finally de-duplicated.
 *
 * @param {string|null|undefined} value
 * @returns {string[]}
 */
function tokenizeGroundingText(value) {
  if (!value) {
    return [];
  }
  const kept = [];
  for (const piece of value.split(/[^a-zA-Z0-9]+/u)) {
    const token = normalizeClaimKeySegment(piece);
    const longEnough = token.length > 2;
    if (longEnough && !GROUNDING_STOP_TOKENS.has(token)) {
      kept.push(token);
    }
  }
  return normalizeStringArray2(kept);
}
|
|
1367
|
-
function
|
|
1368
|
-
return
|
|
2369
|
+
/**
 * Collects the entry's own lexical tokens: tokens from `subject`, `content` and
 * `source_context` (in that order) followed by its normalized tags, de-duplicated.
 *
 * @param {{subject?: string, content?: string, source_context?: string, tags?: string[]}} entry
 * @returns {string[]}
 */
function buildEntryLocalLexicalTokens(entry) {
  const textFields = [entry.subject, entry.content, entry.source_context];
  const textTokens = textFields.flatMap((text) => tokenizeGroundingText(text));
  return normalizeStringArray2(textTokens.concat(normalizeGroundingTags(entry.tags)));
}
|
|
1370
|
-
|
|
1371
|
-
|
|
1372
|
-
import { randomUUID } from "crypto";
|
|
1373
|
-
|
|
1374
|
-
// src/core/supersession.ts
|
|
1375
|
-
function validateSupersessionRules(oldEntry, newEntry) {
|
|
1376
|
-
if (oldEntry.type !== newEntry.type) {
|
|
2377
|
+
/**
 * Maps support signals to an auto-apply class. Rules are tried in order and the first
 * match wins:
 *  1. "trusted_exact_reuse_grounded": exact reuse plus attribute alignment or template support.
 *  2. "trusted_family_template_grounded": template support, local grounding, some
 *     family reuse, and entity or attribute alignment.
 *  3. "trusted_family_stable_slot": stable slot, local grounding, grounded family
 *     reuse, attribute alignment, and either two or more family entries or the relaxed
 *     gate (exactly one family entry with strong entity+attribute alignment).
 *  4. "trusted_family_grounded_alignment": local grounding, grounded family reuse and
 *     strong entity+attribute alignment.
 * Otherwise no class is assigned.
 *
 * @param {object} input - `exactReuseCount`, `familyReuseCount`,
 *   `groundedFamilyReuseCount`, `localGrounding`, `templateSupport`,
 *   `stableSlotSupport` and `lexicalAlignment` ({entity, attribute, strongEntityAttribute}).
 * @returns {{autoApplyClass: string|null, relaxedStableSlotFamilyGate: boolean}}
 */
function resolveClaimKeyPromotionSupport(input) {
  const { lexicalAlignment } = input;
  const verdict = (autoApplyClass, relaxedStableSlotFamilyGate) => ({ autoApplyClass, relaxedStableSlotFamilyGate });

  if (input.exactReuseCount > 0 && (lexicalAlignment.attribute || input.templateSupport)) {
    return verdict("trusted_exact_reuse_grounded", false);
  }

  const alignedAtAll = lexicalAlignment.attribute || lexicalAlignment.entity;
  if (input.templateSupport && input.localGrounding && input.familyReuseCount > 0 && alignedAtAll) {
    return verdict("trusted_family_template_grounded", false);
  }

  const groundedInFamily = input.localGrounding && input.groundedFamilyReuseCount > 0;
  const stableSlotGrounded = input.stableSlotSupport && groundedInFamily;
  // A single family sibling is enough only when the alignment is strong.
  const relaxedStableSlotFamilyGate = stableSlotGrounded && input.familyReuseCount === 1 && lexicalAlignment.strongEntityAttribute;
  const familyLargeEnough = input.familyReuseCount >= 2 || relaxedStableSlotFamilyGate;
  if (stableSlotGrounded && familyLargeEnough && lexicalAlignment.attribute) {
    return verdict("trusted_family_stable_slot", relaxedStableSlotFamilyGate);
  }

  if (groundedInFamily && lexicalAlignment.strongEntityAttribute) {
    return verdict("trusted_family_grounded_alignment", false);
  }

  return verdict(null, false);
}
|
|
1398
|
-
function
|
|
1399
|
-
|
|
1400
|
-
|
|
1401
|
-
|
|
1402
|
-
|
|
1403
|
-
|
|
1404
|
-
|
|
1405
|
-
|
|
2408
|
+
/**
 * Reports whether a candidate shares at least one tag, and at least one
 * source-context token, with a trusted entry.
 *
 * @param {Set<string>} entryTagSet - Candidate's normalized tags.
 * @param {Set<string>} entrySourceTokens - Candidate's source-context tokens.
 * @param {{tags: string[], sourceContextTokens: string[]}} trustedEntry
 * @returns {{tagGrounding: boolean, sourceContextGrounding: boolean}}
 */
function inspectGroundingOverlap(entryTagSet, entrySourceTokens, trustedEntry) {
  const sharedTagCount = countSetOverlap(entryTagSet, trustedEntry.tags);
  const sharedSourceTokenCount = countSetOverlap(entrySourceTokens, trustedEntry.sourceContextTokens);
  return {
    tagGrounding: sharedTagCount > 0,
    sourceContextGrounding: sharedSourceTokenCount > 0
  };
}
|
|
2414
|
+
/**
 * Measures how many tokens of a proposed entity and attribute occur in the entry's
 * own lexical tokens (see buildEntryLocalLexicalTokens).
 *
 * Attribute tokens listed in GROUNDING_STOP_TOKENS are ignored. Attribute alignment is
 * "strong" when at least min(attribute token count, 2) attribute tokens are found;
 * `strongEntityAttribute` additionally requires an entity token match.
 *
 * @param {object} entry
 * @param {string} entity - "_"-separated entity segment.
 * @param {string} attribute - "_"-separated attribute segment.
 * @returns {{entity: boolean, attribute: boolean, any: boolean, strongEntityAttribute: boolean, entityOverlapCount: number, attributeOverlapCount: number}}
 */
function inspectCandidateLexicalAlignment(entry, entity, attribute) {
  const splitSegment = (segment) => segment.split("_").filter((part) => part.length > 0);
  const entryTokens = new Set(buildEntryLocalLexicalTokens(entry));
  const slotTokens = splitSegment(attribute).filter((part) => !GROUNDING_STOP_TOKENS.has(part));

  const entityOverlapCount = countSetOverlap(entryTokens, splitSegment(entity));
  const attributeOverlapCount = countSetOverlap(entryTokens, slotTokens);

  const entityMatched = entityOverlapCount > 0;
  const attributeMatched = attributeOverlapCount > 0;
  const requiredSlotHits = Math.min(slotTokens.length, 2);
  const slotStronglyMatched = slotTokens.length > 0 && attributeOverlapCount >= requiredSlotHits;

  return {
    entity: entityMatched,
    attribute: attributeMatched,
    any: entityMatched || attributeMatched,
    strongEntityAttribute: entityMatched && slotStronglyMatched,
    entityOverlapCount,
    attributeOverlapCount
  };
}
|
|
2432
|
+
/**
 * Checks whether the entry's wording matches one of three conservative templates
 * (authoritative source, policy/default, architecture) AND the proposed attribute
 * contains a token from that template's vocabulary.
 *
 * The patterns run against the lower-cased subject and content joined by a newline,
 * and are tried in the order authoritative, policy, architecture.
 *
 * @param {{subject: string, content: string}} entry
 * @param {string} attribute - "_"-separated attribute segment.
 * @returns {boolean}
 */
function matchesConservativeTemplateSupport(entry, attribute) {
  const slotTokens = new Set(attribute.split("_").filter((token) => token.length > 0));
  const searchableText = [entry.subject, entry.content].map((text) => text.toLowerCase()).join("\n");
  const templates = [
    [
      /\b(authoritative|source of truth|source of record|canonical guide|canonical reference|primary guide|runbook)\b/u,
      AUTHORITATIVE_TEMPLATE_ATTRIBUTE_TOKENS
    ],
    [
      /\b(should|must|should stay|must stay|always|never|default(?:s)? to|default(?:s)?|policy|guardrail|required|preference|prefers?)\b/u,
      POLICY_TEMPLATE_ATTRIBUTE_TOKENS
    ],
    [
      /\b(uses|supports|backed by|architecture|boundary|workflow|process|pipeline|adapter|layer|contract|interface|surface)\b/u,
      ARCHITECTURE_TEMPLATE_ATTRIBUTE_TOKENS
    ]
  ];
  return templates.some(([pattern, vocabulary]) => pattern.test(searchableText) && intersects(slotTokens, vocabulary));
}
|
|
2453
|
+
/**
 * Tests whether an attribute is a compact slot whose last token is a known stable
 * head: between 1 and MAX_AUTO_APPLY_ATTRIBUTE_TOKENS tokens, ending in a member of
 * STABLE_FAMILY_SLOT_ATTRIBUTE_HEADS.
 *
 * @param {string} attribute - "_"-separated attribute segment.
 * @returns {boolean}
 */
function matchesStableFamilySlotSupport(attribute) {
  const parts = attribute.split("_").filter((part) => part.length > 0);
  const partCount = parts.length;
  if (partCount === 0 || partCount > MAX_AUTO_APPLY_ATTRIBUTE_TOKENS) {
    return false;
  }
  const head = parts[partCount - 1];
  return typeof head === "string" && STABLE_FAMILY_SLOT_ATTRIBUTE_HEADS.has(head);
}
|
|
2461
|
+
/**
 * Counts how many items of `right` are members of the set `left`. Repeated items in
 * `right` are counted each time they occur.
 *
 * @param {Set<any>} left
 * @param {Iterable<any>} right
 * @returns {number}
 */
function countSetOverlap(left, right) {
  return [...right].filter((value) => left.has(value)).length;
}
|
|
2470
|
+
/**
 * Tests whether any value of `left` is a member of the set `right`.
 *
 * @param {Iterable<any>} left
 * @param {Set<any>} right
 * @returns {boolean}
 */
function intersects(left, right) {
  return [...left].some((value) => right.has(value));
}
|
|
2478
|
+
/**
 * Removes falsy values and duplicates from `values`, keeping first-occurrence order.
 * Unlike normalizeStringArray, the values are not trimmed.
 *
 * @param {Iterable<string|null|undefined>} values
 * @returns {string[]} A new array.
 */
function normalizeStringArray2(values) {
  const unique = /* @__PURE__ */ new Set(values);
  return [...unique].filter((value) => Boolean(value));
}
|
|
1408
2490
|
|
|
1409
2491
|
// src/core/store/claim-extraction.ts
|
|
1410
|
-
var
|
|
2492
|
+
// Entity names that refer back to the speaker, the team or the current project.
// NOTE(review): the code consuming these sets is not visible in this chunk.
var SELF_REFERENTIAL_ENTITIES = /* @__PURE__ */ new Set([
  "i",
  "me",
  "the_user",
  "myself",
  "user",
  "we",
  "our_team",
  "the_project",
  "this_project"
]);
// Subset of the above that denotes the user.
var USER_REFERENTIAL_ENTITIES = /* @__PURE__ */ new Set([
  "i",
  "me",
  "myself",
  "the_user",
  "user"
]);
// Subset that denotes the current project.
var PROJECT_REFERENTIAL_ENTITIES = /* @__PURE__ */ new Set([
  "the_project",
  "this_project"
]);
|
|
1413
2495
|
var DETERMINISTIC_ATTRIBUTE_HEADS = /* @__PURE__ */ new Set([
|
|
@@ -1443,7 +2525,20 @@ var DETERMINISTIC_ATTRIBUTE_HEADS = /* @__PURE__ */ new Set([
|
|
|
1443
2525
|
]);
|
|
1444
2526
|
// Limits and confidence thresholds for claim-key extraction.
// NOTE(review): the code that reads these constants lies outside this chunk; the
// notes below are inferred from the names only and should be confirmed at the use sites.

// Presumably caps how many entity hints / claim-key examples are kept — TODO confirm.
var MAX_ENTITY_HINTS = 12;
var MAX_CLAIM_KEY_EXAMPLES = 8;
var MAX_SUPPORT_CLAIM_KEY_EXAMPLES = 128;
// Presumably the confidence assigned to deterministic repairs — TODO confirm.
var DEFAULT_REPAIR_CONFIDENCE = 0.86;
// Presumably minimum confidences (0..1) for backfill, supported-ingest auto-apply and
// proposal decisions — TODO confirm.
var HIGH_CONFIDENCE_BACKFILL_THRESHOLD = 0.92;
var SUPPORTED_INGEST_AUTO_APPLY_THRESHOLD = 0.72;
var COMPACTED_SUPPORTED_INGEST_AUTO_APPLY_THRESHOLD = 0.74;
var PROPOSAL_CONFIDENCE_THRESHOLD = 0.75;
var SUPPORTED_PROPOSAL_CONFIDENCE_THRESHOLD = 0.65;
|
|
2535
|
+
/**
 * Applies an extracted claim key to an entry.
 *
 * A lifecycle is built from `extracted` and the entry's inferred ingest support
 * context; when one is produced it is handed to `applyClaimKeyLifecycle` together
 * with the entry. Nothing happens when no lifecycle can be built.
 *
 * @param {object} entry - Entry passed to `applyClaimKeyLifecycle`.
 * @param {object} extracted - Claim extraction result.
 * @returns {void}
 */
function applyClaimExtractionResultToEntry(entry, extracted) {
  const supportContext = buildInferredIngestClaimKeySupportContext(entry);
  const lifecycle = buildExtractedClaimKeyLifecycle(extracted, supportContext);
  if (lifecycle) {
    applyClaimKeyLifecycle(entry, lifecycle);
  }
}
|
|
1447
2542
|
async function previewClaimKeyExtraction(entry, llm, config, options = {}) {
|
|
1448
2543
|
if (!config.enabled || !config.eligibleTypes.includes(entry.type)) {
|
|
1449
2544
|
return null;
|
|
@@ -1487,64 +2582,296 @@ async function previewClaimKeyExtraction(entry, llm, config, options = {}) {
|
|
|
1487
2582
|
options.onPreviewOutcome?.(buildPreviewOutcome("rejected_candidate", attempt));
|
|
1488
2583
|
return tryDeterministicClaimKeyRepair(entry, normalizedHints);
|
|
1489
2584
|
}
|
|
1490
|
-
async function
|
|
1491
|
-
|
|
1492
|
-
|
|
1493
|
-
|
|
2585
|
+
/**
 * Runs claim-key extraction for one entry and returns both the usable result
 * (or `null`) and a diagnostic describing why that outcome was reached.
 *
 * Decision ladder, in order:
 *  1. extraction disabled / ineligible type -> "ineligible_type"
 *  2. extraction throws -> deterministic repair, else "extraction_failure"
 *  3. model says no_claim -> "no_claim"
 *  4. candidate rejected -> deterministic repair, else "rejected_candidate"
 *  5. confidence >= config.confidenceThreshold -> accepted
 *  6. confidence >= support/compactness-adjusted auto-apply threshold -> accepted
 *  7. deterministic repair agrees with (or replaces a missing) key -> repair decision
 *  8. otherwise "low_confidence_candidate", reviewable only above the proposal threshold
 *
 * @param {object} entry - Entry fields (type, subject, content, tags, source_context).
 * @param {object} llm - LLM port forwarded to the extraction attempt.
 * @param {object} config - Reads `enabled`, `eligibleTypes`, `confidenceThreshold`.
 * @param {object} [options] - Optional `hints`, `onWarning`, `supportClaimKeys`, `entityPrefixStats`.
 * @returns {Promise<{result: object|null, diagnostic: object}>}
 */
async function extractClaimKeyDecision(entry, llm, config, options = {}) {
  // All non-accepted outcomes share one diagnostic shape; overrides replace the neutral defaults.
  const decline = (overrides) => ({
    result: null,
    diagnostic: {
      outcome: null,
      confidence: null,
      path: null,
      warning: null,
      suggestedClaimKey: null,
      reviewable: false,
      supportEvidence: [],
      rationale: null,
      ...overrides
    }
  });

  if (!config.enabled || !config.eligibleTypes.includes(entry.type)) {
    return decline({
      outcome: "ineligible_type",
      rationale: "entry type is not eligible for claim-key extraction"
    });
  }

  const normalizedHints = normalizeClaimExtractionHints(options.hints ?? {});
  let attempt;
  try {
    attempt = await attemptClaimExtraction(entry, normalizedHints, llm);
  } catch (error) {
    // The model call failed outright: fall back to the deterministic repair before giving up.
    const fallback = tryDeterministicClaimKeyRepair(entry, normalizedHints);
    if (fallback) {
      return finalizeDeterministicRepairDecision(fallback, options.entityPrefixStats);
    }
    const warning = formatClaimExtractionError(error);
    options.onWarning?.(`Claim extraction failed for "${entry.subject}": ${warning}`);
    return decline({
      outcome: "extraction_failure",
      warning,
      rationale: "claim extraction failed before a safe candidate could be produced"
    });
  }

  if (attempt.response.no_claim === true) {
    return decline({
      outcome: "no_claim",
      confidence: normalizeConfidence(attempt.response.confidence),
      path: attempt.path,
      rationale: "model explicitly returned no_claim"
    });
  }

  // Candidate-building warnings are both collected locally and forwarded to the caller.
  const warnings = [];
  const collectWarning = (warning) => {
    warnings.push(warning);
    options.onWarning?.(warning);
  };
  const candidate = buildClaimExtractionCandidate(entry, attempt.response, normalizedHints, collectWarning);
  if (!candidate) {
    const fallback = tryDeterministicClaimKeyRepair(entry, normalizedHints);
    if (fallback) {
      return finalizeDeterministicRepairDecision(fallback, options.entityPrefixStats);
    }
    return decline({
      outcome: "rejected_candidate",
      confidence: normalizeConfidence(attempt.response.confidence),
      path: attempt.path,
      warning: warnings[0] ?? null,
      rationale: "model proposed a structurally unsafe or non-canonical claim key"
    });
  }

  const result = toClaimExtractionResult(candidate, attempt.path);
  if (result.confidence >= config.confidenceThreshold) {
    return {
      result,
      diagnostic: buildAcceptedDiagnostic(result, "candidate met the ingest confidence threshold")
    };
  }

  const support = evaluateClaimKeySupport(
    {
      subject: entry.subject,
      content: entry.content,
      type: entry.type,
      tags: entry.tags,
      source_context: entry.source_context
    },
    result.claimKey ?? "",
    buildClaimKeySupportSeedFromExamples(options.supportClaimKeys ?? [])
  );
  const compactness = evaluateClaimKeyCompactness(result.claimKey ?? "", {
    priorCompactedFrom: result.compactedFrom ?? null,
    priorCompactionReason: result.compactionReason ?? null
  });

  // Supported candidates get a lower bar; compacted ones a slightly stricter one than uncompacted.
  const hasAutoApplySupport = support.autoApplyClass !== null;
  let autoApplyThreshold = HIGH_CONFIDENCE_BACKFILL_THRESHOLD;
  if (hasAutoApplySupport) {
    autoApplyThreshold = compactness.compactedFrom ? COMPACTED_SUPPORTED_INGEST_AUTO_APPLY_THRESHOLD : SUPPORTED_INGEST_AUTO_APPLY_THRESHOLD;
  }
  const proposalThreshold = support.supportedProposal ? SUPPORTED_PROPOSAL_CONFIDENCE_THRESHOLD : PROPOSAL_CONFIDENCE_THRESHOLD;

  if (compactness.claimKey !== result.claimKey) {
    result.claimKey = compactness.claimKey;
    result.compactedFrom = compactness.compactedFrom;
    result.compactionReason = compactness.compactionReason;
  }

  if (result.confidence >= autoApplyThreshold && compactness.compactEnoughForAutoApply) {
    result.acceptanceRationale = hasAutoApplySupport ? `accepted below the default threshold via ${describeSupportPromotionClass(support)}` : "accepted as a high-confidence preview";
    const acceptedRationale = hasAutoApplySupport ? `supported near-miss candidate cleared the conservative auto-apply threshold via ${describeSupportPromotionClass(support)}` : `candidate cleared the conservative high-confidence threshold of ${autoApplyThreshold.toFixed(2)}`;
    return {
      result,
      diagnostic: buildAcceptedDiagnostic(result, acceptedRationale)
    };
  }

  // A deterministic repair only wins when it agrees with the model's key (or the model produced none).
  const repaired = tryDeterministicClaimKeyRepair(entry, normalizedHints);
  if (repaired && (!result.claimKey || repaired.claimKey === result.claimKey)) {
    return finalizeDeterministicRepairDecision(repaired, options.entityPrefixStats);
  }

  const reviewable = result.confidence >= proposalThreshold;
  let rationale = "candidate stayed below both the conservative auto-apply and review thresholds";
  if (reviewable) {
    rationale = support.rationaleFragments.length > 0 ? `candidate stayed below the auto-apply threshold but has structured support from ${support.rationaleFragments.join(", ")}` : `candidate stayed below the auto-apply threshold of ${autoApplyThreshold.toFixed(2)}`;
  }
  return decline({
    outcome: "low_confidence_candidate",
    confidence: result.confidence,
    path: result.path,
    warning: warnings[0] ?? null,
    suggestedClaimKey: result.claimKey,
    reviewable,
    supportEvidence: support.supportEvidence,
    rationale
  });
}
|
|
1504
2737
|
/**
 * Fetches the distinct claim-key prefixes known to the database port.
 *
 * @param {object} db - Port exposing `getDistinctClaimKeyPrefixes()`.
 * @returns {Promise<*>} Whatever the port resolves to.
 */
async function getEntityHints(db) {
  const distinctPrefixes = db.getDistinctClaimKeyPrefixes();
  return distinctPrefixes;
}
|
|
1507
|
-
async function runBatchClaimExtraction(results, ports, config,
|
|
2740
|
+
/**
 * Extracts claim keys for every entry in `results`, `concurrency` entries at a time.
 *
 * Pass 1 walks all entries in order: entries that already carry a claim key only
 * feed the hint state, ineligible types get an "ineligible_type" diagnostic, and
 * the rest are sent to extraction with a snapshot of the hints known so far.
 * Pass 2 re-runs entries that produced no key, but only when pass 1 extracted at
 * least one key (so the retry sees hints that were not available the first time).
 * Finally every recorded diagnostic is reported through `onDiagnostic`.
 *
 * @param {Array<{entries: object[]}>} results - Batches whose entries are processed in order.
 * @param {object} ports - Provides `db` and `createLlm()`.
 * @param {object} config - Reads `enabled` and `eligibleTypes`; forwarded to extraction.
 * @param {number} [concurrency=10] - Stage size; invalid values fall back to 10.
 * @param {Function} [onWarning] - Receives extraction warnings.
 * @param {Function} [onDiagnostic] - Called as `(entry, diagnostic)` after all stages.
 * @returns {Promise<Map<object, object>>} Entry -> accepted extraction result.
 */
async function runBatchClaimExtraction(results, ports, config, concurrency = 10, onWarning, onDiagnostic) {
  const extractedEntries = /* @__PURE__ */ new Map();
  if (!config.enabled) {
    return extractedEntries;
  }
  const hintState = await loadClaimExtractionHintState(ports.db);
  const llm = ports.createLlm();
  const diagnostics = /* @__PURE__ */ new Map();
  const retryEntries = [];
  const stageSize = normalizeClaimExtractionConcurrency(concurrency);
  const orderedEntries = results.flatMap((result) => result.entries);

  // Pairs an entry with the hints known at the moment the request is queued.
  const toRequest = (entry) => ({
    entry,
    hintSnapshot: buildClaimExtractionHintSnapshot(hintState, entry)
  });
  // Runs one stage of requests concurrently and keeps each decision with its entry.
  const decideStage = (requests) => Promise.all(
    requests.map(async ({ entry, hintSnapshot }) => ({
      entry,
      decision: await extractBatchClaimKeyDecision(entry, llm, config, hintSnapshot, onWarning)
    }))
  );
  // Applies an accepted result and makes its key visible to later stages.
  const acceptDecision = (entry, extracted) => {
    applyClaimExtractionResultToEntry(entry, extracted);
    recordClaimKeyHint(hintState, extracted.claimKey);
    extractedEntries.set(entry, extracted);
  };

  let stageStart = 0;
  while (stageStart < orderedEntries.length) {
    const stageRequests = [];
    for (const entry of orderedEntries.slice(stageStart, stageStart + stageSize)) {
      if (entry.claim_key) {
        // Pre-keyed entries are recorded immediately so later entries in this stage see them.
        recordClaimKeyHint(hintState, entry.claim_key);
      } else if (config.eligibleTypes.includes(entry.type)) {
        stageRequests.push(toRequest(entry));
      } else {
        diagnostics.set(entry, {
          outcome: "ineligible_type",
          confidence: null,
          path: null,
          warning: null,
          suggestedClaimKey: null,
          reviewable: false,
          supportEvidence: [],
          rationale: "entry type is not eligible for claim-key extraction"
        });
      }
    }
    for (const { entry, decision } of await decideStage(stageRequests)) {
      diagnostics.set(entry, decision.diagnostic);
      if (decision.result?.claimKey) {
        acceptDecision(entry, decision.result);
      } else {
        retryEntries.push(entry);
      }
    }
    stageStart += stageSize;
  }

  if (retryEntries.length > 0 && extractedEntries.size > 0) {
    let retryStart = 0;
    while (retryStart < retryEntries.length) {
      const stageRequests = retryEntries
        .slice(retryStart, retryStart + stageSize)
        .filter((entry) => !entry.claim_key)
        .map((entry) => toRequest(entry));
      for (const { entry, decision } of await decideStage(stageRequests)) {
        // The retry diagnostic replaces the one recorded in the first pass.
        diagnostics.set(entry, decision.diagnostic);
        if (decision.result?.claimKey) {
          acceptDecision(entry, decision.result);
        }
      }
      retryStart += stageSize;
    }
  }

  for (const result of results) {
    for (const entry of result.entries) {
      const diagnostic = diagnostics.get(entry);
      if (diagnostic) {
        onDiagnostic?.(entry, diagnostic);
      }
    }
  }
  return extractedEntries;
}
|
|
2827
|
+
/**
 * Validates a requested stage size.
 *
 * @param {*} value - Requested concurrency.
 * @returns {number} `value` when it is a positive integer, otherwise 10.
 */
function normalizeClaimExtractionConcurrency(value) {
  const isPositiveInteger = Number.isInteger(value) && value > 0;
  return isPositiveInteger ? value : 10;
}
|
|
2833
|
+
/**
 * Captures the hint state for one extraction request.
 * The support claim keys are copied so later updates to the state do not leak
 * into the request; `entityPrefixStats` is passed through by reference.
 *
 * @param {object} hintState - Mutable batch-wide hint state.
 * @param {object} entry - Entry the hints are built for.
 * @returns {{hints: object, supportClaimKeys: string[], entityPrefixStats: *}}
 */
function buildClaimExtractionHintSnapshot(hintState, entry) {
  const hints = buildEntryHints(hintState, entry);
  const supportClaimKeys = [...hintState.supportClaimKeys];
  const { entityPrefixStats } = hintState;
  return { hints, supportClaimKeys, entityPrefixStats };
}
|
|
2840
|
+
/**
 * Batch wrapper around `extractClaimKeyDecision` that never rejects: any error
 * thrown while deciding is converted into an "extraction_failure" decision so a
 * single entry cannot abort the whole stage.
 *
 * @param {object} entry - Entry to extract a claim key for.
 * @param {object} llm - LLM port.
 * @param {object} config - Claim extraction config.
 * @param {{hints: object, supportClaimKeys: string[], entityPrefixStats: *}} hintSnapshot
 * @param {Function} [onWarning] - Forwarded warning sink.
 * @returns {Promise<{result: object|null, diagnostic: object}>}
 */
async function extractBatchClaimKeyDecision(entry, llm, config, hintSnapshot, onWarning) {
  try {
    // Only the fields extraction reads are forwarded, not the whole entry object.
    const extractionInput = {
      type: entry.type,
      subject: entry.subject,
      content: entry.content,
      tags: entry.tags,
      source_context: entry.source_context
    };
    const extractionOptions = {
      hints: hintSnapshot.hints,
      onWarning,
      supportClaimKeys: hintSnapshot.supportClaimKeys,
      entityPrefixStats: hintSnapshot.entityPrefixStats
    };
    const decision = await extractClaimKeyDecision(extractionInput, llm, config, extractionOptions);
    return decision;
  } catch {
    const failureDiagnostic = {
      outcome: "extraction_failure",
      confidence: null,
      path: null,
      warning: "claim extraction failed unexpectedly",
      suggestedClaimKey: null,
      reviewable: false,
      supportEvidence: [],
      rationale: "claim extraction failed unexpectedly"
    };
    return { result: null, diagnostic: failureDiagnostic };
  }
}
|
|
1548
2875
|
function buildClaimExtractionSystemPrompt(hints, promptMode) {
|
|
1549
2876
|
const metadataHints = [hints.userEntity ? `user_id=${hints.userEntity}` : null, hints.projectEntity ? `project=${hints.projectEntity}` : null].filter(
|
|
1550
2877
|
(value) => value !== null
|
|
@@ -1595,6 +2922,9 @@ function buildClaimExtractionSystemPrompt(hints, promptMode) {
|
|
|
1595
2922
|
'- "Agenr keeps pure logic in src/core and adapters outside it so future hosts can plug in cleanly." -> agenr/core_adapter_boundary',
|
|
1596
2923
|
'- "The before-prompt-build hook only triggers after a real agent turn or message." -> before_prompt_build_hook/trigger_condition',
|
|
1597
2924
|
'- "Durable memory preserves context across sessions." -> durable_memory/context_preservation',
|
|
2925
|
+
'- "SQLite in this environment supports window functions." -> sqlite/window_function_support',
|
|
2926
|
+
'- "Meeting-recorder transcripts need manual cleanup before durable ingest." -> meeting_recorder/transcript_cleanup_workflow',
|
|
2927
|
+
'- "Reflection synthesis can hallucinate when it summarizes from partial notes." -> reflection_synthesis/hallucination_risk',
|
|
1598
2928
|
"",
|
|
1599
2929
|
"Negative examples:",
|
|
1600
2930
|
"- Bad: jim/america_chicago -> Good: jim/timezone",
|
|
@@ -1675,6 +3005,127 @@ function buildClaimExtractionCandidate(entry, response, hints, onWarning) {
|
|
|
1675
3005
|
compactionReason: compactedClaimKey.reason
|
|
1676
3006
|
};
|
|
1677
3007
|
}
|
|
3008
|
+
/**
 * Converts an extraction candidate into the result shape, tagging it with the
 * extraction path. Compaction fields are only present when the candidate was
 * compacted from a longer key.
 *
 * @param {object} candidate - Reads claimKey, confidence, rawEntity, rawAttribute, compactedFrom, compactionReason.
 * @param {*} path4 - Extraction path that produced the candidate.
 * @returns {object} New result object.
 */
function toClaimExtractionResult(candidate, path4) {
  const extraction = {
    claimKey: candidate.claimKey,
    confidence: candidate.confidence,
    rawEntity: candidate.rawEntity,
    rawAttribute: candidate.rawAttribute,
    path: path4
  };
  if (candidate.compactedFrom) {
    extraction.compactedFrom = candidate.compactedFrom;
    extraction.compactionReason = candidate.compactionReason;
  }
  return extraction;
}
|
|
3021
|
+
/**
 * Builds the diagnostic for an accepted extraction result.
 *
 * @param {object} result - Reads `confidence`, `path`, `claimKey`.
 * @param {string|null} rationale - Why the result was accepted.
 * @returns {object} Diagnostic with outcome "accepted".
 */
function buildAcceptedDiagnostic(result, rationale) {
  const { confidence, path, claimKey: suggestedClaimKey } = result;
  return {
    outcome: "accepted",
    confidence,
    path,
    warning: null,
    suggestedClaimKey,
    reviewable: false,
    supportEvidence: [],
    rationale
  };
}
|
|
3033
|
+
/**
 * Turns a deterministic repair into a final decision, guarding against minting
 * a singleton entity alias next to an established entity family.
 *
 * - No alias conflict: the repair is accepted as-is.
 * - Conflict and reuse is safe: the key is rewritten onto the dominant family and accepted.
 * - Conflict and reuse is not safe: nothing is applied; the rewritten key is staged for review.
 *
 * @param {object} repaired - Deterministic repair result.
 * @param {Array<object>} [entityPrefixStats] - Per-entity-prefix usage statistics.
 * @returns {{result: object|null, diagnostic: object}}
 */
function finalizeDeterministicRepairDecision(repaired, entityPrefixStats) {
  const alias = findSingletonAliasReuseCandidate(repaired, entityPrefixStats);
  if (!alias) {
    return {
      result: repaired,
      diagnostic: buildAcceptedDiagnostic(repaired, "deterministic possessive-slot repair recovered the missing claim key")
    };
  }

  const rewritten = rewriteClaimKeyEntityPrefix(repaired, alias.dominantEntityPrefix);
  if (!alias.canonicalReuseSafe) {
    return {
      result: null,
      diagnostic: {
        outcome: "low_confidence_candidate",
        confidence: repaired.confidence,
        path: repaired.path,
        warning: null,
        suggestedClaimKey: rewritten.claimKey,
        reviewable: true,
        supportEvidence: alias.evidence.map((evidence) => evidence.kind),
        rationale: `deterministic repair would create singleton alias "${alias.aliasEntityPrefix}" next to dominant trusted family "${alias.dominantEntityPrefix}", so the new namespace was staged for review`
      }
    };
  }

  rewritten.acceptanceRationale = `reused dominant entity family "${alias.dominantEntityPrefix}" instead of minting singleton alias "${alias.aliasEntityPrefix}"`;
  return {
    result: rewritten,
    diagnostic: buildAcceptedDiagnostic(
      rewritten,
      `deterministic repair reused dominant family "${alias.dominantEntityPrefix}" instead of new singleton alias "${alias.aliasEntityPrefix}"`
    )
  };
}
|
|
3067
|
+
/**
 * Looks for a singleton-alias candidate whose alias prefix is the entity prefix
 * of the repaired claim key.
 *
 * @param {object} repaired - Repair result; reads `claimKey`.
 * @param {Array<object>} [entityPrefixStats] - Existing per-prefix statistics.
 * @returns {object|null} The matching candidate, or null when there is no key,
 *   no statistics, no entity prefix, or no match.
 */
function findSingletonAliasReuseCandidate(repaired, entityPrefixStats) {
  const { claimKey } = repaired;
  if (!claimKey || !entityPrefixStats || entityPrefixStats.length === 0) {
    return null;
  }
  const entityPrefix = claimKey.split("/", 1)[0] ?? "";
  if (!entityPrefix) {
    return null;
  }
  // The stats are evaluated as if the repaired key had already been stored.
  const statsWithRepair = summarizeAugmentedEntityPrefixStats(entityPrefixStats, entityPrefix);
  const aliasCandidates = detectClaimKeySingletonAliasCandidatesFromStats(statsWithRepair);
  const match = aliasCandidates.find((candidate) => candidate.aliasEntityPrefix === entityPrefix);
  return match ?? null;
}
|
|
3079
|
+
/**
 * Returns prefix statistics that include `entityPrefix`.
 * When the prefix already has a profile the input array is returned unchanged;
 * otherwise a copy is returned with a provisional profile appended that counts
 * a single tentative, deterministic-repair entry.
 *
 * @param {Array<object>} entityPrefixStats - Existing per-prefix profiles.
 * @param {string} entityPrefix - Prefix that must be represented.
 * @returns {Array<object>}
 */
function summarizeAugmentedEntityPrefixStats(entityPrefixStats, entityPrefix) {
  const knownProfile = entityPrefixStats.find((profile) => profile.entityPrefix === entityPrefix);
  if (knownProfile) {
    return entityPrefixStats;
  }
  const provisionalProfile = {
    entityPrefix,
    activeEntryCount: 1,
    trustedEntryCount: 0,
    tentativeEntryCount: 1,
    unresolvedEntryCount: 0,
    legacyEntryCount: 0,
    deterministicRepairEntryCount: 1,
    manualEntryCount: 0,
    modelEntryCount: 0,
    jsonRetryEntryCount: 0,
    surgeonFamilyReuseEntryCount: 0
  };
  return [...entityPrefixStats, provisionalProfile];
}
|
|
3101
|
+
/**
 * Returns a copy of `result` whose claim key has its entity prefix (the part
 * before the first "/") replaced by `entityPrefix`.
 *
 * Everything after the first "/" is kept verbatim, so an attribute that itself
 * contains "/" is not truncated. A key without "/" yields an empty attribute.
 *
 * @param {object} result - Extraction result; reads `claimKey`. Not mutated.
 * @param {string} entityPrefix - Replacement entity prefix.
 * @returns {object} `result` itself when it has no claim key, otherwise a shallow copy.
 */
function rewriteClaimKeyEntityPrefix(result, entityPrefix) {
  const claimKey = result.claimKey;
  if (!claimKey) {
    return result;
  }
  // split("/", 2) would discard any segments past the second one; slice keeps the full remainder.
  const separatorIndex = claimKey.indexOf("/");
  const attribute = separatorIndex === -1 ? "" : claimKey.slice(separatorIndex + 1);
  return {
    ...result,
    claimKey: `${entityPrefix}/${attribute}`
  };
}
|
|
3112
|
+
/**
 * Produces a printable message for anything that was thrown.
 *
 * @param {*} error - Thrown value.
 * @returns {string} `error.message` for Error instances, otherwise `String(error)`.
 */
function formatClaimExtractionError(error) {
  if (error instanceof Error) {
    return error.message;
  }
  return String(error);
}
|
|
3115
|
+
/**
 * Maps a support evaluation's `autoApplyClass` to the phrase used in acceptance
 * rationales. Unknown or missing classes are described as "structural support".
 *
 * @param {{autoApplyClass: *}} support - Support evaluation.
 * @returns {string}
 */
function describeSupportPromotionClass(support) {
  // A Map (not a plain object) so inherited keys such as "constructor" never match.
  const descriptions = new Map([
    ["trusted_exact_reuse_grounded", "trusted exact-key reuse with local grounding"],
    ["trusted_family_template_grounded", "trusted family reuse plus grounded template support"],
    ["trusted_family_stable_slot", "trusted family reuse plus a stable compact slot"],
    ["trusted_family_grounded_alignment", "trusted family reuse plus grounded dual lexical alignment"]
  ]);
  return descriptions.get(support.autoApplyClass) ?? "structural support";
}
|
|
1678
3129
|
function tryDeterministicClaimKeyRepair(entry, hints) {
|
|
1679
3130
|
const repaired = parsePossessiveClaim(entry.subject) ?? parsePossessiveStatement(entry.content);
|
|
1680
3131
|
if (!repaired) {
|
|
@@ -1702,24 +3153,38 @@ function tryDeterministicClaimKeyRepair(entry, hints) {
|
|
|
1702
3153
|
};
|
|
1703
3154
|
}
|
|
1704
3155
|
/**
 * Loads everything the batch extractor uses as hints: entity prefixes, a small
 * set of prompt examples, a larger set of support claim keys, and per-prefix
 * statistics. The four lookups run concurrently and each one that fails is
 * replaced by an empty list instead of failing the load.
 *
 * @param {object} db - Database port.
 * @returns {Promise<object>} Hint state built by `createHintState`.
 */
async function loadClaimExtractionHintState(db) {
  const settledLookups = await Promise.allSettled([
    getEntityHints(db),
    getClaimKeyExamples(db, MAX_CLAIM_KEY_EXAMPLES),
    getClaimKeyExamples(db, MAX_SUPPORT_CLAIM_KEY_EXAMPLES),
    getClaimKeyEntityPrefixStats(db)
  ]);
  const [entityHints, claimKeyExamples, supportClaimKeys, entityPrefixStats] = settledLookups.map(
    (lookup) => lookup.status === "fulfilled" ? lookup.value : []
  );
  return createHintState({ entityHints, claimKeyExamples, supportClaimKeys, entityPrefixStats });
}
|
|
1711
|
-
async function getClaimKeyExamples(db) {
|
|
3169
|
+
/**
 * Reads up to `limit` claim-key examples from the database port.
 *
 * @param {object} db - Port that may expose `getClaimKeyExamples(limit)`.
 * @param {number} limit - Forwarded to the port.
 * @returns {Promise<*>} The port's answer, or an empty array when the port lacks the method.
 */
async function getClaimKeyExamples(db, limit) {
  const supportsExamples = typeof db.getClaimKeyExamples === "function";
  return supportsExamples ? db.getClaimKeyExamples(limit) : [];
}
|
|
3175
|
+
/**
 * Reads per-entity-prefix claim-key statistics from the database port.
 *
 * @param {object} db - Port that may expose `getClaimKeyEntityPrefixStats()`.
 * @returns {Promise<*>} The port's answer, or an empty array when the port lacks the method.
 */
async function getClaimKeyEntityPrefixStats(db) {
  const supportsPrefixStats = typeof db.getClaimKeyEntityPrefixStats === "function";
  return supportsPrefixStats ? db.getClaimKeyEntityPrefixStats() : [];
}
|
|
1717
3181
|
function createHintState(input) {
|
|
1718
3182
|
const claimKeyExamples = normalizeClaimKeyExamples(input.claimKeyExamples ?? []);
|
|
3183
|
+
const supportClaimKeys = normalizeSupportClaimKeys(input.supportClaimKeys ?? []);
|
|
1719
3184
|
const entityHints = limitUnique(
|
|
1720
3185
|
[
|
|
1721
3186
|
...normalizeEntityHints(input.entityHints ?? []),
|
|
1722
|
-
...
|
|
3187
|
+
...supportClaimKeys.flatMap((claimKey) => {
|
|
1723
3188
|
const normalizedClaimKey = normalizeClaimKey(claimKey);
|
|
1724
3189
|
return normalizedClaimKey.ok ? [normalizedClaimKey.value.entity] : [];
|
|
1725
3190
|
})
|
|
@@ -1728,7 +3193,9 @@ function createHintState(input) {
|
|
|
1728
3193
|
);
|
|
1729
3194
|
return {
|
|
1730
3195
|
entityHints,
|
|
1731
|
-
claimKeyExamples
|
|
3196
|
+
claimKeyExamples,
|
|
3197
|
+
supportClaimKeys,
|
|
3198
|
+
entityPrefixStats: input.entityPrefixStats ?? []
|
|
1732
3199
|
};
|
|
1733
3200
|
}
|
|
1734
3201
|
function buildEntryHints(state, entry) {
|
|
@@ -1747,6 +3214,7 @@ function recordClaimKeyHint(state, claimKey) {
|
|
|
1747
3214
|
return;
|
|
1748
3215
|
}
|
|
1749
3216
|
state.claimKeyExamples = prependUnique(state.claimKeyExamples, normalizedClaimKey.value.claimKey, MAX_CLAIM_KEY_EXAMPLES);
|
|
3217
|
+
state.supportClaimKeys = prependUnique(state.supportClaimKeys, normalizedClaimKey.value.claimKey, MAX_SUPPORT_CLAIM_KEY_EXAMPLES);
|
|
1750
3218
|
state.entityHints = prependUnique(state.entityHints, normalizedClaimKey.value.entity, MAX_ENTITY_HINTS);
|
|
1751
3219
|
}
|
|
1752
3220
|
function normalizeClaimExtractionHints(hints) {
|
|
@@ -1789,7 +3257,7 @@ function normalizeEntity(value, hints) {
|
|
|
1789
3257
|
if (normalizedValue.length === 0) {
|
|
1790
3258
|
return "";
|
|
1791
3259
|
}
|
|
1792
|
-
if (!
|
|
3260
|
+
if (!SELF_REFERENTIAL_ENTITIES.has(normalizedValue)) {
|
|
1793
3261
|
return normalizedValue;
|
|
1794
3262
|
}
|
|
1795
3263
|
if (USER_REFERENTIAL_ENTITIES.has(normalizedValue) && hints.userEntity) {
|
|
@@ -1814,7 +3282,7 @@ function normalizeEntity(value, hints) {
|
|
|
1814
3282
|
}
|
|
1815
3283
|
/**
 * Normalizes raw entity hints into claim-key segments, drops empty and
 * self-referential ones, then de-duplicates and caps the list.
 *
 * @param {Array<*>} entityHints - Raw entity hints.
 * @returns {Array<string>} At most MAX_ENTITY_HINTS unique segments.
 */
function normalizeEntityHints(entityHints) {
  const normalizedSegments = entityHints.map((entityHint) => normalizeClaimKeySegment(entityHint));
  const usableSegments = normalizedSegments.filter(
    (segment) => segment.length > 0 && !SELF_REFERENTIAL_ENTITIES.has(segment)
  );
  return limitUnique(usableSegments, MAX_ENTITY_HINTS);
}
|
|
@@ -1827,12 +3295,21 @@ function normalizeClaimKeyExamples(claimKeyExamples) {
|
|
|
1827
3295
|
MAX_CLAIM_KEY_EXAMPLES
|
|
1828
3296
|
);
|
|
1829
3297
|
}
|
|
3298
|
+
/**
 * Canonicalizes support claim keys, discarding any that fail normalization,
 * then de-duplicates and caps the list.
 *
 * @param {Array<*>} claimKeys - Raw claim keys.
 * @returns {Array<string>} At most MAX_SUPPORT_CLAIM_KEY_EXAMPLES unique canonical keys.
 */
function normalizeSupportClaimKeys(claimKeys) {
  const canonicalKeys = claimKeys.flatMap((claimKey) => {
    const parsed = normalizeClaimKey(claimKey);
    if (!parsed.ok) {
      return [];
    }
    return [parsed.value.claimKey];
  });
  return limitUnique(canonicalKeys, MAX_SUPPORT_CLAIM_KEY_EXAMPLES);
}
|
|
1830
3307
|
function normalizeMetadataEntity(value) {
|
|
1831
3308
|
if (typeof value !== "string") {
|
|
1832
3309
|
return void 0;
|
|
1833
3310
|
}
|
|
1834
3311
|
const normalized = normalizeClaimKeySegment(value);
|
|
1835
|
-
if (normalized.length === 0 ||
|
|
3312
|
+
if (normalized.length === 0 || SELF_REFERENTIAL_ENTITIES.has(normalized) || !/[a-z]/u.test(normalized)) {
|
|
1836
3313
|
return void 0;
|
|
1837
3314
|
}
|
|
1838
3315
|
return normalized;
|
|
@@ -1949,13 +3426,9 @@ function validateEntriesWithIndexes(inputs) {
|
|
|
1949
3426
|
rejectedInputIndexes.push(index);
|
|
1950
3427
|
continue;
|
|
1951
3428
|
}
|
|
1952
|
-
|
|
1953
|
-
|
|
1954
|
-
|
|
1955
|
-
continue;
|
|
1956
|
-
}
|
|
1957
|
-
if (input.valid_to !== void 0 && !isIsoTimestamp(input.valid_to)) {
|
|
1958
|
-
errors.push(`Entry ${index} has an invalid valid_to timestamp.`);
|
|
3429
|
+
const temporalValidity = validateTemporalValidityRange(input.valid_from, input.valid_to);
|
|
3430
|
+
if (!temporalValidity.ok) {
|
|
3431
|
+
errors.push(`Entry ${index} ${temporalValidity.message}`);
|
|
1959
3432
|
rejectedInputIndexes.push(index);
|
|
1960
3433
|
continue;
|
|
1961
3434
|
}
|
|
@@ -1974,6 +3447,42 @@ function validateEntriesWithIndexes(inputs) {
|
|
|
1974
3447
|
}
|
|
1975
3448
|
}
|
|
1976
3449
|
}
|
|
3450
|
+
const claimKeyRaw = normalizedClaimKey ? normalizeOptionalString(input.claim_key_raw) : void 0;
|
|
3451
|
+
const claimKeyStatus = normalizedClaimKey ? normalizeClaimKeyStatus(input.claim_key_status, index, warnings) : void 0;
|
|
3452
|
+
const claimKeySource = normalizedClaimKey ? normalizeClaimKeySource(input.claim_key_source, index, warnings) : void 0;
|
|
3453
|
+
const claimKeyConfidence = normalizedClaimKey ? normalizeClaimKeyConfidence(input.claim_key_confidence, index, warnings) : void 0;
|
|
3454
|
+
const claimKeyRationale = normalizedClaimKey ? normalizeOptionalString(input.claim_key_rationale) : void 0;
|
|
3455
|
+
const claimSupportSourceKind = normalizedClaimKey ? normalizeOptionalString(input.claim_support_source_kind) : void 0;
|
|
3456
|
+
const claimSupportLocator = normalizedClaimKey ? normalizeOptionalString(input.claim_support_locator) : void 0;
|
|
3457
|
+
const claimSupportObservedAt = normalizedClaimKey && input.claim_support_observed_at !== void 0 ? normalizeClaimSupportObservedAt(input.claim_support_observed_at, index, warnings) : void 0;
|
|
3458
|
+
const claimSupportMode = normalizedClaimKey && input.claim_support_mode !== void 0 ? normalizeClaimSupportMode(input.claim_support_mode, index, warnings) : void 0;
|
|
3459
|
+
const hasPrecomputedLifecycleFields = hasPrecomputedClaimKeyLifecycleFields(input);
|
|
3460
|
+
const resolvedPrecomputedLifecycle = normalizedClaimKey && hasPrecomputedLifecycleFields ? buildPrecomputedClaimKeyLifecycle({
|
|
3461
|
+
claim_key: normalizedClaimKey,
|
|
3462
|
+
claim_key_raw: claimKeyRaw,
|
|
3463
|
+
claim_key_status: claimKeyStatus,
|
|
3464
|
+
claim_key_source: claimKeySource,
|
|
3465
|
+
claim_key_confidence: claimKeyConfidence,
|
|
3466
|
+
claim_key_rationale: claimKeyRationale,
|
|
3467
|
+
claim_support_source_kind: claimSupportSourceKind,
|
|
3468
|
+
claim_support_locator: claimSupportLocator,
|
|
3469
|
+
claim_support_observed_at: claimSupportObservedAt,
|
|
3470
|
+
claim_support_mode: claimSupportMode
|
|
3471
|
+
}) : void 0;
|
|
3472
|
+
if (hasPrecomputedLifecycleFields) {
|
|
3473
|
+
if (!normalizedClaimKey) {
|
|
3474
|
+
errors.push(`Entry ${index} provided claim-key lifecycle metadata without a valid claim key.`);
|
|
3475
|
+
rejectedInputIndexes.push(index);
|
|
3476
|
+
continue;
|
|
3477
|
+
}
|
|
3478
|
+
if (!resolvedPrecomputedLifecycle) {
|
|
3479
|
+
errors.push(
|
|
3480
|
+
`Entry ${index} provided partial or invalid claim-key lifecycle metadata. Complete bundles require claim_key_status, claim_key_source, claim_key_confidence, and claim_key_rationale.`
|
|
3481
|
+
);
|
|
3482
|
+
rejectedInputIndexes.push(index);
|
|
3483
|
+
continue;
|
|
3484
|
+
}
|
|
3485
|
+
}
|
|
1977
3486
|
valid.push({
|
|
1978
3487
|
inputIndex: index,
|
|
1979
3488
|
input: {
|
|
@@ -1990,8 +3499,17 @@ function validateEntriesWithIndexes(inputs) {
|
|
|
1990
3499
|
created_at: normalizeOptionalString(input.created_at),
|
|
1991
3500
|
supersedes: normalizeOptionalString(input.supersedes),
|
|
1992
3501
|
claim_key: normalizedClaimKey,
|
|
1993
|
-
|
|
1994
|
-
|
|
3502
|
+
claim_key_raw: resolvedPrecomputedLifecycle?.claim_key_raw ?? claimKeyRaw,
|
|
3503
|
+
claim_key_status: resolvedPrecomputedLifecycle?.claim_key_status,
|
|
3504
|
+
claim_key_source: resolvedPrecomputedLifecycle?.claim_key_source,
|
|
3505
|
+
claim_key_confidence: resolvedPrecomputedLifecycle?.claim_key_confidence,
|
|
3506
|
+
claim_key_rationale: resolvedPrecomputedLifecycle?.claim_key_rationale,
|
|
3507
|
+
claim_support_source_kind: resolvedPrecomputedLifecycle?.claim_support_source_kind ?? claimSupportSourceKind,
|
|
3508
|
+
claim_support_locator: resolvedPrecomputedLifecycle?.claim_support_locator ?? claimSupportLocator,
|
|
3509
|
+
claim_support_observed_at: resolvedPrecomputedLifecycle?.claim_support_observed_at ?? claimSupportObservedAt,
|
|
3510
|
+
claim_support_mode: resolvedPrecomputedLifecycle?.claim_support_mode ?? claimSupportMode,
|
|
3511
|
+
valid_from: temporalValidity.value.validFrom,
|
|
3512
|
+
valid_to: temporalValidity.value.validTo
|
|
1995
3513
|
}
|
|
1996
3514
|
});
|
|
1997
3515
|
}
|
|
@@ -2016,6 +3534,56 @@ function normalizeOptionalString(value) {
|
|
|
2016
3534
|
const normalized = value?.trim();
|
|
2017
3535
|
return normalized && normalized.length > 0 ? normalized : void 0;
|
|
2018
3536
|
}
|
|
3537
|
+
/**
 * Validates an optional `claim_support_observed_at` value.
 *
 * @param {*} value - Raw input value.
 * @param {number} index - Input index, used in the warning text.
 * @param {Array<string>} warnings - Receives a warning when the value is dropped.
 * @returns {string|undefined} The trimmed timestamp, or undefined when it is
 *   blank or not an ISO timestamp.
 */
function normalizeClaimSupportObservedAt(value, index, warnings) {
  const trimmed = normalizeOptionalString(value);
  if (!trimmed) {
    return void 0;
  }
  if (isIsoTimestamp(trimmed)) {
    return trimmed;
  }
  warnings.push(`Entry ${index} provided invalid claim_support_observed_at ${JSON.stringify(value)} and it was dropped.`);
  return void 0;
}
|
|
3548
|
+
/**
 * Parses an optional `claim_key_status`; an invalid (but provided) value is
 * dropped with a warning.
 *
 * @param {*} value - Raw input value.
 * @param {number} index - Input index, used in the warning text.
 * @param {Array<string>} warnings - Receives a warning when the value is dropped.
 * @returns {*} The parsed status, or undefined.
 */
function normalizeClaimKeyStatus(value, index, warnings) {
  const status = parseClaimKeyStatus(value);
  const wasRejected = !status && value !== void 0;
  if (wasRejected) {
    warnings.push(`Entry ${index} provided invalid claim_key_status ${JSON.stringify(value)} and it was dropped.`);
  }
  return status ? status : void 0;
}
|
|
3558
|
+
/**
 * Parses an optional `claim_key_source`; an invalid (but provided) value is
 * dropped with a warning.
 *
 * @param {*} value - Raw input value.
 * @param {number} index - Input index, used in the warning text.
 * @param {Array<string>} warnings - Receives a warning when the value is dropped.
 * @returns {*} The parsed source, or undefined.
 */
function normalizeClaimKeySource(value, index, warnings) {
  const source = parseClaimKeySource(value);
  const wasRejected = !source && value !== void 0;
  if (wasRejected) {
    warnings.push(`Entry ${index} provided invalid claim_key_source ${JSON.stringify(value)} and it was dropped.`);
  }
  return source ? source : void 0;
}
|
|
3568
|
+
/**
 * Validates an optional `claim_key_confidence` field.
 *
 * `parseClaimKeyConfidence` is only consulted when a value was supplied, and
 * its result is compared against `undefined` (not truthiness) so that a parsed
 * value of `0` is kept.
 *
 * @param value - Raw field value.
 * @param index - Entry index, used only inside the warning text.
 * @param warnings - Array that receives a message when a supplied value is rejected.
 * @returns The parsed confidence, or `undefined` when the value is absent or rejected.
 */
function normalizeClaimKeyConfidence(value, index, warnings) {
  const wasProvided = value !== void 0;
  const confidence = wasProvided ? parseClaimKeyConfidence(value) : void 0;
  if (wasProvided && confidence === void 0) {
    warnings.push(`Entry ${index} provided invalid claim_key_confidence ${JSON.stringify(value)} and it was dropped.`);
  }
  return confidence;
}
|
|
3579
|
+
/**
 * Validates an optional `claim_support_mode` field.
 *
 * @param value - Raw field value; handed to `parseClaimSupportMode`.
 * @param index - Entry index, used only inside the warning text.
 * @param warnings - Array that receives a message when a supplied value is rejected.
 * @returns The parsed mode, or `undefined` when the value is absent or rejected.
 */
function normalizeClaimSupportMode(value, index, warnings) {
  const parsed = parseClaimSupportMode(value);
  if (parsed) {
    return parsed;
  }
  // An omitted field is not an error. Warn only for values that were actually
  // supplied, the same way normalizeClaimKeyStatus and normalizeClaimKeySource do;
  // otherwise every entry without the field would log "invalid ... undefined".
  if (value !== void 0) {
    warnings.push(`Entry ${index} provided invalid claim_support_mode ${JSON.stringify(value)} and it was dropped.`);
  }
  return void 0;
}
|
|
2019
3587
|
/**
 * Reports whether `value` is an array whose elements are all strings.
 *
 * @param value - Candidate tags value of any type.
 * @returns `true` for an array containing only strings (including an empty array), otherwise `false`.
 */
function areValidTags(value) {
  if (!Array.isArray(value)) {
    return false;
  }
  const hasNonStringTag = value.some((tag) => typeof tag !== "string");
  return !hasNonStringTag;
}
|
|
@@ -2035,7 +3603,7 @@ function isIsoTimestamp(value) {
|
|
|
2035
3603
|
|
|
2036
3604
|
// src/core/store/pipeline.ts
|
|
2037
3605
|
var AUTO_SUPERSESSION_MIN_EXTRACTED_CONFIDENCE = 0.9;
|
|
2038
|
-
var
|
|
3606
|
+
var AUTO_SUPERSESSION_ELIGIBLE_SOURCES = /* @__PURE__ */ new Set(["model", "json_retry"]);
|
|
2039
3607
|
async function storeEntriesDetailed(inputs, db, embedding, options = {}) {
|
|
2040
3608
|
if (inputs.length === 0) {
|
|
2041
3609
|
return { stored: 0, skipped: 0, rejected: 0, details: [] };
|
|
@@ -2069,6 +3637,7 @@ async function storeEntriesDetailed(inputs, db, embedding, options = {}) {
|
|
|
2069
3637
|
}
|
|
2070
3638
|
const pendingEntries = plan.pendingEntries;
|
|
2071
3639
|
const extractedClaimKeys = await maybeExtractClaimKeys(pendingEntries, options);
|
|
3640
|
+
applyExtractedClaimKeyMetadata(pendingEntries, extractedClaimKeys);
|
|
2072
3641
|
const embeddings = await resolvePendingEmbeddings(inputs, pendingEntries, embedding, options.precomputedEmbeddings);
|
|
2073
3642
|
await persistEntries(db, pendingEntries, embeddings, extractedClaimKeys, options.claimExtraction?.config, options.onWarning);
|
|
2074
3643
|
return {
|
|
@@ -2150,6 +3719,7 @@ async function persistEntries(db, preparedEntries, embeddings, extractedClaimKey
|
|
|
2150
3719
|
}
|
|
2151
3720
|
function buildEntry(preparedEntry, embedding) {
|
|
2152
3721
|
const now = (/* @__PURE__ */ new Date()).toISOString();
|
|
3722
|
+
const acceptedClaimKey = preparedEntry.claimKey;
|
|
2153
3723
|
return {
|
|
2154
3724
|
id: randomUUID(),
|
|
2155
3725
|
type: preparedEntry.input.type,
|
|
@@ -2169,7 +3739,16 @@ function buildEntry(preparedEntry, embedding) {
|
|
|
2169
3739
|
recall_count: 0,
|
|
2170
3740
|
valid_from: preparedEntry.input.valid_from,
|
|
2171
3741
|
valid_to: preparedEntry.input.valid_to,
|
|
2172
|
-
claim_key: preparedEntry.input.claim_key,
|
|
3742
|
+
claim_key: acceptedClaimKey?.claim_key ?? preparedEntry.input.claim_key,
|
|
3743
|
+
claim_key_raw: acceptedClaimKey?.claim_key_raw,
|
|
3744
|
+
claim_key_status: acceptedClaimKey?.claim_key_status,
|
|
3745
|
+
claim_key_source: acceptedClaimKey?.claim_key_source,
|
|
3746
|
+
claim_key_confidence: acceptedClaimKey?.claim_key_confidence,
|
|
3747
|
+
claim_key_rationale: acceptedClaimKey?.claim_key_rationale,
|
|
3748
|
+
claim_support_source_kind: acceptedClaimKey?.claim_support_source_kind,
|
|
3749
|
+
claim_support_locator: acceptedClaimKey?.claim_support_locator,
|
|
3750
|
+
claim_support_observed_at: acceptedClaimKey?.claim_support_observed_at,
|
|
3751
|
+
claim_support_mode: acceptedClaimKey?.claim_support_mode,
|
|
2173
3752
|
retired: false,
|
|
2174
3753
|
created_at: preparedEntry.input.created_at ?? now,
|
|
2175
3754
|
updated_at: now
|
|
@@ -2192,8 +3771,14 @@ async function maybeExtractClaimKeys(preparedEntries, options) {
|
|
|
2192
3771
|
db: claimExtraction.db
|
|
2193
3772
|
},
|
|
2194
3773
|
claimExtraction.config,
|
|
2195
|
-
|
|
2196
|
-
options.onWarning
|
|
3774
|
+
claimExtraction.config.concurrency ?? 10,
|
|
3775
|
+
options.onWarning,
|
|
3776
|
+
(entry, diagnostic) => {
|
|
3777
|
+
const preparedEntry = preparedEntries.find((candidate) => candidate.input === entry);
|
|
3778
|
+
if (preparedEntry) {
|
|
3779
|
+
options.onClaimExtractionDiagnostic?.(preparedEntry.inputIndex, diagnostic);
|
|
3780
|
+
}
|
|
3781
|
+
}
|
|
2197
3782
|
);
|
|
2198
3783
|
const extractedClaimKeys = /* @__PURE__ */ new Map();
|
|
2199
3784
|
for (const preparedEntry of preparedEntries) {
|
|
@@ -2212,12 +3797,26 @@ async function maybeExtractClaimKeys(preparedEntries, options) {
|
|
|
2212
3797
|
/**
 * Reports whether the given database object exposes a callable `withTransaction` member.
 *
 * @param db - Database port object.
 * @returns `true` when `db.withTransaction` is a function.
 */
function hasTransactionSupport(db) {
  const { withTransaction } = db;
  return typeof withTransaction === "function";
}
|
|
3800
|
+
/**
 * Attaches claim-key lifecycle metadata to prepared entries that do not have one yet.
 *
 * For each entry without `claimKey`, a lifecycle built from the entry's own
 * precomputed fields is preferred; failing that, one is built from the
 * extraction result stored under the entry's `inputIndex`. When a lifecycle is
 * found it is stored on `preparedEntry.claimKey` and applied to
 * `preparedEntry.input` via `applyClaimKeyLifecycle`. Entries are mutated in place.
 *
 * @param preparedEntries - Iterable of prepared entries (`input`, `inputIndex`, optional `claimKey`).
 * @param extractedClaimKeys - Map-like object keyed by `inputIndex` holding extraction results.
 */
function applyExtractedClaimKeyMetadata(preparedEntries, extractedClaimKeys) {
  for (const entry of preparedEntries) {
    if (entry.claimKey) {
      // Already resolved earlier; leave it alone.
      continue;
    }
    const extracted = extractedClaimKeys.get(entry.inputIndex);
    let lifecycle = buildPrecomputedClaimKeyLifecycle(entry.input);
    const precomputedMissing = lifecycle === null || lifecycle === void 0;
    if (precomputedMissing && extracted) {
      const supportContext = buildInferredIngestClaimKeySupportContext(entry.input);
      lifecycle = buildExtractedClaimKeyLifecycle(extracted, supportContext);
    }
    if (lifecycle) {
      entry.claimKey = lifecycle;
      applyClaimKeyLifecycle(entry.input, lifecycle);
    }
  }
}
|
|
2215
3814
|
async function planAutoSupersession(db, preparedEntries, extractedClaimKeys, claimExtractionConfig) {
|
|
2216
3815
|
const plans = /* @__PURE__ */ new Map();
|
|
2217
3816
|
const preparedEntriesByClaimKey = groupPreparedEntriesByClaimKey(preparedEntries);
|
|
2218
3817
|
const siblingCache = /* @__PURE__ */ new Map();
|
|
2219
3818
|
for (const preparedEntry of preparedEntries) {
|
|
2220
|
-
const claimKey = preparedEntry.input.claim_key;
|
|
3819
|
+
const claimKey = preparedEntry.claimKey?.claim_key ?? preparedEntry.input.claim_key;
|
|
2221
3820
|
if (!claimKey || preparedEntry.input.supersedes) {
|
|
2222
3821
|
continue;
|
|
2223
3822
|
}
|
|
@@ -2244,10 +3843,10 @@ async function planAutoSupersession(db, preparedEntries, extractedClaimKeys, cla
|
|
|
2244
3843
|
if (!sibling) {
|
|
2245
3844
|
continue;
|
|
2246
3845
|
}
|
|
2247
|
-
if (!isAutoSupersessionEligible(preparedEntry,
|
|
3846
|
+
if (!isAutoSupersessionEligible(preparedEntry.claimKey, claimExtractionConfig)) {
|
|
2248
3847
|
plans.set(preparedEntry.inputIndex, {
|
|
2249
3848
|
kind: "skip",
|
|
2250
|
-
warning: buildAutoSupersessionEligibilityWarning(preparedEntry
|
|
3849
|
+
warning: buildAutoSupersessionEligibilityWarning(preparedEntry)
|
|
2251
3850
|
});
|
|
2252
3851
|
continue;
|
|
2253
3852
|
}
|
|
@@ -2272,7 +3871,7 @@ async function planAutoSupersession(db, preparedEntries, extractedClaimKeys, cla
|
|
|
2272
3871
|
function groupPreparedEntriesByClaimKey(preparedEntries) {
|
|
2273
3872
|
const grouped = /* @__PURE__ */ new Map();
|
|
2274
3873
|
for (const preparedEntry of preparedEntries) {
|
|
2275
|
-
const claimKey = preparedEntry.input.claim_key;
|
|
3874
|
+
const claimKey = preparedEntry.claimKey?.claim_key ?? preparedEntry.input.claim_key;
|
|
2276
3875
|
if (!claimKey) {
|
|
2277
3876
|
continue;
|
|
2278
3877
|
}
|
|
@@ -2291,28 +3890,31 @@ async function getClaimKeySiblings(db, cache, claimKey) {
|
|
|
2291
3890
|
cache.set(claimKey, siblings);
|
|
2292
3891
|
return siblings;
|
|
2293
3892
|
}
|
|
2294
|
-
function isAutoSupersessionEligible(
|
|
2295
|
-
if (
|
|
2296
|
-
return true;
|
|
2297
|
-
}
|
|
2298
|
-
const extractedClaimKey = extractedClaimKeys.get(preparedEntry.inputIndex);
|
|
2299
|
-
if (!extractedClaimKey || !claimExtractionConfig) {
|
|
3893
|
+
/**
 * Decides whether an accepted claim key may drive automatic supersession.
 *
 * Rules, in order:
 *  - no claim key, or a status other than "trusted": not eligible;
 *  - trusted with source "manual": eligible;
 *  - any other source must be in AUTO_SUPERSESSION_ELIGIBLE_SOURCES and an
 *    extraction config must be present;
 *  - the confidence must then reach the larger of the configured
 *    `confidenceThreshold` and AUTO_SUPERSESSION_MIN_EXTRACTED_CONFIDENCE.
 *
 * @param claimKey - Accepted claim-key lifecycle object, or a nullish value.
 * @param claimExtractionConfig - Extraction config providing `confidenceThreshold`; may be nullish.
 * @returns `true` when auto-supersession is allowed.
 */
function isAutoSupersessionEligible(claimKey, claimExtractionConfig) {
  const isTrusted = Boolean(claimKey) && claimKey.claim_key_status === "trusted";
  if (!isTrusted) {
    return false;
  }
  const source = claimKey.claim_key_source;
  if (source === "manual") {
    return true;
  }
  const sourceAllowed = AUTO_SUPERSESSION_ELIGIBLE_SOURCES.has(source);
  if (!(sourceAllowed && claimExtractionConfig)) {
    return false;
  }
  const requiredConfidence = Math.max(
    claimExtractionConfig.confidenceThreshold,
    AUTO_SUPERSESSION_MIN_EXTRACTED_CONFIDENCE
  );
  return claimKey.claim_key_confidence >= requiredConfidence;
}
|
|
2307
|
-
function buildAutoSupersessionEligibilityWarning(preparedEntry
|
|
2308
|
-
const
|
|
2309
|
-
|
|
3905
|
+
/**
 * Builds the warning text explaining why an entry was stored without
 * automatic supersession.
 *
 * The message depends on the entry's accepted claim key:
 *  - none attached: provenance was not explicit / not a tracked extraction;
 *  - source "manual": provenance not eligible for automatic linking;
 *  - status other than "trusted": reports status, source and confidence;
 *  - otherwise: reports source and confidence of the extracted key.
 *
 * @param preparedEntry - Prepared entry with `input` (`subject`, optional `claim_key`) and optional `claimKey` lifecycle.
 * @returns The warning message.
 */
function buildAutoSupersessionEligibilityWarning(preparedEntry) {
  const acceptedClaimKey = preparedEntry.claimKey;
  const claimKey = acceptedClaimKey?.claim_key ?? preparedEntry.input.claim_key ?? "(missing)";
  const prefix = `Stored entry "${preparedEntry.input.subject}" with claim_key "${claimKey}" but skipped auto-supersession because the`;
  if (!acceptedClaimKey) {
    return `${prefix} claim-key provenance was not explicit or a tracked high-confidence extraction.`;
  }
  if (acceptedClaimKey.claim_key_source === "manual") {
    return `${prefix} claim-key provenance was not eligible for automatic linking.`;
  }
  // Building a diagnostic must never throw: a lifecycle without a numeric
  // confidence would otherwise fail on `.toFixed` and abort the caller.
  const rawConfidence = acceptedClaimKey.claim_key_confidence;
  const confidence = typeof rawConfidence === "number" ? rawConfidence.toFixed(2) : "unknown";
  const policy = `Only explicit/manual claim keys or model-extracted keys at ${AUTO_SUPERSESSION_MIN_EXTRACTED_CONFIDENCE.toFixed(2)}+ auto-link.`;
  if (acceptedClaimKey.claim_key_status !== "trusted") {
    return `${prefix} accepted claim key is ${acceptedClaimKey.claim_key_status} from ${acceptedClaimKey.claim_key_source} at confidence ${confidence}. ${policy}`;
  }
  return `${prefix} extracted claim key came from ${acceptedClaimKey.claim_key_source} at confidence ${confidence}. ${policy}`;
}
|
|
2317
3919
|
function buildAutoSupersessionRuleWarning(preparedEntry, sibling, reason) {
|
|
2318
3920
|
if (reason === "type_mismatch") {
|
|
@@ -2332,7 +3934,7 @@ async function buildStorePlan(inputs, db) {
|
|
|
2332
3934
|
inputIndex,
|
|
2333
3935
|
contentHash: computeContentHash(input.content, input.source_file),
|
|
2334
3936
|
normContentHash: computeNormContentHash(input.content),
|
|
2335
|
-
|
|
3937
|
+
claimKey: buildManualAcceptedClaimKey(inputs[inputIndex], input)
|
|
2336
3938
|
}));
|
|
2337
3939
|
const afterBatchContentHash = dedupePreparedEntries(preparedEntries, "contentHash", "content_hash", details);
|
|
2338
3940
|
const existingHashes = await db.findExistingHashes(afterBatchContentHash.map((entry) => entry.contentHash));
|
|
@@ -2388,6 +3990,31 @@ function formatPipelineError(error) {
|
|
|
2388
3990
|
function sortStoreDetails(details) {
|
|
2389
3991
|
return [...details].sort((left, right) => left.inputIndex - right.inputIndex);
|
|
2390
3992
|
}
|
|
3993
|
+
/**
 * Resolves the accepted claim-key lifecycle for an input that already carries a claim key.
 *
 * Resolution order:
 *  1. no `claim_key` on the normalized input: nothing to accept;
 *  2. a lifecycle built by `buildPrecomputedClaimKeyLifecycle` is used as-is;
 *  3. if the raw input has lifecycle fields (per `hasPrecomputedClaimKeyLifecycleFields`)
 *     but step 2 produced nothing, the input is rejected;
 *  4. otherwise a lifecycle is built by `buildManualClaimKeyLifecycle`.
 *
 * @param rawInput - Original caller-supplied input; may be nullish.
 * @param normalizedInput - Normalized input whose fields are read here.
 * @returns The accepted lifecycle, or `undefined` when the input has no claim key.
 * @throws {Error} When lifecycle metadata is present on the raw input but no precomputed lifecycle could be built.
 */
function buildManualAcceptedClaimKey(rawInput, normalizedInput) {
  const { claim_key: claimKey } = normalizedInput;
  if (!claimKey) {
    return void 0;
  }
  const precomputed = buildPrecomputedClaimKeyLifecycle(normalizedInput);
  if (!precomputed) {
    const hasLifecycleFields = Boolean(rawInput) && hasPrecomputedClaimKeyLifecycleFields(rawInput);
    if (hasLifecycleFields) {
      throw new Error("Store inputs with claim-key lifecycle metadata must provide a complete valid lifecycle bundle.");
    }
    // Prefer an already-normalized raw key; fall back to the caller's original spelling.
    const rawClaimKey = normalizedInput.claim_key_raw ?? normalizeOptionalString2(rawInput?.claim_key);
    return buildManualClaimKeyLifecycle({
      claimKey,
      rawClaimKey,
      supportSourceKind: normalizedInput.claim_support_source_kind,
      supportLocator: normalizedInput.claim_support_locator,
      supportObservedAt: normalizedInput.claim_support_observed_at,
      supportMode: normalizedInput.claim_support_mode
    });
  }
  return precomputed;
}
|
|
4014
|
+
/**
 * Trims an optional string and collapses blank results to `undefined`.
 *
 * @param value - A string, or a nullish value.
 * @returns The trimmed string when it is non-empty, otherwise `undefined`.
 */
function normalizeOptionalString2(value) {
  const trimmed = value?.trim();
  if (trimmed && trimmed.length > 0) {
    return trimmed;
  }
  return void 0;
}
|
|
2391
4018
|
|
|
2392
4019
|
// src/core/episode/summary-prompt.ts
|
|
2393
4020
|
var EPISODE_SUMMARY_SYSTEM_PROMPT = [
|
|
@@ -2505,7 +4132,7 @@ async function generateEpisodeSummary(transcript, llm) {
|
|
|
2505
4132
|
}
|
|
2506
4133
|
|
|
2507
4134
|
// src/app/episode-ingest/service/preflight.ts
|
|
2508
|
-
import
|
|
4135
|
+
import path2 from "path";
|
|
2509
4136
|
|
|
2510
4137
|
// src/core/episode/transcript-render.ts
|
|
2511
4138
|
var MIN_EPISODE_MESSAGES = 4;
|
|
@@ -2836,9 +4463,9 @@ function resolveSessionMeta(filePath, parsedSessionId, registryMeta, reconstruct
|
|
|
2836
4463
|
};
|
|
2837
4464
|
}
|
|
2838
4465
|
function deriveAgentIdFromPath(filePath) {
|
|
2839
|
-
const resolved =
|
|
2840
|
-
const parent =
|
|
2841
|
-
const grandparent =
|
|
4466
|
+
const resolved = path2.resolve(filePath);
|
|
4467
|
+
const parent = path2.basename(path2.dirname(resolved));
|
|
4468
|
+
const grandparent = path2.basename(path2.dirname(path2.dirname(resolved)));
|
|
2842
4469
|
if (parent !== "sessions") {
|
|
2843
4470
|
return null;
|
|
2844
4471
|
}
|
|
@@ -3166,50 +4793,59 @@ function resolveRecentCutoff(recent, now) {
|
|
|
3166
4793
|
return cutoff;
|
|
3167
4794
|
}
|
|
3168
4795
|
|
|
3169
|
-
// src/adapters/openclaw/session/session-id.ts
|
|
3170
|
-
import path2 from "path";
|
|
3171
|
-
function deriveOpenClawSessionIdFromFilePath(sessionFile, logger) {
|
|
3172
|
-
const normalizedSessionFile = sessionFile.trim();
|
|
3173
|
-
if (normalizedSessionFile.length === 0) {
|
|
3174
|
-
debugLog(logger, "session-id", "cannot derive session id from empty session file path");
|
|
3175
|
-
return void 0;
|
|
3176
|
-
}
|
|
3177
|
-
const fileName = path2.basename(normalizedSessionFile);
|
|
3178
|
-
const sessionId = fileName.replace(/\.jsonl(?:\..*)?$/i, "").trim();
|
|
3179
|
-
debugLog(logger, "session-id", `derived session id "${sessionId || "<empty>"}" from file=${normalizedSessionFile}`);
|
|
3180
|
-
return sessionId.length > 0 ? sessionId : void 0;
|
|
3181
|
-
}
|
|
3182
|
-
function debugLog(logger, subsystem, message) {
|
|
3183
|
-
logger?.debug?.(`[agenr] ${subsystem}: ${message}`);
|
|
3184
|
-
}
|
|
3185
|
-
|
|
3186
4796
|
// src/adapters/openclaw/session/sessions-store-reader.ts
|
|
3187
4797
|
import * as fs3 from "fs/promises";
|
|
3188
4798
|
import path3 from "path";
|
|
3189
4799
|
/**
 * Reads the sessions store for a sessions directory and returns its entries,
 * reporting the outcome through the optional debug logger.
 *
 * A blank `sessionsDir` is logged and short-circuits to an empty list. Otherwise
 * the read is delegated to `readOpenClawSessionsStoreWithDiagnostics`; every
 * diagnostic message it returns is logged, and a "loaded" line is logged only
 * when there were no diagnostics.
 *
 * @param sessionsDir - Directory path string expected to contain `sessions.json`.
 * @param logger - Optional logger forwarded to `debugLog2`.
 * @returns The `entries` array from the diagnostics-aware reader (empty for a blank directory).
 */
async function readOpenClawSessionsStore(sessionsDir, logger) {
  const log = (message) => debugLog2(logger, "sessions-store-reader", message);
  const trimmedDir = sessionsDir.trim();
  if (trimmedDir.length === 0) {
    log("skipping sessions.json read because sessionsDir is empty");
    return [];
  }
  // The untrimmed value is forwarded on purpose: the delegate does its own trimming.
  const { entries, diagnostics } = await readOpenClawSessionsStoreWithDiagnostics(sessionsDir);
  for (const { message } of diagnostics) {
    log(message);
  }
  if (diagnostics.length === 0) {
    const sessionsJsonPath = path3.join(path3.resolve(trimmedDir), "sessions.json");
    log(`loaded sessions.json entries=${entries.length} path=${sessionsJsonPath}`);
  }
  return entries;
}
|
|
4817
|
+
async function readOpenClawSessionsStoreWithDiagnostics(sessionsDir) {
|
|
4818
|
+
const normalizedSessionsDir = sessionsDir.trim();
|
|
4819
|
+
if (normalizedSessionsDir.length === 0) {
|
|
4820
|
+
return {
|
|
4821
|
+
entries: [],
|
|
4822
|
+
diagnostics: []
|
|
4823
|
+
};
|
|
4824
|
+
}
|
|
3195
4825
|
const resolvedSessionsDir = path3.resolve(normalizedSessionsDir);
|
|
3196
4826
|
const sessionsJsonPath = path3.join(resolvedSessionsDir, "sessions.json");
|
|
3197
4827
|
try {
|
|
3198
4828
|
const raw = await fs3.readFile(sessionsJsonPath, "utf8");
|
|
3199
4829
|
const parsed = JSON.parse(raw);
|
|
3200
4830
|
if (!isRecord(parsed)) {
|
|
3201
|
-
|
|
3202
|
-
|
|
4831
|
+
return {
|
|
4832
|
+
entries: [],
|
|
4833
|
+
diagnostics: [
|
|
4834
|
+
{
|
|
4835
|
+
kind: "structurally_invalid_file",
|
|
4836
|
+
message: `sessions.json did not contain an object: path=${sessionsJsonPath}`,
|
|
4837
|
+
path: sessionsJsonPath
|
|
4838
|
+
}
|
|
4839
|
+
]
|
|
4840
|
+
};
|
|
3203
4841
|
}
|
|
3204
4842
|
const entries = [];
|
|
3205
4843
|
for (const [sessionKey, value] of Object.entries(parsed)) {
|
|
3206
4844
|
const normalizedSessionKey = sessionKey.trim();
|
|
3207
4845
|
if (normalizedSessionKey.length === 0) {
|
|
3208
|
-
debugLog2(logger, "sessions-store-reader", `skipping blank session key in ${sessionsJsonPath}`);
|
|
3209
4846
|
continue;
|
|
3210
4847
|
}
|
|
3211
4848
|
if (!isRecord(value)) {
|
|
3212
|
-
debugLog2(logger, "sessions-store-reader", `skipping non-object entry for key=${normalizedSessionKey}`);
|
|
3213
4849
|
continue;
|
|
3214
4850
|
}
|
|
3215
4851
|
const sessionId = asTrimmedString(value["sessionId"]);
|
|
@@ -3229,26 +4865,52 @@ async function readOpenClawSessionsStore(sessionsDir, logger) {
|
|
|
3229
4865
|
...updatedAt !== void 0 ? { updatedAt } : {}
|
|
3230
4866
|
});
|
|
3231
4867
|
}
|
|
3232
|
-
|
|
3233
|
-
|
|
4868
|
+
return {
|
|
4869
|
+
entries,
|
|
4870
|
+
diagnostics: []
|
|
4871
|
+
};
|
|
3234
4872
|
} catch (error) {
|
|
3235
|
-
if (
|
|
3236
|
-
|
|
3237
|
-
|
|
4873
|
+
if (isFileNotFound2(error)) {
|
|
4874
|
+
return {
|
|
4875
|
+
entries: [],
|
|
4876
|
+
diagnostics: [
|
|
4877
|
+
{
|
|
4878
|
+
kind: "missing_file",
|
|
4879
|
+
message: `sessions.json missing at ${sessionsJsonPath}`,
|
|
4880
|
+
path: sessionsJsonPath
|
|
4881
|
+
}
|
|
4882
|
+
]
|
|
4883
|
+
};
|
|
3238
4884
|
}
|
|
3239
4885
|
if (error instanceof SyntaxError) {
|
|
3240
|
-
|
|
3241
|
-
|
|
4886
|
+
return {
|
|
4887
|
+
entries: [],
|
|
4888
|
+
diagnostics: [
|
|
4889
|
+
{
|
|
4890
|
+
kind: "malformed_json",
|
|
4891
|
+
message: `sessions.json parse failed at ${sessionsJsonPath}: ${error.message}`,
|
|
4892
|
+
path: sessionsJsonPath
|
|
4893
|
+
}
|
|
4894
|
+
]
|
|
4895
|
+
};
|
|
3242
4896
|
}
|
|
3243
|
-
|
|
3244
|
-
|
|
4897
|
+
return {
|
|
4898
|
+
entries: [],
|
|
4899
|
+
diagnostics: [
|
|
4900
|
+
{
|
|
4901
|
+
kind: "unreadable_file",
|
|
4902
|
+
message: `sessions.json read failed at ${sessionsJsonPath}: ${formatErrorMessage2(error)}`,
|
|
4903
|
+
path: sessionsJsonPath
|
|
4904
|
+
}
|
|
4905
|
+
]
|
|
4906
|
+
};
|
|
3245
4907
|
}
|
|
3246
4908
|
}
|
|
3247
4909
|
/**
 * Resolves a path recorded in the sessions store to an absolute path.
 *
 * @param candidatePath - Path string; absolute paths are normalized, relative ones are resolved against `sessionsDir`.
 * @param sessionsDir - Base directory used for relative candidates.
 * @returns The resolved absolute path.
 */
function resolveSessionStorePath(candidatePath, sessionsDir) {
  if (path3.isAbsolute(candidatePath)) {
    return path3.resolve(candidatePath);
  }
  return path3.resolve(sessionsDir, candidatePath);
}
|
|
3250
4912
|
/**
 * Reports whether `value` is a non-null, non-array object.
 *
 * @param value - Any value.
 * @returns `true` for objects other than `null` and arrays.
 */
function isRecord(value) {
  if (value === null || typeof value !== "object") {
    return false;
  }
  return !Array.isArray(value);
}
|
|
3253
4915
|
function asTrimmedString(value) {
|
|
3254
4916
|
return typeof value === "string" && value.trim().length > 0 ? value.trim() : void 0;
|
|
@@ -3259,10 +4921,10 @@ function asFiniteNumber(value) {
|
|
|
3259
4921
|
/**
 * Emits a debug line of the form `[agenr] <subsystem>: <message>` when the
 * logger exists and has a `debug` member; otherwise does nothing.
 *
 * @param logger - Optional logger object; `debug` is invoked with the logger as `this`.
 * @param subsystem - Subsystem label placed before the colon.
 * @param message - Message text placed after the colon.
 */
function debugLog2(logger, subsystem, message) {
  const debug = logger?.debug;
  if (debug === null || debug === void 0) {
    return;
  }
  // Invoke with the logger as receiver so method-style loggers keep their `this`.
  Reflect.apply(debug, logger, [`[agenr] ${subsystem}: ${message}`]);
}
|
|
3262
|
-
function
|
|
4924
|
+
/**
 * Reports whether a caught value is an object carrying `code === "ENOENT"`.
 *
 * @param error - Any caught value.
 * @returns `true` only for non-null objects with a `code` property equal to "ENOENT".
 */
function isFileNotFound2(error) {
  if (error === null || typeof error !== "object") {
    return false;
  }
  return "code" in error ? error.code === "ENOENT" : false;
}
|
|
3265
|
-
function
|
|
4927
|
+
function formatErrorMessage2(error) {
|
|
3266
4928
|
if (error instanceof Error) {
|
|
3267
4929
|
return error.message;
|
|
3268
4930
|
}
|
|
@@ -3297,26 +4959,32 @@ function parseTuiSessionKey(sessionKey) {
|
|
|
3297
4959
|
}
|
|
3298
4960
|
|
|
3299
4961
|
export {
|
|
3300
|
-
|
|
3301
|
-
|
|
3302
|
-
|
|
3303
|
-
|
|
3304
|
-
|
|
3305
|
-
|
|
3306
|
-
|
|
4962
|
+
detectClaimKeyEntityFamilyCandidates,
|
|
4963
|
+
detectClaimKeySingletonAliasCandidates,
|
|
4964
|
+
buildTrustedClaimKeySupportSeed,
|
|
4965
|
+
evaluateClaimKeySupport,
|
|
4966
|
+
evaluateClaimKeyCompactness,
|
|
4967
|
+
normalizeGroundingTags,
|
|
4968
|
+
tokenizeGroundingText,
|
|
4969
|
+
buildEntryLocalLexicalTokens,
|
|
4970
|
+
applyClaimExtractionResultToEntry,
|
|
3307
4971
|
previewClaimKeyExtraction,
|
|
3308
4972
|
runBatchClaimExtraction,
|
|
3309
4973
|
validateSupersessionRules,
|
|
3310
4974
|
describeSupersessionRuleFailure,
|
|
4975
|
+
computeContentHash,
|
|
4976
|
+
computeNormContentHash,
|
|
4977
|
+
validateEntriesWithIndexes,
|
|
3311
4978
|
storeEntriesDetailed,
|
|
4979
|
+
deriveOpenClawSessionIdFromFilePath,
|
|
3312
4980
|
OpenClawTranscriptParser,
|
|
3313
4981
|
openClawTranscriptParser,
|
|
3314
|
-
deriveOpenClawSessionIdFromFilePath,
|
|
3315
4982
|
readOpenClawSessionsStore,
|
|
3316
4983
|
parseTuiSessionKey,
|
|
3317
4984
|
backfillEpisodeEmbeddings,
|
|
3318
4985
|
prepareEpisodeIngest,
|
|
3319
4986
|
ingestEpisodeTranscript,
|
|
3320
4987
|
executeEpisodeIngestPlan,
|
|
3321
|
-
createEpisodeIngestPlan
|
|
4988
|
+
createEpisodeIngestPlan,
|
|
4989
|
+
createOpenClawRepository
|
|
3322
4990
|
};
|