@agenr/agenr-plugin 1.7.3 → 1.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/chunk-6CEKKEFZ.js +4954 -0
- package/dist/chunk-ETQPUJGS.js +0 -0
- package/dist/chunk-GUDCFFRV.js +1517 -0
- package/dist/chunk-LVDQXSHP.js +5122 -0
- package/dist/index.js +334 -261
- package/openclaw.plugin.json +31 -8
- package/package.json +2 -2
- package/dist/chunk-7WL5EAQZ.js +0 -758
|
@@ -0,0 +1,4954 @@
|
|
|
1
|
+
import {
|
|
2
|
+
EMBEDDING_DIMENSIONS,
|
|
3
|
+
ENTRY_SELECT_COLUMNS,
|
|
4
|
+
ENTRY_TYPES,
|
|
5
|
+
EPISODE_ACTIVITY_LEVELS,
|
|
6
|
+
EXPIRY_LEVELS,
|
|
7
|
+
VECTOR_INDEX_NAME,
|
|
8
|
+
applyClaimKeyLifecycle,
|
|
9
|
+
buildActiveEntryClause,
|
|
10
|
+
buildExtractedClaimKeyLifecycle,
|
|
11
|
+
buildInferredIngestClaimKeySupportContext,
|
|
12
|
+
buildManualClaimKeyLifecycle,
|
|
13
|
+
buildPrecomputedClaimKeyLifecycle,
|
|
14
|
+
composeEmbeddingText,
|
|
15
|
+
hasPrecomputedClaimKeyLifecycleFields,
|
|
16
|
+
mapEntryRow,
|
|
17
|
+
parseClaimKeyConfidence,
|
|
18
|
+
parseClaimKeySource,
|
|
19
|
+
parseClaimKeyStatus,
|
|
20
|
+
parseClaimSupportMode,
|
|
21
|
+
readNumber,
|
|
22
|
+
readOptionalString,
|
|
23
|
+
readRequiredString,
|
|
24
|
+
validateTemporalValidityRange
|
|
25
|
+
} from "./chunk-LVDQXSHP.js";
|
|
26
|
+
import {
|
|
27
|
+
compactClaimKey,
|
|
28
|
+
describeClaimKeyNormalizationFailure,
|
|
29
|
+
describeExtractedClaimKeyRejection,
|
|
30
|
+
inspectClaimKey,
|
|
31
|
+
isTrustedClaimKeyForCleanup,
|
|
32
|
+
normalizeClaimKey,
|
|
33
|
+
normalizeClaimKeySegment,
|
|
34
|
+
parseRelativeDate,
|
|
35
|
+
resolveClaimSlotPolicy,
|
|
36
|
+
validateExtractedClaimKey
|
|
37
|
+
} from "./chunk-GUDCFFRV.js";
|
|
38
|
+
|
|
39
|
+
// src/adapters/openclaw/transcript/parser.ts
|
|
40
|
+
import { createHash } from "crypto";
|
|
41
|
+
import * as fs2 from "fs/promises";
|
|
42
|
+
|
|
43
|
+
// src/adapters/openclaw/session/session-id.ts
|
|
44
|
+
import path from "path";
|
|
45
|
+
/**
 * Derives a session id from the base name of a session transcript path.
 *
 * The ".jsonl" extension, together with anything that follows it
 * (for example ".jsonl.bak"), is removed case-insensitively.
 *
 * @param sessionFile - Path of the session file.
 * @param logger - Optional logger forwarded to debugLog.
 * @returns The derived id, or undefined when the path or the derived id is empty.
 */
function deriveOpenClawSessionIdFromFilePath(sessionFile, logger) {
  const trimmedPath = sessionFile.trim();
  if (!trimmedPath) {
    debugLog(logger, "session-id", "cannot derive session id from empty session file path");
    return void 0;
  }
  const derivedId = path.basename(trimmedPath).replace(/\.jsonl(?:\..*)?$/i, "").trim();
  const hasId = derivedId.length > 0;
  debugLog(logger, "session-id", `derived session id "${hasId ? derivedId : "<empty>"}" from file=${trimmedPath}`);
  return hasId ? derivedId : void 0;
}
|
|
56
|
+
/**
 * Emits a debug line prefixed with "[agenr] <subsystem>: " when the logger
 * exposes a `debug` member; does nothing otherwise.
 *
 * @param logger - Optional logger object.
 * @param subsystem - Short tag identifying the emitting subsystem.
 * @param message - Message text.
 */
function debugLog(logger, subsystem, message) {
  if (logger == null || logger.debug == null) {
    return;
  }
  logger.debug(`[agenr] ${subsystem}: ${message}`);
}
|
|
59
|
+
|
|
60
|
+
// src/adapters/openclaw/transcript/jsonl.ts
|
|
61
|
+
/**
 * Parses one JSONL line that is expected to hold a JSON object.
 *
 * @param line - Raw line text.
 * @param lineNumber - 1-based line number used in diagnostics (defaults to 1).
 * @returns `{ record }` for an object; `{ record: null }` for a blank line;
 *   `{ record: null, diagnostic }` for malformed JSON or a non-object value
 *   (arrays, primitives, null).
 */
function parseJsonObjectLineWithDiagnostics(line, lineNumber = 1) {
  const isBlank = !line || line.trim().length === 0;
  if (isBlank) {
    return { record: null };
  }
  let parsed;
  try {
    parsed = JSON.parse(line);
  } catch {
    return {
      record: null,
      diagnostic: {
        kind: "malformed_json",
        lineNumber,
        message: `Skipped malformed JSONL line ${lineNumber}`
      }
    };
  }
  const isPlainObject = parsed !== null && typeof parsed === "object" && !Array.isArray(parsed);
  if (isPlainObject) {
    return { record: parsed };
  }
  return {
    record: null,
    diagnostic: {
      kind: "non_object_record",
      lineNumber,
      message: `Skipped non-object JSONL line ${lineNumber}`
    }
  };
}
|
|
93
|
+
/**
 * Splits raw JSONL text into lines and feeds every parsed object to `onRecord`.
 *
 * Blank lines are skipped silently. Lines that yield a diagnostic are collected
 * instead of being passed to the callback.
 *
 * @param raw - Full JSONL text.
 * @param onRecord - Called with (record, 1-based line number) for each object line.
 * @returns `{ diagnostics }` listing the skipped lines.
 */
function parseJsonlLines(raw, onRecord) {
  const diagnostics = [];
  raw.split(/\r?\n/).forEach((rawLine, zeroBasedIndex) => {
    const line = rawLine?.trim();
    if (!line) {
      return;
    }
    const lineNumber = zeroBasedIndex + 1;
    const { record, diagnostic } = parseJsonObjectLineWithDiagnostics(line, lineNumber);
    if (diagnostic) {
      diagnostics.push(diagnostic);
      return;
    }
    if (record) {
      onRecord(record, lineNumber);
    }
  });
  return { diagnostics };
}
|
|
114
|
+
|
|
115
|
+
// src/adapters/openclaw/transcript/tool-summarization.ts
|
|
116
|
+
// Tool names whose results shouldKeepToolResult drops when no policy overrides them.
var DEFAULT_TOOL_RESULT_DROP_NAMES = [
  "read",
  "web_fetch",
  "browser",
  "screenshot",
  "snapshot",
  "canvas",
  "tts"
];
// Tool names whose results shouldKeepToolResult keeps when no policy overrides them.
var DEFAULT_TOOL_RESULT_KEEP_NAMES = [
  "web_search",
  "memory_search",
  "memory_get",
  "image"
];
// Set views of the two lists above for constant-time membership checks.
var DEFAULT_TOOL_RESULT_DROP_NAME_SET = new Set(DEFAULT_TOOL_RESULT_DROP_NAMES);
var DEFAULT_TOOL_RESULT_KEEP_NAME_SET = new Set(DEFAULT_TOOL_RESULT_KEEP_NAMES);
|
|
120
|
+
/**
 * Narrows a value to a non-array object.
 *
 * @param value - Any value.
 * @returns The same object reference, or null for falsy values, primitives and arrays.
 */
function asRecord(value) {
  if (!value || typeof value !== "object" || Array.isArray(value)) {
    return null;
  }
  return value;
}
|
|
123
|
+
/**
 * Returns the value when it is a string with at least one non-whitespace character.
 *
 * The string is returned as-is (not trimmed).
 *
 * @param value - Any value.
 * @returns The original string, or undefined for blank strings and non-strings.
 */
function getString(value) {
  if (typeof value !== "string") {
    return void 0;
  }
  return value.trim().length === 0 ? void 0 : value;
}
|
|
126
|
+
/**
 * Truncates a string to at most `max` UTF-16 code units.
 *
 * The cut never lands between the two halves of a surrogate pair: when the
 * last kept unit would be a high surrogate whose low surrogate is cut off,
 * the whole pair is dropped so the result contains no lone surrogate.
 *
 * @param value - Text to shorten.
 * @param max - Maximum length in UTF-16 code units.
 * @returns `value` unchanged when it already fits, otherwise its truncated prefix.
 */
function truncateInline(value, max) {
  if (value.length <= max) {
    return value;
  }
  let end = max;
  if (end > 0) {
    const lastKept = value.charCodeAt(end - 1);
    const firstDropped = value.charCodeAt(end);
    const splitsPair = lastKept >= 0xd800 && lastKept <= 0xdbff && firstDropped >= 0xdc00 && firstDropped <= 0xdfff;
    if (splitsPair) {
      // Step back one unit so the astral character is removed as a whole.
      end -= 1;
    }
  }
  return value.slice(0, end);
}
|
|
132
|
+
/**
 * Finds the first non-blank string among an object's own values.
 *
 * @param args - Argument object to scan in property order.
 * @param max - Maximum length passed to truncateInline.
 * @returns The trimmed, truncated string, or undefined when none is found.
 */
function firstStringArgValue(args, max) {
  const firstText = Object.values(args).find(
    (candidate) => typeof candidate === "string" && candidate.trim().length > 0
  );
  return firstText === void 0 ? void 0 : truncateInline(firstText.trim(), max);
}
|
|
140
|
+
/**
 * Collects the entry objects carried by an agenr_store call's arguments.
 *
 * Object items of `args.entries` win when at least one exists. Otherwise
 * `args` itself is treated as a single entry if any of its type, subject,
 * content, claimKey, claim_key or supersedes fields is a non-blank string.
 *
 * @param args - Tool call arguments.
 * @returns Array of entry objects (possibly empty).
 */
function extractAgenrStoreEntries(args) {
  if (Array.isArray(args.entries)) {
    const records = args.entries.map((item) => asRecord(item)).filter((item) => item !== null);
    if (records.length > 0) {
      return records;
    }
  }
  const singleEntryFields = [args.type, args.subject, args.content, args.claimKey, args.claim_key, args.supersedes];
  const looksLikeEntry = singleEntryFields.some((field) => getString(field) !== void 0);
  return looksLikeEntry ? [args] : [];
}
|
|
153
|
+
/**
 * Builds a one-line summary of a store entry: `<type>: "<subject>"`, followed
 * by ` claim_key=<JSON string>` when the entry has a claim key.
 *
 * The subject is cut to 60 characters and the claim key to 120.
 *
 * @param entry - Entry object.
 * @returns Summary text.
 */
function summarizeAgenrStoreEntry(entry) {
  const entryType = getString(entry.type) ?? "unknown";
  const shortSubject = truncateInline(getString(entry.subject) ?? "(no subject)", 60);
  const rawClaimKey = getString(entry.claimKey) ?? getString(entry.claim_key);
  let summary = `${entryType}: "${shortSubject}"`;
  if (rawClaimKey) {
    summary += ` claim_key=${JSON.stringify(truncateInline(rawClaimKey.trim(), 120))}`;
  }
  return summary;
}
|
|
160
|
+
/**
 * Picks a short identifier describing what a tool call targeted, based on the
 * tool name (trimmed, case-insensitive) and its arguments.
 *
 * Unrecognized tools fall back to the first non-blank string argument
 * (max 80 characters) or "(unknown)".
 *
 * @param toolName - Tool name.
 * @param args - Tool call arguments.
 * @returns Identifier text.
 */
function toolIdentifier(toolName, args) {
  switch (toolName.trim().toLowerCase()) {
    case "read":
    case "edit":
    case "write":
      return getString(args.file_path) ?? getString(args.path) ?? getString(args.file) ?? "(unknown file)";
    case "exec":
      return truncateInline(getString(args.command) ?? getString(args.cmd) ?? "(unknown command)", 100);
    case "web_fetch":
      return getString(args.url) ?? "(unknown url)";
    case "web_search":
      return getString(args.query) ?? "(unknown query)";
    case "browser": {
      const browserAction = getString(args.action) ?? "(unknown action)";
      const destination = getString(args.targetUrl) ?? getString(args.url);
      return destination ? `${browserAction} ${destination}` : browserAction;
    }
    case "agenr_store": {
      const entryCount = extractAgenrStoreEntries(args).length;
      return `${entryCount} entr${entryCount === 1 ? "y" : "ies"}`;
    }
    case "agenr_recall":
      return `"${truncateInline(getString(args.query) ?? "(no query)", 80)}"`;
    case "message": {
      const messageAction = getString(args.action) ?? "(unknown action)";
      const recipient = getString(args.target) ?? getString(args.to) ?? "(unknown target)";
      return `${truncateInline(messageAction, 80)} to ${truncateInline(recipient, 80)}`;
    }
    case "sessions_spawn":
      return getString(args.label) ?? getString(args.task)?.slice(0, 60) ?? "(unknown task)";
    case "image":
      return getString(args.image) ?? getString(args.url) ?? getString(args.path) ?? "(unknown image)";
    case "canvas":
      return getString(args.action) ?? "(unknown action)";
    case "tts":
      return truncateInline(getString(args.text) ?? "(unknown text)", 50);
    default:
      return firstStringArgValue(args, 80) ?? "(unknown)";
  }
}
|
|
208
|
+
/**
 * Extracts tool call descriptors from a message content array.
 *
 * A block counts as a tool call when it has a name (name / tool / tool_name)
 * and either an explicit tool-call type, or no type at all plus an
 * arguments / args / input property.
 *
 * @param content - Message content; anything other than an array yields [].
 * @returns Array of `{ name, args, id }`; `args` defaults to `{}` and `id` may be undefined.
 */
function extractToolCallBlocks(content) {
  if (!Array.isArray(content)) {
    return [];
  }
  const explicitToolCallTypes = ["toolcall", "tool_call", "tool_use", "tooluse"];
  return content.flatMap((block) => {
    const record = asRecord(block);
    if (!record) {
      return [];
    }
    const name = getString(record.name) ?? getString(record.tool) ?? getString(record.tool_name);
    if (!name) {
      return [];
    }
    const type = typeof record.type === "string" ? record.type.trim().toLowerCase() : "";
    const isExplicitCall = explicitToolCallTypes.includes(type);
    const isUntypedCall = !type && ("arguments" in record || "args" in record || "input" in record);
    if (!isExplicitCall && !isUntypedCall) {
      return [];
    }
    const args = asRecord(record.arguments) ?? asRecord(record.args) ?? asRecord(record.input) ?? {};
    const id = getString(record.id) ?? getString(record.toolCallId) ?? getString(record.tool_call_id) ?? getString(record.call_id);
    return [{ name, args, id }];
  });
}
|
|
232
|
+
/**
 * Produces a bracketed one-line summary of a tool call.
 *
 * A caller-supplied override for the (trimmed, lower-cased) tool name is tried
 * first; its result is used when truthy. Overrides are looked up as OWN
 * properties only, so a tool that happens to be named like an inherited
 * object member (for example "constructor" or "__proto__") cannot pick up
 * something from Object.prototype as its override.
 *
 * @param call - Tool call with `name` and `args`.
 * @param options - Optional `{ overrides }` map from lower-cased tool name to
 *   a function `(call) => summary`.
 * @returns Summary text.
 */
function summarizeToolCall(call, options) {
  const normalizedToolName = call.name.trim().toLowerCase();
  const overrides = options?.overrides;
  const override = overrides != null && Object.hasOwn(overrides, normalizedToolName) ? overrides[normalizedToolName] : void 0;
  if (override) {
    const summary = override(call);
    if (summary) {
      return summary;
    }
  }
  const args = call.args;
  const filePath = getString(args.file_path) ?? getString(args.path) ?? getString(args.file) ?? "(unknown file)";
  switch (normalizedToolName) {
    case "read":
      return `[called Read: ${filePath}]`;
    case "write": {
      const content = getString(args.content) ?? getString(args.text) ?? "";
      return `[called Write: ${filePath} - ${content.length} chars]`;
    }
    case "edit": {
      const oldText = getString(args.oldText) ?? getString(args.old_string) ?? "";
      return `[called Edit: ${filePath} - replaced ${oldText.length} chars]`;
    }
    case "exec": {
      const command = getString(args.command) ?? getString(args.cmd) ?? "(unknown command)";
      return `[called exec: ${truncateInline(command, 200)}]`;
    }
    case "web_search": {
      const query = getString(args.query) ?? "(unknown query)";
      return `[called web_search: ${truncateInline(query, 200)}]`;
    }
    case "web_fetch": {
      const url = getString(args.url) ?? "(unknown url)";
      return `[called web_fetch: ${truncateInline(url, 200)}]`;
    }
    case "browser": {
      const action = getString(args.action) ?? "(unknown action)";
      return `[called browser: ${truncateInline(action, 200)}]`;
    }
    case "message": {
      const action = getString(args.action) ?? "(unknown action)";
      const target = getString(args.target) ?? getString(args.to) ?? "(unknown target)";
      return `[called message: ${truncateInline(action, 200)} to ${truncateInline(target, 200)}]`;
    }
    case "agenr_store": {
      const entries = extractAgenrStoreEntries(args);
      if (entries.length === 0) {
        return "[attempted brain store: (empty)]";
      }
      const summaries = entries.slice(0, 3).map(summarizeAgenrStoreEntry);
      const countSuffix = entries.length > 3 ? ` (+${entries.length - 3} more)` : "";
      return `[attempted brain store: ${summaries.join(", ")}${countSuffix}]`;
    }
    case "agenr_recall": {
      const query = getString(args.query) ?? "(no query)";
      return `[recalled from brain: "${truncateInline(query, 100)}"]`;
    }
    case "sessions_spawn": {
      const mode = getString(args.mode) ?? "run";
      const model = getString(args.model);
      const modelSuffix = model ? ` model=${model}` : "";
      const label = getString(args.label);
      const subject = label ? label : truncateInline(getString(args.task) ?? "(no task)", 80);
      return `[spawned sub-agent: ${subject} (${mode}${modelSuffix})]`;
    }
    default:
      break;
  }
  // Generic fallback: show the first short string argument, ignoring keys that
  // carry bulk payloads. "write" never reaches this point, so no special case
  // for it is needed here.
  const bulkArgKeys = new Set(["buffer", "content", "data", "newText", "new_string", "oldText", "old_string"]);
  const compactArgs = Object.fromEntries(Object.entries(args).filter(([key]) => !bulkArgKeys.has(key)));
  const relevantArgValue = firstStringArgValue(compactArgs, 80) ?? "(no args)";
  return `[called ${call.name}: ${relevantArgValue}]`;
}
|
|
309
|
+
/**
 * Builds the placeholder text that stands in for a filtered tool result.
 *
 * @param toolName - Tool name; a blank name is shown as "unknown".
 * @param args - Tool call arguments used to derive the identifier.
 * @returns Text of the form `[tool result from <name>: <identifier> - filtered]`.
 */
function toolResultPlaceholder(toolName, args) {
  const displayName = toolName.trim() || "unknown";
  return `[tool result from ${displayName}: ${toolIdentifier(displayName, args)} - filtered]`;
}
|
|
314
|
+
/**
 * Decides whether a tool result's text is kept in the transcript.
 *
 * Order of checks: the drop list, then the keep list, then size rules.
 * "exec" output is kept when shorter than 1000 characters or when it mentions
 * error/fail; any other tool's output is kept when shorter than 500 characters.
 *
 * @param toolName - Tool name (may be null or undefined); compared trimmed and lower-cased.
 * @param text - Result text.
 * @param policy - Optional `{ dropToolNames, keepToolNames }` sets replacing the defaults.
 * @returns `{ keep: false }` or `{ keep: true, truncateTo: 2000 }`.
 */
function shouldKeepToolResult(toolName, text, policy) {
  const name = (toolName ?? "").trim().toLowerCase();
  const droppedNames = policy?.dropToolNames ?? DEFAULT_TOOL_RESULT_DROP_NAME_SET;
  const keptNames = policy?.keepToolNames ?? DEFAULT_TOOL_RESULT_KEEP_NAME_SET;
  const keepTruncated = () => ({ keep: true, truncateTo: 2e3 });
  const drop = () => ({ keep: false });
  if (name && droppedNames.has(name)) {
    return drop();
  }
  if (name && keptNames.has(name)) {
    return keepTruncated();
  }
  if (name === "exec") {
    const worthKeeping = text.length < 1e3 || /(error|failed|fail)/i.test(text);
    return worthKeeping ? keepTruncated() : drop();
  }
  return text.length < 500 ? keepTruncated() : drop();
}
|
|
338
|
+
|
|
339
|
+
// src/adapters/openclaw/transcript/message-content.ts
|
|
340
|
+
// Content block `type` values whose string `content` field is treated as message text.
var TEXT_BLOCK_TYPES = /* @__PURE__ */ new Set([
  "input_text",
  "output_text",
  "text"
]);
|
|
341
|
+
/**
 * Collapses every run of whitespace into a single space and trims both ends.
 *
 * @param value - Text to normalize.
 * @returns Normalized text.
 */
function normalizeWhitespace(value) {
  const collapsed = value.replace(/\s+/g, " ");
  return collapsed.trim();
}
|
|
344
|
+
/**
 * Extracts whitespace-normalized text pieces from message content.
 *
 * Accepts a plain string or an array of blocks (strings, objects with a string
 * `text`, or objects with a text-like `type` and string `content`). When an
 * array yields no text at all but contained other object blocks, a single
 * "[non-text content omitted: N block(s)]" placeholder is returned instead.
 *
 * @param content - Message content.
 * @returns Array of non-empty normalized strings.
 */
function extractTextBlocks(content) {
  if (typeof content === "string") {
    const single = normalizeWhitespace(content);
    return single ? [single] : [];
  }
  if (!Array.isArray(content)) {
    return [];
  }
  const collected = [];
  const keepIfNonEmpty = (raw) => {
    const cleaned = normalizeWhitespace(raw);
    if (cleaned) {
      collected.push(cleaned);
    }
  };
  let omittedBlocks = 0;
  for (const block of content) {
    if (typeof block === "string") {
      keepIfNonEmpty(block);
      continue;
    }
    const record = asRecord(block);
    if (!record) {
      continue;
    }
    if (typeof record.text === "string") {
      keepIfNonEmpty(record.text);
      continue;
    }
    const type = typeof record.type === "string" ? record.type.trim().toLowerCase() : "";
    if (typeof record.content === "string" && TEXT_BLOCK_TYPES.has(type)) {
      keepIfNonEmpty(record.content);
      continue;
    }
    omittedBlocks += 1;
  }
  if (collected.length === 0 && omittedBlocks > 0) {
    collected.push(`[non-text content omitted: ${omittedBlocks} block${omittedBlocks === 1 ? "" : "s"}]`);
  }
  return collected;
}
|
|
388
|
+
/**
 * Extracts text pieces from message content without any normalization.
 *
 * @param content - A string, or an array of string / object blocks.
 * @returns The raw strings in order; [] for any other content.
 */
function extractRawTextBlocks(content) {
  if (typeof content === "string") {
    return [content];
  }
  if (!Array.isArray(content)) {
    return [];
  }
  return content.flatMap((block) => {
    if (typeof block === "string") {
      return [block];
    }
    const record = asRecord(block);
    if (!record) {
      return [];
    }
    if (typeof record.text === "string") {
      return [record.text];
    }
    const type = typeof record.type === "string" ? record.type.trim().toLowerCase() : "";
    const hasTextContent = typeof record.content === "string" && TEXT_BLOCK_TYPES.has(type);
    return hasTextContent ? [record.content] : [];
  });
}
|
|
416
|
+
/**
 * Converts a label to lower-case hyphenated form: runs of whitespace,
 * underscores and hyphens become one hyphen, and leading/trailing hyphens
 * are removed.
 *
 * @param value - Raw label.
 * @returns Normalized label (may be empty).
 */
function normalizeLabel(value) {
  const lowered = value.trim().toLowerCase();
  const hyphenated = lowered.replace(/[\s_-]+/g, "-");
  return hyphenated.replace(/^-+|-+$/g, "");
}
|
|
419
|
+
/**
 * Flattens message content into a single whitespace-normalized string.
 *
 * @param content - Message content accepted by extractTextBlocks.
 * @returns The text blocks joined and normalized.
 */
function normalizeMessageText(content) {
  const joined = extractTextBlocks(content).join("\n");
  return normalizeWhitespace(joined);
}
|
|
422
|
+
/**
 * Maps a raw role value onto one of "user", "assistant", "system",
 * "toolResult" or "unknown". Matching is case-insensitive and ignores
 * surrounding whitespace; non-string values map to "unknown".
 *
 * @param value - Raw role value.
 * @returns Canonical role name.
 */
function normalizeOpenClawRole(value) {
  if (typeof value !== "string") {
    return "unknown";
  }
  switch (value.trim().toLowerCase()) {
    case "user":
    case "human":
      return "user";
    case "assistant":
    case "ai":
    case "developer":
      return "assistant";
    case "system":
      return "system";
    case "tool":
    case "toolresult":
    case "tool_result":
      return "toolResult";
    default:
      return "unknown";
  }
}
|
|
441
|
+
/**
 * Truncates text to at most `maxChars` UTF-16 code units and appends a
 * "[...truncated]" marker on its own line.
 *
 * The cut never lands between the two halves of a surrogate pair: when the
 * last kept unit would be a high surrogate whose low surrogate is cut off,
 * the whole pair is dropped so the result contains no lone surrogate.
 *
 * @param text - Text to shorten.
 * @param maxChars - Maximum number of code units kept before the marker.
 * @returns `text` unchanged when it fits, otherwise the prefix plus marker.
 */
function truncateWithMarker(text, maxChars) {
  if (text.length <= maxChars) {
    return text;
  }
  let end = maxChars;
  if (end > 0) {
    const lastKept = text.charCodeAt(end - 1);
    const firstDropped = text.charCodeAt(end);
    const splitsPair = lastKept >= 0xd800 && lastKept <= 0xdbff && firstDropped >= 0xdc00 && firstDropped <= 0xdfff;
    if (splitsPair) {
      // Step back one unit so the astral character is removed as a whole.
      end -= 1;
    }
  }
  return `${text.slice(0, end)}\n[...truncated]`;
}
|
|
448
|
+
/**
 * Heuristically detects text that is nothing but a base64 payload.
 *
 * The trimmed text must be at least 500 characters long, contain at least one
 * of "+", "/" or "=", and consist solely of base64 alphabet characters and
 * whitespace.
 *
 * @param text - Text to inspect.
 * @returns True when the text looks like a pure base64 blob.
 */
function isPureBase64(text) {
  const candidate = text.trim();
  const isLongEnough = candidate.length >= 500;
  const hasBase64Punctuation = /[+/=]/.test(candidate);
  return isLongEnough && hasBase64Punctuation && /^[A-Za-z0-9+/=\s]{500,}$/.test(candidate);
}
|
|
458
|
+
/**
 * Normalizes a session label via normalizeLabel.
 *
 * @param value - Raw label.
 * @returns The normalized label, or undefined when nothing remains.
 */
function normalizeSessionLabel(value) {
  const label = normalizeLabel(value);
  return label.length === 0 ? void 0 : label;
}
|
|
462
|
+
/**
 * Looks for a `conversation_label` inside fenced code blocks of the message's raw text.
 *
 * Each fence body is parsed as JSON; the first one that is an object with a
 * non-blank `conversation_label` that survives normalization wins. Fence
 * bodies that are not valid JSON are ignored.
 *
 * @param content - Message content.
 * @returns Normalized label, or undefined when none is found.
 */
function extractConversationLabel(content) {
  const readLabel = (jsonText) => {
    try {
      const label = getString(asRecord(JSON.parse(jsonText))?.conversation_label);
      return label ? normalizeSessionLabel(label) : void 0;
    } catch {
      // Best effort: a fence that does not hold JSON simply has no label.
      return void 0;
    }
  };
  for (const block of extractRawTextBlocks(content)) {
    for (const [, fenceBody] of block.matchAll(/```(?:json)?\s*([\s\S]*?)\s*```/gi)) {
      if (!fenceBody) {
        continue;
      }
      const label = readLabel(fenceBody);
      if (label) {
        return label;
      }
    }
  }
  return void 0;
}
|
|
485
|
+
/**
 * Extracts whitespace-normalized text parts from assistant message content.
 *
 * Non-text blocks are skipped without leaving any placeholder.
 *
 * @param content - A string, or an array of string / object blocks.
 * @returns Array of non-empty normalized strings.
 */
function extractAssistantTextParts(content) {
  const cleaned = (raw) => {
    const normalized = normalizeWhitespace(raw);
    return normalized ? [normalized] : [];
  };
  if (typeof content === "string") {
    return cleaned(content);
  }
  if (!Array.isArray(content)) {
    return [];
  }
  return content.flatMap((block) => {
    if (typeof block === "string") {
      return cleaned(block);
    }
    const record = asRecord(block);
    if (!record) {
      return [];
    }
    if (typeof record.text === "string") {
      return cleaned(record.text);
    }
    const type = typeof record.type === "string" ? record.type.trim().toLowerCase() : "";
    const hasTextContent = typeof record.content === "string" && TEXT_BLOCK_TYPES.has(type);
    return hasTextContent ? cleaned(record.content) : [];
  });
}
|
|
523
|
+
/**
 * Appends a message to the list, using the list's current length as its index.
 *
 * @param messages - Array that receives the message (mutated).
 * @param role - Message role.
 * @param text - Message text.
 * @param timestamp - Message timestamp (may be undefined).
 */
function pushMessage(messages, role, text, timestamp) {
  const index = messages.length;
  messages.push({ index, role, text, timestamp });
}
|
|
531
|
+
|
|
532
|
+
// src/adapters/openclaw/transcript/timestamps.ts
|
|
533
|
+
import * as fs from "fs/promises";
|
|
534
|
+
/**
 * Converts a timestamp-like value to an ISO-8601 string.
 *
 * Non-blank strings are handed to the Date constructor. Positive finite
 * numbers are read as epoch milliseconds when greater than 1e12 and as epoch
 * seconds otherwise.
 *
 * @param value - Candidate timestamp.
 * @returns ISO string, or undefined when the value cannot be interpreted.
 */
function parseTimestampValue(value) {
  const toIso = (date) => (Number.isNaN(date.getTime()) ? void 0 : date.toISOString());
  if (typeof value === "string") {
    return value.trim().length > 0 ? toIso(new Date(value)) : void 0;
  }
  if (typeof value === "number" && Number.isFinite(value) && value > 0) {
    const epochMs = value > 1e12 ? value : value * 1e3;
    return toIso(new Date(epochMs));
  }
  return void 0;
}
|
|
550
|
+
/**
 * Reads a timestamp from a record, trying the fields timestamp, ts,
 * created_at, createdAt, time and date in that order.
 *
 * @param record - Record to inspect.
 * @returns The first field that parses, as returned by parseTimestampValue; otherwise undefined.
 */
function extractTimestamp(record) {
  const candidateFields = ["timestamp", "ts", "created_at", "createdAt", "time", "date"];
  return candidateFields
    .map((field) => parseTimestampValue(record[field]))
    .find((parsed) => Boolean(parsed));
}
|
|
559
|
+
/**
 * Reads a file's modification time as an ISO timestamp.
 *
 * @param filePath - File to stat.
 * @returns The mtime passed through parseTimestampValue, or undefined when the
 *   stat (or the conversion) fails.
 */
async function getFileMtimeTimestamp(filePath) {
  try {
    const { mtime } = await fs.stat(filePath);
    return parseTimestampValue(mtime.toISOString());
  } catch {
    // Best effort: callers fall back to another timestamp source.
    return void 0;
  }
}
|
|
567
|
+
/**
 * Resolves a fallback timestamp: the first candidate that parses, else the
 * file's modification time, else the current time.
 *
 * @param filePath - File whose mtime is the second choice.
 * @param candidates - Preferred timestamp values, tried in order.
 * @returns ISO timestamp string.
 */
async function resolveTimestampFallback(filePath, ...candidates) {
  const fromCandidates = candidates
    .map((candidate) => parseTimestampValue(candidate))
    .find((parsed) => Boolean(parsed));
  if (fromCandidates) {
    return fromCandidates;
  }
  const fileMtime = await getFileMtimeTimestamp(filePath);
  return fileMtime || (/* @__PURE__ */ new Date()).toISOString();
}
|
|
580
|
+
/**
 * Normalizes every message's timestamp in place, substituting a shared
 * fallback where a message has no parseable timestamp.
 *
 * @param filePath - Transcript file used when resolving the fallback.
 * @param messages - Messages to update (mutated).
 * @param options - Optional `{ sessionTimestamp }` preferred as the fallback.
 * @returns The fallback timestamp that was resolved.
 */
async function applyMessageTimestampFallbacks(filePath, messages, options) {
  const sharedFallback = await resolveTimestampFallback(filePath, options?.sessionTimestamp);
  for (const entry of messages) {
    const ownTimestamp = parseTimestampValue(entry.timestamp);
    entry.timestamp = ownTimestamp ?? sharedFallback;
  }
  return sharedFallback;
}
|
|
587
|
+
|
|
588
|
+
// src/adapters/openclaw/transcript/parser.ts
|
|
589
|
+
// Record `type` values listed as skipped by the transcript parser.
var SKIPPED_RECORD_TYPES = /* @__PURE__ */ new Set([
  "compaction",
  "custom",
  "thinking_level_change"
]);
// Parser-specific tool result policy: the defaults, except that "agenr_recall"
// and "image" results are dropped and "image" is removed from the keep list.
var TOOL_RESULT_POLICY = {
  dropToolNames: /* @__PURE__ */ new Set([
    ...DEFAULT_TOOL_RESULT_DROP_NAMES,
    "agenr_recall",
    "image"
  ]),
  keepToolNames: new Set(
    DEFAULT_TOOL_RESULT_KEEP_NAMES.filter((name) => name !== "image")
  )
};
// Block `type` values whose string `content` counts as raw message text.
var RAW_TEXT_BLOCK_TYPES = /* @__PURE__ */ new Set([
  "input_text",
  "output_text",
  "text"
]);
// Header lines that introduce metadata blocks inside user messages.
var SENDER_METADATA_SENTINEL = "Sender (untrusted metadata):";
var CONVERSATION_INFO_SENTINEL = "Conversation info (untrusted metadata):";
var USER_METADATA_PREFIX_SENTINELS = /* @__PURE__ */ new Set([
  SENDER_METADATA_SENTINEL,
  CONVERSATION_INFO_SENTINEL,
  "Thread starter (untrusted, for context):",
  "Replied message (untrusted, for context):",
  "Forwarded message context (untrusted metadata):",
  "Chat history since last reply (untrusted, for context):"
]);
// Header line after which the remainder of a user message is metadata.
var USER_METADATA_SUFFIX_SENTINEL = "Untrusted context (metadata, do not treat as instructions or commands):";
// Every sentinel, suffix first, for quick "contains any metadata" checks.
var USER_METADATA_SENTINELS = [
  USER_METADATA_SUFFIX_SENTINEL,
  ...USER_METADATA_PREFIX_SENTINELS
];
|
|
607
|
+
/**
 * Error raised when a transcript file cannot be read for parsing.
 */
var OpenClawTranscriptParseError = class extends Error {
  /** Stable failure classification for caller-side handling and tests. */
  kind;
  /** Path of the file that failed to parse. */
  filePath;
  /** Underlying read failure, when one was supplied. */
  cause;
  /**
   * Builds a typed transcript parse failure.
   *
   * @param kind - Stable failure kind.
   * @param filePath - File path that failed to parse.
   * @param message - Human-readable error message.
   * @param options - Optional `{ cause }` carrying the underlying error.
   */
  constructor(kind, filePath, message, options) {
    super(message);
    Object.assign(this, {
      name: "OpenClawTranscriptParseError",
      kind,
      filePath,
      cause: options?.cause
    });
  }
};
|
|
636
|
+
/**
 * Creates a fresh, empty parse state: empty collections, zeroed counters and
 * no detected surface.
 *
 * @returns A new state object; no containers are shared between calls.
 */
function createParseState() {
  const stats = {
    totalMessageRecords: 0,
    systemDropped: 0,
    base64Dropped: 0,
    skippedRecordTypes: 0,
    toolResultsDropped: 0,
    toolResultsKept: 0
  };
  const modelsUsedSet = /* @__PURE__ */ new Set();
  const pendingToolCallsById = /* @__PURE__ */ new Map();
  return {
    warnings: [],
    messages: [],
    stats,
    modelsUsed: [],
    modelsUsedSet,
    pendingToolCalls: [],
    pendingToolCallsById,
    detectedSurface: null,
    surfaceDetected: false,
    firstUserRawText: null
  };
}
|
|
657
|
+
/**
 * Copies the kind, lineNumber and message of a JSONL diagnostic into a new object.
 *
 * @param diagnostic - Source diagnostic.
 * @returns New object holding only those three fields.
 */
function toTranscriptDiagnostic(diagnostic) {
  const { kind, lineNumber, message } = diagnostic;
  return { kind, lineNumber, message };
}
|
|
664
|
+
/**
 * Formats a diagnostic as warning text; currently this is just its message.
 *
 * @param diagnostic - Diagnostic to format.
 * @returns The diagnostic's message.
 */
function formatTranscriptDiagnosticWarning(diagnostic) {
  const { message } = diagnostic;
  return message;
}
|
|
667
|
+
/**
 * Reads a transcript file as UTF-8 text, converting any read failure into an
 * OpenClawTranscriptParseError.
 *
 * @param filePath - Transcript file to read.
 * @returns The file contents.
 * @throws OpenClawTranscriptParseError of kind "missing_file" when
 *   isFileNotFound accepts the error, otherwise of kind "unreadable_file";
 *   the original error is attached as `cause`.
 */
async function readTranscriptFileStrict(filePath) {
  try {
    return await fs2.readFile(filePath, "utf8");
  } catch (error) {
    const isMissing = isFileNotFound(error);
    const kind = isMissing ? "missing_file" : "unreadable_file";
    const message = isMissing
      ? `Transcript file not found: ${filePath}`
      : `Could not read transcript file ${filePath}: ${formatErrorMessage(error)}`;
    throw new OpenClawTranscriptParseError(kind, filePath, message, { cause: error });
  }
}
|
|
679
|
+
/**
 * Returns the message's text with original whitespace intact.
 *
 * Strings are returned as-is. Arrays contribute their string blocks, `text`
 * fields and text-typed `content` fields, joined with newlines. Anything else
 * yields an empty string.
 *
 * @param content - Message content.
 * @returns Raw text.
 */
function extractRawMessageText(content) {
  if (typeof content === "string") {
    return content;
  }
  if (!Array.isArray(content)) {
    return "";
  }
  const pieces = content.flatMap((block) => {
    if (typeof block === "string") {
      return [block];
    }
    const record = asRecord(block);
    if (!record) {
      return [];
    }
    if (typeof record.text === "string") {
      return [record.text];
    }
    const type = typeof record.type === "string" ? record.type.trim().toLowerCase() : "";
    const hasTextContent = typeof record.content === "string" && RAW_TEXT_BLOCK_TYPES.has(type);
    return hasTextContent ? [record.content] : [];
  });
  return pieces.join("\n");
}
|
|
707
|
+
/**
 * Returns a user message's normalized text with metadata blocks removed.
 *
 * The raw text is only re-processed when it contains one of the known
 * metadata sentinels; otherwise the plain normalized text is returned.
 *
 * @param content - User message content.
 * @returns Normalized message text.
 */
function stripOpenClawUserMetadata(content) {
  const normalizedText = normalizeMessageText(content);
  if (!normalizedText) {
    return normalizedText;
  }
  const rawText = extractRawMessageText(content);
  const mentionsMetadata = rawText.length > 0 && USER_METADATA_SENTINELS.some((sentinel) => rawText.includes(sentinel));
  return mentionsMetadata ? normalizeMessageText(stripMetadataBlocks(rawText)) : normalizedText;
}
|
|
718
|
+
/**
 * Removes leading metadata blocks and the trailing metadata section from raw
 * user message text.
 *
 * Leading blocks are a known prefix sentinel line followed by a fenced block,
 * repeated any number of times. Everything from the suffix sentinel line
 * onward is dropped as well.
 *
 * @param text - Raw message text.
 * @returns The remaining body, trimmed (may be empty).
 */
function stripMetadataBlocks(text) {
  const lines = text.split(/\r?\n/u);
  let cursor = 0;
  while (cursor < lines.length) {
    while (cursor < lines.length && lines[cursor]?.trim().length === 0) {
      cursor += 1;
    }
    if (cursor >= lines.length) {
      return "";
    }
    const current = lines[cursor]?.trim();
    if (current === USER_METADATA_SUFFIX_SENTINEL) {
      return "";
    }
    const opensMetadataBlock = Boolean(current) && USER_METADATA_PREFIX_SENTINELS.has(current);
    if (!opensMetadataBlock) {
      break;
    }
    const afterBlock = skipMetadataJsonFence(lines, cursor);
    if (afterBlock === cursor) {
      // Sentinel without a complete fence: treat the line as ordinary body text.
      break;
    }
    cursor = afterBlock;
  }
  const remaining = lines.slice(cursor);
  const suffixOffset = remaining.findIndex((line) => line.trim() === USER_METADATA_SUFFIX_SENTINEL);
  const body = suffixOffset >= 0 ? remaining.slice(0, suffixOffset) : remaining;
  return body.join("\n").trim();
}
|
|
745
|
+
/**
 * Finds the index just past a fenced block that follows the line at `startIndex`.
 *
 * Blank lines between the start line and the opening fence, and after the closing
 * fence, are skipped. The opening fence is "```" optionally followed by "json"
 * (case-insensitive); the closing fence is a bare "```".
 *
 * @param lines - The text split into lines.
 * @param startIndex - Index of the line preceding the expected fence.
 * @returns Index of the first non-blank line after the closing fence, or `startIndex`
 *   unchanged when no opening fence follows or the fence is never closed.
 */
function skipMetadataJsonFence(lines, startIndex) {
  const openingFence = /^```(?:json)?\s*$/iu;
  const closingFence = /^```\s*$/u;
  const trimmedAt = (position) => lines[position]?.trim() ?? "";
  const skipBlankLines = (from) => {
    let position = from;
    while (position < lines.length && lines[position]?.trim().length === 0) {
      position += 1;
    }
    return position;
  };

  const fenceStart = skipBlankLines(startIndex + 1);
  if (fenceStart >= lines.length || !openingFence.test(trimmedAt(fenceStart))) {
    return startIndex;
  }
  let fenceEnd = fenceStart + 1;
  while (fenceEnd < lines.length && !closingFence.test(trimmedAt(fenceEnd))) {
    fenceEnd += 1;
  }
  if (fenceEnd >= lines.length) {
    return startIndex;
  }
  return skipBlankLines(fenceEnd + 1);
}
|
|
766
|
+
/**
 * Records a model id in the parse state, keeping first-seen order without duplicates.
 *
 * @param state - Parse state holding `modelsUsed` (ordered list) and `modelsUsedSet` (membership).
 * @param value - Candidate model id; ignored when getString yields nothing usable.
 */
function addModelUsed(state, value) {
  const candidate = getString(value);
  const isNewModel = Boolean(candidate) && !state.modelsUsedSet.has(candidate);
  if (isNewModel) {
    state.modelsUsedSet.add(candidate);
    state.modelsUsed.push(candidate);
  }
}
|
|
774
|
+
/**
 * Stores the first detected surface on the parse state; later calls are ignored.
 *
 * @param state - Parse state with `detectedSurface` and `surfaceDetected` fields.
 * @param surface - Detected surface name; falsy values are ignored.
 */
function setDetectedSurface(state, surface) {
  if (!state.surfaceDetected && surface) {
    state.detectedSurface = surface;
    state.surfaceDetected = true;
  }
}
|
|
781
|
+
/**
 * Reads `inbound_meta.surface` from a record as a trimmed, lower-cased string.
 *
 * @param record - Object that may carry an `inbound_meta` object.
 * @returns The normalized surface, or null when it is missing or blank.
 */
function readInboundSurface(record) {
  const inboundMeta = asRecord(record.inbound_meta);
  const rawSurface = getString(inboundMeta?.surface);
  const normalized = rawSurface?.trim().toLowerCase();
  return normalized ? normalized : null;
}
|
|
786
|
+
/**
 * Extracts the JSON payload of the first parseable fenced block that follows a sentinel line.
 *
 * Every line equal (after trimming) to `sentinel` is tried in order. A candidate is
 * skipped when no opening fence follows it, when the fence is never closed, or when
 * the fenced text fails to parse (or asRecord throws).
 *
 * @param rawText - Raw message text.
 * @param sentinel - Exact sentinel line that introduces the metadata block.
 * @returns The result of asRecord on the first payload that parses, or null when no
 *   candidate block parses.
 */
function extractMetadataPayload(rawText, sentinel) {
  const openingFence = /^```(?:json)?\s*$/iu;
  const closingFence = /^```\s*$/u;
  const lines = rawText.split(/\r?\n/u);
  for (const [sentinelIndex, candidate] of lines.entries()) {
    if (candidate.trim() !== sentinel) {
      continue;
    }
    let cursor = sentinelIndex + 1;
    while (cursor < lines.length && lines[cursor].trim().length === 0) {
      cursor += 1;
    }
    if (cursor >= lines.length || !openingFence.test(lines[cursor].trim())) {
      continue;
    }
    const payloadStart = cursor + 1;
    let payloadEnd = payloadStart;
    while (payloadEnd < lines.length && !closingFence.test(lines[payloadEnd].trim())) {
      payloadEnd += 1;
    }
    if (payloadEnd >= lines.length) {
      continue;
    }
    try {
      const jsonText = lines.slice(payloadStart, payloadEnd).join("\n").trim();
      return asRecord(JSON.parse(jsonText));
    } catch {
      // Malformed payload: keep scanning for a later occurrence of the sentinel.
      continue;
    }
  }
  return null;
}
|
|
817
|
+
/**
 * Maps an identifier to a known surface name by substring match.
 *
 * Rules are checked in order, so an identifier containing several markers resolves
 * to the earliest rule that matches.
 *
 * @param value - Identifier to classify (callers in this file pass lower-cased text).
 * @returns "telegram", "signal", "discord", "tui", "webchat", or null when nothing matches.
 */
function mapKnownSurface(value) {
  if (!value) {
    return null;
  }
  const surfaceRules = [
    ["telegram", ["telegram"]],
    ["signal", ["signal"]],
    ["discord", ["discord"]],
    ["tui", ["openclaw-tui"]],
    ["webchat", ["gateway-client", "openclaw-control-ui", "webchat"]]
  ];
  for (const [surface, markers] of surfaceRules) {
    if (markers.some((marker) => value.includes(marker))) {
      return surface;
    }
  }
  return null;
}
|
|
838
|
+
/**
 * Derives the surface from the sender metadata block of a raw user message.
 *
 * The payload's `label` is preferred; `id` is used when the label is missing or
 * blank. (A whitespace-only label used to suppress the id fallback, because an
 * empty string is not nullish.)
 *
 * @param rawText - Raw user message text.
 * @returns The surface mapKnownSurface reports for the label/id, or null when no
 *   sender payload is found or it maps to no known surface.
 */
function extractSenderSurface(rawText) {
  const payload = extractMetadataPayload(rawText, SENDER_METADATA_SENTINEL);
  if (!payload) {
    return null;
  }
  const normalize = (value) => getString(value)?.trim().toLowerCase() ?? "";
  // `||` rather than `??`: an empty label must fall through to the id.
  const identifier = normalize(payload.label) || normalize(payload.id);
  return mapKnownSurface(identifier);
}
|
|
846
|
+
/**
 * Derives the surface from the conversation-info metadata block of a raw user message.
 *
 * @param rawText - Raw user message text.
 * @returns The surface mapKnownSurface reports for the payload's `sender_id`, or null
 *   when no payload is found or the id maps to no known surface.
 */
function extractConversationInfoSurface(rawText) {
  const payload = extractMetadataPayload(rawText, CONVERSATION_INFO_SENTINEL);
  return payload ? mapKnownSurface(getString(payload.sender_id)?.trim().toLowerCase() ?? "") : null;
}
|
|
854
|
+
/**
 * Infers a surface from marker text inside the first user message.
 *
 * @param firstUserRawText - Raw text of the first user message; may be null or undefined.
 * @returns "subagent" when the text contains "[subagent context]", "heartbeat" when it
 *   contains "heartbeat.md" (both case-insensitive, checked in that order), else null.
 */
function inferSurfaceFromContent(firstUserRawText) {
  const haystack = firstUserRawText?.trim().toLowerCase() ?? "";
  if (!haystack) {
    return null;
  }
  const contentMarkers = [
    ["[subagent context]", "subagent"],
    ["heartbeat.md", "heartbeat"]
  ];
  const match = contentMarkers.find(([marker]) => haystack.includes(marker));
  return match ? match[1] : null;
}
|
|
867
|
+
/**
 * Finds the pending tool call that a tool-result message answers.
 *
 * The call id is read from `toolCallId`, `tool_call_id`, `call_id`, then `id`. When
 * that id is indexed in `state.pendingToolCallsById`, the indexed call is removed from
 * both the index and the queue and returned. Otherwise the oldest queued call is
 * taken (first in, first out).
 *
 * @param state - Parse state with `pendingToolCalls` (queue) and `pendingToolCallsById` (Map).
 * @param message - Tool-result message.
 * @returns The matching tool call, or null when nothing is pending.
 */
function resolveToolContext(state, message) {
  let callId;
  for (const key of ["toolCallId", "tool_call_id", "call_id", "id"]) {
    callId = getString(message[key]);
    if (callId !== null && callId !== undefined) {
      break;
    }
  }
  if (!callId || !state.pendingToolCallsById.has(callId)) {
    return state.pendingToolCalls.shift() ?? null;
  }
  const matched = state.pendingToolCallsById.get(callId) ?? null;
  state.pendingToolCallsById.delete(callId);
  if (matched) {
    const queuePosition = state.pendingToolCalls.findIndex((pending) => pending.id === callId);
    if (queuePosition >= 0) {
      state.pendingToolCalls.splice(queuePosition, 1);
    }
  }
  return matched;
}
|
|
882
|
+
/**
 * Processes one transcript record that carries a message and appends its usable
 * content to `state.messages`.
 *
 * Surface detection runs first (inbound metadata of the message, then the sender and
 * conversation-info metadata of user messages) and stops once a surface is recorded.
 * System messages are counted and skipped. User, assistant and toolResult messages
 * are skipped when empty or pure base64; otherwise they are pushed. Tool results are
 * pushed with the "assistant" role, either as their text or as a placeholder.
 *
 * @param state - Mutable parse state (stats, messages, pending tool calls, surface fields).
 * @param record - The enclosing transcript record; consulted for timestamp and tool name.
 * @param message - The message payload of the record.
 * @returns "accepted", "known_skip", or "structurally_invalid" (unrecognized role).
 */
function handleMessageRecord(state, record, message) {
  state.stats.totalMessageRecords += 1;
  const role = normalizeOpenClawRole(message.role);

  if (!state.surfaceDetected) {
    setDetectedSurface(state, readInboundSurface(message));
  }
  if (role === "user" && !state.surfaceDetected) {
    const rawUserText = extractRawMessageText(message.content);
    if (state.firstUserRawText === null) {
      state.firstUserRawText = rawUserText;
    }
    setDetectedSurface(state, extractSenderSurface(rawUserText));
    if (!state.surfaceDetected) {
      setDetectedSurface(state, extractConversationInfoSurface(rawUserText));
    }
  }

  if (role === "system") {
    state.stats.systemDropped += 1;
    return "known_skip";
  }

  const timestamp = extractTimestamp(record) ?? extractTimestamp(message);
  // Counts and reports pure-base64 payloads so each branch can skip them uniformly.
  const dropIfBase64 = (text) => {
    if (!isPureBase64(text)) {
      return false;
    }
    state.stats.base64Dropped += 1;
    return true;
  };

  switch (role) {
    case "user": {
      const conversationLabel = extractConversationLabel(message.content);
      if (conversationLabel) {
        state.sessionLabel = conversationLabel;
      }
      const userText = stripOpenClawUserMetadata(message.content);
      if (!userText || dropIfBase64(userText)) {
        return "known_skip";
      }
      pushMessage(state.messages, "user", userText, timestamp);
      return "accepted";
    }
    case "assistant": {
      const toolCalls = extractToolCallBlocks(message.content);
      for (const call of toolCalls) {
        // Queue every call; index by id when available so results can be matched exactly.
        state.pendingToolCalls.push(call);
        if (call.id) {
          state.pendingToolCallsById.set(call.id, call);
        }
      }
      const textParts = extractAssistantTextParts(message.content);
      const callSummaries = toolCalls.map((call) => summarizeToolCall(call));
      const assistantText = [...textParts, ...callSummaries].join(" ").trim();
      addModelUsed(state, message.model);
      if (!assistantText || dropIfBase64(assistantText)) {
        return "known_skip";
      }
      pushMessage(state.messages, "assistant", truncateWithMarker(assistantText, 5000), timestamp);
      return "accepted";
    }
    case "toolResult": {
      const toolContext = resolveToolContext(state, message);
      const toolName = getString(message.name) ?? getString(message.tool) ?? getString(record.name) ?? getString(record.tool) ?? toolContext?.name;
      const toolArgs = toolContext?.args ?? {};
      const toolText = normalizeMessageText(message.content);
      if (!toolText || dropIfBase64(toolText)) {
        return "known_skip";
      }
      const decision = shouldKeepToolResult(toolName, toolText, TOOL_RESULT_POLICY);
      if (!decision.keep) {
        state.stats.toolResultsDropped += 1;
        pushMessage(state.messages, "assistant", toolResultPlaceholder(toolName ?? "unknown", toolArgs), timestamp);
        return "accepted";
      }
      state.stats.toolResultsKept += 1;
      const keptText = decision.truncateTo ? truncateWithMarker(toolText, decision.truncateTo) : toolText;
      pushMessage(state.messages, "assistant", keptText, timestamp);
      return "accepted";
    }
    default:
      return "structurally_invalid";
  }
}
|
|
963
|
+
/**
 * Dispatches one parsed transcript record.
 *
 * "session" records update session metadata on the state. "model_change" records and
 * record types listed in SKIPPED_RECORD_TYPES are counted as skipped. Anything else
 * must carry a `message` object, which is handed to handleMessageRecord.
 *
 * @param state - Mutable parse state.
 * @param record - One parsed transcript record.
 * @returns "accepted", "known_skip", or "structurally_invalid".
 */
function handleRecord(state, record) {
  const detectSurfaceFromRecord = () => {
    if (!state.surfaceDetected) {
      setDetectedSurface(state, readInboundSurface(record));
    }
  };

  if (record.type === "session") {
    // Each field keeps its previous value when the record does not supply one.
    state.sessionId = getString(record.id) ?? state.sessionId;
    state.sessionTimestamp = extractTimestamp(record) ?? state.sessionTimestamp;
    state.sessionLabel = normalizeSessionLabel(getString(record.conversation_label) ?? "") ?? state.sessionLabel;
    state.workingDirectory = getString(record.cwd) ?? state.workingDirectory;
    addModelUsed(state, record.model);
    detectSurfaceFromRecord();
    return "accepted";
  }

  detectSurfaceFromRecord();

  const isModelChange = record.type === "model_change";
  if (isModelChange) {
    addModelUsed(state, record.modelId);
  }
  if (isModelChange || (typeof record.type === "string" && SKIPPED_RECORD_TYPES.has(record.type))) {
    state.stats.skippedRecordTypes += 1;
    return "known_skip";
  }

  const message = asRecord(record.message);
  return message ? handleMessageRecord(state, record, message) : "structurally_invalid";
}
|
|
993
|
+
/**
 * Formats the one-line filtering summary from the parser's counters.
 *
 * @param stats - Counters: toolResultsDropped, toolResultsKept, systemDropped, base64Dropped.
 * @returns The summary sentence.
 */
function buildFilterWarning(stats) {
  const summaryParts = [
    `${stats.toolResultsDropped} tool results dropped`,
    `${stats.toolResultsKept} kept`,
    `${stats.systemDropped} system dropped`,
    `${stats.base64Dropped} base64 dropped`
  ];
  return `Filtered transcript: ${summaryParts.join(", ")}.`;
}
|
|
996
|
+
/**
 * Tells whether a caught value is an error object whose `code` is "ENOENT".
 *
 * @param error - Any caught value.
 * @returns True only for non-null objects with `code === "ENOENT"`.
 */
function isFileNotFound(error) {
  if (error === null || typeof error !== "object") {
    return false;
  }
  return "code" in error && error.code === "ENOENT";
}
|
|
999
|
+
/**
 * Turns any thrown value into a printable message.
 *
 * @param error - Any caught value.
 * @returns `error.message` for Error instances, otherwise `String(error)`.
 */
function formatErrorMessage(error) {
  return error instanceof Error ? error.message : String(error);
}
|
|
1005
|
+
var OpenClawTranscriptParser = class {
  /**
   * Reads an OpenClaw JSONL transcript and converts it into agenr transcript data.
   *
   * The file's SHA-256 hash is computed over the raw content. Records that
   * handleRecord reports as structurally invalid, plus the JSONL reader's own
   * diagnostics, become warnings. When no surface was found in metadata, one is
   * inferred from the first user message.
   *
   * @param filePath - Path of the transcript file to read.
   * @param options - Optional flags; `verbose: true` appends a filtering summary warning.
   * @returns An object with `messages`, `warnings`, and `metadata`.
   */
  async parseFile(filePath, options) {
    const rawTranscript = await readTranscriptFileStrict(filePath);
    const wantsFilterSummary = options?.verbose === true;
    const state = createParseState();
    const transcriptHash = createHash("sha256").update(rawTranscript).digest("hex");

    const diagnostics = [];
    const jsonlResult = parseJsonlLines(rawTranscript, (record, lineNumber) => {
      if (handleRecord(state, record) !== "structurally_invalid") {
        return;
      }
      diagnostics.push({
        kind: "structurally_invalid_record",
        lineNumber,
        message: `Skipped structurally invalid transcript record on line ${lineNumber}`
      });
    });
    diagnostics.push(...jsonlResult.diagnostics.map(toTranscriptDiagnostic));
    state.warnings.push(...diagnostics.map(formatTranscriptDiagnosticWarning));

    if (!state.surfaceDetected && state.firstUserRawText) {
      setDetectedSurface(state, inferSurfaceFromContent(state.firstUserRawText));
    }

    let fallbackTimestamp;
    if (state.messages.length > 0) {
      fallbackTimestamp = await applyMessageTimestampFallbacks(filePath, state.messages, { sessionTimestamp: state.sessionTimestamp });
    } else {
      fallbackTimestamp = await resolveTimestampFallback(filePath, state.sessionTimestamp);
    }

    if (wantsFilterSummary) {
      state.warnings.push(buildFilterWarning(state.stats));
    }

    const firstMessage = state.messages[0];
    const lastMessage = state.messages[state.messages.length - 1];
    const startedAt = state.sessionTimestamp ?? firstMessage?.timestamp ?? fallbackTimestamp;
    const endedAt = lastMessage?.timestamp ?? state.sessionTimestamp ?? fallbackTimestamp;
    // The source identity falls back to an id derived from the file path when the
    // transcript itself never declared a session id.
    const stableSessionId = state.sessionId ?? deriveOpenClawSessionIdFromFilePath(filePath);
    const hasStableSessionId = Boolean(stableSessionId);

    return {
      messages: state.messages,
      warnings: state.warnings,
      metadata: {
        sessionId: state.sessionId,
        sessionLabel: state.sessionLabel,
        startedAt,
        endedAt,
        messageCount: state.messages.length,
        transcriptHash,
        modelsUsed: state.modelsUsed.length > 0 ? state.modelsUsed : void 0,
        reconstructedSurface: state.detectedSurface,
        surfaceReconstructionSource: state.surfaceDetected ? "reconstructed" : "none",
        sourceIdentity: hasStableSessionId ? `openclaw-session:${stableSessionId}` : void 0,
        sourceIdentityKind: hasStableSessionId ? "openclaw_session" : void 0,
        workingDirectory: state.workingDirectory
      }
    };
  }
};

// Shared parser instance.
var openClawTranscriptParser = new OpenClawTranscriptParser();
|
|
1062
|
+
|
|
1063
|
+
// src/adapters/db/openclaw-repository.ts
|
|
1064
|
+
// JSON text of an all-zero vector with EMBEDDING_DIMENSIONS components; used as the
// query vector when probing whether the vector index is usable.
var ZERO_VECTOR = JSON.stringify(Array.from({ length: EMBEDDING_DIMENSIONS }).fill(0));
|
|
1065
|
+
/**
 * Builds the repository object used by the OpenClaw adapter, binding every query
 * function in this module to one executor.
 *
 * @param executor - Database executor passed through to each query function.
 * @param options - Optional settings; `claimSlotPolicyConfig` is read on every
 *   `getEntryTrace` call and forwarded to it.
 * @returns An object of async query methods.
 */
function createOpenClawRepository(executor, options = {}) {
  // Inside each method body the identifier resolves to the module-level function of
  // the same name, because method shorthand does not bind its own name in scope.
  return {
    async listCoreEntries(limit) {
      return listCoreEntries(executor, limit);
    },
    async findEntryBySubject(subject) {
      return findEntryBySubject(executor, subject);
    },
    async findMostRecentEntry() {
      return findMostRecentEntry(executor);
    },
    async getEntryTrace(entryId) {
      return getEntryTrace(executor, entryId, options.claimSlotPolicyConfig);
    },
    async getMemoryStatusSnapshot() {
      return getMemoryStatusSnapshot(executor);
    },
    async probeVectorAvailability() {
      return probeVectorAvailability(executor);
    }
  };
}
|
|
1075
|
+
/**
 * Lists active entries whose expiry is 'core', most important first, then newest first.
 *
 * @param executor - Database executor exposing `execute({ sql, args })`.
 * @param limit - Maximum number of rows; values <= 0 return an empty list without querying.
 * @returns The mapped entries.
 */
async function listCoreEntries(executor, limit) {
  if (limit <= 0) {
    return [];
  }
  const statement = {
    sql: `
      SELECT
        ${ENTRY_SELECT_COLUMNS}
      FROM entries
      WHERE ${buildActiveEntryClause()}
        AND expiry = 'core'
      ORDER BY importance DESC, created_at DESC
      LIMIT ?
    `,
    args: [limit]
  };
  const { rows } = await executor.execute(statement);
  return rows.map((row) => mapEntryRow(row));
}
|
|
1093
|
+
/**
 * Finds the best entry for a subject: a case-insensitive exact match ranks above a
 * case-insensitive substring match; ties go to the newest entry.
 *
 * LIKE metacharacters in the subject ("%", "_") and the escape character itself are
 * escaped, so the substring match treats the subject as literal text. Without this,
 * a subject such as "claim_key" would also match "claimXkey".
 *
 * @param executor - Database executor exposing `execute({ sql, args })`.
 * @param subject - Subject text to look up; surrounding whitespace is ignored.
 * @returns The mapped entry, or null when the subject is blank or nothing matches.
 */
async function findEntryBySubject(executor, subject) {
  const normalizedSubject = subject.trim();
  if (normalizedSubject.length === 0) {
    return null;
  }
  // Prefix "\", "%" and "_" with a backslash; the query declares ESCAPE '\' to match.
  const escapedSubject = normalizedSubject.replace(/[\\%_]/g, "\\$&");
  const containsPattern = `%${escapedSubject}%`;
  const result = await executor.execute({
    sql: `
      SELECT
        ${ENTRY_SELECT_COLUMNS},
        CASE
          WHEN lower(subject) = lower(?) THEN 0
          WHEN lower(subject) LIKE lower(?) ESCAPE '\\' THEN 1
          ELSE 2
        END AS match_rank
      FROM entries
      WHERE lower(subject) = lower(?)
        OR lower(subject) LIKE lower(?) ESCAPE '\\'
      ORDER BY match_rank ASC, created_at DESC
      LIMIT 1
    `,
    args: [normalizedSubject, containsPattern, normalizedSubject, containsPattern]
  });
  const row = result.rows[0];
  return row ? mapEntryRow(row) : null;
}
|
|
1118
|
+
/**
 * Fetches the entry with the latest `created_at`. The query has no WHERE clause, so
 * every row of `entries` is a candidate.
 *
 * @param executor - Database executor exposing `execute({ sql })`.
 * @returns The mapped entry, or null when the table is empty.
 */
async function findMostRecentEntry(executor) {
  const statement = {
    sql: `
      SELECT
        ${ENTRY_SELECT_COLUMNS}
      FROM entries
      ORDER BY created_at DESC
      LIMIT 1
    `
  };
  const { rows } = await executor.execute(statement);
  const newest = rows[0];
  return newest ? mapEntryRow(newest) : null;
}
|
|
1131
|
+
/**
 * Assembles the trace for one entry: the entry itself, the entry that superseded it,
 * the entries it supersedes, its claim-key family, and its recall events.
 *
 * The four related lookups are started together and awaited as a group.
 *
 * @param executor - Database executor.
 * @param entryId - Id of the entry to trace (inactive entries included).
 * @param claimSlotPolicyConfig - Forwarded to getClaimFamily.
 * @returns The trace object, or null when the entry does not exist. `supersededBy`
 *   and `claimFamily` are present only when they resolved to a value.
 */
async function getEntryTrace(executor, entryId, claimSlotPolicyConfig) {
  const entry = await getEntryByIdIncludingInactive(executor, entryId);
  if (!entry) {
    return null;
  }
  const supersededByLookup = entry.superseded_by ? getEntryByIdIncludingInactive(executor, entry.superseded_by) : Promise.resolve(null);
  const supersedesLookup = listSupersededEntries(executor, entry.id);
  const claimFamilyLookup = entry.claim_key ? getClaimFamily(executor, entry.claim_key, claimSlotPolicyConfig) : Promise.resolve(void 0);
  const recallEventsLookup = listRecallEvents(executor, entry.id);
  const [supersededBy, supersedes, claimFamily, recallEvents] = await Promise.all([
    supersededByLookup,
    supersedesLookup,
    claimFamilyLookup,
    recallEventsLookup
  ]);

  const trace = { entry };
  if (supersededBy) {
    trace.supersededBy = supersededBy;
  }
  trace.supersedes = supersedes;
  if (claimFamily) {
    trace.claimFamily = claimFamily;
  }
  trace.recallEvents = recallEvents;
  return trace;
}
|
|
1150
|
+
/**
 * Counts active entries, active core-expiry entries, and distinct source files.
 *
 * @param executor - Database executor exposing `execute({ sql })`.
 * @returns `{ activeEntries, coreEntries, sourceFiles }`; all zero when the query
 *   returns no row, and each value falls back to 0 via readNumber.
 */
async function getMemoryStatusSnapshot(executor) {
  const result = await executor.execute({
    sql: `
      SELECT
        COUNT(*) AS active_entries,
        SUM(CASE WHEN expiry = 'core' THEN 1 ELSE 0 END) AS core_entries,
        COUNT(DISTINCT source_file) AS source_files
      FROM entries
      WHERE ${buildActiveEntryClause()}
    `
  });
  const summaryRow = result.rows[0];
  const readCount = (column) => (summaryRow ? readNumber(summaryRow, column, 0) : 0);
  return {
    activeEntries: readCount("active_entries"),
    coreEntries: readCount("core_entries"),
    sourceFiles: readCount("source_files")
  };
}
|
|
1175
|
+
/**
 * Checks whether vector search works by running a one-row `vector_top_k` query
 * against VECTOR_INDEX_NAME with an all-zero vector.
 *
 * This is a best-effort probe: any failure of the query is reported as `false`
 * rather than thrown.
 *
 * @param executor - Database executor exposing `execute({ sql, args })`.
 * @returns True when the probe query succeeds, false when it throws.
 */
async function probeVectorAvailability(executor) {
  let available = true;
  try {
    await executor.execute({
      sql: `
        SELECT COUNT(*) AS matches
        FROM vector_top_k('${VECTOR_INDEX_NAME}', vector32(?), ?) AS matches
      `,
      args: [ZERO_VECTOR, 1]
    });
  } catch {
    available = false;
  }
  return available;
}
|
|
1189
|
+
/**
 * Loads one entry by id. The query filters on id only, so inactive entries are
 * returned too.
 *
 * @param executor - Database executor exposing `execute({ sql, args })`.
 * @param entryId - Entry id; surrounding whitespace is ignored.
 * @returns The mapped entry, or null when the id is blank or unknown.
 */
async function getEntryByIdIncludingInactive(executor, entryId) {
  const lookupId = entryId.trim();
  if (lookupId.length === 0) {
    return null;
  }
  const statement = {
    sql: `
      SELECT
        ${ENTRY_SELECT_COLUMNS}
      FROM entries
      WHERE id = ?
      LIMIT 1
    `,
    args: [lookupId]
  };
  const { rows } = await executor.execute(statement);
  const found = rows[0];
  return found ? mapEntryRow(found) : null;
}
|
|
1207
|
+
/**
 * Lists the entries whose `superseded_by` points at the given entry, newest first.
 *
 * @param executor - Database executor exposing `execute({ sql, args })`.
 * @param entryId - Id of the superseding entry.
 * @returns The mapped entries (possibly empty).
 */
async function listSupersededEntries(executor, entryId) {
  const statement = {
    sql: `
      SELECT
        ${ENTRY_SELECT_COLUMNS}
      FROM entries
      WHERE superseded_by = ?
      ORDER BY created_at DESC
    `,
    args: [entryId]
  };
  const { rows } = await executor.execute(statement);
  return rows.map((row) => mapEntryRow(row));
}
|
|
1220
|
+
/**
 * Loads every entry sharing a claim key (oldest first, ties broken by id) together
 * with the slot policy resolved for that key.
 *
 * @param executor - Database executor exposing `execute({ sql, args })`.
 * @param claimKey - Claim key; surrounding whitespace is ignored.
 * @param claimSlotPolicyConfig - Forwarded to resolveClaimSlotPolicy.
 * @returns `{ claimKey, slotPolicy, slotPolicyReason, entries }`, or undefined when
 *   the claim key is blank.
 */
async function getClaimFamily(executor, claimKey, claimSlotPolicyConfig) {
  const familyKey = claimKey.trim();
  if (familyKey.length === 0) {
    return void 0;
  }
  const { rows } = await executor.execute({
    sql: `
      SELECT
        ${ENTRY_SELECT_COLUMNS}
      FROM entries
      WHERE claim_key = ?
      ORDER BY created_at ASC, id ASC
    `,
    args: [familyKey]
  });
  const entries = rows.map((row) => mapEntryRow(row));
  const { policy, reason } = resolveClaimSlotPolicy(familyKey, claimSlotPolicyConfig);
  return {
    claimKey: familyKey,
    slotPolicy: policy,
    slotPolicyReason: reason,
    entries
  };
}
|
|
1244
|
+
/**
 * Lists the ten most recent recall events recorded for an entry.
 *
 * @param executor - Database executor exposing `execute({ sql, args })`.
 * @param entryId - Id of the entry.
 * @returns Objects of the form `{ query, sessionKey, recalledAt }`, newest first.
 */
async function listRecallEvents(executor, entryId) {
  const statement = {
    sql: `
      SELECT
        query,
        session_key,
        recalled_at
      FROM recall_events
      WHERE entry_id = ?
      ORDER BY recalled_at DESC
      LIMIT 10
    `,
    args: [entryId]
  };
  const { rows } = await executor.execute(statement);
  return rows.map((row) => {
    const query = readOptionalString(row, "query");
    const sessionKey = readOptionalString(row, "session_key");
    const recalledAt = readRequiredString(row, "recalled_at");
    return { query, sessionKey, recalledAt };
  });
}
|
|
1264
|
+
|
|
1265
|
+
// src/core/store/pipeline.ts
|
|
1266
|
+
import { randomUUID } from "crypto";
|
|
1267
|
+
|
|
1268
|
+
// src/core/supersession.ts
|
|
1269
|
+
/**
 * Checks whether one entry may supersede another.
 *
 * Rules are checked in order: the types must match, the old entry must not be a
 * milestone, and the old entry must not have core expiry.
 *
 * @param oldEntry - Entry that would be superseded (`type`, `expiry` are read).
 * @param newEntry - Entry that would supersede it (`type` is read).
 * @returns `{ ok: true }`, or `{ ok: false, reason }` where reason is
 *   "type_mismatch", "milestone", or "core_expiry".
 */
function validateSupersessionRules(oldEntry, newEntry) {
  let reason = null;
  if (oldEntry.type !== newEntry.type) {
    reason = "type_mismatch";
  } else if (oldEntry.type === "milestone") {
    reason = "milestone";
  } else if (oldEntry.expiry === "core") {
    reason = "core_expiry";
  }
  return reason === null ? { ok: true } : { ok: false, reason };
}
|
|
1292
|
+
/**
 * Returns the human-readable explanation for a supersession rule failure.
 *
 * @param reason - "type_mismatch", "milestone", or "core_expiry".
 * @returns The explanation sentence; undefined for any other reason.
 */
function describeSupersessionRuleFailure(reason) {
  const explanations = new Map([
    ["type_mismatch", "Supersession requires both entries to have the same type."],
    ["milestone", "Milestone entries are never superseded automatically."],
    ["core_expiry", "Core-expiry entries are never superseded automatically."]
  ]);
  return explanations.get(reason);
}
|
|
1302
|
+
|
|
1303
|
+
// src/core/claim-key-entity-family.ts
|
|
1304
|
+
// Common English function words. NOTE(review): judging by the name, these are ignored
// when grounding entity-family evidence; the consumer is outside this section.
var ENTITY_FAMILY_GROUNDING_STOP_TOKENS = /* @__PURE__ */ new Set([
  "a", "an", "and", "are", "as", "at",
  "be", "by",
  "for", "from",
  "in", "into", "is", "it",
  "of", "on", "or",
  "that", "the", "their", "this", "to",
  "was", "with"
]);
|
|
1330
|
+
// Tuning constants for claim-key entity-family detection.
// NOTE(review): the first three are consumed outside this section; the notes on them
// are inferred from the names and should be confirmed against their call sites.

// Presumably the largest attribute bucket considered when pairing entities.
var MAX_ATTRIBUTE_BUCKET_SIZE = 12;
// Presumably the cap on evidence values kept per pairing.
var MAX_EVIDENCE_VALUES = 6;
// Presumably the lead a candidate needs to be selected as the canonical prefix.
var CANONICAL_SELECTION_MARGIN = 3;
// A prefix counts as a possible singleton alias only with at most this many active entries.
var SINGLETON_ALIAS_MAX_FAMILY_SIZE = 2;
// A prefix counts as a dominant family only with at least this many trusted entries.
var SINGLETON_ALIAS_MIN_DOMINANT_TRUSTED_COUNT = 3;
// Presumably the minimum confidence gap between competing dominant candidates.
var SINGLETON_ALIAS_MIN_CONFIDENCE_DELTA = 0.05;
|
|
1336
|
+
// Generic scope nouns. NOTE(review): judging by the name, these mark entity prefixes
// that name a kind of scope during singleton-alias evaluation; the consumer is outside
// this section.
var SINGLETON_ALIAS_SCOPE_TOKENS = /* @__PURE__ */ new Set([
  "agent", "app",
  "branch", "build",
  "cluster",
  "daemon", "device",
  "env", "environment",
  "gateway",
  "host",
  "machine",
  "node",
  "plugin", "project",
  "repo", "repository",
  "server", "service", "session", "system",
  "workspace"
]);
|
|
1360
|
+
/**
 * Groups entity prefixes that appear to name the same entity into candidate families.
 *
 * Prefixes linked by pair support form a graph; each connected component with at
 * least two prefixes becomes one family. A family auto-converges only when a
 * canonical prefix was selected and every other member has auto-safe pair support
 * with it.
 *
 * @param entries - Entries to analyse; profiles are built by buildTrustedClaimKeyEntityProfiles.
 * @returns Families sorted by confidence (highest first), then by their joined prefixes.
 */
function detectClaimKeyEntityFamilyCandidates(entries) {
  const profiles = buildTrustedClaimKeyEntityProfiles(entries);
  if (profiles.size < 2) {
    return [];
  }
  const pairSupport = buildPairSupport(profiles);
  if (pairSupport.length === 0) {
    return [];
  }

  // Undirected graph: an edge joins two prefixes that share pair support.
  const adjacency = /* @__PURE__ */ new Map();
  for (const { entityPrefixes: [leftEntity, rightEntity] } of pairSupport) {
    getOrCreateSet(adjacency, leftEntity).add(rightEntity);
    getOrCreateSet(adjacency, rightEntity).add(leftEntity);
  }

  const compareByPrefixKey = (left, right) => {
    const leftKey = left.entityPrefixes.join("::");
    const rightKey = right.entityPrefixes.join("::");
    return leftKey.localeCompare(rightKey);
  };

  const visited = /* @__PURE__ */ new Set();
  const families = [];
  for (const seedPrefix of adjacency.keys()) {
    if (visited.has(seedPrefix)) {
      continue;
    }
    const component = collectConnectedEntityComponent(seedPrefix, adjacency, visited);
    if (component.length < 2) {
      continue;
    }
    const members = new Set(component);
    const componentSupport = pairSupport.filter((support) => {
      const [leftEntity, rightEntity] = support.entityPrefixes;
      return members.has(leftEntity) && members.has(rightEntity);
    });
    const canonicalSelection = selectCanonicalEntityPrefix(component, componentSupport, profiles);
    const canonicalEntityPrefix = canonicalSelection.canonicalEntityPrefix;

    let autoConverge = false;
    if (canonicalEntityPrefix !== null) {
      const aliases = component.filter((entity) => entity !== canonicalEntityPrefix);
      autoConverge = aliases.every((entity) => findPairSupport(componentSupport, canonicalEntityPrefix, entity)?.autoSafe === true);
    }

    const componentProfiles = component.map((entity) => profiles.get(entity)).filter((profile) => Boolean(profile));
    const entryIds = normalizeStringArray(componentProfiles.flatMap((profile) => [...profile.entryIds]));
    const claimKeys = normalizeStringArray(componentProfiles.flatMap((profile) => [...profile.claimKeys]));
    const confidence = componentSupport.length > 0 ? Math.max(...componentSupport.map((support) => support.confidence)) : 0.75;
    const fallbackReason = autoConverge ? null : "Entity-family evidence is strong enough to stage, but not every alias mapping is low-risk.";

    families.push({
      entityPrefixes: [...component].sort((left, right) => left.localeCompare(right)),
      entryIds,
      claimKeys,
      canonicalEntityPrefix,
      canonicalSelectionReasons: canonicalSelection.reasons,
      confidence,
      autoConverge,
      unresolvedReason: canonicalSelection.unresolvedReason ?? fallbackReason,
      // componentSupport is this component's own filtered array, so sorting it in place is safe.
      pairSupport: componentSupport.sort(compareByPrefixKey)
    });
  }

  return families.sort((left, right) => {
    if (right.confidence !== left.confidence) {
      return right.confidence - left.confidence;
    }
    return compareByPrefixKey(left, right);
  });
}
|
|
1425
|
+
/**
 * Tallies observations per claim-key entity prefix.
 *
 * Observations with a blank claim key, or whose key inspectClaimKey cannot normalize,
 * are ignored. Each remaining observation increments the active count, one status
 * counter (statuses other than trusted/tentative/unresolved count as legacy), and one
 * source counter when the source is recognized.
 *
 * @param observations - Objects with `claim_key`, `claim_key_status`, `claim_key_source`.
 * @returns One stats object per prefix, sorted by active count, then trusted count
 *   (both descending), then prefix.
 */
function summarizeClaimKeyEntityPrefixStats(observations) {
  const statusCounters = new Map([
    ["trusted", "trustedEntryCount"],
    ["tentative", "tentativeEntryCount"],
    ["unresolved", "unresolvedEntryCount"]
  ]);
  const sourceCounters = new Map([
    ["deterministic_repair", "deterministicRepairEntryCount"],
    ["manual", "manualEntryCount"],
    ["model", "modelEntryCount"],
    ["json_retry", "jsonRetryEntryCount"],
    ["surgeon_family_reuse", "surgeonFamilyReuseEntryCount"]
  ]);

  const statsByPrefix = /* @__PURE__ */ new Map();
  for (const observation of observations) {
    const rawClaimKey = observation.claim_key?.trim();
    if (!rawClaimKey) {
      continue;
    }
    const { normalized } = inspectClaimKey(rawClaimKey);
    if (!normalized) {
      continue;
    }
    const entityPrefix = normalized.entity;
    let prefixStats = statsByPrefix.get(entityPrefix);
    if (prefixStats === undefined || prefixStats === null) {
      prefixStats = {
        entityPrefix,
        activeEntryCount: 0,
        trustedEntryCount: 0,
        tentativeEntryCount: 0,
        unresolvedEntryCount: 0,
        legacyEntryCount: 0,
        deterministicRepairEntryCount: 0,
        manualEntryCount: 0,
        modelEntryCount: 0,
        jsonRetryEntryCount: 0,
        surgeonFamilyReuseEntryCount: 0
      };
      statsByPrefix.set(entityPrefix, prefixStats);
    }
    prefixStats.activeEntryCount += 1;
    prefixStats[statusCounters.get(observation.claim_key_status) ?? "legacyEntryCount"] += 1;
    const sourceCounter = sourceCounters.get(observation.claim_key_source);
    if (sourceCounter !== undefined) {
      prefixStats[sourceCounter] += 1;
    }
  }

  return [...statsByPrefix.values()].sort((left, right) => {
    const byActive = right.activeEntryCount - left.activeEntryCount;
    if (byActive !== 0) {
      return byActive;
    }
    const byTrusted = right.trustedEntryCount - left.trustedEntryCount;
    if (byTrusted !== 0) {
      return byTrusted;
    }
    return left.entityPrefix.localeCompare(right.entityPrefix);
  });
}
|
|
1496
|
+
/**
 * Detects singleton alias candidates directly from raw observations by first
 * summarizing them per entity prefix.
 *
 * @param observations - Observations accepted by summarizeClaimKeyEntityPrefixStats.
 * @returns The candidates found by detectClaimKeySingletonAliasCandidatesFromStats.
 */
function detectClaimKeySingletonAliasCandidates(observations) {
  const prefixStats = summarizeClaimKeyEntityPrefixStats(observations);
  return detectClaimKeySingletonAliasCandidatesFromStats(prefixStats);
}
|
|
1499
|
+
/**
 * Pairs small, not-fully-trusted prefixes ("aliases") with larger trusted prefixes
 * ("dominant families") and keeps the best pairing(s) per alias.
 *
 * A dominant family has at least SINGLETON_ALIAS_MIN_DOMINANT_TRUSTED_COUNT trusted
 * entries. An alias has between 1 and SINGLETON_ALIAS_MAX_FAMILY_SIZE active entries,
 * fewer trusted than active entries, and at least one low-trust entry. A pairing is
 * evaluated only when the dominant family has strictly more active entries.
 *
 * @param stats - Per-prefix stats as produced by summarizeClaimKeyEntityPrefixStats.
 * @returns Selected candidates, sorted by confidence (descending), then alias prefix.
 */
function detectClaimKeySingletonAliasCandidatesFromStats(stats) {
  const candidatesByAlias = /* @__PURE__ */ new Map();
  const dominantFamilies = stats.filter((profile) => profile.trustedEntryCount >= SINGLETON_ALIAS_MIN_DOMINANT_TRUSTED_COUNT);
  const aliasFamilies = stats.filter((profile) => {
    const isSmallFamily = profile.activeEntryCount > 0 && profile.activeEntryCount <= SINGLETON_ALIAS_MAX_FAMILY_SIZE;
    return isSmallFamily && profile.trustedEntryCount < profile.activeEntryCount && buildLowTrustEntryCount(profile) >= 1;
  });

  for (const aliasProfile of aliasFamilies) {
    const aliasPrefix = aliasProfile.entityPrefix;
    for (const dominantProfile of dominantFamilies) {
      const isDistinct = aliasPrefix !== dominantProfile.entityPrefix;
      const isLarger = dominantProfile.activeEntryCount > aliasProfile.activeEntryCount;
      if (!isDistinct || !isLarger) {
        continue;
      }
      const candidate = evaluateSingletonAliasCandidate(aliasProfile, dominantProfile);
      if (!candidate) {
        continue;
      }
      let bucket = candidatesByAlias.get(aliasPrefix);
      if (bucket === undefined || bucket === null) {
        bucket = [];
        candidatesByAlias.set(aliasPrefix, bucket);
      }
      bucket.push(candidate);
    }
  }

  const selected = [...candidatesByAlias.values()].flatMap(selectBestSingletonAliasCandidate);
  return selected.sort((left, right) => right.confidence - left.confidence || left.aliasEntityPrefix.localeCompare(right.aliasEntityPrefix));
}
|
|
1521
|
+
/**
 * Aggregates entries into one profile per normalized entity prefix.
 *
 * Entries are skipped when they have no claim key, or when inspectClaimKey
 * reports the key as non-canonical, not normalizable, or suspect.
 *
 * @param entries Iterable of entries; reads claim_key, id, tags,
 *   source_context, subject and quality_score.
 * @returns Map from entity prefix to the accumulated profile.
 */
function buildTrustedClaimKeyEntityProfiles(entries) {
  const profilesByEntity = new Map();
  for (const entry of entries) {
    const trimmedClaimKey = entry.claim_key?.trim();
    if (!trimmedClaimKey) {
      continue;
    }
    const inspection = inspectClaimKey(trimmedClaimKey);
    const isClean = inspection.canonical && inspection.normalized && !(inspection.suspectReasons.length > 0);
    if (!isClean) {
      continue;
    }
    const { entity, attribute, claimKey } = inspection.normalized;
    const profile = getOrCreateProfile(profilesByEntity, entity);
    profile.entryIds.add(entry.id);
    profile.claimKeys.add(claimKey);
    profile.attributeSet.add(attribute);

    // The "head" is the first underscore-separated segment of the attribute.
    const attributeHead = attribute.split("_")[0] ?? attribute;
    if (attributeHead) {
      profile.attributeHeadSet.add(attributeHead);
    }

    for (const rawTag of entry.tags) {
      const tag = normalizeClaimKeySegment(rawTag);
      if (tag) {
        profile.tags.add(tag);
      }
    }
    for (const token of tokenizeGrounding(entry.source_context)) {
      profile.sourceContextTokens.add(token);
    }
    for (const token of tokenizeGrounding(entry.subject)) {
      profile.subjectTokens.add(token);
    }

    profile.entryCount += 1;
    profile.totalQualityScore += entry.quality_score;
  }
  return profilesByEntity;
}
|
|
1559
|
+
/**
 * Returns the profile stored under `entityPrefix`, creating and registering an
 * empty one on first use.
 *
 * @param profiles Map from entity prefix to profile; mutated when a profile is created.
 * @param entityPrefix Underscore-separated entity prefix.
 * @returns The existing or newly created profile.
 */
function getOrCreateProfile(profiles, entityPrefix) {
  let profile = profiles.get(entityPrefix);
  if (!profile) {
    // Empty segments (from leading, trailing or doubled underscores) are dropped.
    const tokenList = entityPrefix.split("_").filter((segment) => segment.length > 0);
    const sortedTokens = [...tokenList];
    sortedTokens.sort();
    profile = {
      entityPrefix,
      entryIds: new Set(),
      claimKeys: new Set(),
      attributeSet: new Set(),
      attributeHeadSet: new Set(),
      tags: new Set(),
      sourceContextTokens: new Set(),
      subjectTokens: new Set(),
      entryCount: 0,
      totalQualityScore: 0,
      tokenList,
      sortedTokenSignature: sortedTokens.join("_"),
      compactSignature: tokenList.join("")
    };
    profiles.set(entityPrefix, profile);
  }
  return profile;
}
|
|
1583
|
+
/**
 * Finds entity-prefix pairs that share at least one attribute and evaluates
 * each pair with evaluateEntityFamilyPairSupport.
 *
 * @param profiles Map from entity prefix to profile (see getOrCreateProfile).
 * @returns Array of the non-null pair-support records, in pair discovery order.
 */
function buildPairSupport(profiles) {
  // attribute -> entity prefixes that carry it
  const entitiesByAttribute = new Map();
  for (const { entityPrefix, attributeSet } of profiles.values()) {
    for (const attribute of attributeSet) {
      const members = entitiesByAttribute.get(attribute);
      if (members) {
        members.push(entityPrefix);
      } else {
        entitiesByAttribute.set(attribute, [entityPrefix]);
      }
    }
  }

  const pairKeys = new Set();
  for (const members of entitiesByAttribute.values()) {
    const entities = normalizeStringArray(members);
    // Buckets above MAX_ATTRIBUTE_BUCKET_SIZE are skipped entirely.
    if (entities.length < 2 || entities.length > MAX_ATTRIBUTE_BUCKET_SIZE) {
      continue;
    }
    entities.forEach((leftEntity, position) => {
      if (!leftEntity) {
        return;
      }
      for (const rightEntity of entities.slice(position + 1)) {
        if (rightEntity) {
          pairKeys.add(buildPairKey(leftEntity, rightEntity));
        }
      }
    });
  }

  const support = [];
  for (const pairKey of pairKeys) {
    const parts = pairKey.split("::");
    const leftProfile = profiles.get(parts[0] ?? "");
    const rightProfile = profiles.get(parts[1] ?? "");
    if (!leftProfile || !rightProfile) {
      continue;
    }
    const pairSupport = evaluateEntityFamilyPairSupport(leftProfile, rightProfile);
    if (pairSupport) {
      support.push(pairSupport);
    }
  }
  return support;
}
|
|
1630
|
+
/**
 * Decides whether two entity-prefix profiles look like the same entity family
 * and, if so, describes the supporting evidence.
 *
 * @param leftProfile Profile with attributeSet, attributeHeadSet, tags,
 *   sourceContextTokens, subjectTokens, entryIds and entityPrefix.
 * @param rightProfile Same shape as leftProfile.
 * @returns null when the pair shares no attribute or fails the qualification
 *   rule; otherwise a record with entityPrefixes, supportingEntryIds,
 *   sharedAttributes, confidence (capped at 0.98), autoSafe,
 *   preferredCanonicalEntityPrefix and evidence.
 */
function evaluateEntityFamilyPairSupport(leftProfile, rightProfile) {
  const sharedAttributes = intersectSets(leftProfile.attributeSet, rightProfile.attributeSet);
  if (sharedAttributes.length === 0) {
    return null;
  }
  const sharedAttributeHeads = intersectSets(leftProfile.attributeHeadSet, rightProfile.attributeHeadSet);
  const sharedTags = intersectSets(leftProfile.tags, rightProfile.tags);
  const sharedSourceContextTokens = intersectSets(leftProfile.sourceContextTokens, rightProfile.sourceContextTokens);
  const sharedSubjectTokens = intersectSets(leftProfile.subjectTokens, rightProfile.subjectTokens);
  const lexicalRelation = evaluateEntityLexicalRelation(leftProfile, rightProfile);

  // Tags and source context count as "anchors"; subject overlap only adds to the score.
  const tagSignal = sharedTags.length > 0 ? 1 : 0;
  const sourceContextSignal = sharedSourceContextTokens.length >= 3 ? 1 : 0;
  const subjectSignal = sharedSubjectTokens.length >= 2 ? 1 : 0;
  const groundingScore = tagSignal + sourceContextSignal + subjectSignal;
  const groundingAnchorCount = tagSignal + sourceContextSignal;

  const hasLexicalRelation = lexicalRelation.kind !== null;
  const hasAnchor = groundingAnchorCount >= 1;
  let qualifies;
  if (sharedAttributes.length >= 3) {
    qualifies = true;
  } else if (sharedAttributes.length >= 2) {
    qualifies = hasLexicalRelation || hasAnchor;
  } else {
    qualifies = sharedAttributes.length === 1 && hasLexicalRelation && hasAnchor;
  }
  if (!qualifies) {
    return null;
  }

  const preview = (values) => values.slice(0, MAX_EVIDENCE_VALUES).join(", ");
  const evidence = [];
  const addEvidence = (kind, detail) => {
    evidence.push({ kind, detail });
  };
  addEvidence("shared_attribute_overlap", `Shared attributes: ${preview(sharedAttributes)}`);
  if (sharedAttributeHeads.length >= 2) {
    addEvidence("shared_attribute_head_overlap", `Shared attribute families: ${preview(sharedAttributeHeads)}`);
  }
  if (sharedTags.length > 0) {
    addEvidence("shared_tag_grounding", `Shared tags: ${preview(sharedTags)}`);
  }
  // Evidence is reported from 2 shared tokens even though scoring requires 3.
  if (sharedSourceContextTokens.length >= 2) {
    addEvidence("shared_source_context_grounding", `Shared source-context tokens: ${preview(sharedSourceContextTokens)}`);
  }
  if (sharedSubjectTokens.length >= 2) {
    addEvidence("shared_subject_grounding", `Shared subject tokens: ${preview(sharedSubjectTokens)}`);
  }
  if (lexicalRelation.kind && lexicalRelation.detail) {
    addEvidence(lexicalRelation.kind, lexicalRelation.detail);
  }

  const confidence = Math.min(
    0.98,
    0.48 + Math.min(sharedAttributes.length, 3) * 0.12 + Math.min(groundingScore, 3) * 0.08 + lexicalRelation.strengthScore * 0.05
  );
  return {
    entityPrefixes: [leftProfile.entityPrefix, rightProfile.entityPrefix],
    supportingEntryIds: normalizeStringArray([...leftProfile.entryIds, ...rightProfile.entryIds]),
    sharedAttributes,
    confidence,
    autoSafe: lexicalRelation.autoSafe && (sharedAttributes.length >= 2 || sharedAttributes.length === 1 && groundingAnchorCount >= 1 && groundingScore >= 2),
    preferredCanonicalEntityPrefix: lexicalRelation.preferredCanonicalEntityPrefix,
    evidence
  };
}
|
|
1696
|
+
/**
 * Classifies the lexical relationship between two entity prefixes.
 *
 * Checks run in order and the first match wins: separator variant, token
 * reordering, initialism expansion (either direction), token subset (either
 * direction).
 *
 * @param leftProfile Profile with entityPrefix, tokenList, compactSignature
 *   and sortedTokenSignature.
 * @param rightProfile Same shape as leftProfile.
 * @returns Record with kind, detail, autoSafe, preferredCanonicalEntityPrefix
 *   and strengthScore; kind and detail are null when nothing matches.
 */
function evaluateEntityLexicalRelation(leftProfile, rightProfile) {
  const relation = (kind, detail, autoSafe, preferredCanonicalEntityPrefix, strengthScore) => ({
    kind,
    detail,
    autoSafe,
    preferredCanonicalEntityPrefix,
    strengthScore
  });
  const leftPrefix = leftProfile.entityPrefix;
  const rightPrefix = rightProfile.entityPrefix;
  const leftTokens = leftProfile.tokenList;
  const rightTokens = rightProfile.tokenList;
  const prefixesDiffer = leftPrefix !== rightPrefix;

  if (leftProfile.compactSignature === rightProfile.compactSignature && prefixesDiffer) {
    // Prefer whichever prefix keeps more token boundaries; no preference on a tie.
    let preferred = null;
    if (leftTokens.length > rightTokens.length) {
      preferred = leftPrefix;
    } else if (leftTokens.length < rightTokens.length) {
      preferred = rightPrefix;
    }
    const detail = preferred === null ? "Entity prefixes collapse to the same compact lexical form." : `Entity prefixes collapse to the same compact lexical form; "${preferred}" preserves clearer token boundaries.`;
    return relation("lexical_separator_variant", detail, true, preferred, 3);
  }

  if (leftProfile.sortedTokenSignature.length > 0 && leftProfile.sortedTokenSignature === rightProfile.sortedTokenSignature && prefixesDiffer) {
    return relation("lexical_token_reordering", "Entity prefixes use the same lexical tokens in a different order.", true, null, 2);
  }

  const leftInitialism = buildInitialism(leftTokens);
  const rightInitialism = buildInitialism(rightTokens);
  if (leftInitialism.length >= 2 && leftInitialism === rightPrefix) {
    return relation("lexical_initialism_expansion", `Entity prefix "${rightPrefix}" matches the initialism of "${leftPrefix}".`, false, leftPrefix, 1);
  }
  if (rightInitialism.length >= 2 && rightInitialism === leftPrefix) {
    return relation("lexical_initialism_expansion", `Entity prefix "${leftPrefix}" matches the initialism of "${rightPrefix}".`, false, rightPrefix, 1);
  }

  // For subsets the longer (superset) prefix is preferred.
  if (isTokenSubset(leftTokens, rightTokens)) {
    return relation("lexical_token_subset", `"${leftPrefix}" is a lexical subset of "${rightPrefix}".`, false, rightPrefix, 1);
  }
  if (isTokenSubset(rightTokens, leftTokens)) {
    return relation("lexical_token_subset", `"${rightPrefix}" is a lexical subset of "${leftPrefix}".`, false, leftPrefix, 1);
  }

  return relation(null, null, false, null, 0);
}
|
|
1764
|
+
/**
 * Scores every entity prefix of a detected family and picks a canonical one,
 * or reports why none could be picked.
 *
 * @param entityPrefixes Entity prefixes that make up the family.
 * @param pairSupport Pair-support records (see evaluateEntityFamilyPairSupport).
 * @param profiles Map from entity prefix to profile.
 * @returns { canonicalEntityPrefix, reasons, unresolvedReason }; the prefix is
 *   null and unresolvedReason is set when selection is not conclusive.
 */
function selectCanonicalEntityPrefix(entityPrefixes, pairSupport, profiles) {
  const unresolved = (unresolvedReason) => ({
    canonicalEntityPrefix: null,
    reasons: [],
    unresolvedReason
  });
  const NO_SELECTION = "No canonical entity prefix could be selected from the detected family.";

  const scoreByEntity = new Map();
  const reasonsByEntity = new Map();
  for (const entityPrefix of entityPrefixes) {
    const profile = profiles.get(entityPrefix);
    if (!profile) {
      continue;
    }
    const averageQuality = Math.round(profile.totalQualityScore / Math.max(profile.entryCount, 1));
    let score = Math.min(profile.attributeSet.size, 6) * 2 + Math.min(profile.entryCount, 6) + averageQuality;
    const reasons = [];
    if (profile.attributeSet.size >= 2) {
      reasons.push("broader trusted attribute coverage");
    }

    // Every lexical preference for this prefix adds 4; every preference for
    // some other prefix subtracts 2.
    let votesForEntity = 0;
    for (const { preferredCanonicalEntityPrefix: preferred } of pairSupport) {
      if (preferred === entityPrefix) {
        score += 4;
        votesForEntity += 1;
      } else if (preferred !== null) {
        score -= 2;
      }
    }

    const specificity = scoreEntityFormSpecificity(entityPrefix, entityPrefixes, profiles);
    score += specificity.score;
    if (specificity.reason) {
      reasons.push(specificity.reason);
    }
    if (votesForEntity > 0) {
      reasons.push(`lexical alias evidence prefers "${entityPrefix}"`);
    }
    scoreByEntity.set(entityPrefix, score);
    reasonsByEntity.set(entityPrefix, normalizeStringArray(reasons));
  }

  const ranking = [...scoreByEntity.entries()];
  ranking.sort((left, right) => right[1] - left[1] || left[0].localeCompare(right[0]));
  const winner = ranking[0];
  const runnerUp = ranking[1];
  if (!winner) {
    return unresolved(NO_SELECTION);
  }
  const bestEntityPrefix = winner[0];
  const bestScore = winner[1];
  const secondScore = runnerUp?.[1] ?? Number.NEGATIVE_INFINITY;
  if (!profiles.get(bestEntityPrefix)) {
    return unresolved(NO_SELECTION);
  }

  const peerSupport = [];
  for (const peerPrefix of entityPrefixes) {
    if (peerPrefix !== bestEntityPrefix) {
      peerSupport.push(findPairSupport(pairSupport, bestEntityPrefix, peerPrefix));
    }
  }
  const hasDirectSupportToAllPeers = !peerSupport.some((support) => support === null);
  const hasLexicalSupportToAllPeers = peerSupport.every((support) => support?.evidence.some((item) => item.kind.startsWith("lexical_")) === true);
  const lexicalVotes = pairSupport.filter((support) => support.preferredCanonicalEntityPrefix === bestEntityPrefix).length;

  // Conservative: the winner needs direct lexical support to every peer, at
  // least one lexical vote, and a score lead of CANONICAL_SELECTION_MARGIN.
  if (!hasDirectSupportToAllPeers || !hasLexicalSupportToAllPeers || lexicalVotes === 0 || bestScore - secondScore < CANONICAL_SELECTION_MARGIN) {
    return unresolved("Multiple plausible canonical entity prefixes remain after conservative scoring.");
  }
  return {
    canonicalEntityPrefix: bestEntityPrefix,
    reasons: reasonsByEntity.get(bestEntityPrefix) ?? [],
    unresolvedReason: null
  };
}
|
|
1832
|
+
/**
 * Scores how specific an entity prefix's written form is relative to family
 * peers that share its compact signature.
 *
 * @param entityPrefix Prefix being scored.
 * @param entityPrefixes All prefixes of the family (may include entityPrefix).
 * @param profiles Map from entity prefix to profile (tokenList, compactSignature).
 * @returns { score, reason }: +2 with a reason when the prefix has two or more
 *   tokens and a same-compact peer with fewer tokens; -1 when the prefix is at
 *   most 3 characters and has no same-compact peer; otherwise 0 and null.
 */
function scoreEntityFormSpecificity(entityPrefix, entityPrefixes, profiles) {
  const ownProfile = profiles.get(entityPrefix);
  if (!ownProfile) {
    return { score: 0, reason: null };
  }

  const compactPeers = [];
  for (const peerPrefix of entityPrefixes) {
    if (peerPrefix === entityPrefix) {
      continue;
    }
    const peerProfile = profiles.get(peerPrefix);
    if (peerProfile && peerProfile.compactSignature === ownProfile.compactSignature) {
      compactPeers.push(peerProfile);
    }
  }

  const ownTokenCount = ownProfile.tokenList.length;
  const hasMoreAbbreviatedPeer = compactPeers.some((peerProfile) => peerProfile.tokenList.length < ownTokenCount);
  const isLessAbbreviated = ownTokenCount >= 2 && hasMoreAbbreviatedPeer;
  const isIsolatedShortForm = entityPrefix.length <= 3 && compactPeers.length === 0;

  return {
    score: (isLessAbbreviated ? 2 : 0) - (isIsolatedShortForm ? 1 : 0),
    reason: isLessAbbreviated ? "less abbreviated lexical form" : null
  };
}
|
|
1849
|
+
/**
 * Breadth-first collection of every entity prefix reachable from
 * `startingEntityPrefix` through `adjacency`.
 *
 * The pending list is read with a moving cursor instead of `Array#shift`, so
 * dequeuing never re-indexes the array and traversal stays linear in the
 * number of nodes plus edges. Visit order is unchanged.
 *
 * @param startingEntityPrefix Entity prefix to start from.
 * @param adjacency Map from entity prefix to an iterable of neighbouring prefixes.
 * @param visited Set shared with the caller; every reached prefix (including
 *   the start) is added to it, and prefixes already in it are not revisited.
 * @returns Reached prefixes in breadth-first order. Falsy prefixes (such as
 *   an empty string) are marked visited but left out of the result.
 */
function collectConnectedEntityComponent(startingEntityPrefix, adjacency, visited) {
  const pending = [startingEntityPrefix];
  const component = [];
  visited.add(startingEntityPrefix);
  for (let cursor = 0; cursor < pending.length; cursor += 1) {
    const entityPrefix = pending[cursor];
    if (!entityPrefix) {
      continue;
    }
    component.push(entityPrefix);
    const peers = adjacency.get(entityPrefix);
    if (!peers) {
      continue;
    }
    for (const peer of peers) {
      if (visited.has(peer)) {
        continue;
      }
      // Mark on enqueue so a peer reachable by several paths is queued once.
      visited.add(peer);
      pending.push(peer);
    }
  }
  return component;
}
|
|
1873
|
+
/**
 * Looks up the support record that connects two entity prefixes, regardless
 * of the order in which the record stores them.
 *
 * @param pairSupport Array of records whose entityPrefixes is a two-element array.
 * @param leftEntityPrefix One side of the pair.
 * @param rightEntityPrefix The other side of the pair.
 * @returns The first matching record, or null when there is none.
 */
function findPairSupport(pairSupport, leftEntityPrefix, rightEntityPrefix) {
  const match = pairSupport.find(({ entityPrefixes: [first, second] }) => {
    const sameOrder = first === leftEntityPrefix && second === rightEntityPrefix;
    const swappedOrder = first === rightEntityPrefix && second === leftEntityPrefix;
    return sameOrder || swappedOrder;
  });
  return match ?? null;
}
|
|
1882
|
+
/**
 * Evaluates whether a small, low-trust family is an alias of a dominant
 * trusted family.
 *
 * @param aliasProfile Stats profile of the suspected alias family.
 * @param dominantProfile Stats profile of the larger trusted family.
 * @returns null when there is no usable lexical relation, the relation is
 *   scope-like, the dominant family has too few trusted entries, or the alias
 *   has no low-trust entries; otherwise a candidate record with confidence
 *   (capped at 0.98), canonicalReuseSafe and evidence.
 */
function evaluateSingletonAliasCandidate(aliasProfile, dominantProfile) {
  const aliasPrefix = aliasProfile.entityPrefix;
  const dominantPrefix = dominantProfile.entityPrefix;
  const lexicalRelation = evaluateSingletonAliasLexicalRelation(aliasPrefix, dominantPrefix);
  const hasUsableRelation = lexicalRelation.kind && lexicalRelation.detail && !lexicalRelation.scopeLike;
  if (!hasUsableRelation) {
    return null;
  }
  const dominantTrustedCount = dominantProfile.trustedEntryCount;
  if (dominantTrustedCount < SINGLETON_ALIAS_MIN_DOMINANT_TRUSTED_COUNT) {
    return null;
  }
  const aliasLowTrustCount = buildLowTrustEntryCount(aliasProfile);
  if (aliasLowTrustCount === 0) {
    return null;
  }

  const aliasFamilySize = aliasProfile.activeEntryCount;
  const dominantFamilySize = dominantProfile.activeEntryCount;
  const evidence = [
    {
      kind: "singleton_family_size",
      detail: `"${aliasPrefix}" has ${aliasFamilySize} active keyed ${pluralize(aliasFamilySize, "entry")}.`
    },
    {
      kind: "dominant_trusted_family",
      detail: `"${dominantPrefix}" already has ${dominantTrustedCount} trusted ${pluralize(dominantTrustedCount, "entry")}.`
    },
    {
      kind: "low_trust_creation_path",
      detail: describeLowTrustAliasFamily(aliasProfile)
    },
    {
      kind: lexicalRelation.kind,
      detail: lexicalRelation.detail
    }
  ];

  // Each contribution is capped so that no single signal dominates the score.
  const confidence = Math.min(
    0.98,
    0.58 + Math.min(dominantTrustedCount, 6) * 0.05 + Math.min(aliasLowTrustCount, 2) * 0.05 + Math.min(dominantFamilySize - aliasFamilySize, 6) * 0.02 + lexicalRelation.strengthScore * 0.08
  );
  return {
    aliasEntityPrefix: aliasPrefix,
    dominantEntityPrefix: dominantPrefix,
    aliasFamilySize,
    dominantFamilySize,
    dominantTrustedCount,
    aliasLowTrustCount,
    confidence,
    canonicalReuseSafe: lexicalRelation.canonicalReuseSafe && aliasFamilySize === 1 && aliasLowTrustCount === aliasFamilySize && dominantTrustedCount >= SINGLETON_ALIAS_MIN_DOMINANT_TRUSTED_COUNT,
    evidence
  };
}
|
|
1929
|
+
/**
 * Picks the single best candidate for one alias, refusing to choose when the
 * top two candidates are too close in confidence.
 *
 * @param candidates Iterable of candidates with confidence and dominantEntityPrefix.
 * @returns [best] when a clear winner exists; [] when there are no candidates
 *   or the lead over the runner-up is below SINGLETON_ALIAS_MIN_CONFIDENCE_DELTA.
 */
function selectBestSingletonAliasCandidate(candidates) {
  const byConfidence = [...candidates];
  byConfidence.sort((left, right) => {
    const confidenceOrder = right.confidence - left.confidence;
    return confidenceOrder || left.dominantEntityPrefix.localeCompare(right.dominantEntityPrefix);
  });
  const best = byConfidence[0];
  const runnerUp = byConfidence[1];
  if (!best) {
    return [];
  }
  const isAmbiguous = runnerUp ? best.confidence - runnerUp.confidence < SINGLETON_ALIAS_MIN_CONFIDENCE_DELTA : false;
  return isAmbiguous ? [] : [best];
}
|
|
1942
|
+
/**
 * Classifies how an alias entity prefix relates lexically to a dominant one.
 *
 * @param aliasEntityPrefix Underscore-separated prefix of the alias family.
 * @param dominantEntityPrefix Underscore-separated prefix of the dominant family.
 * @returns Record with kind, detail, canonicalReuseSafe, scopeLike and
 *   strengthScore. kind is "lexical_separator_variant" when both prefixes
 *   collapse to the same string once underscores are removed,
 *   "lexical_token_subset" when the alias adds exactly one non-scope token to
 *   the dominant tokens, and null otherwise.
 */
function evaluateSingletonAliasLexicalRelation(aliasEntityPrefix, dominantEntityPrefix) {
  const tokensOf = (prefix) => prefix.split("_").filter((token) => token.length > 0);
  const noRelation = (scopeLike) => ({
    kind: null,
    detail: null,
    canonicalReuseSafe: false,
    scopeLike,
    strengthScore: 0
  });

  const aliasTokens = tokensOf(aliasEntityPrefix);
  const dominantTokens = tokensOf(dominantEntityPrefix);
  const sameCompactForm = aliasTokens.join("") === dominantTokens.join("");
  if (sameCompactForm && aliasEntityPrefix !== dominantEntityPrefix) {
    return {
      kind: "lexical_separator_variant",
      detail: `Entity prefixes "${aliasEntityPrefix}" and "${dominantEntityPrefix}" collapse to the same compact lexical form.`,
      canonicalReuseSafe: true,
      scopeLike: false,
      strengthScore: 3
    };
  }

  // From here on the alias must be a strict token superset of the dominant prefix.
  if (!isTokenSubset(dominantTokens, aliasTokens)) {
    return noRelation(false);
  }
  const dominantTokenSet = new Set(dominantTokens);
  const addedTokens = aliasTokens.filter((token) => !dominantTokenSet.has(token));
  // More (or fewer) than one added token, or a scope token, is treated as scope-like.
  if (addedTokens.length !== 1 || SINGLETON_ALIAS_SCOPE_TOKENS.has(addedTokens[0])) {
    return noRelation(true);
  }
  return {
    kind: "lexical_token_subset",
    detail: `"${aliasEntityPrefix}" extends "${dominantEntityPrefix}" by the added token "${addedTokens[0]}".`,
    canonicalReuseSafe: true,
    scopeLike: false,
    strengthScore: 2
  };
}
|
|
1985
|
+
/**
 * Counts a profile's low-trust entries: tentative plus unresolved, plus any
 * deterministic-repair entries in excess of the tentative count.
 *
 * @param profile Stats profile with deterministicRepairEntryCount,
 *   tentativeEntryCount and unresolvedEntryCount.
 * @returns The combined low-trust entry count.
 */
function buildLowTrustEntryCount(profile) {
  const { deterministicRepairEntryCount, tentativeEntryCount, unresolvedEntryCount } = profile;
  // Only the repair surplus over the tentative count is added on top.
  const repairSurplus = deterministicRepairEntryCount - tentativeEntryCount;
  const deterministicOnlyCount = Math.max(0, repairSurplus);
  return tentativeEntryCount + unresolvedEntryCount + deterministicOnlyCount;
}
|
|
1989
|
+
/**
 * Builds a human-readable sentence explaining why an alias family is
 * considered low-trust.
 *
 * @param profile Stats profile with entityPrefix and the deterministic-repair,
 *   tentative and unresolved entry counters.
 * @returns A sentence listing every non-zero counter, or a generic sentence
 *   when all three counters are zero.
 */
function describeLowTrustAliasFamily(profile) {
  const counters = [
    [profile.deterministicRepairEntryCount, "deterministic repair"],
    [profile.tentativeEntryCount, "tentative"],
    [profile.unresolvedEntryCount, "unresolved"]
  ];
  const reasons = counters
    .filter(([count]) => count > 0)
    .map(([count, label]) => `${count} ${label} ${pluralize(count, "entry")}`);
  return reasons.length === 0
    ? `"${profile.entityPrefix}" is not fully trusted yet.`
    : `"${profile.entityPrefix}" is low-trust because it has ${reasons.join(", ")}.`;
}
|
|
2005
|
+
/**
 * Joins the first character of every token into an initialism.
 *
 * @param tokens Array of string tokens.
 * @returns The initialism, or "" when fewer than two tokens are given.
 */
function buildInitialism(tokens) {
  if (tokens.length < 2) {
    return "";
  }
  let initialism = "";
  for (const token of tokens) {
    initialism += token[0] ?? "";
  }
  return initialism;
}
|
|
2011
|
+
/**
 * Tests whether `subsetTokens` is a non-empty, strictly shorter list whose
 * tokens all appear in `supersetTokens`.
 *
 * @param subsetTokens Candidate subset token list.
 * @param supersetTokens Candidate superset token list.
 * @returns true only when the subset is non-empty, shorter than the superset
 *   and fully contained in it.
 */
function isTokenSubset(subsetTokens, supersetTokens) {
  const isStrictlyShorter = subsetTokens.length !== 0 && subsetTokens.length < supersetTokens.length;
  if (!isStrictlyShorter) {
    return false;
  }
  const known = new Set(supersetTokens);
  for (const token of subsetTokens) {
    if (!known.has(token)) {
      return false;
    }
  }
  return true;
}
|
|
2018
|
+
/**
 * Splits free text into grounding tokens.
 *
 * The value is normalized with normalizeClaimKeySegment and split on
 * underscores; tokens shorter than two characters or listed in
 * ENTITY_FAMILY_GROUNDING_STOP_TOKENS are dropped.
 *
 * @param value Text to tokenize; falsy values yield [].
 * @returns Array of surviving tokens in their original order.
 */
function tokenizeGrounding(value) {
  const normalized = value ? normalizeClaimKeySegment(value) : null;
  if (!normalized) {
    return [];
  }
  const tokens = [];
  for (const token of normalized.split("_")) {
    if (token.length < 2) {
      continue;
    }
    if (ENTITY_FAMILY_GROUNDING_STOP_TOKENS.has(token)) {
      continue;
    }
    tokens.push(token);
  }
  return tokens;
}
|
|
2028
|
+
/**
 * Returns the values present in both sets, sorted with localeCompare.
 *
 * @param left First Set of strings.
 * @param right Second Set of strings.
 * @returns New sorted array of the shared values.
 */
function intersectSets(left, right) {
  // Iterate the smaller set and probe the larger one.
  const leftIsSmaller = left.size <= right.size;
  const probe = leftIsSmaller ? left : right;
  const lookup = leftIsSmaller ? right : left;
  const shared = [...probe].filter((value) => lookup.has(value));
  shared.sort((first, second) => first.localeCompare(second));
  return shared;
}
|
|
2038
|
+
/**
 * Trims every string, drops the ones that end up empty and removes
 * duplicates while keeping first-seen order.
 *
 * @param values Array of strings.
 * @returns New array of unique, trimmed, non-empty strings.
 */
function normalizeStringArray(values) {
  const unique = new Set();
  for (const value of values) {
    const trimmed = value.trim();
    if (trimmed.length > 0) {
      unique.add(trimmed);
    }
  }
  return [...unique];
}
|
|
2041
|
+
/**
 * Builds an order-independent key for a pair of entity prefixes by placing
 * the two prefixes in localeCompare order and joining them with "::".
 *
 * @param leftEntityPrefix One entity prefix.
 * @param rightEntityPrefix The other entity prefix.
 * @returns The combined key, identical for (a, b) and (b, a).
 */
function buildPairKey(leftEntityPrefix, rightEntityPrefix) {
  const alreadyOrdered = leftEntityPrefix.localeCompare(rightEntityPrefix) <= 0;
  const ordered = alreadyOrdered
    ? [leftEntityPrefix, rightEntityPrefix]
    : [rightEntityPrefix, leftEntityPrefix];
  return ordered.join("::");
}
|
|
2044
|
+
/**
 * Returns the Set stored under `key`, inserting a new empty Set first when
 * the map has no truthy value for that key.
 *
 * @param map Map whose values are Sets; mutated when a Set is created.
 * @param key Key to look up.
 * @returns The existing or newly created Set.
 */
function getOrCreateSet(map, key) {
  let bucket = map.get(key);
  if (!bucket) {
    bucket = new Set();
    map.set(key, bucket);
  }
  return bucket;
}
|
|
2053
|
+
/**
 * Returns `noun` in the grammatical number that matches `count`.
 *
 * A count of exactly 1 yields the noun unchanged. Otherwise nouns ending in a
 * consonant followed by "y" become "-ies" (for example "entry" -> "entries",
 * where a bare "s" suffix would give "entrys"); every other noun gets "s".
 *
 * @param count Number of items being described.
 * @param noun Singular English noun.
 * @returns The singular or plural form of the noun.
 */
function pluralize(count, noun) {
  if (count === 1) {
    return noun;
  }
  // A vowel before the "y" ("key", "day") keeps the regular "s" plural.
  if (/[^aeiou]y$/i.test(noun)) {
    return `${noun.slice(0, -1)}ies`;
  }
  return `${noun}s`;
}
|
|
2056
|
+
|
|
2057
|
+
// src/core/claim-key-support.ts
|
|
2058
|
+
// Upper bound on attribute tokens; presumably applied when deciding whether a
// claim key may be auto-applied (the usage is outside this section).
var MAX_AUTO_APPLY_ATTRIBUTE_TOKENS = 4;

// Short function words; presumably ignored when tokenizing grounding text
// (the usage is outside this section).
var GROUNDING_STOP_TOKENS = /* @__PURE__ */ new Set([
  "a", "an", "and", "are", "as", "at", "be", "by", "for", "from",
  "how", "in", "into", "is", "it", "of", "on", "or", "our", "that",
  "the", "their", "this", "to", "we", "with"
]);

// Preposition-like attribute tokens.
var AWKWARD_AUTO_APPLY_ATTRIBUTE_TOKENS = /* @__PURE__ */ new Set([
  "to", "for", "from", "with", "about", "into", "onto", "between", "during"
]);

// Attribute tokens grouped by template family (policy, authoritative source,
// architecture).
var POLICY_TEMPLATE_ATTRIBUTE_TOKENS = /* @__PURE__ */ new Set([
  "policy", "default", "workflow", "process", "strategy", "guardrail", "rule", "boundary"
]);
var AUTHORITATIVE_TEMPLATE_ATTRIBUTE_TOKENS = /* @__PURE__ */ new Set([
  "source", "truth", "guide", "runbook", "reference"
]);
var ARCHITECTURE_TEMPLATE_ATTRIBUTE_TOKENS = /* @__PURE__ */ new Set([
  "adapter", "boundary", "architecture", "backend", "storage",
  "model", "support", "contract", "interface", "surface"
]);

// Attribute heads treated as stable family slots.
var STABLE_FAMILY_SLOT_ATTRIBUTE_HEADS = /* @__PURE__ */ new Set([
  "access", "boundary", "condition", "contract", "dependency", "mode", "owner",
  "order", "path", "policy", "preference", "preservation", "process", "requirement",
  "role", "rule", "schedule", "sequencing", "setting", "status", "strategy",
  "support", "surface", "timezone", "version", "window", "workflow", "workspace"
]);
|
|
2132
|
+
/**
 * Builds the ordered list of trusted seed entries used for claim-key support.
 *
 * Entries are kept only when their trimmed claim key passes
 * isTrustedClaimKeyForCleanup and inspectClaimKey can normalize it. Claim keys
 * are ranked by occurrence count, then highest importance, then most recent
 * created_at, then alphabetically.
 *
 * NOTE(review): only the first entry seen for each claim key is added to the
 * result; later entries with the same key update the ranking stats only.
 * Confirm that this de-duplication is intended.
 *
 * @param entries Iterable of entries; reads claim_key, id, type, tags,
 *   source_context, subject, importance and created_at.
 * @returns { entries } with the seed entries in ranked order.
 */
function buildTrustedClaimKeySupportSeed(entries) {
  const statsByClaimKey = new Map();
  // Seed entries grouped by normalized claim key, in first-seen order.
  const seedsByNormalizedKey = new Map();

  for (const entry of entries) {
    const claimKey = entry.claim_key?.trim();
    if (!claimKey || !isTrustedClaimKeyForCleanup(claimKey)) {
      continue;
    }
    const { normalized } = inspectClaimKey(claimKey);
    if (!normalized) {
      continue;
    }

    const stats = statsByClaimKey.get(claimKey);
    if (stats) {
      stats.count += 1;
      stats.maxImportance = Math.max(stats.maxImportance, entry.importance);
      if (stats.latestCreatedAt.localeCompare(entry.created_at) < 0) {
        stats.latestCreatedAt = entry.created_at;
      }
      continue;
    }

    statsByClaimKey.set(claimKey, {
      count: 1,
      maxImportance: entry.importance,
      latestCreatedAt: entry.created_at
    });
    const seed = {
      id: entry.id,
      claimKey: normalized.claimKey,
      entity: normalized.entity,
      attribute: normalized.attribute,
      type: entry.type,
      tags: normalizeGroundingTags(entry.tags),
      sourceContextTokens: tokenizeGroundingText(entry.source_context),
      subjectTokens: tokenizeGroundingText(entry.subject),
      createdAt: entry.created_at
    };
    const group = seedsByNormalizedKey.get(seed.claimKey);
    if (group) {
      group.push(seed);
    } else {
      seedsByNormalizedKey.set(seed.claimKey, [seed]);
    }
  }

  const compareClaimKeys = ([leftKey, leftStats], [rightKey, rightStats]) =>
    rightStats.count - leftStats.count ||
    rightStats.maxImportance - leftStats.maxImportance ||
    rightStats.latestCreatedAt.localeCompare(leftStats.latestCreatedAt) ||
    leftKey.localeCompare(rightKey);
  const compareSeeds = (left, right) =>
    right.createdAt.localeCompare(left.createdAt) || left.id.localeCompare(right.id);

  const rankedClaimKeys = [...statsByClaimKey.entries()];
  rankedClaimKeys.sort(compareClaimKeys);

  const orderedEntries = [];
  for (const [claimKey] of rankedClaimKeys) {
    // Seeds are matched on their normalized key against the trimmed raw key.
    const matching = [...(seedsByNormalizedKey.get(claimKey) ?? [])];
    matching.sort(compareSeeds);
    orderedEntries.push(...matching);
  }
  return {
    entries: orderedEntries
  };
}
|
|
2196
|
+
/**
 * Turns bare example claim keys into a support seed.
 *
 * A key is kept only when `inspectClaimKey` can normalize it and the
 * normalized key passes `isTrustedClaimKeyForCleanup`. Kept keys become
 * synthetic entries with id `example:<1-based input position>`, empty
 * grounding data and a fixed epoch `createdAt`.
 *
 * @param {string[]} claimKeys - example claim keys.
 * @returns {{ entries: object[] }} synthetic seed entries in input order.
 */
function buildClaimKeySupportSeedFromExamples(claimKeys) {
  const entries = [];
  claimKeys.forEach((claimKey, position) => {
    const { normalized } = inspectClaimKey(claimKey);
    if (!normalized || !isTrustedClaimKeyForCleanup(normalized.claimKey)) {
      return;
    }
    entries.push({
      // The id keeps the position in the input list, not in the output list.
      id: `example:${position + 1}`,
      claimKey: normalized.claimKey,
      entity: normalized.entity,
      attribute: normalized.attribute,
      tags: [],
      sourceContextTokens: [],
      subjectTokens: [],
      createdAt: "1970-01-01T00:00:00.000Z"
    });
  });
  return { entries };
}
|
|
2217
|
+
/**
 * Evaluates how well a proposed claim key is supported for an entry.
 *
 * Trusted hint entries (excluding the entry itself, matched by id) are split
 * into exact reuse (same claim key) and family reuse (same entity, different
 * claim key). Those sets are combined with tag / source-context overlap,
 * lexical alignment, template support and stable-slot support into a
 * promotion class, evidence labels and human-readable rationale fragments.
 *
 * @param {object} entry - candidate entry (`id`, `tags`, `source_context`,
 *   plus whatever the lexical/template helpers read).
 * @param {string} targetClaimKey - proposed claim key.
 * @param {{ entries: object[] }} trustedHints - trusted support seed.
 * @returns {object} the support evaluation; the empty evaluation when the
 *   target key cannot be normalized.
 */
function evaluateClaimKeySupport(entry, targetClaimKey, trustedHints) {
  const { normalized } = inspectClaimKey(targetClaimKey);
  if (!normalized) {
    return createEmptyClaimKeySupportEvaluation();
  }
  const entryTagSet = new Set(normalizeGroundingTags(entry.tags));
  const entrySourceTokens = new Set(tokenizeGroundingText(entry.source_context));

  const isExampleEntry = (candidate) => candidate.id.startsWith("example:");
  const overlapWith = (candidate) => inspectGroundingOverlap(entryTagSet, entrySourceTokens, candidate);
  const isGrounded = (candidate) => {
    const overlap = overlapWith(candidate);
    return overlap.tagGrounding || overlap.sourceContextGrounding;
  };
  const pluralSuffix = (count) => (count === 1 ? "y" : "ies");
  const collectLabels = (pairs) => pairs.filter(([present]) => present).map(([, label]) => label);

  // One pass partitions the hints; an entry never supports itself.
  const relevantEntries = [];
  const exactReuseEntries = [];
  const familyReuseEntries = [];
  trustedHints.entries.forEach((candidate) => {
    if (entry.id && candidate.id === entry.id) {
      return;
    }
    if (candidate.claimKey === normalized.claimKey) {
      relevantEntries.push(candidate);
      exactReuseEntries.push(candidate);
    } else if (candidate.entity === normalized.entity) {
      relevantEntries.push(candidate);
      familyReuseEntries.push(candidate);
    }
  });

  const groundedExactReuseEntries = exactReuseEntries.filter((candidate) => isGrounded(candidate));
  const groundedFamilyReuseEntries = familyReuseEntries.filter((candidate) => isGrounded(candidate));
  const tagGrounding = relevantEntries.some((candidate) => overlapWith(candidate).tagGrounding);
  const sourceContextGrounding = relevantEntries.some((candidate) => overlapWith(candidate).sourceContextGrounding);
  const localGrounding = tagGrounding || sourceContextGrounding;
  const lexicalAlignment = inspectCandidateLexicalAlignment(entry, normalized.entity, normalized.attribute);
  const templateSupport = matchesConservativeTemplateSupport(entry, normalized.attribute);
  const stableSlotSupport = matchesStableFamilySlotSupport(normalized.attribute);

  // Synthetic "example:" entries carry no grounding data, so they count as
  // support without needing overlap.
  const familyHasExample = familyReuseEntries.some((candidate) => isExampleEntry(candidate));
  const trustedExactReuse =
    exactReuseEntries.length > 0 &&
    (groundedExactReuseEntries.length > 0 || exactReuseEntries.every((candidate) => isExampleEntry(candidate)));
  const trustedEntityFamilyReuse = groundedFamilyReuseEntries.length > 0 || familyHasExample;
  const exactSupportCount = Math.max(1, groundedExactReuseEntries.length);
  const familySupportCount = Math.max(1, groundedFamilyReuseEntries.length || familyReuseEntries.length);

  let promotionGroundedFamilyCount = 0;
  if (groundedFamilyReuseEntries.length > 0) {
    promotionGroundedFamilyCount = groundedFamilyReuseEntries.length;
  } else if (familyHasExample) {
    promotionGroundedFamilyCount = familyReuseEntries.length;
  }
  const promotionSupport = resolveClaimKeyPromotionSupport({
    exactReuseCount: trustedExactReuse ? exactSupportCount : 0,
    familyReuseCount: familyReuseEntries.length,
    groundedFamilyReuseCount: promotionGroundedFamilyCount,
    localGrounding: localGrounding || familyHasExample,
    templateSupport,
    stableSlotSupport,
    lexicalAlignment
  });

  const supportedProposal =
    lexicalAlignment.any &&
    (templateSupport || stableSlotSupport || trustedExactReuse || trustedEntityFamilyReuse || localGrounding);

  const supportEvidence = collectLabels([
    [trustedExactReuse, "trusted_exact_reuse"],
    [trustedEntityFamilyReuse, "trusted_entity_family_reuse"],
    [tagGrounding, "tag_grounding"],
    [sourceContextGrounding, "source_context_grounding"],
    [lexicalAlignment.entity, "entity_lexical_alignment"],
    [lexicalAlignment.attribute, "attribute_lexical_alignment"],
    [lexicalAlignment.strongEntityAttribute, "strong_entity_attribute_lexical_alignment"],
    [templateSupport, "template_support"],
    [stableSlotSupport, "stable_slot_support"],
    [promotionSupport.relaxedStableSlotFamilyGate, "single_grounded_family_sibling"]
  ]);
  const rationaleFragments = collectLabels([
    [trustedExactReuse, `trusted exact reuse from ${exactSupportCount} matching entr${pluralSuffix(exactSupportCount)}`],
    [
      trustedEntityFamilyReuse,
      `trusted ${normalized.entity} family reuse from ${familySupportCount} supporting entr${pluralSuffix(familySupportCount)}`
    ],
    [tagGrounding, "overlapping tags with trusted corpus entries"],
    [sourceContextGrounding, "overlapping source_context with trusted corpus entries"],
    [lexicalAlignment.strongEntityAttribute, "strong entity and slot lexical alignment"],
    // Slot-only and entity-only alignment are mutually exclusive fragments.
    [
      lexicalAlignment.attribute && !lexicalAlignment.strongEntityAttribute,
      "clear lexical alignment to the proposed slot"
    ],
    [!lexicalAlignment.attribute && lexicalAlignment.entity, "clear lexical alignment to the proposed entity"],
    [templateSupport, "a conservative policy/default/source-of-truth template match"],
    [stableSlotSupport, "a stable compact slot head in a well-established entity family"],
    [promotionSupport.relaxedStableSlotFamilyGate, "one grounded family sibling cleared the stable-slot family gate"]
  ]);

  return {
    autoApplyClass: promotionSupport.autoApplyClass,
    supportedProposal,
    trustedExactReuse,
    trustedEntityFamilyReuse,
    tagGrounding,
    sourceContextGrounding,
    localGrounding,
    entityLexicalAlignment: lexicalAlignment.entity,
    attributeLexicalAlignment: lexicalAlignment.attribute,
    strongEntityAttributeLexicalAlignment: lexicalAlignment.strongEntityAttribute,
    lexicalAlignment: lexicalAlignment.any,
    templateSupport,
    stableSlotSupport,
    familyReuseCount: familyReuseEntries.length,
    groundedFamilyReuseCount: groundedFamilyReuseEntries.length,
    relaxedStableSlotFamilyGate: promotionSupport.relaxedStableSlotFamilyGate,
    supportingEntryIds: normalizeStringArray2([
      ...groundedExactReuseEntries.map((candidate) => candidate.id),
      ...groundedFamilyReuseEntries.map((candidate) => candidate.id),
      ...familyReuseEntries.filter((candidate) => isExampleEntry(candidate)).map((candidate) => candidate.id)
    ]),
    supportEvidence,
    rationaleFragments
  };
}
|
|
2312
|
+
/**
 * Creates the "no support" evaluation: no promotion class, every flag false,
 * zero counts and fresh empty lists on every call.
 *
 * @returns {object} an evaluation with the same keys, in the same order, as
 *   the object returned by `evaluateClaimKeySupport`.
 */
function createEmptyClaimKeySupportEvaluation() {
  const allFalse = (names) => Object.fromEntries(names.map((name) => [name, false]));
  return {
    autoApplyClass: null,
    ...allFalse([
      "supportedProposal",
      "trustedExactReuse",
      "trustedEntityFamilyReuse",
      "tagGrounding",
      "sourceContextGrounding",
      "localGrounding",
      "entityLexicalAlignment",
      "attributeLexicalAlignment",
      "strongEntityAttributeLexicalAlignment",
      "lexicalAlignment",
      "templateSupport",
      "stableSlotSupport"
    ]),
    familyReuseCount: 0,
    groundedFamilyReuseCount: 0,
    relaxedStableSlotFamilyGate: false,
    supportingEntryIds: [],
    supportEvidence: [],
    rationaleFragments: []
  };
}
|
|
2335
|
+
/**
 * Compacts a claim key and decides whether the compacted slot is short and
 * clean enough to be auto-applied.
 *
 * A slot qualifies when its attribute has between 1 and
 * `MAX_AUTO_APPLY_ATTRIBUTE_TOKENS` tokens and none of them is listed in
 * `AWKWARD_AUTO_APPLY_ATTRIBUTE_TOKENS`.
 *
 * @param {string} claimKey - claim key to compact.
 * @param {{ priorCompactedFrom?: string|null, priorCompactionReason?: string|null }} [prior]
 *   compaction history from an earlier step, merged into the result.
 * @returns {{ claimKey: string, compactedFrom: string|null, compactionReason: string|null,
 *   compactEnoughForAutoApply: boolean, blockerReason: string|null }}
 */
function evaluateClaimKeyCompactness(claimKey, prior) {
  const compacted = compactClaimKey(claimKey);
  if (!compacted) {
    // The key could not be compacted at all: echo it back as invalid.
    return {
      claimKey,
      compactedFrom: null,
      compactionReason: null,
      compactEnoughForAutoApply: false,
      blockerReason: "invalid_claim_key"
    };
  }

  const attributeTokens = compacted.attribute.split("_").filter((token) => token.length > 0);
  const compactEnoughForAutoApply =
    attributeTokens.length > 0 &&
    attributeTokens.length <= MAX_AUTO_APPLY_ATTRIBUTE_TOKENS &&
    attributeTokens.every((token) => !AWKWARD_AUTO_APPLY_ATTRIBUTE_TOKENS.has(token));

  const compactedFrom = compacted.compactedFrom ?? prior?.priorCompactedFrom ?? null;

  let compactionReason;
  if (compacted.reason && prior?.priorCompactionReason) {
    // Both steps compacted: chain the reasons, earliest first.
    compactionReason = `${prior.priorCompactionReason} and ${compacted.reason}`;
  } else {
    compactionReason = compacted.reason ?? prior?.priorCompactionReason ?? null;
  }

  return {
    claimKey: compacted.claimKey,
    compactedFrom,
    compactionReason,
    compactEnoughForAutoApply,
    blockerReason: compactEnoughForAutoApply ? null : "non_compact_canonical_slot"
  };
}
|
|
2358
|
+
/**
 * Normalizes entry tags for grounding comparisons: each tag goes through
 * `normalizeClaimKeySegment`, empty results are dropped and the rest is passed
 * to `normalizeStringArray2`.
 *
 * @param {string[]|null|undefined} tags - raw tags; nullish means no tags.
 * @returns {string[]} normalized tags.
 */
function normalizeGroundingTags(tags) {
  const rawTags = tags ?? [];
  const segments = rawTags.flatMap((tag) => {
    const segment = normalizeClaimKeySegment(tag);
    return segment.length > 0 ? [segment] : [];
  });
  return normalizeStringArray2(segments);
}
|
|
2361
|
+
/**
 * Splits free text into grounding tokens.
 *
 * The text is split on runs of characters outside `[a-zA-Z0-9]`, and each
 * piece goes through `normalizeClaimKeySegment`. Tokens of two characters or
 * fewer and tokens listed in `GROUNDING_STOP_TOKENS` are discarded; the rest
 * is passed to `normalizeStringArray2`.
 *
 * @param {string|null|undefined} value - text to tokenize.
 * @returns {string[]} grounding tokens; empty for falsy input.
 */
function tokenizeGroundingText(value) {
  if (!value) {
    return [];
  }
  const tokens = [];
  for (const fragment of value.split(/[^a-zA-Z0-9]+/u)) {
    const token = normalizeClaimKeySegment(fragment);
    if (token.length > 2 && !GROUNDING_STOP_TOKENS.has(token)) {
      tokens.push(token);
    }
  }
  return normalizeStringArray2(tokens);
}
|
|
2369
|
+
/**
 * Collects the lexical tokens local to an entry: tokens from its subject,
 * content and source context (in that order), followed by its normalized
 * tags, all passed through `normalizeStringArray2`.
 *
 * @param {{ subject?: string, content?: string, source_context?: string, tags?: string[] }} entry
 * @returns {string[]} combined token list.
 */
function buildEntryLocalLexicalTokens(entry) {
  const subjectTokens = tokenizeGroundingText(entry.subject);
  const contentTokens = tokenizeGroundingText(entry.content);
  const sourceContextTokens = tokenizeGroundingText(entry.source_context);
  const tagTokens = normalizeGroundingTags(entry.tags);
  const combined = [];
  for (const group of [subjectTokens, contentTokens, sourceContextTokens, tagTokens]) {
    combined.push(...group);
  }
  return normalizeStringArray2(combined);
}
|
|
2377
|
+
/**
 * Picks the auto-apply promotion class for a candidate from its support
 * signals. The rules are checked in order and the first match wins:
 *
 * 1. exact reuse plus attribute alignment or template support;
 * 2. template support with local grounding, family reuse and entity or
 *    attribute alignment;
 * 3. stable slot with local grounding, grounded family reuse and attribute
 *    alignment, given at least two family entries, or exactly one together
 *    with strong entity+attribute alignment (the relaxed gate);
 * 4. local grounding with grounded family reuse and strong alignment.
 *
 * @param {object} input - `exactReuseCount`, `familyReuseCount`,
 *   `groundedFamilyReuseCount`, `localGrounding`, `templateSupport`,
 *   `stableSlotSupport` and `lexicalAlignment`.
 * @returns {{ autoApplyClass: string|null, relaxedStableSlotFamilyGate: * }}
 *   the class (or null) and whether the relaxed gate was what admitted it.
 */
function resolveClaimKeyPromotionSupport(input) {
  const { lexicalAlignment } = input;
  const decide = (autoApplyClass, relaxedStableSlotFamilyGate) => ({
    autoApplyClass,
    relaxedStableSlotFamilyGate
  });

  if (input.exactReuseCount > 0 && (lexicalAlignment.attribute || input.templateSupport)) {
    return decide("trusted_exact_reuse_grounded", false);
  }

  if (
    input.templateSupport &&
    input.localGrounding &&
    input.familyReuseCount > 0 &&
    (lexicalAlignment.attribute || lexicalAlignment.entity)
  ) {
    return decide("trusted_family_template_grounded", false);
  }

  // Shared precondition of the stable-slot rule and its relaxed gate.
  const groundedStableSlot = input.stableSlotSupport && input.localGrounding && input.groundedFamilyReuseCount > 0;
  const relaxedStableSlotFamilyGate =
    groundedStableSlot && input.familyReuseCount === 1 && lexicalAlignment.strongEntityAttribute;
  if (
    groundedStableSlot &&
    (input.familyReuseCount >= 2 || relaxedStableSlotFamilyGate) &&
    lexicalAlignment.attribute
  ) {
    return decide("trusted_family_stable_slot", relaxedStableSlotFamilyGate);
  }

  if (input.localGrounding && input.groundedFamilyReuseCount > 0 && lexicalAlignment.strongEntityAttribute) {
    return decide("trusted_family_grounded_alignment", false);
  }

  return decide(null, false);
}
|
|
2408
|
+
/**
 * Reports whether an entry shares grounding with one trusted entry.
 *
 * @param {Set<string>} entryTagSet - the entry's normalized tags.
 * @param {Set<string>} entrySourceTokens - the entry's source-context tokens.
 * @param {{ tags: Iterable<string>, sourceContextTokens: Iterable<string> }} trustedEntry
 * @returns {{ tagGrounding: boolean, sourceContextGrounding: boolean }} true
 *   for each dimension where at least one value is shared.
 */
function inspectGroundingOverlap(entryTagSet, entrySourceTokens, trustedEntry) {
  const sharedTagCount = countSetOverlap(entryTagSet, trustedEntry.tags);
  const sharedSourceTokenCount = countSetOverlap(entrySourceTokens, trustedEntry.sourceContextTokens);
  return {
    tagGrounding: sharedTagCount > 0,
    sourceContextGrounding: sharedSourceTokenCount > 0
  };
}
|
|
2414
|
+
/**
 * Measures how many tokens of a proposed entity and attribute appear in the
 * entry's own lexical tokens.
 *
 * Attribute tokens listed in `GROUNDING_STOP_TOKENS` are ignored. Attribute
 * alignment is "strong" when at least `min(attribute token count, 2)` tokens
 * overlap.
 *
 * @param {object} entry - entry handed to `buildEntryLocalLexicalTokens`.
 * @param {string} entity - `_`-separated entity segment.
 * @param {string} attribute - `_`-separated attribute segment.
 * @returns {{ entity: boolean, attribute: boolean, any: boolean,
 *   strongEntityAttribute: boolean, entityOverlapCount: number,
 *   attributeOverlapCount: number }}
 */
function inspectCandidateLexicalAlignment(entry, entity, attribute) {
  const splitSegments = (value) => value.split("_").filter((segment) => segment.length > 0);
  const entryTokens = new Set(buildEntryLocalLexicalTokens(entry));
  const entityTokens = splitSegments(entity);
  const attributeTokens = splitSegments(attribute).filter((segment) => !GROUNDING_STOP_TOKENS.has(segment));

  const entityOverlapCount = countSetOverlap(entryTokens, entityTokens);
  const attributeOverlapCount = countSetOverlap(entryTokens, attributeTokens);
  const entityAligned = entityOverlapCount > 0;
  const attributeAligned = attributeOverlapCount > 0;
  const requiredAttributeOverlap = Math.min(attributeTokens.length, 2);
  const attributeStronglyAligned = attributeTokens.length > 0 && attributeOverlapCount >= requiredAttributeOverlap;

  return {
    entity: entityAligned,
    attribute: attributeAligned,
    any: entityAligned || attributeAligned,
    strongEntityAttribute: entityAligned && attributeStronglyAligned,
    entityOverlapCount,
    attributeOverlapCount
  };
}
|
|
2432
|
+
/**
 * Checks whether an entry reads like one of three conservative templates
 * (authoritative source, policy/default, architecture) AND the proposed
 * attribute contains a token from that template's attribute set.
 *
 * The phrase patterns run against the lower-cased subject and content, joined
 * by a newline. Templates are tried in the order listed above and the first
 * one satisfying both conditions wins.
 *
 * @param {{ subject: string, content: string }} entry
 * @param {string} attribute - `_`-separated attribute segment.
 * @returns {boolean} true when any template matches.
 */
function matchesConservativeTemplateSupport(entry, attribute) {
  const attributeTokens = new Set(attribute.split("_").filter((token) => token.length > 0));
  const combinedText = [entry.subject.toLowerCase(), entry.content.toLowerCase()].join("\n");
  const templates = [
    [
      /\b(authoritative|source of truth|source of record|canonical guide|canonical reference|primary guide|runbook)\b/u,
      AUTHORITATIVE_TEMPLATE_ATTRIBUTE_TOKENS
    ],
    [
      /\b(should|must|should stay|must stay|always|never|default(?:s)? to|default(?:s)?|policy|guardrail|required|preference|prefers?)\b/u,
      POLICY_TEMPLATE_ATTRIBUTE_TOKENS
    ],
    [
      /\b(uses|supports|backed by|architecture|boundary|workflow|process|pipeline|adapter|layer|contract|interface|surface)\b/u,
      ARCHITECTURE_TEMPLATE_ATTRIBUTE_TOKENS
    ]
  ];
  return templates.some(
    ([phrasePattern, templateTokens]) => phrasePattern.test(combinedText) && intersects(attributeTokens, templateTokens)
  );
}
|
|
2453
|
+
/**
 * Checks whether an attribute is a short slot whose head (its last `_` token)
 * is listed in `STABLE_FAMILY_SLOT_ATTRIBUTE_HEADS`.
 *
 * @param {string} attribute - `_`-separated attribute segment.
 * @returns {boolean} false when the attribute has no tokens or more than
 *   `MAX_AUTO_APPLY_ATTRIBUTE_TOKENS` tokens.
 */
function matchesStableFamilySlotSupport(attribute) {
  const segments = attribute.split("_").filter((segment) => segment.length > 0);
  if (segments.length === 0 || segments.length > MAX_AUTO_APPLY_ATTRIBUTE_TOKENS) {
    return false;
  }
  const [head] = segments.slice(-1);
  return typeof head === "string" && STABLE_FAMILY_SLOT_ATTRIBUTE_HEADS.has(head);
}
|
|
2461
|
+
/**
 * Counts how many values of `right` are members of `left`. Repeated values in
 * `right` are counted each time they occur.
 *
 * @param {{ has(value: *): boolean }} left - membership set.
 * @param {Iterable<*>} right - values to look up.
 * @returns {number} number of hits.
 */
function countSetOverlap(left, right) {
  return Array.from(right).reduce((hits, value) => (left.has(value) ? hits + 1 : hits), 0);
}
|
|
2470
|
+
/**
 * Tells whether any value of `left` is a member of `right`.
 *
 * @param {Iterable<*>} left - values to look up.
 * @param {{ has(value: *): boolean }} right - membership set.
 * @returns {boolean} true on the first shared value, false when none exists.
 */
function intersects(left, right) {
  return Array.from(left).some((value) => right.has(value));
}
|
|
2478
|
+
/**
 * Returns the truthy values of `values` with duplicates removed, keeping the
 * order of first occurrence.
 *
 * @param {Iterable<*>} values - values to normalize.
 * @returns {Array<*>} a new array; the input is not modified.
 */
function normalizeStringArray2(values) {
  // A Set iterates in insertion order, which is first-occurrence order here.
  const unique = /* @__PURE__ */ new Set();
  for (const value of values) {
    if (value) {
      unique.add(value);
    }
  }
  return [...unique];
}
|
|
2490
|
+
|
|
2491
|
+
// src/core/store/claim-extraction.ts
|
|
2492
|
+
// Entity segments that refer to the speaker, their team or the current
// project rather than to a named entity.
var SELF_REFERENTIAL_ENTITIES = /* @__PURE__ */ new Set([
  "i",
  "me",
  "the_user",
  "myself",
  "user",
  "we",
  "our_team",
  "the_project",
  "this_project"
]);
// The self-referential entities that refer to the user.
var USER_REFERENTIAL_ENTITIES = /* @__PURE__ */ new Set(["i", "me", "myself", "the_user", "user"]);
// The self-referential entities that refer to the current project.
var PROJECT_REFERENTIAL_ENTITIES = /* @__PURE__ */ new Set(["the_project", "this_project"]);
// Attribute heads recognized by the deterministic path.
// NOTE(review): the consumer is outside this section - confirm the exact use.
var DETERMINISTIC_ATTRIBUTE_HEADS = /* @__PURE__ */ new Set([
  "budget", "city", "config", "deadline", "email",
  "employer", "language", "limit", "location", "mode",
  "model", "name", "owner", "plan", "policy",
  "preference", "priority", "quota", "region", "role",
  "schedule", "setting", "status", "strategy", "team",
  "theme", "timezone", "version", "window"
]);
// Size limits and the repair confidence.
// NOTE(review): their consumers are outside this section - confirm the use.
var MAX_ENTITY_HINTS = 12;
var MAX_CLAIM_KEY_EXAMPLES = 8;
var MAX_SUPPORT_CLAIM_KEY_EXAMPLES = 128;
var DEFAULT_REPAIR_CONFIDENCE = 0.86;
// Auto-apply thresholds read by extractClaimKeyDecision for candidates below
// the configured ingest threshold: no promotion class, a promotion class, and
// a promotion class on a compacted key, respectively.
var HIGH_CONFIDENCE_BACKFILL_THRESHOLD = 0.92;
var SUPPORTED_INGEST_AUTO_APPLY_THRESHOLD = 0.72;
var COMPACTED_SUPPORTED_INGEST_AUTO_APPLY_THRESHOLD = 0.74;
// Minimum confidence for a candidate to be flagged reviewable, without and
// with a supported proposal.
var PROPOSAL_CONFIDENCE_THRESHOLD = 0.75;
var SUPPORTED_PROPOSAL_CONFIDENCE_THRESHOLD = 0.65;
|
|
2535
|
+
/**
 * Applies an extracted claim key to an entry.
 *
 * The lifecycle is built by `buildExtractedClaimKeyLifecycle` from the
 * extraction result and the support context that
 * `buildInferredIngestClaimKeySupportContext` derives from the entry. Nothing
 * is applied when no lifecycle is produced.
 *
 * @param {object} entry - entry handed to `applyClaimKeyLifecycle`.
 * @param {object} extracted - claim extraction result.
 * @returns {void}
 */
function applyClaimExtractionResultToEntry(entry, extracted) {
  const supportContext = buildInferredIngestClaimKeySupportContext(entry);
  const lifecycle = buildExtractedClaimKeyLifecycle(extracted, supportContext);
  if (lifecycle) {
    applyClaimKeyLifecycle(entry, lifecycle);
  }
}
|
|
2542
|
+
/**
 * Previews the claim key that extraction would propose for an entry, without
 * applying it.
 *
 * Outcomes are reported through `options.onPreviewOutcome` when the model
 * answers (`no_claim`, `candidate`, `rejected_candidate`). When the model call
 * throws or its candidate is rejected, `tryDeterministicClaimKeyRepair` is
 * used as a fallback.
 *
 * @param {object} entry - entry to inspect (`type` decides eligibility).
 * @param {object} llm - model client handed to `attemptClaimExtraction`.
 * @param {{ enabled: boolean, eligibleTypes: string[] }} config
 * @param {{ hints?: object, onWarning?: Function, onPreviewOutcome?: Function }} [options]
 * @returns {Promise<object|null>} the previewed candidate, a deterministic
 *   repair result, or null (disabled, ineligible type, `no_claim`, or no
 *   repair available).
 * @throws rethrows the extraction error when the model call fails and no
 *   deterministic repair is available.
 */
async function previewClaimKeyExtraction(entry, llm, config, options = {}) {
  const eligible = config.enabled && config.eligibleTypes.includes(entry.type);
  if (!eligible) {
    return null;
  }
  const normalizedHints = normalizeClaimExtractionHints(options.hints ?? {});

  let attempt;
  try {
    attempt = await attemptClaimExtraction(entry, normalizedHints, llm);
  } catch (error) {
    const fallback = tryDeterministicClaimKeyRepair(entry, normalizedHints);
    if (!fallback) {
      throw error;
    }
    return fallback;
  }

  if (attempt.response.no_claim === true) {
    options.onPreviewOutcome?.(buildPreviewOutcome("no_claim", attempt));
    return null;
  }

  const candidate = buildClaimExtractionCandidate(entry, attempt.response, normalizedHints, options.onWarning);
  if (!candidate) {
    options.onPreviewOutcome?.(buildPreviewOutcome("rejected_candidate", attempt));
    return tryDeterministicClaimKeyRepair(entry, normalizedHints);
  }

  options.onPreviewOutcome?.({
    outcome: "candidate",
    confidence: candidate.confidence,
    rawEntity: candidate.rawEntity,
    rawAttribute: candidate.rawAttribute,
    path: attempt.path
  });
  const preview = {
    claimKey: candidate.claimKey,
    confidence: candidate.confidence,
    rawEntity: candidate.rawEntity,
    rawAttribute: candidate.rawAttribute,
    path: attempt.path
  };
  // Compaction details are only present when the key was actually compacted.
  if (candidate.compactedFrom) {
    preview.compactedFrom = candidate.compactedFrom;
    preview.compactionReason = candidate.compactionReason;
  }
  return preview;
}
|
|
2585
|
+
/**
 * Decides whether a claim key should be applied to an entry and explains the
 * decision with a diagnostic.
 *
 * Flow:
 * 1. Disabled config or ineligible type: `ineligible_type`.
 * 2. The model call throws: deterministic repair if available, otherwise
 *    `extraction_failure` (also reported through `options.onWarning`).
 * 3. The model returns `no_claim`: `no_claim`.
 * 4. The candidate cannot be built: deterministic repair if available,
 *    otherwise `rejected_candidate`.
 * 5. Confidence meets `config.confidenceThreshold`: accepted.
 * 6. Otherwise support and compactness are evaluated. The candidate is
 *    accepted when it clears the resulting auto-apply threshold and is
 *    compact enough. Failing that, a deterministic repair is used when it
 *    yields the same key (or the result has no key). Otherwise the outcome is
 *    `low_confidence_candidate`, flagged reviewable only when the proposal
 *    threshold is met.
 *
 * @param {object} entry - `type`, `subject`, `content`, `tags`, `source_context`.
 * @param {object} llm - model client handed to `attemptClaimExtraction`.
 * @param {{ enabled: boolean, eligibleTypes: string[], confidenceThreshold: number }} config
 * @param {{ hints?: object, onWarning?: Function, supportClaimKeys?: string[],
 *   entityPrefixStats?: * }} [options]
 * @returns {Promise<{ result: object|null, diagnostic: object }>}
 */
async function extractClaimKeyDecision(entry, llm, config, options = {}) {
  // Builds a decision without a result. The defaults pin the diagnostic key
  // order; explicit overrides (even undefined ones) replace them.
  const withoutResult = (diagnostic) => ({
    result: null,
    diagnostic: {
      outcome: null,
      confidence: null,
      path: null,
      warning: null,
      suggestedClaimKey: null,
      reviewable: false,
      supportEvidence: [],
      rationale: null,
      ...diagnostic
    }
  });

  if (!config.enabled || !config.eligibleTypes.includes(entry.type)) {
    return withoutResult({
      outcome: "ineligible_type",
      rationale: "entry type is not eligible for claim-key extraction"
    });
  }

  const normalizedHints = normalizeClaimExtractionHints(options.hints ?? {});
  let attempt;
  try {
    attempt = await attemptClaimExtraction(entry, normalizedHints, llm);
  } catch (error) {
    const fallback = tryDeterministicClaimKeyRepair(entry, normalizedHints);
    if (fallback) {
      return finalizeDeterministicRepairDecision(fallback, options.entityPrefixStats);
    }
    const warning = formatClaimExtractionError(error);
    options.onWarning?.(`Claim extraction failed for "${entry.subject}": ${warning}`);
    return withoutResult({
      outcome: "extraction_failure",
      warning,
      rationale: "claim extraction failed before a safe candidate could be produced"
    });
  }

  if (attempt.response.no_claim === true) {
    return withoutResult({
      outcome: "no_claim",
      confidence: normalizeConfidence(attempt.response.confidence),
      path: attempt.path,
      rationale: "model explicitly returned no_claim"
    });
  }

  // Candidate warnings are kept for the diagnostic and forwarded immediately.
  const warnings = [];
  const collectWarning = (warning) => {
    warnings.push(warning);
    options.onWarning?.(warning);
  };
  const candidate = buildClaimExtractionCandidate(entry, attempt.response, normalizedHints, collectWarning);
  if (!candidate) {
    const fallback = tryDeterministicClaimKeyRepair(entry, normalizedHints);
    if (fallback) {
      return finalizeDeterministicRepairDecision(fallback, options.entityPrefixStats);
    }
    return withoutResult({
      outcome: "rejected_candidate",
      confidence: normalizeConfidence(attempt.response.confidence),
      path: attempt.path,
      warning: warnings[0] ?? null,
      rationale: "model proposed a structurally unsafe or non-canonical claim key"
    });
  }

  const result = toClaimExtractionResult(candidate, attempt.path);
  if (result.confidence >= config.confidenceThreshold) {
    return {
      result,
      diagnostic: buildAcceptedDiagnostic(result, "candidate met the ingest confidence threshold")
    };
  }

  // Below the ingest threshold: look for corroborating support.
  const proposedClaimKey = result.claimKey ?? "";
  const support = evaluateClaimKeySupport(
    {
      subject: entry.subject,
      content: entry.content,
      type: entry.type,
      tags: entry.tags,
      source_context: entry.source_context
    },
    proposedClaimKey,
    buildClaimKeySupportSeedFromExamples(options.supportClaimKeys ?? [])
  );
  const compactness = evaluateClaimKeyCompactness(proposedClaimKey, {
    priorCompactedFrom: result.compactedFrom ?? null,
    priorCompactionReason: result.compactionReason ?? null
  });

  const hasPromotionClass = support.autoApplyClass !== null;
  let autoApplyThreshold = HIGH_CONFIDENCE_BACKFILL_THRESHOLD;
  if (hasPromotionClass) {
    autoApplyThreshold = compactness.compactedFrom
      ? COMPACTED_SUPPORTED_INGEST_AUTO_APPLY_THRESHOLD
      : SUPPORTED_INGEST_AUTO_APPLY_THRESHOLD;
  }
  const proposalThreshold = support.supportedProposal
    ? SUPPORTED_PROPOSAL_CONFIDENCE_THRESHOLD
    : PROPOSAL_CONFIDENCE_THRESHOLD;

  if (compactness.claimKey !== result.claimKey) {
    result.claimKey = compactness.claimKey;
    result.compactedFrom = compactness.compactedFrom;
    result.compactionReason = compactness.compactionReason;
  }

  if (result.confidence >= autoApplyThreshold && compactness.compactEnoughForAutoApply) {
    let acceptedRationale;
    if (hasPromotionClass) {
      result.acceptanceRationale = `accepted below the default threshold via ${describeSupportPromotionClass(support)}`;
      acceptedRationale = `supported near-miss candidate cleared the conservative auto-apply threshold via ${describeSupportPromotionClass(support)}`;
    } else {
      result.acceptanceRationale = "accepted as a high-confidence preview";
      acceptedRationale = `candidate cleared the conservative high-confidence threshold of ${autoApplyThreshold.toFixed(2)}`;
    }
    return {
      result,
      diagnostic: buildAcceptedDiagnostic(result, acceptedRationale)
    };
  }

  // A deterministic repair only wins when it agrees with the model's key.
  const repaired = tryDeterministicClaimKeyRepair(entry, normalizedHints);
  if (repaired && (!result.claimKey || repaired.claimKey === result.claimKey)) {
    return finalizeDeterministicRepairDecision(repaired, options.entityPrefixStats);
  }

  const reviewable = result.confidence >= proposalThreshold;
  let rationale;
  if (!reviewable) {
    rationale = "candidate stayed below both the conservative auto-apply and review thresholds";
  } else if (support.rationaleFragments.length > 0) {
    rationale = `candidate stayed below the auto-apply threshold but has structured support from ${support.rationaleFragments.join(", ")}`;
  } else {
    rationale = `candidate stayed below the auto-apply threshold of ${autoApplyThreshold.toFixed(2)}`;
  }
  return withoutResult({
    outcome: "low_confidence_candidate",
    confidence: result.confidence,
    path: result.path,
    warning: warnings[0] ?? null,
    suggestedClaimKey: result.claimKey,
    reviewable,
    supportEvidence: support.supportEvidence,
    rationale
  });
}
|
|
2737
|
+
/**
 * Loads entity hints by delegating to `db.getDistinctClaimKeyPrefixes()`.
 *
 * @param {{ getDistinctClaimKeyPrefixes: Function }} db - database port.
 * @returns {Promise<*>} whatever the database port resolves to.
 */
async function getEntityHints(db) {
  const distinctPrefixes = db.getDistinctClaimKeyPrefixes();
  return distinctPrefixes;
}
|
|
2740
|
+
/**
 * Runs claim-key extraction over every entry of a batch of results.
 *
 * Entries are processed one after another because each accepted claim key is
 * recorded in the shared hint state and can influence later decisions.
 * Entries that already have a claim key only feed the hint state. Entries
 * without an accepted key are retried once after the first pass, provided at
 * least one key was accepted by then. Finally, the latest diagnostic of every
 * entry is reported through `onDiagnostic` in batch order.
 *
 * @param {Iterable<{ entries: Iterable<object> }>} results - batch results.
 * @param {{ db: object, createLlm: Function }} ports - database and model ports.
 * @param {{ enabled: boolean, eligibleTypes: string[] }} config
 * @param {number} [_concurrency=10] - accepted but not read by this function.
 * @param {Function} [onWarning] - forwarded to the per-entry decision.
 * @param {Function} [onDiagnostic] - called with `(entry, diagnostic)`.
 * @returns {Promise<Map<object, object>>} accepted extraction results keyed by
 *   entry; empty when extraction is disabled.
 */
async function runBatchClaimExtraction(results, ports, config, _concurrency = 10, onWarning, onDiagnostic) {
  const extractedEntries = /* @__PURE__ */ new Map();
  if (!config.enabled) {
    return extractedEntries;
  }
  const hintState = await loadClaimExtractionHintState(ports.db);
  const llm = ports.createLlm();
  const diagnostics = /* @__PURE__ */ new Map();
  const retryEntries = [];

  const eachEntry = function* () {
    for (const result of results) {
      yield* result.entries;
    }
  };
  // Runs one extraction attempt; resolves to true when a claim key was applied.
  const attemptExtraction = async (entry) => {
    const decision = await extractBatchClaimKeyDecision(entry, llm, config, hintState, onWarning);
    diagnostics.set(entry, decision.diagnostic);
    if (!decision.result?.claimKey) {
      return false;
    }
    applyClaimExtractionResultToEntry(entry, decision.result);
    recordClaimKeyHint(hintState, decision.result.claimKey);
    extractedEntries.set(entry, decision.result);
    return true;
  };

  for (const entry of eachEntry()) {
    if (entry.claim_key) {
      recordClaimKeyHint(hintState, entry.claim_key);
      continue;
    }
    if (!config.eligibleTypes.includes(entry.type)) {
      diagnostics.set(entry, {
        outcome: "ineligible_type",
        confidence: null,
        path: null,
        warning: null,
        suggestedClaimKey: null,
        reviewable: false,
        supportEvidence: [],
        rationale: "entry type is not eligible for claim-key extraction"
      });
      continue;
    }
    const applied = await attemptExtraction(entry);
    if (!applied) {
      retryEntries.push(entry);
    }
  }

  // A retry only makes sense when the first pass added new hints.
  if (retryEntries.length > 0 && extractedEntries.size > 0) {
    for (const entry of retryEntries) {
      if (!entry.claim_key) {
        await attemptExtraction(entry);
      }
    }
  }

  for (const entry of eachEntry()) {
    const diagnostic = diagnostics.get(entry);
    if (diagnostic) {
      onDiagnostic?.(entry, diagnostic);
    }
  }
  return extractedEntries;
}
|
|
2804
|
+
/**
 * Runs claim-key extraction for one batch entry and never rejects.
 *
 * Forwards a trimmed view of the entry (type, subject, content, tags,
 * source_context) to `extractClaimKeyDecision`, together with per-entry hints
 * and a snapshot of the shared hint state. Any error thrown along the way is
 * converted into an "extraction_failure" decision with a null result.
 *
 * @param {object} entry - Entry being processed.
 * @param {object} llm - LLM client passed through to the extractor.
 * @param {object} config - Extraction config passed through to the extractor.
 * @param {object} hintState - Shared hint state (supportClaimKeys, entityPrefixStats, ...).
 * @param {Function} [onWarning] - Optional warning sink passed through.
 * @returns {Promise<{result: object|null, diagnostic: object}>}
 */
async function extractBatchClaimKeyDecision(entry, llm, config, hintState, onWarning) {
  const failureMessage = "claim extraction failed unexpectedly";
  try {
    const { type, subject, content, tags, source_context } = entry;
    const extractionInput = { type, subject, content, tags, source_context };
    const extractionOptions = {
      hints: buildEntryHints(hintState, entry),
      onWarning,
      // Copy so the extractor cannot mutate the shared list.
      supportClaimKeys: [...hintState.supportClaimKeys],
      entityPrefixStats: hintState.entityPrefixStats
    };
    return await extractClaimKeyDecision(extractionInput, llm, config, extractionOptions);
  } catch {
    // Deliberate best-effort: one failing entry must not abort the batch.
    const diagnostic = {
      outcome: "extraction_failure",
      confidence: null,
      path: null,
      warning: failureMessage,
      suggestedClaimKey: null,
      reviewable: false,
      supportEvidence: [],
      rationale: failureMessage
    };
    return { result: null, diagnostic };
  }
}
|
|
2839
|
+
/**
 * Builds the system prompt used for claim-key extraction.
 *
 * The prompt is assembled from fixed sections (rules, examples, field rules)
 * plus lines rendered from `hints`. When `promptMode` is "json_retry", an extra
 * section telling the model its previous answer was invalid JSON is inserted
 * before the final response-format line.
 *
 * @param {object} hints - Normalized hints; reads entityHints, claimKeyExamples,
 *   userEntity, projectEntity, tags and sourceContext.
 * @param {string} promptMode - "json_retry" adds the retry section; any other value does not.
 * @returns {string} Prompt text, lines joined with "\n".
 */
function buildClaimExtractionSystemPrompt(hints, promptMode) {
  const renderList = (items) => (items.length > 0 ? items.join(", ") : "(none)");

  const metadataHints = [];
  if (hints.userEntity) {
    metadataHints.push(`user_id=${hints.userEntity}`);
  }
  if (hints.projectEntity) {
    metadataHints.push(`project=${hints.projectEntity}`);
  }

  const groundingHints = [];
  if (hints.tags.length > 0) {
    groundingHints.push(`tags=${hints.tags.join(", ")}`);
  }
  if (hints.sourceContext) {
    groundingHints.push(`source_context=${hints.sourceContext}`);
  }

  const introSection = [
    "You are a knowledge entry classifier. Extract one stable claim key for a durable knowledge entry.",
    "A claim key names the durable slot this entry updates: entity/attribute in lowercase snake_case.",
    "The goal is stable slot naming, not a paraphrase of the current value."
  ];
  const stabilitySection = [
    "Stability rules:",
    "- Prefer stable slot names over transient wording.",
    "- Choose attribute names that still make sense if the value changes.",
    "- Prefer short noun-like slot names over sentence-like attribute phrases.",
    "- When a candidate sounds like a rule or explanation sentence, compress it into the reusable slot it governs.",
    "- Prefer concrete entities over pronouns, deictic phrases, or self-referential placeholders.",
    "- Reuse an existing entity or full claim-key example when it clearly matches the same slot.",
    "- Stay domain-general. The same rules apply to people, devices, services, projects, places, organizations, products, datasets, policies, and preferences.",
    "- If the entry states a durable rule, default, workflow, guardrail, source-of-truth rule, architecture boundary, or process constraint plus rationale, extract the primary durable slot rather than the supporting rationale.",
    "- Do not return no_claim just because the entry explains why the rule exists. The durable policy or system slot is usually still the target.",
    "- Avoid full action clauses like requires_x_to_y, preserves_x_across_y, or x_precedes_y when a shorter stable slot such as trigger_condition, context_preservation, source_of_truth, or handoff_order would carry the same durable meaning."
  ];
  const noClaimSection = [
    "Return no_claim when:",
    "- The entry is narrative, multi-fact, or mostly a story about what happened.",
    "- The entry is an event or milestone without one continuing slot.",
    "- The entity is ambiguous or can only be named with a pronoun or vague placeholder.",
    "- The entry does not express one durable property, preference, decision, configuration, relationship, or other stable slot.",
    "- When unsure, prefer no_claim over inventing a weak key."
  ];
  const positiveSection = [
    "Positive examples:",
    `- "Jim's timezone is America/Chicago." -> jim/timezone`,
    '- "Jim prefers oat milk in coffee." -> jim/coffee_preference',
    '- "Pixel 8 is set to dark mode." -> pixel_8/theme_mode',
    '- "Postgres max_connections is 200." -> postgres/max_connections',
    '- "Agenr defaults to gpt-5.4-mini." -> agenr/default_model',
    '- "Mac mini updates should stay manual so debugging stays predictable." -> mac_mini/manual_update_policy',
    '- "Use the warehouse inventory sheet as the source of truth for stock counts." -> stock_counts/source_of_truth',
    '- "The repo workflow is defined by AGENTS.md, even when older notes disagree." -> repo_workflow/source_of_truth',
    '- "Agenr keeps pure logic in src/core and adapters outside it so future hosts can plug in cleanly." -> agenr/core_adapter_boundary',
    '- "The before-prompt-build hook only triggers after a real agent turn or message." -> before_prompt_build_hook/trigger_condition',
    '- "Durable memory preserves context across sessions." -> durable_memory/context_preservation',
    '- "SQLite in this environment supports window functions." -> sqlite/window_function_support',
    '- "Meeting-recorder transcripts need manual cleanup before durable ingest." -> meeting_recorder/transcript_cleanup_workflow',
    '- "Reflection synthesis can hallucinate when it summarizes from partial notes." -> reflection_synthesis/hallucination_risk'
  ];
  const negativeSection = [
    "Negative examples:",
    "- Bad: jim/america_chicago -> Good: jim/timezone",
    "- Bad: project_x/details -> Good: project_x/deploy_strategy",
    "- Bad: we/deployment_process -> Good: platform_team/deploy_strategy",
    "- Bad: jim/oat_milk -> Good: jim/coffee_preference",
    "- Bad: release_notes/because_rollbacks_are_hard -> Good: release_process/source_of_truth",
    "- Bad: openclaw/requires_real_agent_turn_or_message_to_trigger -> Good: openclaw/trigger_condition",
    "- Bad: session_continuity/durable_memory_preserves_context_across_sessions -> Good: session_continuity/context_preservation",
    "- Bad: incident_story/we_spent_two_hours_debugging -> Good: no_claim"
  ];
  const fieldSection = [
    "Field rules:",
    "- entity: the main concrete thing being described. It can be a person, device, service, product, organization, workflow area, or other durable system/process anchor.",
    "- attribute: the narrow stable slot on that entity. For policy/process entries, name the governing slot such as source_of_truth, default_mode, update_policy, architecture_boundary, deploy_strategy, or escalation_workflow.",
    "- Confidence: 0.0 to 1.0. Use 0.9+ only when the slot is unambiguous and durable."
  ];
  const hintSection = [
    `Known entity hints: ${renderList(hints.entityHints)}`,
    `Known claim-key examples: ${renderList(hints.claimKeyExamples)}`,
    `Current entry metadata hints: ${renderList(metadataHints)}`,
    `Current entry grounding clues: ${renderList(groundingHints)}`,
    'If project metadata is present, it may resolve phrases like "the project" when that mapping is obvious.',
    'If user metadata is present, it may resolve phrases like "the user", "I", or "me" when that mapping is obvious.',
    "Tags and source_context are local grounding clues, not proof. Use them to pick the right durable slot only when the entry content already supports that slot."
  ];
  const retrySection = [
    "Your previous answer was invalid JSON.",
    "Reply with exactly one JSON object and nothing else.",
    "Do not use markdown fences, commentary, or trailing text."
  ];
  const responseSection = [
    'Respond with JSON: { "entity": string, "attribute": string, "confidence": number, "no_claim"?: boolean }'
  ];

  const sections = [
    introSection,
    stabilitySection,
    noClaimSection,
    positiveSection,
    negativeSection,
    fieldSection,
    hintSection
  ];
  if (promptMode === "json_retry") {
    sections.push(retrySection);
  }
  sections.push(responseSection);

  // Sections are separated by exactly one blank line.
  return sections
    .flatMap((section, index) => (index === 0 ? section : ["", ...section]))
    .join("\n");
}
|
|
2920
|
+
/**
 * Renders the user prompt for claim extraction: the entry's type, subject and
 * content, one per line.
 *
 * @param {{type: unknown, subject: unknown, content: unknown}} entry
 * @returns {string}
 */
function buildClaimExtractionUserPrompt(entry) {
  const typeLine = `Entry type: ${entry.type}`;
  const subjectLine = `Subject: ${entry.subject}`;
  const contentLine = `Content: ${entry.content}`;
  return `${typeLine}\n${subjectLine}\n${contentLine}`;
}
|
|
2923
|
+
/**
 * Asks the LLM for a claim-key JSON response, retrying once on malformed JSON.
 *
 * The first request uses the "standard" system prompt. If it fails with an
 * error that `isMalformedJsonError` recognizes, a second request is sent with
 * the "json_retry" prompt. Any other error is rethrown, and errors from the
 * retry itself propagate to the caller.
 *
 * @param {object} entry - Entry rendered into the user prompt.
 * @param {object} hints - Hints rendered into the system prompt.
 * @param {{completeJson: Function}} llm - LLM client.
 * @returns {Promise<{path: "model"|"json_retry", response: unknown}>}
 */
async function attemptClaimExtraction(entry, hints, llm) {
  const userPrompt = buildClaimExtractionUserPrompt(entry);
  const requestWithMode = (promptMode) =>
    llm.completeJson(buildClaimExtractionSystemPrompt(hints, promptMode), userPrompt);

  try {
    const response = await requestWithMode("standard");
    return { path: "model", response };
  } catch (error) {
    const retryable = isMalformedJsonError(error);
    if (!retryable) {
      throw error;
    }
  }

  const retryResponse = await requestWithMode("json_retry");
  return { path: "json_retry", response: retryResponse };
}
|
|
2940
|
+
/**
 * Turns a raw model response into a validated claim-key candidate.
 *
 * Steps: normalize confidence, trim the raw entity and attribute, normalize
 * both into a claim key, compact it, then validate it. Each failing step
 * reports through `onWarning` (when provided) and yields null.
 *
 * @param {{subject: string}} entry - Entry the response belongs to (subject is used in warnings).
 * @param {{entity?: unknown, attribute?: unknown, confidence?: unknown}} response - Model output.
 * @param {object} hints - Normalized hints used to resolve the entity.
 * @param {Function} [onWarning] - Optional warning sink.
 * @returns {object|null} Candidate with claimKey, confidence, raw fields and compaction info, or null.
 */
function buildClaimExtractionCandidate(entry, response, hints, onWarning) {
  const trimmedOrEmpty = (field) => (typeof field === "string" ? field.trim() : "");

  const confidence = normalizeConfidence(response.confidence);
  const rawEntity = trimmedOrEmpty(response.entity);
  const rawAttribute = trimmedOrEmpty(response.attribute);

  const normalized = normalizeClaimKey(
    `${normalizeEntity(rawEntity, hints)}/${normalizeClaimKeySegment(rawAttribute)}`
  );
  if (!normalized.ok) {
    onWarning?.(`Claim extraction dropped claim key for "${entry.subject}": ${describeClaimKeyNormalizationFailure(normalized.reason)}.`);
    return null;
  }

  const compacted = compactClaimKey(normalized.value.claimKey);
  if (!compacted) {
    onWarning?.(`Claim extraction dropped claim key for "${entry.subject}": claim key could not be compacted safely.`);
    return null;
  }

  const validated = validateExtractedClaimKey(compacted);
  if (!validated.ok) {
    onWarning?.(
      `Claim extraction rejected "${validated.value.claimKey}" for "${entry.subject}": ${describeExtractedClaimKeyRejection(validated.reason, validated.value)}.`
    );
    return null;
  }

  const { compactedFrom, reason: compactionReason } = compacted;
  return {
    claimKey: validated.value.claimKey,
    confidence,
    rawEntity,
    rawAttribute,
    compactedFrom,
    compactionReason
  };
}
|
|
2972
|
+
/**
 * Converts a candidate into an extraction result tagged with the path that
 * produced it. Compaction fields are copied only when the candidate has a
 * truthy `compactedFrom`.
 *
 * @param {object} candidate - Candidate with claimKey, confidence, rawEntity, rawAttribute.
 * @param {string} path4 - Extraction path label.
 * @returns {object}
 */
function toClaimExtractionResult(candidate, path4) {
  const { claimKey, confidence, rawEntity, rawAttribute } = candidate;
  const extraction = { claimKey, confidence, rawEntity, rawAttribute, path: path4 };
  if (candidate.compactedFrom) {
    extraction.compactedFrom = candidate.compactedFrom;
    extraction.compactionReason = candidate.compactionReason;
  }
  return extraction;
}
|
|
2985
|
+
/**
 * Builds the diagnostic record for an accepted extraction result.
 *
 * @param {{confidence: unknown, path: unknown, claimKey: unknown}} result - Accepted result.
 * @param {string} rationale - Explanation stored on the diagnostic.
 * @returns {object} Diagnostic with outcome "accepted".
 */
function buildAcceptedDiagnostic(result, rationale) {
  const { confidence, path, claimKey } = result;
  const diagnostic = {
    outcome: "accepted",
    confidence,
    path,
    warning: null,
    suggestedClaimKey: claimKey,
    reviewable: false,
    supportEvidence: [],
    rationale
  };
  return diagnostic;
}
|
|
2997
|
+
/**
 * Decides what to do with a deterministically repaired claim key.
 *
 * - No singleton-alias candidate: accept the repaired result as is.
 * - Candidate flagged `canonicalReuseSafe`: rewrite the key onto the dominant
 *   entity prefix, record an acceptance rationale on the rewritten result, and
 *   accept it.
 * - Otherwise: return no result and a reviewable "low_confidence_candidate"
 *   diagnostic that suggests the rewritten key.
 *
 * @param {object} repaired - Result produced by deterministic repair.
 * @param {Array<object>} entityPrefixStats - Entity-prefix statistics used for alias detection.
 * @returns {{result: object|null, diagnostic: object}}
 */
function finalizeDeterministicRepairDecision(repaired, entityPrefixStats) {
  const alias = findSingletonAliasReuseCandidate(repaired, entityPrefixStats);
  if (!alias) {
    return {
      result: repaired,
      diagnostic: buildAcceptedDiagnostic(repaired, "deterministic possessive-slot repair recovered the missing claim key")
    };
  }

  const { dominantEntityPrefix, aliasEntityPrefix } = alias;
  const rehomed = rewriteClaimKeyEntityPrefix(repaired, dominantEntityPrefix);

  if (!alias.canonicalReuseSafe) {
    // Reuse is not provably safe, so stage the suggestion for review.
    return {
      result: null,
      diagnostic: {
        outcome: "low_confidence_candidate",
        confidence: repaired.confidence,
        path: repaired.path,
        warning: null,
        suggestedClaimKey: rehomed.claimKey,
        reviewable: true,
        supportEvidence: alias.evidence.map((item) => item.kind),
        rationale: `deterministic repair would create singleton alias "${aliasEntityPrefix}" next to dominant trusted family "${dominantEntityPrefix}", so the new namespace was staged for review`
      }
    };
  }

  rehomed.acceptanceRationale = `reused dominant entity family "${dominantEntityPrefix}" instead of minting singleton alias "${aliasEntityPrefix}"`;
  return {
    result: rehomed,
    diagnostic: buildAcceptedDiagnostic(
      rehomed,
      `deterministic repair reused dominant family "${dominantEntityPrefix}" instead of new singleton alias "${aliasEntityPrefix}"`
    )
  };
}
|
|
3031
|
+
/**
 * Looks for a singleton-alias candidate whose alias prefix equals the entity
 * prefix of the repaired claim key.
 *
 * @param {{claimKey?: string}} repaired - Repaired extraction result.
 * @param {Array<object>} [entityPrefixStats] - Known entity-prefix statistics.
 * @returns {object|null} The matching candidate; null when there is no claim
 *   key, no stats, an empty prefix, or no match.
 */
function findSingletonAliasReuseCandidate(repaired, entityPrefixStats) {
  const { claimKey } = repaired;
  const hasStats = Boolean(entityPrefixStats) && entityPrefixStats.length !== 0;
  if (!claimKey || !hasStats) {
    return null;
  }

  // Text before the first "/" is the entity prefix.
  const entityPrefix = claimKey.split("/", 1)[0] ?? "";
  if (!entityPrefix) {
    return null;
  }

  const statsWithCandidate = summarizeAugmentedEntityPrefixStats(entityPrefixStats, entityPrefix);
  const candidates = detectClaimKeySingletonAliasCandidatesFromStats(statsWithCandidate);
  const match = candidates.find((candidate) => candidate.aliasEntityPrefix === entityPrefix);
  return match ?? null;
}
|
|
3043
|
+
/**
 * Ensures the stats list contains a profile for `entityPrefix`.
 *
 * If a profile with that prefix already exists, the input array is returned
 * unchanged (same reference). Otherwise a new array is returned with a
 * synthetic single-entry profile appended, counted as one active, tentative,
 * deterministic-repair entry.
 *
 * @param {Array<{entityPrefix: string}>} entityPrefixStats
 * @param {string} entityPrefix
 * @returns {Array<object>}
 */
function summarizeAugmentedEntityPrefixStats(entityPrefixStats, entityPrefix) {
  const known = entityPrefixStats.find((profile) => profile.entityPrefix === entityPrefix);
  if (known) {
    return entityPrefixStats;
  }

  const syntheticProfile = {
    entityPrefix,
    activeEntryCount: 1,
    trustedEntryCount: 0,
    tentativeEntryCount: 1,
    unresolvedEntryCount: 0,
    legacyEntryCount: 0,
    deterministicRepairEntryCount: 1,
    manualEntryCount: 0,
    modelEntryCount: 0,
    jsonRetryEntryCount: 0,
    surgeonFamilyReuseEntryCount: 0
  };
  return [...entityPrefixStats, syntheticProfile];
}
|
|
3065
|
+
/**
 * Returns a copy of `result` whose claim key uses `entityPrefix` as its entity
 * part. The attribute is the second "/"-separated segment of the existing key
 * (empty when there is none). A result without a claim key is returned as is.
 *
 * @param {{claimKey?: string}} result
 * @param {string} entityPrefix
 * @returns {object}
 */
function rewriteClaimKeyEntityPrefix(result, entityPrefix) {
  const currentKey = result.claimKey;
  if (!currentKey) {
    return result;
  }
  const segments = currentKey.split("/", 2);
  const attribute = segments[1] ?? "";
  return { ...result, claimKey: `${entityPrefix}/${attribute}` };
}
|
|
3076
|
+
/**
 * Produces a printable message for a thrown value: the `message` of an Error,
 * or the string form of anything else.
 *
 * @param {unknown} error
 * @returns {string}
 */
function formatClaimExtractionError(error) {
  if (error instanceof Error) {
    return error.message;
  }
  return String(error);
}
|
|
3079
|
+
/**
 * Maps a support record's `autoApplyClass` to a human-readable description.
 * Unrecognized classes are described as "structural support".
 *
 * @param {{autoApplyClass?: unknown}} support
 * @returns {string}
 */
function describeSupportPromotionClass(support) {
  const descriptions = new Map([
    ["trusted_exact_reuse_grounded", "trusted exact-key reuse with local grounding"],
    ["trusted_family_template_grounded", "trusted family reuse plus grounded template support"],
    ["trusted_family_stable_slot", "trusted family reuse plus a stable compact slot"],
    ["trusted_family_grounded_alignment", "trusted family reuse plus grounded dual lexical alignment"]
  ]);
  return descriptions.get(support.autoApplyClass) ?? "structural support";
}
|
|
3093
|
+
/**
 * Tries to derive a claim key without the model by parsing a possessive
 * phrase from the entry subject, falling back to the entry content.
 *
 * The parsed attribute must pass `looksLikeDeterministicAttribute`, and the
 * resulting key must normalize and validate. Any failure yields null.
 *
 * @param {{subject: string, content: string}} entry
 * @param {object} hints - Normalized hints used to resolve the entity.
 * @returns {object|null} Result with path "deterministic_repair", or null.
 */
function tryDeterministicClaimKeyRepair(entry, hints) {
  const parsed = parsePossessiveClaim(entry.subject) ?? parsePossessiveStatement(entry.content);
  if (!parsed) {
    return null;
  }

  const attribute = normalizeClaimKeySegment(parsed.attribute);
  if (!looksLikeDeterministicAttribute(attribute)) {
    return null;
  }

  const entity = normalizeEntity(parsed.entity, hints);
  const normalized = normalizeClaimKey(`${entity}/${attribute}`);
  if (!normalized.ok) {
    return null;
  }

  const validated = validateExtractedClaimKey(normalized.value);
  if (!validated.ok) {
    return null;
  }

  const { entity: rawEntity, attribute: rawAttribute } = parsed;
  return {
    claimKey: validated.value.claimKey,
    confidence: DEFAULT_REPAIR_CONFIDENCE,
    rawEntity,
    rawAttribute,
    path: "deterministic_repair"
  };
}
|
|
3119
|
+
/**
 * Loads the initial hint state for claim extraction from the database.
 *
 * Four lookups are awaited together with `Promise.allSettled`; a lookup that
 * rejects contributes an empty list instead of failing the whole load.
 *
 * @param {object} db - Database handle passed to the lookup helpers.
 * @returns {Promise<object>} Hint state built by `createHintState`.
 */
async function loadClaimExtractionHintState(db) {
  const outcomes = await Promise.allSettled([
    getEntityHints(db),
    getClaimKeyExamples(db, MAX_CLAIM_KEY_EXAMPLES),
    getClaimKeyExamples(db, MAX_SUPPORT_CLAIM_KEY_EXAMPLES),
    getClaimKeyEntityPrefixStats(db)
  ]);
  const [entityHints, claimKeyExamples, supportClaimKeys, entityPrefixStats] = outcomes.map(
    (outcome) => (outcome.status === "fulfilled" ? outcome.value : [])
  );
  return createHintState({ entityHints, claimKeyExamples, supportClaimKeys, entityPrefixStats });
}
|
|
3133
|
+
/**
 * Fetches claim-key examples from the database, when it supports them.
 *
 * @param {object} db - Database handle; `getClaimKeyExamples` is optional on it.
 * @param {number} limit - Passed through to `db.getClaimKeyExamples`.
 * @returns {Promise<Array>} The database's answer, or [] when the method is missing.
 */
async function getClaimKeyExamples(db, limit) {
  const supported = typeof db.getClaimKeyExamples === "function";
  return supported ? db.getClaimKeyExamples(limit) : [];
}
|
|
3139
|
+
/**
 * Fetches entity-prefix statistics from the database, when it supports them.
 *
 * @param {object} db - Database handle; `getClaimKeyEntityPrefixStats` is optional on it.
 * @returns {Promise<Array>} The database's answer, or [] when the method is missing.
 */
async function getClaimKeyEntityPrefixStats(db) {
  const supported = typeof db.getClaimKeyEntityPrefixStats === "function";
  return supported ? db.getClaimKeyEntityPrefixStats() : [];
}
|
|
3145
|
+
/**
 * Builds the mutable hint state shared across a batch.
 *
 * Claim-key examples and support claim keys are normalized. Entity hints are
 * the normalized input hints followed by the entities of the support claim
 * keys, de-duplicated and capped at MAX_ENTITY_HINTS.
 *
 * @param {{entityHints?: string[], claimKeyExamples?: string[], supportClaimKeys?: string[], entityPrefixStats?: object[]}} input
 * @returns {{entityHints: string[], claimKeyExamples: string[], supportClaimKeys: string[], entityPrefixStats: object[]}}
 */
function createHintState(input) {
  const claimKeyExamples = normalizeClaimKeyExamples(input.claimKeyExamples ?? []);
  const supportClaimKeys = normalizeSupportClaimKeys(input.supportClaimKeys ?? []);

  const declaredEntities = normalizeEntityHints(input.entityHints ?? []);
  const supportEntities = [];
  for (const claimKey of supportClaimKeys) {
    const parsed = normalizeClaimKey(claimKey);
    if (parsed.ok) {
      supportEntities.push(parsed.value.entity);
    }
  }
  const entityHints = limitUnique([...declaredEntities, ...supportEntities], MAX_ENTITY_HINTS);

  const entityPrefixStats = input.entityPrefixStats ?? [];
  return { entityHints, claimKeyExamples, supportClaimKeys, entityPrefixStats };
}
|
|
3165
|
+
/**
 * Combines the shared hint state with per-entry metadata into the raw hints
 * object handed to extraction. The two hint lists are copied; tags are passed
 * by reference.
 *
 * @param {{entityHints: Iterable<string>, claimKeyExamples: Iterable<string>}} state
 * @param {{user_id?: unknown, project?: unknown, tags?: unknown, source_context?: unknown}} entry
 * @returns {object}
 */
function buildEntryHints(state, entry) {
  const entityHints = [...state.entityHints];
  const claimKeyExamples = [...state.claimKeyExamples];
  const { user_id: userId, project, tags, source_context: sourceContext } = entry;
  return { entityHints, claimKeyExamples, userId, project, tags, sourceContext };
}
|
|
3175
|
+
/**
 * Records a newly accepted claim key in the shared hint state so later entries
 * can reuse it. The normalized key is moved to the front of the example and
 * support lists, and its entity to the front of the entity hints. Keys that
 * fail normalization are ignored.
 *
 * @param {{claimKeyExamples: string[], supportClaimKeys: string[], entityHints: string[]}} state - Mutated in place.
 * @param {string} claimKey
 * @returns {void}
 */
function recordClaimKeyHint(state, claimKey) {
  const parsed = normalizeClaimKey(claimKey);
  if (!parsed.ok) {
    return;
  }
  const { claimKey: canonicalKey, entity } = parsed.value;
  state.claimKeyExamples = prependUnique(state.claimKeyExamples, canonicalKey, MAX_CLAIM_KEY_EXAMPLES);
  state.supportClaimKeys = prependUnique(state.supportClaimKeys, canonicalKey, MAX_SUPPORT_CLAIM_KEY_EXAMPLES);
  state.entityHints = prependUnique(state.entityHints, entity, MAX_ENTITY_HINTS);
}
|
|
3184
|
+
/**
 * Normalizes raw extraction hints into the shape the prompt builder and the
 * entity resolver read.
 *
 * Entity hints are the normalized input hints followed by the entities of the
 * normalized claim-key examples, de-duplicated and capped at MAX_ENTITY_HINTS.
 *
 * @param {{entityHints?: string[], claimKeyExamples?: string[], userId?: unknown, project?: unknown, tags?: string[], sourceContext?: unknown}} hints
 * @returns {{entityHints: string[], claimKeyExamples: string[], userEntity: string|undefined, projectEntity: string|undefined, tags: string[], sourceContext: string|undefined}}
 */
function normalizeClaimExtractionHints(hints) {
  const claimKeyExamples = normalizeClaimKeyExamples(hints.claimKeyExamples ?? []);

  const declaredEntities = normalizeEntityHints(hints.entityHints ?? []);
  const exampleEntities = [];
  for (const claimKey of claimKeyExamples) {
    const parsed = normalizeClaimKey(claimKey);
    if (parsed.ok) {
      exampleEntities.push(parsed.value.entity);
    }
  }
  const entityHints = limitUnique([...declaredEntities, ...exampleEntities], MAX_ENTITY_HINTS);

  const userEntity = normalizeMetadataEntity(hints.userId);
  const projectEntity = normalizeMetadataEntity(hints.project);
  const tags = normalizeHintTags(hints.tags ?? []);
  const sourceContext = normalizeSourceContextHint(hints.sourceContext);

  return { entityHints, claimKeyExamples, userEntity, projectEntity, tags, sourceContext };
}
|
|
3204
|
+
/**
 * Summarizes an extraction attempt for preview: the outcome label, the clamped
 * confidence, the trimmed raw entity and attribute strings (empty when they
 * are not strings), and the path that produced the response.
 *
 * @param {string} outcome
 * @param {{path: string, response: {confidence?: unknown, entity?: unknown, attribute?: unknown}}} attempt
 * @returns {{outcome: string, confidence: number, rawEntity: string, rawAttribute: string, path: string}}
 */
function buildPreviewOutcome(outcome, attempt) {
  const trimmedOrEmpty = (field) => (typeof field === "string" ? field.trim() : "");
  const { response, path } = attempt;
  const confidence = normalizeConfidence(response.confidence);
  const rawEntity = trimmedOrEmpty(response.entity);
  const rawAttribute = trimmedOrEmpty(response.attribute);
  return { outcome, confidence, rawEntity, rawAttribute, path };
}
|
|
3213
|
+
/**
 * Clamps a confidence value into [0, 1]. Anything that is not a finite number
 * (wrong type, NaN, Infinity) becomes 0.
 *
 * @param {unknown} value
 * @returns {number}
 */
function normalizeConfidence(value) {
  const isUsable = typeof value === "number" && Number.isFinite(value);
  if (!isUsable) {
    return 0;
  }
  const atLeastZero = Math.max(0, value);
  return Math.min(1, atLeastZero);
}
|
|
3219
|
+
/**
 * Normalizes a raw entity name into a claim-key segment and, when the name is
 * self-referential, tries to replace it with a concrete entity from the hints.
 *
 * Resolution order for self-referential names:
 *   1. a user-referential name maps to `hints.userEntity` when set;
 *   2. a project-referential name maps to `hints.projectEntity` when set;
 *   3. if exactly one distinct concrete candidate exists across the project,
 *      user and entity hints, that candidate is used;
 *   4. if there is exactly one entity hint, it is used;
 *   5. otherwise the normalized name is returned unchanged.
 *
 * @param {string} value - Raw entity text.
 * @param {{userEntity?: string, projectEntity?: string, entityHints: string[]}} hints
 * @returns {string} Normalized entity, or "" when nothing survives normalization.
 */
function normalizeEntity(value, hints) {
  const segment = normalizeClaimKeySegment(value);
  if (segment.length === 0) {
    return "";
  }
  if (!SELF_REFERENTIAL_ENTITIES.has(segment)) {
    return segment;
  }

  const { userEntity, projectEntity, entityHints } = hints;
  if (USER_REFERENTIAL_ENTITIES.has(segment) && userEntity) {
    return userEntity;
  }
  if (PROJECT_REFERENTIAL_ENTITIES.has(segment) && projectEntity) {
    return projectEntity;
  }

  const isNonEmptyString = (candidate) => typeof candidate === "string" && candidate.length > 0;
  const concreteCandidates = limitUnique(
    [projectEntity, userEntity, ...entityHints].filter(isNonEmptyString),
    MAX_ENTITY_HINTS
  );
  if (concreteCandidates.length === 1) {
    return concreteCandidates[0] ?? segment;
  }
  if (entityHints.length === 1) {
    return entityHints[0] ?? segment;
  }
  return segment;
}
|
|
3247
|
+
/**
 * Normalizes entity hints into claim-key segments, dropping empty and
 * self-referential ones, then de-duplicates and caps at MAX_ENTITY_HINTS.
 *
 * @param {string[]} entityHints
 * @returns {string[]}
 */
function normalizeEntityHints(entityHints) {
  const segments = entityHints.map((entityHint) => normalizeClaimKeySegment(entityHint));
  const concrete = segments.filter(
    (segment) => segment.length > 0 && !SELF_REFERENTIAL_ENTITIES.has(segment)
  );
  return limitUnique(concrete, MAX_ENTITY_HINTS);
}
|
|
3253
|
+
/**
 * Normalizes prompt claim-key examples, dropping any that fail normalization,
 * then de-duplicates and caps at MAX_CLAIM_KEY_EXAMPLES.
 *
 * @param {string[]} claimKeyExamples
 * @returns {string[]}
 */
function normalizeClaimKeyExamples(claimKeyExamples) {
  const canonicalKeys = [];
  claimKeyExamples.forEach((claimKeyExample) => {
    const parsed = normalizeClaimKey(claimKeyExample);
    if (parsed.ok) {
      canonicalKeys.push(parsed.value.claimKey);
    }
  });
  return limitUnique(canonicalKeys, MAX_CLAIM_KEY_EXAMPLES);
}
|
|
3262
|
+
/**
 * Normalizes support claim keys, dropping any that fail normalization, then
 * de-duplicates and caps at MAX_SUPPORT_CLAIM_KEY_EXAMPLES.
 *
 * @param {string[]} claimKeys
 * @returns {string[]}
 */
function normalizeSupportClaimKeys(claimKeys) {
  const canonicalKeys = [];
  claimKeys.forEach((claimKey) => {
    const parsed = normalizeClaimKey(claimKey);
    if (parsed.ok) {
      canonicalKeys.push(parsed.value.claimKey);
    }
  });
  return limitUnique(canonicalKeys, MAX_SUPPORT_CLAIM_KEY_EXAMPLES);
}
|
|
3271
|
+
/**
 * Normalizes a metadata value (such as a user id or project name) into an
 * entity segment.
 *
 * @param {unknown} value
 * @returns {string|undefined} The normalized segment, or undefined when the
 *   value is not a string, normalizes to nothing, is self-referential, or
 *   contains no lowercase ASCII letter.
 */
function normalizeMetadataEntity(value) {
  if (typeof value !== "string") {
    return void 0;
  }
  const segment = normalizeClaimKeySegment(value);
  const isUsable =
    segment.length !== 0 &&
    !SELF_REFERENTIAL_ENTITIES.has(segment) &&
    /[a-z]/u.test(segment);
  return isUsable ? segment : void 0;
}
|
|
3281
|
+
/**
 * Normalizes tags into claim-key segments, drops empty ones, de-duplicates,
 * and keeps at most 8.
 *
 * @param {string[]} tags
 * @returns {string[]}
 */
function normalizeHintTags(tags) {
  const maxTags = 8;
  const segments = tags.map((tag) => normalizeClaimKeySegment(tag));
  const nonEmpty = segments.filter((segment) => segment.length > 0);
  return limitUnique(nonEmpty, maxTags);
}
|
|
3287
|
+
/**
 * Prepares a source-context string for inclusion in the extraction prompt.
 *
 * The value is trimmed. Text of up to 160 UTF-16 units is returned unchanged;
 * longer text is cut to at most 157 units, right-trimmed, and suffixed with
 * "...".
 *
 * Changes from the previous implementation:
 * - Non-string values yield undefined instead of throwing on `.trim()`, which
 *   matches the `typeof` guard in `normalizeMetadataEntity`.
 * - The cut point moves back one unit when it would split a surrogate pair, so
 *   the result never ends in a lone high surrogate.
 *
 * @param {unknown} value - Raw source context (usually a string, null or undefined).
 * @returns {string|undefined} The cleaned hint, or undefined when there is no usable text.
 */
function normalizeSourceContextHint(value) {
  if (typeof value !== "string") {
    return void 0;
  }
  const trimmed = value.trim();
  if (!trimmed) {
    return void 0;
  }
  const maxLength = 160;
  if (trimmed.length <= maxLength) {
    return trimmed;
  }
  const ellipsis = "...";
  let cutIndex = maxLength - ellipsis.length;
  // Do not keep the first half of a surrogate pair whose second half is cut off.
  const lastKeptUnit = trimmed.charCodeAt(cutIndex - 1);
  if (lastKeptUnit >= 0xd800 && lastKeptUnit <= 0xdbff) {
    cutIndex -= 1;
  }
  return `${trimmed.slice(0, cutIndex).trimEnd()}${ellipsis}`;
}
|
|
3294
|
+
/**
 * Heuristically decides whether an error looks like a JSON parsing failure by
 * matching its message (or string form) against common parser phrases.
 *
 * @param {unknown} error
 * @returns {boolean}
 */
function isMalformedJsonError(error) {
  let message;
  if (error instanceof Error) {
    message = error.message;
  } else {
    message = String(error);
  }
  const malformedJsonPattern = /json|unexpected token|unexpected end|unexpected non-whitespace|unterminated|position \d+/iu;
  return malformedJsonPattern.test(message);
}
|
|
3298
|
+
/**
 * Parses a subject of the form "<entity>'s <attribute>" where the whole string
 * is a single phrase without ".", "!", "?" or newlines.
 *
 * @param {string} subject
 * @returns {{entity: string, attribute: string}|null} Parts with trailing
 *   punctuation stripped, or null when the subject does not match.
 */
function parsePossessiveClaim(subject) {
  const possessivePhrase = /^\s*(?<entity>[^.!?\n]+?)[’']s\s+(?<attribute>[^.!?\n]+?)\s*$/iu;
  const groups = possessivePhrase.exec(subject)?.groups;
  if (!groups) {
    return null;
  }
  const entity = stripTrailingPunctuation(groups.entity);
  const attribute = stripTrailingPunctuation(groups.attribute);
  return { entity, attribute };
}
|
|
3308
|
+
/**
 * Parses content that begins with "<entity>'s <attribute> is|are|was|were ...".
 *
 * @param {string} content
 * @returns {{entity: string, attribute: string}|null} Parts with trailing
 *   punctuation stripped, or null when the content does not match.
 */
function parsePossessiveStatement(content) {
  const possessiveStatement = /^\s*(?<entity>[^.!?\n]+?)[’']s\s+(?<attribute>[^.!?\n]+?)\s+(?:is|are|was|were)\b/iu;
  const groups = possessiveStatement.exec(content)?.groups;
  if (!groups) {
    return null;
  }
  const entity = stripTrailingPunctuation(groups.entity);
  const attribute = stripTrailingPunctuation(groups.attribute);
  return { entity, attribute };
}
|
|
3318
|
+
/**
 * Trims the value and removes any trailing run of whitespace, quotes (straight
 * or curly) and the punctuation marks . , : ; ! ?
 *
 * @param {string} value
 * @returns {string}
 */
function stripTrailingPunctuation(value) {
  const trailingNoise = /[\s"'“”‘’.,:;!?]+$/gu;
  const trimmed = value.trim();
  const withoutTail = trimmed.replace(trailingNoise, "");
  return withoutTail.trim();
}
|
|
3321
|
+
/**
 * Checks whether a normalized attribute is simple enough for deterministic
 * repair: between one and four non-empty "_"-separated parts, with the final
 * part listed in DETERMINISTIC_ATTRIBUTE_HEADS.
 *
 * @param {string} attribute - Normalized snake_case attribute.
 * @returns {boolean}
 */
function looksLikeDeterministicAttribute(attribute) {
  const maxParts = 4;
  const parts = attribute.split("_").filter((part) => part.length > 0);
  const partCount = parts.length;
  if (partCount === 0 || partCount > maxParts) {
    return false;
  }
  // The last word names the slot (e.g. the noun in "preferred_timezone").
  const lastPart = parts[partCount - 1];
  return typeof lastPart === "string" && DETERMINISTIC_ATTRIBUTE_HEADS.has(lastPart);
}
|
|
3329
|
+
/**
 * Returns a new list with `value` placed first, followed by `values`, then
 * de-duplicated and capped by `limitUnique`.
 *
 * @param {string[]} values
 * @param {string} value
 * @param {number} limit
 * @returns {string[]}
 */
function prependUnique(values, value, limit) {
  const withNewestFirst = [value, ...values];
  return limitUnique(withNewestFirst, limit);
}
|
|
3332
|
+
/**
 * Drops empty items, removes duplicates while keeping first-seen order, and
 * returns at most `limit` items.
 *
 * @param {Array<{length: number}>} values - Typically strings.
 * @param {number} limit
 * @returns {Array}
 */
function limitUnique(values, limit) {
  const nonEmpty = values.filter((value) => value.length > 0);
  const distinct = [...new Set(nonEmpty)];
  return distinct.slice(0, limit);
}
|
|
3335
|
+
|
|
3336
|
+
// src/core/store/hashing.ts
|
|
3337
|
+
import { createHash as createHash2 } from "crypto";
|
|
3338
|
+
/**
 * Computes the SHA-256 hex digest of an entry's content. When a source file is
 * given (truthy), the hashed input is the source file, a newline, then the
 * content.
 *
 * @param {string} content
 * @param {string} [sourceFile]
 * @returns {string} Lowercase hex digest.
 */
function computeContentHash(content, sourceFile) {
  let hashInput = content;
  if (sourceFile) {
    hashInput = `${sourceFile}\n${content}`;
  }
  const hasher = createHash2("sha256");
  hasher.update(hashInput);
  return hasher.digest("hex");
}
|
|
3343
|
+
/**
 * Computes a SHA-256 hex digest of a normalized form of the content:
 * lowercased, whitespace runs collapsed to one space, trimmed, and then with
 * every character that is neither a word character nor whitespace removed.
 *
 * @param {string} content
 * @returns {string} Lowercase hex digest.
 */
function computeNormContentHash(content) {
  const lowered = content.toLowerCase();
  const singleSpaced = lowered.replace(/\s+/g, " ").trim();
  const normalized = singleSpaced.replace(/[^\w\s]/g, "");
  const hasher = createHash2("sha256");
  hasher.update(normalized);
  return hasher.digest("hex");
}
|
|
3347
|
+
|
|
3348
|
+
// src/core/store/validation.ts
|
|
3349
|
+
// Case-insensitive pattern for a canonical 8-4-4-4-12 hex UUID string. The
// version nibble must be 1-5 and the variant nibble one of 8, 9, a or b, so
// other version digits (for example 7) do not match.
var UUID_PATTERN = /^[0-9a-f]{8}-[0-9a-f]{4}-[1-5][0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$/iu;
|
|
3350
|
+
function validateEntriesWithIndexes(inputs) {
|
|
3351
|
+
const valid = [];
|
|
3352
|
+
const errors = [];
|
|
3353
|
+
const warnings = [];
|
|
3354
|
+
const rejectedInputIndexes = [];
|
|
3355
|
+
for (const [index, input] of inputs.entries()) {
|
|
3356
|
+
const subject = normalizeString(input.subject);
|
|
3357
|
+
const content = normalizeString(input.content);
|
|
3358
|
+
if (!ENTRY_TYPES.includes(input.type)) {
|
|
3359
|
+
errors.push(`Entry ${index} has an invalid type.`);
|
|
3360
|
+
rejectedInputIndexes.push(index);
|
|
3361
|
+
continue;
|
|
3362
|
+
}
|
|
3363
|
+
if (subject.length === 0) {
|
|
3364
|
+
errors.push(`Entry ${index} is missing a subject.`);
|
|
3365
|
+
rejectedInputIndexes.push(index);
|
|
3366
|
+
continue;
|
|
3367
|
+
}
|
|
3368
|
+
if (content.length === 0) {
|
|
3369
|
+
errors.push(`Entry ${index} is missing content.`);
|
|
3370
|
+
rejectedInputIndexes.push(index);
|
|
3371
|
+
continue;
|
|
3372
|
+
}
|
|
3373
|
+
if (input.expiry !== void 0 && !EXPIRY_LEVELS.includes(input.expiry)) {
|
|
3374
|
+
errors.push(`Entry ${index} has an invalid expiry.`);
|
|
3375
|
+
rejectedInputIndexes.push(index);
|
|
3376
|
+
continue;
|
|
3377
|
+
}
|
|
3378
|
+
if (input.tags !== void 0 && !areValidTags(input.tags)) {
|
|
3379
|
+
errors.push(`Entry ${index} has invalid tags.`);
|
|
3380
|
+
rejectedInputIndexes.push(index);
|
|
3381
|
+
continue;
|
|
3382
|
+
}
|
|
3383
|
+
if (input.importance !== void 0 && !Number.isFinite(input.importance)) {
|
|
3384
|
+
errors.push(`Entry ${index} has an invalid importance.`);
|
|
3385
|
+
rejectedInputIndexes.push(index);
|
|
3386
|
+
continue;
|
|
3387
|
+
}
|
|
3388
|
+
if (input.supersedes !== void 0 && !isUuid(input.supersedes)) {
|
|
3389
|
+
errors.push(`Entry ${index} has an invalid supersedes id.`);
|
|
3390
|
+
rejectedInputIndexes.push(index);
|
|
3391
|
+
continue;
|
|
3392
|
+
}
|
|
3393
|
+
const temporalValidity = validateTemporalValidityRange(input.valid_from, input.valid_to);
|
|
3394
|
+
if (!temporalValidity.ok) {
|
|
3395
|
+
errors.push(`Entry ${index} ${temporalValidity.message}`);
|
|
3396
|
+
rejectedInputIndexes.push(index);
|
|
3397
|
+
continue;
|
|
3398
|
+
}
|
|
3399
|
+
let normalizedClaimKey;
|
|
3400
|
+
if (input.claim_key !== void 0) {
|
|
3401
|
+
if (typeof input.claim_key !== "string") {
|
|
3402
|
+
warnings.push(`Entry ${index} provided a non-string claim key and it was dropped.`);
|
|
3403
|
+
} else {
|
|
3404
|
+
const claimKey = normalizeClaimKey(input.claim_key);
|
|
3405
|
+
if (claimKey.ok) {
|
|
3406
|
+
normalizedClaimKey = claimKey.value.claimKey;
|
|
3407
|
+
} else {
|
|
3408
|
+
warnings.push(
|
|
3409
|
+
`Entry ${index} provided invalid claim key ${JSON.stringify(input.claim_key)} and it was dropped: ${describeClaimKeyNormalizationFailure(claimKey.reason)}.`
|
|
3410
|
+
);
|
|
3411
|
+
}
|
|
3412
|
+
}
|
|
3413
|
+
}
|
|
3414
|
+
const claimKeyRaw = normalizedClaimKey ? normalizeOptionalString(input.claim_key_raw) : void 0;
|
|
3415
|
+
const claimKeyStatus = normalizedClaimKey ? normalizeClaimKeyStatus(input.claim_key_status, index, warnings) : void 0;
|
|
3416
|
+
const claimKeySource = normalizedClaimKey ? normalizeClaimKeySource(input.claim_key_source, index, warnings) : void 0;
|
|
3417
|
+
const claimKeyConfidence = normalizedClaimKey ? normalizeClaimKeyConfidence(input.claim_key_confidence, index, warnings) : void 0;
|
|
3418
|
+
const claimKeyRationale = normalizedClaimKey ? normalizeOptionalString(input.claim_key_rationale) : void 0;
|
|
3419
|
+
const claimSupportSourceKind = normalizedClaimKey ? normalizeOptionalString(input.claim_support_source_kind) : void 0;
|
|
3420
|
+
const claimSupportLocator = normalizedClaimKey ? normalizeOptionalString(input.claim_support_locator) : void 0;
|
|
3421
|
+
const claimSupportObservedAt = normalizedClaimKey && input.claim_support_observed_at !== void 0 ? normalizeClaimSupportObservedAt(input.claim_support_observed_at, index, warnings) : void 0;
|
|
3422
|
+
const claimSupportMode = normalizedClaimKey && input.claim_support_mode !== void 0 ? normalizeClaimSupportMode(input.claim_support_mode, index, warnings) : void 0;
|
|
3423
|
+
const hasPrecomputedLifecycleFields = hasPrecomputedClaimKeyLifecycleFields(input);
|
|
3424
|
+
const resolvedPrecomputedLifecycle = normalizedClaimKey && hasPrecomputedLifecycleFields ? buildPrecomputedClaimKeyLifecycle({
|
|
3425
|
+
claim_key: normalizedClaimKey,
|
|
3426
|
+
claim_key_raw: claimKeyRaw,
|
|
3427
|
+
claim_key_status: claimKeyStatus,
|
|
3428
|
+
claim_key_source: claimKeySource,
|
|
3429
|
+
claim_key_confidence: claimKeyConfidence,
|
|
3430
|
+
claim_key_rationale: claimKeyRationale,
|
|
3431
|
+
claim_support_source_kind: claimSupportSourceKind,
|
|
3432
|
+
claim_support_locator: claimSupportLocator,
|
|
3433
|
+
claim_support_observed_at: claimSupportObservedAt,
|
|
3434
|
+
claim_support_mode: claimSupportMode
|
|
3435
|
+
}) : void 0;
|
|
3436
|
+
if (hasPrecomputedLifecycleFields) {
|
|
3437
|
+
if (!normalizedClaimKey) {
|
|
3438
|
+
errors.push(`Entry ${index} provided claim-key lifecycle metadata without a valid claim key.`);
|
|
3439
|
+
rejectedInputIndexes.push(index);
|
|
3440
|
+
continue;
|
|
3441
|
+
}
|
|
3442
|
+
if (!resolvedPrecomputedLifecycle) {
|
|
3443
|
+
errors.push(
|
|
3444
|
+
`Entry ${index} provided partial or invalid claim-key lifecycle metadata. Complete bundles require claim_key_status, claim_key_source, claim_key_confidence, and claim_key_rationale.`
|
|
3445
|
+
);
|
|
3446
|
+
rejectedInputIndexes.push(index);
|
|
3447
|
+
continue;
|
|
3448
|
+
}
|
|
3449
|
+
}
|
|
3450
|
+
valid.push({
|
|
3451
|
+
inputIndex: index,
|
|
3452
|
+
input: {
|
|
3453
|
+
type: input.type,
|
|
3454
|
+
subject,
|
|
3455
|
+
content,
|
|
3456
|
+
importance: clampImportance(input.importance),
|
|
3457
|
+
expiry: input.expiry ?? "temporary",
|
|
3458
|
+
tags: normalizeTags(input.tags),
|
|
3459
|
+
source_file: normalizeOptionalString(input.source_file),
|
|
3460
|
+
source_context: normalizeOptionalString(input.source_context),
|
|
3461
|
+
user_id: normalizeOptionalString(input.user_id),
|
|
3462
|
+
project: normalizeOptionalString(input.project),
|
|
3463
|
+
created_at: normalizeOptionalString(input.created_at),
|
|
3464
|
+
supersedes: normalizeOptionalString(input.supersedes),
|
|
3465
|
+
claim_key: normalizedClaimKey,
|
|
3466
|
+
claim_key_raw: resolvedPrecomputedLifecycle?.claim_key_raw ?? claimKeyRaw,
|
|
3467
|
+
claim_key_status: resolvedPrecomputedLifecycle?.claim_key_status,
|
|
3468
|
+
claim_key_source: resolvedPrecomputedLifecycle?.claim_key_source,
|
|
3469
|
+
claim_key_confidence: resolvedPrecomputedLifecycle?.claim_key_confidence,
|
|
3470
|
+
claim_key_rationale: resolvedPrecomputedLifecycle?.claim_key_rationale,
|
|
3471
|
+
claim_support_source_kind: resolvedPrecomputedLifecycle?.claim_support_source_kind ?? claimSupportSourceKind,
|
|
3472
|
+
claim_support_locator: resolvedPrecomputedLifecycle?.claim_support_locator ?? claimSupportLocator,
|
|
3473
|
+
claim_support_observed_at: resolvedPrecomputedLifecycle?.claim_support_observed_at ?? claimSupportObservedAt,
|
|
3474
|
+
claim_support_mode: resolvedPrecomputedLifecycle?.claim_support_mode ?? claimSupportMode,
|
|
3475
|
+
valid_from: temporalValidity.value.validFrom,
|
|
3476
|
+
valid_to: temporalValidity.value.validTo
|
|
3477
|
+
}
|
|
3478
|
+
});
|
|
3479
|
+
}
|
|
3480
|
+
return {
|
|
3481
|
+
valid,
|
|
3482
|
+
rejected: errors.length,
|
|
3483
|
+
rejectedInputIndexes,
|
|
3484
|
+
errors,
|
|
3485
|
+
warnings
|
|
3486
|
+
};
|
|
3487
|
+
}
|
|
3488
|
+
/**
 * Maps an importance score onto the integer scale 1..10.
 *
 * @param {number|undefined} value - Caller-supplied importance, if any.
 * @returns {number} 7 when no value was supplied; otherwise the value rounded
 *   to the nearest integer and clamped into [1, 10].
 */
function clampImportance(value) {
  if (value !== void 0) {
    const rounded = Math.round(value);
    const atLeastOne = Math.max(1, rounded);
    return Math.min(10, atLeastOne);
  }
  return 7;
}
|
|
3494
|
+
/**
 * Strips leading and trailing whitespace from a required string.
 *
 * @param {string} value - String to trim.
 * @returns {string} The trimmed string (possibly empty).
 */
function normalizeString(value) {
  const trimmed = value.trim();
  return trimmed;
}
|
|
3497
|
+
/**
 * Normalizes an optional string field.
 *
 * Non-string values (including null and undefined) are treated as "absent"
 * instead of being dereferenced: calling `.trim()` on, say, a number would
 * throw a TypeError and abort validation of the whole batch.
 *
 * @param {unknown} value - Raw field value from caller input.
 * @returns {string|undefined} The trimmed string, or undefined when the value
 *   is not a string or is empty after trimming.
 */
function normalizeOptionalString(value) {
  if (typeof value !== "string") {
    return void 0;
  }
  const trimmed = value.trim();
  return trimmed.length > 0 ? trimmed : void 0;
}
|
|
3501
|
+
/**
 * Normalizes the `claim_support_observed_at` field of a store input.
 *
 * @param {string|undefined} value - Raw field value.
 * @param {number} index - Position of the entry in the input batch (used in the warning text).
 * @param {string[]} warnings - Collector that receives a message when the value is dropped.
 * @returns {string|undefined} The trimmed timestamp, or undefined when it is
 *   empty or fails the `isIsoTimestamp` check.
 */
function normalizeClaimSupportObservedAt(value, index, warnings) {
  const observedAt = normalizeOptionalString(value);
  if (observedAt && !isIsoTimestamp(observedAt)) {
    // The warning echoes the raw value, not the trimmed one.
    warnings.push(`Entry ${index} provided invalid claim_support_observed_at ${JSON.stringify(value)} and it was dropped.`);
    return void 0;
  }
  return observedAt ? observedAt : void 0;
}
|
|
3512
|
+
/**
 * Parses `claim_key_status` via `parseClaimKeyStatus`, warning when a supplied
 * value does not parse to a truthy result.
 *
 * @param {unknown} value - Raw field value.
 * @param {number} index - Position of the entry in the input batch.
 * @param {string[]} warnings - Collector for "dropped" messages.
 * @returns {*} The parsed status, or undefined when absent or rejected.
 */
function normalizeClaimKeyStatus(value, index, warnings) {
  const status = parseClaimKeyStatus(value);
  if (!status) {
    // An omitted field is not worth a warning; only report values that were present.
    if (value !== void 0) {
      warnings.push(`Entry ${index} provided invalid claim_key_status ${JSON.stringify(value)} and it was dropped.`);
    }
    return void 0;
  }
  return status;
}
|
|
3522
|
+
/**
 * Parses `claim_key_source` via `parseClaimKeySource`, warning when a supplied
 * value does not parse to a truthy result.
 *
 * @param {unknown} value - Raw field value.
 * @param {number} index - Position of the entry in the input batch.
 * @param {string[]} warnings - Collector for "dropped" messages.
 * @returns {*} The parsed source, or undefined when absent or rejected.
 */
function normalizeClaimKeySource(value, index, warnings) {
  const source = parseClaimKeySource(value);
  if (!source) {
    // Stay silent for an omitted field; only present-but-invalid values warn.
    if (value !== void 0) {
      warnings.push(`Entry ${index} provided invalid claim_key_source ${JSON.stringify(value)} and it was dropped.`);
    }
    return void 0;
  }
  return source;
}
|
|
3532
|
+
/**
 * Parses `claim_key_confidence` via `parseClaimKeyConfidence`.
 *
 * @param {unknown} value - Raw field value.
 * @param {number} index - Position of the entry in the input batch.
 * @param {string[]} warnings - Collector for "dropped" messages.
 * @returns {*} The parsed confidence; undefined when the field was omitted or
 *   the parser returned undefined (in which case a warning is recorded).
 */
function normalizeClaimKeyConfidence(value, index, warnings) {
  if (value === void 0) {
    return void 0;
  }
  const confidence = parseClaimKeyConfidence(value);
  // The comparison is against undefined (not falsiness) so a parsed 0 is kept.
  if (confidence === void 0) {
    warnings.push(`Entry ${index} provided invalid claim_key_confidence ${JSON.stringify(value)} and it was dropped.`);
  }
  return confidence;
}
|
|
3543
|
+
/**
 * Parses `claim_support_mode` via `parseClaimSupportMode`, recording a warning
 * whenever the parser does not return a truthy value.
 *
 * @param {unknown} value - Raw field value.
 * @param {number} index - Position of the entry in the input batch.
 * @param {string[]} warnings - Collector for "dropped" messages.
 * @returns {*} The parsed mode, or undefined when it was rejected.
 */
function normalizeClaimSupportMode(value, index, warnings) {
  const mode = parseClaimSupportMode(value);
  if (!mode) {
    warnings.push(`Entry ${index} provided invalid claim_support_mode ${JSON.stringify(value)} and it was dropped.`);
    return void 0;
  }
  return mode;
}
|
|
3551
|
+
/**
 * Checks that a value is an array whose elements are all strings.
 *
 * @param {unknown} value - Candidate tag list.
 * @returns {boolean} True for an array containing only strings (including an empty array).
 */
function areValidTags(value) {
  if (!Array.isArray(value)) {
    return false;
  }
  const hasNonString = value.some((tag) => typeof tag !== "string");
  return !hasNonString;
}
|
|
3554
|
+
/**
 * Trims every tag and drops those that end up empty.
 *
 * @param {string[]|undefined|null} tags - Tag list (elements are trimmed with `.trim()`).
 * @returns {string[]} A new array of non-empty trimmed tags; empty when `tags` is falsy.
 */
function normalizeTags(tags) {
  if (!tags) {
    return [];
  }
  return tags.flatMap((tag) => {
    const trimmed = tag.trim();
    return trimmed.length > 0 ? [trimmed] : [];
  });
}
|
|
3560
|
+
/**
 * Tests whether a value is a string matching `UUID_PATTERN` (ignoring
 * surrounding whitespace).
 *
 * Non-string values return false rather than throwing: the validator calls
 * this for any `supersedes` that is not undefined, so `null` or a number must
 * yield an "invalid supersedes id" rejection instead of a TypeError.
 *
 * @param {unknown} value - Candidate identifier.
 * @returns {boolean} True only for strings that match the UUID pattern after trimming.
 */
function isUuid(value) {
  return typeof value === "string" && UUID_PATTERN.test(value.trim());
}
|
|
3563
|
+
/**
 * Loose ISO-timestamp check: the trimmed string must be non-empty, contain a
 * "T" separator, and be parseable by `Date.parse`.
 *
 * @param {string} value - Candidate timestamp.
 * @returns {boolean} True when all three conditions hold.
 */
function isIsoTimestamp(value) {
  const candidate = value.trim();
  if (candidate.length === 0 || !candidate.includes("T")) {
    return false;
  }
  return !Number.isNaN(Date.parse(candidate));
}
|
|
3567
|
+
|
|
3568
|
+
// src/core/store/pipeline.ts
|
|
3569
|
+
// Floor applied to the confidence of a non-manual claim key before
// isAutoSupersessionEligible allows it to auto-supersede an existing entry
// (the effective threshold is the larger of this and the configured
// confidenceThreshold).
var AUTO_SUPERSESSION_MIN_EXTRACTED_CONFIDENCE = 0.9;
// Non-manual claim_key_source values that isAutoSupersessionEligible accepts;
// "manual" is handled separately there.
var AUTO_SUPERSESSION_ELIGIBLE_SOURCES = /* @__PURE__ */ new Set(["model", "json_retry"]);
|
|
3571
|
+
/**
 * Validates, de-duplicates, embeds and persists a batch of entry inputs, and
 * reports a per-input outcome.
 *
 * @param {object[]} inputs - Raw entry inputs.
 * @param {object} db - Store handle passed to `buildStorePlan` and `persistEntries`.
 * @param {object} embedding - Embedding provider handed to `resolvePendingEmbeddings`.
 * @param {object} [options] - Optional settings read here: `onWarning`,
 *   `dryRun`, `precomputedEmbeddings`, `claimExtraction` (the whole object is
 *   also forwarded to `maybeExtractClaimKeys`).
 * @returns {Promise<{stored: number, skipped: number, rejected: number, details: object[]}>}
 *   Counts plus details sorted by input index.
 */
async function storeEntriesDetailed(inputs, db, embedding, options = {}) {
  if (inputs.length === 0) {
    return { stored: 0, skipped: 0, rejected: 0, details: [] };
  }
  const plan = await buildStorePlan(inputs, db);
  for (const warning of plan.warnings) {
    options.onWarning?.(warning);
  }
  const { pendingEntries } = plan;
  // Assembles the result shape shared by every exit path below.
  const summarize = (stored, pendingDetails) => ({
    stored,
    skipped: plan.skipped,
    rejected: plan.rejected,
    details: sortStoreDetails([...plan.details, ...pendingDetails])
  });
  if (pendingEntries.length === 0) {
    return summarize(0, []);
  }
  if (options.dryRun === true) {
    // Dry runs stop before claim extraction, embedding and any write.
    return summarize(
      0,
      pendingEntries.map(({ inputIndex }) => ({
        inputIndex,
        outcome: "dry_run",
        reason: "dry_run"
      }))
    );
  }
  const extractedClaimKeys = await maybeExtractClaimKeys(pendingEntries, options);
  applyExtractedClaimKeyMetadata(pendingEntries, extractedClaimKeys);
  const embeddings = await resolvePendingEmbeddings(inputs, pendingEntries, embedding, options.precomputedEmbeddings);
  await persistEntries(db, pendingEntries, embeddings, extractedClaimKeys, options.claimExtraction?.config, options.onWarning);
  return summarize(
    pendingEntries.length,
    pendingEntries.map(({ inputIndex }) => ({
      inputIndex,
      outcome: "stored"
    }))
  );
}
|
|
3620
|
+
/**
 * Produces one embedding vector per pending entry, either by looking them up
 * in caller-supplied vectors or by delegating to `embedPendingEntries`.
 *
 * @param {object[]} inputs - The original input batch; precomputed vectors are indexed against it.
 * @param {object[]} entries - Pending entries, each carrying its `inputIndex`.
 * @param {object} embedding - Embedding provider used when nothing is precomputed.
 * @param {Array|undefined} precomputedEmbeddings - Optional vectors, one per original input.
 * @returns {Promise<Array>} Vectors aligned with `entries`.
 * @throws {Error} When the precomputed list length differs from `inputs`, or
 *   an entry's slot in it is falsy.
 */
async function resolvePendingEmbeddings(inputs, entries, embedding, precomputedEmbeddings) {
  if (precomputedEmbeddings) {
    const expected = inputs.length;
    const received = precomputedEmbeddings.length;
    if (received !== expected) {
      throw new Error(`Precomputed embedding length mismatch: expected ${expected}, received ${received}.`);
    }
    return entries.map(({ inputIndex }) => {
      const vector = precomputedEmbeddings[inputIndex];
      if (vector) {
        return vector;
      }
      throw new Error(`Missing precomputed embedding for input index ${inputIndex}.`);
    });
  }
  return embedPendingEntries(entries, embedding);
}
|
|
3635
|
+
/**
 * Embeds the composed text of every pending entry in a single provider call.
 *
 * @param {object[]} entries - Pending entries; `composeEmbeddingText` is applied to each `input`.
 * @param {{embed: Function}} embedding - Provider whose `embed(texts)` result is awaited.
 * @returns {Promise<Array>} The vectors returned by the provider.
 * @throws {Error} When the provider returns a different number of vectors than entries.
 */
async function embedPendingEntries(entries, embedding) {
  const vectors = await embedding.embed(entries.map((entry) => composeEmbeddingText(entry.input)));
  if (vectors.length === entries.length) {
    return vectors;
  }
  throw new Error(`Embedding length mismatch: expected ${entries.length}, received ${vectors.length}.`);
}
|
|
3643
|
+
/**
 * Inserts prepared entries and applies explicit and automatic supersession.
 *
 * The batch runs inside `db.withTransaction` when the store supports it and
 * either more than one entry is written or any entry carries `supersedes` /
 * `claim_key`; otherwise it runs directly against `db`.
 *
 * @param {object} db - Store handle (`insertEntry`, `supersedeEntry`, optional `withTransaction`).
 * @param {object[]} preparedEntries - Entries to write.
 * @param {Array} embeddings - Vectors aligned with `preparedEntries`; a missing slot becomes `[]`.
 * @param {Map} extractedClaimKeys - Forwarded to `planAutoSupersession`.
 * @param {object|undefined} claimExtractionConfig - Forwarded to `planAutoSupersession`.
 * @param {Function|undefined} onWarning - Optional sink for non-fatal messages.
 * @returns {Promise<number>} Number of entries inserted.
 */
async function persistEntries(db, preparedEntries, embeddings, extractedClaimKeys, claimExtractionConfig, onWarning) {
  const writeBatch = async (targetDb) => {
    const plans = await planAutoSupersession(targetDb, preparedEntries, extractedClaimKeys, claimExtractionConfig);
    // Several entries may share an identical plan warning; report each text once.
    const reportedWarnings = /* @__PURE__ */ new Set();
    let storedCount = 0;
    for (let position = 0; position < preparedEntries.length; position += 1) {
      const preparedEntry = preparedEntries[position];
      const vector = embeddings[position] ?? [];
      const entry = buildEntry(preparedEntry, vector);
      const entryId = await targetDb.insertEntry(entry, vector, preparedEntry.contentHash);

      // Caller-requested supersession.
      const explicitTarget = preparedEntry.input.supersedes;
      if (explicitTarget) {
        const linked = await targetDb.supersedeEntry(explicitTarget, entryId, "update");
        if (!linked) {
          onWarning?.(`Stored entry ${entryId} but could not supersede ${explicitTarget} because the target was missing or inactive.`);
        }
      }

      // Supersession derived from a matching claim key.
      const plan = plans.get(preparedEntry.inputIndex);
      if (plan?.kind === "link" && plan.oldEntryId) {
        const linked = await targetDb.supersedeEntry(plan.oldEntryId, entryId, "update");
        if (!linked) {
          onWarning?.(
            `Stored entry ${entryId} with claim_key "${preparedEntry.input.claim_key}" but could not auto-supersede ${plan.oldEntryId} because the target was missing or inactive.`
          );
        }
      }
      if (plan?.warning && !reportedWarnings.has(plan.warning)) {
        reportedWarnings.add(plan.warning);
        onWarning?.(plan.warning);
      }
      storedCount += 1;
    }
    return storedCount;
  };
  const wantsTransaction = () => preparedEntries.some((entry) => entry.input.supersedes !== void 0 || entry.input.claim_key !== void 0) || preparedEntries.length > 1;
  if (hasTransactionSupport(db) && wantsTransaction()) {
    return db.withTransaction(writeBatch);
  }
  return writeBatch(db);
}
|
|
3684
|
+
/**
 * Builds the record handed to `insertEntry` for one prepared entry.
 *
 * @param {object} preparedEntry - Carries `input`, `contentHash`,
 *   `normContentHash` and an optional accepted `claimKey` lifecycle.
 * @param {Array} embedding - Vector stored on the record.
 * @returns {object} A new entry with a fresh random id. Defaults applied here:
 *   importance 7, expiry "temporary", tags [], quality_score 0.5,
 *   recall_count 0, retired false; `created_at` falls back to the current time.
 */
function buildEntry(preparedEntry, embedding) {
  const { input, contentHash, normContentHash } = preparedEntry;
  // Lifecycle fields other than claim_key come only from the accepted claim key.
  const lifecycle = preparedEntry.claimKey;
  const timestamp = (/* @__PURE__ */ new Date()).toISOString();
  return {
    id: randomUUID(),
    type: input.type,
    subject: input.subject,
    content: input.content,
    importance: input.importance ?? 7,
    expiry: input.expiry ?? "temporary",
    tags: input.tags ?? [],
    source_file: input.source_file,
    source_context: input.source_context,
    user_id: input.user_id,
    project: input.project,
    embedding,
    content_hash: contentHash,
    norm_content_hash: normContentHash,
    quality_score: 0.5,
    recall_count: 0,
    valid_from: input.valid_from,
    valid_to: input.valid_to,
    claim_key: lifecycle?.claim_key ?? input.claim_key,
    claim_key_raw: lifecycle?.claim_key_raw,
    claim_key_status: lifecycle?.claim_key_status,
    claim_key_source: lifecycle?.claim_key_source,
    claim_key_confidence: lifecycle?.claim_key_confidence,
    claim_key_rationale: lifecycle?.claim_key_rationale,
    claim_support_source_kind: lifecycle?.claim_support_source_kind,
    claim_support_locator: lifecycle?.claim_support_locator,
    claim_support_observed_at: lifecycle?.claim_support_observed_at,
    claim_support_mode: lifecycle?.claim_support_mode,
    retired: false,
    created_at: input.created_at ?? timestamp,
    updated_at: timestamp
  };
}
|
|
3721
|
+
/**
 * Runs batch claim extraction over the pending entries when the caller
 * configured it, and re-keys the results by input index.
 *
 * Failures are deliberately non-fatal: any error is reported through
 * `options.onWarning` and an empty map is returned.
 *
 * @param {object[]} preparedEntries - Pending entries (`input`, `inputIndex`).
 * @param {object} options - Reads `claimExtraction` ({ llm, db, config }),
 *   `onWarning` and `onClaimExtractionDiagnostic`.
 * @returns {Promise<Map<number, object>>} Extracted claim keys by input index;
 *   empty when extraction is disabled, there is nothing to process, or it failed.
 */
async function maybeExtractClaimKeys(preparedEntries, options) {
  const { claimExtraction } = options;
  if (preparedEntries.length === 0 || !claimExtraction) {
    return /* @__PURE__ */ new Map();
  }
  // Diagnostics arrive keyed by the input object; translate back to the caller's index.
  const forwardDiagnostic = (entry, diagnostic) => {
    const owner = preparedEntries.find((candidate) => candidate.input === entry);
    if (owner) {
      options.onClaimExtractionDiagnostic?.(owner.inputIndex, diagnostic);
    }
  };
  try {
    const batch = { entries: preparedEntries.map(({ input }) => input) };
    const dependencies = {
      createLlm: () => claimExtraction.llm,
      db: claimExtraction.db
    };
    const extractedEntries = await runBatchClaimExtraction(
      [batch],
      dependencies,
      claimExtraction.config,
      1,
      options.onWarning,
      forwardDiagnostic
    );
    const byInputIndex = /* @__PURE__ */ new Map();
    for (const { input, inputIndex } of preparedEntries) {
      const extracted = extractedEntries.get(input);
      if (extracted) {
        byInputIndex.set(inputIndex, extracted);
      }
    }
    return byInputIndex;
  } catch (error) {
    // The first entry's subject labels the whole failed batch.
    const subject = preparedEntries[0]?.input.subject ?? "batch";
    options.onWarning?.(`Claim extraction failed for "${subject}": ${formatPipelineError(error)}`);
    return /* @__PURE__ */ new Map();
  }
}
|
|
3761
|
+
/**
 * Reports whether a store handle exposes a callable `withTransaction`.
 *
 * @param {object} db - Store handle.
 * @returns {boolean} True when `db.withTransaction` is a function.
 */
function hasTransactionSupport(db) {
  const { withTransaction } = db;
  return typeof withTransaction === "function";
}
|
|
3764
|
+
/**
 * Attaches a claim-key lifecycle to every prepared entry that does not have
 * one yet, mutating both the prepared entry and its input.
 *
 * A precomputed lifecycle built from the input wins; otherwise an extracted
 * claim key (if any) for the entry's input index is used.
 *
 * @param {object[]} preparedEntries - Entries to update in place.
 * @param {Map<number, object>} extractedClaimKeys - Extraction results by input index.
 * @returns {void}
 */
function applyExtractedClaimKeyMetadata(preparedEntries, extractedClaimKeys) {
  for (const preparedEntry of preparedEntries) {
    if (preparedEntry.claimKey) {
      continue;
    }
    const extracted = extractedClaimKeys.get(preparedEntry.inputIndex);
    let lifecycle = buildPrecomputedClaimKeyLifecycle(preparedEntry.input);
    if (lifecycle === null || lifecycle === void 0) {
      lifecycle = extracted
        ? buildExtractedClaimKeyLifecycle(extracted, buildInferredIngestClaimKeySupportContext(preparedEntry.input))
        : void 0;
    }
    if (lifecycle) {
      preparedEntry.claimKey = lifecycle;
      applyClaimKeyLifecycle(preparedEntry.input, lifecycle);
    }
  }
}
|
|
3778
|
+
/**
 * Decides, per prepared entry, whether storing it should automatically
 * supersede an existing active entry that shares its claim key.
 *
 * Entries without a claim key, with an explicit `supersedes`, or with no
 * active siblings get no plan. Otherwise the plan is either
 * `{ kind: "link", oldEntryId }` or `{ kind: "skip", warning }`.
 *
 * @param {object} db - Store handle used to look up active siblings.
 * @param {object[]} preparedEntries - Entries about to be written.
 * @param {Map} extractedClaimKeys - Accepted for signature compatibility; not read here.
 * @param {object|undefined} claimExtractionConfig - Passed to `isAutoSupersessionEligible`.
 * @returns {Promise<Map<number, object>>} Plans keyed by input index.
 */
async function planAutoSupersession(db, preparedEntries, extractedClaimKeys, claimExtractionConfig) {
  const plans = /* @__PURE__ */ new Map();
  const batchGroups = groupPreparedEntriesByClaimKey(preparedEntries);
  const siblingCache = /* @__PURE__ */ new Map();
  const skip = (inputIndex, warning) => {
    plans.set(inputIndex, { kind: "skip", warning });
  };
  for (const preparedEntry of preparedEntries) {
    const { input, inputIndex } = preparedEntry;
    const claimKey = preparedEntry.claimKey?.claim_key ?? input.claim_key;
    if (input.supersedes || !claimKey) {
      continue;
    }
    const siblings = await getClaimKeySiblings(db, siblingCache, claimKey);
    if (siblings.length === 0) {
      continue;
    }
    // Ambiguity inside the batch: more than one new entry targets the same slot.
    const sameSlotInBatch = batchGroups.get(claimKey)?.length ?? 0;
    if (sameSlotInBatch > 1) {
      skip(inputIndex, `Skipped auto-supersession for claim_key "${claimKey}" because this store batch contains ${sameSlotInBatch} entries for the same slot.`);
      continue;
    }
    // Ambiguity in the store: several active entries already occupy the slot.
    if (siblings.length > 1) {
      skip(inputIndex, `Skipped auto-supersession for claim_key "${claimKey}" because ${siblings.length} active siblings already exist for that slot.`);
      continue;
    }
    const sibling = siblings[0];
    if (!sibling) {
      continue;
    }
    if (!isAutoSupersessionEligible(preparedEntry.claimKey, claimExtractionConfig)) {
      skip(inputIndex, buildAutoSupersessionEligibilityWarning(preparedEntry));
      continue;
    }
    const validation = validateSupersessionRules(sibling, {
      type: input.type,
      expiry: input.expiry ?? "temporary"
    });
    if (!validation.ok) {
      skip(inputIndex, buildAutoSupersessionRuleWarning(preparedEntry, sibling, validation.reason));
      continue;
    }
    plans.set(inputIndex, {
      kind: "link",
      oldEntryId: sibling.id
    });
  }
  return plans;
}
|
|
3835
|
+
/**
 * Buckets prepared entries by claim key, preferring the accepted lifecycle's
 * key over the one on the input. Entries without a claim key are left out.
 *
 * @param {object[]} preparedEntries - Entries to group.
 * @returns {Map<string, object[]>} Claim key to entries, in first-seen order.
 */
function groupPreparedEntriesByClaimKey(preparedEntries) {
  const buckets = /* @__PURE__ */ new Map();
  for (const preparedEntry of preparedEntries) {
    const claimKey = preparedEntry.claimKey?.claim_key ?? preparedEntry.input.claim_key;
    if (claimKey) {
      let bucket = buckets.get(claimKey);
      if (bucket === void 0 || bucket === null) {
        bucket = [];
        buckets.set(claimKey, bucket);
      }
      bucket.push(preparedEntry);
    }
  }
  return buckets;
}
|
|
3848
|
+
/**
 * Returns the active entries stored under a claim key, memoizing the lookup
 * in the supplied cache.
 *
 * @param {{findActiveEntriesByClaimKey: Function}} db - Store handle.
 * @param {Map<string, object[]>} cache - Per-call memo, filled as a side effect.
 * @param {string} claimKey - Claim key to look up.
 * @returns {Promise<object[]>} The cached or freshly fetched siblings.
 */
async function getClaimKeySiblings(db, cache, claimKey) {
  const hit = cache.get(claimKey);
  if (!hit) {
    const fetched = await db.findActiveEntriesByClaimKey(claimKey);
    cache.set(claimKey, fetched);
    return fetched;
  }
  return hit;
}
|
|
3857
|
+
/**
 * Decides whether an accepted claim key is strong enough to auto-supersede.
 *
 * Only "trusted" keys qualify. Manual keys pass outright; other keys must
 * come from an eligible source, require an extraction config, and must meet
 * the larger of the configured threshold and the module-level minimum.
 *
 * @param {object|undefined} claimKey - Accepted claim-key lifecycle.
 * @param {{confidenceThreshold: number}|undefined} claimExtractionConfig - Extraction settings.
 * @returns {boolean} True when automatic linking is allowed.
 */
function isAutoSupersessionEligible(claimKey, claimExtractionConfig) {
  if (claimKey?.claim_key_status !== "trusted") {
    return false;
  }
  const source = claimKey.claim_key_source;
  if (source === "manual") {
    return true;
  }
  if (!AUTO_SUPERSESSION_ELIGIBLE_SOURCES.has(source) || !claimExtractionConfig) {
    return false;
  }
  const requiredConfidence = Math.max(claimExtractionConfig.confidenceThreshold, AUTO_SUPERSESSION_MIN_EXTRACTED_CONFIDENCE);
  return claimKey.claim_key_confidence >= requiredConfidence;
}
|
|
3869
|
+
/**
 * Builds the warning emitted when an entry was stored but its claim key did
 * not qualify for automatic supersession.
 *
 * @param {object} preparedEntry - Entry with `input.subject` and optionally an
 *   accepted `claimKey` lifecycle (status, source, numeric confidence).
 * @returns {string} Human-readable explanation tailored to why it was ineligible.
 */
function buildAutoSupersessionEligibilityWarning(preparedEntry) {
  const lifecycle = preparedEntry.claimKey;
  const claimKey = lifecycle?.claim_key ?? preparedEntry.input.claim_key ?? "(missing)";
  const lead = `Stored entry "${preparedEntry.input.subject}" with claim_key "${claimKey}" but skipped auto-supersession because `;
  if (!lifecycle) {
    return `${lead}the claim-key provenance was not explicit or a tracked high-confidence extraction.`;
  }
  if (lifecycle.claim_key_source === "manual") {
    return `${lead}the claim-key provenance was not eligible for automatic linking.`;
  }
  const source = lifecycle.claim_key_source;
  const confidence = lifecycle.claim_key_confidence.toFixed(2);
  const policy = `Only explicit/manual claim keys or model-extracted keys at ${AUTO_SUPERSESSION_MIN_EXTRACTED_CONFIDENCE.toFixed(2)}+ auto-link.`;
  if (lifecycle.claim_key_status !== "trusted") {
    return `${lead}the accepted claim key is ${lifecycle.claim_key_status} from ${source} at confidence ${confidence}. ${policy}`;
  }
  return `${lead}the extracted claim key came from ${source} at confidence ${confidence}. ${policy}`;
}
|
|
3883
|
+
/**
 * Builds the warning emitted when supersession rules reject an automatic link.
 *
 * @param {object} preparedEntry - New entry (`input.subject`, `input.claim_key`, `input.type`).
 * @param {object} sibling - Existing active entry that matched the claim key.
 * @param {string} reason - Failure reason reported by the rule validation.
 * @returns {string} Warning text; "type_mismatch" additionally names both entry types.
 */
function buildAutoSupersessionRuleWarning(preparedEntry, sibling, reason) {
  const { input } = preparedEntry;
  const lead = `Stored entry "${input.subject}" with claim_key "${input.claim_key}" but skipped auto-supersession`;
  if (reason !== "type_mismatch") {
    return `${lead}: ${describeSupersessionRuleFailure(reason)}`;
  }
  return `${lead} because the matching active entry is type "${sibling.type}" and the new entry is type "${input.type}". ${describeSupersessionRuleFailure(reason)}`;
}
|
|
3889
|
+
/**
 * Validates the inputs and works out which of them still need to be written.
 *
 * Valid entries are hashed, then filtered in four passes: duplicates within
 * the batch by content hash, content hashes already in the store, duplicates
 * within the batch by normalized-content hash, and normalized hashes already
 * in the store. Every rejection or skip is recorded in `details`.
 *
 * @param {object[]} inputs - Raw entry inputs.
 * @param {object} db - Store handle (`findExistingHashes`, `findExistingNormHashes`).
 * @returns {Promise<{pendingEntries: object[], skipped: number, rejected: number, details: object[], warnings: string[]}>}
 */
async function buildStorePlan(inputs, db) {
  const validation = validateEntriesWithIndexes(inputs);
  const details = validation.rejectedInputIndexes.map((inputIndex) => ({
    inputIndex,
    outcome: "rejected",
    reason: "validation"
  }));
  const prepared = validation.valid.map((candidate) => {
    const { input, inputIndex } = candidate;
    return {
      input,
      inputIndex,
      contentHash: computeContentHash(input.content, input.source_file),
      normContentHash: computeNormContentHash(input.content),
      claimKey: buildManualAcceptedClaimKey(inputs[inputIndex], input)
    };
  });

  // Exact-content duplicates: first within the batch, then against the store.
  const uniqueByContent = dedupePreparedEntries(prepared, "contentHash", "content_hash", details);
  const storedContentHashes = await db.findExistingHashes(uniqueByContent.map((entry) => entry.contentHash));
  const newByContent = filterExistingPreparedEntries(uniqueByContent, storedContentHashes, "contentHash", "content_hash", details);

  // Normalized-content duplicates: same two passes.
  const uniqueByNorm = dedupePreparedEntries(newByContent, "normContentHash", "norm_content_hash", details);
  const storedNormHashes = await db.findExistingNormHashes(uniqueByNorm.map((entry) => entry.normContentHash));
  const pendingEntries = filterExistingPreparedEntries(uniqueByNorm, storedNormHashes, "normContentHash", "norm_content_hash", details);

  const skipped = details.filter((detail) => detail.outcome === "skipped").length;
  return {
    pendingEntries,
    skipped,
    rejected: validation.rejected,
    details,
    warnings: validation.warnings
  };
}
|
|
3917
|
+
/**
 * Keeps the first entry for each distinct value of `field` and records every
 * later duplicate as skipped.
 *
 * @param {object[]} entries - Prepared entries.
 * @param {string} field - Property whose value identifies a duplicate.
 * @param {string} reason - Reason label written to each skip detail.
 * @param {object[]} details - Collector, appended to in place.
 * @returns {object[]} A new array holding the first occurrence of each key.
 */
function dedupePreparedEntries(entries, field, reason, details) {
  const seenKeys = /* @__PURE__ */ new Set();
  return entries.filter((entry) => {
    const key = entry[field];
    if (!seenKeys.has(key)) {
      seenKeys.add(key);
      return true;
    }
    details.push({
      inputIndex: entry.inputIndex,
      outcome: "skipped",
      reason
    });
    return false;
  });
}
|
|
3935
|
+
/**
 * Drops entries whose `field` value is already present in `existing`,
 * recording each dropped entry as skipped.
 *
 * @param {object[]} entries - Prepared entries.
 * @param {{has: Function}} existing - Collection of known values, queried with `.has`.
 * @param {string} field - Property compared against `existing`.
 * @param {string} reason - Reason label written to each skip detail.
 * @param {object[]} details - Collector, appended to in place.
 * @returns {object[]} A new array of the entries that are not yet known.
 */
function filterExistingPreparedEntries(entries, existing, field, reason, details) {
  const fresh = [];
  for (const entry of entries) {
    if (existing.has(entry[field])) {
      details.push({
        inputIndex: entry.inputIndex,
        outcome: "skipped",
        reason
      });
    } else {
      fresh.push(entry);
    }
  }
  return fresh;
}
|
|
3948
|
+
/**
 * Turns a caught value into message text.
 *
 * @param {unknown} error - Anything that was thrown.
 * @returns {string} `error.message` for Error instances, otherwise `String(error)`.
 */
function formatPipelineError(error) {
  return error instanceof Error ? error.message : String(error);
}
|
|
3954
|
+
/**
 * Returns the details ordered by ascending input index without modifying the
 * array that was passed in.
 *
 * @param {object[]} details - Outcome records carrying `inputIndex`.
 * @returns {object[]} A sorted copy.
 */
function sortStoreDetails(details) {
  const ordered = Array.from(details);
  ordered.sort((first, second) => first.inputIndex - second.inputIndex);
  return ordered;
}
|
|
3957
|
+
/**
 * Derives the accepted claim-key lifecycle for an input that arrived with a
 * claim key of its own.
 *
 * @param {object|undefined} rawInput - The caller's original input at the same index.
 * @param {object} normalizedInput - The validated, normalized input.
 * @returns {object|undefined} Undefined when there is no claim key; the
 *   precomputed lifecycle when the normalized input yields one; otherwise a
 *   manual lifecycle built from the normalized support fields.
 * @throws {Error} When the raw input carries lifecycle fields but no
 *   precomputed lifecycle could be built from the normalized input.
 */
function buildManualAcceptedClaimKey(rawInput, normalizedInput) {
  const { claim_key: canonicalClaimKey } = normalizedInput;
  if (!canonicalClaimKey) {
    return void 0;
  }
  const precomputed = buildPrecomputedClaimKeyLifecycle(normalizedInput);
  if (precomputed) {
    return precomputed;
  }
  if (rawInput && hasPrecomputedClaimKeyLifecycleFields(rawInput)) {
    throw new Error("Store inputs with claim-key lifecycle metadata must provide a complete valid lifecycle bundle.");
  }
  // Prefer an explicitly supplied raw key; otherwise fall back to the caller's original spelling.
  const rawClaimKey = normalizedInput.claim_key_raw ?? normalizeOptionalString2(rawInput?.claim_key);
  return buildManualClaimKeyLifecycle({
    claimKey: canonicalClaimKey,
    rawClaimKey,
    supportSourceKind: normalizedInput.claim_support_source_kind,
    supportLocator: normalizedInput.claim_support_locator,
    supportObservedAt: normalizedInput.claim_support_observed_at,
    supportMode: normalizedInput.claim_support_mode
  });
}
|
|
3978
|
+
/**
 * Trims an optional string and collapses "empty" to undefined.
 *
 * @param {string|undefined|null} value - Raw value.
 * @returns {string|undefined} The trimmed string, or undefined when the value
 *   is nullish or empty after trimming.
 */
function normalizeOptionalString2(value) {
  if (value === null || value === void 0) {
    return void 0;
  }
  const trimmed = value.trim();
  if (trimmed && trimmed.length > 0) {
    return trimmed;
  }
  return void 0;
}
|
|
3982
|
+
|
|
3983
|
+
// src/core/episode/summary-prompt.ts
|
|
3984
|
+
// System prompt for episode summarization: framing lines, the required JSON
// shape, then the bullet-list requirements, joined with newlines.
var EPISODE_SUMMARY_SYSTEM_PROMPT = (() => {
  const framing = [
    "You write strict JSON episode summaries for historical recall.",
    "The transcript can be about any topic - technical work, casual conversation, planning, research, creative projects, life events, or anything else.",
    "Do not assume any particular domain.",
    "Describe only what happened in this session.",
    "Do not carry inherited context or open loops forward unless the session actively worked on them."
  ];
  const outputShape = [
    "Return exactly one JSON object with this shape:",
    '{ "summary": string, "tags": string[], "activityLevel": "substantial" | "minimal" | "none", "project": string | null }'
  ];
  const requirements = [
    "Requirements:",
    "- summary must be 100 to 300 words in plain prose (roughly 4 to 10 sentences)",
    "- describe what was discussed, decided, or accomplished - not a turn-by-turn replay",
    "- this is a narrative overview for historical recall, not a verbatim record",
    "- preserve concrete details worth remembering: names, places, dates, specific decisions, key topics, and notable specifics that would help someone recall this session months later",
    "- tags must be 3 to 8 short lowercase anchors drawn from the actual session content",
    "- project should be null when no clear project scope appears",
    "- activityLevel: use substantial when meaningful discussion or work occurred, minimal when the session was brief or lightweight, none when essentially nothing happened",
    "- do not include Markdown fences or extra commentary"
  ];
  return [...framing, ...outputShape, ...requirements].join("\n");
})();
|
|
4002
|
+
/**
 * Builds the user prompt that asks for an episode summary of a transcript.
 *
 * @param {string} transcript - Transcript text appended after the "Transcript:" line.
 * @returns {string} Instructions, a blank line, the "Transcript:" label and the transcript.
 */
function buildEpisodeSummaryPrompt(transcript) {
  const preamble = [
    "Produce a historical episodic summary for this completed session.",
    "Describe what was discussed, decided, or accomplished during this transcript window.",
    "",
    "Transcript:"
  ].join("\n");
  // A nullish transcript renders as an empty final line.
  return `${preamble}\n${transcript ?? ""}`;
}
|
|
4011
|
+
/**
 * Parses a model response into a normalized episode summary.
 *
 * @param {string} value - Raw model output expected to contain a JSON object.
 * @returns {{summary: string, tags: string[], activityLevel: string, project?: string}|null}
 *   Null when no JSON object could be parsed or when the summary or activity
 *   level is missing or invalid; `project` is present only when it normalizes
 *   to a non-empty string.
 */
function parseEpisodeSummaryResponse(value) {
  const parsed = parseJsonObject(value);
  const isRecord = Boolean(parsed) && typeof parsed === "object" && !Array.isArray(parsed);
  if (!isRecord) {
    return null;
  }
  const summary = normalizeSummary(parsed.summary);
  const activityLevel = normalizeActivityLevel(parsed.activityLevel);
  if (!(summary && activityLevel)) {
    return null;
  }
  const result = {
    summary,
    tags: normalizeTags2(parsed.tags),
    activityLevel
  };
  const project = normalizeProject(parsed.project);
  if (project) {
    result.project = project;
  }
  return result;
}
|
|
4029
|
+
/**
 * Collapses every whitespace run in a summary to a single space and trims it.
 *
 * @param {unknown} value - Candidate summary.
 * @returns {string | null} The cleaned summary, or null for non-strings and blank text.
 */
function normalizeSummary(value) {
  if (typeof value === "string") {
    const collapsed = value.replace(/\s+/gu, " ").trim();
    if (collapsed) {
      return collapsed;
    }
  }
  return null;
}
|
|
4036
|
+
/**
 * Normalizes an activity level to its trimmed, lowercase form.
 *
 * @param {unknown} value - Candidate activity level.
 * @returns {string | null} The normalized level when it is listed in
 *   EPISODE_ACTIVITY_LEVELS, otherwise null.
 */
function normalizeActivityLevel(value) {
  if (typeof value === "string") {
    const level = value.trim().toLowerCase();
    if (EPISODE_ACTIVITY_LEVELS.includes(level)) {
      return level;
    }
  }
  return null;
}
|
|
4043
|
+
/**
 * Normalizes a tag list: keeps string entries, trims and lowercases them,
 * drops empties, removes duplicates (first occurrence wins) and keeps at most 8.
 *
 * @param {unknown} value - Candidate tag array.
 * @returns {string[]} Up to 8 unique normalized tags; empty when value is not an array.
 */
function normalizeTags2(value) {
  if (!Array.isArray(value)) {
    return [];
  }
  const unique = /* @__PURE__ */ new Set();
  for (const raw of value) {
    if (typeof raw !== "string") {
      continue;
    }
    const tag = raw.trim().toLowerCase();
    if (tag.length > 0) {
      unique.add(tag);
    }
  }
  return [...unique].slice(0, 8);
}
|
|
4053
|
+
/**
 * Collapses whitespace runs in a project name to single spaces and trims it.
 *
 * @param {unknown} value - Candidate project name.
 * @returns {string | undefined} The cleaned name, or undefined for non-strings and blank text.
 */
function normalizeProject(value) {
  if (typeof value === "string") {
    const collapsed = value.replace(/\s+/gu, " ").trim();
    if (collapsed) {
      return collapsed;
    }
  }
  return undefined;
}
|
|
4060
|
+
/**
 * Returns the parsed value of the first candidate string that is valid JSON.
 *
 * Candidates come from collectJsonCandidates(value) and are tried in order;
 * candidates that fail to parse are skipped.
 *
 * @param {string} value - Raw text that may contain JSON.
 * @returns {unknown | null} The first successfully parsed value, or null when none parses.
 */
function parseJsonObject(value) {
  for (const candidateText of collectJsonCandidates(value)) {
    try {
      return JSON.parse(candidateText);
    } catch {
      // Not valid JSON; fall through to the next candidate.
    }
  }
  return null;
}
|
|
4071
|
+
/**
 * Collects the substrings of a completion that might be a JSON document.
 *
 * Candidates, in order and de-duplicated:
 *   1. the whole trimmed text,
 *   2. the body of every ``` / ```json fenced block,
 *   3. the span from the first "{" to the last "}".
 *
 * @param {unknown} value - Raw completion text.
 * @returns {string[]} Candidate strings; empty when value is not a string or is blank.
 */
function collectJsonCandidates(value) {
  // A non-string completion has nothing to scan; previously this threw a
  // TypeError from value.trim().
  if (typeof value !== "string") {
    return [];
  }
  const trimmed = value.trim();
  const candidates = /* @__PURE__ */ new Set();
  if (trimmed) {
    candidates.add(trimmed);
  }
  const fencedBlocks = trimmed.match(/```(?:json)?\s*([\s\S]*?)```/giu) ?? [];
  for (const block of fencedBlocks) {
    // Strip the opening fence (with optional "json" tag), then the closing fence.
    const inner = block.replace(/```(?:json)?/iu, "").replace(/```/gu, "").trim();
    if (inner) {
      candidates.add(inner);
    }
  }
  const objectStart = trimmed.indexOf("{");
  const objectEnd = trimmed.lastIndexOf("}");
  if (objectStart >= 0 && objectEnd > objectStart) {
    candidates.add(trimmed.slice(objectStart, objectEnd + 1));
  }
  return [...candidates];
}
|
|
4091
|
+
|
|
4092
|
+
// src/core/episode/summary-generator.ts
|
|
4093
|
+
/**
 * Asks the LLM for an episode summary of a rendered transcript and parses the reply.
 *
 * @param {string} transcript - Rendered transcript text.
 * @param {{complete: Function}} llm - Client whose complete(systemPrompt, userPrompt) resolves to the completion.
 * @returns {Promise<object | null>} The parsed summary, or null when the reply is unusable.
 */
async function generateEpisodeSummary(transcript, llm) {
  const userPrompt = buildEpisodeSummaryPrompt(transcript);
  const completion = await llm.complete(EPISODE_SUMMARY_SYSTEM_PROMPT, userPrompt);
  return parseEpisodeSummaryResponse(completion);
}
|
|
4097
|
+
|
|
4098
|
+
// src/app/episode-ingest/service/preflight.ts
|
|
4099
|
+
import path2 from "path";
|
|
4100
|
+
|
|
4101
|
+
// src/core/episode/transcript-render.ts
|
|
4102
|
+
// Transcripts with fewer non-empty messages than this are skipped as too short.
var MIN_EPISODE_MESSAGES = 4;
// Maximum length, in characters, of the rendered transcript sent for summarization.
var MAX_EPISODE_TRANSCRIPT_CHARS = 14e3;
/**
 * Renders messages as "User: ..." / "Assistant: ..." lines joined by newlines.
 * Any role other than "user" is labelled "Assistant"; message text is trimmed.
 *
 * @param {Array<{role: string, text: string}>} messages - Messages to render.
 * @returns {string} One line per message.
 */
function renderTranscript(messages) {
  const toLine = ({ role, text }) => {
    const speaker = role === "user" ? "User" : "Assistant";
    return `${speaker}: ${text.trim()}`;
  };
  return messages.map(toLine).join("\n");
}
|
|
4107
|
+
/**
 * Shortens a transcript that exceeds maxChars by keeping its head and tail
 * and replacing the middle with an omission marker.
 *
 * After the marker is accounted for, 35% of the remaining budget goes to the
 * head and the rest to the tail. Both pieces are trimmed to whitespace
 * boundaries so no partial word is kept.
 *
 * @param {string} transcript - Rendered transcript.
 * @param {number} maxChars - Character limit.
 * @returns {string} The transcript unchanged when it fits, otherwise head + marker + tail, trimmed.
 */
function capEpisodeTranscript(transcript, maxChars) {
  if (transcript.length <= maxChars) {
    return transcript;
  }
  const omissionMarker = "\n\n[Earlier middle transcript omitted for brevity]\n\n";
  const available = maxChars - omissionMarker.length;
  const headBudget = Math.max(0, Math.floor(available * 0.35));
  const tailBudget = Math.max(0, available - headBudget);
  const head = trimToBoundary(transcript.slice(0, headBudget), false);
  // slice(-0) equals slice(0) and would return the ENTIRE transcript, so a
  // zero tail budget must yield an empty tail explicitly.
  const tail = tailBudget > 0 ? trimToBoundary(transcript.slice(-tailBudget), true) : "";
  return `${head}${omissionMarker}${tail}`.trim();
}
/**
 * Trims a transcript fragment to a whitespace boundary.
 *
 * @param {string} value - Fragment cut at an arbitrary character offset.
 * @param {boolean} fromStart - When true, drop everything before the first
 *   whitespace character; when false, drop the last whitespace-delimited run
 *   at the end.
 * @returns {string} The trimmed fragment; when no whitespace boundary exists
 *   the whole fragment is returned trimmed.
 */
function trimToBoundary(value, fromStart) {
  if (value.length === 0) {
    return value;
  }
  if (fromStart) {
    const firstWhitespace = value.search(/\s/u);
    return firstWhitespace >= 0 ? value.slice(firstWhitespace).trimStart() : value.trim();
  }
  // Index of the last whitespace character once trailing whitespace is ignored.
  const lastWhitespace = value.trimEnd().search(/\s\S*$/u);
  return lastWhitespace >= 0 ? value.slice(0, lastWhitespace).trimEnd() : value.trim();
}
|
|
4129
|
+
|
|
4130
|
+
// src/app/episode-ingest/service/shared.ts
|
|
4131
|
+
// Rough characters-per-token ratio used for token estimates.
var CHARS_PER_TOKEN_ESTIMATE = 4;
/**
 * Estimates the token count of a text from its character length.
 *
 * @param {string} renderedTranscript - Text to estimate.
 * @returns {number} Length divided by CHARS_PER_TOKEN_ESTIMATE, rounded up, never less than 1.
 */
function estimateInputTokens(renderedTranscript) {
  const roundedUp = Math.ceil(renderedTranscript.length / CHARS_PER_TOKEN_ESTIMATE);
  return Math.max(1, roundedUp);
}
|
|
4135
|
+
/**
 * Estimates the input tokens of one summary request: the system prompt plus
 * the user prompt built around the rendered transcript.
 *
 * @param {string} renderedTranscript - Rendered transcript text.
 * @returns {number} Sum of the two token estimates.
 */
function estimateEpisodeSummaryInputTokens(renderedTranscript) {
  const systemTokens = estimateInputTokens(EPISODE_SUMMARY_SYSTEM_PROMPT);
  const userTokens = estimateInputTokens(buildEpisodeSummaryPrompt(renderedTranscript));
  return systemTokens + userTokens;
}
|
|
4138
|
+
/**
 * Embeds an episode summary, best effort.
 *
 * Uses ports.embedSummary when provided; otherwise falls back to the generic
 * ports.embedding port. A failure in ports.embedSummary is swallowed and
 * reported as "no embedding".
 *
 * @param {string} summary - Summary text to embed.
 * @param {object} ports - Ports exposing embedSummary and/or embedding.
 * @returns {Promise<number[] | undefined>} The normalized vector, or undefined when unavailable.
 */
async function embedEpisodeSummary(summary, ports) {
  if (!ports.embedSummary) {
    return embedEpisodeSummaryWithPort(summary, ports.embedding);
  }
  try {
    const rawVector = await ports.embedSummary(summary);
    return normalizeEmbeddingVector(rawVector);
  } catch {
    return undefined;
  }
}
|
|
4148
|
+
/**
 * Embeds a summary through an embedding port, best effort.
 *
 * @param {string} summary - Summary text to embed.
 * @param {{embed: Function} | undefined} embeddingPort - Port whose embed([text]) resolves to a list of vectors.
 * @returns {Promise<number[] | undefined>} The first vector, normalized; undefined
 *   when no port is given or embedding fails.
 */
async function embedEpisodeSummaryWithPort(summary, embeddingPort) {
  if (embeddingPort) {
    try {
      const [firstVector] = await embeddingPort.embed([summary]);
      return normalizeEmbeddingVector(firstVector);
    } catch {
      return undefined;
    }
  }
  return undefined;
}
|
|
4159
|
+
/**
 * Parses a candidate's endedAt value into a Date.
 *
 * @param {string | undefined} endedAt - Timestamp as stored on the candidate.
 * @returns {Date | undefined} The parsed date, or undefined when missing or unparseable.
 */
function parseCandidateEndedAt(endedAt) {
  if (!endedAt) {
    return undefined;
  }
  const timestamp = new Date(endedAt);
  if (Number.isNaN(timestamp.getTime())) {
    return undefined;
  }
  return timestamp;
}
|
|
4166
|
+
/**
 * Creates a runner that executes submitted tasks strictly one after another.
 *
 * Each task starts only after the previously submitted task has settled.
 * A failing task rejects its own promise but does not block later tasks.
 *
 * @returns {(task: Function) => Promise<unknown>} Runner resolving or rejecting with the task's outcome.
 */
function createSerializedExecutor() {
  const ignoreOutcome = () => undefined;
  let tail = Promise.resolve();
  return async (task) => {
    const run = tail.then(task, task);
    // Keep the chain alive regardless of how this task settles.
    tail = run.then(ignoreOutcome, ignoreOutcome);
    return run;
  };
}
|
|
4177
|
+
/**
 * Creates a usage-stats record with every counter set to zero.
 *
 * @returns {{calls: number, inputTokens: number, outputTokens: number, cacheReadTokens: number, cacheWriteTokens: number, totalTokens: number, totalCost: number}}
 */
function createEmptyUsageStats() {
  const stats = {};
  const counters = [
    "calls",
    "inputTokens",
    "outputTokens",
    "cacheReadTokens",
    "cacheWriteTokens",
    "totalTokens",
    "totalCost"
  ];
  for (const counter of counters) {
    stats[counter] = 0;
  }
  return stats;
}
|
|
4188
|
+
/**
 * Copies the seven usage counters into a new object; any other properties on
 * the input are not carried over.
 *
 * @param {object} usage - Usage stats to copy.
 * @returns {object} A fresh object holding only the usage counters.
 */
function cloneUsageStats(usage) {
  const {
    calls,
    inputTokens,
    outputTokens,
    cacheReadTokens,
    cacheWriteTokens,
    totalTokens,
    totalCost
  } = usage;
  return {
    calls,
    inputTokens,
    outputTokens,
    cacheReadTokens,
    cacheWriteTokens,
    totalTokens,
    totalCost
  };
}
|
|
4199
|
+
/**
 * Adds every usage counter of `usage` onto `total`, mutating `total` in place.
 *
 * @param {object} total - Accumulator that is updated.
 * @param {object} usage - Counters to add.
 * @returns {object} The same `total` object.
 */
function addUsageStats(total, usage) {
  const counters = [
    "calls",
    "inputTokens",
    "outputTokens",
    "cacheReadTokens",
    "cacheWriteTokens",
    "totalTokens",
    "totalCost"
  ];
  for (const counter of counters) {
    total[counter] += usage[counter];
  }
  return total;
}
|
|
4209
|
+
/**
 * Trims an optional string.
 *
 * @param {string | null | undefined} value - Value to trim.
 * @returns {string | undefined} The trimmed text, or undefined when the value
 *   is null/undefined or blank.
 */
function trimOptionalString(value) {
  if (value === null || value === undefined) {
    return undefined;
  }
  const trimmed = value.trim();
  return trimmed || undefined;
}
|
|
4213
|
+
/**
 * Turns a thrown value into a short message.
 *
 * @param {unknown} error - Thrown value.
 * @returns {string} The Error's message (or its name when the message is
 *   empty); String(error) for non-Error values.
 */
function formatExecutionError(error) {
  if (!(error instanceof Error)) {
    return String(error);
  }
  return error.message || error.name;
}
|
|
4219
|
+
/**
 * Sort comparator that orders candidates newest-first by endedAt.
 *
 * Candidates with a missing or unparseable endedAt sort after all dated
 * ones. Ties are broken by filePath using localeCompare.
 *
 * @param {{endedAt?: string, filePath: string}} left
 * @param {{endedAt?: string, filePath: string}} right
 * @returns {number} Negative when left sorts first, positive when right sorts first, 0 when equal.
 */
function compareCandidatesByEndedAt(left, right) {
  // An unparseable timestamp yields NaN, which would make the comparator
  // return NaN and leave the sort order inconsistent; treat it as missing.
  const toSortableTime = (endedAt) => {
    if (!endedAt) {
      return Number.NEGATIVE_INFINITY;
    }
    const time = new Date(endedAt).getTime();
    return Number.isNaN(time) ? Number.NEGATIVE_INFINITY : time;
  };
  const leftTime = toSortableTime(left.endedAt);
  const rightTime = toSortableTime(right.endedAt);
  if (leftTime !== rightTime) {
    return rightTime - leftTime;
  }
  return left.filePath.localeCompare(right.filePath);
}
|
|
4227
|
+
/**
 * Sanitizes an embedding vector by replacing non-finite components with 0.
 *
 * @param {number[] | null | undefined} vector - Raw embedding.
 * @returns {number[] | undefined} The sanitized copy, or undefined when the
 *   vector is missing or empty.
 */
function normalizeEmbeddingVector(vector) {
  if (vector === null || vector === undefined) {
    return undefined;
  }
  const sanitized = vector.map((component) => (Number.isFinite(component) ? component : 0));
  if (sanitized && sanitized.length > 0) {
    return sanitized;
  }
  return undefined;
}
|
|
4231
|
+
|
|
4232
|
+
// src/app/episode-ingest/service/preflight.ts
|
|
4233
|
+
// A session whose transcript ended within this window (5 minutes) is treated as still active.
var ACTIVE_SESSION_WINDOW_MS = 5 * 60 * 1e3;
/**
 * Discovers transcript files under targetPath and classifies each one as an
 * ingest candidate, a skipped transcript, or an invalid transcript.
 *
 * Files are classified by a pool of concurrent workers. Results keep
 * discovery order per bucket, and candidates are finally sorted with
 * compareCandidatesByEndedAt.
 *
 * @param {string} targetPath - Path handed to ports.files.discoverFiles.
 * @param {object} ports - Ports: files, transcript, episodes and optionally sessionRegistry.
 * @param {object} [options]
 * @param {number} [options.preflightConcurrency] - Worker count; defaults to 20, truncated, minimum 1.
 * @param {Date} [options.now] - Reference time for the active-session check.
 * @param {boolean} [options.regenerate] - When true, existing episodes do not cause a skip.
 * @param {Function} [options.onPreflightProgress] - Called as (completed, total) after each file.
 * @returns {Promise<object>} files, candidates, skipped, invalid and the totals per bucket.
 */
async function prepareEpisodeIngest(targetPath, ports, options = {}) {
  const files = await ports.files.discoverFiles(targetPath);
  if (files.length === 0) {
    return createEmptyPreflightResult();
  }
  if (ports.sessionRegistry) {
    // Result is discarded; presumably this primes the registry before per-file lookups (TODO confirm).
    await ports.sessionRegistry.listSessions();
  }
  const requestedConcurrency = options.preflightConcurrency ?? 20;
  const preflightConcurrency = Number.isFinite(requestedConcurrency)
    ? Math.max(1, Math.trunc(requestedConcurrency))
    : 20;
  const workerCount = Math.min(preflightConcurrency, files.length);
  const referenceNow = options.now ?? /* @__PURE__ */ new Date();
  // One slot per discovered file in each bucket so discovery order is preserved.
  const candidatesByIndex = new Array(files.length);
  const skippedByIndex = new Array(files.length);
  const invalidByIndex = new Array(files.length);
  let cursor = 0;
  let completed = 0;
  const runWorker = async () => {
    // Each worker claims the next unprocessed index until the list is exhausted.
    for (let index = cursor++; index < files.length; index = cursor++) {
      const filePath = files[index];
      if (!filePath) {
        return;
      }
      const result = await classifyPreflightTranscript(filePath, ports, {
        referenceNow,
        regenerate: options.regenerate === true
      });
      switch (result.kind) {
        case "candidate":
          candidatesByIndex[index] = result.value;
          break;
        case "skipped":
          skippedByIndex[index] = result.value;
          break;
        default:
          invalidByIndex[index] = result.value;
          break;
      }
      completed += 1;
      options.onPreflightProgress?.(completed, files.length);
    }
  };
  await Promise.all(Array.from({ length: workerCount }, runWorker));
  const compact = (slots) => slots.filter((entry) => Boolean(entry));
  const skipped = compact(skippedByIndex);
  const invalid = compact(invalidByIndex);
  const candidates = compact(candidatesByIndex);
  candidates.sort(compareCandidatesByEndedAt);
  const countSkipped = (reason) => skipped.filter((entry) => entry.reason === reason).length;
  return {
    files,
    candidates,
    skipped,
    invalid,
    totals: {
      discovered: files.length,
      candidates: candidates.length,
      skipped: skipped.length,
      invalid: invalid.length,
      skippedShort: countSkipped("skipped_short"),
      skippedActive: countSkipped("skipped_active"),
      skippedExists: countSkipped("skipped_exists")
    }
  };
}
|
|
4299
|
+
/**
 * Parses one transcript file and decides how the ingest should treat it.
 *
 * Checks run in this order:
 *   1. no session id and no non-empty messages -> "invalid"
 *   2. an episode already exists and regenerate is off -> skipped ("skipped_exists")
 *   3. fewer than MIN_EPISODE_MESSAGES non-empty messages -> skipped ("skipped_short")
 *   4. the session ended within the active window (unless the check is disabled) -> skipped ("skipped_active")
 *   5. otherwise -> "candidate" with the rendered, capped transcript
 *
 * @param {string} filePath - Transcript file to classify.
 * @param {object} ports - Ports: transcript, episodes and optionally sessionRegistry.
 * @param {object} options
 * @param {Date} options.referenceNow - "Now" used by the active-session check.
 * @param {boolean} [options.regenerate] - When true, existing episodes do not cause a skip.
 * @param {boolean} [options.skipActiveSessionCheck] - When true, step 4 is bypassed.
 * @returns {Promise<{kind: "candidate" | "skipped" | "invalid", value: object}>}
 */
async function classifyPreflightTranscript(filePath, ports, options) {
  const parsedTranscript = await ports.transcript.parseFile(filePath);
  const { metadata } = parsedTranscript;
  const cleanedMessages = parsedTranscript.messages.filter((message) => message.text.trim().length > 0);
  const messageCount = cleanedMessages.length;
  const parsedSessionId = metadata.sessionId?.trim() || undefined;
  let registryMeta;
  if (parsedSessionId) {
    registryMeta = await ports.sessionRegistry?.getSessionMeta(parsedSessionId);
  }
  // Surface data reconstructed from the transcript is only used when the registry has no entry.
  const reconstructedMeta = registryMeta
    ? undefined
    : {
        surface: metadata.reconstructedSurface ?? null,
        metadataSource: metadata.surfaceReconstructionSource ?? "none"
      };
  const resolvedMeta = resolveSessionMeta(filePath, parsedSessionId, registryMeta, reconstructedMeta);
  const { transcriptHash } = metadata;
  if (!resolvedMeta.sessionId && messageCount === 0) {
    return {
      kind: "invalid",
      value: {
        filePath,
        sessionId: undefined,
        transcriptHash,
        messageCount: 0,
        metadataSource: resolvedMeta.metadataSource
      }
    };
  }
  // All skip outcomes share one shape; only the reason (and an optional extra) differs.
  const skip = (reason, extra = {}) => ({
    kind: "skipped",
    value: {
      filePath,
      reason,
      sessionId: resolvedMeta.sessionId,
      transcriptHash,
      messageCount,
      startedAt: metadata.startedAt,
      endedAt: metadata.endedAt,
      agentId: resolvedMeta.agentId,
      surface: resolvedMeta.surface,
      metadataSource: resolvedMeta.metadataSource,
      ...extra
    }
  });
  const existingEpisode = await findExistingEpisode(ports, resolvedMeta.sessionId, transcriptHash);
  if (existingEpisode && options.regenerate !== true) {
    return skip("skipped_exists", { existingEpisode });
  }
  if (messageCount < MIN_EPISODE_MESSAGES) {
    return skip("skipped_short");
  }
  const activeCheckEnabled = options.skipActiveSessionCheck !== true;
  if (activeCheckEnabled && isActiveSession(metadata.endedAt, options.referenceNow)) {
    return skip("skipped_active");
  }
  const renderedTranscript = capEpisodeTranscript(renderTranscript(cleanedMessages), MAX_EPISODE_TRANSCRIPT_CHARS);
  return {
    kind: "candidate",
    value: {
      filePath,
      sessionId: resolvedMeta.sessionId,
      sourceRef: resolvedMeta.sourceRef,
      transcriptHash,
      startedAt: metadata.startedAt,
      endedAt: metadata.endedAt,
      messageCount,
      agentId: resolvedMeta.agentId,
      surface: resolvedMeta.surface,
      metadataSource: resolvedMeta.metadataSource,
      renderedTranscript,
      estimatedInputTokens: estimateInputTokens(renderedTranscript),
      ...(existingEpisode ? { existingEpisode } : {})
    }
  };
}
|
|
4394
|
+
/**
 * Creates the preflight result returned when no transcript files were discovered.
 *
 * @returns {object} Empty file/candidate/skipped/invalid lists and all-zero totals.
 */
function createEmptyPreflightResult() {
  const totals = {
    discovered: 0,
    candidates: 0,
    skipped: 0,
    invalid: 0,
    skippedShort: 0,
    skippedActive: 0,
    skippedExists: 0
  };
  return { files: [], candidates: [], skipped: [], invalid: [], totals };
}
|
|
4411
|
+
/**
 * Resolves the session metadata used for a transcript.
 *
 * Registry metadata wins when available. Otherwise the file path is the
 * source reference, the agent id is derived from the path, and the surface
 * comes from metadata reconstructed out of the transcript.
 *
 * @param {string} filePath - Transcript file path.
 * @param {string | undefined} parsedSessionId - Session id read from the transcript.
 * @param {object | undefined} registryMeta - Metadata from the session registry, if any.
 * @param {{surface: unknown, metadataSource: string} | undefined} reconstructedMeta - Fallback surface data.
 * @returns {{sessionId: string | undefined, sourceRef: string, agentId: unknown, surface: unknown, metadataSource: string}}
 */
function resolveSessionMeta(filePath, parsedSessionId, registryMeta, reconstructedMeta) {
  if (!registryMeta) {
    return {
      sessionId: parsedSessionId,
      sourceRef: filePath,
      agentId: deriveAgentIdFromPath(filePath),
      surface: reconstructedMeta?.surface ?? null,
      metadataSource: reconstructedMeta?.metadataSource ?? "none"
    };
  }
  const { sourceRef, agentId, surface } = registryMeta;
  return {
    sessionId: parsedSessionId ?? registryMeta.sessionId,
    sourceRef,
    agentId,
    surface,
    metadataSource: "registry"
  };
}
|
|
4429
|
+
/**
 * Derives an agent id from a transcript path of the form
 * ".../<agentId>/sessions/<file>".
 *
 * @param {string} filePath - Transcript file path.
 * @returns {string | null} The trimmed name of the directory above "sessions",
 *   or null when the file is not directly inside a "sessions" directory or
 *   that name is empty, "." or "/".
 */
function deriveAgentIdFromPath(filePath) {
  const absolutePath = path2.resolve(filePath);
  const sessionsDir = path2.dirname(absolutePath);
  const agentDir = path2.dirname(sessionsDir);
  const grandparent = path2.basename(agentDir);
  if (path2.basename(sessionsDir) !== "sessions") {
    return null;
  }
  const agentId = grandparent.trim();
  const unusable = !agentId || agentId === "." || agentId === "/";
  return unusable ? null : agentId;
}
|
|
4442
|
+
/**
 * Tells whether a session should still be considered active.
 *
 * @param {string | undefined} endedAt - Timestamp of the end of the transcript.
 * @param {Date} now - Reference time.
 * @returns {boolean} True when endedAt parses and is later than
 *   now minus ACTIVE_SESSION_WINDOW_MS; false when missing or unparseable.
 */
function isActiveSession(endedAt, now) {
  if (!endedAt) {
    return false;
  }
  const endedAtMs = new Date(endedAt).getTime();
  if (Number.isNaN(endedAtMs)) {
    return false;
  }
  const activeSince = now.getTime() - ACTIVE_SESSION_WINDOW_MS;
  return endedAtMs > activeSince;
}
|
|
4452
|
+
/**
 * Looks up an already stored "openclaw" episode for a transcript.
 *
 * The session id is tried first when present; if that finds nothing, the
 * transcript hash is used.
 *
 * @param {object} ports - Ports exposing episodes.getEpisodeBySourceId and episodes.getEpisodeByTranscriptHash.
 * @param {string | undefined} sessionId - Session id of the transcript.
 * @param {string} transcriptHash - Hash of the transcript content.
 * @returns {Promise<object | null | undefined>} Whatever the matching lookup returns.
 */
async function findExistingEpisode(ports, sessionId, transcriptHash) {
  if (sessionId) {
    const matchBySession = await ports.episodes.getEpisodeBySourceId("openclaw", sessionId);
    if (matchBySession) {
      return matchBySession;
    }
  }
  return ports.episodes.getEpisodeByTranscriptHash("openclaw", transcriptHash);
}
|
|
4459
|
+
|
|
4460
|
+
// src/app/episode-ingest/service/execute.ts
|
|
4461
|
+
/**
 * Classifies and, when eligible, ingests a single transcript file.
 *
 * @param {string} filePath - Transcript file to ingest.
 * @param {object} ports - Ports; ports.createSummaryLlm is required.
 * @param {object} options
 * @param {Date} [options.now] - Reference time for the active-session check.
 * @param {boolean} [options.regenerate] - When true, an existing episode does not cause a skip.
 * @param {boolean} [options.skipActiveSessionCheck] - When true, recently ended sessions are not skipped.
 * @param {object} [options.candidateOverrides] - Field overrides applied to the candidate.
 * @param {unknown} options.genVersion - Generation version stored with the episode.
 * @returns {Promise<object>} {kind: "skipped", skipped}, {kind: "invalid", invalid}
 *   or {kind: "executed", candidate, session}.
 * @throws {Error} When ports.createSummaryLlm is missing.
 */
async function ingestEpisodeTranscript(filePath, ports, options) {
  const { createSummaryLlm } = ports;
  if (!createSummaryLlm) {
    throw new Error("Episode transcript ingest requires createSummaryLlm().");
  }
  const classification = await classifyPreflightTranscript(filePath, ports, {
    referenceNow: options.now ?? /* @__PURE__ */ new Date(),
    regenerate: options.regenerate === true,
    skipActiveSessionCheck: options.skipActiveSessionCheck === true
  });
  switch (classification.kind) {
    case "skipped":
      return { kind: "skipped", skipped: classification.value };
    case "invalid":
      return { kind: "invalid", invalid: classification.value };
    default:
      break;
  }
  const candidate = applyCandidateOverrides(classification.value, options.candidateOverrides);
  // A single transcript has no concurrent writers, so the write runs directly.
  const runWriteDirectly = async (task) => task();
  const session = await executeEpisodeCandidate(candidate, createSummaryLlm, ports, options.genVersion, runWriteDirectly);
  return { kind: "executed", candidate, session };
}
|
|
4491
|
+
/**
 * Executes an ingest plan: summarizes every candidate with a pool of
 * concurrent workers while episode writes are funnelled through one
 * serialized executor.
 *
 * @param {object} plan - Plan with `candidates` and `model.modelRef`.
 * @param {object} ports - Ports; ports.createSummaryLlm is required.
 * @param {object} options
 * @param {number} options.concurrency - Worker count; must truncate to a positive integer.
 * @param {unknown} options.genVersion - Generation version stored with each episode.
 * @param {Function} [options.onProgress] - Called as (completed, total, result) after each candidate.
 * @returns {Promise<object>} Per-candidate sessions, summed usage, modelRef and totals per action.
 * @throws {Error} When createSummaryLlm is missing or the concurrency is invalid.
 */
async function executeEpisodeIngestPlan(plan, ports, options) {
  const { createSummaryLlm } = ports;
  if (!createSummaryLlm) {
    throw new Error("Episode ingest execution requires createSummaryLlm().");
  }
  const requestedConcurrency = options.concurrency;
  const concurrency = Number.isFinite(requestedConcurrency) ? Math.trunc(requestedConcurrency) : 0;
  if (concurrency <= 0) {
    throw new Error(`Episode ingest concurrency must be a positive integer. Received: ${options.concurrency}.`);
  }
  const total = plan.candidates.length;
  if (total === 0) {
    return {
      sessions: [],
      usage: createEmptyUsageStats(),
      modelRef: plan.model.modelRef,
      totals: { attempted: 0, written: 0, updated: 0, unchanged: 0, failed: 0 }
    };
  }
  const results = new Array(total);
  const runSerializedWrite = createSerializedExecutor();
  let cursor = 0;
  let completed = 0;
  const runWorker = async () => {
    // Each worker claims the next unprocessed candidate until none remain.
    for (let index = cursor++; index < plan.candidates.length; index = cursor++) {
      const candidate = plan.candidates[index];
      if (!candidate) {
        return;
      }
      const result = await executeEpisodeCandidate(candidate, createSummaryLlm, ports, options.genVersion, runSerializedWrite);
      results[index] = result;
      completed += 1;
      options.onProgress?.(completed, plan.candidates.length, result);
    }
  };
  await Promise.all(Array.from({ length: Math.min(concurrency, total) }, runWorker));
  const usage = results.reduce((sum, result) => addUsageStats(sum, result.usage), createEmptyUsageStats());
  const countAction = (action) => results.filter((result) => result.action === action).length;
  return {
    sessions: results,
    usage,
    modelRef: plan.model.modelRef,
    totals: {
      attempted: results.length,
      written: countAction("written"),
      updated: countAction("updated"),
      unchanged: countAction("unchanged"),
      failed: countAction("failed")
    }
  };
}
|
|
4551
|
+
/**
 * Summarizes one candidate transcript, embeds the summary and upserts the episode.
 *
 * Never throws for summarization, embedding or write failures: those are
 * reported as an {action: "failed"} result. Only a throw from
 * createSummaryLlm() itself propagates, because it runs outside the guarded section.
 *
 * @param {object} candidate - Candidate produced by the preflight classification.
 * @param {Function} createSummaryLlm - Factory returning an LLM client with `complete` and `metadata`.
 * @param {object} ports - Ports: episodes plus the embedding ports.
 * @param {unknown} genVersion - Generation version stored with the episode.
 * @param {Function} runSerializedWrite - Runner through which the upsert is executed.
 * @returns {Promise<object>} Result holding action, filePath, optional sessionId, usage and
 *   either error (on failure) or activityLevel + episodeId (on success).
 */
async function executeEpisodeCandidate(candidate, createSummaryLlm, ports, genVersion, runSerializedWrite) {
  const existingEpisode = candidate.existingEpisode;
  // Timestamps fall back to the stored episode when the transcript has none.
  const startedAt = trimOptionalString(candidate.startedAt) ?? trimOptionalString(existingEpisode?.startedAt);
  const endedAt = trimOptionalString(candidate.endedAt) ?? trimOptionalString(existingEpisode?.endedAt);
  const sessionField = candidate.sessionId ? { sessionId: candidate.sessionId } : {};
  const fail = (error, usage) => ({
    action: "failed",
    filePath: candidate.filePath,
    ...sessionField,
    error,
    usage
  });
  if (!startedAt) {
    return fail("missing_started_at", createEmptyUsageStats());
  }
  const llm = createSummaryLlm();
  try {
    const structured = await generateEpisodeSummary(candidate.renderedTranscript, llm);
    if (!structured) {
      return fail("invalid_response", cloneUsageStats(llm.metadata.usage));
    }
    const embedding = await embedEpisodeSummary(structured.summary, ports);
    const buildEpisodeInput = () => {
      const agentId = trimOptionalString(candidate.agentId) ?? trimOptionalString(existingEpisode?.agentId);
      const surface = trimOptionalString(candidate.surface) ?? trimOptionalString(existingEpisode?.surface);
      // Keep the stored sourceRef unless the registry supplied this one or nothing is stored yet.
      const useCandidateSourceRef = candidate.metadataSource === "registry" || !existingEpisode?.sourceRef;
      return {
        source: "openclaw",
        ...(candidate.sessionId ? { sourceId: candidate.sessionId } : {}),
        sourceRef: useCandidateSourceRef ? candidate.sourceRef : existingEpisode.sourceRef,
        transcriptHash: candidate.transcriptHash,
        ...(agentId ? { agentId } : {}),
        ...(surface ? { surface } : {}),
        startedAt,
        ...(endedAt ? { endedAt } : {}),
        summary: structured.summary,
        tags: structured.tags,
        activityLevel: structured.activityLevel,
        ...(structured.project ? { project: structured.project } : {}),
        genModel: llm.metadata.modelRef,
        genVersion,
        messageCount: candidate.messageCount,
        ...(embedding ? { embedding } : {})
      };
    };
    const writeResult = await runSerializedWrite(async () => ports.episodes.upsertEpisode(buildEpisodeInput()));
    return {
      action: mapWriteAction(writeResult.action),
      filePath: candidate.filePath,
      ...sessionField,
      activityLevel: structured.activityLevel,
      episodeId: writeResult.episode.id,
      usage: cloneUsageStats(llm.metadata.usage)
    };
  } catch (error) {
    return fail(formatExecutionError(error), cloneUsageStats(llm.metadata.usage));
  }
}
|
|
4615
|
+
/**
 * Applies caller-supplied overrides to a candidate without mutating it.
 *
 * sessionId, sourceRef and metadataSource are overridden only when defined.
 * agentId and surface are overridden whenever the key is present on
 * `overrides`, with undefined/null stored as null.
 *
 * @param {object} candidate - Candidate to copy.
 * @param {object | undefined} overrides - Optional overrides.
 * @returns {object} The original candidate when there are no overrides, otherwise a new object.
 */
function applyCandidateOverrides(candidate, overrides) {
  if (!overrides) {
    return candidate;
  }
  const merged = { ...candidate };
  if (overrides.sessionId !== undefined) {
    merged.sessionId = overrides.sessionId;
  }
  if (overrides.sourceRef !== undefined) {
    merged.sourceRef = overrides.sourceRef;
  }
  if ("agentId" in overrides) {
    merged.agentId = overrides.agentId ?? null;
  }
  if ("surface" in overrides) {
    merged.surface = overrides.surface ?? null;
  }
  if (overrides.metadataSource !== undefined) {
    merged.metadataSource = overrides.metadataSource;
  }
  return merged;
}
|
|
4628
|
+
/**
 * Maps a store write action to the ingest result vocabulary: "inserted"
 * becomes "written"; any other action is passed through unchanged.
 *
 * @param {string} action - Action reported by the episode store.
 * @returns {string} The ingest action.
 */
function mapWriteAction(action) {
  return action === "inserted" ? "written" : action;
}
|
|
4634
|
+
|
|
4635
|
+
// src/app/episode-ingest/service/backfill.ts
|
|
4636
|
+
/**
 * Computes and stores embeddings for every episode that has none yet.
 *
 * Episodes are processed by a pool of concurrent workers. An episode whose
 * embedding cannot be produced or stored counts as failed; it does not
 * abort the run.
 *
 * @param {object} ports - Ports: `embedding` (required) and `episodes`.
 * @param {object} options
 * @param {number} options.concurrency - Worker count; must truncate to a positive integer.
 * @param {Function} [options.onProgress] - Called as (completed, total, episode, status)
 *   with status "embedded" or "failed".
 * @returns {Promise<{totalMissing: number, attempted: number, embedded: number, failed: number, estimatedInputTokens: number}>}
 * @throws {Error} When no embedding provider is configured or the concurrency is invalid.
 */
async function backfillEpisodeEmbeddings(ports, options) {
  const { embedding } = ports;
  if (!embedding) {
    throw new Error("Episode embedding backfill requires an embedding provider.");
  }
  const requestedConcurrency = options.concurrency;
  const concurrency = Number.isFinite(requestedConcurrency) ? Math.trunc(requestedConcurrency) : 0;
  if (concurrency <= 0) {
    throw new Error(`Episode embedding backfill concurrency must be a positive integer. Received: ${options.concurrency}.`);
  }
  const pendingEpisodes = await ports.episodes.listEpisodesWithoutEmbeddings();
  const totalMissing = pendingEpisodes.length;
  if (totalMissing === 0) {
    return { totalMissing: 0, attempted: 0, embedded: 0, failed: 0, estimatedInputTokens: 0 };
  }
  const estimatedInputTokens = pendingEpisodes.reduce((sum, episode) => sum + estimateInputTokens(episode.summary), 0);
  // Embeds and stores one episode; any error is reported as "failed".
  const embedOne = async (episode) => {
    try {
      const vector = await embedEpisodeSummaryWithPort(episode.summary, embedding);
      if (!vector) {
        return "failed";
      }
      await ports.episodes.updateEpisodeEmbedding(episode.id, vector);
      return "embedded";
    } catch {
      return "failed";
    }
  };
  let cursor = 0;
  let completed = 0;
  let embeddedCount = 0;
  let failedCount = 0;
  const runWorker = async () => {
    // Each worker claims the next unprocessed episode until none remain.
    for (let index = cursor++; index < pendingEpisodes.length; index = cursor++) {
      const episode = pendingEpisodes[index];
      if (!episode) {
        return;
      }
      const status = await embedOne(episode);
      if (status === "embedded") {
        embeddedCount += 1;
      } else {
        failedCount += 1;
      }
      completed += 1;
      options.onProgress?.(completed, pendingEpisodes.length, episode, status);
    }
  };
  await Promise.all(Array.from({ length: Math.min(concurrency, totalMissing) }, runWorker));
  return {
    totalMissing: pendingEpisodes.length,
    attempted: pendingEpisodes.length,
    embedded: embeddedCount,
    failed: failedCount,
    estimatedInputTokens
  };
}
|
|
4698
|
+
|
|
4699
|
+
// src/app/episode-ingest/service/plan.ts
|
|
4700
|
+
/**
 * Builds an ingest plan from preflight candidates, optionally limited to
 * recently ended sessions, together with a token and cost estimate.
 *
 * When a `recent` cutoff is in effect, candidates without a parseable
 * endedAt and candidates that ended before the cutoff are excluded. The
 * estimate assumes a flat 500 output tokens per selected candidate.
 *
 * @param {object} preflight - Preflight result holding `candidates`.
 * @param {object} model - Model descriptor with `pricing.input` and `pricing.output` (applied per million tokens).
 * @param {object} [options]
 * @param {string} [options.recent] - Recency filter handed to resolveRecentCutoff.
 * @param {Date} [options.now] - Reference time for the recency filter.
 * @returns {object} Selected candidates, the model, the estimate, the applied filter and totals.
 * @throws {Error} When `recent` cannot be interpreted.
 */
function createEpisodeIngestPlan(preflight, model, options = {}) {
  const cutoff = resolveRecentCutoff(options.recent, options.now);
  const candidates = [];
  let excludedByRecent = 0;
  let excludedUndated = 0;
  preflight.candidates.forEach((candidate) => {
    const plannedCandidate = {
      ...candidate,
      // Replaces the preflight estimate with one that also counts the prompts.
      estimatedInputTokens: estimateEpisodeSummaryInputTokens(candidate.renderedTranscript)
    };
    if (cutoff) {
      const endedAt = parseCandidateEndedAt(candidate.endedAt);
      if (!endedAt) {
        // Undated candidates are counted in both exclusion totals.
        excludedByRecent += 1;
        excludedUndated += 1;
        return;
      }
      if (endedAt.getTime() < cutoff.getTime()) {
        excludedByRecent += 1;
        return;
      }
    }
    candidates.push(plannedCandidate);
  });
  const inputTokens = candidates.reduce((sum, candidate) => sum + candidate.estimatedInputTokens, 0);
  const outputTokens = candidates.length * 500;
  const estimatedCostUsd = inputTokens / 1e6 * model.pricing.input + outputTokens / 1e6 * model.pricing.output;
  const trimmedRecent = options.recent?.trim();
  return {
    candidates,
    model,
    estimate: {
      candidateCount: candidates.length,
      inputTokens,
      outputTokens,
      totalTokens: inputTokens + outputTokens,
      estimatedCostUsd
    },
    ...(trimmedRecent ? { recent: trimmedRecent } : {}),
    ...(cutoff ? { recentCutoff: cutoff.toISOString() } : {}),
    totals: {
      preflightCandidates: preflight.candidates.length,
      selectedCandidates: candidates.length,
      excludedByRecent,
      excludedUndated
    }
  };
}
|
|
4748
|
+
/**
 * Resolves the optional "recent" window into a cutoff value.
 *
 * @param {string} [recent] - Requested window; trimmed before use.
 * @param {Date} [now] - Reference time; a fresh `Date` is used when omitted.
 * @returns {*} `undefined` when no window was requested, otherwise whatever
 *   `parseRelativeDate` produced for it.
 * @throws {Error} When `parseRelativeDate` yields a falsy result for a
 *   non-empty request.
 */
function resolveRecentCutoff(recent, now) {
  const requested = recent?.trim();
  if (requested) {
    const referenceTime = now ?? /* @__PURE__ */ new Date();
    const resolved = parseRelativeDate(requested, referenceTime);
    if (resolved) {
      return resolved;
    }
    throw new Error(`Unsupported recent value "${requested}". Use day shorthand like 30d or an ISO timestamp.`);
  }
  return void 0;
}
|
|
4759
|
+
|
|
4760
|
+
// src/adapters/openclaw/session/sessions-store-reader.ts
|
|
4761
|
+
import * as fs3 from "fs/promises";
|
|
4762
|
+
import path3 from "path";
|
|
4763
|
+
/**
 * Reads the entries of `sessions.json` under `sessionsDir`, reporting what
 * happened through the logger's debug channel.
 *
 * @param {string} sessionsDir - Directory expected to hold `sessions.json`.
 * @param {object} [logger] - Optional logger; only `debug` is used.
 * @returns {Promise<Array<object>>} The entries returned by the diagnostics
 *   reader, or an empty array when `sessionsDir` is blank.
 */
async function readOpenClawSessionsStore(sessionsDir, logger) {
  const subsystem = "sessions-store-reader";
  const trimmedDir = sessionsDir.trim();
  if (trimmedDir.length === 0) {
    debugLog2(logger, subsystem, "skipping sessions.json read because sessionsDir is empty");
    return [];
  }

  const outcome = await readOpenClawSessionsStoreWithDiagnostics(sessionsDir);
  const problems = outcome.diagnostics;
  for (const problem of problems) {
    debugLog2(logger, subsystem, problem.message);
  }

  // The "loaded" line is only logged for a read that produced no diagnostics.
  if (problems.length === 0) {
    const entryCount = outcome.entries.length;
    const storePath = path3.join(path3.resolve(trimmedDir), "sessions.json");
    debugLog2(logger, subsystem, `loaded sessions.json entries=${entryCount} path=${storePath}`);
  }
  return outcome.entries;
}
|
|
4781
|
+
/**
 * Reads and normalizes `sessions.json` from `sessionsDir`. It never rejects on
 * read or parse problems; they are reported as a single diagnostic instead.
 *
 * Diagnostic kinds: `structurally_invalid_file` (the JSON is not a plain
 * object), `missing_file`, `malformed_json`, and `unreadable_file` (any other
 * failure).
 *
 * @param {string} sessionsDir - Directory expected to hold `sessions.json`.
 * @returns {Promise<{entries: Array<object>, diagnostics: Array<object>}>}
 *   Normalized entries plus diagnostics; both are empty for a blank directory.
 */
async function readOpenClawSessionsStoreWithDiagnostics(sessionsDir) {
  const trimmedDir = sessionsDir.trim();
  if (trimmedDir.length === 0) {
    return { entries: [], diagnostics: [] };
  }

  const storeDir = path3.resolve(trimmedDir);
  const storeFile = path3.join(storeDir, "sessions.json");
  // Every failure shares one shape: no entries and one diagnostic for the file.
  const failWith = (kind, message) => ({
    entries: [],
    diagnostics: [{ kind, message, path: storeFile }]
  });

  try {
    const parsed = JSON.parse(await fs3.readFile(storeFile, "utf8"));
    if (!isRecord(parsed)) {
      return failWith("structurally_invalid_file", `sessions.json did not contain an object: path=${storeFile}`);
    }

    const entries = [];
    for (const [rawKey, value] of Object.entries(parsed)) {
      const sessionKey = rawKey.trim();
      // Blank keys and non-object values are skipped without a diagnostic.
      if (sessionKey.length === 0 || !isRecord(value)) {
        continue;
      }

      const entry = { sessionKey };
      const sessionId = asTrimmedString(value["sessionId"]);
      if (sessionId) {
        entry.sessionId = sessionId;
      }
      const sessionFile = asTrimmedString(value["sessionFile"]);
      if (sessionFile) {
        // Relative session files are anchored at the resolved sessions directory.
        entry.sessionFile = resolveSessionStorePath(sessionFile, storeDir);
      }
      const origin = isRecord(value["origin"]) ? value["origin"] : void 0;
      const surface = asTrimmedString(origin?.["surface"]);
      if (surface) {
        entry.surface = surface;
      }
      const provider = asTrimmedString(origin?.["provider"]);
      if (provider) {
        entry.provider = provider;
      }
      const chatType = asTrimmedString(value["chatType"]);
      if (chatType) {
        entry.chatType = chatType;
      }
      const updatedAt = asFiniteNumber(value["updatedAt"]);
      if (updatedAt !== void 0) {
        entry.updatedAt = updatedAt;
      }
      entries.push(entry);
    }
    return { entries, diagnostics: [] };
  } catch (error) {
    if (isFileNotFound2(error)) {
      return failWith("missing_file", `sessions.json missing at ${storeFile}`);
    }
    if (error instanceof SyntaxError) {
      return failWith("malformed_json", `sessions.json parse failed at ${storeFile}: ${error.message}`);
    }
    return failWith("unreadable_file", `sessions.json read failed at ${storeFile}: ${formatErrorMessage2(error)}`);
  }
}
|
|
4873
|
+
/**
 * Resolves a session file path recorded in the sessions store.
 *
 * @param {string} candidatePath - Path as stored; absolute or relative.
 * @param {string} sessionsDir - Base directory used for relative paths.
 * @returns {string} The normalized absolute path.
 */
function resolveSessionStorePath(candidatePath, sessionsDir) {
  if (path3.isAbsolute(candidatePath)) {
    // Absolute paths are normalized on their own, independent of sessionsDir.
    return path3.resolve(candidatePath);
  }
  return path3.resolve(sessionsDir, candidatePath);
}
|
|
4876
|
+
/**
 * Tells whether `value` is a non-null object that is not an array.
 *
 * @param {*} value - Value to inspect.
 * @returns {boolean} `true` for non-array objects, `false` otherwise.
 */
function isRecord(value) {
  if (value === null || typeof value !== "object") {
    return false;
  }
  return !Array.isArray(value);
}
|
|
4879
|
+
/**
 * Returns the trimmed form of a string value, or `undefined` when the value is
 * not a string or contains only whitespace.
 *
 * @param {*} value - Value to normalize.
 * @returns {string|undefined} Trimmed, non-empty string or `undefined`.
 */
function asTrimmedString(value) {
  if (typeof value !== "string") {
    return void 0;
  }
  const trimmed = value.trim();
  return trimmed.length > 0 ? trimmed : void 0;
}
|
|
4882
|
+
/**
 * Passes through finite numbers and maps everything else to `undefined`.
 *
 * @param {*} value - Value to check.
 * @returns {number|undefined} The same number when finite, else `undefined`.
 */
function asFiniteNumber(value) {
  // Number.isFinite never coerces, so non-number inputs are rejected as well.
  if (Number.isFinite(value)) {
    return value;
  }
  return void 0;
}
|
|
4885
|
+
/**
 * Emits a prefixed debug line through `logger.debug`, when one is available.
 *
 * @param {object} [logger] - Optional logger; nothing happens without it.
 * @param {string} subsystem - Label placed after the `[agenr]` prefix.
 * @param {string} message - Text to log.
 * @returns {void}
 */
function debugLog2(logger, subsystem, message) {
  const emit = logger?.debug;
  if (emit === void 0 || emit === null) {
    return;
  }
  // Invoked with the logger as receiver, exactly like a method call would be.
  Reflect.apply(emit, logger, [`[agenr] ${subsystem}: ${message}`]);
}
|
|
4888
|
+
/**
 * Checks whether a thrown value is an object carrying `code === "ENOENT"`.
 *
 * @param {*} error - Value caught from a failed operation.
 * @returns {boolean} `true` only for objects whose `code` is `"ENOENT"`.
 */
function isFileNotFound2(error) {
  if (error === null || typeof error !== "object") {
    return false;
  }
  if (!("code" in error)) {
    return false;
  }
  return error.code === "ENOENT";
}
|
|
4891
|
+
/**
 * Produces a printable message for any thrown value.
 *
 * @param {*} error - Value caught from a failed operation.
 * @returns {string} `error.message` for `Error` instances, otherwise the value
 *   converted with `String()`.
 */
function formatErrorMessage2(error) {
  return error instanceof Error ? error.message : String(error);
}
|
|
4897
|
+
|
|
4898
|
+
// src/adapters/openclaw/session/tui-lane.ts
|
|
4899
|
+
// Session keys of the form "agent:<agentId>:<lane>", matched case-insensitively.
var TUI_SESSION_KEY_PATTERN = /^agent:([^:]+):([^:]+)$/i;
// A lane starting with "tui" (plus optional alphanumerics) that ends in "-<uuid>".
var TUI_UUID_LANE_PATTERN = /^tui[a-z0-9]*-[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i;
// The trailing "-<uuid>" portion removed to obtain the stable lane.
var TUI_UUID_SUFFIX_PATTERN = /-[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i;

/**
 * Parses a session key of the form `agent:<agentId>:<lane>` whose lane starts
 * with "tui" (case-insensitive).
 *
 * @param {string} sessionKey - Raw session key; surrounding whitespace is ignored.
 * @returns {{agentId: string, stableLane: string, instanceLane: string}|null}
 *   The parsed parts, with `stableLane` being the lane minus a trailing UUID
 *   when the lane has the `tui…-<uuid>` shape; `null` when the key does not
 *   match.
 */
function parseTuiSessionKey(sessionKey) {
  const trimmedKey = sessionKey.trim();
  const parts = trimmedKey.length > 0 ? TUI_SESSION_KEY_PATTERN.exec(trimmedKey) : null;
  if (parts === null) {
    return null;
  }

  const agentId = parts[1]?.trim();
  const instanceLane = parts[2]?.trim();
  const isTuiLane = Boolean(instanceLane) && instanceLane.toLowerCase().startsWith("tui");
  if (!agentId || !isTuiLane) {
    return null;
  }

  // Only lanes with the full "tui…-<uuid>" shape lose their per-instance suffix.
  let stableLane = instanceLane;
  if (TUI_UUID_LANE_PATTERN.test(instanceLane)) {
    stableLane = instanceLane.replace(TUI_UUID_SUFFIX_PATTERN, "");
  }
  return { agentId, stableLane, instanceLane };
}
|
|
4924
|
+
|
|
4925
|
+
export {
|
|
4926
|
+
detectClaimKeyEntityFamilyCandidates,
|
|
4927
|
+
detectClaimKeySingletonAliasCandidates,
|
|
4928
|
+
buildTrustedClaimKeySupportSeed,
|
|
4929
|
+
evaluateClaimKeySupport,
|
|
4930
|
+
evaluateClaimKeyCompactness,
|
|
4931
|
+
normalizeGroundingTags,
|
|
4932
|
+
tokenizeGroundingText,
|
|
4933
|
+
buildEntryLocalLexicalTokens,
|
|
4934
|
+
applyClaimExtractionResultToEntry,
|
|
4935
|
+
previewClaimKeyExtraction,
|
|
4936
|
+
runBatchClaimExtraction,
|
|
4937
|
+
validateSupersessionRules,
|
|
4938
|
+
describeSupersessionRuleFailure,
|
|
4939
|
+
computeContentHash,
|
|
4940
|
+
computeNormContentHash,
|
|
4941
|
+
validateEntriesWithIndexes,
|
|
4942
|
+
storeEntriesDetailed,
|
|
4943
|
+
deriveOpenClawSessionIdFromFilePath,
|
|
4944
|
+
OpenClawTranscriptParser,
|
|
4945
|
+
openClawTranscriptParser,
|
|
4946
|
+
readOpenClawSessionsStore,
|
|
4947
|
+
parseTuiSessionKey,
|
|
4948
|
+
backfillEpisodeEmbeddings,
|
|
4949
|
+
prepareEpisodeIngest,
|
|
4950
|
+
ingestEpisodeTranscript,
|
|
4951
|
+
executeEpisodeIngestPlan,
|
|
4952
|
+
createEpisodeIngestPlan,
|
|
4953
|
+
createOpenClawRepository
|
|
4954
|
+
};
|