@agenr/agenr-plugin 1.7.3 → 1.7.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/chunk-ETQPUJGS.js +0 -0
- package/dist/chunk-IZDGXMTQ.js +839 -0
- package/dist/chunk-NIQKTINU.js +2545 -0
- package/dist/chunk-NXCCTZ4G.js +3322 -0
- package/openclaw.plugin.json +1 -1
- package/package.json +1 -1
|
@@ -0,0 +1,3322 @@
|
|
|
1
|
+
import {
|
|
2
|
+
ENTRY_TYPES,
|
|
3
|
+
EPISODE_ACTIVITY_LEVELS,
|
|
4
|
+
EXPIRY_LEVELS,
|
|
5
|
+
composeEmbeddingText
|
|
6
|
+
} from "./chunk-NIQKTINU.js";
|
|
7
|
+
import {
|
|
8
|
+
parseRelativeDate
|
|
9
|
+
} from "./chunk-7WL5EAQZ.js";
|
|
10
|
+
|
|
11
|
+
// src/adapters/openclaw/transcript/parser.ts
|
|
12
|
+
import { createHash } from "crypto";
|
|
13
|
+
import * as fs2 from "fs/promises";
|
|
14
|
+
|
|
15
|
+
// src/adapters/openclaw/transcript/jsonl.ts
|
|
16
|
+
/**
 * Walks newline-delimited JSON text and hands every object record to a callback.
 *
 * Blank lines are ignored. Lines that are not valid JSON add a warning (with
 * the 1-based line number) and are skipped. Valid JSON that is not a plain
 * object (null, primitives, arrays) is skipped without a warning.
 *
 * @param {string} raw - Full JSONL text.
 * @param {string[]} warnings - Receives one message per malformed line.
 * @param {(record: object, lineNumber: number) => void} onRecord - Called for each object record.
 */
function parseJsonlLines(raw, warnings, onRecord) {
  raw.split(/\r?\n/).forEach((rawLine, position) => {
    const lineNumber = position + 1;
    const text = rawLine.trim();
    if (!text) {
      return;
    }
    let value;
    try {
      value = JSON.parse(text);
    } catch {
      warnings.push(`Skipped malformed JSONL line ${lineNumber}`);
      return;
    }
    const isPlainRecord = Boolean(value) && typeof value === "object" && !Array.isArray(value);
    if (isPlainRecord) {
      onRecord(value, lineNumber);
    }
  });
}
|
|
36
|
+
|
|
37
|
+
// src/adapters/openclaw/transcript/tool-summarization.ts
|
|
38
|
+
// Tool names whose results are dropped by the default tool-result policy.
var DEFAULT_TOOL_RESULT_DROP_NAMES = [
  "read",
  "web_fetch",
  "browser",
  "screenshot",
  "snapshot",
  "canvas",
  "tts"
];
// Tool names whose results are kept by the default tool-result policy.
var DEFAULT_TOOL_RESULT_KEEP_NAMES = [
  "web_search",
  "memory_search",
  "memory_get",
  "image"
];
// Set forms of the two lists above, used for membership checks.
var DEFAULT_TOOL_RESULT_DROP_NAME_SET = new Set(DEFAULT_TOOL_RESULT_DROP_NAMES);
var DEFAULT_TOOL_RESULT_KEEP_NAME_SET = new Set(DEFAULT_TOOL_RESULT_KEEP_NAMES);
|
|
42
|
+
/**
 * Narrows a value to a non-array object.
 *
 * @param {unknown} value - Anything.
 * @returns {object|null} The same value when it is a truthy non-array object, otherwise null.
 */
function asRecord(value) {
  if (!value || typeof value !== "object") {
    return null;
  }
  if (Array.isArray(value)) {
    return null;
  }
  return value;
}
|
|
45
|
+
/**
 * Returns the value when it is a string with at least one non-whitespace
 * character. The string is returned as-is (not trimmed).
 *
 * @param {unknown} value - Anything.
 * @returns {string|undefined} The original string, or undefined.
 */
function getString(value) {
  if (typeof value !== "string") {
    return void 0;
  }
  const hasContent = value.trim().length > 0;
  return hasContent ? value : void 0;
}
|
|
48
|
+
/**
 * Cuts a string down to at most `max` UTF-16 code units.
 *
 * The cut never lands between the two halves of a surrogate pair: when the
 * last kept unit would be a high surrogate whose low surrogate is being cut
 * off, that high surrogate is dropped as well, so the result never ends in a
 * lone surrogate.
 *
 * @param {string} value - Text to shorten.
 * @param {number} max - Maximum length in UTF-16 code units.
 * @returns {string} `value` unchanged when short enough, otherwise a prefix of it.
 */
function truncateInline(value, max) {
  if (value.length <= max) {
    return value;
  }
  let end = max;
  if (end > 0) {
    const lastKept = value.charCodeAt(end - 1);
    const firstDropped = value.charCodeAt(end);
    const splitsPair = lastKept >= 0xd800 && lastKept <= 0xdbff && firstDropped >= 0xdc00 && firstDropped <= 0xdfff;
    if (splitsPair) {
      // Drop the orphaned high surrogate rather than emit malformed text.
      end -= 1;
    }
  }
  return value.slice(0, end);
}
|
|
54
|
+
/**
 * Finds the first argument value (in property order) that is a non-blank
 * string and returns it trimmed and truncated.
 *
 * @param {object} args - Tool-call arguments.
 * @param {number} max - Maximum length passed to truncateInline.
 * @returns {string|undefined} The shortened value, or undefined when no non-blank string exists.
 */
function firstStringArgValue(args, max) {
  const match = Object.values(args).find(
    (candidate) => typeof candidate === "string" && candidate.trim().length > 0
  );
  if (match === void 0) {
    return void 0;
  }
  return truncateInline(match.trim(), max);
}
|
|
62
|
+
/**
 * Collects the entries described by a store tool call's arguments.
 *
 * If `args.entries` is an array containing at least one object, those objects
 * are returned. Otherwise, if `args` itself carries any non-blank entry field
 * (type, subject, content, claimKey, claim_key, supersedes), `args` is
 * returned as the single entry.
 *
 * @param {object} args - Tool-call arguments.
 * @returns {object[]} Zero or more entry records.
 */
function extractAgenrStoreEntries(args) {
  if (Array.isArray(args.entries)) {
    const records = [];
    for (const candidate of args.entries) {
      const record = asRecord(candidate);
      if (record) {
        records.push(record);
      }
    }
    if (records.length > 0) {
      return records;
    }
  }
  const entryFields = ["type", "subject", "content", "claimKey", "claim_key", "supersedes"];
  const looksLikeEntry = entryFields.some((field) => getString(args[field]));
  return looksLikeEntry ? [args] : [];
}
|
|
75
|
+
/**
 * Builds a one-line description of a store entry: its type, a quoted subject
 * (at most 60 characters) and, when present, a JSON-quoted claim key (at most
 * 120 characters after trimming).
 *
 * @param {object} entry - Entry record.
 * @returns {string} Human-readable summary.
 */
function summarizeAgenrStoreEntry(entry) {
  const kind = getString(entry.type) ?? "unknown";
  const title = getString(entry.subject) ?? "(no subject)";
  const pieces = [`${kind}: "${truncateInline(title, 60)}"`];
  const key = getString(entry.claimKey) ?? getString(entry.claim_key);
  if (key) {
    pieces.push(` claim_key=${JSON.stringify(truncateInline(key.trim(), 120))}`);
  }
  return pieces.join("");
}
|
|
82
|
+
/**
 * Picks a short identifying string for a tool call based on the tool name
 * (compared trimmed and lowercased) and its arguments.
 *
 * @param {string} toolName - Tool name.
 * @param {object} args - Tool-call arguments.
 * @returns {string} Identifier such as a file path, command, URL or query, or a parenthesised placeholder.
 */
function toolIdentifier(toolName, args) {
  switch (toolName.trim().toLowerCase()) {
    case "read":
    case "edit":
    case "write":
      return getString(args.file_path) ?? getString(args.path) ?? getString(args.file) ?? "(unknown file)";
    case "exec": {
      const command = getString(args.command) ?? getString(args.cmd) ?? "(unknown command)";
      return truncateInline(command, 100);
    }
    case "web_fetch":
      return getString(args.url) ?? "(unknown url)";
    case "web_search":
      return getString(args.query) ?? "(unknown query)";
    case "browser": {
      const action = getString(args.action) ?? "(unknown action)";
      const targetUrl = getString(args.targetUrl) ?? getString(args.url);
      return targetUrl ? `${action} ${targetUrl}` : action;
    }
    case "agenr_store": {
      const count = extractAgenrStoreEntries(args).length;
      return `${count} entr${count === 1 ? "y" : "ies"}`;
    }
    case "agenr_recall": {
      const query = getString(args.query) ?? "(no query)";
      return `"${truncateInline(query, 80)}"`;
    }
    case "message": {
      const action = getString(args.action) ?? "(unknown action)";
      const target = getString(args.target) ?? getString(args.to) ?? "(unknown target)";
      return `${truncateInline(action, 80)} to ${truncateInline(target, 80)}`;
    }
    case "sessions_spawn":
      return getString(args.label) ?? getString(args.task)?.slice(0, 60) ?? "(unknown task)";
    case "image":
      return getString(args.image) ?? getString(args.url) ?? getString(args.path) ?? "(unknown image)";
    case "canvas":
      return getString(args.action) ?? "(unknown action)";
    case "tts": {
      const text = getString(args.text) ?? "(unknown text)";
      return truncateInline(text, 50);
    }
    default:
      // Unrecognised tool: fall back to the first non-blank string argument.
      return firstStringArgValue(args, 80) ?? "(unknown)";
  }
}
|
|
130
|
+
/**
 * Extracts tool-call descriptors from a message's content blocks.
 *
 * A block counts as a tool call when it has a name (`name`, `tool` or
 * `tool_name`) and either an explicit tool-call type, or no type at all but
 * an `arguments`/`args`/`input` property.
 *
 * @param {unknown} content - Message content; anything but an array yields [].
 * @returns {{name: string, args: object, id: (string|undefined)}[]} Tool calls in block order.
 */
function extractToolCallBlocks(content) {
  if (!Array.isArray(content)) {
    return [];
  }
  const explicitTypes = ["toolcall", "tool_call", "tool_use", "tooluse"];
  const calls = [];
  content.forEach((item) => {
    const block = asRecord(item);
    if (!block) {
      return;
    }
    const name = getString(block.name) ?? getString(block.tool) ?? getString(block.tool_name);
    if (!name) {
      return;
    }
    const kind = typeof block.type === "string" ? block.type.trim().toLowerCase() : "";
    const isExplicit = explicitTypes.includes(kind);
    const isImplicit = kind === "" && ("arguments" in block || "args" in block || "input" in block);
    if (!isExplicit && !isImplicit) {
      return;
    }
    const args = asRecord(block.arguments) ?? asRecord(block.args) ?? asRecord(block.input) ?? {};
    const id = getString(block.id) ?? getString(block.toolCallId) ?? getString(block.tool_call_id) ?? getString(block.call_id);
    calls.push({ name, args, id });
  });
  return calls;
}
|
|
154
|
+
/**
 * Produces a short bracketed summary of a tool call for the transcript.
 *
 * A caller-supplied override (keyed by the trimmed, lowercased tool name) is
 * tried first; when it returns a truthy value that value is the summary.
 * Overrides are looked up as OWN properties and must be functions, so a tool
 * that happens to be named like an inherited object member ("constructor",
 * "__proto__") can neither invoke a built-in nor crash the summarizer.
 *
 * @param {{name: string, args: object}} call - Tool call to summarize.
 * @param {{overrides?: Object<string, function(object): (string|undefined)>}} [options] - Optional per-tool summarizers.
 * @returns {string} Summary text (or whatever truthy value an override returned).
 */
function summarizeToolCall(call, options) {
  const normalizedToolName = call.name.trim().toLowerCase();
  const overrides = options?.overrides;
  const override = overrides != null && Object.prototype.hasOwnProperty.call(overrides, normalizedToolName) ? overrides[normalizedToolName] : void 0;
  if (typeof override === "function") {
    const summary = override(call);
    if (summary) {
      return summary;
    }
  }
  const args = call.args;
  const filePath = getString(args.file_path) ?? getString(args.path) ?? getString(args.file);
  if (normalizedToolName === "read") {
    return `[called Read: ${filePath ?? "(unknown file)"}]`;
  }
  if (normalizedToolName === "write") {
    // Only the size of the written content is reported, never the content.
    const content = getString(args.content) ?? getString(args.text) ?? "";
    return `[called Write: ${filePath ?? "(unknown file)"} - ${content.length} chars]`;
  }
  if (normalizedToolName === "edit") {
    const oldText = getString(args.oldText) ?? getString(args.old_string) ?? "";
    return `[called Edit: ${filePath ?? "(unknown file)"} - replaced ${oldText.length} chars]`;
  }
  if (normalizedToolName === "exec") {
    const command = getString(args.command) ?? getString(args.cmd) ?? "(unknown command)";
    return `[called exec: ${truncateInline(command, 200)}]`;
  }
  if (normalizedToolName === "web_search") {
    const query = getString(args.query) ?? "(unknown query)";
    return `[called web_search: ${truncateInline(query, 200)}]`;
  }
  if (normalizedToolName === "web_fetch") {
    const url = getString(args.url) ?? "(unknown url)";
    return `[called web_fetch: ${truncateInline(url, 200)}]`;
  }
  if (normalizedToolName === "browser") {
    const action = getString(args.action) ?? "(unknown action)";
    return `[called browser: ${truncateInline(action, 200)}]`;
  }
  if (normalizedToolName === "message") {
    const action = getString(args.action) ?? "(unknown action)";
    const target = getString(args.target) ?? getString(args.to) ?? "(unknown target)";
    return `[called message: ${truncateInline(action, 200)} to ${truncateInline(target, 200)}]`;
  }
  if (normalizedToolName === "agenr_store") {
    const entries = extractAgenrStoreEntries(args);
    if (entries.length === 0) {
      return "[attempted brain store: (empty)]";
    }
    // Describe at most three entries and note how many were left out.
    const summaries = entries.slice(0, 3).map(summarizeAgenrStoreEntry);
    const countSuffix = entries.length > 3 ? ` (+${entries.length - 3} more)` : "";
    return `[attempted brain store: ${summaries.join(", ")}${countSuffix}]`;
  }
  if (normalizedToolName === "agenr_recall") {
    const query = getString(args.query) ?? "(no query)";
    return `[recalled from brain: "${truncateInline(query, 100)}"]`;
  }
  if (normalizedToolName === "sessions_spawn") {
    const label = getString(args.label);
    const mode = getString(args.mode) ?? "run";
    const model = getString(args.model);
    const modelSuffix = model ? ` model=${model}` : "";
    if (label) {
      return `[spawned sub-agent: ${label} (${mode}${modelSuffix})]`;
    }
    const task = getString(args.task) ?? "(no task)";
    return `[spawned sub-agent: ${truncateInline(task, 80)} (${mode}${modelSuffix})]`;
  }
  // Generic fallback: show the first non-blank string argument, skipping keys
  // that carry bulk payloads. ("write" already returned above, so no
  // write-specific exclusion is needed here.)
  const bulkKeys = ["buffer", "content", "data", "newText", "new_string", "oldText", "old_string"];
  const relevantArgValue = firstStringArgValue(
    Object.fromEntries(Object.entries(args).filter(([key]) => !bulkKeys.includes(key))),
    80
  ) ?? "(no args)";
  return `[called ${call.name}: ${relevantArgValue}]`;
}
|
|
231
|
+
/**
 * Builds the placeholder text that stands in for a filtered-out tool result.
 *
 * @param {string} toolName - Tool name; blank names are shown as "unknown".
 * @param {object} args - Arguments of the originating tool call.
 * @returns {string} Placeholder naming the tool and its identifier.
 */
function toolResultPlaceholder(toolName, args) {
  const displayName = toolName.trim() || "unknown";
  return `[tool result from ${displayName}: ${toolIdentifier(displayName, args)} - filtered]`;
}
|
|
236
|
+
/**
 * Decides whether a tool result's text is kept in the transcript.
 *
 * Order of checks: tool name in the drop set (drop), in the keep set (keep),
 * then `exec` results are kept when shorter than 1000 characters or when they
 * mention an error/failure, and any other result is kept only when shorter
 * than 500 characters. Kept results carry a truncation limit of 2000.
 *
 * @param {string|null|undefined} toolName - Tool name (compared trimmed and lowercased).
 * @param {string} text - Result text.
 * @param {{dropToolNames?: Set<string>, keepToolNames?: Set<string>}} [policy] - Overrides for the default name sets.
 * @returns {{keep: boolean, truncateTo?: number}} Decision object.
 */
function shouldKeepToolResult(toolName, text, policy) {
  const name = (toolName ?? "").trim().toLowerCase();
  const dropped = policy?.dropToolNames ?? DEFAULT_TOOL_RESULT_DROP_NAME_SET;
  const kept = policy?.keepToolNames ?? DEFAULT_TOOL_RESULT_KEEP_NAME_SET;
  const keep = () => ({ keep: true, truncateTo: 2e3 });
  const drop = () => ({ keep: false });
  if (name) {
    if (dropped.has(name)) {
      return drop();
    }
    if (kept.has(name)) {
      return keep();
    }
  }
  if (name === "exec") {
    const worthKeeping = text.length < 1e3 || /(error|failed|fail)/i.test(text);
    return worthKeeping ? keep() : drop();
  }
  return text.length < 500 ? keep() : drop();
}
|
|
260
|
+
|
|
261
|
+
// src/adapters/openclaw/transcript/message-content.ts
|
|
262
|
+
// Block `type` values whose string `content` is treated as message text.
var TEXT_BLOCK_TYPES = /* @__PURE__ */ new Set([
  "input_text",
  "output_text",
  "text"
]);
|
|
263
|
+
/**
 * Collapses every run of whitespace into a single space and trims both ends.
 *
 * @param {string} value - Text to clean.
 * @returns {string} Single-spaced, trimmed text.
 */
function normalizeWhitespace(value) {
  const collapsed = value.replace(/\s+/g, " ");
  return collapsed.trim();
}
|
|
266
|
+
/**
 * Extracts whitespace-normalized text blocks from message content.
 *
 * Accepts a plain string or an array of blocks. Array items contribute text
 * when they are strings, have a string `text` property, or have a string
 * `content` plus a recognised text type. Other object blocks are counted;
 * when no text was found at all, a single placeholder reports how many such
 * blocks were omitted.
 *
 * @param {unknown} content - Message content.
 * @returns {string[]} Non-empty normalized text blocks (possibly just the placeholder).
 */
function extractTextBlocks(content) {
  if (typeof content === "string") {
    const whole = normalizeWhitespace(content);
    return whole ? [whole] : [];
  }
  if (!Array.isArray(content)) {
    return [];
  }
  const collected = [];
  const addIfNonEmpty = (raw) => {
    const cleaned = normalizeWhitespace(raw);
    if (cleaned) {
      collected.push(cleaned);
    }
  };
  let omitted = 0;
  for (const item of content) {
    if (typeof item === "string") {
      addIfNonEmpty(item);
      continue;
    }
    const block = asRecord(item);
    if (!block) {
      continue;
    }
    if (typeof block.text === "string") {
      addIfNonEmpty(block.text);
      continue;
    }
    const kind = typeof block.type === "string" ? block.type.trim().toLowerCase() : "";
    if (typeof block.content === "string" && TEXT_BLOCK_TYPES.has(kind)) {
      addIfNonEmpty(block.content);
      continue;
    }
    omitted += 1;
  }
  if (collected.length === 0 && omitted > 0) {
    collected.push(`[non-text content omitted: ${omitted} block${omitted === 1 ? "" : "s"}]`);
  }
  return collected;
}
|
|
310
|
+
/**
 * Extracts text blocks from message content without altering whitespace.
 *
 * @param {unknown} content - A string, an array of blocks, or anything else.
 * @returns {string[]} Raw text of each string item, `text` property, or text-typed `content` property.
 */
function extractRawTextBlocks(content) {
  if (typeof content === "string") {
    return [content];
  }
  if (!Array.isArray(content)) {
    return [];
  }
  return content.flatMap((item) => {
    if (typeof item === "string") {
      return [item];
    }
    const block = asRecord(item);
    if (!block) {
      return [];
    }
    if (typeof block.text === "string") {
      return [block.text];
    }
    const kind = typeof block.type === "string" ? block.type.trim().toLowerCase() : "";
    const isTextContent = typeof block.content === "string" && TEXT_BLOCK_TYPES.has(kind);
    return isTextContent ? [block.content] : [];
  });
}
|
|
338
|
+
/**
 * Turns free text into a lowercase, hyphen-separated label: runs of
 * whitespace, underscores and hyphens become one hyphen, and leading or
 * trailing hyphens are removed.
 *
 * @param {string} value - Raw label text.
 * @returns {string} Normalized label (may be empty).
 */
function normalizeLabel(value) {
  const lowered = value.trim().toLowerCase();
  const hyphenated = lowered.replace(/[\s_-]+/g, "-");
  return hyphenated.replace(/^-+|-+$/g, "");
}
|
|
341
|
+
/**
 * Flattens message content into a single whitespace-normalized string.
 *
 * @param {unknown} content - Message content accepted by extractTextBlocks.
 * @returns {string} All extracted text blocks joined and normalized.
 */
function normalizeMessageText(content) {
  const blocks = extractTextBlocks(content);
  const joined = blocks.join("\n");
  return normalizeWhitespace(joined);
}
|
|
344
|
+
/**
 * Maps a raw role value onto one of the roles used by the parser.
 *
 * @param {unknown} value - Role as found in the transcript.
 * @returns {"user"|"assistant"|"system"|"toolResult"|"unknown"} Canonical role; non-strings and unrecognised names are "unknown".
 */
function normalizeOpenClawRole(value) {
  if (typeof value !== "string") {
    return "unknown";
  }
  switch (value.trim().toLowerCase()) {
    case "user":
    case "human":
      return "user";
    case "assistant":
    case "ai":
    case "developer":
      return "assistant";
    case "system":
      return "system";
    case "tool":
    case "toolresult":
    case "tool_result":
      return "toolResult";
    default:
      return "unknown";
  }
}
|
|
363
|
+
/**
 * Shortens text to at most `maxChars` UTF-16 code units and appends a
 * "[...truncated]" marker on its own line when anything was cut.
 *
 * The cut never lands between the two halves of a surrogate pair; an orphaned
 * high surrogate at the cut point is dropped so the kept text stays
 * well-formed.
 *
 * @param {string} text - Text to shorten.
 * @param {number} maxChars - Maximum number of UTF-16 code units to keep.
 * @returns {string} `text` unchanged when short enough, otherwise the prefix plus the marker.
 */
function truncateWithMarker(text, maxChars) {
  if (text.length <= maxChars) {
    return text;
  }
  let end = maxChars;
  if (end > 0) {
    const lastKept = text.charCodeAt(end - 1);
    const firstDropped = text.charCodeAt(end);
    const splitsPair = lastKept >= 0xd800 && lastKept <= 0xdbff && firstDropped >= 0xdc00 && firstDropped <= 0xdfff;
    if (splitsPair) {
      end -= 1;
    }
  }
  return `${text.slice(0, end)}\n[...truncated]`;
}
|
|
370
|
+
/**
 * Heuristic check for text that is nothing but a long base64 payload.
 *
 * True only when the trimmed text is at least 500 characters, contains at
 * least one of `+`, `/` or `=`, and consists solely of base64 alphabet
 * characters and whitespace.
 *
 * @param {string} text - Text to inspect.
 * @returns {boolean} Whether the text looks like pure base64.
 */
function isPureBase64(text) {
  const candidate = text.trim();
  const longEnough = candidate.length >= 500;
  return longEnough && /[+/=]/.test(candidate) && /^[A-Za-z0-9+/=\s]{500,}$/.test(candidate);
}
|
|
380
|
+
/**
 * Normalizes a session label, treating an empty result as absent.
 *
 * @param {string} value - Raw label text.
 * @returns {string|undefined} Normalized label, or undefined when nothing remains.
 */
function normalizeSessionLabel(value) {
  const label = normalizeLabel(value);
  if (label.length === 0) {
    return void 0;
  }
  return label;
}
|
|
384
|
+
/**
 * Looks for a `conversation_label` inside fenced JSON blocks of a message.
 *
 * Every triple-backtick fence (optionally tagged `json`) in every raw text
 * block is tried in order; the first one that parses to an object with a
 * usable `conversation_label` wins. Fences that do not parse as JSON are
 * ignored on purpose, since ordinary code fences are expected here.
 *
 * @param {unknown} content - Message content.
 * @returns {string|undefined} Normalized label, or undefined when none is found.
 */
function extractConversationLabel(content) {
  for (const text of extractRawTextBlocks(content)) {
    for (const fence of text.matchAll(/```(?:json)?\s*([\s\S]*?)\s*```/gi)) {
      const body = fence[1];
      if (!body) {
        continue;
      }
      try {
        const record = asRecord(JSON.parse(body));
        const rawLabel = record ? getString(record.conversation_label) : void 0;
        const label = rawLabel ? normalizeSessionLabel(rawLabel) : void 0;
        if (label) {
          return label;
        }
      } catch {
        // Not JSON: move on to the next fence.
      }
    }
  }
  return void 0;
}
|
|
407
|
+
/**
 * Extracts whitespace-normalized text parts from assistant message content.
 * Non-text blocks are skipped silently.
 *
 * @param {unknown} content - A string, an array of blocks, or anything else.
 * @returns {string[]} Non-empty normalized text parts in block order.
 */
function extractAssistantTextParts(content) {
  if (typeof content === "string") {
    const whole = normalizeWhitespace(content);
    return whole ? [whole] : [];
  }
  if (!Array.isArray(content)) {
    return [];
  }
  const parts = [];
  for (const item of content) {
    let raw;
    if (typeof item === "string") {
      raw = item;
    } else {
      const block = asRecord(item);
      if (!block) {
        continue;
      }
      if (typeof block.text === "string") {
        raw = block.text;
      } else {
        const kind = typeof block.type === "string" ? block.type.trim().toLowerCase() : "";
        if (typeof block.content !== "string" || !TEXT_BLOCK_TYPES.has(kind)) {
          continue;
        }
        raw = block.content;
      }
    }
    const cleaned = normalizeWhitespace(raw);
    if (cleaned) {
      parts.push(cleaned);
    }
  }
  return parts;
}
|
|
445
|
+
/**
 * Appends a message to the list, stamping it with its position in that list.
 *
 * @param {object[]} messages - List to append to (mutated).
 * @param {string} role - Message role.
 * @param {string} text - Message text.
 * @param {string|undefined} timestamp - Message timestamp, if known.
 */
function pushMessage(messages, role, text, timestamp) {
  const entry = { index: messages.length, role, text, timestamp };
  messages.push(entry);
}
|
|
453
|
+
|
|
454
|
+
// src/adapters/openclaw/transcript/timestamps.ts
|
|
455
|
+
import * as fs from "fs/promises";
|
|
456
|
+
/**
 * Converts a timestamp-like value to an ISO-8601 string.
 *
 * Non-blank strings go through the Date constructor. Positive finite numbers
 * are taken as milliseconds when greater than 1e12 and as seconds otherwise.
 *
 * @param {unknown} value - Candidate timestamp.
 * @returns {string|undefined} ISO string, or undefined when the value cannot be interpreted.
 */
function parseTimestampValue(value) {
  let date = null;
  if (typeof value === "string") {
    if (value.trim().length > 0) {
      date = new Date(value);
    }
  } else if (typeof value === "number") {
    if (Number.isFinite(value) && value > 0) {
      date = new Date(value > 1e12 ? value : value * 1e3);
    }
  }
  if (date === null || Number.isNaN(date.getTime())) {
    return void 0;
  }
  return date.toISOString();
}
|
|
472
|
+
/**
 * Reads the first usable timestamp from a record, trying the common field
 * names in a fixed order.
 *
 * @param {object} record - Record to inspect.
 * @returns {string|undefined} ISO timestamp, or undefined when no field parses.
 */
function extractTimestamp(record) {
  const candidateFields = ["timestamp", "ts", "created_at", "createdAt", "time", "date"];
  for (let position = 0; position < candidateFields.length; position += 1) {
    const iso = parseTimestampValue(record[candidateFields[position]]);
    if (iso) {
      return iso;
    }
  }
  return void 0;
}
|
|
481
|
+
/**
 * Returns a file's modification time as an ISO timestamp.
 *
 * @param {string} filePath - Path handed to `fs.stat`.
 * @returns {Promise<string|undefined>} ISO timestamp, or undefined when the lookup or conversion throws.
 */
async function getFileMtimeTimestamp(filePath) {
  try {
    const { mtime } = await fs.stat(filePath);
    return parseTimestampValue(mtime.toISOString());
  } catch {
    // Best effort: any failure means "no timestamp available".
    return void 0;
  }
}
|
|
489
|
+
/**
 * Chooses a fallback timestamp: the first candidate that parses, else the
 * file's modification time, else the current time.
 *
 * @param {string} filePath - File whose mtime is the second choice.
 * @param {...unknown} candidates - Preferred timestamp values, in priority order.
 * @returns {Promise<string>} ISO timestamp.
 */
async function resolveTimestampFallback(filePath, ...candidates) {
  for (let position = 0; position < candidates.length; position += 1) {
    const iso = parseTimestampValue(candidates[position]);
    if (iso) {
      return iso;
    }
  }
  const mtimeIso = await getFileMtimeTimestamp(filePath);
  return mtimeIso ? mtimeIso : (/* @__PURE__ */ new Date()).toISOString();
}
|
|
502
|
+
/**
 * Ensures every message has a parseable ISO timestamp, substituting a shared
 * fallback where a message's own timestamp is missing or invalid.
 *
 * @param {string} filePath - Transcript file, used for the mtime fallback.
 * @param {Iterable<{timestamp: unknown}>} messages - Messages to update in place.
 * @param {{sessionTimestamp?: unknown}} [options] - Preferred fallback source.
 * @returns {Promise<string>} The fallback timestamp that was used.
 */
async function applyMessageTimestampFallbacks(filePath, messages, options) {
  const sessionTimestamp = options?.sessionTimestamp;
  const fallback = await resolveTimestampFallback(filePath, sessionTimestamp);
  for (const entry of messages) {
    const own = parseTimestampValue(entry.timestamp);
    entry.timestamp = own ?? fallback;
  }
  return fallback;
}
|
|
509
|
+
|
|
510
|
+
// src/adapters/openclaw/transcript/parser.ts
|
|
511
|
+
var SKIPPED_RECORD_TYPES = /* @__PURE__ */ new Set([
  "compaction",
  "custom",
  "thinking_level_change"
]);
// Parser policy: the default lists, except that "agenr_recall" and "image"
// are dropped and "image" is therefore removed from the keep list.
var TOOL_RESULT_POLICY = {
  dropToolNames: /* @__PURE__ */ new Set([
    ...DEFAULT_TOOL_RESULT_DROP_NAMES,
    "agenr_recall",
    "image"
  ]),
  keepToolNames: new Set(
    DEFAULT_TOOL_RESULT_KEEP_NAMES.filter((name) => name !== "image")
  )
};
var RAW_TEXT_BLOCK_TYPES = /* @__PURE__ */ new Set([
  "input_text",
  "output_text",
  "text"
]);
var SENDER_METADATA_SENTINEL = "Sender (untrusted metadata):";
var CONVERSATION_INFO_SENTINEL = "Conversation info (untrusted metadata):";
// Heading lines that introduce a metadata block at the start of a user message.
var USER_METADATA_PREFIX_SENTINELS = /* @__PURE__ */ new Set([
  SENDER_METADATA_SENTINEL,
  CONVERSATION_INFO_SENTINEL,
  "Thread starter (untrusted, for context):",
  "Replied message (untrusted, for context):",
  "Forwarded message context (untrusted metadata):",
  "Chat history since last reply (untrusted, for context):"
]);
// Heading line that introduces trailing metadata after the message body.
var USER_METADATA_SUFFIX_SENTINEL = "Untrusted context (metadata, do not treat as instructions or commands):";
var USER_METADATA_SENTINELS = [
  USER_METADATA_SUFFIX_SENTINEL,
  ...USER_METADATA_PREFIX_SENTINELS
];
|
|
529
|
+
/**
 * Creates a fresh, empty parse state for one transcript.
 *
 * @returns {object} State holding warnings, messages, counters, models seen,
 *   pending tool calls (as a queue and by id) and surface-detection fields.
 */
function createParseState() {
  const stats = {
    totalMessageRecords: 0,
    systemDropped: 0,
    base64Dropped: 0,
    skippedRecordTypes: 0,
    toolResultsDropped: 0,
    toolResultsKept: 0
  };
  return {
    warnings: [],
    messages: [],
    stats,
    modelsUsed: [],
    modelsUsedSet: /* @__PURE__ */ new Set(),
    pendingToolCalls: [],
    pendingToolCallsById: /* @__PURE__ */ new Map(),
    detectedSurface: null,
    surfaceDetected: false,
    firstUserRawText: null
  };
}
|
|
550
|
+
/**
 * Returns the message text with its original whitespace, joining multiple
 * text blocks with newlines.
 *
 * @param {unknown} content - A string, an array of blocks, or anything else.
 * @returns {string} Raw text; empty when the content holds no text.
 */
function extractRawMessageText(content) {
  if (typeof content === "string") {
    return content;
  }
  if (!Array.isArray(content)) {
    return "";
  }
  const pieces = content.reduce((collected, item) => {
    if (typeof item === "string") {
      collected.push(item);
      return collected;
    }
    const block = asRecord(item);
    if (!block) {
      return collected;
    }
    if (typeof block.text === "string") {
      collected.push(block.text);
      return collected;
    }
    const kind = typeof block.type === "string" ? block.type.trim().toLowerCase() : "";
    if (typeof block.content === "string" && RAW_TEXT_BLOCK_TYPES.has(kind)) {
      collected.push(block.content);
    }
    return collected;
  }, []);
  return pieces.join("\n");
}
|
|
578
|
+
/**
 * Returns the normalized user message text with metadata blocks removed.
 *
 * The raw text is only re-processed when it contains one of the known
 * metadata sentinel headings; otherwise the plain normalized text is returned.
 *
 * @param {unknown} content - User message content.
 * @returns {string} Normalized message text without metadata blocks.
 */
function stripOpenClawUserMetadata(content) {
  const cleaned = normalizeMessageText(content);
  if (cleaned.length === 0) {
    return cleaned;
  }
  const raw = extractRawMessageText(content);
  const hasSentinel = raw.length > 0 && USER_METADATA_SENTINELS.some((sentinel) => raw.includes(sentinel));
  return hasSentinel ? normalizeMessageText(stripMetadataBlocks(raw)) : cleaned;
}
|
|
589
|
+
/**
 * Removes leading metadata blocks and any trailing metadata section from raw
 * user text.
 *
 * Leading blocks are a known prefix heading followed by a fenced JSON block;
 * they are skipped one after another. The body ends at the first suffix
 * heading line found after that point. If the text runs out, or the suffix
 * heading is reached while still skipping prefixes, the result is empty.
 *
 * @param {string} text - Raw message text.
 * @returns {string} The trimmed message body.
 */
function stripMetadataBlocks(text) {
  const rows = text.split(/\r?\n/u);
  const isBlank = (row) => row.trim().length === 0;
  let cursor = 0;
  for (;;) {
    while (cursor < rows.length && isBlank(rows[cursor])) {
      cursor += 1;
    }
    if (cursor >= rows.length) {
      return "";
    }
    const heading = rows[cursor].trim();
    if (heading === USER_METADATA_SUFFIX_SENTINEL) {
      return "";
    }
    if (!USER_METADATA_PREFIX_SENTINELS.has(heading)) {
      break;
    }
    const afterFence = skipMetadataJsonFence(rows, cursor);
    if (afterFence === cursor) {
      // Heading without a well-formed fence: treat it as body text.
      break;
    }
    cursor = afterFence;
  }
  const remainder = rows.slice(cursor);
  const cut = remainder.findIndex((row) => row.trim() === USER_METADATA_SUFFIX_SENTINEL);
  const body = cut >= 0 ? remainder.slice(0, cut) : remainder;
  return body.join("\n").trim();
}
|
|
616
|
+
/**
 * Given the index of a metadata heading line, finds where the text resumes
 * after the fenced block that follows it.
 *
 * Blank lines between the heading and the opening fence, and after the
 * closing fence, are skipped. When there is no opening fence or the fence is
 * never closed, `startIndex` is returned unchanged to signal "nothing skipped".
 *
 * @param {string[]} lines - All lines of the text.
 * @param {number} startIndex - Index of the heading line.
 * @returns {number} Index of the first line after the block, or `startIndex`.
 */
function skipMetadataJsonFence(lines, startIndex) {
  const total = lines.length;
  const skipBlank = (from) => {
    let position = from;
    while (position < total && lines[position]?.trim().length === 0) {
      position += 1;
    }
    return position;
  };
  const openAt = skipBlank(startIndex + 1);
  if (openAt >= total || !/^```(?:json)?\s*$/iu.test(lines[openAt]?.trim() ?? "")) {
    return startIndex;
  }
  let closeAt = openAt + 1;
  while (closeAt < total && !/^```\s*$/u.test(lines[closeAt]?.trim() ?? "")) {
    closeAt += 1;
  }
  if (closeAt >= total) {
    return startIndex;
  }
  return skipBlank(closeAt + 1);
}
|
|
637
|
+
/**
 * Records a model id on the state the first time it is seen; blank or
 * non-string values and repeats are ignored.
 *
 * @param {{modelsUsed: string[], modelsUsedSet: Set<string>}} state - Parse state (mutated).
 * @param {unknown} value - Candidate model id.
 */
function addModelUsed(state, value) {
  const id = getString(value);
  if (!id) {
    return;
  }
  const seen = state.modelsUsedSet;
  if (seen.has(id)) {
    return;
  }
  seen.add(id);
  state.modelsUsed.push(id);
}
|
|
645
|
+
/**
 * Stores the detected surface on the state once; later calls and falsy
 * surfaces leave the state untouched.
 *
 * @param {{detectedSurface: (string|null), surfaceDetected: boolean}} state - Parse state (mutated).
 * @param {string|null|undefined} surface - Surface to record.
 */
function setDetectedSurface(state, surface) {
  const shouldRecord = !state.surfaceDetected && Boolean(surface);
  if (shouldRecord) {
    state.detectedSurface = surface;
    state.surfaceDetected = true;
  }
}
|
|
652
|
+
/**
 * Reads `inbound_meta.surface` from a record, trimmed and lowercased.
 *
 * @param {object} record - Record that may carry an `inbound_meta` object.
 * @returns {string|null} The surface name, or null when absent or blank.
 */
function readInboundSurface(record) {
  const meta = asRecord(record.inbound_meta);
  const declared = getString(meta?.surface);
  if (declared === void 0) {
    return null;
  }
  const normalized = declared.trim().toLowerCase();
  return normalized || null;
}
|
|
657
|
+
/**
 * Finds a metadata heading line and parses the fenced JSON block under it.
 *
 * Occurrences of the heading with no opening fence, an unclosed fence, or a
 * body that is not valid JSON are skipped and the search continues. The first
 * body that parses decides the result.
 *
 * @param {string} rawText - Raw message text.
 * @param {string} sentinel - Exact heading line to look for (compared after trimming).
 * @returns {object|null} The parsed payload when it is a non-array object, otherwise null.
 */
function extractMetadataPayload(rawText, sentinel) {
  const rows = rawText.split(/\r?\n/u);
  for (const [position, row] of rows.entries()) {
    if (row.trim() !== sentinel) {
      continue;
    }
    let openAt = position + 1;
    while (openAt < rows.length && rows[openAt].trim().length === 0) {
      openAt += 1;
    }
    if (openAt >= rows.length || !/^```(?:json)?\s*$/iu.test(rows[openAt].trim())) {
      continue;
    }
    const bodyStart = openAt + 1;
    let bodyEnd = bodyStart;
    while (bodyEnd < rows.length && !/^```\s*$/u.test(rows[bodyEnd].trim())) {
      bodyEnd += 1;
    }
    if (bodyEnd >= rows.length) {
      continue;
    }
    try {
      const payload = JSON.parse(rows.slice(bodyStart, bodyEnd).join("\n").trim());
      return asRecord(payload);
    } catch {
      continue;
    }
  }
  return null;
}
|
|
688
|
+
/**
 * Maps an identifier onto a known surface name by substring match.
 * Markers are checked in a fixed order and the first hit wins.
 *
 * @param {string|null|undefined} value - Identifier to classify (callers in this file pass lowercased text).
 * @returns {"telegram"|"signal"|"discord"|"tui"|"webchat"|null} Surface name, or null when nothing matches.
 */
function mapKnownSurface(value) {
  if (!value) {
    return null;
  }
  const markers = [
    ["telegram", "telegram"],
    ["signal", "signal"],
    ["discord", "discord"],
    ["openclaw-tui", "tui"],
    ["gateway-client", "webchat"],
    ["openclaw-control-ui", "webchat"],
    ["webchat", "webchat"]
  ];
  const hit = markers.find(([needle]) => value.includes(needle));
  return hit ? hit[1] : null;
}
|
|
709
|
+
/**
 * Derives the surface from the sender metadata block of a user message,
 * using the payload's `label` or, failing that, its `id`.
 *
 * @param {string} rawText - Raw user message text.
 * @returns {string|null} Known surface name, or null.
 */
function extractSenderSurface(rawText) {
  const payload = extractMetadataPayload(rawText, SENDER_METADATA_SENTINEL);
  if (!payload) {
    return null;
  }
  const identity = getString(payload.label) ?? getString(payload.id);
  const needle = identity ? identity.trim().toLowerCase() : "";
  return mapKnownSurface(needle);
}
|
|
717
|
+
/**
 * Derives the surface from the conversation-info metadata block of a user
 * message, using the payload's `sender_id`.
 *
 * @param {string} rawText - Raw user message text.
 * @returns {string|null} Known surface name, or null.
 */
function extractConversationInfoSurface(rawText) {
  const payload = extractMetadataPayload(rawText, CONVERSATION_INFO_SENTINEL);
  if (!payload) {
    return null;
  }
  const sender = getString(payload.sender_id);
  const needle = sender ? sender.trim().toLowerCase() : "";
  return mapKnownSurface(needle);
}
|
|
725
|
+
/**
 * Guesses the surface from telltale markers in the first user message.
 *
 * @param {string|null|undefined} firstUserRawText - Raw text of the first user message.
 * @returns {"subagent"|"heartbeat"|null} Inferred surface, or null when no marker is present.
 */
function inferSurfaceFromContent(firstUserRawText) {
  const haystack = firstUserRawText?.trim().toLowerCase() ?? "";
  if (haystack.includes("[subagent context]")) {
    return "subagent";
  }
  return haystack.includes("heartbeat.md") ? "heartbeat" : null;
}
|
|
738
|
+
/**
 * Finds the pending tool call that a tool-result message answers.
 *
 * When the message carries an id that is registered in
 * `state.pendingToolCallsById`, that call is removed from both the map and the
 * FIFO queue and returned. Otherwise the oldest queued call is consumed.
 *
 * @param state - Parse state holding `pendingToolCalls` (array) and `pendingToolCallsById` (Map).
 * @param message - Tool-result message; the id is read from `toolCallId`,
 *   `tool_call_id`, `call_id`, then `id`.
 * @returns The matching tool call, or null when nothing is pending.
 */
function resolveToolContext(state, message) {
  let toolCallId;
  for (const key of ["toolCallId", "tool_call_id", "call_id", "id"]) {
    toolCallId = getString(message[key]);
    if (toolCallId != null) {
      break;
    }
  }
  const byId = state.pendingToolCallsById;
  if (!toolCallId || !byId.has(toolCallId)) {
    // No usable id: fall back to first-in-first-out matching.
    return state.pendingToolCalls.shift() ?? null;
  }
  const matched = byId.get(toolCallId) ?? null;
  byId.delete(toolCallId);
  if (matched) {
    const position = state.pendingToolCalls.findIndex((pending) => pending.id === toolCallId);
    if (position >= 0) {
      state.pendingToolCalls.splice(position, 1);
    }
  }
  return matched;
}
|
|
753
|
+
/**
 * Folds one message-bearing transcript record into the parse state.
 *
 * Steps, in order: count the record, try to detect the chat surface (from the
 * message itself, then for user messages from embedded metadata), drop system
 * messages, and finally dispatch on role:
 *  - user: update the session label, strip metadata, drop pure base64, push text;
 *  - assistant: queue its tool calls, record the model, push text plus tool-call summaries;
 *  - toolResult: pair with a pending tool call and push either the (possibly
 *    truncated) result or a placeholder, per TOOL_RESULT_POLICY.
 * Any other role is ignored.
 *
 * @param state - Mutable parse state (messages, stats, pending tool calls, surface info).
 * @param record - The enclosing JSONL record; consulted for timestamp and tool name.
 * @param message - The record's `message` object.
 */
function handleMessageRecord(state, record, message) {
  state.stats.totalMessageRecords += 1;
  const role = normalizeOpenClawRole(message.role);

  if (!state.surfaceDetected) {
    setDetectedSurface(state, readInboundSurface(message));
  }
  if (role === "user" && !state.surfaceDetected) {
    const rawText = extractRawMessageText(message.content);
    if (state.firstUserRawText === null) {
      // Remembered so the caller can run content-based inference later.
      state.firstUserRawText = rawText;
    }
    setDetectedSurface(state, extractSenderSurface(rawText));
    if (!state.surfaceDetected) {
      setDetectedSurface(state, extractConversationInfoSurface(rawText));
    }
  }

  if (role === "system") {
    state.stats.systemDropped += 1;
    return;
  }

  const timestamp = extractTimestamp(record) ?? extractTimestamp(message);

  switch (role) {
    case "user": {
      const conversationLabel = extractConversationLabel(message.content);
      if (conversationLabel) {
        state.sessionLabel = conversationLabel;
      }
      const userText = stripOpenClawUserMetadata(message.content);
      if (!userText) {
        return;
      }
      if (isPureBase64(userText)) {
        state.stats.base64Dropped += 1;
        return;
      }
      pushMessage(state.messages, "user", userText, timestamp);
      return;
    }
    case "assistant": {
      const toolCalls = extractToolCallBlocks(message.content);
      for (const toolCall of toolCalls) {
        state.pendingToolCalls.push(toolCall);
        if (toolCall.id) {
          state.pendingToolCallsById.set(toolCall.id, toolCall);
        }
      }
      const textParts = [
        ...extractAssistantTextParts(message.content),
        ...toolCalls.map((toolCall) => summarizeToolCall(toolCall))
      ];
      const assistantText = textParts.join(" ").trim();
      // The model is recorded even when the message ends up contributing no text.
      addModelUsed(state, message.model);
      if (!assistantText) {
        return;
      }
      if (isPureBase64(assistantText)) {
        state.stats.base64Dropped += 1;
        return;
      }
      pushMessage(state.messages, "assistant", truncateWithMarker(assistantText, 5e3), timestamp);
      return;
    }
    case "toolResult": {
      // Resolve first so the pending call is consumed even if the result is dropped below.
      const toolContext = resolveToolContext(state, message);
      const toolName = getString(message.name) ?? getString(message.tool) ?? getString(record.name) ?? getString(record.tool) ?? toolContext?.name;
      const toolArgs = toolContext?.args ?? {};
      const toolText = normalizeMessageText(message.content);
      if (!toolText) {
        return;
      }
      if (isPureBase64(toolText)) {
        state.stats.base64Dropped += 1;
        return;
      }
      const decision = shouldKeepToolResult(toolName, toolText, TOOL_RESULT_POLICY);
      if (!decision.keep) {
        state.stats.toolResultsDropped += 1;
        pushMessage(state.messages, "assistant", toolResultPlaceholder(toolName ?? "unknown", toolArgs), timestamp);
        return;
      }
      state.stats.toolResultsKept += 1;
      const keptText = decision.truncateTo ? truncateWithMarker(toolText, decision.truncateTo) : toolText;
      pushMessage(state.messages, "assistant", keptText, timestamp);
      return;
    }
    default:
      return;
  }
}
|
|
833
|
+
/**
 * Routes one parsed JSONL record.
 *
 * - "session" records update session id, timestamp, label and model;
 * - "model_change" records contribute a model and count as skipped;
 * - types listed in SKIPPED_RECORD_TYPES are counted and ignored;
 * - anything else is handled as a message record if it has a `message` object.
 * Surface detection from the record is attempted until a surface is found.
 *
 * @param state - Mutable parse state.
 * @param record - One parsed JSONL object.
 */
function handleRecord(state, record) {
  const detectSurfaceFromRecord = () => {
    if (!state.surfaceDetected) {
      setDetectedSurface(state, readInboundSurface(record));
    }
  };
  const recordType = record.type;

  if (recordType === "session") {
    // Each field keeps its previous value when the record does not provide one.
    state.sessionId = getString(record.id) ?? state.sessionId;
    state.sessionTimestamp = extractTimestamp(record) ?? state.sessionTimestamp;
    state.sessionLabel = normalizeSessionLabel(getString(record.conversation_label) ?? "") ?? state.sessionLabel;
    addModelUsed(state, record.model);
    detectSurfaceFromRecord();
    return;
  }

  detectSurfaceFromRecord();

  if (recordType === "model_change") {
    addModelUsed(state, record.modelId);
    state.stats.skippedRecordTypes += 1;
    return;
  }

  const isSkippedType = typeof recordType === "string" && SKIPPED_RECORD_TYPES.has(recordType);
  if (isSkippedType) {
    state.stats.skippedRecordTypes += 1;
    return;
  }

  const message = asRecord(record.message);
  if (message) {
    handleMessageRecord(state, record, message);
  }
}
|
|
862
|
+
/**
 * Formats the verbose-mode summary of what the transcript filter dropped and kept.
 *
 * @param stats - Counters with `toolResultsDropped`, `toolResultsKept`,
 *   `systemDropped` and `base64Dropped`.
 * @returns A single-sentence summary string.
 */
function buildFilterWarning(stats) {
  const { toolResultsDropped, toolResultsKept, systemDropped, base64Dropped } = stats;
  const clauses = [
    `${toolResultsDropped} tool results dropped`,
    `${toolResultsKept} kept`,
    `${systemDropped} system dropped`,
    `${base64Dropped} base64 dropped`
  ];
  return `Filtered transcript: ${clauses.join(", ")}.`;
}
|
|
865
|
+
var OpenClawTranscriptParser = class {
  /**
   * Reads an OpenClaw JSONL transcript from disk and converts it into agenr
   * transcript data.
   *
   * The file is hashed (SHA-256 of its full text), every JSONL record is folded
   * into a fresh parse state, and timestamps missing from messages are filled
   * in by the fallback helpers.
   *
   * @param filePath - Absolute or relative path to the transcript file.
   * @param options - Optional flags; `verbose: true` appends a filter summary to the warnings.
   * @returns Parsed transcript messages, warnings, and metadata.
   */
  async parseFile(filePath, options) {
    const raw = await fs2.readFile(filePath, "utf8");
    const verbose = options?.verbose === true;
    const state = createParseState();
    const transcriptHash = createHash("sha256").update(raw).digest("hex");

    parseJsonlLines(raw, state.warnings, (record) => {
      handleRecord(state, record);
    });

    // Content-based inference is the last resort once all records are processed.
    if (!state.surfaceDetected && state.firstUserRawText) {
      setDetectedSurface(state, inferSurfaceFromContent(state.firstUserRawText));
    }

    let fallbackTimestamp;
    if (state.messages.length > 0) {
      fallbackTimestamp = await applyMessageTimestampFallbacks(filePath, state.messages, { sessionTimestamp: state.sessionTimestamp });
    } else {
      fallbackTimestamp = await resolveTimestampFallback(filePath, state.sessionTimestamp);
    }

    if (verbose) {
      state.warnings.push(buildFilterWarning(state.stats));
    }

    const firstMessage = state.messages[0];
    const lastMessage = state.messages[state.messages.length - 1];
    const startedAt = state.sessionTimestamp ?? firstMessage?.timestamp ?? fallbackTimestamp;
    const endedAt = lastMessage?.timestamp ?? state.sessionTimestamp ?? fallbackTimestamp;
    const hasModels = state.modelsUsed.length > 0;

    const metadata = {
      sessionId: state.sessionId,
      sessionLabel: state.sessionLabel,
      startedAt,
      endedAt,
      messageCount: state.messages.length,
      transcriptHash,
      modelsUsed: hasModels ? state.modelsUsed : void 0,
      reconstructedSurface: state.detectedSurface,
      surfaceReconstructionSource: state.surfaceDetected ? "reconstructed" : "none"
    };
    return {
      messages: state.messages,
      warnings: state.warnings,
      metadata
    };
  }
};
// Shared parser instance.
var openClawTranscriptParser = new OpenClawTranscriptParser();
|
|
908
|
+
|
|
909
|
+
// src/core/claim-key.ts
|
|
910
|
+
// Placeholder segment; a key made of two of these is rejected by normalizeClaimKey.
var UNKNOWN_SEGMENT = "unknown";

// Entities that refer to the speaker or the current project rather than a named thing.
var SELF_REFERENTIAL_ENTITIES = /* @__PURE__ */ new Set([
  "i", "me", "myself", "the_user", "user",
  "we", "our_team", "the_project", "this_project"
]);

// Entities flagged as "generic_entity" by inspectClaimKey.
var GENERIC_ENTITIES = /* @__PURE__ */ new Set([
  "app", "company", "config", "data", "device",
  "entity", "environment", "item", "organization", "person",
  "place", "project", "service", "setting", "system",
  "team", "thing", "user", "workspace"
]);

// Attributes rejected as "generic_attribute" by validateExtractedClaimKey.
var GENERIC_ATTRIBUTES = /* @__PURE__ */ new Set([
  "info", "details", "config", "stuff", "thing", "data"
]);

// Tokens that mark a sentence-like relation inside an attribute (see compactRelationAttribute).
var COMPACTION_RELATION_TOKENS = /* @__PURE__ */ new Set([
  "after", "before",
  "depend", "depends",
  "follows", "follow",
  "keep", "keeps",
  "maintain", "maintains",
  "need", "needs",
  "precede", "precedes",
  "preserve", "preserves",
  "require", "required", "requires",
  "retain", "retains"
]);

// Tokens on which splitTokensOnBreaks cuts a token list into segments.
var COMPACTION_BREAK_TOKENS = /* @__PURE__ */ new Set([
  "about", "across", "and", "between", "during", "for",
  "from", "into", "onto", "or", "to", "with"
]);

// Leading modifiers stripped by trimWeakLeadingTokens.
var COMPACTION_WEAK_LEADING_TOKENS = /* @__PURE__ */ new Set([
  "actual", "authoritative", "canonical", "concrete",
  "current", "durable", "existing", "real"
]);

// Action words that turn a requirement phrase into an "<action>_condition" slot.
var ACTION_CONDITION_TOKENS = /* @__PURE__ */ new Set([
  "activate", "activation", "apply", "fire",
  "launch", "run", "start", "trigger"
]);

// Prepositions that may introduce a trailing object in an attribute.
var TRAILING_OBJECT_COMPACTION_PREPOSITIONS = /* @__PURE__ */ new Set([
  "about", "for", "from", "into", "onto", "to", "with"
]);

// Head nouns eligible for trailing-object compaction (see compactTrailingObjectAttribute).
var TRAILING_OBJECT_TRANSFER_HEADS = /* @__PURE__ */ new Set([
  "access", "boundary", "condition", "contract", "guide",
  "path", "policy", "preference", "process", "rule",
  "schedule", "support", "surface", "window", "workflow"
]);

// Head nouns treated as stable attribute families.
var STABLE_ATTRIBUTE_HEADS = /* @__PURE__ */ new Set([
  "access", "boundary", "condition", "contract", "default",
  "dependency", "guide", "mode", "order", "path",
  "policy", "preference", "preservation", "process", "requirement",
  "rule", "schedule", "setting", "status", "strategy",
  "support", "surface", "timezone", "truth", "version",
  "window", "workflow"
]);
|
|
1007
|
+
/**
 * Normalizes one side of a claim key to lower-case snake_case.
 *
 * Every run of characters outside [a-z0-9] (after lower-casing) becomes a
 * single underscore, and leading/trailing underscores are removed.
 *
 * @param value - Raw entity or attribute text.
 * @returns The normalized segment; may be the empty string.
 */
function normalizeClaimKeySegment(value) {
  const lowered = value.trim().toLowerCase();
  // Splitting on separator runs and dropping empty edge pieces is the same as
  // replacing the runs with "_" and trimming underscores from both ends.
  const words = lowered.split(/[^a-z0-9]+/).filter((word) => word.length > 0);
  return words.join("_");
}
|
|
1010
|
+
/**
 * Parses and normalizes an "entity/attribute" claim key.
 *
 * @param value - Raw claim key text.
 * @returns `{ ok: true, value: { claimKey, entity, attribute } }` on success,
 *   otherwise `{ ok: false, reason }` where reason is one of "empty",
 *   "missing_separator", "too_many_segments", "empty_entity",
 *   "empty_attribute" or "unknown_pair".
 */
function normalizeClaimKey(value) {
  const reject = (reason) => ({ ok: false, reason });

  const trimmed = value.trim();
  if (trimmed.length === 0) {
    return reject("empty");
  }

  // n separators always yield n + 1 pieces, so the piece count encodes the slash count.
  const pieces = trimmed.split("/");
  if (pieces.length === 1) {
    return reject("missing_separator");
  }
  if (pieces.length !== 2) {
    return reject("too_many_segments");
  }

  const [rawEntity, rawAttribute] = pieces;
  const entity = normalizeClaimKeySegment(rawEntity);
  if (entity.length === 0) {
    return reject("empty_entity");
  }
  const attribute = normalizeClaimKeySegment(rawAttribute);
  if (attribute.length === 0) {
    return reject("empty_attribute");
  }
  if (entity === UNKNOWN_SEGMENT && attribute === UNKNOWN_SEGMENT) {
    return reject("unknown_pair");
  }

  return {
    ok: true,
    value: {
      claimKey: `${entity}/${attribute}`,
      entity,
      attribute
    }
  };
}
|
|
1043
|
+
/**
 * Normalizes a claim key and compacts a verbose attribute into a shorter slot name.
 *
 * Compaction steps, in order:
 *  1. drop a copy of the entity tokens from the front of the attribute;
 *  2. drop "<preposition> <entity tokens>" from the end of the attribute;
 *  3. apply the first strategy that matches: source-of-truth, relation, trailing object.
 *
 * @param claimKey - Raw "entity/attribute" claim key.
 * @returns null when the key cannot be normalized; otherwise
 *   `{ claimKey, entity, attribute, compactedFrom, reason }`, where
 *   `compactedFrom` is the normalized key if compaction changed it (else null)
 *   and `reason` is the joined description of the applied steps (or null).
 */
function compactClaimKey(claimKey) {
  const normalized = normalizeClaimKey(claimKey);
  if (!normalized.ok) {
    return null;
  }
  const { claimKey: normalizedKey, entity, attribute: normalizedAttribute } = normalized.value;
  const isNonEmpty = (token) => token.length > 0;

  let attributeTokens = normalizedAttribute.split("_").filter(isNonEmpty);
  const entityTokens = entity.split("_").filter(isNonEmpty);
  const entitySize = entityTokens.length;
  const reasons = [];

  // Step 1: "app/app_theme" -> "app/theme" (only when something remains).
  if (entitySize > 0 && startsWithTokens(attributeTokens, entityTokens) && attributeTokens.length > entitySize) {
    attributeTokens = attributeTokens.slice(entitySize);
    reasons.push("removed duplicated entity prefix from attribute");
  }

  // Step 2: "app/theme_for_app" -> "app/theme"; the token before the suffix must be a preposition.
  if (entitySize > 0 && attributeTokens.length > entitySize + 1 && endsWithTokens(attributeTokens, entityTokens) && TRAILING_OBJECT_COMPACTION_PREPOSITIONS.has(attributeTokens[attributeTokens.length - entitySize - 1] ?? "")) {
    attributeTokens = attributeTokens.slice(0, attributeTokens.length - entitySize - 1);
    reasons.push("removed duplicated entity suffix from attribute");
  }

  // Step 3: strategies are tried lazily; the first non-null result is applied.
  const phraseCompaction = compactSourceOfTruthAttribute(attributeTokens) ?? compactRelationAttribute(attributeTokens) ?? compactTrailingObjectAttribute(attributeTokens);
  if (phraseCompaction) {
    attributeTokens = phraseCompaction.attributeTokens;
    reasons.push(phraseCompaction.reason);
  }

  const attribute = attributeTokens.join("_");
  if (attribute.length === 0) {
    // Nothing left after compaction: fall back to the plain normalized key.
    return {
      claimKey: normalizedKey,
      entity,
      attribute: normalizedAttribute,
      compactedFrom: null,
      reason: null
    };
  }

  const compactedClaimKey = `${entity}/${attribute}`;
  const changed = compactedClaimKey !== normalizedKey;
  return {
    claimKey: compactedClaimKey,
    entity,
    attribute,
    compactedFrom: changed ? normalizedKey : null,
    reason: reasons.length > 0 ? joinCompactionReasons(reasons) : null
  };
}
|
|
1095
|
+
/**
 * Applies the rejection rules for an extracted (already normalized) claim key.
 *
 * Rules are checked in order and the first failure is reported:
 * self-referential entity, generic attribute, value-shaped attribute.
 *
 * @param claimKey - Normalized claim key with `entity` and `attribute`.
 * @returns `{ ok: true, value }` or `{ ok: false, reason, value }`.
 */
function validateExtractedClaimKey(claimKey) {
  let reason = null;
  if (SELF_REFERENTIAL_ENTITIES.has(claimKey.entity)) {
    reason = "self_referential_entity";
  } else if (GENERIC_ATTRIBUTES.has(claimKey.attribute)) {
    reason = "generic_attribute";
  } else if (isValueShapedAttribute(claimKey.attribute)) {
    reason = "value_shaped_attribute";
  }
  if (reason === null) {
    return { ok: true, value: claimKey };
  }
  return { ok: false, reason, value: claimKey };
}
|
|
1122
|
+
/**
 * Produces a diagnostic report for a stored claim key.
 *
 * @param value - Claim key text as stored.
 * @returns When normalization fails: `{ rawClaimKey, canonical: false,
 *   normalizationFailure, suspectReasons: [] }`. Otherwise `{ rawClaimKey,
 *   canonical, normalized, suspectReasons }`, where `canonical` is true when
 *   the trimmed input already equals its normalized form and `suspectReasons`
 *   holds distinct reasons (validation failure first, then "generic_entity").
 */
function inspectClaimKey(value) {
  const rawClaimKey = value.trim();
  const normalized = normalizeClaimKey(rawClaimKey);
  if (!normalized.ok) {
    return {
      rawClaimKey,
      canonical: false,
      normalizationFailure: normalized.reason,
      suspectReasons: []
    };
  }

  const key = normalized.value;
  const suspectReasons = [];
  const validation = validateExtractedClaimKey(key);
  if (!validation.ok) {
    suspectReasons.push(validation.reason);
  }
  // Keep the list duplicate-free, mirroring set semantics.
  if (GENERIC_ENTITIES.has(key.entity) && !suspectReasons.includes("generic_entity")) {
    suspectReasons.push("generic_entity");
  }

  return {
    rawClaimKey,
    canonical: key.claimKey === rawClaimKey,
    normalized: key,
    suspectReasons
  };
}
|
|
1148
|
+
/**
 * Tells whether a claim key passes inspection cleanly.
 *
 * @param value - Claim key text.
 * @returns true only when the key is already canonical, normalizes
 *   successfully, and has no suspect reasons.
 */
function isTrustedClaimKeyForCleanup(value) {
  const { canonical, normalized, suspectReasons } = inspectClaimKey(value);
  if (!canonical || !normalized) {
    return false;
  }
  return suspectReasons.length === 0;
}
|
|
1152
|
+
/**
 * Turns a normalizeClaimKey failure reason into a human-readable message.
 *
 * @param reason - One of the normalization failure codes.
 * @returns The message, or undefined for an unrecognized reason.
 */
function describeClaimKeyNormalizationFailure(reason) {
  if (reason === "empty") {
    return "claim key was empty";
  }
  // Both separator problems share one message.
  if (reason === "missing_separator" || reason === "too_many_segments") {
    return "claim key must contain exactly one '/'";
  }
  if (reason === "empty_entity") {
    return "claim key entity was empty after normalization";
  }
  if (reason === "empty_attribute") {
    return "claim key attribute was empty after normalization";
  }
  if (reason === "unknown_pair") {
    return 'claim key "unknown/unknown" is not allowed';
  }
  return undefined;
}
|
|
1168
|
+
/**
 * Turns a validateExtractedClaimKey rejection reason into a human-readable message.
 *
 * @param reason - "self_referential_entity", "generic_attribute" or "value_shaped_attribute".
 * @param claimKey - The rejected key; only read when the reason is recognized.
 * @returns The message, or undefined for an unrecognized reason.
 */
function describeExtractedClaimKeyRejection(reason, claimKey) {
  if (reason === "self_referential_entity") {
    return `entity "${claimKey.entity}" is self-referential`;
  }
  if (reason === "generic_attribute") {
    return `attribute "${claimKey.attribute}" is too generic`;
  }
  if (reason === "value_shaped_attribute") {
    return `attribute "${claimKey.attribute}" looks value-shaped`;
  }
  return undefined;
}
|
|
1178
|
+
/**
 * Turns an inspectClaimKey suspect reason into a human-readable message.
 *
 * "generic_entity" is described here; the three validation reasons are
 * delegated to describeExtractedClaimKeyRejection.
 *
 * @param reason - Suspect reason code.
 * @param claimKey - The inspected key.
 * @returns The message, or undefined for an unrecognized reason.
 */
function describeClaimKeySuspicion(reason, claimKey) {
  if (reason === "generic_entity") {
    return `entity "${claimKey.entity}" is too generic`;
  }
  const isValidationReason = reason === "self_referential_entity" || reason === "generic_attribute" || reason === "value_shaped_attribute";
  if (isValidationReason) {
    return describeExtractedClaimKeyRejection(reason, claimKey);
  }
  return undefined;
}
|
|
1188
|
+
/**
 * Detects attributes that look like a value rather than a slot name:
 * underscore-separated digit groups with an optional leading "v"
 * (for example "3", "1_2_3", "v2", "v1_4").
 *
 * @param attribute - Normalized attribute text.
 * @returns true when the whole attribute has that shape.
 */
function isValueShapedAttribute(attribute) {
  const valueShape = /^v?\d+(?:_\d+)*$/u;
  return valueShape.test(attribute);
}
|
|
1191
|
+
/**
 * Collapses attributes such as "canonical_source_of_truth" down to the
 * canonical tokens ["source", "of", "truth"].
 *
 * Compaction is refused when the attribute is already exactly the phrase, when
 * anything other than weak modifiers precedes the phrase, or when the
 * surrounding tokens contain a stable attribute head or "and"/"or".
 *
 * @param attributeTokens - Attribute split into tokens.
 * @returns `{ attributeTokens, reason }` or null when no compaction applies.
 */
function compactSourceOfTruthAttribute(attributeTokens) {
  const phraseStart = findSourceOfTruthPhraseIndex(attributeTokens);
  if (phraseStart === -1) {
    return null;
  }
  const canonicalPhrase = ["source", "of", "truth"];
  const alreadyCanonical = attributeTokens.length === canonicalPhrase.length && startsWithTokens(attributeTokens, canonicalPhrase);
  if (alreadyCanonical) {
    return null;
  }

  const leading = attributeTokens.slice(0, phraseStart);
  const trailing = attributeTokens.slice(phraseStart + canonicalPhrase.length);
  const surrounding = [...leading, ...trailing];

  if (!leading.every((token) => COMPACTION_WEAK_LEADING_TOKENS.has(token))) {
    return null;
  }
  // Another stable family next to the phrase means the attribute is about more than one thing.
  if (surrounding.some((token) => STABLE_ATTRIBUTE_HEADS.has(token))) {
    return null;
  }
  if (surrounding.some((token) => token === "and" || token === "or")) {
    return null;
  }

  return {
    attributeTokens: canonicalPhrase,
    reason: "collapsed source-of-truth phrasing into the stable canonical slot"
  };
}
|
|
1213
|
+
/**
 * Collapses a sentence-like attribute built around a relation word into a
 * short "<focus>_<slot>" form.
 *
 * The first relation token decides the slot: requirement words give
 * "requirement" (or "<action>_condition" when an action word follows),
 * ordering words give "order", preservation words give "preservation".
 * The focus is up to two tokens taken from the right of the relation, falling
 * back to the left.
 *
 * @param attributeTokens - Attribute split into tokens.
 * @returns `{ attributeTokens, reason }` or null when no compaction applies.
 */
function compactRelationAttribute(attributeTokens) {
  const relationIndex = attributeTokens.findIndex((token) => COMPACTION_RELATION_TOKENS.has(token));
  if (relationIndex === -1) {
    return null;
  }
  const relation = attributeTokens[relationIndex] ?? "";
  const left = attributeTokens.slice(0, relationIndex);
  const right = attributeTokens.slice(relationIndex + 1);
  if (left.length === 0 && right.length === 0) {
    // The attribute is just the relation word; nothing to focus on.
    return null;
  }

  let slot = null;
  let reason = null;
  if (isRequirementRelation(relation)) {
    const conditionAction = extractConditionAction(right);
    if (conditionAction) {
      return {
        attributeTokens: [conditionAction, "condition"],
        reason: `collapsed a sentence-like ${conditionAction} requirement into a stable condition slot`
      };
    }
    slot = "requirement";
    reason = "collapsed a sentence-like requirement phrase into a stable requirement slot";
  } else if (isOrderingRelation(relation)) {
    slot = "order";
    reason = "collapsed a sentence-like ordering phrase into a stable order slot";
  } else if (isPreservationRelation(relation)) {
    slot = "preservation";
    reason = "collapsed a sentence-like preservation phrase into a stable preservation slot";
  }
  if (slot === null) {
    return null;
  }

  const focus = extractCompactionFocus(right, 2) ?? extractCompactionFocus(left, 2);
  if (!focus) {
    return null;
  }
  return {
    attributeTokens: [...focus, slot],
    reason
  };
}
|
|
1263
|
+
/**
 * Collapses "<head> <preposition> <object…>" attributes into
 * "<object focus>_<head core>" (for example tokens for "policy for data
 * retention" become ["data", "retention", "policy"]).
 *
 * The split happens at the first preposition, which must be neither the first
 * nor the last token. The left side (after weak leading tokens are trimmed)
 * must be 1-3 tokens, free of "and"/"or" and relation words, and end in a
 * transfer head.
 *
 * @param attributeTokens - Attribute split into tokens.
 * @returns `{ attributeTokens, reason }` or null when no compaction applies.
 */
function compactTrailingObjectAttribute(attributeTokens) {
  const prepositionIndex = attributeTokens.findIndex((token) => TRAILING_OBJECT_COMPACTION_PREPOSITIONS.has(token));
  const lastIndex = attributeTokens.length - 1;
  if (prepositionIndex <= 0 || prepositionIndex >= lastIndex) {
    return null;
  }

  const left = trimWeakLeadingTokens(attributeTokens.slice(0, prepositionIndex));
  const right = attributeTokens.slice(prepositionIndex + 1);

  const leftSizeOk = left.length >= 1 && left.length <= 3;
  const leftIsNoisy = left.some((token) => token === "and" || token === "or" || COMPACTION_RELATION_TOKENS.has(token));
  if (!leftSizeOk || leftIsNoisy) {
    return null;
  }

  const head = left[left.length - 1];
  if (!head || !TRAILING_OBJECT_TRANSFER_HEADS.has(head)) {
    return null;
  }

  const objectFocus = extractCompactionFocus(right, 2);
  if (!objectFocus) {
    return null;
  }
  const headCore = extractStableHeadCore(left, 2);
  if (!headCore) {
    return null;
  }

  return {
    attributeTokens: [...objectFocus, ...headCore],
    reason: "collapsed a trailing object phrase into a compact stable slot name"
  };
}
|
|
1290
|
+
/**
 * Locates the first occurrence of the consecutive tokens "source", "of", "truth".
 *
 * @param tokens - Token list to scan.
 * @returns Index of "source" in the first match, or -1 when the phrase is absent.
 */
function findSourceOfTruthPhraseIndex(tokens) {
  // Positions too close to the end read undefined neighbours and cannot match.
  return tokens.findIndex(
    (token, position) => token === "source" && tokens[position + 1] === "of" && tokens[position + 2] === "truth"
  );
}
|
|
1298
|
+
/**
 * Picks the last action word (per ACTION_CONDITION_TOKENS) in a token list.
 *
 * @param tokens - Tokens to scan.
 * @returns The last matching token, or null when there is none.
 */
function extractConditionAction(tokens) {
  const lastAction = [...tokens].reverse().find((token) => token && ACTION_CONDITION_TOKENS.has(token));
  return lastAction ?? null;
}
|
|
1307
|
+
/**
 * Extracts the leading "focus" tokens of a phrase: weak leading modifiers are
 * dropped, the remainder is cut at break tokens, and the first segment is
 * truncated to `limit` tokens.
 *
 * @param tokens - Phrase tokens.
 * @param limit - Maximum number of tokens to return.
 * @returns The focus tokens, or null when no non-empty segment exists.
 */
function extractCompactionFocus(tokens, limit) {
  const meaningful = trimWeakLeadingTokens(tokens).filter((token) => token.length > 0);
  const [firstSegment] = splitTokensOnBreaks(meaningful).filter((segment) => segment.length > 0);
  if (firstSegment && firstSegment.length > 0) {
    return firstSegment.slice(0, limit);
  }
  return null;
}
|
|
1316
|
+
/**
 * Returns the trailing tokens of a phrase, provided the phrase ends in a
 * stable attribute head.
 *
 * @param tokens - Phrase tokens.
 * @param limit - Maximum number of trailing tokens to return.
 * @returns Up to `limit` trailing tokens (weak leading tokens removed first),
 *   or null when the final token is not in STABLE_ATTRIBUTE_HEADS.
 */
function extractStableHeadCore(tokens, limit) {
  const candidates = trimWeakLeadingTokens(tokens).filter((token) => token.length > 0);
  const finalToken = candidates[candidates.length - 1];
  const endsInStableHead = Boolean(finalToken) && STABLE_ATTRIBUTE_HEADS.has(finalToken);
  if (!endsInStableHead) {
    return null;
  }
  const firstKept = Math.max(0, candidates.length - limit);
  return candidates.slice(firstKept);
}
|
|
1324
|
+
/**
 * Splits a token list into segments at every break token
 * (COMPACTION_BREAK_TOKENS). Break tokens are discarded and empty segments
 * are never produced.
 *
 * @param tokens - Tokens to split.
 * @returns Array of non-empty token segments, in order.
 */
function splitTokensOnBreaks(tokens) {
  const segments = [];
  let openSegment = null;
  for (const token of tokens) {
    if (COMPACTION_BREAK_TOKENS.has(token)) {
      // Close the current segment; the next ordinary token starts a new one.
      openSegment = null;
      continue;
    }
    if (openSegment === null) {
      openSegment = [];
      segments.push(openSegment);
    }
    openSegment.push(token);
  }
  return segments;
}
|
|
1342
|
+
/**
 * Drops weak modifiers (COMPACTION_WEAK_LEADING_TOKENS) from the front of a
 * token list. Tokens after the first non-weak token are kept untouched.
 *
 * @param tokens - Tokens to trim.
 * @returns A new array without the leading weak tokens.
 */
function trimWeakLeadingTokens(tokens) {
  const firstStrong = tokens.findIndex((token) => !COMPACTION_WEAK_LEADING_TOKENS.has(token ?? ""));
  if (firstStrong === -1) {
    // Every token is weak (or the list is empty).
    return [];
  }
  return tokens.slice(firstStrong);
}
|
|
1349
|
+
/**
 * Joins compaction reasons into one phrase: items are comma-separated and the
 * final item is attached with " and ".
 *
 * @param reasons - Reason strings, in the order they were applied.
 * @returns The joined text; "" for an empty list, the item itself for a single item.
 */
function joinCompactionReasons(reasons) {
  const count = reasons.length;
  if (count > 1) {
    const leading = reasons.slice(0, -1).join(", ");
    const final = reasons[count - 1];
    return `${leading} and ${final}`;
  }
  return reasons[0] ?? "";
}
|
|
1355
|
+
/**
 * @param token - A single attribute token.
 * @returns true when the token expresses a requirement or dependency.
 */
function isRequirementRelation(token) {
  switch (token) {
    case "depend":
    case "depends":
    case "need":
    case "needs":
    case "required":
    case "require":
    case "requires":
      return true;
    default:
      return false;
  }
}
|
|
1358
|
+
/**
 * @param token - A single attribute token.
 * @returns true when the token expresses ordering between two things.
 */
function isOrderingRelation(token) {
  switch (token) {
    case "after":
    case "before":
    case "follow":
    case "follows":
    case "precede":
    case "precedes":
      return true;
    default:
      return false;
  }
}
|
|
1361
|
+
/**
 * @param token - A single attribute token.
 * @returns true when the token expresses keeping or preserving something.
 */
function isPreservationRelation(token) {
  switch (token) {
    case "keep":
    case "keeps":
    case "maintain":
    case "maintains":
    case "preserve":
    case "preserves":
    case "retain":
    case "retains":
      return true;
    default:
      return false;
  }
}
|
|
1364
|
+
/**
 * @param tokens - Token list to test.
 * @param prefix - Tokens expected at the start.
 * @returns true when `tokens` begins with every token of `prefix`, in order
 *   (trivially true for an empty prefix).
 */
function startsWithTokens(tokens, prefix) {
  const hasMismatch = prefix.some((expected, position) => tokens[position] !== expected);
  return !hasMismatch;
}
|
|
1367
|
+
/**
 * @param tokens - Token list to test.
 * @param suffix - Tokens expected at the end.
 * @returns true when `tokens` ends with every token of `suffix`, in order
 *   (trivially true for an empty suffix).
 */
function endsWithTokens(tokens, suffix) {
  // Offset of the suffix within tokens; negative when the suffix is longer.
  const offset = tokens.length - suffix.length;
  const hasMismatch = suffix.some((expected, position) => tokens[offset + position] !== expected);
  return !hasMismatch;
}
|
|
1370
|
+
|
|
1371
|
+
// src/core/store/pipeline.ts
|
|
1372
|
+
import { randomUUID } from "crypto";
|
|
1373
|
+
|
|
1374
|
+
// src/core/supersession.ts
|
|
1375
|
+
/**
 * Checks whether `newEntry` is allowed to supersede `oldEntry`.
 *
 * Rules, in order: both entries must share a type; milestone entries are
 * never superseded; an old entry with "core" expiry is never superseded.
 *
 * @param oldEntry - Existing entry (reads `type` and `expiry`).
 * @param newEntry - Replacement candidate (reads `type`).
 * @returns `{ ok: true }` or `{ ok: false, reason }` with reason
 *   "type_mismatch", "milestone" or "core_expiry".
 */
function validateSupersessionRules(oldEntry, newEntry) {
  let reason = null;
  if (oldEntry.type !== newEntry.type) {
    reason = "type_mismatch";
  } else if (oldEntry.type === "milestone") {
    reason = "milestone";
  } else if (oldEntry.expiry === "core") {
    reason = "core_expiry";
  }
  return reason === null ? { ok: true } : { ok: false, reason };
}
|
|
1398
|
+
/**
 * Turns a validateSupersessionRules failure reason into a human-readable sentence.
 *
 * @param reason - "type_mismatch", "milestone" or "core_expiry".
 * @returns The sentence, or undefined for an unrecognized reason.
 */
function describeSupersessionRuleFailure(reason) {
  if (reason === "type_mismatch") {
    return "Supersession requires both entries to have the same type.";
  }
  if (reason === "milestone") {
    return "Milestone entries are never superseded automatically.";
  }
  if (reason === "core_expiry") {
    return "Core-expiry entries are never superseded automatically.";
  }
  return undefined;
}
|
|
1408
|
+
|
|
1409
|
+
// src/core/store/claim-extraction.ts
|
|
1410
|
+
// Entities that refer to the speaker, their team, or the current project.
var SELF_REFERENTIAL_ENTITIES2 = /* @__PURE__ */ new Set([
  "i", "me", "the_user", "myself", "user",
  "we", "our_team", "the_project", "this_project"
]);

// Subset of self-references that denote the user.
var USER_REFERENTIAL_ENTITIES = /* @__PURE__ */ new Set([
  "i", "me", "myself", "the_user", "user"
]);

// Subset of self-references that denote the current project.
var PROJECT_REFERENTIAL_ENTITIES = /* @__PURE__ */ new Set([
  "the_project", "this_project"
]);

// Attribute head nouns recognized by the claim-extraction code.
var DETERMINISTIC_ATTRIBUTE_HEADS = /* @__PURE__ */ new Set([
  "budget", "city", "config", "deadline", "email",
  "employer", "language", "limit", "location", "mode",
  "model", "name", "owner", "plan", "policy",
  "preference", "priority", "quota", "region", "role",
  "schedule", "setting", "status", "strategy", "team",
  "theme", "timezone", "version", "window"
]);

// Numeric limits and the default confidence used by the claim-extraction code.
var MAX_ENTITY_HINTS = 12;
var MAX_CLAIM_KEY_EXAMPLES = 8;
var DEFAULT_REPAIR_CONFIDENCE = 0.86;
|
|
1447
|
+
/**
 * Runs claim-key extraction for an entry without applying the confidence threshold.
 *
 * Flow: skip ineligible entries; ask the LLM; if the call throws, try a
 * deterministic repair and rethrow the original error when that yields
 * nothing; honour an explicit `no_claim`; otherwise build a candidate and
 * fall back to deterministic repair when the candidate is rejected.
 *
 * @param entry - Entry to extract a claim key for (its `type` gates eligibility).
 * @param llm - LLM port passed through to the extraction attempt.
 * @param config - Extraction config with `enabled` and `eligibleTypes`.
 * @param options - Optional `hints`, `onWarning`, and `onPreviewOutcome` callback.
 * @returns The preview (`claimKey`, `confidence`, `rawEntity`, `rawAttribute`,
 *   `path`, plus compaction details when present), the result of the
 *   deterministic repair, or null.
 */
async function previewClaimKeyExtraction(entry, llm, config, options = {}) {
  const eligible = config.enabled && config.eligibleTypes.includes(entry.type);
  if (!eligible) {
    return null;
  }
  const normalizedHints = normalizeClaimExtractionHints(options.hints ?? {});

  let attempt;
  try {
    attempt = await attemptClaimExtraction(entry, normalizedHints, llm);
  } catch (error) {
    // The LLM failure is only surfaced when the deterministic repair cannot help.
    const repaired = tryDeterministicClaimKeyRepair(entry, normalizedHints);
    if (!repaired) {
      throw error;
    }
    return repaired;
  }

  if (attempt.response.no_claim === true) {
    options.onPreviewOutcome?.(buildPreviewOutcome("no_claim", attempt));
    return null;
  }

  const candidate = buildClaimExtractionCandidate(entry, attempt.response, normalizedHints, options.onWarning);
  if (!candidate) {
    options.onPreviewOutcome?.(buildPreviewOutcome("rejected_candidate", attempt));
    return tryDeterministicClaimKeyRepair(entry, normalizedHints);
  }

  options.onPreviewOutcome?.({
    outcome: "candidate",
    confidence: candidate.confidence,
    rawEntity: candidate.rawEntity,
    rawAttribute: candidate.rawAttribute,
    path: attempt.path
  });

  const preview = {
    claimKey: candidate.claimKey,
    confidence: candidate.confidence,
    rawEntity: candidate.rawEntity,
    rawAttribute: candidate.rawAttribute,
    path: attempt.path
  };
  // Compaction details are attached only when compaction actually changed the key.
  if (candidate.compactedFrom) {
    preview.compactedFrom = candidate.compactedFrom;
    preview.compactionReason = candidate.compactionReason;
  }
  return preview;
}
|
|
1490
|
+
/**
 * Extract a claim key and apply the configured confidence threshold.
 *
 * A preview is accepted when it came from the deterministic repair path or when its
 * confidence reaches `config.confidenceThreshold`. Otherwise the deterministic repair
 * is tried as a last resort, and null is returned if that also fails.
 */
async function extractClaimKey(entry, llm, config, options = {}) {
  const preview = await previewClaimKeyExtraction(entry, llm, config, options);
  if (!preview) {
    return null;
  }
  const fromRepair = preview.path === "deterministic_repair";
  if (fromRepair || preview.confidence >= config.confidenceThreshold) {
    return preview;
  }
  const hints = normalizeClaimExtractionHints(options.hints ?? {});
  return tryDeterministicClaimKeyRepair(entry, hints) || null;
}
|
|
1504
|
+
/** Fetch the known entity hints by asking the db port for its distinct claim-key prefixes. */
async function getEntityHints(db) {
  const prefixes = await db.getDistinctClaimKeyPrefixes();
  return prefixes;
}
|
|
1507
|
+
/**
 * Extract claim keys for every eligible entry in a batch of extraction results.
 *
 * Entries are processed one after another because each accepted claim key is recorded
 * into the shared hint state and becomes a hint for the entries that follow. Entries
 * that already carry a `claim_key` only contribute hints. Extraction is best-effort:
 * a failure for one entry never aborts the batch, but it is reported through
 * `onWarning` instead of being silently dropped.
 *
 * @param results iterable of objects with an `entries` array; entries are mutated
 *   in place (`claim_key` is assigned on success)
 * @param ports object providing `db` and `createLlm()`
 * @param config extraction config (`enabled`, `eligibleTypes`, ...)
 * @param _concurrency accepted for interface compatibility; not used by this function
 * @param onWarning optional callback receiving human-readable warnings
 * @returns Map from entry object to its extraction result
 */
async function runBatchClaimExtraction(results, ports, config, _concurrency = 10, onWarning) {
  if (!config.enabled) {
    return /* @__PURE__ */ new Map();
  }
  const hintState = await loadClaimExtractionHintState(ports.db);
  const llm = ports.createLlm();
  const extractedEntries = /* @__PURE__ */ new Map();
  for (const result of results) {
    for (const entry of result.entries) {
      if (entry.claim_key) {
        recordClaimKeyHint(hintState, entry.claim_key);
        continue;
      }
      if (!config.eligibleTypes.includes(entry.type)) {
        continue;
      }
      try {
        const extracted = await extractClaimKey(
          {
            type: entry.type,
            subject: entry.subject,
            content: entry.content
          },
          llm,
          config,
          {
            hints: buildEntryHints(hintState, entry),
            onWarning
          }
        );
        if (extracted?.claimKey) {
          entry.claim_key = extracted.claimKey;
          recordClaimKeyHint(hintState, extracted.claimKey);
          extractedEntries.set(entry, extracted);
        }
      } catch (error) {
        // Keep the batch going, but surface the failure rather than hiding it.
        const reason = error instanceof Error ? error.message : String(error);
        onWarning?.(`Claim extraction failed for "${entry.subject}": ${reason}`);
      }
    }
  }
  return extractedEntries;
}
|
|
1548
|
+
/**
 * Assemble the system prompt for claim-key extraction.
 *
 * The prompt is a fixed set of sections (rules, examples, field rules) followed by the
 * per-entry hint lines. In "json_retry" mode an extra section telling the model that its
 * previous answer was invalid JSON is inserted just before the response-format line.
 *
 * @param hints normalized hints (`entityHints`, `claimKeyExamples`, `userEntity`,
 *   `projectEntity`, `tags`, `sourceContext`)
 * @param promptMode "standard" or "json_retry"
 * @returns the prompt as a single newline-joined string
 */
function buildClaimExtractionSystemPrompt(hints, promptMode) {
  // Render a hint list, or the "(none)" placeholder when it is empty.
  const renderList = (values) => values.length > 0 ? values.join(", ") : "(none)";
  const metadataHints = [];
  if (hints.userEntity) {
    metadataHints.push(`user_id=${hints.userEntity}`);
  }
  if (hints.projectEntity) {
    metadataHints.push(`project=${hints.projectEntity}`);
  }
  const groundingHints = [];
  if (hints.tags.length > 0) {
    groundingHints.push(`tags=${hints.tags.join(", ")}`);
  }
  if (hints.sourceContext) {
    groundingHints.push(`source_context=${hints.sourceContext}`);
  }
  const preamble = [
    "You are a knowledge entry classifier. Extract one stable claim key for a durable knowledge entry.",
    "A claim key names the durable slot this entry updates: entity/attribute in lowercase snake_case.",
    "The goal is stable slot naming, not a paraphrase of the current value."
  ];
  const stabilityRules = [
    "Stability rules:",
    "- Prefer stable slot names over transient wording.",
    "- Choose attribute names that still make sense if the value changes.",
    "- Prefer short noun-like slot names over sentence-like attribute phrases.",
    "- When a candidate sounds like a rule or explanation sentence, compress it into the reusable slot it governs.",
    "- Prefer concrete entities over pronouns, deictic phrases, or self-referential placeholders.",
    "- Reuse an existing entity or full claim-key example when it clearly matches the same slot.",
    "- Stay domain-general. The same rules apply to people, devices, services, projects, places, organizations, products, datasets, policies, and preferences.",
    "- If the entry states a durable rule, default, workflow, guardrail, source-of-truth rule, architecture boundary, or process constraint plus rationale, extract the primary durable slot rather than the supporting rationale.",
    "- Do not return no_claim just because the entry explains why the rule exists. The durable policy or system slot is usually still the target.",
    "- Avoid full action clauses like requires_x_to_y, preserves_x_across_y, or x_precedes_y when a shorter stable slot such as trigger_condition, context_preservation, source_of_truth, or handoff_order would carry the same durable meaning."
  ];
  const noClaimRules = [
    "Return no_claim when:",
    "- The entry is narrative, multi-fact, or mostly a story about what happened.",
    "- The entry is an event or milestone without one continuing slot.",
    "- The entity is ambiguous or can only be named with a pronoun or vague placeholder.",
    "- The entry does not express one durable property, preference, decision, configuration, relationship, or other stable slot.",
    "- When unsure, prefer no_claim over inventing a weak key."
  ];
  const positiveExamples = [
    "Positive examples:",
    `- "Jim's timezone is America/Chicago." -> jim/timezone`,
    '- "Jim prefers oat milk in coffee." -> jim/coffee_preference',
    '- "Pixel 8 is set to dark mode." -> pixel_8/theme_mode',
    '- "Postgres max_connections is 200." -> postgres/max_connections',
    '- "Agenr defaults to gpt-5.4-mini." -> agenr/default_model',
    '- "Mac mini updates should stay manual so debugging stays predictable." -> mac_mini/manual_update_policy',
    '- "Use the warehouse inventory sheet as the source of truth for stock counts." -> stock_counts/source_of_truth',
    '- "The repo workflow is defined by AGENTS.md, even when older notes disagree." -> repo_workflow/source_of_truth',
    '- "Agenr keeps pure logic in src/core and adapters outside it so future hosts can plug in cleanly." -> agenr/core_adapter_boundary',
    '- "The before-prompt-build hook only triggers after a real agent turn or message." -> before_prompt_build_hook/trigger_condition',
    '- "Durable memory preserves context across sessions." -> durable_memory/context_preservation'
  ];
  const negativeExamples = [
    "Negative examples:",
    "- Bad: jim/america_chicago -> Good: jim/timezone",
    "- Bad: project_x/details -> Good: project_x/deploy_strategy",
    "- Bad: we/deployment_process -> Good: platform_team/deploy_strategy",
    "- Bad: jim/oat_milk -> Good: jim/coffee_preference",
    "- Bad: release_notes/because_rollbacks_are_hard -> Good: release_process/source_of_truth",
    "- Bad: openclaw/requires_real_agent_turn_or_message_to_trigger -> Good: openclaw/trigger_condition",
    "- Bad: session_continuity/durable_memory_preserves_context_across_sessions -> Good: session_continuity/context_preservation",
    "- Bad: incident_story/we_spent_two_hours_debugging -> Good: no_claim"
  ];
  const fieldRules = [
    "Field rules:",
    "- entity: the main concrete thing being described. It can be a person, device, service, product, organization, workflow area, or other durable system/process anchor.",
    "- attribute: the narrow stable slot on that entity. For policy/process entries, name the governing slot such as source_of_truth, default_mode, update_policy, architecture_boundary, deploy_strategy, or escalation_workflow.",
    "- Confidence: 0.0 to 1.0. Use 0.9+ only when the slot is unambiguous and durable."
  ];
  const entryContext = [
    `Known entity hints: ${renderList(hints.entityHints)}`,
    `Known claim-key examples: ${renderList(hints.claimKeyExamples)}`,
    `Current entry metadata hints: ${renderList(metadataHints)}`,
    `Current entry grounding clues: ${renderList(groundingHints)}`,
    'If project metadata is present, it may resolve phrases like "the project" when that mapping is obvious.',
    'If user metadata is present, it may resolve phrases like "the user", "I", or "me" when that mapping is obvious.',
    "Tags and source_context are local grounding clues, not proof. Use them to pick the right durable slot only when the entry content already supports that slot."
  ];
  const retryNotice = [
    "Your previous answer was invalid JSON.",
    "Reply with exactly one JSON object and nothing else.",
    "Do not use markdown fences, commentary, or trailing text."
  ];
  const responseFormat = [
    'Respond with JSON: { "entity": string, "attribute": string, "confidence": number, "no_claim"?: boolean }'
  ];
  const sections = [preamble, stabilityRules, noClaimRules, positiveExamples, negativeExamples, fieldRules, entryContext];
  if (promptMode === "json_retry") {
    sections.push(retryNotice);
  }
  sections.push(responseFormat);
  // Sections are separated by exactly one blank line.
  const lines = sections.flatMap((section, position) => position === 0 ? section : ["", ...section]);
  return lines.join("\n");
}
|
|
1626
|
+
/** Render the entry's type, subject and content as the three-line user prompt. */
function buildClaimExtractionUserPrompt(entry) {
  const { type, subject, content } = entry;
  return `Entry type: ${type}\nSubject: ${subject}\nContent: ${content}`;
}
|
|
1629
|
+
/**
 * Ask the model for a claim key, retrying once with stricter JSON instructions.
 *
 * The first request uses the "standard" system prompt. Only when it fails with an error
 * that isMalformedJsonError recognizes is a second request made with the "json_retry"
 * prompt; any other error, and any error from the retry, propagates to the caller.
 *
 * @returns `{ path, response }` where `path` is "model" or "json_retry"
 */
async function attemptClaimExtraction(entry, hints, llm) {
  const userPrompt = buildClaimExtractionUserPrompt(entry);
  const request = async (path, promptMode) => ({
    path,
    response: await llm.completeJson(buildClaimExtractionSystemPrompt(hints, promptMode), userPrompt)
  });
  try {
    return await request("model", "standard");
  } catch (error) {
    const retryable = isMalformedJsonError(error);
    if (!retryable) {
      throw error;
    }
  }
  return request("json_retry", "json_retry");
}
|
|
1646
|
+
/**
 * Turn a raw model response into a validated claim-key candidate.
 *
 * The response's entity and attribute are normalized, joined as `entity/attribute`,
 * then normalized, compacted and validated in that order. Each failing stage reports a
 * warning through `onWarning` (when provided) and yields null.
 *
 * @returns `{ claimKey, confidence, rawEntity, rawAttribute, compactedFrom, compactionReason }` or null
 */
function buildClaimExtractionCandidate(entry, response, hints, onWarning) {
  const trimmedOrEmpty = (raw) => typeof raw === "string" ? raw.trim() : "";
  const confidence = normalizeConfidence(response.confidence);
  const rawEntity = trimmedOrEmpty(response.entity);
  const rawAttribute = trimmedOrEmpty(response.attribute);
  const keyText = `${normalizeEntity(rawEntity, hints)}/${normalizeClaimKeySegment(rawAttribute)}`;
  const normalized = normalizeClaimKey(keyText);
  if (!normalized.ok) {
    onWarning?.(`Claim extraction dropped claim key for "${entry.subject}": ${describeClaimKeyNormalizationFailure(normalized.reason)}.`);
    return null;
  }
  const compacted = compactClaimKey(normalized.value.claimKey);
  if (!compacted) {
    onWarning?.(`Claim extraction dropped claim key for "${entry.subject}": claim key could not be compacted safely.`);
    return null;
  }
  const validated = validateExtractedClaimKey(compacted);
  if (!validated.ok) {
    onWarning?.(
      `Claim extraction rejected "${validated.value.claimKey}" for "${entry.subject}": ${describeExtractedClaimKeyRejection(validated.reason, validated.value)}.`
    );
    return null;
  }
  const { compactedFrom, reason: compactionReason } = compacted;
  return {
    claimKey: validated.value.claimKey,
    confidence,
    rawEntity,
    rawAttribute,
    compactedFrom,
    compactionReason
  };
}
|
|
1678
|
+
/**
 * Derive a claim key without the model from an "X's Y" possessive phrase.
 *
 * The subject is tried first ("X's Y"), then the content ("X's Y is ..."). A parsed
 * phrase is only accepted when its attribute ends in one of the known deterministic
 * attribute heads and the resulting key passes normalization and validation.
 *
 * Previously a subject that parsed as a possessive but then failed those checks ended
 * the repair immediately, so a perfectly usable possessive statement in the content was
 * never considered. Each parsed candidate is now tried in order; any input that produced
 * a repair before still produces the same one.
 *
 * @param entry entry with `subject` and `content`
 * @param hints normalized extraction hints used to resolve placeholder entities
 * @returns repair result with `path: "deterministic_repair"`, or null
 */
function tryDeterministicClaimKeyRepair(entry, hints) {
  // Build a repair result from one parsed possessive, or null when it is not usable.
  const repairFrom = (parsed) => {
    const attribute = normalizeClaimKeySegment(parsed.attribute);
    if (!looksLikeDeterministicAttribute(attribute)) {
      return null;
    }
    const entity = normalizeEntity(parsed.entity, hints);
    const normalizedClaimKey = normalizeClaimKey(`${entity}/${attribute}`);
    if (!normalizedClaimKey.ok) {
      return null;
    }
    const validatedClaimKey = validateExtractedClaimKey(normalizedClaimKey.value);
    if (!validatedClaimKey.ok) {
      return null;
    }
    return {
      claimKey: validatedClaimKey.value.claimKey,
      confidence: DEFAULT_REPAIR_CONFIDENCE,
      rawEntity: parsed.entity,
      rawAttribute: parsed.attribute,
      path: "deterministic_repair"
    };
  };
  // Parsers are invoked lazily so the content is only inspected when the subject fails.
  const parsers = [
    () => parsePossessiveClaim(entry.subject),
    () => parsePossessiveStatement(entry.content)
  ];
  for (const parse of parsers) {
    const parsed = parse();
    const repaired = parsed ? repairFrom(parsed) : null;
    if (repaired) {
      return repaired;
    }
  }
  return null;
}
|
|
1704
|
+
/**
 * Load the initial hint state from the database.
 *
 * Both lookups run concurrently and are independent: a lookup that rejects simply
 * contributes an empty list instead of failing the whole load.
 */
async function loadClaimExtractionHintState(db) {
  const lookups = [getEntityHints(db), getClaimKeyExamples(db)];
  const settled = await Promise.allSettled(lookups);
  const [entityHints, claimKeyExamples] = settled.map(
    (outcome) => outcome.status === "fulfilled" ? outcome.value : []
  );
  return createHintState({ entityHints, claimKeyExamples });
}
|
|
1711
|
+
/**
 * Fetch up to MAX_CLAIM_KEY_EXAMPLES claim-key examples from the db port.
 * The port method is optional; when it is absent an empty list is returned.
 */
async function getClaimKeyExamples(db) {
  const supported = typeof db.getClaimKeyExamples === "function";
  return supported ? db.getClaimKeyExamples(MAX_CLAIM_KEY_EXAMPLES) : [];
}
|
|
1717
|
+
/**
 * Build the mutable hint state shared across a batch.
 *
 * Claim-key examples are normalized first; the entity hints are the normalized input
 * hints followed by the entity of every example, de-duplicated and capped at
 * MAX_ENTITY_HINTS.
 */
function createHintState(input) {
  const claimKeyExamples = normalizeClaimKeyExamples(input.claimKeyExamples ?? []);
  const candidateEntities = [...normalizeEntityHints(input.entityHints ?? [])];
  for (const example of claimKeyExamples) {
    const parsed = normalizeClaimKey(example);
    if (parsed.ok) {
      candidateEntities.push(parsed.value.entity);
    }
  }
  return {
    entityHints: limitUnique(candidateEntities, MAX_ENTITY_HINTS),
    claimKeyExamples
  };
}
|
|
1734
|
+
/**
 * Combine the shared hint state with one entry's own metadata.
 * The two hint lists are copied so later changes to the state do not affect the result.
 */
function buildEntryHints(state, entry) {
  const { user_id: userId, project, tags, source_context: sourceContext } = entry;
  const entityHints = [...state.entityHints];
  const claimKeyExamples = [...state.claimKeyExamples];
  return { entityHints, claimKeyExamples, userId, project, tags, sourceContext };
}
|
|
1744
|
+
/**
 * Record a claim key in the hint state so later entries can reuse it.
 * The normalized key and its entity are moved to the front of their lists; keys that
 * fail normalization are ignored.
 */
function recordClaimKeyHint(state, claimKey) {
  const parsed = normalizeClaimKey(claimKey);
  if (parsed.ok) {
    const { claimKey: normalizedKey, entity } = parsed.value;
    state.claimKeyExamples = prependUnique(state.claimKeyExamples, normalizedKey, MAX_CLAIM_KEY_EXAMPLES);
    state.entityHints = prependUnique(state.entityHints, entity, MAX_ENTITY_HINTS);
  }
}
|
|
1752
|
+
/**
 * Normalize caller-supplied hints into the shape the prompt builder and the entity
 * resolver expect.
 *
 * Entity hints are the normalized input hints plus the entity of each normalized
 * claim-key example (de-duplicated, capped at MAX_ENTITY_HINTS). `userId` and `project`
 * become `userEntity` / `projectEntity`; tags and source context are normalized too.
 */
function normalizeClaimExtractionHints(hints) {
  const claimKeyExamples = normalizeClaimKeyExamples(hints.claimKeyExamples ?? []);
  const candidateEntities = [...normalizeEntityHints(hints.entityHints ?? [])];
  for (const example of claimKeyExamples) {
    const parsed = normalizeClaimKey(example);
    if (parsed.ok) {
      candidateEntities.push(parsed.value.entity);
    }
  }
  const entityHints = limitUnique(candidateEntities, MAX_ENTITY_HINTS);
  const userEntity = normalizeMetadataEntity(hints.userId);
  const projectEntity = normalizeMetadataEntity(hints.project);
  const tags = normalizeHintTags(hints.tags ?? []);
  const sourceContext = normalizeSourceContextHint(hints.sourceContext);
  return { entityHints, claimKeyExamples, userEntity, projectEntity, tags, sourceContext };
}
|
|
1772
|
+
/**
 * Describe a model attempt for the `onPreviewOutcome` callback: the given outcome label
 * plus the clamped confidence, the trimmed raw entity/attribute (empty string when the
 * response field is not a string) and the path the attempt took.
 */
function buildPreviewOutcome(outcome, attempt) {
  const { response, path } = attempt;
  const trimmedOrEmpty = (raw) => typeof raw === "string" ? raw.trim() : "";
  const confidence = normalizeConfidence(response.confidence);
  const rawEntity = trimmedOrEmpty(response.entity);
  const rawAttribute = trimmedOrEmpty(response.attribute);
  return { outcome, confidence, rawEntity, rawAttribute, path };
}
|
|
1781
|
+
/** Clamp a confidence to [0, 1]; anything that is not a finite number becomes 0. */
function normalizeConfidence(value) {
  // Number.isFinite is false for every non-number, so no separate typeof check is needed.
  return Number.isFinite(value) ? Math.min(1, Math.max(0, value)) : 0;
}
|
|
1787
|
+
/**
 * Normalize an entity name and resolve self-referential placeholders where possible.
 *
 * Concrete entities are returned as their normalized segment. A placeholder ("i",
 * "we", "the_project", ...) is resolved, in order, to: the user hint (user-style
 * placeholders), the project hint (project-style placeholders), the single concrete
 * candidate among project/user/entity hints, or the single known entity hint. If none
 * of those applies the placeholder itself is returned.
 */
function normalizeEntity(value, hints) {
  const segment = normalizeClaimKeySegment(value);
  if (segment.length === 0) {
    return "";
  }
  const isPlaceholder = SELF_REFERENTIAL_ENTITIES2.has(segment);
  if (!isPlaceholder) {
    return segment;
  }
  if (hints.userEntity && USER_REFERENTIAL_ENTITIES.has(segment)) {
    return hints.userEntity;
  }
  if (hints.projectEntity && PROJECT_REFERENTIAL_ENTITIES.has(segment)) {
    return hints.projectEntity;
  }
  const knownEntities = [hints.projectEntity, hints.userEntity, ...hints.entityHints];
  const concreteCandidates = limitUnique(
    knownEntities.filter((candidate) => typeof candidate === "string" && candidate.length > 0),
    MAX_ENTITY_HINTS
  );
  if (concreteCandidates.length === 1) {
    const [onlyCandidate] = concreteCandidates;
    return onlyCandidate ?? segment;
  }
  if (hints.entityHints.length === 1) {
    const [onlyHint] = hints.entityHints;
    return onlyHint ?? segment;
  }
  return segment;
}
|
|
1815
|
+
/**
 * Normalize entity hints to claim-key segments, dropping empty results and
 * self-referential placeholders, then de-duplicate and cap at MAX_ENTITY_HINTS.
 */
function normalizeEntityHints(entityHints) {
  const usable = [];
  for (const hint of entityHints) {
    const segment = normalizeClaimKeySegment(hint);
    if (segment.length > 0 && !SELF_REFERENTIAL_ENTITIES2.has(segment)) {
      usable.push(segment);
    }
  }
  return limitUnique(usable, MAX_ENTITY_HINTS);
}
|
|
1821
|
+
/**
 * Normalize claim-key examples, discarding any that fail normalization, then
 * de-duplicate and cap at MAX_CLAIM_KEY_EXAMPLES.
 */
function normalizeClaimKeyExamples(claimKeyExamples) {
  const normalizedKeys = [];
  for (const example of claimKeyExamples) {
    const parsed = normalizeClaimKey(example);
    if (parsed.ok) {
      normalizedKeys.push(parsed.value.claimKey);
    }
  }
  return limitUnique(normalizedKeys, MAX_CLAIM_KEY_EXAMPLES);
}
|
|
1830
|
+
/**
 * Normalize a user/project metadata value into an entity segment.
 * Returns undefined for non-strings and for values whose normalized form is empty,
 * is a self-referential placeholder, or contains no lowercase ASCII letter.
 */
function normalizeMetadataEntity(value) {
  if (typeof value !== "string") {
    return void 0;
  }
  const segment = normalizeClaimKeySegment(value);
  const usable = segment.length > 0 && !SELF_REFERENTIAL_ENTITIES2.has(segment) && /[a-z]/u.test(segment);
  return usable ? segment : void 0;
}
|
|
1840
|
+
/** Normalize tags to claim-key segments, drop empty ones, de-duplicate and keep at most 8. */
function normalizeHintTags(tags) {
  const segments = [];
  for (const tag of tags) {
    const segment = normalizeClaimKeySegment(tag);
    if (segment.length > 0) {
      segments.push(segment);
    }
  }
  return limitUnique(segments, 8);
}
|
|
1846
|
+
/**
 * Prepare a source-context string for inclusion in the prompt.
 *
 * The value is trimmed; blank values yield undefined. Values longer than 160
 * characters are cut to 157 characters (trailing whitespace removed) plus "...".
 * Non-string values yield undefined instead of throwing, matching how
 * normalizeMetadataEntity treats non-string metadata.
 *
 * @param value raw source context, possibly missing or of the wrong type
 * @returns the cleaned hint, or undefined when there is nothing usable
 */
function normalizeSourceContextHint(value) {
  if (typeof value !== "string") {
    return void 0;
  }
  const trimmed = value.trim();
  if (trimmed.length === 0) {
    return void 0;
  }
  return trimmed.length <= 160 ? trimmed : `${trimmed.slice(0, 157).trimEnd()}...`;
}
|
|
1853
|
+
/**
 * Heuristically decide whether an error describes malformed JSON by matching its
 * message (or its string form, for non-Error values) against common parser phrases.
 */
function isMalformedJsonError(error) {
  let message;
  if (error instanceof Error) {
    message = error.message;
  } else {
    message = String(error);
  }
  const malformedJsonPattern = /json|unexpected token|unexpected end|unexpected non-whitespace|unterminated|position \d+/iu;
  return malformedJsonPattern.test(message);
}
|
|
1857
|
+
/**
 * Parse a subject of the form "X's Y" (straight or curly apostrophe, single sentence)
 * into `{ entity, attribute }`, or return null when the subject has a different shape.
 */
function parsePossessiveClaim(subject) {
  const possessivePattern = /^\s*(?<entity>[^.!?\n]+?)[’']s\s+(?<attribute>[^.!?\n]+?)\s*$/iu;
  const groups = possessivePattern.exec(subject)?.groups;
  if (!groups) {
    return null;
  }
  const entity = stripTrailingPunctuation(groups.entity);
  const attribute = stripTrailingPunctuation(groups.attribute);
  return { entity, attribute };
}
|
|
1867
|
+
/**
 * Parse content that opens with "X's Y is/are/was/were ..." into `{ entity, attribute }`,
 * or return null when the content does not start with such a statement.
 */
function parsePossessiveStatement(content) {
  const statementPattern = /^\s*(?<entity>[^.!?\n]+?)[’']s\s+(?<attribute>[^.!?\n]+?)\s+(?:is|are|was|were)\b/iu;
  const groups = statementPattern.exec(content)?.groups;
  if (!groups) {
    return null;
  }
  const entity = stripTrailingPunctuation(groups.entity);
  const attribute = stripTrailingPunctuation(groups.attribute);
  return { entity, attribute };
}
|
|
1877
|
+
/** Trim a value and remove any trailing run of whitespace, quotes and sentence punctuation. */
function stripTrailingPunctuation(value) {
  const trimmed = value.trim();
  const withoutTail = trimmed.replace(/[\s"'“”‘’.,:;!?]+$/gu, "");
  return withoutTail.trim();
}
|
|
1880
|
+
/**
 * Accept a snake_case attribute for deterministic repair only when it has one to four
 * words and its last word is one of DETERMINISTIC_ATTRIBUTE_HEADS.
 */
function looksLikeDeterministicAttribute(attribute) {
  const words = attribute.split("_").filter((word) => word.length > 0);
  const wordCount = words.length;
  if (wordCount < 1 || wordCount > 4) {
    return false;
  }
  const lastWord = words[wordCount - 1];
  return typeof lastWord === "string" && DETERMINISTIC_ATTRIBUTE_HEADS.has(lastWord);
}
|
|
1888
|
+
/** Put `value` at the front of `values`, then de-duplicate and cap the list at `limit`. */
function prependUnique(values, value, limit) {
  const withValueFirst = [value, ...values];
  return limitUnique(withValueFirst, limit);
}
|
|
1891
|
+
/**
 * Return the first `limit` distinct non-empty values, preserving first-seen order.
 * "Non-empty" means `value.length > 0`.
 */
function limitUnique(values, limit) {
  const distinct = /* @__PURE__ */ new Set();
  for (const value of values) {
    if (value.length > 0) {
      distinct.add(value);
    }
  }
  return [...distinct].slice(0, limit);
}
|
|
1894
|
+
|
|
1895
|
+
// src/core/store/hashing.ts
|
|
1896
|
+
import { createHash as createHash2 } from "crypto";
|
|
1897
|
+
/**
 * SHA-256 hex digest of an entry's content. When a source file is given (truthy), the
 * hashed text is the source file, a newline, then the content.
 */
function computeContentHash(content, sourceFile) {
  const hashedText = sourceFile ? [sourceFile, content].join("\n") : content;
  const hasher = createHash2("sha256");
  hasher.update(hashedText);
  return hasher.digest("hex");
}
|
|
1902
|
+
/**
 * SHA-256 hex digest of a normalized form of the content: lower-cased, whitespace runs
 * collapsed to one space, trimmed, then every character that is neither a word
 * character (`\w`) nor whitespace removed.
 */
function computeNormContentHash(content) {
  const lowered = content.toLowerCase();
  const collapsed = lowered.replace(/\s+/g, " ").trim();
  const normalized = collapsed.replace(/[^\w\s]/g, "");
  return createHash2("sha256").update(normalized).digest("hex");
}
|
|
1906
|
+
|
|
1907
|
+
// src/core/store/validation.ts
|
|
1908
|
+
// Canonical 8-4-4-4-12 hex UUID with the RFC variant nibble (8, 9, a or b).
// The version nibble accepts 1-8: RFC 9562 defines versions 6, 7 and 8 in addition to
// the original 1-5, so time-ordered ids such as UUIDv7 are no longer rejected.
var UUID_PATTERN = /^[0-9a-f]{8}-[0-9a-f]{4}-[1-8][0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$/iu;
|
|
1909
|
+
/**
 * Return the reason an entry must be rejected, or null when it is acceptable.
 * The checks run in a fixed order and the first failure wins. The returned phrase is
 * embedded in "Entry <index> <phrase>." by the caller.
 */
function describeEntryRejection(input, subject, content) {
  // Optional string fields: present but not a valid string counts as invalid.
  const failsStringCheck = (value, check) => value !== void 0 && !(typeof value === "string" && check(value));
  if (!ENTRY_TYPES.includes(input.type)) {
    return "has an invalid type";
  }
  if (subject.length === 0) {
    return "is missing a subject";
  }
  if (content.length === 0) {
    return "is missing content";
  }
  if (input.expiry !== void 0 && !EXPIRY_LEVELS.includes(input.expiry)) {
    return "has an invalid expiry";
  }
  if (input.tags !== void 0 && !areValidTags(input.tags)) {
    return "has invalid tags";
  }
  if (input.importance !== void 0 && !Number.isFinite(input.importance)) {
    return "has an invalid importance";
  }
  if (failsStringCheck(input.supersedes, isUuid)) {
    return "has an invalid supersedes id";
  }
  if (failsStringCheck(input.valid_from, isIsoTimestamp)) {
    return "has an invalid valid_from timestamp";
  }
  if (failsStringCheck(input.valid_to, isIsoTimestamp)) {
    return "has an invalid valid_to timestamp";
  }
  return null;
}

/**
 * Validate and normalize a batch of entry inputs, keeping track of input positions.
 *
 * Every input is either rejected (one error message, its index recorded) or turned into
 * a normalized entry. An unusable `claim_key` never rejects the entry: the key is
 * dropped and a warning is recorded instead.
 *
 * Malformed inputs are rejected individually rather than crashing the batch: a
 * null/undefined entry, or a `subject`/`content`/`supersedes`/`valid_from`/`valid_to`
 * that is not a string, previously raised a TypeError from `.trim()`.
 *
 * @param inputs array of raw entry inputs
 * @returns `{ valid, rejected, rejectedInputIndexes, errors, warnings }` where `valid`
 *   holds `{ inputIndex, input }` pairs with normalized inputs
 */
function validateEntriesWithIndexes(inputs) {
  const valid = [];
  const errors = [];
  const warnings = [];
  const rejectedInputIndexes = [];
  for (const [index, rawInput] of inputs.entries()) {
    // A missing entry is treated as an empty object so it fails the type check below.
    const input = rawInput ?? {};
    const subject = typeof input.subject === "string" ? normalizeString(input.subject) : "";
    const content = typeof input.content === "string" ? normalizeString(input.content) : "";
    const rejection = describeEntryRejection(input, subject, content);
    if (rejection !== null) {
      errors.push(`Entry ${index} ${rejection}.`);
      rejectedInputIndexes.push(index);
      continue;
    }
    let normalizedClaimKey;
    if (input.claim_key !== void 0) {
      if (typeof input.claim_key !== "string") {
        warnings.push(`Entry ${index} provided a non-string claim key and it was dropped.`);
      } else {
        const claimKey = normalizeClaimKey(input.claim_key);
        if (claimKey.ok) {
          normalizedClaimKey = claimKey.value.claimKey;
        } else {
          warnings.push(
            `Entry ${index} provided invalid claim key ${JSON.stringify(input.claim_key)} and it was dropped: ${describeClaimKeyNormalizationFailure(claimKey.reason)}.`
          );
        }
      }
    }
    valid.push({
      inputIndex: index,
      input: {
        type: input.type,
        subject,
        content,
        importance: clampImportance(input.importance),
        expiry: input.expiry ?? "temporary",
        tags: normalizeTags(input.tags),
        source_file: normalizeOptionalString(input.source_file),
        source_context: normalizeOptionalString(input.source_context),
        user_id: normalizeOptionalString(input.user_id),
        project: normalizeOptionalString(input.project),
        created_at: normalizeOptionalString(input.created_at),
        supersedes: normalizeOptionalString(input.supersedes),
        claim_key: normalizedClaimKey,
        valid_from: normalizeOptionalString(input.valid_from),
        valid_to: normalizeOptionalString(input.valid_to)
      }
    });
  }
  return {
    valid,
    rejected: errors.length,
    rejectedInputIndexes,
    errors,
    warnings
  };
}
|
|
2006
|
+
/** Round an importance to the nearest integer and clamp it to 1..10; undefined defaults to 7. */
function clampImportance(value) {
  const DEFAULT_IMPORTANCE = 7;
  return value === void 0 ? DEFAULT_IMPORTANCE : Math.min(10, Math.max(1, Math.round(value)));
}
|
|
2012
|
+
/**
 * Trim a required string field.
 * Non-string values (missing, null, numbers, ...) yield "" so the caller's emptiness
 * check reports the field as missing, instead of throwing a TypeError from `.trim()`.
 */
function normalizeString(value) {
  return typeof value === "string" ? value.trim() : "";
}
|
|
2015
|
+
/**
 * Trim an optional string field.
 * Returns undefined for blank strings, for missing values, and for any value that is
 * not a string (which previously threw a TypeError when the value was not nullish).
 */
function normalizeOptionalString(value) {
  if (typeof value !== "string") {
    return void 0;
  }
  const normalized = value.trim();
  return normalized.length > 0 ? normalized : void 0;
}
|
|
2019
|
+
/** True when the value is an array whose elements are all strings. */
function areValidTags(value) {
  if (!Array.isArray(value)) {
    return false;
  }
  const hasNonString = value.some((tag) => typeof tag !== "string");
  return !hasNonString;
}
|
|
2022
|
+
/** Trim every tag and drop the ones that end up empty; a missing tag list yields []. */
function normalizeTags(tags) {
  if (!tags) {
    return [];
  }
  return tags.flatMap((tag) => {
    const trimmed = tag.trim();
    return trimmed.length > 0 ? [trimmed] : [];
  });
}
|
|
2028
|
+
/**
 * True when the value is a string that, once trimmed, matches UUID_PATTERN.
 * Non-string values are reported as invalid (false) rather than throwing a TypeError,
 * so a validator can reject them with a normal error message.
 */
function isUuid(value) {
  return typeof value === "string" && UUID_PATTERN.test(value.trim());
}
|
|
2031
|
+
/**
 * Loose ISO-timestamp check: the trimmed value must be non-empty, contain a "T", and
 * be parseable by `Date.parse`. Non-string values are reported as invalid (false)
 * rather than throwing a TypeError.
 */
function isIsoTimestamp(value) {
  if (typeof value !== "string") {
    return false;
  }
  const normalized = value.trim();
  return normalized.length > 0 && normalized.includes("T") && !Number.isNaN(Date.parse(normalized));
}
|
|
2035
|
+
|
|
2036
|
+
// src/core/store/pipeline.ts
|
|
2037
|
+
// NOTE(review): judging by the names, these gate automatic supersession on extracted
// claim keys (minimum confidence and allowed extraction paths) — confirm at the use site.
var AUTO_SUPERSESSION_MIN_EXTRACTED_CONFIDENCE = 0.9;
var AUTO_SUPERSESSION_ELIGIBLE_PATHS = /* @__PURE__ */ new Set([
  "model",
  "json_retry"
]);
|
|
2039
|
+
/**
 * Store a batch of entries and report a per-input outcome.
 *
 * Steps: build the store plan, forward its warnings, then — unless nothing is pending or
 * this is a dry run — extract claim keys, resolve embeddings and persist. Every pending
 * entry is reported as "stored" (or "dry_run" in dry-run mode) alongside the details the
 * plan already produced.
 *
 * @param inputs raw entry inputs; an empty array short-circuits to an all-zero result
 * @param db database port
 * @param embedding embedding port
 * @param options optional `onWarning`, `dryRun`, `precomputedEmbeddings`, `claimExtraction`
 * @returns `{ stored, skipped, rejected, details }`
 */
async function storeEntriesDetailed(inputs, db, embedding, options = {}) {
  if (inputs.length === 0) {
    return { stored: 0, skipped: 0, rejected: 0, details: [] };
  }
  const plan = await buildStorePlan(inputs, db);
  for (const warning of plan.warnings) {
    options.onWarning?.(warning);
  }
  const { pendingEntries } = plan;
  // Shared result shape; only the stored count and the detail list vary per branch.
  const summarize = (stored, details) => ({
    stored,
    skipped: plan.skipped,
    rejected: plan.rejected,
    details: sortStoreDetails(details)
  });
  if (pendingEntries.length === 0) {
    return summarize(0, plan.details);
  }
  if (options.dryRun === true) {
    const dryRunDetails = pendingEntries.map((entry) => ({
      inputIndex: entry.inputIndex,
      outcome: "dry_run",
      reason: "dry_run"
    }));
    return summarize(0, [...plan.details, ...dryRunDetails]);
  }
  const extractedClaimKeys = await maybeExtractClaimKeys(pendingEntries, options);
  const embeddings = await resolvePendingEmbeddings(inputs, pendingEntries, embedding, options.precomputedEmbeddings);
  await persistEntries(db, pendingEntries, embeddings, extractedClaimKeys, options.claimExtraction?.config, options.onWarning);
  const storedDetails = pendingEntries.map((entry) => ({
    inputIndex: entry.inputIndex,
    outcome: "stored"
  }));
  return summarize(pendingEntries.length, [...plan.details, ...storedDetails]);
}
|
|
2087
|
+
/**
 * Produce one embedding vector per pending entry.
 *
 * Without precomputed embeddings the entries are embedded through the embedding port.
 * With them, the array must have one slot per original input, and each pending entry
 * picks the vector at its `inputIndex`.
 *
 * @throws Error when the precomputed array length differs from `inputs.length`, or when
 *   the slot for a pending entry is empty
 */
async function resolvePendingEmbeddings(inputs, entries, embedding, precomputedEmbeddings) {
  if (precomputedEmbeddings) {
    const expected = inputs.length;
    const received = precomputedEmbeddings.length;
    if (received !== expected) {
      throw new Error(`Precomputed embedding length mismatch: expected ${expected}, received ${received}.`);
    }
    const vectors = [];
    for (const { inputIndex } of entries) {
      const vector = precomputedEmbeddings[inputIndex];
      if (!vector) {
        throw new Error(`Missing precomputed embedding for input index ${inputIndex}.`);
      }
      vectors.push(vector);
    }
    return vectors;
  }
  return embedPendingEntries(entries, embedding);
}
|
|
2102
|
+
/**
 * Embed the pending entries in a single call to the embedding port.
 * Each entry's text comes from composeEmbeddingText on its input.
 *
 * @throws Error when the port returns a different number of vectors than entries
 */
async function embedPendingEntries(entries, embedding) {
  const texts = [];
  for (const entry of entries) {
    texts.push(composeEmbeddingText(entry.input));
  }
  const vectors = await embedding.embed(texts);
  if (vectors.length === entries.length) {
    return vectors;
  }
  throw new Error(`Embedding length mismatch: expected ${entries.length}, received ${vectors.length}.`);
}
|
|
2110
|
+
/**
 * Writes the prepared entries to the database and applies supersession links.
 *
 * For each entry, in order: insert it, apply an explicit `input.supersedes`
 * link if present, then apply the auto-supersession plan computed for its
 * `inputIndex`. Plan warnings are reported once per distinct message.
 *
 * The batch runs inside `db.withTransaction` when the database offers it and
 * either more than one entry is written or any entry defines `supersedes` or
 * `claim_key`; otherwise it runs directly against `db`.
 *
 * @param {object} db - Database port.
 * @param {Array} preparedEntries - Entries to store.
 * @param {Array} embeddings - Vectors aligned with `preparedEntries`; a missing slot becomes `[]`.
 * @param {Map} extractedClaimKeys - Extraction results keyed by input index.
 * @param {object|undefined} claimExtractionConfig - Forwarded to planAutoSupersession.
 * @param {Function|undefined} onWarning - Optional warning sink.
 * @returns {Promise<number>} Whatever the batch (or the transaction wrapper) resolves to; the batch itself resolves to the number of entries written.
 */
async function persistEntries(db, preparedEntries, embeddings, extractedClaimKeys, claimExtractionConfig, onWarning) {
  const writeBatch = async (targetDb) => {
    const plansByInputIndex = await planAutoSupersession(targetDb, preparedEntries, extractedClaimKeys, claimExtractionConfig);
    const reportedWarnings = new Set();
    let written = 0;
    for (let position = 0; position < preparedEntries.length; position += 1) {
      const preparedEntry = preparedEntries[position];
      const vector = embeddings[position] ?? [];
      const entryId = await targetDb.insertEntry(buildEntry(preparedEntry, vector), vector, preparedEntry.contentHash);

      // Explicit supersession requested by the caller.
      const explicitTargetId = preparedEntry.input.supersedes;
      if (explicitTargetId) {
        const linked = await targetDb.supersedeEntry(explicitTargetId, entryId, "update");
        if (!linked) {
          onWarning?.(`Stored entry ${entryId} but could not supersede ${explicitTargetId} because the target was missing or inactive.`);
        }
      }

      // Supersession planned automatically from the claim key.
      const plan = plansByInputIndex.get(preparedEntry.inputIndex);
      if (plan?.kind === "link" && plan.oldEntryId) {
        const linked = await targetDb.supersedeEntry(plan.oldEntryId, entryId, "update");
        if (!linked) {
          onWarning?.(
            `Stored entry ${entryId} with claim_key "${preparedEntry.input.claim_key}" but could not auto-supersede ${plan.oldEntryId} because the target was missing or inactive.`
          );
        }
      }
      if (plan?.warning && !reportedWarnings.has(plan.warning)) {
        reportedWarnings.add(plan.warning);
        onWarning?.(plan.warning);
      }
      written += 1;
    }
    return written;
  };

  const touchesSupersession = (entry) => entry.input.supersedes !== undefined || entry.input.claim_key !== undefined;
  if (hasTransactionSupport(db) && (preparedEntries.some(touchesSupersession) || preparedEntries.length > 1)) {
    return db.withTransaction(writeBatch);
  }
  return writeBatch(db);
}
|
|
2151
|
+
/**
 * Builds the entry record that is handed to the database for insertion.
 *
 * Defaults applied here: importance 7, expiry "temporary", empty tags,
 * quality_score 0.5, recall_count 0, retired false. `created_at` keeps the
 * caller-provided value when present; `updated_at` is always the current time.
 *
 * @param {{input: object, contentHash: string, normContentHash: string}} preparedEntry
 * @param {Array} embedding - Vector stored on the entry.
 * @returns {object} A new entry with a fresh random UUID.
 */
function buildEntry(preparedEntry, embedding) {
  const { input, contentHash, normContentHash } = preparedEntry;
  const timestamp = new Date().toISOString();
  return {
    id: randomUUID(),
    type: input.type,
    subject: input.subject,
    content: input.content,
    importance: input.importance ?? 7,
    expiry: input.expiry ?? "temporary",
    tags: input.tags ?? [],
    source_file: input.source_file,
    source_context: input.source_context,
    user_id: input.user_id,
    project: input.project,
    embedding,
    content_hash: contentHash,
    norm_content_hash: normContentHash,
    quality_score: 0.5,
    recall_count: 0,
    valid_from: input.valid_from,
    valid_to: input.valid_to,
    claim_key: input.claim_key,
    retired: false,
    created_at: input.created_at ?? timestamp,
    updated_at: timestamp
  };
}
|
|
2178
|
+
/**
 * Runs claim-key extraction for the pending entries when it is configured.
 *
 * All entries are sent as a single batch. Failures are reported through
 * `options.onWarning` and yield an empty map, so extraction problems do not
 * abort the store.
 *
 * @param {Array<{input: object, inputIndex: number}>} preparedEntries
 * @param {{claimExtraction?: object, onWarning?: Function}} options
 * @returns {Promise<Map<number, object>>} Extraction results keyed by input index.
 */
async function maybeExtractClaimKeys(preparedEntries, options) {
  const { claimExtraction } = options;
  const keysByInputIndex = new Map();
  if (!claimExtraction || preparedEntries.length === 0) {
    return keysByInputIndex;
  }
  try {
    const batch = { entries: preparedEntries.map(({ input }) => input) };
    const extractionPorts = {
      createLlm: () => claimExtraction.llm,
      db: claimExtraction.db
    };
    const extractedEntries = await runBatchClaimExtraction(
      [batch],
      extractionPorts,
      claimExtraction.config,
      1,
      options.onWarning
    );
    // Results are keyed by the input object itself; re-key them by input index.
    for (const { input, inputIndex } of preparedEntries) {
      const extracted = extractedEntries.get(input);
      if (extracted) {
        keysByInputIndex.set(inputIndex, extracted);
      }
    }
    return keysByInputIndex;
  } catch (error) {
    const subject = preparedEntries[0]?.input.subject ?? "batch";
    options.onWarning?.(`Claim extraction failed for "${subject}": ${formatPipelineError(error)}`);
    return new Map();
  }
}
|
|
2212
|
+
/**
 * Reports whether the database object exposes a callable `withTransaction`.
 *
 * @param {object} db - Database port to probe.
 * @returns {boolean} True when `db.withTransaction` is a function.
 */
function hasTransactionSupport(db) {
  const candidate = db.withTransaction;
  return typeof candidate === "function";
}
|
|
2215
|
+
/**
 * Decides, per prepared entry, whether it should automatically supersede an
 * existing active entry that shares its claim key.
 *
 * Entries without a claim key, with an explicit `supersedes`, or with no
 * active sibling get no plan at all. Otherwise the plan is either
 * `{ kind: "link", oldEntryId }` or `{ kind: "skip", warning }`; a skip is
 * produced when the batch holds several entries for the same claim key, when
 * several active siblings already exist, when the claim key is not eligible,
 * or when the supersession rules reject the pairing.
 *
 * @param {object} db - Database port used to look up active siblings.
 * @param {Array} preparedEntries - Entries about to be stored.
 * @param {Map} extractedClaimKeys - Extraction results keyed by input index.
 * @param {object|undefined} claimExtractionConfig - Claim-extraction settings.
 * @returns {Promise<Map<number, object>>} Plans keyed by input index.
 */
async function planAutoSupersession(db, preparedEntries, extractedClaimKeys, claimExtractionConfig) {
  const plans = new Map();
  const batchGroups = groupPreparedEntriesByClaimKey(preparedEntries);
  const siblingCache = new Map();

  for (const preparedEntry of preparedEntries) {
    const { input, inputIndex } = preparedEntry;
    const claimKey = input.claim_key;
    if (!claimKey || input.supersedes) {
      continue;
    }

    const siblings = await getClaimKeySiblings(db, siblingCache, claimKey);
    if (siblings.length === 0) {
      continue;
    }

    const skip = (warning) => {
      plans.set(inputIndex, { kind: "skip", warning });
    };

    const batchSiblingCount = batchGroups.get(claimKey)?.length ?? 0;
    if (batchSiblingCount > 1) {
      skip(`Skipped auto-supersession for claim_key "${claimKey}" because this store batch contains ${batchSiblingCount} entries for the same slot.`);
      continue;
    }
    if (siblings.length > 1) {
      skip(`Skipped auto-supersession for claim_key "${claimKey}" because ${siblings.length} active siblings already exist for that slot.`);
      continue;
    }

    const sibling = siblings[0];
    if (!sibling) {
      continue;
    }

    if (!isAutoSupersessionEligible(preparedEntry, extractedClaimKeys, claimExtractionConfig)) {
      skip(buildAutoSupersessionEligibilityWarning(preparedEntry, extractedClaimKeys.get(inputIndex)));
      continue;
    }

    const validation = validateSupersessionRules(sibling, {
      type: input.type,
      expiry: input.expiry ?? "temporary"
    });
    if (!validation.ok) {
      skip(buildAutoSupersessionRuleWarning(preparedEntry, sibling, validation.reason));
      continue;
    }

    plans.set(inputIndex, { kind: "link", oldEntryId: sibling.id });
  }

  return plans;
}
|
|
2272
|
+
/**
 * Groups prepared entries by `input.claim_key`, ignoring entries without one.
 *
 * @param {Array<{input: {claim_key?: string}}>} preparedEntries
 * @returns {Map<string, Array>} Entries per claim key, in first-seen order.
 */
function groupPreparedEntriesByClaimKey(preparedEntries) {
  const grouped = new Map();
  for (const preparedEntry of preparedEntries) {
    const claimKey = preparedEntry.input.claim_key;
    if (!claimKey) {
      continue;
    }
    const bucket = grouped.get(claimKey);
    if (bucket) {
      bucket.push(preparedEntry);
    } else {
      grouped.set(claimKey, [preparedEntry]);
    }
  }
  return grouped;
}
|
|
2285
|
+
/**
 * Looks up the active entries for a claim key, memoising the answer in `cache`.
 *
 * @param {{findActiveEntriesByClaimKey: Function}} db - Database port.
 * @param {Map} cache - Per-call cache keyed by claim key; filled on a miss.
 * @param {string} claimKey - Claim key to look up.
 * @returns {Promise<Array>} The cached or freshly fetched siblings.
 */
async function getClaimKeySiblings(db, cache, claimKey) {
  const hit = cache.get(claimKey);
  if (!hit) {
    const fetched = await db.findActiveEntriesByClaimKey(claimKey);
    cache.set(claimKey, fetched);
    return fetched;
  }
  return hit;
}
|
|
2294
|
+
/**
 * Decides whether an entry's claim key may trigger automatic supersession.
 *
 * Manual claim keys always qualify. Extracted ones qualify only when an
 * extraction record and a config exist, the record's `path` is in
 * AUTO_SUPERSESSION_ELIGIBLE_PATHS, and its confidence reaches the larger of
 * the configured threshold and AUTO_SUPERSESSION_MIN_EXTRACTED_CONFIDENCE.
 *
 * @param {{claimKeySource?: string, inputIndex: number}} preparedEntry
 * @param {Map} extractedClaimKeys - Extraction results keyed by input index.
 * @param {{confidenceThreshold: number}|undefined} claimExtractionConfig
 * @returns {boolean}
 */
function isAutoSupersessionEligible(preparedEntry, extractedClaimKeys, claimExtractionConfig) {
  if (preparedEntry.claimKeySource === "manual") {
    return true;
  }
  const extracted = extractedClaimKeys.get(preparedEntry.inputIndex);
  const trackable = Boolean(extracted) && Boolean(claimExtractionConfig);
  if (!trackable || !AUTO_SUPERSESSION_ELIGIBLE_PATHS.has(extracted.path)) {
    return false;
  }
  const requiredConfidence = Math.max(claimExtractionConfig.confidenceThreshold, AUTO_SUPERSESSION_MIN_EXTRACTED_CONFIDENCE);
  return extracted.confidence >= requiredConfidence;
}
|
|
2307
|
+
/**
 * Builds the warning emitted when an entry was stored but its claim key did
 * not qualify for automatic supersession.
 *
 * @param {{input: object, claimKeySource?: string}} preparedEntry
 * @param {{path: string, confidence: number}|undefined} extractedClaimKey
 * @returns {string} Human-readable warning text.
 */
function buildAutoSupersessionEligibilityWarning(preparedEntry, extractedClaimKey) {
  const claimKey = preparedEntry.input.claim_key ?? "(missing)";
  const lead = `Stored entry "${preparedEntry.input.subject}" with claim_key "${claimKey}" but skipped auto-supersession because `;
  if (preparedEntry.claimKeySource === "manual") {
    return `${lead}the claim-key provenance was not eligible for automatic linking.`;
  }
  if (!extractedClaimKey) {
    return `${lead}the claim-key provenance was not explicit or a tracked high-confidence extraction.`;
  }
  return `${lead}the extracted claim key came from ${extractedClaimKey.path} at confidence ${extractedClaimKey.confidence.toFixed(2)}. Only explicit/manual claim keys or model-extracted keys at ${AUTO_SUPERSESSION_MIN_EXTRACTED_CONFIDENCE.toFixed(2)}+ auto-link.`;
}
|
|
2317
|
+
/**
 * Builds the warning emitted when the supersession rules rejected an automatic link.
 *
 * A "type_mismatch" reason additionally names both entry types.
 *
 * @param {{input: object}} preparedEntry - The newly stored entry.
 * @param {{type: string}} sibling - The existing active entry with the same claim key.
 * @param {string} reason - Failure reason reported by the rule validation.
 * @returns {string} Human-readable warning text.
 */
function buildAutoSupersessionRuleWarning(preparedEntry, sibling, reason) {
  const { subject, claim_key: claimKey, type } = preparedEntry.input;
  const lead = `Stored entry "${subject}" with claim_key "${claimKey}" but skipped auto-supersession`;
  if (reason !== "type_mismatch") {
    return `${lead}: ${describeSupersessionRuleFailure(reason)}`;
  }
  return `${lead} because the matching active entry is type "${sibling.type}" and the new entry is type "${type}". ${describeSupersessionRuleFailure(reason)}`;
}
|
|
2323
|
+
/**
 * Validates and de-duplicates store inputs, producing the list of entries
 * that still need to be written plus a per-input outcome log.
 *
 * Filtering order: validation, duplicate content hash within the batch,
 * content hash already in the database, duplicate normalised-content hash
 * within the batch, normalised-content hash already in the database.
 *
 * @param {Array} inputs - Raw store inputs.
 * @param {{findExistingHashes: Function, findExistingNormHashes: Function}} db
 * @returns {Promise<{pendingEntries: Array, skipped: number, rejected: *, details: Array, warnings: *}>}
 */
async function buildStorePlan(inputs, db) {
  const validation = validateEntriesWithIndexes(inputs);

  const details = [];
  for (const inputIndex of validation.rejectedInputIndexes) {
    details.push({ inputIndex, outcome: "rejected", reason: "validation" });
  }

  const preparedEntries = validation.valid.map((validEntry) => {
    const { input, inputIndex } = validEntry;
    return {
      input,
      inputIndex,
      contentHash: computeContentHash(input.content, input.source_file),
      normContentHash: computeNormContentHash(input.content),
      // A claim key present on the raw input was supplied by the caller.
      claimKeySource: input.claim_key ? "manual" : undefined
    };
  });

  // Exact-content duplicates: first within the batch, then against the database.
  const uniqueByContent = dedupePreparedEntries(preparedEntries, "contentHash", "content_hash", details);
  const knownContentHashes = await db.findExistingHashes(uniqueByContent.map(({ contentHash }) => contentHash));
  const newByContent = filterExistingPreparedEntries(uniqueByContent, knownContentHashes, "contentHash", "content_hash", details);

  // Normalised-content duplicates: same two steps.
  const uniqueByNorm = dedupePreparedEntries(newByContent, "normContentHash", "norm_content_hash", details);
  const knownNormHashes = await db.findExistingNormHashes(uniqueByNorm.map(({ normContentHash }) => normContentHash));
  const pendingEntries = filterExistingPreparedEntries(uniqueByNorm, knownNormHashes, "normContentHash", "norm_content_hash", details);

  const skipped = details.filter(({ outcome }) => outcome === "skipped").length;
  return {
    pendingEntries,
    skipped,
    rejected: validation.rejected,
    details,
    warnings: validation.warnings
  };
}
|
|
2351
|
+
/**
 * Drops entries whose `field` value was already seen earlier in the list.
 *
 * Each dropped entry is recorded in `details` (mutated in place) as
 * `{ inputIndex, outcome: "skipped", reason }`.
 *
 * @param {Array<object>} entries - Entries to scan, in order.
 * @param {string} field - Property used as the uniqueness key.
 * @param {string} reason - Reason stored on the skip record.
 * @param {Array<object>} details - Outcome log that receives skip records.
 * @returns {Array<object>} First occurrence of each key, in original order.
 */
function dedupePreparedEntries(entries, field, reason, details) {
  const seenKeys = new Set();
  return entries.filter((entry) => {
    const key = entry[field];
    if (!seenKeys.has(key)) {
      seenKeys.add(key);
      return true;
    }
    details.push({ inputIndex: entry.inputIndex, outcome: "skipped", reason });
    return false;
  });
}
|
|
2369
|
+
/**
 * Removes entries whose `field` value is already present in `existing`.
 *
 * Each removed entry is recorded in `details` (mutated in place) as
 * `{ inputIndex, outcome: "skipped", reason }`.
 *
 * @param {Array<object>} entries - Candidate entries.
 * @param {{has: Function}} existing - Collection of known values (queried via `has`).
 * @param {string} field - Property compared against `existing`.
 * @param {string} reason - Reason stored on the skip record.
 * @param {Array<object>} details - Outcome log that receives skip records.
 * @returns {Array<object>} Entries not found in `existing`, in original order.
 */
function filterExistingPreparedEntries(entries, existing, field, reason, details) {
  const kept = [];
  for (const entry of entries) {
    if (existing.has(entry[field])) {
      details.push({ inputIndex: entry.inputIndex, outcome: "skipped", reason });
    } else {
      kept.push(entry);
    }
  }
  return kept;
}
|
|
2382
|
+
/**
 * Converts a thrown value into text for warning messages.
 *
 * @param {*} error - Anything caught by a `catch` clause.
 * @returns {string} `error.message` for Error instances, otherwise `String(error)`.
 */
function formatPipelineError(error) {
  return error instanceof Error ? error.message : String(error);
}
|
|
2388
|
+
/**
 * Returns a copy of the outcome records ordered by ascending `inputIndex`.
 * The argument itself is left untouched.
 *
 * @param {Iterable<{inputIndex: number}>} details
 * @returns {Array<{inputIndex: number}>}
 */
function sortStoreDetails(details) {
  const byInputIndex = (left, right) => left.inputIndex - right.inputIndex;
  return Array.from(details).sort(byInputIndex);
}
|
|
2391
|
+
|
|
2392
|
+
// src/core/episode/summary-prompt.ts
|
|
2393
|
+
// System prompt passed as the first argument to `llm.complete` by
// generateEpisodeSummary. The JSON shape it requests (summary, tags,
// activityLevel, project) is the shape parseEpisodeSummaryResponse reads.
// This is runtime text sent to the model: edit wording deliberately.
var EPISODE_SUMMARY_SYSTEM_PROMPT = [
  "You write strict JSON episode summaries for historical recall.",
  "The transcript can be about any topic - technical work, casual conversation, planning, research, creative projects, life events, or anything else.",
  "Do not assume any particular domain.",
  "Describe only what happened in this session.",
  "Do not carry inherited context or open loops forward unless the session actively worked on them.",
  "Return exactly one JSON object with this shape:",
  '{ "summary": string, "tags": string[], "activityLevel": "substantial" | "minimal" | "none", "project": string | null }',
  "Requirements:",
  "- summary must be 100 to 300 words in plain prose (roughly 4 to 10 sentences)",
  "- describe what was discussed, decided, or accomplished - not a turn-by-turn replay",
  "- this is a narrative overview for historical recall, not a verbatim record",
  "- preserve concrete details worth remembering: names, places, dates, specific decisions, key topics, and notable specifics that would help someone recall this session months later",
  "- tags must be 3 to 8 short lowercase anchors drawn from the actual session content",
  "- project should be null when no clear project scope appears",
  "- activityLevel: use substantial when meaningful discussion or work occurred, minimal when the session was brief or lightweight, none when essentially nothing happened",
  "- do not include Markdown fences or extra commentary"
].join("\n");
|
|
2411
|
+
/**
 * Builds the user prompt for episode summarisation: two instruction lines, a
 * blank line, a "Transcript:" label, then the transcript itself.
 *
 * @param {string} transcript - Rendered transcript text.
 * @returns {string} Newline-joined prompt.
 */
function buildEpisodeSummaryPrompt(transcript) {
  const promptLines = [
    "Produce a historical episodic summary for this completed session.",
    "Describe what was discussed, decided, or accomplished during this transcript window.",
    "",
    "Transcript:"
  ];
  promptLines.push(transcript);
  return promptLines.join("\n");
}
|
|
2420
|
+
/**
 * Parses and validates the model's episode-summary response.
 *
 * @param {string} value - Raw model output.
 * @returns {{summary: string, tags: string[], activityLevel: string, project?: string}|null}
 *   The normalised summary, or null when the output is not a JSON object or
 *   lacks a usable `summary` or `activityLevel`. `project` is only present
 *   when a non-empty project string was supplied.
 */
function parseEpisodeSummaryResponse(value) {
  const parsed = parseJsonObject(value);
  const isPlainRecord = Boolean(parsed) && typeof parsed === "object" && !Array.isArray(parsed);
  if (!isPlainRecord) {
    return null;
  }
  const summary = normalizeSummary(parsed.summary);
  const activityLevel = normalizeActivityLevel(parsed.activityLevel);
  if (!summary || !activityLevel) {
    return null;
  }
  const result = {
    summary,
    tags: normalizeTags2(parsed.tags),
    activityLevel
  };
  const project = normalizeProject(parsed.project);
  if (project) {
    result.project = project;
  }
  return result;
}
|
|
2438
|
+
/**
 * Collapses every whitespace run to a single space and trims the summary.
 *
 * @param {*} value - Candidate summary.
 * @returns {string|null} The cleaned text, or null for non-strings and blank text.
 */
function normalizeSummary(value) {
  if (typeof value === "string") {
    const collapsed = value.replace(/\s+/gu, " ").trim();
    if (collapsed) {
      return collapsed;
    }
  }
  return null;
}
|
|
2445
|
+
/**
 * Validates an activity level against EPISODE_ACTIVITY_LEVELS after trimming
 * and lower-casing it.
 *
 * @param {*} value - Candidate activity level.
 * @returns {string|null} The normalised level, or null when it is not a
 *   string or not one of the known levels.
 */
function normalizeActivityLevel(value) {
  if (typeof value === "string") {
    const candidate = value.trim().toLowerCase();
    if (EPISODE_ACTIVITY_LEVELS.includes(candidate)) {
      return candidate;
    }
  }
  return null;
}
|
|
2452
|
+
/**
 * Cleans a tag list: keeps strings only, trims and lower-cases them, drops
 * empties and duplicates, and returns at most the first eight.
 *
 * @param {*} value - Candidate tag array.
 * @returns {string[]} Normalised tags; empty when `value` is not an array.
 */
function normalizeTags2(value) {
  if (!Array.isArray(value)) {
    return [];
  }
  const unique = new Set();
  for (const tag of value) {
    if (typeof tag !== "string") {
      continue;
    }
    const cleaned = tag.trim().toLowerCase();
    if (cleaned.length > 0) {
      unique.add(cleaned);
    }
  }
  return [...unique].slice(0, 8);
}
|
|
2462
|
+
/**
 * Collapses whitespace in a project name and trims it.
 *
 * @param {*} value - Candidate project name.
 * @returns {string|undefined} The cleaned name, or undefined for non-strings and blank text.
 */
function normalizeProject(value) {
  if (typeof value === "string") {
    const collapsed = value.replace(/\s+/gu, " ").trim();
    if (collapsed) {
      return collapsed;
    }
  }
  return undefined;
}
|
|
2469
|
+
/**
 * Extracts a JSON value from model output, preferring a plain object.
 *
 * Candidates from collectJsonCandidates are tried in order. The first one
 * that parses to a non-array object wins. Previously the first candidate that
 * parsed at all was returned, so output such as `[{...}]` produced an array
 * and the embedded object was never tried. If no candidate yields an object,
 * the first successfully parsed value is returned (the earlier behaviour);
 * if nothing parses, null.
 *
 * @param {string} value - Raw text that may contain JSON.
 * @returns {*} Parsed object, fallback parsed value, or null.
 */
function parseJsonObject(value) {
  let hasFallback = false;
  let fallback = null;
  for (const candidate of collectJsonCandidates(value)) {
    let parsed;
    try {
      parsed = JSON.parse(candidate);
    } catch {
      continue;
    }
    if (parsed !== null && typeof parsed === "object" && !Array.isArray(parsed)) {
      return parsed;
    }
    if (!hasFallback) {
      hasFallback = true;
      fallback = parsed;
    }
  }
  return fallback;
}

/**
 * Lists distinct substrings of `value` that might be JSON, in priority order:
 * the whole trimmed text, the contents of each ``` fenced block (with an
 * optional "json" tag), and the span from the first "{" to the last "}".
 *
 * @param {string} value - Raw text.
 * @returns {string[]} Candidate strings without duplicates.
 */
function collectJsonCandidates(value) {
  const trimmed = value.trim();
  const candidates = new Set();
  if (trimmed) {
    candidates.add(trimmed);
  }
  // Capture group 1 is the fence body; the lazy quantifier stops at the first closing fence.
  for (const [, fenceBody] of trimmed.matchAll(/```(?:json)?\s*([\s\S]*?)```/giu)) {
    const inner = fenceBody.trim();
    if (inner) {
      candidates.add(inner);
    }
  }
  const objectStart = trimmed.indexOf("{");
  const objectEnd = trimmed.lastIndexOf("}");
  if (objectStart >= 0 && objectEnd > objectStart) {
    candidates.add(trimmed.slice(objectStart, objectEnd + 1));
  }
  return [...candidates];
}
|
|
2500
|
+
|
|
2501
|
+
// src/core/episode/summary-generator.ts
|
|
2502
|
+
/**
 * Asks the LLM for an episode summary of the transcript and parses the reply.
 *
 * @param {string} transcript - Rendered transcript text.
 * @param {{complete: Function}} llm - Port whose `complete(system, user)` resolves to the raw reply.
 * @returns {Promise<object|null>} The parsed summary, or null when the reply is unusable.
 */
async function generateEpisodeSummary(transcript, llm) {
  const userPrompt = buildEpisodeSummaryPrompt(transcript);
  const rawResponse = await llm.complete(EPISODE_SUMMARY_SYSTEM_PROMPT, userPrompt);
  return parseEpisodeSummaryResponse(rawResponse);
}
|
|
2506
|
+
|
|
2507
|
+
// src/app/episode-ingest/service/preflight.ts
|
|
2508
|
+
import path from "path";
|
|
2509
|
+
|
|
2510
|
+
// src/core/episode/transcript-render.ts
|
|
2511
|
+
// Transcripts with fewer non-empty messages than this are classified as
// "skipped_short" by classifyPreflightTranscript.
var MIN_EPISODE_MESSAGES = 4;
// Character budget passed to capEpisodeTranscript for the rendered transcript.
var MAX_EPISODE_TRANSCRIPT_CHARS = 14e3;
|
|
2513
|
+
/**
 * Renders messages as "User: ..." / "Assistant: ..." lines joined by newlines.
 * Any role other than "user" is labelled "Assistant"; message text is trimmed.
 *
 * @param {Array<{role: string, text: string}>} messages
 * @returns {string}
 */
function renderTranscript(messages) {
  const lines = [];
  for (const message of messages) {
    const speaker = message.role === "user" ? "User" : "Assistant";
    lines.push(`${speaker}: ${message.text.trim()}`);
  }
  return lines.join("\n");
}
|
|
2516
|
+
/**
 * Shortens a transcript to roughly `maxChars` by keeping its head and tail and
 * replacing the middle with an omission marker.
 *
 * After reserving room for the marker, 35% of the remaining budget goes to
 * the head and the rest to the tail; both are trimmed to a whitespace
 * boundary so words are not cut in half.
 *
 * Fix: a zero tail budget used to evaluate `transcript.slice(-0)`, which is
 * the entire transcript, so tiny budgets returned almost everything. An empty
 * budget now yields an empty tail. When `maxChars` is smaller than the marker
 * itself, the result is just the marker and therefore still longer than
 * `maxChars`.
 *
 * @param {string} transcript - Full rendered transcript.
 * @param {number} maxChars - Character budget.
 * @returns {string} The transcript unchanged when it fits, otherwise head + marker + tail.
 */
function capEpisodeTranscript(transcript, maxChars) {
  if (transcript.length <= maxChars) {
    return transcript;
  }
  const omissionMarker = "\n\n[Earlier middle transcript omitted for brevity]\n\n";
  const available = Math.max(0, maxChars - omissionMarker.length);
  const headBudget = Math.floor(available * 0.35);
  const tailBudget = available - headBudget;
  const head = trimToBoundary(transcript.slice(0, headBudget), false);
  // slice(-0) is slice(0), i.e. the whole string, so guard the empty budget explicitly.
  const tail = tailBudget > 0 ? trimToBoundary(transcript.slice(-tailBudget), true) : "";
  return `${head}${omissionMarker}${tail}`.trim();
}

/**
 * Trims a slice so that it does not start or end with a partial word.
 *
 * @param {string} value - Slice cut at an arbitrary character offset.
 * @param {boolean} fromStart - True to drop the leading partial word (a tail
 *   slice); false to drop the trailing partial word (a head slice).
 * @returns {string} The trimmed slice; when it holds no whitespace the whole
 *   slice is kept, trimmed.
 */
function trimToBoundary(value, fromStart) {
  if (value.length === 0) {
    return value;
  }
  if (fromStart) {
    const firstWhitespace = value.search(/\s/u);
    return firstWhitespace >= 0 ? value.slice(firstWhitespace).trimStart() : value.trim();
  }
  // Matches the last whitespace character that is followed only by non-whitespace.
  const lastWhitespace = value.trimEnd().search(/\s\S*$/u);
  return lastWhitespace >= 0 ? value.slice(0, lastWhitespace).trimEnd() : value.trim();
}
|
|
2538
|
+
|
|
2539
|
+
// src/app/episode-ingest/service/shared.ts
|
|
2540
|
+
// Characters assumed per token when estimating prompt size.
var CHARS_PER_TOKEN_ESTIMATE = 4;

/**
 * Estimates the token count of a text from its length, rounding up and never
 * returning less than 1.
 *
 * @param {string} renderedTranscript - Text to measure.
 * @returns {number} Estimated token count.
 */
function estimateInputTokens(renderedTranscript) {
  const estimate = Math.ceil(renderedTranscript.length / CHARS_PER_TOKEN_ESTIMATE);
  return estimate < 1 ? 1 : estimate;
}
|
|
2544
|
+
/**
 * Estimates the input tokens of one episode-summary request: the system
 * prompt plus the user prompt built from the transcript.
 *
 * @param {string} renderedTranscript - Rendered transcript text.
 * @returns {number} Estimated token count.
 */
function estimateEpisodeSummaryInputTokens(renderedTranscript) {
  const systemTokens = estimateInputTokens(EPISODE_SUMMARY_SYSTEM_PROMPT);
  const userTokens = estimateInputTokens(buildEpisodeSummaryPrompt(renderedTranscript));
  return systemTokens + userTokens;
}
|
|
2547
|
+
/**
 * Embeds an episode summary, preferring `ports.embedSummary` when it exists
 * and otherwise falling back to the generic `ports.embedding` port.
 *
 * Best-effort: a failure of `ports.embedSummary` yields undefined.
 *
 * @param {string} summary - Summary text.
 * @param {{embedSummary?: Function, embedding?: object}} ports
 * @returns {Promise<number[]|undefined>} The normalised vector, or undefined.
 */
async function embedEpisodeSummary(summary, ports) {
  if (!ports.embedSummary) {
    return embedEpisodeSummaryWithPort(summary, ports.embedding);
  }
  try {
    const vector = await ports.embedSummary(summary);
    return normalizeEmbeddingVector(vector);
  } catch {
    return undefined;
  }
}
|
|
2557
|
+
/**
 * Embeds an episode summary through a generic embedding port.
 *
 * Best-effort: a missing port or any failure yields undefined.
 *
 * @param {string} summary - Summary text.
 * @param {{embed: Function}|undefined} embeddingPort - Port whose `embed([text])` resolves to vectors.
 * @returns {Promise<number[]|undefined>} The normalised first vector, or undefined.
 */
async function embedEpisodeSummaryWithPort(summary, embeddingPort) {
  if (embeddingPort) {
    try {
      const vectors = await embeddingPort.embed([summary]);
      return normalizeEmbeddingVector(vectors[0]);
    } catch {
      // Fall through: embedding is optional for episode summaries.
    }
  }
  return undefined;
}
|
|
2568
|
+
/**
 * Parses a candidate's `endedAt` value into a Date.
 *
 * @param {string|undefined} endedAt - Timestamp text.
 * @returns {Date|undefined} The parsed date, or undefined when the value is
 *   missing or does not parse.
 */
function parseCandidateEndedAt(endedAt) {
  if (endedAt) {
    const parsed = new Date(endedAt);
    if (!Number.isNaN(parsed.getTime())) {
      return parsed;
    }
  }
  return undefined;
}
|
|
2575
|
+
/**
 * Creates a runner that executes tasks one at a time, in submission order.
 *
 * Each task starts only after the previous one has settled. A failing task
 * rejects the promise returned for it but does not block later tasks.
 *
 * @returns {(task: Function) => Promise<*>} Runner resolving to the task's result.
 */
function createSerializedExecutor() {
  // Tail of the queue; it never rejects because every link swallows its outcome.
  let tail = Promise.resolve();
  const ignoreOutcome = () => undefined;
  return async (task) => {
    const run = tail.then(task, task);
    tail = run.then(ignoreOutcome, ignoreOutcome);
    return run;
  };
}
|
|
2586
|
+
/**
 * Creates a usage-statistics record with every counter set to zero.
 *
 * @returns {{calls: number, inputTokens: number, outputTokens: number, cacheReadTokens: number, cacheWriteTokens: number, totalTokens: number, totalCost: number}}
 */
function createEmptyUsageStats() {
  const counters = [
    "calls",
    "inputTokens",
    "outputTokens",
    "cacheReadTokens",
    "cacheWriteTokens",
    "totalTokens",
    "totalCost"
  ];
  return Object.fromEntries(counters.map((counter) => [counter, 0]));
}
|
|
2597
|
+
/**
 * Copies the seven usage counters into a new record; other properties on
 * `usage` are not carried over.
 *
 * @param {object} usage - Usage record to copy.
 * @returns {object} A fresh record holding the same counter values.
 */
function cloneUsageStats(usage) {
  const {
    calls,
    inputTokens,
    outputTokens,
    cacheReadTokens,
    cacheWriteTokens,
    totalTokens,
    totalCost
  } = usage;
  return {
    calls,
    inputTokens,
    outputTokens,
    cacheReadTokens,
    cacheWriteTokens,
    totalTokens,
    totalCost
  };
}
|
|
2608
|
+
/**
 * Adds every counter of `usage` onto `total`, mutating `total` in place.
 *
 * @param {object} total - Running totals (mutated).
 * @param {object} usage - Counters to add.
 * @returns {object} The same `total` object, for chaining.
 */
function addUsageStats(total, usage) {
  const counters = [
    "calls",
    "inputTokens",
    "outputTokens",
    "cacheReadTokens",
    "cacheWriteTokens",
    "totalTokens",
    "totalCost"
  ];
  for (const counter of counters) {
    total[counter] += usage[counter];
  }
  return total;
}
|
|
2618
|
+
/**
 * Trims an optional string.
 *
 * @param {string|null|undefined} value
 * @returns {string|undefined} The trimmed text, or undefined when the value
 *   is missing or blank.
 */
function trimOptionalString(value) {
  const trimmed = value?.trim();
  if (!trimmed) {
    return undefined;
  }
  return trimmed;
}
|
|
2622
|
+
/**
 * Converts a thrown value into display text.
 *
 * @param {*} error - Anything caught by a `catch` clause.
 * @returns {string} For Error instances the message, or the error name when
 *   the message is empty; otherwise `String(error)`.
 */
function formatExecutionError(error) {
  if (!(error instanceof Error)) {
    return String(error);
  }
  return error.message || error.name;
}
|
|
2628
|
+
/**
 * Sort comparator that orders candidates by `endedAt`, newest first, breaking
 * ties by `filePath`.
 *
 * Candidates without an `endedAt` sort last. Fix: an `endedAt` that does not
 * parse used to produce NaN, which made the comparator return NaN and left
 * the sort order inconsistent; such values are now treated like a missing
 * timestamp.
 *
 * @param {{endedAt?: string, filePath: string}} left
 * @param {{endedAt?: string, filePath: string}} right
 * @returns {number} Negative when `left` sorts first, positive when `right` does, 0 on a full tie.
 */
function compareCandidatesByEndedAt(left, right) {
  const toSortableTime = (endedAt) => {
    if (!endedAt) {
      return Number.NEGATIVE_INFINITY;
    }
    const time = new Date(endedAt).getTime();
    return Number.isNaN(time) ? Number.NEGATIVE_INFINITY : time;
  };
  const leftTime = toSortableTime(left.endedAt);
  const rightTime = toSortableTime(right.endedAt);
  if (leftTime !== rightTime) {
    return rightTime - leftTime;
  }
  return left.filePath.localeCompare(right.filePath);
}
|
|
2636
|
+
/**
 * Sanitises an embedding vector by replacing every non-finite component with 0.
 *
 * @param {Array|null|undefined} vector - Raw vector.
 * @returns {number[]|undefined} The sanitised copy, or undefined when the
 *   vector is missing or empty.
 */
function normalizeEmbeddingVector(vector) {
  if (vector === null || vector === undefined) {
    return undefined;
  }
  const sanitized = vector.map((component) => (Number.isFinite(component) ? component : 0));
  return sanitized && sanitized.length > 0 ? sanitized : undefined;
}
|
|
2640
|
+
|
|
2641
|
+
// src/app/episode-ingest/service/preflight.ts
|
|
2642
|
+
// Five minutes, in milliseconds.
// NOTE(review): presumably the recency window used when deciding whether a
// session is still active; its consumer is not visible in this part of the file, so confirm.
var ACTIVE_SESSION_WINDOW_MS = 5 * 60 * 1e3;
|
|
2643
|
+
/**
 * Discovers transcript files under `targetPath` and classifies each one as an
 * ingest candidate, a skipped file, or an invalid file.
 *
 * Files are classified by a pool of concurrent workers that pull indexes from
 * a shared counter. Results are stored by file index, so the skipped and
 * invalid lists keep discovery order; candidates are then sorted with
 * compareCandidatesByEndedAt.
 *
 * @param {string} targetPath - Location handed to `ports.files.discoverFiles`.
 * @param {object} ports - File, transcript and optional session-registry ports.
 * @param {object} [options]
 * @param {number} [options.preflightConcurrency=20] - Worker count; non-finite
 *   values fall back to 20 and the value is truncated and floored at 1.
 * @param {Date} [options.now] - Reference time; defaults to the current time.
 * @param {boolean} [options.regenerate] - Forwarded to the classifier as a strict boolean.
 * @param {Function} [options.onPreflightProgress] - Called as (completed, total) after each file.
 * @returns {Promise<object>} Files, candidates, skipped, invalid and per-category totals.
 */
async function prepareEpisodeIngest(targetPath, ports, options = {}) {
  const files = await ports.files.discoverFiles(targetPath);
  if (files.length === 0) {
    return createEmptyPreflightResult();
  }
  if (ports.sessionRegistry) {
    // Result is discarded here.
    // NOTE(review): presumably this primes the registry before per-file lookups; confirm.
    await ports.sessionRegistry.listSessions();
  }

  const requestedConcurrency = options.preflightConcurrency ?? 20;
  const concurrency = Number.isFinite(requestedConcurrency) ? Math.max(1, Math.trunc(requestedConcurrency)) : 20;
  const workerCount = Math.min(concurrency, files.length);
  const total = files.length;

  const skippedByIndex = new Array(total);
  const invalidByIndex = new Array(total);
  const candidatesByIndex = new Array(total);
  const referenceNow = options.now ?? new Date();
  const classifierOptions = {
    referenceNow,
    regenerate: options.regenerate === true
  };

  let nextIndex = 0;
  let completed = 0;

  const runWorker = async () => {
    // Claim indexes from the shared counter until the list is exhausted.
    for (let currentIndex = nextIndex++; currentIndex < total; currentIndex = nextIndex++) {
      const filePath = files[currentIndex];
      if (!filePath) {
        // NOTE(review): a falsy entry stops this worker instead of skipping one file; confirm that is intended.
        return;
      }
      const result = await classifyPreflightTranscript(filePath, ports, classifierOptions);
      if (result.kind === "candidate") {
        candidatesByIndex[currentIndex] = result.value;
      } else if (result.kind === "skipped") {
        skippedByIndex[currentIndex] = result.value;
      } else {
        invalidByIndex[currentIndex] = result.value;
      }
      completed += 1;
      options.onPreflightProgress?.(completed, total);
    }
  };

  await Promise.all(Array.from({ length: workerCount }, runWorker));

  // Drop the unused slots while keeping discovery order.
  const skipped = skippedByIndex.filter(Boolean);
  const invalid = invalidByIndex.filter(Boolean);
  const candidates = candidatesByIndex.filter(Boolean);
  candidates.sort(compareCandidatesByEndedAt);

  const countSkipped = (reason) => skipped.filter((entry) => entry.reason === reason).length;
  return {
    files,
    candidates,
    skipped,
    invalid,
    totals: {
      discovered: total,
      candidates: candidates.length,
      skipped: skipped.length,
      invalid: invalid.length,
      skippedShort: countSkipped("skipped_short"),
      skippedActive: countSkipped("skipped_active"),
      skippedExists: countSkipped("skipped_exists")
    }
  };
}
|
|
2708
|
+
/**
 * Parses one transcript file and decides what the ingest run should do with it.
 *
 * Outcomes, checked in this order:
 *  - "invalid": no session id could be resolved and there are no non-empty messages;
 *  - "skipped" / "skipped_exists": an episode already exists and regeneration is off;
 *  - "skipped" / "skipped_short": fewer than MIN_EPISODE_MESSAGES non-empty messages;
 *  - "skipped" / "skipped_active": the session still counts as active (unless
 *    `options.skipActiveSessionCheck` is true);
 *  - "candidate": everything else, with the rendered and capped transcript attached.
 *
 * @param {string} filePath - Transcript file to classify.
 * @param {object} ports - Transcript parser, optional session registry, and
 *   whatever findExistingEpisode needs.
 * @param {{referenceNow: Date, regenerate?: boolean, skipActiveSessionCheck?: boolean}} options
 * @returns {Promise<{kind: string, value: object}>}
 */
async function classifyPreflightTranscript(filePath, ports, options) {
  const parsedTranscript = await ports.transcript.parseFile(filePath);
  const { metadata } = parsedTranscript;
  const cleanedMessages = parsedTranscript.messages.filter((message) => message.text.trim().length > 0);
  const messageCount = cleanedMessages.length;

  const parsedSessionId = metadata.sessionId?.trim() || undefined;
  const registryMeta = parsedSessionId ? await ports.sessionRegistry?.getSessionMeta(parsedSessionId) : undefined;
  // Reconstructed metadata is only prepared when the registry has nothing for this session.
  const reconstructedMeta = registryMeta
    ? undefined
    : {
        surface: metadata.reconstructedSurface ?? null,
        metadataSource: metadata.surfaceReconstructionSource ?? "none"
      };
  const resolvedMeta = resolveSessionMeta(filePath, parsedSessionId, registryMeta, reconstructedMeta);
  const { transcriptHash } = metadata;

  if (!resolvedMeta.sessionId && messageCount === 0) {
    return {
      kind: "invalid",
      value: {
        filePath,
        sessionId: undefined,
        transcriptHash,
        messageCount: 0,
        metadataSource: resolvedMeta.metadataSource
      }
    };
  }

  const existingEpisode = await findExistingEpisode(ports, resolvedMeta.sessionId, transcriptHash);

  // Shared payload for every "skipped" outcome; `extra` is appended last.
  const skippedResult = (reason, extra = {}) => ({
    kind: "skipped",
    value: {
      filePath,
      reason,
      sessionId: resolvedMeta.sessionId,
      transcriptHash,
      messageCount,
      startedAt: metadata.startedAt,
      endedAt: metadata.endedAt,
      agentId: resolvedMeta.agentId,
      surface: resolvedMeta.surface,
      metadataSource: resolvedMeta.metadataSource,
      ...extra
    }
  });

  if (existingEpisode && options.regenerate !== true) {
    return skippedResult("skipped_exists", { existingEpisode });
  }
  if (messageCount < MIN_EPISODE_MESSAGES) {
    return skippedResult("skipped_short");
  }
  if (options.skipActiveSessionCheck !== true && isActiveSession(metadata.endedAt, options.referenceNow)) {
    return skippedResult("skipped_active");
  }

  const renderedTranscript = capEpisodeTranscript(renderTranscript(cleanedMessages), MAX_EPISODE_TRANSCRIPT_CHARS);
  const candidate = {
    filePath,
    sessionId: resolvedMeta.sessionId,
    sourceRef: resolvedMeta.sourceRef,
    transcriptHash,
    startedAt: metadata.startedAt,
    endedAt: metadata.endedAt,
    messageCount,
    agentId: resolvedMeta.agentId,
    surface: resolvedMeta.surface,
    metadataSource: resolvedMeta.metadataSource,
    renderedTranscript,
    estimatedInputTokens: estimateInputTokens(renderedTranscript)
  };
  if (existingEpisode) {
    // Only reachable when regeneration was requested.
    candidate.existingEpisode = existingEpisode;
  }
  return { kind: "candidate", value: candidate };
}
|
|
2803
|
+
/**
 * Builds a blank preflight result.
 *
 * The result carries four empty lists (`files`, `candidates`, `skipped`,
 * `invalid`) and a `totals` record whose counters all start at zero. Every
 * call allocates fresh arrays and objects, so callers may mutate the result.
 *
 * @returns {object} A zeroed preflight result.
 */
function createEmptyPreflightResult() {
  const zeroedTotals = {
    discovered: 0,
    candidates: 0,
    skipped: 0,
    invalid: 0,
    skippedShort: 0,
    skippedActive: 0,
    skippedExists: 0
  };
  const emptyResult = {
    files: [],
    candidates: [],
    skipped: [],
    invalid: [],
    totals: zeroedTotals
  };
  return emptyResult;
}
|
|
2820
|
+
/**
 * Picks the session metadata to attach to a transcript.
 *
 * When `registryMeta` is present it supplies `sourceRef`, `agentId` and
 * `surface`, and the result is tagged `metadataSource: "registry"`; the
 * session id parsed from the transcript still wins over the registry's id.
 * Otherwise the file path becomes the `sourceRef`, the agent id is derived
 * from the path, and `surface` / `metadataSource` come from
 * `reconstructedMeta` (falling back to `null` and `"none"`).
 *
 * @param {string} filePath - Transcript file path.
 * @param {string|undefined|null} parsedSessionId - Session id read from the transcript.
 * @param {object|undefined|null} registryMeta - Registry metadata, if any.
 * @param {object|undefined|null} reconstructedMeta - Reconstructed metadata, if any.
 * @returns {object} `{ sessionId, sourceRef, agentId, surface, metadataSource }`.
 */
function resolveSessionMeta(filePath, parsedSessionId, registryMeta, reconstructedMeta) {
  if (!registryMeta) {
    const agentIdFromPath = deriveAgentIdFromPath(filePath);
    const reconstructedSurface = reconstructedMeta?.surface;
    const reconstructedSource = reconstructedMeta?.metadataSource;
    return {
      sessionId: parsedSessionId,
      sourceRef: filePath,
      agentId: agentIdFromPath,
      surface: reconstructedSurface ?? null,
      metadataSource: reconstructedSource ?? "none"
    };
  }
  const sessionId = parsedSessionId ?? registryMeta.sessionId;
  const { sourceRef, agentId, surface } = registryMeta;
  return { sessionId, sourceRef, agentId, surface, metadataSource: "registry" };
}
|
|
2838
|
+
/**
 * Derives an agent id from a transcript path of the form
 * `<...>/<agentId>/sessions/<file>`.
 *
 * The path is resolved to an absolute path first. The file must sit directly
 * inside a directory named `sessions`; the name of that directory's parent is
 * returned as the agent id.
 *
 * @param {string} filePath - Transcript file path.
 * @returns {string|null} The trimmed agent directory name, or `null` when the
 *   file is not inside a `sessions` directory or the parent name is empty,
 *   `"."` or `"/"`.
 */
function deriveAgentIdFromPath(filePath) {
  const absoluteFile = path.resolve(filePath);
  const containingDir = path.dirname(absoluteFile);
  if (path.basename(containingDir) !== "sessions") {
    return null;
  }
  const agentDirName = path.basename(path.dirname(containingDir)).trim();
  const isUnusable = agentDirName === "" || agentDirName === "." || agentDirName === "/";
  return isUnusable ? null : agentDirName;
}
|
|
2851
|
+
/**
 * Reports whether a session ended recently enough to be treated as active.
 *
 * A session is active when its `endedAt` timestamp is later than
 * `now - ACTIVE_SESSION_WINDOW_MS`. A missing or unparseable `endedAt`
 * is never active.
 *
 * @param {string|number|Date|undefined|null} endedAt - Session end timestamp.
 * @param {Date} now - Reference time.
 * @returns {boolean} `true` when the session falls inside the active window.
 */
function isActiveSession(endedAt, now) {
  if (!endedAt) {
    return false;
  }
  const endedAtMs = new Date(endedAt).getTime();
  if (Number.isNaN(endedAtMs)) {
    return false;
  }
  const windowStartMs = now.getTime() - ACTIVE_SESSION_WINDOW_MS;
  return endedAtMs > windowStartMs;
}
|
|
2861
|
+
/**
 * Looks up a previously stored "openclaw" episode for a transcript.
 *
 * The lookup by session id is tried first (only when a session id is given);
 * if it yields nothing, the lookup falls back to the transcript hash.
 *
 * @param {object} ports - Provides `episodes.getEpisodeBySourceId` and
 *   `episodes.getEpisodeByTranscriptHash`.
 * @param {string|undefined|null} sessionId - Session id, if known.
 * @param {string} transcriptHash - Hash of the transcript.
 * @returns {Promise<*>} Whatever the matching lookup resolves to.
 */
async function findExistingEpisode(ports, sessionId, transcriptHash) {
  if (sessionId) {
    const matchBySessionId = await ports.episodes.getEpisodeBySourceId("openclaw", sessionId);
    if (matchBySessionId) {
      return matchBySessionId;
    }
  }
  return ports.episodes.getEpisodeByTranscriptHash("openclaw", transcriptHash);
}
|
|
2868
|
+
|
|
2869
|
+
// src/app/episode-ingest/service/execute.ts
|
|
2870
|
+
/**
 * Classifies a single transcript file and, when it is a candidate, executes
 * the episode ingest for it.
 *
 * @param {string} filePath - Transcript file to ingest.
 * @param {object} ports - Must provide `createSummaryLlm`; also forwarded to
 *   the classification and execution steps.
 * @param {object} options - Reads `now` (defaults to the current time),
 *   `regenerate`, `skipActiveSessionCheck`, `candidateOverrides` and
 *   `genVersion`.
 * @returns {Promise<object>} `{ kind: "skipped", skipped }`,
 *   `{ kind: "invalid", invalid }` or `{ kind: "executed", candidate, session }`.
 * @throws {Error} When `ports.createSummaryLlm` is missing.
 */
async function ingestEpisodeTranscript(filePath, ports, options) {
  const { createSummaryLlm } = ports;
  if (!createSummaryLlm) {
    throw new Error("Episode transcript ingest requires createSummaryLlm().");
  }
  const preflightOptions = {
    referenceNow: options.now ?? new Date(),
    // Only a literal `true` enables these flags.
    regenerate: options.regenerate === true,
    skipActiveSessionCheck: options.skipActiveSessionCheck === true
  };
  const classification = await classifyPreflightTranscript(filePath, ports, preflightOptions);
  switch (classification.kind) {
    case "skipped":
      return { kind: "skipped", skipped: classification.value };
    case "invalid":
      return { kind: "invalid", invalid: classification.value };
    default:
      break;
  }
  const candidate = applyCandidateOverrides(classification.value, options.candidateOverrides);
  // A single transcript has no concurrent writers, so writes run inline.
  const runWriteInline = async (task) => task();
  const session = await executeEpisodeCandidate(candidate, createSummaryLlm, ports, options.genVersion, runWriteInline);
  return { kind: "executed", candidate, session };
}
|
|
2900
|
+
/**
 * Executes every candidate of an ingest plan with a bounded number of
 * concurrent workers and aggregates the outcome.
 *
 * Workers pull the next candidate index from a shared counter, so results are
 * stored at the candidate's original position regardless of completion order.
 * Database writes are funnelled through one serialized executor shared by all
 * workers.
 *
 * @param {object} plan - Reads `plan.candidates` and `plan.model.modelRef`.
 * @param {object} ports - Must provide `createSummaryLlm`; forwarded to the
 *   per-candidate execution.
 * @param {object} options - Reads `concurrency` (finite, truncates to >= 1),
 *   `genVersion` and the optional `onProgress(completed, total, result)`.
 * @returns {Promise<object>} `{ sessions, usage, modelRef, totals }` where
 *   `totals` counts attempted / written / updated / unchanged / failed.
 * @throws {Error} When `createSummaryLlm` is missing or `concurrency` is not
 *   a positive number.
 */
async function executeEpisodeIngestPlan(plan, ports, options) {
  const createSummaryLlm = ports.createSummaryLlm;
  if (!createSummaryLlm) {
    throw new Error("Episode ingest execution requires createSummaryLlm().");
  }
  if (!Number.isFinite(options.concurrency) || Math.trunc(options.concurrency) <= 0) {
    throw new Error(`Episode ingest concurrency must be a positive integer. Received: ${options.concurrency}.`);
  }
  if (plan.candidates.length === 0) {
    return {
      sessions: [],
      usage: createEmptyUsageStats(),
      modelRef: plan.model.modelRef,
      totals: {
        attempted: 0,
        written: 0,
        updated: 0,
        unchanged: 0,
        failed: 0
      }
    };
  }
  const results = new Array(plan.candidates.length);
  let nextIndex = 0;
  let completed = 0;
  const workerCount = Math.min(Math.trunc(options.concurrency), plan.candidates.length);
  const runSerializedWrite = createSerializedExecutor();
  await Promise.all(
    Array.from({ length: workerCount }, async () => {
      while (true) {
        // Claim the next index synchronously so no two workers share a slot.
        const currentIndex = nextIndex;
        nextIndex += 1;
        if (currentIndex >= plan.candidates.length) {
          return;
        }
        const candidate = plan.candidates[currentIndex];
        if (!candidate) {
          // Skip an empty slot but keep this worker alive: returning here
          // would silently drop every later candidate it would have claimed.
          continue;
        }
        const result = await executeEpisodeCandidate(candidate, createSummaryLlm, ports, options.genVersion, runSerializedWrite);
        results[currentIndex] = result;
        completed += 1;
        options.onProgress?.(completed, plan.candidates.length, result);
      }
    })
  );
  // Compact away slots that were skipped so consumers never see holes.
  const sessions = results.filter((result) => result !== undefined);
  const usage = sessions.reduce((total, result) => addUsageStats(total, result.usage), createEmptyUsageStats());
  const countByAction = (action) => sessions.filter((result) => result.action === action).length;
  return {
    sessions,
    usage,
    modelRef: plan.model.modelRef,
    totals: {
      attempted: sessions.length,
      written: countByAction("written"),
      updated: countByAction("updated"),
      unchanged: countByAction("unchanged"),
      failed: countByAction("failed")
    }
  };
}
|
|
2960
|
+
/**
 * Summarizes one candidate transcript, embeds the summary and upserts the
 * resulting episode.
 *
 * Start/end timestamps, agent id and surface come from the candidate and fall
 * back to the existing episode's values. The `sourceRef` of an existing
 * episode is kept unless the candidate's metadata came from the registry or
 * the existing episode has no `sourceRef`.
 *
 * Failures are reported as a result object (`action: "failed"`) rather than
 * thrown: `missing_started_at` when no start time can be resolved,
 * `invalid_response` when no structured summary is produced, or the formatted
 * error for anything thrown while summarizing, embedding or writing.
 *
 * NOTE(review): assumes `trimOptionalString` is side-effect free — confirm.
 *
 * @param {object} candidate - Planned candidate to execute.
 * @param {Function} createSummaryLlm - Factory returning the summary LLM.
 * @param {object} ports - Provides `episodes.upsertEpisode`; also passed to the embedder.
 * @param {*} genVersion - Stored on the episode as `genVersion`.
 * @param {Function} runSerializedWrite - Runs the async write task it is given.
 * @returns {Promise<object>} Result with `action`, `filePath`, optional
 *   `sessionId`, `usage`, and either `error` or `activityLevel` + `episodeId`.
 */
async function executeEpisodeCandidate(candidate, createSummaryLlm, ports, genVersion, runSerializedWrite) {
  const startedAt = trimOptionalString(candidate.startedAt) ?? trimOptionalString(candidate.existingEpisode?.startedAt);
  const endedAt = trimOptionalString(candidate.endedAt) ?? trimOptionalString(candidate.existingEpisode?.endedAt);

  // Shared shape for every failure result; `sessionId` is only set when known.
  const describeFailure = (error, usage) => {
    const failure = { action: "failed", filePath: candidate.filePath };
    if (candidate.sessionId) {
      failure.sessionId = candidate.sessionId;
    }
    failure.error = error;
    failure.usage = usage;
    return failure;
  };

  if (!startedAt) {
    return describeFailure("missing_started_at", createEmptyUsageStats());
  }

  const llm = createSummaryLlm();
  try {
    const structured = await generateEpisodeSummary(candidate.renderedTranscript, llm);
    if (!structured) {
      return describeFailure("invalid_response", cloneUsageStats(llm.metadata.usage));
    }
    const previousEpisode = candidate.existingEpisode;
    const embedding = await embedEpisodeSummary(structured.summary, ports);

    // Assembled lazily so it is evaluated when the serialized write runs.
    const buildEpisodeRecord = () => {
      const record = { source: "openclaw" };
      if (candidate.sessionId) {
        record.sourceId = candidate.sessionId;
      }
      const useCandidateRef = candidate.metadataSource === "registry" || !previousEpisode?.sourceRef;
      record.sourceRef = useCandidateRef ? candidate.sourceRef : previousEpisode.sourceRef;
      record.transcriptHash = candidate.transcriptHash;
      const agentId = trimOptionalString(candidate.agentId) ?? trimOptionalString(previousEpisode?.agentId);
      if (agentId) {
        record.agentId = agentId;
      }
      const surface = trimOptionalString(candidate.surface) ?? trimOptionalString(previousEpisode?.surface);
      if (surface) {
        record.surface = surface;
      }
      record.startedAt = startedAt;
      if (endedAt) {
        record.endedAt = endedAt;
      }
      record.summary = structured.summary;
      record.tags = structured.tags;
      record.activityLevel = structured.activityLevel;
      if (structured.project) {
        record.project = structured.project;
      }
      record.genModel = llm.metadata.modelRef;
      record.genVersion = genVersion;
      record.messageCount = candidate.messageCount;
      if (embedding) {
        record.embedding = embedding;
      }
      return record;
    };

    const writeResult = await runSerializedWrite(async () => ports.episodes.upsertEpisode(buildEpisodeRecord()));

    const outcome = { action: mapWriteAction(writeResult.action), filePath: candidate.filePath };
    if (candidate.sessionId) {
      outcome.sessionId = candidate.sessionId;
    }
    outcome.activityLevel = structured.activityLevel;
    outcome.episodeId = writeResult.episode.id;
    outcome.usage = cloneUsageStats(llm.metadata.usage);
    return outcome;
  } catch (error) {
    return describeFailure(formatExecutionError(error), cloneUsageStats(llm.metadata.usage));
  }
}
|
|
3024
|
+
/**
 * Returns a copy of `candidate` with caller-supplied overrides applied.
 *
 * `sessionId`, `sourceRef` and `metadataSource` are replaced only when the
 * override value is not `undefined`. `agentId` and `surface` are replaced
 * whenever the key is present on `overrides`, with a nullish value stored as
 * `null`. Without overrides the original candidate object is returned as is.
 *
 * @param {object} candidate - Candidate to copy.
 * @param {object|undefined|null} overrides - Optional override values.
 * @returns {object} The original candidate, or a shallow copy with overrides.
 */
function applyCandidateOverrides(candidate, overrides) {
  if (!overrides) {
    return candidate;
  }
  const patched = { ...candidate };
  if (overrides.sessionId !== undefined) {
    patched.sessionId = overrides.sessionId;
  }
  if (overrides.sourceRef !== undefined) {
    patched.sourceRef = overrides.sourceRef;
  }
  // Presence of the key, not its value, decides: an explicit `undefined`
  // clears the field to `null`.
  if ("agentId" in overrides) {
    patched.agentId = overrides.agentId ?? null;
  }
  if ("surface" in overrides) {
    patched.surface = overrides.surface ?? null;
  }
  if (overrides.metadataSource !== undefined) {
    patched.metadataSource = overrides.metadataSource;
  }
  return patched;
}
|
|
3037
|
+
/**
 * Translates a store write action into the ingest result vocabulary:
 * `"inserted"` becomes `"written"`; every other action passes through.
 *
 * @param {string} action - Action reported by the episode store.
 * @returns {string} The ingest-facing action name.
 */
function mapWriteAction(action) {
  return action === "inserted" ? "written" : action;
}
|
|
3043
|
+
|
|
3044
|
+
// src/app/episode-ingest/service/backfill.ts
|
|
3045
|
+
/**
 * Computes and stores embeddings for every episode that lacks one, using a
 * bounded number of concurrent workers.
 *
 * Each episode is handled best-effort: an embedder that returns nothing, or
 * any error while embedding or storing, counts the episode as failed and the
 * run continues with the next one.
 *
 * @param {object} ports - Must provide `embedding`; also
 *   `episodes.listEpisodesWithoutEmbeddings` and `episodes.updateEpisodeEmbedding`.
 * @param {object} options - Reads `concurrency` (finite, truncates to >= 1)
 *   and the optional `onProgress(completed, total, episode, status)` where
 *   `status` is `"embedded"` or `"failed"`.
 * @returns {Promise<object>} `{ totalMissing, attempted, embedded, failed,
 *   estimatedInputTokens }`.
 * @throws {Error} When no embedding provider is configured or `concurrency`
 *   is not a positive number.
 */
async function backfillEpisodeEmbeddings(ports, options) {
  const embedding = ports.embedding;
  if (!embedding) {
    throw new Error("Episode embedding backfill requires an embedding provider.");
  }
  if (!Number.isFinite(options.concurrency) || Math.trunc(options.concurrency) <= 0) {
    throw new Error(`Episode embedding backfill concurrency must be a positive integer. Received: ${options.concurrency}.`);
  }
  const pendingEpisodes = await ports.episodes.listEpisodesWithoutEmbeddings();
  if (pendingEpisodes.length === 0) {
    return {
      totalMissing: 0,
      attempted: 0,
      embedded: 0,
      failed: 0,
      estimatedInputTokens: 0
    };
  }
  const estimatedInputTokens = pendingEpisodes.reduce((total, episode) => total + estimateInputTokens(episode.summary), 0);
  const workerCount = Math.min(Math.trunc(options.concurrency), pendingEpisodes.length);
  let nextIndex = 0;
  let completed = 0;
  let embeddedCount = 0;
  let failedCount = 0;
  await Promise.all(
    Array.from({ length: workerCount }, async () => {
      while (true) {
        // Claim the next index synchronously so no two workers share a slot.
        const currentIndex = nextIndex;
        nextIndex += 1;
        if (currentIndex >= pendingEpisodes.length) {
          return;
        }
        const episode = pendingEpisodes[currentIndex];
        if (!episode) {
          // Skip an empty slot but keep this worker alive: returning here
          // would silently drop every later episode it would have claimed.
          continue;
        }
        let status = "failed";
        try {
          const vector = await embedEpisodeSummaryWithPort(episode.summary, embedding);
          if (vector) {
            await ports.episodes.updateEpisodeEmbedding(episode.id, vector);
            embeddedCount += 1;
            status = "embedded";
          } else {
            failedCount += 1;
          }
        } catch {
          // Deliberate best-effort: one bad episode must not abort the run.
          failedCount += 1;
        }
        completed += 1;
        options.onProgress?.(completed, pendingEpisodes.length, episode, status);
      }
    })
  );
  return {
    totalMissing: pendingEpisodes.length,
    // Episodes actually processed; equals `totalMissing` for a dense list.
    attempted: completed,
    embedded: embeddedCount,
    failed: failedCount,
    estimatedInputTokens
  };
}
|
|
3107
|
+
|
|
3108
|
+
// src/app/episode-ingest/service/plan.ts
|
|
3109
|
+
/**
 * Turns preflight candidates into an executable ingest plan with a token and
 * cost estimate.
 *
 * Every candidate gets a recomputed `estimatedInputTokens`. When a `recent`
 * cutoff is in effect, candidates whose end time cannot be parsed, or that
 * ended before the cutoff, are excluded and counted. Output tokens are
 * estimated at 500 per selected candidate; pricing is applied per million
 * tokens.
 *
 * @param {object} preflight - Reads `preflight.candidates`.
 * @param {object} model - Reads `model.pricing.input` / `model.pricing.output`;
 *   returned unchanged on the plan.
 * @param {object} [options] - Reads `recent` and `now`.
 * @returns {object} `{ candidates, model, estimate, totals }` plus `recent`
 *   and `recentCutoff` (ISO string) when a cutoff applies.
 */
function createEpisodeIngestPlan(preflight, model, options = {}) {
  const cutoff = resolveRecentCutoff(options.recent, options.now);
  const selected = [];
  let excludedByRecent = 0;
  let excludedUndated = 0;

  preflight.candidates.forEach((candidate) => {
    const estimatedInputTokens = estimateEpisodeSummaryInputTokens(candidate.renderedTranscript);
    const planned = { ...candidate, estimatedInputTokens };
    if (cutoff) {
      const endedAt = parseCandidateEndedAt(candidate.endedAt);
      if (!endedAt) {
        // Undated candidates count towards both exclusion counters.
        excludedByRecent += 1;
        excludedUndated += 1;
        return;
      }
      if (endedAt.getTime() < cutoff.getTime()) {
        excludedByRecent += 1;
        return;
      }
    }
    selected.push(planned);
  });

  let inputTokens = 0;
  for (const planned of selected) {
    inputTokens += planned.estimatedInputTokens;
  }
  const outputTokens = selected.length * 500;
  const estimatedCostUsd = inputTokens / 1e6 * model.pricing.input + outputTokens / 1e6 * model.pricing.output;

  const plan = {
    candidates: selected,
    model,
    estimate: {
      candidateCount: selected.length,
      inputTokens,
      outputTokens,
      totalTokens: inputTokens + outputTokens,
      estimatedCostUsd
    }
  };
  const trimmedRecent = options.recent?.trim();
  if (trimmedRecent) {
    plan.recent = trimmedRecent;
  }
  if (cutoff) {
    plan.recentCutoff = cutoff.toISOString();
  }
  plan.totals = {
    preflightCandidates: preflight.candidates.length,
    selectedCandidates: selected.length,
    excludedByRecent,
    excludedUndated
  };
  return plan;
}
|
|
3157
|
+
/**
 * Resolves the optional `recent` filter into a cutoff date.
 *
 * @param {string|undefined|null} recent - Raw filter value; blank means "no filter".
 * @param {Date|undefined|null} now - Reference time; defaults to the current time.
 * @returns {Date|undefined} The cutoff produced by `parseRelativeDate`, or
 *   `undefined` when no filter was requested.
 * @throws {Error} When `parseRelativeDate` yields nothing for the value.
 */
function resolveRecentCutoff(recent, now) {
  const requested = recent?.trim();
  if (!requested) {
    return undefined;
  }
  const referenceTime = now ?? new Date();
  const cutoff = parseRelativeDate(requested, referenceTime);
  if (cutoff) {
    return cutoff;
  }
  throw new Error(`Unsupported recent value "${requested}". Use day shorthand like 30d or an ISO timestamp.`);
}
|
|
3168
|
+
|
|
3169
|
+
// src/adapters/openclaw/session/session-id.ts
|
|
3170
|
+
import path2 from "path";
|
|
3171
|
+
/**
 * Derives a session id from a session file path by taking the file's base
 * name and stripping a `.jsonl` extension together with anything after it
 * (for example `abc.jsonl.bak` -> `abc`). Matching is case-insensitive.
 *
 * @param {string} sessionFile - Path of the session file.
 * @param {object} [logger] - Optional logger; debug messages are emitted through it.
 * @returns {string|undefined} The derived id, or `undefined` when the path is
 *   blank or nothing remains after stripping.
 */
function deriveOpenClawSessionIdFromFilePath(sessionFile, logger) {
  const trimmedPath = sessionFile.trim();
  if (trimmedPath.length === 0) {
    debugLog(logger, "session-id", "cannot derive session id from empty session file path");
    return undefined;
  }
  const baseName = path2.basename(trimmedPath);
  const derivedId = baseName.replace(/\.jsonl(?:\..*)?$/i, "").trim();
  const shownId = derivedId || "<empty>";
  debugLog(logger, "session-id", `derived session id "${shownId}" from file=${trimmedPath}`);
  if (derivedId.length === 0) {
    return undefined;
  }
  return derivedId;
}
|
|
3182
|
+
/**
 * Emits a debug line of the form `[agenr] <subsystem>: <message>` through
 * `logger.debug`, doing nothing when the logger or its `debug` method is absent.
 *
 * @param {object|undefined|null} logger - Optional logger.
 * @param {string} subsystem - Short subsystem tag.
 * @param {string} message - Message text.
 * @returns {void}
 */
function debugLog(logger, subsystem, message) {
  const emit = logger?.debug;
  if (emit === undefined || emit === null) {
    return;
  }
  // Invoke with the logger as receiver, as a direct method call would.
  emit.call(logger, `[agenr] ${subsystem}: ${message}`);
}
|
|
3185
|
+
|
|
3186
|
+
// src/adapters/openclaw/session/sessions-store-reader.ts
|
|
3187
|
+
import * as fs3 from "fs/promises";
|
|
3188
|
+
import path3 from "path";
|
|
3189
|
+
/**
 * Reads `<sessionsDir>/sessions.json` and returns one normalized entry per
 * usable session key.
 *
 * Each entry always has `sessionKey` and, when present and non-blank in the
 * store, `sessionId`, `sessionFile` (resolved against the sessions
 * directory), `surface` and `provider` (both read from `origin`), `chatType`
 * and `updatedAt`. Blank keys and non-object values are skipped.
 *
 * The function never throws for read or parse problems: a blank directory, a
 * missing file, invalid JSON, a non-object payload or any other read error
 * all produce an empty list and a debug log line.
 *
 * @param {string} sessionsDir - Directory containing `sessions.json`.
 * @param {object} [logger] - Optional logger for debug output.
 * @returns {Promise<object[]>} Normalized store entries.
 */
async function readOpenClawSessionsStore(sessionsDir, logger) {
  const subsystem = "sessions-store-reader";
  const trimmedDir = sessionsDir.trim();
  if (trimmedDir.length === 0) {
    debugLog2(logger, subsystem, "skipping sessions.json read because sessionsDir is empty");
    return [];
  }
  const absoluteDir = path3.resolve(trimmedDir);
  const storePath = path3.join(absoluteDir, "sessions.json");
  try {
    const fileText = await fs3.readFile(storePath, "utf8");
    const store = JSON.parse(fileText);
    if (!isRecord(store)) {
      debugLog2(logger, subsystem, `sessions.json did not contain an object: path=${storePath}`);
      return [];
    }
    const entries = [];
    for (const [rawKey, rawValue] of Object.entries(store)) {
      const sessionKey = rawKey.trim();
      if (sessionKey.length === 0) {
        debugLog2(logger, subsystem, `skipping blank session key in ${storePath}`);
        continue;
      }
      if (!isRecord(rawValue)) {
        debugLog2(logger, subsystem, `skipping non-object entry for key=${sessionKey}`);
        continue;
      }
      const origin = isRecord(rawValue["origin"]) ? rawValue["origin"] : undefined;
      const entry = { sessionKey };
      const sessionId = asTrimmedString(rawValue["sessionId"]);
      if (sessionId) {
        entry.sessionId = sessionId;
      }
      const sessionFile = asTrimmedString(rawValue["sessionFile"]);
      if (sessionFile) {
        entry.sessionFile = resolveSessionStorePath(sessionFile, absoluteDir);
      }
      const surface = asTrimmedString(origin?.["surface"]);
      if (surface) {
        entry.surface = surface;
      }
      const provider = asTrimmedString(origin?.["provider"]);
      if (provider) {
        entry.provider = provider;
      }
      const chatType = asTrimmedString(rawValue["chatType"]);
      if (chatType) {
        entry.chatType = chatType;
      }
      const updatedAt = asFiniteNumber(rawValue["updatedAt"]);
      // Compared against undefined so a legitimate 0 timestamp is kept.
      if (updatedAt !== undefined) {
        entry.updatedAt = updatedAt;
      }
      entries.push(entry);
    }
    debugLog2(logger, subsystem, `loaded sessions.json entries=${entries.length} path=${storePath}`);
    return entries;
  } catch (error) {
    let detail;
    if (isFileNotFound(error)) {
      detail = `sessions.json missing at ${storePath}`;
    } else if (error instanceof SyntaxError) {
      detail = `sessions.json parse failed at ${storePath}: ${error.message}`;
    } else {
      detail = `sessions.json read failed at ${storePath}: ${formatErrorMessage(error)}`;
    }
    debugLog2(logger, subsystem, detail);
    return [];
  }
}
|
|
3247
|
+
/**
 * Resolves a path stored in `sessions.json` to an absolute path: absolute
 * paths are normalized as they are, relative paths are resolved against the
 * sessions directory.
 *
 * @param {string} candidatePath - Path as written in the store.
 * @param {string} sessionsDir - Directory that relative paths are based on.
 * @returns {string} The absolute path.
 */
function resolveSessionStorePath(candidatePath, sessionsDir) {
  if (path3.isAbsolute(candidatePath)) {
    return path3.resolve(candidatePath);
  }
  return path3.resolve(sessionsDir, candidatePath);
}
|
|
3250
|
+
/**
 * Tells whether a value is a non-null object. Arrays and other object
 * subtypes also qualify, since only `typeof` and a null check are applied.
 *
 * @param {*} value - Value to inspect.
 * @returns {boolean} `true` for any non-null value whose `typeof` is `"object"`.
 */
function isRecord(value) {
  if (value === null) {
    return false;
  }
  return typeof value === "object";
}
|
|
3253
|
+
/**
 * Returns the trimmed form of a string value, or `undefined` when the value
 * is not a string or is blank after trimming.
 *
 * @param {*} value - Value to normalize.
 * @returns {string|undefined} The trimmed, non-empty string.
 */
function asTrimmedString(value) {
  if (typeof value !== "string") {
    return undefined;
  }
  const trimmed = value.trim();
  return trimmed.length > 0 ? trimmed : undefined;
}
|
|
3256
|
+
/**
 * Returns the value when it is a finite number, otherwise `undefined`.
 * Numeric strings, `NaN` and infinities are rejected.
 *
 * @param {*} value - Value to check.
 * @returns {number|undefined} The finite number, if any.
 */
function asFiniteNumber(value) {
  // Number.isFinite does not coerce, so it already rejects non-number types.
  if (Number.isFinite(value)) {
    return value;
  }
  return undefined;
}
|
|
3259
|
+
/**
 * Emits a debug line of the form `[agenr] <subsystem>: <message>` through
 * `logger.debug`, doing nothing when the logger or its `debug` method is absent.
 *
 * @param {object|undefined|null} logger - Optional logger.
 * @param {string} subsystem - Short subsystem tag.
 * @param {string} message - Message text.
 * @returns {void}
 */
function debugLog2(logger, subsystem, message) {
  const emit = logger?.debug;
  if (emit === undefined || emit === null) {
    return;
  }
  // Invoke with the logger as receiver, as a direct method call would.
  emit.call(logger, `[agenr] ${subsystem}: ${message}`);
}
|
|
3262
|
+
/**
 * Tells whether an error value is an object carrying `code === "ENOENT"`.
 *
 * @param {*} error - Caught value.
 * @returns {boolean} `true` only for non-null objects with an `ENOENT` code.
 */
function isFileNotFound(error) {
  if (typeof error !== "object" || error === null) {
    return false;
  }
  if (!("code" in error)) {
    return false;
  }
  return error.code === "ENOENT";
}
|
|
3265
|
+
/**
 * Produces a printable message for a caught value: the `message` of an
 * `Error` instance, or the string conversion of anything else.
 *
 * @param {*} error - Caught value.
 * @returns {string} Text suitable for a log line.
 */
function formatErrorMessage(error) {
  return error instanceof Error ? error.message : String(error);
}
|
|
3271
|
+
|
|
3272
|
+
// src/adapters/openclaw/session/tui-lane.ts
|
|
3273
|
+
// Session keys of the form "agent:<agentId>:<lane>" (exactly three segments).
var TUI_SESSION_KEY_PATTERN = /^agent:([^:]+):([^:]+)$/i;
// A lane made of "tui" plus optional alphanumerics, followed by "-<uuid>".
var TUI_UUID_LANE_PATTERN = /^tui[a-z0-9]*-[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i;
// The trailing "-<uuid>" portion of such a lane.
var TUI_UUID_SUFFIX_PATTERN = /-[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i;

/**
 * Parses a TUI session key of the form `agent:<agentId>:<lane>` where the
 * lane starts with `tui` (case-insensitive).
 *
 * `instanceLane` is the lane exactly as given (trimmed). `stableLane` is the
 * same lane with a trailing `-<uuid>` removed when the lane matches the
 * `tui…-<uuid>` shape; otherwise it equals `instanceLane`.
 *
 * @param {string} sessionKey - Raw session key.
 * @returns {{agentId: string, stableLane: string, instanceLane: string}|null}
 *   The parsed parts, or `null` when the key is blank, does not match the
 *   three-segment shape, or its lane does not start with `tui`.
 */
function parseTuiSessionKey(sessionKey) {
  const trimmedKey = sessionKey.trim();
  const parts = trimmedKey.length === 0 ? null : TUI_SESSION_KEY_PATTERN.exec(trimmedKey);
  if (!parts) {
    return null;
  }
  const agentId = parts[1]?.trim();
  const instanceLane = parts[2]?.trim();
  const isTuiLane = Boolean(instanceLane) && instanceLane.toLowerCase().startsWith("tui");
  if (!agentId || !isTuiLane) {
    return null;
  }
  let stableLane = instanceLane;
  if (TUI_UUID_LANE_PATTERN.test(instanceLane)) {
    stableLane = instanceLane.replace(TUI_UUID_SUFFIX_PATTERN, "");
  }
  return { agentId, stableLane, instanceLane };
}
|
|
3298
|
+
|
|
3299
|
+
export {
|
|
3300
|
+
normalizeClaimKeySegment,
|
|
3301
|
+
normalizeClaimKey,
|
|
3302
|
+
compactClaimKey,
|
|
3303
|
+
inspectClaimKey,
|
|
3304
|
+
isTrustedClaimKeyForCleanup,
|
|
3305
|
+
describeClaimKeyNormalizationFailure,
|
|
3306
|
+
describeClaimKeySuspicion,
|
|
3307
|
+
previewClaimKeyExtraction,
|
|
3308
|
+
runBatchClaimExtraction,
|
|
3309
|
+
validateSupersessionRules,
|
|
3310
|
+
describeSupersessionRuleFailure,
|
|
3311
|
+
storeEntriesDetailed,
|
|
3312
|
+
OpenClawTranscriptParser,
|
|
3313
|
+
openClawTranscriptParser,
|
|
3314
|
+
deriveOpenClawSessionIdFromFilePath,
|
|
3315
|
+
readOpenClawSessionsStore,
|
|
3316
|
+
parseTuiSessionKey,
|
|
3317
|
+
backfillEpisodeEmbeddings,
|
|
3318
|
+
prepareEpisodeIngest,
|
|
3319
|
+
ingestEpisodeTranscript,
|
|
3320
|
+
executeEpisodeIngestPlan,
|
|
3321
|
+
createEpisodeIngestPlan
|
|
3322
|
+
};
|