@martian-engineering/lossless-claw 0.5.3 → 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +31 -1
- package/docs/configuration.md +23 -0
- package/openclaw.plugin.json +75 -0
- package/package.json +2 -1
- package/skills/lossless-claw/SKILL.md +33 -0
- package/skills/lossless-claw/references/architecture.md +52 -0
- package/skills/lossless-claw/references/config.md +263 -0
- package/skills/lossless-claw/references/diagnostics.md +79 -0
- package/skills/lossless-claw/references/recall-tools.md +55 -0
- package/skills/lossless-claw/references/session-lifecycle.md +59 -0
- package/src/assembler.ts +132 -36
- package/src/compaction.ts +22 -46
- package/src/db/config.ts +52 -20
- package/src/db/migration.ts +50 -13
- package/src/engine.ts +781 -172
- package/src/plugin/index.ts +45 -0
- package/src/plugin/lcm-command.ts +759 -0
- package/src/plugin/lcm-doctor-apply.ts +546 -0
- package/src/plugin/lcm-doctor-shared.ts +210 -0
- package/src/store/conversation-store.ts +60 -21
- package/src/store/parse-utc-timestamp.ts +25 -0
- package/src/store/summary-store.ts +380 -11
- package/src/summarize.ts +107 -20
- package/src/tools/lcm-expand-query-tool.ts +58 -25
- package/src/tools/lcm-expansion-recursion-guard.ts +87 -0
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import type { DatabaseSync } from "node:sqlite";
|
|
2
2
|
import { sanitizeFts5Query } from "./fts5-sanitize.js";
|
|
3
3
|
import { buildLikeSearchPlan, containsCjk, createFallbackSnippet } from "./full-text-fallback.js";
|
|
4
|
+
import { parseUtcTimestamp, parseUtcTimestampOrNull } from "./parse-utc-timestamp.js";
|
|
4
5
|
|
|
5
6
|
export type SummaryKind = "leaf" | "condensed";
|
|
6
7
|
export type ContextItemType = "message" | "summary";
|
|
@@ -117,6 +118,16 @@ export type ConversationBootstrapStateRecord = {
|
|
|
117
118
|
updatedAt: Date;
|
|
118
119
|
};
|
|
119
120
|
|
|
121
|
+
/**
 * A tool-result message that `listTranscriptGcCandidates` reports as a
 * transcript-GC candidate. Built from a `TranscriptGcCandidateRow` by
 * `toTranscriptGcCandidateRecord`.
 */
export type TranscriptGcCandidateRecord = {
  /** `message_id` of the tool message. */
  messageId: number;
  /** `conversation_id` the message belongs to. */
  conversationId: number;
  /** `seq` of the message as stored on the message row. */
  seq: number;
  /** Non-empty tool call id taken from the message part. */
  toolCallId: string;
  /** Tool name from the message part, if recorded. */
  toolName: string | null;
  /** `externalizedFileId` from the part metadata when it is a string, else null. */
  externalizedFileId: string | null;
  /** `originalByteSize` from the part metadata, floored and clamped to >= 0, else null. */
  originalByteSize: number | null;
};
|
|
130
|
+
|
|
120
131
|
// ── DB row shapes (snake_case) ────────────────────────────────────────────────
|
|
121
132
|
|
|
122
133
|
interface SummaryRow {
|
|
@@ -207,6 +218,17 @@ interface ConversationBootstrapStateRow {
|
|
|
207
218
|
updated_at: string;
|
|
208
219
|
}
|
|
209
220
|
|
|
221
|
+
/**
 * Matches maximal runs of CJK characters inside a search query.
 *
 * Ranges covered:
 *  - U+2E80–U+9FFF: one contiguous span that already includes Hiragana
 *    (U+3040–U+309F), Katakana (U+30A0–U+30FF), CJK Extension A
 *    (U+3400–U+4DBF) and the CJK Symbols and Punctuation block, so those
 *    sub-ranges are not listed separately.
 *  - U+AC00–U+D7AF: Hangul syllables.
 *  - U+F900–U+FAFF: CJK compatibility ideographs.
 */
const CJK_QUERY_SEGMENT_RE = /[\u2E80-\u9FFF\uAC00-\uD7AF\uF900-\uFAFF]+/g;

/**
 * Matches Latin/numeric query tokens: an ASCII letter or digit followed by any
 * run of word characters, dots, slashes or hyphens (e.g. "src/db/config.ts").
 */
const LATIN_QUERY_TOKEN_RE = /[a-zA-Z0-9][\w./-]*/g;
|
|
224
|
+
/**
 * Raw row shape (snake_case) returned by the transcript-GC candidate query:
 * message columns joined with one tool message part.
 */
interface TranscriptGcCandidateRow {
  /** `messages.message_id`. */
  message_id: number;
  /** `messages.conversation_id`. */
  conversation_id: number;
  /** `messages.seq`. */
  seq: number;
  /** `message_parts.tool_call_id`; rows without a usable id are discarded by the mapper. */
  tool_call_id: string | null;
  /** `message_parts.tool_name`. */
  tool_name: string | null;
  /** `message_parts.metadata`, a JSON string when present. */
  metadata: string | null;
}
|
|
210
232
|
// ── Row mappers ───────────────────────────────────────────────────────────────
|
|
211
233
|
|
|
212
234
|
function toSummaryRecord(row: SummaryRow): SummaryRecord {
|
|
@@ -224,8 +246,8 @@ function toSummaryRecord(row: SummaryRow): SummaryRecord {
|
|
|
224
246
|
content: row.content,
|
|
225
247
|
tokenCount: row.token_count,
|
|
226
248
|
fileIds,
|
|
227
|
-
earliestAt:
|
|
228
|
-
latestAt:
|
|
249
|
+
earliestAt: parseUtcTimestampOrNull(row.earliest_at),
|
|
250
|
+
latestAt: parseUtcTimestampOrNull(row.latest_at),
|
|
229
251
|
descendantCount:
|
|
230
252
|
typeof row.descendant_count === "number" &&
|
|
231
253
|
Number.isFinite(row.descendant_count) &&
|
|
@@ -245,7 +267,7 @@ function toSummaryRecord(row: SummaryRow): SummaryRecord {
|
|
|
245
267
|
? Math.floor(row.source_message_token_count)
|
|
246
268
|
: 0,
|
|
247
269
|
model: typeof row.model === "string" ? row.model : "unknown",
|
|
248
|
-
createdAt:
|
|
270
|
+
createdAt: parseUtcTimestamp(row.created_at),
|
|
249
271
|
};
|
|
250
272
|
}
|
|
251
273
|
|
|
@@ -256,7 +278,7 @@ function toContextItemRecord(row: ContextItemRow): ContextItemRecord {
|
|
|
256
278
|
itemType: row.item_type,
|
|
257
279
|
messageId: row.message_id,
|
|
258
280
|
summaryId: row.summary_id,
|
|
259
|
-
createdAt:
|
|
281
|
+
createdAt: parseUtcTimestamp(row.created_at),
|
|
260
282
|
};
|
|
261
283
|
}
|
|
262
284
|
|
|
@@ -266,7 +288,7 @@ function toSearchResult(row: SummarySearchRow): SummarySearchResult {
|
|
|
266
288
|
conversationId: row.conversation_id,
|
|
267
289
|
kind: row.kind,
|
|
268
290
|
snippet: row.snippet,
|
|
269
|
-
createdAt:
|
|
291
|
+
createdAt: parseUtcTimestamp(row.created_at),
|
|
270
292
|
rank: row.rank,
|
|
271
293
|
};
|
|
272
294
|
}
|
|
@@ -280,7 +302,7 @@ function toLargeFileRecord(row: LargeFileRow): LargeFileRecord {
|
|
|
280
302
|
byteSize: row.byte_size,
|
|
281
303
|
storageUri: row.storage_uri,
|
|
282
304
|
explorationSummary: row.exploration_summary,
|
|
283
|
-
createdAt:
|
|
305
|
+
createdAt: parseUtcTimestamp(row.created_at),
|
|
284
306
|
};
|
|
285
307
|
}
|
|
286
308
|
|
|
@@ -294,7 +316,43 @@ function toConversationBootstrapStateRecord(
|
|
|
294
316
|
lastSeenMtimeMs: row.last_seen_mtime_ms,
|
|
295
317
|
lastProcessedOffset: row.last_processed_offset,
|
|
296
318
|
lastProcessedEntryHash: row.last_processed_entry_hash,
|
|
297
|
-
updatedAt:
|
|
319
|
+
updatedAt: parseUtcTimestamp(row.updated_at),
|
|
320
|
+
};
|
|
321
|
+
}
|
|
322
|
+
|
|
323
|
+
/**
 * Convert a raw tool-part row into a transcript-GC candidate record.
 *
 * Returns `null` when the row cannot be a candidate:
 *  - `tool_call_id` is missing or empty;
 *  - `metadata` is absent, is not valid JSON, or is not a JSON object;
 *  - the metadata does not carry `toolOutputExternalized === true`.
 *
 * @param row Row produced by the transcript-GC candidate query.
 * @returns The mapped record, or `null` if the row is not a candidate.
 */
function toTranscriptGcCandidateRecord(
  row: TranscriptGcCandidateRow,
): TranscriptGcCandidateRecord | null {
  const toolCallId = row.tool_call_id;
  if (typeof toolCallId !== "string" || toolCallId.length === 0) {
    return null;
  }

  // Parse defensively: the column is free-form text, so validate that the
  // decoded value really is a plain object before treating it as a record.
  let metadata: Record<string, unknown> | null = null;
  if (typeof row.metadata === "string" && row.metadata.length > 0) {
    try {
      const parsed: unknown = JSON.parse(row.metadata);
      if (typeof parsed === "object" && parsed !== null && !Array.isArray(parsed)) {
        metadata = parsed as Record<string, unknown>;
      }
    } catch {
      // Malformed JSON: treat the row as having no usable metadata.
      metadata = null;
    }
  }

  if (metadata === null || metadata.toolOutputExternalized !== true) {
    return null;
  }

  const fileId = metadata.externalizedFileId;
  const byteSize = metadata.originalByteSize;

  return {
    messageId: row.message_id,
    conversationId: row.conversation_id,
    seq: row.seq,
    toolCallId,
    toolName: row.tool_name,
    externalizedFileId: typeof fileId === "string" ? fileId : null,
    // Normalize to a non-negative integer; anything non-numeric becomes null.
    originalByteSize:
      typeof byteSize === "number" && Number.isFinite(byteSize)
        ? Math.max(0, Math.floor(byteSize))
        : null,
  };
}
|
|
300
358
|
|
|
@@ -400,6 +458,17 @@ export class SummaryStore {
|
|
|
400
458
|
// compaction and assembly will still work correctly.
|
|
401
459
|
}
|
|
402
460
|
|
|
461
|
+
// Also index into the CJK trigram FTS table for CJK substring search.
|
|
462
|
+
try {
|
|
463
|
+
this.db
|
|
464
|
+
.prepare(
|
|
465
|
+
`INSERT INTO summaries_fts_cjk(summary_id, content) VALUES (?, ?)`,
|
|
466
|
+
)
|
|
467
|
+
.run(input.summaryId, input.content);
|
|
468
|
+
} catch {
|
|
469
|
+
// CJK trigram FTS table may not exist yet (pre-migration); ignore.
|
|
470
|
+
}
|
|
471
|
+
|
|
403
472
|
return toSummaryRecord(row);
|
|
404
473
|
}
|
|
405
474
|
|
|
@@ -539,7 +608,71 @@ export class SummaryStore {
|
|
|
539
608
|
}
|
|
540
609
|
return orderedLinks;
|
|
541
610
|
}
|
|
611
|
+
/**
 * List tool-result messages of a conversation that are candidates for
 * transcript GC: tool messages that are linked to at least one summary
 * (`summary_messages`) and are no longer referenced by a raw `message`
 * entry in `context_items`.
 *
 * Rows are scanned in `seq` / part-ordinal order; each message contributes at
 * most one candidate (the first part that maps to a valid record).
 *
 * @param conversationId Conversation to inspect.
 * @param options.limit Maximum number of candidates; a positive finite number
 *   is floored (minimum 1), anything else falls back to 25.
 * @returns Candidates in ascending `seq` order.
 */
async listTranscriptGcCandidates(
  conversationId: number,
  options?: { limit?: number },
): Promise<TranscriptGcCandidateRecord[]> {
  const requested = options?.limit;
  const limit =
    typeof requested === "number" && Number.isFinite(requested) && requested > 0
      ? Math.max(1, Math.floor(requested))
      : 25;

  const statement = this.db.prepare(
    `SELECT
       m.message_id,
       m.conversation_id,
       m.seq,
       mp.tool_call_id,
       mp.tool_name,
       mp.metadata
     FROM messages m
     JOIN message_parts mp
       ON mp.message_id = m.message_id
     WHERE m.conversation_id = ?
       AND m.role = 'tool'
       AND mp.part_type = 'tool'
       AND mp.tool_call_id IS NOT NULL
       AND mp.tool_call_id != ''
       AND EXISTS (
         SELECT 1
         FROM summary_messages sm
         WHERE sm.message_id = m.message_id
       )
       AND NOT EXISTS (
         SELECT 1
         FROM context_items ci
         WHERE ci.conversation_id = m.conversation_id
           AND ci.item_type = 'message'
           AND ci.message_id = m.message_id
       )
     ORDER BY m.seq ASC, mp.ordinal ASC`,
  );
  const rows = statement.all(conversationId) as unknown as TranscriptGcCandidateRow[];

  // Insertion-ordered map doubles as the "already seen" set and the result list.
  const byMessageId = new Map<number, TranscriptGcCandidateRecord>();
  for (const row of rows) {
    if (byMessageId.size >= limit) {
      break;
    }
    if (byMessageId.has(row.message_id)) {
      continue;
    }
    const candidate = toTranscriptGcCandidateRecord(row);
    if (candidate) {
      byMessageId.set(candidate.messageId, candidate);
    }
  }

  return [...byMessageId.values()];
}
|
|
543
676
|
async getSummaryChildren(parentSummaryId: string): Promise<SummaryRecord[]> {
|
|
544
677
|
const rows = this.db
|
|
545
678
|
.prepare(
|
|
@@ -687,6 +820,45 @@ export class SummaryStore {
|
|
|
687
820
|
return rows.map((row) => row.depth);
|
|
688
821
|
}
|
|
689
822
|
|
|
823
|
+
/**
 * Trim a conversation's `context_items` when a new session starts.
 *
 * - Finite negative `retainDepth`: nothing is deleted.
 * - Otherwise all raw `message` items are deleted, and then:
 *   - non-finite `retainDepth`: every `summary` item is deleted as well;
 *   - finite `retainDepth`: only `summary` items whose summary has
 *     `depth < floor(retainDepth)` are deleted.
 *
 * NOTE(review): `NaN` and `-Infinity` are non-finite and therefore take the
 * "delete every summary item" path — confirm that is intended for callers
 * that may pass an unparsed or invalid depth.
 *
 * @param conversationId Conversation whose context items are pruned.
 * @param retainDepth Minimum summary depth to keep (see rules above).
 */
async pruneForNewSession(conversationId: number, retainDepth: number): Promise<void> {
  const depthIsFinite = Number.isFinite(retainDepth);
  if (depthIsFinite && retainDepth < 0) {
    return;
  }

  const db = this.db;

  db.prepare(
    `DELETE FROM context_items
     WHERE conversation_id = ?
       AND item_type = 'message'`,
  ).run(conversationId);

  if (depthIsFinite) {
    // Keep summaries at or above the retained depth; drop the shallower ones.
    db.prepare(
      `DELETE FROM context_items
       WHERE conversation_id = ?
         AND item_type = 'summary'
         AND summary_id IN (
           SELECT summary_id
           FROM summaries
           WHERE conversation_id = ?
             AND depth < ?
         )`,
    ).run(conversationId, conversationId, Math.floor(retainDepth));
    return;
  }

  db.prepare(
    `DELETE FROM context_items
     WHERE conversation_id = ?
       AND item_type = 'summary'`,
  ).run(conversationId);
}
|
|
861
|
+
|
|
690
862
|
async appendContextMessage(conversationId: number, messageId: number): Promise<void> {
|
|
691
863
|
const row = this.db
|
|
692
864
|
.prepare(
|
|
@@ -830,10 +1002,30 @@ export class SummaryStore {
|
|
|
830
1002
|
const limit = input.limit ?? 50;
|
|
831
1003
|
|
|
832
1004
|
if (input.mode === "full_text") {
|
|
833
|
-
// FTS5 unicode61
|
|
834
|
-
//
|
|
1005
|
+
// FTS5 unicode61 cannot segment CJK ideographs, so CJK queries route
|
|
1006
|
+
// through the trigram FTS table first, then fall back to LIKE with OR
|
|
1007
|
+
// semantics (instead of the original AND logic which fails when the
|
|
1008
|
+
// user's phrasing doesn't exactly match the summary text).
|
|
835
1009
|
if (containsCjk(input.query)) {
|
|
836
|
-
|
|
1010
|
+
const cjkSegments = this.extractCjkSegments(input.query);
|
|
1011
|
+
const hasShortCjkSegment = cjkSegments.some((segment) => segment.length < 3);
|
|
1012
|
+
if (!hasShortCjkSegment) {
|
|
1013
|
+
try {
|
|
1014
|
+
const trigramResults = this.searchCjkTrigram(
|
|
1015
|
+
input.query,
|
|
1016
|
+
limit,
|
|
1017
|
+
input.conversationId,
|
|
1018
|
+
input.since,
|
|
1019
|
+
input.before,
|
|
1020
|
+
);
|
|
1021
|
+
if (trigramResults.length > 0) {
|
|
1022
|
+
return trigramResults;
|
|
1023
|
+
}
|
|
1024
|
+
} catch {
|
|
1025
|
+
// trigram table may not exist; fall through to LIKE OR
|
|
1026
|
+
}
|
|
1027
|
+
}
|
|
1028
|
+
return this.searchLikeCjk(
|
|
837
1029
|
input.query,
|
|
838
1030
|
limit,
|
|
839
1031
|
input.conversationId,
|
|
@@ -950,6 +1142,183 @@ export class SummaryStore {
|
|
|
950
1142
|
conversationId: row.conversation_id,
|
|
951
1143
|
kind: row.kind,
|
|
952
1144
|
snippet: createFallbackSnippet(row.content, plan.terms),
|
|
1145
|
+
createdAt: parseUtcTimestamp(row.created_at),
|
|
1146
|
+
rank: 0,
|
|
1147
|
+
}));
|
|
1148
|
+
}
|
|
1149
|
+
|
|
1150
|
+
/** Return every maximal run of CJK characters in `query`, in order of appearance. */
private extractCjkSegments(query: string): string[] {
  const found = query.match(CJK_QUERY_SEGMENT_RE);
  return found === null ? [] : found;
}
|
|
1153
|
+
|
|
1154
|
+
/** Return the distinct Latin/numeric tokens of `query`, lower-cased, in first-seen order. */
private extractLatinTokens(query: string): string[] {
  const unique = new Set<string>();
  for (const raw of query.match(LATIN_QUERY_TOKEN_RE) ?? []) {
    unique.add(raw.toLowerCase());
  }
  return Array.from(unique);
}
|
|
1158
|
+
|
|
1159
|
+
/**
 * Escape the LIKE metacharacters (`\`, `%`, `_`) in `term` with a leading
 * backslash, for use with `LIKE ? ESCAPE '\'`.
 */
private escapeLikeTerm(term: string): string {
  return term.replace(/[\\%_]/g, (ch) => `\\${ch}`);
}
|
|
1162
|
+
|
|
1163
|
+
// ── CJK trigram FTS search ──────────────────────────────────────────────
|
|
1164
|
+
// Each CJK segment of 3-4 chars is used whole; longer segments are split into overlapping 4-char chunks for
|
|
1165
|
+
// trigram MATCH with OR semantics within the segment. Segment groups are
|
|
1166
|
+
// combined with AND, and Latin tokens are applied as LIKE filters so mixed
|
|
1167
|
+
// queries still require every part of the user's intent.
|
|
1168
|
+
|
|
1169
|
+
/**
 * Split a CJK string into overlapping chunks of `size` characters
 * (a sliding window advancing one UTF-16 code unit at a time).
 * Duplicate chunks are returned once, in first-seen order.
 *
 * E.g. "端到端测试结果" with size=4 →
 * ["端到端测", "到端测试", "端测试结", "测试结果"]
 *
 * @param text Text to window over.
 * @param size Window length; must be a positive integer.
 * @returns The distinct chunks, or `[]` when `text` is shorter than `size`
 *   or `size` is not a positive integer.
 */
private splitCjkChunks(text: string, size: number): string[] {
  // A non-positive or fractional window has no meaningful chunks.
  if (!Number.isInteger(size) || size < 1) {
    return [];
  }
  // Set keeps insertion order and makes de-duplication O(1) per chunk.
  const chunks = new Set<string>();
  for (let start = 0; start + size <= text.length; start++) {
    chunks.add(text.slice(start, start + size));
  }
  return [...chunks];
}
|
|
1184
|
+
|
|
1185
|
+
/**
 * Search summaries through the `summaries_fts_cjk` FTS table.
 *
 * Only CJK segments of at least 3 characters are used; if there are none the
 * result is empty. Each segment becomes one OR-group of quoted terms (the
 * segment itself when it has at most 4 characters, otherwise its overlapping
 * 4-character chunks); groups are ANDed together. Every Latin token is added
 * as a case-insensitive LIKE filter on the summary content.
 *
 * @param query Raw user query.
 * @param limit Maximum number of rows.
 * @param conversationId Optional conversation filter.
 * @param since Optional inclusive lower bound on `created_at`.
 * @param before Optional exclusive upper bound on `created_at`.
 * @returns Matches ordered by FTS rank.
 */
private searchCjkTrigram(
  query: string,
  limit: number,
  conversationId?: number,
  since?: Date,
  before?: Date,
): SummarySearchResult[] {
  const longSegments = this.extractCjkSegments(query).filter((segment) => segment.length >= 3);
  if (longSegments.length === 0) {
    return [];
  }
  const latinTokens = this.extractLatinTokens(query);

  // One parenthesized OR-group per segment, all groups joined with AND.
  const matchExpression = longSegments
    .map((segment) => {
      const pieces = segment.length <= 4 ? [segment] : this.splitCjkChunks(segment, 4);
      const quoted = [...new Set(pieces)].map((piece) => `"${piece.replace(/"/g, '""')}"`);
      return `(${quoted.join(" OR ")})`;
    })
    .join(" AND ");

  const conditions: string[] = ["summaries_fts_cjk MATCH ?"];
  const bindings: Array<string | number> = [matchExpression];

  for (const token of latinTokens) {
    conditions.push("LOWER(s.content) LIKE ? ESCAPE '\\'");
    bindings.push(`%${this.escapeLikeTerm(token)}%`);
  }
  if (conversationId != null) {
    conditions.push("s.conversation_id = ?");
    bindings.push(conversationId);
  }
  if (since) {
    conditions.push("julianday(s.created_at) >= julianday(?)");
    bindings.push(since.toISOString());
  }
  if (before) {
    conditions.push("julianday(s.created_at) < julianday(?)");
    bindings.push(before.toISOString());
  }
  // LIMIT placeholder is last in the statement, so its binding goes last.
  bindings.push(limit);

  const sql = `SELECT
       f.summary_id,
       s.conversation_id,
       s.kind,
       snippet(summaries_fts_cjk, 1, '', '', '...', 32) AS snippet,
       rank,
       s.created_at
     FROM summaries_fts_cjk f
     JOIN summaries s ON s.summary_id = f.summary_id
     WHERE ${conditions.join(" AND ")}
     ORDER BY rank
     LIMIT ?`;
  const found = this.db.prepare(sql).all(...bindings) as unknown as SummarySearchRow[];
  return found.map((row) => toSearchResult(row));
}
|
|
1245
|
+
|
|
1246
|
+
// ── CJK LIKE fallback ────────────────────────────────────────────────────
|
|
1247
|
+
// When trigram search is skipped (a CJK segment is shorter than 3 chars), fails, or finds nothing, split each CJK segment into
|
|
1248
|
+
// sliding-window terms so partial matches still work. Terms within a single
|
|
1249
|
+
// segment are ORed together, but each segment and Latin token still has to
|
|
1250
|
+
// match so mixed queries keep full-intent semantics.
|
|
1251
|
+
|
|
1252
|
+
private searchLikeCjk(
|
|
1253
|
+
query: string,
|
|
1254
|
+
limit: number,
|
|
1255
|
+
conversationId?: number,
|
|
1256
|
+
since?: Date,
|
|
1257
|
+
before?: Date,
|
|
1258
|
+
): SummarySearchResult[] {
|
|
1259
|
+
const cjkSegments = this.extractCjkSegments(query);
|
|
1260
|
+
const latinTokens = this.extractLatinTokens(query);
|
|
1261
|
+
if (cjkSegments.length === 0 && latinTokens.length === 0) {
|
|
1262
|
+
return [];
|
|
1263
|
+
}
|
|
1264
|
+
|
|
1265
|
+
const cjkTerms: string[] = [];
|
|
1266
|
+
const cjkClauses: string[] = [];
|
|
1267
|
+
const cjkArgs: string[] = [];
|
|
1268
|
+
for (const segment of cjkSegments) {
|
|
1269
|
+
const segmentTerms =
|
|
1270
|
+
segment.length === 1
|
|
1271
|
+
? [segment]
|
|
1272
|
+
: segment.length === 2
|
|
1273
|
+
? [segment]
|
|
1274
|
+
: this.splitCjkChunks(segment, 2);
|
|
1275
|
+
const uniqueTerms = [...new Set(segmentTerms)];
|
|
1276
|
+
cjkTerms.push(...uniqueTerms);
|
|
1277
|
+
cjkClauses.push(
|
|
1278
|
+
`(${uniqueTerms.map(() => `LOWER(content) LIKE ? ESCAPE '\\'`).join(" OR ")})`,
|
|
1279
|
+
);
|
|
1280
|
+
cjkArgs.push(
|
|
1281
|
+
...uniqueTerms.map((term) => `%${this.escapeLikeTerm(term.toLowerCase())}%`),
|
|
1282
|
+
);
|
|
1283
|
+
}
|
|
1284
|
+
|
|
1285
|
+
const latinClauses = latinTokens.map(() => `LOWER(content) LIKE ? ESCAPE '\\'`);
|
|
1286
|
+
const latinArgs = latinTokens.map((token) => `%${this.escapeLikeTerm(token)}%`);
|
|
1287
|
+
|
|
1288
|
+
const where: string[] = [...cjkClauses, ...latinClauses];
|
|
1289
|
+
const args: Array<string | number> = [...cjkArgs, ...latinArgs];
|
|
1290
|
+
if (conversationId != null) {
|
|
1291
|
+
where.push("conversation_id = ?");
|
|
1292
|
+
args.push(conversationId);
|
|
1293
|
+
}
|
|
1294
|
+
if (since) {
|
|
1295
|
+
where.push("julianday(created_at) >= julianday(?)");
|
|
1296
|
+
args.push(since.toISOString());
|
|
1297
|
+
}
|
|
1298
|
+
if (before) {
|
|
1299
|
+
where.push("julianday(created_at) < julianday(?)");
|
|
1300
|
+
args.push(before.toISOString());
|
|
1301
|
+
}
|
|
1302
|
+
args.push(limit);
|
|
1303
|
+
|
|
1304
|
+
const rows = this.db
|
|
1305
|
+
.prepare(
|
|
1306
|
+
`SELECT summary_id, conversation_id, kind, depth, content, token_count, file_ids,
|
|
1307
|
+
earliest_at, latest_at, descendant_count, descendant_token_count,
|
|
1308
|
+
source_message_token_count, model, created_at
|
|
1309
|
+
FROM summaries
|
|
1310
|
+
WHERE ${where.join(" AND ")}
|
|
1311
|
+
ORDER BY created_at DESC
|
|
1312
|
+
LIMIT ?`,
|
|
1313
|
+
)
|
|
1314
|
+
.all(...args) as unknown as SummaryRow[];
|
|
1315
|
+
|
|
1316
|
+
const snippetTerms = cjkTerms.length > 0 ? [...new Set([...cjkTerms, ...latinTokens])] : latinTokens;
|
|
1317
|
+
// Parse `created_at` with `parseUtcTimestamp`, the same helper every other
// row mapper in this file uses, instead of a bare `new Date(...)`.
// NOTE(review): presumably the helper pins timezone-less SQLite timestamps
// to UTC, which `new Date` would not — confirm against parse-utc-timestamp.
return rows.map((row) => ({
  summaryId: row.summary_id,
  conversationId: row.conversation_id,
  kind: row.kind,
  snippet: createFallbackSnippet(row.content, snippetTerms),
  createdAt: parseUtcTimestamp(row.created_at),
  rank: 0,
}));
|
|
@@ -1014,7 +1383,7 @@ export class SummaryStore {
|
|
|
1014
1383
|
conversationId: row.conversation_id,
|
|
1015
1384
|
kind: row.kind,
|
|
1016
1385
|
snippet: match[0],
|
|
1017
|
-
createdAt:
|
|
1386
|
+
createdAt: parseUtcTimestamp(row.created_at),
|
|
1018
1387
|
rank: 0,
|
|
1019
1388
|
});
|
|
1020
1389
|
}
|
package/src/summarize.ts
CHANGED
|
@@ -33,6 +33,16 @@ type ResolvedSummaryCandidate = SummaryResolutionCandidate & {
|
|
|
33
33
|
model: string;
|
|
34
34
|
};
|
|
35
35
|
|
|
36
|
+
/**
 * Build the string key identifying a summarizer candidate by provider, model
 * and auth profile.
 *
 * The auth-profile part is `legacyAuthProfileId` only when the candidate uses
 * the legacy auth profile and an id was supplied; otherwise it is "-".
 *
 * @returns `provider:<p>;model:<m>;authProfile:<id or ->`.
 */
function buildSummarizerBreakerKey(params: {
  candidate: ResolvedSummaryCandidate;
  legacyAuthProfileId?: string;
}): string {
  const { candidate, legacyAuthProfileId } = params;

  let authProfileId = "-";
  if (candidate.useLegacyAuthProfile) {
    authProfileId = legacyAuthProfileId ?? "-";
  }

  return [
    `provider:${candidate.provider}`,
    `model:${candidate.model}`,
    `authProfile:${authProfileId}`,
  ].join(";");
}
|
|
45
|
+
|
|
36
46
|
type SummaryMode = "normal" | "aggressive";
|
|
37
47
|
|
|
38
48
|
const DEFAULT_LEAF_TARGET_TOKENS = 2400;
|
|
@@ -49,6 +59,18 @@ const AUTH_ERROR_TEXT_PATTERN =
|
|
|
49
59
|
/\b401\b|unauthorized|unauthorised|invalid[_ -]?token|invalid[_ -]?api[_ -]?key|authentication failed|authorization failed|missing scope|insufficient scope|model\.request\b/i;
|
|
50
60
|
const AUTH_ERROR_STATUS_KEYS = ["status", "statusCode", "status_code"] as const;
|
|
51
61
|
const AUTH_ERROR_NESTED_KEYS = ["error", "response", "cause", "details", "data", "body"] as const;
|
|
62
|
+
/**
 * Top-level property names whose presence marks a value as carrying
 * error/status information worth inspecting for auth failures.
 * `message` and `response` are deliberately absent from this list.
 */
const AUTH_ERROR_TOP_LEVEL_KEYS = [
  // error payloads
  "error",
  "errorMessage",
  // status indicators
  "status",
  "statusCode",
  "status_code",
  "code",
  // nested containers
  "details",
  "cause",
  "data",
  "body",
] as const;
|
|
52
74
|
|
|
53
75
|
type ProviderAuthFailure = {
|
|
54
76
|
statusCode?: number;
|
|
@@ -411,6 +433,21 @@ function extractAuthFailureStatusCode(value: unknown, depth = 0): number | undef
|
|
|
411
433
|
return undefined;
|
|
412
434
|
}
|
|
413
435
|
|
|
436
|
+
/** True when `value` has at least one of the `AUTH_ERROR_TOP_LEVEL_KEYS` properties. */
function hasTopLevelAuthInspectionKeys(value: Record<string, unknown>): boolean {
  for (const key of AUTH_ERROR_TOP_LEVEL_KEYS) {
    if (key in value) {
      return true;
    }
  }
  return false;
}
|
|
439
|
+
|
|
440
|
+
/**
 * Heuristic: does this plain record look like a thrown (possibly serialized)
 * error rather than a successful response envelope?
 *
 * It does when any of the following holds:
 *  - `name` is a string containing a word that ends in "error"
 *    (e.g. "Error", "TypeError", "APIError");
 *  - it has a `stack` property;
 *  - it has a string `message` and none of `content`, `response`, `output`.
 *
 * @param value Record to classify.
 * @returns `true` when the record is error-shaped.
 */
function looksLikeThrownError(value: Record<string, unknown>): boolean {
  // No leading \b: CamelCase names such as "TypeError" have no word boundary
  // before "Error", so /\berror\b/ would miss nearly every real error class.
  if (typeof value.name === "string" && /error\b/i.test(value.name)) {
    return true;
  }
  if ("stack" in value) {
    return true;
  }
  // A bare `message` is only treated as an error when nothing suggests a
  // successful payload is travelling alongside it.
  const hasPayloadKey = "content" in value || "response" in value || "output" in value;
  return typeof value.message === "string" && !hasPayloadKey;
}
|
|
450
|
+
|
|
414
451
|
function pickAuthInspectionValue(value: unknown): unknown {
|
|
415
452
|
if (!isRecord(value)) {
|
|
416
453
|
return value;
|
|
@@ -420,26 +457,43 @@ function pickAuthInspectionValue(value: unknown): unknown {
|
|
|
420
457
|
}
|
|
421
458
|
|
|
422
459
|
const subset: Record<string, unknown> = {};
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
"status",
|
|
428
|
-
"statusCode",
|
|
429
|
-
"status_code",
|
|
430
|
-
"code",
|
|
431
|
-
"details",
|
|
432
|
-
"response",
|
|
433
|
-
"cause",
|
|
434
|
-
]) {
|
|
460
|
+
const hasTopLevelAuthKeys = hasTopLevelAuthInspectionKeys(value);
|
|
461
|
+
const errorLike = value instanceof Error || looksLikeThrownError(value);
|
|
462
|
+
|
|
463
|
+
for (const key of AUTH_ERROR_TOP_LEVEL_KEYS) {
|
|
435
464
|
if (key in value) {
|
|
436
465
|
subset[key] = value[key];
|
|
437
466
|
}
|
|
438
467
|
}
|
|
439
|
-
|
|
468
|
+
|
|
469
|
+
// Only inspect top-level message payloads when the envelope already looks
|
|
470
|
+
// error-shaped. Successful summary responses also use `message`.
|
|
471
|
+
if ((hasTopLevelAuthKeys || errorLike) && "message" in value) {
|
|
472
|
+
subset.message = value.message;
|
|
473
|
+
}
|
|
474
|
+
|
|
475
|
+
// `response` can carry either an error payload or successful summary text.
|
|
476
|
+
// Include it only when the surrounding or nested shape already looks like an
|
|
477
|
+
// error envelope.
|
|
478
|
+
if ("response" in value) {
|
|
479
|
+
const response = value.response;
|
|
480
|
+
if (
|
|
481
|
+
hasTopLevelAuthKeys ||
|
|
482
|
+
(isRecord(response) && hasTopLevelAuthInspectionKeys(response)) ||
|
|
483
|
+
(isRecord(response) && looksLikeThrownError(response))
|
|
484
|
+
) {
|
|
485
|
+
subset.response = response;
|
|
486
|
+
}
|
|
487
|
+
}
|
|
488
|
+
|
|
489
|
+
return Object.keys(subset).length > 0 ? subset : {};
|
|
440
490
|
}
|
|
441
491
|
|
|
442
|
-
|
|
492
|
+
/** @internal Exported for testing only. */
|
|
493
|
+
export function extractProviderAuthFailure(
|
|
494
|
+
value: unknown,
|
|
495
|
+
opts?: { requireStructuralSignal?: boolean },
|
|
496
|
+
): ProviderAuthFailure | undefined {
|
|
443
497
|
const inspectValue = pickAuthInspectionValue(value);
|
|
444
498
|
const statusCode = extractAuthFailureStatusCode(inspectValue);
|
|
445
499
|
const textParts: string[] = [];
|
|
@@ -449,7 +503,20 @@ function extractProviderAuthFailure(value: unknown): ProviderAuthFailure | undef
|
|
|
449
503
|
const hasScopeSignal =
|
|
450
504
|
missingModelRequestScope || /\b(missing|insufficient)\s+scope\b/i.test(normalizedMessage);
|
|
451
505
|
|
|
452
|
-
|
|
506
|
+
// When requireStructuralSignal is set (e.g. checking a successful API response
|
|
507
|
+
// rather than a caught error), only detect auth failures that have a concrete
|
|
508
|
+
// structural indicator (HTTP 401 status code or an explicit provider_auth error
|
|
509
|
+
// kind). Plain text matches in the response body are NOT sufficient — the LLM
|
|
510
|
+
// summary content may legitimately discuss auth errors without being one.
|
|
511
|
+
const hasExplicitErrorKind =
|
|
512
|
+
isRecord(value) && isRecord((value as Record<string, unknown>).error) &&
|
|
513
|
+
((value as Record<string, unknown>).error as Record<string, unknown>).kind === "provider_auth";
|
|
514
|
+
|
|
515
|
+
if (opts?.requireStructuralSignal) {
|
|
516
|
+
if (statusCode !== 401 && !hasExplicitErrorKind) {
|
|
517
|
+
return undefined;
|
|
518
|
+
}
|
|
519
|
+
} else if (statusCode !== 401 && !hasScopeSignal && !AUTH_ERROR_TEXT_PATTERN.test(normalizedMessage)) {
|
|
453
520
|
return undefined;
|
|
454
521
|
}
|
|
455
522
|
|
|
@@ -1041,7 +1108,7 @@ export async function createLcmSummarizeFromLegacyParams(params: {
|
|
|
1041
1108
|
deps: LcmDependencies;
|
|
1042
1109
|
legacyParams: LcmSummarizerLegacyParams;
|
|
1043
1110
|
customInstructions?: string;
|
|
1044
|
-
}): Promise<{ fn: LcmSummarizeFn; model: string } | undefined> {
|
|
1111
|
+
}): Promise<{ fn: LcmSummarizeFn; model: string; breakerKey: string } | undefined> {
|
|
1045
1112
|
const resolvedCandidates = resolveSummaryCandidates(params);
|
|
1046
1113
|
if (resolvedCandidates.length === 0) {
|
|
1047
1114
|
console.error("[lcm] createLcmSummarize: no summary model candidates resolved");
|
|
@@ -1168,7 +1235,11 @@ export async function createLcmSummarizeFromLegacyParams(params: {
|
|
|
1168
1235
|
|
|
1169
1236
|
try {
|
|
1170
1237
|
const directResult = await runSummarizerCall(directApiKey, "auth-retry", reasoning);
|
|
1171
|
-
|
|
1238
|
+
// Use requireStructuralSignal on the retry success path too — the
|
|
1239
|
+
// summary text may legitimately contain auth-error phrases.
|
|
1240
|
+
const directFailure = extractProviderAuthFailure(directResult, {
|
|
1241
|
+
requireStructuralSignal: true,
|
|
1242
|
+
});
|
|
1172
1243
|
if (directFailure) {
|
|
1173
1244
|
const retryAuthError = new LcmProviderAuthError({
|
|
1174
1245
|
provider,
|
|
@@ -1186,7 +1257,11 @@ export async function createLcmSummarizeFromLegacyParams(params: {
|
|
|
1186
1257
|
if (directErr instanceof LcmProviderAuthError) {
|
|
1187
1258
|
throw directErr;
|
|
1188
1259
|
}
|
|
1189
|
-
|
|
1260
|
+
// Catch path: real errors carry structural signals (HTTP 401, error.kind),
|
|
1261
|
+
// so requireStructuralSignal is safe here too.
|
|
1262
|
+
const directFailure = extractProviderAuthFailure(directErr, {
|
|
1263
|
+
requireStructuralSignal: true,
|
|
1264
|
+
});
|
|
1190
1265
|
if (directFailure) {
|
|
1191
1266
|
const retryAuthError = new LcmProviderAuthError({
|
|
1192
1267
|
provider,
|
|
@@ -1207,7 +1282,12 @@ export async function createLcmSummarizeFromLegacyParams(params: {
|
|
|
1207
1282
|
const apiKey = await params.deps.getApiKey(provider, model, lookupOptions);
|
|
1208
1283
|
try {
|
|
1209
1284
|
const result = await runSummarizerCall(apiKey, label, reasoning);
|
|
1210
|
-
|
|
1285
|
+
// Use requireStructuralSignal so that LLM summary text containing
|
|
1286
|
+
// auth-related words (e.g. "provider auth error") is NOT mistaken
|
|
1287
|
+
// for an actual API auth failure.
|
|
1288
|
+
const authFailure = extractProviderAuthFailure(result, {
|
|
1289
|
+
requireStructuralSignal: true,
|
|
1290
|
+
});
|
|
1211
1291
|
if (!authFailure) {
|
|
1212
1292
|
return result;
|
|
1213
1293
|
}
|
|
@@ -1387,5 +1467,12 @@ export async function createLcmSummarizeFromLegacyParams(params: {
|
|
|
1387
1467
|
return "";
|
|
1388
1468
|
};
|
|
1389
1469
|
|
|
1390
|
-
return {
|
|
1470
|
+
return {
|
|
1471
|
+
fn,
|
|
1472
|
+
model: resolvedCandidates[0]!.model,
|
|
1473
|
+
breakerKey: buildSummarizerBreakerKey({
|
|
1474
|
+
candidate: resolvedCandidates[0]!,
|
|
1475
|
+
legacyAuthProfileId,
|
|
1476
|
+
}),
|
|
1477
|
+
};
|
|
1391
1478
|
}
|