@martian-engineering/lossless-claw 0.7.0 → 0.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. package/README.md +19 -3
  2. package/dist/index.js +19240 -0
  3. package/docs/agent-tools.md +9 -4
  4. package/docs/configuration.md +24 -5
  5. package/openclaw.plugin.json +27 -3
  6. package/package.json +7 -6
  7. package/skills/lossless-claw/SKILL.md +3 -2
  8. package/skills/lossless-claw/references/architecture.md +12 -0
  9. package/skills/lossless-claw/references/config.md +37 -0
  10. package/skills/lossless-claw/references/diagnostics.md +13 -0
  11. package/index.ts +0 -2
  12. package/src/assembler.ts +0 -1188
  13. package/src/compaction.ts +0 -1756
  14. package/src/db/config.ts +0 -345
  15. package/src/db/connection.ts +0 -141
  16. package/src/db/features.ts +0 -42
  17. package/src/db/migration.ts +0 -746
  18. package/src/engine.ts +0 -4306
  19. package/src/expansion-auth.ts +0 -365
  20. package/src/expansion-policy.ts +0 -303
  21. package/src/expansion.ts +0 -383
  22. package/src/integrity.ts +0 -600
  23. package/src/large-files.ts +0 -546
  24. package/src/lcm-log.ts +0 -37
  25. package/src/openclaw-bridge.ts +0 -22
  26. package/src/plugin/index.ts +0 -1960
  27. package/src/plugin/lcm-command.ts +0 -765
  28. package/src/plugin/lcm-doctor-apply.ts +0 -542
  29. package/src/plugin/lcm-doctor-shared.ts +0 -210
  30. package/src/plugin/shared-init.ts +0 -59
  31. package/src/prune.ts +0 -391
  32. package/src/retrieval.ts +0 -363
  33. package/src/session-patterns.ts +0 -23
  34. package/src/startup-banner-log.ts +0 -49
  35. package/src/store/compaction-telemetry-store.ts +0 -156
  36. package/src/store/conversation-store.ts +0 -929
  37. package/src/store/fts5-sanitize.ts +0 -50
  38. package/src/store/full-text-fallback.ts +0 -83
  39. package/src/store/full-text-sort.ts +0 -21
  40. package/src/store/index.ts +0 -39
  41. package/src/store/parse-utc-timestamp.ts +0 -25
  42. package/src/store/summary-store.ts +0 -1519
  43. package/src/summarize.ts +0 -1511
  44. package/src/tools/common.ts +0 -53
  45. package/src/tools/lcm-conversation-scope.ts +0 -127
  46. package/src/tools/lcm-describe-tool.ts +0 -245
  47. package/src/tools/lcm-expand-query-tool.ts +0 -831
  48. package/src/tools/lcm-expand-tool.delegation.ts +0 -580
  49. package/src/tools/lcm-expand-tool.ts +0 -453
  50. package/src/tools/lcm-expansion-recursion-guard.ts +0 -373
  51. package/src/tools/lcm-grep-tool.ts +0 -228
  52. package/src/transaction-mutex.ts +0 -136
  53. package/src/transcript-repair.ts +0 -301
  54. package/src/types.ts +0 -165
@@ -1,1519 +0,0 @@
1
- import type { DatabaseSync } from "node:sqlite";
2
- import { withDatabaseTransaction } from "../transaction-mutex.js";
3
- import { sanitizeFts5Query } from "./fts5-sanitize.js";
4
- import { buildLikeSearchPlan, containsCjk, createFallbackSnippet } from "./full-text-fallback.js";
5
- import { parseUtcTimestamp, parseUtcTimestampOrNull } from "./parse-utc-timestamp.js";
6
- import { buildFtsOrderBy, type SearchSort } from "./full-text-sort.js";
7
-
8
- export type SummaryKind = "leaf" | "condensed";
9
- export type ContextItemType = "message" | "summary";
10
-
11
- export type CreateSummaryInput = {
12
- summaryId: string;
13
- conversationId: number;
14
- kind: SummaryKind;
15
- depth?: number;
16
- content: string;
17
- tokenCount: number;
18
- fileIds?: string[];
19
- earliestAt?: Date;
20
- latestAt?: Date;
21
- descendantCount?: number;
22
- descendantTokenCount?: number;
23
- sourceMessageTokenCount?: number;
24
- model?: string;
25
- };
26
-
27
- export type SummaryRecord = {
28
- summaryId: string;
29
- conversationId: number;
30
- kind: SummaryKind;
31
- depth: number;
32
- content: string;
33
- tokenCount: number;
34
- fileIds: string[];
35
- earliestAt: Date | null;
36
- latestAt: Date | null;
37
- descendantCount: number;
38
- descendantTokenCount: number;
39
- sourceMessageTokenCount: number;
40
- model: string;
41
- createdAt: Date;
42
- };
43
-
44
- export type SummarySubtreeNodeRecord = SummaryRecord & {
45
- depthFromRoot: number;
46
- parentSummaryId: string | null;
47
- path: string;
48
- childCount: number;
49
- };
50
-
51
- export type MessageLeafSummaryLinkRecord = {
52
- messageId: number;
53
- summaryId: string;
54
- };
55
-
56
- export type ContextItemRecord = {
57
- conversationId: number;
58
- ordinal: number;
59
- itemType: ContextItemType;
60
- messageId: number | null;
61
- summaryId: string | null;
62
- createdAt: Date;
63
- };
64
-
65
- export type SummarySearchInput = {
66
- conversationId?: number;
67
- query: string;
68
- mode: "regex" | "full_text";
69
- since?: Date;
70
- before?: Date;
71
- limit?: number;
72
- sort?: SearchSort;
73
- };
74
-
75
- export type SummarySearchResult = {
76
- summaryId: string;
77
- conversationId: number;
78
- kind: SummaryKind;
79
- snippet: string;
80
- createdAt: Date;
81
- rank?: number;
82
- };
83
-
84
- export type CreateLargeFileInput = {
85
- fileId: string;
86
- conversationId: number;
87
- fileName?: string;
88
- mimeType?: string;
89
- byteSize?: number;
90
- storageUri: string;
91
- explorationSummary?: string;
92
- };
93
-
94
- export type LargeFileRecord = {
95
- fileId: string;
96
- conversationId: number;
97
- fileName: string | null;
98
- mimeType: string | null;
99
- byteSize: number | null;
100
- storageUri: string;
101
- explorationSummary: string | null;
102
- createdAt: Date;
103
- };
104
-
105
- export type UpsertConversationBootstrapStateInput = {
106
- conversationId: number;
107
- sessionFilePath: string;
108
- lastSeenSize: number;
109
- lastSeenMtimeMs: number;
110
- lastProcessedOffset: number;
111
- lastProcessedEntryHash?: string | null;
112
- };
113
-
114
- export type ConversationBootstrapStateRecord = {
115
- conversationId: number;
116
- sessionFilePath: string;
117
- lastSeenSize: number;
118
- lastSeenMtimeMs: number;
119
- lastProcessedOffset: number;
120
- lastProcessedEntryHash: string | null;
121
- updatedAt: Date;
122
- };
123
-
124
- export type TranscriptGcCandidateRecord = {
125
- messageId: number;
126
- conversationId: number;
127
- seq: number;
128
- toolCallId: string;
129
- toolName: string | null;
130
- externalizedFileId: string | null;
131
- originalByteSize: number | null;
132
- };
133
-
134
// ── DB row shapes (snake_case) ────────────────────────────────────────────────
// These mirror the SQLite column names exactly as SELECTed; the row mappers
// below convert them into the camelCase record types.

interface SummaryRow {
  summary_id: string;
  conversation_id: number;
  kind: SummaryKind;
  depth: number;
  content: string;
  token_count: number;
  // JSON-encoded string[] (parsed defensively in toSummaryRecord).
  file_ids: string;
  earliest_at: string | null;
  latest_at: string | null;
  descendant_count: number | null;
  descendant_token_count: number | null;
  source_message_token_count: number | null;
  model: string | null;
  created_at: string;
}

// SummaryRow augmented with placement data from the recursive subtree query.
interface SummarySubtreeRow extends SummaryRow {
  depth_from_root: number;
  parent_summary_id: string | null;
  path: string;
  child_count: number | null;
}

interface ContextItemRow {
  conversation_id: number;
  ordinal: number;
  item_type: ContextItemType;
  message_id: number | null;
  summary_id: string | null;
  created_at: string;
}

interface SummarySearchRow {
  summary_id: string;
  conversation_id: number;
  kind: SummaryKind;
  snippet: string;
  rank: number;
  created_at: string;
}

interface MaxOrdinalRow {
  max_ordinal: number;
}

interface DistinctDepthRow {
  depth: number;
}

interface TokenSumRow {
  total: number;
}

interface MessageIdRow {
  message_id: number;
}

interface MaxDepthRow {
  max_depth: number | null;
}

interface MessageLeafSummaryLinkRow {
  message_id: number;
  summary_id: string;
}

interface LargeFileRow {
  file_id: string;
  conversation_id: number;
  file_name: string | null;
  mime_type: string | null;
  byte_size: number | null;
  storage_uri: string;
  exploration_summary: string | null;
  created_at: string;
}

interface ConversationBootstrapStateRow {
  conversation_id: number;
  session_file_path: string;
  last_seen_size: number;
  last_seen_mtime_ms: number;
  last_processed_offset: number;
  last_processed_entry_hash: string | null;
  updated_at: string;
}

// Matches runs of CJK characters (Han ranges, Hangul, Hiragana, Katakana)
// in a search query. NOTE(review): used by search code outside this view.
const CJK_QUERY_SEGMENT_RE =
  /[\u2E80-\u9FFF\u3400-\u4DBF\uF900-\uFAFF\uAC00-\uD7AF\u3040-\u309F\u30A0-\u30FF]+/g;
// Matches Latin/numeric query tokens (word chars plus ./-).
const LATIN_QUERY_TOKEN_RE = /[a-zA-Z0-9][\w./-]*/g;

interface TranscriptGcCandidateRow {
  message_id: number;
  conversation_id: number;
  seq: number;
  tool_call_id: string | null;
  tool_name: string | null;
  // JSON blob; toTranscriptGcCandidateRecord reads toolOutputExternalized,
  // externalizedFileId and originalByteSize out of it.
  metadata: string | null;
}
235
- // ── Row mappers ───────────────────────────────────────────────────────────────
236
-
237
- function toSummaryRecord(row: SummaryRow): SummaryRecord {
238
- let fileIds: string[] = [];
239
- try {
240
- fileIds = JSON.parse(row.file_ids);
241
- } catch {
242
- // ignore malformed JSON
243
- }
244
- return {
245
- summaryId: row.summary_id,
246
- conversationId: row.conversation_id,
247
- kind: row.kind,
248
- depth: row.depth,
249
- content: row.content,
250
- tokenCount: row.token_count,
251
- fileIds,
252
- earliestAt: parseUtcTimestampOrNull(row.earliest_at),
253
- latestAt: parseUtcTimestampOrNull(row.latest_at),
254
- descendantCount:
255
- typeof row.descendant_count === "number" &&
256
- Number.isFinite(row.descendant_count) &&
257
- row.descendant_count >= 0
258
- ? Math.floor(row.descendant_count)
259
- : 0,
260
- descendantTokenCount:
261
- typeof row.descendant_token_count === "number" &&
262
- Number.isFinite(row.descendant_token_count) &&
263
- row.descendant_token_count >= 0
264
- ? Math.floor(row.descendant_token_count)
265
- : 0,
266
- sourceMessageTokenCount:
267
- typeof row.source_message_token_count === "number" &&
268
- Number.isFinite(row.source_message_token_count) &&
269
- row.source_message_token_count >= 0
270
- ? Math.floor(row.source_message_token_count)
271
- : 0,
272
- model: typeof row.model === "string" ? row.model : "unknown",
273
- createdAt: parseUtcTimestamp(row.created_at),
274
- };
275
- }
276
-
277
- function toContextItemRecord(row: ContextItemRow): ContextItemRecord {
278
- return {
279
- conversationId: row.conversation_id,
280
- ordinal: row.ordinal,
281
- itemType: row.item_type,
282
- messageId: row.message_id,
283
- summaryId: row.summary_id,
284
- createdAt: parseUtcTimestamp(row.created_at),
285
- };
286
- }
287
-
288
- function toSearchResult(row: SummarySearchRow): SummarySearchResult {
289
- return {
290
- summaryId: row.summary_id,
291
- conversationId: row.conversation_id,
292
- kind: row.kind,
293
- snippet: row.snippet,
294
- createdAt: parseUtcTimestamp(row.created_at),
295
- rank: row.rank,
296
- };
297
- }
298
-
299
- function toLargeFileRecord(row: LargeFileRow): LargeFileRecord {
300
- return {
301
- fileId: row.file_id,
302
- conversationId: row.conversation_id,
303
- fileName: row.file_name,
304
- mimeType: row.mime_type,
305
- byteSize: row.byte_size,
306
- storageUri: row.storage_uri,
307
- explorationSummary: row.exploration_summary,
308
- createdAt: parseUtcTimestamp(row.created_at),
309
- };
310
- }
311
-
312
- function toConversationBootstrapStateRecord(
313
- row: ConversationBootstrapStateRow,
314
- ): ConversationBootstrapStateRecord {
315
- return {
316
- conversationId: row.conversation_id,
317
- sessionFilePath: row.session_file_path,
318
- lastSeenSize: row.last_seen_size,
319
- lastSeenMtimeMs: row.last_seen_mtime_ms,
320
- lastProcessedOffset: row.last_processed_offset,
321
- lastProcessedEntryHash: row.last_processed_entry_hash,
322
- updatedAt: parseUtcTimestamp(row.updated_at),
323
- };
324
- }
325
-
326
- function toTranscriptGcCandidateRecord(
327
- row: TranscriptGcCandidateRow,
328
- ): TranscriptGcCandidateRecord | null {
329
- if (typeof row.tool_call_id !== "string" || row.tool_call_id.length === 0) {
330
- return null;
331
- }
332
-
333
- let metadata: Record<string, unknown> | null = null;
334
- try {
335
- metadata =
336
- typeof row.metadata === "string" && row.metadata.length > 0
337
- ? (JSON.parse(row.metadata) as Record<string, unknown>)
338
- : null;
339
- } catch {
340
- metadata = null;
341
- }
342
-
343
- if (!metadata || metadata.toolOutputExternalized !== true) {
344
- return null;
345
- }
346
-
347
- return {
348
- messageId: row.message_id,
349
- conversationId: row.conversation_id,
350
- seq: row.seq,
351
- toolCallId: row.tool_call_id,
352
- toolName: row.tool_name,
353
- externalizedFileId:
354
- typeof metadata.externalizedFileId === "string" ? metadata.externalizedFileId : null,
355
- originalByteSize:
356
- typeof metadata.originalByteSize === "number" && Number.isFinite(metadata.originalByteSize)
357
- ? Math.max(0, Math.floor(metadata.originalByteSize))
358
- : null,
359
- };
360
- }
361
-
362
// ── SummaryStore ──────────────────────────────────────────────────────────────

/**
 * Persistence layer for summaries, their lineage links, and each
 * conversation's ordered context-item list, backed by a node:sqlite
 * DatabaseSync handle.
 */
export class SummaryStore {
  // Whether the FTS5 extension is usable; when false, insertSummary skips
  // full-text indexing entirely. Defaults to true.
  private readonly fts5Available: boolean;

  constructor(
    private db: DatabaseSync,
    options?: { fts5Available?: boolean },
  ) {
    this.fts5Available = options?.fts5Available ?? true;
  }
373
-
374
  // ── Summary CRUD ──────────────────────────────────────────────────────────

  /**
   * Persist a new summary row, then best-effort index its content into the
   * FTS5 tables. Returns the freshly inserted row re-read from the database
   * (so DB-side defaults like created_at are populated).
   *
   * Counter inputs (descendantCount, descendantTokenCount,
   * sourceMessageTokenCount) are clamped to non-negative integers (else 0);
   * depth defaults to 0 for "leaf" and 1 for "condensed" when not supplied.
   */
  async insertSummary(input: CreateSummaryInput): Promise<SummaryRecord> {
    const fileIds = JSON.stringify(input.fileIds ?? []);
    const earliestAt = input.earliestAt instanceof Date ? input.earliestAt.toISOString() : null;
    const latestAt = input.latestAt instanceof Date ? input.latestAt.toISOString() : null;
    const descendantCount =
      typeof input.descendantCount === "number" &&
      Number.isFinite(input.descendantCount) &&
      input.descendantCount >= 0
        ? Math.floor(input.descendantCount)
        : 0;
    const descendantTokenCount =
      typeof input.descendantTokenCount === "number" &&
      Number.isFinite(input.descendantTokenCount) &&
      input.descendantTokenCount >= 0
        ? Math.floor(input.descendantTokenCount)
        : 0;
    const sourceMessageTokenCount =
      typeof input.sourceMessageTokenCount === "number" &&
      Number.isFinite(input.sourceMessageTokenCount) &&
      input.sourceMessageTokenCount >= 0
        ? Math.floor(input.sourceMessageTokenCount)
        : 0;
    const depth =
      typeof input.depth === "number" && Number.isFinite(input.depth) && input.depth >= 0
        ? Math.floor(input.depth)
        : input.kind === "leaf"
          ? 0
          : 1;

    this.db
      .prepare(
        `INSERT INTO summaries (
          summary_id,
          conversation_id,
          kind,
          depth,
          content,
          token_count,
          file_ids,
          earliest_at,
          latest_at,
          descendant_count,
          descendant_token_count,
          source_message_token_count,
          model
        )
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
      )
      .run(
        input.summaryId,
        input.conversationId,
        input.kind,
        depth,
        input.content,
        input.tokenCount,
        fileIds,
        earliestAt,
        latestAt,
        descendantCount,
        descendantTokenCount,
        sourceMessageTokenCount,
        input.model ?? "unknown",
      );

    // Re-read the row so DB defaults (created_at) are included in the result.
    const row = this.db
      .prepare(
        `SELECT summary_id, conversation_id, kind, depth, content, token_count, file_ids,
          earliest_at, latest_at, descendant_count, created_at
        , descendant_token_count, source_message_token_count, model
       FROM summaries WHERE summary_id = ?`,
      )
      .get(input.summaryId) as unknown as SummaryRow;

    // Index in FTS5 as best-effort; compaction flow must continue even if
    // FTS indexing fails for any reason.
    if (!this.fts5Available) {
      return toSummaryRecord(row);
    }

    try {
      this.db
        .prepare(`INSERT INTO summaries_fts(summary_id, content) VALUES (?, ?)`)
        .run(input.summaryId, input.content);
    } catch {
      // FTS indexing failed — search won't find this summary but
      // compaction and assembly will still work correctly.
    }

    // Also index into the CJK trigram FTS table for CJK substring search.
    try {
      this.db
        .prepare(
          `INSERT INTO summaries_fts_cjk(summary_id, content) VALUES (?, ?)`,
        )
        .run(input.summaryId, input.content);
    } catch {
      // CJK trigram FTS table may not exist yet (pre-migration); ignore.
    }

    return toSummaryRecord(row);
  }
477
-
478
- async getSummary(summaryId: string): Promise<SummaryRecord | null> {
479
- const row = this.db
480
- .prepare(
481
- `SELECT summary_id, conversation_id, kind, depth, content, token_count, file_ids,
482
- earliest_at, latest_at, descendant_count, created_at
483
- , descendant_token_count, source_message_token_count, model
484
- FROM summaries WHERE summary_id = ?`,
485
- )
486
- .get(summaryId) as unknown as SummaryRow | undefined;
487
- return row ? toSummaryRecord(row) : null;
488
- }
489
-
490
- async getSummariesByConversation(conversationId: number): Promise<SummaryRecord[]> {
491
- const rows = this.db
492
- .prepare(
493
- `SELECT summary_id, conversation_id, kind, depth, content, token_count, file_ids,
494
- earliest_at, latest_at, descendant_count, created_at
495
- , descendant_token_count, source_message_token_count, model
496
- FROM summaries
497
- WHERE conversation_id = ?
498
- ORDER BY created_at`,
499
- )
500
- .all(conversationId) as unknown as SummaryRow[];
501
- return rows.map(toSummaryRecord);
502
- }
503
-
504
- // ── Lineage ───────────────────────────────────────────────────────────────
505
-
506
- async linkSummaryToMessages(summaryId: string, messageIds: number[]): Promise<void> {
507
- if (messageIds.length === 0) {
508
- return;
509
- }
510
-
511
- const stmt = this.db.prepare(
512
- `INSERT INTO summary_messages (summary_id, message_id, ordinal)
513
- VALUES (?, ?, ?)
514
- ON CONFLICT (summary_id, message_id) DO NOTHING`,
515
- );
516
-
517
- for (let idx = 0; idx < messageIds.length; idx++) {
518
- stmt.run(summaryId, messageIds[idx], idx);
519
- }
520
- }
521
-
522
- async linkSummaryToParents(summaryId: string, parentSummaryIds: string[]): Promise<void> {
523
- if (parentSummaryIds.length === 0) {
524
- return;
525
- }
526
-
527
- const stmt = this.db.prepare(
528
- `INSERT INTO summary_parents (summary_id, parent_summary_id, ordinal)
529
- VALUES (?, ?, ?)
530
- ON CONFLICT (summary_id, parent_summary_id) DO NOTHING`,
531
- );
532
-
533
- for (let idx = 0; idx < parentSummaryIds.length; idx++) {
534
- stmt.run(summaryId, parentSummaryIds[idx], idx);
535
- }
536
- }
537
-
538
- async getSummaryMessages(summaryId: string): Promise<number[]> {
539
- const rows = this.db
540
- .prepare(
541
- `SELECT message_id FROM summary_messages
542
- WHERE summary_id = ?
543
- ORDER BY ordinal`,
544
- )
545
- .all(summaryId) as unknown as MessageIdRow[];
546
- return rows.map((r) => r.message_id);
547
- }
548
-
549
- /**
550
- * Return the deepest persisted summary depth for a conversation.
551
- */
552
- async getConversationMaxSummaryDepth(conversationId: number): Promise<number | null> {
553
- const row = this.db
554
- .prepare(
555
- `SELECT MAX(depth) AS max_depth
556
- FROM summaries
557
- WHERE conversation_id = ?`,
558
- )
559
- .get(conversationId) as unknown as MaxDepthRow | undefined;
560
- return typeof row?.max_depth === "number" ? row.max_depth : null;
561
- }
562
-
563
  /**
   * Resolve raw message hits back to their linked leaf summaries.
   *
   * Non-positive or non-integer message ids and duplicates are dropped.
   * The result is grouped by message id in the deduplicated input order,
   * with each message's summary ids in SQL link order, and duplicate
   * (message, summary) pairs removed.
   */
  async getLeafSummaryLinksForMessageIds(
    conversationId: number,
    messageIds: number[],
  ): Promise<MessageLeafSummaryLinkRecord[]> {
    // Keep only positive integer ids, deduplicated.
    const normalizedMessageIds = Array.from(
      new Set(
        messageIds.filter(
          (messageId): messageId is number => Number.isInteger(messageId) && messageId > 0,
        ),
      ),
    );
    if (normalizedMessageIds.length === 0) {
      return [];
    }

    // Dynamic IN(...) placeholder list — values are still bound, not inlined.
    const placeholders = normalizedMessageIds.map(() => "?").join(", ");
    const rows = this.db
      .prepare(
        `SELECT sm.message_id, sm.summary_id
       FROM summary_messages sm
       JOIN summaries s ON s.summary_id = sm.summary_id
       WHERE s.conversation_id = ?
         AND s.kind = 'leaf'
         AND sm.message_id IN (${placeholders})
       ORDER BY sm.ordinal ASC, s.created_at ASC`,
      )
      .all(conversationId, ...normalizedMessageIds) as unknown as MessageLeafSummaryLinkRow[];

    // Group summary ids per message, preserving first-seen order and
    // dropping duplicate (message, summary) pairs.
    const summaryIdsByMessageId = new Map<number, string[]>();
    for (const row of rows) {
      const existing = summaryIdsByMessageId.get(row.message_id) ?? [];
      if (!existing.includes(row.summary_id)) {
        existing.push(row.summary_id);
        summaryIdsByMessageId.set(row.message_id, existing);
      }
    }

    // Emit links in the normalized input order rather than SQL result order.
    const orderedLinks: MessageLeafSummaryLinkRecord[] = [];
    for (const messageId of normalizedMessageIds) {
      for (const summaryId of summaryIdsByMessageId.get(messageId) ?? []) {
        orderedLinks.push({
          messageId,
          summaryId,
        });
      }
    }
    return orderedLinks;
  }
614
  /**
   * Return summarized tool-result messages that are safe candidates for
   * transcript GC because they are no longer present as raw context items.
   *
   * A candidate must: be a 'tool' role message with a 'tool' part that has a
   * non-empty tool_call_id, be covered by at least one summary, and have no
   * remaining raw 'message' context item. The limit (default 25) is applied
   * in JS after per-message deduplication, not in SQL, so that duplicate
   * parts of one message count once.
   */
  async listTranscriptGcCandidates(
    conversationId: number,
    options?: { limit?: number },
  ): Promise<TranscriptGcCandidateRecord[]> {
    const limit =
      typeof options?.limit === "number" && Number.isFinite(options.limit) && options.limit > 0
        ? Math.max(1, Math.floor(options.limit))
        : 25;

    const rows = this.db
      .prepare(
        `SELECT
          m.message_id,
          m.conversation_id,
          m.seq,
          mp.tool_call_id,
          mp.tool_name,
          mp.metadata
        FROM messages m
        JOIN message_parts mp
          ON mp.message_id = m.message_id
        WHERE m.conversation_id = ?
          AND m.role = 'tool'
          AND mp.part_type = 'tool'
          AND mp.tool_call_id IS NOT NULL
          AND mp.tool_call_id != ''
          AND EXISTS (
            SELECT 1
            FROM summary_messages sm
            WHERE sm.message_id = m.message_id
          )
          AND NOT EXISTS (
            SELECT 1
            FROM context_items ci
            WHERE ci.conversation_id = m.conversation_id
              AND ci.item_type = 'message'
              AND ci.message_id = m.message_id
          )
        ORDER BY m.seq ASC, mp.ordinal ASC`,
      )
      .all(conversationId) as unknown as TranscriptGcCandidateRow[];

    // A message can have several matching parts; keep only its first part
    // (lowest seq/ordinal) and stop once `limit` candidates are collected.
    const seenMessageIds = new Set<number>();
    const candidates: TranscriptGcCandidateRecord[] = [];
    for (const row of rows) {
      if (seenMessageIds.has(row.message_id)) {
        continue;
      }
      // Rows whose metadata does not mark the output as externalized map to
      // null and are skipped.
      const candidate = toTranscriptGcCandidateRecord(row);
      if (!candidate) {
        continue;
      }
      seenMessageIds.add(candidate.messageId);
      candidates.push(candidate);
      if (candidates.length >= limit) {
        break;
      }
    }

    return candidates;
  }
679
- async getSummaryChildren(parentSummaryId: string): Promise<SummaryRecord[]> {
680
- const rows = this.db
681
- .prepare(
682
- `SELECT s.summary_id, s.conversation_id, s.kind, s.depth, s.content, s.token_count,
683
- s.file_ids, s.earliest_at, s.latest_at, s.descendant_count, s.created_at
684
- , s.descendant_token_count, s.source_message_token_count, s.model
685
- FROM summaries s
686
- JOIN summary_parents sp ON sp.summary_id = s.summary_id
687
- WHERE sp.parent_summary_id = ?
688
- ORDER BY sp.ordinal`,
689
- )
690
- .all(parentSummaryId) as unknown as SummaryRow[];
691
- return rows.map(toSummaryRecord);
692
- }
693
-
694
- // NOTE: historical naming is confusing here.
695
- // getSummaryParents(summaryId) returns the source summaries compacted into
696
- // `summaryId`. Expansion should use this direction for replay.
697
- async getSummaryParents(summaryId: string): Promise<SummaryRecord[]> {
698
- const rows = this.db
699
- .prepare(
700
- `SELECT s.summary_id, s.conversation_id, s.kind, s.depth, s.content, s.token_count,
701
- s.file_ids, s.earliest_at, s.latest_at, s.descendant_count, s.created_at
702
- , s.descendant_token_count, s.source_message_token_count, s.model
703
- FROM summaries s
704
- JOIN summary_parents sp ON sp.parent_summary_id = s.summary_id
705
- WHERE sp.summary_id = ?
706
- ORDER BY sp.ordinal`,
707
- )
708
- .all(summaryId) as unknown as SummaryRow[];
709
- return rows.map(toSummaryRecord);
710
- }
711
-
712
  /**
   * Walk the compaction tree rooted at `summaryId` (following
   * summary_parents edges downward) and return every reachable summary with
   * its placement: depth from the root, its immediate parent in the walk,
   * a sortable dot-joined ordinal path, and its own child count.
   *
   * Results are breadth-first (depth, then path); if a summary is reachable
   * via multiple paths, only its first occurrence is kept.
   */
  async getSummarySubtree(summaryId: string): Promise<SummarySubtreeNodeRecord[]> {
    const rows = this.db
      .prepare(
        `WITH RECURSIVE subtree(summary_id, parent_summary_id, depth_from_root, path) AS (
          SELECT ?, NULL, 0, ''
          UNION ALL
          SELECT
            sp.summary_id,
            sp.parent_summary_id,
            subtree.depth_from_root + 1,
            CASE
              WHEN subtree.path = '' THEN printf('%04d', sp.ordinal)
              ELSE subtree.path || '.' || printf('%04d', sp.ordinal)
            END
          FROM summary_parents sp
          JOIN subtree ON sp.parent_summary_id = subtree.summary_id
        )
        SELECT
          s.summary_id,
          s.conversation_id,
          s.kind,
          s.depth,
          s.content,
          s.token_count,
          s.file_ids,
          s.earliest_at,
          s.latest_at,
          s.descendant_count,
          s.descendant_token_count,
          s.source_message_token_count,
          s.model,
          s.created_at,
          subtree.depth_from_root,
          subtree.parent_summary_id,
          subtree.path,
          (
            SELECT COUNT(*) FROM summary_parents sp2
            WHERE sp2.parent_summary_id = s.summary_id
          ) AS child_count
        FROM subtree
        JOIN summaries s ON s.summary_id = subtree.summary_id
        ORDER BY subtree.depth_from_root ASC, subtree.path ASC, s.created_at ASC`,
      )
      .all(summaryId) as unknown as SummarySubtreeRow[];

    // Deduplicate: a node reachable through several paths keeps only the
    // first (shallowest / lexicographically smallest path) occurrence.
    const seen = new Set<string>();
    const output: SummarySubtreeNodeRecord[] = [];
    for (const row of rows) {
      if (seen.has(row.summary_id)) {
        continue;
      }
      seen.add(row.summary_id);
      output.push({
        ...toSummaryRecord(row),
        depthFromRoot: Math.max(0, Math.floor(row.depth_from_root ?? 0)),
        parentSummaryId: row.parent_summary_id ?? null,
        path: typeof row.path === "string" ? row.path : "",
        childCount:
          typeof row.child_count === "number" && Number.isFinite(row.child_count)
            ? Math.max(0, Math.floor(row.child_count))
            : 0,
      });
    }
    return output;
  }
777
-
778
- // ── Context items ─────────────────────────────────────────────────────────
779
-
780
- async getContextItems(conversationId: number): Promise<ContextItemRecord[]> {
781
- const rows = this.db
782
- .prepare(
783
- `SELECT conversation_id, ordinal, item_type, message_id, summary_id, created_at
784
- FROM context_items
785
- WHERE conversation_id = ?
786
- ORDER BY ordinal`,
787
- )
788
- .all(conversationId) as unknown as ContextItemRow[];
789
- return rows.map(toContextItemRecord);
790
- }
791
-
792
- async getDistinctDepthsInContext(
793
- conversationId: number,
794
- options?: { maxOrdinalExclusive?: number },
795
- ): Promise<number[]> {
796
- const maxOrdinalExclusive = options?.maxOrdinalExclusive;
797
- const useOrdinalBound =
798
- typeof maxOrdinalExclusive === "number" &&
799
- Number.isFinite(maxOrdinalExclusive) &&
800
- maxOrdinalExclusive !== Infinity;
801
-
802
- const sql = useOrdinalBound
803
- ? `SELECT DISTINCT s.depth
804
- FROM context_items ci
805
- JOIN summaries s ON s.summary_id = ci.summary_id
806
- WHERE ci.conversation_id = ?
807
- AND ci.item_type = 'summary'
808
- AND ci.ordinal < ?
809
- ORDER BY s.depth ASC`
810
- : `SELECT DISTINCT s.depth
811
- FROM context_items ci
812
- JOIN summaries s ON s.summary_id = ci.summary_id
813
- WHERE ci.conversation_id = ?
814
- AND ci.item_type = 'summary'
815
- ORDER BY s.depth ASC`;
816
-
817
- const rows = useOrdinalBound
818
- ? (this.db
819
- .prepare(sql)
820
- .all(conversationId, Math.floor(maxOrdinalExclusive)) as unknown as DistinctDepthRow[])
821
- : (this.db.prepare(sql).all(conversationId) as unknown as DistinctDepthRow[]);
822
-
823
- return rows.map((row) => row.depth);
824
- }
825
-
826
  /**
   * Serialize a multi-step summary write sequence on the shared database.
   * Delegates to withDatabaseTransaction (../transaction-mutex.js) with a
   * plain "BEGIN"; see that helper for commit/rollback semantics.
   */
  async withTransaction<T>(operation: () => Promise<T> | T): Promise<T> {
    return withDatabaseTransaction(this.db, "BEGIN", operation);
  }
830
-
831
  /**
   * Reset a conversation's context for a fresh session.
   *
   * Always removes all raw 'message' context items. Summary items are then
   * pruned by depth: a finite non-negative `retainDepth` deletes summary
   * items whose depth is below it; a non-finite `retainDepth` (e.g.
   * Infinity) deletes every summary item; a negative finite `retainDepth`
   * is a complete no-op (nothing is deleted at all).
   *
   * NOTE(review): a NaN retainDepth passes the first guard (NaN is not
   * finite) and lands in the delete-all-summaries branch, same as
   * Infinity — confirm that is intended.
   */
  async pruneForNewSession(conversationId: number, retainDepth: number): Promise<void> {
    if (Number.isFinite(retainDepth) && retainDepth < 0) {
      return;
    }

    this.db
      .prepare(
        `DELETE FROM context_items
       WHERE conversation_id = ?
         AND item_type = 'message'`,
      )
      .run(conversationId);

    if (!Number.isFinite(retainDepth)) {
      this.db
        .prepare(
          `DELETE FROM context_items
         WHERE conversation_id = ?
           AND item_type = 'summary'`,
        )
        .run(conversationId);
      return;
    }

    this.db
      .prepare(
        `DELETE FROM context_items
       WHERE conversation_id = ?
         AND item_type = 'summary'
         AND summary_id IN (
           SELECT summary_id
           FROM summaries
           WHERE conversation_id = ?
             AND depth < ?
         )`,
      )
      .run(conversationId, conversationId, Math.floor(retainDepth));
  }
869
-
870
- async appendContextMessage(conversationId: number, messageId: number): Promise<void> {
871
- const row = this.db
872
- .prepare(
873
- `SELECT COALESCE(MAX(ordinal), -1) AS max_ordinal
874
- FROM context_items WHERE conversation_id = ?`,
875
- )
876
- .get(conversationId) as unknown as MaxOrdinalRow;
877
-
878
- this.db
879
- .prepare(
880
- `INSERT INTO context_items (conversation_id, ordinal, item_type, message_id)
881
- VALUES (?, ?, 'message', ?)`,
882
- )
883
- .run(conversationId, row.max_ordinal + 1, messageId);
884
- }
885
-
886
- async appendContextMessages(conversationId: number, messageIds: number[]): Promise<void> {
887
- if (messageIds.length === 0) {
888
- return;
889
- }
890
-
891
- const row = this.db
892
- .prepare(
893
- `SELECT COALESCE(MAX(ordinal), -1) AS max_ordinal
894
- FROM context_items WHERE conversation_id = ?`,
895
- )
896
- .get(conversationId) as unknown as MaxOrdinalRow;
897
- const baseOrdinal = row.max_ordinal + 1;
898
-
899
- const stmt = this.db.prepare(
900
- `INSERT INTO context_items (conversation_id, ordinal, item_type, message_id)
901
- VALUES (?, ?, 'message', ?)`,
902
- );
903
- for (let idx = 0; idx < messageIds.length; idx++) {
904
- stmt.run(conversationId, baseOrdinal + idx, messageIds[idx]);
905
- }
906
- }
907
-
908
- async appendContextSummary(conversationId: number, summaryId: string): Promise<void> {
909
- const row = this.db
910
- .prepare(
911
- `SELECT COALESCE(MAX(ordinal), -1) AS max_ordinal
912
- FROM context_items WHERE conversation_id = ?`,
913
- )
914
- .get(conversationId) as unknown as MaxOrdinalRow;
915
-
916
- this.db
917
- .prepare(
918
- `INSERT INTO context_items (conversation_id, ordinal, item_type, summary_id)
919
- VALUES (?, ?, 'summary', ?)`,
920
- )
921
- .run(conversationId, row.max_ordinal + 1, summaryId);
922
- }
923
-
924
  /**
   * Replace the context items in [startOrdinal, endOrdinal] with a single
   * summary item, inside a store-managed transaction. Use the private
   * ...InTransaction variant when the caller already owns a transaction.
   */
  async replaceContextRangeWithSummary(input: {
    conversationId: number;
    startOrdinal: number;
    endOrdinal: number;
    summaryId: string;
  }): Promise<void> {
    await this.withTransaction(() => {
      this.replaceContextRangeWithSummaryInTransaction(input);
    });
  }
934
-
935
// Update the context slice in-place while the caller already owns the txn.
// Three steps: delete the [startOrdinal, endOrdinal] slice, insert the
// replacement summary at startOrdinal, then renumber everything so ordinals
// run 0..n-1 with no gaps. The renumbering order is deliberate — see below.
private replaceContextRangeWithSummaryInTransaction(input: {
  conversationId: number;
  startOrdinal: number;
  endOrdinal: number;
  summaryId: string;
}): void {
  const { conversationId, startOrdinal, endOrdinal, summaryId } = input;

  // 1. Delete context items in the range [startOrdinal, endOrdinal]
  this.db
    .prepare(
      `DELETE FROM context_items
       WHERE conversation_id = ?
       AND ordinal >= ?
       AND ordinal <= ?`,
    )
    .run(conversationId, startOrdinal, endOrdinal);

  // 2. Insert the replacement summary item at startOrdinal
  this.db
    .prepare(
      `INSERT INTO context_items (conversation_id, ordinal, item_type, summary_id)
       VALUES (?, ?, 'summary', ?)`,
    )
    .run(conversationId, startOrdinal, summaryId);

  // 3. Resequence all ordinals to maintain contiguity (no gaps).
  //    Pre-compute ranks from a SELECT (safe snapshot), then apply
  //    via 2-pass UPDATE loop using negative temps to avoid UNIQUE
  //    constraint violations. The SELECT reads post-delete/insert
  //    state and provides a consistent snapshot for resequencing.
  const items = this.db
    .prepare(
      `SELECT ordinal FROM context_items
       WHERE conversation_id = ?
       ORDER BY ordinal`,
    )
    .all(conversationId) as unknown as { ordinal: number }[];

  // Skip the rewrite entirely when ordinals are already 0..n-1.
  if (items.length > 0 && items.some((item, i) => item.ordinal !== i)) {
    const updateStmt = this.db.prepare(
      `UPDATE context_items SET ordinal = ?
       WHERE conversation_id = ? AND ordinal = ?`,
    );
    // Pass 1: park every row at a unique negative ordinal (-1, -2, ...)
    // so pass 2 can assign final values without colliding with any row
    // that has not been moved yet.
    for (let i = 0; i < items.length; i++) {
      updateStmt.run(-(i + 1), conversationId, items[i].ordinal);
    }
    // Pass 2: move each parked row to its final 0-based rank.
    for (let i = 0; i < items.length; i++) {
      updateStmt.run(i, conversationId, -(i + 1));
    }
  }
}
988
-
989
/**
 * Total token count of the conversation's current context window: the sum
 * of token_count over referenced messages plus referenced summaries,
 * combined via UNION ALL. Returns 0 when the context is empty.
 *
 * @param conversationId conversation whose context items are summed
 */
async getContextTokenCount(conversationId: number): Promise<number> {
  // The same conversationId binds twice — once per UNION ALL arm.
  const row = this.db
    .prepare(
      `SELECT COALESCE(SUM(token_count), 0) AS total
       FROM (
         SELECT m.token_count
         FROM context_items ci
         JOIN messages m ON m.message_id = ci.message_id
         WHERE ci.conversation_id = ?
         AND ci.item_type = 'message'

         UNION ALL

         SELECT s.token_count
         FROM context_items ci
         JOIN summaries s ON s.summary_id = ci.summary_id
         WHERE ci.conversation_id = ?
         AND ci.item_type = 'summary'
       ) sub`,
    )
    .get(conversationId, conversationId) as unknown as TokenSumRow;
  return row?.total ?? 0;
}
1012
-
1013
- // ── Search ────────────────────────────────────────────────────────────────
1014
-
1015
/**
 * Search stored summaries, routing to the best available backend.
 *
 * Routing for mode === "full_text":
 *   - CJK queries: trigram FTS first (only when every CJK segment is 3+
 *     chars, the minimum the trigram tokenizer can match), falling back to
 *     a LIKE-based search when the trigram table is missing or empty.
 *   - otherwise: FTS5 when available, with a LIKE fallback if the FTS
 *     query fails at runtime.
 * Any other mode is treated as a regex search.
 *
 * @param input query text, mode, optional conversation/time filters,
 *              optional sort, and optional limit (defaults to 50)
 */
async searchSummaries(input: SummarySearchInput): Promise<SummarySearchResult[]> {
  const limit = input.limit ?? 50;

  if (input.mode === "full_text") {
    // FTS5 unicode61 cannot segment CJK ideographs, so CJK queries route
    // through the trigram FTS table first, then fall back to LIKE with OR
    // semantics (instead of the original AND logic which fails when the
    // user's phrasing doesn't exactly match the summary text).
    if (containsCjk(input.query)) {
      const cjkSegments = this.extractCjkSegments(input.query);
      // Trigram MATCH cannot hit segments shorter than 3 chars, so any
      // short segment disqualifies the trigram path entirely.
      const hasShortCjkSegment = cjkSegments.some((segment) => segment.length < 3);
      if (!hasShortCjkSegment) {
        try {
          const trigramResults = this.searchCjkTrigram(
            input.query,
            limit,
            input.conversationId,
            input.since,
            input.before,
          );
          // Empty trigram results still fall through to the LIKE search.
          if (trigramResults.length > 0) {
            return trigramResults;
          }
        } catch {
          // trigram table may not exist; fall through to LIKE OR
        }
      }
      return this.searchLikeCjk(
        input.query,
        limit,
        input.conversationId,
        input.since,
        input.before,
      );
    }
    if (this.fts5Available) {
      try {
        return this.searchFullText(
          input.query,
          limit,
          input.conversationId,
          input.since,
          input.before,
          input.sort,
        );
      } catch {
        // FTS5 MATCH can reject some query strings; degrade to LIKE.
        return this.searchLike(
          input.query,
          limit,
          input.conversationId,
          input.since,
          input.before,
        );
      }
    }
    return this.searchLike(input.query, limit, input.conversationId, input.since, input.before);
  }
  return this.searchRegex(input.query, limit, input.conversationId, input.since, input.before);
}
1074
-
1075
- private searchFullText(
1076
- query: string,
1077
- limit: number,
1078
- conversationId?: number,
1079
- since?: Date,
1080
- before?: Date,
1081
- sort?: SearchSort,
1082
- ): SummarySearchResult[] {
1083
- const where: string[] = ["summaries_fts MATCH ?"];
1084
- const args: Array<string | number> = [sanitizeFts5Query(query)];
1085
- if (conversationId != null) {
1086
- where.push("s.conversation_id = ?");
1087
- args.push(conversationId);
1088
- }
1089
- if (since) {
1090
- where.push("julianday(s.created_at) >= julianday(?)");
1091
- args.push(since.toISOString());
1092
- }
1093
- if (before) {
1094
- where.push("julianday(s.created_at) < julianday(?)");
1095
- args.push(before.toISOString());
1096
- }
1097
- args.push(limit);
1098
- const orderBy = buildFtsOrderBy(sort, "s.created_at");
1099
-
1100
- const sql = `SELECT
1101
- summaries_fts.summary_id,
1102
- s.conversation_id,
1103
- s.kind,
1104
- snippet(summaries_fts, 1, '', '', '...', 32) AS snippet,
1105
- rank,
1106
- s.created_at
1107
- FROM summaries_fts
1108
- JOIN summaries s ON s.summary_id = summaries_fts.summary_id
1109
- WHERE ${where.join(" AND ")}
1110
- ORDER BY ${orderBy}
1111
- LIMIT ?`;
1112
- const rows = this.db.prepare(sql).all(...args) as unknown as SummarySearchRow[];
1113
- return rows.map(toSearchResult);
1114
- }
1115
-
1116
/**
 * LIKE-based fallback search over summaries.content. The term plan comes
 * from buildLikeSearchPlan; results are newest-first with a synthesized
 * snippet (rank is always 0 since LIKE has no relevance score).
 */
private searchLike(
  query: string,
  limit: number,
  conversationId?: number,
  since?: Date,
  before?: Date,
): SummarySearchResult[] {
  const plan = buildLikeSearchPlan("content", query);
  if (plan.terms.length === 0) {
    return [];
  }

  const conditions: string[] = [...plan.where];
  const params: Array<string | number> = [...plan.args];

  if (conversationId != null) {
    conditions.push("conversation_id = ?");
    params.push(conversationId);
  }
  if (since) {
    conditions.push("julianday(created_at) >= julianday(?)");
    params.push(since.toISOString());
  }
  if (before) {
    conditions.push("julianday(created_at) < julianday(?)");
    params.push(before.toISOString());
  }
  params.push(limit);

  const whereClause = conditions.length > 0 ? `WHERE ${conditions.join(" AND ")}` : "";
  const stmt = this.db.prepare(
    `SELECT summary_id, conversation_id, kind, depth, content, token_count, file_ids,
        earliest_at, latest_at, descendant_count, descendant_token_count,
        source_message_token_count, model, created_at
     FROM summaries
     ${whereClause}
     ORDER BY created_at DESC
     LIMIT ?`,
  );
  const matches = stmt.all(...params) as unknown as SummaryRow[];

  return matches.map((match) => ({
    summaryId: match.summary_id,
    conversationId: match.conversation_id,
    kind: match.kind,
    snippet: createFallbackSnippet(match.content, plan.terms),
    createdAt: parseUtcTimestamp(match.created_at),
    rank: 0,
  }));
}
1166
-
1167
- private extractCjkSegments(query: string): string[] {
1168
- return query.match(CJK_QUERY_SEGMENT_RE) ?? [];
1169
- }
1170
-
1171
- private extractLatinTokens(query: string): string[] {
1172
- const tokens = query.match(LATIN_QUERY_TOKEN_RE) ?? [];
1173
- return [...new Set(tokens.map((token) => token.toLowerCase()))];
1174
- }
1175
-
1176
- private escapeLikeTerm(term: string): string {
1177
- return term.replace(/([\\%_])/g, "\\$1");
1178
- }
1179
-
1180
- // ── CJK trigram FTS search ──────────────────────────────────────────────
1181
- // Each CJK segment of 3+ chars is split into overlapping 4-char chunks for
1182
- // trigram MATCH with OR semantics within the segment. Segment groups are
1183
- // combined with AND, and Latin tokens are applied as LIKE filters so mixed
1184
- // queries still require every part of the user's intent.
1185
-
1186
- /**
1187
- * Split a CJK string into overlapping chunks of `size` characters.
1188
- * E.g. "端到端测试结果" with size=4 →
1189
- * ["端到端测", "到端测试", "端测试结", "测试结果"]
1190
- */
1191
- private splitCjkChunks(text: string, size: number): string[] {
1192
- const chunks: string[] = [];
1193
- for (let i = 0; i <= text.length - size; i++) {
1194
- const chunk = text.slice(i, i + size);
1195
- if (!chunks.includes(chunk)) {
1196
- chunks.push(chunk);
1197
- }
1198
- }
1199
- return chunks;
1200
- }
1201
-
1202
/**
 * Trigram FTS search for CJK queries against summaries_fts_cjk.
 *
 * Each CJK segment of 3+ chars becomes one MATCH group: segments of up to
 * 4 chars are used verbatim, longer ones are split into overlapping 4-char
 * chunks ORed together. Groups are ANDed, and Latin tokens are applied as
 * additional LIKE filters, so mixed queries require every part of the
 * user's intent. Throws if the trigram table does not exist — the caller
 * catches and falls back to LIKE.
 */
private searchCjkTrigram(
  query: string,
  limit: number,
  conversationId?: number,
  since?: Date,
  before?: Date,
): SummarySearchResult[] {
  // Segments under 3 chars cannot match a trigram index; drop them here.
  const cjkSegments = this.extractCjkSegments(query).filter((segment) => segment.length >= 3);
  if (cjkSegments.length === 0) {
    return [];
  }
  const latinTokens = this.extractLatinTokens(query);

  // Build one OR group per CJK segment, then require every segment group and
  // every Latin token to match so mixed queries preserve full-intent search.
  const cjkGroups: string[] = [];
  for (const segment of cjkSegments) {
    const segmentTerms =
      segment.length <= 4 ? [segment] : this.splitCjkChunks(segment, 4);
    // Each term is double-quoted for FTS5, with embedded quotes doubled.
    const groupExpr = [...new Set(segmentTerms)]
      .map((term) => `"${term.replace(/"/g, '""')}"`)
      .join(" OR ");
    cjkGroups.push(`(${groupExpr})`);
  }

  const where: string[] = ["summaries_fts_cjk MATCH ?"];
  const args: Array<string | number> = [cjkGroups.join(" AND ")];
  // Latin tokens filter via LIKE on the joined summaries row, not via MATCH.
  for (const token of latinTokens) {
    where.push("LOWER(s.content) LIKE ? ESCAPE '\\'");
    args.push(`%${this.escapeLikeTerm(token)}%`);
  }
  if (conversationId != null) {
    where.push("s.conversation_id = ?");
    args.push(conversationId);
  }
  if (since) {
    where.push("julianday(s.created_at) >= julianday(?)");
    args.push(since.toISOString());
  }
  if (before) {
    where.push("julianday(s.created_at) < julianday(?)");
    args.push(before.toISOString());
  }
  // The limit must stay the final bound parameter (LIMIT ? is last in SQL).
  args.push(limit);

  const sql = `SELECT
      f.summary_id,
      s.conversation_id,
      s.kind,
      snippet(summaries_fts_cjk, 1, '', '', '...', 32) AS snippet,
      rank,
      s.created_at
    FROM summaries_fts_cjk f
    JOIN summaries s ON s.summary_id = f.summary_id
    WHERE ${where.join(" AND ")}
    ORDER BY rank
    LIMIT ?`;
  const rows = this.db.prepare(sql).all(...args) as unknown as SummarySearchRow[];
  return rows.map(toSearchResult);
}
1262
-
1263
- // ── CJK LIKE fallback ────────────────────────────────────────────────────
1264
- // When the trigram table is unavailable, split each CJK segment into
1265
- // sliding-window terms so partial matches still work. Terms within a single
1266
- // segment are ORed together, but each segment and Latin token still has to
1267
- // match so mixed queries keep full-intent semantics.
1268
-
1269
/**
 * LIKE fallback for CJK queries when the trigram table is unavailable.
 *
 * Each CJK segment becomes one OR group of sliding-window terms (segments
 * of 1-2 chars are used verbatim; longer segments split into overlapping
 * 2-char chunks) so partial matches still work. Every segment group and
 * every Latin token must match, keeping full-intent semantics for mixed
 * queries. Results are newest-first with a synthesized snippet; rank is
 * always 0 because LIKE has no relevance score.
 */
private searchLikeCjk(
  query: string,
  limit: number,
  conversationId?: number,
  since?: Date,
  before?: Date,
): SummarySearchResult[] {
  const cjkSegments = this.extractCjkSegments(query);
  const latinTokens = this.extractLatinTokens(query);
  if (cjkSegments.length === 0 && latinTokens.length === 0) {
    return [];
  }

  const cjkTerms: string[] = [];
  const cjkClauses: string[] = [];
  const cjkArgs: string[] = [];
  for (const segment of cjkSegments) {
    // 1- and 2-char segments are already minimal; only longer segments are
    // windowed. (The original spelled this as two identical ternary arms.)
    const segmentTerms =
      segment.length <= 2 ? [segment] : this.splitCjkChunks(segment, 2);
    const uniqueTerms = [...new Set(segmentTerms)];
    cjkTerms.push(...uniqueTerms);
    cjkClauses.push(
      `(${uniqueTerms.map(() => `LOWER(content) LIKE ? ESCAPE '\\'`).join(" OR ")})`,
    );
    cjkArgs.push(
      ...uniqueTerms.map((term) => `%${this.escapeLikeTerm(term.toLowerCase())}%`),
    );
  }

  const latinClauses = latinTokens.map(() => `LOWER(content) LIKE ? ESCAPE '\\'`);
  const latinArgs = latinTokens.map((token) => `%${this.escapeLikeTerm(token)}%`);

  const where: string[] = [...cjkClauses, ...latinClauses];
  const args: Array<string | number> = [...cjkArgs, ...latinArgs];
  if (conversationId != null) {
    where.push("conversation_id = ?");
    args.push(conversationId);
  }
  if (since) {
    where.push("julianday(created_at) >= julianday(?)");
    args.push(since.toISOString());
  }
  if (before) {
    where.push("julianday(created_at) < julianday(?)");
    args.push(before.toISOString());
  }
  args.push(limit);

  const rows = this.db
    .prepare(
      `SELECT summary_id, conversation_id, kind, depth, content, token_count, file_ids,
          earliest_at, latest_at, descendant_count, descendant_token_count,
          source_message_token_count, model, created_at
       FROM summaries
       WHERE ${where.join(" AND ")}
       ORDER BY created_at DESC
       LIMIT ?`,
    )
    .all(...args) as unknown as SummaryRow[];

  const snippetTerms = cjkTerms.length > 0 ? [...new Set([...cjkTerms, ...latinTokens])] : latinTokens;
  return rows.map((row) => ({
    summaryId: row.summary_id,
    conversationId: row.conversation_id,
    kind: row.kind,
    snippet: createFallbackSnippet(row.content, snippetTerms),
    // parseUtcTimestamp matches searchLike/searchRegex; the previous
    // `new Date(row.created_at)` parsed SQLite's timezone-less
    // "YYYY-MM-DD HH:MM:SS" strings as LOCAL time, skewing createdAt on
    // this one search path.
    createdAt: parseUtcTimestamp(row.created_at),
    rank: 0,
  }));
}
1343
-
1344
/**
 * Regex search over summary content, evaluated in JS (SQLite has no native
 * REGEXP here). Invalid or suspicious patterns return [] rather than throw.
 * Scans at most MAX_ROW_SCAN rows, newest first; the snippet is the first
 * regex match in each row's content. Rank is always 0.
 */
private searchRegex(
  pattern: string,
  limit: number,
  conversationId?: number,
  since?: Date,
  before?: Date,
): SummarySearchResult[] {
  // Guard against ReDoS: reject patterns with nested quantifiers or excessive length
  if (pattern.length > 500 || /(\+|\*|\?)\)(\+|\*|\?|\{\d)/.test(pattern)) {
    return [];
  }
  let re: RegExp;
  try {
    re = new RegExp(pattern);
  } catch {
    return [];
  }

  // Hard cap on rows examined by the JS-side scan loop below.
  const MAX_ROW_SCAN = 10_000;

  const where: string[] = [];
  const args: Array<string | number> = [];
  if (conversationId != null) {
    where.push("conversation_id = ?");
    args.push(conversationId);
  }
  if (since) {
    where.push("julianday(created_at) >= julianday(?)");
    args.push(since.toISOString());
  }
  if (before) {
    where.push("julianday(created_at) < julianday(?)");
    args.push(before.toISOString());
  }
  args.push(MAX_ROW_SCAN);
  const whereClause = where.length > 0 ? `WHERE ${where.join(" AND ")}` : "";
  // LIMIT MAX_ROW_SCAN bounds memory: without it, .all() materialized every
  // summary row even though the loop below never inspects more than
  // MAX_ROW_SCAN of them, so results are unchanged.
  const rows = this.db
    .prepare(
      `SELECT summary_id, conversation_id, kind, depth, content, token_count, file_ids,
          earliest_at, latest_at, descendant_count, descendant_token_count,
          source_message_token_count, model, created_at
       FROM summaries
       ${whereClause}
       ORDER BY created_at DESC
       LIMIT ?`,
    )
    .all(...args) as unknown as SummaryRow[];

  const results: SummarySearchResult[] = [];
  let scanned = 0;
  for (const row of rows) {
    if (results.length >= limit || scanned >= MAX_ROW_SCAN) {
      break;
    }
    scanned++;
    const match = re.exec(row.content);
    if (match) {
      results.push({
        summaryId: row.summary_id,
        conversationId: row.conversation_id,
        kind: row.kind,
        snippet: match[0],
        createdAt: parseUtcTimestamp(row.created_at),
        rank: 0,
      });
    }
  }
  return results;
}
1410
-
1411
- // ── Large files ───────────────────────────────────────────────────────────
1412
-
1413
- async insertLargeFile(input: CreateLargeFileInput): Promise<LargeFileRecord> {
1414
- this.db
1415
- .prepare(
1416
- `INSERT INTO large_files (file_id, conversation_id, file_name, mime_type, byte_size, storage_uri, exploration_summary)
1417
- VALUES (?, ?, ?, ?, ?, ?, ?)`,
1418
- )
1419
- .run(
1420
- input.fileId,
1421
- input.conversationId,
1422
- input.fileName ?? null,
1423
- input.mimeType ?? null,
1424
- input.byteSize ?? null,
1425
- input.storageUri,
1426
- input.explorationSummary ?? null,
1427
- );
1428
-
1429
- const row = this.db
1430
- .prepare(
1431
- `SELECT file_id, conversation_id, file_name, mime_type, byte_size, storage_uri, exploration_summary, created_at
1432
- FROM large_files WHERE file_id = ?`,
1433
- )
1434
- .get(input.fileId) as unknown as LargeFileRow;
1435
-
1436
- return toLargeFileRecord(row);
1437
- }
1438
-
1439
- async getLargeFile(fileId: string): Promise<LargeFileRecord | null> {
1440
- const row = this.db
1441
- .prepare(
1442
- `SELECT file_id, conversation_id, file_name, mime_type, byte_size, storage_uri, exploration_summary, created_at
1443
- FROM large_files WHERE file_id = ?`,
1444
- )
1445
- .get(fileId) as unknown as LargeFileRow | undefined;
1446
- return row ? toLargeFileRecord(row) : null;
1447
- }
1448
-
1449
- async getLargeFilesByConversation(conversationId: number): Promise<LargeFileRecord[]> {
1450
- const rows = this.db
1451
- .prepare(
1452
- `SELECT file_id, conversation_id, file_name, mime_type, byte_size, storage_uri, exploration_summary, created_at
1453
- FROM large_files
1454
- WHERE conversation_id = ?
1455
- ORDER BY created_at`,
1456
- )
1457
- .all(conversationId) as unknown as LargeFileRow[];
1458
- return rows.map(toLargeFileRecord);
1459
- }
1460
-
1461
- // ── Bootstrap state ──────────────────────────────────────────────────────
1462
-
1463
- async getConversationBootstrapState(
1464
- conversationId: number,
1465
- ): Promise<ConversationBootstrapStateRecord | null> {
1466
- const row = this.db
1467
- .prepare(
1468
- `SELECT conversation_id, session_file_path, last_seen_size, last_seen_mtime_ms,
1469
- last_processed_offset, last_processed_entry_hash, updated_at
1470
- FROM conversation_bootstrap_state
1471
- WHERE conversation_id = ?`,
1472
- )
1473
- .get(conversationId) as unknown as ConversationBootstrapStateRow | undefined;
1474
- return row ? toConversationBootstrapStateRecord(row) : null;
1475
- }
1476
-
1477
/**
 * Insert or update the bootstrap state for a conversation (keyed by
 * conversation_id), then re-read and return the persisted row so the
 * DB-maintained updated_at is included.
 *
 * Numeric inputs are floored and clamped to >= 0 before storage.
 */
async upsertConversationBootstrapState(
  input: UpsertConversationBootstrapStateInput,
): Promise<ConversationBootstrapStateRecord> {
  this.db
    .prepare(
      `INSERT INTO conversation_bootstrap_state (
         conversation_id,
         session_file_path,
         last_seen_size,
         last_seen_mtime_ms,
         last_processed_offset,
         last_processed_entry_hash
       )
       VALUES (?, ?, ?, ?, ?, ?)
       ON CONFLICT (conversation_id) DO UPDATE SET
         session_file_path = excluded.session_file_path,
         last_seen_size = excluded.last_seen_size,
         last_seen_mtime_ms = excluded.last_seen_mtime_ms,
         last_processed_offset = excluded.last_processed_offset,
         last_processed_entry_hash = excluded.last_processed_entry_hash,
         updated_at = datetime('now')`,
    )
    .run(
      input.conversationId,
      input.sessionFilePath,
      // Clamp to non-negative integers; sizes/offsets/mtimes can never be
      // meaningfully negative or fractional in storage.
      Math.max(0, Math.floor(input.lastSeenSize)),
      Math.max(0, Math.floor(input.lastSeenMtimeMs)),
      Math.max(0, Math.floor(input.lastProcessedOffset)),
      input.lastProcessedEntryHash ?? null,
    );

  const row = this.db
    .prepare(
      `SELECT conversation_id, session_file_path, last_seen_size, last_seen_mtime_ms,
          last_processed_offset, last_processed_entry_hash, updated_at
       FROM conversation_bootstrap_state
       WHERE conversation_id = ?`,
    )
    .get(input.conversationId) as unknown as ConversationBootstrapStateRow;

  return toConversationBootstrapStateRecord(row);
}
1519
- }