@martian-engineering/lossless-claw 0.6.3 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +15 -3
- package/docs/agent-tools.md +7 -1
- package/docs/configuration.md +200 -200
- package/openclaw.plugin.json +123 -0
- package/package.json +1 -1
- package/skills/lossless-claw/references/config.md +135 -3
- package/src/assembler.ts +5 -1
- package/src/compaction.ts +149 -38
- package/src/db/config.ts +102 -4
- package/src/db/connection.ts +20 -2
- package/src/db/migration.ts +57 -0
- package/src/engine.ts +814 -97
- package/src/lcm-log.ts +37 -0
- package/src/plugin/index.ts +398 -83
- package/src/plugin/lcm-command.ts +10 -4
- package/src/plugin/shared-init.ts +59 -0
- package/src/prune.ts +391 -0
- package/src/retrieval.ts +7 -5
- package/src/startup-banner-log.ts +1 -0
- package/src/store/compaction-telemetry-store.ts +156 -0
- package/src/store/conversation-store.ts +6 -1
- package/src/store/fts5-sanitize.ts +25 -4
- package/src/store/full-text-sort.ts +21 -0
- package/src/store/index.ts +8 -0
- package/src/store/summary-store.ts +21 -14
- package/src/summarize.ts +54 -30
- package/src/tools/lcm-describe-tool.ts +9 -4
- package/src/tools/lcm-expand-query-tool.ts +11 -6
- package/src/tools/lcm-expand-tool.ts +9 -4
- package/src/tools/lcm-grep-tool.ts +22 -8
- package/src/types.ts +1 -0
|
@@ -710,35 +710,41 @@ async function buildDoctorApplyText(params: {
|
|
|
710
710
|
}
|
|
711
711
|
|
|
712
712
|
export function createLcmCommand(params: {
|
|
713
|
-
db: DatabaseSync;
|
|
713
|
+
db: DatabaseSync | (() => DatabaseSync | Promise<DatabaseSync>);
|
|
714
714
|
config: LcmConfig;
|
|
715
715
|
deps?: LcmDependencies;
|
|
716
716
|
summarize?: LcmSummarizeFn;
|
|
717
717
|
}): OpenClawPluginCommandDefinition {
|
|
718
|
+
const getDb = async (): Promise<DatabaseSync> =>
|
|
719
|
+
typeof params.db === "function" ? await params.db() : params.db;
|
|
720
|
+
|
|
718
721
|
return {
|
|
719
722
|
name: "lcm",
|
|
720
723
|
nativeNames: {
|
|
721
724
|
default: "lossless",
|
|
722
725
|
},
|
|
726
|
+
nativeProgressMessages: {
|
|
727
|
+
telegram: "Lossless Claw is working...",
|
|
728
|
+
},
|
|
723
729
|
description: "Show Lossless Claw health, scan broken summaries, and repair scoped doctor issues.",
|
|
724
730
|
acceptsArgs: true,
|
|
725
731
|
handler: async (ctx) => {
|
|
726
732
|
const parsed = parseLcmCommand(ctx.args);
|
|
727
733
|
switch (parsed.kind) {
|
|
728
734
|
case "status":
|
|
729
|
-
return { text: await buildStatusText({ ctx, db:
|
|
735
|
+
return { text: await buildStatusText({ ctx, db: await getDb(), config: params.config }) };
|
|
730
736
|
case "doctor":
|
|
731
737
|
return parsed.apply
|
|
732
738
|
? {
|
|
733
739
|
text: await buildDoctorApplyText({
|
|
734
740
|
ctx,
|
|
735
|
-
db:
|
|
741
|
+
db: await getDb(),
|
|
736
742
|
config: params.config,
|
|
737
743
|
deps: params.deps,
|
|
738
744
|
summarize: params.summarize,
|
|
739
745
|
}),
|
|
740
746
|
}
|
|
741
|
-
: { text: await buildDoctorText({ ctx, db:
|
|
747
|
+
: { text: await buildDoctorText({ ctx, db: await getDb() }) };
|
|
742
748
|
case "help":
|
|
743
749
|
return { text: buildHelpText(parsed.error) };
|
|
744
750
|
}
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Process-global singleton state for LCM plugin initialization.
|
|
3
|
+
*
|
|
4
|
+
* OpenClaw v2026.4.5+ calls plugin register() per-agent-context (main,
|
|
5
|
+
* subagents, cron lanes). Without sharing, each call opens a new DB
|
|
6
|
+
* connection and runs migrations — causing lock storms on large databases.
|
|
7
|
+
*
|
|
8
|
+
* Uses the same globalThis + Symbol.for() pattern as startup-banner-log.ts
|
|
9
|
+
* to ensure one DB connection and engine per database path per process.
|
|
10
|
+
*
|
|
11
|
+
* The shared state stores the waitForEngine/waitForDatabase closures from
|
|
12
|
+
* the first register() call. These closures close over the local init
|
|
13
|
+
* variables (database, lcm, initPromise, etc.) so all subsequent callers
|
|
14
|
+
* share the same deferred init chain without stale-reference issues.
|
|
15
|
+
*/
|
|
16
|
+
import type { DatabaseSync } from "node:sqlite";
|
|
17
|
+
import type { LcmContextEngine } from "../engine.js";
|
|
18
|
+
|
|
19
|
+
export type SharedLcmInit = {
|
|
20
|
+
/** Whether gateway_stop has been called. */
|
|
21
|
+
stopped: boolean;
|
|
22
|
+
/** Sync accessor — returns the engine if already initialized, null otherwise. */
|
|
23
|
+
getCachedEngine: () => LcmContextEngine | null;
|
|
24
|
+
/** Async accessor for the initialized engine (waits for deferred init). */
|
|
25
|
+
waitForEngine: () => Promise<LcmContextEngine>;
|
|
26
|
+
/** Async accessor for the initialized DB handle (waits for deferred init). */
|
|
27
|
+
waitForDatabase: () => Promise<DatabaseSync>;
|
|
28
|
+
};
|
|
29
|
+
|
|
30
|
+
const SHARED_KEY = Symbol.for(
|
|
31
|
+
"@martian-engineering/lossless-claw/shared-init",
|
|
32
|
+
);
|
|
33
|
+
|
|
34
|
+
function getStore(): Map<string, SharedLcmInit> {
|
|
35
|
+
const g = globalThis as typeof globalThis & {
|
|
36
|
+
[key: symbol]: Map<string, SharedLcmInit> | undefined;
|
|
37
|
+
};
|
|
38
|
+
if (!g[SHARED_KEY]) {
|
|
39
|
+
g[SHARED_KEY] = new Map();
|
|
40
|
+
}
|
|
41
|
+
return g[SHARED_KEY]!;
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
export function getSharedInit(dbPath: string): SharedLcmInit | undefined {
|
|
45
|
+
return getStore().get(dbPath);
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
export function setSharedInit(dbPath: string, init: SharedLcmInit): void {
|
|
49
|
+
getStore().set(dbPath, init);
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
export function removeSharedInit(dbPath: string): void {
|
|
53
|
+
getStore().delete(dbPath);
|
|
54
|
+
}
|
|
55
|
+
|
|
56
|
+
/** Clear all shared init state. Intended for tests only. */
|
|
57
|
+
export function clearAllSharedInit(): void {
|
|
58
|
+
getStore().clear();
|
|
59
|
+
}
|
package/src/prune.ts
ADDED
|
@@ -0,0 +1,391 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Conversation pruning for data retention.
|
|
3
|
+
*
|
|
4
|
+
* Identifies and deletes conversations where ALL messages are older than a
|
|
5
|
+
* given threshold. Relies on ON DELETE CASCADE foreign keys in the schema
|
|
6
|
+
* to clean up messages, summaries, context_items, and other dependent rows.
|
|
7
|
+
*/
|
|
8
|
+
import type { DatabaseSync } from "node:sqlite";
|
|
9
|
+
|
|
10
|
+
// ── Duration parsing ────────────────────────────────────────────────────────
|
|
11
|
+
|
|
12
|
+
const DURATION_RE = /^(\d+)\s*(d|day|days|w|week|weeks|m|month|months|y|year|years)$/i;
|
|
13
|
+
|
|
14
|
+
const UNIT_TO_DAYS: Record<string, number> = {
|
|
15
|
+
d: 1,
|
|
16
|
+
day: 1,
|
|
17
|
+
days: 1,
|
|
18
|
+
w: 7,
|
|
19
|
+
week: 7,
|
|
20
|
+
weeks: 7,
|
|
21
|
+
m: 30,
|
|
22
|
+
month: 30,
|
|
23
|
+
months: 30,
|
|
24
|
+
y: 365,
|
|
25
|
+
year: 365,
|
|
26
|
+
years: 365,
|
|
27
|
+
};
|
|
28
|
+
|
|
29
|
+
/**
|
|
30
|
+
* Parse a human-friendly duration string (e.g. "90d", "3m", "1y") into
|
|
31
|
+
* a number of days. Returns `null` when the input is not recognized.
|
|
32
|
+
*/
|
|
33
|
+
export function parseDuration(input: string): number | null {
|
|
34
|
+
const trimmed = input.trim().toLowerCase();
|
|
35
|
+
const match = DURATION_RE.exec(trimmed);
|
|
36
|
+
if (!match) {
|
|
37
|
+
return null;
|
|
38
|
+
}
|
|
39
|
+
const amount = Number(match[1]);
|
|
40
|
+
const unit = match[2]!.toLowerCase();
|
|
41
|
+
const multiplier = UNIT_TO_DAYS[unit];
|
|
42
|
+
if (multiplier == null || !Number.isFinite(amount) || amount <= 0) {
|
|
43
|
+
return null;
|
|
44
|
+
}
|
|
45
|
+
return amount * multiplier;
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
// ── Prune types ─────────────────────────────────────────────────────────────

/** One conversation eligible for pruning, with stats for dry-run reporting. */
export type PruneCandidate = {
  conversationId: number;
  // Session identifier the conversation is bound to, when any.
  sessionKey: string | null;
  // Number of stored messages in the conversation.
  messageCount: number;
  // Number of stored summaries for the conversation.
  summaryCount: number;
  // Most recent message timestamp; falls back to the conversation's
  // created_at when it has no messages (see SELECT below).
  latestMessageAt: string;
  createdAt: string;
};

/** Outcome of a pruneConversations() call. */
export type PruneResult = {
  /** Conversations that matched the age threshold. */
  candidates: PruneCandidate[];
  /** Number of conversations actually deleted (0 in dry-run mode). */
  deleted: number;
  /** Whether VACUUM was executed after deletion. */
  vacuumed: boolean;
  /** The cutoff date used (ISO-8601 UTC string). */
  cutoffDate: string;
};

/** Options controlling pruneConversations(). */
export type PruneOptions = {
  /** Duration string, e.g. "90d", "30d", "1y". */
  before: string;
  /** When true, actually delete. Default is dry-run (false). */
  confirm?: boolean;
  /** Maximum conversations to delete per write transaction. Default 100. */
  batchSize?: number;
  /** Maximum delete batches to run before returning. Default unlimited. */
  maxBatches?: number;
  /** When true, run VACUUM after deletion. Default false. */
  vacuum?: boolean;
  /** Override "now" for testing. ISO-8601 UTC string. */
  now?: string;
};

// ── Core prune logic ────────────────────────────────────────────────────────

// Raw snake_case row shape returned by SELECT_PRUNE_CANDIDATES_SQL.
type PruneCandidateRow = {
  conversation_id: number;
  session_key: string | null;
  message_count: number;
  summary_count: number;
  latest_message_at: string;
  created_at: string;
};

// Selects every conversation whose most recent activity (latest message, or
// creation time when it has no messages) is older than the bound parameter.
// julianday() makes the comparison chronological even when stored timestamp
// formats are mixed; oldest conversations come first.
const SELECT_PRUNE_CANDIDATES_SQL = `SELECT
  c.conversation_id,
  c.session_key,
  COALESCE(msg_stats.message_count, 0) AS message_count,
  COALESCE(sum_stats.summary_count, 0) AS summary_count,
  COALESCE(msg_stats.latest_message_at, c.created_at) AS latest_message_at,
  c.created_at
FROM conversations c
LEFT JOIN (
  SELECT conversation_id,
         COUNT(*) AS message_count,
         MAX(created_at) AS latest_message_at
  FROM messages
  GROUP BY conversation_id
) msg_stats ON msg_stats.conversation_id = c.conversation_id
LEFT JOIN (
  SELECT conversation_id,
         COUNT(*) AS summary_count
  FROM summaries
  GROUP BY conversation_id
) sum_stats ON sum_stats.conversation_id = c.conversation_id
WHERE julianday(COALESCE(msg_stats.latest_message_at, c.created_at)) < julianday(?)
ORDER BY julianday(COALESCE(msg_stats.latest_message_at, c.created_at)) ASC,
         c.conversation_id ASC`;
|
|
120
|
+
|
|
121
|
+
/**
|
|
122
|
+
* Compute the UTC cutoff date by subtracting `days` from `now`.
|
|
123
|
+
*/
|
|
124
|
+
function computeCutoffDate(days: number, now?: string): string {
|
|
125
|
+
const base = now ? new Date(now) : new Date();
|
|
126
|
+
base.setUTCDate(base.getUTCDate() - days);
|
|
127
|
+
return base.toISOString();
|
|
128
|
+
}
|
|
129
|
+
|
|
130
|
+
/**
|
|
131
|
+
* Normalize prune batch size to a small positive integer.
|
|
132
|
+
*/
|
|
133
|
+
function resolveBatchSize(batchSize?: number): number {
|
|
134
|
+
if (batchSize == null) {
|
|
135
|
+
return 100;
|
|
136
|
+
}
|
|
137
|
+
if (!Number.isFinite(batchSize) || batchSize <= 0) {
|
|
138
|
+
throw new Error(`Invalid batch size "${batchSize}". Expected a positive integer.`);
|
|
139
|
+
}
|
|
140
|
+
return Math.floor(batchSize);
|
|
141
|
+
}
|
|
142
|
+
|
|
143
|
+
/**
|
|
144
|
+
* Normalize the optional batch cap for confirm-mode pruning.
|
|
145
|
+
*/
|
|
146
|
+
function resolveMaxBatches(maxBatches?: number): number | null {
|
|
147
|
+
if (maxBatches == null) {
|
|
148
|
+
return null;
|
|
149
|
+
}
|
|
150
|
+
if (!Number.isFinite(maxBatches) || maxBatches <= 0) {
|
|
151
|
+
throw new Error(`Invalid max batches "${maxBatches}". Expected a positive integer.`);
|
|
152
|
+
}
|
|
153
|
+
return Math.floor(maxBatches);
|
|
154
|
+
}
|
|
155
|
+
|
|
156
|
+
/**
|
|
157
|
+
* Load prune candidates using SQLite date math so mixed timestamp formats are
|
|
158
|
+
* compared chronologically instead of lexically.
|
|
159
|
+
*/
|
|
160
|
+
function loadPruneCandidates(
|
|
161
|
+
db: DatabaseSync,
|
|
162
|
+
cutoffDate: string,
|
|
163
|
+
limit?: number,
|
|
164
|
+
): PruneCandidate[] {
|
|
165
|
+
const sql = limit == null ? SELECT_PRUNE_CANDIDATES_SQL : `${SELECT_PRUNE_CANDIDATES_SQL}\n LIMIT ?`;
|
|
166
|
+
const rows = (
|
|
167
|
+
limit == null
|
|
168
|
+
? db.prepare(sql).all(cutoffDate)
|
|
169
|
+
: db.prepare(sql).all(cutoffDate, limit)
|
|
170
|
+
) as PruneCandidateRow[];
|
|
171
|
+
return rows.map((row) => ({
|
|
172
|
+
conversationId: row.conversation_id,
|
|
173
|
+
sessionKey: row.session_key,
|
|
174
|
+
messageCount: row.message_count,
|
|
175
|
+
summaryCount: row.summary_count,
|
|
176
|
+
latestMessageAt: row.latest_message_at,
|
|
177
|
+
createdAt: row.created_at,
|
|
178
|
+
}));
|
|
179
|
+
}
|
|
180
|
+
|
|
181
|
+
/**
|
|
182
|
+
* Detect whether an optional SQLite table exists.
|
|
183
|
+
*/
|
|
184
|
+
function hasTable(db: DatabaseSync, tableName: string): boolean {
|
|
185
|
+
const row = db
|
|
186
|
+
.prepare(`SELECT 1 AS found FROM sqlite_master WHERE type = 'table' AND name = ? LIMIT 1`)
|
|
187
|
+
.get(tableName) as { found: number } | undefined;
|
|
188
|
+
return row?.found === 1;
|
|
189
|
+
}
|
|
190
|
+
|
|
191
|
+
/**
 * Create temp tables containing the conversations, summaries, and messages
 * selected for pruning so dependent deletes can use simple indexed lookups.
 *
 * Populates three TEMP tables:
 *   - temp.prune_candidate_ids         — conversation_ids chosen for deletion
 *   - temp.prune_candidate_summary_ids — summary_ids of those conversations
 *   - temp.prune_candidate_message_ids — message_ids of those conversations
 *
 * Callers must pair this with dropCandidateConversationIds(); see
 * deleteCandidates, which does so in a finally block.
 */
function stageCandidateConversationIds(
  db: DatabaseSync,
  candidates: PruneCandidate[],
): void {
  // Drop leftovers from any previous (possibly failed) run before recreating.
  db.exec(`DROP TABLE IF EXISTS temp.prune_candidate_ids`);
  db.exec(`DROP TABLE IF EXISTS temp.prune_candidate_summary_ids`);
  db.exec(`DROP TABLE IF EXISTS temp.prune_candidate_message_ids`);
  db.exec(`CREATE TEMP TABLE prune_candidate_ids (conversation_id INTEGER PRIMARY KEY)`);
  db.exec(`CREATE TEMP TABLE prune_candidate_summary_ids (summary_id TEXT PRIMARY KEY)`);
  db.exec(`CREATE TEMP TABLE prune_candidate_message_ids (message_id INTEGER PRIMARY KEY)`);
  // One prepared INSERT reused for every candidate conversation id.
  const insertStmt = db.prepare(
    `INSERT INTO temp.prune_candidate_ids (conversation_id) VALUES (?)`,
  );
  for (const candidate of candidates) {
    insertStmt.run(candidate.conversationId);
  }
  // Derive the summary-id and message-id sets from the staged conversations.
  db.exec(`
    INSERT INTO temp.prune_candidate_summary_ids (summary_id)
    SELECT s.summary_id
    FROM summaries s
    JOIN temp.prune_candidate_ids p ON p.conversation_id = s.conversation_id
  `);
  db.exec(`
    INSERT INTO temp.prune_candidate_message_ids (message_id)
    SELECT m.message_id
    FROM messages m
    JOIN temp.prune_candidate_ids p ON p.conversation_id = m.conversation_id
  `);
}
|
|
224
|
+
|
|
225
|
+
/**
|
|
226
|
+
* Remove the temp candidate table.
|
|
227
|
+
*/
|
|
228
|
+
function dropCandidateConversationIds(db: DatabaseSync): void {
|
|
229
|
+
db.exec(`DROP TABLE IF EXISTS temp.prune_candidate_message_ids`);
|
|
230
|
+
db.exec(`DROP TABLE IF EXISTS temp.prune_candidate_summary_ids`);
|
|
231
|
+
db.exec(`DROP TABLE IF EXISTS temp.prune_candidate_ids`);
|
|
232
|
+
}
|
|
233
|
+
|
|
234
|
+
/**
 * Delete candidate conversations and return the number of conversation rows
 * removed.
 *
 * NOTE(review): the module header says child rows are cleaned via ON DELETE
 * CASCADE, yet join rows, context items, and FTS entries are deleted
 * explicitly below — presumably because FTS shadow tables are not covered by
 * FK cascades; confirm whether the explicit non-FTS deletes are still needed.
 */
function deleteCandidates(db: DatabaseSync, candidates: PruneCandidate[]): number {
  if (candidates.length === 0) {
    return 0;
  }

  // FTS tables are optional (older schemas may lack them); probe once so the
  // deletes below can skip missing tables.
  const tableOptions = {
    hasMessagesFts: hasTable(db, "messages_fts"),
    hasSummariesFts: hasTable(db, "summaries_fts"),
    hasSummariesFtsCjk: hasTable(db, "summaries_fts_cjk"),
  };

  stageCandidateConversationIds(db, candidates);
  try {
    // Join-table rows referencing doomed summaries or doomed messages.
    db.prepare(
      `DELETE FROM summary_messages
       WHERE summary_id IN (SELECT summary_id FROM temp.prune_candidate_summary_ids)`,
    ).run();

    db.prepare(
      `DELETE FROM summary_messages
       WHERE message_id IN (SELECT message_id FROM temp.prune_candidate_message_ids)`,
    ).run();

    // Parent/child summary links, in both directions.
    db.prepare(
      `DELETE FROM summary_parents
       WHERE summary_id IN (SELECT summary_id FROM temp.prune_candidate_summary_ids)`,
    ).run();

    db.prepare(
      `DELETE FROM summary_parents
       WHERE parent_summary_id IN (SELECT summary_id FROM temp.prune_candidate_summary_ids)`,
    ).run();

    // Context items referencing doomed messages, summaries, or conversations.
    db.prepare(
      `DELETE FROM context_items
       WHERE message_id IN (SELECT message_id FROM temp.prune_candidate_message_ids)`,
    ).run();

    db.prepare(
      `DELETE FROM context_items
       WHERE summary_id IN (SELECT summary_id FROM temp.prune_candidate_summary_ids)`,
    ).run();

    db.prepare(
      `DELETE FROM context_items
       WHERE conversation_id IN (SELECT conversation_id FROM temp.prune_candidate_ids)`,
    ).run();

    // FTS indexes must be cleared explicitly.
    if (tableOptions.hasMessagesFts) {
      db.prepare(
        `DELETE FROM messages_fts
         WHERE rowid IN (SELECT message_id FROM temp.prune_candidate_message_ids)`,
      ).run();
    }

    if (tableOptions.hasSummariesFts) {
      db.prepare(
        `DELETE FROM summaries_fts
         WHERE summary_id IN (SELECT summary_id FROM temp.prune_candidate_summary_ids)`,
      ).run();
    }

    if (tableOptions.hasSummariesFtsCjk) {
      db.prepare(
        `DELETE FROM summaries_fts_cjk
         WHERE summary_id IN (SELECT summary_id FROM temp.prune_candidate_summary_ids)`,
      ).run();
    }

    // Finally delete the conversations themselves; the count of deleted
    // conversation rows (not child rows) is the return value.
    return Number(
      db
        .prepare(
          `DELETE FROM conversations
           WHERE conversation_id IN (SELECT conversation_id FROM temp.prune_candidate_ids)`,
        )
        .run().changes ?? 0,
    );
  } finally {
    // Always clear the temp staging tables, even when a delete throws.
    dropCandidateConversationIds(db);
  }
}
|
|
318
|
+
|
|
319
|
+
/**
 * Prune old conversations from the LCM database.
 *
 * In dry-run mode (default), returns the list of conversations that would be
 * deleted without modifying the database. With `confirm: true`, deletes them
 * and relies on ON DELETE CASCADE for cleanup of child rows.
 *
 * @throws Error when `options.before` is not a recognized duration, or when
 *   `batchSize` / `maxBatches` are not positive numbers.
 */
export function pruneConversations(
  db: DatabaseSync,
  options: PruneOptions,
): PruneResult {
  const days = parseDuration(options.before);
  if (days == null) {
    throw new Error(
      `Invalid duration "${options.before}". Expected a value like "90d", "30d", "3m", or "1y".`,
    );
  }

  const cutoffDate = computeCutoffDate(days, options.now);
  const batchSize = resolveBatchSize(options.batchSize);
  const maxBatches = resolveMaxBatches(options.maxBatches);

  let deleted = 0;
  let vacuumed = false;
  let candidates: PruneCandidate[];

  if (!options.confirm) {
    // Dry run: report every matching conversation (no LIMIT), touch nothing.
    candidates = loadPruneCandidates(db, cutoffDate);
  } else {
    candidates = [];
    let batchesRun = 0;
    while (true) {
      let batchCount = 0;
      // BEGIN IMMEDIATE acquires the write lock up front so candidate
      // selection and deletion happen within one consistent transaction.
      db.exec("BEGIN IMMEDIATE");
      try {
        const batch = loadPruneCandidates(db, cutoffDate, batchSize);
        batchCount = batch.length;
        if (batch.length === 0) {
          db.exec("COMMIT");
          break;
        }
        deleted += deleteCandidates(db, batch);
        candidates.push(...batch);
        db.exec("COMMIT");
      } catch (error) {
        db.exec("ROLLBACK");
        throw error;
      }
      // A short batch means the candidate set is exhausted.
      if (batchCount < batchSize) {
        break;
      }
      batchesRun += 1;
      // NOTE(review): only full batches reach this counter — a short final
      // batch breaks above before incrementing — so maxBatches effectively
      // caps *full* batches; confirm this is the intended semantics.
      if (maxBatches != null && batchesRun >= maxBatches) {
        break;
      }
    }
  }

  if (options.vacuum && deleted > 0) {
    db.exec("VACUUM");
    // VACUUM in WAL mode can leave the reclaimed pages in the WAL file until
    // a checkpoint folds them back into the main database.
    db.exec("PRAGMA wal_checkpoint(TRUNCATE)");
    vacuumed = true;
  }

  return {
    candidates,
    deleted,
    vacuumed,
    cutoffDate,
  };
}
|
package/src/retrieval.ts
CHANGED
|
@@ -9,6 +9,7 @@ import type {
|
|
|
9
9
|
SummarySearchResult,
|
|
10
10
|
LargeFileRecord,
|
|
11
11
|
} from "./store/summary-store.js";
|
|
12
|
+
import type { SearchSort } from "./store/full-text-sort.js";
|
|
12
13
|
|
|
13
14
|
// ── Public interfaces ────────────────────────────────────────────────────────
|
|
14
15
|
|
|
@@ -68,6 +69,10 @@ export interface GrepInput {
|
|
|
68
69
|
since?: Date;
|
|
69
70
|
before?: Date;
|
|
70
71
|
limit?: number;
|
|
72
|
+
/** Sort order for results. Default "recency" (newest first).
|
|
73
|
+
* "relevance" sorts by FTS5 BM25 rank (full_text mode only).
|
|
74
|
+
* "hybrid" blends relevance with recency. */
|
|
75
|
+
sort?: SearchSort;
|
|
71
76
|
}
|
|
72
77
|
|
|
73
78
|
export interface GrepResult {
|
|
@@ -222,9 +227,9 @@ export class RetrievalEngine {
|
|
|
222
227
|
* Depending on `scope`, searches messages, summaries, or both (in parallel).
|
|
223
228
|
*/
|
|
224
229
|
async grep(input: GrepInput): Promise<GrepResult> {
|
|
225
|
-
const { query, mode, scope, conversationId, since, before, limit } = input;
|
|
230
|
+
const { query, mode, scope, conversationId, since, before, limit, sort } = input;
|
|
226
231
|
|
|
227
|
-
const searchInput = { query, mode, conversationId, since, before, limit };
|
|
232
|
+
const searchInput = { query, mode, conversationId, since, before, limit, sort };
|
|
228
233
|
|
|
229
234
|
let messages: MessageSearchResult[] = [];
|
|
230
235
|
let summaries: SummarySearchResult[] = [];
|
|
@@ -241,9 +246,6 @@ export class RetrievalEngine {
|
|
|
241
246
|
]);
|
|
242
247
|
}
|
|
243
248
|
|
|
244
|
-
messages.sort((a, b) => b.createdAt.getTime() - a.createdAt.getTime());
|
|
245
|
-
summaries.sort((a, b) => b.createdAt.getTime() - a.createdAt.getTime());
|
|
246
|
-
|
|
247
249
|
return {
|
|
248
250
|
messages,
|
|
249
251
|
summaries,
|
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
// Per-conversation prompt-cache telemetry types and row mapping used by
// cache-aware incremental compaction (see CompactionTelemetryStore below).
import type { DatabaseSync } from "node:sqlite";
import { withDatabaseTransaction } from "../transaction-mutex.js";
import { parseUtcTimestampOrNull } from "./parse-utc-timestamp.js";

// Prompt-cache lifecycle state observed for a conversation.
export type CacheState = "hot" | "cold" | "unknown";
// Coarse activity classification for a conversation.
export type ActivityBand = "low" | "medium" | "high";

/** Fully materialized telemetry row: camelCase fields, timestamps as Dates. */
export type ConversationCompactionTelemetryRecord = {
  conversationId: number;
  lastObservedCacheRead: number | null;
  lastObservedCacheWrite: number | null;
  lastObservedCacheHitAt: Date | null;
  lastObservedCacheBreakAt: Date | null;
  cacheState: CacheState;
  retention: string | null;
  lastLeafCompactionAt: Date | null;
  turnsSinceLeafCompaction: number;
  tokensAccumulatedSinceLeafCompaction: number;
  lastActivityBand: ActivityBand;
  updatedAt: Date;
};

/**
 * Write payload for upsertConversationCompactionTelemetry. Only
 * `conversationId` and `cacheState` are required; omitted fields fall back
 * to null / 0 / "low" (see the upsert implementation).
 */
export type UpsertConversationCompactionTelemetryInput = {
  conversationId: number;
  lastObservedCacheRead?: number | null;
  lastObservedCacheWrite?: number | null;
  lastObservedCacheHitAt?: Date | null;
  lastObservedCacheBreakAt?: Date | null;
  cacheState: CacheState;
  retention?: string | null;
  lastLeafCompactionAt?: Date | null;
  turnsSinceLeafCompaction?: number;
  tokensAccumulatedSinceLeafCompaction?: number;
  lastActivityBand?: ActivityBand;
};

// Raw snake_case row shape as returned by SQLite.
type ConversationCompactionTelemetryRow = {
  conversation_id: number;
  last_observed_cache_read: number | null;
  last_observed_cache_write: number | null;
  last_observed_cache_hit_at: string | null;
  last_observed_cache_break_at: string | null;
  cache_state: CacheState;
  retention: string | null;
  last_leaf_compaction_at: string | null;
  turns_since_leaf_compaction: number | null;
  tokens_accumulated_since_leaf_compaction: number | null;
  last_activity_band: ActivityBand | null;
  updated_at: string;
};

/**
 * Map a raw snake_case DB row onto the camelCase record type.
 * Nullable counters default to 0, a missing activity band to "low", and a
 * missing/unparseable updated_at falls back to the Unix epoch.
 */
function toConversationCompactionTelemetryRecord(
  row: ConversationCompactionTelemetryRow,
): ConversationCompactionTelemetryRecord {
  return {
    conversationId: row.conversation_id,
    lastObservedCacheRead: row.last_observed_cache_read,
    lastObservedCacheWrite: row.last_observed_cache_write,
    lastObservedCacheHitAt: parseUtcTimestampOrNull(row.last_observed_cache_hit_at),
    lastObservedCacheBreakAt: parseUtcTimestampOrNull(row.last_observed_cache_break_at),
    cacheState: row.cache_state,
    retention: row.retention,
    lastLeafCompactionAt: parseUtcTimestampOrNull(row.last_leaf_compaction_at),
    turnsSinceLeafCompaction: row.turns_since_leaf_compaction ?? 0,
    tokensAccumulatedSinceLeafCompaction: row.tokens_accumulated_since_leaf_compaction ?? 0,
    lastActivityBand: row.last_activity_band ?? "low",
    // Epoch-zero sentinel when updated_at is absent or unparseable.
    updatedAt: parseUtcTimestampOrNull(row.updated_at) ?? new Date(0),
  };
}
|
|
70
|
+
|
|
71
|
+
/**
 * Persist and query per-conversation prompt-cache telemetry used by
 * cache-aware incremental compaction.
 *
 * Note: the accessor methods are declared async for interface symmetry, but
 * the underlying node:sqlite calls are synchronous.
 */
export class CompactionTelemetryStore {
  constructor(private readonly db: DatabaseSync) {}

  /** Execute multiple telemetry writes atomically. */
  // Delegates to the shared transaction mutex using a plain (deferred) BEGIN.
  withTransaction<T>(fn: () => Promise<T>): Promise<T> {
    return withDatabaseTransaction(this.db, "BEGIN", fn);
  }

  /** Load the latest persisted telemetry for a conversation, or null. */
  async getConversationCompactionTelemetry(
    conversationId: number,
  ): Promise<ConversationCompactionTelemetryRecord | null> {
    const row = this.db
      .prepare(
        `SELECT
           conversation_id,
           last_observed_cache_read,
           last_observed_cache_write,
           last_observed_cache_hit_at,
           last_observed_cache_break_at,
           cache_state,
           retention,
           last_leaf_compaction_at,
           turns_since_leaf_compaction,
           tokens_accumulated_since_leaf_compaction,
           last_activity_band,
           updated_at
         FROM conversation_compaction_telemetry
         WHERE conversation_id = ?`,
      )
      .get(conversationId) as ConversationCompactionTelemetryRow | undefined;
    return row ? toConversationCompactionTelemetryRecord(row) : null;
  }

  /**
   * Upsert the current cache telemetry snapshot for a conversation.
   * Omitted optional fields are stored as null (timestamps/retention),
   * 0 (counters), or "low" (activity band); updated_at is always set to the
   * database clock via datetime('now').
   */
  async upsertConversationCompactionTelemetry(
    input: UpsertConversationCompactionTelemetryInput,
  ): Promise<void> {
    this.db
      .prepare(
        // 11 bound placeholders + datetime('now') for the 12th column.
        `INSERT INTO conversation_compaction_telemetry (
           conversation_id,
           last_observed_cache_read,
           last_observed_cache_write,
           last_observed_cache_hit_at,
           last_observed_cache_break_at,
           cache_state,
           retention,
           last_leaf_compaction_at,
           turns_since_leaf_compaction,
           tokens_accumulated_since_leaf_compaction,
           last_activity_band,
           updated_at
         ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, datetime('now'))
         ON CONFLICT(conversation_id) DO UPDATE SET
           last_observed_cache_read = excluded.last_observed_cache_read,
           last_observed_cache_write = excluded.last_observed_cache_write,
           last_observed_cache_hit_at = excluded.last_observed_cache_hit_at,
           last_observed_cache_break_at = excluded.last_observed_cache_break_at,
           cache_state = excluded.cache_state,
           retention = excluded.retention,
           last_leaf_compaction_at = excluded.last_leaf_compaction_at,
           turns_since_leaf_compaction = excluded.turns_since_leaf_compaction,
           tokens_accumulated_since_leaf_compaction = excluded.tokens_accumulated_since_leaf_compaction,
           last_activity_band = excluded.last_activity_band,
           updated_at = datetime('now')`,
      )
      .run(
        input.conversationId,
        input.lastObservedCacheRead ?? null,
        input.lastObservedCacheWrite ?? null,
        // Dates are serialized to ISO-8601 UTC strings for storage.
        input.lastObservedCacheHitAt?.toISOString() ?? null,
        input.lastObservedCacheBreakAt?.toISOString() ?? null,
        input.cacheState,
        input.retention ?? null,
        input.lastLeafCompactionAt?.toISOString() ?? null,
        input.turnsSinceLeafCompaction ?? 0,
        input.tokensAccumulatedSinceLeafCompaction ?? 0,
        input.lastActivityBand ?? "low",
      );
  }
}
|