@martian-engineering/lossless-claw 0.4.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +2 -1
- package/src/compaction.ts +83 -10
- package/src/db/connection.ts +2 -0
- package/src/db/migration.ts +21 -0
- package/src/engine.ts +657 -146
- package/src/large-files.ts +19 -0
- package/src/plugin/index.ts +188 -28
- package/src/store/conversation-store.ts +58 -2
- package/src/store/full-text-fallback.ts +9 -0
- package/src/store/index.ts +2 -0
- package/src/store/summary-store.ts +130 -10
- package/src/summarize.ts +205 -12
- package/src/types.ts +9 -0
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@martian-engineering/lossless-claw",
|
|
3
|
-
"version": "0.4.0",
|
|
3
|
+
"version": "0.5.0",
|
|
4
4
|
"description": "Lossless Context Management plugin for OpenClaw — DAG-based conversation summarization with incremental compaction",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "index.ts",
|
|
@@ -35,6 +35,7 @@
|
|
|
35
35
|
"@sinclair/typebox": "0.34.48"
|
|
36
36
|
},
|
|
37
37
|
"devDependencies": {
|
|
38
|
+
"@changesets/changelog-github": "^0.6.0",
|
|
38
39
|
"@changesets/cli": "^2.30.0",
|
|
39
40
|
"typescript": "^5.7.0",
|
|
40
41
|
"vitest": "^3.0.0"
|
package/src/compaction.ts
CHANGED
|
@@ -142,6 +142,14 @@ function generateSummaryId(content: string): string {
|
|
|
142
142
|
/** Maximum characters for the deterministic fallback truncation (512 tokens * 4 chars). */
|
|
143
143
|
const FALLBACK_MAX_CHARS = 512 * 4;
|
|
144
144
|
const DEFAULT_LEAF_CHUNK_TOKENS = 20_000;
|
|
145
|
+
|
|
146
|
+
/**
|
|
147
|
+
* Pattern matching MEDIA:/... file path references that appear in message content
|
|
148
|
+
* when the original message contained only a media attachment (image, file, etc.)
|
|
149
|
+
* with no meaningful text.
|
|
150
|
+
*/
|
|
151
|
+
const MEDIA_PATH_RE = /^MEDIA:\/.+$/;
|
|
152
|
+
|
|
145
153
|
const CONDENSED_MIN_INPUT_RATIO = 0.1;
|
|
146
154
|
|
|
147
155
|
function dedupeOrderedIds(ids: Iterable<string>): string[] {
|
|
@@ -231,6 +239,7 @@ export class CompactionEngine {
|
|
|
231
239
|
summarize: CompactionSummarizeFn;
|
|
232
240
|
force?: boolean;
|
|
233
241
|
hardTrigger?: boolean;
|
|
242
|
+
summaryModel?: string;
|
|
234
243
|
}): Promise<CompactionResult> {
|
|
235
244
|
return this.compactFullSweep(input);
|
|
236
245
|
}
|
|
@@ -246,6 +255,7 @@ export class CompactionEngine {
|
|
|
246
255
|
summarize: CompactionSummarizeFn;
|
|
247
256
|
force?: boolean;
|
|
248
257
|
previousSummaryContent?: string;
|
|
258
|
+
summaryModel?: string;
|
|
249
259
|
}): Promise<CompactionResult> {
|
|
250
260
|
const { conversationId, tokenBudget, summarize, force } = input;
|
|
251
261
|
|
|
@@ -281,6 +291,7 @@ export class CompactionEngine {
|
|
|
281
291
|
leafChunk.items,
|
|
282
292
|
summarize,
|
|
283
293
|
previousSummaryContent,
|
|
294
|
+
input.summaryModel,
|
|
284
295
|
);
|
|
285
296
|
if (!leafResult) {
|
|
286
297
|
return {
|
|
@@ -322,6 +333,7 @@ export class CompactionEngine {
|
|
|
322
333
|
chunk.items,
|
|
323
334
|
targetDepth,
|
|
324
335
|
summarize,
|
|
336
|
+
input.summaryModel,
|
|
325
337
|
);
|
|
326
338
|
if (!condenseResult) {
|
|
327
339
|
break;
|
|
@@ -370,6 +382,7 @@ export class CompactionEngine {
|
|
|
370
382
|
summarize: CompactionSummarizeFn;
|
|
371
383
|
force?: boolean;
|
|
372
384
|
hardTrigger?: boolean;
|
|
385
|
+
summaryModel?: string;
|
|
373
386
|
}): Promise<CompactionResult> {
|
|
374
387
|
const { conversationId, tokenBudget, summarize, force, hardTrigger } = input;
|
|
375
388
|
|
|
@@ -416,6 +429,7 @@ export class CompactionEngine {
|
|
|
416
429
|
leafChunk.items,
|
|
417
430
|
summarize,
|
|
418
431
|
previousSummaryContent,
|
|
432
|
+
input.summaryModel,
|
|
419
433
|
);
|
|
420
434
|
if (!leafResult) {
|
|
421
435
|
break;
|
|
@@ -461,6 +475,7 @@ export class CompactionEngine {
|
|
|
461
475
|
candidate.chunk.items,
|
|
462
476
|
candidate.targetDepth,
|
|
463
477
|
summarize,
|
|
478
|
+
input.summaryModel,
|
|
464
479
|
);
|
|
465
480
|
if (!condenseResult) {
|
|
466
481
|
break;
|
|
@@ -511,6 +526,7 @@ export class CompactionEngine {
|
|
|
511
526
|
targetTokens?: number;
|
|
512
527
|
currentTokens?: number;
|
|
513
528
|
summarize: CompactionSummarizeFn;
|
|
529
|
+
summaryModel?: string;
|
|
514
530
|
}): Promise<{ success: boolean; rounds: number; finalTokens: number }> {
|
|
515
531
|
const { conversationId, tokenBudget, summarize } = input;
|
|
516
532
|
const targetTokens =
|
|
@@ -542,6 +558,7 @@ export class CompactionEngine {
|
|
|
542
558
|
tokenBudget,
|
|
543
559
|
summarize,
|
|
544
560
|
force: true,
|
|
561
|
+
summaryModel: input.summaryModel,
|
|
545
562
|
});
|
|
546
563
|
|
|
547
564
|
if (result.tokensAfter <= targetTokens) {
|
|
@@ -998,6 +1015,17 @@ export class CompactionEngine {
|
|
|
998
1015
|
};
|
|
999
1016
|
}
|
|
1000
1017
|
const inputTokens = Math.max(1, estimateTokens(sourceText));
|
|
1018
|
+
const buildDeterministicFallback = (): { content: string; level: CompactionLevel } => {
|
|
1019
|
+
const truncated =
|
|
1020
|
+
sourceText.length > FALLBACK_MAX_CHARS
|
|
1021
|
+
? sourceText.slice(0, FALLBACK_MAX_CHARS)
|
|
1022
|
+
: sourceText;
|
|
1023
|
+
return {
|
|
1024
|
+
content: `${truncated}
|
|
1025
|
+
[Truncated from ${inputTokens} tokens]`,
|
|
1026
|
+
level: "fallback",
|
|
1027
|
+
};
|
|
1028
|
+
};
|
|
1001
1029
|
|
|
1002
1030
|
const runSummarizer = async (aggressiveMode: boolean): Promise<string | null> => {
|
|
1003
1031
|
const output = await params.summarize(sourceText, aggressiveMode, params.options);
|
|
@@ -1007,7 +1035,9 @@ export class CompactionEngine {
|
|
|
1007
1035
|
|
|
1008
1036
|
const initialSummary = await runSummarizer(false);
|
|
1009
1037
|
if (initialSummary === null) {
|
|
1010
|
-
|
|
1038
|
+
// Empty provider output should still compact deterministically so auth
|
|
1039
|
+
// failures or empty responses do not stall compaction entirely.
|
|
1040
|
+
return buildDeterministicFallback();
|
|
1011
1041
|
}
|
|
1012
1042
|
let summaryText = initialSummary;
|
|
1013
1043
|
let level: CompactionLevel = "normal";
|
|
@@ -1015,25 +1045,60 @@ export class CompactionEngine {
|
|
|
1015
1045
|
if (estimateTokens(summaryText) >= inputTokens) {
|
|
1016
1046
|
const aggressiveSummary = await runSummarizer(true);
|
|
1017
1047
|
if (aggressiveSummary === null) {
|
|
1018
|
-
return null;
|
|
1048
|
+
return buildDeterministicFallback();
|
|
1019
1049
|
}
|
|
1020
1050
|
summaryText = aggressiveSummary;
|
|
1021
1051
|
level = "aggressive";
|
|
1022
1052
|
|
|
1023
1053
|
if (estimateTokens(summaryText) >= inputTokens) {
|
|
1024
|
-
const truncated =
|
|
1025
|
-
sourceText.length > FALLBACK_MAX_CHARS
|
|
1026
|
-
? sourceText.slice(0, FALLBACK_MAX_CHARS)
|
|
1027
|
-
: sourceText;
|
|
1028
|
-
summaryText = `${truncated}
|
|
1029
|
-
[Truncated from ${inputTokens} tokens]`;
|
|
1030
|
-
level = "fallback";
|
|
1054
|
+
return buildDeterministicFallback();
|
|
1031
1055
|
}
|
|
1032
1056
|
}
|
|
1033
1057
|
|
|
1034
1058
|
return { content: summaryText, level };
|
|
1035
1059
|
}
|
|
1036
1060
|
|
|
1061
|
+
// ── Private: Media Annotation ────────────────────────────────────────────
|
|
1062
|
+
|
|
1063
|
+
/**
|
|
1064
|
+
* Annotate a message's content with media context when it has file/media
|
|
1065
|
+
* attachments. This gives the summarizer enough context to produce a
|
|
1066
|
+
* meaningful summary instead of trying to compress raw file paths.
|
|
1067
|
+
*
|
|
1068
|
+
* - Media-only messages (just a file path, no text): content is replaced
|
|
1069
|
+
* with "[Media attachment]" or "[Image attachment]" etc.
|
|
1070
|
+
* - Media-mostly messages (any real text + attachment): content is annotated
|
|
1071
|
+
* with " [with media attachment]" suffix.
|
|
1072
|
+
* - Text-only messages: returned unchanged.
|
|
1073
|
+
*/
|
|
1074
|
+
private async annotateMediaContent(
|
|
1075
|
+
messageId: number,
|
|
1076
|
+
content: string,
|
|
1077
|
+
): Promise<string> {
|
|
1078
|
+
const parts = await this.conversationStore.getMessageParts(messageId);
|
|
1079
|
+
const hasMediaParts = parts.some(
|
|
1080
|
+
(p) => p.partType === "file" || p.partType === "snapshot",
|
|
1081
|
+
);
|
|
1082
|
+
if (!hasMediaParts) {
|
|
1083
|
+
return content;
|
|
1084
|
+
}
|
|
1085
|
+
|
|
1086
|
+
// Strip MEDIA:/... paths to see how much actual text remains
|
|
1087
|
+
const textWithoutPaths = content
|
|
1088
|
+
.split("\n")
|
|
1089
|
+
.filter((line) => !MEDIA_PATH_RE.test(line.trim()))
|
|
1090
|
+
.join("\n")
|
|
1091
|
+
.trim();
|
|
1092
|
+
|
|
1093
|
+
if (textWithoutPaths.length === 0) {
|
|
1094
|
+
// Media-only: replace with descriptive annotation
|
|
1095
|
+
return "[Media attachment]";
|
|
1096
|
+
}
|
|
1097
|
+
|
|
1098
|
+
// Media-mostly: keep the text, add annotation
|
|
1099
|
+
return `${textWithoutPaths} [with media attachment]`;
|
|
1100
|
+
}
|
|
1101
|
+
|
|
1037
1102
|
// ── Private: Leaf Pass ───────────────────────────────────────────────────
|
|
1038
1103
|
|
|
1039
1104
|
/**
|
|
@@ -1044,6 +1109,7 @@ export class CompactionEngine {
|
|
|
1044
1109
|
messageItems: ContextItemRecord[],
|
|
1045
1110
|
summarize: CompactionSummarizeFn,
|
|
1046
1111
|
previousSummaryContent?: string,
|
|
1112
|
+
summaryModel?: string,
|
|
1047
1113
|
): Promise<{ summaryId: string; level: CompactionLevel; content: string } | null> {
|
|
1048
1114
|
// Fetch full message content for each context item
|
|
1049
1115
|
const messageContents: { messageId: number; content: string; createdAt: Date; tokenCount: number }[] =
|
|
@@ -1054,9 +1120,13 @@ export class CompactionEngine {
|
|
|
1054
1120
|
}
|
|
1055
1121
|
const msg = await this.conversationStore.getMessageById(item.messageId);
|
|
1056
1122
|
if (msg) {
|
|
1123
|
+
const annotatedContent = await this.annotateMediaContent(
|
|
1124
|
+
msg.messageId,
|
|
1125
|
+
msg.content,
|
|
1126
|
+
);
|
|
1057
1127
|
messageContents.push({
|
|
1058
1128
|
messageId: msg.messageId,
|
|
1059
|
-
content: msg.content,
|
|
1129
|
+
content: annotatedContent,
|
|
1060
1130
|
createdAt: msg.createdAt,
|
|
1061
1131
|
tokenCount: this.resolveMessageTokenCount(msg),
|
|
1062
1132
|
});
|
|
@@ -1110,6 +1180,7 @@ export class CompactionEngine {
|
|
|
1110
1180
|
(sum, message) => sum + Math.max(0, Math.floor(message.tokenCount)),
|
|
1111
1181
|
0,
|
|
1112
1182
|
),
|
|
1183
|
+
model: summaryModel,
|
|
1113
1184
|
});
|
|
1114
1185
|
|
|
1115
1186
|
// Link to source messages
|
|
@@ -1141,6 +1212,7 @@ export class CompactionEngine {
|
|
|
1141
1212
|
summaryItems: ContextItemRecord[],
|
|
1142
1213
|
targetDepth: number,
|
|
1143
1214
|
summarize: CompactionSummarizeFn,
|
|
1215
|
+
summaryModel?: string,
|
|
1144
1216
|
): Promise<PassResult | null> {
|
|
1145
1217
|
// Fetch full summary records
|
|
1146
1218
|
const summaryRecords: SummaryRecord[] = [];
|
|
@@ -1242,6 +1314,7 @@ export class CompactionEngine {
|
|
|
1242
1314
|
: 0;
|
|
1243
1315
|
return count + sourceTokens;
|
|
1244
1316
|
}, 0),
|
|
1317
|
+
model: summaryModel,
|
|
1245
1318
|
});
|
|
1246
1319
|
|
|
1247
1320
|
// Link to parent summaries
|
package/src/db/connection.ts
CHANGED
|
@@ -3,6 +3,7 @@ import { dirname, resolve } from "node:path";
|
|
|
3
3
|
import { DatabaseSync } from "node:sqlite";
|
|
4
4
|
|
|
5
5
|
type ConnectionKey = string;
|
|
6
|
+
const SQLITE_BUSY_TIMEOUT_MS = 5_000;
|
|
6
7
|
|
|
7
8
|
const connectionsByPath = new Map<ConnectionKey, Set<DatabaseSync>>();
|
|
8
9
|
const connectionIndex = new Map<DatabaseSync, ConnectionKey>();
|
|
@@ -29,6 +30,7 @@ function ensureDbDirectory(dbPath: string): void {
|
|
|
29
30
|
|
|
30
31
|
function configureConnection(db: DatabaseSync): DatabaseSync {
|
|
31
32
|
db.exec("PRAGMA journal_mode = WAL");
|
|
33
|
+
db.exec(`PRAGMA busy_timeout = ${SQLITE_BUSY_TIMEOUT_MS}`);
|
|
32
34
|
db.exec("PRAGMA foreign_keys = ON");
|
|
33
35
|
return db;
|
|
34
36
|
}
|
package/src/db/migration.ts
CHANGED
|
@@ -80,6 +80,14 @@ function isoStringOrNull(value: Date | null): string | null {
|
|
|
80
80
|
return value ? value.toISOString() : null;
|
|
81
81
|
}
|
|
82
82
|
|
|
83
|
+
function ensureSummaryModelColumn(db: DatabaseSync): void {
|
|
84
|
+
const summaryColumns = db.prepare(`PRAGMA table_info(summaries)`).all() as SummaryColumnInfo[];
|
|
85
|
+
const hasModel = summaryColumns.some((col) => col.name === "model");
|
|
86
|
+
if (!hasModel) {
|
|
87
|
+
db.exec(`ALTER TABLE summaries ADD COLUMN model TEXT NOT NULL DEFAULT 'unknown'`);
|
|
88
|
+
}
|
|
89
|
+
}
|
|
90
|
+
|
|
83
91
|
function backfillSummaryDepths(db: DatabaseSync): void {
|
|
84
92
|
// Leaves are always depth 0, even if legacy rows had malformed values.
|
|
85
93
|
db.exec(`UPDATE summaries SET depth = 0 WHERE kind = 'leaf'`);
|
|
@@ -474,6 +482,16 @@ export function runLcmMigrations(
|
|
|
474
482
|
created_at TEXT NOT NULL DEFAULT (datetime('now'))
|
|
475
483
|
);
|
|
476
484
|
|
|
485
|
+
CREATE TABLE IF NOT EXISTS conversation_bootstrap_state (
|
|
486
|
+
conversation_id INTEGER PRIMARY KEY REFERENCES conversations(conversation_id) ON DELETE CASCADE,
|
|
487
|
+
session_file_path TEXT NOT NULL,
|
|
488
|
+
last_seen_size INTEGER NOT NULL,
|
|
489
|
+
last_seen_mtime_ms INTEGER NOT NULL,
|
|
490
|
+
last_processed_offset INTEGER NOT NULL,
|
|
491
|
+
last_processed_entry_hash TEXT,
|
|
492
|
+
updated_at TEXT NOT NULL DEFAULT (datetime('now'))
|
|
493
|
+
);
|
|
494
|
+
|
|
477
495
|
-- Indexes
|
|
478
496
|
CREATE INDEX IF NOT EXISTS messages_conv_seq_idx ON messages (conversation_id, seq);
|
|
479
497
|
CREATE INDEX IF NOT EXISTS summaries_conv_created_idx ON summaries (conversation_id, created_at);
|
|
@@ -481,6 +499,8 @@ export function runLcmMigrations(
|
|
|
481
499
|
CREATE INDEX IF NOT EXISTS message_parts_type_idx ON message_parts (part_type);
|
|
482
500
|
CREATE INDEX IF NOT EXISTS context_items_conv_idx ON context_items (conversation_id, ordinal);
|
|
483
501
|
CREATE INDEX IF NOT EXISTS large_files_conv_idx ON large_files (conversation_id, created_at);
|
|
502
|
+
CREATE INDEX IF NOT EXISTS bootstrap_state_path_idx
|
|
503
|
+
ON conversation_bootstrap_state (session_file_path, updated_at);
|
|
484
504
|
`);
|
|
485
505
|
|
|
486
506
|
// Forward-compatible conversations migration for existing DBs.
|
|
@@ -500,6 +520,7 @@ export function runLcmMigrations(
|
|
|
500
520
|
db.exec(`CREATE UNIQUE INDEX IF NOT EXISTS conversations_session_key_idx ON conversations (session_key)`);
|
|
501
521
|
ensureSummaryDepthColumn(db);
|
|
502
522
|
ensureSummaryMetadataColumns(db);
|
|
523
|
+
ensureSummaryModelColumn(db);
|
|
503
524
|
backfillSummaryDepths(db);
|
|
504
525
|
backfillSummaryMetadata(db);
|
|
505
526
|
|