@martian-engineering/lossless-claw 0.2.2 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +208 -2
- package/index.ts +5 -5
- package/package.json +4 -4
- package/src/assembler.ts +163 -15
- package/src/db/config.ts +7 -2
- package/src/db/features.ts +42 -0
- package/src/db/migration.ts +10 -1
- package/src/engine.ts +70 -6
- package/src/store/conversation-store.ts +105 -16
- package/src/store/full-text-fallback.ts +74 -0
- package/src/store/summary-store.ts +93 -17
- package/src/transcript-repair.ts +88 -9
package/README.md
CHANGED
|
@@ -22,7 +22,7 @@ Nothing is lost. Raw messages stay in the database. Summaries link back to their
|
|
|
22
22
|
|
|
23
23
|
### Prerequisites
|
|
24
24
|
|
|
25
|
-
- OpenClaw with context engine support
|
|
25
|
+
- OpenClaw with plugin context engine support
|
|
26
26
|
- Node.js 22+
|
|
27
27
|
- An LLM provider configured in OpenClaw (used for summarization)
|
|
28
28
|
|
|
@@ -68,6 +68,168 @@ If you need to set it manually, ensure the context engine slot points at lossles
|
|
|
68
68
|
|
|
69
69
|
Restart OpenClaw after configuration changes.
|
|
70
70
|
|
|
71
|
+
### Optional: enable FTS5 for fast full-text search
|
|
72
|
+
|
|
73
|
+
`lossless-claw` works without FTS5 as of the current release. When FTS5 is unavailable in the
|
|
74
|
+
Node runtime that runs the OpenClaw gateway, the plugin:
|
|
75
|
+
|
|
76
|
+
- keeps persisting messages and summaries
|
|
77
|
+
- falls back from `"full_text"` search to a slower `LIKE`-based search
|
|
78
|
+
- loses FTS ranking/snippet quality
|
|
79
|
+
|
|
80
|
+
If you want native FTS5 search performance and ranking, the **exact Node runtime that runs the
|
|
81
|
+
gateway** must have SQLite FTS5 compiled in.
|
|
82
|
+
|
|
83
|
+
#### Probe the gateway runtime
|
|
84
|
+
|
|
85
|
+
Run this with the same `node` binary your gateway uses:
|
|
86
|
+
|
|
87
|
+
```bash
|
|
88
|
+
node --input-type=module - <<'NODE'
|
|
89
|
+
import { DatabaseSync } from 'node:sqlite';
|
|
90
|
+
const db = new DatabaseSync(':memory:');
|
|
91
|
+
const options = db.prepare('pragma compile_options').all().map((row) => row.compile_options);
|
|
92
|
+
|
|
93
|
+
console.log(options.filter((value) => value.includes('FTS')).join('\n') || 'no fts compile options');
|
|
94
|
+
|
|
95
|
+
try {
|
|
96
|
+
db.exec("CREATE VIRTUAL TABLE t USING fts5(content)");
|
|
97
|
+
console.log("fts5: ok");
|
|
98
|
+
} catch (err) {
|
|
99
|
+
console.log("fts5: fail");
|
|
100
|
+
console.log(err instanceof Error ? err.message : String(err));
|
|
101
|
+
}
|
|
102
|
+
NODE
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
Expected output:
|
|
106
|
+
|
|
107
|
+
```text
|
|
108
|
+
ENABLE_FTS5
|
|
109
|
+
fts5: ok
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
If you get `fts5: fail`, build or install an FTS5-capable Node and point the gateway at that runtime.
|
|
113
|
+
|
|
114
|
+
#### Build an FTS5-capable Node on macOS
|
|
115
|
+
|
|
116
|
+
This workflow was verified with Node `v22.15.0`.
|
|
117
|
+
|
|
118
|
+
```bash
|
|
119
|
+
cd ~/Projects
|
|
120
|
+
git clone --depth 1 --branch v22.15.0 https://github.com/nodejs/node.git node-fts5
|
|
121
|
+
cd node-fts5
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
Edit `deps/sqlite/sqlite.gyp` and add `SQLITE_ENABLE_FTS5` to the `defines` list for the `sqlite`
|
|
125
|
+
target:
|
|
126
|
+
|
|
127
|
+
```diff
|
|
128
|
+
'defines': [
|
|
129
|
+
'SQLITE_DEFAULT_MEMSTATUS=0',
|
|
130
|
+
+ 'SQLITE_ENABLE_FTS5',
|
|
131
|
+
'SQLITE_ENABLE_MATH_FUNCTIONS',
|
|
132
|
+
'SQLITE_ENABLE_SESSION',
|
|
133
|
+
'SQLITE_ENABLE_PREUPDATE_HOOK'
|
|
134
|
+
],
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
Important:
|
|
138
|
+
|
|
139
|
+
- patch `deps/sqlite/sqlite.gyp`, not only `node.gyp`
|
|
140
|
+
- `node:sqlite` uses the embedded SQLite built from `deps/sqlite/sqlite.gyp`
|
|
141
|
+
|
|
142
|
+
Build the runtime:
|
|
143
|
+
|
|
144
|
+
```bash
|
|
145
|
+
./configure --prefix="$PWD/out-install"
|
|
146
|
+
make -j8 node
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
Expose the binary under a Node-compatible basename that OpenClaw recognizes:
|
|
150
|
+
|
|
151
|
+
```bash
|
|
152
|
+
mkdir -p ~/Projects/node-fts5/bin
|
|
153
|
+
ln -sfn ~/Projects/node-fts5/out/Release/node ~/Projects/node-fts5/bin/node-22.15.0
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
Use a basename like `node-22.15.0`, `node`, or `nodejs`. Names like
|
|
157
|
+
`node-v22.15.0-fts5` may not be recognized correctly by OpenClaw's CLI/runtime parsing.
|
|
158
|
+
|
|
159
|
+
Verify the new runtime:
|
|
160
|
+
|
|
161
|
+
```bash
|
|
162
|
+
~/Projects/node-fts5/bin/node-22.15.0 --version
|
|
163
|
+
~/Projects/node-fts5/bin/node-22.15.0 --input-type=module - <<'NODE'
|
|
164
|
+
import { DatabaseSync } from 'node:sqlite';
|
|
165
|
+
const db = new DatabaseSync(':memory:');
|
|
166
|
+
db.exec("CREATE VIRTUAL TABLE t USING fts5(content)");
|
|
167
|
+
console.log("fts5: ok");
|
|
168
|
+
NODE
|
|
169
|
+
```
|
|
170
|
+
|
|
171
|
+
#### Point the OpenClaw gateway at that runtime on macOS
|
|
172
|
+
|
|
173
|
+
Back up the existing LaunchAgent plist first:
|
|
174
|
+
|
|
175
|
+
```bash
|
|
176
|
+
cp ~/Library/LaunchAgents/ai.openclaw.gateway.plist \
|
|
177
|
+
~/Library/LaunchAgents/ai.openclaw.gateway.plist.bak-$(date +%Y%m%d-%H%M%S)
|
|
178
|
+
```
|
|
179
|
+
|
|
180
|
+
Replace the runtime path, then reload the agent:
|
|
181
|
+
|
|
182
|
+
```bash
|
|
183
|
+
/usr/libexec/PlistBuddy -c 'Set :ProgramArguments:0 /Users/youruser/Projects/node-fts5/bin/node-22.15.0' \
|
|
184
|
+
~/Library/LaunchAgents/ai.openclaw.gateway.plist
|
|
185
|
+
|
|
186
|
+
launchctl bootout gui/$UID ~/Library/LaunchAgents/ai.openclaw.gateway.plist 2>/dev/null || true
|
|
187
|
+
launchctl bootstrap gui/$UID ~/Library/LaunchAgents/ai.openclaw.gateway.plist
|
|
188
|
+
launchctl kickstart -k gui/$UID/ai.openclaw.gateway
|
|
189
|
+
```
|
|
190
|
+
|
|
191
|
+
Verify the live runtime:
|
|
192
|
+
|
|
193
|
+
```bash
|
|
194
|
+
launchctl print gui/$UID/ai.openclaw.gateway | sed -n '1,80p'
|
|
195
|
+
```
|
|
196
|
+
|
|
197
|
+
You should see:
|
|
198
|
+
|
|
199
|
+
```text
|
|
200
|
+
program = /Users/youruser/Projects/node-fts5/bin/node-22.15.0
|
|
201
|
+
```
|
|
202
|
+
|
|
203
|
+
#### Verify `lossless-claw`
|
|
204
|
+
|
|
205
|
+
Check the logs:
|
|
206
|
+
|
|
207
|
+
```bash
|
|
208
|
+
tail -n 60 ~/.openclaw/logs/gateway.log
|
|
209
|
+
tail -n 60 ~/.openclaw/logs/gateway.err.log
|
|
210
|
+
```
|
|
211
|
+
|
|
212
|
+
You want:
|
|
213
|
+
|
|
214
|
+
- `[gateway] [lcm] Plugin loaded ...`
|
|
215
|
+
- no new `no such module: fts5` errors
|
|
216
|
+
|
|
217
|
+
Then force one turn through the gateway and verify that the database is being populated:
|
|
218
|
+
|
|
219
|
+
```bash
|
|
220
|
+
/Users/youruser/Projects/node-fts5/bin/node-22.15.0 \
|
|
221
|
+
/path/to/openclaw/dist/index.js \
|
|
222
|
+
agent --session-id fts5-smoke --message 'Reply with exactly: ok' --timeout 60
|
|
223
|
+
|
|
224
|
+
sqlite3 ~/.openclaw/lcm.db '
|
|
225
|
+
select count(*) as conversations from conversations;
|
|
226
|
+
select count(*) as messages from messages;
|
|
227
|
+
select count(*) as summaries from summaries;
|
|
228
|
+
'
|
|
229
|
+
```
|
|
230
|
+
|
|
231
|
+
Those counts should increase after a real turn.
|
|
232
|
+
|
|
71
233
|
## Configuration
|
|
72
234
|
|
|
73
235
|
LCM is configured through a combination of plugin config and environment variables. Environment variables take precedence for backward compatibility.
|
|
@@ -110,9 +272,12 @@ Add a `lossless-claw` entry under `plugins.entries` in your OpenClaw config:
|
|
|
110
272
|
| `LCM_CONDENSED_TARGET_TOKENS` | `2000` | Target token count for condensed summaries |
|
|
111
273
|
| `LCM_MAX_EXPAND_TOKENS` | `4000` | Token cap for sub-agent expansion queries |
|
|
112
274
|
| `LCM_LARGE_FILE_TOKEN_THRESHOLD` | `25000` | File blocks above this size are intercepted and stored separately |
|
|
275
|
+
| `LCM_LARGE_FILE_SUMMARY_PROVIDER` | `""` | Provider override for large-file summarization |
|
|
276
|
+
| `LCM_LARGE_FILE_SUMMARY_MODEL` | `""` | Model override for large-file summarization |
|
|
113
277
|
| `LCM_SUMMARY_MODEL` | *(from OpenClaw)* | Model for summarization (e.g. `anthropic/claude-sonnet-4-20250514`) |
|
|
114
278
|
| `LCM_SUMMARY_PROVIDER` | *(from OpenClaw)* | Provider override for summarization |
|
|
115
|
-
| `
|
|
279
|
+
| `LCM_AUTOCOMPACT_DISABLED` | `false` | Disable automatic compaction after turns |
|
|
280
|
+
| `LCM_PRUNE_HEARTBEAT_OK` | `false` | Retroactively delete `HEARTBEAT_OK` turn cycles from LCM storage |
|
|
116
281
|
|
|
117
282
|
### Recommended starting configuration
|
|
118
283
|
|
|
@@ -126,6 +291,47 @@ LCM_CONTEXT_THRESHOLD=0.75
|
|
|
126
291
|
- **incrementalMaxDepth=-1** enables unlimited automatic condensation after each compaction pass — the DAG cascades as deep as needed. Set to `0` (default) for leaf-only, or a positive integer for a specific depth cap.
|
|
127
292
|
- **contextThreshold=0.75** triggers compaction when context reaches 75% of the model's window, leaving headroom for the model's response.
|
|
128
293
|
|
|
294
|
+
### OpenClaw session reset settings
|
|
295
|
+
|
|
296
|
+
LCM preserves history through compaction, but it does **not** change OpenClaw's core session reset policy. If sessions are resetting sooner than you want, increase OpenClaw's `session.reset.idleMinutes` or use a channel/type-specific override.
|
|
297
|
+
|
|
298
|
+
```json
|
|
299
|
+
{
|
|
300
|
+
"session": {
|
|
301
|
+
"reset": {
|
|
302
|
+
"mode": "idle",
|
|
303
|
+
"idleMinutes": 10080
|
|
304
|
+
}
|
|
305
|
+
}
|
|
306
|
+
}
|
|
307
|
+
```
|
|
308
|
+
|
|
309
|
+
- `session.reset.mode: "idle"` keeps a session alive until the idle window expires.
|
|
310
|
+
- `session.reset.idleMinutes` is the length of the idle window in minutes; the session resets once it has been idle for that long.
|
|
311
|
+
- OpenClaw does **not** currently enforce a maximum `idleMinutes`; in source it is validated only as a positive integer.
|
|
312
|
+
- If you also use daily reset mode, `idleMinutes` acts as a secondary guard: the session resets at the daily boundary or when the idle window expires, **whichever comes first**.
|
|
313
|
+
- Legacy `session.idleMinutes` still works, but OpenClaw prefers `session.reset.idleMinutes`.
|
|
314
|
+
|
|
315
|
+
Useful values:
|
|
316
|
+
|
|
317
|
+
- `1440` = 1 day
|
|
318
|
+
- `10080` = 7 days
|
|
319
|
+
- `43200` = 30 days
|
|
320
|
+
- `525600` = 365 days
|
|
321
|
+
|
|
322
|
+
For most long-lived LCM setups, a good starting point is:
|
|
323
|
+
|
|
324
|
+
```json
|
|
325
|
+
{
|
|
326
|
+
"session": {
|
|
327
|
+
"reset": {
|
|
328
|
+
"mode": "idle",
|
|
329
|
+
"idleMinutes": 10080
|
|
330
|
+
}
|
|
331
|
+
}
|
|
332
|
+
}
|
|
333
|
+
```
|
|
334
|
+
|
|
129
335
|
## How it works
|
|
130
336
|
|
|
131
337
|
See [docs/architecture.md](docs/architecture.md) for the full technical deep-dive. Here's the summary:
|
package/index.ts
CHANGED
|
@@ -597,7 +597,11 @@ function readLatestAssistantReply(messages: unknown[]): string | undefined {
|
|
|
597
597
|
function createLcmDependencies(api: OpenClawPluginApi): LcmDependencies {
|
|
598
598
|
const envSnapshot = snapshotPluginEnv();
|
|
599
599
|
const readEnv: ReadEnvFn = (key) => process.env[key];
|
|
600
|
-
const
|
|
600
|
+
const pluginConfig =
|
|
601
|
+
api.pluginConfig && typeof api.pluginConfig === "object" && !Array.isArray(api.pluginConfig)
|
|
602
|
+
? api.pluginConfig
|
|
603
|
+
: undefined;
|
|
604
|
+
const config = resolveLcmConfig(process.env, pluginConfig);
|
|
601
605
|
|
|
602
606
|
return {
|
|
603
607
|
config,
|
|
@@ -854,9 +858,6 @@ function createLcmDependencies(api: OpenClawPluginApi): LcmDependencies {
|
|
|
854
858
|
};
|
|
855
859
|
}
|
|
856
860
|
|
|
857
|
-
/** Cached config from configSchema.parse — available by the time register() runs. */
|
|
858
|
-
let _parsedPluginConfig: Record<string, unknown> | undefined;
|
|
859
|
-
|
|
860
861
|
const lcmPlugin = {
|
|
861
862
|
id: "lossless-claw",
|
|
862
863
|
name: "Lossless Context Management",
|
|
@@ -869,7 +870,6 @@ const lcmPlugin = {
|
|
|
869
870
|
value && typeof value === "object" && !Array.isArray(value)
|
|
870
871
|
? (value as Record<string, unknown>)
|
|
871
872
|
: {};
|
|
872
|
-
_parsedPluginConfig = raw;
|
|
873
873
|
return resolveLcmConfig(process.env, raw);
|
|
874
874
|
},
|
|
875
875
|
},
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@martian-engineering/lossless-claw",
|
|
3
|
-
"version": "0.2.
|
|
3
|
+
"version": "0.2.4",
|
|
4
4
|
"description": "Lossless Context Management plugin for OpenClaw — DAG-based conversation summarization with incremental compaction",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "index.ts",
|
|
@@ -24,6 +24,8 @@
|
|
|
24
24
|
"LICENSE"
|
|
25
25
|
],
|
|
26
26
|
"dependencies": {
|
|
27
|
+
"@mariozechner/pi-agent-core": "*",
|
|
28
|
+
"@mariozechner/pi-ai": "*",
|
|
27
29
|
"@sinclair/typebox": "0.34.48"
|
|
28
30
|
},
|
|
29
31
|
"devDependencies": {
|
|
@@ -31,9 +33,7 @@
|
|
|
31
33
|
"vitest": "^3.0.0"
|
|
32
34
|
},
|
|
33
35
|
"peerDependencies": {
|
|
34
|
-
"openclaw": "*"
|
|
35
|
-
"@mariozechner/pi-agent-core": "*",
|
|
36
|
-
"@mariozechner/pi-ai": "*"
|
|
36
|
+
"openclaw": "*"
|
|
37
37
|
},
|
|
38
38
|
"openclaw": {
|
|
39
39
|
"extensions": [
|
package/src/assembler.ts
CHANGED
|
@@ -144,6 +144,137 @@ function getOriginalRole(parts: MessagePartRecord[]): string | null {
|
|
|
144
144
|
return null;
|
|
145
145
|
}
|
|
146
146
|
|
|
147
|
+
function getPartMetadata(part: MessagePartRecord): {
|
|
148
|
+
originalRole?: string;
|
|
149
|
+
rawType?: string;
|
|
150
|
+
raw?: unknown;
|
|
151
|
+
} {
|
|
152
|
+
const decoded = parseJson(part.metadata);
|
|
153
|
+
if (!decoded || typeof decoded !== "object") {
|
|
154
|
+
return {};
|
|
155
|
+
}
|
|
156
|
+
|
|
157
|
+
const record = decoded as {
|
|
158
|
+
originalRole?: unknown;
|
|
159
|
+
rawType?: unknown;
|
|
160
|
+
raw?: unknown;
|
|
161
|
+
};
|
|
162
|
+
return {
|
|
163
|
+
originalRole:
|
|
164
|
+
typeof record.originalRole === "string" && record.originalRole.length > 0
|
|
165
|
+
? record.originalRole
|
|
166
|
+
: undefined,
|
|
167
|
+
rawType:
|
|
168
|
+
typeof record.rawType === "string" && record.rawType.length > 0
|
|
169
|
+
? record.rawType
|
|
170
|
+
: undefined,
|
|
171
|
+
raw: record.raw,
|
|
172
|
+
};
|
|
173
|
+
}
|
|
174
|
+
|
|
175
|
+
function parseStoredValue(value: string | null): unknown {
|
|
176
|
+
if (typeof value !== "string" || value.length === 0) {
|
|
177
|
+
return undefined;
|
|
178
|
+
}
|
|
179
|
+
const parsed = parseJson(value);
|
|
180
|
+
return parsed !== undefined ? parsed : value;
|
|
181
|
+
}
|
|
182
|
+
|
|
183
|
+
function reasoningBlockFromPart(part: MessagePartRecord, rawType?: string): unknown {
|
|
184
|
+
const type = rawType === "thinking" ? "thinking" : "reasoning";
|
|
185
|
+
if (typeof part.textContent === "string" && part.textContent.length > 0) {
|
|
186
|
+
return type === "thinking"
|
|
187
|
+
? { type, thinking: part.textContent }
|
|
188
|
+
: { type, text: part.textContent };
|
|
189
|
+
}
|
|
190
|
+
return { type };
|
|
191
|
+
}
|
|
192
|
+
|
|
193
|
+
/**
|
|
194
|
+
* Detect if a raw block is an OpenClaw-normalised OpenAI reasoning item.
|
|
195
|
+
* OpenClaw converts OpenAI `{type:"reasoning", id:"rs_…", encrypted_content:"…"}`
|
|
196
|
+
* into `{type:"thinking", thinking:"", thinkingSignature:"{…}"}`.
|
|
197
|
+
* When we reassemble for the OpenAI provider we need the original back.
|
|
198
|
+
*/
|
|
199
|
+
function tryRestoreOpenAIReasoning(raw: Record<string, unknown>): Record<string, unknown> | null {
|
|
200
|
+
if (raw.type !== "thinking") return null;
|
|
201
|
+
const sig = raw.thinkingSignature;
|
|
202
|
+
if (typeof sig !== "string" || !sig.startsWith("{")) return null;
|
|
203
|
+
try {
|
|
204
|
+
const parsed = JSON.parse(sig) as Record<string, unknown>;
|
|
205
|
+
if (parsed.type === "reasoning" && typeof parsed.id === "string") {
|
|
206
|
+
return parsed;
|
|
207
|
+
}
|
|
208
|
+
} catch {
|
|
209
|
+
// not valid JSON — leave as-is
|
|
210
|
+
}
|
|
211
|
+
return null;
|
|
212
|
+
}
|
|
213
|
+
|
|
214
|
+
function toolCallBlockFromPart(part: MessagePartRecord, rawType?: string): unknown {
|
|
215
|
+
const type =
|
|
216
|
+
rawType === "function_call" ||
|
|
217
|
+
rawType === "functionCall" ||
|
|
218
|
+
rawType === "tool_use" ||
|
|
219
|
+
rawType === "tool-use" ||
|
|
220
|
+
rawType === "toolUse" ||
|
|
221
|
+
rawType === "toolCall"
|
|
222
|
+
? rawType
|
|
223
|
+
: "toolCall";
|
|
224
|
+
const input = parseStoredValue(part.toolInput);
|
|
225
|
+
const block: Record<string, unknown> = { type };
|
|
226
|
+
|
|
227
|
+
if (type === "function_call") {
|
|
228
|
+
if (typeof part.toolCallId === "string" && part.toolCallId.length > 0) {
|
|
229
|
+
block.call_id = part.toolCallId;
|
|
230
|
+
}
|
|
231
|
+
if (typeof part.toolName === "string" && part.toolName.length > 0) {
|
|
232
|
+
block.name = part.toolName;
|
|
233
|
+
}
|
|
234
|
+
if (input !== undefined) {
|
|
235
|
+
block.arguments = input;
|
|
236
|
+
}
|
|
237
|
+
return block;
|
|
238
|
+
}
|
|
239
|
+
|
|
240
|
+
if (typeof part.toolCallId === "string" && part.toolCallId.length > 0) {
|
|
241
|
+
block.id = part.toolCallId;
|
|
242
|
+
}
|
|
243
|
+
if (typeof part.toolName === "string" && part.toolName.length > 0) {
|
|
244
|
+
block.name = part.toolName;
|
|
245
|
+
}
|
|
246
|
+
|
|
247
|
+
if (input !== undefined) {
|
|
248
|
+
if (type === "functionCall") {
|
|
249
|
+
block.arguments = input;
|
|
250
|
+
} else {
|
|
251
|
+
block.input = input;
|
|
252
|
+
}
|
|
253
|
+
}
|
|
254
|
+
return block;
|
|
255
|
+
}
|
|
256
|
+
|
|
257
|
+
function toolResultBlockFromPart(part: MessagePartRecord, rawType?: string): unknown {
|
|
258
|
+
const type =
|
|
259
|
+
rawType === "function_call_output" || rawType === "toolResult" || rawType === "tool_result"
|
|
260
|
+
? rawType
|
|
261
|
+
: "tool_result";
|
|
262
|
+
const output = parseStoredValue(part.toolOutput) ?? part.textContent ?? "";
|
|
263
|
+
const block: Record<string, unknown> = { type, output };
|
|
264
|
+
|
|
265
|
+
if (type === "function_call_output") {
|
|
266
|
+
if (typeof part.toolCallId === "string" && part.toolCallId.length > 0) {
|
|
267
|
+
block.call_id = part.toolCallId;
|
|
268
|
+
}
|
|
269
|
+
return block;
|
|
270
|
+
}
|
|
271
|
+
|
|
272
|
+
if (typeof part.toolCallId === "string" && part.toolCallId.length > 0) {
|
|
273
|
+
block.tool_use_id = part.toolCallId;
|
|
274
|
+
}
|
|
275
|
+
return block;
|
|
276
|
+
}
|
|
277
|
+
|
|
147
278
|
function toRuntimeRole(
|
|
148
279
|
dbRole: MessageRole,
|
|
149
280
|
parts: MessagePartRecord[],
|
|
@@ -173,26 +304,43 @@ function toRuntimeRole(
|
|
|
173
304
|
}
|
|
174
305
|
|
|
175
306
|
function blockFromPart(part: MessagePartRecord): unknown {
|
|
176
|
-
const
|
|
177
|
-
if (
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
307
|
+
const metadata = getPartMetadata(part);
|
|
308
|
+
if (metadata.raw && typeof metadata.raw === "object") {
|
|
309
|
+
// If this is an OpenClaw-normalised OpenAI reasoning block, restore the original
|
|
310
|
+
// OpenAI format so the Responses API gets the {type:"reasoning", id:"rs_…"} it expects.
|
|
311
|
+
const restored = tryRestoreOpenAIReasoning(metadata.raw as Record<string, unknown>);
|
|
312
|
+
if (restored) return restored;
|
|
313
|
+
return metadata.raw;
|
|
182
314
|
}
|
|
183
315
|
|
|
184
|
-
if (part.partType === "
|
|
185
|
-
return
|
|
316
|
+
if (part.partType === "reasoning") {
|
|
317
|
+
return reasoningBlockFromPart(part, metadata.rawType);
|
|
186
318
|
}
|
|
187
319
|
if (part.partType === "tool") {
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
return toolOutput;
|
|
191
|
-
}
|
|
192
|
-
if (typeof part.textContent === "string") {
|
|
193
|
-
return { type: "text", text: part.textContent };
|
|
320
|
+
if (metadata.originalRole === "toolResult" || metadata.rawType === "function_call_output") {
|
|
321
|
+
return toolResultBlockFromPart(part, metadata.rawType);
|
|
194
322
|
}
|
|
195
|
-
return
|
|
323
|
+
return toolCallBlockFromPart(part, metadata.rawType);
|
|
324
|
+
}
|
|
325
|
+
if (
|
|
326
|
+
metadata.rawType === "function_call" ||
|
|
327
|
+
metadata.rawType === "functionCall" ||
|
|
328
|
+
metadata.rawType === "tool_use" ||
|
|
329
|
+
metadata.rawType === "tool-use" ||
|
|
330
|
+
metadata.rawType === "toolUse" ||
|
|
331
|
+
metadata.rawType === "toolCall"
|
|
332
|
+
) {
|
|
333
|
+
return toolCallBlockFromPart(part, metadata.rawType);
|
|
334
|
+
}
|
|
335
|
+
if (
|
|
336
|
+
metadata.rawType === "function_call_output" ||
|
|
337
|
+
metadata.rawType === "tool_result" ||
|
|
338
|
+
metadata.rawType === "toolResult"
|
|
339
|
+
) {
|
|
340
|
+
return toolResultBlockFromPart(part, metadata.rawType);
|
|
341
|
+
}
|
|
342
|
+
if (part.partType === "text") {
|
|
343
|
+
return { type: "text", text: part.textContent ?? "" };
|
|
196
344
|
}
|
|
197
345
|
|
|
198
346
|
if (typeof part.textContent === "string" && part.textContent.length > 0) {
|
package/src/db/config.ts
CHANGED
|
@@ -71,7 +71,10 @@ export function resolveLcmConfig(
|
|
|
71
71
|
? env.LCM_ENABLED !== "false"
|
|
72
72
|
: toBool(pc.enabled) ?? true,
|
|
73
73
|
databasePath:
|
|
74
|
-
env.LCM_DATABASE_PATH
|
|
74
|
+
env.LCM_DATABASE_PATH
|
|
75
|
+
?? toStr(pc.dbPath)
|
|
76
|
+
?? toStr(pc.databasePath)
|
|
77
|
+
?? join(homedir(), ".openclaw", "lcm.db"),
|
|
75
78
|
contextThreshold:
|
|
76
79
|
(env.LCM_CONTEXT_THRESHOLD !== undefined ? parseFloat(env.LCM_CONTEXT_THRESHOLD) : undefined)
|
|
77
80
|
?? toNumber(pc.contextThreshold) ?? 0.75,
|
|
@@ -104,7 +107,9 @@ export function resolveLcmConfig(
|
|
|
104
107
|
?? toNumber(pc.maxExpandTokens) ?? 4000,
|
|
105
108
|
largeFileTokenThreshold:
|
|
106
109
|
(env.LCM_LARGE_FILE_TOKEN_THRESHOLD !== undefined ? parseInt(env.LCM_LARGE_FILE_TOKEN_THRESHOLD, 10) : undefined)
|
|
107
|
-
?? toNumber(pc.
|
|
110
|
+
?? toNumber(pc.largeFileThresholdTokens)
|
|
111
|
+
?? toNumber(pc.largeFileTokenThreshold)
|
|
112
|
+
?? 25000,
|
|
108
113
|
largeFileSummaryProvider:
|
|
109
114
|
env.LCM_LARGE_FILE_SUMMARY_PROVIDER?.trim() ?? toStr(pc.largeFileSummaryProvider) ?? "",
|
|
110
115
|
largeFileSummaryModel:
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
import type { DatabaseSync } from "node:sqlite";
|
|
2
|
+
|
|
3
|
+
export type LcmDbFeatures = {
|
|
4
|
+
fts5Available: boolean;
|
|
5
|
+
};
|
|
6
|
+
|
|
7
|
+
const featureCache = new WeakMap<DatabaseSync, LcmDbFeatures>();
|
|
8
|
+
|
|
9
|
+
function probeFts5(db: DatabaseSync): boolean {
|
|
10
|
+
try {
|
|
11
|
+
db.exec("DROP TABLE IF EXISTS temp.__lcm_fts5_probe");
|
|
12
|
+
db.exec("CREATE VIRTUAL TABLE temp.__lcm_fts5_probe USING fts5(content)");
|
|
13
|
+
db.exec("DROP TABLE temp.__lcm_fts5_probe");
|
|
14
|
+
return true;
|
|
15
|
+
} catch {
|
|
16
|
+
try {
|
|
17
|
+
db.exec("DROP TABLE IF EXISTS temp.__lcm_fts5_probe");
|
|
18
|
+
} catch {
|
|
19
|
+
// Ignore cleanup failures after a failed probe.
|
|
20
|
+
}
|
|
21
|
+
return false;
|
|
22
|
+
}
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
/**
|
|
26
|
+
* Detect SQLite features exposed by the current Node runtime.
|
|
27
|
+
*
|
|
28
|
+
* The result is cached per DatabaseSync handle because the probe is runtime-
|
|
29
|
+
* specific, not database-file-specific.
|
|
30
|
+
*/
|
|
31
|
+
export function getLcmDbFeatures(db: DatabaseSync): LcmDbFeatures {
|
|
32
|
+
const cached = featureCache.get(db);
|
|
33
|
+
if (cached) {
|
|
34
|
+
return cached;
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
const detected: LcmDbFeatures = {
|
|
38
|
+
fts5Available: probeFts5(db),
|
|
39
|
+
};
|
|
40
|
+
featureCache.set(db, detected);
|
|
41
|
+
return detected;
|
|
42
|
+
}
|
package/src/db/migration.ts
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import type { DatabaseSync } from "node:sqlite";
|
|
2
|
+
import { getLcmDbFeatures } from "./features.js";
|
|
2
3
|
|
|
3
4
|
type SummaryColumnInfo = {
|
|
4
5
|
name?: string;
|
|
@@ -354,7 +355,10 @@ function backfillSummaryMetadata(db: DatabaseSync): void {
|
|
|
354
355
|
}
|
|
355
356
|
}
|
|
356
357
|
|
|
357
|
-
export function runLcmMigrations(
|
|
358
|
+
export function runLcmMigrations(
|
|
359
|
+
db: DatabaseSync,
|
|
360
|
+
options?: { fts5Available?: boolean },
|
|
361
|
+
): void {
|
|
358
362
|
db.exec(`
|
|
359
363
|
CREATE TABLE IF NOT EXISTS conversations (
|
|
360
364
|
conversation_id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
@@ -492,6 +496,11 @@ export function runLcmMigrations(db: DatabaseSync): void {
|
|
|
492
496
|
backfillSummaryDepths(db);
|
|
493
497
|
backfillSummaryMetadata(db);
|
|
494
498
|
|
|
499
|
+
const fts5Available = options?.fts5Available ?? getLcmDbFeatures(db).fts5Available;
|
|
500
|
+
if (!fts5Available) {
|
|
501
|
+
return;
|
|
502
|
+
}
|
|
503
|
+
|
|
495
504
|
// FTS5 virtual tables for full-text search (cannot use IF NOT EXISTS, so check manually)
|
|
496
505
|
const hasFts = db
|
|
497
506
|
.prepare("SELECT name FROM sqlite_master WHERE type='table' AND name='messages_fts'")
|
package/src/engine.ts
CHANGED
|
@@ -18,6 +18,7 @@ import { ContextAssembler } from "./assembler.js";
|
|
|
18
18
|
import { CompactionEngine, type CompactionConfig } from "./compaction.js";
|
|
19
19
|
import type { LcmConfig } from "./db/config.js";
|
|
20
20
|
import { getLcmConnection, closeLcmConnection } from "./db/connection.js";
|
|
21
|
+
import { getLcmDbFeatures } from "./db/features.js";
|
|
21
22
|
import { runLcmMigrations } from "./db/migration.js";
|
|
22
23
|
import {
|
|
23
24
|
createDelegatedExpansionGrant,
|
|
@@ -59,6 +60,39 @@ function safeString(value: unknown): string | undefined {
|
|
|
59
60
|
return typeof value === "string" ? value : undefined;
|
|
60
61
|
}
|
|
61
62
|
|
|
63
|
+
function appendTextValue(value: unknown, out: string[]): void {
|
|
64
|
+
if (typeof value === "string") {
|
|
65
|
+
out.push(value);
|
|
66
|
+
return;
|
|
67
|
+
}
|
|
68
|
+
if (Array.isArray(value)) {
|
|
69
|
+
for (const entry of value) {
|
|
70
|
+
appendTextValue(entry, out);
|
|
71
|
+
}
|
|
72
|
+
return;
|
|
73
|
+
}
|
|
74
|
+
if (!value || typeof value !== "object") {
|
|
75
|
+
return;
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
const record = value as Record<string, unknown>;
|
|
79
|
+
appendTextValue(record.text, out);
|
|
80
|
+
appendTextValue(record.value, out);
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
function extractReasoningText(record: Record<string, unknown>): string | undefined {
|
|
84
|
+
const chunks: string[] = [];
|
|
85
|
+
appendTextValue(record.summary, chunks);
|
|
86
|
+
if (chunks.length === 0) {
|
|
87
|
+
return undefined;
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
const normalized = chunks
|
|
91
|
+
.map((chunk) => chunk.trim())
|
|
92
|
+
.filter((chunk, idx, arr) => chunk.length > 0 && arr.indexOf(chunk) === idx);
|
|
93
|
+
return normalized.length > 0 ? normalized.join("\n") : undefined;
|
|
94
|
+
}
|
|
95
|
+
|
|
62
96
|
function normalizeUnknownBlock(value: unknown): {
|
|
63
97
|
type: string;
|
|
64
98
|
text?: string;
|
|
@@ -75,7 +109,12 @@ function normalizeUnknownBlock(value: unknown): {
|
|
|
75
109
|
const rawType = safeString(record.type);
|
|
76
110
|
return {
|
|
77
111
|
type: rawType ?? "agent",
|
|
78
|
-
text:
|
|
112
|
+
text:
|
|
113
|
+
safeString(record.text) ??
|
|
114
|
+
safeString(record.thinking) ??
|
|
115
|
+
((rawType === "reasoning" || rawType === "thinking")
|
|
116
|
+
? extractReasoningText(record)
|
|
117
|
+
: undefined),
|
|
79
118
|
metadata: { raw: record },
|
|
80
119
|
};
|
|
81
120
|
}
|
|
@@ -88,7 +127,12 @@ function toPartType(type: string): MessagePartType {
|
|
|
88
127
|
case "reasoning":
|
|
89
128
|
return "reasoning";
|
|
90
129
|
case "tool_use":
|
|
130
|
+
case "toolUse":
|
|
91
131
|
case "tool-use":
|
|
132
|
+
case "toolCall":
|
|
133
|
+
case "functionCall":
|
|
134
|
+
case "function_call":
|
|
135
|
+
case "function_call_output":
|
|
92
136
|
case "tool_result":
|
|
93
137
|
case "toolResult":
|
|
94
138
|
case "tool":
|
|
@@ -214,7 +258,12 @@ function buildMessageParts(params: {
|
|
|
214
258
|
const role = typeof message.role === "string" ? message.role : "unknown";
|
|
215
259
|
const topLevel = message as unknown as Record<string, unknown>;
|
|
216
260
|
const topLevelToolCallId =
|
|
217
|
-
safeString(topLevel.toolCallId) ??
|
|
261
|
+
safeString(topLevel.toolCallId) ??
|
|
262
|
+
safeString(topLevel.tool_call_id) ??
|
|
263
|
+
safeString(topLevel.toolUseId) ??
|
|
264
|
+
safeString(topLevel.tool_use_id) ??
|
|
265
|
+
safeString(topLevel.call_id) ??
|
|
266
|
+
safeString(topLevel.id);
|
|
218
267
|
|
|
219
268
|
// BashExecutionMessage: preserve a synthetic text part so output is round-trippable.
|
|
220
269
|
if (!("content" in message) && "command" in message && "output" in message) {
|
|
@@ -283,14 +332,19 @@ function buildMessageParts(params: {
|
|
|
283
332
|
for (let ordinal = 0; ordinal < message.content.length; ordinal++) {
|
|
284
333
|
const block = normalizeUnknownBlock(message.content[ordinal]);
|
|
285
334
|
const metadataRecord = block.metadata.raw as Record<string, unknown> | undefined;
|
|
335
|
+
const partType = toPartType(block.type);
|
|
286
336
|
const toolCallId =
|
|
287
337
|
safeString(metadataRecord?.toolCallId) ??
|
|
288
338
|
safeString(metadataRecord?.tool_call_id) ??
|
|
339
|
+
safeString(metadataRecord?.toolUseId) ??
|
|
340
|
+
safeString(metadataRecord?.tool_use_id) ??
|
|
341
|
+
safeString(metadataRecord?.call_id) ??
|
|
342
|
+
(partType === "tool" ? safeString(metadataRecord?.id) : undefined) ??
|
|
289
343
|
topLevelToolCallId;
|
|
290
344
|
|
|
291
345
|
parts.push({
|
|
292
346
|
sessionId,
|
|
293
|
-
partType
|
|
347
|
+
partType,
|
|
294
348
|
ordinal,
|
|
295
349
|
textContent: block.text ?? null,
|
|
296
350
|
toolCallId,
|
|
@@ -301,6 +355,8 @@ function buildMessageParts(params: {
|
|
|
301
355
|
toolInput:
|
|
302
356
|
metadataRecord?.input !== undefined
|
|
303
357
|
? toJson(metadataRecord.input)
|
|
358
|
+
: metadataRecord?.arguments !== undefined
|
|
359
|
+
? toJson(metadataRecord.arguments)
|
|
304
360
|
: metadataRecord?.toolInput !== undefined
|
|
305
361
|
? toJson(metadataRecord.toolInput)
|
|
306
362
|
: (safeString(metadataRecord?.tool_input) ?? null),
|
|
@@ -513,6 +569,7 @@ export class LcmContextEngine implements ContextEngine {
|
|
|
513
569
|
private compaction: CompactionEngine;
|
|
514
570
|
private retrieval: RetrievalEngine;
|
|
515
571
|
private migrated = false;
|
|
572
|
+
private readonly fts5Available: boolean;
|
|
516
573
|
private sessionOperationQueues = new Map<string, Promise<void>>();
|
|
517
574
|
private largeFileTextSummarizerResolved = false;
|
|
518
575
|
private largeFileTextSummarizer?: (prompt: string) => Promise<string | null>;
|
|
@@ -523,9 +580,16 @@ export class LcmContextEngine implements ContextEngine {
|
|
|
523
580
|
this.config = deps.config;
|
|
524
581
|
|
|
525
582
|
const db = getLcmConnection(this.config.databasePath);
|
|
583
|
+
this.fts5Available = getLcmDbFeatures(db).fts5Available;
|
|
584
|
+
|
|
585
|
+
this.conversationStore = new ConversationStore(db, { fts5Available: this.fts5Available });
|
|
586
|
+
this.summaryStore = new SummaryStore(db, { fts5Available: this.fts5Available });
|
|
526
587
|
|
|
527
|
-
this.
|
|
528
|
-
|
|
588
|
+
if (!this.fts5Available) {
|
|
589
|
+
this.deps.log.warn(
|
|
590
|
+
"[lcm] FTS5 unavailable in the current Node runtime; full_text search will fall back to LIKE and indexing is disabled",
|
|
591
|
+
);
|
|
592
|
+
}
|
|
529
593
|
|
|
530
594
|
this.assembler = new ContextAssembler(
|
|
531
595
|
this.conversationStore,
|
|
@@ -561,7 +625,7 @@ export class LcmContextEngine implements ContextEngine {
|
|
|
561
625
|
return;
|
|
562
626
|
}
|
|
563
627
|
const db = getLcmConnection(this.config.databasePath);
|
|
564
|
-
runLcmMigrations(db);
|
|
628
|
+
runLcmMigrations(db, { fts5Available: this.fts5Available });
|
|
565
629
|
this.migrated = true;
|
|
566
630
|
}
|
|
567
631
|
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import type { DatabaseSync } from "node:sqlite";
|
|
2
2
|
import { randomUUID } from "node:crypto";
|
|
3
3
|
import { sanitizeFts5Query } from "./fts5-sanitize.js";
|
|
4
|
+
import { buildLikeSearchPlan, createFallbackSnippet } from "./full-text-fallback.js";
|
|
4
5
|
|
|
5
6
|
export type ConversationId = number;
|
|
6
7
|
export type MessageId = number;
|
|
@@ -203,7 +204,14 @@ function toMessagePartRecord(row: MessagePartRow): MessagePartRecord {
|
|
|
203
204
|
// ── ConversationStore ─────────────────────────────────────────────────────────
|
|
204
205
|
|
|
205
206
|
export class ConversationStore {
|
|
206
|
-
|
|
207
|
+
private readonly fts5Available: boolean;
|
|
208
|
+
|
|
209
|
+
constructor(
|
|
210
|
+
private db: DatabaseSync,
|
|
211
|
+
options?: { fts5Available?: boolean },
|
|
212
|
+
) {
|
|
213
|
+
this.fts5Available = options?.fts5Available ?? true;
|
|
214
|
+
}
|
|
207
215
|
|
|
208
216
|
// ── Transaction helpers ──────────────────────────────────────────────────
|
|
209
217
|
|
|
@@ -292,10 +300,7 @@ export class ConversationStore {
|
|
|
292
300
|
|
|
293
301
|
const messageId = Number(result.lastInsertRowid);
|
|
294
302
|
|
|
295
|
-
|
|
296
|
-
this.db
|
|
297
|
-
.prepare(`INSERT INTO messages_fts(rowid, content) VALUES (?, ?)`)
|
|
298
|
-
.run(messageId, input.content);
|
|
303
|
+
this.indexMessageForFullText(messageId, input.content);
|
|
299
304
|
|
|
300
305
|
const row = this.db
|
|
301
306
|
.prepare(
|
|
@@ -315,7 +320,6 @@ export class ConversationStore {
|
|
|
315
320
|
`INSERT INTO messages (conversation_id, seq, role, content, token_count)
|
|
316
321
|
VALUES (?, ?, ?, ?, ?)`,
|
|
317
322
|
);
|
|
318
|
-
const insertFtsStmt = this.db.prepare(`INSERT INTO messages_fts(rowid, content) VALUES (?, ?)`);
|
|
319
323
|
const selectStmt = this.db.prepare(
|
|
320
324
|
`SELECT message_id, conversation_id, seq, role, content, token_count, created_at
|
|
321
325
|
FROM messages WHERE message_id = ?`,
|
|
@@ -332,7 +336,7 @@ export class ConversationStore {
|
|
|
332
336
|
);
|
|
333
337
|
|
|
334
338
|
const messageId = Number(result.lastInsertRowid);
|
|
335
|
-
|
|
339
|
+
this.indexMessageForFullText(messageId, input.content);
|
|
336
340
|
const row = selectStmt.get(messageId) as unknown as MessageRow;
|
|
337
341
|
records.push(toMessageRecord(row));
|
|
338
342
|
}
|
|
@@ -535,8 +539,7 @@ export class ConversationStore {
|
|
|
535
539
|
.prepare(`DELETE FROM context_items WHERE item_type = 'message' AND message_id = ?`)
|
|
536
540
|
.run(messageId);
|
|
537
541
|
|
|
538
|
-
|
|
539
|
-
this.db.prepare(`DELETE FROM messages_fts WHERE rowid = ?`).run(messageId);
|
|
542
|
+
this.deleteMessageFromFullText(messageId);
|
|
540
543
|
|
|
541
544
|
// Delete the message (message_parts cascade via ON DELETE CASCADE)
|
|
542
545
|
this.db.prepare(`DELETE FROM messages WHERE message_id = ?`).run(messageId);
|
|
@@ -553,17 +556,54 @@ export class ConversationStore {
|
|
|
553
556
|
const limit = input.limit ?? 50;
|
|
554
557
|
|
|
555
558
|
if (input.mode === "full_text") {
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
|
|
559
|
+
if (this.fts5Available) {
|
|
560
|
+
try {
|
|
561
|
+
return this.searchFullText(
|
|
562
|
+
input.query,
|
|
563
|
+
limit,
|
|
564
|
+
input.conversationId,
|
|
565
|
+
input.since,
|
|
566
|
+
input.before,
|
|
567
|
+
);
|
|
568
|
+
} catch {
|
|
569
|
+
return this.searchLike(
|
|
570
|
+
input.query,
|
|
571
|
+
limit,
|
|
572
|
+
input.conversationId,
|
|
573
|
+
input.since,
|
|
574
|
+
input.before,
|
|
575
|
+
);
|
|
576
|
+
}
|
|
577
|
+
}
|
|
578
|
+
return this.searchLike(input.query, limit, input.conversationId, input.since, input.before);
|
|
563
579
|
}
|
|
564
580
|
return this.searchRegex(input.query, limit, input.conversationId, input.since, input.before);
|
|
565
581
|
}
|
|
566
582
|
|
|
583
|
+
private indexMessageForFullText(messageId: MessageId, content: string): void {
|
|
584
|
+
if (!this.fts5Available) {
|
|
585
|
+
return;
|
|
586
|
+
}
|
|
587
|
+
try {
|
|
588
|
+
this.db
|
|
589
|
+
.prepare(`INSERT INTO messages_fts(rowid, content) VALUES (?, ?)`)
|
|
590
|
+
.run(messageId, content);
|
|
591
|
+
} catch {
|
|
592
|
+
// Full-text indexing is optional. Message persistence must still succeed.
|
|
593
|
+
}
|
|
594
|
+
}
|
|
595
|
+
|
|
596
|
+
private deleteMessageFromFullText(messageId: MessageId): void {
|
|
597
|
+
if (!this.fts5Available) {
|
|
598
|
+
return;
|
|
599
|
+
}
|
|
600
|
+
try {
|
|
601
|
+
this.db.prepare(`DELETE FROM messages_fts WHERE rowid = ?`).run(messageId);
|
|
602
|
+
} catch {
|
|
603
|
+
// Ignore FTS cleanup failures; the source row deletion is authoritative.
|
|
604
|
+
}
|
|
605
|
+
}
|
|
606
|
+
|
|
567
607
|
private searchFullText(
|
|
568
608
|
query: string,
|
|
569
609
|
limit: number,
|
|
@@ -603,6 +643,55 @@ export class ConversationStore {
|
|
|
603
643
|
return rows.map(toSearchResult);
|
|
604
644
|
}
|
|
605
645
|
|
|
646
|
+
private searchLike(
|
|
647
|
+
query: string,
|
|
648
|
+
limit: number,
|
|
649
|
+
conversationId?: ConversationId,
|
|
650
|
+
since?: Date,
|
|
651
|
+
before?: Date,
|
|
652
|
+
): MessageSearchResult[] {
|
|
653
|
+
const plan = buildLikeSearchPlan("content", query);
|
|
654
|
+
if (plan.terms.length === 0) {
|
|
655
|
+
return [];
|
|
656
|
+
}
|
|
657
|
+
|
|
658
|
+
const where: string[] = [...plan.where];
|
|
659
|
+
const args: Array<string | number> = [...plan.args];
|
|
660
|
+
if (conversationId != null) {
|
|
661
|
+
where.push("conversation_id = ?");
|
|
662
|
+
args.push(conversationId);
|
|
663
|
+
}
|
|
664
|
+
if (since) {
|
|
665
|
+
where.push("julianday(created_at) >= julianday(?)");
|
|
666
|
+
args.push(since.toISOString());
|
|
667
|
+
}
|
|
668
|
+
if (before) {
|
|
669
|
+
where.push("julianday(created_at) < julianday(?)");
|
|
670
|
+
args.push(before.toISOString());
|
|
671
|
+
}
|
|
672
|
+
args.push(limit);
|
|
673
|
+
|
|
674
|
+
const whereClause = where.length > 0 ? `WHERE ${where.join(" AND ")}` : "";
|
|
675
|
+
const rows = this.db
|
|
676
|
+
.prepare(
|
|
677
|
+
`SELECT message_id, conversation_id, seq, role, content, token_count, created_at
|
|
678
|
+
FROM messages
|
|
679
|
+
${whereClause}
|
|
680
|
+
ORDER BY created_at DESC
|
|
681
|
+
LIMIT ?`,
|
|
682
|
+
)
|
|
683
|
+
.all(...args) as unknown as MessageRow[];
|
|
684
|
+
|
|
685
|
+
return rows.map((row) => ({
|
|
686
|
+
messageId: row.message_id,
|
|
687
|
+
conversationId: row.conversation_id,
|
|
688
|
+
role: row.role,
|
|
689
|
+
snippet: createFallbackSnippet(row.content, plan.terms),
|
|
690
|
+
createdAt: new Date(row.created_at),
|
|
691
|
+
rank: 0,
|
|
692
|
+
}));
|
|
693
|
+
}
|
|
694
|
+
|
|
606
695
|
private searchRegex(
|
|
607
696
|
pattern: string,
|
|
608
697
|
limit: number,
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
const RAW_TERM_RE = /"([^"]+)"|(\S+)/g;
|
|
2
|
+
const EDGE_PUNCTUATION_RE = /^[`'"()[\]{}<>.,:;!?*_+=|\\/-]+|[`'"()[\]{}<>.,:;!?*_+=|\\/-]+$/g;
|
|
3
|
+
|
|
4
|
+
export type LikeSearchPlan = {
|
|
5
|
+
terms: string[];
|
|
6
|
+
where: string[];
|
|
7
|
+
args: string[];
|
|
8
|
+
};
|
|
9
|
+
|
|
10
|
+
function normalizeFallbackTerm(raw: string): string {
|
|
11
|
+
return raw.trim().replace(EDGE_PUNCTUATION_RE, "").toLowerCase();
|
|
12
|
+
}
|
|
13
|
+
|
|
14
|
+
function escapeLike(term: string): string {
|
|
15
|
+
return term.replace(/([\\%_])/g, "\\$1");
|
|
16
|
+
}
|
|
17
|
+
|
|
18
|
+
/**
|
|
19
|
+
* Convert a free-text query into a conservative LIKE search plan.
|
|
20
|
+
*
|
|
21
|
+
* The fallback keeps phrase tokens when the query uses double quotes, and
|
|
22
|
+
* otherwise searches for all normalized tokens as case-insensitive substrings.
|
|
23
|
+
*/
|
|
24
|
+
export function buildLikeSearchPlan(column: string, query: string): LikeSearchPlan {
|
|
25
|
+
const terms: string[] = [];
|
|
26
|
+
for (const match of query.matchAll(RAW_TERM_RE)) {
|
|
27
|
+
const raw = match[1] ?? match[2] ?? "";
|
|
28
|
+
const normalized = normalizeFallbackTerm(raw);
|
|
29
|
+
if (normalized.length > 0 && !terms.includes(normalized)) {
|
|
30
|
+
terms.push(normalized);
|
|
31
|
+
}
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
if (terms.length === 0) {
|
|
35
|
+
const fallback = normalizeFallbackTerm(query);
|
|
36
|
+
if (fallback.length > 0) {
|
|
37
|
+
terms.push(fallback);
|
|
38
|
+
}
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
return {
|
|
42
|
+
terms,
|
|
43
|
+
where: terms.map(() => `LOWER(${column}) LIKE ? ESCAPE '\\'`),
|
|
44
|
+
args: terms.map((term) => `%${escapeLike(term)}%`),
|
|
45
|
+
};
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
/**
|
|
49
|
+
* Build a compact snippet centered around the earliest matching term.
|
|
50
|
+
*/
|
|
51
|
+
export function createFallbackSnippet(content: string, terms: string[]): string {
|
|
52
|
+
const haystack = content.toLowerCase();
|
|
53
|
+
let matchIndex = -1;
|
|
54
|
+
let matchLength = 0;
|
|
55
|
+
|
|
56
|
+
for (const term of terms) {
|
|
57
|
+
const idx = haystack.indexOf(term);
|
|
58
|
+
if (idx !== -1 && (matchIndex === -1 || idx < matchIndex)) {
|
|
59
|
+
matchIndex = idx;
|
|
60
|
+
matchLength = term.length;
|
|
61
|
+
}
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
if (matchIndex === -1) {
|
|
65
|
+
const head = content.trim();
|
|
66
|
+
return head.length <= 80 ? head : `${head.slice(0, 77).trimEnd()}...`;
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
const start = Math.max(0, matchIndex - 24);
|
|
70
|
+
const end = Math.min(content.length, matchIndex + Math.max(matchLength, 1) + 40);
|
|
71
|
+
const prefix = start > 0 ? "..." : "";
|
|
72
|
+
const suffix = end < content.length ? "..." : "";
|
|
73
|
+
return `${prefix}${content.slice(start, end).trim()}${suffix}`;
|
|
74
|
+
}
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import type { DatabaseSync } from "node:sqlite";
|
|
2
2
|
import { sanitizeFts5Query } from "./fts5-sanitize.js";
|
|
3
|
+
import { buildLikeSearchPlan, createFallbackSnippet } from "./full-text-fallback.js";
|
|
3
4
|
|
|
4
5
|
export type SummaryKind = "leaf" | "condensed";
|
|
5
6
|
export type ContextItemType = "message" | "summary";
|
|
@@ -239,7 +240,14 @@ function toLargeFileRecord(row: LargeFileRow): LargeFileRecord {
|
|
|
239
240
|
// ── SummaryStore ──────────────────────────────────────────────────────────────
|
|
240
241
|
|
|
241
242
|
export class SummaryStore {
|
|
242
|
-
|
|
243
|
+
private readonly fts5Available: boolean;
|
|
244
|
+
|
|
245
|
+
constructor(
|
|
246
|
+
private db: DatabaseSync,
|
|
247
|
+
options?: { fts5Available?: boolean },
|
|
248
|
+
) {
|
|
249
|
+
this.fts5Available = options?.fts5Available ?? true;
|
|
250
|
+
}
|
|
243
251
|
|
|
244
252
|
// ── Summary CRUD ──────────────────────────────────────────────────────────
|
|
245
253
|
|
|
@@ -305,8 +313,21 @@ export class SummaryStore {
|
|
|
305
313
|
sourceMessageTokenCount,
|
|
306
314
|
);
|
|
307
315
|
|
|
316
|
+
const row = this.db
|
|
317
|
+
.prepare(
|
|
318
|
+
`SELECT summary_id, conversation_id, kind, depth, content, token_count, file_ids,
|
|
319
|
+
earliest_at, latest_at, descendant_count, created_at
|
|
320
|
+
, descendant_token_count, source_message_token_count
|
|
321
|
+
FROM summaries WHERE summary_id = ?`,
|
|
322
|
+
)
|
|
323
|
+
.get(input.summaryId) as unknown as SummaryRow;
|
|
324
|
+
|
|
308
325
|
// Index in FTS5 as best-effort; compaction flow must continue even if
|
|
309
326
|
// FTS indexing fails for any reason.
|
|
327
|
+
if (!this.fts5Available) {
|
|
328
|
+
return toSummaryRecord(row);
|
|
329
|
+
}
|
|
330
|
+
|
|
310
331
|
try {
|
|
311
332
|
this.db
|
|
312
333
|
.prepare(`INSERT INTO summaries_fts(summary_id, content) VALUES (?, ?)`)
|
|
@@ -316,15 +337,6 @@ export class SummaryStore {
|
|
|
316
337
|
// compaction and assembly will still work correctly.
|
|
317
338
|
}
|
|
318
339
|
|
|
319
|
-
const row = this.db
|
|
320
|
-
.prepare(
|
|
321
|
-
`SELECT summary_id, conversation_id, kind, depth, content, token_count, file_ids,
|
|
322
|
-
earliest_at, latest_at, descendant_count, created_at
|
|
323
|
-
, descendant_token_count, source_message_token_count
|
|
324
|
-
FROM summaries WHERE summary_id = ?`,
|
|
325
|
-
)
|
|
326
|
-
.get(input.summaryId) as unknown as SummaryRow;
|
|
327
|
-
|
|
328
340
|
return toSummaryRecord(row);
|
|
329
341
|
}
|
|
330
342
|
|
|
@@ -685,13 +697,26 @@ export class SummaryStore {
|
|
|
685
697
|
const limit = input.limit ?? 50;
|
|
686
698
|
|
|
687
699
|
if (input.mode === "full_text") {
|
|
688
|
-
|
|
689
|
-
|
|
690
|
-
|
|
691
|
-
|
|
692
|
-
|
|
693
|
-
|
|
694
|
-
|
|
700
|
+
if (this.fts5Available) {
|
|
701
|
+
try {
|
|
702
|
+
return this.searchFullText(
|
|
703
|
+
input.query,
|
|
704
|
+
limit,
|
|
705
|
+
input.conversationId,
|
|
706
|
+
input.since,
|
|
707
|
+
input.before,
|
|
708
|
+
);
|
|
709
|
+
} catch {
|
|
710
|
+
return this.searchLike(
|
|
711
|
+
input.query,
|
|
712
|
+
limit,
|
|
713
|
+
input.conversationId,
|
|
714
|
+
input.since,
|
|
715
|
+
input.before,
|
|
716
|
+
);
|
|
717
|
+
}
|
|
718
|
+
}
|
|
719
|
+
return this.searchLike(input.query, limit, input.conversationId, input.since, input.before);
|
|
695
720
|
}
|
|
696
721
|
return this.searchRegex(input.query, limit, input.conversationId, input.since, input.before);
|
|
697
722
|
}
|
|
@@ -735,6 +760,57 @@ export class SummaryStore {
|
|
|
735
760
|
return rows.map(toSearchResult);
|
|
736
761
|
}
|
|
737
762
|
|
|
763
|
+
private searchLike(
|
|
764
|
+
query: string,
|
|
765
|
+
limit: number,
|
|
766
|
+
conversationId?: number,
|
|
767
|
+
since?: Date,
|
|
768
|
+
before?: Date,
|
|
769
|
+
): SummarySearchResult[] {
|
|
770
|
+
const plan = buildLikeSearchPlan("content", query);
|
|
771
|
+
if (plan.terms.length === 0) {
|
|
772
|
+
return [];
|
|
773
|
+
}
|
|
774
|
+
|
|
775
|
+
const where: string[] = [...plan.where];
|
|
776
|
+
const args: Array<string | number> = [...plan.args];
|
|
777
|
+
if (conversationId != null) {
|
|
778
|
+
where.push("conversation_id = ?");
|
|
779
|
+
args.push(conversationId);
|
|
780
|
+
}
|
|
781
|
+
if (since) {
|
|
782
|
+
where.push("julianday(created_at) >= julianday(?)");
|
|
783
|
+
args.push(since.toISOString());
|
|
784
|
+
}
|
|
785
|
+
if (before) {
|
|
786
|
+
where.push("julianday(created_at) < julianday(?)");
|
|
787
|
+
args.push(before.toISOString());
|
|
788
|
+
}
|
|
789
|
+
args.push(limit);
|
|
790
|
+
|
|
791
|
+
const whereClause = where.length > 0 ? `WHERE ${where.join(" AND ")}` : "";
|
|
792
|
+
const rows = this.db
|
|
793
|
+
.prepare(
|
|
794
|
+
`SELECT summary_id, conversation_id, kind, depth, content, token_count, file_ids,
|
|
795
|
+
earliest_at, latest_at, descendant_count, descendant_token_count,
|
|
796
|
+
source_message_token_count, created_at
|
|
797
|
+
FROM summaries
|
|
798
|
+
${whereClause}
|
|
799
|
+
ORDER BY created_at DESC
|
|
800
|
+
LIMIT ?`,
|
|
801
|
+
)
|
|
802
|
+
.all(...args) as unknown as SummaryRow[];
|
|
803
|
+
|
|
804
|
+
return rows.map((row) => ({
|
|
805
|
+
summaryId: row.summary_id,
|
|
806
|
+
conversationId: row.conversation_id,
|
|
807
|
+
kind: row.kind,
|
|
808
|
+
snippet: createFallbackSnippet(row.content, plan.terms),
|
|
809
|
+
createdAt: new Date(row.created_at),
|
|
810
|
+
rank: 0,
|
|
811
|
+
}));
|
|
812
|
+
}
|
|
813
|
+
|
|
738
814
|
private searchRegex(
|
|
739
815
|
pattern: string,
|
|
740
816
|
limit: number,
|
package/src/transcript-repair.ts
CHANGED
|
@@ -27,7 +27,80 @@ type ToolCallLike = {
|
|
|
27
27
|
|
|
28
28
|
// -- Extraction helpers (from tool-call-id.ts) --
|
|
29
29
|
|
|
30
|
-
const TOOL_CALL_TYPES = new Set([
|
|
30
|
+
const TOOL_CALL_TYPES = new Set([
|
|
31
|
+
"toolCall",
|
|
32
|
+
"toolUse",
|
|
33
|
+
"tool_use",
|
|
34
|
+
"tool-use",
|
|
35
|
+
"functionCall",
|
|
36
|
+
"function_call",
|
|
37
|
+
]);
|
|
38
|
+
const OPENAI_FUNCTION_CALL_TYPES = new Set(["functionCall", "function_call"]);
|
|
39
|
+
|
|
40
|
+
function extractToolCallId(block: { id?: unknown; call_id?: unknown }): string | null {
|
|
41
|
+
if (typeof block.id === "string" && block.id) {
|
|
42
|
+
return block.id;
|
|
43
|
+
}
|
|
44
|
+
if (typeof block.call_id === "string" && block.call_id) {
|
|
45
|
+
return block.call_id;
|
|
46
|
+
}
|
|
47
|
+
return null;
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
function normalizeAssistantReasoningBlocks<T extends AgentMessageLike>(message: T): T {
|
|
51
|
+
if (!Array.isArray(message.content)) {
|
|
52
|
+
return message;
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
let sawToolCall = false;
|
|
56
|
+
let reasoningAfterToolCall = false;
|
|
57
|
+
let functionCallCount = 0;
|
|
58
|
+
|
|
59
|
+
for (const block of message.content) {
|
|
60
|
+
if (!block || typeof block !== "object") {
|
|
61
|
+
return message;
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
const type = (block as { type?: unknown }).type;
|
|
65
|
+
if (type === "reasoning" || type === "thinking") {
|
|
66
|
+
if (sawToolCall) {
|
|
67
|
+
reasoningAfterToolCall = true;
|
|
68
|
+
}
|
|
69
|
+
continue;
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
if (typeof type === "string" && TOOL_CALL_TYPES.has(type)) {
|
|
73
|
+
sawToolCall = true;
|
|
74
|
+
if (OPENAI_FUNCTION_CALL_TYPES.has(type)) {
|
|
75
|
+
functionCallCount += 1;
|
|
76
|
+
}
|
|
77
|
+
continue;
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
return message;
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
// Only repair the specific OpenAI shape we need: a single function call that
|
|
84
|
+
// has one or more reasoning blocks after it. Multi-call turns may use
|
|
85
|
+
// interleaved reasoning intentionally, so leave them untouched.
|
|
86
|
+
if (!reasoningAfterToolCall || functionCallCount !== 1) {
|
|
87
|
+
return message;
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
const reasoning = message.content.filter((block) => {
|
|
91
|
+
const type = (block as { type?: unknown }).type;
|
|
92
|
+
return type === "reasoning" || type === "thinking";
|
|
93
|
+
});
|
|
94
|
+
const toolCalls = message.content.filter((block) => {
|
|
95
|
+
const type = (block as { type?: unknown }).type;
|
|
96
|
+
return typeof type === "string" && TOOL_CALL_TYPES.has(type);
|
|
97
|
+
});
|
|
98
|
+
|
|
99
|
+
return {
|
|
100
|
+
...message,
|
|
101
|
+
content: [...reasoning, ...toolCalls],
|
|
102
|
+
};
|
|
103
|
+
}
|
|
31
104
|
|
|
32
105
|
function extractToolCallsFromAssistant(msg: AgentMessageLike): ToolCallLike[] {
|
|
33
106
|
const content = msg.content;
|
|
@@ -40,13 +113,14 @@ function extractToolCallsFromAssistant(msg: AgentMessageLike): ToolCallLike[] {
|
|
|
40
113
|
if (!block || typeof block !== "object") {
|
|
41
114
|
continue;
|
|
42
115
|
}
|
|
43
|
-
const rec = block as { type?: unknown; id?: unknown; name?: unknown };
|
|
44
|
-
|
|
116
|
+
const rec = block as { type?: unknown; id?: unknown; call_id?: unknown; name?: unknown };
|
|
117
|
+
const id = extractToolCallId(rec);
|
|
118
|
+
if (!id) {
|
|
45
119
|
continue;
|
|
46
120
|
}
|
|
47
121
|
if (typeof rec.type === "string" && TOOL_CALL_TYPES.has(rec.type)) {
|
|
48
122
|
toolCalls.push({
|
|
49
|
-
id
|
|
123
|
+
id,
|
|
50
124
|
name: typeof rec.name === "string" ? rec.name : undefined,
|
|
51
125
|
});
|
|
52
126
|
}
|
|
@@ -134,18 +208,23 @@ export function sanitizeToolUseResultPairing<T extends AgentMessageLike>(message
|
|
|
134
208
|
continue;
|
|
135
209
|
}
|
|
136
210
|
|
|
211
|
+
const normalizedAssistant = normalizeAssistantReasoningBlocks(msg);
|
|
212
|
+
if (normalizedAssistant !== msg) {
|
|
213
|
+
changed = true;
|
|
214
|
+
}
|
|
215
|
+
|
|
137
216
|
// Skip tool call extraction for aborted or errored assistant messages.
|
|
138
217
|
// When stopReason is "error" or "aborted", the tool_use blocks may be incomplete
|
|
139
218
|
// and should not have synthetic tool_results created.
|
|
140
|
-
const stopReason =
|
|
219
|
+
const stopReason = normalizedAssistant.stopReason;
|
|
141
220
|
if (stopReason === "error" || stopReason === "aborted") {
|
|
142
|
-
out.push(
|
|
221
|
+
out.push(normalizedAssistant as T);
|
|
143
222
|
continue;
|
|
144
223
|
}
|
|
145
224
|
|
|
146
|
-
const toolCalls = extractToolCallsFromAssistant(
|
|
225
|
+
const toolCalls = extractToolCallsFromAssistant(normalizedAssistant);
|
|
147
226
|
if (toolCalls.length === 0) {
|
|
148
|
-
out.push(
|
|
227
|
+
out.push(normalizedAssistant as T);
|
|
149
228
|
continue;
|
|
150
229
|
}
|
|
151
230
|
|
|
@@ -190,7 +269,7 @@ export function sanitizeToolUseResultPairing<T extends AgentMessageLike>(message
|
|
|
190
269
|
}
|
|
191
270
|
}
|
|
192
271
|
|
|
193
|
-
out.push(
|
|
272
|
+
out.push(normalizedAssistant as T);
|
|
194
273
|
|
|
195
274
|
if (spanResultsById.size > 0 && remainder.length > 0) {
|
|
196
275
|
moved = true;
|