@martian-engineering/lossless-claw 0.7.0 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +11 -3
- package/docs/agent-tools.md +9 -4
- package/docs/configuration.md +9 -0
- package/package.json +1 -1
- package/skills/lossless-claw/SKILL.md +3 -2
- package/skills/lossless-claw/references/architecture.md +12 -0
- package/skills/lossless-claw/references/diagnostics.md +13 -0
- package/src/assembler.ts +12 -4
- package/src/compaction.ts +12 -15
- package/src/db/connection.ts +15 -5
- package/src/db/features.ts +24 -5
- package/src/db/migration.ts +201 -79
- package/src/engine.ts +199 -19
- package/src/estimate-tokens.ts +80 -0
- package/src/plugin/index.ts +95 -18
- package/src/plugin/lcm-command.ts +278 -3
- package/src/plugin/lcm-doctor-apply.ts +1 -3
- package/src/plugin/lcm-doctor-cleaners.ts +655 -0
- package/src/retrieval.ts +1 -4
- package/src/summarize.ts +1 -4
- package/src/tools/lcm-expand-query-tool.ts +598 -194
- package/src/tools/lcm-grep-tool.ts +2 -2
package/README.md
CHANGED
|
@@ -34,12 +34,14 @@ The plugin now ships a bundled `lossless-claw` skill plus a small plugin command
|
|
|
34
34
|
|
|
35
35
|
- `/lcm` shows version, enablement/selection state, DB path and size, summary counts, and summary-health status
|
|
36
36
|
- `/lcm doctor` scans for broken or truncated summaries
|
|
37
|
+
- `/lcm doctor clean` shows read-only high-confidence junk diagnostics for archived subagents, cron sessions, and NULL-key orphaned subagent runs
|
|
37
38
|
- `/lossless` is an alias for `/lcm` on supported native command surfaces
|
|
38
39
|
|
|
39
40
|
These are plugin slash/native commands, not root shell CLI subcommands. Supported examples:
|
|
40
41
|
|
|
41
42
|
- `/lcm`
|
|
42
43
|
- `/lcm doctor`
|
|
44
|
+
- `/lcm doctor clean`
|
|
43
45
|
- `/lossless`
|
|
44
46
|
|
|
45
47
|
Not currently supported as root CLI commands:
|
|
@@ -125,8 +127,8 @@ Add a `lossless-claw` entry under `plugins.entries` in your OpenClaw config:
|
|
|
125
127
|
"ignoreSessionPatterns": [
|
|
126
128
|
"agent:*:cron:**"
|
|
127
129
|
],
|
|
128
|
-
"summaryModel": "
|
|
129
|
-
"expansionModel": "
|
|
130
|
+
"summaryModel": "openai/gpt-5.4-mini",
|
|
131
|
+
"expansionModel": "openai/gpt-5.4-mini",
|
|
130
132
|
"delegationTimeoutMs": 300000,
|
|
131
133
|
"summaryTimeoutMs": 60000
|
|
132
134
|
}
|
|
@@ -164,7 +166,7 @@ Add a `lossless-claw` entry under `plugins.entries` in your OpenClaw config:
|
|
|
164
166
|
| `LCM_SUMMARY_MODEL` | `""` | Model override for compaction summarization; falls back to OpenClaw's default model when unset |
|
|
165
167
|
| `LCM_SUMMARY_PROVIDER` | `""` | Provider override for compaction summarization; falls back to `OPENCLAW_PROVIDER` or the provider embedded in the model ref |
|
|
166
168
|
| `LCM_SUMMARY_BASE_URL` | *(from OpenClaw / provider default)* | Base URL override for summarization API calls |
|
|
167
|
-
| `LCM_EXPANSION_MODEL` | *(from OpenClaw)* | Model override for `lcm_expand_query` sub-agent (e.g. `
|
|
169
|
+
| `LCM_EXPANSION_MODEL` | *(from OpenClaw)* | Model override for `lcm_expand_query` sub-agent (e.g. `openai/gpt-5.4-mini`) |
|
|
168
170
|
| `LCM_EXPANSION_PROVIDER` | *(from OpenClaw)* | Provider override for `lcm_expand_query` sub-agent |
|
|
169
171
|
| `LCM_DELEGATION_TIMEOUT_MS` | `120000` | Max time to wait for delegated `lcm_expand_query` sub-agent completion |
|
|
170
172
|
| `LCM_SUMMARY_TIMEOUT_MS` | `60000` | Max time to wait for a single model-backed LCM summarizer call |
|
|
@@ -174,6 +176,8 @@ Add a `lossless-claw` entry under `plugins.entries` in your OpenClaw config:
|
|
|
174
176
|
|
|
175
177
|
If you want `lcm_expand_query` to run on a dedicated model via `expansionModel` or `LCM_EXPANSION_MODEL`, OpenClaw must explicitly trust the plugin to request sub-agent model overrides.
|
|
176
178
|
|
|
179
|
+
For most setups, `openai/gpt-5.4-mini` is a better starting point than Anthropic Haiku because it is cheap, fast, and does not depend on Anthropic quota remaining.
|
|
180
|
+
|
|
177
181
|
Add a `subagent` policy under `plugins.entries.lossless-claw` and allowlist the canonical `provider/model` target you want the plugin to use:
|
|
178
182
|
|
|
179
183
|
```json
|
|
@@ -227,6 +231,8 @@ For compaction summarization, lossless-claw resolves the model in this order:
|
|
|
227
231
|
|
|
228
232
|
If `summaryModel` already includes a provider prefix such as `anthropic/claude-sonnet-4-20250514`, `summaryProvider` is ignored for that choice. Otherwise, the provider falls back to the matching override, then `OPENCLAW_PROVIDER`, then the provider inferred by the caller.
|
|
229
233
|
|
|
234
|
+
Runtime-managed OAuth providers are supported here too. In particular, `openai-codex` and `github-copilot` auth profiles can be used for summary and expansion calls without a separate API key.
|
|
235
|
+
|
|
230
236
|
### Recommended starting configuration
|
|
231
237
|
|
|
232
238
|
```
|
|
@@ -234,6 +240,8 @@ LCM_FRESH_TAIL_COUNT=64
|
|
|
234
240
|
LCM_LEAF_CHUNK_TOKENS=20000
|
|
235
241
|
LCM_INCREMENTAL_MAX_DEPTH=1
|
|
236
242
|
LCM_CONTEXT_THRESHOLD=0.75
|
|
243
|
+
LCM_SUMMARY_MODEL=openai/gpt-5.4-mini
|
|
244
|
+
LCM_EXPANSION_MODEL=openai/gpt-5.4-mini
|
|
237
245
|
```
|
|
238
246
|
|
|
239
247
|
- **freshTailCount=64** protects the last 64 messages from compaction, giving the model more recent context for continuity.
|
package/docs/agent-tools.md
CHANGED
|
@@ -24,7 +24,7 @@ Summaries are lossy by design. The "Expand for details about:" footer at the end
|
|
|
24
24
|
- Tool call sequences and their outputs
|
|
25
25
|
- Verbatim quotes or specific data points
|
|
26
26
|
|
|
27
|
-
`lcm_expand_query` is bounded (~120s, scoped sub-agent) and relatively cheap. Don't ration it.
|
|
27
|
+
`lcm_expand_query` is bounded (~120s, scoped sub-agent) and relatively cheap. Don't ration it, but use `lcm_grep` first when you need broad discovery across many sessions.
|
|
28
28
|
|
|
29
29
|
## Tool reference
|
|
30
30
|
|
|
@@ -114,6 +114,8 @@ lcm_describe(id: "file_789abc012345")
|
|
|
114
114
|
|
|
115
115
|
Answer a focused question by expanding summaries through the DAG. Spawns a bounded sub-agent that walks parent links down to source material and returns a compact answer.
|
|
116
116
|
|
|
117
|
+
When `allConversations: true` is set, `lcm_expand_query` can now synthesize one answer across multiple conversations. That cross-conversation mode is bounded, not exhaustive: it ranks conversation buckets, expands only the top few, and marks the result truncated when lower-ranked buckets are skipped or fail.
|
|
118
|
+
|
|
117
119
|
**Parameters:**
|
|
118
120
|
|
|
119
121
|
| Param | Type | Required | Default | Description |
|
|
@@ -130,9 +132,11 @@ Answer a focused question by expanding summaries through the DAG. Spawns a bound
|
|
|
130
132
|
**Returns:**
|
|
131
133
|
- `answer` — The focused answer text
|
|
132
134
|
- `citedIds` — Summary IDs that contributed to the answer
|
|
135
|
+
- `sourceConversationIds` — Conversations that were successfully expanded
|
|
133
136
|
- `expandedSummaryCount` — How many summaries were expanded
|
|
134
137
|
- `totalSourceTokens` — Total tokens read from the DAG
|
|
135
138
|
- `truncated` — Whether the answer was truncated to fit maxTokens
|
|
139
|
+
- `conversationBreakdown` — Optional per-conversation success/failure diagnostics for bounded multi-conversation runs
|
|
136
140
|
|
|
137
141
|
**Examples:**
|
|
138
142
|
|
|
@@ -149,7 +153,7 @@ lcm_expand_query(
|
|
|
149
153
|
prompt: "What were the exact file changes?"
|
|
150
154
|
)
|
|
151
155
|
|
|
152
|
-
# Cross-conversation
|
|
156
|
+
# Cross-conversation synthesis
|
|
153
157
|
lcm_expand_query(
|
|
154
158
|
query: "deployment procedure",
|
|
155
159
|
prompt: "What's the current deployment process?",
|
|
@@ -175,7 +179,7 @@ Add instructions to your agent's system prompt so it knows when to use LCM tools
|
|
|
175
179
|
Use LCM tools for recall:
|
|
176
180
|
1. `lcm_grep` — Search all conversations by keyword/regex. Prefer `mode: "full_text"` for topic recall, quote exact phrases, use `sort: "relevance"` for older-topic lookups, and `sort: "hybrid"` when recency should still matter.
|
|
177
181
|
2. `lcm_describe` — Inspect a specific summary (cheap, no sub-agent)
|
|
178
|
-
3. `lcm_expand_query` — Deep recall with sub-agent expansion
|
|
182
|
+
3. `lcm_expand_query` — Deep recall with bounded sub-agent expansion
|
|
179
183
|
|
|
180
184
|
When summaries in context have an "Expand for details about:" footer
|
|
181
185
|
listing something you need, use `lcm_expand_query` to get the full detail.
|
|
@@ -183,7 +187,7 @@ listing something you need, use `lcm_expand_query` to get the full detail.
|
|
|
183
187
|
|
|
184
188
|
### Conversation scoping
|
|
185
189
|
|
|
186
|
-
By default, tools operate on the current conversation. Use `allConversations: true`
|
|
190
|
+
By default, tools operate on the current conversation. Use `lcm_grep(..., allConversations: true)` when you need broad global discovery. Use `lcm_expand_query(..., allConversations: true)` when you want bounded synthesis across sessions. Use `conversationId` when you already know the exact conversation to inspect or expand.
|
|
187
191
|
|
|
188
192
|
### Performance considerations
|
|
189
193
|
|
|
@@ -191,3 +195,4 @@ By default, tools operate on the current conversation. Use `allConversations: tr
|
|
|
191
195
|
- `lcm_expand_query` spawns a sub-agent and takes ~30–120 seconds
|
|
192
196
|
- The sub-agent has a 120-second timeout with cleanup guarantees
|
|
193
197
|
- Token caps (`LCM_MAX_EXPAND_TOKENS`) prevent runaway expansion
|
|
198
|
+
- Cross-conversation `lcm_expand_query` expands only a bounded set of top-ranked conversations
|
package/docs/configuration.md
CHANGED
|
@@ -191,6 +191,15 @@ Compaction summarization resolves candidates in this order:
|
|
|
191
191
|
|
|
192
192
|
If `summaryModel` already contains a provider prefix such as `anthropic/claude-sonnet-4-20250514`, `summaryProvider` is ignored for that candidate.
|
|
193
193
|
|
|
194
|
+
Runtime-managed OAuth providers are supported here too. In particular, `openai-codex` and `github-copilot` auth profiles can be used for summary and expansion calls without a separate API key.
|
|
195
|
+
|
|
196
|
+
A practical starting point for cost-sensitive setups is:
|
|
197
|
+
|
|
198
|
+
```env
|
|
199
|
+
LCM_SUMMARY_MODEL=openai/gpt-5.4-mini
|
|
200
|
+
LCM_EXPANSION_MODEL=openai/gpt-5.4-mini
|
|
201
|
+
```
|
|
202
|
+
|
|
194
203
|
### Session pattern matching
|
|
195
204
|
|
|
196
205
|
`ignoreSessionPatterns` and `statelessSessionPatterns` use full session keys.
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@martian-engineering/lossless-claw",
|
|
3
|
-
"version": "0.7.0",
|
|
3
|
+
"version": "0.8.0",
|
|
4
4
|
"description": "Lossless Context Management plugin for OpenClaw — DAG-based conversation summarization with incremental compaction",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "index.ts",
|
package/skills/lossless-claw/SKILL.md
CHANGED
|
@@ -12,8 +12,9 @@ Start here:
|
|
|
12
12
|
1. Confirm whether the user needs configuration help, diagnostics, recall-tool guidance, or session-lifecycle guidance.
|
|
13
13
|
2. If they need a quick health check, tell them to run `/lossless` (`/lcm` is the shorter alias).
|
|
14
14
|
3. If they suspect summary corruption or truncation, use `/lossless doctor`.
|
|
15
|
-
4. If they
|
|
16
|
-
5.
|
|
15
|
+
4. If they want high-confidence junk/session cleanup guidance, use `/lossless doctor clean` before recommending any deletes.
|
|
16
|
+
5. If they ask how `/new` or `/reset` interacts with LCM, read the session-lifecycle reference before answering.
|
|
17
|
+
6. Load the relevant reference file instead of improvising details from memory.
|
|
17
18
|
|
|
18
19
|
Reference map:
|
|
19
20
|
|
package/skills/lossless-claw/references/architecture.md
CHANGED
|
@@ -50,3 +50,15 @@ It looks for known summary-health markers that indicate:
|
|
|
50
50
|
- truncated summary artifacts near the end of stored content
|
|
51
51
|
|
|
52
52
|
This gives users one place to answer the question “is my summary graph healthy?” without introducing a broader mutation surface.
|
|
53
|
+
|
|
54
|
+
## What `/lcm doctor clean` tells you
|
|
55
|
+
|
|
56
|
+
The cleaners flow is also diagnostic first.
|
|
57
|
+
|
|
58
|
+
It reports high-confidence junk patterns that are structurally safe to review as standalone cleanup candidates, including:
|
|
59
|
+
|
|
60
|
+
- archived subagent sessions
|
|
61
|
+
- cron sessions
|
|
62
|
+
- NULL-key orphaned subagent context conversations
|
|
63
|
+
|
|
64
|
+
This keeps cleanup discovery separate from summary-health diagnostics while still using the same native command surface.
|
package/skills/lossless-claw/references/diagnostics.md
CHANGED
|
@@ -29,6 +29,19 @@ What it should help confirm:
|
|
|
29
29
|
- whether truncation markers exist
|
|
30
30
|
- which conversations are affected most
|
|
31
31
|
|
|
32
|
+
### `/lossless doctor clean`
|
|
33
|
+
|
|
34
|
+
Use this when the user wants read-only diagnostics for high-confidence junk patterns before any cleanup.
|
|
35
|
+
|
|
36
|
+
It should help confirm:
|
|
37
|
+
|
|
38
|
+
- whether archived subagent sessions are present
|
|
39
|
+
- whether cron sessions are accumulating unexpectedly
|
|
40
|
+
- whether NULL-key orphaned subagent conversations are present
|
|
41
|
+
- which high-confidence filters match the most conversations and messages
|
|
42
|
+
|
|
43
|
+
This command is read-only. Use it to identify likely cleanup candidates before taking any separate cleanup action.
|
|
44
|
+
|
|
32
45
|
## Interpreting common states
|
|
33
46
|
|
|
34
47
|
### `/lossless` tokens vs `/status` context
|
package/src/assembler.ts
CHANGED
|
@@ -6,6 +6,7 @@ import type {
|
|
|
6
6
|
MessageRole,
|
|
7
7
|
} from "./store/conversation-store.js";
|
|
8
8
|
import type { SummaryStore, ContextItemRecord, SummaryRecord } from "./store/summary-store.js";
|
|
9
|
+
import { estimateTokens } from "./estimate-tokens.js";
|
|
9
10
|
|
|
10
11
|
type AgentMessage = Parameters<ContextEngine["ingest"]>[0]["message"];
|
|
11
12
|
|
|
@@ -46,10 +47,6 @@ export interface AssembleContextResult {
|
|
|
46
47
|
|
|
47
48
|
// ── Helpers ──────────────────────────────────────────────────────────────────
|
|
48
49
|
|
|
49
|
-
/** Simple token estimate: ~4 chars per token, same as VoltCode's Token.estimate */
|
|
50
|
-
function estimateTokens(text: string): number {
|
|
51
|
-
return Math.ceil(text.length / 4);
|
|
52
|
-
}
|
|
53
50
|
|
|
54
51
|
type SummaryPromptSignal = Pick<SummaryRecord, "kind" | "depth" | "descendantCount">;
|
|
55
52
|
|
|
@@ -98,6 +95,7 @@ function buildSystemPromptAddition(summarySignals: SummaryPromptSignal[]): strin
|
|
|
98
95
|
"Keep raw summary IDs in tool context for follow-up; do not include them in the user-facing answer unless the user asks for sources or IDs.",
|
|
99
96
|
"",
|
|
100
97
|
"`lcm_grep` tips: prefer `mode: \"full_text\"` for keyword/topic lookup, quote exact multi-word phrases, use `sort: \"relevance\"` for older-topic retrieval, and use `sort: \"hybrid\"` when recency should still influence ranking.",
|
|
98
|
+
"`lcm_expand_query(query: ...)` uses the same FTS5 full-text search rules as `lcm_grep`: terms are ANDed by default, so extra query words narrow results. Keep `query` to 1-3 distinctive terms or a quoted phrase, and put the natural-language question in `prompt`.",
|
|
101
99
|
"",
|
|
102
100
|
"**Uncertainty checklist (run before answering):**",
|
|
103
101
|
"- Am I making an exact factual claim from a compressed or condensed summary?",
|
|
@@ -1106,6 +1104,16 @@ export class ContextAssembler {
|
|
|
1106
1104
|
}
|
|
1107
1105
|
|
|
1108
1106
|
const parts = await this.conversationStore.getMessageParts(msg.messageId);
|
|
1107
|
+
|
|
1108
|
+
// Skip empty assistant messages left by error/aborted responses.
|
|
1109
|
+
// These waste context tokens and can confuse models that reject
|
|
1110
|
+
// consecutive empty assistant turns. Only skip when both the stored
|
|
1111
|
+
// content text AND the message_parts table are empty — assistant
|
|
1112
|
+
// messages that contain tool calls have empty text content but
|
|
1113
|
+
// non-empty parts and must be preserved.
|
|
1114
|
+
if (msg.role === "assistant" && !msg.content.trim() && parts.length === 0) {
|
|
1115
|
+
return null;
|
|
1116
|
+
}
|
|
1109
1117
|
const roleFromStore = toRuntimeRole(msg.role, parts);
|
|
1110
1118
|
const isToolResult = roleFromStore === "toolResult";
|
|
1111
1119
|
const toolCallId = isToolResult ? pickToolCallId(parts) : undefined;
|
package/src/compaction.ts
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import { createHash } from "node:crypto";
|
|
2
2
|
import type { ConversationStore, CreateMessagePartInput } from "./store/conversation-store.js";
|
|
3
3
|
import type { SummaryStore, SummaryRecord, ContextItemRecord } from "./store/summary-store.js";
|
|
4
|
+
import { estimateTokens, truncateTextToEstimatedTokens } from "./estimate-tokens.js";
|
|
4
5
|
import { extractFileIdsFromContent } from "./large-files.js";
|
|
5
6
|
import { NOOP_LCM_LOGGER, type LcmLogger } from "./lcm-log.js";
|
|
6
7
|
import { LcmProviderAuthError } from "./summarize.js";
|
|
@@ -93,10 +94,6 @@ type CondensedPhaseCandidate = {
|
|
|
93
94
|
|
|
94
95
|
// ── Helpers ──────────────────────────────────────────────────────────────────
|
|
95
96
|
|
|
96
|
-
/** Estimate token count from character length (~4 chars per token). */
|
|
97
|
-
function estimateTokens(content: string): number {
|
|
98
|
-
return Math.ceil(content.length / 4);
|
|
99
|
-
}
|
|
100
97
|
|
|
101
98
|
/** Deterministically cap summary text so the persisted output stays within maxTokens. */
|
|
102
99
|
function capSummaryText(
|
|
@@ -112,14 +109,14 @@ function capSummaryText(
|
|
|
112
109
|
];
|
|
113
110
|
|
|
114
111
|
for (const suffix of suffixes) {
|
|
115
|
-
const
|
|
116
|
-
const capped = `${content
|
|
112
|
+
const contentBudget = Math.max(0, maxTokens - estimateTokens(suffix));
|
|
113
|
+
const capped = `${truncateTextToEstimatedTokens(content, contentBudget)}${suffix}`;
|
|
117
114
|
if (estimateTokens(capped) <= maxTokens) {
|
|
118
115
|
return capped;
|
|
119
116
|
}
|
|
120
117
|
}
|
|
121
118
|
|
|
122
|
-
return content
|
|
119
|
+
return truncateTextToEstimatedTokens(content, maxTokens);
|
|
123
120
|
}
|
|
124
121
|
|
|
125
122
|
/** Format a timestamp as `YYYY-MM-DD HH:mm TZ` for prompt source text. */
|
|
@@ -176,8 +173,8 @@ function generateSummaryId(content: string): string {
|
|
|
176
173
|
);
|
|
177
174
|
}
|
|
178
175
|
|
|
179
|
-
/** Maximum
|
|
180
|
-
const
|
|
176
|
+
/** Maximum estimated tokens for the deterministic fallback truncation. */
|
|
177
|
+
const FALLBACK_MAX_TOKENS = 512;
|
|
181
178
|
const DEFAULT_LEAF_CHUNK_TOKENS = 20_000;
|
|
182
179
|
|
|
183
180
|
/**
|
|
@@ -1301,13 +1298,13 @@ export class CompactionEngine {
|
|
|
1301
1298
|
}
|
|
1302
1299
|
const inputTokens = Math.max(1, estimateTokens(sourceText));
|
|
1303
1300
|
const buildDeterministicFallback = (): { content: string; level: CompactionLevel } => {
|
|
1304
|
-
const
|
|
1305
|
-
|
|
1306
|
-
|
|
1307
|
-
|
|
1301
|
+
const suffix = `\n[Truncated from ${inputTokens} tokens]`;
|
|
1302
|
+
const truncated = truncateTextToEstimatedTokens(
|
|
1303
|
+
sourceText,
|
|
1304
|
+
Math.max(0, FALLBACK_MAX_TOKENS - estimateTokens(suffix)),
|
|
1305
|
+
);
|
|
1308
1306
|
return {
|
|
1309
|
-
content: `${truncated}
|
|
1310
|
-
[Truncated from ${inputTokens} tokens]`,
|
|
1307
|
+
content: `${truncated}${suffix}`,
|
|
1311
1308
|
level: "fallback",
|
|
1312
1309
|
};
|
|
1313
1310
|
};
|
package/src/db/connection.ts
CHANGED
|
@@ -8,24 +8,34 @@ const SQLITE_BUSY_TIMEOUT_MS = 5_000;
|
|
|
8
8
|
const connectionsByPath = new Map<ConnectionKey, Set<DatabaseSync>>();
|
|
9
9
|
const connectionIndex = new Map<DatabaseSync, ConnectionKey>();
|
|
10
10
|
|
|
11
|
-
function isInMemoryPath(dbPath: string): boolean {
|
|
11
|
+
export function isInMemoryPath(dbPath: string): boolean {
|
|
12
12
|
const normalized = dbPath.trim();
|
|
13
13
|
return normalized === ":memory:" || normalized.startsWith("file::memory:");
|
|
14
14
|
}
|
|
15
15
|
|
|
16
|
+
export function getFileBackedDatabasePath(dbPath: string): string | null {
|
|
17
|
+
const trimmed = dbPath.trim();
|
|
18
|
+
if (!trimmed || isInMemoryPath(trimmed)) {
|
|
19
|
+
return null;
|
|
20
|
+
}
|
|
21
|
+
return resolve(trimmed);
|
|
22
|
+
}
|
|
23
|
+
|
|
16
24
|
export function normalizePath(dbPath: string): ConnectionKey {
|
|
17
|
-
|
|
25
|
+
const fileBackedDatabasePath = getFileBackedDatabasePath(dbPath);
|
|
26
|
+
if (!fileBackedDatabasePath) {
|
|
18
27
|
const trimmed = dbPath.trim();
|
|
19
28
|
return trimmed.length > 0 ? trimmed : ":memory:";
|
|
20
29
|
}
|
|
21
|
-
return
|
|
30
|
+
return fileBackedDatabasePath;
|
|
22
31
|
}
|
|
23
32
|
|
|
24
33
|
function ensureDbDirectory(dbPath: string): void {
|
|
25
|
-
|
|
34
|
+
const fileBackedDatabasePath = getFileBackedDatabasePath(dbPath);
|
|
35
|
+
if (!fileBackedDatabasePath) {
|
|
26
36
|
return;
|
|
27
37
|
}
|
|
28
|
-
mkdirSync(dirname(
|
|
38
|
+
mkdirSync(dirname(fileBackedDatabasePath), { recursive: true });
|
|
29
39
|
}
|
|
30
40
|
|
|
31
41
|
function configureConnection(db: DatabaseSync): DatabaseSync {
|
package/src/db/features.ts
CHANGED
|
@@ -2,19 +2,20 @@ import type { DatabaseSync } from "node:sqlite";
|
|
|
2
2
|
|
|
3
3
|
export type LcmDbFeatures = {
|
|
4
4
|
fts5Available: boolean;
|
|
5
|
+
trigramTokenizerAvailable: boolean;
|
|
5
6
|
};
|
|
6
7
|
|
|
7
8
|
const featureCache = new WeakMap<DatabaseSync, LcmDbFeatures>();
|
|
8
9
|
|
|
9
|
-
function probeFts5(db: DatabaseSync): boolean {
|
|
10
|
+
function probeVirtualTable(db: DatabaseSync, sql: string): boolean {
|
|
10
11
|
try {
|
|
11
|
-
db.exec("DROP TABLE IF EXISTS temp.
|
|
12
|
-
db.exec(
|
|
13
|
-
db.exec("DROP TABLE temp.
|
|
12
|
+
db.exec("DROP TABLE IF EXISTS temp.__lcm_virtual_table_probe");
|
|
13
|
+
db.exec(sql);
|
|
14
|
+
db.exec("DROP TABLE temp.__lcm_virtual_table_probe");
|
|
14
15
|
return true;
|
|
15
16
|
} catch {
|
|
16
17
|
try {
|
|
17
|
-
db.exec("DROP TABLE IF EXISTS temp.
|
|
18
|
+
db.exec("DROP TABLE IF EXISTS temp.__lcm_virtual_table_probe");
|
|
18
19
|
} catch {
|
|
19
20
|
// Ignore cleanup failures after a failed probe.
|
|
20
21
|
}
|
|
@@ -22,6 +23,20 @@ function probeFts5(db: DatabaseSync): boolean {
|
|
|
22
23
|
}
|
|
23
24
|
}
|
|
24
25
|
|
|
26
|
+
function probeFts5(db: DatabaseSync): boolean {
|
|
27
|
+
return probeVirtualTable(
|
|
28
|
+
db,
|
|
29
|
+
"CREATE VIRTUAL TABLE temp.__lcm_virtual_table_probe USING fts5(content)",
|
|
30
|
+
);
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
function probeTrigramTokenizer(db: DatabaseSync): boolean {
|
|
34
|
+
return probeVirtualTable(
|
|
35
|
+
db,
|
|
36
|
+
"CREATE VIRTUAL TABLE temp.__lcm_virtual_table_probe USING fts5(content, tokenize='trigram')",
|
|
37
|
+
);
|
|
38
|
+
}
|
|
39
|
+
|
|
25
40
|
/**
|
|
26
41
|
* Detect SQLite features exposed by the current Node runtime.
|
|
27
42
|
*
|
|
@@ -36,7 +51,11 @@ export function getLcmDbFeatures(db: DatabaseSync): LcmDbFeatures {
|
|
|
36
51
|
|
|
37
52
|
const detected: LcmDbFeatures = {
|
|
38
53
|
fts5Available: probeFts5(db),
|
|
54
|
+
trigramTokenizerAvailable: false,
|
|
39
55
|
};
|
|
56
|
+
if (detected.fts5Available) {
|
|
57
|
+
detected.trigramTokenizerAvailable = probeTrigramTokenizer(db);
|
|
58
|
+
}
|
|
40
59
|
featureCache.set(db, detected);
|
|
41
60
|
return detected;
|
|
42
61
|
}
|