seekx-openclaw 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,86 @@
1
+ {
2
+ "id": "seekx",
3
+ "name": "seekx",
4
+ "description": "Local-first hybrid search memory backend: BM25 + vector + rerank + CJK",
5
+ "version": "0.1.0",
6
+ "kind": "memory",
7
+ "skills": ["skills"],
8
+ "configSchema": {
9
+ "type": "object",
10
+ "additionalProperties": false,
11
+ "properties": {
12
+ "dbPath": {
13
+ "type": "string"
14
+ },
15
+ "paths": {
16
+ "type": "array",
17
+ "items": {
18
+ "type": "object",
19
+ "additionalProperties": false,
20
+ "required": ["name", "path"],
21
+ "properties": {
22
+ "name": {
23
+ "type": "string"
24
+ },
25
+ "path": {
26
+ "type": "string"
27
+ },
28
+ "pattern": {
29
+ "type": "string"
30
+ }
31
+ }
32
+ }
33
+ },
34
+ "apiKey": {
35
+ "type": "string"
36
+ },
37
+ "baseUrl": {
38
+ "type": "string"
39
+ },
40
+ "embedModel": {
41
+ "type": "string"
42
+ },
43
+ "rerankModel": {
44
+ "type": "string"
45
+ },
46
+ "expandModel": {
47
+ "type": "string"
48
+ },
49
+ "searchLimit": {
50
+ "type": "number"
51
+ },
52
+ "refreshIntervalMs": {
53
+ "type": "number"
54
+ },
55
+ "includeOpenClawMemory": {
56
+ "type": "boolean"
57
+ }
58
+ }
59
+ },
60
+ "uiHints": {
61
+ "apiKey": {
62
+ "label": "API key",
63
+ "sensitive": true,
64
+ "help": "API key for embedding/reranking/expansion. Inherits from ~/.seekx/config.yml if unset."
65
+ },
66
+ "baseUrl": {
67
+ "label": "API base URL",
68
+ "placeholder": "https://api.siliconflow.cn/v1",
69
+ "help": "OpenAI-compatible API base URL."
70
+ },
71
+ "embedModel": {
72
+ "label": "Embedding model",
73
+ "placeholder": "BAAI/bge-large-zh-v1.5"
74
+ },
75
+ "rerankModel": {
76
+ "label": "Reranking model",
77
+ "placeholder": "BAAI/bge-reranker-v2-m3",
78
+ "help": "Omit to disable reranking."
79
+ },
80
+ "expandModel": {
81
+ "label": "Query expansion model",
82
+ "placeholder": "Qwen/Qwen3-8B",
83
+ "help": "Omit to disable query expansion."
84
+ }
85
+ }
86
+ }
package/package.json ADDED
@@ -0,0 +1,28 @@
1
+ {
2
+ "name": "seekx-openclaw",
3
+ "version": "0.3.0",
4
+ "description": "OpenClaw memory backend: hybrid BM25 + vector search with reranking and CJK support",
5
+ "type": "module",
6
+ "main": "./src/index.ts",
7
+ "exports": { ".": "./src/index.ts" },
8
+ "files": ["src", "skills", "openclaw.plugin.json"],
9
+ "openclaw": {
10
+ "extensions": ["./src/index.ts"],
11
+ "install": { "minHostVersion": ">=2026.4.0" }
12
+ },
13
+ "scripts": {
14
+ "typecheck": "tsc --noEmit",
15
+ "test": "bun test test/"
16
+ },
17
+ "dependencies": {
18
+ "seekx-core": "^0.3.0"
19
+ },
20
+ "devDependencies": {
21
+ "@types/bun": "latest",
22
+ "typescript": "^5.8.3"
23
+ },
24
+ "engines": {
25
+ "node": ">=22",
26
+ "bun": ">=1.1.0"
27
+ }
28
+ }
@@ -0,0 +1,346 @@
1
+ # Skill: Install seekx memory backend for OpenClaw
2
+
3
+ > **This skill is self-contained and agent-executable.**
4
+ > It can be fetched from a public URL and run by any capable agent
5
+ > (Cursor, Claude Desktop, OpenClaw agent, etc.) — even before the plugin
6
+ > is installed.
7
+ >
8
+ > How to invoke: tell your agent:
9
+ > "Fetch and follow this skill: https://raw.githubusercontent.com/seekx/seekx/main/packages/openclaw-plugin/skills/install/SKILL.md"
10
+
11
+ ---
12
+
13
+ ## What this skill does
14
+
15
+ Installs the `seekx-openclaw` plugin into the user's OpenClaw gateway,
16
+ configures an embedding/reranking provider, and verifies the setup end-to-end.
17
+
18
+ After completion:
19
+ - `memory_search` and `memory_get` route through seekx's search pipeline
20
+ - OpenClaw's memory files are indexed automatically
21
+ - Any extra directories the user specified are indexed and watched
22
+ - Vector search, reranking, and query expansion are enabled only when the
23
+ necessary models and runtime support are available; otherwise seekx
24
+ degrades safely to BM25-only mode
25
+
26
+ ---
27
+
28
+ ## Execution steps
29
+
30
+ Work through the steps below **in order**. At each step, run the command or
31
+ collect the answer before proceeding to the next.
32
+
33
+ ---
34
+
35
+ ### Step 1 — Verify OpenClaw is installed
36
+
37
+ ```bash
38
+ openclaw --version
39
+ ```
40
+
41
+ If the command is not found, tell the user:
42
+
43
+ > OpenClaw is not installed. Install it first:
44
+ > `npm install -g openclaw@latest`
45
+ > Then re-run this skill.
46
+
47
+ Stop here if OpenClaw is not present.
48
+
49
+ ---
50
+
51
+ ### Step 2 — Install the seekx plugin
52
+
53
+ ```bash
54
+ openclaw plugins install seekx-openclaw
55
+ ```
56
+
57
+ If this fails with a "not found" error, try the npm fallback:
58
+
59
+ ```bash
60
+ npm install -g seekx-openclaw
61
+ ```
62
+
63
+ Then register it as a local plugin:
64
+
65
+ ```bash
66
+ openclaw plugins install -l "$(npm root -g)/seekx-openclaw"
67
+ ```
68
+
69
+ Confirm it loaded:
70
+
71
+ ```bash
72
+ openclaw plugins list | grep seekx
73
+ ```
74
+
75
+ ---
76
+
77
+ ### Step 3 — Choose a provider (guided)
78
+
79
+ Ask the user the following questions **one at a time, in order**.
80
+ Collect all answers before writing any config.
81
+
82
+ ---
83
+
84
+ #### Question A — API key source
85
+
86
+ > Do you have an API key from a cloud provider, or do you prefer to run
87
+ > everything locally with Ollama?
88
+
89
+ Offer these options:
90
+
91
+ | Option | When to choose |
92
+ |---|---|
93
+ | **SiliconFlow** | Best for Chinese/Japanese/Korean text; supports reranking; low cost; requires a free account at siliconflow.cn |
94
+ | **OpenAI** | Best for English; widely used; does not support reranking via this plugin |
95
+ | **Ollama** | Fully local, no API key, no internet required; pull models first |
96
+ | **Other / Custom** | You have an OpenAI-compatible endpoint (Jina, Together, Groq, self-hosted, etc.) |
97
+
98
+ Record the user's choice as `PROVIDER`.
99
+
100
+ ---
101
+
102
+ #### Question B — API key (skip if Ollama)
103
+
104
+ If `PROVIDER` is **SiliconFlow**:
105
+
106
+ > Please provide your SiliconFlow API key.
107
+ > (Get one free at https://cloud.siliconflow.cn — click "API Keys" after signing in.)
108
+
109
+ If `PROVIDER` is **OpenAI**:
110
+
111
+ > Please provide your OpenAI API key.
112
+ > (Find it at https://platform.openai.com/api-keys)
113
+
114
+ If `PROVIDER` is **Other / Custom**:
115
+
116
+ > Please provide:
117
+ > 1. Your API base URL (must end with `/v1`, e.g. `https://api.example.com/v1`)
118
+ > 2. Your API key
119
+
120
+ Record as `API_KEY` and `BASE_URL`.
121
+
122
+ If `PROVIDER` is **Ollama**:
123
+ - Set `API_KEY = "ollama"` (placeholder, Ollama ignores it)
124
+ - Set `BASE_URL = "http://localhost:11434/v1"`
125
+ - Tell the user to pull the embedding model first:
126
+ ```bash
127
+ ollama pull nomic-embed-text
128
+ ```
129
+
130
+ ---
131
+
132
+ #### Question C — Enable query expansion? (optional)
133
+
134
+ > seekx can generate 2–3 variant phrasings of your query before searching,
135
+ > which improves recall — especially for vague or short queries.
136
+ >
137
+ > This uses one extra LLM call per search (~50–200 ms extra latency).
138
+ >
139
+ > Would you like to enable query expansion? (Recommended: yes)
140
+
141
+ If yes, record `EXPAND_MODEL` using this table:
142
+
143
+ | Provider | Recommended expand model |
144
+ |---|---|
145
+ | SiliconFlow | `Qwen/Qwen3-8B` |
146
+ | OpenAI | `gpt-4o-mini` |
147
+ | Ollama | Any chat model the user has pulled (e.g. `llama3.2`, `qwen2.5`) |
148
+ | Custom | Ask the user for a model name |
149
+
150
+ If no, set `EXPAND_MODEL = null`.
151
+
152
+ ---
153
+
154
+ #### Question D — Extra directories (optional)
155
+
156
+ > Would you like to index any of your own directories (notes, docs, etc.)
157
+ > in addition to OpenClaw's built-in memory files?
158
+ >
159
+ > If yes, list them. For each, provide:
160
+ > - A short name (e.g. `notes`, `docs`, `brain`)
161
+ > - The full path (e.g. `~/notes` or `/Users/me/Documents/notes`)
162
+
163
+ Collect as a list of `{ name, path }` pairs.
164
+ If the user says no or skips, use an empty list.
165
+
166
+ ---
167
+
168
+ ### Step 4 — Build the configuration
169
+
170
+ Using the answers collected above, assemble the `pluginConfig` object.
171
+
172
+ **For SiliconFlow:**
173
+
174
+ ```json
175
+ {
176
+ "apiKey": "<API_KEY>",
177
+ "baseUrl": "https://api.siliconflow.cn/v1",
178
+ "embedModel": "BAAI/bge-large-zh-v1.5",
179
+ "rerankModel": "BAAI/bge-reranker-v2-m3",
180
+ "expandModel": "<EXPAND_MODEL or omit>",
181
+ "paths": []
182
+ }
183
+ ```
184
+
185
+ **For OpenAI:**
186
+
187
+ ```json
188
+ {
189
+ "apiKey": "<API_KEY>",
190
+ "baseUrl": "https://api.openai.com/v1",
191
+ "embedModel": "text-embedding-3-small",
192
+ "expandModel": "<EXPAND_MODEL or omit>",
193
+ "paths": []
194
+ }
195
+ ```
196
+
197
+ (Omit `rerankModel` — OpenAI does not expose a reranking endpoint.)
198
+
199
+ **For Ollama:**
200
+
201
+ ```json
202
+ {
203
+ "apiKey": "ollama",
204
+ "baseUrl": "http://localhost:11434/v1",
205
+ "embedModel": "nomic-embed-text",
206
+ "expandModel": "<EXPAND_MODEL or omit>",
207
+ "paths": []
208
+ }
209
+ ```
210
+
211
+ **For Custom:**
212
+
213
+ ```json
214
+ {
215
+ "apiKey": "<API_KEY>",
216
+ "baseUrl": "<BASE_URL>",
217
+ "embedModel": "<ask user>",
218
+ "rerankModel": "<ask user, or omit>",
219
+ "expandModel": "<EXPAND_MODEL or omit>",
220
+ "paths": []
221
+ }
222
+ ```
223
+
224
+ If the user provided extra directories, populate `paths`:
225
+
226
+ ```json
227
+ "paths": [
228
+ { "name": "notes", "path": "~/notes" },
229
+ { "name": "docs", "path": "~/projects/docs" }
230
+ ]
231
+ ```
232
+
233
+ ---
234
+
235
+ ### Step 5 — Write the OpenClaw config
236
+
237
+ Read the current config file:
238
+
239
+ ```bash
240
+ cat ~/.openclaw/openclaw.json
241
+ ```
242
+
243
+ If the file does not exist, start with `{}`.
244
+
245
+ Merge the following into the config (do not overwrite unrelated fields):
246
+
247
+ ```json5
248
+ {
249
+ "plugins": {
250
+ "slots": {
251
+ "memory": "seekx" // makes seekx the exclusive memory backend
252
+ },
253
+ "entries": {
254
+ "seekx": {
255
+ "enabled": true,
256
+ "config": {
257
+ // paste the pluginConfig object assembled in Step 4
258
+ }
259
+ }
260
+ }
261
+ }
262
+ }
263
+ ```
264
+
265
+ Write the merged result back to `~/.openclaw/openclaw.json`.
266
+
267
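+ After writing, validate the file is parseable. The check below uses strict `JSON.parse`, so the written file must be plain JSON — the `//` comments in the snippet above are guidance only and must not be copied into it: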
268
+
269
+ ```bash
270
+ node -e "JSON.parse(require('fs').readFileSync(process.env.HOME + '/.openclaw/openclaw.json', 'utf8'))" \
271
+ && echo "valid JSON" || echo "PARSE ERROR — do not restart yet"
272
+ ```
273
+
274
+ Stop and show the error if parsing fails.
275
+
276
+ ---
277
+
278
+ ### Step 6 — Restart the gateway
279
+
280
+ ```bash
281
+ openclaw gateway restart
282
+ ```
283
+
284
+ Wait 3 seconds, then check that the gateway is running:
285
+
286
+ ```bash
287
+ openclaw gateway status
288
+ ```
289
+
290
+ ---
291
+
292
+ ### Step 7 — Verify
293
+
294
+ ```bash
295
+ openclaw status
296
+ ```
297
+
298
+ Confirm the `Memory` row contains `plugin seekx`.
299
+
300
+ Interpret the row as follows:
301
+ - `N files · N chunks · plugin seekx` → installation and indexing are working
302
+ - `0 files · 0 chunks · plugin seekx` right after restart → the initial index
303
+ may still be warming up; wait 15 seconds and run `openclaw status` again
304
+ - `plugin seekx · vector off` → valid BM25-only mode; vector search is not
305
+ active because no embedding model is configured or `sqlite-vec` is unavailable
306
+
307
+ If the user already has memory files and an active OpenClaw agent session,
308
+ optionally ask a memory-backed question through the agent to confirm end-to-end
309
+ retrieval. Do not instruct the user to run `openclaw memory ...`; current
310
+ OpenClaw versions expose memory backend state through `openclaw status`.
311
+
312
+ ---
313
+
314
+ ### Step 8 — Report to the user
315
+
316
+ Tell the user:
317
+
318
+ > seekx is now your OpenClaw memory backend.
319
+ >
320
+ > - OpenClaw's memory files (`MEMORY.md`, `memory/**/*.md`) are indexed automatically.
321
+ > - [If extra directories were configured] Your extra directories are indexed and watched for changes.
322
+ > - `memory_search` and `memory_get` now use seekx's search pipeline: BM25 by default, plus vector/rerank/expansion when configured and available.
323
+ >
324
+ > To check status at any time: `openclaw status`
325
+ > To add more directories later: edit `~/.openclaw/openclaw.json` → `plugins.entries.seekx.config.paths`
326
+
327
+ ---
328
+
329
+ ## Troubleshooting
330
+
331
+ **`openclaw plugins install` hangs or fails**
332
+ → Try `openclaw plugins install seekx-openclaw --force`
333
+ → Or use the npm fallback path in Step 2.
334
+
335
+ **`openclaw status` does not show `plugin seekx` in the Memory row**
336
+ → Check that `plugins.slots.memory` is `"seekx"` (not `"memory-core"` or missing).
337
+ → Run `openclaw plugins list` and confirm seekx is listed as enabled.
338
+
339
+ **Status shows `plugin seekx · vector off`**
340
+ → This is expected when no embedding model is configured, the API credentials are missing, or `sqlite-vec` cannot load.
341
+ → BM25 search still works; seekx is installed correctly.
342
+
343
+ **API key error / 401 from provider**
344
+ → Double-check the key and baseUrl.
345
+ → For SiliconFlow: the key starts with `sk-` and was copied from the API Keys page.
346
+ → Re-run Step 5 with corrected values and restart the gateway.
@@ -0,0 +1,152 @@
1
+ # Skill: Using seekx memory search in OpenClaw
2
+
3
+ seekx is installed as the OpenClaw memory backend.
4
+ `memory_search` and `memory_get` now route through seekx's search pipeline:
5
+ BM25 full-text by default, with vector kNN, cross-encoder reranking, and query
6
+ expansion enabled when the required models and runtime support are available.
7
+
8
+ ---
9
+
10
+ ## When to call `memory_search`
11
+
12
+ **Call proactively** before answering any query that might be answered by
13
+ stored context. Do not wait for the user to ask you to search — search first.
14
+
15
+ Call `memory_search` when the user's message involves:
16
+
17
+ - People (colleagues, contacts, companies, relationships)
18
+ - Past decisions, meetings, or discussions
19
+ - Projects, codebases, or technical systems the user has documented
20
+ - Concepts, definitions, or domain knowledge the user has written down
21
+ - Anything the user might have noted in `MEMORY.md` or a notes directory
22
+
23
+ **Do not call** `memory_search` for:
24
+ - General knowledge questions (use your built-in knowledge)
25
+ - Queries where the user has provided all the context inline
26
+ - Purely computational tasks (math, code generation from scratch)
27
+
28
+ ---
29
+
30
+ ## How to search
31
+
32
+ Use natural language queries, not keywords. When configured, seekx handles
33
+ query expansion and semantic matching internally; otherwise it falls back to
34
+ BM25-only search.
35
+
36
+ Good queries:
37
+ ```
38
+ memory_search("Alice's role at Acme Corp and her preferred communication style")
39
+ memory_search("why we chose PostgreSQL over MySQL for the billing service")
40
+ memory_search("架构评审会议上的决定")
41
+ ```
42
+
43
+ Less effective (but still works):
44
+ ```
45
+ memory_search("Alice Acme")
46
+ memory_search("PostgreSQL MySQL")
47
+ ```
48
+
49
+ ---
50
+
51
+ ## Search options
52
+
53
+ ```typescript
54
+ memory_search(query: string, opts?: {
55
+ limit?: number; // default: 6; increase for broad topics, decrease for precision
56
+ collection?: string; // restrict to a named collection (see below)
57
+ })
58
+ ```
59
+
60
+ ---
61
+
62
+ ## Scoping to a collection
63
+
64
+ If you know which directory the relevant content lives in, scope the search
65
+ to avoid noise from unrelated collections.
66
+
67
+ ```
68
+ memory_search("API authentication flow", { collection: "docs" })
69
+ memory_search("John Smith", { collection: "notes" })
70
+ ```
71
+
72
+ Collection names come from:
73
+ - `plugins.entries.seekx.config.paths[].name` in `~/.openclaw/openclaw.json`
74
+ - the built-in collection name `openclaw-memory` for OpenClaw's own memory files
75
+
76
+ Use `openclaw status` to confirm that seekx is active and that aggregate file
77
+ and chunk counts are non-zero after indexing.
78
+
79
+ ---
80
+
81
+ ## Retrieving a full document
82
+
83
+ Search results include a `path` field (absolute filesystem path).
84
+ Use `memory_get` to read the full file content when:
85
+ - You need more context than the snippet provides
86
+ - The user asks you to read a specific file from memory
87
+ - You want to check if a document has been updated since the snippet was indexed
88
+
89
+ ```
90
+ memory_get("/Users/me/notes/people/alice.md")
91
+ ```
92
+
93
+ Prefer paths that came from `memory_search` results. seekx only returns content
94
+ for files inside indexed collections; paths outside indexed scope resolve to an
95
+ empty string.
96
+
97
+ For readable indexed paths, `memory_get` reads the live file from disk rather
98
+ than the indexed snapshot, so it returns the current version.
99
+
100
+ ---
101
+
102
+ ## Interpreting results
103
+
104
+ Each result contains:
105
+ - `path` — source file (absolute path)
106
+ - `content` — the matched text excerpt
107
+ - `score` — relevance score (0–1); above 0.5 is a strong match
108
+ - `collection` — which indexed directory the file is from
109
+
110
+ If scores are all below 0.2, the query likely didn't match well.
111
+ Try rephrasing with more context, or check that the relevant files are in
112
+ an indexed collection (configured `paths[].name` or the built-in
113
+ `openclaw-memory` collection). `openclaw status` can confirm that the backend
114
+ is active and that indexing has completed.
115
+
116
+ ---
117
+
118
+ ## When search returns no results
119
+
120
+ Possible causes and actions:
121
+
122
+ | Cause | What to do |
123
+ |---|---|
124
+ | Initial indexing not finished (gateway just started) | Wait 15–30 seconds and retry |
125
+ | The content is not in any indexed collection | Tell the user the content is not indexed; suggest adding the directory |
126
+ | Query is too specific / uses exact names | Rephrase with context ("the meeting about X" instead of "X meeting notes") |
127
+ | Files use an unindexed extension | Only `*.md`, `*.txt`, `*.markdown` are indexed by default |
128
+
129
+ ---
130
+
131
+ ## CJK queries
132
+
133
+ Queries in Chinese, Japanese, or Korean work natively.
134
+ seekx uses Jieba-based tokenization for CJK text, which is superior to the
135
+ trigram approach used by OpenClaw's built-in backend.
136
+
137
+ ```
138
+ memory_search("用户增长策略讨论")
139
+ memory_search("技術的な決定の理由")
140
+ ```
141
+
142
+ ---
143
+
144
+ ## Combining with your own reasoning
145
+
146
+ After retrieving memory results, synthesize them with your own knowledge.
147
+ Cite the source file path so the user knows where the information came from.
148
+
149
+ Example response pattern:
150
+ > Based on your notes (`~/notes/projects/acme.md`), the Acme project's API
151
+ > uses JWT tokens with a 24-hour expiry. The authentication flow is described
152
+ > in the architecture document (`~/projects/docs/auth.md`).
package/src/config.ts ADDED
@@ -0,0 +1,118 @@
1
+ import { homedir } from "node:os";
2
+ import { join } from "node:path";
3
+ import { loadConfig, type ResolvedConfig, type ServiceEndpoint } from "seekx-core";
4
+
5
/**
 * One user-configured directory to index, i.e. one validated item of the
 * plugin config's `paths` array.
 */
export interface ExtraPath {
  /** Name the directory is registered under as a collection. */
  name: string;
  /** Directory location as written in the config (a leading `~` is allowed). */
  path: string;
  /** Optional file pattern for the collection; absent when not configured. */
  pattern?: string;
}

/**
 * Raw shape of what OpenClaw puts in pluginConfig.
 *
 * Every field is optional; missing values are filled in by
 * resolvePluginConfig(). `paths` is typed `unknown` because it is validated
 * at runtime before use.
 */
export interface RawPluginConfig {
  dbPath?: string;
  paths?: unknown;
  apiKey?: string;
  baseUrl?: string;
  embedModel?: string;
  rerankModel?: string;
  expandModel?: string;
  searchLimit?: number;
  refreshIntervalMs?: number;
  includeOpenClawMemory?: boolean;
}

/** Fully resolved plugin configuration, as returned by resolvePluginConfig(). */
export interface SeekxPluginConfig {
  /** Index database file; defaults to `~/.seekx/openclaw.db`. */
  dbPath: string;
  /** Validated entries of the raw `paths` array (empty when none were given). */
  extraPaths: ExtraPath[];
  /** Embedding endpoint; fields are empty strings when nothing is configured. */
  embed: ServiceEndpoint;
  /** Reranking endpoint, or null when reranking is not configured. */
  rerank: ServiceEndpoint | null;
  /** Query-expansion endpoint, or null when expansion is not configured. */
  expand: ServiceEndpoint | null;
  /** Default result count; 6 unless overridden. */
  searchLimit: number;
  /** Interval between periodic full index passes in ms; 300 000 unless overridden. */
  refreshIntervalMs: number;
  /** Whether OpenClaw's own memory workspace is indexed; true unless overridden. */
  includeOpenClawMemory: boolean;
}
35
+
36
/**
 * Validate and normalize the `paths` value of the raw plugin config.
 *
 * @param paths - Untrusted value taken from pluginConfig. `null`/`undefined`
 *   means "no extra directories".
 * @returns One ExtraPath per entry with `name`, `path` and `pattern` trimmed.
 *   `pattern` is left out when it is absent or blank.
 * @throws Error when `paths` is not an array, when an entry is not a plain
 *   object, when `name`/`path` is not a non-empty string, or when `pattern`
 *   is present but not a string.
 */
function normalizeExtraPaths(paths: unknown): ExtraPath[] {
  if (paths == null) return [];
  if (!Array.isArray(paths)) {
    throw new Error("Invalid plugin config: paths must be an array of { name, path, pattern? }");
  }

  return paths.map((entry: unknown, index: number): ExtraPath => {
    // typeof [] === "object", so arrays need an explicit rejection; otherwise
    // they would be reported as a misleading "name must be a non-empty string".
    if (entry === null || typeof entry !== "object" || Array.isArray(entry)) {
      throw new Error(`Invalid plugin config: paths[${index}] must be an object`);
    }

    const { name, path, pattern } = entry as {
      name?: unknown;
      path?: unknown;
      pattern?: unknown;
    };

    if (typeof name !== "string" || name.trim() === "") {
      throw new Error(`Invalid plugin config: paths[${index}].name must be a non-empty string`);
    }
    if (typeof path !== "string" || path.trim() === "") {
      throw new Error(`Invalid plugin config: paths[${index}].path must be a non-empty string`);
    }
    if (pattern != null && typeof pattern !== "string") {
      throw new Error(`Invalid plugin config: paths[${index}].pattern must be a string`);
    }

    // Trim the pattern like name/path so a whitespace-only value counts as
    // "not configured" instead of becoming a pattern that matches nothing.
    const trimmedPattern = typeof pattern === "string" ? pattern.trim() : "";

    return {
      name: name.trim(),
      path: path.trim(),
      ...(trimmedPattern ? { pattern: trimmedPattern } : {}),
    };
  });
}
70
+
71
/**
 * Merge the OpenClaw plugin config with the seekx config file.
 *
 * Precedence (high → low):
 *   1. pluginConfig fields (plugins.entries.seekx.config in openclaw.json)
 *   2. ~/.seekx/config.yml (seekx's own config file)
 *   3. Built-in defaults
 *
 * Rerank and expand endpoints:
 *   - When pluginConfig names a model, the endpoint reuses the resolved embed
 *     baseUrl/apiKey (the plugin config describes a single provider).
 *   - Otherwise the complete endpoint from the config file is used as-is, so
 *     a provider that differs from the embed provider keeps its own
 *     baseUrl/apiKey. The result is null when neither source configures one.
 *
 * @param raw - Raw plugin config from OpenClaw.
 * @param loadConfigFn - Injectable config loader; defaults to the real
 *   loadConfig() so production code needs no changes. Tests pass a stub.
 * @returns The resolved configuration with all defaults applied.
 * @throws Error when `raw.paths` is malformed (raised by normalizeExtraPaths).
 */
export function resolvePluginConfig(
  raw: RawPluginConfig,
  loadConfigFn: () => ResolvedConfig | null = loadConfig,
): SeekxPluginConfig {
  const base: ResolvedConfig | null = loadConfigFn();

  const baseUrl = raw.baseUrl ?? base?.embed.baseUrl ?? "";
  const apiKey = raw.apiKey ?? base?.embed.apiKey ?? "";
  const embedModel = raw.embedModel ?? base?.embed.model ?? "";

  const embed: ServiceEndpoint = { baseUrl, apiKey, model: embedModel };

  // Only a model named in the plugin config may borrow the embed credentials.
  // Deriving the model from the config file first (as before) meant the file's
  // own rerank/expand baseUrl and apiKey were never used.
  const endpointFor = (
    pluginModel: string | undefined,
    fileEndpoint: ServiceEndpoint | null | undefined,
  ): ServiceEndpoint | null =>
    pluginModel ? { baseUrl, apiKey, model: pluginModel } : (fileEndpoint ?? null);

  return {
    dbPath: raw.dbPath ?? join(homedir(), ".seekx", "openclaw.db"),
    extraPaths: normalizeExtraPaths(raw.paths),
    embed,
    rerank: endpointFor(raw.rerankModel, base?.rerank),
    expand: endpointFor(raw.expandModel, base?.expand),
    searchLimit: raw.searchLimit ?? 6,
    refreshIntervalMs: raw.refreshIntervalMs ?? 300_000,
    includeOpenClawMemory: raw.includeOpenClawMemory ?? true,
  };
}
package/src/index.ts ADDED
@@ -0,0 +1,36 @@
1
+ import { definePluginEntry } from "openclaw/plugin-sdk/plugin-entry";
2
+ import { resolvePluginConfig, type RawPluginConfig } from "./config.ts";
3
+ import { SeekxLifecycle } from "./lifecycle.ts";
4
+ import { buildMemorySearchManager } from "./runtime.ts";
5
+
6
/**
 * OpenClaw plugin entry for the seekx memory backend.
 *
 * register() resolves the plugin config, starts the lifecycle in the
 * background, exposes the memory runtime, and registers a service whose
 * stop() hook shuts the lifecycle down.
 */
export default definePluginEntry({
  id: "seekx",
  name: "seekx",
  description: "Local-first hybrid search memory backend: BM25 + vector + rerank + CJK",
  kind: "memory",

  register(api) {
    const raw = (api.pluginConfig ?? {}) as RawPluginConfig;
    const config = resolvePluginConfig(raw);
    const lifecycle = new SeekxLifecycle(config);

    // Start eagerly so the initial index is underway before the first
    // memory_search call arrives. start() is non-blocking; indexing runs
    // in the background. The rejection is handled here because nothing awaits
    // this promise: an unhandled rejection could otherwise take the host
    // process down. Later callers of start() still observe the failure.
    void lifecycle.start().catch((err: unknown) => {
      const reason = err instanceof Error ? err.message : String(err);
      console.error(`[seekx-openclaw] startup failed: ${reason}`);
    });

    api.registerMemoryRuntime({
      getMemorySearchManager: () => buildMemorySearchManager(lifecycle),
      // Tolerate a missing config object, mirroring the pluginConfig guard above.
      resolveMemoryBackendConfig: (rawCfg: unknown) =>
        resolvePluginConfig((rawCfg ?? {}) as RawPluginConfig),
    });

    // Register a background service so OpenClaw calls stop() on shutdown,
    // giving the watcher and database a chance to close cleanly.
    // id is required by the real OpenClawPluginService type.
    api.registerService({
      id: "seekx-lifecycle",
      stop: () => lifecycle.stop(),
    } as Parameters<typeof api.registerService>[0]);
  },
});
@@ -0,0 +1,203 @@
1
+ import { mkdirSync, existsSync } from "node:fs";
2
+ import { homedir } from "node:os";
3
+ import { dirname, resolve } from "node:path";
4
+ import {
5
+ openDatabase,
6
+ loadSqliteVec,
7
+ Store,
8
+ SeekxClient,
9
+ Watcher,
10
+ indexDirectory,
11
+ type Database,
12
+ type CollectionWatch,
13
+ } from "seekx-core";
14
+ import { type SeekxPluginConfig } from "./config.ts";
15
+
16
/** Collection name for OpenClaw's built-in memory files. */
const OPENCLAW_MEMORY_COLLECTION = "openclaw-memory";

/**
 * Absolute path to OpenClaw's default agent workspace, which contains
 * MEMORY.md and the memory/ daily-note tree.
 *
 * Built with path.resolve() rather than string interpolation so the result
 * uses the platform's native separators, matching the resolve()-normalized
 * paths used for document lookups elsewhere in this module.
 */
function openClawMemoryPath(): string {
  return resolve(homedir(), ".openclaw", "workspace");
}
26
+
27
/**
 * Owns the long-lived resources of the seekx backend — database, store,
 * optional API client, file watcher and periodic refresh timer — and runs
 * index passes one at a time through a promise queue.
 */
export class SeekxLifecycle {
  readonly config: SeekxPluginConfig;
  /** Assigned during start(); not usable before start() has resolved. */
  db!: Database;
  /** Assigned during start(); not usable before start() has resolved. */
  store!: Store;
  /** Null when the embed endpoint has no baseUrl or no model. */
  client: SeekxClient | null = null;
  private watcher: Watcher | null = null;
  private refreshTimer: ReturnType<typeof setInterval> | null = null;
  private startPromise: Promise<void> | null = null;
  private stopPromise: Promise<void> | null = null;
  private initialIndexPromise: Promise<void> = Promise.resolve();
  /** Tail of the serialized index-pass chain; failures are swallowed here. */
  private indexQueue: Promise<void> = Promise.resolve();
  private stopping = false;
  private readonly shutdown = () => void this.stop();

  constructor(config: SeekxPluginConfig) {
    this.config = config;
  }

  /**
   * Expand a leading `~` that stands for the current user's home directory.
   *
   * Only a bare `~` or a `~` followed by a path separator is expanded;
   * `~name` forms are returned untouched rather than being glued onto the
   * home directory. Plain concatenation is used so `$` characters in the
   * home directory are never treated as replacement patterns.
   */
  private static expandHome(path: string): string {
    if (path === "~") return homedir();
    if (path.startsWith("~/") || path.startsWith("~\\")) {
      return homedir() + path.slice(1);
    }
    return path;
  }

  /**
   * Open the database, register collections, run initial background indexing,
   * and start the file watcher. Idempotent — safe to call multiple times.
   */
  async start(): Promise<void> {
    if (this.startPromise) return this.startPromise;
    this.stopping = false;
    this.stopPromise = null;
    this.startPromise = this._start();
    return this.startPromise;
  }

  /** Resolve once startup and the initial index pass have both finished. */
  async waitForSearchReady(): Promise<void> {
    await this.start();
    await this.initialIndexPromise;
  }

  /**
   * Map a caller-supplied path to a readable one.
   *
   * @returns The resolve()-normalized path when some collection has a
   *   document stored under it, otherwise null.
   */
  async resolveReadablePath(path: string): Promise<string | null> {
    await this.start();
    const normalizedPath = resolve(path);
    for (const collection of this.store.listCollections()) {
      if (this.store.findDocumentByPath(collection.name, normalizedPath)) {
        return normalizedPath;
      }
    }
    return null;
  }

  /** Queue a full incremental index pass and wait for completion. */
  async _runFullIndex(): Promise<void> {
    await this.start();
    await this.queueFullIndex();
  }

  private async _start(): Promise<void> {
    // Ensure the database directory exists before opening.
    const dbDir = dirname(this.config.dbPath);
    if (!existsSync(dbDir)) mkdirSync(dbDir, { recursive: true });

    this.db = await openDatabase(this.config.dbPath);
    const vecLoaded = await loadSqliteVec(this.db);
    this.store = new Store(this.db, vecLoaded);

    // Wire the store's LLM cache into the client so expand/rerank responses
    // are persisted in the database. The TTL is whatever the client passes
    // as ttlSec; it is not chosen here.
    const llmCache = {
      get: (key: string) => this.store.getCachedLLM(key),
      set: (key: string, value: string, ttlSec?: number) =>
        this.store.setCachedLLM(key, value, ttlSec),
    };

    const { embed, rerank, expand } = this.config;
    this.client =
      embed.baseUrl && embed.model
        ? new SeekxClient(embed, rerank, expand, llmCache)
        : null;

    // Register OpenClaw's own memory files as a collection.
    if (this.config.includeOpenClawMemory) {
      const memPath = openClawMemoryPath();
      if (existsSync(memPath)) {
        this.store.addCollection({
          name: OPENCLAW_MEMORY_COLLECTION,
          path: memPath,
          pattern: "**/*.md",
        });
      }
    }

    // Register user-configured extra paths; directories that do not exist
    // are skipped silently.
    for (const ep of this.config.extraPaths) {
      const dir = SeekxLifecycle.expandHome(ep.path);
      if (!existsSync(dir)) continue;
      this.store.addCollection({
        name: ep.name,
        path: dir,
        ...(ep.pattern ? { pattern: ep.pattern } : {}),
      });
    }

    const collectionWatches: CollectionWatch[] = this.store
      .listCollections()
      .map((c) => ({ collection: c.name, rootPath: c.path }));

    this.watcher = new Watcher(this.store, this.client, collectionWatches, {
      debounceMs: 1000,
    });
    this.watcher.start();

    // The plugin keeps its own index database by default, so collection sync
    // via a separate seekx CLI process is not a supported workflow.
    if (this.config.refreshIntervalMs > 0) {
      this.refreshTimer = setInterval(() => {
        void this.queueFullIndex();
      }, this.config.refreshIntervalMs);
    }

    // NOTE(review): stop() is asynchronous, so the "exit" hook can only run
    // its synchronous prefix; a clean close relies on SIGTERM or on the host
    // calling stop() explicitly.
    process.once("SIGTERM", this.shutdown);
    process.once("exit", this.shutdown);

    // Startup stays non-blocking for plugin activation, but searches wait for
    // this initial pass so they do not incorrectly return an empty result set
    // before the first index has been built.
    this.initialIndexPromise = this.queueFullIndex();
  }

  /**
   * Append one full index pass to the queue.
   *
   * @returns A promise for that pass. It is already resolved when a stop is
   *   in progress, in which case nothing is queued.
   */
  private queueFullIndex(): Promise<void> {
    if (this.stopping) return Promise.resolve();
    const run = this.indexQueue.then(async () => {
      if (this.stopping) return;
      await this.runFullIndexNow();
    });
    // Keep the chain alive after a failed pass; the caller still sees `run`.
    this.indexQueue = run.catch(() => {});
    return run;
  }

  /** Run incremental indexing across all registered collections immediately. */
  private async runFullIndexNow(): Promise<void> {
    const collections = this.store.listCollections();
    for (const col of collections) {
      if (this.stopping) break;
      try {
        await indexDirectory(
          this.store,
          this.client,
          col.name,
          col.path,
          col.pattern,
          col.ignore_json ? (JSON.parse(col.ignore_json) as string[]) : [],
        );
      } catch (err) {
        // One failing collection must not prevent the others from indexing.
        console.error(`[seekx-openclaw] indexDirectory error for "${col.name}": ${err}`);
      }
    }
  }

  /**
   * Release the timer, watcher, store and process hooks.
   *
   * No-op when start() was never called; concurrent calls share one promise.
   * An in-flight start() is awaited first so that resources it is still
   * creating are torn down too instead of leaking.
   */
  async stop(): Promise<void> {
    if (this.stopPromise) return this.stopPromise;
    const starting = this.startPromise;
    if (!starting) return;

    this.stopping = true;
    this.stopPromise = (async () => {
      // A failed start is still followed by teardown of whatever it created.
      await starting.catch(() => {});

      if (this.refreshTimer !== null) {
        clearInterval(this.refreshTimer);
        this.refreshTimer = null;
      }
      await this.watcher?.stop();
      this.watcher = null;
      // Let a running index pass finish before closing the store under it.
      await this.indexQueue.catch(() => {});
      this.store?.close();
      this.client = null;
      process.off("SIGTERM", this.shutdown);
      process.off("exit", this.shutdown);
      this.startPromise = null;
    })();

    return this.stopPromise;
  }
}
@@ -0,0 +1,116 @@
1
/**
 * Ambient declarations for the slice of the OpenClaw plugin SDK that this
 * plugin touches.
 *
 * Only the narrow surface used by the plugin is declared here; the actual
 * types are provided at runtime by the installed openclaw package.
 * Generated from docs.openclaw.ai/plugins/sdk-overview (2026-04).
 */

declare module "openclaw/plugin-sdk/plugin-entry" {
  /** One entry in the list returned by MemorySearchManager.search(). */
  export interface MemorySearchResult {
    path: string;
    content: string;
    score: number;
    collection?: string;
    title?: string | null;
  }

  /** Options accepted by MemorySearchManager.search(). */
  export interface MemorySearchOpts {
    limit?: number;
    collection?: string;
  }

  /**
   * Subset of the MemoryProviderStatus shape returned by the real OpenClaw SDK.
   * OpenClaw's status scanner calls `status()` synchronously.
   */
  export interface BackendStatus {
    backend: string;
    provider?: string;
    dbPath?: string;
    /** Number of documents (files); corresponds to MemoryProviderStatus.files. */
    files?: number;
    chunks?: number;
    /** Legacy alias retained for backward compatibility with older test assertions. */
    documents?: number;
    embeddedChunks?: number;
    vectorSearchAvailable?: boolean;
    embedModel?: string | null;
    collections?: { name: string; path: string; docCount: number }[];
    vector?: { enabled: boolean; available?: boolean };
    custom?: Record<string, unknown>;
  }

  /**
   * Interface OpenClaw invokes when memory_search / memory_get fire.
   * Inferred from the Honcho plugin source and openclaw/plugin-sdk/memory-host-search.
   */
  export interface MemorySearchManager {
    search(query: string, opts: MemorySearchOpts): Promise<MemorySearchResult[]>;
    readFile(path: string): Promise<string>;
    /** Synchronous: OpenClaw's status scanner calls this without await. */
    status(): BackendStatus;
    probeEmbeddingAvailability(): Promise<boolean>;
    probeVectorAvailability(): Promise<boolean>;
  }

  /**
   * Argument shape for api.registerMemoryRuntime().
   * This is the legacy-compatible API; registerMemoryCapability() is preferred,
   * but its interface is not yet publicly documented (2026-04).
   */
  export interface MemoryRuntimeRegistration {
    getMemorySearchManager(): { manager: MemorySearchManager };
    resolveMemoryBackendConfig(raw: unknown): unknown;
  }

  /** Context handed to service start/stop hooks; no members are declared in this stub. */
  export interface PluginServiceContext {}

  /** A background service registered through OpenClawPluginApi.registerService(). */
  export interface PluginService {
    /** Unique service identifier required by OpenClaw's service registry. */
    id: string;
    start?: (ctx: PluginServiceContext) => void | Promise<void>;
    stop?: (ctx: PluginServiceContext) => void | Promise<void>;
  }

  /** Logger exposed to the plugin as OpenClawPluginApi.logger. */
  export interface PluginLogger {
    debug(msg: string, ...args: unknown[]): void;
    info(msg: string, ...args: unknown[]): void;
    warn(msg: string, ...args: unknown[]): void;
    error(msg: string, ...args: unknown[]): void;
  }

  /** API object passed to the plugin's register() callback. */
  export interface OpenClawPluginApi {
    /** Plugin id as declared in openclaw.plugin.json. */
    id: string;
    /** Plugin-specific config taken from plugins.entries.<id>.config. */
    pluginConfig: Record<string, unknown>;
    /** Scoped logger. */
    logger: PluginLogger;

    /**
     * Register a memory runtime adapter (legacy-compatible exclusive slot).
     * Makes this plugin the active memory backend.
     */
    registerMemoryRuntime(registration: MemoryRuntimeRegistration): void;

    /**
     * Register a long-lived background service.
     * stop() is invoked on gateway shutdown so the service can flush and
     * close its resources.
     */
    registerService(service: PluginService): void;
  }

  /** Options accepted by definePluginEntry(). */
  export interface DefinePluginEntryOptions {
    /** Must match the id in openclaw.plugin.json. */
    id: string;
    name: string;
    description: string;
    /** Set to "memory" to participate in plugins.slots.memory. */
    kind?: string;
    register(api: OpenClawPluginApi): void;
  }

  export function definePluginEntry(opts: DefinePluginEntryOptions): unknown;
}
package/src/runtime.ts ADDED
@@ -0,0 +1,141 @@
1
+ import { readFileSync } from "node:fs";
2
+ import { hybridSearch } from "seekx-core";
3
+ import type {
4
+ MemorySearchManager,
5
+ MemorySearchResult,
6
+ MemorySearchOpts,
7
+ } from "openclaw/plugin-sdk/plugin-entry";
8
+ import type { SeekxLifecycle } from "./lifecycle.ts";
9
+ import { readPersistedSeekxStatusSync } from "./status-db.ts";
10
+
11
/** Index statistics from which the status response is assembled. */
type StatusSnapshot = {
  totalDocuments: number;
  totalChunks: number;
  embeddedChunks: number;
  vectorSearchAvailable: boolean;
  embedModel: string | null;
  collections: { name: string; path: string; docCount: number }[];
};
23
+
24
/**
 * Shape an index snapshot into the status object reported for the "seekx" backend.
 *
 * @param lc - Lifecycle whose config supplies the database path.
 * @param snapshot - Document/chunk counts, vector availability and per-collection stats.
 * @returns Status object with top-level `files`/`chunks`, a `vector` flag pair
 *   and the remaining details under `custom`. Collection entries are reduced
 *   to `name`, `path` and `docCount`.
 */
function buildStatusResponse(lc: SeekxLifecycle, snapshot: StatusSnapshot) {
  const { totalDocuments, totalChunks, embeddedChunks, embedModel, vectorSearchAvailable } =
    snapshot;
  // Copy only the reported fields so extra properties on the snapshot rows do not leak out.
  const collections = snapshot.collections.map(({ name, path, docCount }) => ({
    name,
    path,
    docCount,
  }));

  return {
    backend: "seekx" as const,
    provider: "seekx",
    dbPath: lc.config.dbPath,
    files: totalDocuments,
    chunks: totalChunks,
    vector: { enabled: vectorSearchAvailable, available: vectorSearchAvailable },
    custom: { embeddedChunks, embedModel, collections },
  };
}
46
+
47
/**
 * Build the MemorySearchManager that OpenClaw's runtime calls for
 * memory_search and memory_get tool invocations.
 *
 * Precondition: lc.start() should have been called before search() or
 * readFile() fire. The probe methods await lc.start() themselves, and
 * status() falls back to the persisted index when the store is not open yet.
 *
 * @param lc - Lifecycle that owns the store, the client and the resolved config.
 * @returns The manager wrapped as `{ manager }`.
 */
export function buildMemorySearchManager(lc: SeekxLifecycle): { manager: MemorySearchManager } {
  // Score floor handed to hybridSearch as `minResultScore`.
  const MIN_RESULT_SCORE = 0.01;

  const manager: MemorySearchManager = {
    /**
     * memory_search implementation.
     *
     * Routes through seekx's full hybrid pipeline:
     *   query expansion → BM25 + vector kNN → RRF fusion → cross-encoder rerank
     *
     * Each stage degrades gracefully when the required service is unavailable:
     *   - no expand model      → original query only
     *   - no embed / sqlite-vec → BM25-only
     *   - no rerank model      → RRF-ranked order used directly
     *
     * `opts` defaults to an empty object: the SDK surface is only inferred, so
     * a host that omits the options argument must not crash the search.
     */
    async search(query: string, opts: MemorySearchOpts = {}): Promise<MemorySearchResult[]> {
      // Wait for the lifecycle to report search readiness before querying.
      await lc.waitForSearchReady();
      const limit = opts.limit ?? lc.config.searchLimit;
      const { results } = await hybridSearch(lc.store, lc.client, query, {
        limit,
        mode: "hybrid",
        useExpand: lc.config.expand !== null,
        useRerank: lc.config.rerank !== null,
        minResultScore: MIN_RESULT_SCORE,
        // Only restrict by collection when the caller asked for one.
        ...(opts.collection ? { collections: [opts.collection] } : {}),
      });

      return results.map((r) => ({
        path: r.file,
        content: r.snippet,
        score: r.score,
        collection: r.collection,
        title: r.title ?? null,
      }));
    },

    /**
     * memory_get implementation.
     *
     * Reads the live file from disk rather than the indexed snapshot, so the
     * agent sees the current version of a document. Returns an empty string
     * when the path cannot be resolved to a readable file or the read fails
     * (for example because the file was deleted after indexing).
     */
    async readFile(path: string): Promise<string> {
      const readablePath = await lc.resolveReadablePath(path);
      if (!readablePath) return "";
      try {
        return readFileSync(readablePath, "utf-8");
      } catch {
        return "";
      }
    },

    /**
     * status() is called SYNCHRONOUSLY by OpenClaw's status scanner.
     *
     * Field names follow MemoryProviderStatus (the real SDK type):
     *   - files  → document count
     *   - chunks → chunk count
     *
     * When the lifecycle has not yet completed start() (fresh process, status
     * probe runs before the DB is open), fall back to a direct SQLite read of
     * the persisted index state. If that is unavailable too, report zero counts.
     */
    status() {
      const snapshot = lc.store?.getStatus() ?? readPersistedSeekxStatusSync(lc.config.dbPath);
      if (!snapshot) {
        return {
          backend: "seekx" as const,
          provider: "seekx",
          dbPath: lc.config.dbPath,
          files: 0,
          chunks: 0,
        };
      }
      return buildStatusResponse(lc, snapshot);
    },

    /** Resolves to true when the lifecycle holds a client after start(). */
    async probeEmbeddingAvailability(): Promise<boolean> {
      await lc.start();
      return lc.client !== null;
    },

    /** Resolves to the store's `vectorSearchAvailable` flag after start(). */
    async probeVectorAvailability(): Promise<boolean> {
      await lc.start();
      return lc.store.getStatus().vectorSearchAvailable;
    },
  };

  return { manager };
}
@@ -0,0 +1,178 @@
1
+ import { existsSync } from "node:fs";
2
+ import { createRequire } from "node:module";
3
+
4
+ const require = createRequire(import.meta.url);
5
+
6
/** Minimal prepared-statement surface this module relies on. */
type SqliteStatement = {
  all(...params: unknown[]): unknown;
  get(...params: unknown[]): unknown;
};
10
+
11
/** Minimal database-handle surface shared by both SQLite drivers used here. */
type SqliteDatabase = {
  close(): void;
  prepare(sql: string): SqliteStatement;
};
15
+
16
/** Constructor signature assumed for the better-sqlite3 export loaded at runtime. */
type BetterSqlite3Ctor = new (
  path: string,
  options?: { readonly?: boolean; fileMustExist?: boolean; timeout?: number },
) => SqliteDatabase;
24
+
25
/**
 * Constructor signature assumed for `Database` from bun:sqlite.
 * Its handle exposes `query()` rather than `prepare()`.
 */
type BunSqliteCtor = new (
  path: string,
  options?: { readonly?: boolean; create?: boolean },
) => {
  close(): void;
  query(sql: string): SqliteStatement;
};
35
+
36
/** Index statistics read straight from the persisted SQLite database. */
export interface PersistedSeekxStatus {
  totalDocuments: number;
  totalChunks: number;
  embeddedChunks: number;
  vectorSearchAvailable: boolean;
  embedModel: string | null;
  collections: {
    name: string;
    path: string;
    docCount: number;
    chunkCount: number;
  }[];
}
49
+
50
/**
 * Coerce a value read from SQLite into a finite number.
 *
 * Numbers are returned as-is, bigints are converted with `Number()`, and
 * strings are parsed as base-10 integers. Anything else, and any result that
 * is not finite (NaN or ±Infinity), becomes 0, so callers always receive a
 * usable count.
 *
 * @param value - Raw column value of unknown type.
 * @returns The finite numeric value, or 0 when none can be derived.
 */
function toNumber(value: unknown): number {
  let candidate: number;
  if (typeof value === "number") {
    candidate = value;
  } else if (typeof value === "bigint") {
    candidate = Number(value);
  } else if (typeof value === "string") {
    candidate = Number.parseInt(value, 10);
  } else {
    return 0;
  }
  // One shared guard so no branch can leak NaN or Infinity to the caller.
  return Number.isFinite(candidate) ? candidate : 0;
}
59
+
60
/**
 * Collect the names of all tables listed in `sqlite_master`.
 *
 * @param db - Open database handle.
 * @returns Set of table names; rows whose `name` is not a non-empty string are ignored.
 */
function readTableNames(db: SqliteDatabase): Set<string> {
  const statement = db.prepare("SELECT name FROM sqlite_master WHERE type = 'table'");
  const rows = statement.all() as Array<{ name?: unknown }>;

  const names = new Set<string>();
  for (const row of rows) {
    const candidate = row.name;
    if (typeof candidate === "string" && candidate.length > 0) {
      names.add(candidate);
    }
  }
  return names;
}
70
+
71
/**
 * Look up a single value in the `meta` table.
 *
 * @param db - Open database handle.
 * @param key - Meta key to look up.
 * @returns The stored value when it is a string; otherwise null (no row or non-string value).
 */
function readMetaValue(db: SqliteDatabase, key: string): string | null {
  const statement = db.prepare("SELECT value FROM meta WHERE key = ?");
  const row = statement.get(key) as { value?: unknown } | null;
  const stored = row?.value;
  if (typeof stored !== "string") {
    return null;
  }
  return stored;
}
75
+
76
/** Report whether `process.versions` carries a string `bun` entry. */
function isBunRuntime(): boolean {
  const { bun } = process.versions as { bun?: string };
  return typeof bun === "string";
}
79
+
80
/**
 * Open an existing SQLite database read-only with whichever driver matches
 * the current runtime.
 *
 * Under Bun, `bun:sqlite` is used and its `query()` method is exposed as
 * `prepare()`. Otherwise `better-sqlite3` is resolved relative to the
 * `seekx-core` package and opened with a 1000 ms timeout.
 *
 * @param dbPath - Path to the database file; neither driver is asked to create it.
 * @returns A handle exposing `prepare()` and `close()`.
 */
function openReadonlyDatabase(dbPath: string): SqliteDatabase {
  if (!isBunRuntime()) {
    // Resolve better-sqlite3 from seekx-core's location rather than from this module.
    const requireFromCore = createRequire(require.resolve("seekx-core"));
    const BetterSqlite3 = requireFromCore("better-sqlite3") as BetterSqlite3Ctor;
    return new BetterSqlite3(dbPath, { readonly: true, fileMustExist: true, timeout: 1000 });
  }

  const bunSqlite = require("bun:sqlite") as { Database: BunSqliteCtor };
  const handle = new bunSqlite.Database(dbPath, { readonly: true, create: false });
  return {
    prepare: (sql: string) => handle.query(sql),
    close: () => {
      handle.close();
    },
  };
}
102
+
103
/**
 * Read persisted seekx index status directly from SQLite without constructing
 * Store. This is used by short-lived OpenClaw CLI probes where lifecycle.start()
 * has not finished yet, but the gateway has already indexed content.
 *
 * The read is best-effort and never throws: a missing file, a database without
 * the core tables, or any error while opening, querying or closing yields null.
 *
 * @param dbPath - Path to the seekx SQLite database file.
 * @returns Document/chunk/embedding counts, vector availability, the embed
 *   model and per-collection counts; or null when the status cannot be read.
 */
export function readPersistedSeekxStatusSync(dbPath: string): PersistedSeekxStatus | null {
  if (!existsSync(dbPath)) return null;

  let db: SqliteDatabase | null = null;
  try {
    const conn = openReadonlyDatabase(dbPath);
    db = conn;

    // Run a `SELECT ... AS n` query and coerce its single `n` column to a number.
    const countRows = (sql: string): number =>
      toNumber((conn.prepare(sql).get() as { n?: unknown } | null)?.n);

    const tables = readTableNames(conn);
    const hasCoreTables =
      tables.has("collections") && tables.has("documents") && tables.has("chunks");
    if (!hasCoreTables) return null;

    const totalDocuments = countRows("SELECT COUNT(*) AS n FROM documents");
    const totalChunks = countRows("SELECT COUNT(*) AS n FROM chunks");

    let embeddedChunks = 0;
    if (tables.has("vec_chunks")) {
      try {
        embeddedChunks = countRows("SELECT COUNT(*) AS n FROM vec_chunks");
      } catch {
        // NOTE(review): presumably vec_chunks cannot be queried when the vector
        // extension is not loaded on this connection; confirm. Report zero
        // embedded chunks instead of failing the whole probe.
        embeddedChunks = 0;
      }
    }

    const hasMeta = tables.has("meta");
    const embedModel = hasMeta ? readMetaValue(conn, "embed_model") : null;
    const embedDim = hasMeta ? readMetaValue(conn, "embed_dim") : null;
    // Vector search is reported available only when the table exists AND a dimension was recorded.
    const vectorSearchAvailable = tables.has("vec_chunks") && embedDim !== null;

    const collections = conn
      .prepare(
        `SELECT c.name, c.path,
                COUNT(DISTINCT d.id) AS doc_count,
                COUNT(ch.id) AS chunk_count
         FROM collections c
         LEFT JOIN documents d ON d.collection = c.name
         LEFT JOIN chunks ch ON ch.doc_id = d.id
         GROUP BY c.name
         ORDER BY c.name`,
      )
      .all() as Array<{
      name?: unknown;
      path?: unknown;
      doc_count?: unknown;
      chunk_count?: unknown;
    }>;

    return {
      totalDocuments,
      totalChunks,
      embeddedChunks,
      vectorSearchAvailable,
      embedModel,
      collections: collections.map((row) => ({
        name: typeof row.name === "string" ? row.name : "",
        path: typeof row.path === "string" ? row.path : "",
        docCount: toNumber(row.doc_count),
        chunkCount: toNumber(row.chunk_count),
      })),
    };
  } catch {
    return null;
  } finally {
    try {
      db?.close();
    } catch {
      // A failing close() must not replace the result computed above with an
      // exception: this probe is called synchronously and is best-effort.
    }
  }
}