@memtensor/memos-local-openclaw-plugin 1.0.2-beta.5 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/capture/index.js +52 -8
- package/dist/capture/index.js.map +1 -1
- package/dist/ingest/chunker.d.ts +3 -4
- package/dist/ingest/chunker.d.ts.map +1 -1
- package/dist/ingest/chunker.js +19 -24
- package/dist/ingest/chunker.js.map +1 -1
- package/dist/ingest/providers/anthropic.d.ts +3 -1
- package/dist/ingest/providers/anthropic.d.ts.map +1 -1
- package/dist/ingest/providers/anthropic.js +79 -39
- package/dist/ingest/providers/anthropic.js.map +1 -1
- package/dist/ingest/providers/bedrock.d.ts +3 -1
- package/dist/ingest/providers/bedrock.d.ts.map +1 -1
- package/dist/ingest/providers/bedrock.js +79 -39
- package/dist/ingest/providers/bedrock.js.map +1 -1
- package/dist/ingest/providers/gemini.d.ts +3 -1
- package/dist/ingest/providers/gemini.d.ts.map +1 -1
- package/dist/ingest/providers/gemini.js +77 -39
- package/dist/ingest/providers/gemini.js.map +1 -1
- package/dist/ingest/providers/index.d.ts +3 -1
- package/dist/ingest/providers/index.d.ts.map +1 -1
- package/dist/ingest/providers/index.js +70 -30
- package/dist/ingest/providers/index.js.map +1 -1
- package/dist/ingest/providers/openai.d.ts +3 -1
- package/dist/ingest/providers/openai.d.ts.map +1 -1
- package/dist/ingest/providers/openai.js +80 -39
- package/dist/ingest/providers/openai.js.map +1 -1
- package/dist/ingest/task-processor.d.ts +1 -0
- package/dist/ingest/task-processor.d.ts.map +1 -1
- package/dist/ingest/task-processor.js +33 -9
- package/dist/ingest/task-processor.js.map +1 -1
- package/dist/ingest/worker.d.ts.map +1 -1
- package/dist/ingest/worker.js +29 -13
- package/dist/ingest/worker.js.map +1 -1
- package/dist/recall/engine.d.ts.map +1 -1
- package/dist/recall/engine.js +19 -14
- package/dist/recall/engine.js.map +1 -1
- package/dist/skill/bundled-memory-guide.d.ts +1 -5
- package/dist/skill/bundled-memory-guide.d.ts.map +1 -1
- package/dist/skill/bundled-memory-guide.js +38 -97
- package/dist/skill/bundled-memory-guide.js.map +1 -1
- package/dist/skill/evaluator.js +1 -1
- package/dist/storage/sqlite.d.ts +1 -2
- package/dist/storage/sqlite.d.ts.map +1 -1
- package/dist/storage/sqlite.js +90 -17
- package/dist/storage/sqlite.js.map +1 -1
- package/dist/tools/memory-get.d.ts.map +1 -1
- package/dist/tools/memory-get.js +1 -3
- package/dist/tools/memory-get.js.map +1 -1
- package/dist/types.d.ts +2 -2
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js +1 -1
- package/dist/types.js.map +1 -1
- package/dist/update-check.d.ts +21 -0
- package/dist/update-check.d.ts.map +1 -0
- package/dist/update-check.js +111 -0
- package/dist/update-check.js.map +1 -0
- package/dist/viewer/html.d.ts.map +1 -1
- package/dist/viewer/html.js +444 -182
- package/dist/viewer/html.js.map +1 -1
- package/dist/viewer/server.d.ts +1 -1
- package/dist/viewer/server.d.ts.map +1 -1
- package/dist/viewer/server.js +142 -78
- package/dist/viewer/server.js.map +1 -1
- package/index.ts +206 -198
- package/openclaw.plugin.json +3 -0
- package/package.json +5 -1
- package/scripts/postinstall.cjs +69 -2
- package/skill/memos-memory-guide/SKILL.md +73 -36
- package/src/capture/index.ts +52 -8
- package/src/ingest/chunker.ts +22 -30
- package/src/ingest/providers/anthropic.ts +89 -41
- package/src/ingest/providers/bedrock.ts +90 -41
- package/src/ingest/providers/gemini.ts +89 -41
- package/src/ingest/providers/index.ts +81 -35
- package/src/ingest/providers/openai.ts +90 -41
- package/src/ingest/task-processor.ts +29 -8
- package/src/ingest/worker.ts +31 -13
- package/src/recall/engine.ts +20 -13
- package/src/skill/bundled-memory-guide.ts +5 -96
- package/src/skill/evaluator.ts +1 -1
- package/src/storage/sqlite.ts +93 -21
- package/src/tools/memory-get.ts +1 -4
- package/src/types.ts +2 -9
- package/src/update-check.ts +96 -0
- package/src/viewer/html.ts +444 -182
- package/src/viewer/server.ts +101 -66
package/openclaw.plugin.json
CHANGED
|
@@ -4,6 +4,9 @@
|
|
|
4
4
|
"description": "Full-write local conversation memory with hybrid search (RRF + MMR + recency). Provides memory_search, memory_get, task_summary, memory_timeline, memory_viewer for layered retrieval.",
|
|
5
5
|
"kind": "memory",
|
|
6
6
|
"version": "0.1.11",
|
|
7
|
+
"skills": [
|
|
8
|
+
"skill/memos-memory-guide"
|
|
9
|
+
],
|
|
7
10
|
"homepage": "https://github.com/MemTensor/MemOS/tree/main/apps/memos-local-openclaw",
|
|
8
11
|
"configSchema": {
|
|
9
12
|
"type": "object",
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@memtensor/memos-local-openclaw-plugin",
|
|
3
|
-
"version": "1.0.2-beta.5",
|
|
3
|
+
"version": "1.0.2",
|
|
4
4
|
"description": "MemOS Local memory plugin for OpenClaw — full-write, hybrid-recall, progressive retrieval",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "index.ts",
|
|
@@ -20,6 +20,9 @@
|
|
|
20
20
|
"extensions": [
|
|
21
21
|
"./index.ts"
|
|
22
22
|
],
|
|
23
|
+
"skills": [
|
|
24
|
+
"skill/memos-memory-guide"
|
|
25
|
+
],
|
|
23
26
|
"installDependencies": true
|
|
24
27
|
},
|
|
25
28
|
"scripts": {
|
|
@@ -49,6 +52,7 @@
|
|
|
49
52
|
"better-sqlite3": "^12.6.2",
|
|
50
53
|
"posthog-node": "^5.28.0",
|
|
51
54
|
"puppeteer": "^24.38.0",
|
|
55
|
+
"semver": "^7.7.4",
|
|
52
56
|
"uuid": "^10.0.0"
|
|
53
57
|
},
|
|
54
58
|
"devDependencies": {
|
package/scripts/postinstall.cjs
CHANGED
|
@@ -270,10 +270,77 @@ try {
|
|
|
270
270
|
}
|
|
271
271
|
|
|
272
272
|
/* ═══════════════════════════════════════════════════════════
|
|
273
|
-
* Phase 2:
|
|
273
|
+
* Phase 2: Install bundled skill (memos-memory-guide)
|
|
274
274
|
* ═══════════════════════════════════════════════════════════ */
|
|
275
275
|
|
|
276
|
-
|
|
276
|
+
function installBundledSkill() {
|
|
277
|
+
phase(2, "安装记忆技能 / Install memory skill");
|
|
278
|
+
|
|
279
|
+
const home = process.env.HOME || process.env.USERPROFILE || "";
|
|
280
|
+
if (!home) { warn("Cannot determine HOME directory, skipping skill install."); return; }
|
|
281
|
+
|
|
282
|
+
const skillSrc = path.join(pluginDir, "skill", "memos-memory-guide", "SKILL.md");
|
|
283
|
+
if (!fs.existsSync(skillSrc)) {
|
|
284
|
+
warn("Bundled SKILL.md not found, skipping skill install.");
|
|
285
|
+
return;
|
|
286
|
+
}
|
|
287
|
+
|
|
288
|
+
let pluginVersion = "0.0.0";
|
|
289
|
+
try {
|
|
290
|
+
const pkg = JSON.parse(fs.readFileSync(path.join(pluginDir, "package.json"), "utf-8"));
|
|
291
|
+
pluginVersion = pkg.version || pluginVersion;
|
|
292
|
+
} catch { /* ignore */ }
|
|
293
|
+
|
|
294
|
+
const skillContent = fs.readFileSync(skillSrc, "utf-8");
|
|
295
|
+
const targets = [
|
|
296
|
+
path.join(home, ".openclaw", "workspace", "skills", "memos-memory-guide"),
|
|
297
|
+
path.join(home, ".openclaw", "skills", "memos-memory-guide"),
|
|
298
|
+
];
|
|
299
|
+
|
|
300
|
+
const meta = JSON.stringify({ ownerId: "memos-local-openclaw-plugin", slug: "memos-memory-guide", version: pluginVersion, publishedAt: Date.now() });
|
|
301
|
+
const origin = JSON.stringify({ version: 1, registry: "memos-local-openclaw-plugin", slug: "memos-memory-guide", installedVersion: pluginVersion, installedAt: Date.now() });
|
|
302
|
+
|
|
303
|
+
for (const dest of targets) {
|
|
304
|
+
try {
|
|
305
|
+
fs.mkdirSync(dest, { recursive: true });
|
|
306
|
+
fs.writeFileSync(path.join(dest, "SKILL.md"), skillContent, "utf-8");
|
|
307
|
+
fs.writeFileSync(path.join(dest, "_meta.json"), meta, "utf-8");
|
|
308
|
+
const clawHubDir = path.join(dest, ".clawhub");
|
|
309
|
+
fs.mkdirSync(clawHubDir, { recursive: true });
|
|
310
|
+
fs.writeFileSync(path.join(clawHubDir, "origin.json"), origin, "utf-8");
|
|
311
|
+
ok(`Skill installed → ${DIM}${dest}${RESET}`);
|
|
312
|
+
} catch (e) {
|
|
313
|
+
warn(`Could not install skill to ${dest}: ${e.message}`);
|
|
314
|
+
}
|
|
315
|
+
}
|
|
316
|
+
|
|
317
|
+
// Register in skills-lock.json so OpenClaw Dashboard can discover it
|
|
318
|
+
const lockPath = path.join(home, ".openclaw", "workspace", "skills-lock.json");
|
|
319
|
+
try {
|
|
320
|
+
let lockData = { version: 1, skills: {} };
|
|
321
|
+
if (fs.existsSync(lockPath)) {
|
|
322
|
+
lockData = JSON.parse(fs.readFileSync(lockPath, "utf-8"));
|
|
323
|
+
}
|
|
324
|
+
if (!lockData.skills) lockData.skills = {};
|
|
325
|
+
lockData.skills["memos-memory-guide"] = { source: "memos-local-openclaw-plugin", sourceType: "plugin", computedHash: "" };
|
|
326
|
+
fs.writeFileSync(lockPath, JSON.stringify(lockData, null, 2) + "\n", "utf-8");
|
|
327
|
+
ok("Registered in skills-lock.json");
|
|
328
|
+
} catch (e) {
|
|
329
|
+
warn(`Could not update skills-lock.json: ${e.message}`);
|
|
330
|
+
}
|
|
331
|
+
}
|
|
332
|
+
|
|
333
|
+
try {
|
|
334
|
+
installBundledSkill();
|
|
335
|
+
} catch (e) {
|
|
336
|
+
warn(`Skill install error: ${e.message}`);
|
|
337
|
+
}
|
|
338
|
+
|
|
339
|
+
/* ═══════════════════════════════════════════════════════════
|
|
340
|
+
* Phase 3: Verify better-sqlite3 native module
|
|
341
|
+
* ═══════════════════════════════════════════════════════════ */
|
|
342
|
+
|
|
343
|
+
phase(3, "检查 better-sqlite3 原生模块 / Check native module");
|
|
277
344
|
|
|
278
345
|
const sqliteModulePath = path.join(pluginDir, "node_modules", "better-sqlite3");
|
|
279
346
|
|
package/skill/memos-memory-guide/SKILL.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: memos-memory-guide
|
|
3
|
-
description: Use the MemOS Local memory system to search and use the user's past conversations. Use this skill whenever the user refers to past chats, their own preferences or history, or when you need to answer from prior context. When auto-recall returns nothing (long or unclear user query), generate your own short search query and call memory_search.
|
|
3
|
+
description: "Use the MemOS Local memory system to search and use the user's past conversations. Use this skill whenever the user refers to past chats, their own preferences or history, or when you need to answer from prior context. When auto-recall returns nothing (long or unclear user query), generate your own short search query and call memory_search. Available tools: memory_search, memory_get, memory_write_public, task_summary, skill_get, skill_search, skill_install, skill_publish, skill_unpublish, memory_timeline, memory_viewer."
|
|
4
4
|
---
|
|
5
5
|
|
|
6
6
|
# MemOS Local Memory — Agent Guide
|
|
@@ -17,91 +17,119 @@ This skill describes how to use the MemOS memory tools so you can reliably searc
|
|
|
17
17
|
|
|
18
18
|
### memory_search
|
|
19
19
|
|
|
20
|
-
- **What it does:**
|
|
20
|
+
- **What it does:** Search long-term conversation memory for past conversations, user preferences, decisions, and experiences. Returns relevant excerpts with `chunkId` and optionally `task_id`. Only returns memories belonging to the current agent or marked as public.
|
|
21
21
|
- **When to call:**
|
|
22
22
|
- The automatic recall did not run or returned nothing.
|
|
23
23
|
- The user's query is long or unclear — **generate a short query yourself** and call `memory_search(query="...")`.
|
|
24
24
|
- You need to search with a different angle (e.g. filter by `role='user'`).
|
|
25
|
-
- **Parameters:**
|
|
25
|
+
- **Parameters:**
|
|
26
|
+
- `query` (string, **required**) — Natural language search query.
|
|
27
|
+
- `maxResults` (number, optional) — Max results, default 20, max 20.
|
|
28
|
+
- `minScore` (number, optional) — Minimum score 0–1, default 0.45, floor 0.35.
|
|
29
|
+
- `role` (string, optional) — Filter by role: `'user'`, `'assistant'`, or `'tool'`. Use `'user'` to find what the user said.
|
|
30
|
+
|
|
31
|
+
### memory_get
|
|
32
|
+
|
|
33
|
+
- **What it does:** Get the full original text of a memory chunk. Use to verify exact details from a search hit.
|
|
34
|
+
- **When to call:** A `memory_search` hit looks relevant but you need to see the complete original content, not just the summary/excerpt.
|
|
35
|
+
- **Parameters:**
|
|
36
|
+
- `chunkId` (string, **required**) — The chunkId from a search hit.
|
|
37
|
+
- `maxChars` (number, optional) — Max characters to return (default 4000, max 12000).
|
|
26
38
|
|
|
27
39
|
### memory_write_public
|
|
28
40
|
|
|
29
|
-
- **What it does:**
|
|
30
|
-
- **When to call:** In multi-agent or collaborative scenarios, when you have
|
|
31
|
-
- **Parameters:**
|
|
41
|
+
- **What it does:** Write a piece of information to public memory. Public memories are visible to all agents during `memory_search`. Use for shared knowledge, team decisions, or cross-agent coordination information.
|
|
42
|
+
- **When to call:** In multi-agent or collaborative scenarios, when you have persistent information useful to everyone (e.g. shared decisions, conventions, configurations, workflows). Do not write session-only or purely private content.
|
|
43
|
+
- **Parameters:**
|
|
44
|
+
- `content` (string, **required**) — The content to write to public memory.
|
|
45
|
+
- `summary` (string, optional) — Short summary of the content.
|
|
32
46
|
|
|
33
47
|
### task_summary
|
|
34
48
|
|
|
35
|
-
- **What it does:**
|
|
36
|
-
- **When to call:** A `memory_search` hit included a `task_id` and you need the full
|
|
37
|
-
- **Parameters:**
|
|
49
|
+
- **What it does:** Get the detailed summary of a complete task: title, status, narrative summary, and related skills. Use when `memory_search` returns a hit with a `task_id` and you need the full story. Preserves critical information: URLs, file paths, commands, error codes, step-by-step instructions.
|
|
50
|
+
- **When to call:** A `memory_search` hit included a `task_id` and you need the full context of that task.
|
|
51
|
+
- **Parameters:**
|
|
52
|
+
- `taskId` (string, **required**) — The task_id from a memory_search hit.
|
|
38
53
|
|
|
39
54
|
### skill_get
|
|
40
55
|
|
|
41
|
-
- **What it does:**
|
|
56
|
+
- **What it does:** Retrieve a proven skill (experience guide) by `skillId` or by `taskId`. If you pass a `taskId`, the system will find the associated skill automatically.
|
|
42
57
|
- **When to call:** A search hit has a `task_id` and the task has a "how to do this again" guide. Use this to follow the same approach or reuse steps.
|
|
43
|
-
- **Parameters:**
|
|
58
|
+
- **Parameters:**
|
|
59
|
+
- `skillId` (string, optional) — Direct skill ID.
|
|
60
|
+
- `taskId` (string, optional) — Task ID — will look up the skill linked to this task.
|
|
61
|
+
- At least one of `skillId` or `taskId` must be provided.
|
|
44
62
|
|
|
45
63
|
### skill_search
|
|
46
64
|
|
|
47
|
-
- **What it does:**
|
|
48
|
-
- **When to call:** The current task requires a capability or guide you don't have. Use `skill_search` to find one first; after finding it, use `skill_get` to read it, then `skill_install` to load it for future turns.
|
|
49
|
-
- **Parameters:**
|
|
65
|
+
- **What it does:** Search available skills by natural language. Searches your own skills, public skills, or both — controlled by the `scope` parameter.
|
|
66
|
+
- **When to call:** The current task requires a capability or guide you don't have. Use `skill_search` to find one first; after finding it, use `skill_get` to read it, then `skill_install` to load it for future turns.
|
|
67
|
+
- **Parameters:**
|
|
68
|
+
- `query` (string, **required**) — Natural language description of the needed skill.
|
|
69
|
+
- `scope` (string, optional) — Search scope: `'mix'` (default, self + public), `'self'` (own only), `'public'` (public only).
|
|
50
70
|
|
|
51
71
|
### skill_install
|
|
52
72
|
|
|
53
|
-
- **What it does:**
|
|
73
|
+
- **What it does:** Install a learned skill into the agent workspace so it becomes permanently available. After installation, the skill will be loaded automatically in future sessions.
|
|
54
74
|
- **When to call:** After `skill_get` when the skill is useful for ongoing use.
|
|
55
|
-
- **Parameters:**
|
|
75
|
+
- **Parameters:**
|
|
76
|
+
- `skillId` (string, **required**) — The skill ID to install.
|
|
56
77
|
|
|
57
78
|
### skill_publish
|
|
58
79
|
|
|
59
|
-
- **What it does:**
|
|
80
|
+
- **What it does:** Make a skill public so other agents can discover and install it via `skill_search`.
|
|
60
81
|
- **When to call:** You have a useful skill that other agents could benefit from, and you want to share it.
|
|
61
|
-
- **Parameters:**
|
|
82
|
+
- **Parameters:**
|
|
83
|
+
- `skillId` (string, **required**) — The skill ID to publish.
|
|
62
84
|
|
|
63
85
|
### skill_unpublish
|
|
64
86
|
|
|
65
|
-
- **What it does:**
|
|
87
|
+
- **What it does:** Make a skill private again. Other agents will no longer be able to discover it.
|
|
66
88
|
- **When to call:** You want to stop sharing a previously published skill.
|
|
67
|
-
- **Parameters:**
|
|
89
|
+
- **Parameters:**
|
|
90
|
+
- `skillId` (string, **required**) — The skill ID to unpublish.
|
|
68
91
|
|
|
69
92
|
### memory_timeline
|
|
70
93
|
|
|
71
|
-
- **What it does:**
|
|
94
|
+
- **What it does:** Expand context around a memory search hit. Pass the `chunkId` from a search result to read the surrounding conversation messages.
|
|
72
95
|
- **When to call:** A `memory_search` hit is relevant but you need the surrounding dialogue.
|
|
73
|
-
- **Parameters:**
|
|
96
|
+
- **Parameters:**
|
|
97
|
+
- `chunkId` (string, **required**) — The chunkId from a memory_search hit.
|
|
98
|
+
- `window` (number, optional) — Context window ±N messages, default 2.
|
|
74
99
|
|
|
75
100
|
### memory_viewer
|
|
76
101
|
|
|
77
|
-
- **What it does:**
|
|
78
|
-
- **When to call:** The user asks
|
|
102
|
+
- **What it does:** Show the MemOS Memory Viewer URL. Call this when the user asks how to view, browse, manage, or check their memories. Returns the URL the user can open in their browser.
|
|
103
|
+
- **When to call:** The user asks where to see or manage their memories.
|
|
79
104
|
- **Parameters:** None.
|
|
80
105
|
|
|
81
106
|
## Quick decision flow
|
|
82
107
|
|
|
83
108
|
1. **No memories in context or auto-recall reported nothing**
|
|
84
|
-
→ Call `memory_search` with a **self-generated short query**.
|
|
109
|
+
→ Call `memory_search(query="...")` with a **self-generated short query**.
|
|
110
|
+
|
|
111
|
+
2. **Need to see the full original text of a search hit**
|
|
112
|
+
→ Call `memory_get(chunkId="...")`.
|
|
85
113
|
|
|
86
|
-
|
|
87
|
-
→ Call `task_summary(taskId)`.
|
|
114
|
+
3. **Search returned hits with `task_id` and you need full context**
|
|
115
|
+
→ Call `task_summary(taskId="...")`.
|
|
88
116
|
|
|
89
|
-
|
|
90
|
-
→ Call `skill_get(taskId
|
|
117
|
+
4. **Task has an experience guide you want to follow**
|
|
118
|
+
→ Call `skill_get(taskId="...")` or `skill_get(skillId="...")`. Optionally `skill_install(skillId="...")` for future use.
|
|
91
119
|
|
|
92
|
-
|
|
93
|
-
→ Call `memory_timeline(chunkId
|
|
120
|
+
5. **You need the exact surrounding conversation of a hit**
|
|
121
|
+
→ Call `memory_timeline(chunkId="...")`.
|
|
94
122
|
|
|
95
|
-
|
|
123
|
+
6. **You need a capability/guide that you don't have**
|
|
96
124
|
→ Call `skill_search(query="...", scope="mix")` to discover available skills.
|
|
97
125
|
|
|
98
|
-
|
|
126
|
+
7. **You have shared knowledge useful to all agents**
|
|
99
127
|
→ Call `memory_write_public(content="...")` to persist it in public memory.
|
|
100
128
|
|
|
101
|
-
|
|
102
|
-
→ Call `skill_publish(skillId
|
|
129
|
+
8. **You want to share/stop sharing a skill with other agents**
|
|
130
|
+
→ Call `skill_publish(skillId="...")` or `skill_unpublish(skillId="...")`.
|
|
103
131
|
|
|
104
|
-
|
|
132
|
+
9. **User asks where to see or manage their memories**
|
|
105
133
|
→ Call `memory_viewer()` and share the URL.
|
|
106
134
|
|
|
107
135
|
## Writing good search queries
|
|
@@ -110,3 +138,12 @@ This skill describes how to use the MemOS memory tools so you can reliably searc
|
|
|
110
138
|
- Use **concrete terms**: names, topics, tools, or decisions.
|
|
111
139
|
- If the user's message is long, **derive one or two sub-queries** rather than pasting the whole message.
|
|
112
140
|
- Use `role='user'` when you specifically want to find what the user said.
|
|
141
|
+
|
|
142
|
+
## Memory ownership and agent isolation
|
|
143
|
+
|
|
144
|
+
Each memory is tagged with an `owner` (e.g. `agent:main`, `agent:sales-bot`). This is handled **automatically** — you do not need to pass any owner parameter.
|
|
145
|
+
|
|
146
|
+
- **Your memories:** All tools (`memory_search`, `memory_get`, `memory_timeline`) automatically scope queries to your agent's own memories.
|
|
147
|
+
- **Public memories:** Memories marked as `public` are visible to all agents. Use `memory_write_public` to write shared knowledge.
|
|
148
|
+
- **Cross-agent isolation:** You cannot see memories owned by other agents (unless they are public).
|
|
149
|
+
- **How it works:** The system identifies your agent ID from the OpenClaw runtime context and applies owner filtering automatically on every search, recall, and retrieval.
|
package/src/capture/index.ts
CHANGED
|
@@ -193,14 +193,58 @@ function stripMemoryInjection(text: string): string {
|
|
|
193
193
|
"",
|
|
194
194
|
).trim();
|
|
195
195
|
|
|
196
|
-
//
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
const
|
|
202
|
-
if (
|
|
203
|
-
cleaned =
|
|
196
|
+
// Old format: ## Retrieved memories from past conversations\n\nCRITICAL INSTRUCTION:...
|
|
197
|
+
const recallIdx = cleaned.indexOf("## Retrieved memories from past conversations");
|
|
198
|
+
if (recallIdx !== -1) {
|
|
199
|
+
const before = cleaned.slice(0, recallIdx);
|
|
200
|
+
const after = cleaned.slice(recallIdx);
|
|
201
|
+
const tsMatch = after.match(/\n\[(?:Mon|Tue|Wed|Thu|Fri|Sat|Sun)\s+\d{4}-\d{2}-\d{2}/);
|
|
202
|
+
if (tsMatch && tsMatch.index != null) {
|
|
203
|
+
cleaned = (before + after.slice(tsMatch.index)).trim();
|
|
204
|
+
} else {
|
|
205
|
+
cleaned = before.trim();
|
|
206
|
+
}
|
|
207
|
+
}
|
|
208
|
+
|
|
209
|
+
// prependContext format: ## User's conversation history (from memory system)\n...
|
|
210
|
+
// Ends at last "Current time:" line or last chunkId= line, whichever comes later.
|
|
211
|
+
const prependIdx = cleaned.indexOf("## User's conversation history (from memory system)");
|
|
212
|
+
if (prependIdx !== -1) {
|
|
213
|
+
const before = cleaned.slice(0, prependIdx);
|
|
214
|
+
const after = cleaned.slice(prependIdx);
|
|
215
|
+
|
|
216
|
+
// Find the last anchor line that belongs to the injected block
|
|
217
|
+
const currentTimeMatch = after.match(/Current time:[^\n]*/g);
|
|
218
|
+
const chunkIdMatch = after.match(/chunkId="[^"]*"/g);
|
|
219
|
+
let cutPos = 0;
|
|
220
|
+
if (currentTimeMatch) {
|
|
221
|
+
const lastCt = after.lastIndexOf(currentTimeMatch[currentTimeMatch.length - 1]);
|
|
222
|
+
const lineEnd = after.indexOf("\n", lastCt);
|
|
223
|
+
cutPos = Math.max(cutPos, lineEnd !== -1 ? lineEnd + 1 : after.length);
|
|
224
|
+
}
|
|
225
|
+
if (chunkIdMatch) {
|
|
226
|
+
const lastCk = after.lastIndexOf(chunkIdMatch[chunkIdMatch.length - 1]);
|
|
227
|
+
const lineEnd = after.indexOf("\n", lastCk);
|
|
228
|
+
cutPos = Math.max(cutPos, lineEnd !== -1 ? lineEnd + 1 : after.length);
|
|
229
|
+
}
|
|
230
|
+
if (cutPos === 0) {
|
|
231
|
+
// No anchors found; remove everything from the header onward
|
|
232
|
+
cleaned = before.trim();
|
|
233
|
+
} else {
|
|
234
|
+
cleaned = (before + after.slice(cutPos)).trim();
|
|
235
|
+
}
|
|
236
|
+
}
|
|
237
|
+
|
|
238
|
+
// New format: <memos_system_instruction>...</memos_system_instruction>\n\n📝 Related memories:...
|
|
239
|
+
const memosTagIdx = cleaned.indexOf("<memos_system_instruction>");
|
|
240
|
+
if (memosTagIdx !== -1) {
|
|
241
|
+
const before = cleaned.slice(0, memosTagIdx);
|
|
242
|
+
const after = cleaned.slice(memosTagIdx);
|
|
243
|
+
const tsMatch = after.match(/\n\[(?:Mon|Tue|Wed|Thu|Fri|Sat|Sun)\s+\d{4}-\d{2}-\d{2}/);
|
|
244
|
+
if (tsMatch && tsMatch.index != null) {
|
|
245
|
+
cleaned = (before + after.slice(tsMatch.index)).trim();
|
|
246
|
+
} else {
|
|
247
|
+
cleaned = before.trim();
|
|
204
248
|
}
|
|
205
249
|
}
|
|
206
250
|
|
package/src/ingest/chunker.ts
CHANGED
|
@@ -1,8 +1,6 @@
|
|
|
1
|
-
import type { ChunkKind } from "../types";
|
|
2
|
-
|
|
3
1
|
export interface RawChunk {
|
|
4
2
|
content: string;
|
|
5
|
-
kind: ChunkKind;
|
|
3
|
+
kind: "paragraph";
|
|
6
4
|
}
|
|
7
5
|
|
|
8
6
|
const MAX_CHUNK_CHARS = 3000;
|
|
@@ -24,32 +22,27 @@ const COMMAND_LINE_RE = /^(?:\$|>|#)\s+.+$/gm;
|
|
|
24
22
|
* Semantic-aware chunking:
|
|
25
23
|
* 1. Extract fenced code blocks as whole units (never split inside)
|
|
26
24
|
* 2. Detect unfenced code regions by brace-matching (functions/classes kept intact)
|
|
27
|
-
* 3. Extract error stacks, list blocks, command lines
|
|
25
|
+
* 3. Extract error stacks, list blocks, command lines as separate chunks
|
|
28
26
|
* 4. Split remaining prose at paragraph boundaries (double newline)
|
|
29
|
-
* 5. Merge short adjacent chunks
|
|
27
|
+
* 5. Merge short adjacent chunks
|
|
30
28
|
*/
|
|
31
29
|
export function chunkText(text: string): RawChunk[] {
|
|
32
30
|
let remaining = text;
|
|
33
|
-
const slots: Array<{ placeholder: string;
|
|
31
|
+
const slots: Array<{ placeholder: string; content: string }> = [];
|
|
34
32
|
let counter = 0;
|
|
35
33
|
|
|
36
|
-
function ph(content: string
|
|
34
|
+
function ph(content: string): string {
|
|
37
35
|
const tag = `\x00SLOT_${counter++}\x00`;
|
|
38
|
-
slots.push({ placeholder: tag,
|
|
36
|
+
slots.push({ placeholder: tag, content: content.trim() });
|
|
39
37
|
return tag;
|
|
40
38
|
}
|
|
41
39
|
|
|
42
|
-
remaining = remaining.replace(FENCED_CODE_RE, (m) => ph(m
|
|
43
|
-
|
|
40
|
+
remaining = remaining.replace(FENCED_CODE_RE, (m) => ph(m));
|
|
44
41
|
remaining = extractBraceBlocks(remaining, ph);
|
|
45
42
|
|
|
46
|
-
const structural:
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
{ re: COMMAND_LINE_RE, kind: "command" },
|
|
50
|
-
];
|
|
51
|
-
for (const { re, kind } of structural) {
|
|
52
|
-
remaining = remaining.replace(re, (m) => ph(m, kind));
|
|
43
|
+
const structural: RegExp[] = [ERROR_STACK_RE, LIST_BLOCK_RE, COMMAND_LINE_RE];
|
|
44
|
+
for (const re of structural) {
|
|
45
|
+
remaining = remaining.replace(re, (m) => ph(m));
|
|
53
46
|
}
|
|
54
47
|
|
|
55
48
|
const raw: RawChunk[] = [];
|
|
@@ -64,7 +57,7 @@ export function chunkText(text: string): RawChunk[] {
|
|
|
64
57
|
for (const part of parts) {
|
|
65
58
|
const slot = slots.find((s) => s.placeholder === part);
|
|
66
59
|
if (slot) {
|
|
67
|
-
raw.push(slot.
|
|
60
|
+
raw.push({ content: slot.content, kind: "paragraph" });
|
|
68
61
|
} else if (part.trim().length >= MIN_CHUNK_CHARS) {
|
|
69
62
|
raw.push({ content: part.trim(), kind: "paragraph" });
|
|
70
63
|
}
|
|
@@ -75,8 +68,8 @@ export function chunkText(text: string): RawChunk[] {
|
|
|
75
68
|
}
|
|
76
69
|
|
|
77
70
|
for (const s of slots) {
|
|
78
|
-
if (!raw.some((c) => c.content === s.
|
|
79
|
-
raw.push(s.
|
|
71
|
+
if (!raw.some((c) => c.content === s.content)) {
|
|
72
|
+
raw.push({ content: s.content, kind: "paragraph" });
|
|
80
73
|
}
|
|
81
74
|
}
|
|
82
75
|
|
|
@@ -92,7 +85,7 @@ export function chunkText(text: string): RawChunk[] {
|
|
|
92
85
|
*/
|
|
93
86
|
function extractBraceBlocks(
|
|
94
87
|
text: string,
|
|
95
|
-
ph: (content: string
|
|
88
|
+
ph: (content: string) => string,
|
|
96
89
|
): string {
|
|
97
90
|
const lines = text.split("\n");
|
|
98
91
|
const result: string[] = [];
|
|
@@ -126,7 +119,7 @@ function extractBraceBlocks(
|
|
|
126
119
|
if (depth <= 0 || (BLOCK_CLOSE_RE.test(line) && depth <= 0)) {
|
|
127
120
|
const block = blockLines.join("\n");
|
|
128
121
|
if (block.trim().length >= MIN_CHUNK_CHARS) {
|
|
129
|
-
result.push(ph(block
|
|
122
|
+
result.push(ph(block));
|
|
130
123
|
} else {
|
|
131
124
|
result.push(block);
|
|
132
125
|
}
|
|
@@ -142,7 +135,7 @@ function extractBraceBlocks(
|
|
|
142
135
|
if (blockLines.length > 0) {
|
|
143
136
|
const block = blockLines.join("\n");
|
|
144
137
|
if (block.trim().length >= MIN_CHUNK_CHARS) {
|
|
145
|
-
result.push(ph(block
|
|
138
|
+
result.push(ph(block));
|
|
146
139
|
} else {
|
|
147
140
|
result.push(block);
|
|
148
141
|
}
|
|
@@ -171,11 +164,10 @@ function mergeSmallChunks(chunks: RawChunk[]): RawChunk[] {
|
|
|
171
164
|
continue;
|
|
172
165
|
}
|
|
173
166
|
|
|
174
|
-
const sameKind = buf.kind === c.kind;
|
|
175
167
|
const bothSmall = buf.content.length < IDEAL_CHUNK_CHARS && c.content.length < IDEAL_CHUNK_CHARS;
|
|
176
168
|
const mergedLen = buf.content.length + c.content.length + 2;
|
|
177
169
|
|
|
178
|
-
if (
|
|
170
|
+
if (bothSmall && mergedLen <= MAX_CHUNK_CHARS) {
|
|
179
171
|
buf.content = buf.content + "\n\n" + c.content;
|
|
180
172
|
} else {
|
|
181
173
|
merged.push(buf);
|
|
@@ -189,29 +181,29 @@ function mergeSmallChunks(chunks: RawChunk[]): RawChunk[] {
|
|
|
189
181
|
function splitOversized(chunks: RawChunk[]): RawChunk[] {
|
|
190
182
|
const result: RawChunk[] = [];
|
|
191
183
|
for (const c of chunks) {
|
|
192
|
-
if (c.content.length <= MAX_CHUNK_CHARS
|
|
184
|
+
if (c.content.length <= MAX_CHUNK_CHARS) {
|
|
193
185
|
result.push(c);
|
|
194
186
|
continue;
|
|
195
187
|
}
|
|
196
|
-
result.push(...splitAtSentenceBoundary(c.content
|
|
188
|
+
result.push(...splitAtSentenceBoundary(c.content));
|
|
197
189
|
}
|
|
198
190
|
return result;
|
|
199
191
|
}
|
|
200
192
|
|
|
201
|
-
function splitAtSentenceBoundary(text: string
|
|
193
|
+
function splitAtSentenceBoundary(text: string): RawChunk[] {
|
|
202
194
|
const sentences = text.match(/[^.!?。!?\n]+(?:[.!?。!?]+|\n{2,})/g) ?? [text];
|
|
203
195
|
const result: RawChunk[] = [];
|
|
204
196
|
let buf = "";
|
|
205
197
|
|
|
206
198
|
for (const s of sentences) {
|
|
207
199
|
if (buf.length + s.length > MAX_CHUNK_CHARS && buf.length > 0) {
|
|
208
|
-
result.push({ content: buf.trim(), kind });
|
|
200
|
+
result.push({ content: buf.trim(), kind: "paragraph" });
|
|
209
201
|
buf = "";
|
|
210
202
|
}
|
|
211
203
|
buf += s;
|
|
212
204
|
}
|
|
213
205
|
if (buf.trim().length >= MIN_CHUNK_CHARS) {
|
|
214
|
-
result.push({ content: buf.trim(), kind });
|
|
206
|
+
result.push({ content: buf.trim(), kind: "paragraph" });
|
|
215
207
|
}
|
|
216
208
|
return result;
|
|
217
209
|
}
|