memory-lancedb-pro 1.1.0-beta.6 → 1.1.0-beta.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. package/.github/workflows/auto-assign.yml +33 -0
  2. package/.github/workflows/ci.yml +16 -0
  3. package/.github/workflows/claude-code-review.yml +44 -0
  4. package/.github/workflows/claude.yml +50 -0
  5. package/CHANGELOG-v1.1.0.md +227 -0
  6. package/CHANGELOG.md +13 -27
  7. package/README.md +600 -711
  8. package/README_CN.md +656 -683
  9. package/cli.ts +139 -14
  10. package/docs/CHANGELOG-v1.1.0.md +306 -0
  11. package/docs/memory_architecture_analysis.md +832 -0
  12. package/docs/openclaw-integration-playbook.md +334 -0
  13. package/docs/openclaw-integration-playbook.zh-CN.md +353 -0
  14. package/index.ts +1023 -581
  15. package/openclaw.plugin.json +176 -177
  16. package/package.json +5 -3
  17. package/scripts/sync-plugin-version.mjs +100 -0
  18. package/src/access-tracker.ts +13 -3
  19. package/src/decay-engine.ts +228 -0
  20. package/src/extraction-prompts.ts +213 -0
  21. package/src/llm-client.ts +124 -0
  22. package/src/memory-categories.ts +70 -0
  23. package/src/memory-upgrader.ts +387 -0
  24. package/src/noise-filter.ts +18 -0
  25. package/src/noise-prototypes.ts +163 -0
  26. package/src/reflection-metadata.ts +2 -1
  27. package/src/reflection-slices.ts +4 -7
  28. package/src/reflection-store.ts +177 -151
  29. package/src/retriever.ts +180 -43
  30. package/src/smart-extractor.ts +869 -0
  31. package/src/smart-metadata.ts +415 -0
  32. package/src/store.ts +298 -96
  33. package/src/tier-manager.ts +188 -0
  34. package/src/tools.ts +115 -27
  35. package/test/cli-smoke.mjs +158 -15
  36. package/test/config-session-strategy-migration.test.mjs +2 -2
  37. package/test/context-support-e2e.mjs +266 -0
  38. package/test/functional-e2e.mjs +323 -0
  39. package/test/memory-reflection.test.mjs +119 -1168
  40. package/test/openclaw-host-functional.mjs +317 -0
  41. package/test/plugin-manifest-regression.mjs +263 -0
  42. package/test/recall-text-cleanup.test.mjs +240 -0
  43. package/test/retriever-rerank-regression.mjs +278 -0
  44. package/test/smart-extractor-branches.mjs +955 -0
  45. package/test/smart-memory-lifecycle.mjs +219 -0
  46. package/test/smart-metadata-v2.mjs +121 -0
  47. package/test/sync-plugin-version.test.mjs +46 -0
  48. package/test/update-consistency-lancedb.test.mjs +201 -0
  49. package/test/vector-search-cosine.test.mjs +89 -0
  50. package/memory-lancedb-pro-1.1.0-beta.6.tgz +0 -0
  51. package/src/recall-engine.ts +0 -233
  52. package/src/reflection-aggregation.ts +0 -165
  53. package/src/reflection-normalize.ts +0 -41
  54. package/src/reflection-recall.ts +0 -170
  55. package/src/reflection-selection.ts +0 -108
  56. package/test/helpers/openclaw-extension-api-stub.mjs +0 -39
@@ -0,0 +1,33 @@
1
+ name: Auto Assign
2
+ on:
3
+ issues:
4
+ types: [opened]
5
+ pull_request:
6
+ types: [opened]
7
+
8
+ jobs:
9
+ assign-issues:
10
+ if: github.event_name == 'issues'
11
+ runs-on: ubuntu-latest
12
+ permissions:
13
+ issues: write
14
+ steps:
15
+ - name: 'Auto-assign issue to AliceLJY'
16
+ uses: pozil/auto-assign-issue@v1
17
+ with:
18
+ repo-token: ${{ secrets.GITHUB_TOKEN }}
19
+ assignees: AliceLJY
20
+ numOfAssignee: 1
21
+
22
+ assign-prs:
23
+ if: github.event_name == 'pull_request'
24
+ runs-on: ubuntu-latest
25
+ permissions:
26
+ pull-requests: write
27
+ steps:
28
+ - name: 'Auto-assign PR to rwmjhb'
29
+ uses: pozil/auto-assign-issue@v1
30
+ with:
31
+ repo-token: ${{ secrets.GITHUB_TOKEN }}
32
+ assignees: rwmjhb
33
+ numOfAssignee: 1
@@ -5,6 +5,22 @@ on:
5
5
  pull_request:
6
6
 
7
7
  jobs:
8
+ version-sync:
9
+ runs-on: ubuntu-latest
10
+ steps:
11
+ - name: Checkout
12
+ uses: actions/checkout@v4
13
+
14
+ - name: Check version consistency
15
+ run: |
16
+ pkg_ver=$(node -p "JSON.parse(require('fs').readFileSync('package.json','utf8')).version")
17
+ plugin_ver=$(node -p "JSON.parse(require('fs').readFileSync('openclaw.plugin.json','utf8')).version")
18
+ if [ "$pkg_ver" != "$plugin_ver" ]; then
19
+ echo "::error::Version mismatch: package.json=$pkg_ver, openclaw.plugin.json=$plugin_ver"
20
+ exit 1
21
+ fi
22
+ echo "Versions match: $pkg_ver"
23
+
8
24
  cli-smoke:
9
25
  runs-on: ubuntu-latest
10
26
  steps:
@@ -0,0 +1,44 @@
1
+ name: Claude Code Review
2
+
3
+ on:
4
+ pull_request:
5
+ types: [opened, synchronize, ready_for_review, reopened]
6
+ # Optional: Only run on specific file changes
7
+ # paths:
8
+ # - "src/**/*.ts"
9
+ # - "src/**/*.tsx"
10
+ # - "src/**/*.js"
11
+ # - "src/**/*.jsx"
12
+
13
+ jobs:
14
+ claude-review:
15
+ # Optional: Filter by PR author
16
+ # if: |
17
+ # github.event.pull_request.user.login == 'external-contributor' ||
18
+ # github.event.pull_request.user.login == 'new-developer' ||
19
+ # github.event.pull_request.author_association == 'FIRST_TIME_CONTRIBUTOR'
20
+
21
+ runs-on: ubuntu-latest
22
+ permissions:
23
+ contents: read
24
+ pull-requests: read
25
+ issues: read
26
+ id-token: write
27
+
28
+ steps:
29
+ - name: Checkout repository
30
+ uses: actions/checkout@v4
31
+ with:
32
+ fetch-depth: 1
33
+
34
+ - name: Run Claude Code Review
35
+ id: claude-review
36
+ uses: anthropics/claude-code-action@v1
37
+ with:
38
+ claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }}
39
+ plugin_marketplaces: 'https://github.com/anthropics/claude-code.git'
40
+ plugins: 'code-review@claude-code-plugins'
41
+ prompt: '/code-review:code-review ${{ github.repository }}/pull/${{ github.event.pull_request.number }}'
42
+ # See https://github.com/anthropics/claude-code-action/blob/main/docs/usage.md
43
+ # or https://code.claude.com/docs/en/cli-reference for available options
44
+
@@ -0,0 +1,50 @@
1
+ name: Claude Code
2
+
3
+ on:
4
+ issue_comment:
5
+ types: [created]
6
+ pull_request_review_comment:
7
+ types: [created]
8
+ issues:
9
+ types: [opened, assigned]
10
+ pull_request_review:
11
+ types: [submitted]
12
+
13
+ jobs:
14
+ claude:
15
+ if: |
16
+ (github.event_name == 'issue_comment' && contains(github.event.comment.body, '@claude')) ||
17
+ (github.event_name == 'pull_request_review_comment' && contains(github.event.comment.body, '@claude')) ||
18
+ (github.event_name == 'pull_request_review' && contains(github.event.review.body, '@claude')) ||
19
+ (github.event_name == 'issues' && (contains(github.event.issue.body, '@claude') || contains(github.event.issue.title, '@claude')))
20
+ runs-on: ubuntu-latest
21
+ permissions:
22
+ contents: read
23
+ pull-requests: read
24
+ issues: read
25
+ id-token: write
26
+ actions: read # Required for Claude to read CI results on PRs
27
+ steps:
28
+ - name: Checkout repository
29
+ uses: actions/checkout@v4
30
+ with:
31
+ fetch-depth: 1
32
+
33
+ - name: Run Claude Code
34
+ id: claude
35
+ uses: anthropics/claude-code-action@v1
36
+ with:
37
+ claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }}
38
+
39
+ # This is an optional setting that allows Claude to read CI results on PRs
40
+ additional_permissions: |
41
+ actions: read
42
+
43
+ # Optional: Give a custom prompt to Claude. If this is not specified, Claude will perform the instructions specified in the comment that tagged it.
44
+ # prompt: 'Update the pull request description to include a summary of changes.'
45
+
46
+ # Optional: Add claude_args to customize behavior and configuration
47
+ # See https://github.com/anthropics/claude-code-action/blob/main/docs/usage.md
48
+ # or https://code.claude.com/docs/en/cli-reference for available options
49
+ # claude_args: '--allowed-tools Bash(gh pr:*)'
50
+
@@ -0,0 +1,227 @@
1
+ # memory-lancedb-pro v1.1.0 — 智能记忆增强
2
+
3
+ > **日期**: 2026-03-03
4
+ > **作者**: CJY
5
+ > **概述**: 基于对 AI Agent 记忆系统的深入理解,对记忆的写入质量、生命周期管理和去重能力进行了全面改进与完善
6
+
7
+ ---
8
+
9
+ ## 一、改进动机
10
+
11
+ 原有记忆系统在**检索侧**表现优异(Vector+BM25 混合检索、cross-encoder 重排序、多维评分),但在以下方面存在提升空间:
12
+
13
+ - **记忆写入质量**:依赖正则表达式触发捕获,容易漏捕有价值信息或误捕噪声
14
+ - **记忆结构层次**:扁平文本存储,缺乏分层索引能力
15
+ - **记忆生命周期**:简单时间衰减,无法模拟人类记忆的遗忘与强化规律
16
+ - **去重能力**:仅基于向量相似度的粗粒度去重,缺乏语义级判断
17
+
18
+ 本次改进针对上述四个方面进行了系统性增强。
19
+
20
+ ---
21
+
22
+ ## 二、变更摘要
23
+
24
+ | 改进维度 | 核心变更 | 效果 |
25
+ | ------------ | ----------------------------------------- | ---------------------------------- |
26
+ | 智能提取 | LLM 驱动的 6 类别提取 + L0/L1/L2 分层存储 | 记忆写入更精准、结构更丰富 |
27
+ | 生命周期管理 | Weibull 衰减模型 + 三层晋升/降级 | 重要记忆持久保留,过时记忆自然淡化 |
28
+ | 智能去重 | 向量预过滤 + LLM 语义决策 | 避免冗余记忆,支持信息演化合并 |
29
+
30
+ ---
31
+
32
+ ## 三、新增文件
33
+
34
+ ### 1. `src/memory-categories.ts` — 6 类别分类系统
35
+
36
+ 设计了语义明确的记忆分类体系,将记忆分为两大类六小类:
37
+
38
+ - **用户记忆**:`profile`(身份属性)、`preferences`(偏好习惯)、`entities`(持续存在的实体)、`events`(发生的事件)
39
+ - **Agent 记忆**:`cases`(问题-解决方案对)、`patterns`(可复用的处理流程)
40
+
41
+ 每个类别有不同的合并策略:
42
+
43
+ - `profile` → 始终合并(用户身份信息持续累积)
44
+ - `preferences` / `entities` / `patterns` → 支持智能合并
45
+ - `events` / `cases` → 仅新增或跳过(独立记录,保留历史完整性)
46
+
47
+ ---
48
+
49
+ ### 2. `src/llm-client.ts` — LLM 客户端
50
+
51
+ 封装了 LLM 调用接口,专注于结构化 JSON 输出:
52
+
53
+ - 复用现有 OpenAI SDK 依赖,零新增包
54
+ - 内置 JSON 容错解析:支持 markdown 代码块包裹和平衡大括号提取
55
+ - 低温度 (0.1) 保证输出一致性
56
+ - 30 秒超时保护,失败时优雅降级
57
+
58
+ ---
59
+
60
+ ### 3. `src/extraction-prompts.ts` — 记忆提取提示模板
61
+
62
+ 精心设计了 3 个提示模板:
63
+
64
+ | 函数 | 用途 |
65
+ | ------------------------- | --------------------------------------------------- |
66
+ | `buildExtractionPrompt()` | 从对话中提取 6 类别 L0/L1/L2 记忆,含 few-shot 示例 |
67
+ | `buildDedupPrompt()` | CREATE / MERGE / SKIP 去重决策 |
68
+ | `buildMergePrompt()` | 将新旧记忆合并为三层结构 |
69
+
70
+ 提取提示包含完整的记忆价值判断标准、类别决策逻辑表、常见混淆澄清规则和 6 个 few-shot 示例。
71
+
72
+ ---
73
+
74
+ ### 4. `src/smart-extractor.ts` — 智能提取管线
75
+
76
+ 实现了完整的 LLM 驱动提取流水线:
77
+
78
+ ```
79
+ 对话文本 → LLM 提取 → 候选记忆 → 向量去重 → LLM 决策 → 持久化
80
+ ```
81
+
82
+ 核心设计:
83
+
84
+ - **两阶段去重**:先用向量相似度(阈值 0.7)快速筛选候选,再用 LLM 进行语义级判断
85
+ - **类别感知合并**:不同类别应用不同合并策略
86
+ - **L0/L1/L2 三层存储**:L0 一句话索引用于检索注入,L1 结构化摘要用于精读,L2 完整叙述用于深度回顾
87
+ - **向后兼容**:新增的 6 类别自动映射到已有的 5 类别存储,L0/L1/L2 存储在 metadata JSON 中
88
+ - **按类别设定重要度**:profile (0.9) > patterns (0.85) > cases/preferences (0.8) > entities (0.7) > events (0.6)
89
+
90
+ ---
91
+
92
+ ### 5. `src/decay-engine.ts` — Weibull 衰减引擎
93
+
94
+ 基于认知心理学中的记忆遗忘曲线研究,实现了复合衰减模型:
95
+
96
+ **复合分数 = 时效权重 × 时效 + 频率权重 × 频率 + 内在权重 × 内在价值**
97
+
98
+ 三个分量:
99
+
100
+ | 分量 | 机制 | 含义 |
101
+ | ------------------------ | --------------------------------- | ---------------------- |
102
+ | **时效 (recency)** | Weibull 拉伸指数衰减 `exp(-λt^β)` | 越久远的记忆时效得分越低 |
103
+ | **频率 (frequency)** | 对数饱和曲线 + 时间加权 | 越常被访问的记忆越活跃 |
104
+ | **内在价值 (intrinsic)** | `importance × confidence` | 高价值记忆天然抵抗遗忘 |
105
+
106
+ 层级特定的衰减形状 (β 参数):
107
+
108
+ - **Core** (β=0.8):亚指数衰减 → 遗忘极慢,衰减地板 0.9
109
+ - **Working** (β=1.0):标准指数衰减,衰减地板 0.7
110
+ - **Peripheral** (β=1.3):超指数衰减 → 遗忘加速,衰减地板 0.5
111
+
112
+ 关键特性:
113
+
114
+ - **重要性调制半衰期**:`effectiveHL = halfLife × exp(μ × importance)`,重要记忆持续更久
115
+ - **搜索结果加权**:检索时自动应用衰减加权,让活跃记忆排名更高
116
+ - **过期识别**:识别 composite < 0.3 的过期记忆
117
+
118
+ ---
119
+
120
+ ### 6. `src/tier-manager.ts` — 三层晋升/降级管理器
121
+
122
+ 模拟人类记忆的多级存储模型:
123
+
124
+ ```
125
+ Peripheral(外围) ⟷ Working(工作) ⟷ Core(核心)
126
+ ```
127
+
128
+ **晋升条件**:
129
+
130
+ | 方向 | 条件 |
131
+ | -------------------- | ----------------------------------------------- |
132
+ | Peripheral → Working | 访问次数 ≥ 3 且 衰减分数 ≥ 0.4 |
133
+ | Working → Core | 访问次数 ≥ 10 且 衰减分数 ≥ 0.7 且 重要度 ≥ 0.8 |
134
+
135
+ **降级条件**:
136
+
137
+ | 方向 | 条件 |
138
+ | -------------------- | ------------------------------------------------ |
139
+ | Working → Peripheral | 衰减分数 < 0.15 或(年龄 > 60 天且访问次数 < 3) |
140
+ | Core → Working | 衰减分数 < 0.15 且 访问次数 < 3(极少触发) |
141
+
142
+ ---
143
+
144
+ ## 四、修改文件
145
+
146
+ ### `index.ts` — 插件入口
147
+
148
+ #### 新增配置项
149
+
150
+ ```typescript
151
+ smartExtraction?: boolean; // 是否启用 LLM 智能提取(默认 true)
152
+ llm?: {
153
+ apiKey?: string; // LLM API Key(默认复用 embedding.apiKey)
154
+ model?: string; // LLM 模型(默认 gpt-4o-mini)
155
+ baseURL?: string; // LLM API 端点
156
+ };
157
+ extractMinMessages?: number; // 最少消息数才触发提取(默认 2)
158
+ extractMaxChars?: number; // 送入 LLM 的最大字符数(默认 8000)
159
+ ```
160
+
161
+ #### `agent_end` 钩子改进
162
+
163
+ - 当 `smartExtraction` 启用时,优先使用 SmartExtractor 进行 LLM 6 类别提取
164
+ - 当消息数不足或 SmartExtractor 未初始化时,降级回原有正则触发逻辑
165
+ - 提取完成后输出统计日志:`smart-extracted N created, M merged, K skipped`
166
+
167
+ #### `before_agent_start` 钩子改进
168
+
169
+ - 注入的记忆上下文现在显示 L0 摘要而非原始文本
170
+ - 新增 6 类别标签(如 `[preferences:global]`)
171
+ - 新增层级标记(`[C]`ore / `[W]`orking / `[P]`eripheral)
172
+
173
+ ---
174
+
175
+ ## 五、配置指南
176
+
177
+ ### 最简配置(复用已有 API Key)
178
+
179
+ ```json
180
+ {
181
+ "embedding": {
182
+ "apiKey": "${OPENAI_API_KEY}",
183
+ "model": "text-embedding-3-small"
184
+ },
185
+ "smartExtraction": true
186
+ }
187
+ ```
188
+
189
+ ### 完整配置
190
+
191
+ ```json
192
+ {
193
+ "embedding": {
194
+ "apiKey": "${OPENAI_API_KEY}",
195
+ "model": "text-embedding-3-small"
196
+ },
197
+ "smartExtraction": true,
198
+ "llm": {
199
+ "apiKey": "${OPENAI_API_KEY}",
200
+ "model": "gpt-4o-mini",
201
+ "baseURL": "https://api.openai.com/v1"
202
+ },
203
+ "extractMinMessages": 2,
204
+ "extractMaxChars": 8000
205
+ }
206
+ ```
207
+
208
+ ### 禁用智能提取
209
+
210
+ ```json
211
+ {
212
+ "smartExtraction": false
213
+ }
214
+ ```
215
+
216
+ ---
217
+
218
+ ## 六、向后兼容性
219
+
220
+ | 方面 | 兼容方式 |
221
+ | -------------- | ---------------------------------------------- |
222
+ | LanceDB Schema | 新字段存储在 `metadata` JSON 中,不修改表结构 |
223
+ | 记忆类别 | 新 6 类别自动映射到原有 5 类别 |
224
+ | 混合检索 | Vector+BM25 检索管线完全保留 |
225
+ | 去重逻辑 | 仅在 `smartExtraction: true` 时生效 |
226
+ | 已有数据 | 旧记忆正常读取,新记忆额外携带 L0/L1/L2 元数据 |
227
+ | 配置 | 全部新增配置项均有默认值,零配置即可使用 |
package/CHANGELOG.md CHANGED
@@ -1,42 +1,28 @@
1
1
  # Changelog
2
2
 
3
- ## 1.1.0-beta.6
3
+ ## 1.1.0-beta.2 (Smart Memory Beta + Access Reinforcement)
4
4
 
5
- - Refactor: build reset/new reflection handoff note in `runMemoryReflection`.
6
- - Refactor: `<open-loops>` now comes from the fresh reflection run, while `<derived-focus>` comes from historical scored itemized derived rows.
7
- - Refactor: upgrade historical `<derived-focus>` ranking to Derived-Focus V2 (conservative strict/soft normalization, non-linear group scoring, diversity-aware shortlist up to 36 before final note injection capped at 13, with no hard `score > 0.3` gate).
8
- - Breaking: stop writing and stop reading legacy combined reflection rows (`type=memory-reflection`).
9
- - Docs: refresh README / README_CN for the new handoff-note behavior and remove old legacy combined guidance.
5
+ This is a **beta** release published under the npm dist-tag **`beta`** (it does not affect the stable `latest` channel).
10
6
 
11
- ---
12
-
13
- ## 1.1.0
7
+ Highlights:
8
+ - **Smart Extraction (LLM-powered)**: 6-category extraction with L0/L1/L2 metadata (falls back to regex capture when disabled or init fails)
9
+ - **Lifecycle scoring integrated into retrieval**: decay-based score adjustment + tier floors
10
+ - **Tier transitions (best-effort)**: bounded metadata write-backs for top results (tier / access stats)
11
+ - **Access reinforcement for time decay**: frequently *manually recalled* memories decay more slowly (spaced-repetition style)
12
+ - Adds `AccessTracker` with debounced metadata write-back (accessCount / lastAccessedAt)
13
+ - Adds retrieval config: `reinforcementFactor` (default: 0.5) and `maxHalfLifeMultiplier` (default: 3)
14
14
 
15
- - Feat: add integrated self-improvement governance flow (`agent:bootstrap`, `command:new/reset`, governance tools, and `.learnings` file bootstrap).
16
- - Feat: add `memoryReflection` session strategy with inheritance/derived injection, reflection persistence, and dedicated reflection-agent support.
17
- - Fix: keep session-strategy compatibility by mapping legacy `sessionMemory.enabled` to `systemSessionMemory` / `none` and trimming reflection input toward recent conversation tail.
18
- - Fix: retry early transient upstream reflection failures once and broaden session recovery search paths to real OpenClaw agent session directories.
19
- - Docs: update README / README_CN for session strategy, self-improvement, memoryReflection, mdMirror, and reflection fallback behavior.
20
- - Tests: add targeted coverage for reflection retry classification and session recovery path resolution.
21
-
22
- PRs: #43, #2
15
+ Notes:
16
+ - Access reinforcement is gated to manual recall (`source: "manual"`) to avoid auto-recall strengthening noise.
23
17
 
24
18
  ---
25
19
 
26
- ## 1.0.32
27
-
28
- - Fix: strip OpenClaw `Conversation info` / `Sender` metadata noise before auto-capture matching and adaptive retrieval normalization, reducing false captures and noisy retrieval triggers.
29
- - Fix: parse `autoRecallMinRepeated` from plugin config so repeated-memory suppression works when configured.
20
+ ## 1.1.0-beta.1 (Smart Memory Beta)
30
21
 
31
- PR: #50
22
+ - Initial beta with Smart Extraction + lifecycle components (decay engine + tier manager)
32
23
 
33
24
  ---
34
25
 
35
-
36
- ## 1.0.31
37
-
38
- - Fix: `memory-pro import` now preserves provided IDs and is idempotent (skips if ID already exists).
39
-
40
26
  ## 1.0.26
41
27
 
42
28
  **Access Reinforcement for Time Decay**