memory-lancedb-pro 1.0.26 → 1.1.0-beta.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG-v1.1.0.md +227 -0
- package/CHANGELOG.md +23 -0
- package/README.md +82 -0
- package/README_CN.md +82 -0
- package/index.ts +106 -11
- package/openclaw.plugin.json +69 -1
- package/package.json +1 -1
- package/src/access-tracker.ts +13 -3
- package/src/decay-engine.ts +227 -0
- package/src/extraction-prompts.ts +205 -0
- package/src/llm-client.ts +92 -0
- package/src/memory-categories.ts +69 -0
- package/src/retriever.ts +152 -4
- package/src/smart-extractor.ts +524 -0
- package/src/tier-manager.ts +189 -0
package/openclaw.plugin.json
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
"id": "memory-lancedb-pro",
|
|
3
3
|
"name": "Memory (LanceDB Pro)",
|
|
4
4
|
"description": "Enhanced LanceDB-backed long-term memory with hybrid retrieval, multi-scope isolation, long-context chunking, and management CLI",
|
|
5
|
-
"version": "1.0.26",
|
|
5
|
+
"version": "1.1.0-beta.2",
|
|
6
6
|
"kind": "memory",
|
|
7
7
|
"configSchema": {
|
|
8
8
|
"type": "object",
|
|
@@ -263,6 +263,44 @@
|
|
|
263
263
|
}
|
|
264
264
|
}
|
|
265
265
|
}
|
|
266
|
+
},
|
|
267
|
+
"smartExtraction": {
|
|
268
|
+
"type": "boolean",
|
|
269
|
+
"default": true,
|
|
270
|
+
"description": "Enable LLM-powered smart memory extraction (6-category system). Falls back to regex capture when disabled or init fails."
|
|
271
|
+
},
|
|
272
|
+
"llm": {
|
|
273
|
+
"type": "object",
|
|
274
|
+
"additionalProperties": false,
|
|
275
|
+
"properties": {
|
|
276
|
+
"apiKey": {
|
|
277
|
+
"type": "string",
|
|
278
|
+
"description": "API key for LLM extraction (defaults to embedding apiKey)"
|
|
279
|
+
},
|
|
280
|
+
"model": {
|
|
281
|
+
"type": "string",
|
|
282
|
+
"default": "gpt-4o-mini",
|
|
283
|
+
"description": "LLM model for memory extraction and dedup"
|
|
284
|
+
},
|
|
285
|
+
"baseURL": {
|
|
286
|
+
"type": "string",
|
|
287
|
+
"description": "Base URL for LLM API (defaults to embedding baseURL)"
|
|
288
|
+
}
|
|
289
|
+
}
|
|
290
|
+
},
|
|
291
|
+
"extractMinMessages": {
|
|
292
|
+
"type": "integer",
|
|
293
|
+
"minimum": 1,
|
|
294
|
+
"maximum": 50,
|
|
295
|
+
"default": 4,
|
|
296
|
+
"description": "Minimum conversation messages before smart extraction triggers"
|
|
297
|
+
},
|
|
298
|
+
"extractMaxChars": {
|
|
299
|
+
"type": "integer",
|
|
300
|
+
"minimum": 500,
|
|
301
|
+
"maximum": 100000,
|
|
302
|
+
"default": 8000,
|
|
303
|
+
"description": "Maximum characters of conversation text to process for extraction"
|
|
266
304
|
}
|
|
267
305
|
},
|
|
268
306
|
"required": [
|
|
@@ -321,6 +359,36 @@
|
|
|
321
359
|
"help": "Directory path for the LanceDB database files",
|
|
322
360
|
"advanced": true
|
|
323
361
|
},
|
|
362
|
+
"smartExtraction": {
|
|
363
|
+
"label": "Smart Extraction",
|
|
364
|
+
"help": "Enable LLM-powered 6-category memory extraction. Falls back to regex capture when off."
|
|
365
|
+
},
|
|
366
|
+
"llm.apiKey": {
|
|
367
|
+
"label": "LLM API Key",
|
|
368
|
+
"sensitive": true,
|
|
369
|
+
"help": "API key for smart extraction LLM (defaults to embedding apiKey)",
|
|
370
|
+
"advanced": true
|
|
371
|
+
},
|
|
372
|
+
"llm.model": {
|
|
373
|
+
"label": "LLM Model",
|
|
374
|
+
"help": "Model for memory extraction and dedup (default: gpt-4o-mini)",
|
|
375
|
+
"advanced": true
|
|
376
|
+
},
|
|
377
|
+
"llm.baseURL": {
|
|
378
|
+
"label": "LLM Base URL",
|
|
379
|
+
"help": "Base URL for LLM API (defaults to embedding baseURL)",
|
|
380
|
+
"advanced": true
|
|
381
|
+
},
|
|
382
|
+
"extractMinMessages": {
|
|
383
|
+
"label": "Min Messages for Extraction",
|
|
384
|
+
"help": "Minimum conversation messages before smart extraction triggers",
|
|
385
|
+
"advanced": true
|
|
386
|
+
},
|
|
387
|
+
"extractMaxChars": {
|
|
388
|
+
"label": "Max Chars for Extraction",
|
|
389
|
+
"help": "Maximum conversation characters to process for extraction",
|
|
390
|
+
"advanced": true
|
|
391
|
+
},
|
|
324
392
|
"autoCapture": {
|
|
325
393
|
"label": "Auto-Capture",
|
|
326
394
|
"help": "Automatically capture important information from conversations"
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "memory-lancedb-pro",
|
|
3
|
-
"version": "1.0.26",
|
|
3
|
+
"version": "1.1.0-beta.2",
|
|
4
4
|
"description": "OpenClaw enhanced LanceDB memory plugin with hybrid retrieval (Vector + BM25), cross-encoder rerank, multi-scope isolation, long-context chunking, and management CLI",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "index.ts",
|
package/src/access-tracker.ts
CHANGED
|
@@ -83,9 +83,14 @@ export function parseAccessMetadata(
|
|
|
83
83
|
|
|
84
84
|
const obj = parsed as Record<string, unknown>;
|
|
85
85
|
|
|
86
|
-
|
|
86
|
+
// Support both camelCase and snake_case keys (beta smart-memory uses snake_case).
|
|
87
|
+
const rawCountAny = obj.accessCount ?? obj.access_count;
|
|
88
|
+
const rawCount =
|
|
89
|
+
typeof rawCountAny === "number" ? rawCountAny : Number(rawCountAny ?? 0);
|
|
90
|
+
|
|
91
|
+
const rawLastAny = obj.lastAccessedAt ?? obj.last_accessed_at;
|
|
87
92
|
const rawLastAccessed =
|
|
88
|
-
typeof
|
|
93
|
+
typeof rawLastAny === "number" ? rawLastAny : Number(rawLastAny ?? 0);
|
|
89
94
|
|
|
90
95
|
return {
|
|
91
96
|
accessCount: clampAccessCount(rawCount),
|
|
@@ -126,10 +131,15 @@ export function buildUpdatedMetadata(
|
|
|
126
131
|
const prev = parseAccessMetadata(existingMetadata);
|
|
127
132
|
const newCount = clampAccessCount(prev.accessCount + accessDelta);
|
|
128
133
|
|
|
134
|
+
const now = Date.now();
|
|
135
|
+
|
|
129
136
|
return JSON.stringify({
|
|
130
137
|
...existing,
|
|
138
|
+
// Write both camelCase and snake_case for compatibility.
|
|
131
139
|
accessCount: newCount,
|
|
132
|
-
lastAccessedAt:
|
|
140
|
+
lastAccessedAt: now,
|
|
141
|
+
access_count: newCount,
|
|
142
|
+
last_accessed_at: now,
|
|
133
143
|
});
|
|
134
144
|
}
|
|
135
145
|
|
|
@@ -0,0 +1,227 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Decay Engine — Weibull stretched-exponential decay model
|
|
3
|
+
* Ported from memx-memory/src/memory/decay.ts
|
|
4
|
+
*
|
|
5
|
+
* Composite score = recencyWeight * recency + frequencyWeight * frequency + intrinsicWeight * intrinsic
|
|
6
|
+
*
|
|
7
|
+
* - Recency: Weibull decay with importance-modulated half-life and tier-specific beta
|
|
8
|
+
* - Frequency: Logarithmic saturation with time-weighted access pattern bonus
|
|
9
|
+
* - Intrinsic: importance × confidence
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
import type { MemoryTier } from "./memory-categories.js";
|
|
13
|
+
|
|
14
|
+
// ============================================================================
|
|
15
|
+
// Types
|
|
16
|
+
// ============================================================================
|
|
17
|
+
|
|
18
|
+
const MS_PER_DAY = 86_400_000;

/** Tunable parameters consumed by {@link createDecayEngine}. */
export interface DecayConfig {
  /**
   * Base recency half-life in days (default: 30).
   * The engine scales it by exp(importanceModulation * importance); the recency
   * score reaches exactly 0.5 at that age only when the tier beta is 1.
   */
  recencyHalfLifeDays: number;
  /** Weight of recency in composite (default: 0.4) */
  recencyWeight: number;
  /** Weight of access frequency (default: 0.3) */
  frequencyWeight: number;
  /** Weight of importance × confidence (default: 0.3) */
  intrinsicWeight: number;
  /** Below this composite = stale (default: 0.3) */
  staleThreshold: number;
  /**
   * Minimum search boost (default: 0.3).
   * NOTE: createDecayEngine does not read this value; applySearchBoost uses the
   * per-tier decay floors below as its lower bound instead.
   */
  searchBoostMin: number;
  /** Importance modulation coefficient for half-life (default: 1.5) */
  importanceModulation: number;
  /** Weibull beta for Core tier — sub-exponential (default: 0.8) */
  betaCore: number;
  /** Weibull beta for Working tier — standard exponential (default: 1.0) */
  betaWorking: number;
  /** Weibull beta for Peripheral tier — super-exponential (default: 1.3) */
  betaPeripheral: number;
  /** Lower bound of the search-boost multiplier for Core memories (default: 0.9) */
  coreDecayFloor: number;
  /** Lower bound of the search-boost multiplier for Working memories (default: 0.7) */
  workingDecayFloor: number;
  /** Lower bound of the search-boost multiplier for Peripheral memories (default: 0.5) */
  peripheralDecayFloor: number;
}

/** Configuration used when createDecayEngine is called without arguments. */
export const DEFAULT_DECAY_CONFIG: DecayConfig = {
  recencyHalfLifeDays: 30,
  recencyWeight: 0.4,
  frequencyWeight: 0.3,
  intrinsicWeight: 0.3,
  staleThreshold: 0.3,
  searchBoostMin: 0.3,
  importanceModulation: 1.5,
  betaCore: 0.8,
  betaWorking: 1.0,
  betaPeripheral: 1.3,
  coreDecayFloor: 0.9,
  workingDecayFloor: 0.7,
  peripheralDecayFloor: 0.5,
};

/** Per-memory result: the three components plus their weighted sum. */
export interface DecayScore {
  memoryId: string;
  recency: number;
  frequency: number;
  intrinsic: number;
  composite: number;
}

/** Minimal memory fields needed for decay calculation. */
export interface DecayableMemory {
  id: string;
  /** Multiplied with confidence for the intrinsic score; also stretches the half-life. */
  importance: number;
  confidence: number;
  tier: MemoryTier;
  accessCount: number;
  /** Timestamp in milliseconds, on the same clock as the `now` argument (Date.now() by default). */
  createdAt: number;
  /** Timestamp in milliseconds; only consulted when accessCount > 0. */
  lastAccessedAt: number;
}

export interface DecayEngine {
  /** Calculate decay score for a single memory */
  score(memory: DecayableMemory, now?: number): DecayScore;
  /** Calculate decay scores for multiple memories */
  scoreAll(memories: DecayableMemory[], now?: number): DecayScore[];
  /**
   * Apply decay boost to search results. Mutates `results` in place: each score
   * is multiplied by max(tier floor, composite).
   */
  applySearchBoost(
    results: Array<{ memory: DecayableMemory; score: number }>,
    now?: number,
  ): void;
  /** Find stale memories (composite below threshold), lowest composite first */
  getStaleMemories(
    memories: DecayableMemory[],
    now?: number,
  ): DecayScore[];
}

// ============================================================================
// Factory
// ============================================================================

/**
 * Builds a decay engine bound to `config`.
 *
 * Composite score = recencyWeight * recency + frequencyWeight * frequency
 *                 + intrinsicWeight * intrinsic.
 *
 * @param config - Decay parameters; defaults to DEFAULT_DECAY_CONFIG.
 * @returns An engine whose methods default `now` to Date.now().
 */
export function createDecayEngine(
  config: DecayConfig = DEFAULT_DECAY_CONFIG,
): DecayEngine {
  // searchBoostMin is intentionally not destructured: nothing below reads it.
  const {
    recencyHalfLifeDays: halfLife,
    recencyWeight: rw,
    frequencyWeight: fw,
    intrinsicWeight: iw,
    staleThreshold,
    importanceModulation: mu,
    betaCore,
    betaWorking,
    betaPeripheral,
    coreDecayFloor,
    workingDecayFloor,
    peripheralDecayFloor,
  } = config;

  /**
   * Weibull shape parameter for a tier. A tier value outside the three known
   * literals (possible at runtime even though the type forbids it) is treated
   * as "working" instead of yielding undefined, which would turn every
   * downstream score into NaN.
   */
  function getTierBeta(tier: MemoryTier): number {
    switch (tier) {
      case "core":
        return betaCore;
      case "peripheral":
        return betaPeripheral;
      case "working":
      default:
        return betaWorking;
    }
  }

  /** Search-boost lower bound for a tier; unknown tiers use the working floor. */
  function getTierFloor(tier: MemoryTier): number {
    switch (tier) {
      case "core":
        return coreDecayFloor;
      case "peripheral":
        return peripheralDecayFloor;
      case "working":
      default:
        return workingDecayFloor;
    }
  }

  /**
   * Recency: Weibull stretched-exponential decay with importance-modulated half-life.
   *   effectiveHL = halfLife * exp(mu * importance)
   *   lambda = ln(2) / effectiveHL
   *   recency = exp(-lambda * daysSince^beta)
   */
  function recency(memory: DecayableMemory, now: number): number {
    // Never-accessed memories age from their creation time.
    const lastActive =
      memory.accessCount > 0 ? memory.lastAccessedAt : memory.createdAt;
    // Clamp so a timestamp in the future cannot produce a negative age.
    const daysSince = Math.max(0, (now - lastActive) / MS_PER_DAY);
    const effectiveHL = halfLife * Math.exp(mu * memory.importance);
    const lambda = Math.LN2 / effectiveHL;
    const beta = getTierBeta(memory.tier);
    return Math.exp(-lambda * Math.pow(daysSince, beta));
  }

  /**
   * Frequency: saturation curve with an access-pattern bonus.
   *   base = 1 - exp(-accessCount / 5)
   * For memories with >1 access, base is scaled by a factor in (0.5, 1] that
   * shrinks as the average gap between accesses grows.
   */
  function frequency(memory: DecayableMemory): number {
    const base = 1 - Math.exp(-memory.accessCount / 5);
    if (memory.accessCount <= 1) return base;

    // accessCount > 1 here, so the last activity is always lastAccessedAt.
    const accessSpanDays = Math.max(
      1,
      (memory.lastAccessedAt - memory.createdAt) / MS_PER_DAY,
    );
    const avgGapDays = accessSpanDays / (memory.accessCount - 1);
    const recentnessBonus = Math.exp(-avgGapDays / 30);
    return base * (0.5 + 0.5 * recentnessBonus);
  }

  /** Intrinsic value: importance × confidence. */
  function intrinsic(memory: DecayableMemory): number {
    return memory.importance * memory.confidence;
  }

  /** Computes all three components and their weighted sum for one memory. */
  function scoreOne(memory: DecayableMemory, now: number): DecayScore {
    const r = recency(memory, now);
    const f = frequency(memory);
    const i = intrinsic(memory);

    return {
      memoryId: memory.id,
      recency: r,
      frequency: f,
      intrinsic: i,
      composite: rw * r + fw * f + iw * i,
    };
  }

  return {
    score(memory, now = Date.now()) {
      return scoreOne(memory, now);
    },

    scoreAll(memories, now = Date.now()) {
      return memories.map((m) => scoreOne(m, now));
    },

    applySearchBoost(results, now = Date.now()) {
      for (const r of results) {
        const floor = getTierFloor(r.memory.tier);
        const { composite } = scoreOne(r.memory, now);
        // Math.max propagates NaN; a malformed memory (e.g. NaN importance)
        // falls back to the tier floor so it cannot poison the result ranking.
        r.score *= Number.isFinite(composite)
          ? Math.max(floor, composite)
          : floor;
      }
    },

    getStaleMemories(memories, now = Date.now()) {
      // filter() returns a fresh array, so the in-place sort is safe.
      return memories
        .map((m) => scoreOne(m, now))
        .filter((s) => s.composite < staleThreshold)
        .sort((a, b) => a.composite - b.composite);
    },
  };
}
|
|
@@ -0,0 +1,205 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Prompt templates for intelligent memory extraction.
|
|
3
|
+
* Ported from epro-memory/prompts.ts (OpenViking origin).
|
|
4
|
+
*
|
|
5
|
+
* Three mandatory prompts:
|
|
6
|
+
* - buildExtractionPrompt: 6-category L0/L1/L2 extraction with few-shot
|
|
7
|
+
* - buildDedupPrompt: CREATE/MERGE/SKIP dedup decision
|
|
8
|
+
* - buildMergePrompt: Memory merge with three-level structure
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
/**
 * Builds the prompt that asks the LLM to extract long-term memories from a
 * conversation.
 *
 * Both arguments are interpolated verbatim; this function performs no escaping
 * or truncation. The fixed part of the prompt lists the extraction criteria,
 * the six categories (profile, preferences, entities, events, cases, patterns),
 * the abstract / overview / content (L0 / L1 / L2) structure, three few-shot
 * examples, and the expected {"memories": [...]} JSON output shape.
 *
 * @param conversationText - Text inserted under the "## Recent Conversation" heading.
 * @param user - Value inserted after the "User: " label.
 * @returns The complete prompt string.
 */
export function buildExtractionPrompt(
|
|
12
|
+
conversationText: string,
|
|
13
|
+
user: string,
|
|
14
|
+
): string {
|
|
15
|
+
return `Analyze the following session context and extract memories worth long-term preservation.
|
|
16
|
+
|
|
17
|
+
User: ${user}
|
|
18
|
+
|
|
19
|
+
Target Output Language: auto (detect from recent messages)
|
|
20
|
+
|
|
21
|
+
## Recent Conversation
|
|
22
|
+
${conversationText}
|
|
23
|
+
|
|
24
|
+
# Memory Extraction Criteria
|
|
25
|
+
|
|
26
|
+
## What is worth remembering?
|
|
27
|
+
- Personalized information: Information specific to this user, not general domain knowledge
|
|
28
|
+
- Long-term validity: Information that will still be useful in future sessions
|
|
29
|
+
- Specific and clear: Has concrete details, not vague generalizations
|
|
30
|
+
|
|
31
|
+
## What is NOT worth remembering?
|
|
32
|
+
- General knowledge that anyone would know
|
|
33
|
+
- Temporary information: One-time questions or conversations
|
|
34
|
+
- Vague information: "User has questions about a feature" (no specific details)
|
|
35
|
+
- Tool output, error logs, or boilerplate
|
|
36
|
+
|
|
37
|
+
# Memory Classification
|
|
38
|
+
|
|
39
|
+
## Core Decision Logic
|
|
40
|
+
|
|
41
|
+
| Question | Answer | Category |
|
|
42
|
+
|----------|--------|----------|
|
|
43
|
+
| Who is the user? | Identity, attributes | profile |
|
|
44
|
+
| What does the user prefer? | Preferences, habits | preferences |
|
|
45
|
+
| What is this thing? | Person, project, organization | entities |
|
|
46
|
+
| What happened? | Decision, milestone | events |
|
|
47
|
+
| How was it solved? | Problem + solution | cases |
|
|
48
|
+
| What is the process? | Reusable steps | patterns |
|
|
49
|
+
|
|
50
|
+
## Precise Definition
|
|
51
|
+
|
|
52
|
+
**profile** - User identity (static attributes). Test: "User is..."
|
|
53
|
+
**preferences** - User preferences (tendencies). Test: "User prefers/likes..."
|
|
54
|
+
**entities** - Continuously existing nouns. Test: "XXX's state is..."
|
|
55
|
+
**events** - Things that happened. Test: "XXX did/completed..."
|
|
56
|
+
**cases** - Problem + solution pairs. Test: Contains "problem -> solution"
|
|
57
|
+
**patterns** - Reusable processes. Test: Can be used in "similar situations"
|
|
58
|
+
|
|
59
|
+
## Common Confusion
|
|
60
|
+
- "Plan to do X" -> events (action, not entity)
|
|
61
|
+
- "Project X status: Y" -> entities (describes entity)
|
|
62
|
+
- "User prefers X" -> preferences (not profile)
|
|
63
|
+
- "Encountered problem A, used solution B" -> cases (not events)
|
|
64
|
+
- "General process for handling certain problems" -> patterns (not cases)
|
|
65
|
+
|
|
66
|
+
# Three-Level Structure
|
|
67
|
+
|
|
68
|
+
Each memory contains three levels:
|
|
69
|
+
|
|
70
|
+
**abstract (L0)**: One-liner index
|
|
71
|
+
- Merge types (preferences/entities/profile/patterns): \`[Merge key]: [Description]\`
|
|
72
|
+
- Independent types (events/cases): Specific description
|
|
73
|
+
|
|
74
|
+
**overview (L1)**: Structured Markdown summary with category-specific headings
|
|
75
|
+
|
|
76
|
+
**content (L2)**: Full narrative with background and details
|
|
77
|
+
|
|
78
|
+
# Few-shot Examples
|
|
79
|
+
|
|
80
|
+
## profile
|
|
81
|
+
\`\`\`json
|
|
82
|
+
{
|
|
83
|
+
"category": "profile",
|
|
84
|
+
"abstract": "User basic info: AI development engineer, 3 years LLM experience",
|
|
85
|
+
"overview": "## Background\\n- Occupation: AI development engineer\\n- Experience: 3 years LLM development\\n- Tech stack: Python, LangChain",
|
|
86
|
+
"content": "User is an AI development engineer with 3 years of LLM application development experience."
|
|
87
|
+
}
|
|
88
|
+
\`\`\`
|
|
89
|
+
|
|
90
|
+
## preferences
|
|
91
|
+
\`\`\`json
|
|
92
|
+
{
|
|
93
|
+
"category": "preferences",
|
|
94
|
+
"abstract": "Python code style: No type hints, concise and direct",
|
|
95
|
+
"overview": "## Preference Domain\\n- Language: Python\\n- Topic: Code style\\n\\n## Details\\n- No type hints\\n- Concise function comments\\n- Direct implementation",
|
|
96
|
+
"content": "User prefers Python code without type hints, with concise function comments."
|
|
97
|
+
}
|
|
98
|
+
\`\`\`
|
|
99
|
+
|
|
100
|
+
## cases
|
|
101
|
+
\`\`\`json
|
|
102
|
+
{
|
|
103
|
+
"category": "cases",
|
|
104
|
+
"abstract": "LanceDB BigInt error -> Use Number() coercion before arithmetic",
|
|
105
|
+
"overview": "## Problem\\nLanceDB 0.26+ returns BigInt for numeric columns\\n\\n## Solution\\nCoerce values with Number(...) before arithmetic",
|
|
106
|
+
"content": "When LanceDB returns BigInt values, wrap them with Number() before doing arithmetic operations."
|
|
107
|
+
}
|
|
108
|
+
\`\`\`
|
|
109
|
+
|
|
110
|
+
# Output Format
|
|
111
|
+
|
|
112
|
+
Return JSON:
|
|
113
|
+
{
|
|
114
|
+
"memories": [
|
|
115
|
+
{
|
|
116
|
+
"category": "profile|preferences|entities|events|cases|patterns",
|
|
117
|
+
"abstract": "One-line index",
|
|
118
|
+
"overview": "Structured Markdown summary",
|
|
119
|
+
"content": "Full narrative"
|
|
120
|
+
}
|
|
121
|
+
]
|
|
122
|
+
}
|
|
123
|
+
|
|
124
|
+
Notes:
|
|
125
|
+
- Output language should match the dominant language in the conversation
|
|
126
|
+
- Only extract truly valuable personalized information
|
|
127
|
+
- If nothing worth recording, return {"memories": []}
|
|
128
|
+
- Maximum 5 memories per extraction
|
|
129
|
+
- Preferences should be aggregated by topic`;
|
|
130
|
+
}
|
|
131
|
+
|
|
132
|
+
/**
 * Builds the prompt that asks the LLM whether a candidate memory should be
 * skipped, created, or merged into an existing memory.
 *
 * All four arguments are interpolated verbatim. The prompt requests a JSON
 * object with "decision" (skip|create|merge), "match_index" (1-based index of
 * the existing memory to merge with) and "reason".
 *
 * NOTE(review): the prompt tells the model that "events" and "cases" do not
 * support MERGE, but the candidate's category is not a parameter here —
 * presumably callers enforce that rule or encode the category in
 * existingMemories; confirm against the caller.
 *
 * @param candidateAbstract - Inserted after "Abstract: ".
 * @param candidateOverview - Inserted after "Overview: ".
 * @param candidateContent - Inserted after "Content: ".
 * @param existingMemories - Pre-formatted text inserted under
 *   "Existing Similar Memories"; the 1-based "match_index" refers to entries in
 *   this text, so it presumably needs to be numbered by the caller.
 * @returns The complete prompt string.
 */
export function buildDedupPrompt(
|
|
133
|
+
candidateAbstract: string,
|
|
134
|
+
candidateOverview: string,
|
|
135
|
+
candidateContent: string,
|
|
136
|
+
existingMemories: string,
|
|
137
|
+
): string {
|
|
138
|
+
return `Determine how to handle this candidate memory.
|
|
139
|
+
|
|
140
|
+
**Candidate Memory**:
|
|
141
|
+
Abstract: ${candidateAbstract}
|
|
142
|
+
Overview: ${candidateOverview}
|
|
143
|
+
Content: ${candidateContent}
|
|
144
|
+
|
|
145
|
+
**Existing Similar Memories**:
|
|
146
|
+
${existingMemories}
|
|
147
|
+
|
|
148
|
+
Please decide:
|
|
149
|
+
- SKIP: Candidate memory duplicates existing memories, no need to save
|
|
150
|
+
- CREATE: This is completely new information, should be created
|
|
151
|
+
- MERGE: Candidate memory should be merged with an existing memory
|
|
152
|
+
|
|
153
|
+
IMPORTANT: "events" and "cases" categories are independent records — they do NOT support MERGE.
|
|
154
|
+
For these categories, only use SKIP or CREATE.
|
|
155
|
+
|
|
156
|
+
Return JSON format:
|
|
157
|
+
{
|
|
158
|
+
"decision": "skip|create|merge",
|
|
159
|
+
"match_index": 1,
|
|
160
|
+
"reason": "Decision reason"
|
|
161
|
+
}
|
|
162
|
+
|
|
163
|
+
If decision is "merge", set "match_index" to the number of the existing memory to merge with (1-based).`;
|
|
164
|
+
}
|
|
165
|
+
|
|
166
|
+
/**
 * Builds the prompt that asks the LLM to merge new information into an
 * existing memory, producing one record with all three levels.
 *
 * All arguments are interpolated verbatim. The prompt requests a JSON object
 * with "abstract", "overview" and "content" keys.
 *
 * @param existingAbstract - Abstract of the stored memory.
 * @param existingOverview - Overview of the stored memory.
 * @param existingContent - Content of the stored memory.
 * @param newAbstract - Abstract of the incoming memory.
 * @param newOverview - Overview of the incoming memory.
 * @param newContent - Content of the incoming memory.
 * @param category - Value inserted after the "Category" label.
 * @returns The complete prompt string.
 */
export function buildMergePrompt(
|
|
167
|
+
existingAbstract: string,
|
|
168
|
+
existingOverview: string,
|
|
169
|
+
existingContent: string,
|
|
170
|
+
newAbstract: string,
|
|
171
|
+
newOverview: string,
|
|
172
|
+
newContent: string,
|
|
173
|
+
category: string,
|
|
174
|
+
): string {
|
|
175
|
+
return `Merge the following memory into a single coherent record with all three levels.
|
|
176
|
+
|
|
177
|
+
**Category**: ${category}
|
|
178
|
+
|
|
179
|
+
**Existing Memory:**
|
|
180
|
+
Abstract: ${existingAbstract}
|
|
181
|
+
Overview:
|
|
182
|
+
${existingOverview}
|
|
183
|
+
Content:
|
|
184
|
+
${existingContent}
|
|
185
|
+
|
|
186
|
+
**New Information:**
|
|
187
|
+
Abstract: ${newAbstract}
|
|
188
|
+
Overview:
|
|
189
|
+
${newOverview}
|
|
190
|
+
Content:
|
|
191
|
+
${newContent}
|
|
192
|
+
|
|
193
|
+
Requirements:
|
|
194
|
+
- Remove duplicate information
|
|
195
|
+
- Keep the most up-to-date details
|
|
196
|
+
- Maintain a coherent narrative
|
|
197
|
+
- Keep code identifiers / URIs / model names unchanged when they are proper nouns
|
|
198
|
+
|
|
199
|
+
Return JSON:
|
|
200
|
+
{
|
|
201
|
+
"abstract": "Merged one-line abstract",
|
|
202
|
+
"overview": "Merged structured Markdown overview",
|
|
203
|
+
"content": "Merged full content"
|
|
204
|
+
}`;
|
|
205
|
+
}
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* LLM Client for memory extraction and dedup decisions.
|
|
3
|
+
* Uses OpenAI-compatible API (reuses the embedding provider config).
|
|
4
|
+
*
|
|
5
|
+
* Inspired by epro-memory/llm.ts
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import OpenAI from "openai";
|
|
9
|
+
|
|
10
|
+
/** Connection settings accepted by createLlmClient. */
export interface LlmClientConfig {
|
|
11
|
+
/** API key handed to the OpenAI client constructor. */
apiKey: string;
|
|
12
|
+
/** Model name sent with every chat completion request. */
model: string;
|
|
13
|
+
/** Optional base URL handed to the OpenAI client constructor. */
baseURL?: string;
|
|
14
|
+
/** Optional client timeout; createLlmClient substitutes 30000 when it is not set. */
timeoutMs?: number;
|
|
15
|
+
}
|
|
16
|
+
|
|
17
|
+
/** Minimal LLM interface used for memory extraction and dedup decisions. */
export interface LlmClient {
|
|
18
|
+
/**
 * Send a prompt and parse the JSON response. Returns null on failure.
 * The parsed value is cast to T without validation, so callers should
 * check its shape before relying on it.
 */
|
|
19
|
+
completeJson<T>(prompt: string): Promise<T | null>;
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
/**
 * Extract JSON from an LLM response that may be wrapped in markdown fences
 * or contain surrounding text.
 *
 * Strategy:
 *  1. If the text contains a markdown code fence, return its trimmed content.
 *  2. Otherwise return the first balanced `{ ... }` object. The scan skips over
 *     JSON string literals (including escaped quotes), so braces that appear
 *     inside string values do not affect nesting depth.
 *
 * @param text - Raw model output.
 * @returns The candidate JSON text (not yet parsed or validated), or null when
 *   there is no fence, no opening brace, or the first object is never closed.
 */
function extractJsonFromResponse(text: string): string | null {
  // Try markdown code fence first (```json ... ``` or ``` ... ```)
  const fenceMatch = text.match(/```(?:json)?\s*\n?([\s\S]*?)```/);
  if (fenceMatch) {
    return (fenceMatch[1] ?? "").trim();
  }

  // Fall back to balanced brace extraction, starting at the first "{".
  const firstBrace = text.indexOf("{");
  if (firstBrace === -1) return null;

  let depth = 0;
  let inString = false;
  let escaped = false;

  for (let i = firstBrace; i < text.length; i++) {
    const ch = text[i];

    if (inString) {
      if (escaped) {
        // Character after a backslash is consumed literally (covers \" and \\).
        escaped = false;
      } else if (ch === "\\") {
        escaped = true;
      } else if (ch === '"') {
        inString = false;
      }
      continue;
    }

    if (ch === '"') {
      inString = true;
    } else if (ch === "{") {
      depth++;
    } else if (ch === "}") {
      depth--;
      if (depth === 0) {
        return text.substring(firstBrace, i + 1);
      }
    }
  }

  // Ran out of input before the first object closed.
  return null;
}
|
|
53
|
+
|
|
54
|
+
/**
 * Creates an LlmClient backed by an OpenAI-compatible chat completions API.
 *
 * The returned client never throws: any request, extraction, or JSON parsing
 * failure resolves to null so the caller can fall back to another strategy.
 *
 * @param config - API key, model, optional base URL and timeout (30000 when unset).
 * @returns A client exposing completeJson.
 */
export function createLlmClient(config: LlmClientConfig): LlmClient {
  const openai = new OpenAI({
    apiKey: config.apiKey,
    baseURL: config.baseURL,
    timeout: config.timeoutMs ?? 30000,
  });

  const completeJson = async <T>(prompt: string): Promise<T | null> => {
    try {
      const completion = await openai.chat.completions.create({
        model: config.model,
        messages: [
          {
            role: "system",
            content:
              "You are a memory extraction assistant. Always respond with valid JSON only.",
          },
          { role: "user", content: prompt },
        ],
        temperature: 0.1,
      });

      // An empty reply or a reply without extractable JSON both yield null.
      const reply = completion.choices?.[0]?.message?.content;
      const payload = reply ? extractJsonFromResponse(reply) : null;
      return payload ? (JSON.parse(payload) as T) : null;
    } catch {
      // Graceful degradation — return null so caller can fall back
      return null;
    }
  };

  return { completeJson };
}
|
|
91
|
+
|
|
92
|
+
export { extractJsonFromResponse };
|