@mem-weave/server 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +74 -0
- package/dist/cli-entry.js +49 -0
- package/dist/cli.js +53 -0
- package/dist/commands/backup.js +28 -0
- package/dist/commands/doctor.js +108 -0
- package/dist/commands/help.js +29 -0
- package/dist/commands/index.js +27 -0
- package/dist/commands/init.js +58 -0
- package/dist/commands/migrate.js +25 -0
- package/dist/commands/start.js +29 -0
- package/dist/commands/status.js +19 -0
- package/dist/commands/stop.js +46 -0
- package/dist/commands/version.js +21 -0
- package/dist/core/config.js +161 -0
- package/dist/core/decay.js +50 -0
- package/dist/core/types.js +72 -0
- package/dist/db/database.js +58 -0
- package/dist/db/repositories/access-log-repo.js +59 -0
- package/dist/db/repositories/consolidation-run-repo.js +86 -0
- package/dist/db/repositories/device-repo.js +66 -0
- package/dist/db/repositories/edge-repo.js +104 -0
- package/dist/db/repositories/memory-repo.js +294 -0
- package/dist/db/repositories/observation-repo.js +65 -0
- package/dist/db/repositories/session-repo.js +81 -0
- package/dist/db/repositories/stats-repo.js +92 -0
- package/dist/db/repositories/vector-repo.js +55 -0
- package/dist/db/schema.js +185 -0
- package/dist/injection/bundler.js +39 -0
- package/dist/injection/formatter.js +23 -0
- package/dist/prompts/compression.js +43 -0
- package/dist/prompts/edge-extract.js +21 -0
- package/dist/prompts/value-gate.js +27 -0
- package/dist/providers/embedding/index.js +36 -0
- package/dist/providers/embedding/local-xenova.js +166 -0
- package/dist/providers/embedding/noop.js +40 -0
- package/dist/providers/embedding/openai-compatible.js +46 -0
- package/dist/providers/llm/index.js +12 -0
- package/dist/providers/llm/noop.js +5 -0
- package/dist/providers/llm/openai.js +45 -0
- package/dist/rest/routes/consolidation.js +62 -0
- package/dist/rest/routes/devices.js +47 -0
- package/dist/rest/routes/injection.js +76 -0
- package/dist/rest/routes/memories.js +349 -0
- package/dist/rest/routes/observations.js +29 -0
- package/dist/rest/routes/sessions.js +37 -0
- package/dist/rest/routes/settings.js +25 -0
- package/dist/rest/routes/stats.js +15 -0
- package/dist/retrieval/bm25-search.js +91 -0
- package/dist/retrieval/causal-chain.js +197 -0
- package/dist/retrieval/fusion.js +48 -0
- package/dist/retrieval/graph-traversal.js +144 -0
- package/dist/retrieval/search-engine.js +150 -0
- package/dist/retrieval/vector-search.js +91 -0
- package/dist/server/auth.js +80 -0
- package/dist/server/bootstrap.js +28 -0
- package/dist/server/http.js +77 -0
- package/dist/server/logger.js +36 -0
- package/dist/server/rate-limiter.js +81 -0
- package/dist/server/scheduler.js +99 -0
- package/dist/workers/association.js +41 -0
- package/dist/workers/compressor.js +14 -0
- package/dist/workers/consolidator.js +201 -0
- package/dist/workers/embedder.js +102 -0
- package/dist/workers/graph-worker.js +166 -0
- package/dist/workers/value-gate.js +38 -0
- package/package.json +40 -0
|
@@ -0,0 +1,201 @@
|
|
|
1
|
+
import { transaction } from '../db/database.js';
|
|
2
|
+
import { logger } from '../server/logger.js';
|
|
3
|
+
/**
|
|
4
|
+
* Process-wide mutex: only one consolidation may run at a time.
|
|
5
|
+
* Without this, a manual `POST /api/v1/consolidate` triggered while
|
|
6
|
+
* the scheduler is also running would double-evict / double-promote
|
|
7
|
+
* the same memories and produce two `consolidation_runs` rows that
|
|
8
|
+
* share work. The scheduler AGENTS.md says "NEVER run two
|
|
9
|
+
* consolidations concurrently" — this enforces it.
|
|
10
|
+
*
|
|
11
|
+
* Not perfect (a second node wouldn't see this flag), but for the
|
|
12
|
+
* v1 single-node deployment the project targets, it's sufficient.
|
|
13
|
+
*/
|
|
14
|
+
let consolidationInFlight = false;
|
|
15
|
+
/**
 * Report whether a consolidation pass is executing in this process right now.
 *
 * @returns {boolean} Current value of the module-level `consolidationInFlight` flag.
 */
export function isConsolidationRunning() {
    return consolidationInFlight;
}
|
|
18
|
+
/**
 * Public entry point for a consolidation pass, guarded by the module-level
 * `consolidationInFlight` flag.
 *
 * If the flag is already set, nothing is done and an all-zero result with a
 * "Skipped" summary is returned. Otherwise the flag is raised for the duration
 * of `runConsolidationInner` and always lowered again, even if that call throws.
 *
 * @param {object} db - Database handle, passed straight through to `runConsolidationInner`.
 * @param {string} tenantId - Tenant whose memories are consolidated.
 * @param {object} [options={}] - Options passed straight through to `runConsolidationInner`.
 * @returns {object} The inner pass's result, or the zeroed "skipped" result.
 */
export function runConsolidation(db, tenantId, options = {}) {
    if (!consolidationInFlight) {
        consolidationInFlight = true;
        try {
            return runConsolidationInner(db, tenantId, options);
        }
        finally {
            // Release the guard no matter how the inner pass ended.
            consolidationInFlight = false;
        }
    }
    const skipped = {
        promoted: 0,
        evicted: 0,
        merged: 0,
        promotedIds: [],
        evictedIds: [],
        mergedPairs: [],
        summary: 'Skipped: another consolidation is already running.'
    };
    return skipped;
}
|
|
38
|
+
/**
 * Execute one consolidation pass for a tenant: evict stale short-term
 * memories, promote frequently used or important ones to the medium tier,
 * then merge near-duplicates.
 *
 * With `options.dryRun` set, the eviction and promotion candidates are only
 * selected and counted; no row is written and the merge step is skipped.
 *
 * Errors are logged and never thrown. When a step fails, the counts gathered
 * before the failure are kept and `summary` is set to `Failed: <message>` so
 * the caller can tell a failed pass from one that found nothing to do.
 *
 * @param {object} db - Database handle exposing `prepare(sql)`; statements are used through `.all()` and `.run()`.
 * @param {string} tenantId - Tenant whose memories are processed.
 * @param {{ dryRun?: boolean }} options - Pass options.
 * @returns {{ promoted: number, evicted: number, merged: number, promotedIds: Array, evictedIds: Array, mergedPairs: Array, summary: string }}
 */
function runConsolidationInner(db, tenantId, options) {
    const result = {
        promoted: 0,
        evicted: 0,
        merged: 0,
        promotedIds: [],
        evictedIds: [],
        mergedPairs: [],
        summary: ''
    };
    const DAY = 24 * 60 * 60 * 1000;
    try {
        const now = Date.now();
        // 1. Evict short-term: strength < 0.1 AND age > 7 days AND 0 access
        const toEvict = db.prepare(`
            SELECT id FROM memories
            WHERE tenant_id = ? AND tier = 'short' AND deleted_at IS NULL
              AND strength < 0.1 AND access_count = 0
              AND (? - created_at) > ?
        `).all(tenantId, now, 7 * DAY);
        if (!options.dryRun && toEvict.length > 0) {
            transaction(db, () => {
                const stmt = db.prepare('UPDATE memories SET deleted_at = ?, eviction_reason = ? WHERE id = ?');
                for (const row of toEvict) {
                    stmt.run(now, 'low_strength_old_age', row.id);
                }
            });
        }
        result.evicted = toEvict.length;
        result.evictedIds = toEvict.map((r) => r.id);
        // 2. Promote short→medium: accessed >= 3 times recently OR importance >= 7
        const evictedIds = new Set(result.evictedIds);
        const toPromote = db.prepare(`
            SELECT id FROM memories
            WHERE tenant_id = ? AND tier = 'short' AND deleted_at IS NULL
              AND ((access_count >= 3 AND (? - last_accessed_at) < ?) OR importance >= 7)
        `).all(tenantId, now, 7 * DAY)
            // In a real run the rows evicted above already have deleted_at set
            // and cannot be selected here. A dry run wrote nothing, so drop them
            // explicitly; otherwise the preview reports a memory as both evicted
            // and promoted.
            .filter((row) => !evictedIds.has(row.id));
        if (!options.dryRun && toPromote.length > 0) {
            transaction(db, () => {
                const stmt = db.prepare('UPDATE memories SET tier = ? WHERE id = ?');
                for (const row of toPromote) {
                    stmt.run('medium', row.id);
                }
            });
        }
        result.promoted = toPromote.length;
        result.promotedIds = toPromote.map((r) => r.id);
        // 3. Merge near-duplicates: the background counterpart of the dedup
        //    check applied when a memory is created. Pairs of the same type
        //    whose concept sets have Jaccard similarity >= 0.8 are merged, the
        //    older row surviving. Skipped entirely in dry-run mode because the
        //    merge helper writes as it goes.
        if (!options.dryRun) {
            const merged = mergeNearDuplicates(db, tenantId, now);
            result.merged = merged.length;
            result.mergedPairs = merged;
        }
        result.summary =
            `Evicted ${result.evicted}, promoted ${result.promoted}, merged ${result.merged} pairs`;
    }
    catch (err) {
        logger.error({ err }, 'consolidation failed');
        // Surface the failure to the caller instead of returning what looks
        // like a successful no-op with an empty summary.
        const message = err instanceof Error ? err.message : String(err);
        result.summary = `Failed: ${message}`;
    }
    return result;
}
|
|
107
|
+
/**
 * Find pairs of near-duplicate memories (same type, Jaccard similarity of
 * their lower-cased concept sets >= 0.8) and absorb the newer one into the
 * older. This is the background counterpart of the dedup check applied when a
 * memory is created.
 *
 * For every merge, inside one transaction:
 *  - the survivor receives the union of concepts and files, the larger of the
 *    two importance values, one extra access, and a small strength /
 *    reinforcement bump;
 *  - the absorbed row is soft-deleted with reason `merged_into_consolidation`;
 *  - an audit row is inserted into `consolidation_runs`.
 *
 * A survivor may absorb several rows in one pass. The unions written to it
 * accumulate across those merges, so concepts and files taken from an earlier
 * absorbed row are not overwritten by a later merge. Similarity is always
 * measured against the survivor's concepts as they were when the pass started.
 *
 * Rows whose `concepts_json` / `files_json` is null, not valid JSON, or not an
 * array are treated as having no concepts / files instead of aborting the pass.
 *
 * The pass is O(n^2) over the tenant's live memories and runs in memory.
 *
 * @param {object} db - Database handle exposing `prepare(sql)`.
 * @param {string} tenantId - Tenant whose live memories are scanned.
 * @param {number} now - Timestamp written to the touched rows and the audit id.
 * @returns {Array<[*, *]>} List of `[survivorId, absorbedId]` pairs, in merge order.
 */
function mergeNearDuplicates(db, tenantId, now) {
    const JACCARD_THRESHOLD = 0.8;
    const SURVIVOR_BENEFIT = 0.05; // bump survivor's strength on absorb

    // Tolerant JSON-array reader: anything that is not a JSON array yields [].
    const parseJsonArray = (json) => {
        try {
            const value = JSON.parse(json);
            return Array.isArray(value) ? value : [];
        }
        catch {
            return [];
        }
    };
    const toConceptSet = (json) => new Set(parseJsonArray(json)
        .filter((c) => typeof c === 'string')
        .map((c) => c.toLowerCase()));

    // Oldest first, so the lower index of any pair is the older memory.
    const allMemories = db.prepare(`
        SELECT id, type, content, summary, importance, confidence,
               concepts_json, files_json, created_at
        FROM memories
        WHERE tenant_id = ? AND deleted_at IS NULL
        ORDER BY created_at ASC
    `).all(tenantId);

    // Parse every concept list once instead of once per compared pair.
    const conceptSets = allMemories.map((row) => toConceptSet(row.concepts_json));

    // Statements are prepared on first use, so a pass with nothing to merge
    // touches the database exactly as little as before.
    let statements = null;
    const getStatements = () => {
        statements ??= {
            boostSurvivor: db.prepare(`
                UPDATE memories
                SET concepts_json = ?,
                    concepts_text = ?,
                    files_json = ?,
                    importance = MAX(importance, ?),
                    access_count = access_count + 1,
                    last_reinforced_at = ?,
                    reinforcement_score = min(1, reinforcement_score + ?),
                    strength = min(1, strength + ?),
                    updated_at = ?
                WHERE id = ?
            `),
            softDeleteAbsorbed: db.prepare(`
                UPDATE memories
                SET deleted_at = ?, eviction_reason = ?
                WHERE id = ?
            `),
            insertAudit: db.prepare(`
                INSERT INTO consolidation_runs (id, tenant_id, started_at, ended_at, summary, dry_run)
                SELECT ?, ?, ?, ?, ?, 0
                WHERE NOT EXISTS (SELECT 1 FROM consolidation_runs WHERE id = ?)
            `)
        };
        return statements;
    };

    const merged = [];
    const absorbed = new Set();
    for (let i = 0; i < allMemories.length; i++) {
        const a = allMemories[i];
        if (absorbed.has(a.id)) {
            continue;
        }
        const aConcepts = conceptSets[i];
        if (aConcepts.size === 0) {
            continue;
        }
        // Running unions for this survivor, created on its first merge.
        let survivorConcepts = null;
        let survivorFiles = null;
        for (let j = i + 1; j < allMemories.length; j++) {
            const b = allMemories[j];
            if (absorbed.has(b.id)) {
                continue;
            }
            if (a.type !== b.type) {
                continue;
            }
            const bConcepts = conceptSets[j];
            if (jaccardSimilarity(aConcepts, bConcepts) < JACCARD_THRESHOLD) {
                continue;
            }
            // a is older (ORDER BY created_at ASC) — keep a, absorb b.
            survivorConcepts ??= new Set(aConcepts);
            survivorFiles ??= new Set(parseJsonArray(a.files_json));
            for (const concept of bConcepts) {
                survivorConcepts.add(concept);
            }
            for (const file of parseJsonArray(b.files_json)) {
                survivorFiles.add(file);
            }
            const mergedConcepts = Array.from(survivorConcepts);
            const mergedFiles = Array.from(survivorFiles);
            const auditId = `merge-${a.id}-${b.id}-${now}`;
            transaction(db, () => {
                const { boostSurvivor, softDeleteAbsorbed, insertAudit } = getStatements();
                // Boost the survivor.
                boostSurvivor.run(JSON.stringify(mergedConcepts), mergedConcepts.join(' '), JSON.stringify(mergedFiles), b.importance, now, SURVIVOR_BENEFIT, SURVIVOR_BENEFIT, now, a.id);
                // Soft-delete the absorbed.
                softDeleteAbsorbed.run(now, 'merged_into_consolidation', b.id);
                // Audit log.
                insertAudit.run(auditId, tenantId, now, now, `merged ${b.id} -> ${a.id}`, auditId);
            });
            absorbed.add(b.id);
            merged.push([a.id, b.id]);
        }
    }
    return merged;
}
|
|
192
|
+
/**
 * Jaccard similarity of two sets: |A ∩ B| / |A ∪ B|.
 *
 * @param {Set<*>} a - First set.
 * @param {Set<*>} b - Second set.
 * @returns {number} A value in [0, 1]; 0 when both sets are empty.
 */
function jaccardSimilarity(a, b) {
    const bothEmpty = a.size === 0 && b.size === 0;
    if (bothEmpty) {
        return 0;
    }
    let shared = 0;
    a.forEach((item) => {
        if (b.has(item)) {
            shared += 1;
        }
    });
    // Inclusion–exclusion: members counted in both sets are subtracted once.
    const unionSize = a.size + b.size - shared;
    if (unionSize === 0) {
        return 0;
    }
    return shared / unionSize;
}
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
import { openDatabase } from '../db/database.js';
|
|
2
|
+
import { VectorRepo } from '../db/repositories/vector-repo.js';
|
|
3
|
+
import { logger } from '../server/logger.js';
|
|
4
|
+
/**
 * Background embedder worker.
 *
 * On every run it opens the database, selects up to `batchSize` live memories
 * of the tenant that have no row in the `memory_vectors_<dimensions>` table
 * (oldest first), asks `options.provider.embedBatch` for one vector per
 * memory, stores each vector through `VectorRepo.upsert`, and closes the
 * database again.
 *
 * Runs are chained with `setTimeout`, so the next run is only scheduled after
 * the previous scheduled run has settled. Failures never escape the worker:
 * they are logged, whether the run was scheduled or triggered by `runOnStart`.
 *
 * NOTE(review): the original header says that with the `noop` provider the
 * loop still runs but produces no embeddings — that depends on the provider
 * implementation, which is not visible here.
 *
 * NOTE(review): `options.dimensions` is interpolated into a table name, so it
 * must come from trusted configuration.
 *
 * @param {object} options
 * @param {string} options.dbPath - Path handed to `openDatabase`.
 * @param {number} options.dimensions - Vector width; selects the vector table.
 * @param {{ embedBatch: (texts: string[]) => Promise<Array> }} options.provider - Embedding provider.
 * @param {string} [options.tenantId='tenant_default'] - Tenant to process.
 * @param {number} [options.batchSize=16] - Maximum memories embedded per run.
 * @param {number} [options.intervalMs=30000] - Delay between runs.
 * @param {boolean} [options.runOnStart] - Run once immediately (skipped if already aborted).
 * @param {AbortSignal} [options.signal] - Aborting stops further scheduling.
 * @param {(result: object) => void} [options.onRun] - Called with each run's result.
 * @returns {{ stop: () => void, runNow: () => Promise<object> }} Worker handle.
 */
export function startEmbedderWorker(options) {
    const tenantId = options.tenantId ?? 'tenant_default';
    const batchSize = options.batchSize ?? 16;
    const interval = options.intervalMs ?? 30_000;
    let stopped = false;
    let timer = null;

    // One polling pass; resolves to { embedded, skipped, failed, timestamp }.
    async function runOnce() {
        const db = openDatabase(options.dbPath, { vectorDimensions: options.dimensions });
        const result = { embedded: 0, skipped: 0, failed: 0, timestamp: Date.now() };
        try {
            const vecRepo = new VectorRepo(db, options.dimensions);
            // Find candidate memories that lack a vector entry
            const vecTable = `memory_vectors_${options.dimensions}`;
            const candidates = db.prepare(`
                SELECT m.id, m.tenant_id, m.title, m.content, m.summary, m.concepts_text
                FROM memories m
                LEFT JOIN ${vecTable} v ON v.memory_id = m.id
                WHERE m.tenant_id = ? AND m.deleted_at IS NULL AND v.memory_id IS NULL
                ORDER BY m.created_at ASC
                LIMIT ?
            `).all(tenantId, batchSize);
            if (candidates.length > 0) {
                // One text per memory: the non-empty fields joined by newlines.
                const texts = candidates.map((m) => [m.title, m.summary, m.content, m.concepts_text ?? ''].filter(Boolean).join('\n'));
                try {
                    const vectors = await options.provider.embedBatch(texts);
                    for (let i = 0; i < candidates.length; i++) {
                        const m = candidates[i];
                        const v = vectors[i];
                        if (!v) {
                            // Provider returned no vector for this position.
                            result.failed++;
                            continue;
                        }
                        try {
                            vecRepo.upsert(m.id, m.tenant_id, v);
                            result.embedded++;
                        }
                        catch (err) {
                            // Keep going with the rest of the batch, but leave a
                            // trace of why this memory has no vector.
                            logger.error({ err: err?.message ?? String(err), memoryId: m.id }, 'vector upsert failed');
                            result.failed++;
                        }
                    }
                }
                catch (err) {
                    logger.error({ err: err.message }, 'batch embedding failed');
                    result.failed = candidates.length;
                }
            }
        }
        finally {
            db.close();
        }
        options.onRun?.(result);
        return result;
    }

    // Arm the next run unless the worker has been stopped.
    function schedule() {
        if (stopped) {
            return;
        }
        timer = setTimeout(async () => {
            try {
                await runOnce();
            }
            catch (err) {
                logger.error({ err }, 'run failed');
            }
            schedule();
        }, interval);
    }

    if (options.signal) {
        if (options.signal.aborted) {
            stopped = true;
        }
        else {
            options.signal.addEventListener('abort', () => {
                stopped = true;
                if (timer) {
                    clearTimeout(timer);
                }
            }, { once: true });
        }
    }
    if (options.runOnStart && !stopped) {
        // Attach a handler: a rejected start-up run (e.g. the database cannot
        // be opened) must not surface as an unhandled promise rejection.
        runOnce().catch((err) => {
            logger.error({ err }, 'run failed');
        });
    }
    schedule();
    return {
        stop() {
            stopped = true;
            if (timer) {
                clearTimeout(timer);
            }
        },
        runNow: runOnce
    };
}
|
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
import { openDatabase } from '../db/database.js';
|
|
2
|
+
import { EdgeRepo } from '../db/repositories/edge-repo.js';
|
|
3
|
+
import { MemoryRepo } from '../db/repositories/memory-repo.js';
|
|
4
|
+
import { logger } from '../server/logger.js';
|
|
5
|
+
/**
 * Background graph worker (edge discovery).
 *
 * On every run it:
 *  1. Opens the database and selects up to `batchSize` of the tenant's most
 *     recent live memories that have no outgoing edge yet.
 *  2. For each of them, picks candidate targets with a cheap keyword-overlap
 *     heuristic (`findCandidateTargets`), so the LLM is not asked about every
 *     possible pair.
 *  3. Asks the LLM (`extractEdgesViaLlm`) which typed relationships exist
 *     between the memory and each target, and keeps those with
 *     confidence >= 0.6.
 *  4. Persists each kept relationship through `EdgeRepo.create`. Errors from
 *     `create` are ignored on purpose — the original code comments name
 *     duplicate and foreign-key violations as the expected causes.
 *
 * Runs are chained with `setTimeout`. Failures never escape the worker: they
 * are logged, whether the run was scheduled or triggered by `runOnStart`. An
 * LLM error ends the current run; the next run is still scheduled.
 *
 * @param {object} options
 * @param {string} options.dbPath - Path handed to `openDatabase`.
 * @param {{ call: (system: string, prompt: string) => Promise<string> }} options.llm - LLM client.
 * @param {string} [options.tenantId='tenant_default'] - Tenant to process.
 * @param {number} [options.batchSize=32] - Maximum memories scanned per run.
 * @param {number} [options.intervalMs=3600000] - Delay between runs (one hour).
 * @param {boolean} [options.runOnStart] - Run once immediately (skipped if already aborted).
 * @param {AbortSignal} [options.signal] - Aborting stops further scheduling.
 * @param {(result: object) => void} [options.onRun] - Called with each run's result.
 * @returns {{ stop: () => void, runNow: () => Promise<object> }} Worker handle.
 */
export function startGraphWorker(options) {
    const tenantId = options.tenantId ?? 'tenant_default';
    const batchSize = options.batchSize ?? 32;
    const interval = options.intervalMs ?? 60 * 60 * 1000;
    let stopped = false;
    let timer = null;

    // One discovery pass; resolves to { scanned, edgesCreated, timestamp }.
    async function runOnce() {
        const result = { scanned: 0, edgesCreated: 0, timestamp: Date.now() };
        const db = openDatabase(options.dbPath);
        try {
            const edgeRepo = new EdgeRepo(db);
            // Find recent memories that have no outgoing edges yet
            const candidates = db.prepare(`
                SELECT m.id, m.tenant_id, m.type, m.title, m.content, m.summary, m.concepts_json
                FROM memories m
                LEFT JOIN edges e ON e.from_memory_id = m.id
                WHERE m.tenant_id = ? AND m.deleted_at IS NULL
                GROUP BY m.id
                HAVING COUNT(e.id) = 0
                ORDER BY m.created_at DESC
                LIMIT ?
            `).all(tenantId, batchSize);
            result.scanned = candidates.length;
            // The keyword heuristic narrows the pairs; the LLM then judges
            // whether a pair actually has a typed relationship.
            for (const mem of candidates) {
                const targets = findCandidateTargets(db, tenantId, mem);
                for (const target of targets) {
                    if (target.id === mem.id) {
                        continue;
                    }
                    const llmCandidates = await extractEdgesViaLlm(options.llm, mem, target);
                    for (const cand of llmCandidates) {
                        if (cand.confidence < 0.6) {
                            continue;
                        }
                        try {
                            edgeRepo.create({
                                tenantId: mem.tenant_id,
                                fromMemoryId: mem.id,
                                toMemoryId: target.id,
                                type: cand.type,
                                strength: cand.confidence,
                                reason: cand.reason
                            });
                            result.edgesCreated++;
                        }
                        catch {
                            // ignore duplicate / FK errors silently
                        }
                    }
                }
            }
        }
        finally {
            db.close();
        }
        options.onRun?.(result);
        return result;
    }

    // Arm the next run unless the worker has been stopped.
    function schedule() {
        if (stopped) {
            return;
        }
        timer = setTimeout(async () => {
            try {
                await runOnce();
            }
            catch (err) {
                logger.error({ err }, 'graph-worker run failed');
            }
            schedule();
        }, interval);
    }

    if (options.signal) {
        if (options.signal.aborted) {
            stopped = true;
        }
        else {
            options.signal.addEventListener('abort', () => {
                stopped = true;
                if (timer) {
                    clearTimeout(timer);
                }
            }, { once: true });
        }
    }
    if (options.runOnStart && !stopped) {
        // Attach a handler: a rejected start-up run (database cannot be
        // opened, LLM call throws) must not surface as an unhandled promise
        // rejection.
        runOnce().catch((err) => {
            logger.error({ err }, 'graph-worker run failed');
        });
    }
    schedule();
    return {
        stop() {
            stopped = true;
            if (timer) {
                clearTimeout(timer);
            }
        },
        runNow: runOnce
    };
}
|
|
119
|
+
/**
 * Cheap pre-filter for edge discovery: from the tenant's 50 most recent live
 * memories (excluding `mem` itself), return up to 10 whose title + summary
 * share at least one token with `mem`'s title + summary. Tokens come from
 * `tokenize`, i.e. words of four or more characters.
 *
 * The filter is deliberately loose; the LLM call made afterwards decides
 * whether a relationship really exists.
 *
 * Missing titles or summaries are skipped rather than interpolated, so a null
 * summary does not contribute the literal word "null" (or "undefined") and
 * make unrelated memories look alike.
 *
 * @param {object} db - Database handle exposing `prepare(sql)`.
 * @param {string} tenantId - Tenant to search within.
 * @param {{ id: *, title?: string|null, summary?: string|null }} mem - Memory looking for targets.
 * @returns {Array<{ id: *, title: *, summary: *, concepts_json: * }>} At most 10 rows, most recent first.
 */
function findCandidateTargets(db, tenantId, mem) {
    const allRows = db.prepare(`
        SELECT id, title, summary, concepts_json
        FROM memories
        WHERE tenant_id = ? AND id != ? AND deleted_at IS NULL
        ORDER BY created_at DESC
        LIMIT 50
    `).all(tenantId, mem.id);
    // Only fields that are actually present take part in the comparison.
    const textOf = (row) => [row.title, row.summary].filter(Boolean).join(' ');
    const memWords = new Set(tokenize(textOf(mem)));
    if (memWords.size === 0) {
        return [];
    }
    return allRows
        .filter((row) => tokenize(textOf(row)).some((word) => memWords.has(word)))
        .slice(0, 10);
}
|
|
137
|
+
/**
 * Split text into lower-cased tokens for the keyword-overlap heuristic.
 *
 * The text is cut on every run of characters other than ASCII letters,
 * digits, and code points U+4E00–U+9FFF; tokens shorter than four characters
 * are discarded.
 *
 * @param {string} text - Text to split.
 * @returns {string[]} Tokens of length >= 4, in order of appearance.
 */
function tokenize(text) {
    const pieces = text.toLowerCase().split(/[^a-z0-9\u4e00-\u9fff]+/i);
    const words = [];
    for (const piece of pieces) {
        if (piece.length >= 4) {
            words.push(piece);
        }
    }
    return words;
}
|
|
141
|
+
/**
 * Ask the LLM which typed relationships exist between a new memory and one
 * existing memory, and return the validated answers.
 *
 * The reply is expected to be a JSON array of objects with `type`, `reason`
 * and `confidence`. An entry is kept only if it is an object, its `type` is
 * one of the known edge types, `reason` is a string and `confidence` is a
 * finite number. Anything else — an empty or non-string reply, invalid JSON,
 * a non-array value — yields an empty list. A reply wrapped in a Markdown
 * code fence is unwrapped before parsing.
 *
 * Missing or malformed `concepts_json` on either memory is treated as an empty
 * concept list. Errors thrown by `llm.call` itself propagate to the caller.
 *
 * @param {{ call: (system: string, prompt: string) => Promise<string> }} llm - LLM client.
 * @param {{ title: *, content: *, concepts_json: * }} newMemory - Memory looking for edges.
 * @param {{ title: *, summary: *, concepts_json: * }} target - Existing memory to relate to.
 * @returns {Promise<Array<{ type: string, reason: string, confidence: number }>>}
 */
async function extractEdgesViaLlm(llm, newMemory, target) {
    // Tolerant reader: anything that is not a JSON array yields [].
    const parseConcepts = (json) => {
        try {
            const value = JSON.parse(json);
            return Array.isArray(value) ? value : [];
        }
        catch {
            return [];
        }
    };
    const newConcepts = parseConcepts(newMemory.concepts_json);
    const targetConcepts = parseConcepts(target.concepts_json);
    const prompt = `New memory:\nTitle: ${newMemory.title}\nContent: ${newMemory.content}\nConcepts: ${newConcepts.join(', ')}\n\nExisting memory:\nTitle: ${target.title}\nSummary: ${target.summary}\nConcepts: ${targetConcepts.join(', ')}`;
    const raw = await llm.call('You are a relationship extraction engine. Given a new memory and an existing memory, return a JSON array of relationships (each with targetMemoryId="__target__", type, reason, confidence). Output [] if no relationship exists.', prompt);
    if (typeof raw !== 'string' || !raw.trim()) {
        return [];
    }
    // Models often wrap JSON in a fenced block; valid JSON never starts with
    // a backtick, so unwrapping cannot change the meaning of a bare reply.
    const fenced = raw.trim().match(/^```(?:json)?\s*([\s\S]*?)\s*```$/i);
    const jsonText = fenced ? fenced[1] : raw;
    const validTypes = new Set([
        'causes', 'enables', 'contradicts', 'supersedes', 'references',
        'related_to', 'before', 'after', 'duplicates', 'refines'
    ]);
    try {
        const parsed = JSON.parse(jsonText);
        if (!Array.isArray(parsed)) {
            return [];
        }
        return parsed
            // Skip null / primitive entries instead of letting one bad element
            // throw and discard every valid relationship in the reply.
            .filter((e) => e !== null && typeof e === 'object')
            .filter((e) => typeof e.type === 'string' && typeof e.reason === 'string' && Number.isFinite(e.confidence))
            .filter((e) => validTypes.has(e.type))
            .map((e) => ({ type: e.type, reason: e.reason, confidence: e.confidence }));
    }
    catch {
        return [];
    }
}
|
|
165
|
+
// Helper to satisfy unused-warning
|
|
166
|
+
void null;
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
// Phrases in the user prompt that explicitly ask for something to be remembered.
const REMEMBER_PATTERNS = [
    /记住/i, /记住这个/i, /以后遇到/i, /记住.*偏好/i,
    /这个是我的偏好/i, /这个方案确定了/i, /以后记住/i
];
// Phrases in the user prompt that signal a decision being made.
const DECISION_PATTERNS = [
    /我们就用/i, /决定.*用/i, /选择.*而不是/i, /不用.*了/i,
    /采用/i, /使用.*方案/i, /确定.*架构/i
];
// Lower-case substrings that mark a Bash tool result as a failure.
const FAILURE_KEYWORDS = ['error', 'fail', 'crash', 'exception', 'build failed', 'test failed'];

/**
 * Decide whether an observed hook event is worth turning into a memory.
 *
 * Rules are checked in order and the first match wins:
 *  1. the user prompt matches a "remember" phrase  -> fact / preference, high;
 *  2. the user prompt matches a decision phrase    -> decision, high;
 *  3. a `post_tool_use` event from the `Bash` tool whose prompt, output or
 *     error text contains a failure keyword        -> bug, high;
 *  4. a `prompt_submit` event whose prompt is longer than 50 characters
 *                                                  -> event, medium;
 *  5. anything else is rejected as routine.
 *
 * @param {{ userPrompt?: string, toolOutput?: string, error?: string, hookType?: string, toolName?: string }} input
 * @returns {{ shouldCreateMemory: boolean, reason: string, suggestedTypes: string[], priority: string }}
 */
export function evaluateObservation(input) {
    const promptText = input.userPrompt || '';
    const haystack = [promptText, input.toolOutput || '', input.error || '']
        .join('\n')
        .toLowerCase();

    const askedToRemember = REMEMBER_PATTERNS.some((re) => re.test(promptText));
    if (askedToRemember) {
        return { shouldCreateMemory: true, reason: 'User explicitly asked to remember', suggestedTypes: ['fact', 'preference'], priority: 'high' };
    }

    const madeDecision = DECISION_PATTERNS.some((re) => re.test(promptText));
    if (madeDecision) {
        return { shouldCreateMemory: true, reason: 'Architectural decision detected', suggestedTypes: ['decision'], priority: 'high' };
    }

    const isBashResult = input.hookType === 'post_tool_use' && input.toolName === 'Bash';
    if (isBashResult && FAILURE_KEYWORDS.some((keyword) => haystack.includes(keyword))) {
        return { shouldCreateMemory: true, reason: 'Tool failure detected', suggestedTypes: ['bug'], priority: 'high' };
    }

    const isLongPrompt = input.hookType === 'prompt_submit' && input.userPrompt && input.userPrompt.length > 50;
    if (isLongPrompt) {
        return { shouldCreateMemory: true, reason: 'Substantive user prompt', suggestedTypes: ['event'], priority: 'medium' };
    }

    // Nothing matched: a routine operation.
    return { shouldCreateMemory: false, reason: 'Routine operation, no memory value', suggestedTypes: [], priority: 'low' };
}
|
package/package.json
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@mem-weave/server",
|
|
3
|
+
"version": "0.2.0",
|
|
4
|
+
"description": "MemWeave local-first memory infrastructure for AI agents: structured memory, 4-layer retrieval (BM25 + vector + graph + causal), token-budgeted injection, server-side write deduplication, and background consolidation. Server process: Fastify REST API + CLI.",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"main": "./dist/cli-entry.js",
|
|
7
|
+
"types": "./dist/cli-entry.d.ts",
|
|
8
|
+
"bin": {
|
|
9
|
+
"memweave": "dist/cli-entry.js"
|
|
10
|
+
},
|
|
11
|
+
"files": [
|
|
12
|
+
"dist",
|
|
13
|
+
"README.md"
|
|
14
|
+
],
|
|
15
|
+
"scripts": {
|
|
16
|
+
"build": "tsc -p tsconfig.json",
|
|
17
|
+
"typecheck": "tsc -p tsconfig.json --noEmit",
|
|
18
|
+
"clean": "rm -rf dist"
|
|
19
|
+
},
|
|
20
|
+
"engines": {
|
|
21
|
+
"node": ">=20.0.0"
|
|
22
|
+
},
|
|
23
|
+
"dependencies": {
|
|
24
|
+
"@fastify/cors": "11.2.0",
|
|
25
|
+
"@fastify/static": "^9.1.3",
|
|
26
|
+
"better-sqlite3": "12.10.0",
|
|
27
|
+
"fastify": "5.8.5",
|
|
28
|
+
"jsonc-parser": "3.3.1",
|
|
29
|
+
"pino": "10.3.1",
|
|
30
|
+
"sqlite-vec": "^0.1.9",
|
|
31
|
+
"zod": "4.4.3"
|
|
32
|
+
},
|
|
33
|
+
"optionalDependencies": {
|
|
34
|
+
"@xenova/transformers": "*"
|
|
35
|
+
},
|
|
36
|
+
"publishConfig": {
|
|
37
|
+
"access": "public"
|
|
38
|
+
},
|
|
39
|
+
"license": "MIT"
|
|
40
|
+
}
|