persyst-mcp 2.1.0 → 2.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/extract-worker.js +387 -0
- package/bin/extract.js +185 -0
- package/bin/ingest.js +82 -0
- package/bin/init.js +174 -0
- package/bin/setup.js +9 -4
- package/hooks/persyst-hook.js +195 -10
- package/index.js +20 -0
- package/package.json +9 -3
- package/src/database.js +84 -16
- package/src/extractor-heuristic.js +250 -0
- package/src/search.js +31 -10
- package/src/server.js +1 -1
- package/src/tools.js +40 -26
package/src/database.js
CHANGED
|
@@ -72,6 +72,16 @@ try {
|
|
|
72
72
|
db.exec('ALTER TABLE memories ADD COLUMN assertion_time INTEGER DEFAULT (unixepoch())');
|
|
73
73
|
} catch (e) { /* Column already exists */ }
|
|
74
74
|
|
|
75
|
+
// --- Migration: add namespace column for per-agent isolation ---
|
|
76
|
+
try {
|
|
77
|
+
db.exec("ALTER TABLE memories ADD COLUMN namespace TEXT DEFAULT 'shared'");
|
|
78
|
+
} catch (e) { /* Column already exists */ }
|
|
79
|
+
|
|
80
|
+
// --- Index on namespace for fast filtered queries ---
|
|
81
|
+
try {
|
|
82
|
+
db.exec('CREATE INDEX IF NOT EXISTS idx_memories_namespace ON memories (namespace)');
|
|
83
|
+
} catch (e) { /* Index already exists */ }
|
|
84
|
+
|
|
75
85
|
// --- Contradictions table ---
|
|
76
86
|
db.exec(`
|
|
77
87
|
CREATE TABLE IF NOT EXISTS contradictions (
|
|
@@ -208,7 +218,7 @@ console.error('[persyst] Schema initialized ✓');
|
|
|
208
218
|
const stmts = {
|
|
209
219
|
// -- Insert --
|
|
210
220
|
insertMemory: db.prepare(
|
|
211
|
-
'INSERT INTO memories (content, importance_score) VALUES (?, ?)'
|
|
221
|
+
'INSERT INTO memories (content, importance_score, namespace) VALUES (?, ?, ?)'
|
|
212
222
|
),
|
|
213
223
|
insertVec: db.prepare(
|
|
214
224
|
'INSERT INTO memories_vec (rowid, embedding) VALUES (?, ?)'
|
|
@@ -246,15 +256,24 @@ const stmts = {
|
|
|
246
256
|
getById: db.prepare(
|
|
247
257
|
'SELECT * FROM memories WHERE id = ? AND valid_until IS NULL'
|
|
248
258
|
),
|
|
259
|
+
getByIdNs: db.prepare(
|
|
260
|
+
"SELECT * FROM memories WHERE id = ? AND (namespace = ? OR namespace = 'shared') AND valid_until IS NULL"
|
|
261
|
+
),
|
|
249
262
|
getAnyById: db.prepare(
|
|
250
263
|
'SELECT * FROM memories WHERE id = ?'
|
|
251
264
|
),
|
|
252
265
|
getRecent: db.prepare(
|
|
253
266
|
'SELECT * FROM memories WHERE valid_until IS NULL ORDER BY created_at DESC LIMIT ?'
|
|
254
267
|
),
|
|
268
|
+
getRecentNs: db.prepare(
|
|
269
|
+
"SELECT * FROM memories WHERE (namespace = ? OR namespace = 'shared') AND valid_until IS NULL ORDER BY created_at DESC LIMIT ?"
|
|
270
|
+
),
|
|
255
271
|
getImportant: db.prepare(
|
|
256
272
|
'SELECT * FROM memories WHERE valid_until IS NULL ORDER BY importance_score DESC LIMIT ?'
|
|
257
273
|
),
|
|
274
|
+
getImportantNs: db.prepare(
|
|
275
|
+
"SELECT * FROM memories WHERE (namespace = ? OR namespace = 'shared') AND valid_until IS NULL ORDER BY importance_score DESC LIMIT ?"
|
|
276
|
+
),
|
|
258
277
|
getProvenance: db.prepare(
|
|
259
278
|
'SELECT * FROM provenance WHERE memory_id = ?'
|
|
260
279
|
),
|
|
@@ -353,6 +372,9 @@ const stmts = {
|
|
|
353
372
|
findMemoryByContent: db.prepare(
|
|
354
373
|
'SELECT id FROM memories WHERE content = ? AND valid_until IS NULL LIMIT 1'
|
|
355
374
|
),
|
|
375
|
+
findMemoryByContentNs: db.prepare(
|
|
376
|
+
"SELECT id FROM memories WHERE content = ? AND (namespace = ? OR namespace = 'shared') AND valid_until IS NULL LIMIT 1"
|
|
377
|
+
),
|
|
356
378
|
|
|
357
379
|
// -- Hash-prefix lookup for git dedup (Bug 1 fix) --
|
|
358
380
|
findMemoryByHashPrefix: db.prepare(
|
|
@@ -363,6 +385,14 @@ const stmts = {
|
|
|
363
385
|
getActiveMemoryCount: db.prepare(
|
|
364
386
|
'SELECT COUNT(*) as count FROM memories WHERE valid_until IS NULL'
|
|
365
387
|
),
|
|
388
|
+
getActiveMemoryCountNs: db.prepare(
|
|
389
|
+
"SELECT COUNT(*) as count FROM memories WHERE (namespace = ? OR namespace = 'shared') AND valid_until IS NULL"
|
|
390
|
+
),
|
|
391
|
+
|
|
392
|
+
// -- Namespace stats --
|
|
393
|
+
getNamespaceStats: db.prepare(
|
|
394
|
+
'SELECT namespace, COUNT(*) as count FROM memories WHERE valid_until IS NULL GROUP BY namespace ORDER BY count DESC'
|
|
395
|
+
),
|
|
366
396
|
|
|
367
397
|
// -- Memory History Chain (Feature 6: prepared statements) --
|
|
368
398
|
getContradictionAncestors: db.prepare(
|
|
@@ -380,10 +410,14 @@ const stmts = {
|
|
|
380
410
|
|
|
381
411
|
/**
|
|
382
412
|
* Insert a new memory into the memories table and log its provenance.
|
|
413
|
+
* @param {string} content - Memory content
|
|
414
|
+
* @param {number} importance - Importance score (0-1)
|
|
415
|
+
* @param {Object} provenanceInfo - Provenance metadata
|
|
416
|
+
* @param {string} namespace - Namespace for agent isolation (default: 'shared')
|
|
383
417
|
* @returns {number} The new memory's ID
|
|
384
418
|
*/
|
|
385
|
-
export function insertMemory(content, importance = 1.0, provenanceInfo = null) {
|
|
386
|
-
const result = stmts.insertMemory.run(content, importance);
|
|
419
|
+
export function insertMemory(content, importance = 1.0, provenanceInfo = null, namespace = 'shared') {
|
|
420
|
+
const result = stmts.insertMemory.run(content, importance, namespace || 'shared');
|
|
387
421
|
const id = Number(result.lastInsertRowid);
|
|
388
422
|
|
|
389
423
|
// Provenance Info handling
|
|
@@ -412,13 +446,16 @@ export function insertVector(id, embedding) {
|
|
|
412
446
|
|
|
413
447
|
/**
|
|
414
448
|
* Get a memory by ID. Boosts its importance on access.
|
|
449
|
+
* @param {number} id - Memory ID
|
|
450
|
+
* @param {string|null} namespace - Namespace filter (null = no filter)
|
|
415
451
|
* @returns {object|null} The memory row, or null if not found
|
|
416
452
|
*/
|
|
417
|
-
export function getMemory(id) {
|
|
418
|
-
const memory =
|
|
453
|
+
export function getMemory(id, namespace = null) {
|
|
454
|
+
const memory = namespace
|
|
455
|
+
? stmts.getByIdNs.get(id, namespace)
|
|
456
|
+
: stmts.getById.get(id);
|
|
419
457
|
if (memory) {
|
|
420
458
|
boostMemory(id);
|
|
421
|
-
// Fetch and link provenance info
|
|
422
459
|
const prov = getProvenance(id);
|
|
423
460
|
memory.provenance = prov;
|
|
424
461
|
}
|
|
@@ -439,10 +476,14 @@ export function getAnyMemoryById(id) {
|
|
|
439
476
|
|
|
440
477
|
/**
|
|
441
478
|
* Get a memory by ID WITHOUT boosting. Used internally for search results.
|
|
479
|
+
* @param {number} id - Memory ID
|
|
480
|
+
* @param {string|null} namespace - Namespace filter (null = no filter)
|
|
442
481
|
* @returns {object|null} The memory row, or null if not found
|
|
443
482
|
*/
|
|
444
|
-
export function getMemoryById(id) {
|
|
445
|
-
const memory =
|
|
483
|
+
export function getMemoryById(id, namespace = null) {
|
|
484
|
+
const memory = namespace
|
|
485
|
+
? stmts.getByIdNs.get(id, namespace)
|
|
486
|
+
: stmts.getById.get(id);
|
|
446
487
|
if (memory) {
|
|
447
488
|
memory.provenance = getProvenance(id);
|
|
448
489
|
}
|
|
@@ -480,9 +521,13 @@ export function deleteMemory(id) {
|
|
|
480
521
|
|
|
481
522
|
/**
|
|
482
523
|
* Get the N most recently created memories.
|
|
524
|
+
* @param {number} limit - Max results
|
|
525
|
+
* @param {string|null} namespace - Namespace filter (null = all)
|
|
483
526
|
*/
|
|
484
|
-
export function getRecentMemories(limit = 10) {
|
|
485
|
-
const rows =
|
|
527
|
+
export function getRecentMemories(limit = 10, namespace = null) {
|
|
528
|
+
const rows = namespace
|
|
529
|
+
? stmts.getRecentNs.all(namespace, limit)
|
|
530
|
+
: stmts.getRecent.all(limit);
|
|
486
531
|
rows.forEach(r => {
|
|
487
532
|
r.provenance = getProvenance(r.id);
|
|
488
533
|
});
|
|
@@ -491,9 +536,13 @@ export function getRecentMemories(limit = 10) {
|
|
|
491
536
|
|
|
492
537
|
/**
|
|
493
538
|
* Get the N most important memories (by importance_score).
|
|
539
|
+
* @param {number} limit - Max results
|
|
540
|
+
* @param {string|null} namespace - Namespace filter (null = all)
|
|
494
541
|
*/
|
|
495
|
-
export function getImportantMemories(limit = 10) {
|
|
496
|
-
const rows =
|
|
542
|
+
export function getImportantMemories(limit = 10, namespace = null) {
|
|
543
|
+
const rows = namespace
|
|
544
|
+
? stmts.getImportantNs.all(namespace, limit)
|
|
545
|
+
: stmts.getImportant.all(limit);
|
|
497
546
|
rows.forEach(r => {
|
|
498
547
|
r.provenance = getProvenance(r.id);
|
|
499
548
|
});
|
|
@@ -620,9 +669,13 @@ export function getMemoriesByEntity(entityId) {
|
|
|
620
669
|
* Check if a memory with exact content already exists.
|
|
621
670
|
* Used for deduplication.
|
|
622
671
|
* @param {string} content - Exact content to match
|
|
672
|
+
* @param {string|null} namespace - Namespace filter (null = global dedup)
|
|
623
673
|
* @returns {boolean}
|
|
624
674
|
*/
|
|
625
|
-
export function memoryExists(content) {
|
|
675
|
+
export function memoryExists(content, namespace = null) {
|
|
676
|
+
if (namespace) {
|
|
677
|
+
return stmts.findMemoryByContentNs.get(content, namespace) !== undefined;
|
|
678
|
+
}
|
|
626
679
|
return stmts.findMemoryByContent.get(content) !== undefined;
|
|
627
680
|
}
|
|
628
681
|
|
|
@@ -638,12 +691,24 @@ export function memoryExistsByHashPrefix(pattern) {
|
|
|
638
691
|
|
|
639
692
|
/**
|
|
640
693
|
* Get count of active (non-archived) memories.
|
|
694
|
+
* @param {string|null} namespace - Namespace filter (null = all)
|
|
641
695
|
* @returns {number}
|
|
642
696
|
*/
|
|
643
|
-
export function getActiveMemoryCount() {
|
|
697
|
+
export function getActiveMemoryCount(namespace = null) {
|
|
698
|
+
if (namespace) {
|
|
699
|
+
return stmts.getActiveMemoryCountNs.get(namespace).count;
|
|
700
|
+
}
|
|
644
701
|
return stmts.getActiveMemoryCount.get().count;
|
|
645
702
|
}
|
|
646
703
|
|
|
704
|
+
/**
|
|
705
|
+
* Get namespace breakdown stats.
|
|
706
|
+
* @returns {Array<{namespace: string, count: number}>}
|
|
707
|
+
*/
|
|
708
|
+
export function getNamespaceStats() {
|
|
709
|
+
return stmts.getNamespaceStats.all();
|
|
710
|
+
}
|
|
711
|
+
|
|
647
712
|
// ============================================================
|
|
648
713
|
// DEDUPLICATION BY EXACT CONTENT
|
|
649
714
|
// ============================================================
|
|
@@ -651,10 +716,13 @@ export function getActiveMemoryCount() {
|
|
|
651
716
|
/**
|
|
652
717
|
* Find memory by exact content.
|
|
653
718
|
* @param {string} content
|
|
719
|
+
* @param {string|null} namespace - Namespace filter (null = global)
|
|
654
720
|
* @returns {object|null} The memory row, or null if not found
|
|
655
721
|
*/
|
|
656
|
-
export function getMemoryByContent(content) {
|
|
657
|
-
const row =
|
|
722
|
+
export function getMemoryByContent(content, namespace = null) {
|
|
723
|
+
const row = namespace
|
|
724
|
+
? stmts.findMemoryByContentNs.get(content, namespace)
|
|
725
|
+
: stmts.findMemoryByContent.get(content);
|
|
658
726
|
return row ? getMemoryById(row.id) : null;
|
|
659
727
|
}
|
|
660
728
|
|
|
@@ -0,0 +1,250 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* extractor-heuristic.js — Tier 2: Zero-Cost Regex-Based Fact Extractor
|
|
3
|
+
*
|
|
4
|
+
* Scans raw conversation text for explicit developer preference signals:
|
|
5
|
+
* "I prefer...", "we decided...", "always use...", "stack includes..."
|
|
6
|
+
*
|
|
7
|
+
* Design decisions:
|
|
8
|
+
* - Runs synchronously — zero latency overhead on the hot path
|
|
9
|
+
* - Conservative extraction: high-precision, low-recall
|
|
10
|
+
* - Returns structured facts with confidence scores (0.0 - 1.0)
|
|
11
|
+
* - Deduplication-ready: facts are normalized before output
|
|
12
|
+
*
|
|
13
|
+
* This is NOT the primary extraction tier. It's a lightweight safety net
|
|
14
|
+
* that catches the most obvious signals when Tier 3 (LLM) is unavailable
|
|
15
|
+
* or still processing asynchronously.
|
|
16
|
+
*/
|
|
17
|
+
|
|
18
|
+
// ============================================================
|
|
19
|
+
// PATTERN DEFINITIONS
|
|
20
|
+
// Ordered by specificity — most specific patterns first
|
|
21
|
+
// Each pattern has: regex, category, confidence, and a template
|
|
22
|
+
// to normalize the matched text into a clean fact statement.
|
|
23
|
+
// ============================================================
|
|
24
|
+
|
|
25
|
+
// Each entry describes one signal:
//   regex      - global matcher run against the noise-filtered text
//   category   - coarse label attached to every fact the pattern yields
//   confidence - fixed score reported for facts from this pattern
//   template   - turns a regex match into the normalized fact string
//
// Conventions shared by the patterns that capture "rest of the sentence":
//   * the `m` flag lets `$` match at the end of each line, so a statement
//     without a trailing period is still captured when more lines follow;
//   * a period only terminates the capture when it is followed by whitespace
//     or the end of the line, so dotted tokens ("Node.js", "127.0.0.1",
//     "v1.2") are kept whole instead of being cut at the first dot.
const PATTERNS = [
  // --- Decision patterns (highest confidence) ---
  {
    regex: /(?:we|i|the team)\s+(?:have\s+)?decided\s+(?:to\s+)?(?:use|go\s+with|adopt|switch\s+to|move\s+to)\s+(.+?)(?:\.(?=\s|$)|$)/gim,
    category: 'decision',
    confidence: 0.85,
    template: (match) => `Decision: ${cleanFact(match[1])}`
  },
  {
    regex: /(?:we(?:'re|\s+are)?\s+)?(?:going|moving)\s+(?:to\s+)?(?:use|adopt|switch\s+to|migrate\s+to)\s+(.+?)(?:\s+(?:for|because|since|as)\b|\.(?=\s|$)|$)/gim,
    category: 'decision',
    confidence: 0.80,
    template: (match) => `Decision: Moving to ${cleanFact(match[1])}`
  },

  // --- Explicit preference patterns ---
  {
    regex: /i\s+(?:always\s+)?prefer\s+(.+?)(?:\s+(?:over|instead\s+of|rather\s+than)\s+(.+?))?(?:\.(?=\s|$)|$)/gim,
    category: 'preference',
    confidence: 0.80,
    template: (match) => {
      const pref = cleanFact(match[1]);
      // The "over X" alternative is optional; omit it when not captured.
      const alt = match[2] ? ` over ${cleanFact(match[2])}` : '';
      return `Preference: ${pref}${alt}`;
    }
  },
  {
    regex: /(?:we|i)\s+(?:should\s+)?(?:always|never)\s+(?:use|avoid|include|add|write|create)\s+(.+?)(?:\.(?=\s|$)|$)/gim,
    category: 'preference',
    confidence: 0.75,
    // Uses the whole match so the always/never polarity stays in the fact.
    template: (match) => `Rule: ${cleanFact(match[0])}`
  },

  // --- Stack / technology patterns ---
  {
    regex: /(?:our|the|my)\s+(?:tech\s+)?stack\s+(?:includes?|uses?|is|has)\s+(.+?)(?:\.\s|\.$|$)/gim,
    category: 'stack',
    confidence: 0.85,
    template: (match) => `Stack: ${cleanFact(match[1])}`
  },
  {
    regex: /(?:we(?:'re|\s+are)?\s+)?using\s+(.+?)\s+(?:for|as)\s+(?:our|the)\s+(.+?)(?:\.(?=\s|$)|$)/gim,
    category: 'stack',
    confidence: 0.80,
    template: (match) => `Stack: Using ${cleanFact(match[1])} for ${cleanFact(match[2])}`
  },
  {
    regex: /(?:our|the)\s+(?:backend|frontend|database|api|server|client|infra(?:structure)?)\s+(?:is|uses?|runs?\s+on)\s+(.+?)(?:\.(?=\s|$)|$)/gim,
    category: 'stack',
    confidence: 0.80,
    // Uses the whole match so the component name (backend, database, ...) is kept.
    template: (match) => `Stack: ${cleanFact(match[0])}`
  },

  // --- Naming / convention patterns ---
  {
    regex: /(?:name|call|rename)\s+(?:it|this|the\s+\w+)\s+["'`]?(\w[\w\-\.]+)["'`]?/gi,
    category: 'naming',
    confidence: 0.70,
    template: (match) => `Naming: ${cleanFact(match[0])}`
  },

  // --- Architecture patterns ---
  {
    regex: /(?:the\s+)?(?:project|app|application|system|architecture)\s+(?:follows?|uses?|is\s+based\s+on|implements?)\s+(.+?)(?:\s+pattern|\s+architecture)?(?:\.(?=\s|$)|$)/gim,
    category: 'architecture',
    confidence: 0.80,
    template: (match) => `Architecture: ${cleanFact(match[1])}`
  },

  // --- Coding rule / style patterns ---
  {
    regex: /(?:always|never|must|should|don't|do\s+not)\s+(?:use|write|create|add|include|put|place|keep)\s+(.+?)(?:\.(?=\s|$)|$)/gim,
    category: 'rule',
    confidence: 0.70,
    template: (match) => `Rule: ${cleanFact(match[0])}`
  },

  // --- Config / env patterns ---
  {
    regex: /(?:set|change|update|configure)\s+(?:the\s+)?(?:port|host|env|environment|config|setting)\s+(?:to|=|:)\s*["'`]?(.+?)["'`]?(?:\.(?=\s|$)|$)/gim,
    category: 'config',
    confidence: 0.75,
    template: (match) => `Config: ${cleanFact(match[0])}`
  }
];
|
|
110
|
+
|
|
111
|
+
// ============================================================
|
|
112
|
+
// NOISE FILTERS
|
|
113
|
+
// Skip lines that look like code, errors, or system output
|
|
114
|
+
// ============================================================
|
|
115
|
+
|
|
116
|
+
// Line-level filters. A line matching ANY of these is dropped before the
// fact patterns run. All filters tolerate leading indentation so that
// indented log output or env dumps are treated like their flush-left forms.
const NOISE_PATTERNS = [
  /^\s*(?:import|export|const|let|var|function|class|if|else|for|while|return|throw|try|catch)\s/, // code keywords
  /^\s*[{}\[\]();]/,                               // lines opening with brackets / punctuation
  /^\s*\/\//,                                      // line comments
  /^\s*\*/,                                        // block-comment continuation lines
  /^\s*```/,                                       // markdown code fences
  /^\s*$/,                                         // blank lines
  /^\s*(?:error|warning|info|debug|trace):/i,      // log-level prefixes
  /^\s*at\s+\w+/,                                  // stack trace lines
  /^\s*[A-Z_][A-Z0-9_]+=/,                         // ENV variable assignments (digits allowed, e.g. S3_BUCKET=)
  /^\s*\d{4}-\d{2}-\d{2}/,                         // timestamp lines
];

/**
 * Check if a line looks like noise (code, logs, etc.)
 *
 * The patterns are non-global, so `.test()` carries no state between calls.
 *
 * @param {string} line - A single line of text (no newline)
 * @returns {boolean} true when at least one noise filter matches
 */
function isNoiseLine(line) {
  return NOISE_PATTERNS.some((filter) => filter.test(line));
}
|
|
137
|
+
|
|
138
|
+
// ============================================================
|
|
139
|
+
// FACT NORMALIZATION
|
|
140
|
+
// ============================================================
|
|
141
|
+
|
|
142
|
+
/**
 * Clean and normalize an extracted fact string.
 *
 * Steps, in order: trim, collapse internal whitespace runs to a single
 * space, strip trailing punctuation (`.`, `,`, `;`, `:`), strip wrapping
 * quote characters, strip any punctuation that was sitting inside the
 * closing quote, then truncate to 200 characters.
 *
 * Only TRAILING punctuation is removed, so dotted tokens such as "Node.js"
 * are left intact.
 *
 * @param {string} raw - Raw matched text
 * @returns {string} Normalized fact text; '' for empty or non-string input
 */
function cleanFact(raw) {
  // Non-strings (including null/undefined) have nothing to normalize.
  if (typeof raw !== 'string') return '';

  const trailingPunctuation = /[\s.,;:]+$/;

  return raw
    .trim()
    .replace(/\s+/g, ' ')                 // collapse whitespace
    .replace(trailingPunctuation, '')     // "foo"., -> "foo"
    .replace(/^["'`]+|["'`]+$/g, '')      // strip wrapping quotes
    .replace(trailingPunctuation, '')     // "foo," -> foo
    .slice(0, 200)                        // hard max fact length
    .trimEnd();                           // the cut may land right after a space
}
|
|
157
|
+
|
|
158
|
+
// ============================================================
|
|
159
|
+
// MAIN EXTRACTION FUNCTION
|
|
160
|
+
// ============================================================
|
|
161
|
+
|
|
162
|
+
/**
 * Extract facts from raw conversation text using regex heuristics.
 *
 * The text is split into lines, lines flagged by `isNoiseLine` are removed,
 * and every entry of `PATTERNS` whose confidence meets `minConfidence` is run
 * over what remains. Facts are de-duplicated on their lower-cased,
 * whitespace-collapsed content. The result is ordered by confidence
 * (highest first) and THEN limited to `maxFacts`, so the cap always keeps
 * the most confident facts rather than whichever patterns happen to be
 * listed first.
 *
 * @param {string} text - Raw conversation text (user prompt or full turn)
 * @param {Object} [options={}]
 * @param {number} [options.minConfidence=0.65] - Minimum confidence to include a fact
 * @param {number} [options.maxFacts=10] - Maximum facts to return per call; 0 or less returns []
 * @returns {Array<{content: string, category: string, confidence: number}>}
 *
 * @example
 * const facts = extractHeuristic("I prefer Postgres over SQLite.");
 * // => [{ content: "Preference: Postgres over SQLite", category: "preference", confidence: 0.80 }]
 */
export function extractHeuristic(text, options = {}) {
  const { minConfidence = 0.65, maxFacts = 10 } = options ?? {};

  if (!text || typeof text !== 'string' || text.length < 10 || maxFacts <= 0) {
    return [];
  }

  // Drop code / log / blank lines before matching.
  const cleanText = text
    .split('\n')
    .filter((line) => !isNoiseLine(line))
    .join('\n');

  const facts = [];
  const seen = new Set(); // dedup by normalized content

  for (const pattern of PATTERNS) {
    // Patterns below the threshold can never contribute a fact.
    if (pattern.confidence < minConfidence) continue;

    // The regex objects are shared and global. matchAll() iterates over a
    // private copy (so this loop leaves no state behind), but that copy
    // starts from the original's lastIndex — hence the reset.
    pattern.regex.lastIndex = 0;

    for (const match of cleanText.matchAll(pattern.regex)) {
      // Skip matches that are too short to be meaningful
      if (match[0].length < 8) continue;

      let content;
      try {
        content = pattern.template(match);
      } catch (_) {
        // Best-effort: a template that throws only costs us this one match.
        continue;
      }
      if (!content || content.length < 5) continue;

      const key = content.toLowerCase().replace(/\s+/g, ' ').trim();
      if (seen.has(key)) continue;
      seen.add(key);

      facts.push({
        content,
        category: pattern.category,
        confidence: pattern.confidence
      });
    }
  }

  // Rank first, cap second. The sort keeps pattern order between facts of
  // equal confidence.
  facts.sort((a, b) => b.confidence - a.confidence);
  return facts.slice(0, maxFacts);
}
|
|
234
|
+
|
|
235
|
+
/**
 * Quick check: does this text contain any extractable signals?
 * Cheaper than running full extraction — use as a gate.
 *
 * Runs every `PATTERNS` regex against the raw text and stops at the first
 * hit. Unlike `extractHeuristic`, no noise filtering is applied, so a `true`
 * result means "worth extracting", not "extraction will yield facts".
 *
 * @param {string} text
 * @returns {boolean} true when at least one pattern matches; false for
 *   non-string input or text shorter than 10 characters
 */
export function hasExtractableSignals(text) {
  if (typeof text !== 'string' || text.length < 10) return false;

  return PATTERNS.some(({ regex }) => {
    // The regexes are shared and global: `.test()` starts from lastIndex and
    // advances it on a hit. Reset before (stale state from another caller)
    // and after (so a hit here does not leak into the next user).
    regex.lastIndex = 0;
    const found = regex.test(text);
    regex.lastIndex = 0;
    return found;
  });
}
|
package/src/search.js
CHANGED
|
@@ -19,6 +19,8 @@ import { generateEmbedding } from './embeddings.js';
|
|
|
19
19
|
import { createAttestation } from './attestation.js';
|
|
20
20
|
import { searchCache, LRUCache } from './cache.js';
|
|
21
21
|
|
|
22
|
+
let lastDataVersion = 0;
|
|
23
|
+
|
|
22
24
|
/**
|
|
23
25
|
* Search memories using both keyword and semantic strategies.
|
|
24
26
|
* Results are cached in the LRU cache for repeated queries.
|
|
@@ -29,9 +31,21 @@ import { searchCache, LRUCache } from './cache.js';
|
|
|
29
31
|
* @param {string|null} sessionId - Session identifier
|
|
30
32
|
* @returns {Promise<Array>} Ranked search results (with .attestation property attached)
|
|
31
33
|
*/
|
|
32
|
-
export async function searchHybrid(queryText, limit = 5, agentId = null, sessionId = null) {
|
|
34
|
+
export async function searchHybrid(queryText, limit = 5, agentId = null, sessionId = null, namespace = null) {
|
|
35
|
+
// Sync in-memory cache with external DB changes using sqlite data_version
|
|
36
|
+
try {
|
|
37
|
+
const currentDataVersion = db.pragma('data_version', { simple: true });
|
|
38
|
+
if (currentDataVersion !== lastDataVersion) {
|
|
39
|
+
searchCache.invalidate();
|
|
40
|
+
lastDataVersion = currentDataVersion;
|
|
41
|
+
}
|
|
42
|
+
} catch (_) {
|
|
43
|
+
// Fallback if pragma fails
|
|
44
|
+
}
|
|
45
|
+
|
|
33
46
|
// --- Check LRU cache first (Feature 1) ---
|
|
34
|
-
|
|
47
|
+
// Include namespace in cache key to prevent cross-namespace cache hits
|
|
48
|
+
const cacheKey = LRUCache.key(`${namespace || 'all'}:${queryText}`, limit);
|
|
35
49
|
const cached = searchCache.get(cacheKey);
|
|
36
50
|
if (cached) {
|
|
37
51
|
console.error(`[persyst-cache] Cache HIT for query: "${queryText.slice(0, 50)}..."`);
|
|
@@ -80,11 +94,12 @@ export async function searchHybrid(queryText, limit = 5, agentId = null, session
|
|
|
80
94
|
}
|
|
81
95
|
}
|
|
82
96
|
|
|
83
|
-
// --- Step 4: Fetch full details, apply reputation adjust, sort and return top N ---
|
|
97
|
+
// --- Step 4: Fetch full details, apply namespace filter, reputation adjust, sort and return top N ---
|
|
84
98
|
const finalResults = combined
|
|
85
99
|
.map(r => {
|
|
86
|
-
|
|
87
|
-
|
|
100
|
+
// Use namespace-aware getMemoryById to filter by agent namespace
|
|
101
|
+
const memory = getMemoryById(r.id, namespace);
|
|
102
|
+
if (!memory) return null; // Memory was archived, deleted, or not in namespace
|
|
88
103
|
|
|
89
104
|
// Boost memory access metrics
|
|
90
105
|
boostMemory(r.id);
|
|
@@ -223,9 +238,9 @@ function jaccardSimilarity(a, b) {
|
|
|
223
238
|
* @param {string|null} agentId - Querying agent identifier
|
|
224
239
|
* @param {string|null} sessionId - Current session ID
|
|
225
240
|
*/
|
|
226
|
-
export async function getOptimizedContext(queryText, maxTokens, agentId = null, sessionId = null) {
|
|
227
|
-
// 1. Run hybrid search to fetch top 20 memories
|
|
228
|
-
const searchHits = await searchHybrid(queryText, 20, agentId, sessionId);
|
|
241
|
+
export async function getOptimizedContext(queryText, maxTokens, agentId = null, sessionId = null, namespace = null) {
|
|
242
|
+
// 1. Run hybrid search to fetch top 20 memories (namespace-aware)
|
|
243
|
+
const searchHits = await searchHybrid(queryText, 20, agentId, sessionId, namespace);
|
|
229
244
|
const candidates = new Map();
|
|
230
245
|
|
|
231
246
|
for (const hit of searchHits) {
|
|
@@ -343,8 +358,14 @@ export async function getOptimizedContext(queryText, maxTokens, agentId = null,
|
|
|
343
358
|
* Performs memory consolidation by merging highly similar memories.
|
|
344
359
|
* Bug 6 fix: DB mutations are wrapped in a transaction for atomicity.
|
|
345
360
|
*/
|
|
346
|
-
export async function consolidateMemories() {
|
|
347
|
-
|
|
361
|
+
export async function consolidateMemories(namespace = null) {
|
|
362
|
+
// Only consolidate within namespace boundaries to prevent cross-agent merging
|
|
363
|
+
const query = namespace
|
|
364
|
+
? "SELECT * FROM memories WHERE valid_until IS NULL AND (namespace = ? OR namespace = 'shared')"
|
|
365
|
+
: 'SELECT * FROM memories WHERE valid_until IS NULL';
|
|
366
|
+
const activeMemories = namespace
|
|
367
|
+
? db.prepare(query).all(namespace)
|
|
368
|
+
: db.prepare(query).all();
|
|
348
369
|
const consolidated = [];
|
|
349
370
|
const visited = new Set();
|
|
350
371
|
|