persyst-mcp 2.2.5 → 2.2.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/database.js CHANGED
@@ -63,27 +63,32 @@ db.exec(`
63
63
  `);
64
64
 
65
65
  // --- Migrations for bi-temporal validity on existing tables ---
66
- try {
66
/**
 * Report whether a table already has a column with the given name.
 *
 * Used by the migrations to decide whether an `ALTER TABLE ... ADD COLUMN`
 * still has to run.
 *
 * @param {string} table - Table name; must be a plain SQL identifier.
 * @param {string} name - Column name to look for.
 * @returns {boolean} true when the column is present on the table.
 * @throws {TypeError} If `table` is not a plain identifier.
 */
function columnExists(table, name) {
  // The PRAGMA statement form cannot take bound parameters, so the table name
  // is interpolated; restrict it to a plain identifier to keep that safe.
  if (typeof table !== 'string' || !/^[A-Za-z_][A-Za-z0-9_]*$/.test(table)) {
    throw new TypeError(`Invalid table name: ${String(table)}`);
  }

  const wanted = String(name).toLowerCase();
  const columns = db.prepare(`PRAGMA table_info(${table})`).all();

  // SQLite treats column names case-insensitively. Comparing the same way
  // avoids re-adding a column that exists under a different case, which would
  // make the following ALTER TABLE fail with "duplicate column name".
  return columns.some((col) => String(col.name).toLowerCase() === wanted);
}
70
+
71
+ if (!columnExists('memories', 'valid_from')) {
67
72
  db.exec('ALTER TABLE memories ADD COLUMN valid_from INTEGER DEFAULT (unixepoch())');
68
- } catch (e) { /* Column already exists */ }
73
+ }
69
74
 
70
- try {
75
+ if (!columnExists('memories', 'valid_until')) {
71
76
  db.exec('ALTER TABLE memories ADD COLUMN valid_until INTEGER DEFAULT NULL');
72
- } catch (e) { /* Column already exists */ }
77
+ }
73
78
 
74
- try {
79
+ if (!columnExists('memories', 'assertion_time')) {
75
80
  db.exec('ALTER TABLE memories ADD COLUMN assertion_time INTEGER DEFAULT (unixepoch())');
76
- } catch (e) { /* Column already exists */ }
81
+ }
77
82
 
78
83
  // --- Migration: add namespace column for per-agent isolation ---
79
- try {
84
+ if (!columnExists('memories', 'namespace')) {
80
85
  db.exec("ALTER TABLE memories ADD COLUMN namespace TEXT DEFAULT 'shared'");
81
- } catch (e) { /* Column already exists */ }
86
+ }
82
87
 
83
88
  // --- Migration: add parent_id column for history tracing ---
84
- try {
89
+ if (!columnExists('memories', 'parent_id')) {
85
90
  db.exec('ALTER TABLE memories ADD COLUMN parent_id INTEGER DEFAULT NULL');
86
- } catch (e) { /* Column already exists */ }
91
+ }
87
92
 
88
93
  // --- Index on namespace for fast filtered queries ---
89
94
  try {
@@ -367,6 +372,11 @@ const stmts = {
367
372
  deleteEntity: db.prepare(
368
373
  'DELETE FROM entities WHERE id = ?'
369
374
  ),
375
+ deleteEdgesByEntity: db.prepare(
376
+ `DELETE FROM edges WHERE
377
+ (source_id = ? AND source_type = 'entity') OR
378
+ (target_id = ? AND target_type = 'entity')`
379
+ ),
370
380
 
371
381
  // -- Edges --
372
382
  insertEdge: db.prepare(
@@ -426,9 +436,171 @@ const stmts = {
426
436
  INSERT INTO watched_files (file_path, last_position)
427
437
  VALUES (?, ?)
428
438
  ON CONFLICT(file_path) DO UPDATE SET last_position = excluded.last_position, updated_at = unixepoch()
439
+ `),
440
+
441
+ // -- Internal lookups (pre-compiled for hot-loop use) --
442
+ getAttestationByHash: db.prepare(
443
+ 'SELECT * FROM attestations WHERE hash = ?'
444
+ ),
445
+ getMemoryParentId: db.prepare(
446
+ 'SELECT parent_id FROM memories WHERE id = ?'
447
+ ),
448
+ getMemoryChildren: db.prepare(
449
+ 'SELECT id FROM memories WHERE parent_id = ?'
450
+ ),
451
+ getMemoryContentById: db.prepare(
452
+ 'SELECT content FROM memories WHERE id = ?'
453
+ ),
454
+ getMemoryByIdRaw: db.prepare(
455
+ 'SELECT * FROM memories WHERE id = ? AND valid_until IS NULL'
456
+ ),
457
+ getMemoryLikeContent: db.prepare(
458
+ 'SELECT id FROM memories WHERE content LIKE ? AND valid_until IS NULL'
459
+ ),
460
+ getVecByRowId: db.prepare(
461
+ 'SELECT embedding FROM memories_vec WHERE rowid = ?'
462
+ ),
463
+ updateMemoryParentId: db.prepare(
464
+ 'UPDATE memories SET parent_id = ? WHERE id = ?'
465
+ ),
466
+ deleteProvenanceByMemoryId: db.prepare(
467
+ 'DELETE FROM provenance WHERE memory_id = ?'
468
+ ),
469
+ deleteContradictionsByMemoryId: db.prepare(
470
+ 'DELETE FROM contradictions WHERE old_memory_id = ? OR new_memory_id = ?'
471
+ ),
472
+ getReputationScore: db.prepare(
473
+ 'SELECT reputation_score FROM agent_stats WHERE agent_id = ?'
474
+ ),
475
+ updateProvenanceOwner: db.prepare(
476
+ "UPDATE provenance SET source_type = 'agent', source_id = ?, confidence = 1.0 WHERE memory_id = ?"
477
+ ),
478
+ archiveMemoryById: db.prepare(
479
+ 'UPDATE memories SET valid_until = unixepoch() WHERE id = ?'
480
+ ),
481
+ getEdgesBySourceAndType: db.prepare(`
482
+ SELECT * FROM edges
483
+ WHERE (source_id = ? AND source_type = ?)
484
+ OR (target_id = ? AND target_type = ?)
485
+ `),
486
+ getMemoriesByEntityEdges: db.prepare(`
487
+ SELECT * FROM edges
488
+ WHERE (source_id = ? AND source_type = 'entity' AND target_type = 'memory')
489
+ OR (target_id = ? AND target_type = 'entity' AND source_type = 'memory')
490
+ `),
491
+ consolidateVecSearch: db.prepare(`
492
+ SELECT rowid AS id, distance
493
+ FROM memories_vec
494
+ WHERE embedding MATCH ?
495
+ AND k = 30
496
+ `),
497
+ archiveAndInsertContradiction: db.prepare(
498
+ 'UPDATE memories SET valid_until = unixepoch() WHERE id = ?'
499
+ ),
500
+ archiveExpiredTransientMemories: db.prepare(`
501
+ UPDATE memories
502
+ SET valid_until = unixepoch()
503
+ WHERE valid_until IS NULL
504
+ AND (content LIKE 'Reminder:%' OR content LIKE 'Note:%')
505
+ AND (unixepoch() - created_at) > 1209600
429
506
  `)
430
507
  };
431
508
 
509
+ export { stmts };
510
+
511
+ // ============================================================
512
+ // SECRET DETECTION & REDACTION HELPERS
513
+ // ============================================================
514
+
515
/**
 * Detects sensitive/credential patterns in a string and replaces the secret
 * values with the literal marker `[REDACTED]`.
 *
 * Four passes run in order, each on the output of the previous one:
 *   1. passwords embedded in connection strings / URIs,
 *   2. key/value pairs whose key names a credential (key and operator are kept),
 *   3. standalone tokens with a well-known vendor shape,
 *   4. a token-shape heuristic, applied only when a credential keyword is present.
 *
 * @param {string} content - The content to sanitize
 * @returns {string} Sanitized content; falsy or non-string input is returned unchanged
 */
export function redactSecrets(content) {
  if (!content || typeof content !== 'string') return content;

  let redacted = content;

  // 1. Redact credentials in connection strings / URIs
  // Matches scheme://user:pass@host and scheme://:pass@host.
  // The password group is greedy and may contain '@' or ':'; backtracking then
  // settles on the LAST '@' before the path, so a password such as "p@ss" is
  // removed in full instead of leaving its tail in the output.
  const connectionStringRegex = /\b([a-zA-Z0-9+.-]+:\/\/)([^/:\s]*):([^/\s]+)(@[^/\s]+)/gi;
  redacted = redacted.replace(connectionStringRegex, (match, protocol, user, pass, host) => {
    return protocol + user + ':[REDACTED]' + host;
  });

  // 2. Redact key-value pairs matching credentials (retaining key/operator, redacting value)
  // Supports single-quoted, double-quoted, and unquoted values (non-whitespace).
  // Quoted values of any non-zero length are taken whole: a short quoted value
  // containing spaces would otherwise fall through to the unquoted branch and
  // be cut at the first space, leaking the remainder.
  const kvRegex = /['"]?\b(api[_-]?key|secret[_-]?key|secret|password|passwd|pwd|passphrase|auth[_-]?token|access[_-]?token|client[_-]?secret|private[_-]?key|auth|access|client|aws|gcp|google|stripe|github|openai|vercel|heroku|slack|ssh[_-]?(?:key|password|passphrase|pass)?|credential|aws_secret|secret_access_key|aws_access_key|ssh_passphrase|ssh_password|ssh_key_pass)\b['"]?\s*(?:key|token|secret|password|pwd|passwd|value|string|id)?(?:\b|(?<=['"]))\s*([:=]|is|of|to|set\s+to|\(|\buses\b)\s*(?:'([^']{1,2048})'|"([^"]{1,2048})"|([^\s]+(?:\n(?![a-zA-Z0-9_-]+\s*[:=])(?=[^\s]+(?:\n|$))[^\s]+)*))/gi;

  redacted = redacted.replace(kvRegex, (match, key, op, sqVal, dqVal, uqVal) => {
    const val = sqVal || dqVal || uqVal;
    if (!val) return match;

    // Strip trailing parenthesis if operator is '(' and value has trailing parenthesis
    let cleanVal = val;
    if (op === '(' && val.endsWith(')')) {
      cleanVal = val.slice(0, -1);
    }

    // The value sits at the end of the match, so the last occurrence is the
    // value itself even when the same text also appears inside the key.
    const lastIdx = match.lastIndexOf(cleanVal);
    if (lastIdx !== -1) {
      return match.slice(0, lastIdx) + '[REDACTED]' + match.slice(lastIdx + cleanVal.length);
    }
    return match;
  });

  // 3. Redact standalone common API keys and tokens
  const standalonePatterns = [
    /\b(sk-[a-zA-Z0-9]{48})\b/g,                                  // OpenAI
    /\b(sk-proj-[a-zA-Z0-9-]{40,})\b/g,                           // OpenAI project
    /\b(gh[pous]_[a-zA-Z0-9]{36,255})\b/g,                        // GitHub PAT/Fine-grained
    /\b(xox[bapr]-[0-9]{12}-[a-zA-Z0-9]{24})\b/g,                 // Slack token
    /\b(AIzaSy[A-Za-z0-9_-]{33})\b/g,                             // Google API key
    /\b((?:sk|rk|pk)_(?:live|test)_[0-9a-zA-Z]{24,32})\b/g,       // Stripe key
    /\b(AKIA[0-9A-Z]{16,40})\b/gi,                                // AWS Access Key ID (case-insensitive)
    /\b(ASCA[0-9A-Z]{16,40})\b/gi,                                // AWS ASCA Key
    /\b(npm_[a-zA-Z0-9]{36,255})\b/g,                             // npm token
    /\b(ey[a-zA-Z0-9_-]{10,}\.[a-zA-Z0-9_-]{10,}\.[a-zA-Z0-9_-]{10,})\b/g, // JWT token
    /-----BEGIN[A-Z0-9\s_-]+PRIVATE\s+KEY[A-Z0-9\s_-]*-----\s*[\s\S]*?-----END[A-Z0-9\s_-]+PRIVATE\s+KEY[A-Z0-9\s_-]*-----/gi, // PEM private key
  ];

  for (const pattern of standalonePatterns) {
    redacted = redacted.replace(pattern, '[REDACTED]');
  }

  // 4. Robust credential shape heuristic (independent of strict key-value punctuation)
  const containsCredKeyword = /\b(password|passwd|pwd|passphrase|pass|secret|token|api|credential|auth|ssh|aws)\b/i.test(redacted);
  if (containsCredKeyword) {
    // Match password-like tokens: length 6 to 64, containing both letters and digits/symbols
    const tokenRegex = /\b([a-zA-Z0-9_@#$%^&*+!~\-]{6,64})(?!\w)/g;

    redacted = redacted.replace(tokenRegex, (match) => {
      // Skip common query words and technical keywords
      if (/^(password|passwd|pwd|passphrase|pass|secret|token|api|credential|auth|ssh|uses|with|key|from|here|what|have|this|that|your|same|then|want|more|base64|base64-like|base64-encoded|sha256|sha1|md5|aes256|aes128|utf8|utf-8|url|uri|ipv4|ipv6|http|https|sha-256|sha-1)$/i.test(match)) {
        return match;
      }

      const hasLetter = /[a-zA-Z]/.test(match);
      const hasDigitOrSpecialSymbol = /[0-9@#$%^&*+=!~]/.test(match);

      if (hasLetter && hasDigitOrSpecialSymbol) {
        // Require length >= 8 or containing special symbols/mixed case digits
        const isStrongSecretCandidate = match.length >= 8 ||
          (/[^a-zA-Z0-9_]/.test(match)) ||
          (/[A-Z]/.test(match) && /[0-9]/.test(match));

        if (isStrongSecretCandidate) {
          return '[REDACTED]';
        }
      }
      return match;
    });
  }

  return redacted;
}
603
+
432
604
  // ============================================================
433
605
  // CRUD FUNCTIONS
434
606
  // Simple, one-purpose functions. No magic.
@@ -443,11 +615,12 @@ const stmts = {
443
615
  * @returns {number} The new memory's ID
444
616
  */
445
617
  export function insertMemory(content, importance = 1.0, provenanceInfo = null, namespace = 'shared', parentId = null) {
446
- if (content && content.length > 10000) {
618
+ const redactedContent = redactSecrets(content);
619
+ if (redactedContent && redactedContent.length > 10000) {
447
620
  throw new Error('Memory content exceeds maximum length of 10000 characters.');
448
621
  }
449
622
  const clampedImportance = Math.max(0.0, Math.min(1.0, Math.round(importance * 10000) / 10000));
450
- const result = stmts.insertMemory.run(content, clampedImportance, namespace || 'shared', parentId);
623
+ const result = stmts.insertMemory.run(redactedContent, clampedImportance, namespace || 'shared', parentId);
451
624
  const id = Number(result.lastInsertRowid);
452
625
 
453
626
  // Provenance Info handling
@@ -484,13 +657,12 @@ export function insertVector(id, embedding) {
484
657
  * @returns {object|null} The memory row, or null if not found
485
658
  */
486
659
  export function getMemory(id, namespace = null) {
487
- const memory = namespace
488
- ? stmts.getByIdNs.get(id, namespace)
489
- : stmts.getById.get(id);
660
+ const memory = (namespace === 'all' || namespace === null)
661
+ ? stmts.getById.get(id)
662
+ : stmts.getByIdNs.get(id, namespace);
490
663
  if (memory) {
491
664
  boostMemory(id);
492
- const prov = getProvenance(id);
493
- memory.provenance = prov;
665
+ memory.provenance = getProvenance(id);
494
666
  }
495
667
  return memory || null;
496
668
  }
@@ -514,10 +686,9 @@ export function getAnyMemoryById(id) {
514
686
  * @returns {object|null} The memory row, or null if not found
515
687
  */
516
688
  export function getMemoryById(id, namespace = null) {
517
- const ns = namespace || 'shared';
518
- const memory = ns === 'all'
689
+ const memory = (namespace === 'all' || namespace === null)
519
690
  ? stmts.getById.get(id)
520
- : stmts.getByIdNs.get(id, ns);
691
+ : stmts.getByIdNs.get(id, namespace);
521
692
  if (memory) {
522
693
  memory.provenance = getProvenance(id);
523
694
  }
@@ -530,7 +701,8 @@ export function getMemoryById(id, namespace = null) {
530
701
  * @returns {boolean} true if the memory existed and was updated
531
702
  */
532
703
  export function updateMemoryContent(id, content) {
533
- const result = stmts.updateContent.run(content, id);
704
+ const redactedContent = redactSecrets(content);
705
+ const result = stmts.updateContent.run(redactedContent, id);
534
706
  return result.changes > 0;
535
707
  }
536
708
 
@@ -550,8 +722,8 @@ export function deleteMemory(id) {
550
722
  stmts.deleteEdgesByMemory.run(id, id);
551
723
  deleteVec(id); // Remove vector first (no cascades on virtual tables)
552
724
  try {
553
- db.prepare('DELETE FROM provenance WHERE memory_id = ?').run(id);
554
- db.prepare('DELETE FROM contradictions WHERE old_memory_id = ? OR new_memory_id = ?').run(id, id);
725
+ stmts.deleteProvenanceByMemoryId.run(id);
726
+ stmts.deleteContradictionsByMemoryId.run(id, id);
555
727
  } catch (e) {
556
728
  console.error(`[persyst] Clean up provenance/contradictions error: ${e.message}`);
557
729
  }
@@ -698,6 +870,7 @@ export function getAllEntities(limit = 50) {
698
870
  * Delete an entity and its edges.
699
871
  */
700
872
  export function deleteEntity(id) {
873
+ stmts.deleteEdgesByEntity.run(id, id);
701
874
  stmts.deleteEntity.run(id);
702
875
  }
703
876
 
@@ -712,11 +885,7 @@ export function insertEdge(sourceId, targetId, relation, sourceType, targetType)
712
885
  * Get all memories linked to an entity.
713
886
  */
714
887
  export function getMemoriesByEntity(entityId) {
715
- const edges = db.prepare(`
716
- SELECT * FROM edges
717
- WHERE (source_id = ? AND source_type = 'entity' AND target_type = 'memory')
718
- OR (target_id = ? AND target_type = 'entity' AND source_type = 'memory')
719
- `).all(entityId, entityId);
888
+ const edges = stmts.getMemoriesByEntityEdges.all(entityId, entityId);
720
889
  const memoryIds = edges.map(e => e.source_type === 'memory' ? e.source_id : e.target_id);
721
890
  return memoryIds.map(id => stmts.getById.get(id)).filter(Boolean);
722
891
  }
@@ -779,7 +948,7 @@ export function getMemoryByContent(content, namespace = null) {
779
948
  const row = namespace
780
949
  ? stmts.findMemoryByContentNs.get(content, namespace)
781
950
  : stmts.findMemoryByContent.get(content);
782
- return row ? getMemoryById(row.id) : null;
951
+ return row ? getMemoryById(row.id, namespace) : null;
783
952
  }
784
953
 
785
954
  // ============================================================
@@ -797,7 +966,7 @@ export function logContradiction(oldMemoryId, newMemoryId, reason = '') {
797
966
  try {
798
967
  const parentId = Math.min(oldMemoryId, newMemoryId);
799
968
  const childId = Math.max(oldMemoryId, newMemoryId);
800
- db.prepare('UPDATE memories SET parent_id = ? WHERE id = ?').run(parentId, childId);
969
+ stmts.updateMemoryParentId.run(parentId, childId);
801
970
  } catch (e) {
802
971
  console.error(`[persyst] Failed to set parent_id on contradiction: ${e.message}`);
803
972
  }
@@ -910,13 +1079,13 @@ export function getMemoryHistoryChain(memoryId) {
910
1079
  versions.add(currentId);
911
1080
 
912
1081
  // 1. Find parent (ancestor) from memories table
913
- const row = db.prepare('SELECT parent_id FROM memories WHERE id = ?').get(currentId);
1082
+ const row = stmts.getMemoryParentId.get(currentId);
914
1083
  if (row && row.parent_id !== null) {
915
1084
  if (!versions.has(row.parent_id)) queue.push(row.parent_id);
916
1085
  }
917
1086
 
918
1087
  // 2. Find children (descendants) from memories table
919
- const children = db.prepare('SELECT id FROM memories WHERE parent_id = ?').all(currentId);
1088
+ const children = stmts.getMemoryChildren.all(currentId);
920
1089
  for (const child of children) {
921
1090
  if (!versions.has(child.id)) queue.push(child.id);
922
1091
  }
@@ -993,6 +1162,23 @@ export function upsertWatchPosition(filePath, position) {
993
1162
  // CLEANUP
994
1163
  // ============================================================
995
1164
 
1165
/**
 * Archive transient memories (reminders and notes) older than 14 days.
 *
 * Runs the prepared `archiveExpiredTransientMemories` statement and reports
 * how many rows it changed. Any failure is logged and reported as zero
 * archived rows rather than thrown.
 *
 * @returns {number} Count of archived memories (0 on failure)
 */
export function archiveExpiredMemories() {
  let archivedCount;
  try {
    const outcome = stmts.archiveExpiredTransientMemories.run();
    archivedCount = outcome.changes;
    if (archivedCount > 0) {
      console.error(`[persyst] Archived ${archivedCount} expired transient memories (Note/Reminder older than 14 days).`);
    }
  } catch (err) {
    console.error(`[persyst] Failed to archive expired memories: ${err.message}`);
    return 0;
  }
  return archivedCount;
}
1181
+
996
1182
  /**
997
1183
  * Close the database connection. Call on shutdown.
998
1184
  */
@@ -1001,4 +1187,9 @@ export function closeDatabase() {
1001
1187
  console.error('[persyst] Database closed');
1002
1188
  }
1003
1189
 
1190
// Run auto-expiry cleanup on database startup to prune transient bloat immediately.
// Best-effort: a failure here must not stop the module from loading, but it is
// reported rather than silently discarded.
try {
  archiveExpiredMemories();
} catch (err) {
  console.error(`[persyst] Startup auto-expiry cleanup failed: ${err instanceof Error ? err.message : String(err)}`);
}
1194
+
1004
1195
  export default db;
@@ -425,15 +425,18 @@ export function extractHeuristic(text, options = {}) {
425
425
  return [];
426
426
  }
427
427
 
428
+ // Strip all markdown fenced code blocks to prevent extracting facts from example code/logs
429
+ const cleanSourceText = text.replace(/```[\s\S]*?```/g, '');
430
+
428
431
  // --- Step 1: Explicit saves (highest priority, no filter) ---
429
- const explicitFacts = extractExplicitSaves(text);
432
+ const explicitFacts = extractExplicitSaves(cleanSourceText);
430
433
 
431
434
  // --- Step 2: Implicit pattern matching (filtered, tech-required) ---
432
435
  const implicitFacts = [];
433
436
  const seen = new Set(explicitFacts.map(f => f.content.toLowerCase().replace(/\s+/g, ' ').trim()));
434
437
 
435
438
  // Process line-by-line to filter code/noise
436
- const lines = text.split('\n');
439
+ const lines = cleanSourceText.split('\n');
437
440
  const cleanLines = lines.filter(line => !isNoiseLine(line));
438
441
  const cleanText = cleanLines.join('\n');
439
442
 
package/src/sdk.js CHANGED
@@ -101,17 +101,18 @@ export class Persyst {
101
101
  * @private
102
102
  */
103
103
  async _trackLibrary({ content, importance, agent_id, session_id, shared }) {
104
- const { insertMemory, insertVector } = await import('./database.js');
104
+ const { insertMemory, insertVector, redactSecrets } = await import('./database.js');
105
105
  const { generateEmbedding } = await import('./embeddings.js');
106
106
 
107
107
  const namespace = shared ? 'shared' : agent_id;
108
- const id = insertMemory(content, importance, {
108
+ const redactedContent = redactSecrets ? redactSecrets(content) : content;
109
+ const id = insertMemory(redactedContent, importance, {
109
110
  source_type: 'api',
110
111
  source_id: agent_id,
111
112
  confidence: 1.0
112
113
  }, namespace);
113
114
 
114
- const embedding = await generateEmbedding(content);
115
+ const embedding = await generateEmbedding(redactedContent);
115
116
  insertVector(id, embedding);
116
117
  return { success: true, id };
117
118
  }