sessionmem 1.0.6 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. package/dist/adapters/capabilities/fallbackTools.js +2 -2
  2. package/dist/adapters/claudeMdInjector.js +49 -5
  3. package/dist/adapters/factory.js +68 -9
  4. package/dist/adapters/generic.js +147 -12
  5. package/dist/adapters/global/antigravity.js +14 -7
  6. package/dist/adapters/global/claudeCode.js +46 -10
  7. package/dist/adapters/global/codex.js +73 -13
  8. package/dist/adapters/global/qcoder.js +18 -5
  9. package/dist/adapters/ide/cline.js +56 -9
  10. package/dist/adapters/ide/cursor.js +15 -13
  11. package/dist/adapters/ide/installer.js +201 -8
  12. package/dist/adapters/ide/windsurf.js +14 -13
  13. package/dist/cli/commands/config.js +10 -1
  14. package/dist/cli/commands/import.js +6 -1
  15. package/dist/cli/commands/install.js +57 -16
  16. package/dist/cli/commands/ping.js +42 -8
  17. package/dist/cli/commands/reEmbed.js +4 -3
  18. package/dist/cli/commands/run.js +7 -17
  19. package/dist/cli/commands/savings.js +33 -17
  20. package/dist/cli/commands/sessionEnd.js +124 -0
  21. package/dist/cli/commands/sessionStart.js +52 -0
  22. package/dist/cli/commands/sync.js +39 -9
  23. package/dist/cli/commands/uninstall.js +35 -9
  24. package/dist/cli/context.js +17 -18
  25. package/dist/cli/index.js +16 -4
  26. package/dist/cli/projectId.js +69 -0
  27. package/dist/core/api/contracts.js +155 -42
  28. package/dist/core/api/errors.js +4 -7
  29. package/dist/core/api/memoryCoreService.js +319 -252
  30. package/dist/core/api/sessionLifecycleService.js +8 -0
  31. package/dist/core/config/policyConfig.js +33 -6
  32. package/dist/core/injection/formatStartupInjection.js +53 -9
  33. package/dist/core/retrieve/recencyBands.js +4 -1
  34. package/dist/core/retrieve/retrieveMemories.js +10 -8
  35. package/dist/core/schema/migrations/005_team_provenance.sql +5 -0
  36. package/dist/core/schema/migrations/006_access_pattern_boosting.sql +5 -0
  37. package/dist/core/schema/migrations/008_fts5_search.sql +6 -2
  38. package/dist/core/schema/migrations/009_session_events_unique.sql +24 -0
  39. package/dist/core/schema/runMigrations.js +64 -2
  40. package/dist/core/storage/memoryRepo.js +164 -7
  41. package/dist/core/storage/memorySearchRepo.js +45 -7
  42. package/dist/core/storage/sessionEventsRepo.js +15 -2
  43. package/dist/core/summarize/cloudSummarizer.js +15 -2
  44. package/dist/core/summarize/redaction.js +45 -8
  45. package/package.json +2 -2
@@ -111,6 +111,14 @@ export function createSessionLifecycleService(deps) {
111
111
  * Hard-deletes memories older than the effective retentionDays for this
112
112
  * project. retentionDays<=0 disables pruning. Any failure is swallowed
113
113
  * so it can never block or fail summarization.
114
+ *
115
+ * IMPORTANT: retention pruning fires INDEPENDENTLY of summarization. It runs
116
+ * on every handleSessionEnd path — including the `skipped_disabled`
117
+ * (autoSummarize=false) and `skipped_threshold` (too few events) early
118
+ * returns — because retention is a time-based policy that must apply whether
119
+ * or not a summary was produced this session. It is gated only by the
120
+ * retentionDays config (retentionDays<=0 disables it), not by whether the
121
+ * lifecycle proceeded past the summarization threshold.
114
122
  */
115
123
  function runLightPrune(projectId) {
116
124
  try {
@@ -1,7 +1,7 @@
1
1
  import { z } from "zod";
2
2
  import { homedir } from "os";
3
3
  import { join, dirname } from "path";
4
- import { mkdirSync, readFileSync, writeFileSync } from "fs";
4
+ import { mkdirSync, readFileSync, writeFileSync, renameSync } from "fs";
5
5
  /** Lower bound of the 1-10 importance scale. */
6
6
  export const MIN_IMPORTANCE = 1;
7
7
  /** Upper bound of the 1-10 importance scale. */
@@ -47,8 +47,12 @@ const teamConfigShape = z
47
47
  * built-in defaults.
48
48
  */
49
49
  const policyConfigShape = {
50
- retentionDays: z.number().int().default(DEFAULT_POLICY_CONFIG.retentionDays),
50
+ retentionDays: z.number().int().min(0).default(DEFAULT_POLICY_CONFIG.retentionDays),
51
51
  redactionEnabled: z.boolean().default(DEFAULT_POLICY_CONFIG.redactionEnabled),
52
+ // Optional per-session startup-injection token cap. When set, it overrides the
53
+ // built-in DEFAULT_INJECTION_CAP used by formatStartupInjection and the
54
+ // `savings` analytics. Omitted by default so existing configs keep the default.
55
+ injectionCap: z.number().int().min(100).max(10000).optional(),
52
56
  team: teamConfigShape.default({ enabled: false }),
53
57
  };
54
58
  /**
@@ -80,14 +84,32 @@ export function configFilePath() {
80
84
  * are merged over defaults via the schema's per-field `.default()`.
81
85
  */
82
86
  export function readPolicyConfig(filePath) {
87
+ let raw;
88
+ let obj;
83
89
  try {
84
- const raw = readFileSync(filePath, "utf8");
85
- const parsed = JSON.parse(raw);
86
- return policyConfigReadSchema.parse(parsed);
90
+ raw = readFileSync(filePath, "utf8");
91
+ obj = JSON.parse(raw);
87
92
  }
88
93
  catch {
89
94
  return { ...DEFAULT_POLICY_CONFIG };
90
95
  }
96
+ // Fast path: strict parse succeeds (the common case).
97
+ const strict = policyConfigReadSchema.safeParse(obj);
98
+ if (strict.success)
99
+ return strict.data;
100
+ // Lenient fallback: salvage valid individual fields so a single bad value
101
+ // (e.g. injectionCap: 50, below min 100) doesn't wipe out retentionDays etc.
102
+ const s = policyConfigShape;
103
+ const rd = s.retentionDays.safeParse(obj.retentionDays);
104
+ const re = s.redactionEnabled.safeParse(obj.redactionEnabled);
105
+ const ic = s.injectionCap.safeParse(obj.injectionCap);
106
+ const tm = s.team.safeParse(obj.team);
107
+ return {
108
+ retentionDays: rd.success ? rd.data : DEFAULT_POLICY_CONFIG.retentionDays,
109
+ redactionEnabled: re.success ? re.data : DEFAULT_POLICY_CONFIG.redactionEnabled,
110
+ injectionCap: ic.success ? ic.data : undefined,
111
+ team: tm.success ? tm.data : { ...DEFAULT_POLICY_CONFIG.team },
112
+ };
91
113
  }
92
114
  /**
93
115
  * Persist a partial policy config, merged over the current on-disk values (or
@@ -104,7 +126,12 @@ export function writePolicyConfig(filePath, partial) {
104
126
  const current = readPolicyConfig(filePath);
105
127
  const merged = policyConfigSchema.parse({ ...current, ...validatedPartial });
106
128
  mkdirSync(dirname(filePath), { recursive: true });
107
- writeFileSync(filePath, `${JSON.stringify(merged, null, 2)}\n`, "utf8");
129
+ // Atomic write: write to a temp file then rename over the target so a crash
130
+ // mid-write can't leave a truncated/corrupt config. On Windows, renameSync
131
+ // over an existing file works (Node wraps it via MoveFileExW).
132
+ const tmpPath = `${filePath}.tmp`;
133
+ writeFileSync(tmpPath, `${JSON.stringify(merged, null, 2)}\n`, "utf8");
134
+ renameSync(tmpPath, filePath);
108
135
  return merged;
109
136
  }
110
137
  /**
@@ -2,6 +2,29 @@ import { CRITICAL_WARNING_IMPORTANCE_THRESHOLD } from "../config/policyConfig.js
2
2
  import { countTokens, trimLowestPriorityContent } from "./tokenBudget.js";
3
3
  const DEFAULT_TOKEN_CAP = 450;
4
4
  const HEADER = "Relevant prior context";
5
+ // At most this many preserved (critical-warning) entries may bypass trimming.
6
+ // Critical warnings rarely need more, and an unbounded count would let
7
+ // preserved entries dominate the injection block past the token cap.
8
+ const MAX_PRESERVED = 5;
9
// Neutralize a memory's content before it is rendered into the startup
// injection block, so one entry can neither break out of its line nor dominate
// the block. Only the rendered text is affected; the caller's value is untouched.
//
// - Every line-breaking or control character becomes a single space: C0
//   controls except TAB (this covers LF, VT, FF and CR), DEL, the C1 controls
//   (including NEL, U+0085) and the Unicode line/paragraph separators
//   U+2028/U+2029.
// - The result is capped at 500 UTF-16 code units. If the cut lands in the
//   middle of a surrogate pair, the dangling high surrogate is dropped so the
//   output never ends in a malformed character.
// - null/undefined render as "" and other non-strings are stringified, so a
//   malformed row cannot make formatting throw.
const safeContent = (content) => {
    const flattened = String(content ?? "")
        // eslint-disable-next-line no-control-regex -- intentional control-char strip
        .replace(/[\x00-\x08\x0a-\x1f\x7f-\x9f\u2028\u2029]/g, " ");
    const clipped = flattened.slice(0, 500);
    return /[\uD800-\uDBFF]$/.test(clipped) ? clipped.slice(0, -1) : clipped;
};
16
// Sanitize `source_adapter` before it is rendered verbatim onto the metadata
// line of an injection entry, so a malformed row cannot smuggle line breaks or
// control characters (and with them a prompt-injection payload) into the block.
//
// - null/undefined render as ""; any other non-string value is stringified
//   instead of throwing on `.replace`.
// - Removed outright (not replaced): C0 controls except TAB (covers LF, VT, FF
//   and CR), DEL, the C1 controls (including NEL, U+0085) and the Unicode
//   line/paragraph separators U+2028/U+2029.
// - The result is capped at 100 UTF-16 code units.
const safeSourceAdapter = (s) => String(s ?? "")
    // eslint-disable-next-line no-control-regex -- intentional control-char strip
    .replace(/[\x00-\x08\x0a-\x1f\x7f-\x9f\u2028\u2029]/g, "")
    .slice(0, 100);
22
// Kinds that are allowed to appear verbatim inside the rendered `[kind]` label.
// Anything outside this set is displayed as the generic "memory" label, which
// keeps a malformed row from placing arbitrary text in the label.
const KNOWN_KINDS = new Set([
    "fact",
    "decision",
    "preference",
    "warning",
    "summary",
    "memory",
    "context",
]);
/** Return `kind` when it is on the allow-list, otherwise the fallback "memory". */
function safeKind(kind) {
    if (!KNOWN_KINDS.has(kind)) {
        return "memory";
    }
    return kind;
}
5
28
  const KIND_ORDER = ["warning", "decision", "fact", "summary", "preference"];
6
29
  const KIND_RANK = new Map(KIND_ORDER.map((kind, index) => [kind, index]));
7
30
  function kindRank(kind) {
@@ -36,7 +59,12 @@ function authorPrefix(memory, localUsername) {
36
59
  if (memory.author &&
37
60
  localUsername &&
38
61
  memory.author !== localUsername) {
39
- return `${memory.author}: `;
62
+ // Defense in depth: even though `author` is constrained at the contract
63
+ // boundary, strip any control characters before rendering so a malformed
64
+ // row cannot break out of the injection block.
65
+ // eslint-disable-next-line no-control-regex -- intentional control-char strip
66
+ const safeAuthor = memory.author.replace(/[\n\r\x00-\x1f\x7f]/g, "");
67
+ return `${safeAuthor}: `;
40
68
  }
41
69
  return "";
42
70
  }
@@ -45,8 +73,8 @@ function formatLine(entry, localUsername) {
45
73
  const score = memory.score;
46
74
  const prefix = authorPrefix(memory, localUsername);
47
75
  return [
48
- `- [${memory.kind}] ${prefix}${entry.content}`,
49
- `(score total=${formatScore(score.total)}, semantic=${formatScore(score.raw.semantic)}, recency=${formatScore(score.raw.recency)}, importance=${formatScore(score.raw.importance)}; source=${memory.source_adapter}; date=${memory.updated_at})`,
76
+ `- [${safeKind(memory.kind)}] ${prefix}${safeContent(entry.content)}`,
77
+ `(score total=${formatScore(score.total)}, semantic=${formatScore(score.raw.semantic)}, recency=${formatScore(score.raw.recency)}, importance=${formatScore(score.raw.importance)}; source=${safeSourceAdapter(memory.source_adapter)}; date=${memory.updated_at})`,
50
78
  ].join(" ");
51
79
  }
52
80
  function render(entries, localUsername) {
@@ -73,12 +101,28 @@ function lowestDroppableIndex(entries) {
73
101
  export function formatStartupInjection(rankedMemories, options = {}) {
74
102
  const tokenCap = options.tokenCap ?? DEFAULT_TOKEN_CAP;
75
103
  const localUsername = options.localUsername;
76
- let included = sortMemories(rankedMemories).map((memory) => ({
77
- memory,
78
- content: memory.content,
79
- priority: KIND_ORDER.length - kindRank(memory.kind),
80
- preserve: isCriticalWarning(memory),
81
- }));
104
+ // Cap how many entries may be marked `preserve`. Excess critical warnings
105
+ // beyond MAX_PRESERVED become droppable so they cannot bypass the trim/drop
106
+ // loop and blow past the token cap. Sorting first keeps the highest-ranked
107
+ // warnings preserved.
108
+ let preservedCount = 0;
109
+ let included = sortMemories(rankedMemories).map((memory) => {
110
+ let preserve = isCriticalWarning(memory);
111
+ if (preserve) {
112
+ if (preservedCount >= MAX_PRESERVED) {
113
+ preserve = false;
114
+ }
115
+ else {
116
+ preservedCount += 1;
117
+ }
118
+ }
119
+ return {
120
+ memory,
121
+ content: memory.content,
122
+ priority: KIND_ORDER.length - kindRank(memory.kind),
123
+ preserve,
124
+ };
125
+ });
82
126
  let output = render(included, localUsername);
83
127
  while (included.length > 0 && countTokens(output) > tokenCap) {
84
128
  const trimmed = trimLowestPriorityContent(included);
@@ -4,7 +4,10 @@ function toDate(value) {
4
4
  }
5
5
  export function getRecencyBandScore(updatedAt, now = new Date()) {
6
6
  const updatedDate = toDate(updatedAt);
7
- const ageDays = Math.max(0, (now.getTime() - updatedDate.getTime()) / DAY_IN_MS);
7
+ const ageDays = (now.getTime() - updatedDate.getTime()) / DAY_IN_MS;
8
+ if (!Number.isFinite(ageDays) || ageDays < 0) {
9
+ return 0.05; // invalid/future date -> treat as max age (lowest recency score)
10
+ }
8
11
  const HALF_LIFE_DAYS = 14;
9
12
  const lambda = Math.LN2 / HALF_LIFE_DAYS;
10
13
  return Math.max(0.05, Math.exp(-lambda * ageDays));
@@ -1,7 +1,7 @@
1
1
  import { deterministicEmbed } from "../embed/deterministicEmbed.js";
2
2
  import { decayOldBoosts } from "./decay.js";
3
3
  import { scoreMemoryCandidate, } from "./score.js";
4
- import { searchMemoryCandidates, searchMemoryCandidatesFTS, } from "../storage/memorySearchRepo.js";
4
+ import { searchMemoryCandidatesFTS, } from "../storage/memorySearchRepo.js";
5
5
  const DEFAULT_EMBEDDING_DIMENSION = 32;
6
6
  function resolveEmbeddingDimension(candidates) {
7
7
  for (const candidate of candidates) {
@@ -41,19 +41,21 @@ export function retrieveMemories(input) {
41
41
  }
42
42
  const topK = input.topK ?? input.limit ?? 20;
43
43
  const now = input.now ?? new Date();
44
- // Use FTS5 pre-filtering when a semantic query is present to limit
45
- // cosine similarity computation to ~50 candidates instead of all.
46
- const candidates = queryText
47
- ? searchMemoryCandidatesFTS(input.db, input.projectId, queryText)
48
- : searchMemoryCandidates(input.db, input.projectId);
44
+ // Use FTS5 pre-filtering to limit cosine similarity computation to ~50 candidates.
45
+ // queryText is guaranteed non-empty by the Zod schema (z.string().min(1)) upstream.
46
+ const candidates = searchMemoryCandidatesFTS(input.db, input.projectId, queryText);
49
47
  const decayedCandidates = decayOldBoosts(candidates, now);
50
48
  const dimension = resolveEmbeddingDimension(candidates);
51
49
  const queryVector = deterministicEmbed(queryText, dimension).vector;
52
50
  const ranked = decayedCandidates
53
51
  .map((candidate) => {
54
52
  // When embedding is null (version mismatch or missing), use a neutral
55
- // score of 0.5 so the memory is neither penalized nor boosted.
56
- const semantic = candidate.embedding === null
53
+ // score of 0.5 so the memory is neither penalized nor boosted. A non-null
54
+ // embedding whose length differs from the query vector (a stored
55
+ // embedding from a different dimension) is treated the same way: passing
56
+ // it to cosineSimilarity would return 0 and actively penalize the row.
57
+ const semantic = candidate.embedding === null ||
58
+ candidate.embedding.length !== queryVector.length
57
59
  ? 0.5
58
60
  : cosineSimilarity(queryVector, candidate.embedding);
59
61
  const score = scoreMemoryCandidate({
@@ -5,5 +5,10 @@
5
5
  -- NOT NULL ADD COLUMN requires a default and the local OS username is not
6
6
  -- available inside static SQL. `origin_project_id` is nullable and
7
7
  -- only set on rows pulled in from another project's store.
8
+ --
9
+ -- Idempotency: SQLite has no `ADD COLUMN IF NOT EXISTS`. Re-running this
10
+ -- migration (only possible if the _migrations record was lost) throws
11
+ -- "duplicate column name", which runMigrations catches and treats as
12
+ -- already-applied. See src/core/schema/runMigrations.ts.
8
13
  ALTER TABLE memories ADD COLUMN author TEXT NOT NULL DEFAULT '';
9
14
  ALTER TABLE memories ADD COLUMN origin_project_id TEXT;
@@ -1,5 +1,10 @@
1
1
  -- Access-pattern boosting: track how often each memory is included in
2
2
  -- retrieval output. access_count drives a read-time effective_importance
3
3
  -- boost without mutating the stored importance score.
4
+ --
5
+ -- Idempotency: SQLite has no `ADD COLUMN IF NOT EXISTS`. Re-running this
6
+ -- migration (only possible if the _migrations record was lost) throws
7
+ -- "duplicate column name", which runMigrations catches and treats as
8
+ -- already-applied. See src/core/schema/runMigrations.ts.
4
9
  ALTER TABLE memories ADD COLUMN access_count INTEGER NOT NULL DEFAULT 0;
5
10
  ALTER TABLE memories ADD COLUMN last_accessed TEXT;
@@ -8,9 +8,13 @@ CREATE VIRTUAL TABLE IF NOT EXISTS memories_fts USING fts5(
8
8
  content_rowid='rowid'
9
9
  );
10
10
 
11
- -- Populate FTS index from existing rows
11
+ -- Populate FTS index from existing rows. Guard against double-backfill: if this
12
+ -- migration is re-run on a DB that already has FTS data (e.g. after the
13
+ -- _migrations record was lost), re-inserting every row would duplicate the
14
+ -- index entries. Only backfill when the FTS table is empty.
12
15
  INSERT INTO memories_fts(rowid, content, normalized_content)
13
- SELECT rowid, content, normalized_content FROM memories;
16
+ SELECT rowid, content, normalized_content FROM memories
17
+ WHERE NOT EXISTS (SELECT 1 FROM memories_fts LIMIT 1);
14
18
 
15
19
  -- Keep FTS index in sync: INSERT trigger
16
20
  CREATE TRIGGER IF NOT EXISTS memories_fts_insert AFTER INSERT ON memories BEGIN
@@ -0,0 +1,24 @@
1
+ -- Deduplicate session events by their logical key.
2
+ --
3
+ -- `ingestSessionEvents` is now reachable from agents (MCP tool) and CLI, so it
4
+ -- can be called more than once for the same session. The original index on
5
+ -- (project_id, session_id, event_index) was NOT unique, so re-ingesting the
6
+ -- same logical event with a fresh `id` silently created duplicate rows — which
7
+ -- then inflated session-event counts and the local-summarizer input.
8
+ --
9
+ -- Replace it with a UNIQUE index so `INSERT OR IGNORE` makes re-ingestion a
10
+ -- no-op. session_events has no production writer before this migration, so no
11
+ -- pre-existing duplicates are expected; the DELETE below is a defensive cleanup.
12
+ DROP INDEX IF EXISTS idx_session_events_project_session_event_index;
13
+
14
+ -- Remove duplicate rows before adding unique constraint.
15
+ -- Keep the row with the smallest rowid for each logical key.
16
+ DELETE FROM session_events
17
+ WHERE rowid NOT IN (
18
+ SELECT MIN(rowid)
19
+ FROM session_events
20
+ GROUP BY project_id, session_id, event_index
21
+ );
22
+
23
+ CREATE UNIQUE INDEX IF NOT EXISTS idx_session_events_project_session_event_index
24
+ ON session_events(project_id, session_id, event_index);
@@ -1,6 +1,7 @@
1
1
  import fs from "node:fs";
2
2
  import path from "node:path";
3
- const DEFAULT_MIGRATIONS_DIR = path.resolve(process.cwd(), "src/core/schema/migrations");
3
+ import { fileURLToPath } from "node:url";
4
+ const DEFAULT_MIGRATIONS_DIR = path.resolve(path.dirname(fileURLToPath(import.meta.url)), "migrations");
4
5
  function ensureMigrationsTable(db) {
5
6
  db.exec(`
6
7
  CREATE TABLE IF NOT EXISTS _migrations (
@@ -18,6 +19,38 @@ function listMigrationFiles(migrationsDir) {
18
19
  .filter((fileName) => fileName.endsWith(".sql"))
19
20
  .sort((left, right) => left.localeCompare(right));
20
21
  }
22
/**
 * Check that every column declared by an `ALTER TABLE <table> ADD COLUMN
 * <name>` statement in `sql` is already present in its target table.
 *
 * Each referenced table is inspected once via `PRAGMA table_info`. Table and
 * column names are compared case-insensitively, because SQLite identifiers are
 * case-insensitive: `ADD COLUMN Author` raises "duplicate column name" when
 * `author` already exists, and must therefore count as present here too.
 *
 * @param db  Database handle exposing `prepare(sql).all()`; each returned row
 *            must carry the column name in `name`.
 * @param {string} sql  Full text of the migration file.
 * @returns {boolean} `true` only when at least one ADD COLUMN statement was
 *   parsed AND every declared column exists. `false` when nothing could be
 *   parsed (the schema cannot be proven consistent) or when any declared
 *   column is missing. A table for which the PRAGMA yields no rows fails the
 *   check, so a partially-applied migration is never reported as complete.
 */
function allAddedColumnsExist(db, sql) {
    // Only plain `\w+` identifiers are recognized. A quoted identifier does not
    // match, which lands in the "nothing parsed" branch below and fails safe.
    const addColumnRe = /ALTER\s+TABLE\s+(\w+)\s+ADD\s+COLUMN\s+(\w+)/gi;
    // lower-cased table name -> set of lower-cased column names it must contain
    const declared = new Map();
    for (const [, table, column] of sql.matchAll(addColumnRe)) {
        const tableKey = table.toLowerCase();
        const columns = declared.get(tableKey) ?? new Set();
        columns.add(column.toLowerCase());
        declared.set(tableKey, columns);
    }
    // No ADD COLUMN statements parsed → we cannot prove the schema is consistent,
    // so treat it as unsafe and let the caller re-throw.
    if (declared.size === 0) {
        return false;
    }
    for (const [table, columns] of declared) {
        // `table` matched `\w+`, so interpolating it into the PRAGMA is safe.
        const present = new Set(db
            .prepare(`PRAGMA table_info(${table})`)
            .all()
            .map((row) => String(row.name).toLowerCase()));
        for (const column of columns) {
            if (!present.has(column)) {
                return false;
            }
        }
    }
    return true;
}
21
54
  export function runMigrations(db, migrationsDir = DEFAULT_MIGRATIONS_DIR) {
22
55
  ensureMigrationsTable(db);
23
56
  const files = listMigrationFiles(migrationsDir);
@@ -31,8 +64,37 @@ export function runMigrations(db, migrationsDir = DEFAULT_MIGRATIONS_DIR) {
31
64
  });
32
65
  for (const fileName of files) {
33
66
  const existing = hasMigrationStmt.get(fileName);
34
- if (!existing) {
67
+ if (existing) {
68
+ continue;
69
+ }
70
+ try {
35
71
  runMigration(fileName);
36
72
  }
73
+ catch (err) {
74
+ // Idempotency guard for ALTER TABLE ADD COLUMN migrations (005/006).
75
+ // SQLite has no `ADD COLUMN IF NOT EXISTS`, so re-running a column-adding
76
+ // migration on a DB that already has the column throws "duplicate column
77
+ // name". This only happens when the _migrations record was lost (e.g. the
78
+ // table was dropped) while the schema change survived.
79
+ //
80
+ // Each migration runs in a transaction (all-or-nothing), so the duplicate
81
+ // error rolls the whole body back. Migrations 005/006 add TWO columns
82
+ // each: if only the FIRST already exists, the throw fires on the first
83
+ // ALTER and the second column is never added. Blindly marking the
84
+ // migration applied would leave that second column permanently missing.
85
+ //
86
+ // So instead of trusting the error, verify that EVERY column this
87
+ // migration was supposed to add actually exists. Only then is it safe to
88
+ // record as applied; otherwise re-throw so the failure surfaces.
89
+ if (err instanceof Error && /duplicate column name/i.test(err.message)) {
90
+ const filePath = path.join(migrationsDir, fileName);
91
+ const sql = fs.readFileSync(filePath, "utf8");
92
+ if (allAddedColumnsExist(db, sql)) {
93
+ insertMigrationStmt.run(fileName);
94
+ continue;
95
+ }
96
+ }
97
+ throw err;
98
+ }
37
99
  }
38
100
  }
@@ -1,3 +1,34 @@
1
+ // Shared INSERT ... ON CONFLICT(id) upsert column lists. The import and team-pull
2
+ // paths differ only in how they resolve `importance` on conflict (import takes the
3
+ // incoming value; pull preserves MAX(local, incoming)), so the surrounding SQL is
4
+ // factored out to keep the two prepared statements byte-for-byte aligned.
5
+ const UPSERT_INSERT_HEAD = `
6
+ INSERT INTO memories (
7
+ id, project_id, session_id, source_adapter, kind, content, normalized_content,
8
+ importance, embedding, embedding_dim, embedding_version, author, origin_project_id,
9
+ created_at, updated_at
10
+ ) VALUES (
11
+ @id, @project_id, @session_id, @source_adapter, @kind, @content, @normalized_content,
12
+ @importance, @embedding, @embedding_dim, @embedding_version, @author, @origin_project_id,
13
+ COALESCE(@created_at, strftime('%Y-%m-%dT%H:%M:%fZ', 'now')),
14
+ COALESCE(@updated_at, strftime('%Y-%m-%dT%H:%M:%fZ', 'now'))
15
+ )
16
+ ON CONFLICT(id) DO UPDATE SET
17
+ project_id = excluded.project_id,
18
+ session_id = excluded.session_id,
19
+ source_adapter = excluded.source_adapter,
20
+ kind = excluded.kind,
21
+ content = excluded.content,
22
+ normalized_content = excluded.normalized_content,`;
23
+ const UPSERT_INSERT_TAIL = `
24
+ embedding = excluded.embedding,
25
+ embedding_dim = excluded.embedding_dim,
26
+ embedding_version = excluded.embedding_version,
27
+ author = excluded.author,
28
+ origin_project_id = excluded.origin_project_id,
29
+ created_at = excluded.created_at,
30
+ updated_at = excluded.updated_at
31
+ `;
1
32
  const stmtCache = new WeakMap();
2
33
  function getStatements(db) {
3
34
  let stmts = stmtCache.get(db);
@@ -40,6 +71,20 @@ function getStatements(db) {
40
71
  author = excluded.author,
41
72
  origin_project_id = excluded.origin_project_id,
42
73
  updated_at = COALESCE(excluded.updated_at, strftime('%Y-%m-%dT%H:%M:%fZ', 'now'))
74
+ ON CONFLICT(id)
75
+ DO UPDATE SET
76
+ project_id = excluded.project_id,
77
+ session_id = excluded.session_id,
78
+ source_adapter = excluded.source_adapter,
79
+ content = excluded.content,
80
+ normalized_content = excluded.normalized_content,
81
+ importance = excluded.importance,
82
+ embedding = excluded.embedding,
83
+ embedding_dim = excluded.embedding_dim,
84
+ embedding_version = excluded.embedding_version,
85
+ author = excluded.author,
86
+ origin_project_id = excluded.origin_project_id,
87
+ updated_at = COALESCE(excluded.updated_at, strftime('%Y-%m-%dT%H:%M:%fZ', 'now'))
43
88
  `),
44
89
  listByProject: db.prepare(`
45
90
  SELECT
@@ -50,7 +95,28 @@ function getStatements(db) {
50
95
  WHERE project_id = ?
51
96
  ORDER BY updated_at DESC
52
97
  `),
98
+ // Lightweight projection for token-savings accounting: only `content` is
99
+ // needed to count tokens, so we deliberately avoid pulling the (potentially
100
+ // multi-KB) embedding JSON and normalized_content for every row. Matters for
101
+ // large projects where `savings` would otherwise load the whole table.
102
+ listContentByProject: db.prepare("SELECT content FROM memories WHERE project_id = ?"),
103
+ importUpsert: db.prepare(`${UPSERT_INSERT_HEAD}\n importance = excluded.importance,${UPSERT_INSERT_TAIL}`),
104
+ // Importance-preserving merge for team pulls: a teammate can never lower a
105
+ // locally-boosted importance. better-sqlite3@12 bundles a SQLite that accepts
106
+ // the two-arg scalar MAX() inside DO UPDATE.
107
+ pullUpsert: db.prepare(`${UPSERT_INSERT_HEAD}\n importance = MAX(memories.importance, excluded.importance),${UPSERT_INSERT_TAIL}`),
53
108
  listAllIds: db.prepare("SELECT id FROM memories"),
109
+ selectById: db.prepare(`
110
+ SELECT
111
+ id, project_id, session_id, source_adapter, kind, content, normalized_content,
112
+ importance, embedding, embedding_dim, embedding_version, author, origin_project_id,
113
+ access_count, last_accessed, created_at, updated_at
114
+ FROM memories
115
+ WHERE project_id = ? AND id = ?
116
+ LIMIT 1
117
+ `),
118
+ selectOwner: db.prepare("SELECT project_id FROM memories WHERE id = ?"),
119
+ deleteById: db.prepare("DELETE FROM memories WHERE project_id = ? AND id = ?"),
54
120
  countOlderThan: db.prepare(`
55
121
  SELECT COUNT(*) AS count
56
122
  FROM memories
@@ -72,6 +138,9 @@ function getStatements(db) {
72
138
  SET
73
139
  content = ?,
74
140
  normalized_content = COALESCE(?, normalized_content),
141
+ embedding = COALESCE(?, embedding),
142
+ embedding_dim = COALESCE(?, embedding_dim),
143
+ embedding_version = COALESCE(?, embedding_version),
75
144
  updated_at = strftime('%Y-%m-%dT%H:%M:%fZ', 'now')
76
145
  WHERE project_id = ? AND id = ?
77
146
  `),
@@ -96,7 +165,17 @@ function getStatements(db) {
96
165
  countBySession: db.prepare(`
97
166
  SELECT COUNT(*) AS count
98
167
  FROM memories
99
- WHERE session_id = ?
168
+ WHERE session_id = ? AND project_id = ?
169
+ `),
170
+ countAll: db.prepare("SELECT COUNT(*) AS count FROM memories WHERE project_id = ?"),
171
+ // Memories whose stored embedding does not match the supplied current
172
+ // embedding version (NULL counts as stale). Used to surface a re-embed
173
+ // hint; the actual re-embed is the `sessionmem re-embed` command.
174
+ countStaleEmbeddings: db.prepare(`
175
+ SELECT COUNT(*) AS count
176
+ FROM memories
177
+ WHERE project_id = ?
178
+ AND (embedding_version IS NULL OR embedding_version != ?)
100
179
  `),
101
180
  };
102
181
  stmtCache.set(db, stmts);
@@ -127,9 +206,62 @@ export function upsertSessionSummaryMemory(db, input) {
127
206
  assertImportance(input.importance);
128
207
  getStatements(db).upsertSessionSummary.run(toParams({ ...input, kind: "summary" }));
129
208
  }
209
/**
 * Insert-or-update a memory that came from an external export, using the
 * `importUpsert` prepared statement: on an `id` conflict the incoming record
 * replaces the stored one, importance included.
 *
 * The importance is validated first, so an out-of-range value is rejected by
 * `assertImportance` before anything is written. Callers are expected to have
 * already filtered out ids owned by another project (see
 * {@link getMemoryOwnerProjectId}); this function performs no ownership check.
 */
export function upsertImportedMemory(db, input) {
    const { importance } = input;
    assertImportance(importance);
    const statement = getStatements(db).importUpsert;
    statement.run(toParams(input));
}
219
/**
 * Insert-or-update a memory pulled from a teammate, using the `pullUpsert`
 * prepared statement. It mirrors {@link upsertImportedMemory} except for how
 * importance is resolved on an `id` conflict: the stored value becomes
 * MAX(local, incoming), so a pull can raise but never lower a locally-boosted
 * importance.
 *
 * The importance is validated by `assertImportance` before the write.
 */
export function upsertPulledMemory(db, input) {
    const { importance } = input;
    assertImportance(importance);
    const statement = getStatements(db).pullUpsert;
    statement.run(toParams(input));
}
130
228
  export function listMemoriesByProject(db, projectId) {
131
229
  return getStatements(db).listByProject.all(projectId);
132
230
  }
231
/**
 * Collect the `content` string of every memory in a project, in the order the
 * `listContentByProject` statement returns them.
 *
 * That statement selects only the `content` column, so callers that just need
 * to count tokens (the token-savings command) avoid loading embedding JSON and
 * normalized_content for the whole table.
 */
export function listMemoryContentsByProject(db, projectId) {
    const contents = [];
    for (const row of getStatements(db).listContentByProject.all(projectId)) {
        contents.push(row.content);
    }
    return contents;
}
240
/**
 * Look up one memory row by id within a single project.
 *
 * Delegates to the cached `selectById` prepared statement, whose WHERE clause
 * requires both `project_id` and `id` to match. Whatever the statement's
 * `get` yields is returned as-is — undefined when nothing matches, which the
 * caller maps to NOT_FOUND.
 */
export function getMemoryRecordById(db, projectId, memoryId) {
    const { selectById } = getStatements(db);
    const record = selectById.get(projectId, memoryId);
    return record;
}
249
/**
 * Find which project currently owns a memory `id`.
 *
 * Runs the `selectOwner` statement (lookup by id alone, not scoped to a
 * project) and returns the row's `project_id`, or undefined when no row has
 * that id. Import/pull use the answer to skip — never overwrite — an id that
 * belongs to a different project.
 */
export function getMemoryOwnerProjectId(db, memoryId) {
    const owner = getStatements(db).selectOwner.get(memoryId);
    if (owner === null || owner === undefined) {
        return undefined;
    }
    return owner.project_id;
}
258
/**
 * Permanently remove one memory, matched on both project id and memory id.
 *
 * @returns the `changes` count reported by the `deleteById` statement — the
 *   number of rows removed, which is 0 when the id does not exist in this
 *   project.
 */
export function deleteMemoryById(db, projectId, memoryId) {
    const { deleteById } = getStatements(db);
    const { changes } = deleteById.run(projectId, memoryId);
    return changes;
}
133
265
  /**
134
266
  * All memory ids across every project. `id` is a globally-unique
135
267
  * PRIMARY KEY, so duplicate-skip checks in `import` must consider every
@@ -153,6 +285,12 @@ export function deleteMemoriesOlderThan(db, projectId, cutoffIso) {
153
285
  const result = getStatements(db).deleteOlderThan.run(projectId, cutoffIso);
154
286
  return result.changes;
155
287
  }
288
+ // NOTE: no MCP tool, CLI command, or service method currently calls this. It is
289
+ // retained as intentional repository API surface (the importance-update
290
+ // counterpart to updateMemoryContent) for a future importance-adjustment tool,
291
+ // not forgotten code. The `updateImportance` prepared statement above is wired
292
+ // solely for this function. Keep or remove deliberately — do not delete on a
293
+ // "looks unused" pass.
156
294
  export function updateMemoryImportance(db, projectId, memoryId, nextImportance, usedAt) {
157
295
  assertImportance(nextImportance);
158
296
  const result = getStatements(db).updateImportance.run(nextImportance, usedAt ?? null, projectId, memoryId);
@@ -161,17 +299,36 @@ export function updateMemoryImportance(db, projectId, memoryId, nextImportance,
161
299
  }
162
300
  }
163
301
  /**
164
- * Count the number of memories stored under a given session_id across all
165
- * projects. Used to enforce per-session write soft limits — the count is
302
+ * Count all memories stored in a project. Used to enforce per-session write
303
+ * soft limits — the count is
166
304
  * checked before each storeMemory call and a warning is surfaced when the
167
305
  * threshold is reached.
168
306
  */
169
- export function countMemoriesBySession(db, sessionId) {
170
- const row = getStatements(db).countBySession.get(sessionId);
307
export function countAllMemoriesByProject(db, projectId) {
    // `countAll` is a COUNT(*) over memories filtered by project_id; the single
    // result row exposes the total under `count`.
    const { count } = getStatements(db).countAll.get(projectId);
    return count;
}
311
/**
 * Count the memories recorded under `sessionId` inside `projectId`.
 *
 * The `countBySession` statement filters on both columns and binds them in
 * the order (session_id, project_id), so the arguments are forwarded in that
 * same order.
 */
export function countMemoriesBySession(db, sessionId, projectId) {
    const { countBySession } = getStatements(db);
    const { count } = countBySession.get(sessionId, projectId);
    return count;
}
315
/**
 * Count the memories in a project whose stored `embedding_version` is NULL or
 * differs from `currentVersion` (both cases are treated as stale by the
 * `countStaleEmbeddings` statement).
 *
 * The number feeds the startup re-embed hint; the remedy itself is the
 * `sessionmem re-embed` command, not this function.
 */
export function countStaleEmbeddings(db, projectId, currentVersion) {
    const { countStaleEmbeddings: staleStatement } = getStatements(db);
    const { count } = staleStatement.get(projectId, currentVersion);
    return count;
}
173
- export function updateMemoryContent(db, projectId, memoryId, newContent, newNormalizedContent) {
174
- const result = getStatements(db).updateContent.run(newContent, newNormalizedContent ?? null, projectId, memoryId);
324
+ export function updateMemoryContent(db, projectId, memoryId, newContent, newNormalizedContent,
325
+ // Optional re-embedding: when content is rewritten (e.g. a redactExisting
326
+ // scrub) the stored embedding vector — computed from the PRE-edit text —
327
+ // becomes stale and inconsistent with the new normalized_content. Pass the
328
+ // recomputed embedding so the vector tracks the redacted text; omit to leave
329
+ // the existing embedding untouched (COALESCE keeps the prior value on null).
330
+ newEmbedding) {
331
+ const result = getStatements(db).updateContent.run(newContent, newNormalizedContent ?? null, newEmbedding ? JSON.stringify(newEmbedding.vector) : null, newEmbedding?.dimension ?? null, newEmbedding?.embeddingVersion ?? null, projectId, memoryId);
175
332
  if (result.changes === 0) {
176
333
  throw new Error(`Memory not found: ${memoryId}`);
177
334
  }