sessionmem 1.0.6 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/adapters/capabilities/fallbackTools.js +2 -2
- package/dist/adapters/claudeMdInjector.js +49 -5
- package/dist/adapters/factory.js +68 -9
- package/dist/adapters/generic.js +147 -12
- package/dist/adapters/global/antigravity.js +14 -7
- package/dist/adapters/global/claudeCode.js +46 -10
- package/dist/adapters/global/codex.js +73 -13
- package/dist/adapters/global/qcoder.js +18 -5
- package/dist/adapters/ide/cline.js +56 -9
- package/dist/adapters/ide/cursor.js +15 -13
- package/dist/adapters/ide/installer.js +201 -8
- package/dist/adapters/ide/windsurf.js +14 -13
- package/dist/cli/commands/config.js +10 -1
- package/dist/cli/commands/import.js +6 -1
- package/dist/cli/commands/install.js +57 -16
- package/dist/cli/commands/ping.js +42 -8
- package/dist/cli/commands/reEmbed.js +4 -3
- package/dist/cli/commands/run.js +7 -17
- package/dist/cli/commands/savings.js +33 -17
- package/dist/cli/commands/sessionEnd.js +124 -0
- package/dist/cli/commands/sessionStart.js +52 -0
- package/dist/cli/commands/sync.js +39 -9
- package/dist/cli/commands/uninstall.js +35 -9
- package/dist/cli/context.js +17 -18
- package/dist/cli/index.js +16 -4
- package/dist/cli/projectId.js +69 -0
- package/dist/core/api/contracts.js +155 -42
- package/dist/core/api/errors.js +4 -7
- package/dist/core/api/memoryCoreService.js +319 -252
- package/dist/core/api/sessionLifecycleService.js +8 -0
- package/dist/core/config/policyConfig.js +33 -6
- package/dist/core/injection/formatStartupInjection.js +53 -9
- package/dist/core/retrieve/recencyBands.js +4 -1
- package/dist/core/retrieve/retrieveMemories.js +10 -8
- package/dist/core/schema/migrations/005_team_provenance.sql +5 -0
- package/dist/core/schema/migrations/006_access_pattern_boosting.sql +5 -0
- package/dist/core/schema/migrations/008_fts5_search.sql +6 -2
- package/dist/core/schema/migrations/009_session_events_unique.sql +24 -0
- package/dist/core/schema/runMigrations.js +64 -2
- package/dist/core/storage/memoryRepo.js +164 -7
- package/dist/core/storage/memorySearchRepo.js +45 -7
- package/dist/core/storage/sessionEventsRepo.js +15 -2
- package/dist/core/summarize/cloudSummarizer.js +15 -2
- package/dist/core/summarize/redaction.js +45 -8
- package/package.json +2 -2
|
@@ -111,6 +111,14 @@ export function createSessionLifecycleService(deps) {
|
|
|
111
111
|
* Hard-deletes memories older than the effective retentionDays for this
|
|
112
112
|
* project. retentionDays<=0 disables pruning. Any failure is swallowed
|
|
113
113
|
* so it can never block or fail summarization.
|
|
114
|
+
*
|
|
115
|
+
* IMPORTANT: retention pruning fires INDEPENDENTLY of summarization. It runs
|
|
116
|
+
* on every handleSessionEnd path — including the `skipped_disabled`
|
|
117
|
+
* (autoSummarize=false) and `skipped_threshold` (too few events) early
|
|
118
|
+
* returns — because retention is a time-based policy that must apply whether
|
|
119
|
+
* or not a summary was produced this session. It is gated only by the
|
|
120
|
+
* retentionDays config (retentionDays<=0 disables it), not by whether the
|
|
121
|
+
* lifecycle proceeded past the summarization threshold.
|
|
114
122
|
*/
|
|
115
123
|
function runLightPrune(projectId) {
|
|
116
124
|
try {
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { z } from "zod";
|
|
2
2
|
import { homedir } from "os";
|
|
3
3
|
import { join, dirname } from "path";
|
|
4
|
-
import { mkdirSync, readFileSync, writeFileSync } from "fs";
|
|
4
|
+
import { mkdirSync, readFileSync, writeFileSync, renameSync } from "fs";
|
|
5
5
|
/** Lower bound of the 1-10 importance scale. */
|
|
6
6
|
export const MIN_IMPORTANCE = 1;
|
|
7
7
|
/** Upper bound of the 1-10 importance scale. */
|
|
@@ -47,8 +47,12 @@ const teamConfigShape = z
|
|
|
47
47
|
* built-in defaults.
|
|
48
48
|
*/
|
|
49
49
|
const policyConfigShape = {
|
|
50
|
-
retentionDays: z.number().int().default(DEFAULT_POLICY_CONFIG.retentionDays),
|
|
50
|
+
retentionDays: z.number().int().min(0).default(DEFAULT_POLICY_CONFIG.retentionDays),
|
|
51
51
|
redactionEnabled: z.boolean().default(DEFAULT_POLICY_CONFIG.redactionEnabled),
|
|
52
|
+
// Optional per-session startup-injection token cap. When set, it overrides the
|
|
53
|
+
// built-in DEFAULT_INJECTION_CAP used by formatStartupInjection and the
|
|
54
|
+
// `savings` analytics. Omitted by default so existing configs keep the default.
|
|
55
|
+
injectionCap: z.number().int().min(100).max(10000).optional(),
|
|
52
56
|
team: teamConfigShape.default({ enabled: false }),
|
|
53
57
|
};
|
|
54
58
|
/**
|
|
@@ -80,14 +84,32 @@ export function configFilePath() {
|
|
|
80
84
|
* are merged over defaults via the schema's per-field `.default()`.
|
|
81
85
|
*/
|
|
82
86
|
export function readPolicyConfig(filePath) {
|
|
87
|
+
let raw;
|
|
88
|
+
let obj;
|
|
83
89
|
try {
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
return policyConfigReadSchema.parse(parsed);
|
|
90
|
+
raw = readFileSync(filePath, "utf8");
|
|
91
|
+
obj = JSON.parse(raw);
|
|
87
92
|
}
|
|
88
93
|
catch {
|
|
89
94
|
return { ...DEFAULT_POLICY_CONFIG };
|
|
90
95
|
}
|
|
96
|
+
// Fast path: strict parse succeeds (the common case).
|
|
97
|
+
const strict = policyConfigReadSchema.safeParse(obj);
|
|
98
|
+
if (strict.success)
|
|
99
|
+
return strict.data;
|
|
100
|
+
// Lenient fallback: salvage valid individual fields so a single bad value
|
|
101
|
+
// (e.g. injectionCap: 50, below min 100) doesn't wipe out retentionDays etc.
|
|
102
|
+
const s = policyConfigShape;
|
|
103
|
+
const rd = s.retentionDays.safeParse(obj.retentionDays);
|
|
104
|
+
const re = s.redactionEnabled.safeParse(obj.redactionEnabled);
|
|
105
|
+
const ic = s.injectionCap.safeParse(obj.injectionCap);
|
|
106
|
+
const tm = s.team.safeParse(obj.team);
|
|
107
|
+
return {
|
|
108
|
+
retentionDays: rd.success ? rd.data : DEFAULT_POLICY_CONFIG.retentionDays,
|
|
109
|
+
redactionEnabled: re.success ? re.data : DEFAULT_POLICY_CONFIG.redactionEnabled,
|
|
110
|
+
injectionCap: ic.success ? ic.data : undefined,
|
|
111
|
+
team: tm.success ? tm.data : { ...DEFAULT_POLICY_CONFIG.team },
|
|
112
|
+
};
|
|
91
113
|
}
|
|
92
114
|
/**
|
|
93
115
|
* Persist a partial policy config, merged over the current on-disk values (or
|
|
@@ -104,7 +126,12 @@ export function writePolicyConfig(filePath, partial) {
|
|
|
104
126
|
const current = readPolicyConfig(filePath);
|
|
105
127
|
const merged = policyConfigSchema.parse({ ...current, ...validatedPartial });
|
|
106
128
|
mkdirSync(dirname(filePath), { recursive: true });
|
|
107
|
-
|
|
129
|
+
// Atomic write: write to a temp file then rename over the target so a crash
|
|
130
|
+
// mid-write can't leave a truncated/corrupt config. On Windows, renameSync
|
|
131
|
+
// over an existing file works (Node wraps it via MoveFileExW).
|
|
132
|
+
const tmpPath = `${filePath}.tmp`;
|
|
133
|
+
writeFileSync(tmpPath, `${JSON.stringify(merged, null, 2)}\n`, "utf8");
|
|
134
|
+
renameSync(tmpPath, filePath);
|
|
108
135
|
return merged;
|
|
109
136
|
}
|
|
110
137
|
/**
|
|
@@ -2,6 +2,29 @@ import { CRITICAL_WARNING_IMPORTANCE_THRESHOLD } from "../config/policyConfig.js
|
|
|
2
2
|
import { countTokens, trimLowestPriorityContent } from "./tokenBudget.js";
|
|
3
3
|
const DEFAULT_TOKEN_CAP = 450;
|
|
4
4
|
const HEADER = "Relevant prior context";
|
|
5
|
+
// At most this many preserved (critical-warning) entries may bypass trimming.
|
|
6
|
+
// Critical warnings rarely need more, and an unbounded count would let
|
|
7
|
+
// preserved entries dominate the injection block past the token cap.
|
|
8
|
+
const MAX_PRESERVED = 5;
|
|
9
|
+
// Strip control characters and hard-cap per-entry content before it is rendered
|
|
10
|
+
// into the injection block. Prevents a single memory from breaking out of the
|
|
11
|
+
// block (newlines/control chars) or dominating it (length). The full content
|
|
12
|
+
// remains retrievable via retrieveMemories — this only affects startup display.
|
|
13
|
+
const safeContent = (content) =>
|
|
14
|
+
// eslint-disable-next-line no-control-regex -- intentional control-char strip
|
|
15
|
+
content.replace(/[\n\r\x00-\x08\x0e-\x1f\x7f]/g, " ").slice(0, 500);
|
|
16
|
+
// Sanitize source_adapter before it is rendered verbatim onto the metadata
|
|
17
|
+
// line. Like `author`, a malformed row could otherwise smuggle newlines/control
|
|
18
|
+
// chars (and thus a prompt-injection payload) into the injection block.
|
|
19
|
+
const safeSourceAdapter = (s) =>
|
|
20
|
+
// eslint-disable-next-line no-control-regex -- intentional control-char strip
|
|
21
|
+
(s ?? "").replace(/[\n\r\x00-\x08\x0e-\x1f\x7f]/g, "").slice(0, 100);
|
|
22
|
+
// Allow-list of known kinds. Any unrecognized kind renders as "memory" so a
|
|
23
|
+
// malformed row cannot inject arbitrary text into the rendered `[kind]` label.
|
|
24
|
+
const KNOWN_KINDS = new Set(["fact", "decision", "preference", "warning", "summary", "memory", "context"]);
|
|
25
|
+
function safeKind(kind) {
|
|
26
|
+
return KNOWN_KINDS.has(kind) ? kind : "memory";
|
|
27
|
+
}
|
|
5
28
|
const KIND_ORDER = ["warning", "decision", "fact", "summary", "preference"];
|
|
6
29
|
const KIND_RANK = new Map(KIND_ORDER.map((kind, index) => [kind, index]));
|
|
7
30
|
function kindRank(kind) {
|
|
@@ -36,7 +59,12 @@ function authorPrefix(memory, localUsername) {
|
|
|
36
59
|
if (memory.author &&
|
|
37
60
|
localUsername &&
|
|
38
61
|
memory.author !== localUsername) {
|
|
39
|
-
|
|
62
|
+
// Defense in depth: even though `author` is constrained at the contract
|
|
63
|
+
// boundary, strip any control characters before rendering so a malformed
|
|
64
|
+
// row cannot break out of the injection block.
|
|
65
|
+
// eslint-disable-next-line no-control-regex -- intentional control-char strip
|
|
66
|
+
const safeAuthor = memory.author.replace(/[\n\r\x00-\x1f\x7f]/g, "");
|
|
67
|
+
return `${safeAuthor}: `;
|
|
40
68
|
}
|
|
41
69
|
return "";
|
|
42
70
|
}
|
|
@@ -45,8 +73,8 @@ function formatLine(entry, localUsername) {
|
|
|
45
73
|
const score = memory.score;
|
|
46
74
|
const prefix = authorPrefix(memory, localUsername);
|
|
47
75
|
return [
|
|
48
|
-
`- [${memory.kind}] ${prefix}${entry.content}`,
|
|
49
|
-
`(score total=${formatScore(score.total)}, semantic=${formatScore(score.raw.semantic)}, recency=${formatScore(score.raw.recency)}, importance=${formatScore(score.raw.importance)}; source=${memory.source_adapter}; date=${memory.updated_at})`,
|
|
76
|
+
`- [${safeKind(memory.kind)}] ${prefix}${safeContent(entry.content)}`,
|
|
77
|
+
`(score total=${formatScore(score.total)}, semantic=${formatScore(score.raw.semantic)}, recency=${formatScore(score.raw.recency)}, importance=${formatScore(score.raw.importance)}; source=${safeSourceAdapter(memory.source_adapter)}; date=${memory.updated_at})`,
|
|
50
78
|
].join(" ");
|
|
51
79
|
}
|
|
52
80
|
function render(entries, localUsername) {
|
|
@@ -73,12 +101,28 @@ function lowestDroppableIndex(entries) {
|
|
|
73
101
|
export function formatStartupInjection(rankedMemories, options = {}) {
|
|
74
102
|
const tokenCap = options.tokenCap ?? DEFAULT_TOKEN_CAP;
|
|
75
103
|
const localUsername = options.localUsername;
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
104
|
+
// Cap how many entries may be marked `preserve`. Excess critical warnings
|
|
105
|
+
// beyond MAX_PRESERVED become droppable so they cannot bypass the trim/drop
|
|
106
|
+
// loop and blow past the token cap. Sorting first keeps the highest-ranked
|
|
107
|
+
// warnings preserved.
|
|
108
|
+
let preservedCount = 0;
|
|
109
|
+
let included = sortMemories(rankedMemories).map((memory) => {
|
|
110
|
+
let preserve = isCriticalWarning(memory);
|
|
111
|
+
if (preserve) {
|
|
112
|
+
if (preservedCount >= MAX_PRESERVED) {
|
|
113
|
+
preserve = false;
|
|
114
|
+
}
|
|
115
|
+
else {
|
|
116
|
+
preservedCount += 1;
|
|
117
|
+
}
|
|
118
|
+
}
|
|
119
|
+
return {
|
|
120
|
+
memory,
|
|
121
|
+
content: memory.content,
|
|
122
|
+
priority: KIND_ORDER.length - kindRank(memory.kind),
|
|
123
|
+
preserve,
|
|
124
|
+
};
|
|
125
|
+
});
|
|
82
126
|
let output = render(included, localUsername);
|
|
83
127
|
while (included.length > 0 && countTokens(output) > tokenCap) {
|
|
84
128
|
const trimmed = trimLowestPriorityContent(included);
|
|
@@ -4,7 +4,10 @@ function toDate(value) {
|
|
|
4
4
|
}
|
|
5
5
|
export function getRecencyBandScore(updatedAt, now = new Date()) {
|
|
6
6
|
const updatedDate = toDate(updatedAt);
|
|
7
|
-
const ageDays =
|
|
7
|
+
const ageDays = (now.getTime() - updatedDate.getTime()) / DAY_IN_MS;
|
|
8
|
+
if (!Number.isFinite(ageDays) || ageDays < 0) {
|
|
9
|
+
return 0.05; // invalid/future date -> treat as max age (lowest recency score)
|
|
10
|
+
}
|
|
8
11
|
const HALF_LIFE_DAYS = 14;
|
|
9
12
|
const lambda = Math.LN2 / HALF_LIFE_DAYS;
|
|
10
13
|
return Math.max(0.05, Math.exp(-lambda * ageDays));
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { deterministicEmbed } from "../embed/deterministicEmbed.js";
|
|
2
2
|
import { decayOldBoosts } from "./decay.js";
|
|
3
3
|
import { scoreMemoryCandidate, } from "./score.js";
|
|
4
|
-
import {
|
|
4
|
+
import { searchMemoryCandidatesFTS, } from "../storage/memorySearchRepo.js";
|
|
5
5
|
const DEFAULT_EMBEDDING_DIMENSION = 32;
|
|
6
6
|
function resolveEmbeddingDimension(candidates) {
|
|
7
7
|
for (const candidate of candidates) {
|
|
@@ -41,19 +41,21 @@ export function retrieveMemories(input) {
|
|
|
41
41
|
}
|
|
42
42
|
const topK = input.topK ?? input.limit ?? 20;
|
|
43
43
|
const now = input.now ?? new Date();
|
|
44
|
-
// Use FTS5 pre-filtering
|
|
45
|
-
//
|
|
46
|
-
const candidates = queryText
|
|
47
|
-
? searchMemoryCandidatesFTS(input.db, input.projectId, queryText)
|
|
48
|
-
: searchMemoryCandidates(input.db, input.projectId);
|
|
44
|
+
// Use FTS5 pre-filtering to limit cosine similarity computation to ~50 candidates.
|
|
45
|
+
// queryText is guaranteed non-empty by the Zod schema (z.string().min(1)) upstream.
|
|
46
|
+
const candidates = searchMemoryCandidatesFTS(input.db, input.projectId, queryText);
|
|
49
47
|
const decayedCandidates = decayOldBoosts(candidates, now);
|
|
50
48
|
const dimension = resolveEmbeddingDimension(candidates);
|
|
51
49
|
const queryVector = deterministicEmbed(queryText, dimension).vector;
|
|
52
50
|
const ranked = decayedCandidates
|
|
53
51
|
.map((candidate) => {
|
|
54
52
|
// When embedding is null (version mismatch or missing), use a neutral
|
|
55
|
-
// score of 0.5 so the memory is neither penalized nor boosted.
|
|
56
|
-
|
|
53
|
+
// score of 0.5 so the memory is neither penalized nor boosted. A non-null
|
|
54
|
+
// embedding whose length differs from the query vector (a stored
|
|
55
|
+
// embedding from a different dimension) is treated the same way: passing
|
|
56
|
+
// it to cosineSimilarity would return 0 and actively penalize the row.
|
|
57
|
+
const semantic = candidate.embedding === null ||
|
|
58
|
+
candidate.embedding.length !== queryVector.length
|
|
57
59
|
? 0.5
|
|
58
60
|
: cosineSimilarity(queryVector, candidate.embedding);
|
|
59
61
|
const score = scoreMemoryCandidate({
|
|
@@ -5,5 +5,10 @@
|
|
|
5
5
|
-- NOT NULL ADD COLUMN requires a default and the local OS username is not
|
|
6
6
|
-- available inside static SQL. `origin_project_id` is nullable and
|
|
7
7
|
-- only set on rows pulled in from another project's store.
|
|
8
|
+
--
|
|
9
|
+
-- Idempotency: SQLite has no `ADD COLUMN IF NOT EXISTS`. Re-running this
|
|
10
|
+
-- migration (only possible if the _migrations record was lost) throws
|
|
11
|
+
-- "duplicate column name", which runMigrations catches and treats as
|
|
12
|
+
-- already-applied. See src/core/schema/runMigrations.ts.
|
|
8
13
|
ALTER TABLE memories ADD COLUMN author TEXT NOT NULL DEFAULT '';
|
|
9
14
|
ALTER TABLE memories ADD COLUMN origin_project_id TEXT;
|
|
@@ -1,5 +1,10 @@
|
|
|
1
1
|
-- Access-pattern boosting: track how often each memory is included in
|
|
2
2
|
-- retrieval output. access_count drives a read-time effective_importance
|
|
3
3
|
-- boost without mutating the stored importance score.
|
|
4
|
+
--
|
|
5
|
+
-- Idempotency: SQLite has no `ADD COLUMN IF NOT EXISTS`. Re-running this
|
|
6
|
+
-- migration (only possible if the _migrations record was lost) throws
|
|
7
|
+
-- "duplicate column name", which runMigrations catches and treats as
|
|
8
|
+
-- already-applied. See src/core/schema/runMigrations.ts.
|
|
4
9
|
ALTER TABLE memories ADD COLUMN access_count INTEGER NOT NULL DEFAULT 0;
|
|
5
10
|
ALTER TABLE memories ADD COLUMN last_accessed TEXT;
|
|
@@ -8,9 +8,13 @@ CREATE VIRTUAL TABLE IF NOT EXISTS memories_fts USING fts5(
|
|
|
8
8
|
content_rowid='rowid'
|
|
9
9
|
);
|
|
10
10
|
|
|
11
|
-
-- Populate FTS index from existing rows
|
|
11
|
+
-- Populate FTS index from existing rows. Guard against double-backfill: if this
|
|
12
|
+
-- migration is re-run on a DB that already has FTS data (e.g. after the
|
|
13
|
+
-- _migrations record was lost), re-inserting every row would duplicate the
|
|
14
|
+
-- index entries. Only backfill when the FTS table is empty.
|
|
12
15
|
INSERT INTO memories_fts(rowid, content, normalized_content)
|
|
13
|
-
SELECT rowid, content, normalized_content FROM memories
|
|
16
|
+
SELECT rowid, content, normalized_content FROM memories
|
|
17
|
+
WHERE NOT EXISTS (SELECT 1 FROM memories_fts LIMIT 1);
|
|
14
18
|
|
|
15
19
|
-- Keep FTS index in sync: INSERT trigger
|
|
16
20
|
CREATE TRIGGER IF NOT EXISTS memories_fts_insert AFTER INSERT ON memories BEGIN
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
-- Deduplicate session events by their logical key.
|
|
2
|
+
--
|
|
3
|
+
-- `ingestSessionEvents` is now reachable from agents (MCP tool) and CLI, so it
|
|
4
|
+
-- can be called more than once for the same session. The original index on
|
|
5
|
+
-- (project_id, session_id, event_index) was NOT unique, so re-ingesting the
|
|
6
|
+
-- same logical event with a fresh `id` silently created duplicate rows — which
|
|
7
|
+
-- then inflated session-event counts and the local-summarizer input.
|
|
8
|
+
--
|
|
9
|
+
-- Replace it with a UNIQUE index so `INSERT OR IGNORE` makes re-ingestion a
|
|
10
|
+
-- no-op. session_events has no production writer before this migration, so there
|
|
11
|
+
-- should be no pre-existing duplicates; the DELETE below removes any as a safeguard.
|
|
12
|
+
DROP INDEX IF EXISTS idx_session_events_project_session_event_index;
|
|
13
|
+
|
|
14
|
+
-- Remove duplicate rows before adding unique constraint.
|
|
15
|
+
-- Keep the row with the smallest rowid for each logical key.
|
|
16
|
+
DELETE FROM session_events
|
|
17
|
+
WHERE rowid NOT IN (
|
|
18
|
+
SELECT MIN(rowid)
|
|
19
|
+
FROM session_events
|
|
20
|
+
GROUP BY project_id, session_id, event_index
|
|
21
|
+
);
|
|
22
|
+
|
|
23
|
+
CREATE UNIQUE INDEX IF NOT EXISTS idx_session_events_project_session_event_index
|
|
24
|
+
ON session_events(project_id, session_id, event_index);
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import fs from "node:fs";
|
|
2
2
|
import path from "node:path";
|
|
3
|
-
|
|
3
|
+
import { fileURLToPath } from "node:url";
|
|
4
|
+
const DEFAULT_MIGRATIONS_DIR = path.resolve(path.dirname(fileURLToPath(import.meta.url)), "migrations");
|
|
4
5
|
function ensureMigrationsTable(db) {
|
|
5
6
|
db.exec(`
|
|
6
7
|
CREATE TABLE IF NOT EXISTS _migrations (
|
|
@@ -18,6 +19,38 @@ function listMigrationFiles(migrationsDir) {
|
|
|
18
19
|
.filter((fileName) => fileName.endsWith(".sql"))
|
|
19
20
|
.sort((left, right) => left.localeCompare(right));
|
|
20
21
|
}
|
|
22
|
+
/**
|
|
23
|
+
* Verify that every column an ADD COLUMN migration declares already exists in
|
|
24
|
+
* its target table. Parses `ALTER TABLE <table> ADD COLUMN <name>` statements
|
|
25
|
+
* from the migration SQL and checks each against `PRAGMA table_info`. Returns
|
|
26
|
+
* false the moment a declared column is missing (or its table doesn't exist),
|
|
27
|
+
* so a partially-applied migration is never marked complete. Also returns false when the SQL contains no ADD COLUMN statements at all.
|
|
28
|
+
*/
|
|
29
|
+
function allAddedColumnsExist(db, sql) {
|
|
30
|
+
const addColumnRe = /ALTER\s+TABLE\s+(\w+)\s+ADD\s+COLUMN\s+(\w+)/gi;
|
|
31
|
+
const byTable = new Map();
|
|
32
|
+
let match;
|
|
33
|
+
while ((match = addColumnRe.exec(sql)) !== null) {
|
|
34
|
+
const [, table, column] = match;
|
|
35
|
+
const cols = byTable.get(table) ?? [];
|
|
36
|
+
cols.push(column);
|
|
37
|
+
byTable.set(table, cols);
|
|
38
|
+
}
|
|
39
|
+
// No ADD COLUMN statements parsed → we cannot prove the schema is consistent,
|
|
40
|
+
// so treat it as unsafe and let the caller re-throw.
|
|
41
|
+
if (byTable.size === 0) {
|
|
42
|
+
return false;
|
|
43
|
+
}
|
|
44
|
+
for (const [table, columns] of byTable) {
|
|
45
|
+
const existing = new Set(db.prepare(`PRAGMA table_info(${table})`).all().map((row) => row.name));
|
|
46
|
+
for (const column of columns) {
|
|
47
|
+
if (!existing.has(column)) {
|
|
48
|
+
return false;
|
|
49
|
+
}
|
|
50
|
+
}
|
|
51
|
+
}
|
|
52
|
+
return true;
|
|
53
|
+
}
|
|
21
54
|
export function runMigrations(db, migrationsDir = DEFAULT_MIGRATIONS_DIR) {
|
|
22
55
|
ensureMigrationsTable(db);
|
|
23
56
|
const files = listMigrationFiles(migrationsDir);
|
|
@@ -31,8 +64,37 @@ export function runMigrations(db, migrationsDir = DEFAULT_MIGRATIONS_DIR) {
|
|
|
31
64
|
});
|
|
32
65
|
for (const fileName of files) {
|
|
33
66
|
const existing = hasMigrationStmt.get(fileName);
|
|
34
|
-
if (
|
|
67
|
+
if (existing) {
|
|
68
|
+
continue;
|
|
69
|
+
}
|
|
70
|
+
try {
|
|
35
71
|
runMigration(fileName);
|
|
36
72
|
}
|
|
73
|
+
catch (err) {
|
|
74
|
+
// Idempotency guard for ALTER TABLE ADD COLUMN migrations (005/006).
|
|
75
|
+
// SQLite has no `ADD COLUMN IF NOT EXISTS`, so re-running a column-adding
|
|
76
|
+
// migration on a DB that already has the column throws "duplicate column
|
|
77
|
+
// name". This only happens when the _migrations record was lost (e.g. the
|
|
78
|
+
// table was dropped) while the schema change survived.
|
|
79
|
+
//
|
|
80
|
+
// Each migration runs in a transaction (all-or-nothing), so the duplicate
|
|
81
|
+
// error rolls the whole body back. Migrations 005/006 add TWO columns
|
|
82
|
+
// each: if only the FIRST already exists, the throw fires on the first
|
|
83
|
+
// ALTER and the second column is never added. Blindly marking the
|
|
84
|
+
// migration applied would leave that second column permanently missing.
|
|
85
|
+
//
|
|
86
|
+
// So instead of trusting the error, verify that EVERY column this
|
|
87
|
+
// migration was supposed to add actually exists. Only then is it safe to
|
|
88
|
+
// record as applied; otherwise re-throw so the failure surfaces.
|
|
89
|
+
if (err instanceof Error && /duplicate column name/i.test(err.message)) {
|
|
90
|
+
const filePath = path.join(migrationsDir, fileName);
|
|
91
|
+
const sql = fs.readFileSync(filePath, "utf8");
|
|
92
|
+
if (allAddedColumnsExist(db, sql)) {
|
|
93
|
+
insertMigrationStmt.run(fileName);
|
|
94
|
+
continue;
|
|
95
|
+
}
|
|
96
|
+
}
|
|
97
|
+
throw err;
|
|
98
|
+
}
|
|
37
99
|
}
|
|
38
100
|
}
|
|
@@ -1,3 +1,34 @@
|
|
|
1
|
+
// Shared INSERT ... ON CONFLICT(id) upsert column lists. The import and team-pull
|
|
2
|
+
// paths differ only in how they resolve `importance` on conflict (import takes the
|
|
3
|
+
// incoming value; pull preserves MAX(local, incoming)), so the surrounding SQL is
|
|
4
|
+
// factored out to keep the two prepared statements byte-for-byte aligned.
|
|
5
|
+
const UPSERT_INSERT_HEAD = `
|
|
6
|
+
INSERT INTO memories (
|
|
7
|
+
id, project_id, session_id, source_adapter, kind, content, normalized_content,
|
|
8
|
+
importance, embedding, embedding_dim, embedding_version, author, origin_project_id,
|
|
9
|
+
created_at, updated_at
|
|
10
|
+
) VALUES (
|
|
11
|
+
@id, @project_id, @session_id, @source_adapter, @kind, @content, @normalized_content,
|
|
12
|
+
@importance, @embedding, @embedding_dim, @embedding_version, @author, @origin_project_id,
|
|
13
|
+
COALESCE(@created_at, strftime('%Y-%m-%dT%H:%M:%fZ', 'now')),
|
|
14
|
+
COALESCE(@updated_at, strftime('%Y-%m-%dT%H:%M:%fZ', 'now'))
|
|
15
|
+
)
|
|
16
|
+
ON CONFLICT(id) DO UPDATE SET
|
|
17
|
+
project_id = excluded.project_id,
|
|
18
|
+
session_id = excluded.session_id,
|
|
19
|
+
source_adapter = excluded.source_adapter,
|
|
20
|
+
kind = excluded.kind,
|
|
21
|
+
content = excluded.content,
|
|
22
|
+
normalized_content = excluded.normalized_content,`;
|
|
23
|
+
const UPSERT_INSERT_TAIL = `
|
|
24
|
+
embedding = excluded.embedding,
|
|
25
|
+
embedding_dim = excluded.embedding_dim,
|
|
26
|
+
embedding_version = excluded.embedding_version,
|
|
27
|
+
author = excluded.author,
|
|
28
|
+
origin_project_id = excluded.origin_project_id,
|
|
29
|
+
created_at = excluded.created_at,
|
|
30
|
+
updated_at = excluded.updated_at
|
|
31
|
+
`;
|
|
1
32
|
const stmtCache = new WeakMap();
|
|
2
33
|
function getStatements(db) {
|
|
3
34
|
let stmts = stmtCache.get(db);
|
|
@@ -40,6 +71,20 @@ function getStatements(db) {
|
|
|
40
71
|
author = excluded.author,
|
|
41
72
|
origin_project_id = excluded.origin_project_id,
|
|
42
73
|
updated_at = COALESCE(excluded.updated_at, strftime('%Y-%m-%dT%H:%M:%fZ', 'now'))
|
|
74
|
+
ON CONFLICT(id)
|
|
75
|
+
DO UPDATE SET
|
|
76
|
+
project_id = excluded.project_id,
|
|
77
|
+
session_id = excluded.session_id,
|
|
78
|
+
source_adapter = excluded.source_adapter,
|
|
79
|
+
content = excluded.content,
|
|
80
|
+
normalized_content = excluded.normalized_content,
|
|
81
|
+
importance = excluded.importance,
|
|
82
|
+
embedding = excluded.embedding,
|
|
83
|
+
embedding_dim = excluded.embedding_dim,
|
|
84
|
+
embedding_version = excluded.embedding_version,
|
|
85
|
+
author = excluded.author,
|
|
86
|
+
origin_project_id = excluded.origin_project_id,
|
|
87
|
+
updated_at = COALESCE(excluded.updated_at, strftime('%Y-%m-%dT%H:%M:%fZ', 'now'))
|
|
43
88
|
`),
|
|
44
89
|
listByProject: db.prepare(`
|
|
45
90
|
SELECT
|
|
@@ -50,7 +95,28 @@ function getStatements(db) {
|
|
|
50
95
|
WHERE project_id = ?
|
|
51
96
|
ORDER BY updated_at DESC
|
|
52
97
|
`),
|
|
98
|
+
// Lightweight projection for token-savings accounting: only `content` is
|
|
99
|
+
// needed to count tokens, so we deliberately avoid pulling the (potentially
|
|
100
|
+
// multi-KB) embedding JSON and normalized_content for every row. Matters for
|
|
101
|
+
// large projects where `savings` would otherwise load the whole table.
|
|
102
|
+
listContentByProject: db.prepare("SELECT content FROM memories WHERE project_id = ?"),
|
|
103
|
+
importUpsert: db.prepare(`${UPSERT_INSERT_HEAD}\n importance = excluded.importance,${UPSERT_INSERT_TAIL}`),
|
|
104
|
+
// Importance-preserving merge for team pulls: a teammate can never lower a
|
|
105
|
+
// locally-boosted importance. better-sqlite3@12 bundles a SQLite that accepts
|
|
106
|
+
// the two-arg scalar MAX() inside DO UPDATE.
|
|
107
|
+
pullUpsert: db.prepare(`${UPSERT_INSERT_HEAD}\n importance = MAX(memories.importance, excluded.importance),${UPSERT_INSERT_TAIL}`),
|
|
53
108
|
listAllIds: db.prepare("SELECT id FROM memories"),
|
|
109
|
+
selectById: db.prepare(`
|
|
110
|
+
SELECT
|
|
111
|
+
id, project_id, session_id, source_adapter, kind, content, normalized_content,
|
|
112
|
+
importance, embedding, embedding_dim, embedding_version, author, origin_project_id,
|
|
113
|
+
access_count, last_accessed, created_at, updated_at
|
|
114
|
+
FROM memories
|
|
115
|
+
WHERE project_id = ? AND id = ?
|
|
116
|
+
LIMIT 1
|
|
117
|
+
`),
|
|
118
|
+
selectOwner: db.prepare("SELECT project_id FROM memories WHERE id = ?"),
|
|
119
|
+
deleteById: db.prepare("DELETE FROM memories WHERE project_id = ? AND id = ?"),
|
|
54
120
|
countOlderThan: db.prepare(`
|
|
55
121
|
SELECT COUNT(*) AS count
|
|
56
122
|
FROM memories
|
|
@@ -72,6 +138,9 @@ function getStatements(db) {
|
|
|
72
138
|
SET
|
|
73
139
|
content = ?,
|
|
74
140
|
normalized_content = COALESCE(?, normalized_content),
|
|
141
|
+
embedding = COALESCE(?, embedding),
|
|
142
|
+
embedding_dim = COALESCE(?, embedding_dim),
|
|
143
|
+
embedding_version = COALESCE(?, embedding_version),
|
|
75
144
|
updated_at = strftime('%Y-%m-%dT%H:%M:%fZ', 'now')
|
|
76
145
|
WHERE project_id = ? AND id = ?
|
|
77
146
|
`),
|
|
@@ -96,7 +165,17 @@ function getStatements(db) {
|
|
|
96
165
|
countBySession: db.prepare(`
|
|
97
166
|
SELECT COUNT(*) AS count
|
|
98
167
|
FROM memories
|
|
99
|
-
WHERE session_id = ?
|
|
168
|
+
WHERE session_id = ? AND project_id = ?
|
|
169
|
+
`),
|
|
170
|
+
countAll: db.prepare("SELECT COUNT(*) AS count FROM memories WHERE project_id = ?"),
|
|
171
|
+
// Memories whose stored embedding does not match the supplied current
|
|
172
|
+
// embedding version (NULL counts as stale). Used to surface a re-embed
|
|
173
|
+
// hint; the actual re-embed is the `sessionmem re-embed` command.
|
|
174
|
+
countStaleEmbeddings: db.prepare(`
|
|
175
|
+
SELECT COUNT(*) AS count
|
|
176
|
+
FROM memories
|
|
177
|
+
WHERE project_id = ?
|
|
178
|
+
AND (embedding_version IS NULL OR embedding_version != ?)
|
|
100
179
|
`),
|
|
101
180
|
};
|
|
102
181
|
stmtCache.set(db, stmts);
|
|
@@ -127,9 +206,62 @@ export function upsertSessionSummaryMemory(db, input) {
|
|
|
127
206
|
assertImportance(input.importance);
|
|
128
207
|
getStatements(db).upsertSessionSummary.run(toParams({ ...input, kind: "summary" }));
|
|
129
208
|
}
|
|
209
|
+
/**
|
|
210
|
+
* Upsert a memory imported from an external export. On `id` conflict the incoming
|
|
211
|
+
* record wins on every column (including importance). Cross-project ownership
|
|
212
|
+
* collisions are filtered by the caller via {@link getMemoryOwnerProjectId} before
|
|
213
|
+
* this runs, so this never reassigns another project's row.
|
|
214
|
+
*/
|
|
215
|
+
export function upsertImportedMemory(db, input) {
|
|
216
|
+
assertImportance(input.importance);
|
|
217
|
+
getStatements(db).importUpsert.run(toParams(input));
|
|
218
|
+
}
|
|
219
|
+
/**
|
|
220
|
+
* Upsert a memory pulled from a teammate. Identical to {@link upsertImportedMemory}
|
|
221
|
+
* except importance is merged as MAX(local, incoming) so a pull can never lower a
|
|
222
|
+
* locally-boosted importance.
|
|
223
|
+
*/
|
|
224
|
+
export function upsertPulledMemory(db, input) {
|
|
225
|
+
assertImportance(input.importance);
|
|
226
|
+
getStatements(db).pullUpsert.run(toParams(input));
|
|
227
|
+
}
|
|
130
228
|
export function listMemoriesByProject(db, projectId) {
|
|
131
229
|
return getStatements(db).listByProject.all(projectId);
|
|
132
230
|
}
|
|
231
|
+
/**
|
|
232
|
+
* Return just the `content` of every memory in a project. Used by the
|
|
233
|
+
* token-savings command, which only needs `content` to count tokens and must
|
|
234
|
+
* not pay to load embedding JSON / normalized_content for the whole table.
|
|
235
|
+
*/
|
|
236
|
+
export function listMemoryContentsByProject(db, projectId) {
|
|
237
|
+
const rows = getStatements(db).listContentByProject.all(projectId);
|
|
238
|
+
return rows.map((r) => r.content);
|
|
239
|
+
}
|
|
240
|
+
/**
|
|
241
|
+
* Fetch a single memory row scoped to a project. Returns undefined when no row
|
|
242
|
+
* matches (caller maps that to NOT_FOUND). Uses a WeakMap-cached prepared
|
|
243
|
+
* statement — this is a high-frequency path (every store/get/forget and each
|
|
244
|
+
* batch item re-reads the inserted row).
|
|
245
|
+
*/
|
|
246
|
+
export function getMemoryRecordById(db, projectId, memoryId) {
|
|
247
|
+
return getStatements(db).selectById.get(projectId, memoryId);
|
|
248
|
+
}
|
|
249
|
+
/**
 * Find which project currently owns a globally-unique memory `id`.
 *
 * Import/pull rely on this to skip (never overwrite) an id that already
 * belongs to a different project.
 *
 * @param db - Database handle used to look up the prepared statements.
 * @param memoryId - Memory id to resolve.
 * @returns The owning row's `project_id`, or undefined when the id is unused.
 */
export function getMemoryOwnerProjectId(db, memoryId) {
    const ownerRow = getStatements(db).selectOwner.get(memoryId);
    // `== null` covers both null and undefined, i.e. "no owner row".
    if (ownerRow == null) {
        return undefined;
    }
    return ownerRow.project_id;
}
|
|
258
|
+
/**
 * Hard-delete one memory, scoped to a project.
 *
 * @param db - Database handle used to look up the prepared statements.
 * @param projectId - Project the memory must belong to.
 * @param memoryId - Id of the memory to remove.
 * @returns The `changes` count reported by the delete statement (0 when the
 *   id does not exist in this project).
 */
export function deleteMemoryById(db, projectId, memoryId) {
    const { deleteById } = getStatements(db);
    const { changes } = deleteById.run(projectId, memoryId);
    return changes;
}
|
|
133
265
|
/**
|
|
134
266
|
* All memory ids across every project. `id` is a globally-unique
|
|
135
267
|
* PRIMARY KEY, so duplicate-skip checks in `import` must consider every
|
|
@@ -153,6 +285,12 @@ export function deleteMemoriesOlderThan(db, projectId, cutoffIso) {
|
|
|
153
285
|
const result = getStatements(db).deleteOlderThan.run(projectId, cutoffIso);
|
|
154
286
|
return result.changes;
|
|
155
287
|
}
|
|
288
|
+
// NOTE: no MCP tool, CLI command, or service method currently calls this. It is
|
|
289
|
+
// retained as intentional repository API surface (the importance-update
|
|
290
|
+
// counterpart to updateMemoryContent) for a future importance-adjustment tool,
|
|
291
|
+
// not forgotten code. The `updateImportance` prepared statement above is wired
|
|
292
|
+
// solely for this function. Keep or remove deliberately — do not delete on a
|
|
293
|
+
// "looks unused" pass.
|
|
156
294
|
export function updateMemoryImportance(db, projectId, memoryId, nextImportance, usedAt) {
|
|
157
295
|
assertImportance(nextImportance);
|
|
158
296
|
const result = getStatements(db).updateImportance.run(nextImportance, usedAt ?? null, projectId, memoryId);
|
|
@@ -161,17 +299,36 @@ export function updateMemoryImportance(db, projectId, memoryId, nextImportance,
|
|
|
161
299
|
}
|
|
162
300
|
}
|
|
163
301
|
/**
|
|
164
|
-
* Count
|
|
165
|
-
*
|
|
302
|
+
* Count all memories stored in a project. Used to enforce per-session write
|
|
303
|
+
* soft limits — the count is
|
|
166
304
|
* checked before each storeMemory call and a warning is surfaced when the
|
|
167
305
|
* threshold is reached.
|
|
168
306
|
*/
|
|
169
|
-
export function
|
|
170
|
-
const row = getStatements(db).
|
|
307
|
+
export function countAllMemoriesByProject(db, projectId) {
    // Only the `count` column of the row produced by `countAll` is returned.
    const { count } = getStatements(db).countAll.get(projectId);
    return count;
}
|
|
311
|
+
/**
 * Count the memories recorded for one session within a project.
 *
 * @param db - Database handle used to look up the prepared statements.
 * @param sessionId - Session to count; bound as the first statement parameter.
 * @param projectId - Project scope; bound as the second statement parameter.
 * @returns The `count` column of the row produced by `countBySession`.
 */
export function countMemoriesBySession(db, sessionId, projectId) {
    const { countBySession } = getStatements(db);
    const { count } = countBySession.get(sessionId, projectId);
    return count;
}
|
|
315
|
+
/**
 * Count the memories in a project whose embedding version is not
 * `currentVersion` (a NULL version counts as stale).
 *
 * The result drives the startup re-embed hint; the remedy is the
 * `sessionmem re-embed` command.
 *
 * @param db - Database handle used to look up the prepared statements.
 * @param projectId - Project to inspect.
 * @param currentVersion - Embedding version considered up to date.
 * @returns The `count` column of the row produced by `countStaleEmbeddings`.
 */
export function countStaleEmbeddings(db, projectId, currentVersion) {
    const staleStatement = getStatements(db).countStaleEmbeddings;
    const { count } = staleStatement.get(projectId, currentVersion);
    return count;
}
|
|
173
|
-
export function updateMemoryContent(db, projectId, memoryId, newContent, newNormalizedContent
|
|
174
|
-
|
|
324
|
+
export function updateMemoryContent(db, projectId, memoryId, newContent, newNormalizedContent,
|
|
325
|
+
// Optional re-embedding: when content is rewritten (e.g. a redactExisting
|
|
326
|
+
// scrub) the stored embedding vector — computed from the PRE-edit text —
|
|
327
|
+
// becomes stale and inconsistent with the new normalized_content. Pass the
|
|
328
|
+
// recomputed embedding so the vector tracks the redacted text; omit to leave
|
|
329
|
+
// the existing embedding untouched (COALESCE keeps the prior value on null).
|
|
330
|
+
newEmbedding) {
|
|
331
|
+
const result = getStatements(db).updateContent.run(newContent, newNormalizedContent ?? null, newEmbedding ? JSON.stringify(newEmbedding.vector) : null, newEmbedding?.dimension ?? null, newEmbedding?.embeddingVersion ?? null, projectId, memoryId);
|
|
175
332
|
if (result.changes === 0) {
|
|
176
333
|
throw new Error(`Memory not found: ${memoryId}`);
|
|
177
334
|
}
|