sessionmem 1.0.5 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58)
  1. package/LICENSE +21 -21
  2. package/README.md +372 -365
  3. package/dist/adapters/capabilities/fallbackTools.js +33 -18
  4. package/dist/adapters/claudeMdInjector.js +164 -0
  5. package/dist/adapters/factory.js +68 -9
  6. package/dist/adapters/generic.js +221 -15
  7. package/dist/adapters/global/antigravity.js +14 -7
  8. package/dist/adapters/global/claudeCode.js +46 -10
  9. package/dist/adapters/global/codex.js +73 -13
  10. package/dist/adapters/global/qcoder.js +18 -5
  11. package/dist/adapters/ide/cline.js +54 -9
  12. package/dist/adapters/ide/cursor.js +15 -13
  13. package/dist/adapters/ide/installer.js +201 -8
  14. package/dist/adapters/ide/windsurf.js +14 -13
  15. package/dist/adapters/tools/ping.js +4 -1
  16. package/dist/cli/commands/config.js +10 -1
  17. package/dist/cli/commands/import.js +6 -1
  18. package/dist/cli/commands/install.js +63 -5
  19. package/dist/cli/commands/ping.js +42 -8
  20. package/dist/cli/commands/reEmbed.js +48 -0
  21. package/dist/cli/commands/run.js +18 -2
  22. package/dist/cli/commands/savings.js +91 -0
  23. package/dist/cli/commands/sessionEnd.js +124 -0
  24. package/dist/cli/commands/sessionStart.js +52 -0
  25. package/dist/cli/commands/sync.js +39 -9
  26. package/dist/cli/commands/uninstall.js +37 -1
  27. package/dist/cli/context.js +14 -18
  28. package/dist/cli/index.js +30 -4
  29. package/dist/cli/output.js +11 -3
  30. package/dist/cli/projectId.js +69 -0
  31. package/dist/core/api/contracts.js +182 -45
  32. package/dist/core/api/errors.js +4 -7
  33. package/dist/core/api/memoryCoreService.js +409 -240
  34. package/dist/core/api/sessionLifecycleService.js +20 -2
  35. package/dist/core/config/policyConfig.js +53 -6
  36. package/dist/core/injection/formatStartupInjection.js +55 -10
  37. package/dist/core/injection/tokenBudget.js +8 -0
  38. package/dist/core/retrieve/importance.js +4 -3
  39. package/dist/core/retrieve/recencyBands.js +6 -10
  40. package/dist/core/retrieve/retrieveMemories.js +19 -4
  41. package/dist/core/retrieve/score.js +11 -1
  42. package/dist/core/schema/migrations/005_team_provenance.sql +14 -9
  43. package/dist/core/schema/migrations/006_access_pattern_boosting.sql +10 -0
  44. package/dist/core/schema/migrations/007_feedback_manual_delete.sql +23 -0
  45. package/dist/core/schema/migrations/008_fts5_search.sql +37 -0
  46. package/dist/core/schema/migrations/009_session_events_unique.sql +24 -0
  47. package/dist/core/schema/runMigrations.js +64 -2
  48. package/dist/core/storage/db.js +6 -0
  49. package/dist/core/storage/memoryFeedbackRepo.js +14 -4
  50. package/dist/core/storage/memoryRepo.js +292 -121
  51. package/dist/core/storage/memorySearchRepo.js +125 -13
  52. package/dist/core/storage/sessionEventsRepo.js +33 -10
  53. package/dist/core/storage/summarizationFailuresRepo.js +36 -26
  54. package/dist/core/storage/tokenSavingsRepo.js +20 -0
  55. package/dist/core/summarize/cloudSummarizer.js +34 -5
  56. package/dist/core/summarize/localSummarizer.js +1 -10
  57. package/dist/core/summarize/redaction.js +45 -8
  58. package/package.json +50 -48
@@ -1,34 +1,49 @@
1
+ import { z } from "zod";
1
2
  export class FallbackToolRegistrar {
2
- static getFallbackTools(capabilities) {
3
+ static getFallbackTools(capabilities, context) {
3
4
  const tools = [];
4
5
  if (!capabilities.supportsResources) {
5
6
  tools.push({
6
7
  name: "fetch_memories",
7
- description: "Fallback tool to fetch memories because host lacks MCP resource support.",
8
- schema: {
9
- type: "object",
10
- properties: {
11
- query: { type: "string" },
12
- },
13
- required: ["query"],
8
+ description: "Fallback memory retrieval for hosts that do not support MCP resources. Call this instead of accessing the sessionmem:// resource URI directly when the host lacks resource support. Semantically equivalent to retrieveMemories — returns stored memories ranked by relevance to the query. Read-only; no side effects.\n\n" +
9
+ "WHEN TO CALL: At session start and mid-session when you need to retrieve context and the host does not support MCP resources. Do not call if the host supports MCP resources — use the sessionmem:// resource URI or retrieveMemories tool instead.\n\n" +
10
+ "Parameter `query`: natural-language description of what context you need to recall (e.g. 'API design decisions', 'database schema choices').",
11
+ inputShape: {
12
+ query: z.string().min(1).max(1000).describe("Natural-language description of what context you need to recall."),
14
13
  },
15
14
  execute: async (args) => {
16
- // Wrap core retrieval logic
17
- return `Fetched memories for ${args.query}`;
18
- }
15
+ const result = await context.service.call("retrieveMemories", {
16
+ projectId: context.projectId,
17
+ query: args.query,
18
+ limit: 10,
19
+ mode: "on-demand",
20
+ depth: "default",
21
+ });
22
+ if (!result.ok)
23
+ return `Error: ${result.error.message}`;
24
+ return JSON.stringify(result.memories, null, 2);
25
+ },
19
26
  });
20
27
  }
21
28
  if (!capabilities.supportsPrompts) {
22
29
  tools.push({
23
30
  name: "startup_inject_memories",
24
- description: "Fallback tool to manually request startup injection because host lacks MCP prompt support.",
25
- schema: {
26
- type: "object",
27
- properties: {},
28
- },
31
+ description: "Fallback startup-injection for hosts that do not support MCP prompts. Call this once at the very start of a session instead of relying on the automatic sessionmem startup prompt when the host lacks prompt support. Injects the top relevant memories for the current project into the working context. No parameters required.\n\n" +
32
+ "WHEN TO CALL: Once per session start, before any user task work begins, when the host does not surface MCP prompts automatically. Do not call if the host already surfaces the sessionmem startup prompt — calling both duplicates injected context.\n\n" +
33
+ "Note: access counts are incremented on retrieval.",
34
+ inputShape: {},
29
35
  execute: async () => {
30
- return "Startup memories injected.";
31
- }
36
+ const result = await context.service.call("retrieveMemories", {
37
+ projectId: context.projectId,
38
+ query: "session startup context",
39
+ limit: 20,
40
+ mode: "auto",
41
+ depth: "default",
42
+ });
43
+ if (!result.ok)
44
+ return `Error: ${result.error.message}`;
45
+ return result.startupInjection;
46
+ },
32
47
  });
33
48
  }
34
49
  return tools;
@@ -0,0 +1,164 @@
1
+ import { readFileSync, writeFileSync, existsSync, mkdirSync } from "fs";
2
+ import { dirname } from "path";
3
+ export const SESSIONMEM_BLOCK_START = "<!-- sessionmem:start -->";
4
+ export const SESSIONMEM_BLOCK_END = "<!-- sessionmem:end -->";
5
+ const BLOCK_CONTENT = `
6
+ ## sessionmem — Persistent Memory
7
+
8
+ sessionmem is an MCP memory layer that persists context across sessions. It is installed and active via the \`sessionmem\` MCP server. Use its tools to recall prior context and to store important decisions, facts, and context so they're available in future sessions without the user re-explaining. The user should never have to ask you to do this — it is part of how you work in this project.
9
+
10
+ ### Startup
11
+ On Claude Code, prior context is injected automatically at session start by the
12
+ sessionmem \`SessionStart\` hook — you do not need to fetch it yourself. Do NOT
13
+ call \`startup_inject_memories\` on Claude Code: the hook already provides the
14
+ injection, so calling the tool would duplicate the context (it is not even
15
+ registered on Claude Code for this reason). If you do NOT see a "Relevant prior
16
+ context" block at the start of the session AND the \`startup_inject_memories\`
17
+ tool is available (e.g. the hook is not installed, or you are on a host without
18
+ hook support), call it once before any task work, or call \`retrieveMemories\`
19
+ with the current task as the query. Never inject twice if context was already
20
+ provided.
21
+
22
+ ### When to store memories (storeMemory)
23
+ - User makes an architectural or design decision
24
+ - You discover a non-obvious project constraint or convention
25
+ - A debugging session reveals an important root cause
26
+ - User states a preference about how they want things done
27
+ - A warning or pitfall is discovered that future sessions should know about
28
+
29
+ ### When to retrieve memories mid-session (retrieveMemories)
30
+ - Before making architectural decisions (check if prior decisions exist)
31
+ - When the user references something from a previous session
32
+ - When working in an area of the codebase that may have stored warnings or decisions
33
+
34
+ ### At session end (RECOMMENDED — do this without being asked)
35
+ Before the session ends, persist what was accomplished so the next session starts
36
+ informed. Store a concise \`summary\` memory (importance 7) of the key outcomes, plus
37
+ any new decisions, facts, or warnings. Use \`batchStoreMemory\` to write several at
38
+ once. This is what makes context survive across sessions and saves tokens later.
39
+
40
+ ### Memory kinds
41
+ - \`decision\` — architectural or design choices (importance: 7-9)
42
+ - \`fact\` — project constraints, conventions, patterns (importance: 5-7)
43
+ - \`warning\` — pitfalls, gotchas, things that broke before (importance: 8-10)
44
+ - \`preference\` — how the user likes things done (importance: 5-7)
45
+ - \`summary\` — session summaries (importance: 7)
46
+
47
+ ### Other tools
48
+ - \`listMemories\` — browse all stored memories for this project
49
+ - \`getMemory\` — fetch a specific memory by ID
50
+ - \`forgetMemory\` — delete an outdated or incorrect memory
51
+ - \`batchStoreMemory\` — store multiple memories in one call (use at session end)
52
+ - \`stats\` — check memory count and health
53
+
54
+ ### Guidelines
55
+ - Don't store trivial or easily re-derivable information
56
+ - Don't retrieve memories every single turn — retrieve at task boundaries
57
+ - Keep memory content concise (1-3 sentences) and self-contained
58
+ - Use appropriate importance scores (see kinds above)
59
+ `;
60
/**
 * Assemble the full sessionmem guidance block: the start marker, the guidance
 * markdown, and the end marker, each separated by a single newline.
 *
 * @returns {string} The marker-delimited block text.
 */
export function generateClaudeMdBlock() {
    const parts = [SESSIONMEM_BLOCK_START, BLOCK_CONTENT, SESSIONMEM_BLOCK_END];
    return parts.join("\n");
}
63
/**
 * Insert or refresh the sessionmem guidance block in the file at `filePath`.
 *
 * When the file already holds a start marker that is followed by an end
 * marker, everything from the start marker through the end marker is replaced
 * with a freshly generated block. Otherwise the block is appended, separated
 * from any existing content by a blank line. A missing parent directory or a
 * missing file is created.
 *
 * @param {string} filePath - Path of the guidance file to update.
 * @returns {boolean} `true` once the file has been written; `false` if any
 *   step threw (best-effort: the error is intentionally not propagated).
 */
export function injectClaudeMdBlock(filePath) {
    try {
        const dir = dirname(filePath);
        if (!existsSync(dir)) {
            mkdirSync(dir, { recursive: true });
        }
        let content = existsSync(filePath) ? readFileSync(filePath, "utf8") : "";
        const block = generateClaudeMdBlock();
        const startIdx = content.indexOf(SESSIONMEM_BLOCK_START);
        // Search for the end marker only AFTER the start marker. Searching from
        // offset 0 would pick up a stray end marker earlier in the file, making
        // the end offset smaller than the start offset and duplicating content
        // instead of replacing the block.
        const endIdx = startIdx === -1
            ? -1
            : content.indexOf(SESSIONMEM_BLOCK_END, startIdx + SESSIONMEM_BLOCK_START.length);
        if (endIdx !== -1) {
            // Replace the existing block in place.
            const afterBlock = endIdx + SESSIONMEM_BLOCK_END.length;
            content = content.slice(0, startIdx) + block + content.slice(afterBlock);
        }
        else {
            // No well-formed block yet: append, keeping one blank line between
            // the existing content and the block.
            if (content.length > 0 && !content.endsWith("\n")) {
                content += "\n";
            }
            content += "\n" + block + "\n";
        }
        writeFileSync(filePath, content, "utf8");
        return true;
    }
    catch {
        return false;
    }
}
94
/**
 * Write the sessionmem guidance block into a host-guidance file of any kind
 * (CLAUDE.md, AGENTS.md, Windsurf global_rules.md, a Cursor `.mdc` rule, …).
 *
 * The block itself is identical for every host. The single special case is a
 * Cursor `.mdc` rule file that does not exist yet: it is first created with an
 * `alwaysApply: true` frontmatter header, and the block is then appended after
 * it. Files that already exist, and every non-`.mdc` target, go straight to
 * the regular block injection.
 *
 * @param {string} filePath - Path of the guidance file to update.
 * @returns {boolean} The result of the block injection.
 */
export function injectGuidanceBlock(filePath) {
    try {
        const isNewCursorRule = filePath.endsWith(".mdc") && !existsSync(filePath);
        if (isNewCursorRule) {
            const parentDir = dirname(filePath);
            if (!existsSync(parentDir)) {
                mkdirSync(parentDir, { recursive: true });
            }
            const frontmatter = "---\ndescription: sessionmem persistent memory guidance\nalwaysApply: true\n---\n";
            writeFileSync(filePath, frontmatter, "utf8");
        }
    }
    catch {
        // Seeding the frontmatter is best-effort only. On failure we carry on:
        // the block injection below creates the file on its own.
    }
    return injectClaudeMdBlock(filePath);
}
120
/**
 * Remove the sessionmem guidance block from the file at `filePath`.
 *
 * The block is the span from the start marker through the first end marker
 * that follows it. One newline directly after the block is dropped, and a
 * blank line directly before it is collapsed, so removal does not leave a
 * growing gap behind.
 *
 * @param {string} filePath - Path of the guidance file to clean.
 * @returns {boolean} `true` when the block was removed or there was nothing to
 *   remove (missing file, missing start marker, or no end marker after the
 *   start marker); `false` if reading or writing threw.
 */
export function removeClaudeMdBlock(filePath) {
    try {
        if (!existsSync(filePath)) {
            return true;
        }
        const content = readFileSync(filePath, "utf8");
        const startIdx = content.indexOf(SESSIONMEM_BLOCK_START);
        if (startIdx === -1) {
            return true;
        }
        // Only an end marker AFTER the start marker closes the block. Searching
        // from offset 0 could match a stray end marker earlier in the file,
        // which would splice the wrong span and leave the block in place.
        const endIdx = content.indexOf(SESSIONMEM_BLOCK_END, startIdx + SESSIONMEM_BLOCK_START.length);
        if (endIdx === -1) {
            return true;
        }
        const endOfBlock = endIdx + SESSIONMEM_BLOCK_END.length;
        let before = content.slice(0, startIdx);
        let after = content.slice(endOfBlock);
        // Tidy the blank lines that surrounded the removed block.
        if (after.startsWith("\n")) {
            after = after.slice(1);
        }
        if (before.endsWith("\n\n")) {
            before = before.slice(0, -1);
        }
        writeFileSync(filePath, before + after, "utf8");
        return true;
    }
    catch {
        return false;
    }
}
152
/**
 * Report whether the file at `filePath` contains both sessionmem markers.
 *
 * Only the presence of each marker is checked, not their relative order.
 *
 * @param {string} filePath - Path of the guidance file to inspect.
 * @returns {boolean} `true` when the file exists and includes both the start
 *   and the end marker; `false` otherwise, including when reading fails.
 */
export function hasClaudeMdBlock(filePath) {
    try {
        if (existsSync(filePath)) {
            const text = readFileSync(filePath, "utf8");
            const markers = [SESSIONMEM_BLOCK_START, SESSIONMEM_BLOCK_END];
            return markers.every((marker) => text.includes(marker));
        }
        return false;
    }
    catch {
        return false;
    }
}
@@ -7,28 +7,58 @@ import { ClineAdapter } from "./ide/cline.js";
7
7
  import { GenericMCPAdapter } from "./generic.js";
8
8
  import { CodexAdapter } from "./global/codex.js";
9
9
  import { QCoderAdapter } from "./global/qcoder.js";
10
+ /**
11
+ * Canonical adapter names accepted by `--adapter <name>` / SESSIONMEM_ADAPTER.
12
+ * Kept in sync with {@link AdapterFactory.forName}; surfaced to the CLI for the
13
+ * install command's choices list.
14
+ */
15
+ export const ADAPTER_NAMES = [
16
+ "claude-code",
17
+ "cursor",
18
+ "windsurf",
19
+ "cline",
20
+ "codex",
21
+ "antigravity",
22
+ "qcoder",
23
+ "generic",
24
+ ];
10
25
  export class AdapterFactory {
11
26
  /**
12
27
  * Detect the current host environment and return the appropriate adapter.
28
+ *
29
+ * Detection keys are the REAL environment variables each host sets, verified
30
+ * against live shells:
31
+ * - Claude Code sets `CLAUDECODE=1`, `CLAUDE_CODE_ENTRYPOINT=cli`, and
32
+ * `CLAUDE_CODE_SESSION_ID=...` (note the `_ID` suffix). `TERM_PROGRAM` is
33
+ * the HOST terminal (e.g. `vscode`), never `"claude-code"`. The previous
34
+ * `CLAUDE_CODE_SESSION` / `TERM_PROGRAM === "claude-code"` check never
35
+ * matched, so the SessionStart-hook install path was never selected.
36
+ * - Antigravity sets `ANTIGRAVITY_APP_DATA_DIR` / `ANTIGRAVITY_SESSION_ID`
37
+ * (and leaks `ANTIGRAVITY_CLI_ALIAS`); checked first so its own CLI wins in
38
+ * its own shell.
13
39
  */
14
40
  static detectAdapter() {
15
41
  const env = process.env;
16
42
  if (env.ANTIGRAVITY_APP_DATA_DIR || env.ANTIGRAVITY_SESSION_ID) {
17
43
  return new AntigravityAdapter();
18
44
  }
19
- if (env.CLAUDE_CODE_SESSION || env.TERM_PROGRAM === "claude-code") {
45
+ if (env.CLAUDECODE === "1" ||
46
+ env.CLAUDE_CODE_ENTRYPOINT !== undefined ||
47
+ env.CLAUDE_CODE_SESSION_ID !== undefined) {
20
48
  return new ClaudeCodeAdapter();
21
49
  }
22
- if (env.TERM_PROGRAM === "Cursor" || env.CURSOR_APP_VERSION) {
50
+ if (env.CURSOR_AGENT !== undefined ||
51
+ env.CURSOR_CLI !== undefined ||
52
+ env.CURSOR_TRACE_ID !== undefined) {
23
53
  return new CursorAdapter();
24
54
  }
25
- if (env.TERM_PROGRAM === "Windsurf") {
26
- return new WindsurfAdapter();
27
- }
28
- if (env.CLINE_SESSION_ID) {
29
- return new ClineAdapter();
30
- }
31
- if (env.CODEX_SESSION_ID) {
55
+ // Windsurf is a VS Code fork with no unique env var; --adapter windsurf
56
+ // required. No reliable auto-detection branch.
57
+ // Cline is a VS Code extension; auto-detection is impossible. Use
58
+ // --adapter cline.
59
+ // Codex sets CODEX_HOME (and may expose OPENAI_CODEX). CODEX_SESSION_ID was
60
+ // unverified and never matched.
61
+ if (env.CODEX_HOME !== undefined || env.OPENAI_CODEX !== undefined) {
32
62
  return new CodexAdapter();
33
63
  }
34
64
  if (env.QCODER_SESSION) {
@@ -37,4 +67,33 @@ export class AdapterFactory {
37
67
  // Fallback to generic MCP if no specific host is detected
38
68
  return new GenericMCPAdapter();
39
69
  }
70
+ /**
71
+ * Resolve an adapter by its canonical name. Powers the `--adapter <name>`
72
+ * install flag and the `SESSIONMEM_ADAPTER` override so a user can force a
73
+ * host explicitly when auto-detection cannot (e.g. installing from a plain
74
+ * terminal that is not inside any host). Throws on an unknown name so the CLI
75
+ * can surface a clear error rather than silently falling back to generic.
76
+ */
77
+ static forName(name) {
78
+ switch (name) {
79
+ case "claude-code":
80
+ return new ClaudeCodeAdapter();
81
+ case "cursor":
82
+ return new CursorAdapter();
83
+ case "windsurf":
84
+ return new WindsurfAdapter();
85
+ case "cline":
86
+ return new ClineAdapter();
87
+ case "codex":
88
+ return new CodexAdapter();
89
+ case "antigravity":
90
+ return new AntigravityAdapter();
91
+ case "qcoder":
92
+ return new QCoderAdapter();
93
+ case "generic":
94
+ return new GenericMCPAdapter();
95
+ default:
96
+ throw new Error(`Unknown adapter "${name}". Valid adapters: ${ADAPTER_NAMES.join(", ")}.`);
97
+ }
98
+ }
40
99
  }
@@ -1,9 +1,13 @@
1
+ import { createRequire } from "module";
1
2
  import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
2
3
  import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
3
- import { forgetMemoryRequestSchema, getMemoryRequestSchema, listMemoriesRequestSchema, retrieveMemoriesRequestSchema, statsRequestSchema, storeMemoryRequestSchema, } from "../core/api/contracts.js";
4
+ import { batchStoreMemoryRequestSchema, forgetMemoryRequestSchema, getMemoryRequestSchema, handleSessionEndRequestSchema, ingestSessionEventsRequestSchema, listMemoriesRequestSchema, resetAccessCountsRequestSchema, retrieveMemoriesRequestSchema, statsRequestSchema, storeMemoryRequestSchema, summarizeSessionToMemoryRequestSchema, } from "../core/api/contracts.js";
4
5
  import { join } from "path";
5
6
  import { createCliContext } from "../cli/context.js";
6
7
  import { IDEInstaller } from "./ide/installer.js";
8
+ import { FallbackToolRegistrar } from "./capabilities/fallbackTools.js";
9
+ import { countStaleEmbeddings } from "../core/storage/memoryRepo.js";
10
+ import { EMBEDDING_VERSION } from "../core/embed/embeddingVersion.js";
7
11
  /**
8
12
  * Diagnostic logging sink for the stdio server. CRITICAL: the MCP protocol
9
13
  * frames are written to STDOUT by StdioServerTransport, so anything this server
@@ -13,6 +17,11 @@ import { IDEInstaller } from "./ide/installer.js";
13
17
  function logDiagnostic(message) {
14
18
  process.stderr.write(`[sessionmem] ${message}\n`);
15
19
  }
20
+ // Read the package version dynamically so the MCP server's advertised version
21
+ // tracks package.json on every release. A hardcoded literal silently drifts on
22
+ // `npm version` bumps (postversion does not rewrite source), so mirror ping.ts.
23
+ const require = createRequire(import.meta.url);
24
+ const SERVER_VERSION = require("../../package.json").version;
16
25
  /**
17
26
  * Strip `projectId` from a request schema's shape so the tool input only asks
18
27
  * the client for the fields it should provide; the server injects projectId.
@@ -21,45 +30,182 @@ function shapeWithoutProjectId(shape) {
21
30
  const { projectId: _projectId, ...rest } = shape;
22
31
  return rest;
23
32
  }
33
+ /**
34
+ * Resolve a default sessionId for tools that require one but were invoked
35
+ * without it. Agents pass arbitrary/inconsistent sessionIds (or none), which
36
+ * breaks the per-session soft-limit counter and handleSessionEnd correlation.
37
+ * Preferring CLAUDE_CODE_SESSION_ID ties an agent's storeMemory/ingest calls to
38
+ * the same Claude Code session the SessionStart hook ran under. Callers can
39
+ * still override by supplying an explicit sessionId.
40
+ */
41
+ // Evaluated once per process lifecycle so every storeMemory/ingest call in one
42
+ // MCP server process shares the same fallback session when no env session id is
43
+ // available. Computing `session-${Date.now()}` per call would hand each call a
44
+ // different fake session, breaking the per-session soft-limit counter and
45
+ // handleSessionEnd correlation.
46
// Computed a single time at module load so every call in this process that
// lacks an env-provided session id falls back to the same value.
const PROCESS_SESSION_FALLBACK = `session-${Date.now()}`;
/**
 * Pick the session id to use when a tool call did not supply one.
 *
 * Order of preference: `CLAUDE_CODE_SESSION_ID`, then `SESSION_ID`, then the
 * per-process fallback.
 *
 * @returns {string} A non-empty session id.
 */
function resolveDefaultSessionId() {
    // `||` rather than `??`: an environment variable that is exported but
    // empty ("") is not a usable session id and must fall through to the next
    // candidate, matching how `isMissing` treats the empty string.
    return (process.env.CLAUDE_CODE_SESSION_ID ||
        process.env.SESSION_ID ||
        PROCESS_SESSION_FALLBACK);
}
52
/**
 * Tell whether a value counts as "not provided": `undefined`, `null`, or the
 * empty string. Every other value (including `0` and `false`) is present.
 *
 * @param {*} value - Value to inspect.
 * @returns {boolean} `true` when the value is missing.
 */
function isMissing(value) {
    const missingValues = [undefined, null, ""];
    return missingValues.includes(value);
}
24
55
  const TOOL_DEFINITIONS = [
25
56
  {
26
57
  method: "retrieveMemories",
27
- description: "Retrieve the most relevant stored memories for a semantic query, ranked by relevance, recency, and importance.",
28
- inputShape: shapeWithoutProjectId(retrieveMemoriesRequestSchema.shape),
58
+ description: "Semantically search stored memories and return the top matches ranked by a weighted combination of relevance, recency, and importance. Read-only; no side effects.\n\n" +
59
+ "WHEN TO CALL: (1) At the start of every session — pass the current task or file as the query to pre-load relevant context. (2) Mid-session whenever a new topic, file, or decision area arises that may have prior context. Do NOT call on every user turn.\n\n" +
60
+ "WHEN NOT TO CALL: If you already retrieved memories for this topic this session. Use getMemory if you have a specific memoryId. Use listMemories only to audit the full store, not for context loading.\n\n" +
61
+ "Returns up to `limit` results (default 20). `mode='auto'` is the standard startup path; `mode='on-demand'` signals an explicit mid-session lookup. `depth='deep'` runs a broader semantic sweep at higher latency — use when the topic is unfamiliar. Phrase `query` as what you need to recall, not what you are about to do.\n\n" +
62
+ "NOTE: this tool updates access-pattern counters on the memories it returns (used to boost frequently-recalled memories in future ranking), so it is NOT side-effect-free despite being a lookup.",
63
+ // retrieveMemories mutates access_count on the rows it returns, so it is
64
+ // not read-only and the previous idempotentHint was inaccurate.
65
+ annotations: { readOnlyHint: false },
66
+ inputShape: {
67
+ query: retrieveMemoriesRequestSchema.shape.query.describe("Natural-language description of what you need to recall. Phrase as a topic or question (e.g. 'database connection settings', 'auth flow decisions') — not an action ('store info about...')."),
68
+ limit: retrieveMemoriesRequestSchema.shape.limit.describe("Maximum number of memories to return. Integer 1-100, default 20. Increase for broad topic sweeps; keep at default for focused lookups."),
69
+ mode: retrieveMemoriesRequestSchema.shape.mode.describe("'auto' for the standard startup context-load path. 'on-demand' for an explicit mid-session retrieval triggered by a specific task or question."),
70
+ depth: retrieveMemoriesRequestSchema.shape.depth.describe("'default' for standard semantic search. 'deep' for a broader sweep that surfaces less-similar memories — use when the topic is new or unfamiliar."),
71
+ },
29
72
  },
30
73
  {
31
74
  method: "storeMemory",
32
- description: "Store a memory (decision, fact, summary, or warning) for the current project.",
33
- inputShape: shapeWithoutProjectId(storeMemoryRequestSchema.shape),
75
+ description: "Persist a single memory unit to the local SQLite store. Accepts decisions, facts, architectural choices, warnings, and session summaries. NOT idempotent — each call creates a new record even with identical content. Writes to disk immediately.\n\n" +
76
+ "WHEN TO CALL: After any significant decision, discovery, or conclusion that should be available in a future session. Good candidates: technology choices, non-obvious constraints, bug root-causes, architectural decisions, key facts about the codebase.\n\n" +
77
+ "WHEN NOT TO CALL: For trivial observations, transient state, or content that duplicates what was just retrieved. Do not store entire files or full conversation transcripts.\n\n" +
78
+ "`kind` categories: 'decision', 'fact', 'summary', 'warning', 'preference'. Write `content` to be self-contained — it must be useful without any surrounding conversation context. `importance` 1-10 (10 = most critical); directly affects retrieval ranking in future sessions.\n\n" +
79
+ "RESPONSE may include `warningCodes`: 'session_write_limit_warning' (this session has stored many memories — stop storing trivia and prefer batchStoreMemory) and 'redaction_partial_failure' (a redaction rule errored; the write still succeeded). Treat them as advisory signals, not errors.",
80
+ annotations: { destructiveHint: false, idempotentHint: false },
81
+ inputShape: {
82
+ memoryId: storeMemoryRequestSchema.shape.memoryId.describe("Caller-supplied unique UUID for this memory (e.g. crypto.randomUUID()). Used for deduplication and for later retrieval by ID via getMemory."),
83
+ sessionId: storeMemoryRequestSchema.shape.sessionId.describe("Identifier for the current session. Used to group memories by session for diagnostics. Use a consistent ID within a single session."),
84
+ sourceAdapter: storeMemoryRequestSchema.shape.sourceAdapter.describe("Name of the adapter or host creating this memory (e.g. 'claude-code', 'cursor', 'generic'). Used for provenance tracking."),
85
+ kind: storeMemoryRequestSchema.shape.kind.describe("Category of this memory. One of: 'decision', 'fact', 'warning', 'preference', 'summary'. These are the only recognized kinds — others are rejected."),
86
+ content: storeMemoryRequestSchema.shape.content.describe("The memory text. Must be self-contained and specific — written so it is useful without surrounding conversation context. Avoid vague phrases like 'the user decided to...'."),
87
+ importance: storeMemoryRequestSchema.shape.importance.describe("Integer 1-10 indicating criticality (10 = most important). Directly affects ranking in future retrieveMemories calls. Use 8-10 for decisions that must not be forgotten; 3-5 for useful but non-critical facts."),
88
+ redactionEnabled: storeMemoryRequestSchema.shape.redactionEnabled.describe("If true, PII is stripped from content before storage. Omit to use the project-level redaction setting from config.json."),
89
+ },
34
90
  },
35
91
  {
36
92
  method: "listMemories",
37
- description: "List all stored memories for the current project.",
93
+ description: "Return every memory stored for the current project, unfiltered and without ranking. Read-only; no side effects.\n\n" +
94
+ "WHEN TO CALL: When you need a complete inventory of stored memories — to audit what has been saved, detect duplicates, or build a full summary of all known context.\n\n" +
95
+ "WHEN NOT TO CALL: For normal context loading at session start — use retrieveMemories instead, which ranks by relevance. listMemories returns the entire store unfiltered and can be very large.",
96
+ annotations: { readOnlyHint: true, idempotentHint: true },
38
97
  inputShape: shapeWithoutProjectId(listMemoriesRequestSchema.shape),
39
98
  },
40
99
  {
41
100
  method: "getMemory",
42
- description: "Fetch a single stored memory by its ID.",
43
- inputShape: shapeWithoutProjectId(getMemoryRequestSchema.shape),
101
+ description: "Fetch a single memory record by its exact ID. Returns the full record: content, kind, importance, timestamps, and session metadata. Read-only; no side effects.\n\n" +
102
+ "WHEN TO CALL: When you already have a specific memoryId from a prior retrieveMemories or listMemories result and need its full detail.\n\n" +
103
+ "WHEN NOT TO CALL: For topic-based search — use retrieveMemories for that. This tool requires an exact ID and does not search by content.",
104
+ annotations: { readOnlyHint: true, idempotentHint: true },
105
+ inputShape: {
106
+ memoryId: getMemoryRequestSchema.shape.memoryId.describe("Exact UUID of the memory to fetch. Obtain from a prior retrieveMemories or listMemories result."),
107
+ },
44
108
  },
45
109
  {
46
110
  method: "forgetMemory",
47
- description: "Delete a stored memory by its ID.",
48
- inputShape: shapeWithoutProjectId(forgetMemoryRequestSchema.shape),
111
+ description: "Permanently delete a single memory by ID. The record is removed from the local SQLite store immediately and CANNOT be recovered. Destructive and irreversible.\n\n" +
112
+ "WHEN TO CALL: Only when a memory is known to be incorrect, dangerously outdated, or a duplicate that would mislead future sessions.\n\n" +
113
+ "WHEN NOT TO CALL: If there is any doubt. A memory that is merely old or low-relevance does not need deletion — retrieval ranking deprioritizes it automatically.",
114
+ annotations: { destructiveHint: true, idempotentHint: false },
115
+ inputShape: {
116
+ memoryId: forgetMemoryRequestSchema.shape.memoryId.describe("Exact UUID of the memory to permanently delete. Obtain from a prior listMemories or retrieveMemories call. Deletion is immediate and irreversible."),
117
+ },
49
118
  },
50
119
  {
51
120
  method: "stats",
52
- description: "Report memory statistics (total memories and session events) for the current project.",
121
+ description: "Return aggregate statistics for the current project: total stored memory count and total ingested session event count. Read-only; no side effects.\n\n" +
122
+ "WHEN TO CALL: For diagnostic or monitoring purposes — to confirm memories were stored after a session, check store health, or report usage numbers.\n\n" +
123
+ "WHEN NOT TO CALL: As part of normal context loading. stats returns counts only, not content; use retrieveMemories to load actual context.",
124
+ annotations: { readOnlyHint: true, idempotentHint: true },
53
125
  inputShape: shapeWithoutProjectId(statsRequestSchema.shape),
54
126
  },
127
+ {
128
+ method: "resetAccessCounts",
129
+ description: "Reset access-pattern counters for all memories in the current project. Sets access_count to 0 and clears last_accessed timestamps without deleting any memories. Useful after large refactors when old access patterns no longer reflect current relevance.\n\n" +
130
+ "WHEN TO CALL: After major codebase restructuring, project pivots, or when access-boosted rankings no longer reflect current relevance.\n\n" +
131
+ "WHEN NOT TO CALL: During normal operation — access patterns self-correct as usage shifts.",
132
+ annotations: { destructiveHint: false, idempotentHint: true },
133
+ inputShape: shapeWithoutProjectId(resetAccessCountsRequestSchema.shape),
134
+ },
135
+ {
136
+ method: "batchStoreMemory",
137
+ description: "Persist multiple memory units in a single atomic SQLite transaction. Significantly faster than calling storeMemory repeatedly for session-end writes of 10-20 memories.\n\n" +
138
+ "WHEN TO CALL: At session end or whenever you have multiple memories to store at once. Reduces overhead from per-insert fsync by wrapping all writes in one transaction.\n\n" +
139
+ "WHEN NOT TO CALL: For a single memory — use storeMemory instead. For imports from external files — use importMemories.\n\n" +
140
+ "Each item in the `memories` array follows the same schema as storeMemory (memoryId, sessionId, sourceAdapter, kind, content, importance). Invalid items are reported individually; valid items are still stored atomically.\n\n" +
141
+ "NOTE: the per-item `memory` echoed back in the response has its `content` truncated to 2000 characters (a batch can return many rows). The full body is still persisted — fetch it with getMemory if you need the complete text. (Single-record storeMemory echoes the full content.)",
142
+ annotations: { destructiveHint: false, idempotentHint: false },
143
+ inputShape: {
144
+ memories: batchStoreMemoryRequestSchema.shape.memories.describe("Array of memory objects to store. Each must include: memoryId (unique UUID), sessionId, sourceAdapter, kind, content (self-contained text), importance (1-10). Minimum 1 item, maximum 100.\n\n" +
145
+ "Per-item results may include `warningCodes` (e.g. 'session_write_limit_warning', 'redaction_partial_failure') — advisory signals, not failures."),
146
+ },
147
+ },
148
+ {
149
+ method: "ingestSessionEvents",
150
+ description: "Push raw session events (tool calls, decisions, file edits, user turns) to sessionmem so they can be summarized at session end and counted toward token-savings analytics. Writes immediately, in a single transaction. Re-ingesting the same (sessionId, eventIndex) is a no-op, so retries are safe.\n\n" +
151
+ "WHEN TO CALL: Periodically during a session (e.g. at task boundaries) to record what happened, OR in one batch shortly before the session ends. This is what powers automatic session-end summarization and `sessionmem savings`.\n\n" +
152
+ "WHEN NOT TO CALL: For durable, individually-important facts/decisions — use storeMemory for those. Session events are transient raw material for summarization, not first-class memories.\n\n" +
153
+ "Each event needs: id (unique), eventIndex (monotonic 0-based order within the session), eventType (e.g. 'tool_use', 'user_message'), payloadJson (a JSON string of the event body).\n\n" +
154
+ "LIMITS: at most 500 events per call. For more than 500 events, call this tool multiple times in chunks — re-ingestion of already-stored events is safe (idempotent via the (project, session, eventIndex) UNIQUE index), so overlapping chunks never double-count.",
155
+ annotations: { destructiveHint: false, idempotentHint: true },
156
+ inputShape: shapeWithoutProjectId(ingestSessionEventsRequestSchema.shape),
157
+ },
158
+ {
159
+ method: "summarizeSessionToMemory",
160
+ description: "Store an agent-authored session summary as a durable 'summary' memory in one call. Upserts on memoryId, so calling it again with the same memoryId replaces the prior summary rather than duplicating it.\n\n" +
161
+ "WHEN TO CALL: At session end when you have already written a concise summary of what was accomplished and want to persist it directly (the simpler alternative to handleSessionEnd's automatic summarization).\n\n" +
162
+ "WHEN NOT TO CALL: When you want sessionmem to generate the summary from ingested session events — use handleSessionEnd for that. For non-summary facts/decisions use storeMemory.\n\n" +
163
+ "Provide: memoryId (stable id for this session's summary), sessionId, sourceAdapter, summary (the text), importance (1-10; 7 is typical for summaries).",
164
+ annotations: { destructiveHint: false, idempotentHint: true },
165
+ inputShape: shapeWithoutProjectId(summarizeSessionToMemoryRequestSchema.shape),
166
+ },
167
+ {
168
+ method: "handleSessionEnd",
169
+ description: "Run the full session-end pipeline: auto-summarize the session's ingested events into a durable memory (when enough events exist) and apply a light retention prune of stale memories. Idempotent on the summary memory (upsert by sessionId).\n\n" +
170
+ "WHEN TO CALL: Once, at the very end of a session, after ingesting session events via ingestSessionEvents. Lets sessionmem generate and store the session summary for you.\n\n" +
171
+ "WHEN NOT TO CALL: Mid-session, or when you have already written your own summary (use summarizeSessionToMemory instead). On Claude Code this also runs automatically via the installed SessionEnd hook, so calling it explicitly is usually unnecessary there.\n\n" +
172
+ "Provide sessionId and sourceAdapter. `memoryId` (optional) pins the summary's id; omit to derive `${sessionId}-summary`. `config` (optional) tunes autoSummarize / minimumEventThreshold / cloud summarization; omit for sensible local-only defaults.\n\n" +
173
+ "RESPONSE `status` is one of: 'stored', 'skipped_threshold' (too few events), 'skipped_disabled', 'failed'. `warningCodes` may carry cloud/local fallback signals.",
174
+ annotations: { destructiveHint: false, idempotentHint: true },
175
+ inputShape: shapeWithoutProjectId(handleSessionEndRequestSchema.shape),
176
+ },
55
177
  ];
56
178
  export class GenericMCPAdapter {
57
179
  name = "Generic MCP";
180
+ /**
181
+ * When true, the `startup_inject_memories` fallback tool is NOT registered.
182
+ * Hosts that already inject prior context deterministically at session start
183
+ * (e.g. Claude Code via its SessionStart hook) set this so the agent cannot
184
+ * double-inject memories — calling the tool on top of the hook would duplicate
185
+ * the injected content and double-count access_count increments.
186
+ */
187
+ suppressStartupInjectionTool = false;
188
+ // The stdio server (startMcpServer) registers TOOLS only — it never calls
189
+ // server.registerPrompt() or server.registerResource(). Advertising prompt or
190
+ // resource support here would make FallbackToolRegistrar SKIP the
191
+ // startup_inject_memories / fetch_memories tools (it only registers them when
192
+ // the matching capability is absent), leaving the agent with no automatic
193
+ // startup-injection path. Capabilities therefore reflect reality: tools only.
194
+ // Host subclasses inherit this and MUST NOT re-enable prompts/resources unless
195
+ // they actually register them on the server.
58
196
  capabilities = {
59
- supportsPrompts: true,
60
- supportsResources: true,
197
+ supportsPrompts: false,
198
+ supportsResources: false,
61
199
  supportsTools: true,
62
200
  };
201
+ /**
202
+ * Default agent-guidance target for an undetected/generic MCP host: a
203
+ * project-local AGENTS.md (the emerging cross-tool standard). Host subclasses
204
+ * override this with the file their agent actually reads at startup.
205
+ * @returns a one-element array holding "AGENTS.md" joined onto process.cwd(), which is read at call time. */
206
+ guidanceTargets() {
207
+ return [join(process.cwd(), "AGENTS.md")];
208
+ }
63
209
  /**
64
210
  * Fallback for hosts that aren't specifically detected: register sessionmem
65
211
  * in a project-local `.mcp.json` (the de-facto generic MCP config format).
@@ -89,17 +235,51 @@ export class GenericMCPAdapter {
89
235
  // SESSIONMEM_PROJECT_ID) used for isolated integration tests.
90
236
  const ctx = createCliContext();
91
237
  const { service, projectId } = ctx;
238
+ // Surface stale embeddings (e.g. after an EMBEDDING_VERSION bump) so the
239
+ // operator knows semantic ranking has degraded to importance+recency for
240
+ // those rows until `sessionmem re-embed` is run. Best-effort; to stderr only
241
+ // so it can never corrupt the stdio protocol stream.
242
+ try {
243
+ const stale = countStaleEmbeddings(ctx.db, projectId, EMBEDDING_VERSION);
244
+ if (stale > 0) {
245
+ logDiagnostic(`${stale} memory(ies) have stale embeddings (version != ${EMBEDDING_VERSION}). ` +
246
+ `Run \`sessionmem re-embed\` to restore full semantic ranking.`);
247
+ }
248
+ }
249
+ catch {
250
+ // Never block server startup on a diagnostic query.
251
+ }
92
252
  const server = new McpServer({
93
253
  name: "sessionmem",
94
- version: "1.0.0",
254
+ version: SERVER_VERSION,
95
255
  });
96
256
  for (const def of TOOL_DEFINITIONS) {
97
257
  server.registerTool(def.method, {
98
258
  description: def.description,
99
259
  inputSchema: def.inputShape,
260
+ ...(def.annotations ? { annotations: def.annotations } : {}),
100
261
  }, async (args) => {
101
262
  // Inject the server-resolved projectId; clients never set it.
102
- const request = { ...args, projectId };
263
+ const enriched = { ...args, projectId };
264
+ // Default a missing sessionId for tools that require one so the
265
+ // per-session counters and session-end correlation stay consistent
266
+ // even when the agent omits (or cannot supply) a stable sessionId.
267
+ if ("sessionId" in def.inputShape && isMissing(enriched.sessionId)) {
268
+ enriched.sessionId = resolveDefaultSessionId();
269
+ }
270
+ // batchStoreMemory carries sessionId per-item, not at the top level —
271
+ // backfill each item that omitted it with a single shared default.
272
+ if (def.method === "batchStoreMemory" && Array.isArray(enriched.memories)) {
273
+ let sharedDefault;
274
+ enriched.memories = enriched.memories.map((entry) => {
275
+ if (entry && typeof entry === "object" && isMissing(entry.sessionId)) {
276
+ sharedDefault ??= resolveDefaultSessionId();
277
+ return { ...entry, sessionId: sharedDefault };
278
+ }
279
+ return entry;
280
+ });
281
+ }
282
+ const request = enriched;
103
283
  const result = await service.call(def.method, request);
104
284
  if (result.ok === false) {
105
285
  return {
@@ -122,6 +302,32 @@ export class GenericMCPAdapter {
122
302
  };
123
303
  });
124
304
  }
305
+ // Register fallback tools for hosts that lack resource or prompt support.
306
+ // These provide fetch_memories and startup_inject_memories as tool-based
307
+ // alternatives, wired to the same service instance used by TOOL_DEFINITIONS.
308
+ const fallbackTools = FallbackToolRegistrar.getFallbackTools(this.capabilities, {
309
+ service,
310
+ projectId,
311
+ }).filter((fallback) => !(this.suppressStartupInjectionTool && fallback.name === "startup_inject_memories"));
312
+ for (const fallback of fallbackTools) {
313
+ server.registerTool(fallback.name, { description: fallback.description, inputSchema: fallback.inputShape }, async (args) => {
314
+ const result = await fallback.execute(args);
315
+ return { content: [{ type: "text", text: result }] };
316
+ });
317
+ }
318
+ // Graceful shutdown: close the DB on SIGINT/SIGTERM so SQLite checkpoints
319
+ // the WAL and releases its file handles cleanly before the process exits.
320
+ const shutdown = () => {
321
+ try {
322
+ ctx.db.close();
323
+ }
324
+ catch {
325
+ // best-effort; never block exit on a close failure
326
+ }
327
+ process.exit(0);
328
+ };
329
+ process.on("SIGINT", shutdown);
330
+ process.on("SIGTERM", shutdown);
125
331
  logDiagnostic(`Starting Generic MCP server over stdio (project: ${projectId})`);
126
332
  await server.connect(new StdioServerTransport());
127
333
  }