memoryai-mcp 2.3.0 → 2.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +362 -304
- package/dist/claude-setup.d.ts +19 -0
- package/dist/claude-setup.js +216 -0
- package/dist/index.js +105 -9
- package/dist/kiro-setup.d.ts +11 -2
- package/dist/kiro-setup.js +143 -60
- package/package.json +46 -45
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* memoryai-claude-setup
|
|
4
|
+
*
|
|
5
|
+
* Wires MemoryAI into Claude Code at the MECHANISM level using HTTP hooks.
|
|
6
|
+
* Claude Code injects a hook's `additionalContext` straight into the model's
|
|
7
|
+
* context — no agent decision required — so memory works the moment it's set up,
|
|
8
|
+
* exactly like the OpenAI proxy. The user runs this once and never thinks about
|
|
9
|
+
* memory again:
|
|
10
|
+
*
|
|
11
|
+
* - SessionStart → POST /v1/hooks/claude/session-start (inject DNA + recent context)
|
|
12
|
+
* - UserPromptSubmit→ POST /v1/hooks/claude/user-prompt (recall before answering)
|
|
13
|
+
* - Stop → POST /v1/hooks/claude/stop (auto-store after each turn)
|
|
14
|
+
*
|
|
15
|
+
* It also registers the MCP server (so the 70+ tools are available for advanced
|
|
16
|
+
* use) and writes a CLAUDE.md note. Existing settings/CLAUDE.md are merged, never
|
|
17
|
+
* clobbered.
|
|
18
|
+
*/
|
|
19
|
+
export {};
|
|
@@ -0,0 +1,216 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* memoryai-claude-setup
|
|
4
|
+
*
|
|
5
|
+
* Wires MemoryAI into Claude Code at the MECHANISM level using HTTP hooks.
|
|
6
|
+
* Claude Code injects a hook's `additionalContext` straight into the model's
|
|
7
|
+
* context — no agent decision required — so memory works the moment it's set up,
|
|
8
|
+
* exactly like the OpenAI proxy. The user runs this once and never thinks about
|
|
9
|
+
* memory again:
|
|
10
|
+
*
|
|
11
|
+
* - SessionStart → POST /v1/hooks/claude/session-start (inject DNA + recent context)
|
|
12
|
+
* - UserPromptSubmit→ POST /v1/hooks/claude/user-prompt (recall before answering)
|
|
13
|
+
* - Stop → POST /v1/hooks/claude/stop (auto-store after each turn)
|
|
14
|
+
*
|
|
15
|
+
* It also registers the MCP server (so the 70+ tools are available for advanced
|
|
16
|
+
* use) and writes a CLAUDE.md note. Existing settings/CLAUDE.md are merged, never
|
|
17
|
+
* clobbered.
|
|
18
|
+
*/
|
|
19
|
+
import { createInterface } from "node:readline";
|
|
20
|
+
import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs";
|
|
21
|
+
import { join, dirname } from "node:path";
|
|
22
|
+
import { homedir } from "node:os";
|
|
23
|
+
const rl = createInterface({ input: process.stdin, output: process.stdout });
|
|
24
|
+
/**
 * Prompt on the shared readline interface and resolve with the user's reply.
 *
 * @param {string} question - Prompt text printed before the colon.
 * @param {string} [fallback] - Shown in square brackets and used when the reply is blank.
 * @returns {Promise<string>} The trimmed reply, else `fallback`, else an empty string.
 */
function ask(question, fallback) {
    const hint = fallback ? ` [${fallback}]` : "";
    const promptText = `${question}${hint}: `;
    return new Promise((resolve) => {
        const onAnswer = (answer) => {
            const reply = answer.trim();
            resolve(reply || fallback || "");
        };
        rl.question(promptText, onAnswer);
    });
}
|
|
32
|
+
/**
 * Load a JSON settings file, tolerating a missing file.
 *
 * Returns `{}` when the file does not exist or holds a falsy JSON value
 * (`null`, `0`, `""`, `false`). When the file exists but cannot be parsed, or
 * its top-level value is not a plain object (array, string, number, `true`),
 * a warning is printed and the process exits with status 1 so the caller never
 * merges keys into — or overwrites — a file it does not understand.
 *
 * @param {string} path - Path of the JSON file to read.
 * @returns {object} The parsed object, or `{}`.
 */
function readJsonSafe(path) {
    if (!existsSync(path))
        return {};
    let parsed;
    try {
        parsed = JSON.parse(readFileSync(path, "utf-8"));
    }
    catch {
        console.error(` warn ${path} is not valid JSON — leaving it untouched and aborting.`);
        process.exit(1);
    }
    // Falsy JSON values keep the historical "treat as empty settings" behaviour.
    if (!parsed)
        return {};
    // Callers assign properties on the result; anything but a plain object would
    // either throw (primitives in strict mode) or serialise back wrongly (arrays).
    if (typeof parsed !== "object" || Array.isArray(parsed)) {
        console.error(` warn ${path} does not contain a JSON object — leaving it untouched and aborting.`);
        process.exit(1);
    }
    return parsed;
}
|
|
43
|
+
/**
 * Write `data` to `path` as two-space-indented JSON followed by a newline,
 * creating any missing parent directories first.
 *
 * @param {string} path - Destination file path.
 * @param {*} data - Value handed to `JSON.stringify`.
 */
function writeJson(path, data) {
    const parentDir = dirname(path);
    mkdirSync(parentDir, { recursive: true });
    const serialized = `${JSON.stringify(data, null, 2)}\n`;
    writeFileSync(path, serialized, "utf-8");
}
|
|
47
|
+
/**
 * Build one HTTP hook handler object aimed at a MemoryAI endpoint.
 *
 * @param {string} endpoint - Full URL the hook posts to.
 * @param {string} apiKey - Key embedded literally in the `Authorization: Bearer` header.
 * @param {number} timeout - Timeout value stored on the handler as given.
 * @returns {{type: string, url: string, timeout: number, headers: object, allowedEnvVars: string[]}}
 */
function httpHook(endpoint, apiKey, timeout) {
    const authorization = `Bearer ${apiKey}`;
    const handler = { type: "http", url: endpoint, timeout };
    handler.headers = { Authorization: authorization };
    // The header above carries the literal key, so no environment variable names
    // are listed. NOTE(review): the original comment called this field "required"
    // for header interpolation — confirm against the Claude Code hooks reference.
    handler.allowedEnvVars = [];
    return handler;
}
|
|
58
|
+
/**
 * Report whether a hook group already contains a handler whose `url` is a
 * string containing `/v1/hooks/claude/`.
 *
 * @param {{hooks?: Array<object>}|null|undefined} group - One entry of a hook event list.
 * @returns {boolean} True when such a handler is present.
 */
function groupHasMemoryAI(group) {
    const marker = "/v1/hooks/claude/";
    const pointsAtMemoryAI = (candidate) => {
        const target = candidate?.url;
        return typeof target === "string" && target.includes(marker);
    };
    const handlers = group?.hooks || [];
    return handlers.some(pointsAtMemoryAI);
}
|
|
63
|
+
/**
 * Append `handler` as a new hook group for `event` unless one of the event's
 * existing groups already targets MemoryAI. Mutates `settings` in place,
 * creating `settings.hooks` and the event list when they are missing.
 *
 * @param {object} settings - Parsed settings object to update.
 * @param {string} event - Hook event name used as the key under `settings.hooks`.
 * @param {object} handler - Handler object wrapped as `{ hooks: [handler] }`.
 * @returns {boolean} True when a group was appended, false when skipped as a duplicate.
 */
function ensureHook(settings, event, handler) {
    if (!settings.hooks)
        settings.hooks = {};
    if (!settings.hooks[event])
        settings.hooks[event] = [];
    const groups = settings.hooks[event];
    // De-dupe: leave the list alone when a MemoryAI group is already registered.
    const alreadyWired = groups.some(groupHasMemoryAI);
    if (!alreadyWired)
        groups.push({ hooks: [handler] });
    return !alreadyWired;
}
|
|
72
|
+
const CLAUDE_MD = `
|
|
73
|
+
# MemoryAI — Persistent Memory (automatic)
|
|
74
|
+
|
|
75
|
+
MemoryAI is wired into this Claude Code via HTTP hooks, so memory works
|
|
76
|
+
automatically at the mechanism level — you don't have to call tools by hand:
|
|
77
|
+
|
|
78
|
+
- Relevant past context is injected before each prompt (UserPromptSubmit hook).
|
|
79
|
+
- Session-start context (preferences, decisions, recent work) loads on open.
|
|
80
|
+
- Decisions and preferences are stored automatically when each turn ends.
|
|
81
|
+
|
|
82
|
+
The MemoryAI MCP server is also connected for advanced use. You may call
|
|
83
|
+
\`memory_recall\` explicitly when you need deeper history, but for everyday work
|
|
84
|
+
the hooks handle it. Never store secrets or credentials.
|
|
85
|
+
`;
|
|
86
|
+
/**
 * Build the MCP server entry that launches `memoryai-mcp` through `npx`, with
 * the key and endpoint passed to the child process as `HM_*` environment variables.
 *
 * @param {string} apiKey - Stored as `env.HM_API_KEY`.
 * @param {string} endpoint - Stored as `env.HM_ENDPOINT`.
 * @returns {{command: string, args: string[], env: object}}
 */
const MCP_BLOCK = (apiKey, endpoint) => {
    const env = { HM_API_KEY: apiKey, HM_ENDPOINT: endpoint };
    return { command: "npx", args: ["-y", "memoryai-mcp"], env };
};
|
|
91
|
+
/**
 * Auto-provision a fresh API key from the self-service endpoint so the user
 * does not need curl or a dashboard.
 *
 * POSTs `{ name, tos_accepted: true }` to `<endpoint>/v1/admin/provision`
 * (trailing slashes on `endpoint` are stripped first). ToS is accepted on the
 * user's behalf because running this installer is an explicit action.
 *
 * Never throws: every failure (non-2xx status, network error, timeout,
 * unparseable body, missing or non-string `api_key`) is logged as a warning
 * and reported as `null`, so the caller can fall back to asking for a key.
 *
 * @param {string} endpoint - Base URL of the MemoryAI service.
 * @param {string} [name] - Label sent with the request; defaults to "claude-code".
 * @returns {Promise<string|null>} The provisioned key, or null on any failure.
 */
async function provisionKey(endpoint, name) {
    // Upper bound for the whole request; without it an unresponsive endpoint
    // would leave the installer (and any CI job running it) hanging forever.
    const timeoutMs = 15000;
    const base = endpoint.replace(/\/+$/, "");
    try {
        const resp = await fetch(`${base}/v1/admin/provision`, {
            method: "POST",
            headers: { "Content-Type": "application/json" },
            body: JSON.stringify({ name: name || "claude-code", tos_accepted: true }),
            signal: AbortSignal.timeout(timeoutMs),
        });
        if (!resp.ok) {
            const txt = await resp.text().catch(() => "");
            console.error(` warn auto-provision failed (HTTP ${resp.status}). ${txt.slice(0, 200)}`);
            return null;
        }
        const data = (await resp.json());
        const key = data?.api_key;
        // Only a non-empty string is usable as a Bearer token / env value.
        if (typeof key === "string" && key) {
            console.log(` ok provisioned new API key (${key.slice(0, 10)}…, plan=${data.plan || "?"})`);
            return key;
        }
        console.error(" warn auto-provision response did not include an API key.");
        return null;
    }
    catch (e) {
        console.error(` warn auto-provision request error: ${e instanceof Error ? e.message : String(e)}`);
        return null;
    }
}
|
|
123
|
+
/**
 * Installer entry point: gather endpoint / API key / scope, then update the
 * chosen Claude settings file and CLAUDE.md.
 *
 * Steps, in order:
 *   1. Resolve configuration from the environment (non-interactive when
 *      MEMORYAI_NONINTERACTIVE is "1" or both a key and an endpoint are set),
 *      otherwise prompt. A missing key triggers auto-provisioning.
 *   2. Exit with status 1 when no key could be obtained.
 *   3. Add `mcpServers.memoryai` and the three lifecycle hooks to settings.json
 *      (project `.claude/` when the scope answer starts with "p", else the
 *      user's home `.claude/`), skipping entries that already exist.
 *   4. Append the MemoryAI note to CLAUDE.md unless its heading is already there.
 *
 * NOTE(review): confirm that Claude Code reads `mcpServers` from settings.json
 * for the selected scope; this function only writes the key there.
 */
async function main() {
    console.log(`\nMemoryAI — Claude Code Setup (mechanism-level auto-memory)\n`);
    const defaultEndpoint = "https://memoryai.dev";
    const { env } = process;
    // Environment overrides. MEMORYAI_SCOPE = "user" (default) or "project".
    const envKey = env.HM_API_KEY || env.MEMORYAI_API_KEY || "";
    const envEndpoint = env.HM_ENDPOINT || env.MEMORYAI_ENDPOINT || "";
    const envScope = (env.MEMORYAI_SCOPE || "").toLowerCase();
    const scripted = env.MEMORYAI_NONINTERACTIVE === "1" || (envKey !== "" && envEndpoint !== "");
    let apiKey;
    let endpoint;
    let scopeAns;
    // Returns `candidate` when it is non-empty; otherwise announces `notice` and
    // tries to provision a key. A failed provision leaves the (empty) candidate.
    const keyOrProvision = async (candidate, notice) => {
        if (candidate)
            return candidate;
        console.log(notice);
        const fresh = await provisionKey(endpoint, "claude-code");
        return fresh || candidate;
    };
    if (scripted) {
        endpoint = envEndpoint || defaultEndpoint;
        scopeAns = envScope || "u";
        if (envKey)
            console.log(" (non-interactive: using environment configuration)");
        apiKey = await keyOrProvision(envKey, " ... non-interactive, no key — provisioning one");
    }
    else {
        endpoint = await ask("Endpoint", envEndpoint || defaultEndpoint);
        const typedKey = envKey || (await ask("MemoryAI API key (blank = auto-provision a free one)")).trim();
        apiKey = await keyOrProvision(typedKey, " ... no key given — provisioning one for you");
        scopeAns = (await ask("Apply to (u)ser globally or this (p)roject?", "u")).toLowerCase();
    }
    if (!apiKey) {
        console.error("Error: could not obtain an API key (auto-provision failed). Set HM_API_KEY and re-run.");
        process.exit(1);
    }
    const projectScope = scopeAns.startsWith("p");
    let settingsPath;
    if (projectScope) {
        settingsPath = join(process.cwd(), ".claude", "settings.json");
    }
    else {
        settingsPath = join(homedir(), ".claude", "settings.json");
    }
    console.log("");
    const settings = readJsonSafe(settingsPath);
    // 1. MCP server (advanced tools) — never replaces an existing entry.
    settings.mcpServers = settings.mcpServers || {};
    if (settings.mcpServers.memoryai) {
        console.log(" skip mcpServers.memoryai (already present)");
    }
    else {
        settings.mcpServers.memoryai = MCP_BLOCK(apiKey, endpoint);
        console.log(" add mcpServers.memoryai");
    }
    // 2. The three lifecycle hooks: SessionStart and UserPromptSubmit are given
    //    a timeout of 10, Stop a timeout of 15.
    const base = endpoint.replace(/\/+$/, "");
    const hookPlan = [
        ["SessionStart", "session-start", 10],
        ["UserPromptSubmit", "user-prompt", 10],
        ["Stop", "stop", 15],
    ];
    // Register every hook first, then report, so output only starts once all
    // three registrations have succeeded.
    const outcomes = hookPlan.map(([event, slug, timeout]) => {
        const handler = httpHook(`${base}/v1/hooks/claude/${slug}`, apiKey, timeout);
        return [event, ensureHook(settings, event, handler)];
    });
    for (const [event, didAdd] of outcomes) {
        const verb = didAdd ? "add " : "skip ";
        const note = didAdd ? "" : " (already present)";
        console.log(` ${verb} hooks.${event}${note}`);
    }
    writeJson(settingsPath, settings);
    console.log(` write ${settingsPath}`);
    // 3. CLAUDE.md note (append if missing).
    let claudeMdPath;
    if (projectScope) {
        claudeMdPath = join(process.cwd(), "CLAUDE.md");
    }
    else {
        claudeMdPath = join(homedir(), ".claude", "CLAUDE.md");
    }
    const existing = existsSync(claudeMdPath) ? readFileSync(claudeMdPath, "utf-8") : "";
    if (existing.includes("MemoryAI — Persistent Memory")) {
        console.log(` skip ${claudeMdPath} (note already present)`);
    }
    else {
        const separator = existing ? "\n" : "";
        writeFileSync(claudeMdPath, existing + separator + CLAUDE_MD, "utf-8");
        console.log(` ${existing ? "append" : "create"} ${claudeMdPath}`);
    }
    const summary = [
        "",
        "Done. MemoryAI runs automatically in Claude Code — nothing else to do.",
        "- Context is recalled before each prompt and injected for you.",
        "- Decisions/preferences are stored when each turn ends.",
        "",
        "Next steps:",
        "1. Restart Claude Code (loads the hooks + MCP server).",
        "2. Just work normally. Memory persists across sessions on its own.",
        "",
    ];
    console.log(summary.join("\n"));
    rl.close();
}
|
|
213
|
+
// Run the installer; any rejection from main() is printed and turned into a
// non-zero exit status.
function reportFatal(failure) {
    console.error(failure);
    process.exit(1);
}
main().catch(reportFatal);
|
package/dist/index.js
CHANGED
|
@@ -11,10 +11,43 @@ import { z } from "zod";
|
|
|
11
11
|
const API_URL = process.env.MEMORYAI_ENDPOINT || process.env.HM_ENDPOINT || "http://localhost:8420";
|
|
12
12
|
const API_KEY = process.env.MEMORYAI_API_KEY || process.env.HM_API_KEY || "";
|
|
13
13
|
const REQUEST_TIMEOUT_MS = 30_000; // P2 #6: 30s default timeout for API requests
|
|
14
|
-
// Context Guard — per-IDE settings via env vars
|
|
14
|
+
// Context Guard — per-IDE settings via env vars.
|
|
15
|
+
// HM_COMPACT_AT and HM_CRITICAL_AT are now ABSOLUTE token counts (e.g. "100000",
|
|
16
|
+
// "150000"). The legacy meaning ("30" = 30%) is detected automatically: any
|
|
17
|
+
// value <= 100 is treated as a deprecated percentage and converted to absolute
|
|
18
|
+
// using HM_CONTEXT_CAP if present, otherwise the value is divided by 100 and the
|
|
19
|
+
// server interprets it as a fraction (backward-compat path).
|
|
20
|
+
//
|
|
21
|
+
// HM_CONTEXT_CAP itself is no longer required — users set the two thresholds
|
|
22
|
+
// directly. When present it is forwarded as max_tokens so the server can clamp.
|
|
15
23
|
const CG_CONTEXT_CAP = parseInt(process.env.MEMORYAI_CONTEXT_CAP || process.env.HM_CONTEXT_CAP || "0", 10);
|
|
16
|
-
const
|
|
17
|
-
const
|
|
24
|
+
const CG_COMPACT_RAW = parseInt(process.env.MEMORYAI_COMPACT_AT || process.env.HM_COMPACT_AT || "0", 10);
|
|
25
|
+
const CG_CRITICAL_RAW = parseInt(process.env.MEMORYAI_CRITICAL_AT || process.env.HM_CRITICAL_AT || "0", 10);
|
|
26
|
+
// Heuristic: small numbers are legacy percentages; large numbers are absolute tokens.
|
|
27
|
+
// Threshold "<= 100" is generous enough to catch any sensible % (max 95%) and
|
|
28
|
+
// well below any sensible absolute count (min would be ~10K tokens).
|
|
29
|
+
/**
 * Report whether a raw threshold value falls in the legacy-percentage range,
 * i.e. strictly above 0 and at most 100. NaN yields false.
 *
 * @param {number} v - Raw threshold value.
 * @returns {boolean} True when `v` is in (0, 100].
 */
function _isLegacyPct(v) {
    const aboveZero = 0 < v;
    const withinPercentRange = v <= 100;
    return aboveZero && withinPercentRange;
}
|
|
30
|
+
// Resolved absolute thresholds. 0 means "not configured — use server defaults".
|
|
31
|
+
// Compact threshold resolved to an absolute token count.
//   raw <= 0                         -> 0 (not configured)
//   legacy percentage, cap set       -> that percentage of CG_CONTEXT_CAP, rounded
//   legacy percentage, no cap        -> 0 (the percentage is carried by CG_COMPACT_PCT)
//   anything else                    -> the raw value unchanged
const CG_COMPACT_AT_TOKENS = (() => {
    const raw = CG_COMPACT_RAW;
    if (raw <= 0)
        return 0;
    if (!_isLegacyPct(raw))
        return raw;
    return CG_CONTEXT_CAP > 0 ? Math.round((raw / 100) * CG_CONTEXT_CAP) : 0;
})();
|
|
39
|
+
// Critical threshold resolved to an absolute token count.
//   raw <= 0                         -> 0 (not configured)
//   legacy percentage, cap set       -> that percentage of CG_CONTEXT_CAP, rounded
//   legacy percentage, no cap        -> 0 (the percentage is carried by CG_CRITICAL_PCT)
//   anything else                    -> the raw value unchanged
const CG_CRITICAL_AT_TOKENS = (() => {
    const raw = CG_CRITICAL_RAW;
    if (raw <= 0)
        return 0;
    if (!_isLegacyPct(raw))
        return raw;
    return CG_CONTEXT_CAP > 0 ? Math.round((raw / 100) * CG_CONTEXT_CAP) : 0;
})();
|
|
47
|
+
// Legacy decimal % path for the rare case where user keeps "30/50" without
|
|
48
|
+
// HM_CONTEXT_CAP — server still accepts compact_pct/critical_pct as decimals.
|
|
49
|
+
const CG_COMPACT_PCT = _isLegacyPct(CG_COMPACT_RAW) && CG_CONTEXT_CAP <= 0 ? CG_COMPACT_RAW : 0;
|
|
50
|
+
const CG_CRITICAL_PCT = _isLegacyPct(CG_CRITICAL_RAW) && CG_CONTEXT_CAP <= 0 ? CG_CRITICAL_RAW : 0;
|
|
18
51
|
// --- HTTP helper ---
|
|
19
52
|
async function api(method, path, body) {
|
|
20
53
|
const resp = await fetch(`${API_URL}${path}`, {
|
|
@@ -829,21 +862,34 @@ server.tool("context_guard_check", "[CORE] Check context pressure — returns re
|
|
|
829
862
|
max_tokens: maxTokens,
|
|
830
863
|
model: args.model || null,
|
|
831
864
|
};
|
|
832
|
-
//
|
|
865
|
+
// Per-IDE threshold overrides. Absolute (CG_*_AT_TOKENS) is preferred —
|
|
866
|
+
// server treats it as the authoritative trigger. Decimal % is the
|
|
867
|
+
// backward-compat path for users whose env still says "30/50".
|
|
868
|
+
if (CG_COMPACT_AT_TOKENS > 0)
|
|
869
|
+
payload.compact_at_tokens = CG_COMPACT_AT_TOKENS;
|
|
870
|
+
if (CG_CRITICAL_AT_TOKENS > 0)
|
|
871
|
+
payload.critical_at_tokens = CG_CRITICAL_AT_TOKENS;
|
|
833
872
|
if (CG_COMPACT_PCT > 0)
|
|
834
873
|
payload.compact_pct = CG_COMPACT_PCT / 100;
|
|
835
874
|
if (CG_CRITICAL_PCT > 0)
|
|
836
875
|
payload.critical_pct = CG_CRITICAL_PCT / 100;
|
|
837
876
|
const r = (await api("POST", "/v1/context/guard/check", payload));
|
|
838
|
-
|
|
877
|
+
// Render against the user's configured critical threshold so the bar is
|
|
878
|
+
// anchored to the absolute trigger point (HM_CRITICAL_AT), not a soft
|
|
879
|
+
// model-window percentage. When critical_at_tokens is missing, fall back
|
|
880
|
+
// to the legacy usage_percent rendering.
|
|
881
|
+
const tokens = args.estimated_tokens;
|
|
882
|
+
const critical = r.critical_at_tokens || (CG_CRITICAL_AT_TOKENS > 0 ? CG_CRITICAL_AT_TOKENS : 0);
|
|
883
|
+
const compact = r.compact_at_tokens || (CG_COMPACT_AT_TOKENS > 0 ? CG_COMPACT_AT_TOKENS : 0);
|
|
839
884
|
const barLen = 20;
|
|
840
|
-
const
|
|
885
|
+
const ratio = critical > 0 ? Math.min(1, tokens / critical) : (r.usage_percent / 100);
|
|
886
|
+
const filled = Math.max(0, Math.min(barLen, Math.round(ratio * barLen)));
|
|
841
887
|
const bar = "\u2588".repeat(filled) + "\u2591".repeat(barLen - filled);
|
|
842
888
|
return ok(`Context Guard v6:\n` +
|
|
843
|
-
`[${bar}] ${
|
|
844
|
-
`Recommendation: ${r.recommendation.toUpperCase()}${r.should_compact ? "
|
|
889
|
+
`[${bar}] ${tokens.toLocaleString()} / ${critical ? critical.toLocaleString() + " tokens" : (r.usage_percent.toFixed(1) + "%")}\n` +
|
|
890
|
+
`Recommendation: ${r.recommendation.toUpperCase()}${r.should_compact ? " \u2014 compact now" : ""}\n` +
|
|
845
891
|
`Urgency: ${r.urgency}\n` +
|
|
846
|
-
`Thresholds: compact=${r.compact_at_tokens.toLocaleString()}, critical=${r.critical_at_tokens.toLocaleString()}\n` +
|
|
892
|
+
`Thresholds: compact=${(compact || r.compact_at_tokens).toLocaleString()}, critical=${(critical || r.critical_at_tokens).toLocaleString()}\n` +
|
|
847
893
|
`DNA memories: ${r.dna_memories} | Hot: ${r.hot_memories} | Stale: ${r.stale_memories}\n` +
|
|
848
894
|
`Bootstrap ready: ${r.bootstrap_ready ? "yes" : "no"}\n` +
|
|
849
895
|
(r.last_compact_minutes_ago != null ? `Last compact: ${r.last_compact_minutes_ago.toFixed(0)} min ago` : "No compacts yet"));
|
|
@@ -874,6 +920,56 @@ server.tool("context_guard_compact", "[CORE] Save context to long-term memory wh
|
|
|
874
920
|
return err(e);
|
|
875
921
|
}
|
|
876
922
|
});
|
|
923
|
+
// ide_turn_check — server-authoritative context guard (zero guessing)
// The agent cannot read its host IDE's actual context window from inside an
// MCP tool. Instead of asking it to estimate tokens, this tool only asks for
// turn_count + max_tokens and the server does the math. Returns a ready-to-
// follow action_prompt — agent reads it verbatim, no interpretation needed.
//
// Request defaults applied here when the caller omits a field:
//   max_tokens          -> CG_CONTEXT_CAP when it is a positive number, else 200000
//   avg_tokens_per_turn -> 8000
//   skip_below_turns    -> 10
// Any failure (API error, unexpected response shape) is returned via err(e).
server.tool("ide_turn_check", "[CORE] Server-authoritative context check for IDE hooks. Pass turn_count + max_tokens; server computes pressure and returns an action_prompt the agent should follow verbatim. Use this instead of context_guard_check from IDE auto-capture hooks.", {
    turn_count: z.number().describe("Number of assistant turns in this conversation."),
    max_tokens: z.number().optional().describe("Actual IDE context window in tokens — NOT the underlying model claim. Default 200000."),
    avg_tokens_per_turn: z.number().optional().describe("Calibration knob, default 8000."),
    skip_below_turns: z.number().optional().describe("Skip the check below this many turns (default 10)."),
    model: z.string().optional().describe("Optional model hint when max_tokens not set."),
}, async (args) => {
    try {
        // CG_CONTEXT_CAP comes from parseInt, so when the env var is unset it is
        // 0 (or NaN for garbage input) — never null/undefined. A plain `??` chain
        // would therefore never reach the 200000 default; test the value instead.
        const fallbackMaxTokens = CG_CONTEXT_CAP > 0 ? CG_CONTEXT_CAP : 200_000;
        const payload = {
            turn_count: args.turn_count,
            max_tokens: args.max_tokens ?? fallbackMaxTokens,
            avg_tokens_per_turn: args.avg_tokens_per_turn ?? 8000,
            skip_below_turns: args.skip_below_turns ?? 10,
            model: args.model ?? null,
        };
        // Per-IDE threshold overrides. Absolute first (preferred), % fallback.
        if (CG_COMPACT_AT_TOKENS > 0)
            payload.compact_at_tokens = CG_COMPACT_AT_TOKENS;
        if (CG_CRITICAL_AT_TOKENS > 0)
            payload.critical_at_tokens = CG_CRITICAL_AT_TOKENS;
        if (CG_COMPACT_PCT > 0)
            payload.compact_pct = CG_COMPACT_PCT / 100;
        if (CG_CRITICAL_PCT > 0)
            payload.critical_pct = CG_CRITICAL_PCT / 100;
        const r = (await api("POST", "/v1/ide/guard/turn-check", payload));
        if (r.skipped) {
            return ok(`Turn check skipped (turn ${args.turn_count} below threshold). Recommendation: SAFE.`);
        }
        const tokens = r.estimated_tokens;
        // Prefer the thresholds echoed by the server; fall back to the locally
        // configured ones; 0 means "unknown".
        const critical = r.critical_at_tokens || (CG_CRITICAL_AT_TOKENS > 0 ? CG_CRITICAL_AT_TOKENS : 0);
        const compact = r.compact_at_tokens || (CG_COMPACT_AT_TOKENS > 0 ? CG_COMPACT_AT_TOKENS : 0);
        // Formats a threshold without throwing when the server omitted it.
        const formatThreshold = (value) => (value ? value.toLocaleString() : "n/a");
        const barLen = 20;
        // Bar is anchored to the critical threshold when known, else to usage_percent.
        const ratio = critical > 0 ? Math.min(1, tokens / critical) : (r.usage_percent / 100);
        const filled = Math.max(0, Math.min(barLen, Math.round(ratio * barLen)));
        const bar = "\u2588".repeat(filled) + "\u2591".repeat(barLen - filled);
        return ok(`IDE Turn Check (server-authoritative):\n` +
            `[${bar}] ${tokens.toLocaleString()} / ${critical ? critical.toLocaleString() + " tokens" : (r.usage_percent.toFixed(1) + "%")}\n` +
            `Recommendation: ${r.recommendation.toUpperCase()}\n` +
            `Urgency: ${r.urgency}\n` +
            `Thresholds: compact=${formatThreshold(compact)}, critical=${formatThreshold(critical)}\n` +
            (r.action_prompt ? `\nACTION FOR AGENT (follow verbatim):\n${r.action_prompt}` : "No action needed."));
    }
    catch (e) {
        return err(e);
    }
});
|
|
877
973
|
// context_guard_bootstrap — DNA-first session bootstrap (IDE)
|
|
878
974
|
server.tool("context_guard_bootstrap", "Advanced: Load context from previous sessions at session start. Returns preferences, recent activity, and task-relevant memories. Call once at the beginning of a session to restore context.", {
|
|
879
975
|
task: z.string().describe("Task description for context relevance"),
|
package/dist/kiro-setup.d.ts
CHANGED
|
@@ -1,7 +1,16 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
/**
|
|
3
3
|
* memoryai-kiro-setup
|
|
4
|
-
* Zero-dependency setup script that creates
|
|
5
|
-
*
|
|
4
|
+
* Zero-dependency setup script that creates, in the current project:
|
|
5
|
+
* - .kiro/settings/mcp.json (MCP server wiring)
|
|
6
|
+
* - .kiro/steering/memoryai.md (always-on instructions, soft fallback)
|
|
7
|
+
* - .kiro/hooks/memoryai-auto-recall.kiro.hook (promptSubmit → bootstrap/recall)
|
|
8
|
+
* - .kiro/hooks/memoryai-auto-capture.kiro.hook (agentStop → store/compact)
|
|
9
|
+
*
|
|
10
|
+
* The two hooks are what make memory TRULY automatic: they fire on IDE events
|
|
11
|
+
* (every prompt / end of every turn) instead of relying on the agent to
|
|
12
|
+
* remember the steering instructions. Result: the user installs once and never
|
|
13
|
+
* has to think about memory again — recall happens before answers, persistence
|
|
14
|
+
* happens after turns, compaction happens when context fills.
|
|
6
15
|
*/
|
|
7
16
|
export {};
|
package/dist/kiro-setup.js
CHANGED
|
@@ -1,12 +1,21 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
/**
|
|
3
3
|
* memoryai-kiro-setup
|
|
4
|
-
* Zero-dependency setup script that creates
|
|
5
|
-
*
|
|
4
|
+
* Zero-dependency setup script that creates, in the current project:
|
|
5
|
+
* - .kiro/settings/mcp.json (MCP server wiring)
|
|
6
|
+
* - .kiro/steering/memoryai.md (always-on instructions, soft fallback)
|
|
7
|
+
* - .kiro/hooks/memoryai-auto-recall.kiro.hook (promptSubmit → bootstrap/recall)
|
|
8
|
+
* - .kiro/hooks/memoryai-auto-capture.kiro.hook (agentStop → store/compact)
|
|
9
|
+
*
|
|
10
|
+
* The two hooks are what make memory TRULY automatic: they fire on IDE events
|
|
11
|
+
* (every prompt / end of every turn) instead of relying on the agent to
|
|
12
|
+
* remember the steering instructions. Result: the user installs once and never
|
|
13
|
+
* has to think about memory again — recall happens before answers, persistence
|
|
14
|
+
* happens after turns, compaction happens when context fills.
|
|
6
15
|
*/
|
|
7
16
|
import { createInterface } from "node:readline";
|
|
8
17
|
import { existsSync, mkdirSync, writeFileSync } from "node:fs";
|
|
9
|
-
import { join } from "node:path";
|
|
18
|
+
import { join, dirname } from "node:path";
|
|
10
19
|
const rl = createInterface({ input: process.stdin, output: process.stdout });
|
|
11
20
|
function ask(question, fallback) {
|
|
12
21
|
const suffix = fallback ? ` [${fallback}]` : "";
|
|
@@ -21,12 +30,42 @@ function writeIfMissing(filePath, content, label) {
|
|
|
21
30
|
console.log(` skip ${label} (already exists)`);
|
|
22
31
|
return false;
|
|
23
32
|
}
|
|
24
|
-
const dir =
|
|
33
|
+
const dir = dirname(filePath);
|
|
25
34
|
mkdirSync(dir, { recursive: true });
|
|
26
35
|
writeFileSync(filePath, content, "utf-8");
|
|
27
36
|
console.log(` create ${label}`);
|
|
28
37
|
return true;
|
|
29
38
|
}
|
|
39
|
+
/**
 * Auto-provision a fresh API key from the self-service endpoint so the user
 * does not need curl or a dashboard.
 *
 * POSTs `{ name, tos_accepted: true }` to `<endpoint>/v1/admin/provision`
 * (trailing slashes on `endpoint` are stripped first).
 *
 * Never throws: every failure (non-2xx status, network error, timeout,
 * unparseable body, missing or non-string `api_key`) is logged as a warning
 * and reported as `null`, so the caller can fall back to asking for a key.
 *
 * @param {string} endpoint - Base URL of the MemoryAI service.
 * @param {string} [name] - Label sent with the request; defaults to "kiro".
 * @returns {Promise<string|null>} The provisioned key, or null on any failure.
 */
async function provisionKey(endpoint, name) {
    // Upper bound for the whole request; without it an unresponsive endpoint
    // would leave the setup script hanging forever.
    const timeoutMs = 15000;
    const base = endpoint.replace(/\/+$/, "");
    try {
        const resp = await fetch(`${base}/v1/admin/provision`, {
            method: "POST",
            headers: { "Content-Type": "application/json" },
            body: JSON.stringify({ name: name || "kiro", tos_accepted: true }),
            signal: AbortSignal.timeout(timeoutMs),
        });
        if (!resp.ok) {
            const txt = await resp.text().catch(() => "");
            console.error(` warn auto-provision failed (HTTP ${resp.status}). ${txt.slice(0, 200)}`);
            return null;
        }
        const data = (await resp.json());
        const key = data?.api_key;
        // Only a non-empty string is usable as an API key value.
        if (typeof key === "string" && key) {
            console.log(` ok provisioned new API key (${key.slice(0, 10)}…, plan=${data.plan || "?"})`);
            return key;
        }
        console.error(" warn auto-provision response did not include an API key.");
        return null;
    }
    catch (e) {
        console.error(` warn auto-provision request error: ${e instanceof Error ? e.message : String(e)}`);
        return null;
    }
}
|
|
30
69
|
const MCP_CONFIG = (apiKey, endpoint) => JSON.stringify({
|
|
31
70
|
mcpServers: {
|
|
32
71
|
memoryai: {
|
|
@@ -36,76 +75,120 @@ const MCP_CONFIG = (apiKey, endpoint) => JSON.stringify({
|
|
|
36
75
|
HM_API_KEY: apiKey,
|
|
37
76
|
HM_ENDPOINT: endpoint,
|
|
38
77
|
},
|
|
78
|
+
// Auto-approve the everyday memory tools so the hooks can run them
|
|
79
|
+
// without prompting the user — this is what makes memory truly
|
|
80
|
+
// hands-off. These are all low-risk (read + append-only store +
|
|
81
|
+
// context bookkeeping); no destructive operations are listed.
|
|
82
|
+
autoApprove: [
|
|
83
|
+
"memory_bootstrap",
|
|
84
|
+
"memory_recall",
|
|
85
|
+
"memory_store",
|
|
86
|
+
"memory_recover",
|
|
87
|
+
"context_guard_check",
|
|
88
|
+
"context_guard_compact",
|
|
89
|
+
"ide_turn_check",
|
|
90
|
+
"memory_pitfall_check",
|
|
91
|
+
],
|
|
39
92
|
},
|
|
40
93
|
},
|
|
41
94
|
}, null, 2) + "\n";
|
|
42
|
-
const STEERING = `---
|
|
43
|
-
inclusion: always
|
|
44
|
-
---
|
|
45
|
-
|
|
46
|
-
# MemoryAI — Persistent Memory
|
|
47
|
-
|
|
48
|
-
You have
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
##
|
|
56
|
-
|
|
57
|
-
-
|
|
58
|
-
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
## Session End
|
|
78
|
-
|
|
79
|
-
When wrapping up or when the agent is about to stop:
|
|
80
|
-
1. Call \`memory_compact\` to consolidate the session's context into durable memories
|
|
81
|
-
2. Call \`memory_store\` with a brief summary of what was accomplished
|
|
82
|
-
|
|
83
|
-
## Rules
|
|
84
|
-
|
|
85
|
-
- Recall only when past context is actually needed — not on every message
|
|
86
|
-
- Store important outcomes after completing tasks, not after every interaction
|
|
87
|
-
- Present memories naturally — integrate recalled info into responses, don't show raw API output
|
|
88
|
-
- Use \`zone: "critical"\` for decisions that must never be forgotten
|
|
89
|
-
- Use \`retention: "forever"\` for permanent project knowledge
|
|
95
|
+
const STEERING = `---
|
|
96
|
+
inclusion: always
|
|
97
|
+
---
|
|
98
|
+
|
|
99
|
+
# MemoryAI — Persistent Memory (mostly automatic)
|
|
100
|
+
|
|
101
|
+
You have MemoryAI tools via MCP. Two Agent Hooks automate the common path:
|
|
102
|
+
- **Auto-Recall** (on every prompt) loads memory before you answer.
|
|
103
|
+
- **Auto-Capture** (end of every turn) stores important memory and compacts when full.
|
|
104
|
+
|
|
105
|
+
So you normally do NOT need to manage memory by hand. This file is a fallback
|
|
106
|
+
for cases the hooks don't cover, plus the rules for HOW to use memory well.
|
|
107
|
+
|
|
108
|
+
## What the hooks already handle
|
|
109
|
+
- Session-start \`memory_bootstrap\`, per-prompt \`memory_recall\`.
|
|
110
|
+
- Post-turn \`memory_store\` for decisions/preferences/facts/pitfalls/procedures.
|
|
111
|
+
- \`context_guard_check\` → \`context_guard_compact\` when context fills.
|
|
112
|
+
|
|
113
|
+
Don't duplicate these on your own unless a hook clearly didn't run.
|
|
114
|
+
|
|
115
|
+
## Memory types
|
|
116
|
+
- \`decision\` — architectural/technical decisions (DNA-protected, never decays)
|
|
117
|
+
- \`preference\` — user preferences and conventions (DNA-protected)
|
|
118
|
+
- \`fact\` — codebase facts, API details, configs
|
|
119
|
+
- \`pitfall\` — a mistake + its lesson (DNA-protected)
|
|
120
|
+
- \`procedure\` — a reusable workflow (DNA-protected)
|
|
121
|
+
- \`error\` / \`goal\` — lessons and current objectives
|
|
122
|
+
|
|
123
|
+
## Rules
|
|
124
|
+
- Recall only when past context is actually needed — not on every trivial message.
|
|
125
|
+
- Store important outcomes, not every interaction. Dedup is automatic.
|
|
126
|
+
- Integrate recalled info naturally; never show raw tool output.
|
|
127
|
+
- Never store secrets, credentials, tokens, or full API keys.
|
|
128
|
+
- Use \`zone: "critical"\` for things that must never be forgotten.
|
|
129
|
+
- Use \`retention: "forever"\` for permanent project knowledge.
|
|
90
130
|
`;
|
|
131
|
+
// ── Agent Hooks — event-level automation (the real "zero-action" layer) ──
|
|
132
|
+
// These fire on IDE events so memory works even if the agent ignores steering.
|
|
133
|
+
const HOOK_AUTO_RECALL = JSON.stringify({
|
|
134
|
+
enabled: true,
|
|
135
|
+
name: "MemoryAI Auto-Recall",
|
|
136
|
+
description: "Before answering each user message, automatically load relevant memory so the agent always has prior context — zero user action.",
|
|
137
|
+
version: "1",
|
|
138
|
+
when: { type: "promptSubmit" },
|
|
139
|
+
then: {
|
|
140
|
+
type: "askAgent",
|
|
141
|
+
prompt: "Before responding, ensure you have the relevant long-term memory (do this silently, integrate naturally, never show raw tool output):\n\n" +
|
|
142
|
+
"1. If this is the first turn of the session, call memory_bootstrap once to load preferences, decisions, and recent project context.\n" +
|
|
143
|
+
"2. If the user's message references past work, decisions, preferences, or anything that might be stored (\"what did we\", \"last time\", \"our approach\", \"why did we\", or a question about the project), call memory_recall with a focused query first, then answer using what you find.\n" +
|
|
144
|
+
"3. If the message is trivial small-talk, skip recall.\n\n" +
|
|
145
|
+
"Use recalled memory to inform the answer; do not announce that a memory system exists unless asked.",
|
|
146
|
+
},
|
|
147
|
+
}, null, 2) + "\n";
|
|
148
|
+
/**
 * Serialized contents of the "MemoryAI Auto-Capture" hook file.
 *
 * The hook is declared with an `agentStop` trigger and an `askAgent` action.
 * Its prompt asks the agent to (1) store memorable outcomes via memory_store
 * and (2) call ide_turn_check and follow the returned action_prompt, all
 * without messaging the user. The value is pretty-printed JSON (2-space
 * indent) followed by one newline.
 */
const HOOK_AUTO_CAPTURE = (() => {
    // Prompt fragments are concatenated with no separator; each fragment
    // carries its own trailing newline(s).
    const capturePrompt = [
        "Run two background tasks SILENTLY. Do not tell the user about either task, do not change your normal reply, do not suggest /compact or 'open a new conversation' — those are not user actions in this design.\n\n",
        "1. PERSIST: If the turn produced a decision, preference, important project fact, lesson from a mistake (pitfall), or reusable procedure, call memory_store with the correct memory_type. Skip greetings and trivia. Dedup is automatic. Never store secrets.\n\n",
        "2. RELEASE CHECK: Count the assistant turns in this conversation, then call ide_turn_check with turn_count = (your count) and max_tokens = 200000. Read the response's action_prompt field and follow it VERBATIM. Every action_prompt in this design tells you to act SILENTLY — call context_guard_compact when asked, do nothing when asked. Never message the user from this hook.\n\n",
        "The design: MemoryAI persists to DB on threshold; the IDE host (Kiro auto-summarize, Claude Code /clear, Cursor /reset) evicts the window on its own schedule; the auto-recall hook on the next user message rehydrates context. From the user's view, nothing ever happens — the chat just keeps working.",
    ].join("");
    // Key order is significant: it determines the layout of the emitted JSON.
    const hookSpec = {
        enabled: true,
        name: "MemoryAI Auto-Capture",
        description: "Fully automatic: persist memorable events to MemoryAI after every turn, and persist session state silently when pressure thresholds hit. No user action ever required — the IDE host handles window eviction on its own and the auto-recall hook rehydrates context afterwards.",
        version: "5",
        when: { type: "agentStop" },
        then: {
            type: "askAgent",
            prompt: capturePrompt,
        },
    };
    return `${JSON.stringify(hookSpec, null, 2)}\n`;
})();
|
|
91
162
|
/**
 * Interactive entry point for the Kiro setup.
 *
 * Flow:
 *   1. Print a banner with the current working directory.
 *   2. Ask for the endpoint (default: HM_ENDPOINT or https://memoryai.dev).
 *   3. Take the API key from HM_API_KEY, or prompt for one; if it is still
 *      empty, call provisionKey(endpoint, "kiro").
 *   4. Exit with status 1 if no key could be obtained.
 *   5. Write the MCP config, the steering file and both hook files under
 *      `.kiro/` via writeIfMissing, then print next steps and close readline.
 */
async function main() {
    const cwd = process.cwd();
    console.log(`\nMemoryAI Kiro Setup`);
    console.log(`Project: ${cwd}\n`);

    // The endpoint is collected first because key provisioning needs it.
    const endpoint = await ask("Endpoint", process.env.HM_ENDPOINT || "https://memoryai.dev");
    let apiKey = process.env.HM_API_KEY || (await ask("MemoryAI API key (blank = auto-provision a free one)")).trim();

    if (!apiKey) {
        console.log("  ... no key given — provisioning one for you");
        const provisioned = await provisionKey(endpoint, "kiro");
        if (provisioned) {
            apiKey = provisioned;
        }
    }
    if (!apiKey) {
        console.error("Error: could not obtain an API key (auto-provision failed). Set HM_API_KEY and re-run.");
        process.exit(1);
    }
    console.log("");

    // [path segments below .kiro, file contents]; the display label is the
    // same path expressed relative to the project root.
    const artifacts = [
        [["settings", "mcp.json"], MCP_CONFIG(apiKey, endpoint)],
        [["steering", "memoryai.md"], STEERING],
        [["hooks", "memoryai-auto-recall.kiro.hook"], HOOK_AUTO_RECALL],
        [["hooks", "memoryai-auto-capture.kiro.hook"], HOOK_AUTO_CAPTURE],
    ];
    for (const [segments, contents] of artifacts) {
        writeIfMissing(join(cwd, ".kiro", ...segments), contents, `.kiro/${segments.join("/")}`);
    }

    console.log(`
Done. MemoryAI now runs automatically — you don't have to do anything.
- Auto-Recall hook loads relevant memory before each answer.
- Auto-Capture hook stores decisions/preferences and compacts when full.

Next steps:
1. Restart Kiro (loads the MCP server + hooks)
2. Just work normally. Memory persists across sessions on its own.
3. Optional check: ask "What do you remember about this project?"
`);
    rl.close();
}
|