@gamaze/hicortex 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +119 -0
- package/dist/consolidate.d.ts +36 -0
- package/dist/consolidate.js +482 -0
- package/dist/db.d.ts +19 -0
- package/dist/db.js +140 -0
- package/dist/distiller.d.ts +15 -0
- package/dist/distiller.js +186 -0
- package/dist/embedder.d.ts +20 -0
- package/dist/embedder.js +85 -0
- package/dist/index.d.ts +14 -0
- package/dist/index.js +557 -0
- package/dist/license.d.ts +5 -0
- package/dist/license.js +96 -0
- package/dist/llm.d.ts +66 -0
- package/dist/llm.js +421 -0
- package/dist/prompts.d.ts +16 -0
- package/dist/prompts.js +117 -0
- package/dist/retrieval.d.ts +47 -0
- package/dist/retrieval.js +320 -0
- package/dist/storage.d.ts +98 -0
- package/dist/storage.js +326 -0
- package/dist/types.d.ts +132 -0
- package/dist/types.js +6 -0
- package/openclaw.plugin.json +70 -0
- package/package.json +42 -0
- package/skills/hicortex-activate/SKILL.md +53 -0
- package/skills/hicortex-learn/SKILL.md +40 -0
- package/skills/hicortex-memory/SKILL.md +39 -0
package/dist/llm.d.ts
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Multi-provider LLM client for consolidation and distillation.
|
|
3
|
+
* Ported from hicortex/consolidate/llm.py.
|
|
4
|
+
*
|
|
5
|
+
* Resolution order for LLM provider:
|
|
6
|
+
* 1. Plugin config (llmBaseUrl, llmApiKey, llmModel)
|
|
7
|
+
* 2. ~/.openclaw/openclaw.json agents.defaults.model.primary
|
|
8
|
+
* 3. Environment vars: OPENAI_API_KEY, ANTHROPIC_API_KEY, GOOGLE_API_KEY
|
|
9
|
+
* 4. Fallback: Ollama at http://localhost:11434
|
|
10
|
+
*
|
|
11
|
+
* Supports: OpenAI, Anthropic (via OpenAI compat), Google, OpenRouter, Ollama, z.ai
|
|
12
|
+
*/
|
|
13
|
+
/** Resolved LLM connection settings (produced by resolveLlmConfig). */
export interface LlmConfig {
    /** Provider API base URL; trailing slash is stripped by the client. */
    baseUrl: string;
    /** API key; empty string for keyless providers such as local Ollama. */
    apiKey: string;
    /** Model used for the fast and distillation tiers. */
    model: string;
    /** Model used for the reflection (reasoning-heavy) tier. */
    reflectModel: string;
    /** Provider id, e.g. "openai", "anthropic", "ollama", "openrouter", "zai". */
    provider: string;
}
|
|
20
|
+
/**
|
|
21
|
+
* Resolve LLM configuration from plugin config, OpenClaw config, env vars, or Ollama fallback.
|
|
22
|
+
*/
|
|
23
|
+
export declare function resolveLlmConfig(pluginConfig?: {
|
|
24
|
+
llmBaseUrl?: string;
|
|
25
|
+
llmApiKey?: string;
|
|
26
|
+
llmModel?: string;
|
|
27
|
+
reflectModel?: string;
|
|
28
|
+
}): LlmConfig;
|
|
29
|
+
export declare class RateLimitError extends Error {
|
|
30
|
+
retryAfterMs: number;
|
|
31
|
+
constructor(retryAfterMs: number);
|
|
32
|
+
}
|
|
33
|
+
export declare class LlmClient {
|
|
34
|
+
private config;
|
|
35
|
+
private rateLimitedUntil;
|
|
36
|
+
constructor(config: LlmConfig);
|
|
37
|
+
/** Check if we're currently rate limited */
|
|
38
|
+
get isRateLimited(): boolean;
|
|
39
|
+
private handleRateLimit;
|
|
40
|
+
/**
|
|
41
|
+
* Fast-tier completion (importance scoring, simple tasks).
|
|
42
|
+
*/
|
|
43
|
+
completeFast(prompt: string, maxTokens?: number): Promise<string>;
|
|
44
|
+
/**
|
|
45
|
+
* Reflect-tier completion (nightly reflection, needs reasoning).
|
|
46
|
+
*/
|
|
47
|
+
completeReflect(prompt: string, maxTokens?: number): Promise<string>;
|
|
48
|
+
/**
|
|
49
|
+
* Distillation-tier completion (session knowledge extraction).
|
|
50
|
+
*/
|
|
51
|
+
completeDistill(prompt: string, maxTokens?: number): Promise<string>;
|
|
52
|
+
private complete;
|
|
53
|
+
/**
|
|
54
|
+
* Ollama: use /api/generate with think:false (important for qwen3.5 models).
|
|
55
|
+
*/
|
|
56
|
+
private completeOllama;
|
|
57
|
+
/**
|
|
58
|
+
* Anthropic Messages API (/v1/messages). Used for Anthropic and z.ai.
|
|
59
|
+
* Auth via x-api-key header.
|
|
60
|
+
*/
|
|
61
|
+
private completeAnthropic;
|
|
62
|
+
/**
|
|
63
|
+
* OpenAI-compatible /v1/chat/completions (works for OpenAI, OpenRouter, etc).
|
|
64
|
+
*/
|
|
65
|
+
private completeOpenAiCompat;
|
|
66
|
+
}
|
package/dist/llm.js
ADDED
|
@@ -0,0 +1,421 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* Multi-provider LLM client for consolidation and distillation.
|
|
4
|
+
* Ported from hicortex/consolidate/llm.py.
|
|
5
|
+
*
|
|
6
|
+
* Resolution order for LLM provider:
|
|
7
|
+
* 1. Plugin config (llmBaseUrl, llmApiKey, llmModel)
|
|
8
|
+
* 2. ~/.openclaw/openclaw.json agents.defaults.model.primary
|
|
9
|
+
* 3. Environment vars: OPENAI_API_KEY, ANTHROPIC_API_KEY, GOOGLE_API_KEY
|
|
10
|
+
* 4. Fallback: Ollama at http://localhost:11434
|
|
11
|
+
*
|
|
12
|
+
* Supports: OpenAI, Anthropic (via OpenAI compat), Google, OpenRouter, Ollama, z.ai
|
|
13
|
+
*/
|
|
14
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
15
|
+
exports.LlmClient = exports.RateLimitError = void 0;
|
|
16
|
+
exports.resolveLlmConfig = resolveLlmConfig;
|
|
17
|
+
const node_fs_1 = require("node:fs");
|
|
18
|
+
const node_path_1 = require("node:path");
|
|
19
|
+
const node_os_1 = require("node:os");
|
|
20
|
+
/**
 * Resolve the LLM configuration, trying sources in priority order:
 * explicit plugin config, then ~/.openclaw/openclaw.json, then well-known
 * environment variables, and finally a local Ollama default.
 */
function resolveLlmConfig(pluginConfig) {
    // 1. Explicit plugin configuration wins when both URL and key are present.
    const explicitUrl = pluginConfig?.llmBaseUrl;
    const explicitKey = pluginConfig?.llmApiKey;
    if (explicitUrl && explicitKey) {
        return {
            baseUrl: explicitUrl,
            apiKey: explicitKey,
            model: pluginConfig.llmModel ?? "qwen3.5:4b",
            reflectModel: pluginConfig.reflectModel ?? pluginConfig.llmModel ?? "qwen3.5:cloud",
            provider: detectProvider(explicitUrl),
        };
    }
    // 2. OpenClaw config file.
    const fromOcFile = readOpenClawConfig();
    if (fromOcFile)
        return fromOcFile;
    // 3. Environment variables (OPENAI_API_KEY / ANTHROPIC_API_KEY / GOOGLE_API_KEY).
    const fromEnv = resolveFromEnv();
    if (fromEnv)
        return fromEnv;
    // 4. Last resort: assume a local Ollama daemon.
    return {
        baseUrl: "http://localhost:11434",
        apiKey: "",
        model: "qwen3.5:4b",
        reflectModel: "qwen3.5:cloud",
        provider: "ollama",
    };
}
|
|
54
|
+
/**
 * Heuristically detect a provider id from its base URL.
 *
 * FIX: z.ai is now tested before Anthropic. z.ai's Anthropic-compatible
 * endpoint ("https://api.z.ai/api/anthropic") contains the substring
 * "anthropic", so the previous ordering misreported it as "anthropic".
 */
function detectProvider(url) {
    const u = url.toLowerCase();
    if (u.includes("ollama") || u.includes(":11434"))
        return "ollama";
    // Must come before the "anthropic" check — see note above.
    if (u.includes("z.ai") || u.includes("zai"))
        return "zai";
    if (u.includes("anthropic"))
        return "anthropic";
    if (u.includes("openrouter"))
        return "openrouter";
    if (u.includes("googleapis") || u.includes("generativelanguage"))
        return "google";
    // Anything unrecognized is assumed OpenAI-compatible.
    return "openai";
}
|
|
68
|
+
/**
 * Derive an LlmConfig from ~/.openclaw/openclaw.json, using
 * agents.defaults.model.primary ("provider/model" or "provider:model").
 * Returns null when the file is missing/invalid, no primary model is set,
 * or no API key can be found for a non-Ollama provider.
 */
function readOpenClawConfig() {
    try {
        const configPath = (0, node_path_1.join)((0, node_os_1.homedir)(), ".openclaw", "openclaw.json");
        const raw = (0, node_fs_1.readFileSync)(configPath, "utf-8");
        const config = JSON.parse(raw);
        const primary = config?.agents?.defaults?.model?.primary;
        if (!primary)
            return null;
        // primary format is "provider/model" (e.g. "zai/glm-5-turbo", "openai/gpt-4o")
        if (typeof primary === "string" && (primary.includes("/") || primary.includes(":"))) {
            const sep = primary.includes("/") ? "/" : ":";
            const [providerHint, ...rest] = primary.split(sep);
            const model = rest.join(sep);
            // Accept any provider name — if it's in our URL map, we know it
            const hint = providerHint.toLowerCase();
            const provider = (hint in PROVIDER_BASE_URLS ? hint : "openai");
            // Resolve base URL: OC config → per-agent models.json → built-in defaults.
            // FIX: readAgentModelsBaseUrl was defined but never consulted, so the
            // documented per-agent models.json fallback never ran.
            const baseUrl = readOcProviderBaseUrl(config, providerHint) ??
                readAgentModelsBaseUrl(providerHint) ??
                getDefaultUrlForProvider(provider);
            // Resolve API key: OC auth-profiles.json → env vars
            const apiKey = readOcAuthKey(providerHint) ??
                getEnvKeyForProvider(provider);
            // Only Ollama may run keyless.
            if (!apiKey && provider !== "ollama")
                return null;
            return {
                baseUrl,
                apiKey: apiKey ?? "",
                model,
                reflectModel: model,
                provider,
            };
        }
    }
    catch {
        // Config file doesn't exist or is invalid
    }
    return null;
}
|
|
106
|
+
/**
 * Look up a provider's base URL in openclaw.json
 * (models.providers.<name>.baseUrl). Returns undefined when absent or empty.
 */
function readOcProviderBaseUrl(config, provider) {
    const baseUrl = config?.models?.providers?.[provider]?.baseUrl;
    return baseUrl ? baseUrl : undefined;
}
|
|
115
|
+
/**
 * Scan every agent directory under ~/.openclaw/agents for a models.json
 * that declares a base URL for the given provider. First match wins;
 * returns undefined when no agent has one (or the agents dir is missing).
 */
function readAgentModelsBaseUrl(provider) {
    try {
        const { readdirSync } = require("node:fs");
        const agentsDir = (0, node_path_1.join)((0, node_os_1.homedir)(), ".openclaw", "agents");
        for (const agentId of readdirSync(agentsDir)) {
            try {
                const modelsPath = (0, node_path_1.join)(agentsDir, agentId, "agent", "models.json");
                const parsed = JSON.parse((0, node_fs_1.readFileSync)(modelsPath, "utf-8"));
                const entry = parsed?.providers?.[provider];
                if (entry?.baseUrl)
                    return entry.baseUrl;
            }
            catch {
                // This agent has no readable models.json — keep scanning.
            }
        }
    }
    catch {
        // ~/.openclaw/agents does not exist — nothing to scan.
    }
    return undefined;
}
|
|
143
|
+
/**
 * Scan per-agent auth-profiles.json files under ~/.openclaw/agents for an
 * API key matching the provider. A profile matches when its "provider"
 * field equals the name or its id starts with "<provider>:".
 * Returns the first key found, or undefined.
 */
function readOcAuthKey(provider) {
    try {
        const { readdirSync } = require("node:fs");
        const agentsDir = (0, node_path_1.join)((0, node_os_1.homedir)(), ".openclaw", "agents");
        for (const agentId of readdirSync(agentsDir)) {
            try {
                const authPath = (0, node_path_1.join)(agentsDir, agentId, "agent", "auth-profiles.json");
                const auth = JSON.parse((0, node_fs_1.readFileSync)(authPath, "utf-8"));
                const profiles = auth?.profiles ?? {};
                for (const [profileId, profile] of Object.entries(profiles)) {
                    const p = profile;
                    const matches = p?.provider === provider ||
                        profileId.startsWith(`${provider}:`);
                    // A matching profile without a key keeps the scan going.
                    if (matches && p?.key)
                        return p.key;
                }
            }
            catch {
                // Skip agents without a readable auth-profiles.json.
            }
        }
    }
    catch {
        // No agents directory.
    }
    return undefined;
}
|
|
178
|
+
/**
 * Build an LlmConfig from well-known environment variables, checked in
 * order: OPENAI_API_KEY, ANTHROPIC_API_KEY, GOOGLE_API_KEY.
 * Returns null when none are set.
 */
function resolveFromEnv() {
    const { OPENAI_API_KEY, OPENAI_BASE_URL, OPENAI_MODEL, ANTHROPIC_API_KEY, ANTHROPIC_BASE_URL, GOOGLE_API_KEY } = process.env;
    if (OPENAI_API_KEY) {
        const baseUrl = OPENAI_BASE_URL ?? "https://api.openai.com";
        const model = OPENAI_MODEL ?? "gpt-4o-mini";
        return {
            baseUrl,
            apiKey: OPENAI_API_KEY,
            model,
            reflectModel: model,
            // OPENAI_BASE_URL may point at any OpenAI-compatible host.
            provider: detectProvider(baseUrl),
        };
    }
    if (ANTHROPIC_API_KEY) {
        return {
            baseUrl: ANTHROPIC_BASE_URL ?? "https://api.anthropic.com",
            apiKey: ANTHROPIC_API_KEY,
            model: "claude-sonnet-4-20250514",
            reflectModel: "claude-sonnet-4-20250514",
            provider: "anthropic",
        };
    }
    if (GOOGLE_API_KEY) {
        return {
            baseUrl: "https://generativelanguage.googleapis.com/v1beta",
            apiKey: GOOGLE_API_KEY,
            model: "gemini-2.0-flash",
            reflectModel: "gemini-2.0-flash",
            provider: "google",
        };
    }
    return null;
}
|
|
214
|
+
/**
 * Conventional environment-variable API key for a provider name.
 * Returns undefined for providers without a well-known env var.
 */
function getEnvKeyForProvider(provider) {
    if (provider === "openai")
        return process.env.OPENAI_API_KEY;
    if (provider === "anthropic")
        return process.env.ANTHROPIC_API_KEY;
    if (provider === "google")
        return process.env.GOOGLE_API_KEY;
    if (provider === "zai")
        return process.env.ZAI_API_KEY ?? process.env.LLM_API_KEY;
    return undefined;
}
|
|
228
|
+
/**
 * Built-in default base URLs for every OC-supported provider (from the OC
 * gateway binary). Used both as a membership test ("is this a known
 * provider?") and as the fallback URL when no configuration supplies one.
 */
const PROVIDER_BASE_URLS = {
    anthropic: "https://api.anthropic.com",
    chutes: "https://api.chutes.ai",
    deepseek: "https://api.deepseek.com",
    google: "https://generativelanguage.googleapis.com/v1beta",
    groq: "https://api.groq.com/openai/v1",
    kilo: "https://api.kilo.ai/api/gateway",
    kimi: "https://api.kimi.com/coding",
    minimax: "https://api.minimaxi.com/v1",
    mistral: "https://api.mistral.ai/v1",
    moonshot: "https://api.moonshot.ai/v1",
    nvidia: "https://integrate.api.nvidia.com/v1",
    ollama: "http://localhost:11434",
    openai: "https://api.openai.com/v1",
    openrouter: "https://openrouter.ai/api",
    perplexity: "https://api.perplexity.ai",
    together: "https://api.together.xyz/v1",
    venice: "https://api.venice.ai/api/v1",
    xai: "https://api.x.ai/v1",
    zai: "https://api.z.ai/api/anthropic",
};
/** Default base URL for a provider name (case-insensitive); OpenAI's URL when unknown. */
function getDefaultUrlForProvider(provider) {
    const known = PROVIDER_BASE_URLS[provider.toLowerCase()];
    return known ?? "https://api.openai.com/v1";
}
|
|
253
|
+
/**
 * z.ai API key from the environment (ZAI_API_KEY, falling back to LLM_API_KEY).
 * NOTE(review): appears unused in this file — getEnvKeyForProvider("zai")
 * duplicates this logic; consider removing one of the two.
 */
function getEnvKeyForZai() {
    return process.env.ZAI_API_KEY ?? process.env.LLM_API_KEY;
}
|
|
256
|
+
// ---------------------------------------------------------------------------
// LLM Client class
// ---------------------------------------------------------------------------
/** Default back-off when no Retry-After is given: 5h plus a 1-minute safety margin. */
const DEFAULT_RATE_LIMIT_RETRY_MS = 5 * 60 * 60 * 1000 + 60_000;
/** Thrown when the provider rate-limits us; carries the suggested back-off. */
class RateLimitError extends Error {
    retryAfterMs;
    constructor(retryAfterMs) {
        // Report the wait rounded to one decimal place, in hours.
        const hours = Math.round((retryAfterMs / 3_600_000) * 10) / 10;
        super(`Rate limited — will retry in ${hours}h`);
        this.name = "RateLimitError";
        this.retryAfterMs = retryAfterMs;
    }
}
|
|
269
|
+
exports.RateLimitError = RateLimitError;
|
|
270
|
+
/**
 * Multi-provider LLM completion client.
 *
 * Dispatches on config.provider:
 *   - "ollama"            → Ollama /api/generate
 *   - "anthropic" / "zai" → Anthropic Messages API /v1/messages
 *   - everything else     → OpenAI-compatible /v1/chat/completions
 *
 * Tracks provider rate limiting (HTTP 429 / quota errors) so callers can
 * check isRateLimited and back off instead of retrying immediately.
 */
class LlmClient {
    config;
    // Epoch millis until which requests are suppressed; 0 = not limited.
    rateLimitedUntil = 0;
    constructor(config) {
        this.config = config;
    }
    /** Check if we're currently rate limited */
    get isRateLimited() {
        return Date.now() < this.rateLimitedUntil;
    }
    /**
     * Record a rate-limit response and throw RateLimitError.
     * Honors a numeric Retry-After header (delta-seconds); anything else
     * falls back to DEFAULT_RATE_LIMIT_RETRY_MS.
     */
    handleRateLimit(resp) {
        // Parse Retry-After header if present (seconds)
        const retryAfter = resp.headers.get("retry-after");
        const seconds = retryAfter ? parseInt(retryAfter, 10) : NaN;
        // FIX: Retry-After may also be an HTTP-date, for which parseInt yields
        // NaN; previously NaN * 1000 poisoned rateLimitedUntil (Date.now() < NaN
        // is always false), silently disabling the limiter.
        const retryMs = Number.isFinite(seconds) && seconds > 0
            ? seconds * 1000
            : DEFAULT_RATE_LIMIT_RETRY_MS;
        this.rateLimitedUntil = Date.now() + retryMs;
        console.log(`[hicortex] Rate limited by LLM provider. ` +
            `Will retry after ${new Date(this.rateLimitedUntil).toISOString()}`);
        throw new RateLimitError(retryMs);
    }
    /**
     * Fast-tier completion (importance scoring, simple tasks).
     */
    async completeFast(prompt, maxTokens = 2048) {
        return this.complete(this.config.model, prompt, maxTokens, 600_000);
    }
    /**
     * Reflect-tier completion (nightly reflection, needs reasoning).
     */
    async completeReflect(prompt, maxTokens = 8192) {
        return this.complete(this.config.reflectModel, prompt, maxTokens, 900_000);
    }
    /**
     * Distillation-tier completion (session knowledge extraction).
     */
    async completeDistill(prompt, maxTokens = 2048) {
        return this.complete(this.config.model, prompt, maxTokens, 900_000);
    }
    /**
     * Route a completion to the provider-specific transport.
     * Throws RateLimitError immediately while a back-off window is active.
     */
    async complete(model, prompt, maxTokens, timeoutMs) {
        if (this.isRateLimited) {
            throw new RateLimitError(this.rateLimitedUntil - Date.now());
        }
        if (this.config.provider === "ollama") {
            return this.completeOllama(model, prompt, maxTokens, timeoutMs);
        }
        if (this.config.provider === "anthropic" || this.config.provider === "zai") {
            return this.completeAnthropic(model, prompt, maxTokens, timeoutMs);
        }
        return this.completeOpenAiCompat(model, prompt, maxTokens, timeoutMs);
    }
    /**
     * Ollama: use /api/generate with think:false (important for qwen3.5 models).
     */
    async completeOllama(model, prompt, maxTokens, timeoutMs) {
        const url = `${this.config.baseUrl.replace(/\/$/, "")}/api/generate`;
        const resp = await fetch(url, {
            method: "POST",
            headers: { "Content-Type": "application/json" },
            body: JSON.stringify({
                model,
                prompt,
                stream: false,
                think: false, // suppress reasoning traces in the response
                options: { num_predict: maxTokens, num_ctx: 32768 },
            }),
            signal: AbortSignal.timeout(timeoutMs),
        });
        if (resp.status === 429)
            this.handleRateLimit(resp);
        if (!resp.ok) {
            const text = await resp.text().catch(() => "");
            if (text.includes("1113") || text.includes("Insufficient balance")) {
                this.handleRateLimit(resp); // z.ai/Ollama Cloud quota exceeded
            }
            throw new Error(`Ollama error ${resp.status}: ${text}`);
        }
        const data = (await resp.json());
        return (data.response ?? "").trim();
    }
    /**
     * Anthropic Messages API (/v1/messages). Used for Anthropic and z.ai.
     * Auth via x-api-key header.
     */
    async completeAnthropic(model, prompt, maxTokens, timeoutMs) {
        const baseUrl = this.config.baseUrl.replace(/\/$/, "");
        const url = `${baseUrl}/v1/messages`;
        const resp = await fetch(url, {
            method: "POST",
            headers: {
                "Content-Type": "application/json",
                "x-api-key": this.config.apiKey,
                "anthropic-version": "2023-06-01",
            },
            body: JSON.stringify({
                model,
                messages: [{ role: "user", content: prompt }],
                max_tokens: maxTokens,
            }),
            signal: AbortSignal.timeout(timeoutMs),
        });
        if (resp.status === 429)
            this.handleRateLimit(resp);
        if (!resp.ok) {
            const text = await resp.text().catch(() => "");
            throw new Error(`Anthropic API error ${resp.status}: ${text}`);
        }
        const data = (await resp.json());
        // Response content is an array of blocks; take the first text block.
        const textBlock = data.content?.find((c) => c.type === "text");
        return (textBlock?.text ?? "").trim();
    }
    /**
     * OpenAI-compatible /v1/chat/completions (works for OpenAI, OpenRouter, etc).
     */
    async completeOpenAiCompat(model, prompt, maxTokens, timeoutMs) {
        const baseUrl = this.config.baseUrl.replace(/\/$/, "");
        // Some providers (z.ai) include version in base URL already
        const hasVersion = /\/v\d+\/?$/.test(baseUrl) || baseUrl.includes("/paas/v");
        const url = hasVersion
            ? `${baseUrl}/chat/completions`
            : `${baseUrl}/v1/chat/completions`;
        const headers = {
            "Content-Type": "application/json",
        };
        if (this.config.apiKey) {
            headers["Authorization"] = `Bearer ${this.config.apiKey}`;
        }
        const resp = await fetch(url, {
            method: "POST",
            headers,
            body: JSON.stringify({
                model,
                messages: [{ role: "user", content: prompt }],
                max_tokens: maxTokens,
            }),
            signal: AbortSignal.timeout(timeoutMs),
        });
        if (resp.status === 429)
            this.handleRateLimit(resp);
        if (!resp.ok) {
            const text = await resp.text().catch(() => "");
            // z.ai: "Insufficient balance" likely means wrong endpoint (coding vs paas)
            if (text.includes("1113") || text.includes("Insufficient balance")) {
                console.log(`[hicortex] LLM billing error. Check that llmBaseUrl matches your plan. Current: ${baseUrl}`);
            }
            throw new Error(`LLM API error ${resp.status}: ${text}`);
        }
        const data = (await resp.json());
        return (data.choices?.[0]?.message?.content ?? "").trim();
    }
}
|
|
421
|
+
exports.LlmClient = LlmClient;
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* LLM prompt templates for memory consolidation and distillation.
|
|
3
|
+
* Copied EXACTLY from the Python codebase (proven working prompts).
|
|
4
|
+
*/
|
|
5
|
+
/**
|
|
6
|
+
* Importance scoring prompt. Takes a {memories_block} with indexed memories.
|
|
7
|
+
*/
|
|
8
|
+
export declare function importanceScoring(memoriesBlock: string): string;
|
|
9
|
+
/**
|
|
10
|
+
* Reflection prompt. Takes a {memories_block} with today's memories.
|
|
11
|
+
*/
|
|
12
|
+
export declare function reflection(memoriesBlock: string): string;
|
|
13
|
+
/**
|
|
14
|
+
* Distillation prompt. Extracts knowledge from a session transcript.
|
|
15
|
+
*/
|
|
16
|
+
export declare function distillation(projectName: string, date: string, transcript: string): string;
|
package/dist/prompts.js
ADDED
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* LLM prompt templates for memory consolidation and distillation.
|
|
4
|
+
* Copied EXACTLY from the Python codebase (proven working prompts).
|
|
5
|
+
*/
|
|
6
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
7
|
+
exports.importanceScoring = importanceScoring;
|
|
8
|
+
exports.reflection = reflection;
|
|
9
|
+
exports.distillation = distillation;
|
|
10
|
+
/**
 * Importance scoring prompt. Takes a {memories_block} with indexed memories.
 *
 * @param memoriesBlock Pre-formatted, indexed list of memories to score.
 * @returns Prompt instructing the model to emit a bare JSON array of
 *          0.0–1.0 scores, one per memory, in the same order as the input.
 */
function importanceScoring(memoriesBlock) {
    return `You are a memory importance scorer. Rate each memory's long-term value.

Score each memory from 0.0 (trivial/ephemeral) to 1.0 (critical/foundational).

Scoring guide:
- 0.0-0.2: Routine actions, transient state, trivial fixes
- 0.3-0.5: Useful context, minor decisions, standard patterns
- 0.6-0.8: Important decisions, debugging breakthroughs, architectural choices
- 0.9-1.0: Foundational principles, critical constraints, core identity facts

MEMORIES:
${memoriesBlock}

Respond with ONLY a JSON array of scores in the same order, e.g.:
[0.3, 0.7, 0.5, 0.9]

No explanations. Just the JSON array.`;
}
|
|
32
|
+
/**
 * Reflection prompt. Takes a {memories_block} with today's memories.
 *
 * @param memoriesBlock Pre-formatted block of the day's memories.
 * @returns Prompt asking the model for a JSON array of 0–3 lesson objects
 *          (lesson, project, severity, confidence, source_pattern).
 */
function reflection(memoriesBlock) {
    return `You are a self-improvement analyst for a multi-agent AI system. Review today's memories and extract 1-3 actionable lessons.

Quality over quantity. One genuine insight that prevents a future mistake is worth more than five restatements of what happened. Most days, 1-2 lessons is ideal. An empty array is fine if nothing warrants a lesson.

Prefer GLOBAL lessons (applicable across all projects and agents) over project-specific ones. Only mark a lesson as project-specific if it truly cannot generalize.

Good lesson: "When modifying template files processed by sed, always verify ALL substitution targets by diffing the output — partial fixes cause silent failures on other deployments"
Bad lesson: "The deploy script had a bug" (restatement, not actionable)

TODAY'S MEMORIES:
${memoriesBlock}

For each lesson, output a JSON object:
- "lesson": Concise, actionable rule in imperative voice
- "project": "global" unless genuinely project-specific
- "severity": "critical" | "important" | "minor"
- "confidence": "high" | "medium" | "low"
- "source_pattern": What triggered this (1 sentence)

Confidence:
- "high": Pattern across multiple events, or clear mistake with obvious fix. Safe to auto-inject into agent instructions.
- "medium": Single incident but likely to recur. Store but don't auto-propagate.
- "low": Speculative. Store for retrieval only.

Focus on: process gaps, repeated friction, silent failures, user corrections to agent behavior, cross-agent patterns.
Skip: trivial actions, already-documented rules, one-off events.

Respond with a JSON array. Empty array [] is a valid response.`;
}
|
|
65
|
+
/**
 * Distillation prompt. Extracts knowledge from a session transcript.
 *
 * @param projectName Project the session belongs to (interpolated into headers).
 * @param date Session date string (interpolated after each extracted item).
 * @param transcript Raw session transcript to distill.
 * @returns Prompt asking for a structured markdown memory document, or the
 *          literal "NO_EXTRACT" when nothing is worth keeping.
 */
function distillation(projectName, date, transcript) {
    return `You are a memory extraction agent. Analyze this AI session transcript and extract
knowledge worth remembering long-term.

SESSION TRANSCRIPT (project: ${projectName}, date: ${date}):
${transcript}

EXTRACT into this markdown format:

# Session Memory: ${date} - ${projectName}

## Classification: [pick one: PUBLIC / WORK / PERSONAL / SENSITIVE]

### Decisions Made
- [decision]: [reasoning] (${date})

### Facts Learned
- [fact]: [context/source] (${date})

### Problems & Solutions
- [problem] → [solution that worked] (${date})

### Project State Changes
- [what changed]: [from → to] (${date})

### Key Entities & Relationships
- [entity A] → [relationship] → [entity B] (${date})

### Corrections & Rejections
- [what AI proposed] → [why rejected/corrected] → [what user wanted instead] (${date})
(Include: tool use denials, "no/wrong/redo", style feedback, approach rejections,
user corrections of AI assumptions, quality complaints like "too verbose")

RULES:
- Extract MAX 20 items total (quality over quantity)
- Each must be useful if recalled in a future session
- Skip: routine code edits, standard tool usage, trivial fixes
- Include: architectural decisions, debugging breakthroughs, user preferences,
tool configurations, API discoveries, project milestones
- PRIORITIZE Corrections & Rejections — these are high-value signals for learning
what the user does NOT want. Even a single "no" or style correction is worth extracting.
- PRIVACY CLASSIFICATION (one of):
- PUBLIC: general tech knowledge, open-source patterns, publicly available info
- WORK: project-specific decisions, architecture choices, client/business context
- PERSONAL: personal preferences, family, health, lifestyle, private life
- SENSITIVE: API keys mentioned, credentials, financial account details, medical records
- Omit any section that has zero items (don't include empty sections)
- If nothing worth extracting, output ONLY: "NO_EXTRACT"
`;
}
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Retrieval layer with composite scoring, RRF fusion, and graph traversal.
|
|
3
|
+
* Ported from hicortex/retrieval.py — same scoring model and weights.
|
|
4
|
+
*
|
|
5
|
+
* Scoring model:
|
|
6
|
+
* score = similarity * 0.4 + effective_strength * 0.3 + connection_score * 0.2 + recency * 0.1
|
|
7
|
+
*
|
|
8
|
+
* Decay model (B+E+D):
|
|
9
|
+
* base_decay = 0.0005 (~60-day half-life at importance 0.5)
|
|
10
|
+
* decay_rate = 1 - base_decay * (1 - importance)
|
|
11
|
+
* decay_rate = 1 - (1 - decay_rate) * 0.7^access_count
|
|
12
|
+
* decay_rate = 1 - (1 - decay_rate) * 0.7^link_count
|
|
13
|
+
* floor = base_strength * importance * 0.1
|
|
14
|
+
* effective = floor + (base - floor) * decay_rate^hours
|
|
15
|
+
*/
|
|
16
|
+
import type Database from "better-sqlite3";
|
|
17
|
+
import type { MemorySearchResult } from "./types.js";
|
|
18
|
+
/**
|
|
19
|
+
* Compute decayed strength with adaptive decay (B+E+D model).
|
|
20
|
+
* Exported for use by consolidation decay/prune stage.
|
|
21
|
+
*/
|
|
22
|
+
export declare function effectiveStrength(baseStrength: number, lastAccessed: string | null, now: Date, options?: {
|
|
23
|
+
importance?: number;
|
|
24
|
+
accessCount?: number;
|
|
25
|
+
linkCount?: number;
|
|
26
|
+
}): number;
|
|
27
|
+
export interface EmbedFn {
|
|
28
|
+
(text: string): Promise<Float32Array>;
|
|
29
|
+
}
|
|
30
|
+
/**
|
|
31
|
+
* Main retrieval: BM25 + vector search with RRF fusion, graph traversal,
|
|
32
|
+
* and composite scoring. Strengthens accessed memories.
|
|
33
|
+
*/
|
|
34
|
+
export declare function retrieve(db: Database.Database, embedFn: EmbedFn, query: string, options?: {
|
|
35
|
+
limit?: number;
|
|
36
|
+
project?: string | null;
|
|
37
|
+
privacy?: string[];
|
|
38
|
+
sourceAgent?: string;
|
|
39
|
+
}): Promise<MemorySearchResult[]>;
|
|
40
|
+
/**
|
|
41
|
+
* Get recent context, optionally filtered by project and privacy.
|
|
42
|
+
*/
|
|
43
|
+
export declare function searchContext(db: Database.Database, options?: {
|
|
44
|
+
project?: string | null;
|
|
45
|
+
limit?: number;
|
|
46
|
+
privacy?: string[];
|
|
47
|
+
}): MemorySearchResult[];
|