@wipcomputer/memory-crystal 0.7.34-alpha.2 → 0.7.34-alpha.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +1 -1
- package/dist/bridge.js +64 -7
- package/dist/bulk-copy.js +67 -16
- package/dist/cc-hook.js +2163 -62
- package/dist/cc-poller.js +1967 -70
- package/dist/cli.js +4538 -139
- package/dist/core.js +1789 -6
- package/dist/crypto.js +153 -14
- package/dist/crystal-serve.js +64 -12
- package/dist/doctor.js +517 -52
- package/dist/dream-weaver.js +1755 -7
- package/dist/file-sync.js +407 -9
- package/dist/installer.js +840 -145
- package/dist/ldm.js +231 -16
- package/dist/mcp-server.js +1882 -17
- package/dist/migrate.js +1707 -11
- package/dist/mirror-sync.js +2052 -34
- package/dist/openclaw.js +1895 -84
- package/dist/pair.js +112 -16
- package/dist/poller.js +2275 -80
- package/dist/role.js +159 -7
- package/dist/staging.js +235 -10
- package/dist/summarize.js +142 -5
- package/package.json +3 -3
package/dist/poller.js
CHANGED
|
@@ -1,42 +1,2237 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
}
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
}
|
|
11
|
-
import {
|
|
12
|
-
pushFileSync
|
|
13
|
-
} from "./chunk-CGIDSAJB.js";
|
|
14
|
-
import {
|
|
15
|
-
Crystal,
|
|
16
|
-
resolveConfig
|
|
17
|
-
} from "./chunk-2GBYLMEF.js";
|
|
18
|
-
import {
|
|
19
|
-
decryptJSON,
|
|
20
|
-
encryptJSON,
|
|
21
|
-
loadRelayKey
|
|
22
|
-
} from "./chunk-D3MACYZ4.js";
|
|
23
|
-
import {
|
|
24
|
-
ensureLdm,
|
|
25
|
-
resolveStatePath,
|
|
26
|
-
stateWritePath
|
|
27
|
-
} from "./chunk-DFQ72B7M.js";
|
|
2
|
+
// esbuild-style module runtime helpers: lazy ESM initialization and
// getter-based live export bindings.
var __defProp = Object.defineProperty;
var __getOwnPropNames = Object.getOwnPropertyNames;

// Wrap a module body so it executes at most once. `fn` is an object whose
// single own property is the module body; after the first run `fn` is zeroed
// (freeing the closure) and the cached result `res` is returned thereafter.
var __esm = (fn, res) => function __init() {
  if (fn) {
    const key = __getOwnPropNames(fn)[0];
    const body = fn[key];
    // Indirect call (no `this`); `fn = 0` both clears the slot and is the argument.
    res = body(fn = 0);
  }
  return res;
};

// Define every entry of `all` on `target` as an enumerable getter, mirroring
// live ESM export bindings (the export re-evaluates on each access).
var __export = (target, all) => {
  for (var name in all) {
    __defProp(target, name, { get: all[name], enumerable: true });
  }
};
|
|
28
11
|
|
|
29
|
-
// src/
|
|
30
|
-
import {
|
|
12
|
+
// src/llm.ts
|
|
13
|
+
import { existsSync, readFileSync } from "fs";
|
|
31
14
|
import { join } from "path";
|
|
32
|
-
import {
|
|
15
|
+
import { homedir } from "os";
|
|
16
|
+
import { execSync } from "child_process";
|
|
17
|
+
// Look up a cached LLM result by key, honoring the TTL window
// (CACHE_TTL_DAYS). Returns the stored result string, or null when the
// cache DB is unavailable, the key is missing/expired, or any SQLite error
// occurs — the cache is strictly best-effort.
function dbCacheGet(key) {
  if (!_cacheDb) return null;
  try {
    const ttlCutoff = new Date(Date.now() - CACHE_TTL_DAYS * 864e5).toISOString();
    const hit = _cacheDb
      .prepare("SELECT result FROM llm_cache WHERE cache_key = ? AND created_at > ?")
      .get(key, ttlCutoff);
    if (!hit) return null;
    // Bump hit statistics; a failure here is swallowed like any other cache error.
    const nowIso = new Date().toISOString();
    _cacheDb
      .prepare("UPDATE llm_cache SET hit_count = hit_count + 1, last_hit_at = ? WHERE cache_key = ?")
      .run(nowIso, key);
    return hit.result;
  } catch {
    return null;
  }
}
|
|
31
|
+
// Insert or replace a cached LLM result (hit counters reset to zero).
// No-op when the cache DB is absent; all SQLite errors are swallowed
// because the cache is best-effort.
function dbCacheSet(key, type, query, intent, result, provider) {
  if (!_cacheDb) return;
  try {
    const stmt = _cacheDb.prepare(
      "INSERT OR REPLACE INTO llm_cache (cache_key, cache_type, query, intent, result, provider, created_at, hit_count, last_hit_at) VALUES (?, ?, ?, ?, ?, ?, ?, 0, NULL)"
    );
    const createdIso = new Date().toISOString();
    stmt.run(key, type, query, intent || null, result, provider, createdIso);
  } catch {
  }
}
|
|
40
|
+
// Best-effort lookup of a secret from 1Password via the `op` CLI, using a
// service-account token stored on disk.
// Returns the field value, or undefined when the token file is missing,
// `op` fails or is not installed, or the call times out (5s).
function getOpSecret(itemName, fieldLabel) {
  try {
    const saTokenPath = join(homedir(), ".openclaw/secrets/op-sa-token");
    if (!existsSync(saTokenPath)) return void 0;
    const saToken = readFileSync(saTokenPath, "utf-8").trim();
    // Pass the token through the child's environment rather than interpolating
    // it into the shell command string: this keeps the secret out of `ps`
    // output and avoids quote/`$` injection if the token contains shell
    // metacharacters.
    // NOTE(review): itemName/fieldLabel are still interpolated into the
    // command; visible callers pass fixed literals only — confirm before
    // exposing to untrusted input.
    const result = execSync(
      `op item get "${itemName}" --vault "Agent Secrets" --fields "${fieldLabel}" --reveal`,
      {
        encoding: "utf-8",
        timeout: 5e3,
        stdio: ["pipe", "pipe", "pipe"],
        env: { ...process.env, OP_SERVICE_ACCOUNT_TOKEN: saToken }
      }
    ).trim();
    return result || void 0;
  } catch {
    return void 0;
  }
}
|
|
54
|
+
// Detect the best available LLM provider, in priority order:
//   1. MCP sampling (client-side, when samplingServer is registered)
//   2. local MLX OpenAI-compatible server on :18791
//   3. local Ollama daemon on :11434 (skipping embedding-only models)
//   4. OPENAI_API_KEY env var or 1Password lookup
//   5. ANTHROPIC_API_KEY env var or 1Password lookup
//   6. "none" (deep search disabled)
// The result is memoized in module state (detectedProvider/detectionDone),
// so the network probes run at most once per process. Logs the chosen
// provider to stderr.
async function detectProvider() {
  if (detectionDone && detectedProvider) return detectedProvider;
  detectionDone = true;
  // An MCP sampling-capable client picks the model itself.
  if (samplingServer) {
    detectedProvider = { provider: "sampling", baseURL: "", apiKey: "", model: "client-selected" };
    process.stderr.write("[memory-crystal] LLM provider: MCP Sampling (via client)\n");
    return detectedProvider;
  }
  // Probe a local MLX server (1s timeout; any failure just falls through).
  try {
    const resp = await fetch("http://localhost:18791/v1/models", { signal: AbortSignal.timeout(1e3) });
    if (resp.ok) {
      const data = await resp.json();
      // First listed model, or "default" when the list is empty.
      const model = data?.data?.[0]?.id || "default";
      detectedProvider = { provider: "mlx", baseURL: "http://localhost:18791/v1", apiKey: "not-needed", model };
      process.stderr.write(`[memory-crystal] LLM provider: MLX (${model})
`);
      return detectedProvider;
    }
  } catch {
  }
  // Probe a local Ollama daemon; pick the first model that can chat.
  try {
    const resp = await fetch("http://localhost:11434/api/tags", { signal: AbortSignal.timeout(1e3) });
    if (resp.ok) {
      const data = await resp.json();
      const models = data?.models || [];
      // These model families only produce embeddings — unusable for chat.
      const embeddingOnly = ["nomic-embed-text", "mxbai-embed", "all-minilm", "snowflake-arctic-embed"];
      const chatModel = models.find((m) => !embeddingOnly.some((e) => m.name.startsWith(e)));
      if (chatModel) {
        detectedProvider = { provider: "ollama", baseURL: "http://localhost:11434/v1", apiKey: "ollama", model: chatModel.name };
        process.stderr.write(`[memory-crystal] LLM provider: Ollama (${chatModel.name})
`);
        return detectedProvider;
      }
    }
  } catch {
  }
  // Hosted fallbacks: environment variable first, then 1Password.
  const openaiKey = process.env.OPENAI_API_KEY || getOpSecret("OpenAI API", "api key");
  if (openaiKey) {
    detectedProvider = { provider: "openai", baseURL: "https://api.openai.com/v1", apiKey: openaiKey, model: "gpt-4o-mini" };
    process.stderr.write("[memory-crystal] LLM provider: OpenAI API\n");
    return detectedProvider;
  }
  // "sk-ant-oat"-prefixed tokens are rejected here — presumably OAuth access
  // tokens that the plain Messages API will not accept (TODO confirm).
  const anthropicKey = process.env.ANTHROPIC_API_KEY || getOpSecret("Anthropic Auth Token - remote bunkers", "Auth Token");
  if (anthropicKey && !anthropicKey.startsWith("sk-ant-oat")) {
    detectedProvider = { provider: "anthropic", baseURL: "https://api.anthropic.com", apiKey: anthropicKey, model: "claude-haiku-4-5-20251001" };
    process.stderr.write("[memory-crystal] LLM provider: Anthropic API\n");
    return detectedProvider;
  }
  // Nothing available: deep search features degrade to plain search.
  detectedProvider = { provider: "none", baseURL: "", apiKey: "", model: "" };
  process.stderr.write("[memory-crystal] LLM provider: none (deep search unavailable)\n");
  return detectedProvider;
}
|
|
106
|
+
// Run one chat completion against whichever provider `config` selects.
// "sampling" and "anthropic" delegate to their dedicated helpers; every
// other provider is treated as an OpenAI-compatible /chat/completions
// endpoint. Resolves to the assistant text ("" when the response carries
// none); throws on a non-2xx HTTP status.
async function chatComplete(config, messages, maxTokens = 300) {
  switch (config.provider) {
    case "sampling":
      return samplingComplete(messages, maxTokens);
    case "anthropic":
      return anthropicComplete(config, messages, maxTokens);
    default:
      break;
  }
  const payload = {
    model: config.model,
    messages,
    max_tokens: maxTokens,
    temperature: 0.7
  };
  const headers = {
    "Content-Type": "application/json",
    "Authorization": `Bearer ${config.apiKey}`
  };
  const resp = await fetch(`${config.baseURL}/chat/completions`, {
    method: "POST",
    headers,
    body: JSON.stringify(payload)
  });
  if (!resp.ok) throw new Error(`LLM request failed: ${resp.status}`);
  const data = await resp.json();
  return data.choices?.[0]?.message?.content || "";
}
|
|
130
|
+
// Call Anthropic's /v1/messages API. Anthropic takes the system prompt as a
// top-level `system` field, so any system-role message is lifted out of the
// message list. Resolves to the first content block's text ("" when absent);
// throws on a non-2xx status.
async function anthropicComplete(config, messages, maxTokens) {
  const systemPrompt = messages.find((m) => m.role === "system")?.content;
  const chatMessages = messages.filter((m) => m.role !== "system");
  const payload = {
    model: config.model,
    max_tokens: maxTokens,
    messages: chatMessages
  };
  if (systemPrompt !== undefined) payload.system = systemPrompt;
  const resp = await fetch("https://api.anthropic.com/v1/messages", {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      "x-api-key": config.apiKey,
      "anthropic-version": "2023-06-01"
    },
    body: JSON.stringify(payload)
  });
  if (!resp.ok) throw new Error(`Anthropic request failed: ${resp.status}`);
  const data = await resp.json();
  return data.content?.[0]?.text || "";
}
|
|
152
|
+
// Ask the connected MCP client to run a completion on our behalf
// (MCP "sampling"). Model preferences request a cheap, fast Haiku-class
// model — query expansion does not need a frontier model. Returns the text
// of the client's response, or "" when it returns nothing usable. Throws if
// no sampling server has been registered.
async function samplingComplete(messages, maxTokens) {
  if (!samplingServer) throw new Error("MCP sampling server not set");
  const systemPrompt = messages.find((m) => m.role === "system")?.content;
  const chat = messages.filter((m) => m.role !== "system");
  const request = {
    messages: chat.map(({ role, content }) => ({
      role,
      content: { type: "text", text: content }
    })),
    systemPrompt,
    maxTokens,
    modelPreferences: {
      // Request cheap, fast model (Haiku-class). We don't need Opus for query expansion.
      costPriority: 0.9,
      speedPriority: 0.8,
      intelligencePriority: 0.3,
      hints: [{ name: "haiku" }]
    }
  };
  const result = await samplingServer.createMessage(request);
  // Clients may return either a structured text block or a bare string.
  if (result?.content?.type === "text") return result.content.text;
  if (typeof result?.content === "string") return result.content;
  return "";
}
|
|
175
|
+
// Expand a search query into up to three retrieval variations
// ("lex" keyword form, "vec" semantic rephrase, "hyde" hypothetical answer)
// using the detected LLM provider. Results are cached in the persistent
// llm_cache table and in a per-process Map. Returns [] when no provider is
// available, and a deterministic fallback set when the LLM call fails or
// every line it produced was rejected.
async function expandQuery(query, intent) {
  const cacheKey = intent ? `expand:${query}||${intent}` : `expand:${query}`;
  // Persistent cache first, then the per-process cache.
  const dbCached = dbCacheGet(cacheKey);
  if (dbCached) {
    try {
      return JSON.parse(dbCached);
    } catch {
      // Corrupt cache entry: fall through and recompute.
    }
  }
  const inMemory = expansionCache.get(cacheKey);
  if (inMemory) return inMemory;
  const config = await detectProvider();
  if (config.provider === "none") return [];
  try {
    const intentContext = intent ? `
Query intent: ${intent}. Use this to guide your variations toward the intended domain.` : "";
    const raw = await chatComplete(config, [
      { role: "system", content: EXPAND_PROMPT + intentContext },
      { role: "user", content: query }
    ], 300);
    // Guard against drift: a variation must share at least one alphanumeric
    // term with the original query (vacuously true when the query has none).
    const queryTerms = query.toLowerCase().replace(/[^a-z0-9\s]/g, " ").split(/\s+/).filter(Boolean);
    const sharesTerm = (text) => {
      if (queryTerms.length === 0) return true;
      const lower = text.toLowerCase();
      return queryTerms.some((term) => lower.includes(term));
    };
    // Parse "type: text" lines; only lex/vec/hyde types are accepted.
    const variations = [];
    for (const line of raw.trim().split("\n")) {
      const colonIdx = line.indexOf(":");
      if (colonIdx === -1) continue;
      const type = line.slice(0, colonIdx).trim();
      if (type !== "lex" && type !== "vec" && type !== "hyde") continue;
      const text = line.slice(colonIdx + 1).trim();
      if (!text || !sharesTerm(text)) continue;
      variations.push({ type, text });
    }
    if (variations.length > 0) {
      expansionCache.set(cacheKey, variations);
      dbCacheSet(cacheKey, "expansion", query, intent, JSON.stringify(variations), config.provider);
      return variations;
    }
  } catch (err) {
    process.stderr.write(`[memory-crystal] Query expansion failed: ${err.message}
`);
  }
  // LLM unavailable or produced nothing usable: fall back to the raw query.
  return [
    { type: "lex", text: query },
    { type: "vec", text: query },
    { type: "hyde", text: `Information about ${query}` }
  ];
}
|
|
228
|
+
// Re-rank candidate passages against a query with the detected LLM.
// Falls back to positional scores (1 - i*0.01, preserving input order) when
// no provider exists or the LLM call fails. Successful rankings are cached
// persistently, keyed by the query plus a hash of the passage set so a
// changed candidate pool misses the cache. Every passage gets a score: any
// passage the model skipped is appended with score 0. Returns {index, score}
// pairs sorted by descending score.
async function rerankResults(query, passages) {
  const config = await detectProvider();
  const positionalScores = () => passages.map((_, i) => ({ index: i, score: 1 - i * 0.01 }));
  if (config.provider === "none") {
    return positionalScores();
  }
  const { createHash: createHash4 } = await import("crypto");
  const contentHash = createHash4("sha256")
    .update(passages.map((p) => p.slice(0, 200)).sort().join("|"))
    .digest("hex")
    .slice(0, 16);
  const rerankCacheKey = `rerank:${query}||${contentHash}`;
  const dbCachedRerank = dbCacheGet(rerankCacheKey);
  if (dbCachedRerank) {
    try {
      return JSON.parse(dbCachedRerank);
    } catch {
      // Corrupt cache entry: recompute below.
    }
  }
  try {
    const passageList = passages.map((p, i) => `[${i}] ${p.slice(0, 500)}`).join("\n\n");
    const raw = await chatComplete(config, [
      { role: "system", content: RERANK_PROMPT },
      { role: "user", content: `Query: ${query}

Passages:
${passageList}` }
    ], 200);
    // Parse "<index>: <score>" lines; anything malformed is ignored.
    const results = [];
    for (const line of raw.trim().split("\n")) {
      const match = line.match(/^(\d+):\s*([\d.]+)/);
      if (match) {
        results.push({ index: parseInt(match[1]), score: parseFloat(match[2]) });
      }
    }
    // Passages the model failed to rate sink to the bottom with score 0.
    const scored = new Set(results.map((r) => r.index));
    for (let i = 0; i < passages.length; i++) {
      if (!scored.has(i)) results.push({ index: i, score: 0 });
    }
    const sorted = results.sort((a, b) => b.score - a.score);
    dbCacheSet(rerankCacheKey, "rerank", query, void 0, JSON.stringify(sorted), config.provider);
    return sorted;
  } catch (err) {
    process.stderr.write(`[memory-crystal] Reranking failed: ${err.message}
`);
    return positionalScores();
  }
}
|
|
272
|
+
// Module-level state for src/llm.ts (hoisted declarations; assigned in init_llm).
var samplingServer, expansionCache, _cacheDb, CACHE_TTL_DAYS, detectedProvider, detectionDone, EXPAND_PROMPT, RERANK_PROMPT;
// One-shot lazy initializer for the llm module (esbuild __esm wrapper).
var init_llm = __esm({
  "src/llm.ts"() {
    "use strict";
    // MCP server capable of client-side sampling; set externally when available.
    samplingServer = null;
    // Per-process cache of query expansions, keyed by "expand:<query>[||intent]".
    expansionCache = /* @__PURE__ */ new Map();
    // SQLite handle backing the persistent llm_cache table; null = cache disabled.
    _cacheDb = null;
    // Persistent-cache TTL in days (override via CRYSTAL_CACHE_TTL_DAYS).
    CACHE_TTL_DAYS = parseInt(process.env.CRYSTAL_CACHE_TTL_DAYS || "7", 10);
    // Memoized detectProvider() result; detectionDone guards re-probing.
    detectedProvider = null;
    detectionDone = false;
    // System prompt for expandQuery(): requests one lex/vec/hyde line each.
    EXPAND_PROMPT = `You are a search query expander. Given a search query, generate exactly 3 variations to improve search recall.

Output exactly 3 lines in this format (no other text):
lex: <keyword-focused variation for full-text search>
vec: <semantic variation rephrased for embedding similarity>
hyde: <hypothetical document snippet that would answer this query>

Rules:
- Each variation must contain at least one term from the original query
- Keep variations concise (under 30 words each)
- lex should use specific keywords and synonyms
- vec should rephrase the intent naturally
- hyde should be a short passage as if answering the query`;
    // System prompt for rerankResults(): requests "<index>: <score>" lines.
    RERANK_PROMPT = `You are a search result re-ranker. Given a query and a list of text passages, rate each passage's relevance to the query.

Output one line per passage in this exact format:
<index>: <score>

Where index is the passage number (0-based) and score is a float from 0.0 to 1.0.
- 1.0 = perfectly relevant, directly answers the query
- 0.7 = highly relevant, closely related
- 0.4 = somewhat relevant, tangentially related
- 0.1 = barely relevant
- 0.0 = not relevant at all

Rate ALL passages. Output nothing else.`;
  }
});
|
|
310
|
+
|
|
311
|
+
// src/search-pipeline.ts
|
|
312
|
+
// Public surface of src/search-pipeline.ts: only deepSearch is exported.
var search_pipeline_exports = {};
__export(search_pipeline_exports, {
  deepSearch: () => deepSearch
});
// LLM-assisted hybrid search over a Crystal store:
//   1. initial FTS + vector retrieval for the raw query,
//   2. optional LLM query expansion (lex/vec/hyde variations),
//   3. reciprocal rank fusion of all result lists,
//   4. LLM reranking of the top candidates,
//   5. recency-weighted blend of RRF position and rerank score.
// Falls back to crystal.search() when no LLM provider or SQLite handle is
// available. options: { limit?, candidateLimit?, intent?, filter?, explain? }.
// Returned scores are normalized so the top result is capped at 0.95.
async function deepSearch(crystal, query, options = {}) {
  const limit = options.limit || 5;
  const candidateLimit = options.candidateLimit || DEFAULT_CANDIDATE_LIMIT;
  const intent = options.intent;
  const filter = options.filter;
  const explain = options.explain || false;
  const provider = await detectProvider();
  if (provider.provider === "none") {
    return crystal.search(query, limit, filter);
  }
  const db = crystal.sqliteDb;
  if (!db) return crystal.search(query, limit, filter);
  // Resolve relative time filters ("since"/"until") to concrete dates once.
  const sinceDate = filter?.since ? crystal.parseSince(filter.since) : void 0;
  const untilDate = filter?.until ? crystal.parseSince(filter.until) : void 0;
  const internalFilter = { ...filter, sinceDate, untilDate };
  const initialFts = crystal.searchFTS(query, 20, internalFilter);
  // Strong-signal shortcut: a clear FTS winner (high score, big margin over
  // the runner-up) skips LLM expansion entirely — unless an intent was given.
  const topScore = initialFts[0]?.score ?? 0;
  const secondScore = initialFts[1]?.score ?? 0;
  const hasStrongSignal = !intent && initialFts.length > 0 && topScore >= STRONG_SIGNAL_MIN_SCORE && topScore - secondScore >= STRONG_SIGNAL_MIN_GAP;
  const expanded = hasStrongSignal ? [] : await expandQuery(query, intent);
  // Collect one ranked list per retrieval pass; empty lists are dropped.
  const allResultLists = [];
  if (initialFts.length > 0) allResultLists.push(initialFts);
  const [queryEmbedding] = await crystal.embed([query]);
  const originalVec = crystal.searchVec(queryEmbedding, 30, internalFilter);
  if (originalVec.length > 0) allResultLists.push(originalVec);
  // "lex" variations go through FTS; "vec"/"hyde" go through the vector index.
  for (const variation of expanded) {
    if (variation.type === "lex") {
      const ftsResults = crystal.searchFTS(variation.text, 20, internalFilter);
      if (ftsResults.length > 0) allResultLists.push(ftsResults);
    } else {
      const [embedding] = await crystal.embed([variation.text]);
      const vecResults = crystal.searchVec(embedding, 20, internalFilter);
      if (vecResults.length > 0) allResultLists.push(vecResults);
    }
  }
  // The first two lists (raw-query FTS and vector) get double weight in RRF.
  const weights = allResultLists.map((_, i) => i < 2 ? 2 : 1);
  const fused = crystal.reciprocalRankFusion(allResultLists, weights);
  const candidates = fused.slice(0, candidateLimit);
  if (candidates.length === 0) return [];
  // Score maps are only needed when the caller asked for explain output;
  // text prefixes (200 chars) serve as dedup keys across lists.
  const ftsScoreMap = /* @__PURE__ */ new Map();
  const vecScoreMap = /* @__PURE__ */ new Map();
  if (explain) {
    for (const r of initialFts) ftsScoreMap.set(r.text.slice(0, 200), r.score);
    for (const r of originalVec) vecScoreMap.set(r.text.slice(0, 200), r.score);
  }
  const passages = candidates.map((c) => c.text.slice(0, 500));
  const rerankQuery = intent ? `${intent}: ${query}` : query;
  const reranked = await rerankResults(rerankQuery, passages);
  const now = Date.now();
  const blended = reranked.map((r) => {
    const candidate = candidates[r.index];
    if (!candidate) return null;
    // Blend RRF position with the rerank score; the higher the RRF rank,
    // the more we trust fusion over the LLM (0.75 / 0.6 / 0.4 weighting).
    const rrfRank = r.index + 1;
    let rrfWeight;
    if (rrfRank <= 3) rrfWeight = 0.75;
    else if (rrfRank <= 10) rrfWeight = 0.6;
    else rrfWeight = 0.4;
    const rrfScore = 1 / rrfRank;
    const blendedScore = rrfWeight * rrfScore + (1 - rrfWeight) * r.score;
    // Recency weighting: age in days (864e5 ms/day); undated chunks are
    // treated as fully fresh (weight 1, no freshness label).
    const ageDays = candidate.created_at ? (now - new Date(candidate.created_at).getTime()) / 864e5 : 0;
    const recency = candidate.created_at ? crystal.recencyWeight(ageDays) : 1;
    const finalScore = blendedScore * recency;
    const freshness = candidate.created_at ? crystal.freshnessLabel(ageDays) : void 0;
    const result = {
      ...candidate,
      score: finalScore,
      freshness
    };
    if (explain) {
      const dedup = candidate.text.slice(0, 200);
      result.explain = {
        fts_score: ftsScoreMap.get(dedup),
        vec_score: vecScoreMap.get(dedup),
        rrf_rank: rrfRank,
        rrf_score: rrfScore,
        rerank_score: r.score,
        recency_weight: recency,
        final_score: finalScore
      };
    }
    return result;
  }).filter((r) => r !== null);
  const sorted = blended.sort((a, b) => b.score - a.score).slice(0, limit);
  // Normalize so the top score is 0.95, keeping relative ordering intact.
  const topNormScore = sorted[0]?.score || 1;
  return sorted.map((r) => ({ ...r, score: Math.min(r.score / topNormScore * 0.95, 0.95) }));
}
|
|
402
|
+
// Tunables for deepSearch (hoisted; assigned in init_search_pipeline).
var STRONG_SIGNAL_MIN_SCORE, STRONG_SIGNAL_MIN_GAP, DEFAULT_CANDIDATE_LIMIT;
// One-shot initializer for src/search-pipeline.ts; pulls in the llm module first.
var init_search_pipeline = __esm({
  "src/search-pipeline.ts"() {
    "use strict";
    init_llm();
    // Minimum top FTS score for the expansion-skipping "strong signal" shortcut...
    STRONG_SIGNAL_MIN_SCORE = 0.85;
    // ...and the minimum margin the top hit must have over the runner-up.
    STRONG_SIGNAL_MIN_GAP = 0.15;
    // Default size of the fused candidate pool handed to the reranker.
    DEFAULT_CANDIDATE_LIMIT = 40;
  }
});
|
|
412
|
+
|
|
413
|
+
// src/core.ts
|
|
414
|
+
import * as lancedb from "@lancedb/lancedb";
|
|
415
|
+
import Database from "better-sqlite3";
|
|
416
|
+
import * as sqliteVec from "sqlite-vec";
|
|
417
|
+
import { readFileSync as readFileSync2, existsSync as existsSync2, mkdirSync, readdirSync, statSync } from "fs";
|
|
418
|
+
import { execSync as execSync2 } from "child_process";
|
|
419
|
+
import { join as join2, relative, extname, basename } from "path";
|
|
420
|
+
import { createHash } from "crypto";
|
|
421
|
+
import http from "http";
|
|
422
|
+
import https from "https";
|
|
423
|
+
// POST `texts` to OpenAI's /v1/embeddings endpoint and resolve with one
// embedding vector per input text (same order as the API's `data` array).
// Rejects on non-200 status, request/response stream errors, a 30s timeout,
// or an unparseable response body.
async function embedOpenAI(texts, apiKey, model) {
  return new Promise((resolve, reject) => {
    const body = JSON.stringify({ input: texts, model });
    const req = https.request({
      hostname: "api.openai.com",
      path: "/v1/embeddings",
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        "Authorization": `Bearer ${apiKey}`,
        "Content-Length": Buffer.byteLength(body)
      },
      timeout: 3e4
    }, (res) => {
      let data = "";
      res.on("data", (chunk) => data += chunk);
      // Without this, a response-stream error would go uncaught.
      res.on("error", reject);
      res.on("end", () => {
        if (res.statusCode !== 200) {
          reject(new Error(`OpenAI API error ${res.statusCode}: ${data.slice(0, 200)}`));
          return;
        }
        // Parse inside try/catch: a malformed 200 body must reject the
        // promise rather than throw synchronously in the stream callback
        // (which would crash the process and leave the promise pending).
        try {
          const parsed = JSON.parse(data);
          resolve(parsed.data.map((d) => d.embedding));
        } catch (err) {
          reject(new Error(`OpenAI response parse error: ${String(err)}`));
        }
      });
    });
    req.on("error", reject);
    req.on("timeout", () => {
      req.destroy();
      reject(new Error("OpenAI timeout"));
    });
    req.write(body);
    req.end();
  });
}
|
|
457
|
+
// Embed each text via a local Ollama server, one request per text (the
// legacy /api/embeddings endpoint takes a single prompt). Resolves with one
// embedding per input text, in order; rejects on the first failure
// (non-200 status, request/stream error, 15s timeout, or unparseable body).
async function embedOllama(texts, host, model) {
  const results = [];
  for (const text of texts) {
    const result = await new Promise((resolve, reject) => {
      const url = new URL("/api/embeddings", host);
      const body = JSON.stringify({ model, prompt: text });
      const req = http.request({
        hostname: url.hostname,
        port: url.port,
        path: url.pathname,
        method: "POST",
        headers: {
          "Content-Type": "application/json",
          "Content-Length": Buffer.byteLength(body)
        },
        timeout: 15e3
      }, (res) => {
        let data = "";
        res.on("data", (chunk) => data += chunk);
        // Without this, a response-stream error would go uncaught.
        res.on("error", reject);
        res.on("end", () => {
          if (res.statusCode !== 200) {
            reject(new Error(`Ollama error ${res.statusCode}: ${data.slice(0, 200)}`));
            return;
          }
          // Guard the parse: a malformed 200 body must reject, not throw
          // synchronously inside the stream callback.
          try {
            resolve(JSON.parse(data).embedding);
          } catch (err) {
            reject(new Error(`Ollama response parse error: ${String(err)}`));
          }
        });
      });
      req.on("error", reject);
      req.on("timeout", () => {
        req.destroy();
        reject(new Error("Ollama timeout"));
      });
      req.write(body);
      req.end();
    });
    results.push(result);
  }
  return results;
}
|
|
496
|
+
// Batch-embed `texts` via Google's Generative Language batchEmbedContents
// endpoint and resolve with one embedding (values array) per input text.
// Rejects on non-200 status, request/stream errors, a 30s timeout, or an
// unparseable response body.
async function embedGoogle(texts, apiKey, model) {
  return new Promise((resolve, reject) => {
    const body = JSON.stringify({
      requests: texts.map((text) => ({ model: `models/${model}`, content: { parts: [{ text }] } }))
    });
    const req = https.request({
      hostname: "generativelanguage.googleapis.com",
      path: `/v1beta/models/${model}:batchEmbedContents?key=${apiKey}`,
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        "Content-Length": Buffer.byteLength(body)
      },
      timeout: 3e4
    }, (res) => {
      let data = "";
      res.on("data", (chunk) => data += chunk);
      // Without this, a response-stream error would go uncaught.
      res.on("error", reject);
      res.on("end", () => {
        if (res.statusCode !== 200) {
          reject(new Error(`Google API error ${res.statusCode}: ${data.slice(0, 200)}`));
          return;
        }
        // Guard the parse: a malformed 200 body must reject the promise,
        // not throw synchronously inside the stream callback.
        try {
          const parsed = JSON.parse(data);
          resolve(parsed.embeddings.map((e) => e.values));
        } catch (err) {
          reject(new Error(`Google response parse error: ${String(err)}`));
        }
      });
    });
    req.on("error", reject);
    req.on("timeout", () => {
      req.destroy();
      reject(new Error("Google timeout"));
    });
    req.write(body);
    req.end();
  });
}
|
|
531
|
+
var Crystal = class _Crystal {
|
|
532
|
+
config;
|
|
533
|
+
lanceDb = null;
|
|
534
|
+
sqliteDb = null;
|
|
535
|
+
chunksTable = null;
|
|
536
|
+
vecDimensions = null;
|
|
537
|
+
constructor(config) {
|
|
538
|
+
this.config = config;
|
|
539
|
+
if (!existsSync2(config.dataDir)) {
|
|
540
|
+
mkdirSync(config.dataDir, { recursive: true });
|
|
541
|
+
}
|
|
542
|
+
}
|
|
543
|
+
// ── Initialization ──
|
|
544
|
+
async init() {
|
|
545
|
+
const lanceDir = join2(this.config.dataDir, "lance");
|
|
546
|
+
const sqlitePath = join2(this.config.dataDir, "crystal.db");
|
|
547
|
+
if (!existsSync2(lanceDir)) mkdirSync(lanceDir, { recursive: true });
|
|
548
|
+
this.lanceDb = await lancedb.connect(lanceDir);
|
|
549
|
+
this.sqliteDb = new Database(sqlitePath);
|
|
550
|
+
this.sqliteDb.pragma("journal_mode = WAL");
|
|
551
|
+
sqliteVec.load(this.sqliteDb);
|
|
552
|
+
this.initSqliteTables();
|
|
553
|
+
this.initChunksTables();
|
|
554
|
+
await this.initLanceTables();
|
|
555
|
+
}
|
|
556
|
+
initSqliteTables() {
|
|
557
|
+
const db = this.sqliteDb;
|
|
558
|
+
db.exec(`
|
|
559
|
+
CREATE TABLE IF NOT EXISTS sources (
|
|
560
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
561
|
+
type TEXT NOT NULL,
|
|
562
|
+
uri TEXT NOT NULL,
|
|
563
|
+
title TEXT,
|
|
564
|
+
agent_id TEXT NOT NULL,
|
|
565
|
+
metadata TEXT DEFAULT '{}',
|
|
566
|
+
ingested_at TEXT NOT NULL,
|
|
567
|
+
chunk_count INTEGER DEFAULT 0
|
|
568
|
+
);
|
|
569
|
+
|
|
570
|
+
CREATE TABLE IF NOT EXISTS capture_state (
|
|
571
|
+
agent_id TEXT NOT NULL,
|
|
572
|
+
source_id TEXT NOT NULL,
|
|
573
|
+
last_message_count INTEGER DEFAULT 0,
|
|
574
|
+
capture_count INTEGER DEFAULT 0,
|
|
575
|
+
last_capture_at TEXT,
|
|
576
|
+
PRIMARY KEY (agent_id, source_id)
|
|
577
|
+
);
|
|
578
|
+
|
|
579
|
+
CREATE TABLE IF NOT EXISTS memories (
|
|
580
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
581
|
+
text TEXT NOT NULL,
|
|
582
|
+
category TEXT NOT NULL DEFAULT 'fact',
|
|
583
|
+
confidence REAL NOT NULL DEFAULT 1.0,
|
|
584
|
+
source_ids TEXT DEFAULT '[]',
|
|
585
|
+
status TEXT NOT NULL DEFAULT 'active',
|
|
586
|
+
created_at TEXT NOT NULL,
|
|
587
|
+
updated_at TEXT NOT NULL
|
|
588
|
+
);
|
|
589
|
+
|
|
590
|
+
CREATE TABLE IF NOT EXISTS entities (
|
|
591
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
592
|
+
name TEXT NOT NULL UNIQUE,
|
|
593
|
+
type TEXT NOT NULL DEFAULT 'concept',
|
|
594
|
+
description TEXT,
|
|
595
|
+
properties TEXT DEFAULT '{}',
|
|
596
|
+
created_at TEXT NOT NULL,
|
|
597
|
+
updated_at TEXT NOT NULL
|
|
598
|
+
);
|
|
599
|
+
|
|
600
|
+
CREATE TABLE IF NOT EXISTS relationships (
|
|
601
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
602
|
+
source_id INTEGER NOT NULL REFERENCES entities(id),
|
|
603
|
+
target_id INTEGER NOT NULL REFERENCES entities(id),
|
|
604
|
+
type TEXT NOT NULL,
|
|
605
|
+
description TEXT,
|
|
606
|
+
weight REAL DEFAULT 1.0,
|
|
607
|
+
valid_from TEXT NOT NULL,
|
|
608
|
+
valid_until TEXT,
|
|
609
|
+
created_at TEXT NOT NULL
|
|
610
|
+
);
|
|
611
|
+
|
|
612
|
+
CREATE INDEX IF NOT EXISTS idx_sources_agent ON sources(agent_id);
|
|
613
|
+
CREATE INDEX IF NOT EXISTS idx_memories_status ON memories(status);
|
|
614
|
+
CREATE INDEX IF NOT EXISTS idx_entities_name ON entities(name);
|
|
615
|
+
CREATE INDEX IF NOT EXISTS idx_relationships_source ON relationships(source_id);
|
|
616
|
+
CREATE INDEX IF NOT EXISTS idx_relationships_target ON relationships(target_id);
|
|
617
|
+
|
|
618
|
+
-- LLM cache (persistent expansion + reranking results)
|
|
619
|
+
CREATE TABLE IF NOT EXISTS llm_cache (
|
|
620
|
+
cache_key TEXT PRIMARY KEY,
|
|
621
|
+
cache_type TEXT NOT NULL,
|
|
622
|
+
query TEXT NOT NULL,
|
|
623
|
+
intent TEXT,
|
|
624
|
+
result TEXT NOT NULL,
|
|
625
|
+
provider TEXT NOT NULL,
|
|
626
|
+
created_at TEXT NOT NULL,
|
|
627
|
+
hit_count INTEGER DEFAULT 0,
|
|
628
|
+
last_hit_at TEXT
|
|
629
|
+
);
|
|
630
|
+
CREATE INDEX IF NOT EXISTS idx_llm_cache_type ON llm_cache(cache_type);
|
|
631
|
+
CREATE INDEX IF NOT EXISTS idx_llm_cache_created ON llm_cache(created_at);
|
|
632
|
+
|
|
633
|
+
-- Source file indexing (optional feature)
|
|
634
|
+
CREATE TABLE IF NOT EXISTS source_collections (
|
|
635
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
636
|
+
name TEXT NOT NULL UNIQUE,
|
|
637
|
+
root_path TEXT NOT NULL,
|
|
638
|
+
glob_patterns TEXT NOT NULL DEFAULT '["**/*"]',
|
|
639
|
+
ignore_patterns TEXT NOT NULL DEFAULT '[]',
|
|
640
|
+
file_count INTEGER DEFAULT 0,
|
|
641
|
+
chunk_count INTEGER DEFAULT 0,
|
|
642
|
+
last_sync_at TEXT,
|
|
643
|
+
created_at TEXT NOT NULL
|
|
644
|
+
);
|
|
645
|
+
|
|
646
|
+
CREATE TABLE IF NOT EXISTS source_files (
|
|
647
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
648
|
+
collection_id INTEGER NOT NULL REFERENCES source_collections(id) ON DELETE CASCADE,
|
|
649
|
+
file_path TEXT NOT NULL,
|
|
650
|
+
file_hash TEXT NOT NULL,
|
|
651
|
+
file_size INTEGER NOT NULL,
|
|
652
|
+
chunk_count INTEGER DEFAULT 0,
|
|
653
|
+
last_indexed_at TEXT NOT NULL
|
|
654
|
+
);
|
|
655
|
+
|
|
656
|
+
CREATE UNIQUE INDEX IF NOT EXISTS idx_source_files_path ON source_files(collection_id, file_path);
|
|
657
|
+
CREATE INDEX IF NOT EXISTS idx_source_files_collection ON source_files(collection_id);
|
|
658
|
+
`);
|
|
659
|
+
}
|
|
660
|
+
/**
 * Create the `chunks` table, its secondary indexes, the FTS5 mirror table,
 * and the triggers that keep FTS (and the sqlite-vec table) in sync with
 * `chunks`. If a `chunks_vec` table already exists, probe one row to recover
 * the embedding dimensionality (float32 = 4 bytes per component).
 */
initChunksTables() {
  const db = this.sqliteDb;
  db.exec(`
    CREATE TABLE IF NOT EXISTS chunks (
      id INTEGER PRIMARY KEY AUTOINCREMENT,
      text TEXT NOT NULL,
      text_hash TEXT NOT NULL,
      role TEXT,
      source_type TEXT,
      source_id TEXT,
      agent_id TEXT,
      token_count INTEGER,
      created_at TEXT NOT NULL
    );

    CREATE INDEX IF NOT EXISTS idx_chunks_agent ON chunks(agent_id);
    CREATE INDEX IF NOT EXISTS idx_chunks_source ON chunks(source_type);
    CREATE INDEX IF NOT EXISTS idx_chunks_hash ON chunks(text_hash);
    CREATE INDEX IF NOT EXISTS idx_chunks_created ON chunks(created_at);

    -- FTS5 full-text search table
    CREATE VIRTUAL TABLE IF NOT EXISTS chunks_fts USING fts5(
      text,
      tokenize='porter unicode61'
    );

    -- Sync trigger: populate FTS on chunk insert
    CREATE TRIGGER IF NOT EXISTS chunks_fts_insert AFTER INSERT ON chunks
    BEGIN
      INSERT INTO chunks_fts(rowid, text) VALUES (NEW.id, NEW.text);
    END;

    -- Sync trigger: clean up FTS and vec on chunk delete
    CREATE TRIGGER IF NOT EXISTS chunks_cleanup AFTER DELETE ON chunks
    BEGIN
      DELETE FROM chunks_vec WHERE chunk_id = OLD.id;
      INSERT INTO chunks_fts(chunks_fts, rowid, text) VALUES('delete', OLD.id, OLD.text);
    END;
  `);
  // chunks_vec is created lazily by ensureVecTable() (dimension unknown until
  // the first embedding arrives); here we only detect a pre-existing table.
  const vecTable = db.prepare(
    `SELECT name FROM sqlite_master WHERE type='table' AND name='chunks_vec'`
  ).get();
  if (vecTable) {
    try {
      const row = db.prepare("SELECT embedding FROM chunks_vec LIMIT 1").get();
      if (row?.embedding) {
        // Embedding is stored as a float32 blob: 4 bytes per dimension.
        this.vecDimensions = row.embedding.length / 4;
      }
    } catch {
      // Best-effort probe; an empty/unreadable vec table leaves
      // vecDimensions unset until ensureVecTable() is called.
    }
  }
}
|
|
712
|
+
ensureVecTable(dimensions) {
|
|
713
|
+
const db = this.sqliteDb;
|
|
714
|
+
const existing = db.prepare(
|
|
715
|
+
`SELECT name FROM sqlite_master WHERE type='table' AND name='chunks_vec'`
|
|
716
|
+
).get();
|
|
717
|
+
if (!existing) {
|
|
718
|
+
db.exec(`
|
|
719
|
+
CREATE VIRTUAL TABLE chunks_vec USING vec0(
|
|
720
|
+
chunk_id INTEGER PRIMARY KEY,
|
|
721
|
+
embedding float[${dimensions}] distance_metric=cosine
|
|
722
|
+
);
|
|
723
|
+
`);
|
|
724
|
+
}
|
|
725
|
+
this.vecDimensions = dimensions;
|
|
726
|
+
}
|
|
727
|
+
async initLanceTables() {
|
|
728
|
+
const db = this.lanceDb;
|
|
729
|
+
const tableNames = await db.tableNames();
|
|
730
|
+
if (tableNames.includes("chunks")) {
|
|
731
|
+
this.chunksTable = await db.openTable("chunks");
|
|
732
|
+
}
|
|
733
|
+
}
|
|
734
|
+
// ── Embedding ──
|
|
735
|
+
/**
 * Embed an array of texts with the configured provider.
 * @param {string[]} texts - Texts to embed; empty input short-circuits to [].
 * @returns {Promise<number[][]>} One embedding vector per input text, in order.
 * @throws {Error} When the provider's API key is missing or the provider is unknown.
 */
async embed(texts) {
  if (texts.length === 0) return [];
  const cfg = this.config;
  switch (cfg.embeddingProvider) {
    case "openai": {
      if (!cfg.openaiApiKey) throw new Error("OpenAI API key required");
      const model = cfg.openaiModel || "text-embedding-3-small";
      // Split work into batches capped by total characters (800k) so a
      // single request stays under the provider's payload limits.
      const maxCharsPerBatch = 8e5;
      const results = [];
      let batch = [];
      let batchChars = 0;
      for (const text of texts) {
        // Flush the current batch before it would overflow; a single
        // oversized text still goes out alone in its own batch.
        if (batchChars + text.length > maxCharsPerBatch && batch.length > 0) {
          results.push(...await embedOpenAI(batch, cfg.openaiApiKey, model));
          batch = [];
          batchChars = 0;
        }
        batch.push(text);
        batchChars += text.length;
      }
      if (batch.length > 0) {
        results.push(...await embedOpenAI(batch, cfg.openaiApiKey, model));
      }
      return results;
    }
    case "ollama":
      return embedOllama(texts, cfg.ollamaHost || "http://localhost:11434", cfg.ollamaModel || "nomic-embed-text");
    case "google":
      if (!cfg.googleApiKey) throw new Error("Google API key required");
      return embedGoogle(texts, cfg.googleApiKey, cfg.googleModel || "text-embedding-004");
    default:
      throw new Error(`Unknown embedding provider: ${cfg.embeddingProvider}`);
  }
}
|
|
769
|
+
// ── Chunking ──
|
|
770
|
+
/**
 * Split text into overlapping chunks, preferring paragraph and sentence
 * boundaries near the target size. Uses the rough heuristic of 4 chars
 * per token.
 * @param {string} text - Input text to split.
 * @param {number} [targetTokens=400] - Desired chunk size in tokens.
 * @param {number} [overlapTokens=80] - Overlap between consecutive chunks.
 * @returns {string[]} Trimmed, non-empty chunks in document order.
 */
chunkText(text, targetTokens = 400, overlapTokens = 80) {
  const targetChars = targetTokens * 4;
  const overlapChars = overlapTokens * 4;
  const chunks = [];
  let start = 0;
  while (start < text.length) {
    let end = Math.min(start + targetChars, text.length);
    if (end < text.length) {
      // Only accept a natural break in the second half of the window so
      // chunks never shrink below ~50% of the target size.
      const minBreak = start + Math.floor(targetChars * 0.5);
      const paraBreak = text.lastIndexOf("\n\n", end);
      if (paraBreak > minBreak) {
        end = paraBreak;
      } else {
        const sentBreak = text.lastIndexOf(". ", end);
        if (sentBreak > minBreak) {
          end = sentBreak + 1;
        }
      }
    }
    const chunk = text.slice(start, end).trim();
    if (chunk.length > 0) chunks.push(chunk);
    if (end >= text.length) break;
    // Step back by the overlap for context continuity; the guard below
    // forces forward progress if the overlap would rewind too far.
    start = end - overlapChars;
    if (start <= (chunks.length > 0 ? end - targetChars : 0)) {
      start = end;
    }
  }
  return chunks;
}
|
|
799
|
+
// ── Ingest ──
|
|
800
|
+
/**
 * Ingest chunks: dedupe by sha256 text hash, embed the new ones, write
 * text + vectors to SQLite in a single transaction, then best-effort
 * dual-write to LanceDB (legacy store). Returns the count of newly
 * inserted chunks (duplicates are skipped silently).
 */
async ingest(chunks) {
  if (chunks.length === 0) return 0;
  const db = this.sqliteDb;
  // Drop chunks whose exact text is already stored (hash lookup per chunk).
  const newChunks = chunks.filter((c) => {
    const hash = createHash("sha256").update(c.text).digest("hex");
    return !db.prepare("SELECT 1 FROM chunks WHERE text_hash = ?").get(hash);
  });
  if (newChunks.length === 0) return 0;
  const texts = newChunks.map((c) => c.text);
  const embeddings = await this.embed(texts);
  // The vec table is created lazily on the first embedding because its
  // dimension is baked into the DDL.
  if (!this.vecDimensions && embeddings.length > 0) {
    this.ensureVecTable(embeddings[0].length);
  }
  const insertChunk = db.prepare(`
    INSERT INTO chunks (text, text_hash, role, source_type, source_id, agent_id, token_count, created_at)
    VALUES (?, ?, ?, ?, ?, ?, ?, ?)
  `);
  const insertVec = db.prepare(`
    INSERT INTO chunks_vec (chunk_id, embedding) VALUES (?, ?)
  `);
  // Rows and vectors are committed atomically; the FTS mirror is filled by
  // the chunks_fts_insert trigger.
  const transaction = db.transaction(() => {
    for (let i = 0; i < newChunks.length; i++) {
      const c = newChunks[i];
      const hash = createHash("sha256").update(c.text).digest("hex");
      const result = insertChunk.run(
        c.text,
        hash,
        c.role,
        c.source_type,
        c.source_id,
        c.agent_id,
        c.token_count,
        c.created_at || (/* @__PURE__ */ new Date()).toISOString()
      );
      // sqlite-vec keys by rowid; normalize lastInsertRowid to BigInt.
      const chunkId = typeof result.lastInsertRowid === "bigint" ? result.lastInsertRowid : BigInt(result.lastInsertRowid);
      insertVec.run(chunkId, new Float32Array(embeddings[i]));
    }
  });
  transaction();
  const records = newChunks.map((chunk, i) => ({
    text: chunk.text,
    vector: embeddings[i],
    role: chunk.role,
    source_type: chunk.source_type,
    source_id: chunk.source_id,
    agent_id: chunk.agent_id,
    token_count: chunk.token_count,
    created_at: chunk.created_at || (/* @__PURE__ */ new Date()).toISOString()
  }));
  // LanceDB is a secondary store here; failures are logged, not raised,
  // so SQLite remains the source of truth.
  try {
    if (!this.chunksTable) {
      this.chunksTable = await this.lanceDb.createTable("chunks", records);
    } else {
      await this.chunksTable.add(records);
    }
  } catch (err) {
    console.warn("LanceDB dual-write failed (non-fatal):", err.message);
  }
  return newChunks.length;
}
|
|
860
|
+
// ── Delta Sync (export/import pre-embedded chunks) ──
|
|
861
|
+
/** Schema version of delta-sync payloads produced by exportChunksSince()
 * and consumed by importChunks(). Bump when the exported shape changes. */
static DELTA_VERSION = 1;
|
|
863
|
+
/**
 * Export chunks with IDs greater than sinceId, including their stored
 * embeddings, for delta sync. Core calls this to build payloads for Nodes.
 * @param {number} sinceId - Watermark; only chunks with id > sinceId are returned.
 * @returns Array of plain objects ordered by id, with `embedding` as a
 *          number[] (JSON-serializable) or null when no vector is stored.
 */
exportChunksSince(sinceId) {
  const db = this.sqliteDb;
  // LEFT JOIN: chunks without a vector row still export (embedding=null).
  const rows = db.prepare(`
    SELECT c.id, c.text, c.text_hash, c.role, c.source_type, c.source_id,
           c.agent_id, c.token_count, c.created_at, v.embedding
    FROM chunks c
    LEFT JOIN chunks_vec v ON v.chunk_id = c.id
    WHERE c.id > ?
    ORDER BY c.id ASC
  `).all(sinceId);
  return rows.map((row) => ({
    id: row.id,
    text: row.text,
    text_hash: row.text_hash,
    role: row.role,
    source_type: row.source_type,
    source_id: row.source_id,
    agent_id: row.agent_id,
    token_count: row.token_count,
    created_at: row.created_at,
    // Convert Float32Array buffer to number[] for JSON serialization
    // (byteLength / 4 = float32 component count).
    embedding: row.embedding ? Array.from(new Float32Array(row.embedding.buffer, row.embedding.byteOffset, row.embedding.byteLength / 4)) : null
  }));
}
|
|
889
|
+
/** Get the highest chunk ID in the database. Used for watermark tracking. */
|
|
890
|
+
getMaxChunkId() {
|
|
891
|
+
const db = this.sqliteDb;
|
|
892
|
+
const row = db.prepare("SELECT MAX(id) as maxId FROM chunks").get();
|
|
893
|
+
return row.maxId || 0;
|
|
894
|
+
}
|
|
895
|
+
/**
 * Import pre-embedded chunks from Core (delta-sync apply on a Node).
 * Skips chunks whose text_hash already exists; never re-embeds — vectors
 * come from the payload. All inserts run in one transaction.
 * @param exported - Array produced by exportChunksSince().
 * @returns {number} Count of chunks actually inserted.
 */
importChunks(exported) {
  if (exported.length === 0) return 0;
  const db = this.sqliteDb;
  // Lazily create chunks_vec sized to the first embedding in the payload.
  const firstWithEmbed = exported.find((c) => c.embedding && c.embedding.length > 0);
  if (firstWithEmbed && !this.vecDimensions) {
    this.ensureVecTable(firstWithEmbed.embedding.length);
  }
  const insertChunk = db.prepare(`
    INSERT INTO chunks (text, text_hash, role, source_type, source_id, agent_id, token_count, created_at)
    VALUES (?, ?, ?, ?, ?, ?, ?, ?)
  `);
  const insertVec = db.prepare(`
    INSERT INTO chunks_vec (chunk_id, embedding) VALUES (?, ?)
  `);
  const checkHash = db.prepare("SELECT 1 FROM chunks WHERE text_hash = ?");
  let imported = 0;
  const transaction = db.transaction(() => {
    for (const chunk of exported) {
      // Dedup against local store; the payload's text_hash is trusted.
      if (checkHash.get(chunk.text_hash)) continue;
      const result = insertChunk.run(
        chunk.text,
        chunk.text_hash,
        chunk.role,
        chunk.source_type,
        chunk.source_id,
        chunk.agent_id,
        chunk.token_count,
        chunk.created_at
      );
      if (chunk.embedding && chunk.embedding.length > 0) {
        // sqlite-vec keys by rowid; normalize lastInsertRowid to BigInt.
        const chunkId = typeof result.lastInsertRowid === "bigint" ? result.lastInsertRowid : BigInt(result.lastInsertRowid);
        insertVec.run(chunkId, new Float32Array(chunk.embedding));
      }
      imported++;
    }
  });
  transaction();
  return imported;
}
|
|
936
|
+
// ── Recency helpers ──
|
|
937
|
+
recencyWeight(ageDays) {
|
|
938
|
+
return Math.max(0.3, Math.exp(-ageDays * 0.1));
|
|
939
|
+
}
|
|
940
|
+
/** Parse relative time strings ("24h", "7d", "30d") or ISO dates into ISO date strings. */
|
|
941
|
+
parseSince(since) {
|
|
942
|
+
const match = since.match(/^(\d+)(h|d)$/);
|
|
943
|
+
if (match) {
|
|
944
|
+
const [, num, unit] = match;
|
|
945
|
+
const ms = unit === "h" ? parseInt(num) * 36e5 : parseInt(num) * 864e5;
|
|
946
|
+
return new Date(Date.now() - ms).toISOString();
|
|
947
|
+
}
|
|
948
|
+
const parsed = new Date(since);
|
|
949
|
+
if (!isNaN(parsed.getTime())) return parsed.toISOString();
|
|
950
|
+
return void 0;
|
|
951
|
+
}
|
|
952
|
+
freshnessLabel(ageDays) {
|
|
953
|
+
if (ageDays < 3) return "fresh";
|
|
954
|
+
if (ageDays < 7) return "recent";
|
|
955
|
+
if (ageDays < 14) return "aging";
|
|
956
|
+
return "stale";
|
|
957
|
+
}
|
|
958
|
+
// ── Search (Hybrid: BM25 + Vector + RRF fusion + Recency) ──
|
|
959
|
+
/**
 * Hybrid search: BM25 (FTS5) + vector (sqlite-vec), fused with weighted
 * Reciprocal Rank Fusion, then rescored by recency and normalized to
 * (0, 0.95]. Falls back to LanceDB when the SQLite store looks
 * unmigrated (empty, or holding less than half of LanceDB's rows).
 */
async search(query, limit = 5, filter) {
  const db = this.sqliteDb;
  const sqliteChunks = db.prepare("SELECT COUNT(*) as count FROM chunks").get()?.count || 0;
  let lanceChunks = 0;
  if (this.chunksTable) {
    try {
      lanceChunks = await this.chunksTable.countRows();
    } catch {
      // countRows is best-effort; a failure just disables the fallback check.
    }
  }
  // Pre-migration heuristic: prefer LanceDB while SQLite is clearly behind.
  if (sqliteChunks === 0 || lanceChunks > 0 && sqliteChunks < lanceChunks * 0.5) {
    return this.searchLanceFallback(query, limit, filter);
  }
  const sinceDate = filter?.since ? this.parseSince(filter.since) : void 0;
  const untilDate = filter?.until ? this.parseSince(filter.until) : void 0;
  const [embedding] = await this.embed([query]);
  // Over-fetch candidates so fusion + recency rescoring have room to reorder.
  const fetchLimit = Math.max(limit * 5, 50);
  const vecResults = this.searchVec(embedding, fetchLimit, { ...filter, sinceDate, untilDate });
  const ftsResults = this.searchFTS(query, fetchLimit, { ...filter, sinceDate, untilDate });
  // FTS list is weighted 2:1 over the vector list in RRF.
  const fused = this.reciprocalRankFusion([ftsResults, vecResults], [2, 1]);
  const now = Date.now();
  const scored = fused.map((r) => {
    const ageDays = r.created_at ? (now - new Date(r.created_at).getTime()) / 864e5 : 0;
    const recency = r.created_at ? this.recencyWeight(ageDays) : 1;
    const rescaled = r.score * recency;
    return {
      ...r,
      score: rescaled,
      freshness: r.created_at ? this.freshnessLabel(ageDays) : void 0
    };
  });
  const sorted = scored.sort((a, b) => b.score - a.score).slice(0, limit);
  // Normalize so the top hit scores 0.95 and the rest scale relative to it.
  const topScore = sorted[0]?.score || 1;
  return sorted.map((r) => ({ ...r, score: Math.min(r.score / topScore * 0.95, 0.95) }));
}
|
|
994
|
+
/**
 * Deep search: query expansion + LLM re-ranking + position-aware blending.
 * Falls back to standard search if no LLM provider is available.
 * Supports intent disambiguation, candidateLimit tuning, and explain traces.
 * The search pipeline module is loaded lazily via the bundler's deferred
 * init wrapper to keep startup cost low.
 */
async deepSearch(query, limit = 5, filter, options) {
  const { deepSearch: deepSearchFn } = await Promise.resolve().then(() => (init_search_pipeline(), search_pipeline_exports));
  return deepSearchFn(this, query, { limit, filter, ...options });
}
|
|
1001
|
+
/**
 * Structured search: the caller supplies pre-expanded, typed queries so no
 * LLM expansion happens here. "lex" queries run through FTS5; everything
 * else ("vec", "hyde") is embedded and run through sqlite-vec. Result
 * lists are fused with RRF (first list weighted 2x), recency-rescored,
 * and normalized like search().
 */
async structuredSearch(queries, limit = 5, filter) {
  const db = this.sqliteDb;
  const sinceDate = filter?.since ? this.parseSince(filter.since) : void 0;
  const untilDate = filter?.until ? this.parseSince(filter.until) : void 0;
  const internalFilter = { ...filter, sinceDate, untilDate };
  const allResultLists = [];
  for (const q of queries) {
    if (q.type === "lex") {
      const fts = this.searchFTS(q.text, Math.max(limit * 5, 50), internalFilter);
      if (fts.length > 0) allResultLists.push(fts);
    } else {
      const [embedding] = await this.embed([q.text]);
      const vec = this.searchVec(embedding, Math.max(limit * 5, 50), internalFilter);
      if (vec.length > 0) allResultLists.push(vec);
    }
  }
  // First non-empty list carries double weight (mirrors search()'s FTS bias).
  const weights = allResultLists.map((_, i) => i === 0 ? 2 : 1);
  const fused = this.reciprocalRankFusion(allResultLists, weights);
  const now = Date.now();
  const scored = fused.map((r) => {
    const ageDays = r.created_at ? (now - new Date(r.created_at).getTime()) / 864e5 : 0;
    const recency = r.created_at ? this.recencyWeight(ageDays) : 1;
    return { ...r, score: r.score * recency, freshness: r.created_at ? this.freshnessLabel(ageDays) : void 0 };
  });
  const sorted = scored.sort((a, b) => b.score - a.score).slice(0, limit);
  const topScore = sorted[0]?.score || 1;
  return sorted.map((r) => ({ ...r, score: Math.min(r.score / topScore * 0.95, 0.95) }));
}
|
|
1031
|
+
/**
 * Vector search via sqlite-vec. Two-step pattern: KNN MATCH on the vec
 * table first, then a second query joins chunk metadata and applies
 * filters. Note the filters run AFTER the KNN step, so fewer than
 * `limit` rows may survive. Returns [] until the vec table exists.
 */
searchVec(embedding, limit, filter) {
  const db = this.sqliteDb;
  if (!this.vecDimensions) return [];
  // Step 1: pure KNN over embeddings (k = limit nearest by cosine distance).
  const vecRows = db.prepare(`
    SELECT chunk_id, distance
    FROM chunks_vec
    WHERE embedding MATCH ? AND k = ?
  `).all(new Float32Array(embedding), limit);
  if (vecRows.length === 0) return [];
  const ids = vecRows.map((r) => r.chunk_id);
  const distMap = new Map(vecRows.map((r) => [r.chunk_id, r.distance]));
  // Step 2: fetch metadata for the matched ids, with optional filters.
  const placeholders = ids.map(() => "?").join(",");
  let sql = `SELECT id, text, role, source_type, source_id, agent_id, created_at FROM chunks WHERE id IN (${placeholders})`;
  const params = [...ids];
  if (filter?.agent_id) {
    sql += " AND agent_id = ?";
    params.push(filter.agent_id);
  }
  if (filter?.source_type) {
    sql += " AND source_type = ?";
    params.push(filter.source_type);
  }
  if (filter?.sinceDate) {
    sql += " AND created_at >= ?";
    params.push(filter.sinceDate);
  }
  if (filter?.untilDate) {
    sql += " AND created_at < ?";
    params.push(filter.untilDate);
  }
  const rows = db.prepare(sql).all(...params);
  return rows.map((row) => ({
    text: row.text,
    role: row.role,
    score: 1 - (distMap.get(row.id) || 1),
    // cosine similarity from distance
    source_type: row.source_type,
    source_id: row.source_id,
    agent_id: row.agent_id,
    created_at: row.created_at
  }));
}
|
|
1074
|
+
/**
 * Full-text search via FTS5 with BM25 scoring. The raw query is sanitized
 * by buildFTS5Query(); returns [] when nothing searchable remains.
 * Filters are ANDed onto the joined chunks row; results are ordered by
 * BM25 rank (most relevant first) and capped at `limit`.
 */
searchFTS(query, limit, filter) {
  const db = this.sqliteDb;
  const ftsQuery = this.buildFTS5Query(query);
  if (!ftsQuery) return [];
  let sql = `
    SELECT c.id, c.text, c.role, c.source_type, c.source_id, c.agent_id, c.created_at,
           bm25(chunks_fts) as bm25_score
    FROM chunks_fts f
    JOIN chunks c ON c.id = f.rowid
    WHERE chunks_fts MATCH ?
  `;
  const params = [ftsQuery];
  if (filter?.agent_id) {
    sql += " AND c.agent_id = ?";
    params.push(filter.agent_id);
  }
  if (filter?.source_type) {
    sql += " AND c.source_type = ?";
    params.push(filter.source_type);
  }
  if (filter?.sinceDate) {
    sql += " AND c.created_at >= ?";
    params.push(filter.sinceDate);
  }
  if (filter?.untilDate) {
    sql += " AND c.created_at < ?";
    params.push(filter.untilDate);
  }
  // bm25() is ascending-better, so plain ORDER BY ranks best-first.
  sql += " ORDER BY bm25_score LIMIT ?";
  params.push(limit);
  const rows = db.prepare(sql).all(...params);
  return rows.map((row) => ({
    text: row.text,
    role: row.role,
    // BM25 scores are negative (lower = better). Normalize to [0..1).
    // |x| / (1 + |x|) maps: strong(-10)->0.91, medium(-2)->0.67, weak(-0.5)->0.33
    score: Math.abs(row.bm25_score) / (1 + Math.abs(row.bm25_score)),
    source_type: row.source_type,
    source_id: row.source_id,
    agent_id: row.agent_id,
    created_at: row.created_at
  }));
}
|
|
1118
|
+
/** Build a safe FTS5 query from user input. */
|
|
1119
|
+
buildFTS5Query(query) {
|
|
1120
|
+
const terms = query.split(/\s+/).map((t) => t.replace(/[^\p{L}\p{N}']/gu, "").toLowerCase()).filter((t) => t.length > 0);
|
|
1121
|
+
if (terms.length === 0) return null;
|
|
1122
|
+
if (terms.length === 1) return `"${terms[0]}"*`;
|
|
1123
|
+
return terms.map((t) => `"${t}"*`).join(" AND ");
|
|
1124
|
+
}
|
|
1125
|
+
/**
 * Reciprocal Rank Fusion. Ported from QMD (MIT License, Tobi Lutke, 2024-2026).
 * Fuses multiple ranked result lists into one using RRF scoring.
 * Uses text content (first 200 chars) as the dedup key instead of QMD's
 * file path; a result appearing in several lists accumulates score.
 * After fusion, a small bonus rewards results that ranked at or near the
 * top of any input list.
 * @param resultLists - Arrays of results, each sorted best-first.
 * @param weights - Per-list multipliers (defaults to 1 each).
 * @param k - RRF damping constant; larger flattens rank differences.
 */
reciprocalRankFusion(resultLists, weights = [], k = 60) {
  const scores = /* @__PURE__ */ new Map();
  for (let listIdx = 0; listIdx < resultLists.length; listIdx++) {
    const list = resultLists[listIdx];
    if (!list) continue;
    const weight = weights[listIdx] ?? 1;
    for (let rank = 0; rank < list.length; rank++) {
      const result = list[rank];
      if (!result) continue;
      // Classic RRF term: weight / (k + rank), with rank 1-based here.
      const rrfContribution = weight / (k + rank + 1);
      const dedup = result.text.slice(0, 200);
      const existing = scores.get(dedup);
      if (existing) {
        existing.rrfScore += rrfContribution;
        existing.topRank = Math.min(existing.topRank, rank);
      } else {
        scores.set(dedup, {
          result,
          rrfScore: rrfContribution,
          topRank: rank
        });
      }
    }
  }
  // Position bonus: +0.05 for any #1 placement, +0.02 for top-3.
  for (const entry of scores.values()) {
    if (entry.topRank === 0) {
      entry.rrfScore += 0.05;
    } else if (entry.topRank <= 2) {
      entry.rrfScore += 0.02;
    }
  }
  return Array.from(scores.values()).sort((a, b) => b.rrfScore - a.rrfScore).map((e) => ({ ...e.result, score: e.rrfScore }));
}
|
|
1163
|
+
/** LanceDB fallback for search (used when sqlite-vec tables are empty, pre-migration). */
|
|
1164
|
+
async searchLanceFallback(query, limit, filter) {
|
|
1165
|
+
if (!this.chunksTable) return [];
|
|
1166
|
+
const [embedding] = await this.embed([query]);
|
|
1167
|
+
const fetchLimit = Math.max(limit * 3, 30);
|
|
1168
|
+
let queryBuilder = this.chunksTable.vectorSearch(embedding).distanceType("cosine").limit(fetchLimit);
|
|
1169
|
+
if (filter?.agent_id) {
|
|
1170
|
+
queryBuilder = queryBuilder.where(`agent_id = '${filter.agent_id}'`);
|
|
1171
|
+
}
|
|
1172
|
+
if (filter?.source_type) {
|
|
1173
|
+
queryBuilder = queryBuilder.where(`source_type = '${filter.source_type}'`);
|
|
1174
|
+
}
|
|
1175
|
+
const results = await queryBuilder.toArray();
|
|
1176
|
+
const now = Date.now();
|
|
1177
|
+
return results.map((row) => {
|
|
1178
|
+
const cosine = row._distance != null ? 1 - row._distance : 0;
|
|
1179
|
+
const createdAt = row.created_at || "";
|
|
1180
|
+
const ageDays = createdAt ? (now - new Date(createdAt).getTime()) / 864e5 : 0;
|
|
1181
|
+
const weight = createdAt ? this.recencyWeight(ageDays) : 1;
|
|
1182
|
+
return {
|
|
1183
|
+
text: row.text,
|
|
1184
|
+
role: row.role,
|
|
1185
|
+
score: cosine * weight,
|
|
1186
|
+
source_type: row.source_type,
|
|
1187
|
+
source_id: row.source_id,
|
|
1188
|
+
agent_id: row.agent_id,
|
|
1189
|
+
created_at: createdAt,
|
|
1190
|
+
freshness: createdAt ? this.freshnessLabel(ageDays) : void 0
|
|
1191
|
+
};
|
|
1192
|
+
}).sort((a, b) => b.score - a.score).slice(0, limit);
|
|
1193
|
+
}
|
|
1194
|
+
// ── Remember (explicit fact storage) ──
|
|
1195
|
+
/**
 * Store an explicit fact: insert an active row into `memories`, then
 * ingest the same text as a searchable chunk (source_type "manual",
 * source_id "memory:<rowid>") so it surfaces in hybrid search.
 * @param {string} text - The fact to remember.
 * @param {string} [category="fact"] - Memory category label.
 * @returns The new memory's rowid.
 */
async remember(text, category = "fact") {
  const db = this.sqliteDb;
  const now = (/* @__PURE__ */ new Date()).toISOString();
  const stmt = db.prepare(`
    INSERT INTO memories (text, category, confidence, source_ids, status, created_at, updated_at)
    VALUES (?, ?, 1.0, '[]', 'active', ?, ?)
  `);
  const result = stmt.run(text, category, now, now);
  await this.ingest([{
    text,
    role: "system",
    source_type: "manual",
    source_id: `memory:${result.lastInsertRowid}`,
    agent_id: "system",
    // ~4 chars per token heuristic, consistent with chunkText().
    token_count: Math.ceil(text.length / 4),
    created_at: now
  }]);
  return result.lastInsertRowid;
}
|
|
1214
|
+
// ── Forget (deprecate a memory) ──
|
|
1215
|
+
forget(memoryId) {
|
|
1216
|
+
const db = this.sqliteDb;
|
|
1217
|
+
const now = (/* @__PURE__ */ new Date()).toISOString();
|
|
1218
|
+
const result = db.prepare(`
|
|
1219
|
+
UPDATE memories SET status = 'deprecated', updated_at = ? WHERE id = ? AND status = 'active'
|
|
1220
|
+
`).run(now, memoryId);
|
|
1221
|
+
return result.changes > 0;
|
|
1222
|
+
}
|
|
1223
|
+
// ── Status ──
|
|
1224
|
+
/**
 * Aggregate store statistics: chunk/memory/source counts, known agent ids
 * (union across chunks, sources, and capture_state), chunk age range,
 * provider/config info, and capture progress. Chunk count is the max of
 * the SQLite and (best-effort) LanceDB counts to stay honest mid-migration.
 */
async status() {
  const db = this.sqliteDb;
  const sqliteChunks = db.prepare("SELECT COUNT(*) as count FROM chunks").get()?.count || 0;
  let lanceChunks = 0;
  if (this.chunksTable) {
    try {
      lanceChunks = await this.chunksTable.countRows();
    } catch {
      // Legacy store unreadable — report SQLite numbers only.
    }
  }
  const chunks = Math.max(sqliteChunks, lanceChunks);
  const oldest = db.prepare("SELECT MIN(created_at) as ts FROM chunks").get()?.ts || null;
  const newest = db.prepare("SELECT MAX(created_at) as ts FROM chunks").get()?.ts || null;
  const memories = db.prepare("SELECT COUNT(*) as count FROM memories WHERE status = ?").get("active")?.count || 0;
  const sources = db.prepare("SELECT COUNT(*) as count FROM sources").get()?.count || 0;
  // Agents can appear in any of three tables; dedupe via Set union.
  const chunkAgentRows = db.prepare("SELECT DISTINCT agent_id FROM chunks WHERE agent_id IS NOT NULL").all();
  const sourceAgentRows = db.prepare("SELECT DISTINCT agent_id FROM sources").all();
  const captureAgentRows = db.prepare("SELECT DISTINCT agent_id FROM capture_state").all();
  const agents = [.../* @__PURE__ */ new Set([
    ...chunkAgentRows.map((r) => r.agent_id),
    ...sourceAgentRows.map((r) => r.agent_id),
    ...captureAgentRows.map((r) => r.agent_id)
  ])];
  const captureInfo = db.prepare(
    "SELECT COUNT(*) as count, MAX(last_capture_at) as latest FROM capture_state"
  ).get();
  return {
    chunks,
    memories,
    sources,
    agents,
    oldestChunk: oldest,
    newestChunk: newest,
    embeddingProvider: this.config.embeddingProvider,
    dataDir: this.config.dataDir,
    capturedSessions: captureInfo?.count || 0,
    latestCapture: captureInfo?.latest || null
  };
}
|
|
1263
|
+
// ── Capture State (for incremental ingestion) ──
|
|
1264
|
+
getCaptureState(agentId, sourceId) {
|
|
1265
|
+
const db = this.sqliteDb;
|
|
1266
|
+
const row = db.prepare("SELECT last_message_count, capture_count FROM capture_state WHERE agent_id = ? AND source_id = ?").get(agentId, sourceId);
|
|
1267
|
+
if (!row) return { lastMessageCount: 0, captureCount: 0 };
|
|
1268
|
+
return {
|
|
1269
|
+
lastMessageCount: row.last_message_count,
|
|
1270
|
+
captureCount: row.capture_count
|
|
1271
|
+
};
|
|
1272
|
+
}
|
|
1273
|
+
setCaptureState(agentId, sourceId, messageCount, captureCount) {
|
|
1274
|
+
const db = this.sqliteDb;
|
|
1275
|
+
db.prepare(`
|
|
1276
|
+
INSERT OR REPLACE INTO capture_state (agent_id, source_id, last_message_count, capture_count, last_capture_at)
|
|
1277
|
+
VALUES (?, ?, ?, ?, ?)
|
|
1278
|
+
`).run(agentId, sourceId, messageCount, captureCount, (/* @__PURE__ */ new Date()).toISOString());
|
|
1279
|
+
}
|
|
1280
|
+
// ── Source File Indexing (optional feature) ──
|
|
1281
|
+
//
|
|
1282
|
+
// Add directories as "collections", sync to index/re-index changed files.
|
|
1283
|
+
// All source chunks get source_type='file' so they're searchable alongside
|
|
1284
|
+
// conversations and memories. Nothing here is required... you can use MC
|
|
1285
|
+
// without ever touching sources.
|
|
1286
|
+
// Default patterns for files worth indexing
|
|
1287
|
+
// Default glob patterns for source-file indexing: code, config, and docs
// worth searching. Used by sourcesAdd() when no include list is given.
static DEFAULT_INCLUDE = [
  "**/*.ts",
  "**/*.js",
  "**/*.tsx",
  "**/*.jsx",
  "**/*.py",
  "**/*.rs",
  "**/*.go",
  "**/*.java",
  "**/*.md",
  "**/*.txt",
  "**/*.json",
  "**/*.yaml",
  "**/*.yml",
  "**/*.toml",
  "**/*.sh",
  "**/*.bash",
  "**/*.zsh",
  "**/*.css",
  "**/*.html",
  "**/*.svg",
  "**/*.sql",
  "**/*.graphql",
  "**/*.c",
  "**/*.cpp",
  "**/*.h",
  "**/*.hpp",
  "**/*.swift",
  "**/*.kt",
  "**/*.rb",
  "**/*.env.example",
  "**/*.gitignore",
  "**/Makefile",
  "**/Dockerfile",
  "**/Cargo.toml",
  "**/package.json",
  "**/tsconfig.json"
];
|
|
1325
|
+
// Default ignore globs: dependency/build output, lockfiles, binary assets,
// local databases, and secret-bearing paths are never indexed.
static DEFAULT_IGNORE = [
  "**/node_modules/**",
  "**/.git/**",
  "**/dist/**",
  "**/build/**",
  "**/.next/**",
  "**/.cache/**",
  "**/coverage/**",
  "**/__pycache__/**",
  "**/target/**",
  "**/vendor/**",
  "**/.venv/**",
  "**/*.lock",
  "**/package-lock.json",
  "**/yarn.lock",
  "**/bun.lockb",
  "**/*.min.js",
  "**/*.min.css",
  "**/*.map",
  "**/*.png",
  "**/*.jpg",
  "**/*.jpeg",
  "**/*.gif",
  "**/*.ico",
  "**/*.webp",
  "**/*.woff",
  "**/*.woff2",
  "**/*.ttf",
  "**/*.eot",
  "**/*.mp3",
  "**/*.mp4",
  "**/*.wav",
  "**/*.ogg",
  "**/*.webm",
  "**/*.zip",
  "**/*.tar",
  "**/*.gz",
  "**/*.br",
  "**/*.sqlite",
  "**/*.db",
  "**/*.lance/**",
  "**/*.jsonl",
  "**/secrets/**",
  "**/.env"
];
|
|
1370
|
+
/**
 * Add a directory as a source collection for indexing.
 * @param {string} rootPath - Directory to index.
 * @param {string} name - Unique collection name.
 * @param options - Optional `include`/`ignore` glob arrays; defaults to
 *                  DEFAULT_INCLUDE / DEFAULT_IGNORE.
 * @returns The freshly inserted collection row.
 * @throws {Error} When a collection with this name already exists.
 * NOTE(review): check-then-insert is not atomic — a concurrent add of the
 * same name could hit the UNIQUE constraint instead of this error; confirm
 * single-writer usage.
 */
async sourcesAdd(rootPath, name, options) {
  const db = this.sqliteDb;
  const now = (/* @__PURE__ */ new Date()).toISOString();
  // Patterns are stored as JSON arrays; _Crystal is the bundler's alias for
  // this class, used to reach the static defaults.
  const includePatterns = JSON.stringify(options?.include || _Crystal.DEFAULT_INCLUDE);
  const ignorePatterns = JSON.stringify(options?.ignore || _Crystal.DEFAULT_IGNORE);
  const existing = db.prepare("SELECT * FROM source_collections WHERE name = ?").get(name);
  if (existing) {
    throw new Error(`Collection "${name}" already exists. Use sourcesSync() to update it.`);
  }
  db.prepare(`
    INSERT INTO source_collections (name, root_path, glob_patterns, ignore_patterns, created_at)
    VALUES (?, ?, ?, ?, ?)
  `).run(name, rootPath, includePatterns, ignorePatterns, now);
  const row = db.prepare("SELECT * FROM source_collections WHERE name = ?").get(name);
  return row;
}
|
|
1387
|
+
/** Remove a source collection and its file records. Chunks remain in LanceDB. */
|
|
1388
|
+
sourcesRemove(name) {
|
|
1389
|
+
const db = this.sqliteDb;
|
|
1390
|
+
const col = db.prepare("SELECT id FROM source_collections WHERE name = ?").get(name);
|
|
1391
|
+
if (!col) return false;
|
|
1392
|
+
db.prepare("DELETE FROM source_files WHERE collection_id = ?").run(col.id);
|
|
1393
|
+
db.prepare("DELETE FROM source_collections WHERE id = ?").run(col.id);
|
|
1394
|
+
return true;
|
|
1395
|
+
}
|
|
1396
|
+
/** Sync a collection: scan files, detect changes, re-index what changed. */
|
|
1397
|
+
async sourcesSync(name, options) {
|
|
1398
|
+
const db = this.sqliteDb;
|
|
1399
|
+
const startTime = Date.now();
|
|
1400
|
+
const batchSize = options?.batchSize || 20;
|
|
1401
|
+
const col = db.prepare("SELECT * FROM source_collections WHERE name = ?").get(name);
|
|
1402
|
+
if (!col) throw new Error(`Collection "${name}" not found. Add it first with sourcesAdd().`);
|
|
1403
|
+
const includePatterns = JSON.parse(col.glob_patterns);
|
|
1404
|
+
const ignorePatterns = JSON.parse(col.ignore_patterns);
|
|
1405
|
+
const files = this.scanDirectory(col.root_path, includePatterns, ignorePatterns);
|
|
1406
|
+
const existingFiles = /* @__PURE__ */ new Map();
|
|
1407
|
+
const rows = db.prepare("SELECT id, file_path, file_hash FROM source_files WHERE collection_id = ?").all(col.id);
|
|
1408
|
+
for (const row of rows) {
|
|
1409
|
+
existingFiles.set(row.file_path, { id: row.id, file_hash: row.file_hash });
|
|
1410
|
+
}
|
|
1411
|
+
let added = 0;
|
|
1412
|
+
let updated = 0;
|
|
1413
|
+
let removed = 0;
|
|
1414
|
+
let chunksAdded = 0;
|
|
1415
|
+
const now = (/* @__PURE__ */ new Date()).toISOString();
|
|
1416
|
+
const toIndex = [];
|
|
1417
|
+
for (const absPath of files) {
|
|
1418
|
+
const relPath = relative(col.root_path, absPath);
|
|
1419
|
+
let content;
|
|
1420
|
+
try {
|
|
1421
|
+
content = readFileSync2(absPath, "utf-8");
|
|
1422
|
+
} catch {
|
|
1423
|
+
continue;
|
|
1424
|
+
}
|
|
1425
|
+
const stat = statSync(absPath);
|
|
1426
|
+
if (stat.size > 500 * 1024) continue;
|
|
1427
|
+
const hash = createHash("sha256").update(content).digest("hex");
|
|
1428
|
+
const existing = existingFiles.get(relPath);
|
|
1429
|
+
if (existing) {
|
|
1430
|
+
existingFiles.delete(relPath);
|
|
1431
|
+
if (existing.file_hash === hash) continue;
|
|
1432
|
+
toIndex.push({ relPath, absPath, hash, size: stat.size, isUpdate: true });
|
|
1433
|
+
} else {
|
|
1434
|
+
toIndex.push({ relPath, absPath, hash, size: stat.size, isUpdate: false });
|
|
1435
|
+
}
|
|
1436
|
+
}
|
|
1437
|
+
if (options?.dryRun) {
|
|
1438
|
+
const newFiles = toIndex.filter((f) => !f.isUpdate).length;
|
|
1439
|
+
const updatedFiles = toIndex.filter((f) => f.isUpdate).length;
|
|
1440
|
+
return {
|
|
1441
|
+
collection: name,
|
|
1442
|
+
added: newFiles,
|
|
1443
|
+
updated: updatedFiles,
|
|
1444
|
+
removed: existingFiles.size,
|
|
1445
|
+
chunks_added: 0,
|
|
1446
|
+
duration_ms: Date.now() - startTime
|
|
1447
|
+
};
|
|
1448
|
+
}
|
|
1449
|
+
for (let i = 0; i < toIndex.length; i += batchSize) {
|
|
1450
|
+
const batch = toIndex.slice(i, i + batchSize);
|
|
1451
|
+
const allChunks = [];
|
|
1452
|
+
for (const file of batch) {
|
|
1453
|
+
const content = readFileSync2(file.absPath, "utf-8");
|
|
1454
|
+
const ext = extname(file.absPath);
|
|
1455
|
+
const fileName = basename(file.absPath);
|
|
1456
|
+
const header = `File: ${file.relPath}
|
|
1457
|
+
|
|
1458
|
+
`;
|
|
1459
|
+
const textChunks = this.chunkText(header + content, 400, 80);
|
|
1460
|
+
const fileChunks = textChunks.map((text) => ({
|
|
1461
|
+
text,
|
|
1462
|
+
role: "system",
|
|
1463
|
+
source_type: "file",
|
|
1464
|
+
source_id: `file:${name}:${file.relPath}`,
|
|
1465
|
+
agent_id: "system",
|
|
1466
|
+
token_count: Math.ceil(text.length / 4),
|
|
1467
|
+
created_at: now
|
|
1468
|
+
}));
|
|
1469
|
+
allChunks.push(...fileChunks);
|
|
1470
|
+
if (file.isUpdate) {
|
|
1471
|
+
db.prepare(`
|
|
1472
|
+
UPDATE source_files SET file_hash = ?, file_size = ?, chunk_count = ?, last_indexed_at = ?
|
|
1473
|
+
WHERE collection_id = ? AND file_path = ?
|
|
1474
|
+
`).run(file.hash, file.size, fileChunks.length, now, col.id, file.relPath);
|
|
1475
|
+
updated++;
|
|
1476
|
+
} else {
|
|
1477
|
+
db.prepare(`
|
|
1478
|
+
INSERT INTO source_files (collection_id, file_path, file_hash, file_size, chunk_count, last_indexed_at)
|
|
1479
|
+
VALUES (?, ?, ?, ?, ?, ?)
|
|
1480
|
+
`).run(col.id, file.relPath, file.hash, file.size, fileChunks.length, now);
|
|
1481
|
+
added++;
|
|
1482
|
+
}
|
|
1483
|
+
}
|
|
1484
|
+
if (allChunks.length > 0) {
|
|
1485
|
+
const ingested = await this.ingest(allChunks);
|
|
1486
|
+
chunksAdded += ingested;
|
|
1487
|
+
}
|
|
1488
|
+
}
|
|
1489
|
+
for (const [relPath, { id }] of existingFiles) {
|
|
1490
|
+
db.prepare("DELETE FROM source_files WHERE id = ?").run(id);
|
|
1491
|
+
removed++;
|
|
1492
|
+
}
|
|
1493
|
+
const fileCount = db.prepare("SELECT COUNT(*) as count FROM source_files WHERE collection_id = ?").get(col.id).count;
|
|
1494
|
+
const chunkCount = db.prepare("SELECT SUM(chunk_count) as total FROM source_files WHERE collection_id = ?").get(col.id).total || 0;
|
|
1495
|
+
db.prepare("UPDATE source_collections SET file_count = ?, chunk_count = ?, last_sync_at = ? WHERE id = ?").run(fileCount, chunkCount, now, col.id);
|
|
1496
|
+
return {
|
|
1497
|
+
collection: name,
|
|
1498
|
+
added,
|
|
1499
|
+
updated,
|
|
1500
|
+
removed,
|
|
1501
|
+
chunks_added: chunksAdded,
|
|
1502
|
+
duration_ms: Date.now() - startTime
|
|
1503
|
+
};
|
|
1504
|
+
}
|
|
1505
|
+
/** Get status of all source collections. */
|
|
1506
|
+
sourcesStatus() {
|
|
1507
|
+
const db = this.sqliteDb;
|
|
1508
|
+
const collections = db.prepare("SELECT name, root_path, file_count, chunk_count, last_sync_at FROM source_collections").all();
|
|
1509
|
+
const totalFiles = collections.reduce((sum, c) => sum + c.file_count, 0);
|
|
1510
|
+
const totalChunks = collections.reduce((sum, c) => sum + c.chunk_count, 0);
|
|
1511
|
+
return {
|
|
1512
|
+
collections: collections.map((c) => ({
|
|
1513
|
+
name: c.name,
|
|
1514
|
+
root_path: c.root_path,
|
|
1515
|
+
file_count: c.file_count,
|
|
1516
|
+
chunk_count: c.chunk_count,
|
|
1517
|
+
last_sync_at: c.last_sync_at
|
|
1518
|
+
})),
|
|
1519
|
+
total_files: totalFiles,
|
|
1520
|
+
total_chunks: totalChunks
|
|
1521
|
+
};
|
|
1522
|
+
}
|
|
1523
|
+
/** Scan a directory recursively, matching include/ignore patterns. */
|
|
1524
|
+
scanDirectory(rootPath, includePatterns, ignorePatterns) {
|
|
1525
|
+
const results = [];
|
|
1526
|
+
const allowedExtensions = /* @__PURE__ */ new Set();
|
|
1527
|
+
const allowedExactNames = /* @__PURE__ */ new Set();
|
|
1528
|
+
for (const pattern of includePatterns) {
|
|
1529
|
+
const extMatch = pattern.match(/\*\*\/\*(\.\w+)$/);
|
|
1530
|
+
if (extMatch) {
|
|
1531
|
+
allowedExtensions.add(extMatch[1]);
|
|
1532
|
+
}
|
|
1533
|
+
const nameMatch = pattern.match(/\*\*\/([^*]+)$/);
|
|
1534
|
+
if (nameMatch && !nameMatch[1].startsWith("*.")) {
|
|
1535
|
+
allowedExactNames.add(nameMatch[1]);
|
|
1536
|
+
}
|
|
1537
|
+
}
|
|
1538
|
+
const ignoreDirs = /* @__PURE__ */ new Set();
|
|
1539
|
+
for (const pattern of ignorePatterns) {
|
|
1540
|
+
const dirMatch = pattern.match(/\*\*\/([^/*]+)\/\*\*$/);
|
|
1541
|
+
if (dirMatch) {
|
|
1542
|
+
ignoreDirs.add(dirMatch[1]);
|
|
1543
|
+
}
|
|
1544
|
+
}
|
|
1545
|
+
const ignoreFiles = /* @__PURE__ */ new Set();
|
|
1546
|
+
for (const pattern of ignorePatterns) {
|
|
1547
|
+
const fileMatch = pattern.match(/\*\*\/\*(\.\w+)$/);
|
|
1548
|
+
if (fileMatch) {
|
|
1549
|
+
ignoreFiles.add(fileMatch[1]);
|
|
1550
|
+
}
|
|
1551
|
+
const exactMatch = pattern.match(/\*\*\/([^*]+)$/);
|
|
1552
|
+
if (exactMatch && !exactMatch[1].includes("/")) {
|
|
1553
|
+
ignoreFiles.add(exactMatch[1]);
|
|
1554
|
+
}
|
|
1555
|
+
}
|
|
1556
|
+
const walk = (dir) => {
|
|
1557
|
+
let entries;
|
|
1558
|
+
try {
|
|
1559
|
+
entries = readdirSync(dir);
|
|
1560
|
+
} catch {
|
|
1561
|
+
return;
|
|
1562
|
+
}
|
|
1563
|
+
for (const entry of entries) {
|
|
1564
|
+
const fullPath = join2(dir, entry);
|
|
1565
|
+
let stat;
|
|
1566
|
+
try {
|
|
1567
|
+
stat = statSync(fullPath);
|
|
1568
|
+
} catch {
|
|
1569
|
+
continue;
|
|
1570
|
+
}
|
|
1571
|
+
if (stat.isDirectory()) {
|
|
1572
|
+
if (ignoreDirs.has(entry)) continue;
|
|
1573
|
+
if (entry.startsWith(".")) continue;
|
|
1574
|
+
walk(fullPath);
|
|
1575
|
+
} else if (stat.isFile()) {
|
|
1576
|
+
const ext = extname(entry);
|
|
1577
|
+
if (ignoreFiles.has(ext)) continue;
|
|
1578
|
+
if (ignoreFiles.has(entry)) continue;
|
|
1579
|
+
if (allowedExtensions.has(ext) || allowedExactNames.has(entry)) {
|
|
1580
|
+
results.push(fullPath);
|
|
1581
|
+
}
|
|
1582
|
+
}
|
|
1583
|
+
}
|
|
1584
|
+
};
|
|
1585
|
+
walk(rootPath);
|
|
1586
|
+
return results;
|
|
1587
|
+
}
|
|
1588
|
+
// ── Orphan Cleanup ──
|
|
1589
|
+
/** Clean orphaned entries in chunks_vec and chunks_fts that no longer have
|
|
1590
|
+
* corresponding rows in the chunks table. Returns counts of what was found/cleaned. */
|
|
1591
|
+
cleanOrphans(options) {
|
|
1592
|
+
const db = this.sqliteDb;
|
|
1593
|
+
const dryRun = options?.dryRun ?? false;
|
|
1594
|
+
const orphanedVec = db.prepare(
|
|
1595
|
+
"SELECT COUNT(*) as cnt FROM chunks_vec WHERE chunk_id NOT IN (SELECT id FROM chunks)"
|
|
1596
|
+
).get().cnt;
|
|
1597
|
+
const orphanedFts = db.prepare(
|
|
1598
|
+
"SELECT COUNT(*) as cnt FROM chunks_fts WHERE rowid NOT IN (SELECT id FROM chunks)"
|
|
1599
|
+
).get().cnt;
|
|
1600
|
+
if (dryRun) {
|
|
1601
|
+
return { orphanedVec, orphanedFts, cleanedVec: 0, cleanedFts: 0, dryRun: true };
|
|
1602
|
+
}
|
|
1603
|
+
let cleanedVec = 0;
|
|
1604
|
+
if (orphanedVec > 0) {
|
|
1605
|
+
const ids = db.prepare(
|
|
1606
|
+
"SELECT chunk_id FROM chunks_vec WHERE chunk_id NOT IN (SELECT id FROM chunks)"
|
|
1607
|
+
).all();
|
|
1608
|
+
const del = db.prepare("DELETE FROM chunks_vec WHERE chunk_id = ?");
|
|
1609
|
+
const BATCH = 1e3;
|
|
1610
|
+
for (let i = 0; i < ids.length; i += BATCH) {
|
|
1611
|
+
const batch = ids.slice(i, i + BATCH);
|
|
1612
|
+
db.transaction(() => {
|
|
1613
|
+
for (const r of batch) {
|
|
1614
|
+
del.run(r.chunk_id);
|
|
1615
|
+
cleanedVec++;
|
|
1616
|
+
}
|
|
1617
|
+
})();
|
|
1618
|
+
}
|
|
1619
|
+
}
|
|
1620
|
+
let cleanedFts = 0;
|
|
1621
|
+
if (orphanedFts > 0) {
|
|
1622
|
+
db.exec("DELETE FROM chunks_fts");
|
|
1623
|
+
db.exec("INSERT INTO chunks_fts(rowid, text) SELECT id, text FROM chunks");
|
|
1624
|
+
cleanedFts = orphanedFts;
|
|
1625
|
+
}
|
|
1626
|
+
return { orphanedVec, orphanedFts, cleanedVec, cleanedFts, dryRun: false };
|
|
1627
|
+
}
|
|
1628
|
+
// ── Cleanup ──
|
|
1629
|
+
close() {
|
|
1630
|
+
this.sqliteDb?.close();
|
|
1631
|
+
}
|
|
1632
|
+
};
|
|
1633
|
+
function resolveConfig(overrides) {
|
|
1634
|
+
const HOME3 = process.env.HOME || "";
|
|
1635
|
+
const ldmMemory = join2(HOME3, ".ldm", "memory");
|
|
1636
|
+
let dataDir = overrides?.dataDir || process.env.CRYSTAL_DATA_DIR;
|
|
1637
|
+
if (!dataDir) {
|
|
1638
|
+
if (existsSync2(join2(ldmMemory, "crystal.db"))) {
|
|
1639
|
+
dataDir = ldmMemory;
|
|
1640
|
+
} else {
|
|
1641
|
+
const legacyDir = join2(HOME3, ".openclaw", "memory-crystal");
|
|
1642
|
+
if (existsSync2(join2(legacyDir, "crystal.db"))) {
|
|
1643
|
+
dataDir = legacyDir;
|
|
1644
|
+
} else {
|
|
1645
|
+
dataDir = ldmMemory;
|
|
1646
|
+
}
|
|
1647
|
+
}
|
|
1648
|
+
}
|
|
1649
|
+
loadEnvFile(join2(dataDir, ".env"));
|
|
1650
|
+
const openaiApiKey = overrides?.openaiApiKey || process.env.OPENAI_API_KEY || opRead("OpenAI API", "api key");
|
|
1651
|
+
const googleApiKey = overrides?.googleApiKey || process.env.GOOGLE_API_KEY || opRead("Google AI", "api key");
|
|
1652
|
+
const remoteToken = overrides?.remoteToken || process.env.CRYSTAL_REMOTE_TOKEN || opRead("Memory Crystal Remote", "token");
|
|
1653
|
+
return {
|
|
1654
|
+
dataDir,
|
|
1655
|
+
embeddingProvider: overrides?.embeddingProvider || process.env.CRYSTAL_EMBEDDING_PROVIDER || "openai",
|
|
1656
|
+
openaiApiKey,
|
|
1657
|
+
openaiModel: overrides?.openaiModel || process.env.CRYSTAL_OPENAI_MODEL || "text-embedding-3-small",
|
|
1658
|
+
ollamaHost: overrides?.ollamaHost || process.env.CRYSTAL_OLLAMA_HOST || "http://localhost:11434",
|
|
1659
|
+
ollamaModel: overrides?.ollamaModel || process.env.CRYSTAL_OLLAMA_MODEL || "nomic-embed-text",
|
|
1660
|
+
googleApiKey,
|
|
1661
|
+
googleModel: overrides?.googleModel || process.env.CRYSTAL_GOOGLE_MODEL || "text-embedding-004",
|
|
1662
|
+
remoteUrl: overrides?.remoteUrl || process.env.CRYSTAL_REMOTE_URL,
|
|
1663
|
+
remoteToken
|
|
1664
|
+
};
|
|
1665
|
+
}
|
|
1666
|
+
function loadEnvFile(path) {
|
|
1667
|
+
if (!existsSync2(path)) return;
|
|
1668
|
+
const content = readFileSync2(path, "utf8");
|
|
1669
|
+
for (const line of content.split("\n")) {
|
|
1670
|
+
const trimmed = line.trim();
|
|
1671
|
+
if (!trimmed || trimmed.startsWith("#")) continue;
|
|
1672
|
+
const eqIdx = trimmed.indexOf("=");
|
|
1673
|
+
if (eqIdx === -1) continue;
|
|
1674
|
+
const key = trimmed.slice(0, eqIdx).trim();
|
|
1675
|
+
let value = trimmed.slice(eqIdx + 1).trim();
|
|
1676
|
+
if (value.startsWith('"') && value.endsWith('"') || value.startsWith("'") && value.endsWith("'")) {
|
|
1677
|
+
value = value.slice(1, -1);
|
|
1678
|
+
}
|
|
1679
|
+
if (key && !process.env[key]) {
|
|
1680
|
+
process.env[key] = value;
|
|
1681
|
+
}
|
|
1682
|
+
}
|
|
1683
|
+
}
|
|
1684
|
+
function opRead(item, field) {
|
|
1685
|
+
try {
|
|
1686
|
+
const HOME3 = process.env.HOME || "";
|
|
1687
|
+
let saTokenPath = join2(HOME3, ".ldm", "secrets", "op-sa-token");
|
|
1688
|
+
if (!existsSync2(saTokenPath)) {
|
|
1689
|
+
saTokenPath = join2(HOME3, ".openclaw", "secrets", "op-sa-token");
|
|
1690
|
+
}
|
|
1691
|
+
if (!existsSync2(saTokenPath)) return void 0;
|
|
1692
|
+
const saToken = readFileSync2(saTokenPath, "utf8").trim();
|
|
1693
|
+
return execSync2(`op read "op://Agent Secrets/${item}/${field}" 2>/dev/null`, {
|
|
1694
|
+
encoding: "utf8",
|
|
1695
|
+
env: { ...process.env, OP_SERVICE_ACCOUNT_TOKEN: saToken },
|
|
1696
|
+
timeout: 1e4
|
|
1697
|
+
}).trim() || void 0;
|
|
1698
|
+
} catch {
|
|
1699
|
+
return void 0;
|
|
1700
|
+
}
|
|
1701
|
+
}
|
|
1702
|
+
|
|
1703
|
+
// src/crypto.ts
|
|
1704
|
+
import { readFileSync as readFileSync4, existsSync as existsSync4 } from "fs";
|
|
1705
|
+
import { createCipheriv, createDecipheriv, createHmac, randomBytes, hkdfSync } from "crypto";
|
|
1706
|
+
|
|
1707
|
+
// src/ldm.ts
|
|
1708
|
+
import { existsSync as existsSync3, mkdirSync as mkdirSync2, readFileSync as readFileSync3, writeFileSync, copyFileSync, chmodSync, readdirSync as readdirSync2 } from "fs";
|
|
1709
|
+
import { join as join3, dirname } from "path";
|
|
1710
|
+
import { execSync as execSync3 } from "child_process";
|
|
1711
|
+
import { fileURLToPath } from "url";
|
|
1712
|
+
var HOME = process.env.HOME || "";
|
|
1713
|
+
var LDM_ROOT = join3(HOME, ".ldm");
|
|
1714
|
+
function loadAgentConfig(id) {
|
|
1715
|
+
const cfgPath = join3(LDM_ROOT, "agents", id, "config.json");
|
|
1716
|
+
try {
|
|
1717
|
+
if (existsSync3(cfgPath)) return JSON.parse(readFileSync3(cfgPath, "utf-8"));
|
|
1718
|
+
} catch {
|
|
1719
|
+
}
|
|
1720
|
+
return null;
|
|
1721
|
+
}
|
|
1722
|
+
function getAgentId(harnessHint) {
|
|
1723
|
+
if (process.env.CRYSTAL_AGENT_ID) return process.env.CRYSTAL_AGENT_ID;
|
|
1724
|
+
const agentsDir = join3(LDM_ROOT, "agents");
|
|
1725
|
+
if (existsSync3(agentsDir)) {
|
|
1726
|
+
try {
|
|
1727
|
+
for (const d of readdirSync2(agentsDir)) {
|
|
1728
|
+
const cfg = loadAgentConfig(d);
|
|
1729
|
+
if (!cfg || !cfg.agentId) continue;
|
|
1730
|
+
if (!harnessHint) return cfg.agentId;
|
|
1731
|
+
if (harnessHint === "claude-code" && cfg.harness === "claude-code-cli") return cfg.agentId;
|
|
1732
|
+
if (harnessHint === "openclaw" && cfg.harness === "openclaw") return cfg.agentId;
|
|
1733
|
+
}
|
|
1734
|
+
} catch {
|
|
1735
|
+
}
|
|
1736
|
+
}
|
|
1737
|
+
return harnessHint === "openclaw" ? "oc-lesa-mini" : "cc-mini";
|
|
1738
|
+
}
|
|
1739
|
+
function ldmPaths(agentId) {
|
|
1740
|
+
const id = agentId || getAgentId();
|
|
1741
|
+
const agentRoot = join3(LDM_ROOT, "agents", id);
|
|
1742
|
+
return {
|
|
1743
|
+
root: LDM_ROOT,
|
|
1744
|
+
bin: join3(LDM_ROOT, "bin"),
|
|
1745
|
+
secrets: join3(LDM_ROOT, "secrets"),
|
|
1746
|
+
state: join3(LDM_ROOT, "state"),
|
|
1747
|
+
config: join3(LDM_ROOT, "config.json"),
|
|
1748
|
+
crystalDb: join3(LDM_ROOT, "memory", "crystal.db"),
|
|
1749
|
+
crystalLance: join3(LDM_ROOT, "memory", "lance"),
|
|
1750
|
+
agentRoot,
|
|
1751
|
+
transcripts: join3(agentRoot, "memory", "transcripts"),
|
|
1752
|
+
sessions: join3(agentRoot, "memory", "sessions"),
|
|
1753
|
+
daily: join3(agentRoot, "memory", "daily"),
|
|
1754
|
+
journals: join3(agentRoot, "memory", "journals"),
|
|
1755
|
+
workspace: join3(agentRoot, "memory", "workspace")
|
|
1756
|
+
};
|
|
1757
|
+
}
|
|
1758
|
+
function loadConfig() {
|
|
1759
|
+
const configPath = join3(LDM_ROOT, "config.json");
|
|
1760
|
+
try {
|
|
1761
|
+
if (existsSync3(configPath)) {
|
|
1762
|
+
return JSON.parse(readFileSync3(configPath, "utf-8"));
|
|
1763
|
+
}
|
|
1764
|
+
} catch {
|
|
1765
|
+
}
|
|
1766
|
+
return null;
|
|
1767
|
+
}
|
|
1768
|
+
function saveConfig(config) {
|
|
1769
|
+
const configPath = join3(LDM_ROOT, "config.json");
|
|
1770
|
+
writeFileSync(configPath, JSON.stringify(config, null, 2) + "\n");
|
|
1771
|
+
}
|
|
1772
|
+
function scaffoldLdm(agentId) {
|
|
1773
|
+
const paths = ldmPaths(agentId);
|
|
1774
|
+
mkdirSync2(join3(paths.root, "memory"), { recursive: true });
|
|
1775
|
+
mkdirSync2(paths.crystalLance, { recursive: true });
|
|
1776
|
+
mkdirSync2(paths.bin, { recursive: true });
|
|
1777
|
+
mkdirSync2(paths.secrets, { recursive: true, mode: 448 });
|
|
1778
|
+
mkdirSync2(paths.state, { recursive: true });
|
|
1779
|
+
mkdirSync2(paths.transcripts, { recursive: true });
|
|
1780
|
+
mkdirSync2(paths.sessions, { recursive: true });
|
|
1781
|
+
mkdirSync2(paths.daily, { recursive: true });
|
|
1782
|
+
mkdirSync2(paths.journals, { recursive: true });
|
|
1783
|
+
mkdirSync2(paths.workspace, { recursive: true });
|
|
1784
|
+
const id = agentId || getAgentId();
|
|
1785
|
+
let config = loadConfig();
|
|
1786
|
+
if (!config) {
|
|
1787
|
+
config = {
|
|
1788
|
+
version: "1.0.0",
|
|
1789
|
+
agents: [id],
|
|
1790
|
+
createdAt: (/* @__PURE__ */ new Date()).toISOString(),
|
|
1791
|
+
updatedAt: (/* @__PURE__ */ new Date()).toISOString()
|
|
1792
|
+
};
|
|
1793
|
+
} else {
|
|
1794
|
+
if (!config.agents.includes(id)) {
|
|
1795
|
+
config.agents.push(id);
|
|
1796
|
+
}
|
|
1797
|
+
config.updatedAt = (/* @__PURE__ */ new Date()).toISOString();
|
|
1798
|
+
}
|
|
1799
|
+
saveConfig(config);
|
|
1800
|
+
return paths;
|
|
1801
|
+
}
|
|
1802
|
+
var LEGACY_OC_DIR = join3(HOME, ".openclaw");
|
|
1803
|
+
function resolveStatePath(filename) {
|
|
1804
|
+
const paths = ldmPaths();
|
|
1805
|
+
const ldmPath = join3(paths.state, filename);
|
|
1806
|
+
if (existsSync3(ldmPath)) return ldmPath;
|
|
1807
|
+
const legacyPath = join3(LEGACY_OC_DIR, "memory", filename);
|
|
1808
|
+
if (existsSync3(legacyPath)) return legacyPath;
|
|
1809
|
+
return ldmPath;
|
|
1810
|
+
}
|
|
1811
|
+
function stateWritePath(filename) {
|
|
1812
|
+
const paths = ldmPaths();
|
|
1813
|
+
const dir = paths.state;
|
|
1814
|
+
if (!existsSync3(dir)) mkdirSync2(dir, { recursive: true });
|
|
1815
|
+
return join3(dir, filename);
|
|
1816
|
+
}
|
|
1817
|
+
function resolveSecretPath(filename) {
|
|
1818
|
+
const paths = ldmPaths();
|
|
1819
|
+
const ldmPath = join3(paths.secrets, filename);
|
|
1820
|
+
if (existsSync3(ldmPath)) return ldmPath;
|
|
1821
|
+
const legacyPath = join3(LEGACY_OC_DIR, "secrets", filename);
|
|
1822
|
+
if (existsSync3(legacyPath)) return legacyPath;
|
|
1823
|
+
return ldmPath;
|
|
1824
|
+
}
|
|
1825
|
+
function ensureLdm(agentId) {
|
|
1826
|
+
const paths = ldmPaths(agentId);
|
|
1827
|
+
if (existsSync3(paths.transcripts) && existsSync3(paths.config)) {
|
|
1828
|
+
return paths;
|
|
1829
|
+
}
|
|
1830
|
+
return scaffoldLdm(agentId);
|
|
1831
|
+
}
|
|
1832
|
+
|
|
1833
|
+
// src/crypto.ts
|
|
1834
|
+
import { createHash as createHash2 } from "crypto";
|
|
1835
|
+
var KEY_PATH = process.env.CRYSTAL_RELAY_KEY_PATH || resolveSecretPath("crystal-relay-key");
|
|
1836
|
+
function loadRelayKey() {
|
|
1837
|
+
if (!existsSync4(KEY_PATH)) {
|
|
1838
|
+
throw new Error(
|
|
1839
|
+
`Relay key not found at ${KEY_PATH}
|
|
1840
|
+
Generate one: mkdir -p ~/.ldm/secrets && openssl rand -base64 32 > ~/.ldm/secrets/crystal-relay-key && chmod 600 ~/.ldm/secrets/crystal-relay-key
|
|
1841
|
+
Or run: crystal pair`
|
|
1842
|
+
);
|
|
1843
|
+
}
|
|
1844
|
+
const raw = readFileSync4(KEY_PATH, "utf-8").trim();
|
|
1845
|
+
const key = Buffer.from(raw, "base64");
|
|
1846
|
+
if (key.length !== 32) {
|
|
1847
|
+
throw new Error(`Relay key must be 32 bytes (256 bits). Got ${key.length} bytes. Regenerate with: openssl rand -base64 32`);
|
|
1848
|
+
}
|
|
1849
|
+
return key;
|
|
1850
|
+
}
|
|
1851
|
+
function deriveSigningKey(masterKey) {
|
|
1852
|
+
return Buffer.from(hkdfSync("sha256", masterKey, "", "crystal-relay-sign", 32));
|
|
1853
|
+
}
|
|
1854
|
+
function encrypt(plaintext, masterKey) {
|
|
1855
|
+
const nonce = randomBytes(12);
|
|
1856
|
+
const cipher = createCipheriv("aes-256-gcm", masterKey, nonce);
|
|
1857
|
+
const ciphertext = Buffer.concat([cipher.update(plaintext), cipher.final()]);
|
|
1858
|
+
const tag = cipher.getAuthTag();
|
|
1859
|
+
const signingKey = deriveSigningKey(masterKey);
|
|
1860
|
+
const hmacData = Buffer.concat([nonce, ciphertext, tag]);
|
|
1861
|
+
const hmac = createHmac("sha256", signingKey).update(hmacData).digest("hex");
|
|
1862
|
+
return {
|
|
1863
|
+
v: 1,
|
|
1864
|
+
nonce: nonce.toString("base64"),
|
|
1865
|
+
ciphertext: ciphertext.toString("base64"),
|
|
1866
|
+
tag: tag.toString("base64"),
|
|
1867
|
+
hmac
|
|
1868
|
+
};
|
|
1869
|
+
}
|
|
1870
|
+
function decrypt(payload, masterKey) {
|
|
1871
|
+
if (payload.v !== 1) {
|
|
1872
|
+
throw new Error(`Unknown payload version: ${payload.v}`);
|
|
1873
|
+
}
|
|
1874
|
+
const nonce = Buffer.from(payload.nonce, "base64");
|
|
1875
|
+
const ciphertext = Buffer.from(payload.ciphertext, "base64");
|
|
1876
|
+
const tag = Buffer.from(payload.tag, "base64");
|
|
1877
|
+
const signingKey = deriveSigningKey(masterKey);
|
|
1878
|
+
const hmacData = Buffer.concat([nonce, ciphertext, tag]);
|
|
1879
|
+
const expectedHmac = createHmac("sha256", signingKey).update(hmacData).digest("hex");
|
|
1880
|
+
if (payload.hmac !== expectedHmac) {
|
|
1881
|
+
throw new Error("HMAC verification failed \u2014 blob rejected (tampered or wrong key)");
|
|
1882
|
+
}
|
|
1883
|
+
const decipher = createDecipheriv("aes-256-gcm", masterKey, nonce);
|
|
1884
|
+
decipher.setAuthTag(tag);
|
|
1885
|
+
return Buffer.concat([decipher.update(ciphertext), decipher.final()]);
|
|
1886
|
+
}
|
|
1887
|
+
function encryptJSON(data, masterKey) {
|
|
1888
|
+
const plaintext = Buffer.from(JSON.stringify(data), "utf-8");
|
|
1889
|
+
return encrypt(plaintext, masterKey);
|
|
1890
|
+
}
|
|
1891
|
+
function decryptJSON(payload, masterKey) {
|
|
1892
|
+
const plaintext = decrypt(payload, masterKey);
|
|
1893
|
+
return JSON.parse(plaintext.toString("utf-8"));
|
|
1894
|
+
}
|
|
1895
|
+
|
|
1896
|
+
// src/summarize.ts
|
|
1897
|
+
import { writeFileSync as writeFileSync2, existsSync as existsSync5, mkdirSync as mkdirSync3 } from "fs";
|
|
1898
|
+
import { join as join4 } from "path";
|
|
1899
|
+
import https2 from "https";
|
|
1900
|
+
import http2 from "http";
|
|
1901
|
+
var SUMMARY_MODE = process.env.CRYSTAL_SUMMARY_MODE || "simple";
|
|
1902
|
+
var SUMMARY_PROVIDER = process.env.CRYSTAL_SUMMARY_PROVIDER || "openai";
|
|
1903
|
+
var SUMMARY_MODEL = process.env.CRYSTAL_SUMMARY_MODEL || "gpt-4o-mini";
|
|
1904
|
+
function generateSimpleSummary(messages) {
|
|
1905
|
+
const firstUser = messages.find((m) => m.role === "user");
|
|
1906
|
+
const title = firstUser ? firstUser.text.slice(0, 80).replace(/\n/g, " ").trim() : "Untitled Session";
|
|
1907
|
+
const slug = title.toLowerCase().replace(/[^a-z0-9]+/g, "-").replace(/^-|-$/g, "").slice(0, 50);
|
|
1908
|
+
const preview = messages.slice(0, 10).map((m) => {
|
|
1909
|
+
const roleLabel = m.role === "user" ? "User" : "Assistant";
|
|
1910
|
+
const snippet = m.text.slice(0, 200).replace(/\n/g, " ").trim();
|
|
1911
|
+
return `**${roleLabel}:** ${snippet}${m.text.length > 200 ? "..." : ""}`;
|
|
1912
|
+
}).join("\n\n");
|
|
1913
|
+
const date = messages[0]?.timestamp?.slice(0, 10) || (/* @__PURE__ */ new Date()).toISOString().slice(0, 10);
|
|
1914
|
+
return {
|
|
1915
|
+
title,
|
|
1916
|
+
slug,
|
|
1917
|
+
summary: preview,
|
|
1918
|
+
topics: [],
|
|
1919
|
+
messageCount: messages.length,
|
|
1920
|
+
date
|
|
1921
|
+
};
|
|
1922
|
+
}
|
|
1923
|
+
async function generateLlmSummary(messages) {
|
|
1924
|
+
const condensed = messages.slice(0, 30).map((m) => {
|
|
1925
|
+
const roleLabel = m.role === "user" ? "User" : "Assistant";
|
|
1926
|
+
const text = m.text.slice(0, 500);
|
|
1927
|
+
return `${roleLabel}: ${text}`;
|
|
1928
|
+
}).join("\n\n");
|
|
1929
|
+
const prompt = `Summarize this conversation. Return JSON only, no markdown fences.
|
|
1930
|
+
|
|
1931
|
+
Format:
|
|
1932
|
+
{"title": "short title", "slug": "url-safe-slug", "summary": "2-4 sentences", "topics": ["topic1", "topic2"]}
|
|
1933
|
+
|
|
1934
|
+
Conversation:
|
|
1935
|
+
${condensed}`;
|
|
1936
|
+
const apiKey = process.env.OPENAI_API_KEY;
|
|
1937
|
+
if (!apiKey) {
|
|
1938
|
+
return generateSimpleSummary(messages);
|
|
1939
|
+
}
|
|
1940
|
+
try {
|
|
1941
|
+
const body = JSON.stringify({
|
|
1942
|
+
model: SUMMARY_MODEL,
|
|
1943
|
+
messages: [{ role: "user", content: prompt }],
|
|
1944
|
+
temperature: 0.3,
|
|
1945
|
+
max_tokens: 300
|
|
1946
|
+
});
|
|
1947
|
+
const result = await httpPost("https://api.openai.com/v1/chat/completions", body, {
|
|
1948
|
+
"Authorization": `Bearer ${apiKey}`,
|
|
1949
|
+
"Content-Type": "application/json"
|
|
1950
|
+
});
|
|
1951
|
+
const parsed = JSON.parse(result);
|
|
1952
|
+
const content = parsed.choices?.[0]?.message?.content || "";
|
|
1953
|
+
const jsonStr = content.replace(/```json?\n?/g, "").replace(/```/g, "").trim();
|
|
1954
|
+
const data = JSON.parse(jsonStr);
|
|
1955
|
+
const date = messages[0]?.timestamp?.slice(0, 10) || (/* @__PURE__ */ new Date()).toISOString().slice(0, 10);
|
|
1956
|
+
return {
|
|
1957
|
+
title: data.title || "Untitled",
|
|
1958
|
+
slug: (data.slug || "untitled").slice(0, 50),
|
|
1959
|
+
summary: data.summary || "",
|
|
1960
|
+
topics: data.topics || [],
|
|
1961
|
+
messageCount: messages.length,
|
|
1962
|
+
date
|
|
1963
|
+
};
|
|
1964
|
+
} catch {
|
|
1965
|
+
return generateSimpleSummary(messages);
|
|
1966
|
+
}
|
|
1967
|
+
}
|
|
1968
|
+
function httpPost(url, body, headers) {
|
|
1969
|
+
return new Promise((resolve, reject) => {
|
|
1970
|
+
const parsed = new URL(url);
|
|
1971
|
+
const client = parsed.protocol === "https:" ? https2 : http2;
|
|
1972
|
+
const req = client.request({
|
|
1973
|
+
hostname: parsed.hostname,
|
|
1974
|
+
port: parsed.port,
|
|
1975
|
+
path: parsed.pathname + parsed.search,
|
|
1976
|
+
method: "POST",
|
|
1977
|
+
headers: { ...headers, "Content-Length": Buffer.byteLength(body) },
|
|
1978
|
+
timeout: 3e4
|
|
1979
|
+
}, (res) => {
|
|
1980
|
+
let data = "";
|
|
1981
|
+
res.on("data", (chunk) => {
|
|
1982
|
+
data += chunk;
|
|
1983
|
+
});
|
|
1984
|
+
res.on("end", () => {
|
|
1985
|
+
if (res.statusCode && res.statusCode >= 200 && res.statusCode < 300) {
|
|
1986
|
+
resolve(data);
|
|
1987
|
+
} else {
|
|
1988
|
+
reject(new Error(`HTTP ${res.statusCode}: ${data.slice(0, 200)}`));
|
|
1989
|
+
}
|
|
1990
|
+
});
|
|
1991
|
+
});
|
|
1992
|
+
req.on("error", reject);
|
|
1993
|
+
req.on("timeout", () => {
|
|
1994
|
+
req.destroy();
|
|
1995
|
+
reject(new Error("Request timeout"));
|
|
1996
|
+
});
|
|
1997
|
+
req.write(body);
|
|
1998
|
+
req.end();
|
|
1999
|
+
});
|
|
2000
|
+
}
|
|
2001
|
+
async function generateSessionSummary(messages) {
|
|
2002
|
+
if (SUMMARY_MODE === "llm") {
|
|
2003
|
+
return generateLlmSummary(messages);
|
|
2004
|
+
}
|
|
2005
|
+
return generateSimpleSummary(messages);
|
|
2006
|
+
}
|
|
2007
|
+
function formatSummaryMarkdown(summary, sessionId) {
|
|
2008
|
+
const lines = [];
|
|
2009
|
+
lines.push(`# ${summary.title}`);
|
|
2010
|
+
lines.push("");
|
|
2011
|
+
lines.push(`**Session:** ${sessionId} **Date:** ${summary.date} **Messages:** ${summary.messageCount}`);
|
|
2012
|
+
lines.push("");
|
|
2013
|
+
lines.push("## Summary");
|
|
2014
|
+
lines.push("");
|
|
2015
|
+
lines.push(summary.summary);
|
|
2016
|
+
if (summary.topics.length > 0) {
|
|
2017
|
+
lines.push("");
|
|
2018
|
+
lines.push("## Key Topics");
|
|
2019
|
+
lines.push("");
|
|
2020
|
+
for (const topic of summary.topics) {
|
|
2021
|
+
lines.push(`- ${topic}`);
|
|
2022
|
+
}
|
|
2023
|
+
}
|
|
2024
|
+
lines.push("");
|
|
2025
|
+
return lines.join("\n");
|
|
2026
|
+
}
|
|
2027
|
+
function writeSummaryFile(sessionsDir, summary, agentId, sessionId) {
|
|
2028
|
+
if (!existsSync5(sessionsDir)) mkdirSync3(sessionsDir, { recursive: true });
|
|
2029
|
+
const now = /* @__PURE__ */ new Date();
|
|
2030
|
+
const dateStr = now.toISOString().slice(0, 10);
|
|
2031
|
+
const timeStr = now.toISOString().slice(11, 19).replace(/:/g, "-");
|
|
2032
|
+
const filename = `${dateStr}--${timeStr}--${agentId}--${summary.slug}.md`;
|
|
2033
|
+
const filepath = join4(sessionsDir, filename);
|
|
2034
|
+
const content = formatSummaryMarkdown(summary, sessionId);
|
|
2035
|
+
writeFileSync2(filepath, content);
|
|
2036
|
+
return filepath;
|
|
2037
|
+
}
|
|
2038
|
+
|
|
2039
|
+
// src/staging.ts
|
|
2040
|
+
import {
|
|
2041
|
+
existsSync as existsSync6,
|
|
2042
|
+
mkdirSync as mkdirSync4,
|
|
2043
|
+
readdirSync as readdirSync3,
|
|
2044
|
+
renameSync,
|
|
2045
|
+
unlinkSync,
|
|
2046
|
+
writeFileSync as writeFileSync3
|
|
2047
|
+
} from "fs";
|
|
2048
|
+
import { join as join5 } from "path";
|
|
2049
|
+
import { execSync as execSync4 } from "child_process";
|
|
2050
|
+
var HOME2 = process.env.HOME || "";
|
|
2051
|
+
var STAGING_ROOT = join5(HOME2, ".ldm", "staging");
|
|
2052
|
+
function stagingPaths(agentId) {
|
|
2053
|
+
const root = join5(STAGING_ROOT, agentId);
|
|
2054
|
+
return {
|
|
2055
|
+
root,
|
|
2056
|
+
transcripts: join5(root, "transcripts"),
|
|
2057
|
+
readyFile: join5(root, "READY")
|
|
2058
|
+
};
|
|
2059
|
+
}
|
|
2060
|
+
function ensureStaging(agentId) {
|
|
2061
|
+
const paths = stagingPaths(agentId);
|
|
2062
|
+
mkdirSync4(paths.transcripts, { recursive: true });
|
|
2063
|
+
return paths;
|
|
2064
|
+
}
|
|
2065
|
+
function markReady(agentId) {
|
|
2066
|
+
const paths = stagingPaths(agentId);
|
|
2067
|
+
writeFileSync3(paths.readyFile, JSON.stringify({
|
|
2068
|
+
markedAt: (/* @__PURE__ */ new Date()).toISOString(),
|
|
2069
|
+
agentId
|
|
2070
|
+
}));
|
|
2071
|
+
}
|
|
2072
|
+
function isNewAgent(agentId) {
|
|
2073
|
+
const paths = ldmPaths(agentId);
|
|
2074
|
+
return !existsSync6(paths.agentRoot);
|
|
2075
|
+
}
|
|
2076
|
+
|
|
2077
|
+
// src/file-sync.ts
|
|
2078
|
+
import { createHash as createHash3 } from "crypto";
|
|
2079
|
+
import {
|
|
2080
|
+
existsSync as existsSync7,
|
|
2081
|
+
mkdirSync as mkdirSync5,
|
|
2082
|
+
readFileSync as readFileSync7,
|
|
2083
|
+
writeFileSync as writeFileSync4,
|
|
2084
|
+
unlinkSync as unlinkSync2,
|
|
2085
|
+
readdirSync as readdirSync4,
|
|
2086
|
+
statSync as statSync2
|
|
2087
|
+
} from "fs";
|
|
2088
|
+
import { join as join6, relative as relative2, dirname as dirname2 } from "path";
|
|
33
2089
|
var RELAY_URL = process.env.CRYSTAL_RELAY_URL || "";
|
|
34
2090
|
var RELAY_TOKEN = process.env.CRYSTAL_RELAY_TOKEN || "";
|
|
2091
|
+
var EXCLUDE_PATTERNS = [
|
|
2092
|
+
"memory/crystal.db",
|
|
2093
|
+
// DB syncs via delta chunks, not file copy
|
|
2094
|
+
"memory/crystal.db-wal",
|
|
2095
|
+
"memory/crystal.db-shm",
|
|
2096
|
+
"memory/crystal.db.bak",
|
|
2097
|
+
"memory/crystal.db.tmp",
|
|
2098
|
+
"memory/lance/",
|
|
2099
|
+
// LanceDB (deprecated, not synced)
|
|
2100
|
+
"state/",
|
|
2101
|
+
// Local state files (watermarks, etc.)
|
|
2102
|
+
"secrets/",
|
|
2103
|
+
// Encryption keys, tokens
|
|
2104
|
+
"staging/",
|
|
2105
|
+
// Staging pipeline (Core-only)
|
|
2106
|
+
"bin/",
|
|
2107
|
+
// Local scripts
|
|
2108
|
+
".DS_Store"
|
|
2109
|
+
];
|
|
2110
|
+
function shouldExclude(relativePath) {
|
|
2111
|
+
for (const pattern of EXCLUDE_PATTERNS) {
|
|
2112
|
+
if (relativePath === pattern || relativePath.startsWith(pattern)) return true;
|
|
2113
|
+
}
|
|
2114
|
+
const parts = relativePath.split("/");
|
|
2115
|
+
for (const part of parts) {
|
|
2116
|
+
if (part.startsWith(".") && part !== ".ldm") return true;
|
|
2117
|
+
}
|
|
2118
|
+
return false;
|
|
2119
|
+
}
|
|
2120
|
+
function scanDir(baseDir, currentDir, entries) {
|
|
2121
|
+
if (!existsSync7(currentDir)) return;
|
|
2122
|
+
const items = readdirSync4(currentDir);
|
|
2123
|
+
for (const item of items) {
|
|
2124
|
+
const fullPath = join6(currentDir, item);
|
|
2125
|
+
const relPath = relative2(baseDir, fullPath);
|
|
2126
|
+
if (shouldExclude(relPath)) continue;
|
|
2127
|
+
let stat;
|
|
2128
|
+
try {
|
|
2129
|
+
stat = statSync2(fullPath);
|
|
2130
|
+
} catch {
|
|
2131
|
+
continue;
|
|
2132
|
+
}
|
|
2133
|
+
if (stat.isDirectory()) {
|
|
2134
|
+
scanDir(baseDir, fullPath, entries);
|
|
2135
|
+
} else if (stat.isFile()) {
|
|
2136
|
+
if (stat.size > 50 * 1024 * 1024) continue;
|
|
2137
|
+
const content = readFileSync7(fullPath);
|
|
2138
|
+
const sha256 = createHash3("sha256").update(content).digest("hex");
|
|
2139
|
+
entries.push({ path: relPath, sha256, size: stat.size });
|
|
2140
|
+
}
|
|
2141
|
+
}
|
|
2142
|
+
}
|
|
2143
|
+
function generateManifest() {
|
|
2144
|
+
const paths = ldmPaths();
|
|
2145
|
+
const entries = [];
|
|
2146
|
+
scanDir(paths.root, paths.root, entries);
|
|
2147
|
+
return {
|
|
2148
|
+
version: 1,
|
|
2149
|
+
generatedAt: (/* @__PURE__ */ new Date()).toISOString(),
|
|
2150
|
+
fileCount: entries.length,
|
|
2151
|
+
entries
|
|
2152
|
+
};
|
|
2153
|
+
}
|
|
2154
|
+
function loadFileSyncState() {
|
|
2155
|
+
const statePath = resolveStatePath("file-sync-state.json");
|
|
2156
|
+
try {
|
|
2157
|
+
if (existsSync7(statePath)) {
|
|
2158
|
+
return JSON.parse(readFileSync7(statePath, "utf-8"));
|
|
2159
|
+
}
|
|
2160
|
+
} catch {
|
|
2161
|
+
}
|
|
2162
|
+
return { lastSync: null, lastManifestHash: null, filesTransferred: 0, filesDeleted: 0 };
|
|
2163
|
+
}
|
|
2164
|
+
function saveFileSyncState(state) {
|
|
2165
|
+
const writePath = stateWritePath("file-sync-state.json");
|
|
2166
|
+
writeFileSync4(writePath, JSON.stringify(state, null, 2));
|
|
2167
|
+
}
|
|
2168
|
+
async function pushFileSync() {
|
|
2169
|
+
if (!RELAY_URL || !RELAY_TOKEN) {
|
|
2170
|
+
throw new Error("CRYSTAL_RELAY_URL and CRYSTAL_RELAY_TOKEN must be set");
|
|
2171
|
+
}
|
|
2172
|
+
const relayKey = loadRelayKey();
|
|
2173
|
+
const paths = ldmPaths();
|
|
2174
|
+
const manifest = generateManifest();
|
|
2175
|
+
const manifestJson = JSON.stringify(manifest.entries.map((e) => `${e.path}:${e.sha256}`));
|
|
2176
|
+
const manifestHash = createHash3("sha256").update(manifestJson).digest("hex");
|
|
2177
|
+
const state = loadFileSyncState();
|
|
2178
|
+
if (state.lastManifestHash === manifestHash) {
|
|
2179
|
+
return { manifest: 0, files: 0 };
|
|
2180
|
+
}
|
|
2181
|
+
const encryptedManifest = encryptJSON(manifest, relayKey);
|
|
2182
|
+
const manifestResp = await fetch(`${RELAY_URL}/drop/files`, {
|
|
2183
|
+
method: "POST",
|
|
2184
|
+
headers: {
|
|
2185
|
+
"Authorization": `Bearer ${RELAY_TOKEN}`,
|
|
2186
|
+
"Content-Type": "application/octet-stream",
|
|
2187
|
+
"X-File-Type": "manifest"
|
|
2188
|
+
},
|
|
2189
|
+
body: JSON.stringify(encryptedManifest)
|
|
2190
|
+
});
|
|
2191
|
+
if (!manifestResp.ok) {
|
|
2192
|
+
throw new Error(`Manifest push failed: ${manifestResp.status} ${await manifestResp.text()}`);
|
|
2193
|
+
}
|
|
2194
|
+
let filesPushed = 0;
|
|
2195
|
+
for (const entry of manifest.entries) {
|
|
2196
|
+
const fullPath = join6(paths.root, entry.path);
|
|
2197
|
+
if (!existsSync7(fullPath)) continue;
|
|
2198
|
+
const content = readFileSync7(fullPath);
|
|
2199
|
+
const encrypted = encrypt(content, relayKey);
|
|
2200
|
+
const filePayload = JSON.stringify({
|
|
2201
|
+
path: entry.path,
|
|
2202
|
+
sha256: entry.sha256,
|
|
2203
|
+
size: entry.size,
|
|
2204
|
+
data: encrypted
|
|
2205
|
+
});
|
|
2206
|
+
const fileResp = await fetch(`${RELAY_URL}/drop/files`, {
|
|
2207
|
+
method: "POST",
|
|
2208
|
+
headers: {
|
|
2209
|
+
"Authorization": `Bearer ${RELAY_TOKEN}`,
|
|
2210
|
+
"Content-Type": "application/octet-stream",
|
|
2211
|
+
"X-File-Type": "file"
|
|
2212
|
+
},
|
|
2213
|
+
body: filePayload
|
|
2214
|
+
});
|
|
2215
|
+
if (fileResp.ok) filesPushed++;
|
|
2216
|
+
}
|
|
2217
|
+
state.lastSync = (/* @__PURE__ */ new Date()).toISOString();
|
|
2218
|
+
state.lastManifestHash = manifestHash;
|
|
2219
|
+
state.filesTransferred += filesPushed;
|
|
2220
|
+
saveFileSyncState(state);
|
|
2221
|
+
return { manifest: manifest.fileCount, files: filesPushed };
|
|
2222
|
+
}
|
|
2223
|
+
|
|
2224
|
+
// src/poller.ts
|
|
2225
|
+
import { readFileSync as readFileSync8, writeFileSync as writeFileSync5, appendFileSync, existsSync as existsSync8, mkdirSync as mkdirSync6 } from "fs";
|
|
2226
|
+
import { join as join7 } from "path";
|
|
2227
|
+
import { createDecipheriv as createDecipheriv2 } from "crypto";
|
|
2228
|
+
var RELAY_URL2 = process.env.CRYSTAL_RELAY_URL || "";
|
|
2229
|
+
var RELAY_TOKEN2 = process.env.CRYSTAL_RELAY_TOKEN || "";
|
|
35
2230
|
var POLLER_STATE_PATH = resolveStatePath("relay-poller-state.json");
|
|
36
2231
|
function loadState() {
|
|
37
2232
|
try {
|
|
38
|
-
if (
|
|
39
|
-
const raw = JSON.parse(
|
|
2233
|
+
if (existsSync8(POLLER_STATE_PATH)) {
|
|
2234
|
+
const raw = JSON.parse(readFileSync8(POLLER_STATE_PATH, "utf-8"));
|
|
40
2235
|
return {
|
|
41
2236
|
lastPoll: raw.lastPoll ?? null,
|
|
42
2237
|
totalIngested: raw.totalIngested ?? 0,
|
|
@@ -52,13 +2247,13 @@ function loadState() {
|
|
|
52
2247
|
}
|
|
53
2248
|
function saveState(state) {
|
|
54
2249
|
const writePath = stateWritePath("relay-poller-state.json");
|
|
55
|
-
|
|
2250
|
+
writeFileSync5(writePath, JSON.stringify(state, null, 2));
|
|
56
2251
|
}
|
|
57
2252
|
async function pollConversations(crystal, relayKey) {
|
|
58
2253
|
let ingested = 0;
|
|
59
2254
|
let errors = 0;
|
|
60
|
-
const listResp = await fetch(`${
|
|
61
|
-
headers: { "Authorization": `Bearer ${
|
|
2255
|
+
const listResp = await fetch(`${RELAY_URL2}/pickup/conversations`, {
|
|
2256
|
+
headers: { "Authorization": `Bearer ${RELAY_TOKEN2}` }
|
|
62
2257
|
});
|
|
63
2258
|
if (!listResp.ok) {
|
|
64
2259
|
throw new Error(`Relay list failed: ${listResp.status} ${await listResp.text()}`);
|
|
@@ -69,8 +2264,8 @@ async function pollConversations(crystal, relayKey) {
|
|
|
69
2264
|
`);
|
|
70
2265
|
for (const blob of listData.blobs) {
|
|
71
2266
|
try {
|
|
72
|
-
const blobResp = await fetch(`${
|
|
73
|
-
headers: { "Authorization": `Bearer ${
|
|
2267
|
+
const blobResp = await fetch(`${RELAY_URL2}/pickup/conversations/${blob.id}`, {
|
|
2268
|
+
headers: { "Authorization": `Bearer ${RELAY_TOKEN2}` }
|
|
74
2269
|
});
|
|
75
2270
|
if (!blobResp.ok) {
|
|
76
2271
|
process.stderr.write(`[relay-poller] failed to fetch blob ${blob.id}: ${blobResp.status}
|
|
@@ -86,9 +2281,9 @@ async function pollConversations(crystal, relayKey) {
|
|
|
86
2281
|
} catch (err) {
|
|
87
2282
|
process.stderr.write(`[relay-poller] blob ${blob.id} failed verification: ${err.message} ... DISCARDED
|
|
88
2283
|
`);
|
|
89
|
-
await fetch(`${
|
|
2284
|
+
await fetch(`${RELAY_URL2}/confirm/conversations/${blob.id}`, {
|
|
90
2285
|
method: "DELETE",
|
|
91
|
-
headers: { "Authorization": `Bearer ${
|
|
2286
|
+
headers: { "Authorization": `Bearer ${RELAY_TOKEN2}` }
|
|
92
2287
|
});
|
|
93
2288
|
errors++;
|
|
94
2289
|
continue;
|
|
@@ -97,13 +2292,13 @@ async function pollConversations(crystal, relayKey) {
|
|
|
97
2292
|
process.stderr.write(`[relay-poller] new agent "${drop.agent_id}" detected, routing to staging
|
|
98
2293
|
`);
|
|
99
2294
|
const staging = ensureStaging(drop.agent_id);
|
|
100
|
-
const jsonlPath =
|
|
2295
|
+
const jsonlPath = join7(staging.transcripts, `relay-${blob.id}.jsonl`);
|
|
101
2296
|
const jsonlLines = drop.messages.map((m) => JSON.stringify(m)).join("\n") + "\n";
|
|
102
|
-
|
|
2297
|
+
writeFileSync5(jsonlPath, jsonlLines);
|
|
103
2298
|
markReady(drop.agent_id);
|
|
104
|
-
await fetch(`${
|
|
2299
|
+
await fetch(`${RELAY_URL2}/confirm/conversations/${blob.id}`, {
|
|
105
2300
|
method: "DELETE",
|
|
106
|
-
headers: { "Authorization": `Bearer ${
|
|
2301
|
+
headers: { "Authorization": `Bearer ${RELAY_TOKEN2}` }
|
|
107
2302
|
});
|
|
108
2303
|
process.stderr.write(`[relay-poller] staged ${drop.messages.length} messages for ${drop.agent_id}
|
|
109
2304
|
`);
|
|
@@ -138,17 +2333,17 @@ async function pollConversations(crystal, relayKey) {
|
|
|
138
2333
|
}
|
|
139
2334
|
const count = await crystal.ingest(chunks);
|
|
140
2335
|
ingested += count;
|
|
141
|
-
await fetch(`${
|
|
2336
|
+
await fetch(`${RELAY_URL2}/confirm/conversations/${blob.id}`, {
|
|
142
2337
|
method: "DELETE",
|
|
143
|
-
headers: { "Authorization": `Bearer ${
|
|
2338
|
+
headers: { "Authorization": `Bearer ${RELAY_TOKEN2}` }
|
|
144
2339
|
});
|
|
145
2340
|
process.stderr.write(`[relay-poller] blob ${blob.id}: ${count} chunks ingested from ${drop.agent_id}
|
|
146
2341
|
`);
|
|
147
2342
|
try {
|
|
148
2343
|
const remotePaths = ensureLdm(drop.agent_id);
|
|
149
|
-
const jsonlPath =
|
|
2344
|
+
const jsonlPath = join7(remotePaths.transcripts, `relay-${blob.id}.jsonl`);
|
|
150
2345
|
const jsonlLines = drop.messages.map((m) => JSON.stringify(m)).join("\n") + "\n";
|
|
151
|
-
|
|
2346
|
+
writeFileSync5(jsonlPath, jsonlLines);
|
|
152
2347
|
const summaryMsgs = drop.messages.map((m) => ({
|
|
153
2348
|
role: m.role,
|
|
154
2349
|
text: m.text,
|
|
@@ -181,8 +2376,8 @@ async function pollChatgpt(crystal, relayKey) {
|
|
|
181
2376
|
let ingested = 0;
|
|
182
2377
|
let attachments = 0;
|
|
183
2378
|
let errors = 0;
|
|
184
|
-
const listResp = await fetch(`${
|
|
185
|
-
headers: { "Authorization": `Bearer ${
|
|
2379
|
+
const listResp = await fetch(`${RELAY_URL2}/pickup/chatgpt`, {
|
|
2380
|
+
headers: { "Authorization": `Bearer ${RELAY_TOKEN2}` }
|
|
186
2381
|
});
|
|
187
2382
|
if (!listResp.ok) {
|
|
188
2383
|
process.stderr.write(`[relay-poller] chatgpt list failed: ${listResp.status}
|
|
@@ -196,8 +2391,8 @@ async function pollChatgpt(crystal, relayKey) {
|
|
|
196
2391
|
}
|
|
197
2392
|
for (const blob of listData.blobs) {
|
|
198
2393
|
try {
|
|
199
|
-
const blobResp = await fetch(`${
|
|
200
|
-
headers: { "Authorization": `Bearer ${
|
|
2394
|
+
const blobResp = await fetch(`${RELAY_URL2}/pickup/chatgpt/${blob.id}`, {
|
|
2395
|
+
headers: { "Authorization": `Bearer ${RELAY_TOKEN2}` }
|
|
201
2396
|
});
|
|
202
2397
|
if (!blobResp.ok) {
|
|
203
2398
|
process.stderr.write(`[relay-poller] chatgpt: failed to fetch ${blob.id}: ${blobResp.status}
|
|
@@ -256,8 +2451,8 @@ async function pollChatgpt(crystal, relayKey) {
|
|
|
256
2451
|
errors++;
|
|
257
2452
|
}
|
|
258
2453
|
}
|
|
259
|
-
const attResp = await fetch(`${
|
|
260
|
-
headers: { "Authorization": `Bearer ${
|
|
2454
|
+
const attResp = await fetch(`${RELAY_URL2}/pickup/chatgpt-attachments`, {
|
|
2455
|
+
headers: { "Authorization": `Bearer ${RELAY_TOKEN2}` }
|
|
261
2456
|
});
|
|
262
2457
|
if (attResp.ok) {
|
|
263
2458
|
const attData = await attResp.json();
|
|
@@ -314,7 +2509,7 @@ Result: ${tc.result}` : ""}`;
|
|
|
314
2509
|
const count = await crystal.ingest(chunks);
|
|
315
2510
|
try {
|
|
316
2511
|
const remotePaths = ensureLdm(drop.agent_id);
|
|
317
|
-
const jsonlPath =
|
|
2512
|
+
const jsonlPath = join7(remotePaths.transcripts, `cloud-${drop.timestamp.replace(/[:.]/g, "-")}.jsonl`);
|
|
318
2513
|
const line = JSON.stringify({
|
|
319
2514
|
type: "conversation",
|
|
320
2515
|
agent_id: drop.agent_id,
|
|
@@ -347,8 +2542,8 @@ async function fetchAndSaveAttachment(data, agentId, relayKey) {
|
|
|
347
2542
|
`);
|
|
348
2543
|
return false;
|
|
349
2544
|
}
|
|
350
|
-
const blobResp = await fetch(`${
|
|
351
|
-
headers: { "Authorization": `Bearer ${
|
|
2545
|
+
const blobResp = await fetch(`${RELAY_URL2}/pickup/chatgpt-attachments/${blobId}`, {
|
|
2546
|
+
headers: { "Authorization": `Bearer ${RELAY_TOKEN2}` }
|
|
352
2547
|
});
|
|
353
2548
|
if (!blobResp.ok) {
|
|
354
2549
|
process.stderr.write(`[relay-poller] chatgpt: failed to fetch attachment ${blobId}: ${blobResp.status}
|
|
@@ -361,17 +2556,17 @@ async function fetchAndSaveAttachment(data, agentId, relayKey) {
|
|
|
361
2556
|
const encryptedData = Buffer.from(encryptedPayload.data, "base64");
|
|
362
2557
|
const ciphertext = encryptedData.subarray(0, encryptedData.length - 16);
|
|
363
2558
|
const tag = encryptedData.subarray(encryptedData.length - 16);
|
|
364
|
-
const decipher =
|
|
2559
|
+
const decipher = createDecipheriv2("aes-256-gcm", relayKey, nonce);
|
|
365
2560
|
decipher.setAuthTag(tag);
|
|
366
2561
|
const decryptedBuf = Buffer.concat([decipher.update(ciphertext), decipher.final()]);
|
|
367
2562
|
const remotePaths = ensureLdm(agentId);
|
|
368
|
-
const attachmentsDir =
|
|
369
|
-
if (!
|
|
2563
|
+
const attachmentsDir = join7(remotePaths.agentRoot, "memory", "attachments");
|
|
2564
|
+
if (!existsSync8(attachmentsDir)) mkdirSync6(attachmentsDir, { recursive: true });
|
|
370
2565
|
const safeFilename = data.filename.replace(/[^a-zA-Z0-9._-]/g, "_");
|
|
371
|
-
const filePath =
|
|
372
|
-
|
|
2566
|
+
const filePath = join7(attachmentsDir, `${blobId}-${safeFilename}`);
|
|
2567
|
+
writeFileSync5(filePath, decryptedBuf);
|
|
373
2568
|
const metaPath = filePath + ".meta.json";
|
|
374
|
-
|
|
2569
|
+
writeFileSync5(metaPath, JSON.stringify({
|
|
375
2570
|
filename: data.filename,
|
|
376
2571
|
mime_type: data.mime_type,
|
|
377
2572
|
size_bytes: data.size_bytes,
|
|
@@ -393,9 +2588,9 @@ async function fetchAndSaveAttachment(data, agentId, relayKey) {
|
|
|
393
2588
|
}
|
|
394
2589
|
}
|
|
395
2590
|
async function confirmBlob(channel, id) {
|
|
396
|
-
await fetch(`${
|
|
2591
|
+
await fetch(`${RELAY_URL2}/confirm/${channel}/${id}`, {
|
|
397
2592
|
method: "DELETE",
|
|
398
|
-
headers: { "Authorization": `Bearer ${
|
|
2593
|
+
headers: { "Authorization": `Bearer ${RELAY_TOKEN2}` }
|
|
399
2594
|
});
|
|
400
2595
|
}
|
|
401
2596
|
function appendDailyBreadcrumb(agentId, text) {
|
|
@@ -403,9 +2598,9 @@ function appendDailyBreadcrumb(agentId, text) {
|
|
|
403
2598
|
const remotePaths = ensureLdm(agentId);
|
|
404
2599
|
const now = /* @__PURE__ */ new Date();
|
|
405
2600
|
const dateStr = now.toISOString().slice(0, 10);
|
|
406
|
-
const dailyPath =
|
|
407
|
-
if (!
|
|
408
|
-
|
|
2601
|
+
const dailyPath = join7(remotePaths.daily, `${dateStr}.md`);
|
|
2602
|
+
if (!existsSync8(dailyPath)) {
|
|
2603
|
+
writeFileSync5(dailyPath, `# ${dateStr} - ${agentId} Daily Log (via relay)
|
|
409
2604
|
|
|
410
2605
|
`);
|
|
411
2606
|
}
|
|
@@ -418,7 +2613,7 @@ function appendDailyBreadcrumb(agentId, text) {
|
|
|
418
2613
|
}
|
|
419
2614
|
}
|
|
420
2615
|
async function pollOnce() {
|
|
421
|
-
if (!
|
|
2616
|
+
if (!RELAY_URL2 || !RELAY_TOKEN2) {
|
|
422
2617
|
throw new Error("CRYSTAL_RELAY_URL and CRYSTAL_RELAY_TOKEN must be set");
|
|
423
2618
|
}
|
|
424
2619
|
const relayKey = loadRelayKey();
|
|
@@ -436,8 +2631,8 @@ async function pollOnce() {
|
|
|
436
2631
|
}
|
|
437
2632
|
async function pollCommands() {
|
|
438
2633
|
const relayKey = loadRelayKey();
|
|
439
|
-
const listResp = await fetch(`${
|
|
440
|
-
headers: { "Authorization": `Bearer ${
|
|
2634
|
+
const listResp = await fetch(`${RELAY_URL2}/pickup/commands`, {
|
|
2635
|
+
headers: { "Authorization": `Bearer ${RELAY_TOKEN2}` }
|
|
441
2636
|
});
|
|
442
2637
|
if (!listResp.ok) return;
|
|
443
2638
|
const listData = await listResp.json();
|
|
@@ -446,8 +2641,8 @@ async function pollCommands() {
|
|
|
446
2641
|
`);
|
|
447
2642
|
for (const blob of listData.blobs) {
|
|
448
2643
|
try {
|
|
449
|
-
const blobResp = await fetch(`${
|
|
450
|
-
headers: { "Authorization": `Bearer ${
|
|
2644
|
+
const blobResp = await fetch(`${RELAY_URL2}/pickup/commands/${blob.id}`, {
|
|
2645
|
+
headers: { "Authorization": `Bearer ${RELAY_TOKEN2}` }
|
|
451
2646
|
});
|
|
452
2647
|
if (!blobResp.ok) continue;
|
|
453
2648
|
const encryptedText = await blobResp.text();
|
|
@@ -456,9 +2651,9 @@ async function pollCommands() {
|
|
|
456
2651
|
try {
|
|
457
2652
|
cmd = decryptJSON(encrypted, relayKey);
|
|
458
2653
|
} catch {
|
|
459
|
-
await fetch(`${
|
|
2654
|
+
await fetch(`${RELAY_URL2}/confirm/commands/${blob.id}`, {
|
|
460
2655
|
method: "DELETE",
|
|
461
|
-
headers: { "Authorization": `Bearer ${
|
|
2656
|
+
headers: { "Authorization": `Bearer ${RELAY_TOKEN2}` }
|
|
462
2657
|
});
|
|
463
2658
|
continue;
|
|
464
2659
|
}
|
|
@@ -484,9 +2679,9 @@ async function pollCommands() {
|
|
|
484
2679
|
process.stderr.write(`[relay-poller] gateway not available (non-fatal): ${err.message}
|
|
485
2680
|
`);
|
|
486
2681
|
}
|
|
487
|
-
await fetch(`${
|
|
2682
|
+
await fetch(`${RELAY_URL2}/confirm/commands/${blob.id}`, {
|
|
488
2683
|
method: "DELETE",
|
|
489
|
-
headers: { "Authorization": `Bearer ${
|
|
2684
|
+
headers: { "Authorization": `Bearer ${RELAY_TOKEN2}` }
|
|
490
2685
|
});
|
|
491
2686
|
} catch (err) {
|
|
492
2687
|
process.stderr.write(`[relay-poller] command processing error: ${err.message}
|
|
@@ -495,7 +2690,7 @@ async function pollCommands() {
|
|
|
495
2690
|
}
|
|
496
2691
|
}
|
|
497
2692
|
async function pushDelta(force) {
|
|
498
|
-
if (!
|
|
2693
|
+
if (!RELAY_URL2 || !RELAY_TOKEN2) {
|
|
499
2694
|
throw new Error("CRYSTAL_RELAY_URL and CRYSTAL_RELAY_TOKEN must be set");
|
|
500
2695
|
}
|
|
501
2696
|
const relayKey = loadRelayKey();
|
|
@@ -525,10 +2720,10 @@ async function pushDelta(force) {
|
|
|
525
2720
|
chunks
|
|
526
2721
|
};
|
|
527
2722
|
const encrypted = encryptJSON(deltaPayload, relayKey);
|
|
528
|
-
const resp = await fetch(`${
|
|
2723
|
+
const resp = await fetch(`${RELAY_URL2}/drop/mirror`, {
|
|
529
2724
|
method: "POST",
|
|
530
2725
|
headers: {
|
|
531
|
-
"Authorization": `Bearer ${
|
|
2726
|
+
"Authorization": `Bearer ${RELAY_TOKEN2}`,
|
|
532
2727
|
"Content-Type": "application/octet-stream"
|
|
533
2728
|
},
|
|
534
2729
|
body: JSON.stringify(encrypted)
|
|
@@ -548,9 +2743,9 @@ async function pushDelta(force) {
|
|
|
548
2743
|
var args = process.argv.slice(2);
|
|
549
2744
|
if (args.includes("--status")) {
|
|
550
2745
|
const state = loadState();
|
|
551
|
-
const mode =
|
|
2746
|
+
const mode = RELAY_URL2 && RELAY_TOKEN2 ? "configured" : "not configured";
|
|
552
2747
|
console.log(`Relay poller status:`);
|
|
553
|
-
console.log(` Relay URL: ${
|
|
2748
|
+
console.log(` Relay URL: ${RELAY_URL2 || "(not set)"}`);
|
|
554
2749
|
console.log(` Mode: ${mode}`);
|
|
555
2750
|
console.log(` Last poll: ${state.lastPoll || "never"}`);
|
|
556
2751
|
console.log(` Total ingested: ${state.totalIngested} (conversations)`);
|