@qearlyao/familiar 0.2.3 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +5 -2
- package/config.example.toml +1 -1
- package/dist/added-models.js +6 -15
- package/dist/agent-events.js +1 -3
- package/dist/agent.js +3 -4
- package/dist/browser-tools.js +12 -11
- package/dist/chat-log.js +3 -2
- package/dist/cli.js +2 -2
- package/dist/config-overrides.js +5 -14
- package/dist/config-registry.js +1 -4
- package/dist/config.js +45 -113
- package/dist/contact-note.js +2 -12
- package/dist/data-retention.js +1 -3
- package/dist/discord.js +2 -2
- package/dist/generated-media.js +3 -2
- package/dist/hot-reload.js +1 -3
- package/dist/image-gen.js +12 -51
- package/dist/inbound-attachments.js +53 -22
- package/dist/memory/diary/ambient-injector.js +1 -3
- package/dist/memory/diary/ambient.js +1 -3
- package/dist/memory/diary/chunks.js +1 -3
- package/dist/memory/diary/indexer.js +1 -3
- package/dist/memory/doctor.js +3 -8
- package/dist/memory/index/chunk-indexer.js +6 -2
- package/dist/memory/index/retrieval.js +1 -3
- package/dist/memory/index/store.js +47 -19
- package/dist/memory/lcm/backfill.js +19 -16
- package/dist/memory/lcm/context-transformer.js +12 -24
- package/dist/memory/lcm/context.js +10 -4
- package/dist/memory/lcm/eviction-score.js +25 -13
- package/dist/memory/lcm/indexer.js +1 -5
- package/dist/memory/lcm/normalize.js +22 -1
- package/dist/memory/lcm/store.js +27 -24
- package/dist/memory/operator.js +2 -4
- package/dist/memory/service.js +1 -3
- package/dist/memory/tools.js +0 -4
- package/dist/memory/util.js +6 -0
- package/dist/models.js +3 -0
- package/dist/persona.js +2 -14
- package/dist/runtime.js +2 -23
- package/dist/scheduler.js +15 -49
- package/dist/service.js +24 -14
- package/dist/settings.js +7 -32
- package/dist/tts.js +0 -6
- package/dist/util/fs.js +41 -0
- package/dist/util/guards.js +8 -0
- package/dist/util/image-mime.js +31 -0
- package/dist/util/time.js +29 -0
- package/dist/web-auth.js +4 -1
- package/dist/web-tools.js +8 -5
- package/dist/web.js +188 -62
- package/npm-shrinkwrap.json +2 -2
- package/package.json +1 -1
- package/web/dist/assets/index-B23WT77N.js +63 -0
- package/web/dist/assets/index-D3MotFzN.css +2 -0
- package/web/dist/index.html +2 -2
- package/web/dist/assets/index-C-w9fjBf.js +0 -61
- package/web/dist/assets/index-CcQ13VAY.css +0 -2
package/dist/image-gen.js
CHANGED
|
@@ -1,21 +1,15 @@
|
|
|
1
1
|
import { randomUUID } from "node:crypto";
|
|
2
2
|
import { lstat, writeFile } from "node:fs/promises";
|
|
3
|
-
import { basename,
|
|
3
|
+
import { basename, isAbsolute, relative, resolve } from "node:path";
|
|
4
4
|
import { findEnvKeys, generateImages, getEnvApiKey, getImageModels, getImageProviders, } from "@earendil-works/pi-ai";
|
|
5
5
|
import { Type } from "typebox";
|
|
6
6
|
import { ensureGeneratedAttachmentsDir } from "./generated-media.js";
|
|
7
7
|
import { ensureInlineImageDerivative } from "./image-derivatives.js";
|
|
8
8
|
import { promptImagesFromAttachments } from "./inbound-attachments.js";
|
|
9
9
|
import { parseModelRef } from "./models.js";
|
|
10
|
+
import { imageMimeTypeFromPath, sniffImageMimeType } from "./util/image-mime.js";
|
|
10
11
|
const IMAGE_GEN_NOTICE_PREFIX = "Generated image attachment:";
|
|
11
12
|
const OPENROUTER_IMAGE_BASE_URL = "https://openrouter.ai/api/v1";
|
|
12
|
-
const IMAGE_MIME_BY_EXTENSION = {
|
|
13
|
-
".jpg": "image/jpeg",
|
|
14
|
-
".jpeg": "image/jpeg",
|
|
15
|
-
".png": "image/png",
|
|
16
|
-
".gif": "image/gif",
|
|
17
|
-
".webp": "image/webp",
|
|
18
|
-
};
|
|
19
13
|
const imageGenSchema = Type.Object({
|
|
20
14
|
prompt: Type.String({ description: "Image generation prompt." }),
|
|
21
15
|
referenceImages: Type.Optional(Type.Array(Type.String(), {
|
|
@@ -107,20 +101,6 @@ function textOutput(result) {
|
|
|
107
101
|
.filter(Boolean)
|
|
108
102
|
.join("\n");
|
|
109
103
|
}
|
|
110
|
-
function imageMimeTypeFromBytes(buffer) {
|
|
111
|
-
if (buffer.subarray(0, 3).equals(Buffer.from([0xff, 0xd8, 0xff])))
|
|
112
|
-
return "image/jpeg";
|
|
113
|
-
if (buffer.subarray(0, 8).equals(Buffer.from([0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a]))) {
|
|
114
|
-
return "image/png";
|
|
115
|
-
}
|
|
116
|
-
if (buffer.subarray(0, 6).toString("ascii") === "GIF87a" || buffer.subarray(0, 6).toString("ascii") === "GIF89a") {
|
|
117
|
-
return "image/gif";
|
|
118
|
-
}
|
|
119
|
-
if (buffer.subarray(0, 4).toString("ascii") === "RIFF" && buffer.subarray(8, 12).toString("ascii") === "WEBP") {
|
|
120
|
-
return "image/webp";
|
|
121
|
-
}
|
|
122
|
-
return undefined;
|
|
123
|
-
}
|
|
124
104
|
function recoveredImageFromBase64(value) {
|
|
125
105
|
const data = value.trim();
|
|
126
106
|
if (!/^[A-Za-z0-9+/]+={0,2}$/.test(data) || data.length % 4 !== 0)
|
|
@@ -128,7 +108,7 @@ function recoveredImageFromBase64(value) {
|
|
|
128
108
|
const buffer = Buffer.from(data, "base64");
|
|
129
109
|
if (!buffer.length)
|
|
130
110
|
return undefined;
|
|
131
|
-
const detectedMimeType =
|
|
111
|
+
const detectedMimeType = sniffImageMimeType(buffer);
|
|
132
112
|
if (!detectedMimeType)
|
|
133
113
|
return undefined;
|
|
134
114
|
return {
|
|
@@ -167,9 +147,6 @@ function normalizeCompatibleImageText(result) {
|
|
|
167
147
|
return result;
|
|
168
148
|
return { ...result, output };
|
|
169
149
|
}
|
|
170
|
-
function mimeTypeFromPath(path) {
|
|
171
|
-
return IMAGE_MIME_BY_EXTENSION[extname(path).toLowerCase()];
|
|
172
|
-
}
|
|
173
150
|
function resolveWorkspaceReferencePath(config, rawRef) {
|
|
174
151
|
const path = isAbsolute(rawRef) ? resolve(rawRef) : resolve(config.workspacePath, rawRef);
|
|
175
152
|
const workspaceRelative = relative(config.workspacePath, path);
|
|
@@ -190,7 +167,7 @@ async function collectWorkspaceReferenceImages(config, rawRef) {
|
|
|
190
167
|
}
|
|
191
168
|
if (!pathStat.isFile())
|
|
192
169
|
throw new Error(`Reference image path is not a file or folder: ${rawRef}`);
|
|
193
|
-
const mimeType =
|
|
170
|
+
const mimeType = imageMimeTypeFromPath(path);
|
|
194
171
|
if (!mimeType)
|
|
195
172
|
throw new Error(`Reference image path is not a supported image: ${rawRef}`);
|
|
196
173
|
return [
|
|
@@ -338,11 +315,7 @@ async function tryGenerateImages(config, ref, prompt, references, workspaceRefs,
|
|
|
338
315
|
}
|
|
339
316
|
function attemptDetails(model, result) {
|
|
340
317
|
return {
|
|
341
|
-
|
|
342
|
-
model: model.id,
|
|
343
|
-
api: model.api,
|
|
344
|
-
baseUrl: model.baseUrl,
|
|
345
|
-
...(result.responseId ? { responseId: result.responseId } : {}),
|
|
318
|
+
model: `${model.provider}/${model.id}`,
|
|
346
319
|
stopReason: result.stopReason,
|
|
347
320
|
...(result.errorMessage ? { errorMessage: result.errorMessage } : {}),
|
|
348
321
|
};
|
|
@@ -378,18 +351,8 @@ export function createImageGenTool(config, mediaSink, deps = {}) {
|
|
|
378
351
|
}
|
|
379
352
|
catch (error) {
|
|
380
353
|
const message = error instanceof Error ? error.message : String(error);
|
|
381
|
-
let baseUrl = "";
|
|
382
|
-
try {
|
|
383
|
-
baseUrl = resolveImageModel(config, ref).baseUrl;
|
|
384
|
-
}
|
|
385
|
-
catch {
|
|
386
|
-
baseUrl = "";
|
|
387
|
-
}
|
|
388
354
|
attempts.push({
|
|
389
|
-
|
|
390
|
-
model: ref.id,
|
|
391
|
-
api: config.imageGen.api,
|
|
392
|
-
baseUrl,
|
|
355
|
+
model: `${ref.provider}/${ref.id}`,
|
|
393
356
|
stopReason: "error",
|
|
394
357
|
errorMessage: message,
|
|
395
358
|
});
|
|
@@ -414,8 +377,10 @@ export function createImageGenTool(config, mediaSink, deps = {}) {
|
|
|
414
377
|
if (!selected)
|
|
415
378
|
throw new Error(`Image generation failed: ${selectedError}`);
|
|
416
379
|
const attachments = await writeGeneratedImages(config, mediaSink, selected.result);
|
|
380
|
+
const primaryAttachment = attachments[0];
|
|
417
381
|
const notices = attachments.map((attachment) => formatImageGenNotice(attachment.name));
|
|
418
382
|
const sideText = textOutput(selected.result);
|
|
383
|
+
const selectedAttempt = attempts.at(-1);
|
|
419
384
|
return {
|
|
420
385
|
content: [
|
|
421
386
|
{
|
|
@@ -424,15 +389,11 @@ export function createImageGenTool(config, mediaSink, deps = {}) {
|
|
|
424
389
|
},
|
|
425
390
|
],
|
|
426
391
|
details: {
|
|
427
|
-
|
|
428
|
-
model: selected.model.id,
|
|
429
|
-
api: selected.model.api,
|
|
430
|
-
baseUrl: selected.model.baseUrl,
|
|
431
|
-
prompt,
|
|
432
|
-
...(selected.result.responseId ? { responseId: selected.result.responseId } : {}),
|
|
392
|
+
model: `${selected.model.provider}/${selected.model.id}`,
|
|
433
393
|
...(sideText ? { textOutput: sideText } : {}),
|
|
434
|
-
|
|
435
|
-
|
|
394
|
+
...(primaryAttachment ? { id: primaryAttachment.id, localPath: primaryAttachment.localPath } : {}),
|
|
395
|
+
stopReason: selectedAttempt?.stopReason ?? selected.result.stopReason,
|
|
396
|
+
...(selectedAttempt?.errorMessage ? { errorMessage: selectedAttempt.errorMessage } : {}),
|
|
436
397
|
},
|
|
437
398
|
};
|
|
438
399
|
},
|
|
@@ -4,10 +4,13 @@ import { basename, extname, resolve } from "node:path";
|
|
|
4
4
|
import { attachmentsDir, publicAttachmentPath } from "./generated-media.js";
|
|
5
5
|
import { ensureInlineImageDerivative, MAX_INLINE_IMAGE_BASE64_BYTES } from "./image-derivatives.js";
|
|
6
6
|
import { deriveInboundAttachmentText } from "./media-understanding.js";
|
|
7
|
+
import { IMAGE_EXTENSION_BY_MIME, sniffImageMimeType } from "./util/image-mime.js";
|
|
7
8
|
export { MAX_INLINE_IMAGE_BASE64_BYTES } from "./image-derivatives.js";
|
|
8
9
|
export const MAX_INBOUND_ATTACHMENTS = 4;
|
|
9
10
|
export const MAX_INBOUND_ATTACHMENT_BYTES = 12 * 1024 * 1024;
|
|
10
11
|
export const MAX_INBOUND_TOTAL_BYTES = 24 * 1024 * 1024;
|
|
12
|
+
const TEXT_ATTACHMENT_PREVIEW_LINES = 2;
|
|
13
|
+
const TEXT_ATTACHMENT_PREVIEW_CHARS = 1000;
|
|
11
14
|
const ALLOWED_MIME_TYPES = new Set([
|
|
12
15
|
"image/jpeg",
|
|
13
16
|
"image/png",
|
|
@@ -23,10 +26,7 @@ const ALLOWED_MIME_TYPES = new Set([
|
|
|
23
26
|
"text/plain",
|
|
24
27
|
]);
|
|
25
28
|
const EXTENSIONS_BY_MIME = {
|
|
26
|
-
|
|
27
|
-
"image/png": ".png",
|
|
28
|
-
"image/gif": ".gif",
|
|
29
|
-
"image/webp": ".webp",
|
|
29
|
+
...IMAGE_EXTENSION_BY_MIME,
|
|
30
30
|
"audio/mpeg": ".mp3",
|
|
31
31
|
"audio/ogg": ".ogg",
|
|
32
32
|
"audio/wav": ".wav",
|
|
@@ -51,32 +51,42 @@ function kindFromMime(mimeType) {
|
|
|
51
51
|
return "video";
|
|
52
52
|
return "file";
|
|
53
53
|
}
|
|
54
|
+
/**
 * Build a short plain-text preview of a text attachment.
 *
 * The whole buffer is decoded as UTF-8, line endings are normalized to "\n",
 * and at most TEXT_ATTACHMENT_PREVIEW_LINES lines / TEXT_ATTACHMENT_PREVIEW_CHARS
 * UTF-16 code units are kept.
 *
 * @param {Uint8Array} buffer - Raw attachment bytes.
 * @param {string | undefined} mimeType - Detected MIME type of the attachment.
 * @returns {string | undefined} The preview, or undefined when the attachment
 *   is not "text/plain", is empty after trimming, or contains U+FFFD.
 */
function textAttachmentPreview(buffer, mimeType) {
    if (mimeType !== "text/plain")
        return undefined;
    const decoded = new TextDecoder("utf-8", { fatal: false }).decode(buffer);
    const normalized = decoded.replace(/\r\n/g, "\n").replace(/\r/g, "\n").trim();
    // A replacement character means the bytes were not clean UTF-8 (or the text
    // itself carries U+FFFD); either way no preview is produced.
    if (!normalized || normalized.includes("\uFFFD"))
        return undefined;
    const lines = normalized.split("\n").slice(0, TEXT_ATTACHMENT_PREVIEW_LINES);
    let clipped = lines.join("\n").slice(0, TEXT_ATTACHMENT_PREVIEW_CHARS);
    // String#slice counts UTF-16 code units, so the cut can land between the two
    // halves of a surrogate pair. Drop a dangling high surrogate so the preview
    // stays well-formed when it is later serialized.
    const lastUnit = clipped.charCodeAt(clipped.length - 1);
    if (lastUnit >= 0xd800 && lastUnit <= 0xdbff)
        clipped = clipped.slice(0, -1);
    const preview = clipped.trim();
    return preview || undefined;
}
|
|
54
65
|
/**
 * Heuristically decide whether a buffer holds plain text.
 *
 * Only the first 512 bytes are inspected. The sample must contain no NUL byte,
 * must be valid UTF-8, and must contain no control characters other than
 * tab, line feed and carriage return (DEL is rejected as well).
 *
 * @param {Buffer} buffer - Raw attachment bytes.
 * @returns {"text/plain" | undefined} "text/plain" when the sample looks like
 *   text (an empty buffer counts as text), otherwise undefined.
 */
function sniffText(buffer) {
    if (buffer.length === 0)
        return "text/plain";
    const head = buffer.subarray(0, Math.min(buffer.length, 512));
    if (head.includes(0))
        return undefined;
    let decoded;
    try {
        // When the sample is a strict prefix of the buffer, the cut may fall in the
        // middle of a multi-byte UTF-8 sequence. Streaming mode holds those trailing
        // bytes back instead of throwing, so valid non-ASCII text is not rejected.
        // A buffer that fits entirely in the sample is still decoded strictly.
        const truncated = head.length < buffer.length;
        decoded = new TextDecoder("utf-8", { fatal: true }).decode(head, { stream: truncated });
    }
    catch {
        return undefined;
    }
    for (const char of decoded) {
        const code = char.codePointAt(0) ?? 0;
        // Tab, LF and CR are the only control characters allowed in text.
        if (code === 9 || code === 10 || code === 13)
            continue;
        if (code < 32 || code === 127)
            return undefined;
    }
    return "text/plain";
}
|
|
87
|
+
function sniffMimeType(buffer, declared) {
|
|
88
|
+
let detected = sniffImageMimeType(buffer);
|
|
89
|
+
if (!detected && buffer.subarray(0, 4).toString("ascii") === "%PDF")
|
|
80
90
|
detected = "application/pdf";
|
|
81
91
|
else if (buffer.subarray(0, 3).toString("ascii") === "ID3" ||
|
|
82
92
|
buffer.subarray(0, 2).equals(Buffer.from([0xff, 0xfb]))) {
|
|
@@ -204,6 +214,18 @@ export async function materializeInboundAttachments(config, inputs) {
|
|
|
204
214
|
source: attachment.source,
|
|
205
215
|
sha256: attachment.sha256,
|
|
206
216
|
};
|
|
217
|
+
const textPreview = textAttachmentPreview(attachment.buffer, attachment.mimeType);
|
|
218
|
+
if (textPreview) {
|
|
219
|
+
finalAttachment.derived = {
|
|
220
|
+
...finalAttachment.derived,
|
|
221
|
+
text: {
|
|
222
|
+
provider: "local",
|
|
223
|
+
model: "text-preview",
|
|
224
|
+
label: "preview",
|
|
225
|
+
text: textPreview,
|
|
226
|
+
},
|
|
227
|
+
};
|
|
228
|
+
}
|
|
207
229
|
const derivedImage = await ensureInlineImageDerivative(config, finalAttachment);
|
|
208
230
|
if (derivedImage) {
|
|
209
231
|
if (derivedImage.localPath && !existingDerivedPaths.has(derivedImage.localPath)) {
|
|
@@ -260,7 +282,16 @@ export async function promptImagesFromAttachments(attachments) {
|
|
|
260
282
|
export function promptAttachmentNotes(attachments) {
|
|
261
283
|
return attachments
|
|
262
284
|
.map((attachment) => {
|
|
263
|
-
const attrs =
|
|
285
|
+
const attrs = [
|
|
286
|
+
`name="${attachment.name}"`,
|
|
287
|
+
`id="${attachment.id}"`,
|
|
288
|
+
`kind="${attachment.kind ?? "file"}"`,
|
|
289
|
+
`mime="${attachment.mimeType ?? "unknown"}"`,
|
|
290
|
+
`size="${attachment.size ?? "unknown"}"`,
|
|
291
|
+
attachment.localPath ? `path="${attachment.localPath}"` : undefined,
|
|
292
|
+
]
|
|
293
|
+
.filter(Boolean)
|
|
294
|
+
.join(" ");
|
|
264
295
|
const derivedText = attachment.derived?.text?.text;
|
|
265
296
|
if (derivedText) {
|
|
266
297
|
const label = attachment.derived?.text?.label || (attachment.kind === "audio" ? "transcription" : "summary");
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import { positiveIntegerOrDefault } from "../util.js";
|
|
1
2
|
import { retrieveAmbientDiary } from "./ambient.js";
|
|
2
3
|
const INJECTED_MEMORY_OPEN = "<injected_memory>";
|
|
3
4
|
const INJECTED_MEMORY_CLOSE = "</injected_memory>";
|
|
@@ -64,9 +65,6 @@ export class AmbientDiaryInjector {
|
|
|
64
65
|
}
|
|
65
66
|
}
|
|
66
67
|
}
|
|
67
|
-
function positiveIntegerOrDefault(value, fallback) {
|
|
68
|
-
return value !== undefined && Number.isInteger(value) && value > 0 ? value : fallback;
|
|
69
|
-
}
|
|
70
68
|
function nonNegativeIntegerOrDefault(value, fallback) {
|
|
71
69
|
return value !== undefined && Number.isInteger(value) && value >= 0 ? value : fallback;
|
|
72
70
|
}
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { retrieveMemory, } from "../index/retrieval.js";
|
|
2
|
+
import { positiveIntegerOrDefault } from "../util.js";
|
|
2
3
|
import { DIARY_CHUNK_CORPUS } from "./chunks.js";
|
|
3
4
|
const DEFAULT_LIMIT = 4;
|
|
4
5
|
const DEFAULT_CANDIDATE_MULTIPLIER = 5;
|
|
@@ -119,6 +120,3 @@ function normalizeUnit(value) {
|
|
|
119
120
|
const absolute = Math.abs(value);
|
|
120
121
|
return Math.max(0, Math.min(1, absolute > 1 ? absolute / 10 : absolute));
|
|
121
122
|
}
|
|
122
|
-
function positiveIntegerOrDefault(value, fallback) {
|
|
123
|
-
return value !== undefined && Number.isInteger(value) && value > 0 ? value : fallback;
|
|
124
|
-
}
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { basename } from "node:path";
|
|
2
|
+
import { positiveIntegerOrDefault } from "../util.js";
|
|
2
3
|
export const DIARY_CHUNK_CORPUS = "diary_chunk";
|
|
3
4
|
const DEFAULT_MAX_CHARS = 2400;
|
|
4
5
|
const DIARY_DATE_RE = /^(\d{4}-\d{2}-\d{2})\.md$/;
|
|
@@ -226,6 +227,3 @@ function stripInlineMarkdown(value) {
|
|
|
226
227
|
function isMarkdownHeading(line) {
|
|
227
228
|
return /^#{1,6}\s+/.test(line);
|
|
228
229
|
}
|
|
229
|
-
function positiveIntegerOrDefault(value, fallback) {
|
|
230
|
-
return value !== undefined && Number.isInteger(value) && value > 0 ? value : fallback;
|
|
231
|
-
}
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { readdir, readFile, stat } from "node:fs/promises";
|
|
2
2
|
import { basename, isAbsolute, join, resolve } from "node:path";
|
|
3
|
+
import { isEnoent } from "../../util/fs.js";
|
|
3
4
|
import { DIARY_CHUNK_CORPUS, indexDiaryMarkdown } from "./chunks.js";
|
|
4
5
|
export const DIARY_INDEX_FILE_RE = /^\d{4}-\d{2}-\d{2}\.md$/;
|
|
5
6
|
export function isDatedDiaryMarkdownFile(path) {
|
|
@@ -22,9 +23,6 @@ export async function listDiaryMarkdownFiles(config) {
|
|
|
22
23
|
.map((entry) => join(config.memory.diariesDir, entry.name))
|
|
23
24
|
.sort();
|
|
24
25
|
}
|
|
25
|
-
function isEnoent(error) {
|
|
26
|
-
return !!error && typeof error === "object" && "code" in error && error.code === "ENOENT";
|
|
27
|
-
}
|
|
28
26
|
export async function indexDiaryFile(options) {
|
|
29
27
|
const path = resolveDiaryPath(options.config, options.path);
|
|
30
28
|
const sourceId = basename(path);
|
package/dist/memory/doctor.js
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import { runInTransaction } from "./util.js";
|
|
1
2
|
export function runDoctor(stores, opts = {}) {
|
|
2
3
|
void opts;
|
|
3
4
|
const findings = [];
|
|
@@ -32,10 +33,7 @@ export function applyDoctorFixes(stores, report) {
|
|
|
32
33
|
fixed += before;
|
|
33
34
|
}
|
|
34
35
|
};
|
|
35
|
-
|
|
36
|
-
runIndexFixes();
|
|
37
|
-
else
|
|
38
|
-
stores.index.db.transaction(runIndexFixes).immediate();
|
|
36
|
+
runInTransaction(stores.index.db, runIndexFixes);
|
|
39
37
|
const runLcmFixes = () => {
|
|
40
38
|
fixed += stores.lcm.db
|
|
41
39
|
.prepare(`DELETE FROM lcm_segments
|
|
@@ -63,10 +61,7 @@ export function applyDoctorFixes(stores, report) {
|
|
|
63
61
|
}
|
|
64
62
|
}
|
|
65
63
|
};
|
|
66
|
-
|
|
67
|
-
runLcmFixes();
|
|
68
|
-
else
|
|
69
|
-
stores.lcm.db.transaction(runLcmFixes).immediate();
|
|
64
|
+
runInTransaction(stores.lcm.db, runLcmFixes);
|
|
70
65
|
if (report.findings.some((finding) => finding.kind === "summary_fk_violation")) {
|
|
71
66
|
warnings.push("summary FK violations were not modified; inspect LCM summary lineage manually");
|
|
72
67
|
}
|
|
@@ -99,10 +99,13 @@ export class ChunkIndexer {
|
|
|
99
99
|
const toInsert = [];
|
|
100
100
|
const insertPositions = [];
|
|
101
101
|
const existingMappings = [];
|
|
102
|
+
const existingMappingIds = new Map();
|
|
103
|
+
const insertKnownMissing = new Set();
|
|
102
104
|
for (let resultIndex = 0; resultIndex < prepared.length; resultIndex++) {
|
|
103
105
|
const item = prepared[resultIndex];
|
|
104
106
|
if (item.existingId !== null) {
|
|
105
107
|
ids[resultIndex] = item.existingId;
|
|
108
|
+
existingMappingIds.set(item.contentHash, item.existingId);
|
|
106
109
|
existingMappings.push({
|
|
107
110
|
corpus: item.input.corpus,
|
|
108
111
|
sourceId: item.sourceId,
|
|
@@ -120,6 +123,7 @@ export class ChunkIndexer {
|
|
|
120
123
|
if (!embedding)
|
|
121
124
|
throw new Error("Missing embedding for memory chunk");
|
|
122
125
|
insertPositions.push(resultIndex);
|
|
126
|
+
insertKnownMissing.add(item.contentHash);
|
|
123
127
|
toInsert.push({
|
|
124
128
|
corpus: item.input.corpus,
|
|
125
129
|
sourceId: item.sourceId,
|
|
@@ -137,8 +141,8 @@ export class ChunkIndexer {
|
|
|
137
141
|
if (replaceSource) {
|
|
138
142
|
this.store.deleteBySourceExceptMappings(replaceSource.corpus, replaceSource.sourceId, replaceSource.keepMappings);
|
|
139
143
|
}
|
|
140
|
-
this.store.recordSourceMappings(existingMappings);
|
|
141
|
-
insertedIds = this.store.insertChunks(toInsert);
|
|
144
|
+
this.store.recordSourceMappings(existingMappings, existingMappingIds);
|
|
145
|
+
insertedIds = this.store.insertChunks(toInsert, undefined, insertKnownMissing);
|
|
142
146
|
};
|
|
143
147
|
if (replaceSource && !this.store.db.inTransaction)
|
|
144
148
|
this.store.db.transaction(writeChunks).immediate();
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import { positiveIntegerOrDefault } from "../util.js";
|
|
1
2
|
const DEFAULT_LIMIT = 8;
|
|
2
3
|
const DEFAULT_CANDIDATE_MULTIPLIER = 4;
|
|
3
4
|
const RRF_K = 60;
|
|
@@ -241,6 +242,3 @@ function chunkSources(chunk) {
|
|
|
241
242
|
function uniqueStrings(values) {
|
|
242
243
|
return Array.from(new Set(values?.filter((value) => value.trim()) ?? []));
|
|
243
244
|
}
|
|
244
|
-
function positiveIntegerOrDefault(value, fallback) {
|
|
245
|
-
return value !== undefined && Number.isInteger(value) && value > 0 ? value : fallback;
|
|
246
|
-
}
|
|
@@ -55,25 +55,31 @@ export class MemoryIndexStore {
|
|
|
55
55
|
insertChunk(input) {
|
|
56
56
|
return this.insertChunks([input])[0];
|
|
57
57
|
}
|
|
58
|
-
insertChunks(inputs) {
|
|
58
|
+
insertChunks(inputs, preloadedIds, knownMissingHashes) {
|
|
59
59
|
if (inputs.length === 0)
|
|
60
60
|
return [];
|
|
61
61
|
const rows = inputs.map((input) => this.normalizeInput(input));
|
|
62
|
+
const knownIds = new Map(preloadedIds);
|
|
62
63
|
const out = [];
|
|
63
64
|
const insert = this.db.transaction((items) => {
|
|
64
65
|
for (const item of items)
|
|
65
|
-
out.push(this.insertNormalized(item));
|
|
66
|
+
out.push(this.insertNormalized(item, knownIds, knownMissingHashes));
|
|
66
67
|
});
|
|
67
68
|
insert.immediate(rows);
|
|
68
69
|
return out;
|
|
69
70
|
}
|
|
70
|
-
recordSourceMappings(inputs) {
|
|
71
|
+
recordSourceMappings(inputs, preloadedIds) {
|
|
71
72
|
if (inputs.length === 0)
|
|
72
73
|
return;
|
|
73
74
|
const rows = inputs.map((input) => this.normalizeInput(input));
|
|
74
75
|
this.db
|
|
75
76
|
.transaction((items) => {
|
|
76
77
|
for (const item of items) {
|
|
78
|
+
const preloadedId = preloadedIds?.get(item.contentHash);
|
|
79
|
+
if (preloadedId !== undefined) {
|
|
80
|
+
this.insertSourceMapping(preloadedId, item);
|
|
81
|
+
continue;
|
|
82
|
+
}
|
|
77
83
|
const existing = this.db
|
|
78
84
|
.prepare("SELECT id FROM memory_chunks WHERE content_hash = ?")
|
|
79
85
|
.get(item.contentHash);
|
|
@@ -88,8 +94,9 @@ export class MemoryIndexStore {
|
|
|
88
94
|
const out = [];
|
|
89
95
|
const replace = this.db.transaction(() => {
|
|
90
96
|
this.deleteBySourceInternal(corpus, sourceId);
|
|
97
|
+
const knownIds = new Map();
|
|
91
98
|
for (const item of rows)
|
|
92
|
-
out.push(this.insertNormalized(item));
|
|
99
|
+
out.push(this.insertNormalized(item, knownIds));
|
|
93
100
|
});
|
|
94
101
|
replace.immediate();
|
|
95
102
|
return out;
|
|
@@ -168,19 +175,19 @@ export class MemoryIndexStore {
|
|
|
168
175
|
return rows.map((row) => ({ id: row.id, score: row.score, chunk: rowToChunk(row) }));
|
|
169
176
|
}
|
|
170
177
|
searchSemanticLinear(query, normalized) {
|
|
171
|
-
const
|
|
172
|
-
.prepare(normalized.corpus
|
|
178
|
+
const stmt = this.db.prepare(normalized.corpus
|
|
173
179
|
? `SELECT c.*, ${sourcesJsonSelect("c.id")} FROM memory_chunks c WHERE c.corpus = ?`
|
|
174
|
-
: `SELECT c.*, ${sourcesJsonSelect("c.id")} FROM memory_chunks c`)
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
180
|
+
: `SELECT c.*, ${sourcesJsonSelect("c.id")} FROM memory_chunks c`);
|
|
181
|
+
const best = [];
|
|
182
|
+
for (const row of stmt.iterate(...(normalized.corpus ? [normalized.corpus] : []))) {
|
|
183
|
+
const hit = {
|
|
184
|
+
id: row.id,
|
|
185
|
+
score: cosineDistance(query, decodeVector(row.embedding, row.embedding_dimensions)),
|
|
186
|
+
chunk: rowToChunk(row),
|
|
187
|
+
};
|
|
188
|
+
insertBoundedHit(best, hit, normalized.limit);
|
|
189
|
+
}
|
|
190
|
+
return best;
|
|
184
191
|
}
|
|
185
192
|
deleteChunk(id) {
|
|
186
193
|
const remove = this.db.transaction(() => {
|
|
@@ -243,6 +250,7 @@ export class MemoryIndexStore {
|
|
|
243
250
|
this.deleteBySource(corpus, sourceId);
|
|
244
251
|
return;
|
|
245
252
|
}
|
|
253
|
+
const keptHashes = new Set(kept.map((item) => item.contentHash));
|
|
246
254
|
this.db
|
|
247
255
|
.transaction(() => {
|
|
248
256
|
const rows = this.db
|
|
@@ -259,7 +267,8 @@ export class MemoryIndexStore {
|
|
|
259
267
|
this.db
|
|
260
268
|
.prepare("DELETE FROM memory_index_sources WHERE corpus = ? AND source_id = ? AND chunk_index = ?")
|
|
261
269
|
.run(corpus, sourceId, row.chunk_index);
|
|
262
|
-
|
|
270
|
+
if (!keptHashes.has(row.content_hash))
|
|
271
|
+
this.deleteOrphanChunk(row.id);
|
|
263
272
|
}
|
|
264
273
|
})
|
|
265
274
|
.immediate();
|
|
@@ -362,9 +371,15 @@ export class MemoryIndexStore {
|
|
|
362
371
|
}),
|
|
363
372
|
};
|
|
364
373
|
}
|
|
365
|
-
insertNormalized(item) {
|
|
366
|
-
const
|
|
374
|
+
insertNormalized(item, knownIds, knownMissingHashes) {
|
|
375
|
+
const knownId = knownIds.get(item.contentHash);
|
|
376
|
+
const existing = knownId !== undefined
|
|
377
|
+
? { id: knownId }
|
|
378
|
+
: knownMissingHashes?.has(item.contentHash)
|
|
379
|
+
? undefined
|
|
380
|
+
: this.db.prepare("SELECT id FROM memory_chunks WHERE content_hash = ?").get(item.contentHash);
|
|
367
381
|
if (existing) {
|
|
382
|
+
knownIds.set(item.contentHash, existing.id);
|
|
368
383
|
this.insertSourceMapping(existing.id, item);
|
|
369
384
|
return existing.id;
|
|
370
385
|
}
|
|
@@ -383,6 +398,7 @@ export class MemoryIndexStore {
|
|
|
383
398
|
.prepare("INSERT INTO memory_vec(rowid, embedding) VALUES (CAST(? AS INTEGER), ?)")
|
|
384
399
|
.run(id, encodeVector(item.embedding));
|
|
385
400
|
}
|
|
401
|
+
knownIds.set(item.contentHash, id);
|
|
386
402
|
this.insertSourceMapping(id, item);
|
|
387
403
|
return id;
|
|
388
404
|
}
|
|
@@ -434,6 +450,18 @@ function normalizeSearchOptions(options) {
|
|
|
434
450
|
corpus: options.corpus,
|
|
435
451
|
};
|
|
436
452
|
}
|
|
453
|
+
/**
 * Insert a hit into an ascending-by-score list that is capped at `limit` entries.
 *
 * `best` is mutated in place. Lower scores sort first; a hit whose score ties
 * with existing entries is placed after them. A hit that would land at or
 * beyond position `limit` is discarded.
 *
 * @param {Array<{score: number}>} best - Current best hits, sorted ascending by score.
 * @param {{score: number}} hit - Candidate to insert.
 * @param {number} limit - Maximum number of entries to keep; values <= 0 keep nothing.
 * @returns {void}
 */
function insertBoundedHit(best, hit, limit) {
    if (limit <= 0)
        return;
    // NaN never compares less-than, so an unranked NaN entry would sit in the list
    // and block every later, better hit. Rank NaN as the worst possible score.
    const rank = (score) => (Number.isNaN(score) ? Number.POSITIVE_INFINITY : score);
    const hitRank = rank(hit.score);
    let index = best.findIndex((candidate) => hitRank < rank(candidate.score));
    if (index < 0)
        index = best.length;
    if (index >= limit)
        return;
    best.splice(index, 0, hit);
    if (best.length > limit)
        best.length = limit;
}
|
|
437
465
|
function sourcesJsonSelect(chunkIdExpr) {
|
|
438
466
|
return `(SELECT json_group_array(json_object(
|
|
439
467
|
'corpus', s.corpus,
|
|
@@ -2,7 +2,6 @@ import { readdir, readFile } from "node:fs/promises";
|
|
|
2
2
|
import { relative, resolve } from "node:path";
|
|
3
3
|
import { indexLcmRecords } from "./indexer.js";
|
|
4
4
|
import { normalizeChatRecords } from "./normalize.js";
|
|
5
|
-
import { computeLcmRecordKey } from "./store.js";
|
|
6
5
|
const DEFAULT_YIELD_EVERY_N = 1024;
|
|
7
6
|
const INDEX_BATCH_SIZE = 32;
|
|
8
7
|
export async function backfillFromChatLogs(deps, options) {
|
|
@@ -66,15 +65,12 @@ export async function backfillFromChatLogs(deps, options) {
|
|
|
66
65
|
}
|
|
67
66
|
const inserted = [];
|
|
68
67
|
for (const record of batch.records) {
|
|
69
|
-
|
|
68
|
+
const result = deps.lcmStore.insertRecordReturningStored(record);
|
|
69
|
+
if (!result.inserted) {
|
|
70
70
|
report.recordsSkippedDuplicate += 1;
|
|
71
71
|
continue;
|
|
72
72
|
}
|
|
73
|
-
|
|
74
|
-
const stored = deps.lcmStore.getRecord(id);
|
|
75
|
-
if (!stored)
|
|
76
|
-
throw new Error(`Failed to read backfilled LCM record: ${id}`);
|
|
77
|
-
inserted.push(stored);
|
|
73
|
+
inserted.push(result.record);
|
|
78
74
|
report.recordsInserted += 1;
|
|
79
75
|
if (inserted.length >= INDEX_BATCH_SIZE) {
|
|
80
76
|
report.indexedChunks += (await indexLcmRecords({ indexer: deps.indexer, records: inserted, signal: options.signal })).ids.length;
|
|
@@ -225,17 +221,24 @@ function countMissingSegments(lcmStore, segmentIds) {
|
|
|
225
221
|
return missing;
|
|
226
222
|
}
|
|
227
223
|
function countExistingRecords(lcmStore, records) {
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
224
|
+
if (records.length === 0)
|
|
225
|
+
return 0;
|
|
226
|
+
const keys = records.map((record) => lcmStore.computeRecordKey(record));
|
|
227
|
+
const existingKeys = new Set();
|
|
228
|
+
for (const chunk of chunks([...new Set(keys)], 256)) {
|
|
229
|
+
const rows = lcmStore.db
|
|
230
|
+
.prepare(`SELECT record_key FROM lcm_records WHERE record_key IN (${chunk.map(() => "?").join(",")})`)
|
|
231
|
+
.all(...chunk);
|
|
232
|
+
for (const row of rows)
|
|
233
|
+
existingKeys.add(row.record_key);
|
|
232
234
|
}
|
|
233
|
-
return
|
|
235
|
+
return keys.reduce((total, key) => total + (existingKeys.has(key) ? 1 : 0), 0);
|
|
234
236
|
}
|
|
235
|
-
function
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
.
|
|
237
|
+
/**
 * Split an array into consecutive slices of at most `size` elements.
 *
 * @param {Array} items - Values to partition; not mutated.
 * @param {number} size - Maximum slice length; must be greater than zero.
 * @returns {Array<Array>} The slices in order; empty when `items` is empty.
 * @throws {RangeError} When `size` is not greater than zero (a zero or negative
 *   step would otherwise never advance past the end of `items`).
 */
function chunks(items, size) {
    if (!(size > 0))
        throw new RangeError(`chunks: size must be greater than zero, got ${size}`);
    const out = [];
    for (let index = 0; index < items.length; index += size)
        out.push(items.slice(index, index + size));
    return out;
}
|
|
240
243
|
function errorCode(error) {
|
|
241
244
|
return error && typeof error === "object" && "code" in error
|