atlas-mcp 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +32 -0
- package/README.md +282 -0
- package/package.json +72 -0
- package/public/app/assets/app-CxbS1w9p.js +3981 -0
- package/public/app/assets/index-BA6nxCuI.css +1 -0
- package/public/app/assets/index-BXmIRrQH.js +177 -0
- package/public/app/index.html +27 -0
- package/public/assets/brain-atlas.LICENSE.txt +16 -0
- package/public/assets/brain-atlas.glb +0 -0
- package/public/assets/brain.obj +27282 -0
- package/public/fonts/DepartureMono-Regular.woff +0 -0
- package/public/fonts/DepartureMono-Regular.woff2 +0 -0
- package/scripts/sync-memory-vectors.js +46 -0
- package/src/audit.js +9 -0
- package/src/cli/args.js +87 -0
- package/src/cli/commands/add.js +103 -0
- package/src/cli/commands/config.js +228 -0
- package/src/cli/commands/delete.js +75 -0
- package/src/cli/commands/entities.js +39 -0
- package/src/cli/commands/entity.js +47 -0
- package/src/cli/commands/get.js +46 -0
- package/src/cli/commands/list.js +53 -0
- package/src/cli/commands/related.js +56 -0
- package/src/cli/commands/search.js +68 -0
- package/src/cli/commands/update.js +58 -0
- package/src/cli/deps.js +114 -0
- package/src/cli/env-file.js +44 -0
- package/src/cli/format.js +246 -0
- package/src/cli.js +187 -0
- package/src/cognitive-worker.js +381 -0
- package/src/db.js +2674 -0
- package/src/extraction-context.js +31 -0
- package/src/ingestion-service.js +387 -0
- package/src/ingestion-worker.js +225 -0
- package/src/llm-config.js +31 -0
- package/src/llm.js +789 -0
- package/src/logger.js +51 -0
- package/src/mcp-server.js +577 -0
- package/src/memory-comparison.js +421 -0
- package/src/related-memories.js +232 -0
- package/src/run-cognitive-worker.js +12 -0
- package/src/run-ingestion-worker.js +13 -0
- package/src/run-vector-worker.js +12 -0
- package/src/schemas.js +413 -0
- package/src/semantic-validation.js +430 -0
- package/src/server.js +827 -0
- package/src/shared/brain-regions.js +61 -0
- package/src/shared/entity-lens.js +249 -0
- package/src/shared/memory-placement.js +171 -0
- package/src/shared/memory-search.js +55 -0
- package/src/shared/region-anchors.js +112 -0
- package/src/shared/region-mapper.js +247 -0
- package/src/vector-store.js +546 -0
- package/src/vector-worker.js +71 -0
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
/** Number of vector hits requested (and at most consulted) per lookup. */
export const EXTRACTION_CONTEXT_LIMIT = 5;
/** Minimum hit score for a memory's entities to be offered as context. */
export const EXTRACTION_CONTEXT_THRESHOLD = 0.7;

/**
 * Collects known entities from stored memories similar to `text`.
 *
 * Hits scoring below EXTRACTION_CONTEXT_THRESHOLD are ignored. Entities of the
 * remaining hits are normalised to `{ canonicalName, aliases, kind }` and merged
 * per `kind` + lower-cased canonical name; the first occurrence wins and later
 * occurrences only contribute additional aliases.
 *
 * @param {string} text - Text that is about to be extracted.
 * @param {object} dependencies
 * @param {Function} dependencies.searchMemoryVectors - Called as `(text, { limit })`; its awaited result is filtered as an array of `{ id, score }`.
 * @param {Function} [dependencies.getEntitiesForMemory] - Called as `(memoryId)`; optional.
 * @returns {Promise<{ entities: Array<{ canonicalName: string, aliases: string[], kind: string }> }>}
 */
export async function retrieveExtractionContext(
  text,
  { searchMemoryVectors, getEntitiesForMemory },
) {
  const hits = await searchMemoryVectors(text, { limit: EXTRACTION_CONTEXT_LIMIT });

  const relevantHits = hits
    .filter((hit) => Number(hit.score) >= EXTRACTION_CONTEXT_THRESHOLD)
    .slice(0, EXTRACTION_CONTEXT_LIMIT);

  // A missing lookup function or an empty result simply contributes nothing.
  const rawEntities = relevantHits.flatMap((hit) => getEntitiesForMemory?.(hit.id) || []);

  const normalized = rawEntities.map((raw) => {
    const canonicalName = raw.canonical_name || raw.canonicalName;
    const aliases = raw.mention ? [raw.mention] : [];
    return { canonicalName, aliases, kind: raw.kind };
  });

  const merged = new Map();
  for (const candidate of normalized) {
    // Entities without a name or a kind cannot be keyed, so they are dropped.
    if (candidate.canonicalName && candidate.kind) {
      const key = `${candidate.kind}:${candidate.canonicalName.toLocaleLowerCase()}`;
      if (merged.has(key)) {
        const previous = merged.get(key);
        const aliases = [...new Set([...previous.aliases, ...candidate.aliases])];
        merged.set(key, { ...previous, aliases });
      } else {
        merged.set(key, candidate);
      }
    }
  }

  return { entities: Array.from(merged.values()) };
}
|
|
@@ -0,0 +1,387 @@
|
|
|
1
|
+
import { randomUUID } from "node:crypto";
|
|
2
|
+
import { retrieveExtractionContext } from "./extraction-context.js";
|
|
3
|
+
import {
|
|
4
|
+
COGNITIVE_ANNOTATION_SCHEMA_VERSION,
|
|
5
|
+
SEMANTIC_EXTRACTION_SCHEMA_VERSION,
|
|
6
|
+
} from "./schemas.js";
|
|
7
|
+
|
|
8
|
+
const DEFAULT_METADATA = Object.freeze({ confidence: 0.6, tags: [] });
|
|
9
|
+
|
|
10
|
+
export function createIngestionService(dependencies) {
|
|
11
|
+
for (const name of [
|
|
12
|
+
"createMemorySource", "updateMemorySourceStatus", "extractAtomicMemories",
|
|
13
|
+
"storeMemory", "updateMemoryGraph", "getMemory", "linkSourceMemory",
|
|
14
|
+
"enqueueAnnotationJob", "enqueueVectorIndexJob", "withTransaction",
|
|
15
|
+
]) {
|
|
16
|
+
if (typeof dependencies[name] !== "function") {
|
|
17
|
+
throw new Error(`Ingestion service is missing required dependency: ${name}`);
|
|
18
|
+
}
|
|
19
|
+
}
|
|
20
|
+
|
|
21
|
+
/**
 * Registers a source text (unless it is already known) and processes it right away.
 *
 * A known source must carry the same text, otherwise the call is rejected. A known
 * source whose extraction already completed is answered from its stored links
 * instead of being processed again.
 *
 * @param {object} input
 * @param {string} input.text - Raw source text.
 * @param {string} [input.ingestionDate] - Defaults to the current time as an ISO string.
 * @param {string} [input.source="ui"] - Origin label stored with the source.
 * @param {object} [input.metadata={}] - Caller metadata forwarded to processing.
 * @param {string} [input.sourceId] - Defaults to a fresh random UUID.
 * @returns {Promise<object>} Result of `processSource` or `completedSourceResult`.
 * @throws {Error} When `sourceId` exists with different text.
 */
async function ingest({
  text,
  ingestionDate = new Date().toISOString(),
  source = "ui",
  metadata = {},
  sourceId = randomUUID(),
}) {
  const known = dependencies.getMemorySource?.(sourceId);

  if (known && known.text !== text) {
    throw new Error(`Source ${sourceId} already exists with different text`);
  }
  if (known && known.extraction_status === "completed") {
    return completedSourceResult(known);
  }
  if (!known) {
    dependencies.createMemorySource({ id: sourceId, text, source, ingestionDate, metadata });
  }

  return processSource({ sourceId, text, ingestionDate, source, metadata });
}
|
|
47
|
+
|
|
48
|
+
/**
 * Registers a source text (unless it is already known) and queues it for
 * background ingestion instead of processing it inline.
 *
 * @param {object} input
 * @param {string} input.text - Raw source text.
 * @param {string} [input.ingestionDate] - Defaults to the current time as an ISO string.
 * @param {string} [input.source="ui"] - Origin label stored with the source.
 * @param {object} [input.metadata={}] - Caller metadata stored with the source.
 * @param {string} [input.sourceId] - Defaults to a fresh random UUID.
 * @returns {Promise<object>} `{ sourceId, status: "queued", memories: [] }`, or the
 *   stored result when the source already completed extraction.
 * @throws {Error} When `enqueueIngestionJob` was not supplied, or when `sourceId`
 *   exists with different text.
 */
async function enqueue({
  text,
  ingestionDate = new Date().toISOString(),
  source = "ui",
  metadata = {},
  sourceId = randomUUID(),
}) {
  // Queueing is optional for the service as a whole, so it is checked on use.
  const canQueue = typeof dependencies.enqueueIngestionJob === "function";
  if (!canQueue) {
    throw new Error("Ingestion service is missing required dependency: enqueueIngestionJob");
  }

  const known = dependencies.getMemorySource?.(sourceId);

  if (known && known.text !== text) {
    throw new Error(`Source ${sourceId} already exists with different text`);
  }
  if (known && known.extraction_status === "completed") {
    return completedSourceResult(known);
  }
  if (!known) {
    dependencies.createMemorySource({ id: sourceId, text, source, ingestionDate, metadata });
  }

  dependencies.enqueueIngestionJob({ sourceId });
  return { sourceId, status: "queued", memories: [] };
}
|
|
78
|
+
|
|
79
|
+
/**
 * Processes an already registered source by id.
 *
 * @param {string} sourceId - Id of the stored source record.
 * @param {object} [options]
 * @param {object} [options.metadata={}] - Overrides merged on top of the record's `metadata_json`.
 * @returns {Promise<object>} Result of `processSource`.
 * @throws {Error} When no source record is found for `sourceId`.
 */
async function runIngestion(sourceId, { metadata = {} } = {}) {
  const record = dependencies.getMemorySource?.(sourceId);
  if (!record) {
    throw new Error(`Source not found: ${sourceId}`);
  }

  // NOTE(review): assumes metadata_json is already a parsed object — confirm in the db layer.
  const mergedMetadata = { ...record.metadata_json, ...metadata };

  return processSource({
    sourceId,
    text: record.text,
    ingestionDate: record.ingestion_date,
    source: record.source,
    metadata: mergedMetadata,
  });
}
|
|
90
|
+
|
|
91
|
+
/**
 * Re-runs extraction for a stored source, preferring its first listed revision.
 *
 * The source is loaded with `includeRevisions: true`; when `revisions[0]` exists its
 * text and id are used, otherwise the source's own text is processed with a null
 * revision id.
 *
 * @param {string} sourceId - Id of the stored source record.
 * @param {object} [options]
 * @param {object} [options.metadata={}] - Overrides merged on top of the record's `metadata_json`.
 * @returns {Promise<object>} Result of `processSource`.
 * @throws {Error} When `getMemorySource` was not supplied, or no record is found.
 */
async function reprocess(sourceId, { metadata = {} } = {}) {
  // getMemorySource is not in the factory's required list, so fail with the same
  // explicit message enqueue() uses rather than an opaque "is not a function".
  if (typeof dependencies.getMemorySource !== "function") {
    throw new Error("Ingestion service is missing required dependency: getMemorySource");
  }

  const sourceRecord = dependencies.getMemorySource(sourceId, {
    includeRevisions: true,
  });
  if (!sourceRecord) throw new Error(`Source not found: ${sourceId}`);

  // NOTE(review): assumes revisions[0] is the revision to reprocess (e.g. newest first) — confirm ordering in the db layer.
  const revision = sourceRecord.revisions?.[0];

  return processSource({
    sourceId,
    sourceRevisionId: revision?.id ?? null,
    text: revision?.text ?? sourceRecord.text,
    ingestionDate: sourceRecord.ingestion_date,
    source: sourceRecord.source,
    metadata: { ...sourceRecord.metadata_json, ...metadata },
  });
}
|
|
106
|
+
|
|
107
|
+
/**
 * Runs the full pipeline for one source: extract atoms, decide how each one is
 * written, persist everything in one transaction, and record the source status.
 *
 * The source is marked "processing" (incrementing its attempt counter) before any
 * work starts, "completed" inside the persistence transaction, and "failed" when
 * anything in between throws.
 *
 * @param {object} input
 * @param {string} input.sourceId
 * @param {string|null} [input.sourceRevisionId=null]
 * @param {string} input.text
 * @param {string} input.ingestionDate
 * @param {string} input.source
 * @param {object} input.metadata
 * @returns {Promise<object>} `{ sourceId, status: "completed", memories }`, plus
 *   `reason: "no_durable_memory"` when nothing was persisted.
 * @throws The original error, tagged with `sourceId` and a default `code` of "INGESTION_FAILED".
 */
async function processSource({
  sourceId,
  sourceRevisionId = null,
  text,
  ingestionDate,
  source,
  metadata,
}) {
  const model = await resolveModel(dependencies);
  const schemaVersion = SEMANTIC_EXTRACTION_SCHEMA_VERSION;

  dependencies.updateMemorySourceStatus(sourceId, "processing", {
    incrementAttempts: true,
    model,
    schemaVersion,
  });

  try {
    const context = await optionalCanonicalizationContext(text, dependencies);
    const extraction = await dependencies.extractAtomicMemories(text, ingestionDate, context);

    // Atoms are prepared one after another; each preparation awaits its own lookups.
    const preparedAtoms = [];
    for (const atom of extraction.memories) {
      const preparedAtom = await prepareAtom({ atom, source, ingestionDate, metadata, model });
      preparedAtoms.push(preparedAtom);
    }

    // All writes and the "completed" status go through the same withTransaction call.
    const memories = dependencies.withTransaction(() => {
      const results = [];
      for (const preparedAtom of preparedAtoms) {
        results.push(persistAtom({
          ...preparedAtom,
          sourceId,
          sourceRevisionId,
          source,
          ingestionDate,
          model,
        }));
      }
      dependencies.updateMemorySourceStatus(sourceId, "completed", { model, schemaVersion });
      return results;
    });

    const result = { sourceId, status: "completed", memories };
    if (memories.length === 0) {
      result.reason = "no_durable_memory";
    }
    return result;
  } catch (error) {
    dependencies.updateMemorySourceStatus(sourceId, "failed", {
      error: error.message,
      model,
      schemaVersion,
    });
    error.sourceId = sourceId;
    if (!error.code) {
      error.code = "INGESTION_FAILED";
    }
    throw error;
  }
}
|
|
177
|
+
|
|
178
|
+
/**
 * Decides how one extracted atom will be written: as a new memory, as an update of
 * a matched memory, or as a no-op link to an unchanged memory.
 *
 * The decider's answer is only trusted when its action is "update" or "unchanged",
 * its confidence is at least 0.85 and the matched id is one of the offered
 * candidates. Anything else — including a thrown error, an empty answer or an
 * unrecognised action — falls back to creating a new memory, so that persistAtom
 * never writes a new record under an existing memory's id.
 *
 * @param {object} input
 * @param {object} input.atom - Extracted atom (`text`, `summary`, `evidenceSpans`, ...).
 * @param {string} input.source
 * @param {string} input.ingestionDate
 * @param {object} input.metadata - Caller metadata merged into the atom metadata.
 * @param {string} input.model
 * @returns {Promise<object>} Everything persistAtom needs for this atom.
 * @throws {Error} When an update's replacement text does not re-extract to exactly one atom.
 */
async function prepareAtom({ atom, source, ingestionDate, metadata, model }) {
  const inconclusive = () => createDecision("Memory matching was inconclusive.");

  const candidates = await writeCandidates(atom.text, dependencies);
  let decision = createDecision("No similar stored memory was found.", 1);
  if (candidates.available && candidates.items.length > 0) {
    try {
      decision = await dependencies.decideMemoryWrite(
        { text: atom.text, summary: atom.summary, ...atomMetadata(atom, metadata) },
        candidates.items,
      );
    } catch {
      // Matching is best-effort: a failing decider must not fail the ingestion.
      decision = inconclusive();
    }
    // A decider that resolves to nothing usable is treated like a failed one.
    if (decision === null || typeof decision !== "object") {
      decision = inconclusive();
    }
  } else if (!candidates.available) {
    decision = createDecision("Memory candidate search was unavailable.");
  }

  if (decision.action !== "create") {
    const recognised = decision.action === "update" || decision.action === "unchanged";
    const valid = recognised
      && decision.confidence >= 0.85
      && candidates.items.some(({ id }) => id === decision.matchedMemoryId);
    if (!valid) decision = createDecision("The possible memory match was uncertain.");
  }

  let storedAtom = atom;
  const replacementText = typeof decision.replacementText === "string"
    ? decision.replacementText.trim()
    : "";
  if (decision.action === "update" && replacementText && replacementText !== atom.text) {
    // The merged text is re-extracted so the stored atom describes what is actually stored.
    const replacement = await dependencies.extractAtomicMemories(
      replacementText,
      ingestionDate,
      await optionalCanonicalizationContext(replacementText, dependencies),
    );
    if (replacement.memories.length !== 1) {
      throw new Error("Memory update replacement must resolve to exactly one atomic memory");
    }
    storedAtom = replacement.memories[0];
  }

  return {
    atom: storedAtom,
    // Evidence always points into the original source text, not the replacement.
    sourceEvidence: atom.evidenceSpans,
    decision,
    memoryId: decision.action === "create"
      ? dependencies.createMemoryId?.() ?? randomUUID()
      : decision.matchedMemoryId,
    metadata: atomMetadata(storedAtom, metadata),
    source,
    ingestionDate,
    model,
  };
}
|
|
227
|
+
|
|
228
|
+
/**
 * Writes one prepared atom and links it to its source.
 *
 * Depending on `decision.action` the memory is looked up ("unchanged"), rewritten
 * through `updateMemoryGraph` ("update") or stored as new (anything else). The
 * source link is always recorded; annotation and vector-index jobs are queued only
 * when the memory was created or updated.
 *
 * @param {object} input - A prepared atom plus `sourceId`, `sourceRevisionId`,
 *   `source`, `ingestionDate` and `model`.
 * @returns {object} Outcome entry for the ingestion result (`action`, `memory`,
 *   `matchedMemoryId`, `confidence`, `reason`, `evidenceSpans`, `annotationStatus`,
 *   `indexStatus`).
 * @throws {Error} When an "unchanged" decision points at a memory that cannot be found.
 */
function persistAtom({
  atom, sourceEvidence, decision, memoryId, metadata, sourceId,
  sourceRevisionId, source, ingestionDate, model,
}) {
  let memory;
  switch (decision.action) {
    case "unchanged": {
      memory = dependencies.getMemory(memoryId);
      if (!memory) {
        throw new Error(`Matched memory not found: ${memoryId}`);
      }
      break;
    }
    case "update": {
      const updated = dependencies.updateMemoryGraph({
        memoryId,
        rawText: atom.text,
        ingestionDate,
        extraction: atom,
        model,
        metadata,
        schemaVersion: SEMANTIC_EXTRACTION_SCHEMA_VERSION,
      });
      memory = updated.memory;
      break;
    }
    default: {
      memory = dependencies.storeMemory(
        memoryId,
        atom.text,
        ingestionDate,
        atom,
        model,
        source,
        metadata,
      );
    }
  }

  const action = normalizeAction(decision.action);
  const isUnchanged = action === "unchanged";

  dependencies.linkSourceMemory({
    sourceId,
    sourceRevisionId,
    memoryId,
    action,
    evidence: sourceEvidence,
    model,
    schemaVersion: SEMANTIC_EXTRACTION_SCHEMA_VERSION,
    confidence: decision.confidence,
    reason: decision.reason,
  });

  if (!isUnchanged) {
    dependencies.enqueueAnnotationJob({
      memoryId,
      sourceId,
      model,
      schemaVersion: COGNITIVE_ANNOTATION_SCHEMA_VERSION,
    });
    dependencies.enqueueVectorIndexJob({ memoryId, sourceId });
  }

  const serialized = dependencies.serializeMemory?.(memory) ?? memory;

  // Freshly written memories always have pending jobs; untouched ones report
  // whatever status is stored, defaulting to "completed".
  const annotationStatus = isUnchanged
    ? normalizeStatus(dependencies.getAnnotationStatus?.(memoryId), "completed")
    : "pending";
  const indexStatus = isUnchanged
    ? normalizeStatus(dependencies.getVectorIndexStatus?.(memoryId), "completed")
    : "pending";

  return {
    action,
    memory: serialized,
    matchedMemoryId: action === "created" ? null : decision.matchedMemoryId,
    confidence: decision.confidence,
    reason: decision.reason,
    evidenceSpans: sourceEvidence,
    annotationStatus,
    indexStatus,
  };
}
|
|
296
|
+
|
|
297
|
+
/**
 * Rebuilds the ingestion result of an already completed source from its stored
 * source-to-memory links, so repeated ingest/enqueue calls get the same shape as
 * the first run without reprocessing.
 *
 * @param {object} sourceRecord - Stored source record; only `id` is read here.
 * @returns {object} `{ sourceId, status: "completed", memories }`, plus
 *   `reason: "no_durable_memory"` when the source produced no memories (matching
 *   what processSource returns for an empty result).
 */
function completedSourceResult(sourceRecord) {
  const links = dependencies.getSourceMemoryLinks?.(sourceRecord.id) || [];

  const memories = links.map((link) => {
    // Look the memory up once and reuse it for both the serialized and raw fallback.
    const memory = dependencies.getMemory(link.memory_id);
    return {
      action: link.action,
      memory: dependencies.serializeMemory?.(memory) ?? memory,
      matchedMemoryId: link.action === "created" ? null : link.memory_id,
      confidence: link.decision_confidence,
      reason: link.decision_reason,
      evidenceSpans: link.evidence_json,
      annotationStatus: normalizeStatus(
        dependencies.getAnnotationStatus?.(link.memory_id),
        "completed",
      ),
      indexStatus: normalizeStatus(
        dependencies.getVectorIndexStatus?.(link.memory_id),
        "completed",
      ),
    };
  });

  return {
    sourceId: sourceRecord.id,
    status: "completed",
    memories,
    ...(memories.length === 0 ? { reason: "no_durable_memory" } : {}),
  };
}
|
|
321
|
+
|
|
322
|
+
return { ingest, enqueue, runIngestion, reprocess };
|
|
323
|
+
}
|
|
324
|
+
|
|
325
|
+
/**
 * Best-effort wrapper around retrieveExtractionContext: any failure while looking
 * up context yields an empty entity list instead of propagating.
 *
 * @param {string} text - Text about to be extracted.
 * @param {object} dependencies - Forwarded unchanged to retrieveExtractionContext.
 * @returns {Promise<{ entities: Array }>}
 */
async function optionalCanonicalizationContext(text, dependencies) {
  let context;
  try {
    context = await retrieveExtractionContext(text, dependencies);
  } catch {
    // Context only improves canonical naming; extraction proceeds without it.
    context = { entities: [] };
  }
  return context;
}
|
|
332
|
+
|
|
333
|
+
/**
 * Looks up stored memories similar to `text` that a new atom might duplicate.
 *
 * @param {string} text - Atom text to search for.
 * @param {object} dependencies - Must provide `searchMemoryVectors`; also passed to serializeCandidate.
 * @returns {Promise<{ available: boolean, items: Array }>} `available` is false when
 *   the search or the candidate lookup threw; hits whose memory no longer exists are dropped.
 */
async function writeCandidates(text, dependencies) {
  const unavailable = { available: false, items: [] };
  try {
    const hits = await dependencies.searchMemoryVectors(text, { limit: 5 });
    const serialized = hits.map((hit) => serializeCandidate(hit.id, dependencies));
    const items = serialized.filter((candidate) => Boolean(candidate));
    return { available: true, items };
  } catch {
    // Callers distinguish "no candidates" from "could not search" via `available`.
    return unavailable;
  }
}
|
|
344
|
+
|
|
345
|
+
/**
 * Resolves the model name recorded with extraction results.
 *
 * Preference order: a truthy `dependencies.model`, then the (possibly async) result
 * of `dependencies.getModel()`, then the literal "unknown".
 *
 * @param {object} dependencies
 * @param {string} [dependencies.model]
 * @param {Function} [dependencies.getModel] - May return a value or a promise.
 * @returns {Promise<string>}
 */
async function resolveModel(dependencies) {
  if (dependencies.model) return dependencies.model;
  // Await before applying the fallback: a pending promise is always truthy, so an
  // async getModel() resolving to nothing would otherwise skip "unknown".
  const resolved = await dependencies.getModel?.();
  return resolved || "unknown";
}
|
|
348
|
+
|
|
349
|
+
/**
 * Builds the metadata stored with an atom from the extraction and caller metadata.
 *
 * - `type`: the caller's type when `forceType` is set and a type is given;
 *   otherwise the atom's dominant extracted type, then the caller's type, then "fact".
 * - `title`: the atom summary, or the first 50 characters of its text.
 * - `confidence`: the atom's durability confidence, or the default confidence.
 * - `tags`: the caller's tags when they are an array, otherwise an empty array.
 *
 * @param {object} atom - Extracted atom (`types`, `summary`, `text`, `durability`).
 * @param {object} metadata - Caller metadata (`type`, `forceType`, `tags`).
 * @returns {{ confidence: number, tags: Array, type: string, title: string }}
 */
function atomMetadata(atom, metadata) {
  const extractedType = dominantType(atom.types);

  let type;
  if (metadata.forceType && metadata.type) {
    type = metadata.type;
  } else {
    type = extractedType || metadata.type || "fact";
  }

  const title = atom.summary || atom.text.slice(0, 50);
  const confidence = atom.durability?.confidence ?? DEFAULT_METADATA.confidence;
  const tags = Array.isArray(metadata.tags) ? metadata.tags : [];

  return Object.assign({}, DEFAULT_METADATA, { type, title, confidence, tags });
}
|
|
361
|
+
|
|
362
|
+
/**
 * Picks the type with the highest weight; ties are broken by type name order.
 *
 * @param {Iterable<{ type: string, weight: number }>|null} [types] - Weighted type
 *   candidates. `undefined` and `null` are both treated as "no candidates".
 * @returns {string|null} The winning type name, or null when there is none.
 */
function dominantType(types = []) {
  // The default parameter only covers undefined; an explicit null (e.g. from
  // parsed JSON) must not crash the spread below.
  const candidates = [...(types ?? [])];
  // Sorting the copy keeps the caller's array untouched.
  candidates.sort((left, right) =>
    right.weight - left.weight || left.type.localeCompare(right.type));
  return candidates[0]?.type || null;
}
|
|
366
|
+
|
|
367
|
+
/**
 * Loads a memory by id and reduces it to the fields offered to the write decider.
 *
 * @param {string} id - Memory id taken from a vector hit.
 * @param {object} dependencies - Must provide `getMemory`; `getLatestExtraction` is optional.
 * @returns {object|null} `{ id, text, summary, type, title, tags, extraction }`, or
 *   null when the memory does not exist. `extraction` is the latest extraction's
 *   `extraction_json`, or null when unavailable.
 */
function serializeCandidate(id, dependencies) {
  const memory = dependencies.getMemory(id);
  if (!memory) {
    return null;
  }

  const latest = dependencies.getLatestExtraction?.(id);
  const extraction = latest?.extraction_json ?? null;

  const { raw_text: text, summary, type, title, tags } = memory;
  return { id: memory.id, text, summary, type, title, tags, extraction };
}
|
|
374
|
+
|
|
375
|
+
/**
 * Builds a "create a new memory" decision.
 *
 * @param {string} reason - Human-readable explanation stored with the decision.
 * @param {number} [confidence=0] - Confidence attached to the decision.
 * @returns {{ action: "create", matchedMemoryId: null, confidence: number, reason: string, replacementText: string }}
 */
function createDecision(reason, confidence = 0) {
  const decision = { action: "create", matchedMemoryId: null };
  decision.confidence = confidence;
  decision.reason = reason;
  decision.replacementText = "";
  return decision;
}
|
|
378
|
+
|
|
379
|
+
/**
 * Maps a decision action to the past-tense outcome label stored on links.
 *
 * @param {string} action - "create", "update", or anything else.
 * @returns {"created"|"updated"|"unchanged"} Every other value maps to "unchanged".
 */
function normalizeAction(action) {
  switch (action) {
    case "create":
      return "created";
    case "update":
      return "updated";
    default:
      return "unchanged";
  }
}
|
|
382
|
+
|
|
383
|
+
/**
 * Returns `status` when it is one of the known job states, otherwise `fallback`.
 *
 * @param {*} status - Candidate status value.
 * @param {*} fallback - Returned for any unrecognised status.
 * @returns {*} "pending", "processing", "completed", "failed", or `fallback`.
 */
function normalizeStatus(status, fallback) {
  switch (status) {
    case "pending":
    case "processing":
    case "completed":
    case "failed":
      return status;
    default:
      return fallback;
  }
}
|
|
@@ -0,0 +1,225 @@
|
|
|
1
|
+
import { calculateRetryDelay } from "./cognitive-worker.js";
|
|
2
|
+
|
|
3
|
+
const DEFAULT_POLL_INTERVAL_MS = 1_000;
|
|
4
|
+
const DEFAULT_STALE_AFTER_MS = 5 * 60_000;
|
|
5
|
+
const DEFAULT_BASE_RETRY_MS = 1_000;
|
|
6
|
+
const DEFAULT_MAX_RETRY_MS = 60_000;
|
|
7
|
+
const DEFAULT_MAX_ATTEMPTS = 5;
|
|
8
|
+
|
|
9
|
+
/**
 * Asserts that a dependency is callable and hands it back.
 *
 * @param {*} value - Candidate function.
 * @param {string} name - Dependency name used in the error message.
 * @returns {Function} `value`, unchanged.
 * @throws {TypeError} When `value` is not a function.
 */
function requiredFunction(value, name) {
  if (typeof value === "function") {
    return value;
  }
  throw new TypeError(`ingestion worker requires ${name}()`);
}
|
|
15
|
+
|
|
16
|
+
/**
 * Normalises an optional dependency: functions pass through, anything else becomes null.
 *
 * @param {*} value - Candidate function.
 * @returns {Function|null}
 */
function optionalFunction(value) {
  if (typeof value !== "function") {
    return null;
  }
  return value;
}
|
|
19
|
+
|
|
20
|
+
/**
 * Extracts a storable message from any thrown value.
 *
 * @param {*} error - Thrown value; not necessarily an Error.
 * @returns {string} `error.message` for Error instances, otherwise `String(error)`.
 */
function errorMessage(error) {
  return error instanceof Error ? error.message : String(error);
}
|
|
24
|
+
|
|
25
|
+
/**
 * Reads a job's id, accepting the `id`, `jobId` and `job_id` spellings in that order.
 *
 * @param {object} job - Claimed job record.
 * @returns {*} The first non-nullish of `id` and `jobId`, otherwise `job_id` as-is.
 */
function jobId(job) {
  const primary = job.id;
  if (primary != null) {
    return primary;
  }
  const camelCase = job.jobId;
  if (camelCase != null) {
    return camelCase;
  }
  return job.job_id;
}
|
|
28
|
+
|
|
29
|
+
/**
 * Reads the source id a job refers to, accepting `sourceId`, `source_id` and a
 * nested `source.id` in that order.
 *
 * @param {object} job - Claimed job record.
 * @returns {*} The first non-nullish of `sourceId` and `source_id`, otherwise `source?.id`.
 */
function sourceId(job) {
  const camelCase = job.sourceId;
  if (camelCase != null) {
    return camelCase;
  }
  const snakeCase = job.source_id;
  if (snakeCase != null) {
    return snakeCase;
  }
  return job.source?.id;
}
|
|
32
|
+
|
|
33
|
+
/**
 * Reads how many attempts a job has had, accepting `attempts`, `attemptCount` and
 * `attempt_count` in that order.
 *
 * @param {object} job - Claimed job record.
 * @returns {number} The stored count when it converts to a positive integer, otherwise 1.
 */
function attemptCount(job) {
  const raw = job.attempts ?? job.attemptCount ?? job.attempt_count;
  const parsed = Number(raw);
  if (!Number.isInteger(parsed) || parsed <= 0) {
    return 1;
  }
  return parsed;
}
|
|
37
|
+
|
|
38
|
+
/**
 * Resolves the attempt limit for a job, accepting `maxAttempts`, `max_attempts` and
 * `maximumAttempts` in that order.
 *
 * @param {object} job - Claimed job record.
 * @param {number} fallback - Worker-level limit; also the upper bound for job-level limits.
 * @returns {number} The smaller of the job's limit and `fallback` when the job's limit
 *   converts to a positive integer, otherwise `fallback`.
 */
function maximumAttempts(job, fallback) {
  const parsed = Number(job.maxAttempts ?? job.max_attempts ?? job.maximumAttempts);
  const isUsable = Number.isInteger(parsed) && parsed > 0;
  return isUsable ? Math.min(parsed, fallback) : fallback;
}
|
|
42
|
+
|
|
43
|
+
/**
 * Validates the worker's numeric options and throws on the first violation.
 *
 * Checked in order: the four duration options must be finite and non-negative,
 * `maxAttempts` must be a positive integer, and `maxRetryMs` must not be smaller
 * than `baseRetryMs`.
 *
 * @param {object} options - `{ pollIntervalMs, staleAfterMs, baseRetryMs, maxRetryMs, maxAttempts }`.
 * @returns {void}
 * @throws {RangeError} Describing the first invalid option.
 */
function validateOptions(options) {
  const durationNames = ["pollIntervalMs", "staleAfterMs", "baseRetryMs", "maxRetryMs"];
  for (const name of durationNames) {
    const value = options[name];
    const isValidDuration = Number.isFinite(value) && value >= 0;
    if (!isValidDuration) {
      throw new RangeError(`${name} must be a non-negative finite number`);
    }
  }

  const { maxAttempts, maxRetryMs, baseRetryMs } = options;
  if (!(Number.isInteger(maxAttempts) && maxAttempts >= 1)) {
    throw new RangeError("maxAttempts must be a positive integer");
  }
  if (maxRetryMs < baseRetryMs) {
    throw new RangeError("maxRetryMs must be at least baseRetryMs");
  }
}
|
|
61
|
+
|
|
62
|
+
/**
 * Creates a polling worker that claims ingestion jobs from `db` and runs them
 * through `ingestionService.runIngestion`.
 *
 * Shutdown is prompt: `stop()` and an aborted `signal` both end a pending idle wait
 * immediately, and no new idle wait is started once shutdown was requested.
 *
 * @param {object} [options]
 * @param {object} options.db - Adapter providing `claimIngestionJob`, `recoverIngestionJobs`,
 *   `completeIngestionJob`, `retryIngestionJob` and optionally `failIngestionJob`.
 * @param {object} options.ingestionService - Must provide `runIngestion(sourceId)`.
 * @param {Function} [options.now] - Clock; must return a Date or a value `new Date()` accepts.
 * @param {Function} [options.setTimer=setTimeout] - Timer factory used for idle waits.
 * @param {Function} [options.clearTimer=clearTimeout] - Cancels a timer made by `setTimer`.
 * @param {number} [options.pollIntervalMs] - Idle wait between claims.
 * @param {number} [options.staleAfterMs] - Age used to compute `staleBefore` during recovery.
 * @param {number} [options.baseRetryMs] - Passed to `calculateRetryDelay`.
 * @param {number} [options.maxRetryMs] - Passed to `calculateRetryDelay`.
 * @param {number} [options.maxAttempts] - Worker-level attempt limit.
 * @param {Function} [options.onError] - Called with the error that ended a `start()` loop.
 * @returns {{ start: Function, run: Function, runOnce: Function, stop: Function, running: boolean }}
 * @throws {TypeError} When `db`, `ingestionService` or a required db function is missing.
 * @throws {RangeError} When a numeric option is invalid.
 */
export function createIngestionWorker({
  db,
  ingestionService,
  now = () => new Date(),
  setTimer = setTimeout,
  clearTimer = clearTimeout,
  pollIntervalMs = DEFAULT_POLL_INTERVAL_MS,
  staleAfterMs = DEFAULT_STALE_AFTER_MS,
  baseRetryMs = DEFAULT_BASE_RETRY_MS,
  maxRetryMs = DEFAULT_MAX_RETRY_MS,
  maxAttempts = DEFAULT_MAX_ATTEMPTS,
  onError = () => {},
} = {}) {
  if (!db || typeof db !== "object") {
    throw new TypeError("ingestion worker requires a db adapter");
  }
  if (!ingestionService || typeof ingestionService.runIngestion !== "function") {
    throw new TypeError("ingestion worker requires ingestionService.runIngestion()");
  }

  const options = { pollIntervalMs, staleAfterMs, baseRetryMs, maxRetryMs, maxAttempts };
  validateOptions(options);

  const claimJob = requiredFunction(db.claimIngestionJob, "db.claimIngestionJob");
  const recoverJobs = requiredFunction(db.recoverIngestionJobs, "db.recoverIngestionJobs");
  const completeJob = requiredFunction(db.completeIngestionJob, "db.completeIngestionJob");
  const retryJob = requiredFunction(db.retryIngestionJob, "db.retryIngestionJob");
  const failJob = optionalFunction(db.failIngestionJob);

  // Idle-wait state: `timer` and `resolvePoll` are set together by waitForPoll().
  let timer = null;
  let resolvePoll = null;
  let loopPromise = null;
  let stopped = true;

  /** Reads the injected clock and rejects values that are not valid dates. */
  const currentDate = () => {
    const value = now();
    const date = value instanceof Date ? value : new Date(value);
    if (Number.isNaN(date.getTime())) {
      throw new TypeError("now() must return a valid Date or date value");
    }
    return date;
  };

  /**
   * Records a failed run: marks the job failed once its attempts are exhausted
   * (via failIngestionJob when available), otherwise schedules a retry.
   */
  const reschedule = async (job, error) => {
    const attempts = attemptCount(job);
    const attemptLimit = maximumAttempts(job, maxAttempts);
    const id = jobId(job);
    const message = errorMessage(error);
    const failedAt = currentDate();

    if (attempts >= attemptLimit && failJob) {
      await failJob.call(db, id, {
        error: message,
        attempts,
        failedAt: failedAt.toISOString(),
      });
      return { status: "failed", job, error };
    }

    // Without failIngestionJob, exhaustion is reported through `terminal: true`.
    const delayMs = calculateRetryDelay(attempts, { baseRetryMs, maxRetryMs });
    const retryAt = new Date(failedAt.getTime() + delayMs);
    await retryJob.call(db, {
      jobId: id,
      error: message,
      attempts,
      retryAt: retryAt.toISOString(),
      terminal: attempts >= attemptLimit,
      updatedAt: failedAt.toISOString(),
    });
    return {
      status: attempts >= attemptLimit ? "failed" : "retrying",
      job,
      error,
      retryAt,
      delayMs,
    };
  };

  /** Claims and runs at most one job; resolves to a status object describing the outcome. */
  const runOnce = async () => {
    const claimedAt = currentDate();
    const job = await claimJob.call(db, { now: claimedAt.toISOString() });
    if (!job) return { status: "idle" };

    try {
      const id = sourceId(job);
      if (!id) throw new Error(`ingestion job ${jobId(job)} has no source id`);
      const result = await ingestionService.runIngestion(id);
      await completeJob.call(db, {
        jobId: jobId(job),
        completedAt: currentDate().toISOString(),
      });
      return { status: "completed", job, result };
    } catch (error) {
      return reschedule(job, error);
    }
  };

  /** Resolves after pollIntervalMs, or earlier when wakePoll() is called. */
  const waitForPoll = () =>
    new Promise((resolve) => {
      resolvePoll = resolve;
      timer = setTimer(() => {
        timer = null;
        resolvePoll = null;
        resolve();
      }, pollIntervalMs);
    });

  /** Ends a pending idle wait immediately; a no-op when the loop is not waiting. */
  const wakePoll = () => {
    if (timer !== null) {
      clearTimer(timer);
      timer = null;
    }
    resolvePoll?.();
    resolvePoll = null;
  };

  /**
   * Runs the claim loop until stop() is called or `signal` aborts. With
   * `recover` (default) stale jobs are handed to recoverIngestionJobs first.
   */
  const run = async ({ signal, recover = true } = {}) => {
    stopped = false;
    const shouldContinue = () => !stopped && !signal?.aborted;

    // Wake the idle wait on abort so shutdown does not have to sit out the poll
    // interval. Optional call keeps signal-like test doubles without events working.
    signal?.addEventListener?.("abort", wakePoll, { once: true });
    try {
      if (recover) {
        const recoveredAt = currentDate();
        await recoverJobs.call(db, {
          now: recoveredAt.toISOString(),
          retryAt: recoveredAt.toISOString(),
          staleBefore: new Date(recoveredAt.getTime() - staleAfterMs).toISOString(),
        });
      }

      while (shouldContinue()) {
        const result = await runOnce();
        // Re-check before sleeping: stop()/abort may have arrived while runOnce()
        // was in flight, when there was no timer to cancel yet.
        if (result.status === "idle" && shouldContinue()) await waitForPoll();
      }
    } finally {
      signal?.removeEventListener?.("abort", wakePoll);
    }
  };

  /** Starts the loop once; repeated calls return the same in-flight promise. */
  const start = () => {
    if (loopPromise) return loopPromise;
    stopped = false;
    loopPromise = run()
      .catch((error) => {
        onError(error);
        throw error;
      })
      .finally(() => {
        loopPromise = null;
        stopped = true;
      });
    return loopPromise;
  };

  /** Requests shutdown, cancels a pending idle wait and waits for the loop to end. */
  const stop = async () => {
    stopped = true;
    wakePoll();
    await loopPromise;
  };

  return {
    start,
    run,
    runOnce,
    stop,
    get running() {
      return loopPromise !== null;
    },
  };
}
|
|
222
|
+
|
|
223
|
+
/**
 * Convenience entry point: builds a worker from `dependencies` and processes at
 * most one job.
 *
 * @param {object} dependencies - Options accepted by createIngestionWorker.
 * @returns {Promise<object>} The status object produced by the worker's `runOnce()`.
 */
export async function runIngestionWorkerCycle(dependencies) {
  const worker = createIngestionWorker(dependencies);
  return worker.runOnce();
}
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
import createLogger from "./logger.js";
|
|
2
|
+
|
|
3
|
+
const log = createLogger("llm-config");
|
|
4
|
+
|
|
5
|
+
// Supported chat-completion providers: endpoint, default model, and the name of
// the environment variable that holds the provider's API key.
const PROVIDERS = {
  tokenrouter: {
    baseUrl: "https://api.tokenrouter.com/v1/chat/completions",
    model: "MiniMax-M3",
    apiKeyEnv: "TOKENROUTER_API_KEY",
  },
  openrouter: {
    baseUrl: "https://openrouter.ai/api/v1/chat/completions",
    model: "deepseek/deepseek-v4-flash",
    apiKeyEnv: "OPENROUTER_API_KEY",
  },
};

// LLM_PROVIDER selects the provider; unset or empty falls back to "tokenrouter".
const activeProvider = process.env.LLM_PROVIDER || "tokenrouter";

// Own-property lookup only: a plain PROVIDERS[name] would also resolve inherited
// keys such as "constructor" or "toString" and slip past the check below.
const config = Object.prototype.hasOwnProperty.call(PROVIDERS, activeProvider)
  ? PROVIDERS[activeProvider]
  : undefined;

if (!config) {
  throw new Error(`Unknown LLM_PROVIDER "${activeProvider}". Valid: ${Object.keys(PROVIDERS).join(", ")}`);
}

/** Chat-completions endpoint of the active provider. */
export const baseUrl = config.baseUrl;
/** Model name; LLM_MODEL overrides the provider's default. */
export const model = process.env.LLM_MODEL || config.model;
/** Name of the environment variable holding the API key (not the key itself). */
export const apiKeyEnv = config.apiKeyEnv;
/** Name of the active provider. */
export const providerName = activeProvider;

log.info("provider configured", { provider: activeProvider, model, baseUrl: config.baseUrl });
|