@equationalapplications/core-llm-wiki 4.15.3 → 4.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +96 -0
- package/dist/{chunk-J4GBC6CP.mjs → chunk-2BGLPRT3.mjs} +238 -46
- package/dist/chunk-2BGLPRT3.mjs.map +1 -0
- package/dist/index.d.mts +2 -2
- package/dist/index.d.ts +2 -2
- package/dist/index.js +444 -49
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +211 -8
- package/dist/index.mjs.map +1 -1
- package/dist/{testing-NH1_Aigh.d.mts → testing-BMsplvLy.d.mts} +129 -16
- package/dist/{testing-NH1_Aigh.d.ts → testing-BMsplvLy.d.ts} +129 -16
- package/dist/testing.d.mts +1 -1
- package/dist/testing.d.ts +1 -1
- package/dist/testing.js +155 -43
- package/dist/testing.js.map +1 -1
- package/dist/testing.mjs +1 -1
- package/package.json +2 -2
- package/dist/chunk-J4GBC6CP.mjs.map +0 -1
package/dist/index.js
CHANGED
|
@@ -87,6 +87,13 @@ async function setupDatabase(db, prefix) {
|
|
|
87
87
|
memory_checkpoint INTEGER NOT NULL DEFAULT 0
|
|
88
88
|
);
|
|
89
89
|
|
|
90
|
+
CREATE TABLE IF NOT EXISTS ${prefix}entity_manifests (
|
|
91
|
+
entity_id TEXT PRIMARY KEY,
|
|
92
|
+
mode TEXT NOT NULL DEFAULT 'off',
|
|
93
|
+
manifest_json TEXT NOT NULL DEFAULT '{"node_types":[],"edge_types":[]}',
|
|
94
|
+
updated_at INTEGER NOT NULL
|
|
95
|
+
);
|
|
96
|
+
|
|
90
97
|
CREATE TABLE IF NOT EXISTS ${prefix}meta (
|
|
91
98
|
key TEXT PRIMARY KEY,
|
|
92
99
|
value TEXT NOT NULL
|
|
@@ -196,6 +203,20 @@ var MIGRATIONS = [
|
|
|
196
203
|
CREATE INDEX IF NOT EXISTS ${prefix}edges_entity_idx ON ${prefix}edges (entity_id);
|
|
197
204
|
`);
|
|
198
205
|
}
|
|
206
|
+
},
|
|
207
|
+
{
|
|
208
|
+
version: 6,
|
|
209
|
+
description: "Add entity_manifests table for per-entity ontology state",
|
|
210
|
+
run: async (db, prefix) => {
|
|
211
|
+
await db.execAsync(`
|
|
212
|
+
CREATE TABLE IF NOT EXISTS ${prefix}entity_manifests (
|
|
213
|
+
entity_id TEXT PRIMARY KEY,
|
|
214
|
+
mode TEXT NOT NULL DEFAULT 'off',
|
|
215
|
+
manifest_json TEXT NOT NULL DEFAULT '{"node_types":[],"edge_types":[]}',
|
|
216
|
+
updated_at INTEGER NOT NULL
|
|
217
|
+
);
|
|
218
|
+
`);
|
|
219
|
+
}
|
|
199
220
|
}
|
|
200
221
|
];
|
|
201
222
|
for (let i = 1; i < MIGRATIONS.length; i++) {
|
|
@@ -319,8 +340,8 @@ var EntryRepository = class extends BaseRepository {
|
|
|
319
340
|
`INSERT INTO ${this.prefix}entries (
|
|
320
341
|
id, entity_id, title, body, tags, confidence, source_type,
|
|
321
342
|
source_hash, source_ref, created_at, updated_at, last_accessed_at, access_count,
|
|
322
|
-
deleted_at, embedding_blob, embedding
|
|
323
|
-
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
343
|
+
deleted_at, embedding_blob, embedding, okf_type
|
|
344
|
+
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
324
345
|
ON CONFLICT(id) DO UPDATE SET
|
|
325
346
|
entity_id = excluded.entity_id,
|
|
326
347
|
title = excluded.title,
|
|
@@ -335,7 +356,8 @@ var EntryRepository = class extends BaseRepository {
|
|
|
335
356
|
access_count = excluded.access_count,
|
|
336
357
|
deleted_at = excluded.deleted_at,
|
|
337
358
|
embedding_blob = CASE WHEN excluded.embedding_blob IS NULL THEN embedding_blob ELSE excluded.embedding_blob END,
|
|
338
|
-
embedding = NULL
|
|
359
|
+
embedding = NULL,
|
|
360
|
+
okf_type = excluded.okf_type`,
|
|
339
361
|
[
|
|
340
362
|
fact.id,
|
|
341
363
|
fact.entity_id,
|
|
@@ -352,7 +374,8 @@ var EntryRepository = class extends BaseRepository {
|
|
|
352
374
|
fact.access_count,
|
|
353
375
|
fact.deleted_at ?? null,
|
|
354
376
|
embeddingBlob ?? null,
|
|
355
|
-
null
|
|
377
|
+
null,
|
|
378
|
+
fact.okf_type ?? null
|
|
356
379
|
]
|
|
357
380
|
);
|
|
358
381
|
await this.outbox.push({
|
|
@@ -1430,6 +1453,91 @@ var EdgeRepository = class extends BaseRepository {
|
|
|
1430
1453
|
}
|
|
1431
1454
|
};
|
|
1432
1455
|
|
|
1456
|
+
// src/utils/ontology.ts
|
|
1457
|
+
/**
 * Builds a brand-new ontology manifest that declares no node types and no
 * edge types. Every call returns fresh arrays, so callers may mutate the
 * result without affecting other manifests.
 *
 * @returns {{ node_types: Array, edge_types: Array }} An empty manifest.
 */
function emptyManifest() {
  const node_types = [];
  const edge_types = [];
  return { node_types, edge_types };
}
|
|
1460
|
+
/**
 * Produces the lookup key used to compare fact titles: surrounding whitespace
 * is removed, the text is lower-cased, and every internal run of whitespace is
 * collapsed to a single space.
 *
 * @param {string} title - Raw fact title.
 * @returns {string} The normalized key.
 */
function normalizeTitleKey(title) {
  const trimmed = title.trim();
  const lowered = trimmed.toLowerCase();
  return lowered.replace(/\s+/g, " ");
}
|
|
1463
|
+
/**
 * Resolves a raw (typically LLM-supplied) node type to the canonical slug
 * declared in the manifest. Matching is case-insensitive and ignores
 * surrounding whitespace on both sides, mirroring how validateManifest and
 * mergeOntologyUpdates identify slugs.
 *
 * @param {string} raw - Candidate node type.
 * @param {{ node_types?: Array<{ type: string }> }} manifest - Ontology manifest.
 * @returns {string|null} The canonical (trimmed) slug, or null when `raw` is
 *   blank, not a string, or not declared in the manifest.
 */
function resolveNodeType(raw, manifest) {
  if (typeof raw !== "string") return null;
  const wanted = raw.trim().toLowerCase();
  if (!wanted) return null;
  // validateManifest accepts manifests that omit node_types, so tolerate that here.
  const declared = manifest?.node_types ?? [];
  const hit = declared.find(
    (n) => typeof n?.type === "string" && n.type.trim().toLowerCase() === wanted
  );
  return hit ? hit.type.trim() : null;
}
|
|
1469
|
+
/**
 * Looks up the edge definition whose slug matches `rawEdgeType`. Matching is
 * case-insensitive and ignores surrounding whitespace on both sides.
 *
 * @param {string} rawEdgeType - Candidate edge type.
 * @param {{ edge_types?: Array<{ type: string }> }} manifest - Ontology manifest.
 * @returns {object|null} The matching definition object from the manifest (the
 *   same reference, not a copy), or null when the input is blank, not a
 *   string, or not declared.
 */
function resolveEdgeDefinition(rawEdgeType, manifest) {
  if (typeof rawEdgeType !== "string") return null;
  const wanted = rawEdgeType.trim().toLowerCase();
  if (!wanted) return null;
  // validateManifest accepts manifests that omit edge_types, so tolerate that here.
  const declared = manifest?.edge_types ?? [];
  const hit = declared.find(
    (e) => typeof e?.type === "string" && e.type.trim().toLowerCase() === wanted
  );
  return hit ?? null;
}
|
|
1474
|
+
/**
 * Checks the structural invariants of an ontology manifest and throws on the
 * first violation:
 *   - the manifest is a plain (non-array) object;
 *   - node_types / edge_types, when present, are arrays;
 *   - every node and edge slug is a non-blank string, unique case-insensitively;
 *   - every edge's source_type and target_type name a declared node type.
 *
 * Missing node_types / edge_types are treated as empty lists.
 *
 * @param {object} manifest - Manifest to validate (may come from stored JSON).
 * @returns {void}
 * @throws {Error} Describing the first invariant that does not hold.
 */
function validateManifest(manifest) {
  // Stored JSON or caller input may be null / a scalar; report that as a
  // validation failure rather than letting a property access blow up.
  if (manifest === null || typeof manifest !== "object" || Array.isArray(manifest)) {
    throw new Error("Ontology manifest must be an object");
  }
  const nodeTypes = manifest.node_types ?? [];
  const edgeTypes = manifest.edge_types ?? [];
  if (!Array.isArray(nodeTypes)) throw new Error("Ontology manifest node_types must be an array");
  if (!Array.isArray(edgeTypes)) throw new Error("Ontology manifest edge_types must be an array");

  // Non-string values count as a blank slug so they hit the "non-empty" errors below.
  const slugOf = (value) => (typeof value === "string" ? value.trim() : "");

  const nodeSlugs = new Set();
  for (const node of nodeTypes) {
    const type = slugOf(node?.type);
    if (!type) throw new Error("Ontology node type slug must be non-empty");
    const key = type.toLowerCase();
    if (nodeSlugs.has(key)) throw new Error(`Duplicate node type: ${type}`);
    nodeSlugs.add(key);
  }

  const edgeSlugs = new Set();
  for (const edge of edgeTypes) {
    const edgeType = slugOf(edge?.type);
    const sourceType = slugOf(edge?.source_type);
    const targetType = slugOf(edge?.target_type);
    if (!edgeType) throw new Error("Ontology edge type slug must be non-empty");
    const edgeKey = edgeType.toLowerCase();
    if (edgeSlugs.has(edgeKey)) throw new Error(`Duplicate edge type: ${edgeType}`);
    edgeSlugs.add(edgeKey);
    const endpointsKnown =
      sourceType !== "" &&
      targetType !== "" &&
      nodeSlugs.has(sourceType.toLowerCase()) &&
      nodeSlugs.has(targetType.toLowerCase());
    if (!endpointsKnown) {
      throw new Error(`Edge type ${edgeType} references unknown node type`);
    }
  }
}
|
|
1497
|
+
/**
 * Merges proposed ontology additions into a manifest without mutating either
 * argument. Only additive changes are applied:
 *   - a node type is added when its slug is a non-blank string not already
 *     present (case-insensitive);
 *   - an edge type is added when its slug is new and both endpoints name node
 *     types known after the node merge.
 * Anything else (malformed entries, redefinitions, dangling endpoints) is
 * silently skipped. `updates` is raw model output, so its lists and fields
 * are shape-checked instead of trusted.
 *
 * @param {{ node_types?: Array, edge_types?: Array }} current - Existing manifest.
 * @param {{ node_types?: unknown, edge_types?: unknown }} updates - Proposed additions.
 * @returns {{ node_types: Array, edge_types: Array }} A new merged manifest.
 */
function mergeOntologyUpdates(current, updates) {
  // Non-string values are treated as a blank slug and therefore skipped.
  const slugOf = (value) => (typeof value === "string" ? value.trim() : "");
  // A non-array list (object, string, number...) is treated as "no proposals".
  const listOf = (value) => (Array.isArray(value) ? value : []);

  const node_types = [...(current?.node_types ?? [])];
  const edge_types = [...(current?.edge_types ?? [])];
  const nodeSlugs = new Set(node_types.map((n) => slugOf(n?.type).toLowerCase()));
  const edgeSlugs = new Set(edge_types.map((e) => slugOf(e?.type).toLowerCase()));

  for (const node of listOf(updates?.node_types)) {
    const type = slugOf(node?.type);
    if (!type) continue;
    const key = type.toLowerCase();
    if (nodeSlugs.has(key)) continue;
    node_types.push({ type, description: String(node.description ?? "") });
    nodeSlugs.add(key);
  }

  for (const edge of listOf(updates?.edge_types)) {
    const edgeType = slugOf(edge?.type);
    const sourceType = slugOf(edge?.source_type);
    const targetType = slugOf(edge?.target_type);
    if (!edgeType || !sourceType || !targetType) continue;
    const edgeKey = edgeType.toLowerCase();
    if (edgeSlugs.has(edgeKey)) continue;
    // Endpoints must refer to node types that exist after the node merge above.
    if (!nodeSlugs.has(sourceType.toLowerCase()) || !nodeSlugs.has(targetType.toLowerCase())) continue;
    edge_types.push({
      type: edgeType,
      source_type: sourceType,
      target_type: targetType,
      description: String(edge.description ?? "")
    });
    edgeSlugs.add(edgeKey);
  }

  return { node_types, edge_types };
}
|
|
1528
|
+
/**
 * Filters the inline edges attached to a fact down to those the manifest
 * allows for the fact's node type. An edge is kept when it has string
 * `edge_type` and `target_title` fields, its edge type is declared in the
 * manifest, and that definition's source type matches `sourceType`
 * (case-insensitive). Kept edges carry the manifest's spelling of the type.
 *
 * @param {string} sourceType - Canonical node type of the fact owning the edges.
 * @param {*} _targetType - Unused; the target type is not checked here.
 * @param {*} edges - Candidate edges; anything that is not an array yields [].
 * @param {object} manifest - Ontology manifest.
 * @returns {Array<{ edge_type: string, target_title: string }>} Accepted edges.
 */
function validateInlineEdges(sourceType, _targetType, edges, manifest) {
  if (!Array.isArray(edges)) return [];
  return edges.flatMap((candidate) => {
    const wellFormed =
      typeof candidate?.edge_type === "string" && typeof candidate?.target_title === "string";
    if (!wellFormed) return [];
    const definition = resolveEdgeDefinition(candidate.edge_type, manifest);
    if (!definition) return [];
    const sourceMatches = definition.source_type.toLowerCase() === sourceType.toLowerCase();
    return sourceMatches
      ? [{ edge_type: definition.type, target_title: candidate.target_title }]
      : [];
  });
}
|
|
1540
|
+
|
|
1433
1541
|
// src/repositories/MetadataRepository.ts
|
|
1434
1542
|
var MetadataRepository = class extends BaseRepository {
|
|
1435
1543
|
// CHECKPOINTS TABLE METHODS
|
|
@@ -1528,6 +1636,44 @@ var MetadataRepository = class extends BaseRepository {
|
|
|
1528
1636
|
);
|
|
1529
1637
|
return rows.map((r) => r.entity_id);
|
|
1530
1638
|
}
|
|
1639
|
+
async getManifest(entityId, tx) {
|
|
1640
|
+
const executor = this.getExecutor(tx);
|
|
1641
|
+
const row = await executor.getFirstAsync(`SELECT mode, manifest_json FROM ${this.prefix}entity_manifests WHERE entity_id = ?`, [entityId]);
|
|
1642
|
+
if (!row) return null;
|
|
1643
|
+
if (row.mode !== "off" && row.mode !== "strict" && row.mode !== "emergent") {
|
|
1644
|
+
throw new Error(`Invalid ontology mode for entity ${entityId}: ${JSON.stringify(row.mode)}`);
|
|
1645
|
+
}
|
|
1646
|
+
let manifest;
|
|
1647
|
+
try {
|
|
1648
|
+
manifest = JSON.parse(row.manifest_json);
|
|
1649
|
+
} catch (error) {
|
|
1650
|
+
throw new Error(`Invalid manifest_json for entity ${entityId}: ${error.message}`);
|
|
1651
|
+
}
|
|
1652
|
+
validateManifest(manifest);
|
|
1653
|
+
return {
|
|
1654
|
+
mode: row.mode,
|
|
1655
|
+
manifest
|
|
1656
|
+
};
|
|
1657
|
+
}
|
|
1658
|
+
async setManifest(entityId, data, tx) {
|
|
1659
|
+
validateManifest(data.manifest);
|
|
1660
|
+
const executor = this.getExecutor(tx);
|
|
1661
|
+
await executor.runAsync(
|
|
1662
|
+
`INSERT INTO ${this.prefix}entity_manifests (entity_id, mode, manifest_json, updated_at)
|
|
1663
|
+
VALUES (?, ?, ?, ?)
|
|
1664
|
+
ON CONFLICT(entity_id) DO UPDATE SET mode = excluded.mode, manifest_json = excluded.manifest_json, updated_at = excluded.updated_at`,
|
|
1665
|
+
[entityId, data.mode, JSON.stringify(data.manifest), Date.now()]
|
|
1666
|
+
);
|
|
1667
|
+
}
|
|
1668
|
+
async mergeManifestUpdates(entityId, updates, tx) {
|
|
1669
|
+
const current = await this.getManifest(entityId, tx) ?? {
|
|
1670
|
+
mode: "emergent",
|
|
1671
|
+
manifest: emptyManifest()
|
|
1672
|
+
};
|
|
1673
|
+
const merged = mergeOntologyUpdates(current.manifest, updates);
|
|
1674
|
+
await this.setManifest(entityId, { mode: current.mode, manifest: merged }, tx);
|
|
1675
|
+
return merged;
|
|
1676
|
+
}
|
|
1531
1677
|
};
|
|
1532
1678
|
|
|
1533
1679
|
// src/utils/cosine.ts
|
|
@@ -2343,30 +2489,54 @@ var PromptService = class {
|
|
|
2343
2489
|
return typeof value === "string" ? value : JSON.stringify(value, null, 2);
|
|
2344
2490
|
});
|
|
2345
2491
|
}
|
|
2346
|
-
|
|
2492
|
+
hasOntologyPlaceholders(template) {
|
|
2493
|
+
return /\{\{\s*ontology(?:Manifest|ModeInstructions)\s*\}\}/.test(template);
|
|
2494
|
+
}
|
|
2495
|
+
buildSystemPrompt(template, variables, ontologyContext) {
|
|
2496
|
+
const shouldHydrate = Object.keys(variables).some(
|
|
2497
|
+
(key) => new RegExp(`\\{\\{\\s*${key}\\s*\\}\\}`).test(template)
|
|
2498
|
+
) || ontologyContext != null && this.hasOntologyPlaceholders(template);
|
|
2499
|
+
const hydrated = shouldHydrate ? this.hydrate(template, { ...variables, ...ontologyContext ?? {} }) : template;
|
|
2500
|
+
return this.hasOntologyPlaceholders(template) ? ontologyContext != null ? hydrated : hydrated.replace(/\{\{\s*ontology(?:Manifest|ModeInstructions)\s*\}\}/g, "") : this.appendOntology(hydrated, ontologyContext);
|
|
2501
|
+
}
|
|
2502
|
+
appendOntology(systemPrompt, ctx) {
|
|
2503
|
+
if (!ctx) return systemPrompt;
|
|
2504
|
+
return `${systemPrompt}
|
|
2505
|
+
|
|
2506
|
+
${ctx.ontologyModeInstructions}`;
|
|
2507
|
+
}
|
|
2508
|
+
buildIngestPrompt(documentChunk, runtimeOverride, ontologyContext) {
|
|
2347
2509
|
const template = runtimeOverride ?? this.globalOverrides?.ingestSystemPrompt ?? INGEST_SYSTEM_PROMPT;
|
|
2348
|
-
|
|
2510
|
+
const hasDocumentChunk = /\{\{\s*documentChunk\s*\}\}/.test(template);
|
|
2511
|
+
if (hasDocumentChunk || this.hasOntologyPlaceholders(template)) {
|
|
2349
2512
|
return {
|
|
2350
|
-
systemPrompt: this.
|
|
2351
|
-
userPrompt: "Please extract the facts."
|
|
2513
|
+
systemPrompt: this.buildSystemPrompt(template, { documentChunk }, ontologyContext),
|
|
2514
|
+
userPrompt: hasDocumentChunk ? "Please extract the facts." : `Document Chunk:
|
|
2515
|
+
${documentChunk}`
|
|
2352
2516
|
};
|
|
2353
2517
|
}
|
|
2354
2518
|
return {
|
|
2355
|
-
systemPrompt: template,
|
|
2519
|
+
systemPrompt: this.appendOntology(template, ontologyContext),
|
|
2356
2520
|
userPrompt: `Document Chunk:
|
|
2357
2521
|
${documentChunk}`
|
|
2358
2522
|
};
|
|
2359
2523
|
}
|
|
2360
|
-
buildLibrarianPrompt(events, currentFacts, runtimeOverride) {
|
|
2524
|
+
buildLibrarianPrompt(events, currentFacts, runtimeOverride, ontologyContext) {
|
|
2361
2525
|
const template = runtimeOverride ?? this.globalOverrides?.librarianSystemPrompt ?? LIBRARIAN_SYSTEM_PROMPT;
|
|
2362
|
-
|
|
2526
|
+
const hasEvents = /\{\{\s*events\s*\}\}/.test(template);
|
|
2527
|
+
const hasCurrentFacts = /\{\{\s*currentFacts\s*\}\}/.test(template);
|
|
2528
|
+
if (hasEvents || hasCurrentFacts || this.hasOntologyPlaceholders(template)) {
|
|
2363
2529
|
return {
|
|
2364
|
-
systemPrompt: this.
|
|
2365
|
-
userPrompt: "Please synthesize the context."
|
|
2530
|
+
systemPrompt: this.buildSystemPrompt(template, { events, currentFacts }, ontologyContext),
|
|
2531
|
+
userPrompt: hasEvents || hasCurrentFacts ? "Please synthesize the context." : `Events:
|
|
2532
|
+
${JSON.stringify(events, null, 2)}
|
|
2533
|
+
|
|
2534
|
+
Current Facts:
|
|
2535
|
+
${JSON.stringify(currentFacts, null, 2)}`
|
|
2366
2536
|
};
|
|
2367
2537
|
}
|
|
2368
2538
|
return {
|
|
2369
|
-
systemPrompt: template,
|
|
2539
|
+
systemPrompt: this.appendOntology(template, ontologyContext),
|
|
2370
2540
|
userPrompt: `Events:
|
|
2371
2541
|
${JSON.stringify(events, null, 2)}
|
|
2372
2542
|
|
|
@@ -2399,7 +2569,7 @@ The following document anchors are provided for contradiction detection only. Do
|
|
|
2399
2569
|
|
|
2400
2570
|
// src/services/IngestionService.ts
|
|
2401
2571
|
var IngestionService = class {
|
|
2402
|
-
constructor(db, prefix, options, entryRepo, searchService, jobManager, embeddingService, promptService) {
|
|
2572
|
+
constructor(db, prefix, options, entryRepo, searchService, jobManager, embeddingService, promptService, ontologyService) {
|
|
2403
2573
|
this.db = db;
|
|
2404
2574
|
this.prefix = prefix;
|
|
2405
2575
|
this.options = options;
|
|
@@ -2407,6 +2577,7 @@ var IngestionService = class {
|
|
|
2407
2577
|
this.searchService = searchService;
|
|
2408
2578
|
this.jobManager = jobManager;
|
|
2409
2579
|
this.embeddingService = embeddingService;
|
|
2580
|
+
this.ontologyService = ontologyService;
|
|
2410
2581
|
this.promptService = promptService ?? new PromptService(this.options.config?.prompts);
|
|
2411
2582
|
}
|
|
2412
2583
|
async ingestDocument(entityId, params) {
|
|
@@ -2431,23 +2602,33 @@ var IngestionService = class {
|
|
|
2431
2602
|
if (chunks.length === 0) return { truncated: false, chunks: 0 };
|
|
2432
2603
|
const chunkResults = await withConcurrency(
|
|
2433
2604
|
chunks.map((chunk) => async () => {
|
|
2434
|
-
const
|
|
2605
|
+
const ontologyContext = await this.ontologyService?.buildPromptContext(entityId) ?? null;
|
|
2606
|
+
const { systemPrompt, userPrompt } = this.promptService.buildIngestPrompt(
|
|
2607
|
+
chunk,
|
|
2608
|
+
params.promptOverride,
|
|
2609
|
+
ontologyContext
|
|
2610
|
+
);
|
|
2435
2611
|
const responseText = await this.options.llmProvider.generateText({ systemPrompt, userPrompt });
|
|
2436
2612
|
const result = parseJsonResponse(responseText);
|
|
2437
|
-
return
|
|
2613
|
+
return {
|
|
2614
|
+
facts: (Array.isArray(result.facts) ? result.facts : []).map(validateFact).filter((f) => f !== null),
|
|
2615
|
+
ontology_updates: result.ontology_updates
|
|
2616
|
+
};
|
|
2438
2617
|
}),
|
|
2439
2618
|
chunkConcurrency
|
|
2440
2619
|
);
|
|
2441
2620
|
const seen = /* @__PURE__ */ new Set();
|
|
2442
|
-
const
|
|
2443
|
-
for (const
|
|
2444
|
-
|
|
2445
|
-
|
|
2446
|
-
|
|
2447
|
-
|
|
2448
|
-
|
|
2621
|
+
const orderedChunkFacts = [];
|
|
2622
|
+
for (const chunkResult of chunkResults) {
|
|
2623
|
+
const dedupedFacts = [];
|
|
2624
|
+
for (const fact of chunkResult.facts) {
|
|
2625
|
+
const normalizedTitle = normalizeTitleKey(fact.title);
|
|
2626
|
+
if (!seen.has(normalizedTitle)) {
|
|
2627
|
+
seen.add(normalizedTitle);
|
|
2628
|
+
dedupedFacts.push(fact);
|
|
2449
2629
|
}
|
|
2450
2630
|
}
|
|
2631
|
+
orderedChunkFacts.push({ facts: dedupedFacts, ontology_updates: chunkResult.ontology_updates });
|
|
2451
2632
|
}
|
|
2452
2633
|
const now = Date.now();
|
|
2453
2634
|
const insertedFacts = [];
|
|
@@ -2455,26 +2636,63 @@ var IngestionService = class {
|
|
|
2455
2636
|
await this.db.withTransactionAsync(async (tx) => {
|
|
2456
2637
|
deletedSourceFactIds.push(...await this.entryRepo.findIdsBySource(entityId, sourceRef, null, tx, false));
|
|
2457
2638
|
await this.entryRepo.softDeleteBySource(entityId, tx, sourceRef, null);
|
|
2458
|
-
|
|
2459
|
-
|
|
2460
|
-
|
|
2461
|
-
|
|
2462
|
-
|
|
2463
|
-
|
|
2464
|
-
|
|
2465
|
-
|
|
2466
|
-
|
|
2467
|
-
|
|
2468
|
-
|
|
2469
|
-
|
|
2470
|
-
|
|
2471
|
-
|
|
2472
|
-
|
|
2473
|
-
|
|
2474
|
-
|
|
2475
|
-
|
|
2476
|
-
|
|
2477
|
-
|
|
2639
|
+
const titleIndex = /* @__PURE__ */ new Map();
|
|
2640
|
+
const pendingEdges = [];
|
|
2641
|
+
const existingFacts = await this.entryRepo.findRecentByEntityId(entityId, 500, tx);
|
|
2642
|
+
for (const existing of existingFacts) {
|
|
2643
|
+
titleIndex.set(normalizeTitleKey(existing.title), {
|
|
2644
|
+
id: existing.id,
|
|
2645
|
+
okf_type: existing.okf_type ?? null
|
|
2646
|
+
});
|
|
2647
|
+
}
|
|
2648
|
+
let ontologyState = await this.ontologyService?.getEffectiveState(entityId, tx) ?? { mode: "off", manifest: { node_types: [], edge_types: [] } };
|
|
2649
|
+
let { mode, manifest } = ontologyState;
|
|
2650
|
+
for (const { facts, ontology_updates } of orderedChunkFacts) {
|
|
2651
|
+
if (mode === "emergent" && ontology_updates && this.ontologyService) {
|
|
2652
|
+
manifest = await this.ontologyService.mergeEmergentUpdates(entityId, ontology_updates, tx);
|
|
2653
|
+
ontologyState = await this.ontologyService.getEffectiveState(entityId, tx);
|
|
2654
|
+
mode = ontologyState.mode;
|
|
2655
|
+
}
|
|
2656
|
+
for (const fact of facts) {
|
|
2657
|
+
const ontologyFact = fact;
|
|
2658
|
+
const normalized = this.ontologyService?.validateAndNormalizeFact(ontologyFact, manifest) ?? { okf_type: null, edges: [] };
|
|
2659
|
+
const id = generateId("fact_");
|
|
2660
|
+
const wikiFact = {
|
|
2661
|
+
id,
|
|
2662
|
+
entity_id: entityId,
|
|
2663
|
+
title: fact.title,
|
|
2664
|
+
body: fact.body,
|
|
2665
|
+
tags: fact.tags,
|
|
2666
|
+
confidence: fact.confidence,
|
|
2667
|
+
source_type: "immutable_document",
|
|
2668
|
+
source_hash: sourceHash,
|
|
2669
|
+
source_ref: sourceRef,
|
|
2670
|
+
created_at: now,
|
|
2671
|
+
updated_at: now,
|
|
2672
|
+
last_accessed_at: null,
|
|
2673
|
+
access_count: 0,
|
|
2674
|
+
deleted_at: null,
|
|
2675
|
+
okf_type: normalized.okf_type
|
|
2676
|
+
};
|
|
2677
|
+
await this.entryRepo.upsert(wikiFact, tx);
|
|
2678
|
+
insertedFacts.push({ id, entity_id: entityId, title: fact.title, body: fact.body, tags: JSON.stringify(fact.tags) });
|
|
2679
|
+
titleIndex.set(normalizeTitleKey(fact.title), { id, okf_type: normalized.okf_type });
|
|
2680
|
+
if (normalized.edges.length > 0) {
|
|
2681
|
+
pendingEdges.push({ sourceId: id, sourceType: normalized.okf_type, edges: normalized.edges });
|
|
2682
|
+
}
|
|
2683
|
+
}
|
|
2684
|
+
}
|
|
2685
|
+
for (const item of pendingEdges) {
|
|
2686
|
+
await this.ontologyService?.resolveAndPersistEdges(
|
|
2687
|
+
entityId,
|
|
2688
|
+
item.sourceId,
|
|
2689
|
+
item.sourceType,
|
|
2690
|
+
item.edges ?? [],
|
|
2691
|
+
manifest,
|
|
2692
|
+
titleIndex,
|
|
2693
|
+
tx,
|
|
2694
|
+
now
|
|
2695
|
+
);
|
|
2478
2696
|
}
|
|
2479
2697
|
});
|
|
2480
2698
|
await this.searchService.sync(entityId);
|
|
@@ -2501,7 +2719,7 @@ var IngestionService = class {
|
|
|
2501
2719
|
var FUZZY_THRESHOLD = 0.5;
|
|
2502
2720
|
var MIN_TOKENS_TO_QUALIFY = 3;
|
|
2503
2721
|
var MaintenanceService = class {
|
|
2504
|
-
constructor(db, prefix, options, entryRepo, taskRepo, eventRepo, metadataRepo, searchService, jobManager, embeddingService, promptService) {
|
|
2722
|
+
constructor(db, prefix, options, entryRepo, taskRepo, eventRepo, metadataRepo, searchService, jobManager, embeddingService, promptService, ontologyService) {
|
|
2505
2723
|
this.db = db;
|
|
2506
2724
|
this.prefix = prefix;
|
|
2507
2725
|
this.options = options;
|
|
@@ -2512,6 +2730,7 @@ var MaintenanceService = class {
|
|
|
2512
2730
|
this.searchService = searchService;
|
|
2513
2731
|
this.jobManager = jobManager;
|
|
2514
2732
|
this.embeddingService = embeddingService;
|
|
2733
|
+
this.ontologyService = ontologyService;
|
|
2515
2734
|
this.promptService = promptService ?? new PromptService(this.options.config?.prompts);
|
|
2516
2735
|
}
|
|
2517
2736
|
async runPrune(entityId, options) {
|
|
@@ -2734,21 +2953,36 @@ var MaintenanceService = class {
|
|
|
2734
2953
|
tags: typeof rest.tags === "string" ? JSON.parse(rest.tags) : rest.tags
|
|
2735
2954
|
};
|
|
2736
2955
|
});
|
|
2956
|
+
const ontologyContext = await this.ontologyService?.buildPromptContext(entityId) ?? null;
|
|
2737
2957
|
const { systemPrompt, userPrompt } = this.promptService.buildLibrarianPrompt(
|
|
2738
2958
|
events.reverse(),
|
|
2739
2959
|
currentFacts,
|
|
2740
|
-
promptOverride
|
|
2960
|
+
promptOverride,
|
|
2961
|
+
ontologyContext
|
|
2741
2962
|
);
|
|
2742
2963
|
const responseText = await this.options.llmProvider.generateText({ systemPrompt, userPrompt });
|
|
2743
2964
|
const result = parseJsonResponse(responseText);
|
|
2744
2965
|
const facts = Array.isArray(result.facts) ? result.facts : [];
|
|
2745
2966
|
const tasks = Array.isArray(result.tasks) ? result.tasks : [];
|
|
2967
|
+
const ontologyUpdates = result.ontology_updates;
|
|
2746
2968
|
const validFacts = facts.map(validateFact).filter((f) => f !== null);
|
|
2747
2969
|
const validTasks = tasks.map(validateTask).filter((t) => t !== null);
|
|
2748
2970
|
const now = Date.now();
|
|
2749
2971
|
const insertedFacts = [];
|
|
2750
2972
|
await this.db.withTransactionAsync(async (tx) => {
|
|
2973
|
+
let { mode, manifest } = await this.ontologyService?.getEffectiveState(entityId, tx) ?? { mode: "off", manifest: { node_types: [], edge_types: [] } };
|
|
2974
|
+
if (mode === "emergent" && ontologyUpdates && this.ontologyService) {
|
|
2975
|
+
manifest = await this.ontologyService.mergeEmergentUpdates(entityId, ontologyUpdates, tx);
|
|
2976
|
+
}
|
|
2977
|
+
const titleIndex = /* @__PURE__ */ new Map();
|
|
2978
|
+
for (const existing of currentFactsRows) {
|
|
2979
|
+
titleIndex.set(normalizeTitleKey(existing.title), {
|
|
2980
|
+
id: existing.id,
|
|
2981
|
+
okf_type: existing.okf_type ?? null
|
|
2982
|
+
});
|
|
2983
|
+
}
|
|
2751
2984
|
const factsForDedupe = await this.entryRepo.findRecentByEntityId(entityId, 100, tx);
|
|
2985
|
+
const pendingEdges = [];
|
|
2752
2986
|
for (const fact of validFacts) {
|
|
2753
2987
|
const newTokens = titleTokens(fact.title);
|
|
2754
2988
|
let skip = false;
|
|
@@ -2765,6 +2999,8 @@ var MaintenanceService = class {
|
|
|
2765
2999
|
}
|
|
2766
3000
|
}
|
|
2767
3001
|
if (skip) continue;
|
|
3002
|
+
const ontologyFact = fact;
|
|
3003
|
+
const normalized = this.ontologyService?.validateAndNormalizeFact(ontologyFact, manifest) ?? { okf_type: null, edges: [] };
|
|
2768
3004
|
const id = generateId("fact_");
|
|
2769
3005
|
const factObj = {
|
|
2770
3006
|
id,
|
|
@@ -2780,11 +3016,28 @@ var MaintenanceService = class {
|
|
|
2780
3016
|
updated_at: now,
|
|
2781
3017
|
last_accessed_at: null,
|
|
2782
3018
|
access_count: 0,
|
|
2783
|
-
deleted_at: null
|
|
3019
|
+
deleted_at: null,
|
|
3020
|
+
okf_type: normalized.okf_type
|
|
2784
3021
|
};
|
|
2785
3022
|
await this.entryRepo.upsert(factObj, tx);
|
|
2786
3023
|
insertedFacts.push({ id, entity_id: entityId, title: fact.title, body: fact.body, tags: JSON.stringify(fact.tags) });
|
|
2787
3024
|
factsForDedupe.push(factObj);
|
|
3025
|
+
titleIndex.set(normalizeTitleKey(fact.title), { id, okf_type: normalized.okf_type });
|
|
3026
|
+
if (normalized.edges.length > 0) {
|
|
3027
|
+
pendingEdges.push({ sourceId: id, sourceType: normalized.okf_type, edges: normalized.edges });
|
|
3028
|
+
}
|
|
3029
|
+
}
|
|
3030
|
+
for (const item of pendingEdges) {
|
|
3031
|
+
await this.ontologyService?.resolveAndPersistEdges(
|
|
3032
|
+
entityId,
|
|
3033
|
+
item.sourceId,
|
|
3034
|
+
item.sourceType,
|
|
3035
|
+
item.edges ?? [],
|
|
3036
|
+
manifest,
|
|
3037
|
+
titleIndex,
|
|
3038
|
+
tx,
|
|
3039
|
+
now
|
|
3040
|
+
);
|
|
2788
3041
|
}
|
|
2789
3042
|
for (const task of validTasks) {
|
|
2790
3043
|
const id = generateId("task_");
|
|
@@ -4121,6 +4374,113 @@ var WriteService = class {
|
|
|
4121
4374
|
}
|
|
4122
4375
|
};
|
|
4123
4376
|
|
|
4377
|
+
// src/prompts/ontology.ts
|
|
4378
|
+
// Describes the optional per-fact ontology fields the model may emit.
var FACT_ONTOLOGY_FIELDS = `
Each fact may optionally include:
- "okf_type": string \u2014 must be one of the node_types in the manifest
- "edges": [{ "edge_type": string, "target_title": string }] \u2014 target_title must match another fact's title in this response or existing memory`;

// Extra instructions included only in emergent mode: how to propose new types.
var EMERGENT_EXTRA = `
You may also return "ontology_updates" to propose new types:
"ontology_updates": {
"node_types": [{ "type": "slug", "description": "..." }],
"edge_types": [{ "type": "slug", "source_type": "...", "target_type": "...", "description": "..." }]
}
Only propose types not already in the manifest. Do not redefine existing types.`;

/**
 * Assembles the ontology section of a prompt for the given mode.
 *
 * Any mode other than "strict" receives the emergent-mode rule line; the
 * "ontology_updates" instructions are added only when mode is exactly
 * "emergent". Empty sections (including an empty manifestJson) are dropped
 * before the sections are joined with newlines.
 *
 * @param {string} mode - Ontology mode ("strict" or "emergent").
 * @param {string} manifestJson - Serialized manifest to show the model.
 * @returns {{ ontologyManifest: string, ontologyModeInstructions: string }}
 */
function buildOntologyPromptAppendix(mode, manifestJson) {
  let modeRule;
  if (mode === "strict") {
    modeRule = "STRICT MODE: Use ONLY types defined in the manifest. If a fact does not fit any node_type, omit okf_type and edges entirely.";
  } else {
    modeRule = "EMERGENT MODE: Prefer manifest types. You may propose new types via ontology_updates when necessary.";
  }
  const invalidTypeRule = "If okf_type or an edge is invalid, omit them rather than inventing unlisted types.";

  const sections = [
    "## Ontology constraints",
    modeRule,
    invalidTypeRule,
    FACT_ONTOLOGY_FIELDS,
    ...(mode === "emergent" ? [EMERGENT_EXTRA] : []),
    "Manifest:",
    manifestJson
  ];
  const ontologyModeInstructions = sections.filter(Boolean).join("\n");
  return { ontologyManifest: manifestJson, ontologyModeInstructions };
}
|
|
4403
|
+
|
|
4404
|
+
// src/services/OntologyService.ts
|
|
4405
|
+
/**
 * Coordinates per-entity ontology state: resolves the effective mode and
 * manifest (with an in-memory cache for non-transactional reads), builds the
 * prompt context, merges emergent updates, and validates/persists typed edges.
 */
var OntologyService = class {
  /**
   * @param {object} metadataRepo - Repository exposing getManifest / setManifest / mergeManifestUpdates.
   * @param {object} edgeRepo - Repository exposing addIgnoreDuplicate.
   * @param {object} [ontologyConfig] - Optional config; `mode` and `seedManifests` are read.
   */
  constructor(metadataRepo, edgeRepo, ontologyConfig) {
    this.metadataRepo = metadataRepo;
    this.edgeRepo = edgeRepo;
    this.ontologyConfig = ontologyConfig;
    // entityId -> { mode, manifest }; only populated by reads made without a transaction.
    this.cache = new Map();
  }

  /**
   * Picks the mode to use: the given mode, else the configured mode, else "off".
   * @param {string} [storedMode]
   * @returns {string}
   */
  resolveMode(storedMode) {
    if (storedMode != null) return storedMode;
    return this.ontologyConfig?.mode ?? "off";
  }

  /**
   * Drops the cached state for one entity.
   * @param {string} entityId
   */
  invalidateCache(entityId) {
    this.cache.delete(entityId);
  }

  /**
   * Resolves the effective ontology state for an entity: the persisted row,
   * else the configured seed manifest, else mode "off" with an empty manifest.
   * Without `tx`, results from the row or the seed are cached; with `tx`, the
   * cache is bypassed and a seed is persisted through the transaction.
   *
   * @param {string} entityId
   * @param {*} [tx] - Optional transaction handle.
   * @returns {Promise<{ mode: string, manifest: object }>}
   */
  async getEffectiveState(entityId, tx) {
    const useCache = !tx;
    if (useCache) {
      const hit = this.cache.get(entityId);
      if (hit) return hit;
    }

    const stored = await this.metadataRepo.getManifest(entityId, tx);
    if (stored) {
      const fromRow = { mode: this.resolveMode(stored.mode), manifest: stored.manifest };
      if (useCache) this.cache.set(entityId, fromRow);
      return fromRow;
    }

    const seed = this.ontologyConfig?.seedManifests?.[entityId];
    if (!seed) {
      return { mode: "off", manifest: emptyManifest() };
    }

    const fromSeed = { mode: this.resolveMode(seed.mode), manifest: seed.manifest };
    if (useCache) {
      this.cache.set(entityId, fromSeed);
    } else {
      await this.metadataRepo.setManifest(entityId, fromSeed, tx);
    }
    return fromSeed;
  }

  /**
   * Builds the ontology prompt context for an entity, or null when its
   * effective mode is "off".
   *
   * @param {string} entityId
   * @returns {Promise<object|null>}
   */
  async buildPromptContext(entityId) {
    const state = await this.getEffectiveState(entityId);
    if (state.mode === "off") return null;
    const manifestJson = JSON.stringify(state.manifest, null, 2);
    return buildOntologyPromptAppendix(state.mode, manifestJson);
  }

  /**
   * Merges proposed ontology updates into the stored manifest and invalidates
   * the cached state for the entity.
   *
   * @param {string} entityId
   * @param {object} updates
   * @param {*} [tx]
   * @returns {Promise<object>} The merged manifest.
   */
  async mergeEmergentUpdates(entityId, updates, tx) {
    const mergedManifest = await this.metadataRepo.mergeManifestUpdates(entityId, updates, tx);
    this.invalidateCache(entityId);
    return mergedManifest;
  }

  /**
   * Normalizes the ontology fields of a fact against a manifest. A fact whose
   * okf_type is missing, non-string, or undeclared gets no type and no edges.
   *
   * @param {object} fact - Fact carrying optional `okf_type` and `edges`.
   * @param {object} manifest
   * @returns {{ okf_type: string|null, edges: Array }}
   */
  validateAndNormalizeFact(fact, manifest) {
    const candidateType = typeof fact.okf_type === "string" ? fact.okf_type : "";
    const canonical = resolveNodeType(candidateType, manifest);
    if (canonical) {
      return {
        okf_type: canonical,
        edges: validateInlineEdges(canonical, null, fact.edges ?? [], manifest)
      };
    }
    return { okf_type: null, edges: [] };
  }

  /**
   * Persists the edges of one source fact. An edge is written only when its
   * type is declared for `sourceType`, its target title is present in
   * `titleIndex`, and the target's okf_type matches the definition's
   * target_type (case-insensitive). Inserts go through addIgnoreDuplicate.
   *
   * @param {string} entityId
   * @param {string} sourceId - Id of the fact the edges start from.
   * @param {string|null} sourceType - Canonical node type of the source fact.
   * @param {Array<{ edge_type: string, target_title: string }>} edges
   * @param {object} manifest
   * @param {Map<string, { id: string, okf_type: string|null }>} titleIndex - Keyed by normalizeTitleKey(title).
   * @param {*} tx - Transaction handle.
   * @param {number} now - Timestamp stored as created_at.
   * @returns {Promise<void>}
   */
  async resolveAndPersistEdges(entityId, sourceId, sourceType, edges, manifest, titleIndex, tx, now) {
    if (!sourceType) return;
    if (edges.length === 0) return;

    for (const edge of edges) {
      const definition = resolveEdgeDefinition(edge.edge_type, manifest);
      const sourceAllowed =
        Boolean(definition) && definition.source_type.toLowerCase() === sourceType.toLowerCase();
      if (!sourceAllowed) continue;

      const target = titleIndex.get(normalizeTitleKey(edge.target_title));
      if (!target) continue;

      const targetTypeKey = (target.okf_type ?? "").toLowerCase();
      if (definition.target_type.toLowerCase() !== targetTypeKey) continue;

      await this.edgeRepo.addIgnoreDuplicate(
        {
          id: generateId(),
          entity_id: entityId,
          source_id: sourceId,
          target_id: target.id,
          edge_type: definition.type,
          created_at: now
        },
        tx
      );
    }
  }
};
|
|
4483
|
+
|
|
4124
4484
|
// src/WikiMemory.ts
|
|
4125
4485
|
var TABLE_PREFIX_PATTERN = /^[A-Za-z][A-Za-z0-9_]{0,30}_$/;
|
|
4126
4486
|
var _testAccessNonTestEnvWarned;
|
|
@@ -4142,6 +4502,11 @@ var WikiMemory = class {
|
|
|
4142
4502
|
this.eventRepo = new EventRepository(db, this.prefix);
|
|
4143
4503
|
this.edgeRepo = new EdgeRepository(db, this.prefix);
|
|
4144
4504
|
this.metadataRepo = new MetadataRepository(db, this.prefix);
|
|
4505
|
+
this.ontologyService = new OntologyService(
|
|
4506
|
+
this.metadataRepo,
|
|
4507
|
+
this.edgeRepo,
|
|
4508
|
+
options.config?.ontology
|
|
4509
|
+
);
|
|
4145
4510
|
this.embeddingService = new EmbeddingService(this.db, this.options, this.entryRepo, this.metadataRepo);
|
|
4146
4511
|
this.searchService = new SearchService(this.entryRepo);
|
|
4147
4512
|
this.jobManager = new JobManager(this.prefix);
|
|
@@ -4154,7 +4519,8 @@ var WikiMemory = class {
|
|
|
4154
4519
|
this.searchService,
|
|
4155
4520
|
this.jobManager,
|
|
4156
4521
|
this.embeddingService,
|
|
4157
|
-
this.promptService
|
|
4522
|
+
this.promptService,
|
|
4523
|
+
this.ontologyService
|
|
4158
4524
|
);
|
|
4159
4525
|
this.maintenanceService = new MaintenanceService(
|
|
4160
4526
|
this.db,
|
|
@@ -4167,7 +4533,8 @@ var WikiMemory = class {
|
|
|
4167
4533
|
this.searchService,
|
|
4168
4534
|
this.jobManager,
|
|
4169
4535
|
this.embeddingService,
|
|
4170
|
-
this.promptService
|
|
4536
|
+
this.promptService,
|
|
4537
|
+
this.ontologyService
|
|
4171
4538
|
);
|
|
4172
4539
|
this.importExportService = new ImportExportService(
|
|
4173
4540
|
this.db,
|
|
@@ -4362,6 +4729,34 @@ var WikiMemory = class {
|
|
|
4362
4729
|
async markOutboxEventsProcessed(eventIds) {
|
|
4363
4730
|
await this.outboxRepo.acknowledge(eventIds);
|
|
4364
4731
|
}
|
|
4732
|
+
/**
|
|
4733
|
+
* Returns the effective ontology mode and manifest for an entity.
|
|
4734
|
+
* Resolution order: persisted DB row → `WikiConfig.ontology.seedManifests[entityId]` → `null`.
|
|
4735
|
+
*/
|
|
4736
|
+
async getOntologyManifest(entityId) {
|
|
4737
|
+
const row = await this.metadataRepo.getManifest(entityId);
|
|
4738
|
+
if (row) return { mode: this.ontologyService.resolveMode(row.mode), manifest: row.manifest };
|
|
4739
|
+
const seed = this.options.config?.ontology?.seedManifests?.[entityId];
|
|
4740
|
+
if (seed) {
|
|
4741
|
+
return {
|
|
4742
|
+
mode: this.ontologyService.resolveMode(seed.mode),
|
|
4743
|
+
manifest: seed.manifest
|
|
4744
|
+
};
|
|
4745
|
+
}
|
|
4746
|
+
return null;
|
|
4747
|
+
}
|
|
4748
|
+
/**
|
|
4749
|
+
* Seeds or replaces an entity's ontology manifest and optional mode override.
|
|
4750
|
+
* Validates manifest invariants (unique type slugs, edge endpoints reference node types).
|
|
4751
|
+
* Invalidates the in-memory ontology cache for this entity.
|
|
4752
|
+
*/
|
|
4753
|
+
async setOntologyManifest(entityId, manifest, options) {
|
|
4754
|
+
const mode = options?.mode ?? this.ontologyService.resolveMode();
|
|
4755
|
+
await this.db.withTransactionAsync(
|
|
4756
|
+
(tx) => this.metadataRepo.setManifest(entityId, { mode, manifest }, tx)
|
|
4757
|
+
);
|
|
4758
|
+
this.ontologyService.invalidateCache(entityId);
|
|
4759
|
+
}
|
|
4365
4760
|
};
|
|
4366
4761
|
_testAccessNonTestEnvWarned = new WeakMap();
|
|
4367
4762
|
|