@hasna/knowledge 0.2.21 → 0.2.22
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +16 -0
- package/bin/open-knowledge-mcp.js +583 -11
- package/bin/open-knowledge.js +140 -70
- package/docs/architecture/ai-native-knowledge-base.md +8 -0
- package/package.json +1 -1
- package/src/cli.ts +44 -6
- package/src/service.ts +44 -0
- package/src/wiki-compiler.ts +711 -0
package/README.md
CHANGED
|
@@ -76,6 +76,11 @@ open-knowledge db init --scope project
|
|
|
76
76
|
# Initialize scalable wiki/schema/index/log artifacts
|
|
77
77
|
open-knowledge wiki init --scope project
|
|
78
78
|
|
|
79
|
+
# Compile cited wiki pages, file approved answers, and lint wiki health
|
|
80
|
+
open-knowledge wiki compile "handbook policy" --title "Handbook Policy" --scope project --json
|
|
81
|
+
open-knowledge wiki file-answer "How do we cite policy?" --content "Use cited source context." --approve-write --scope project --json
|
|
82
|
+
open-knowledge wiki lint --scope project --json
|
|
83
|
+
|
|
79
84
|
# Ingest an open-files source manifest into the project SQLite catalog
|
|
80
85
|
open-knowledge ingest manifest ./open-files-manifest.jsonl --scope project --json
|
|
81
86
|
|
|
@@ -237,11 +242,22 @@ Initialize or inspect the versioned SQLite catalog at
|
|
|
237
242
|
### wiki
|
|
238
243
|
```bash
|
|
239
244
|
open-knowledge wiki init [--scope project]
|
|
245
|
+
open-knowledge wiki compile [query|source-ref...] [--title <title>] [--limit <n>] [--scope project] [--json]
|
|
246
|
+
open-knowledge wiki file-answer <prompt> --content <answer> [--approve-write] [--scope project] [--json]
|
|
247
|
+
open-knowledge wiki lint [--scope project] [--json]
|
|
240
248
|
```
|
|
241
249
|
Create starter generated-knowledge artifacts through the artifact store:
|
|
242
250
|
`schemas/v1.md`, `indexes/root.md`, `wiki/README.md`, and a dated JSONL log
|
|
243
251
|
partition.
|
|
244
252
|
|
|
253
|
+
`wiki compile` turns existing source chunks into a cited Markdown page under
|
|
254
|
+
`wiki/generated/`, updates `knowledge_indexes`, records citations and a concept
|
|
255
|
+
backlink, and appends a JSONL log partition. `wiki file-answer` keeps answer
|
|
256
|
+
filing as a dry run unless `--approve-write` is supplied, then writes a cited
|
|
257
|
+
answer note under `wiki/answers/`. `wiki lint` checks generated pages for
|
|
258
|
+
missing citations, stale citations, duplicate titles, orphan pages, unresolved
|
|
259
|
+
source refs, contradiction markers, and new article candidates.
|
|
260
|
+
|
|
245
261
|
### source
|
|
246
262
|
```bash
|
|
247
263
|
open-knowledge source resolve <source-ref> [--purpose knowledge_answer|knowledge_index] [--limit <n>] [--scope project] [--json]
|
|
@@ -13660,7 +13660,7 @@ import { existsSync as existsSync8, readFileSync as readFileSync8, writeFileSync
|
|
|
13660
13660
|
// package.json
|
|
13661
13661
|
var package_default = {
|
|
13662
13662
|
name: "@hasna/knowledge",
|
|
13663
|
-
version: "0.2.21",
|
|
13663
|
+
version: "0.2.22",
|
|
13664
13664
|
description: "Agent-friendly local knowledge CLI with JSON output, pagination, and safe destructive actions",
|
|
13665
13665
|
type: "module",
|
|
13666
13666
|
bin: {
|
|
@@ -18062,6 +18062,9 @@ async function runProviderWebSearch(options) {
|
|
|
18062
18062
|
};
|
|
18063
18063
|
}
|
|
18064
18064
|
|
|
18065
|
+
// src/wiki-compiler.ts
|
|
18066
|
+
import { createHash as createHash10, randomUUID as randomUUID9 } from "crypto";
|
|
18067
|
+
|
|
18065
18068
|
// src/storage-contract.ts
|
|
18066
18069
|
import { createHash as createHash9, randomUUID as randomUUID8 } from "crypto";
|
|
18067
18070
|
var GENERATED_ARTIFACTS = [
|
|
@@ -18249,18 +18252,556 @@ function recordStorageObjects(db, objects, now = new Date) {
|
|
|
18249
18252
|
insert(objects);
|
|
18250
18253
|
}
|
|
18251
18254
|
|
|
18252
|
-
// src/wiki-layout.ts
|
|
18253
|
-
|
|
18255
|
+
// src/wiki-compiler.ts
|
|
18256
|
+
/**
 * Build a deterministic identifier from a prefix and a seed string.
 *
 * @param prefix Label placed before the underscore (e.g. "wiki", "chk").
 * @param value Seed that is hashed with SHA-256.
 * @returns `<prefix>_` followed by the first 20 hex characters of the digest.
 */
function stableId6(prefix, value) {
  const hexDigest = createHash10("sha256").update(value).digest("hex");
  const shortDigest = hexDigest.slice(0, 20);
  return `${prefix}_${shortDigest}`;
}
|
|
18259
|
+
/**
 * Turn arbitrary text into a lowercase, hyphen-separated slug of at most 80
 * characters, suitable for use in an artifact key.
 *
 * The text is NFKC-normalized and lower-cased, every run of characters
 * outside `[a-z0-9]` becomes a single "-", and leading/trailing hyphens are
 * removed.
 *
 * @param value Text to slugify.
 * @returns The slug, or "knowledge-page" when nothing usable remains.
 */
function slugify2(value) {
  const hyphenated = value
    .normalize("NFKC")
    .toLowerCase()
    .replace(/[^a-z0-9]+/g, "-")
    .replace(/^-+|-+$/g, "");
  // Truncation can land right after a separator, so trim trailing hyphens
  // again; otherwise a long title could yield a slug ending in "-".
  const slug = hyphenated.slice(0, 80).replace(/-+$/, "");
  return slug || "knowledge-page";
}
|
|
18254
18263
|
/**
 * Split a Date into zero-padded UTC calendar components.
 *
 * @param now Date to decompose.
 * @returns `{ year, month, day }` as strings; month and day are two digits.
 */
function todayParts(now) {
  const twoDigits = (n) => String(n).padStart(2, "0");
  const year = String(now.getUTCFullYear());
  const month = twoDigits(now.getUTCMonth() + 1); // getUTCMonth() is zero-based
  const day = twoDigits(now.getUTCDate());
  return { year, month, day };
}
|
|
18270
|
+
/**
 * Rough token estimate: 1.25 tokens per whitespace-separated word, rounded
 * up, never less than 1.
 *
 * @param text Text to measure.
 * @returns Estimated token count (>= 1).
 */
function estimateTokenCount2(text) {
  let wordCount = 0;
  for (const piece of text.trim().split(/\s+/)) {
    if (piece) {
      wordCount += 1;
    }
  }
  const estimate = Math.ceil(wordCount * 1.25);
  return estimate < 1 ? 1 : estimate;
}
|
|
18274
|
+
/**
 * Parse a JSON string and return it only if it decodes to a plain
 * (non-array, non-null) object.
 *
 * @param value JSON text; may be empty, null or undefined.
 * @returns The decoded object, or `{}` for falsy input, invalid JSON, or any
 *   JSON value that is not an object.
 */
function parseJsonObject4(value) {
  if (!value) {
    return {};
  }
  let decoded;
  try {
    decoded = JSON.parse(value);
  } catch {
    // Malformed metadata is treated as "no metadata".
    return {};
  }
  const isObject = Boolean(decoded) && typeof decoded === "object" && !Array.isArray(decoded);
  return isObject ? decoded : {};
}
|
|
18284
|
+
/**
 * Extract up to 12 distinct lower-cased search terms from a query.
 *
 * A term is a run of Unicode letters, Unicode digits, or underscores.
 *
 * @param query Free-text query; null/undefined is treated as empty.
 * @returns Unique terms in first-seen order, at most 12.
 */
function queryTerms3(query) {
  const tokens = (query ?? "").toLowerCase().match(/[\p{L}\p{N}_]+/gu);
  if (tokens === null) {
    return [];
  }
  const distinct = [...new Set(tokens)];
  return distinct.slice(0, 12);
}
|
|
18287
|
+
/**
 * Backslash-escape the characters `\`, `%` and `_` so a value can be embedded
 * in a SQL LIKE pattern that is evaluated with `ESCAPE '\'`.
 *
 * @param value Raw text.
 * @returns Text with each of those characters prefixed by a backslash.
 */
function escapeLike(value) {
  // "$&" re-inserts the matched character after the literal backslash.
  return value.replace(/[\\%_]/g, "\\$&");
}
|
|
18290
|
+
/**
 * Select the source chunks that a wiki page will be compiled from.
 *
 * Only rows with `chunks.kind = 'source'` are considered. When
 * `options.sourceRefs` is non-empty, a row must match at least one ref,
 * either exactly against `sources.uri` or as a substring of the chunk's
 * `metadata_json`. When `options.query` yields search terms, a row must also
 * contain at least one term in its lower-cased text.
 *
 * @param db Database handle exposing `query(sql).all(...params)`.
 * @param options `{ query?, sourceRefs?, limit? }`. `limit` defaults to 10
 *   and is clamped to the range 1..50.
 * @returns Matching rows ordered by chunk creation time, then ordinal.
 */
function selectSourceChunks(db, options) {
  // A non-numeric limit would otherwise reach SQLite as NaN; fall back to the default.
  const requestedLimit = Number(options.limit ?? 10);
  const limit = Number.isFinite(requestedLimit)
    ? Math.max(1, Math.min(Math.trunc(requestedLimit), 50))
    : 10;
  const sourceRefs = options.sourceRefs ?? [];
  const terms = queryTerms3(options.query);
  const where = ["c.kind = 'source'"];
  const params = [];
  if (sourceRefs.length > 0) {
    // The LIKE pattern is built with escapeLike(), so it needs an explicit
    // ESCAPE clause: SQLite has no default escape character, and without it
    // refs containing "_", "%" or "\" would never match.
    where.push(`(${sourceRefs.map(() => "(s.uri = ? OR c.metadata_json LIKE ? ESCAPE '\\')").join(" OR ")})`);
    for (const ref of sourceRefs) {
      params.push(ref, `%${escapeLike(ref)}%`);
    }
  }
  if (terms.length > 0) {
    where.push(`(${terms.map(() => "lower(c.text) LIKE ? ESCAPE '\\'").join(" OR ")})`);
    for (const term of terms) {
      params.push(`%${escapeLike(term)}%`);
    }
  }
  params.push(limit);
  return db.query(`SELECT
      c.id AS chunk_id,
      c.text,
      c.start_offset,
      c.end_offset,
      c.metadata_json,
      c.source_revision_id,
      sr.revision,
      sr.hash,
      s.uri AS source_uri,
      s.title AS source_title
    FROM chunks c
    JOIN source_revisions sr ON sr.id = c.source_revision_id
    JOIN sources s ON s.id = sr.source_id
    WHERE ${where.join(" AND ")}
    ORDER BY c.created_at ASC, c.ordinal ASC
    LIMIT ?`).all(...params);
}
|
|
18326
|
+
/**
 * Collapse whitespace and shorten text to at most `max` characters.
 *
 * Runs of whitespace become a single space and the text is trimmed. Text
 * longer than `max` is cut and suffixed with "...", with the suffix counted
 * against the limit so the result never exceeds `max` characters.
 *
 * @param text Text to excerpt.
 * @param max Maximum length of the returned string (default 420).
 * @returns The normalized, possibly truncated text.
 */
function excerpt(text, max = 420) {
  const ellipsis = "...";
  const normalized = text.replace(/\s+/g, " ").trim();
  if (normalized.length <= max) {
    return normalized;
  }
  if (max <= ellipsis.length) {
    // No room for an ellipsis: hard-cut instead of overshooting the limit.
    return normalized.slice(0, Math.max(0, max));
  }
  return `${normalized.slice(0, max - ellipsis.length).trim()}${ellipsis}`;
}
|
|
18330
|
+
/**
 * Pick the page title for a compile run.
 *
 * Preference order: a non-blank `options.title`, then a non-blank
 * `options.query`, then the first row's `source_title`, then the literal
 * "Compiled Knowledge".
 *
 * @param options Compile options (`title` and `query` are read).
 * @param rows Selected source rows.
 * @returns The chosen title.
 */
function titleFor(options, rows) {
  const explicitTitle = options.title?.trim();
  if (explicitTitle) {
    return explicitTitle;
  }
  const queryText = options.query?.trim();
  if (queryText) {
    return queryText;
  }
  const firstRow = rows[0];
  return firstRow?.source_title ?? "Compiled Knowledge";
}
|
|
18337
|
+
/**
 * Render the Markdown body of a compiled wiki page.
 *
 * Layout: an H1 title, a "Generated at" line, a "## Sources" list with one
 * `[S<n>]` entry per row (title, URI, revision, hash), then one "##" section
 * per row containing an excerpt of the chunk text and its `[S<n>]` citation.
 *
 * @param title Page title.
 * @param rows Source rows (source_title, source_uri, revision, hash, text are read).
 * @param now Timestamp string printed in the header.
 * @returns Markdown text ending with a newline.
 */
function compileBody(title, rows, now) {
  const sourceList = [];
  const sections = [];
  rows.forEach((row, position) => {
    const ordinal = position + 1;
    const tag = `S${ordinal}`;
    const displayName = row.source_title ?? row.source_uri ?? "Source";
    const uri = row.source_uri ?? "unknown";
    const revision = row.revision ?? "unknown";
    const hash = row.hash ?? "unknown";
    sourceList.push(`- [${tag}] ${displayName} (${uri}, revision ${revision}, hash ${hash})`);

    const heading = row.source_title ?? `Source ${ordinal}`;
    sections.push([`## ${heading}`, "", excerpt(row.text), "", `Citation: [${tag}]`].join("\n"));
  });
  const header = [`# ${title}`, "", `Generated at: ${now}`, "", "## Sources", ""];
  return [...header, ...sourceList, "", ...sections, ""].join("\n");
}
|
|
18367
|
+
/**
 * Persist one artifact through the store and describe what was written.
 *
 * @param store Artifact store exposing an async `put(entry)` that resolves to
 *   an object with `key` and `uri`.
 * @param entry `{ key, body, content_type, metadata? }`.
 * @returns A record with the stored `key` and `uri`, a `kind` ("log" for keys
 *   under `logs/`, otherwise "wiki_page"), the content type, whatever fields
 *   `hashArtifactBody(entry.body)` returns, and a shallow copy of the metadata.
 */
async function writeArtifact(store, entry) {
  const stored = await store.put(entry);
  const isLog = entry.key.startsWith("logs/");
  const bodyDigest = hashArtifactBody(entry.body);
  const metadata = { ...(entry.metadata ?? {}) };
  return {
    key: stored.key,
    uri: stored.uri,
    kind: isLog ? "log" : "wiki_page",
    content_type: entry.content_type,
    // Spread after the fixed fields so the digest helper's keys keep precedence.
    ...bodyDigest,
    metadata
  };
}
|
|
18380
|
+
/**
 * Append one event to the day's JSONL log artifact
 * (`logs/<yyyy>/<mm>/<dd>.jsonl`, UTC date of `now`).
 *
 * The current log text is read, the serialized event is appended on its own
 * line, and the whole log is written back through `writeArtifact`.
 *
 * @param store Artifact store exposing async `getText(key)` and `put(entry)`.
 * @param event JSON-serializable event object.
 * @param now Date used to pick the log partition.
 * @returns The artifact record returned by `writeArtifact`.
 */
async function appendLog(store, event, now) {
  const { year, month, day } = todayParts(now);
  const key = `logs/${year}/${month}/${day}.jsonl`;
  let existing = "";
  try {
    // A store that signals "missing" with null/undefined must not end up as
    // the literal text "null"/"undefined" inside the log.
    existing = (await store.getText(key)) ?? "";
  } catch {
    // Best effort: a log that cannot be read (typically: does not exist yet)
    // is treated as empty.
    existing = "";
  }
  // Keep one event per line even if the stored log lacks a trailing newline.
  const separator = existing !== "" && !existing.endsWith("\n") ? "\n" : "";
  return writeArtifact(store, {
    key,
    body: `${existing}${separator}${JSON.stringify(event)}\n`,
    content_type: "application/x-ndjson"
  });
}
|
|
18396
|
+
/**
 * Insert or update a wiki page row and replace its single indexed chunk.
 *
 * Steps, in order: upsert `wiki_pages` keyed on `path` (status is always
 * "active"); delete the page's existing `chunks_fts` and `chunks` rows;
 * insert one "wiki" chunk holding the whole body; insert the matching
 * `chunks_fts` row.
 *
 * @param db Database handle exposing `run(sql, params)` and `query(sql).all(...)`.
 * @param input `{ pageId, path, title, artifactUri, contentHash, body, provenance, now }`.
 */
function upsertWikiPage(db, input) {
  const { pageId, path, title, artifactUri, contentHash, body, provenance, now } = input;

  const pageParams = [
    pageId,
    path,
    title,
    artifactUri,
    contentHash,
    "active",
    JSON.stringify({ artifact_key: path, provenance }),
    now,
    now
  ];
  db.run(`INSERT INTO wiki_pages (id, path, title, artifact_uri, content_hash, status, metadata_json, created_at, updated_at)
    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
    ON CONFLICT(path) DO UPDATE SET
      title = excluded.title,
      artifact_uri = excluded.artifact_uri,
      content_hash = excluded.content_hash,
      status = excluded.status,
      metadata_json = excluded.metadata_json,
      updated_at = excluded.updated_at`, pageParams);

  // Remove the full-text rows first: they are looked up via the chunk ids
  // that the following DELETE removes.
  const previousChunks = db.query("SELECT id FROM chunks WHERE wiki_page_id = ?").all(pageId);
  for (const { id } of previousChunks) {
    db.run("DELETE FROM chunks_fts WHERE chunk_id = ?", [id]);
  }
  db.run("DELETE FROM chunks WHERE wiki_page_id = ?", [pageId]);

  // The chunk id depends on the content hash, so new content gets a new id.
  const chunkId = stableId6("chk", `${pageId}\x00${contentHash}`);
  const chunkParams = [
    chunkId,
    pageId,
    "wiki",
    0,
    body,
    estimateTokenCount2(body),
    0,
    body.length,
    JSON.stringify({
      artifact_key: path,
      artifact_uri: artifactUri,
      content_hash: contentHash,
      provenance
    }),
    now
  ];
  db.run(`INSERT INTO chunks (id, wiki_page_id, kind, ordinal, text, token_count, start_offset, end_offset, metadata_json, created_at)
    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`, chunkParams);

  db.run("INSERT INTO chunks_fts (chunk_id, text, title, source_uri) VALUES (?, ?, ?, ?)", [
    chunkId,
    body,
    title,
    artifactUri
  ]);
}
|
|
18449
|
+
/**
 * Replace every citation row of a wiki page.
 *
 * Existing rows for `pageId` are deleted, then one row is inserted per
 * citation. The row id is derived from the page id, source URI and chunk id;
 * a citation without a chunk id gets a random UUID in its place.
 *
 * @param db Database handle exposing `run(sql, params)`.
 * @param pageId Id of the citing wiki page.
 * @param citations Array of `{ chunk_id, source_uri, quote, start_offset, end_offset, metadata }`.
 * @param now Timestamp stored in `created_at`.
 * @returns Number of citations passed in.
 */
function replacePageCitations(db, pageId, citations, now) {
  db.run("DELETE FROM citations WHERE wiki_page_id = ?", [pageId]);
  const insertSql = `INSERT INTO citations (id, wiki_page_id, chunk_id, source_uri, quote, start_offset, end_offset, metadata_json, created_at)
    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`;
  for (const citation of citations) {
    const discriminator = citation.chunk_id ?? randomUUID9();
    const citationId = stableId6("cit", `${pageId}\x00${citation.source_uri}\x00${discriminator}`);
    const rowParams = [
      citationId,
      pageId,
      citation.chunk_id,
      citation.source_uri,
      citation.quote,
      citation.start_offset,
      citation.end_offset,
      JSON.stringify(citation.metadata),
      now
    ];
    db.run(insertSql, rowParams);
  }
  return citations.length;
}
|
|
18467
|
+
/**
 * Insert or update the "wiki_topic" index entry for a page.
 *
 * The row is keyed on `(kind, name, shard_key)`, where name is the page title
 * and shard key is the page path; on conflict only the artifact URI, metadata
 * and `updated_at` are refreshed.
 *
 * @param db Database handle exposing `run(sql, params)`.
 * @param input `{ title, path, artifactUri, contentHash, now }`.
 * @returns Always 1.
 */
function upsertIndex(db, input) {
  const { title, path, artifactUri, contentHash, now } = input;
  const indexId = stableId6("idx", `wiki-topic\x00${path}`);
  const metadataJson = JSON.stringify({
    artifact_key: path,
    content_hash: contentHash
  });
  const rowParams = [indexId, "wiki_topic", title, artifactUri, path, metadataJson, now, now];
  db.run(`INSERT INTO knowledge_indexes (id, kind, name, artifact_uri, shard_key, metadata_json, created_at, updated_at)
    VALUES (?, ?, ?, ?, ?, ?, ?, ?)
    ON CONFLICT(kind, name, shard_key) DO UPDATE SET
      artifact_uri = excluded.artifact_uri,
      metadata_json = excluded.metadata_json,
      updated_at = excluded.updated_at`, rowParams);
  return 1;
}
|
|
18488
|
+
/**
 * Pick the concept name for a page: the first token of the lower-cased title
 * that starts with `[a-z0-9]` and continues with at least two characters from
 * `[a-z0-9-]`.
 *
 * @param title Page title.
 * @returns The matched token, or "knowledge" when the title has none.
 */
function firstConcept(title) {
  const found = /[a-z0-9][a-z0-9-]{2,}/.exec(title.toLowerCase());
  return found === null ? "knowledge" : found[0];
}
|
|
18491
|
+
/**
 * Compile matching source chunks into a cited wiki page.
 *
 * Sequence: migrate the database; select source chunks (throws when none
 * match); write the page artifact under `wiki/generated/`; write a concept
 * page under `wiki/concepts/` that links to it; append a
 * "wiki_compile_completed" log event; then record storage objects, both wiki
 * pages, a "concept" backlink, the page's citations and its topic index in
 * the database.
 *
 * @param options `{ dbPath, store, query?, sourceRefs?, title?, limit?, now? }`.
 * @returns Summary with page id, path, artifact URI, content hash, counts,
 *   concept page id, log key and an empty `warnings` array.
 * @throws Error when no source chunks match the compile input.
 */
async function compileWikiPage(options) {
  const startedAt = options.now ?? new Date();
  const timestamp = startedAt.toISOString();
  migrateKnowledgeDb(options.dbPath);

  // Read phase uses its own connection, closed before any artifact is written.
  const reader = openKnowledgeDb(options.dbPath);
  let rows;
  try {
    rows = selectSourceChunks(reader, options);
  } finally {
    reader.close();
  }
  if (rows.length === 0) {
    throw new Error("No source chunks matched wiki compile input.");
  }

  const title = titleFor(options, rows);
  const path = `wiki/generated/${slugify2(title)}.md`;
  const body = compileBody(title, rows, timestamp);

  // Prefer the ref recorded in chunk metadata; fall back to the source URI.
  const sourceRefs = [];
  for (const row of rows) {
    const rowMetadata = parseJsonObject4(row.metadata_json);
    const ref = typeof rowMetadata.source_ref === "string" ? rowMetadata.source_ref : row.source_uri;
    if (ref) {
      sourceRefs.push(ref);
    }
  }

  const provenance = generatedArtifactProvenance({
    generated_from: "wiki_compile",
    artifact_key: path,
    source_refs: sourceRefs
  });
  const pageArtifact = await writeArtifact(options.store, {
    key: path,
    body,
    content_type: "text/markdown",
    metadata: { generated_from: "wiki_compile" }
  });
  const pageId = stableId6("wiki", path);

  const citations = rows.map((row) => {
    const citationMetadata = {
      source_revision_id: row.source_revision_id,
      revision: row.revision,
      hash: row.hash,
      source_ref: parseJsonObject4(row.metadata_json).source_ref ?? row.source_uri
    };
    return {
      chunk_id: row.chunk_id,
      source_uri: row.source_uri ?? "unknown",
      quote: excerpt(row.text, 240),
      start_offset: row.start_offset,
      end_offset: row.end_offset,
      metadata: citationMetadata
    };
  });

  const concept = firstConcept(title);
  const conceptPath = `wiki/concepts/${slugify2(concept)}.md`;
  const conceptBody = [`# ${concept}`, "", `Related page: [[${path}]]`, ""].join("\n");
  const conceptProvenance = generatedArtifactProvenance({
    generated_from: "wiki_compile_concept",
    artifact_key: conceptPath,
    source_refs: sourceRefs
  });
  const conceptArtifact = await writeArtifact(options.store, {
    key: conceptPath,
    body: conceptBody,
    content_type: "text/markdown",
    metadata: { generated_from: "wiki_compile_concept" }
  });
  const conceptPageId = stableId6("wiki", conceptPath);

  const log = await appendLog(options.store, {
    ts: timestamp,
    event: "wiki_compile_completed",
    page_key: path,
    source_refs: sourceRefs,
    chunks_seen: rows.length
  }, startedAt);

  const pageHash = pageArtifact.hash ?? "";
  const conceptHash = conceptArtifact.hash ?? "";
  const db = openKnowledgeDb(options.dbPath);
  try {
    recordStorageObjects(db, [pageArtifact, conceptArtifact, log], startedAt);
    upsertWikiPage(db, {
      pageId,
      path,
      title,
      artifactUri: pageArtifact.uri,
      contentHash: pageHash,
      body,
      provenance,
      now: timestamp
    });
    upsertWikiPage(db, {
      pageId: conceptPageId,
      path: conceptPath,
      title: concept,
      artifactUri: conceptArtifact.uri,
      contentHash: conceptHash,
      body: conceptBody,
      provenance: conceptProvenance,
      now: timestamp
    });
    db.run(`INSERT OR REPLACE INTO wiki_backlinks (from_page_id, to_page_id, label, created_at)
      VALUES (?, ?, ?, ?)`, [pageId, conceptPageId, "concept", timestamp]);
    const citationsWritten = replacePageCitations(db, pageId, citations, timestamp);
    const indexesUpdated = upsertIndex(db, {
      title,
      path,
      artifactUri: pageArtifact.uri,
      contentHash: pageHash,
      now: timestamp
    });
    return {
      page_id: pageId,
      path,
      artifact_uri: pageArtifact.uri,
      content_hash: pageHash,
      chunks_seen: rows.length,
      citations_written: citationsWritten,
      concept_page_id: conceptPageId,
      indexes_updated: indexesUpdated,
      log_key: log.key,
      warnings: []
    };
  } finally {
    db.close();
  }
}
|
|
18609
|
+
/**
 * File an answer as a cited wiki note under `wiki/answers/`.
 *
 * Without `options.approveWrite` nothing is written and a dry-run result is
 * returned. With approval: the note artifact is written, a
 * "wiki_answer_filed" log event is appended, and the storage objects, wiki
 * page, citations and topic index are recorded in the database.
 *
 * @param options `{ dbPath, store, prompt, answer, context, approveWrite, now? }`;
 *   `context.citations` supplies the citations listed in the note.
 * @returns Result with `approved`, `durable_writes_performed`, page id, path,
 *   artifact URI, citation count, log key and a human-readable message.
 */
async function fileAnswerToWiki(options) {
  if (!options.approveWrite) {
    // Dry run: report what is required without touching the store or database.
    return {
      approved: false,
      durable_writes_performed: false,
      page_id: null,
      path: null,
      artifact_uri: null,
      citations_written: 0,
      log_key: null,
      message: "Dry-run: answer filing requires --approve-write."
    };
  }

  const filedAt = options.now ?? new Date();
  const timestamp = filedAt.toISOString();
  // Titles are capped at 80 characters: 77 of the prompt plus "...".
  const title = options.prompt.length > 80 ? `${options.prompt.slice(0, 77)}...` : options.prompt;
  const path = `wiki/answers/${slugify2(title)}.md`;
  const citations = options.context.citations;

  const citationLines = citations.map((citation, index) => {
    const target = citation.source_ref ?? citation.source_uri ?? citation.artifact_path ?? citation.artifact_uri ?? "unknown";
    const hashNote = citation.hash ? `(hash ${citation.hash})` : "";
    return `- [C${index + 1}] ${target} ${hashNote}`;
  });
  const body = [
    `# ${title}`,
    "",
    options.answer,
    "",
    "## Citations",
    "",
    ...citationLines,
    ""
  ].join("\n");

  const sourceRefs = citations
    .map((citation) => citation.source_ref ?? citation.source_uri)
    .filter((ref) => Boolean(ref));
  const provenance = generatedArtifactProvenance({
    generated_from: "knowledge_answer",
    artifact_key: path,
    source_refs: sourceRefs
  });
  const artifact = await writeArtifact(options.store, {
    key: path,
    body,
    content_type: "text/markdown",
    metadata: { generated_from: "knowledge_answer" }
  });
  const log = await appendLog(options.store, {
    ts: timestamp,
    event: "wiki_answer_filed",
    page_key: path,
    prompt: options.prompt,
    citations: citations.length
  }, filedAt);

  const pageId = stableId6("wiki", path);
  const contentHash = artifact.hash ?? "";
  const db = openKnowledgeDb(options.dbPath);
  try {
    recordStorageObjects(db, [artifact, log], filedAt);
    upsertWikiPage(db, {
      pageId,
      path,
      title,
      artifactUri: artifact.uri,
      contentHash,
      body,
      provenance,
      now: timestamp
    });
    const citationRows = citations.map((citation) => ({
      chunk_id: citation.chunk_id,
      source_uri: citation.source_uri ?? citation.artifact_uri ?? "unknown",
      quote: citation.quote,
      start_offset: citation.start_offset,
      end_offset: citation.end_offset,
      metadata: {
        source_ref: citation.source_ref,
        artifact_path: citation.artifact_path,
        revision: citation.revision,
        hash: citation.hash
      }
    }));
    const written = replacePageCitations(db, pageId, citationRows, timestamp);
    upsertIndex(db, {
      title,
      path,
      artifactUri: artifact.uri,
      contentHash,
      now: timestamp
    });
    return {
      approved: true,
      durable_writes_performed: true,
      page_id: pageId,
      path,
      artifact_uri: artifact.uri,
      citations_written: written,
      log_key: log.key,
      message: `Filed answer to ${path}`
    };
  } finally {
    db.close();
  }
}
|
|
18706
|
+
/**
 * Append one lint issue to the running list.
 *
 * @param issues Array collecting issues (mutated in place).
 * @param issue2 Issue object to append.
 */
function addIssue(issues, issue2) {
  issues[issues.length] = issue2;
}
|
|
18709
|
+
/**
 * Run wiki health checks against the knowledge database.
 *
 * Checks, in order: generated pages without citations (error); citations
 * whose chunk metadata marks it stale or deleted (warn); duplicate active
 * titles, case-insensitive (warn); active pages with no backlinks in either
 * direction, excluding `wiki/README.md` (info); citations whose source URI is
 * neither a known source nor one of the allowed URI schemes (error); pages
 * whose metadata mentions "contradiction" (warn); and up to 25 source chunks
 * not cited by any page (info).
 *
 * @param options `{ dbPath }`.
 * @returns `{ ok, issue_count, issues, counts }`; `ok` is false when any
 *   issue has severity "error".
 */
function lintWiki(options) {
  migrateKnowledgeDb(options.dbPath);
  const db = openKnowledgeDb(options.dbPath);
  const issues = [];
  const countOf = (sql) => db.query(sql).get()?.n ?? 0;
  const rowsOf = (sql) => db.query(sql).all();
  try {
    const activePages = countOf("SELECT COUNT(*) AS n FROM wiki_pages WHERE status = 'active'");
    const citationCount = countOf("SELECT COUNT(*) AS n FROM citations");
    const backlinkCount = countOf("SELECT COUNT(*) AS n FROM wiki_backlinks");

    const missingCitations = rowsOf(`SELECT wp.id, wp.path
      FROM wiki_pages wp
      LEFT JOIN citations c ON c.wiki_page_id = wp.id
      WHERE wp.status = 'active' AND wp.path LIKE 'wiki/generated/%'
      GROUP BY wp.id
      HAVING COUNT(c.id) = 0`);
    missingCitations.forEach((page) => {
      addIssue(issues, { type: "missing_citation", severity: "error", page_id: page.id, path: page.path, message: "Generated wiki page has no citations." });
    });

    const stale = rowsOf(`SELECT wp.id AS page_id, wp.path, c.source_uri, c.chunk_id
      FROM citations c
      JOIN wiki_pages wp ON wp.id = c.wiki_page_id
      LEFT JOIN chunks ch ON ch.id = c.chunk_id
      WHERE ch.metadata_json LIKE '%"stale":true%' OR ch.metadata_json LIKE '%"status":"stale"%' OR ch.metadata_json LIKE '%"status":"deleted"%'`);
    stale.forEach((row) => {
      addIssue(issues, { type: "stale_citation", severity: "warn", page_id: row.page_id, path: row.path, source_uri: row.source_uri, chunk_id: row.chunk_id ?? undefined, message: "Page cites a stale or deleted source chunk." });
    });

    const duplicates = rowsOf(`SELECT lower(title) AS title, COUNT(*) AS n
      FROM wiki_pages
      WHERE status = 'active'
      GROUP BY lower(title)
      HAVING COUNT(*) > 1`);
    duplicates.forEach((row) => {
      addIssue(issues, { type: "duplicate_page", severity: "warn", message: `Duplicate active wiki title: ${row.title} (${row.n} pages).` });
    });

    const orphans = rowsOf(`SELECT wp.id, wp.path
      FROM wiki_pages wp
      LEFT JOIN wiki_backlinks wb1 ON wb1.from_page_id = wp.id
      LEFT JOIN wiki_backlinks wb2 ON wb2.to_page_id = wp.id
      WHERE wp.status = 'active'
      AND wp.path NOT IN ('wiki/README.md')
      GROUP BY wp.id
      HAVING COUNT(wb1.to_page_id) = 0 AND COUNT(wb2.from_page_id) = 0`);
    orphans.forEach((page) => {
      addIssue(issues, { type: "orphan_page", severity: "info", page_id: page.id, path: page.path, message: "Wiki page has no backlinks." });
    });

    const unresolved = rowsOf(`SELECT wp.id AS page_id, wp.path, c.source_uri
      FROM citations c
      JOIN wiki_pages wp ON wp.id = c.wiki_page_id
      LEFT JOIN sources s ON s.uri = c.source_uri
      WHERE s.id IS NULL AND c.source_uri NOT LIKE 'file://%' AND c.source_uri NOT LIKE 's3://%' AND c.source_uri NOT LIKE 'https://%' AND c.source_uri NOT LIKE 'open-files://%'`);
    unresolved.forEach((row) => {
      addIssue(issues, { type: "unresolved_source_ref", severity: "error", page_id: row.page_id, path: row.path, source_uri: row.source_uri, message: "Citation source URI cannot be resolved to a known or allowed source ref." });
    });

    const contradictions = rowsOf(`SELECT id, path FROM wiki_pages WHERE lower(metadata_json) LIKE '%contradiction%'`);
    contradictions.forEach((page) => {
      addIssue(issues, { type: "contradiction_marker", severity: "warn", page_id: page.id, path: page.path, message: "Page metadata contains a contradiction marker." });
    });

    const newArticleCandidates = rowsOf(`SELECT c.id AS chunk_id, s.uri AS source_uri
      FROM chunks c
      JOIN source_revisions sr ON sr.id = c.source_revision_id
      JOIN sources s ON s.id = sr.source_id
      LEFT JOIN citations cit ON cit.chunk_id = c.id
      WHERE c.kind = 'source'
      GROUP BY c.id
      HAVING COUNT(cit.id) = 0
      LIMIT 25`);
    newArticleCandidates.forEach((row) => {
      addIssue(issues, { type: "new_article_candidate", severity: "info", chunk_id: row.chunk_id, source_uri: row.source_uri ?? undefined, message: "Source chunk is indexed but not cited by any wiki page yet." });
    });

    const hasError = issues.some((issue2) => issue2.severity === "error");
    return {
      ok: !hasError,
      issue_count: issues.length,
      issues,
      counts: {
        active_pages: activePages,
        citations: citationCount,
        backlinks: backlinkCount,
        new_article_candidates: newArticleCandidates.length
      }
    };
  } finally {
    db.close();
  }
}
|
|
18792
|
+
|
|
18793
|
+
// src/wiki-layout.ts
|
|
18794
|
+
import { createHash as createHash11 } from "crypto";
|
|
18795
|
+
/**
 * Split a Date into zero-padded UTC calendar components.
 *
 * @param now Date to decompose.
 * @returns `{ year, month, day }` as strings; month and day are two digits.
 */
function todayParts2(now) {
  return {
    year: String(now.getUTCFullYear()),
    // getUTCMonth() is zero-based.
    month: String(now.getUTCMonth() + 1).padStart(2, "0"),
    day: String(now.getUTCDate()).padStart(2, "0")
  };
}
|
|
18260
|
-
function
|
|
18261
|
-
return `${prefix}_${
|
|
18801
|
+
/**
 * Build a deterministic identifier from a prefix and a seed string.
 *
 * @param prefix Label placed before the underscore.
 * @param value Seed that is hashed with SHA-256.
 * @returns `<prefix>_` followed by the first 20 hex characters of the digest.
 */
function stableId7(prefix, value) {
  const hasher = createHash11("sha256");
  hasher.update(value);
  const suffix = hasher.digest("hex").slice(0, 20);
  return `${prefix}_${suffix}`;
}
|
|
18263
|
-
function
|
|
18804
|
+
/**
 * Rough token estimate: 1.25 tokens per whitespace-separated word, rounded
 * up, never less than 1.
 *
 * @param text Text to measure.
 * @returns Estimated token count (>= 1).
 */
function estimateTokenCount3(text) {
  const wordRuns = text.trim().match(/\S+/g);
  const wordCount = wordRuns === null ? 0 : wordRuns.length;
  const scaled = Math.ceil(wordCount * 1.25);
  return scaled >= 1 ? scaled : 1;
}
|
|
@@ -18319,7 +18860,7 @@ Pages should be concise, cited, and organized for both humans and agents.
|
|
|
18319
18860
|
`;
|
|
18320
18861
|
}
|
|
18321
18862
|
async function initializeWikiLayout(store, now = new Date) {
|
|
18322
|
-
const { year, month, day } =
|
|
18863
|
+
const { year, month, day } = todayParts2(now);
|
|
18323
18864
|
const schemaKey = "schemas/v1.md";
|
|
18324
18865
|
const rootIndexKey = "indexes/root.md";
|
|
18325
18866
|
const wikiReadmeKey = "wiki/README.md";
|
|
@@ -18376,7 +18917,7 @@ function provenanceFor(artifact) {
|
|
|
18376
18917
|
}
|
|
18377
18918
|
function recordWikiChunk(db, pageId, title, artifact, body, now) {
|
|
18378
18919
|
const provenance = provenanceFor(artifact);
|
|
18379
|
-
const chunkId =
|
|
18920
|
+
const chunkId = stableId7("chk", `${pageId}\x00${artifact.hash ?? artifact.uri}`);
|
|
18380
18921
|
const existing = db.query("SELECT id FROM chunks WHERE wiki_page_id = ?").all(pageId);
|
|
18381
18922
|
for (const row of existing)
|
|
18382
18923
|
db.run("DELETE FROM chunks_fts WHERE chunk_id = ?", [row.id]);
|
|
@@ -18388,7 +18929,7 @@ function recordWikiChunk(db, pageId, title, artifact, body, now) {
|
|
|
18388
18929
|
"wiki",
|
|
18389
18930
|
0,
|
|
18390
18931
|
body,
|
|
18391
|
-
|
|
18932
|
+
estimateTokenCount3(body),
|
|
18392
18933
|
0,
|
|
18393
18934
|
body.length,
|
|
18394
18935
|
JSON.stringify({
|
|
@@ -18412,7 +18953,7 @@ function recordWikiLayoutCatalog(db, artifacts, now = new Date) {
|
|
|
18412
18953
|
artifact_uri = excluded.artifact_uri,
|
|
18413
18954
|
metadata_json = excluded.metadata_json,
|
|
18414
18955
|
updated_at = excluded.updated_at`, [
|
|
18415
|
-
|
|
18956
|
+
stableId7("idx", "root:indexes/root.md"),
|
|
18416
18957
|
"root",
|
|
18417
18958
|
"root",
|
|
18418
18959
|
rootIndex.uri,
|
|
@@ -18427,7 +18968,7 @@ function recordWikiLayoutCatalog(db, artifacts, now = new Date) {
|
|
|
18427
18968
|
]);
|
|
18428
18969
|
}
|
|
18429
18970
|
if (wikiReadme) {
|
|
18430
|
-
const wikiPageId =
|
|
18971
|
+
const wikiPageId = stableId7("wiki", "wiki/README.md");
|
|
18431
18972
|
db.run(`INSERT INTO wiki_pages (id, path, title, artifact_uri, content_hash, status, metadata_json, created_at, updated_at)
|
|
18432
18973
|
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
18433
18974
|
ON CONFLICT(path) DO UPDATE SET
|
|
@@ -18599,6 +19140,37 @@ class KnowledgeService {
|
|
|
18599
19140
|
}
|
|
18600
19141
|
return result;
|
|
18601
19142
|
}
|
|
19143
|
+
async compileWiki(options = {}) {
|
|
19144
|
+
const workspace = this.ensureWorkspace();
|
|
19145
|
+
return compileWikiPage({
|
|
19146
|
+
...options,
|
|
19147
|
+
dbPath: workspace.knowledgeDbPath,
|
|
19148
|
+
store: this.artifactStore()
|
|
19149
|
+
});
|
|
19150
|
+
}
|
|
19151
|
+
async fileAnswer(options) {
|
|
19152
|
+
const workspace = this.ensureWorkspace();
|
|
19153
|
+
const context = await this.retrieveContext({
|
|
19154
|
+
query: options.prompt,
|
|
19155
|
+
limit: options.limit,
|
|
19156
|
+
semantic: options.semantic,
|
|
19157
|
+
modelRef: options.modelRef,
|
|
19158
|
+
dimensions: options.dimensions,
|
|
19159
|
+
fake: options.fake
|
|
19160
|
+
});
|
|
19161
|
+
return fileAnswerToWiki({
|
|
19162
|
+
dbPath: workspace.knowledgeDbPath,
|
|
19163
|
+
store: this.artifactStore(),
|
|
19164
|
+
prompt: options.prompt,
|
|
19165
|
+
answer: options.answer,
|
|
19166
|
+
context,
|
|
19167
|
+
approveWrite: options.approveWrite
|
|
19168
|
+
});
|
|
19169
|
+
}
|
|
19170
|
+
lintWiki() {
|
|
19171
|
+
const workspace = this.ensureWorkspace();
|
|
19172
|
+
return lintWiki({ dbPath: workspace.knowledgeDbPath });
|
|
19173
|
+
}
|
|
18602
19174
|
async ingestManifest(input) {
|
|
18603
19175
|
const workspace = this.ensureWorkspace();
|
|
18604
19176
|
return ingestOpenFilesManifest({
|