@hasna/knowledge 0.2.18 → 0.2.20
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +27 -9
- package/bin/open-knowledge-mcp.js +292 -15
- package/bin/open-knowledge.js +99 -58
- package/docs/architecture/ai-native-knowledge-base.md +7 -0
- package/docs/architecture/hybrid-semantic-search.md +19 -1
- package/package.json +1 -1
- package/src/cli.ts +46 -12
- package/src/knowledge-db.ts +27 -1
- package/src/mcp.js +44 -0
- package/src/reindex.ts +260 -0
- package/src/service.ts +28 -0
package/README.md
CHANGED
|
@@ -80,6 +80,11 @@ open-knowledge ingest source file:///absolute/path/to/handbook.md --purpose know
|
|
|
80
80
|
# Consume open-files change events and invalidate stale source chunks
|
|
81
81
|
open-knowledge reindex outbox ./open-files-outbox.jsonl --scope project --json
|
|
82
82
|
|
|
83
|
+
# Inspect and refresh the embedding queue after source changes
|
|
84
|
+
open-knowledge reindex status --scope project --json
|
|
85
|
+
open-knowledge reindex enqueue --scope project --json
|
|
86
|
+
open-knowledge reindex embeddings --scope project --fake --json
|
|
87
|
+
|
|
83
88
|
# Resolve indexed source text and citation evidence through the read-only source boundary
|
|
84
89
|
open-knowledge source resolve open-files://file/f_123/revision/rev_456 --scope project --json
|
|
85
90
|
|
|
@@ -243,11 +248,23 @@ resolver API lands.
|
|
|
243
248
|
|
|
244
249
|
### reindex
|
|
245
250
|
```bash
|
|
251
|
+
open-knowledge reindex status [--model openai:text-embedding-3-small] [--scope project] [--json]
|
|
252
|
+
open-knowledge reindex enqueue [--model openai:text-embedding-3-small] [--scope project] [--json]
|
|
253
|
+
open-knowledge reindex embeddings [--full] [--limit <n>] [--model openai:text-embedding-3-small] [--scope project] [--json]
|
|
246
254
|
open-knowledge reindex outbox <file|s3://bucket/key> [--scope project] [--json]
|
|
247
255
|
```
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
256
|
+
Inspect and operate index refresh work. `reindex status` reports missing
|
|
257
|
+
embedding rows, stale revisions, queued jobs, and vector counts. `reindex
|
|
258
|
+
enqueue` adds missing source chunks to `reindex_queue` idempotently. `reindex
|
|
259
|
+
embeddings` records an `embedding-refresh` run, indexes missing chunks, and
|
|
260
|
+
marks completed queue rows; `--full` first clears `chunk_embeddings` and
|
|
261
|
+
`vector_index_entries` so the current source catalog is rebuilt from scratch.
|
|
262
|
+
|
|
263
|
+
`reindex outbox` consumes open-files JSON or JSONL change events. This
|
|
264
|
+
invalidates matching source chunks and embeddings by source ref, revision, or
|
|
265
|
+
hash, updates permission/path/delete metadata, and records a local run ledger.
|
|
266
|
+
Outbox inputs can be local files or allowed S3 objects, but raw source files
|
|
267
|
+
remain owned by `open-files`.
|
|
251
268
|
|
|
252
269
|
### search
|
|
253
270
|
```bash
|
|
@@ -369,12 +386,13 @@ The MCP server exposes item tools (`ok_add`, `ok_list`, `ok_get`, `ok_update`,
|
|
|
369
386
|
`ok_import`, `ok_batch`), workspace/storage inspection (`ok_paths`,
|
|
370
387
|
`ok_storage_status`), provider/embedding tools (`ok_provider_status`,
|
|
371
388
|
`ok_provider_models`, `ok_embeddings_status`, `ok_embeddings_index`,
|
|
372
|
-
`ok_semantic_search`),
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
389
|
+
`ok_semantic_search`), reindex tools (`ok_reindex_status`,
|
|
390
|
+
`ok_reindex_enqueue`, `ok_reindex_embeddings`), hybrid retrieval (`ok_search`),
|
|
391
|
+
and source-ref parsing/resolution (`ok_parse_source_ref`,
|
|
392
|
+
`ok_resolve_source`). The `knowledge_search` MCP tool returns reranked citation
|
|
393
|
+
context packs for agent prompts, and `knowledge_ask` runs the same prompt flow
|
|
394
|
+
exposed by `open-knowledge ask`. `ok_web_search` exposes safety-gated provider
|
|
395
|
+
web search to MCP clients.
|
|
378
396
|
|
|
379
397
|
## Source And Artifact Boundary
|
|
380
398
|
|
|
@@ -13660,7 +13660,7 @@ import { existsSync as existsSync7, readFileSync as readFileSync7, writeFileSync
|
|
|
13660
13660
|
// package.json
|
|
13661
13661
|
var package_default = {
|
|
13662
13662
|
name: "@hasna/knowledge",
|
|
13663
|
-
version: "0.2.18",
|
|
13663
|
+
version: "0.2.20",
|
|
13664
13664
|
description: "Agent-friendly local knowledge CLI with JSON output, pagination, and safe destructive actions",
|
|
13665
13665
|
type: "module",
|
|
13666
13666
|
bin: {
|
|
@@ -14386,6 +14386,28 @@ CREATE INDEX IF NOT EXISTS idx_vector_index_status ON vector_index_entries(statu
|
|
|
14386
14386
|
INSERT OR IGNORE INTO schema_versions(version, applied_at)
|
|
14387
14387
|
VALUES (4, datetime('now'));
|
|
14388
14388
|
`;
|
|
14389
|
+
var MIGRATION_5 = `
|
|
14390
|
+
CREATE TABLE IF NOT EXISTS reindex_queue (
|
|
14391
|
+
id TEXT PRIMARY KEY,
|
|
14392
|
+
kind TEXT NOT NULL,
|
|
14393
|
+
target_id TEXT NOT NULL,
|
|
14394
|
+
source_uri TEXT,
|
|
14395
|
+
reason TEXT NOT NULL,
|
|
14396
|
+
status TEXT NOT NULL DEFAULT 'pending',
|
|
14397
|
+
attempts INTEGER NOT NULL DEFAULT 0,
|
|
14398
|
+
metadata_json TEXT NOT NULL DEFAULT '{}',
|
|
14399
|
+
created_at TEXT NOT NULL,
|
|
14400
|
+
updated_at TEXT NOT NULL,
|
|
14401
|
+
UNIQUE(kind, target_id, reason)
|
|
14402
|
+
);
|
|
14403
|
+
|
|
14404
|
+
CREATE INDEX IF NOT EXISTS idx_reindex_queue_status ON reindex_queue(status);
|
|
14405
|
+
CREATE INDEX IF NOT EXISTS idx_reindex_queue_kind_target ON reindex_queue(kind, target_id);
|
|
14406
|
+
CREATE INDEX IF NOT EXISTS idx_reindex_queue_source_uri ON reindex_queue(source_uri);
|
|
14407
|
+
|
|
14408
|
+
INSERT OR IGNORE INTO schema_versions(version, applied_at)
|
|
14409
|
+
VALUES (5, datetime('now'));
|
|
14410
|
+
`;
|
|
14389
14411
|
function openKnowledgeDb(path) {
|
|
14390
14412
|
ensureParentDir(path);
|
|
14391
14413
|
const db = new Database(path);
|
|
@@ -14403,6 +14425,8 @@ function migrateKnowledgeDb(path) {
|
|
|
14403
14425
|
db.exec(MIGRATION_3);
|
|
14404
14426
|
if (getSchemaVersion(db) < 4)
|
|
14405
14427
|
db.exec(MIGRATION_4);
|
|
14428
|
+
if (getSchemaVersion(db) < 5)
|
|
14429
|
+
db.exec(MIGRATION_5);
|
|
14406
14430
|
return { path, schema_version: getSchemaVersion(db) };
|
|
14407
14431
|
} finally {
|
|
14408
14432
|
db.close();
|
|
@@ -14434,7 +14458,8 @@ function getKnowledgeDbStats(path) {
|
|
|
14434
14458
|
approval_gates: count(db, "approval_gates"),
|
|
14435
14459
|
storage_objects: count(db, "storage_objects"),
|
|
14436
14460
|
embeddings: count(db, "chunk_embeddings"),
|
|
14437
|
-
vector_entries: count(db, "vector_index_entries")
|
|
14461
|
+
vector_entries: count(db, "vector_index_entries"),
|
|
14462
|
+
reindex_queue: count(db, "reindex_queue")
|
|
14438
14463
|
};
|
|
14439
14464
|
} finally {
|
|
14440
14465
|
db.close();
|
|
@@ -17348,10 +17373,197 @@ async function ingestSourceRef(options) {
|
|
|
17348
17373
|
};
|
|
17349
17374
|
}
|
|
17350
17375
|
|
|
17351
|
-
// src/web-search.ts
|
|
17376
|
+
// src/reindex.ts
|
|
17352
17377
|
import { createHash as createHash7, randomUUID as randomUUID6 } from "crypto";
|
|
17378
|
+
function stableId5(prefix, value) {
|
|
17379
|
+
return `${prefix}_${createHash7("sha256").update(value).digest("hex").slice(0, 20)}`;
|
|
17380
|
+
}
|
|
17381
|
+
function queueCounts(dbPath) {
|
|
17382
|
+
const db = openKnowledgeDb(dbPath);
|
|
17383
|
+
try {
|
|
17384
|
+
const rows = db.query(`SELECT status, COUNT(*) AS n FROM reindex_queue GROUP BY status ORDER BY status`).all();
|
|
17385
|
+
return Object.fromEntries(rows.map((row) => [row.status, row.n]));
|
|
17386
|
+
} finally {
|
|
17387
|
+
db.close();
|
|
17388
|
+
}
|
|
17389
|
+
}
|
|
17390
|
+
function missingEmbeddingRows(dbPath, options) {
|
|
17391
|
+
const modelRef = resolveEmbeddingModelRef(options.modelRef, options.config);
|
|
17392
|
+
const parsed = parseModelRef(modelRef);
|
|
17393
|
+
const db = openKnowledgeDb(dbPath);
|
|
17394
|
+
try {
|
|
17395
|
+
return db.query(`SELECT c.id AS chunk_id, c.source_revision_id, s.uri AS source_uri
|
|
17396
|
+
FROM chunks c
|
|
17397
|
+
LEFT JOIN source_revisions sr ON sr.id = c.source_revision_id
|
|
17398
|
+
LEFT JOIN sources s ON s.id = sr.source_id
|
|
17399
|
+
LEFT JOIN vector_index_entries v ON v.chunk_id = c.id AND v.provider = ? AND v.model = ?
|
|
17400
|
+
WHERE v.id IS NULL
|
|
17401
|
+
ORDER BY c.created_at ASC, c.ordinal ASC`).all(parsed.provider, parsed.model);
|
|
17402
|
+
} finally {
|
|
17403
|
+
db.close();
|
|
17404
|
+
}
|
|
17405
|
+
}
|
|
17406
|
+
function reindexHealth(options) {
|
|
17407
|
+
migrateKnowledgeDb(options.dbPath);
|
|
17408
|
+
const db = openKnowledgeDb(options.dbPath);
|
|
17409
|
+
try {
|
|
17410
|
+
const version2 = db.query("SELECT MAX(version) AS version FROM schema_versions").get()?.version ?? 0;
|
|
17411
|
+
const chunks = db.query("SELECT COUNT(*) AS n FROM chunks").get()?.n ?? 0;
|
|
17412
|
+
const vectorEntries = db.query("SELECT COUNT(*) AS n FROM vector_index_entries").get()?.n ?? 0;
|
|
17413
|
+
const missing = missingEmbeddingRows(options.dbPath, options).length;
|
|
17414
|
+
const stale = db.query(`SELECT COUNT(*) AS n FROM source_revisions
|
|
17415
|
+
WHERE metadata_json LIKE '%"reindex_required":true%' OR metadata_json LIKE '%"status":"stale"%'`).get()?.n ?? 0;
|
|
17416
|
+
return {
|
|
17417
|
+
schema_version: version2,
|
|
17418
|
+
chunks,
|
|
17419
|
+
vector_entries: vectorEntries,
|
|
17420
|
+
missing_embeddings: missing,
|
|
17421
|
+
queued: queueCounts(options.dbPath),
|
|
17422
|
+
stale_revisions: stale
|
|
17423
|
+
};
|
|
17424
|
+
} finally {
|
|
17425
|
+
db.close();
|
|
17426
|
+
}
|
|
17427
|
+
}
|
|
17428
|
+
function enqueueMissingEmbeddings(options) {
|
|
17429
|
+
migrateKnowledgeDb(options.dbPath);
|
|
17430
|
+
const now = (options.now ?? new Date).toISOString();
|
|
17431
|
+
const reason = options.reason ?? "missing_embedding";
|
|
17432
|
+
const rows = missingEmbeddingRows(options.dbPath, options);
|
|
17433
|
+
const db = openKnowledgeDb(options.dbPath);
|
|
17434
|
+
let enqueued = 0;
|
|
17435
|
+
let alreadyQueued = 0;
|
|
17436
|
+
try {
|
|
17437
|
+
const write = db.transaction(() => {
|
|
17438
|
+
for (const row of rows) {
|
|
17439
|
+
const id = stableId5("rq", `embedding\x00${row.chunk_id}\x00${reason}`);
|
|
17440
|
+
const before = db.query("SELECT id FROM reindex_queue WHERE kind = ? AND target_id = ? AND reason = ?").get("embedding", row.chunk_id, reason);
|
|
17441
|
+
if (before) {
|
|
17442
|
+
alreadyQueued += 1;
|
|
17443
|
+
continue;
|
|
17444
|
+
}
|
|
17445
|
+
db.run(`INSERT INTO reindex_queue (id, kind, target_id, source_uri, reason, status, metadata_json, created_at, updated_at)
|
|
17446
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`, [
|
|
17447
|
+
id,
|
|
17448
|
+
"embedding",
|
|
17449
|
+
row.chunk_id,
|
|
17450
|
+
row.source_uri,
|
|
17451
|
+
reason,
|
|
17452
|
+
"pending",
|
|
17453
|
+
JSON.stringify({ source_revision_id: row.source_revision_id }),
|
|
17454
|
+
now,
|
|
17455
|
+
now
|
|
17456
|
+
]);
|
|
17457
|
+
enqueued += 1;
|
|
17458
|
+
}
|
|
17459
|
+
});
|
|
17460
|
+
write();
|
|
17461
|
+
} finally {
|
|
17462
|
+
db.close();
|
|
17463
|
+
}
|
|
17464
|
+
return { enqueued, already_queued: alreadyQueued, reason };
|
|
17465
|
+
}
|
|
17466
|
+
function clearEmbeddingIndex(dbPath) {
|
|
17467
|
+
const db = openKnowledgeDb(dbPath);
|
|
17468
|
+
try {
|
|
17469
|
+
const embeddings = db.query("SELECT COUNT(*) AS n FROM chunk_embeddings").get()?.n ?? 0;
|
|
17470
|
+
const vectorEntries = db.query("SELECT COUNT(*) AS n FROM vector_index_entries").get()?.n ?? 0;
|
|
17471
|
+
db.run("DELETE FROM vector_index_entries");
|
|
17472
|
+
db.run("DELETE FROM chunk_embeddings");
|
|
17473
|
+
return { embeddings, vectorEntries };
|
|
17474
|
+
} finally {
|
|
17475
|
+
db.close();
|
|
17476
|
+
}
|
|
17477
|
+
}
|
|
17478
|
+
function completeIndexedQueueItems(dbPath, options, now) {
|
|
17479
|
+
const modelRef = resolveEmbeddingModelRef(options.modelRef, options.config);
|
|
17480
|
+
const parsed = parseModelRef(modelRef);
|
|
17481
|
+
const db = openKnowledgeDb(dbPath);
|
|
17482
|
+
try {
|
|
17483
|
+
const result = db.run(`UPDATE reindex_queue
|
|
17484
|
+
SET status = ?, updated_at = ?
|
|
17485
|
+
WHERE kind = ?
|
|
17486
|
+
AND status = ?
|
|
17487
|
+
AND EXISTS (
|
|
17488
|
+
SELECT 1 FROM vector_index_entries v
|
|
17489
|
+
WHERE v.chunk_id = reindex_queue.target_id
|
|
17490
|
+
AND v.provider = ?
|
|
17491
|
+
AND v.model = ?
|
|
17492
|
+
)`, ["completed", now, "embedding", "pending", parsed.provider, parsed.model]);
|
|
17493
|
+
return result.changes;
|
|
17494
|
+
} finally {
|
|
17495
|
+
db.close();
|
|
17496
|
+
}
|
|
17497
|
+
}
|
|
17498
|
+
async function refreshEmbeddingIndex(options) {
|
|
17499
|
+
migrateKnowledgeDb(options.dbPath);
|
|
17500
|
+
const now = (options.now ?? new Date).toISOString();
|
|
17501
|
+
const runId = `run_${randomUUID6()}`;
|
|
17502
|
+
const deleted = options.full ? clearEmbeddingIndex(options.dbPath) : { embeddings: 0, vectorEntries: 0 };
|
|
17503
|
+
const queued = enqueueMissingEmbeddings({ ...options, reason: options.full ? "full_embedding_rebuild" : "missing_embedding" });
|
|
17504
|
+
const db = openKnowledgeDb(options.dbPath);
|
|
17505
|
+
try {
|
|
17506
|
+
db.run(`INSERT INTO runs (id, type, prompt, status, provider, model, metadata_json, created_at, updated_at)
|
|
17507
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`, [
|
|
17508
|
+
runId,
|
|
17509
|
+
"embedding-refresh",
|
|
17510
|
+
options.full ? "full" : "incremental",
|
|
17511
|
+
"running",
|
|
17512
|
+
"local",
|
|
17513
|
+
resolveEmbeddingModelRef(options.modelRef, options.config),
|
|
17514
|
+
JSON.stringify({ full: options.full === true, queued }),
|
|
17515
|
+
now,
|
|
17516
|
+
now
|
|
17517
|
+
]);
|
|
17518
|
+
} finally {
|
|
17519
|
+
db.close();
|
|
17520
|
+
}
|
|
17521
|
+
const indexed = await indexKnowledgeEmbeddings({
|
|
17522
|
+
dbPath: options.dbPath,
|
|
17523
|
+
config: options.config,
|
|
17524
|
+
env: options.env,
|
|
17525
|
+
modelRef: options.modelRef,
|
|
17526
|
+
dimensions: options.dimensions,
|
|
17527
|
+
fake: options.fake,
|
|
17528
|
+
limit: options.limit,
|
|
17529
|
+
now: options.now
|
|
17530
|
+
});
|
|
17531
|
+
const completedQueueItems = completeIndexedQueueItems(options.dbPath, options, now);
|
|
17532
|
+
const doneDb = openKnowledgeDb(options.dbPath);
|
|
17533
|
+
try {
|
|
17534
|
+
doneDb.run(`UPDATE runs SET status = ?, metadata_json = ?, updated_at = ? WHERE id = ?`, [
|
|
17535
|
+
"completed",
|
|
17536
|
+
JSON.stringify({ full: options.full === true, queued, indexed, completed_queue_items: completedQueueItems }),
|
|
17537
|
+
now,
|
|
17538
|
+
runId
|
|
17539
|
+
]);
|
|
17540
|
+
doneDb.run(`INSERT INTO run_events (id, run_id, level, event, metadata_json, created_at)
|
|
17541
|
+
VALUES (?, ?, ?, ?, ?, ?)`, [
|
|
17542
|
+
`evt_${randomUUID6()}`,
|
|
17543
|
+
runId,
|
|
17544
|
+
"info",
|
|
17545
|
+
"embedding_refresh_completed",
|
|
17546
|
+
JSON.stringify({ queued, indexed, completed_queue_items: completedQueueItems }),
|
|
17547
|
+
now
|
|
17548
|
+
]);
|
|
17549
|
+
} finally {
|
|
17550
|
+
doneDb.close();
|
|
17551
|
+
}
|
|
17552
|
+
return {
|
|
17553
|
+
run_id: runId,
|
|
17554
|
+
full: options.full === true,
|
|
17555
|
+
deleted_embeddings: deleted.embeddings,
|
|
17556
|
+
deleted_vector_entries: deleted.vectorEntries,
|
|
17557
|
+
queued,
|
|
17558
|
+
indexed,
|
|
17559
|
+
completed_queue_items: completedQueueItems
|
|
17560
|
+
};
|
|
17561
|
+
}
|
|
17562
|
+
|
|
17563
|
+
// src/web-search.ts
|
|
17564
|
+
import { createHash as createHash8, randomUUID as randomUUID7 } from "crypto";
|
|
17353
17565
|
function stableHash(value) {
|
|
17354
|
-
return `sha256:${createHash7("sha256").update(value).digest("hex")}`;
|
|
17566
|
+
return `sha256:${createHash8("sha256").update(value).digest("hex")}`;
|
|
17355
17567
|
}
|
|
17356
17568
|
function estimateTokens2(text) {
|
|
17357
17569
|
const words = text.trim().split(/\s+/).filter(Boolean).length;
|
|
@@ -17492,7 +17704,7 @@ async function runProviderWebSearch(options) {
|
|
|
17492
17704
|
const parsed = parseModelRef(modelRef);
|
|
17493
17705
|
const provider = options.provider ?? parsed.provider;
|
|
17494
17706
|
const model = parsed.provider === provider ? parsed.model : providerSettings(options.config, provider).default_model;
|
|
17495
|
-
const runId = `run_${randomUUID6()}`;
|
|
17707
|
+
const runId = `run_${randomUUID7()}`;
|
|
17496
17708
|
if (!options.fake && options.safetyPolicy)
|
|
17497
17709
|
assertWebSearchAllowed(options.safetyPolicy);
|
|
17498
17710
|
if (!options.fake && provider !== "openai" && provider !== "anthropic") {
|
|
@@ -17564,7 +17776,7 @@ async function runProviderWebSearch(options) {
|
|
|
17564
17776
|
]);
|
|
17565
17777
|
writeDb.run(`INSERT INTO run_events (id, run_id, level, event, metadata_json, created_at)
|
|
17566
17778
|
VALUES (?, ?, ?, ?, ?, ?)`, [
|
|
17567
|
-
`evt_${randomUUID6()}`,
|
|
17779
|
+
`evt_${randomUUID7()}`,
|
|
17568
17780
|
runId,
|
|
17569
17781
|
"info",
|
|
17570
17782
|
"provider_web_search_completed",
|
|
@@ -17600,7 +17812,7 @@ async function runProviderWebSearch(options) {
|
|
|
17600
17812
|
}
|
|
17601
17813
|
|
|
17602
17814
|
// src/storage-contract.ts
|
|
17603
|
-
import { createHash as createHash8, randomUUID as randomUUID7 } from "crypto";
|
|
17815
|
+
import { createHash as createHash9, randomUUID as randomUUID8 } from "crypto";
|
|
17604
17816
|
var GENERATED_ARTIFACTS = [
|
|
17605
17817
|
{
|
|
17606
17818
|
kind: "schema",
|
|
@@ -17636,7 +17848,7 @@ var GENERATED_ARTIFACTS = [
|
|
|
17636
17848
|
function hashArtifactBody(body) {
|
|
17637
17849
|
const bytes = typeof body === "string" ? Buffer.from(body) : Buffer.from(body);
|
|
17638
17850
|
return {
|
|
17639
|
-
hash: `sha256:${createHash8("sha256").update(bytes).digest("hex")}`,
|
|
17851
|
+
hash: `sha256:${createHash9("sha256").update(bytes).digest("hex")}`,
|
|
17640
17852
|
size_bytes: bytes.byteLength
|
|
17641
17853
|
};
|
|
17642
17854
|
}
|
|
@@ -17761,7 +17973,7 @@ function recordStorageObjects(db, objects, now = new Date) {
|
|
|
17761
17973
|
`);
|
|
17762
17974
|
const insert = db.transaction((entries) => {
|
|
17763
17975
|
for (const entry of entries) {
|
|
17764
|
-
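statement.run(randomUUID7(), entry.uri, entry.kind, entry.content_type ?? null, entry.hash ?? null, entry.size_bytes ?? null, JSON.stringify({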
|
|
17976
|
+
statement.run(randomUUID8(), entry.uri, entry.kind, entry.content_type ?? null, entry.hash ?? null, entry.size_bytes ?? null, JSON.stringify({
|
|
17765
17977
|
key: entry.key,
|
|
17766
17978
|
...entry.metadata ?? {}
|
|
17767
17979
|
}), timestamp, timestamp);
|
|
@@ -17771,15 +17983,15 @@ function recordStorageObjects(db, objects, now = new Date) {
|
|
|
17771
17983
|
}
|
|
17772
17984
|
|
|
17773
17985
|
// src/wiki-layout.ts
|
|
17774
|
-
import { createHash as createHash9 } from "crypto";
|
|
17986
|
+
import { createHash as createHash10 } from "crypto";
|
|
17775
17987
|
function todayParts(now) {
|
|
17776
17988
|
const year = String(now.getUTCFullYear());
|
|
17777
17989
|
const month = String(now.getUTCMonth() + 1).padStart(2, "0");
|
|
17778
17990
|
const day = String(now.getUTCDate()).padStart(2, "0");
|
|
17779
17991
|
return { year, month, day };
|
|
17780
17992
|
}
|
|
17781
|
-
function stableId5(prefix, value) {
|
|
17782
|
-
return `${prefix}_${createHash9("sha256").update(value).digest("hex").slice(0, 20)}`;
|
|
17993
|
+
function stableId6(prefix, value) {
|
|
17994
|
+
return `${prefix}_${createHash10("sha256").update(value).digest("hex").slice(0, 20)}`;
|
|
17783
17995
|
}
|
|
17784
17996
|
function estimateTokenCount2(text) {
|
|
17785
17997
|
const words = text.trim().split(/\s+/).filter(Boolean).length;
|
|
@@ -17897,7 +18109,7 @@ function provenanceFor(artifact) {
|
|
|
17897
18109
|
}
|
|
17898
18110
|
function recordWikiChunk(db, pageId, title, artifact, body, now) {
|
|
17899
18111
|
const provenance = provenanceFor(artifact);
|
|
17900
|
-
const chunkId = stableId5("chk", `${pageId}\x00${artifact.hash ?? artifact.uri}`);
|
|
18112
|
+
const chunkId = stableId6("chk", `${pageId}\x00${artifact.hash ?? artifact.uri}`);
|
|
17901
18113
|
const existing = db.query("SELECT id FROM chunks WHERE wiki_page_id = ?").all(pageId);
|
|
17902
18114
|
for (const row of existing)
|
|
17903
18115
|
db.run("DELETE FROM chunks_fts WHERE chunk_id = ?", [row.id]);
|
|
@@ -17933,7 +18145,7 @@ function recordWikiLayoutCatalog(db, artifacts, now = new Date) {
|
|
|
17933
18145
|
artifact_uri = excluded.artifact_uri,
|
|
17934
18146
|
metadata_json = excluded.metadata_json,
|
|
17935
18147
|
updated_at = excluded.updated_at`, [
|
|
17936
|
-
|
|
18148
|
+
stableId6("idx", "root:indexes/root.md"),
|
|
17937
18149
|
"root",
|
|
17938
18150
|
"root",
|
|
17939
18151
|
rootIndex.uri,
|
|
@@ -17948,7 +18160,7 @@ function recordWikiLayoutCatalog(db, artifacts, now = new Date) {
|
|
|
17948
18160
|
]);
|
|
17949
18161
|
}
|
|
17950
18162
|
if (wikiReadme) {
|
|
17951
|
-
const wikiPageId = stableId5("wiki", "wiki/README.md");
|
|
18163
|
+
const wikiPageId = stableId6("wiki", "wiki/README.md");
|
|
17952
18164
|
db.run(`INSERT INTO wiki_pages (id, path, title, artifact_uri, content_hash, status, metadata_json, created_at, updated_at)
|
|
17953
18165
|
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
17954
18166
|
ON CONFLICT(path) DO UPDATE SET
|
|
@@ -18094,6 +18306,30 @@ class KnowledgeService {
|
|
|
18094
18306
|
safetyPolicy: this.safetyPolicy()
|
|
18095
18307
|
});
|
|
18096
18308
|
}
|
|
18309
|
+
reindexHealth(options = {}) {
|
|
18310
|
+
const workspace = this.ensureWorkspace();
|
|
18311
|
+
return reindexHealth({
|
|
18312
|
+
...options,
|
|
18313
|
+
dbPath: workspace.knowledgeDbPath,
|
|
18314
|
+
config: this.config()
|
|
18315
|
+
});
|
|
18316
|
+
}
|
|
18317
|
+
enqueueReindex(options = {}) {
|
|
18318
|
+
const workspace = this.ensureWorkspace();
|
|
18319
|
+
return enqueueMissingEmbeddings({
|
|
18320
|
+
...options,
|
|
18321
|
+
dbPath: workspace.knowledgeDbPath,
|
|
18322
|
+
config: this.config()
|
|
18323
|
+
});
|
|
18324
|
+
}
|
|
18325
|
+
async refreshEmbeddings(options = {}) {
|
|
18326
|
+
const workspace = this.ensureWorkspace();
|
|
18327
|
+
return refreshEmbeddingIndex({
|
|
18328
|
+
...options,
|
|
18329
|
+
dbPath: workspace.knowledgeDbPath,
|
|
18330
|
+
config: this.config()
|
|
18331
|
+
});
|
|
18332
|
+
}
|
|
18097
18333
|
providerStatus(env = process.env) {
|
|
18098
18334
|
return providerStatus(this.config(), env);
|
|
18099
18335
|
}
|
|
@@ -18287,6 +18523,47 @@ function buildServer() {
|
|
|
18287
18523
|
return errorText(error48 instanceof Error ? error48.message : String(error48));
|
|
18288
18524
|
}
|
|
18289
18525
|
});
|
|
18526
|
+
registerTool(server, "ok_reindex_status", "Reindex status", "Inspect missing embeddings, queued jobs, stale revisions, and vector index health", {
|
|
18527
|
+
scope: scopeField,
|
|
18528
|
+
model: exports_external.string().optional().describe("Embedding model ref, default openai:text-embedding-3-small"),
|
|
18529
|
+
dimensions: exports_external.number().optional().describe("Embedding dimensions for deterministic fake mode"),
|
|
18530
|
+
fake: exports_external.boolean().optional().describe("Use deterministic fake embeddings for local tests")
|
|
18531
|
+
}, async ({ scope, model, dimensions, fake }) => {
|
|
18532
|
+
const service = createKnowledgeService({ scope });
|
|
18533
|
+
try {
|
|
18534
|
+
return jsonText({ ok: true, ...service.reindexHealth({ modelRef: model, dimensions, fake }) });
|
|
18535
|
+
} catch (error48) {
|
|
18536
|
+
return errorText(error48 instanceof Error ? error48.message : String(error48));
|
|
18537
|
+
}
|
|
18538
|
+
});
|
|
18539
|
+
registerTool(server, "ok_reindex_enqueue", "Enqueue reindex work", "Queue missing embedding refresh jobs for indexed source chunks", {
|
|
18540
|
+
scope: scopeField,
|
|
18541
|
+
model: exports_external.string().optional().describe("Embedding model ref, default openai:text-embedding-3-small"),
|
|
18542
|
+
dimensions: exports_external.number().optional().describe("Embedding dimensions for deterministic fake mode"),
|
|
18543
|
+
fake: exports_external.boolean().optional().describe("Use deterministic fake embeddings for local tests")
|
|
18544
|
+
}, async ({ scope, model, dimensions, fake }) => {
|
|
18545
|
+
const service = createKnowledgeService({ scope });
|
|
18546
|
+
try {
|
|
18547
|
+
return jsonText({ ok: true, ...service.enqueueReindex({ modelRef: model, dimensions, fake }) });
|
|
18548
|
+
} catch (error48) {
|
|
18549
|
+
return errorText(error48 instanceof Error ? error48.message : String(error48));
|
|
18550
|
+
}
|
|
18551
|
+
});
|
|
18552
|
+
registerTool(server, "ok_reindex_embeddings", "Refresh embedding index", "Run incremental or full embedding refresh jobs with run-ledger tracking", {
|
|
18553
|
+
scope: scopeField,
|
|
18554
|
+
full: exports_external.boolean().optional().describe("Delete and rebuild all embedding/vector rows first"),
|
|
18555
|
+
limit: exports_external.number().optional().describe("Maximum chunks to embed"),
|
|
18556
|
+
model: exports_external.string().optional().describe("Embedding model ref, default openai:text-embedding-3-small"),
|
|
18557
|
+
dimensions: exports_external.number().optional().describe("Embedding dimensions for deterministic fake mode"),
|
|
18558
|
+
fake: exports_external.boolean().optional().describe("Use deterministic fake embeddings for local tests")
|
|
18559
|
+
}, async ({ scope, full, limit, model, dimensions, fake }) => {
|
|
18560
|
+
const service = createKnowledgeService({ scope });
|
|
18561
|
+
try {
|
|
18562
|
+
return jsonText({ ok: true, ...await service.refreshEmbeddings({ full, limit, modelRef: model, dimensions, fake }) });
|
|
18563
|
+
} catch (error48) {
|
|
18564
|
+
return errorText(error48 instanceof Error ? error48.message : String(error48));
|
|
18565
|
+
}
|
|
18566
|
+
});
|
|
18290
18567
|
registerTool(server, "ok_semantic_search", "Semantic search", "Search the local vector index and return cited chunks with provenance", {
|
|
18291
18568
|
scope: scopeField,
|
|
18292
18569
|
query: exports_external.string().describe("Semantic query"),
|