@hasna/knowledge 0.2.17 → 0.2.19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +42 -8
- package/bin/open-knowledge-mcp.js +565 -11
- package/bin/open-knowledge.js +107 -59
- package/docs/architecture/ai-native-knowledge-base.md +14 -0
- package/docs/architecture/hybrid-semantic-search.md +15 -1
- package/package.json +1 -1
- package/src/cli.ts +76 -13
- package/src/knowledge-db.ts +27 -1
- package/src/mcp.js +62 -0
- package/src/reindex.ts +260 -0
- package/src/service.ts +39 -0
- package/src/web-search.ts +330 -0
|
@@ -13660,7 +13660,7 @@ import { existsSync as existsSync7, readFileSync as readFileSync7, writeFileSync
|
|
|
13660
13660
|
// package.json
|
|
13661
13661
|
var package_default = {
|
|
13662
13662
|
name: "@hasna/knowledge",
|
|
13663
|
-
version: "0.2.17",
|
|
13663
|
+
version: "0.2.19",
|
|
13664
13664
|
description: "Agent-friendly local knowledge CLI with JSON output, pagination, and safe destructive actions",
|
|
13665
13665
|
type: "module",
|
|
13666
13666
|
bin: {
|
|
@@ -14386,6 +14386,28 @@ CREATE INDEX IF NOT EXISTS idx_vector_index_status ON vector_index_entries(statu
|
|
|
14386
14386
|
INSERT OR IGNORE INTO schema_versions(version, applied_at)
|
|
14387
14387
|
VALUES (4, datetime('now'));
|
|
14388
14388
|
`;
|
|
14389
|
+
// Schema migration 5: creates the reindex_queue table (at most one row per
// kind/target_id/reason, enforced by the UNIQUE constraint), adds lookup
// indexes on status, (kind, target_id) and source_uri, and records schema
// version 5. Every statement is IF NOT EXISTS / OR IGNORE.
var MIGRATION_5 = `
CREATE TABLE IF NOT EXISTS reindex_queue (
id TEXT PRIMARY KEY,
kind TEXT NOT NULL,
target_id TEXT NOT NULL,
source_uri TEXT,
reason TEXT NOT NULL,
status TEXT NOT NULL DEFAULT 'pending',
attempts INTEGER NOT NULL DEFAULT 0,
metadata_json TEXT NOT NULL DEFAULT '{}',
created_at TEXT NOT NULL,
updated_at TEXT NOT NULL,
UNIQUE(kind, target_id, reason)
);

CREATE INDEX IF NOT EXISTS idx_reindex_queue_status ON reindex_queue(status);
CREATE INDEX IF NOT EXISTS idx_reindex_queue_kind_target ON reindex_queue(kind, target_id);
CREATE INDEX IF NOT EXISTS idx_reindex_queue_source_uri ON reindex_queue(source_uri);

INSERT OR IGNORE INTO schema_versions(version, applied_at)
VALUES (5, datetime('now'));
`;
|
|
14389
14411
|
function openKnowledgeDb(path) {
|
|
14390
14412
|
ensureParentDir(path);
|
|
14391
14413
|
const db = new Database(path);
|
|
@@ -14403,6 +14425,8 @@ function migrateKnowledgeDb(path) {
|
|
|
14403
14425
|
db.exec(MIGRATION_3);
|
|
14404
14426
|
if (getSchemaVersion(db) < 4)
|
|
14405
14427
|
db.exec(MIGRATION_4);
|
|
14428
|
+
if (getSchemaVersion(db) < 5)
|
|
14429
|
+
db.exec(MIGRATION_5);
|
|
14406
14430
|
return { path, schema_version: getSchemaVersion(db) };
|
|
14407
14431
|
} finally {
|
|
14408
14432
|
db.close();
|
|
@@ -14434,7 +14458,8 @@ function getKnowledgeDbStats(path) {
|
|
|
14434
14458
|
approval_gates: count(db, "approval_gates"),
|
|
14435
14459
|
storage_objects: count(db, "storage_objects"),
|
|
14436
14460
|
embeddings: count(db, "chunk_embeddings"),
|
|
14437
|
-
vector_entries: count(db, "vector_index_entries")
|
|
14461
|
+
vector_entries: count(db, "vector_index_entries"),
|
|
14462
|
+
reindex_queue: count(db, "reindex_queue")
|
|
14438
14463
|
};
|
|
14439
14464
|
} finally {
|
|
14440
14465
|
db.close();
|
|
@@ -17348,8 +17373,446 @@ async function ingestSourceRef(options) {
|
|
|
17348
17373
|
};
|
|
17349
17374
|
}
|
|
17350
17375
|
|
|
17351
|
-
// src/storage-contract.ts
|
|
17376
|
+
// src/reindex.ts
|
|
17352
17377
|
import { createHash as createHash7, randomUUID as randomUUID6 } from "crypto";
|
|
17378
|
+
/**
 * Builds a deterministic identifier from a prefix and a value.
 *
 * @param prefix Text placed before the underscore.
 * @param value Data fed to SHA-256.
 * @returns `<prefix>_<first 20 hex characters of the SHA-256 digest of value>`.
 */
function stableId5(prefix, value) {
  const hasher = createHash7("sha256");
  hasher.update(value);
  const shortDigest = hasher.digest("hex").slice(0, 20);
  return `${prefix}_${shortDigest}`;
}
|
|
17381
|
+
/**
 * Counts reindex_queue rows per status.
 *
 * @param dbPath Path handed to openKnowledgeDb.
 * @returns Plain object mapping each status present in the queue to its row count.
 */
function queueCounts(dbPath) {
  const db = openKnowledgeDb(dbPath);
  try {
    const statement = db.query(`SELECT status, COUNT(*) AS n FROM reindex_queue GROUP BY status ORDER BY status`);
    // GROUP BY yields one row per status, so the map keeps every row.
    const tally = new Map();
    for (const { status, n } of statement.all()) {
      tally.set(status, n);
    }
    return Object.fromEntries(tally);
  } finally {
    // The connection is opened per call, so always release it.
    db.close();
  }
}
|
|
17390
|
+
/**
 * Lists chunks that have no vector_index_entries row for the resolved
 * embedding provider/model.
 *
 * @param dbPath Path handed to openKnowledgeDb.
 * @param options Reads `modelRef` and `config` to resolve the embedding model.
 * @returns Rows of `{ chunk_id, source_revision_id, source_uri }`, ordered by
 *   chunk creation time and then ordinal.
 */
function missingEmbeddingRows(dbPath, options) {
  const resolvedRef = resolveEmbeddingModelRef(options.modelRef, options.config);
  const { provider, model } = parseModelRef(resolvedRef);
  const db = openKnowledgeDb(dbPath);
  try {
    // The LEFT JOIN on vector_index_entries plus "v.id IS NULL" keeps only
    // chunks without an entry for this provider/model pair.
    const statement = db.query(`SELECT c.id AS chunk_id, c.source_revision_id, s.uri AS source_uri
FROM chunks c
LEFT JOIN source_revisions sr ON sr.id = c.source_revision_id
LEFT JOIN sources s ON s.id = sr.source_id
LEFT JOIN vector_index_entries v ON v.chunk_id = c.id AND v.provider = ? AND v.model = ?
WHERE v.id IS NULL
ORDER BY c.created_at ASC, c.ordinal ASC`);
    return statement.all(provider, model);
  } finally {
    db.close();
  }
}
|
|
17406
|
+
/**
 * Summarizes index health for one knowledge database.
 *
 * Runs migrations first, then gathers: the highest schema version, chunk and
 * vector-entry totals, the number of chunks missing an embedding for the
 * resolved model, queue counts per status, and the number of source revisions
 * whose metadata_json text marks them as needing reindex or as stale.
 *
 * @param options Reads `dbPath`, plus `modelRef`/`config` via missingEmbeddingRows.
 * @returns `{ schema_version, chunks, vector_entries, missing_embeddings, queued, stale_revisions }`.
 */
function reindexHealth(options) {
  migrateKnowledgeDb(options.dbPath);
  const db = openKnowledgeDb(options.dbPath);
  try {
    // Reads the `n` column of a single-row COUNT query, defaulting to 0.
    const countOf = (sql) => db.query(sql).get()?.n ?? 0;
    const schemaVersion = db.query("SELECT MAX(version) AS version FROM schema_versions").get()?.version ?? 0;
    const chunkTotal = countOf("SELECT COUNT(*) AS n FROM chunks");
    const vectorTotal = countOf("SELECT COUNT(*) AS n FROM vector_index_entries");
    const missingTotal = missingEmbeddingRows(options.dbPath, options).length;
    // Staleness is detected by substring match on the serialized metadata.
    const staleTotal = countOf(`SELECT COUNT(*) AS n FROM source_revisions
WHERE metadata_json LIKE '%"reindex_required":true%' OR metadata_json LIKE '%"status":"stale"%'`);
    const queued = queueCounts(options.dbPath);
    return {
      schema_version: schemaVersion,
      chunks: chunkTotal,
      vector_entries: vectorTotal,
      missing_embeddings: missingTotal,
      queued,
      stale_revisions: staleTotal
    };
  } finally {
    db.close();
  }
}
|
|
17428
|
+
/**
 * Queues an "embedding" reindex job for every chunk that is missing an
 * embedding for the resolved model.
 *
 * A queue row is unique per (kind, target_id, reason). When a row already
 * exists:
 *   - status "completed": the chunk is missing its embedding again (for
 *     example after a full rebuild cleared the index), so the row is reset to
 *     "pending" and counted as enqueued. Previously such rows were reported as
 *     "already queued" and never became pending again.
 *   - any other status: left untouched and counted as already queued.
 *
 * @param options Reads `dbPath`, optional `now` (Date), optional `reason`
 *   (default "missing_embedding"), plus `modelRef`/`config` via
 *   missingEmbeddingRows.
 * @returns `{ enqueued, already_queued, reason }`.
 */
function enqueueMissingEmbeddings(options) {
  migrateKnowledgeDb(options.dbPath);
  const now = (options.now ?? new Date()).toISOString();
  const reason = options.reason ?? "missing_embedding";
  const rows = missingEmbeddingRows(options.dbPath, options);
  const db = openKnowledgeDb(options.dbPath);
  let enqueued = 0;
  let alreadyQueued = 0;
  try {
    const findExisting = db.query("SELECT id, status FROM reindex_queue WHERE kind = ? AND target_id = ? AND reason = ?");
    // One transaction: either every queue change lands or none does.
    const write = db.transaction(() => {
      for (const row of rows) {
        const existing = findExisting.get("embedding", row.chunk_id, reason);
        if (existing) {
          if (existing.status === "completed") {
            // Finished earlier, but the embedding is missing again: re-queue.
            db.run("UPDATE reindex_queue SET status = ?, updated_at = ? WHERE id = ?", ["pending", now, existing.id]);
            enqueued += 1;
          } else {
            alreadyQueued += 1;
          }
          continue;
        }
        // The id is derived from the same fields as the UNIQUE constraint.
        const id = stableId5("rq", `embedding\x00${row.chunk_id}\x00${reason}`);
        db.run(`INSERT INTO reindex_queue (id, kind, target_id, source_uri, reason, status, metadata_json, created_at, updated_at)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`, [
          id,
          "embedding",
          row.chunk_id,
          row.source_uri,
          reason,
          "pending",
          JSON.stringify({ source_revision_id: row.source_revision_id }),
          now,
          now
        ]);
        enqueued += 1;
      }
    });
    write();
  } finally {
    db.close();
  }
  return { enqueued, already_queued: alreadyQueued, reason };
}
|
|
17466
|
+
/**
 * Deletes every row from vector_index_entries and chunk_embeddings.
 *
 * The counts and both deletes run inside one transaction, so a failure
 * part-way cannot leave one table emptied and the other intact, and the
 * reported counts match what was actually removed.
 *
 * @param dbPath Path handed to openKnowledgeDb.
 * @returns `{ embeddings, vectorEntries }`: row counts taken just before deletion.
 */
function clearEmbeddingIndex(dbPath) {
  const db = openKnowledgeDb(dbPath);
  try {
    let embeddings = 0;
    let vectorEntries = 0;
    const wipe = db.transaction(() => {
      embeddings = db.query("SELECT COUNT(*) AS n FROM chunk_embeddings").get()?.n ?? 0;
      vectorEntries = db.query("SELECT COUNT(*) AS n FROM vector_index_entries").get()?.n ?? 0;
      db.run("DELETE FROM vector_index_entries");
      db.run("DELETE FROM chunk_embeddings");
    });
    wipe();
    return { embeddings, vectorEntries };
  } finally {
    db.close();
  }
}
|
|
17478
|
+
/**
 * Marks pending "embedding" queue rows as completed once a
 * vector_index_entries row exists for their target chunk under the resolved
 * provider/model.
 *
 * @param dbPath Path handed to openKnowledgeDb.
 * @param options Reads `modelRef` and `config` to resolve the embedding model.
 * @param now Timestamp written to `updated_at`.
 * @returns The `changes` value reported by the UPDATE.
 */
function completeIndexedQueueItems(dbPath, options, now) {
  const resolvedRef = resolveEmbeddingModelRef(options.modelRef, options.config);
  const { provider, model } = parseModelRef(resolvedRef);
  const db = openKnowledgeDb(dbPath);
  try {
    const sql = `UPDATE reindex_queue
SET status = ?, updated_at = ?
WHERE kind = ?
AND status = ?
AND EXISTS (
SELECT 1 FROM vector_index_entries v
WHERE v.chunk_id = reindex_queue.target_id
AND v.provider = ?
AND v.model = ?
)`;
    // Parameter order follows the placeholders: new status, timestamp, kind,
    // current status, provider, model.
    const params = ["completed", now, "embedding", "pending", provider, model];
    const outcome = db.run(sql, params);
    return outcome.changes;
  } finally {
    db.close();
  }
}
|
|
17498
|
+
/**
 * Runs an incremental or full embedding refresh and records it in the run ledger.
 *
 * Steps: migrate; when `options.full` is truthy, clear the embedding index;
 * enqueue missing embeddings; insert a "running" row into `runs`; index
 * embeddings; mark matching queue items completed; then mark the run
 * "completed" and append an `embedding_refresh_completed` run event.
 *
 * If indexing or queue completion throws, the run row is marked "failed" with
 * the error message (best effort) and the original error is rethrown.
 * Previously the row stayed "running" forever.
 *
 * @param options Reads `dbPath`, `full`, `now`, `config`, `env`, `modelRef`,
 *   `dimensions`, `fake`, `limit`.
 * @returns `{ run_id, full, deleted_embeddings, deleted_vector_entries, queued,
 *   indexed, completed_queue_items }`.
 * @throws Whatever the underlying indexing or database calls throw.
 */
async function refreshEmbeddingIndex(options) {
  migrateKnowledgeDb(options.dbPath);
  const now = (options.now ?? new Date()).toISOString();
  const runId = `run_${randomUUID6()}`;
  const deleted = options.full ? clearEmbeddingIndex(options.dbPath) : { embeddings: 0, vectorEntries: 0 };
  const queued = enqueueMissingEmbeddings({ ...options, reason: options.full ? "full_embedding_rebuild" : "missing_embedding" });
  const db = openKnowledgeDb(options.dbPath);
  try {
    db.run(`INSERT INTO runs (id, type, prompt, status, provider, model, metadata_json, created_at, updated_at)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`, [
      runId,
      "embedding-refresh",
      options.full ? "full" : "incremental",
      "running",
      "local",
      resolveEmbeddingModelRef(options.modelRef, options.config),
      JSON.stringify({ full: options.full === true, queued }),
      now,
      now
    ]);
  } finally {
    db.close();
  }
  let indexed;
  let completedQueueItems;
  try {
    indexed = await indexKnowledgeEmbeddings({
      dbPath: options.dbPath,
      config: options.config,
      env: options.env,
      modelRef: options.modelRef,
      dimensions: options.dimensions,
      fake: options.fake,
      limit: options.limit,
      now: options.now
    });
    completedQueueItems = completeIndexedQueueItems(options.dbPath, options, now);
  } catch (error) {
    // Deliberately best-effort: a failure while recording the failure must not
    // hide the original error.
    // NOTE(review): confirm "failed" matches the status vocabulary used for runs.
    try {
      const failDb = openKnowledgeDb(options.dbPath);
      try {
        failDb.run(`UPDATE runs SET status = ?, metadata_json = ?, updated_at = ? WHERE id = ?`, [
          "failed",
          JSON.stringify({ full: options.full === true, queued, error: error instanceof Error ? error.message : String(error) }),
          now,
          runId
        ]);
      } finally {
        failDb.close();
      }
    } catch {
      // Ignored on purpose; the original error is rethrown below.
    }
    throw error;
  }
  const doneDb = openKnowledgeDb(options.dbPath);
  try {
    doneDb.run(`UPDATE runs SET status = ?, metadata_json = ?, updated_at = ? WHERE id = ?`, [
      "completed",
      JSON.stringify({ full: options.full === true, queued, indexed, completed_queue_items: completedQueueItems }),
      now,
      runId
    ]);
    doneDb.run(`INSERT INTO run_events (id, run_id, level, event, metadata_json, created_at)
VALUES (?, ?, ?, ?, ?, ?)`, [
      `evt_${randomUUID6()}`,
      runId,
      "info",
      "embedding_refresh_completed",
      JSON.stringify({ queued, indexed, completed_queue_items: completedQueueItems }),
      now
    ]);
  } finally {
    doneDb.close();
  }
  return {
    run_id: runId,
    full: options.full === true,
    deleted_embeddings: deleted.embeddings,
    deleted_vector_entries: deleted.vectorEntries,
    queued,
    indexed,
    completed_queue_items: completedQueueItems
  };
}
|
|
17562
|
+
|
|
17563
|
+
// src/web-search.ts
|
|
17564
|
+
import { createHash as createHash8, randomUUID as randomUUID7 } from "crypto";
|
|
17565
|
+
/**
 * Hashes a value with SHA-256.
 *
 * @param value Data fed to the hash.
 * @returns The hex digest prefixed with "sha256:".
 */
function stableHash(value) {
  const hasher = createHash8("sha256");
  hasher.update(value);
  const hex = hasher.digest("hex");
  return `sha256:${hex}`;
}
|
|
17568
|
+
/**
 * Rough token estimate: whitespace-separated word count times 1.25, rounded
 * up, never less than 1.
 *
 * @param text Text to measure.
 * @returns Estimated token count (integer, at least 1).
 */
function estimateTokens2(text) {
  const wordRuns = text.trim().match(/\S+/g) ?? [];
  const scaled = Math.ceil(wordRuns.length * 1.25);
  return scaled < 1 ? 1 : scaled;
}
|
|
17572
|
+
/**
 * Returns `value` itself when it is a truthy non-array object; otherwise a
 * fresh empty object.
 */
function asRecord(value) {
  if (!value) {
    return {};
  }
  if (typeof value !== "object") {
    return {};
  }
  if (Array.isArray(value)) {
    return {};
  }
  return value;
}
|
|
17575
|
+
/**
 * Returns `value` when it is a non-empty string; otherwise null.
 */
function asString3(value) {
  if (typeof value !== "string") {
    return null;
  }
  return value.length === 0 ? null : value;
}
|
|
17578
|
+
/**
 * Normalizes one provider result object into a web source.
 *
 * The URL is taken from the first non-empty string among `url`, `uri`,
 * `sourceUrl`; the title from `title`, `name`; the snippet from `snippet`,
 * `text`, `description`.
 *
 * @param value Candidate object (anything else is treated as an empty record).
 * @returns `{ url, title, snippet, provider_metadata }`, or null when no URL is found.
 */
function sourceFromRecord(value) {
  const fields = asRecord(value);
  // First key whose value is a non-empty string, or null when none qualifies.
  const firstString = (keys) => {
    for (const key of keys) {
      const found = asString3(fields[key]);
      if (found !== null) {
        return found;
      }
    }
    return null;
  };
  const link = firstString(["url", "uri", "sourceUrl"]);
  if (link === null) {
    return null;
  }
  const title = firstString(["title", "name"]);
  const snippet = firstString(["snippet", "text", "description"]);
  return { url: link, title, snippet, provider_metadata: fields };
}
|
|
17590
|
+
/**
 * Walks a provider response and gathers every web source found in it.
 *
 * Arrays are traversed element by element. For any other value, a source is
 * recorded when sourceFromRecord yields one, and the `sources`, `results`,
 * `citations`, `annotations` and `output` properties are then searched
 * recursively.
 *
 * @param value Response fragment to inspect.
 * @param output Map keyed by source URL; a later source with the same URL
 *   replaces the earlier one.
 */
function collectSources(value, output) {
  if (Array.isArray(value)) {
    for (const element of value) {
      collectSources(element, output);
    }
    return;
  }
  const found = sourceFromRecord(value);
  if (found) {
    output.set(found.url, found);
  }
  const fields = asRecord(value);
  const nestedKeys = ["sources", "results", "citations", "annotations", "output"];
  for (const key of nestedKeys) {
    const nested = fields[key];
    if (!nested) {
      continue;
    }
    collectSources(nested, output);
  }
}
|
|
17605
|
+
/**
 * Produces deterministic placeholder web sources for fake mode.
 *
 * @param query Query text echoed in each snippet.
 * @param limit Requested number of sources; at most 3 are returned.
 * @returns Array of `{ url, title, snippet, provider_metadata }` ranked from 1.
 */
function fakeSources(query, limit) {
  const count = Math.min(limit, 3);
  const fixtures = [];
  for (let rank = 1; rank <= count; rank += 1) {
    fixtures.push({
      url: `https://example.com/knowledge-web-${rank}`,
      title: `Fake web source ${rank}`,
      snippet: `Deterministic web-search fixture for "${query}"`,
      provider_metadata: { fake: true, rank }
    });
  }
  return fixtures;
}
|
|
17613
|
+
/**
 * Runs a query through the OpenAI provider with its web search tool forced.
 *
 * @param input Reads `config`, `env`, `model`, `query`, `domains`.
 * @returns The promise returned by `generateText`.
 * @throws Error when the provider object has no `tools.webSearch`.
 */
async function openAiWebSearch(input) {
  const { generateText } = await import("ai");
  const { createOpenAI } = await import("@ai-sdk/openai");
  const settings = providerSettings(input.config, "openai");
  const client = createOpenAI({
    apiKey: input.env[settings.api_key_env],
    baseURL: settings.base_url
  });
  const makeSearchTool = client.tools?.webSearch;
  if (!makeSearchTool) {
    throw new Error("OpenAI provider does not expose tools.webSearch.");
  }
  const toolOptions = {
    externalWebAccess: true,
    searchContextSize: "medium"
  };
  // Restrict domains only when the caller supplied some.
  if (input.domains.length > 0) {
    toolOptions.allowedDomains = input.domains;
  }
  return generateText({
    model: client(input.model),
    prompt: input.query,
    tools: {
      web_search: makeSearchTool(toolOptions)
    },
    toolChoice: { type: "tool", toolName: "web_search" }
  });
}
|
|
17637
|
+
/**
 * Runs a query through the Anthropic provider with a web search tool attached.
 *
 * @param input Reads `config`, `env`, `model`, `query`, `maxUses`, `domains`.
 * @returns The promise returned by `generateText`.
 * @throws Error when the provider object exposes neither
 *   `tools.webSearch_20250305` nor `tools.webSearch`.
 */
async function anthropicWebSearch(input) {
  const { generateText } = await import("ai");
  const { createAnthropic } = await import("@ai-sdk/anthropic");
  const settings = providerSettings(input.config, "anthropic");
  const client = createAnthropic({
    apiKey: input.env[settings.api_key_env],
    baseURL: settings.base_url
  });
  // Prefer the dated tool factory, falling back to the undated name.
  const makeSearchTool = client.tools?.webSearch_20250305 ?? client.tools?.webSearch;
  if (!makeSearchTool) {
    throw new Error("Anthropic provider does not expose a web search tool.");
  }
  const toolOptions = { maxUses: input.maxUses };
  if (input.domains.length > 0) {
    toolOptions.allowedDomains = input.domains;
  }
  return generateText({
    model: client(input.model),
    prompt: input.query,
    tools: {
      web_search: makeSearchTool(toolOptions)
    }
  });
}
|
|
17659
|
+
/**
 * Files web search sources into the knowledge database as manifest items.
 *
 * Does nothing (returns 0) unless `options.fileResults` is truthy and there is
 * at least one source. Each item's extracted text is the title, snippet and
 * URL (empty parts dropped) joined by newlines; its hash and revision are the
 * stableHash of that text.
 *
 * @param options Reads `fileResults`, `dbPath`, `query`, `safetyPolicy`.
 * @param sources Sources shaped like the output of sourceFromRecord.
 * @param now Timestamp stored as `updated_at` and passed on as a Date.
 * @returns `sources_upserted` from the ingest result, or 0 when nothing is filed.
 */
async function fileWebSources(options, sources, now) {
  const nothingToFile = !options.fileResults || sources.length === 0;
  if (nothingToFile) {
    return 0;
  }
  const items = [];
  for (const source of sources) {
    const extracted = [source.title, source.snippet, source.url].filter(Boolean).join("\n");
    const digest = stableHash(extracted);
    items.push({
      source_ref: source.url,
      name: source.title ?? source.url,
      url: source.url,
      mime: "text/plain",
      hash: digest,
      revision: digest,
      status: "active",
      updated_at: now,
      permissions: { mode: "read_only", allowed_purposes: ["knowledge_answer", "knowledge_index"] },
      metadata: {
        source_ref: source.url,
        content_source: "provider_web_search",
        provider_metadata: source.provider_metadata
      },
      extracted_text: extracted
    });
  }
  const ingestResult = await ingestOpenFilesManifestItems({
    dbPath: options.dbPath,
    items,
    sourceLabel: `web-search:${options.query}`,
    readAction: "provider_web_search_file_results",
    safetyPolicy: options.safetyPolicy,
    now: new Date(now)
  });
  return ingestResult.sources_upserted;
}
|
|
17694
|
+
/**
 * Runs a provider-native (or fake) web search and records it in the run ledger.
 *
 * Flow: validate the query; clamp `limit` to 1..20 and `maxUses` to 1..10;
 * resolve provider/model; for non-fake runs enforce the safety policy (when
 * one is supplied), the supported-provider check, and credentials; migrate;
 * insert a "running" run plus an audit event; perform the search; optionally
 * file the sources; then mark the run "completed", append a run event, and
 * record provider usage.
 *
 * If the search or the filing step throws, the run row is marked "failed" with
 * the error message (best effort) and the original error is rethrown.
 * Previously the row stayed "running" forever.
 *
 * @param options Reads `query`, `env`, `now`, `limit`, `maxUses`, `domains`,
 *   `modelRef`, `provider`, `config`, `fake`, `safetyPolicy`, `dbPath`,
 *   `fileResults`.
 * @returns `{ run_id, query, provider, model, answer, sources, filed_sources,
 *   usage, warnings }`.
 * @throws Error when the query is blank, or when a non-fake run targets a
 *   provider other than "openai"/"anthropic"; also whatever the policy,
 *   credential, provider, or database calls throw.
 */
async function runProviderWebSearch(options) {
  const query = options.query.trim();
  if (!query) {
    throw new Error("Web search query is required.");
  }
  const env = options.env ?? process.env;
  const now = (options.now ?? new Date()).toISOString();
  const limit = Math.max(1, Math.min(options.limit ?? 5, 20));
  const maxUses = Math.max(1, Math.min(options.maxUses ?? 3, 10));
  const domains = options.domains ?? [];
  // An explicit modelRef wins; otherwise use the requested provider's default
  // model, or the "default" alias when no provider was requested.
  const modelRef = resolveModelRef(options.modelRef ?? (options.provider ? `${options.provider}:${providerSettings(options.config, options.provider).default_model}` : "default"), options.config);
  const parsed = parseModelRef(modelRef);
  const provider = options.provider ?? parsed.provider;
  // If the resolved ref belongs to a different provider than the one
  // requested, fall back to the requested provider's default model.
  const model = parsed.provider === provider ? parsed.model : providerSettings(options.config, provider).default_model;
  const runId = `run_${randomUUID7()}`;
  if (!options.fake && options.safetyPolicy) {
    assertWebSearchAllowed(options.safetyPolicy);
  }
  if (!options.fake && provider !== "openai" && provider !== "anthropic") {
    throw new Error(`Provider ${provider} does not expose native web search yet.`);
  }
  if (!options.fake) {
    assertProviderCredentials(provider, options.config, env);
  }
  migrateKnowledgeDb(options.dbPath);
  const db = openKnowledgeDb(options.dbPath);
  try {
    db.run(`INSERT INTO runs (id, type, prompt, status, provider, model, metadata_json, created_at, updated_at)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`, [
      runId,
      "provider-web-search",
      query,
      "running",
      provider,
      model,
      JSON.stringify({ domains, max_uses: maxUses, fake: options.fake === true }),
      now,
      now
    ]);
    recordAuditEvent(db, {
      event_type: "source_read",
      action: options.fake ? "fake_provider_web_search" : "provider_web_search",
      target_uri: query,
      decision: "allow",
      metadata: { provider, model, domains, max_uses: maxUses },
      created_at: now
    });
  } finally {
    db.close();
  }
  let answer = "";
  let sources = [];
  let usage = { input_tokens: estimateTokens2(query), output_tokens: 0, cost_usd: 0 };
  const warnings = [];
  let filedSources;
  try {
    if (options.fake) {
      sources = fakeSources(query, limit);
      answer = `Fake web search answer for: ${query}`;
      usage.output_tokens = estimateTokens2(answer);
    } else {
      const result = provider === "openai" ? await openAiWebSearch({ query, model, config: options.config, env, maxUses, domains }) : await anthropicWebSearch({ query, model, config: options.config, env, maxUses, domains });
      answer = result.text;
      // Sources may appear in either field; the Map de-duplicates by URL.
      const collected = new Map();
      collectSources(result.sources, collected);
      collectSources(result.toolResults, collected);
      sources = Array.from(collected.values()).slice(0, limit);
      const normalized = normalizeAiSdkUsage({
        provider,
        model,
        usage: result.usage,
        providerMetadata: result.providerMetadata
      });
      usage = {
        input_tokens: normalized.input_tokens,
        output_tokens: normalized.output_tokens,
        cost_usd: normalized.cost_usd
      };
    }
    filedSources = await fileWebSources(options, sources, now);
  } catch (error) {
    // Deliberately best-effort: a failure while recording the failure must not
    // hide the original error.
    // NOTE(review): confirm "failed" matches the status vocabulary used for runs.
    try {
      const failDb = openKnowledgeDb(options.dbPath);
      try {
        failDb.run(`UPDATE runs SET status = ?, metadata_json = ?, updated_at = ? WHERE id = ?`, [
          "failed",
          JSON.stringify({ domains, max_uses: maxUses, fake: options.fake === true, error: error instanceof Error ? error.message : String(error) }),
          now,
          runId
        ]);
      } finally {
        failDb.close();
      }
    } catch {
      // Ignored on purpose; the original error is rethrown below.
    }
    throw error;
  }
  const writeDb = openKnowledgeDb(options.dbPath);
  try {
    writeDb.run(`UPDATE runs SET status = ?, metadata_json = ?, updated_at = ? WHERE id = ?`, [
      "completed",
      JSON.stringify({ domains, max_uses: maxUses, sources: sources.length, filed_sources: filedSources, fake: options.fake === true }),
      now,
      runId
    ]);
    writeDb.run(`INSERT INTO run_events (id, run_id, level, event, metadata_json, created_at)
VALUES (?, ?, ?, ?, ?, ?)`, [
      `evt_${randomUUID7()}`,
      runId,
      "info",
      "provider_web_search_completed",
      JSON.stringify({ sources: sources.length, filed_sources: filedSources }),
      now
    ]);
    recordProviderUsage(writeDb, {
      run_id: runId,
      provider,
      model,
      input_tokens: usage.input_tokens,
      output_tokens: usage.output_tokens,
      cost_usd: usage.cost_usd,
      metadata: { web_search: true, sources: sources.length, filed_sources: filedSources },
      created_at: now
    });
  } finally {
    writeDb.close();
  }
  if (sources.length === 0) {
    warnings.push("no_web_sources_returned");
  }
  return {
    run_id: runId,
    query,
    provider,
    model,
    answer,
    sources,
    filed_sources: filedSources,
    usage,
    warnings
  };
}
|
|
17813
|
+
|
|
17814
|
+
// src/storage-contract.ts
|
|
17815
|
+
import { createHash as createHash9, randomUUID as randomUUID8 } from "crypto";
|
|
17353
17816
|
var GENERATED_ARTIFACTS = [
|
|
17354
17817
|
{
|
|
17355
17818
|
kind: "schema",
|
|
@@ -17385,7 +17848,7 @@ var GENERATED_ARTIFACTS = [
|
|
|
17385
17848
|
/**
 * Computes the content hash and size of an artifact body.
 *
 * @param body String or byte data; it is copied into a Buffer either way.
 * @returns `{ hash, size_bytes }` where `hash` is "sha256:" plus the hex
 *   digest of the bytes and `size_bytes` is the byte length.
 */
function hashArtifactBody(body) {
  // Both branches of the original string/non-string check called
  // Buffer.from(body), so a single call is equivalent.
  const bytes = Buffer.from(body);
  const digestHex = createHash9("sha256").update(bytes).digest("hex");
  return {
    hash: `sha256:${digestHex}`,
    size_bytes: bytes.byteLength
  };
}
|
|
@@ -17510,7 +17973,7 @@ function recordStorageObjects(db, objects, now = new Date) {
|
|
|
17510
17973
|
`);
|
|
17511
17974
|
const insert = db.transaction((entries) => {
|
|
17512
17975
|
for (const entry of entries) {
|
|
17513
|
-
statement.run(
|
|
17976
|
+
statement.run(randomUUID8(), entry.uri, entry.kind, entry.content_type ?? null, entry.hash ?? null, entry.size_bytes ?? null, JSON.stringify({
|
|
17514
17977
|
key: entry.key,
|
|
17515
17978
|
...entry.metadata ?? {}
|
|
17516
17979
|
}), timestamp, timestamp);
|
|
@@ -17520,15 +17983,15 @@ function recordStorageObjects(db, objects, now = new Date) {
|
|
|
17520
17983
|
}
|
|
17521
17984
|
|
|
17522
17985
|
// src/wiki-layout.ts
|
|
17523
|
-
import { createHash as
|
|
17986
|
+
import { createHash as createHash10 } from "crypto";
|
|
17524
17987
|
/**
 * Splits a Date into UTC year, month and day strings.
 *
 * @param now Date to split.
 * @returns `{ year, month, day }`; month and day are left-padded with "0" to
 *   two characters, and month is 1-based.
 */
function todayParts(now) {
  const twoDigits = (n) => String(n).padStart(2, "0");
  return {
    year: String(now.getUTCFullYear()),
    // getUTCMonth is 0-based, hence the + 1.
    month: twoDigits(now.getUTCMonth() + 1),
    day: twoDigits(now.getUTCDate())
  };
}
|
|
17530
|
-
function
|
|
17531
|
-
return `${prefix}_${
|
|
17993
|
+
/**
 * Builds a deterministic identifier from a prefix and a value.
 *
 * @param prefix Text placed before the underscore.
 * @param value Data fed to SHA-256.
 * @returns `<prefix>_<first 20 hex characters of the SHA-256 digest of value>`.
 */
function stableId6(prefix, value) {
  const hasher = createHash10("sha256");
  hasher.update(value);
  const shortDigest = hasher.digest("hex").slice(0, 20);
  return `${prefix}_${shortDigest}`;
}
|
|
17533
17996
|
function estimateTokenCount2(text) {
|
|
17534
17997
|
const words = text.trim().split(/\s+/).filter(Boolean).length;
|
|
@@ -17646,7 +18109,7 @@ function provenanceFor(artifact) {
|
|
|
17646
18109
|
}
|
|
17647
18110
|
function recordWikiChunk(db, pageId, title, artifact, body, now) {
|
|
17648
18111
|
const provenance = provenanceFor(artifact);
|
|
17649
|
-
const chunkId =
|
|
18112
|
+
const chunkId = stableId6("chk", `${pageId}\x00${artifact.hash ?? artifact.uri}`);
|
|
17650
18113
|
const existing = db.query("SELECT id FROM chunks WHERE wiki_page_id = ?").all(pageId);
|
|
17651
18114
|
for (const row of existing)
|
|
17652
18115
|
db.run("DELETE FROM chunks_fts WHERE chunk_id = ?", [row.id]);
|
|
@@ -17682,7 +18145,7 @@ function recordWikiLayoutCatalog(db, artifacts, now = new Date) {
|
|
|
17682
18145
|
artifact_uri = excluded.artifact_uri,
|
|
17683
18146
|
metadata_json = excluded.metadata_json,
|
|
17684
18147
|
updated_at = excluded.updated_at`, [
|
|
17685
|
-
|
|
18148
|
+
stableId6("idx", "root:indexes/root.md"),
|
|
17686
18149
|
"root",
|
|
17687
18150
|
"root",
|
|
17688
18151
|
rootIndex.uri,
|
|
@@ -17697,7 +18160,7 @@ function recordWikiLayoutCatalog(db, artifacts, now = new Date) {
|
|
|
17697
18160
|
]);
|
|
17698
18161
|
}
|
|
17699
18162
|
if (wikiReadme) {
|
|
17700
|
-
const wikiPageId =
|
|
18163
|
+
const wikiPageId = stableId6("wiki", "wiki/README.md");
|
|
17701
18164
|
db.run(`INSERT INTO wiki_pages (id, path, title, artifact_uri, content_hash, status, metadata_json, created_at, updated_at)
|
|
17702
18165
|
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
17703
18166
|
ON CONFLICT(path) DO UPDATE SET
|
|
@@ -17843,6 +18306,30 @@ class KnowledgeService {
|
|
|
17843
18306
|
safetyPolicy: this.safetyPolicy()
|
|
17844
18307
|
});
|
|
17845
18308
|
}
|
|
18309
|
+
reindexHealth(options = {}) {
|
|
18310
|
+
const workspace = this.ensureWorkspace();
|
|
18311
|
+
return reindexHealth({
|
|
18312
|
+
...options,
|
|
18313
|
+
dbPath: workspace.knowledgeDbPath,
|
|
18314
|
+
config: this.config()
|
|
18315
|
+
});
|
|
18316
|
+
}
|
|
18317
|
+
enqueueReindex(options = {}) {
|
|
18318
|
+
const workspace = this.ensureWorkspace();
|
|
18319
|
+
return enqueueMissingEmbeddings({
|
|
18320
|
+
...options,
|
|
18321
|
+
dbPath: workspace.knowledgeDbPath,
|
|
18322
|
+
config: this.config()
|
|
18323
|
+
});
|
|
18324
|
+
}
|
|
18325
|
+
async refreshEmbeddings(options = {}) {
|
|
18326
|
+
const workspace = this.ensureWorkspace();
|
|
18327
|
+
return refreshEmbeddingIndex({
|
|
18328
|
+
...options,
|
|
18329
|
+
dbPath: workspace.knowledgeDbPath,
|
|
18330
|
+
config: this.config()
|
|
18331
|
+
});
|
|
18332
|
+
}
|
|
17846
18333
|
providerStatus(env = process.env) {
|
|
17847
18334
|
return providerStatus(this.config(), env);
|
|
17848
18335
|
}
|
|
@@ -17893,6 +18380,15 @@ class KnowledgeService {
|
|
|
17893
18380
|
config: this.config()
|
|
17894
18381
|
});
|
|
17895
18382
|
}
|
|
18383
|
+
async webSearch(options) {
|
|
18384
|
+
const workspace = this.ensureWorkspace();
|
|
18385
|
+
return runProviderWebSearch({
|
|
18386
|
+
...options,
|
|
18387
|
+
dbPath: workspace.knowledgeDbPath,
|
|
18388
|
+
config: this.config(),
|
|
18389
|
+
safetyPolicy: this.safetyPolicy()
|
|
18390
|
+
});
|
|
18391
|
+
}
|
|
17896
18392
|
}
|
|
17897
18393
|
function createKnowledgeService(options = {}) {
|
|
17898
18394
|
return new KnowledgeService(options);
|
|
@@ -18027,6 +18523,47 @@ function buildServer() {
|
|
|
18027
18523
|
return errorText(error48 instanceof Error ? error48.message : String(error48));
|
|
18028
18524
|
}
|
|
18029
18525
|
});
|
|
18526
|
+
// MCP tool "ok_reindex_status": returns the service's reindex health as JSON,
// or an error text carrying the failure message.
registerTool(server, "ok_reindex_status", "Reindex status", "Inspect missing embeddings, queued jobs, stale revisions, and vector index health", {
  scope: scopeField,
  model: exports_external.string().optional().describe("Embedding model ref, default openai:text-embedding-3-small"),
  dimensions: exports_external.number().optional().describe("Embedding dimensions for deterministic fake mode"),
  fake: exports_external.boolean().optional().describe("Use deterministic fake embeddings for local tests")
}, async (args) => {
  const service = createKnowledgeService({ scope: args.scope });
  try {
    const health = service.reindexHealth({ modelRef: args.model, dimensions: args.dimensions, fake: args.fake });
    return jsonText({ ok: true, ...health });
  } catch (failure) {
    const message = failure instanceof Error ? failure.message : String(failure);
    return errorText(message);
  }
});
|
|
18539
|
+
// MCP tool "ok_reindex_enqueue": queues missing-embedding jobs and returns the
// enqueue summary as JSON, or an error text carrying the failure message.
registerTool(server, "ok_reindex_enqueue", "Enqueue reindex work", "Queue missing embedding refresh jobs for indexed source chunks", {
  scope: scopeField,
  model: exports_external.string().optional().describe("Embedding model ref, default openai:text-embedding-3-small"),
  dimensions: exports_external.number().optional().describe("Embedding dimensions for deterministic fake mode"),
  fake: exports_external.boolean().optional().describe("Use deterministic fake embeddings for local tests")
}, async (args) => {
  const service = createKnowledgeService({ scope: args.scope });
  try {
    const summary = service.enqueueReindex({ modelRef: args.model, dimensions: args.dimensions, fake: args.fake });
    return jsonText({ ok: true, ...summary });
  } catch (failure) {
    const message = failure instanceof Error ? failure.message : String(failure);
    return errorText(message);
  }
});
|
|
18552
|
+
// MCP tool "ok_reindex_embeddings": runs an incremental or full embedding
// refresh and returns its result as JSON, or an error text carrying the
// failure message.
registerTool(server, "ok_reindex_embeddings", "Refresh embedding index", "Run incremental or full embedding refresh jobs with run-ledger tracking", {
  scope: scopeField,
  full: exports_external.boolean().optional().describe("Delete and rebuild all embedding/vector rows first"),
  limit: exports_external.number().optional().describe("Maximum chunks to embed"),
  model: exports_external.string().optional().describe("Embedding model ref, default openai:text-embedding-3-small"),
  dimensions: exports_external.number().optional().describe("Embedding dimensions for deterministic fake mode"),
  fake: exports_external.boolean().optional().describe("Use deterministic fake embeddings for local tests")
}, async (args) => {
  const service = createKnowledgeService({ scope: args.scope });
  try {
    const outcome = await service.refreshEmbeddings({
      full: args.full,
      limit: args.limit,
      modelRef: args.model,
      dimensions: args.dimensions,
      fake: args.fake
    });
    return jsonText({ ok: true, ...outcome });
  } catch (failure) {
    const message = failure instanceof Error ? failure.message : String(failure);
    return errorText(message);
  }
});
|
|
18030
18567
|
registerTool(server, "ok_semantic_search", "Semantic search", "Search the local vector index and return cited chunks with provenance", {
|
|
18031
18568
|
scope: scopeField,
|
|
18032
18569
|
query: exports_external.string().describe("Semantic query"),
|
|
@@ -18092,6 +18629,23 @@ function buildServer() {
|
|
|
18092
18629
|
return errorText(error48 instanceof Error ? error48.message : String(error48));
|
|
18093
18630
|
}
|
|
18094
18631
|
});
|
|
18632
|
+
// MCP tool "ok_web_search": runs the service's web search and returns its
// result as JSON, or an error text carrying the failure message. The
// snake_case `file_results` argument is passed on as `fileResults`.
registerTool(server, "ok_web_search", "Provider web search", "Run safety-gated provider-native web search and return citations/sources", {
  scope: scopeField,
  query: exports_external.string().describe("Web search query"),
  limit: exports_external.number().optional().describe("Maximum sources"),
  provider: exports_external.enum(["openai", "anthropic", "deepseek"]).optional().describe("Provider override"),
  model: exports_external.string().optional().describe("Model alias/ref"),
  domains: exports_external.array(exports_external.string()).optional().describe("Allowed domains"),
  fake: exports_external.boolean().optional().describe("Use deterministic fake web results"),
  file_results: exports_external.boolean().optional().describe("File web snippets as web source refs")
}, async (args) => {
  const service = createKnowledgeService({ scope: args.scope });
  try {
    const outcome = await service.webSearch({
      query: args.query,
      limit: args.limit,
      provider: args.provider,
      modelRef: args.model,
      domains: args.domains,
      fake: args.fake,
      fileResults: args.file_results
    });
    return jsonText({ ok: true, ...outcome });
  } catch (failure) {
    const message = failure instanceof Error ? failure.message : String(failure);
    return errorText(message);
  }
});
|
|
18095
18649
|
registerTool(server, "ok_add", "Add a knowledge item", "Add a new item to the knowledge store", {
|
|
18096
18650
|
title: exports_external.string().describe("Item title"),
|
|
18097
18651
|
content: exports_external.string().describe("Item content/body"),
|