@hasna/knowledge 0.2.17 → 0.2.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -13660,7 +13660,7 @@ import { existsSync as existsSync7, readFileSync as readFileSync7, writeFileSync
13660
13660
  // package.json
13661
13661
  var package_default = {
13662
13662
  name: "@hasna/knowledge",
13663
- version: "0.2.17",
13663
+ version: "0.2.19",
13664
13664
  description: "Agent-friendly local knowledge CLI with JSON output, pagination, and safe destructive actions",
13665
13665
  type: "module",
13666
13666
  bin: {
@@ -14386,6 +14386,28 @@ CREATE INDEX IF NOT EXISTS idx_vector_index_status ON vector_index_entries(statu
14386
14386
  INSERT OR IGNORE INTO schema_versions(version, applied_at)
14387
14387
  VALUES (4, datetime('now'));
14388
14388
  `;
14389
+ var MIGRATION_5 = `
14390
+ CREATE TABLE IF NOT EXISTS reindex_queue (
14391
+ id TEXT PRIMARY KEY,
14392
+ kind TEXT NOT NULL,
14393
+ target_id TEXT NOT NULL,
14394
+ source_uri TEXT,
14395
+ reason TEXT NOT NULL,
14396
+ status TEXT NOT NULL DEFAULT 'pending',
14397
+ attempts INTEGER NOT NULL DEFAULT 0,
14398
+ metadata_json TEXT NOT NULL DEFAULT '{}',
14399
+ created_at TEXT NOT NULL,
14400
+ updated_at TEXT NOT NULL,
14401
+ UNIQUE(kind, target_id, reason)
14402
+ );
14403
+
14404
+ CREATE INDEX IF NOT EXISTS idx_reindex_queue_status ON reindex_queue(status);
14405
+ CREATE INDEX IF NOT EXISTS idx_reindex_queue_kind_target ON reindex_queue(kind, target_id);
14406
+ CREATE INDEX IF NOT EXISTS idx_reindex_queue_source_uri ON reindex_queue(source_uri);
14407
+
14408
+ INSERT OR IGNORE INTO schema_versions(version, applied_at)
14409
+ VALUES (5, datetime('now'));
14410
+ `;
14389
14411
  function openKnowledgeDb(path) {
14390
14412
  ensureParentDir(path);
14391
14413
  const db = new Database(path);
@@ -14403,6 +14425,8 @@ function migrateKnowledgeDb(path) {
14403
14425
  db.exec(MIGRATION_3);
14404
14426
  if (getSchemaVersion(db) < 4)
14405
14427
  db.exec(MIGRATION_4);
14428
+ if (getSchemaVersion(db) < 5)
14429
+ db.exec(MIGRATION_5);
14406
14430
  return { path, schema_version: getSchemaVersion(db) };
14407
14431
  } finally {
14408
14432
  db.close();
@@ -14434,7 +14458,8 @@ function getKnowledgeDbStats(path) {
14434
14458
  approval_gates: count(db, "approval_gates"),
14435
14459
  storage_objects: count(db, "storage_objects"),
14436
14460
  embeddings: count(db, "chunk_embeddings"),
14437
- vector_entries: count(db, "vector_index_entries")
14461
+ vector_entries: count(db, "vector_index_entries"),
14462
+ reindex_queue: count(db, "reindex_queue")
14438
14463
  };
14439
14464
  } finally {
14440
14465
  db.close();
@@ -17348,8 +17373,446 @@ async function ingestSourceRef(options) {
17348
17373
  };
17349
17374
  }
17350
17375
 
17351
- // src/storage-contract.ts
17376
+ // src/reindex.ts
17352
17377
  import { createHash as createHash7, randomUUID as randomUUID6 } from "crypto";
17378
/**
 * Derives a short, deterministic identifier from the given input.
 *
 * @param {string} prefix - Label placed in front of the underscore.
 * @param {string} value - Material fed to the SHA-256 hash.
 * @returns {string} `<prefix>_` followed by the first 20 hex characters of the digest.
 */
function stableId5(prefix, value) {
  const hexDigest = createHash7("sha256").update(value).digest("hex");
  const shortDigest = hexDigest.slice(0, 20);
  return `${prefix}_${shortDigest}`;
}
17381
/**
 * Tallies the rows of `reindex_queue` per status value.
 *
 * Opens its own connection to the database and always closes it.
 *
 * @param {string} dbPath - Path handed to `openKnowledgeDb`.
 * @returns {Object} Mapping of status to row count, built in status order.
 */
function queueCounts(dbPath) {
  const db = openKnowledgeDb(dbPath);
  try {
    const statusTotals = db
      .query(`SELECT status, COUNT(*) AS n FROM reindex_queue GROUP BY status ORDER BY status`)
      .all();
    const pairs = [];
    for (const total of statusTotals) {
      pairs.push([total.status, total.n]);
    }
    return Object.fromEntries(pairs);
  } finally {
    db.close();
  }
}
17390
/**
 * Lists chunks that have no vector index entry for the resolved embedding model.
 *
 * The model ref is resolved from `options.modelRef` / `options.config` and split
 * into provider and model; both are bound into the join condition.
 *
 * @param {string} dbPath - Path handed to `openKnowledgeDb`.
 * @param {Object} options - Carries `modelRef` and `config`.
 * @returns {Array} Rows with `chunk_id`, `source_revision_id` and `source_uri`,
 *   ordered by chunk creation time and ordinal.
 */
function missingEmbeddingRows(dbPath, options) {
  const resolvedRef = resolveEmbeddingModelRef(options.modelRef, options.config);
  const target = parseModelRef(resolvedRef);
  const db = openKnowledgeDb(dbPath);
  try {
    const statement = db.query(`SELECT c.id AS chunk_id, c.source_revision_id, s.uri AS source_uri
FROM chunks c
LEFT JOIN source_revisions sr ON sr.id = c.source_revision_id
LEFT JOIN sources s ON s.id = sr.source_id
LEFT JOIN vector_index_entries v ON v.chunk_id = c.id AND v.provider = ? AND v.model = ?
WHERE v.id IS NULL
ORDER BY c.created_at ASC, c.ordinal ASC`);
    return statement.all(target.provider, target.model);
  } finally {
    db.close();
  }
}
17406
/**
 * Reports the health of the embedding index for one knowledge database.
 *
 * Runs pending migrations first, then gathers schema version, chunk and vector
 * entry totals, the number of chunks lacking a vector entry for the resolved
 * embedding model, queue totals per status, and the number of source revisions
 * whose metadata marks them as stale or needing a reindex.
 *
 * The missing-embedding figure is computed with COUNT(*) on the connection that
 * is already open, instead of loading every missing row into memory just to
 * read its length.
 *
 * @param {Object} options - Carries `dbPath`, plus `modelRef` and `config` used
 *   to resolve the embedding model.
 * @returns {Object} `{ schema_version, chunks, vector_entries, missing_embeddings, queued, stale_revisions }`.
 */
function reindexHealth(options) {
  migrateKnowledgeDb(options.dbPath);
  // Read queue totals before opening the main connection so two connections
  // are never open at once.
  const queued = queueCounts(options.dbPath);
  const target = parseModelRef(resolveEmbeddingModelRef(options.modelRef, options.config));
  const db = openKnowledgeDb(options.dbPath);
  try {
    const countOf = (sql, ...params) => db.query(sql).get(...params)?.n ?? 0;
    const version2 = db.query("SELECT MAX(version) AS version FROM schema_versions").get()?.version ?? 0;
    const chunks = countOf("SELECT COUNT(*) AS n FROM chunks");
    const vectorEntries = countOf("SELECT COUNT(*) AS n FROM vector_index_entries");
    // Same join condition as missingEmbeddingRows, counted in SQL.
    const missing = countOf(`SELECT COUNT(*) AS n
FROM chunks c
LEFT JOIN vector_index_entries v ON v.chunk_id = c.id AND v.provider = ? AND v.model = ?
WHERE v.id IS NULL`, target.provider, target.model);
    const stale = countOf(`SELECT COUNT(*) AS n FROM source_revisions
WHERE metadata_json LIKE '%"reindex_required":true%' OR metadata_json LIKE '%"status":"stale"%'`);
    return {
      schema_version: version2,
      chunks,
      vector_entries: vectorEntries,
      missing_embeddings: missing,
      queued,
      stale_revisions: stale
    };
  } finally {
    db.close();
  }
}
17428
/**
 * Queues an embedding job for every chunk that lacks a vector index entry.
 *
 * Queue rows are unique per (kind, target_id, reason). A row that already
 * exists and has been marked "completed" is put back to "pending" when its
 * chunk is missing an embedding again (for example after a second full
 * rebuild). Previously such rows were reported as already queued and stayed
 * "completed", so the queue lost track of outstanding work. Rows in any other
 * status are left alone and counted as already queued.
 *
 * @param {Object} options - Carries `dbPath`, optional `now` (Date), optional
 *   `reason` (defaults to "missing_embedding"), plus `modelRef` / `config`
 *   forwarded to `missingEmbeddingRows`.
 * @returns {{enqueued: number, already_queued: number, reason: string}}
 */
function enqueueMissingEmbeddings(options) {
  migrateKnowledgeDb(options.dbPath);
  const now = (options.now ?? new Date()).toISOString();
  const reason = options.reason ?? "missing_embedding";
  const rows = missingEmbeddingRows(options.dbPath, options);
  const db = openKnowledgeDb(options.dbPath);
  let enqueued = 0;
  let alreadyQueued = 0;
  try {
    const findExisting = db.query("SELECT id, status FROM reindex_queue WHERE kind = ? AND target_id = ? AND reason = ?");
    const write = db.transaction(() => {
      for (const row of rows) {
        const metadataJson = JSON.stringify({ source_revision_id: row.source_revision_id });
        const existing = findExisting.get("embedding", row.chunk_id, reason);
        if (existing) {
          if (existing.status !== "completed") {
            alreadyQueued += 1;
            continue;
          }
          // The chunk was indexed before but its embedding is gone again: requeue it.
          db.run(`UPDATE reindex_queue
SET status = ?, source_uri = ?, metadata_json = ?, updated_at = ?
WHERE id = ?`, ["pending", row.source_uri, metadataJson, now, existing.id]);
          enqueued += 1;
          continue;
        }
        db.run(`INSERT INTO reindex_queue (id, kind, target_id, source_uri, reason, status, metadata_json, created_at, updated_at)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`, [
          stableId5("rq", `embedding\x00${row.chunk_id}\x00${reason}`),
          "embedding",
          row.chunk_id,
          row.source_uri,
          reason,
          "pending",
          metadataJson,
          now,
          now
        ]);
        enqueued += 1;
      }
    });
    write();
  } finally {
    db.close();
  }
  return { enqueued, already_queued: alreadyQueued, reason };
}
17466
/**
 * Deletes every row from `vector_index_entries` and `chunk_embeddings`.
 *
 * The counts and both deletes run inside one transaction, so the reported
 * totals match what was removed and a failure cannot leave one table cleared
 * while the other still holds rows.
 *
 * @param {string} dbPath - Path handed to `openKnowledgeDb`.
 * @returns {{embeddings: number, vectorEntries: number}} Row counts taken just
 *   before deletion.
 */
function clearEmbeddingIndex(dbPath) {
  const db = openKnowledgeDb(dbPath);
  try {
    const clearAll = db.transaction(() => {
      const embeddings = db.query("SELECT COUNT(*) AS n FROM chunk_embeddings").get()?.n ?? 0;
      const vectorEntries = db.query("SELECT COUNT(*) AS n FROM vector_index_entries").get()?.n ?? 0;
      db.run("DELETE FROM vector_index_entries");
      db.run("DELETE FROM chunk_embeddings");
      return { embeddings, vectorEntries };
    });
    return clearAll();
  } finally {
    db.close();
  }
}
17478
/**
 * Marks pending embedding queue rows as completed once their chunk has a
 * vector index entry for the resolved embedding model.
 *
 * @param {string} dbPath - Path handed to `openKnowledgeDb`.
 * @param {Object} options - Carries `modelRef` and `config` for model resolution.
 * @param {string} now - Timestamp written to `updated_at`.
 * @returns {number} The `changes` value reported by the UPDATE.
 */
function completeIndexedQueueItems(dbPath, options, now) {
  const resolvedRef = resolveEmbeddingModelRef(options.modelRef, options.config);
  const target = parseModelRef(resolvedRef);
  const db = openKnowledgeDb(dbPath);
  try {
    const bindings = ["completed", now, "embedding", "pending", target.provider, target.model];
    const outcome = db.run(`UPDATE reindex_queue
SET status = ?, updated_at = ?
WHERE kind = ?
AND status = ?
AND EXISTS (
SELECT 1 FROM vector_index_entries v
WHERE v.chunk_id = reindex_queue.target_id
AND v.provider = ?
AND v.model = ?
)`, bindings);
    return outcome.changes;
  } finally {
    db.close();
  }
}
17498
/**
 * Runs an incremental or full embedding refresh and tracks it in the run ledger.
 *
 * Steps: migrate, optionally clear the existing index (`options.full`), queue
 * missing embeddings, insert a "running" row into `runs`, index embeddings,
 * complete the matching queue items, then mark the run "completed" and add a
 * `run_events` row.
 *
 * If indexing or queue completion throws, the run row is now updated to
 * "failed" with the error message before the original error is rethrown.
 * Previously the row stayed "running" forever.
 * NOTE(review): assumes `runs.status` accepts the value "failed" - confirm
 * against the schema and other run types.
 *
 * @param {Object} options - Carries `dbPath`, `config`, and optional `env`,
 *   `modelRef`, `dimensions`, `fake`, `limit`, `now`, `full`.
 * @returns {Promise<Object>} `{ run_id, full, deleted_embeddings,
 *   deleted_vector_entries, queued, indexed, completed_queue_items }`.
 */
async function refreshEmbeddingIndex(options) {
  migrateKnowledgeDb(options.dbPath);
  const now = (options.now ?? new Date()).toISOString();
  const runId = `run_${randomUUID6()}`;
  const deleted = options.full ? clearEmbeddingIndex(options.dbPath) : { embeddings: 0, vectorEntries: 0 };
  const queued = enqueueMissingEmbeddings({ ...options, reason: options.full ? "full_embedding_rebuild" : "missing_embedding" });
  const db = openKnowledgeDb(options.dbPath);
  try {
    db.run(`INSERT INTO runs (id, type, prompt, status, provider, model, metadata_json, created_at, updated_at)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`, [
      runId,
      "embedding-refresh",
      options.full ? "full" : "incremental",
      "running",
      "local",
      resolveEmbeddingModelRef(options.modelRef, options.config),
      JSON.stringify({ full: options.full === true, queued }),
      now,
      now
    ]);
  } finally {
    db.close();
  }
  let indexed;
  let completedQueueItems;
  try {
    indexed = await indexKnowledgeEmbeddings({
      dbPath: options.dbPath,
      config: options.config,
      env: options.env,
      modelRef: options.modelRef,
      dimensions: options.dimensions,
      fake: options.fake,
      limit: options.limit,
      now: options.now
    });
    completedQueueItems = completeIndexedQueueItems(options.dbPath, options, now);
  } catch (error) {
    try {
      const failedDb = openKnowledgeDb(options.dbPath);
      try {
        failedDb.run(`UPDATE runs SET status = ?, metadata_json = ?, updated_at = ? WHERE id = ?`, [
          "failed",
          JSON.stringify({
            full: options.full === true,
            queued,
            error: error instanceof Error ? error.message : String(error)
          }),
          now,
          runId
        ]);
      } finally {
        failedDb.close();
      }
    } catch {
      // Recording the failure is best-effort; the original error below is what callers need.
    }
    throw error;
  }
  const doneDb = openKnowledgeDb(options.dbPath);
  try {
    doneDb.run(`UPDATE runs SET status = ?, metadata_json = ?, updated_at = ? WHERE id = ?`, [
      "completed",
      JSON.stringify({ full: options.full === true, queued, indexed, completed_queue_items: completedQueueItems }),
      now,
      runId
    ]);
    doneDb.run(`INSERT INTO run_events (id, run_id, level, event, metadata_json, created_at)
VALUES (?, ?, ?, ?, ?, ?)`, [
      `evt_${randomUUID6()}`,
      runId,
      "info",
      "embedding_refresh_completed",
      JSON.stringify({ queued, indexed, completed_queue_items: completedQueueItems }),
      now
    ]);
  } finally {
    doneDb.close();
  }
  return {
    run_id: runId,
    full: options.full === true,
    deleted_embeddings: deleted.embeddings,
    deleted_vector_entries: deleted.vectorEntries,
    queued,
    indexed,
    completed_queue_items: completedQueueItems
  };
}
17562
+
17563
+ // src/web-search.ts
17564
+ import { createHash as createHash8, randomUUID as randomUUID7 } from "crypto";
17565
/**
 * Computes the SHA-256 digest of `value` as a prefixed hex string.
 *
 * @param {string} value - Content to hash.
 * @returns {string} `sha256:` followed by the full hex digest.
 */
function stableHash(value) {
  const hasher = createHash8("sha256");
  hasher.update(value);
  const hex = hasher.digest("hex");
  return `sha256:${hex}`;
}
17568
/**
 * Rough token estimate: 1.25 tokens per whitespace-separated word, rounded up,
 * with a floor of 1.
 *
 * @param {string} text - Text to measure.
 * @returns {number} Estimated token count, never below 1.
 */
function estimateTokens2(text) {
  const wordRuns = text.match(/\S+/g);
  const wordCount = wordRuns === null ? 0 : wordRuns.length;
  const estimate = Math.ceil(wordCount * 1.25);
  return estimate < 1 ? 1 : estimate;
}
17572
/**
 * Narrows an arbitrary value to a plain object-like record.
 *
 * @param {*} value - Anything.
 * @returns {Object} `value` itself when it is a non-null, non-array object;
 *   otherwise a fresh empty object.
 */
function asRecord(value) {
  if (value === null || typeof value !== "object") {
    return {};
  }
  if (Array.isArray(value)) {
    return {};
  }
  return value;
}
17575
/**
 * Returns `value` only when it is a non-empty string.
 *
 * @param {*} value - Anything.
 * @returns {string|null} The string itself, or `null` for empty strings and
 *   every non-string value.
 */
function asString3(value) {
  if (typeof value !== "string") {
    return null;
  }
  return value === "" ? null : value;
}
17578
/**
 * Builds a normalized web source from a loosely shaped provider object.
 *
 * The URL is taken from the first non-empty string among `url`, `uri` and
 * `sourceUrl`; the title from `title` or `name`; the snippet from `snippet`,
 * `text` or `description`.
 *
 * @param {*} value - Candidate object from a provider response.
 * @returns {Object|null} `{ url, title, snippet, provider_metadata }`, or
 *   `null` when no URL-like field is present.
 */
function sourceFromRecord(value) {
  const fields = asRecord(value);
  // Returns the first key whose value is a non-empty string, else null.
  const firstString = (...keys) => {
    for (const key of keys) {
      const candidate = asString3(fields[key]);
      if (candidate !== null) {
        return candidate;
      }
    }
    return null;
  };
  const link = firstString("url", "uri", "sourceUrl");
  if (link === null) {
    return null;
  }
  const title = firstString("title", "name");
  const snippet = firstString("snippet", "text", "description");
  return {
    url: link,
    title,
    snippet,
    provider_metadata: fields
  };
}
17590
/**
 * Walks a provider response and gathers every source-like object it contains.
 *
 * Arrays are traversed element by element. For an object, the object itself is
 * recorded when `sourceFromRecord` accepts it, and then the values under
 * `sources`, `results`, `citations`, `annotations` and `output` are traversed
 * when truthy.
 *
 * @param {*} value - Response fragment to inspect.
 * @param {Map} output - Accumulator keyed by source URL; a later source with
 *   the same URL replaces the earlier value.
 * @returns {void}
 */
function collectSources(value, output) {
  if (Array.isArray(value)) {
    for (const element of value) {
      collectSources(element, output);
    }
    return;
  }
  const found = sourceFromRecord(value);
  if (found) {
    output.set(found.url, found);
  }
  const container = asRecord(value);
  const nestedKeys = ["sources", "results", "citations", "annotations", "output"];
  for (const nestedKey of nestedKeys) {
    const nested = container[nestedKey];
    if (!nested) {
      continue;
    }
    collectSources(nested, output);
  }
}
17605
/**
 * Produces deterministic placeholder web sources for fake mode.
 *
 * @param {string} query - Query text echoed in each snippet.
 * @param {number} limit - Requested number of sources; at most three are returned.
 * @returns {Array<Object>} Sources with `url`, `title`, `snippet` and
 *   `provider_metadata` (`{ fake: true, rank }`), ranked from 1.
 */
function fakeSources(query, limit) {
  const cap = Math.min(limit, 3);
  const fixtures = [];
  for (let rank = 1; rank <= cap; rank += 1) {
    fixtures.push({
      url: `https://example.com/knowledge-web-${rank}`,
      title: `Fake web source ${rank}`,
      snippet: `Deterministic web-search fixture for "${query}"`,
      provider_metadata: { fake: true, rank }
    });
  }
  return fixtures;
}
17613
/**
 * Runs a query through the OpenAI provider with its web search tool forced on.
 *
 * The `ai` and `@ai-sdk/openai` packages are loaded lazily. The API key is read
 * from `input.env` using the env var name in the provider settings.
 *
 * @param {Object} input - Carries `query`, `model`, `config`, `env`, `domains`.
 * @returns {Promise<Object>} The value returned by `generateText`.
 * @throws {Error} When the provider exposes no `tools.webSearch` factory.
 */
async function openAiWebSearch(input) {
  const { generateText } = await import("ai");
  const { createOpenAI } = await import("@ai-sdk/openai");
  const settings = providerSettings(input.config, "openai");
  const openai = createOpenAI({
    apiKey: input.env[settings.api_key_env],
    baseURL: settings.base_url
  });
  const webSearch = openai.tools?.webSearch;
  if (!webSearch) {
    throw new Error("OpenAI provider does not expose tools.webSearch.");
  }
  const model = openai(input.model);
  const prompt = input.query;
  const searchOptions = {
    externalWebAccess: true,
    searchContextSize: "medium"
  };
  // Only restrict domains when the caller supplied some.
  if (input.domains.length > 0) {
    searchOptions.allowedDomains = input.domains;
  }
  return generateText({
    model,
    prompt,
    tools: { web_search: webSearch(searchOptions) },
    toolChoice: { type: "tool", toolName: "web_search" }
  });
}
17637
/**
 * Runs a query through the Anthropic provider with its web search tool available.
 *
 * The `ai` and `@ai-sdk/anthropic` packages are loaded lazily. The tool factory
 * is `tools.webSearch_20250305` when present, otherwise `tools.webSearch`.
 *
 * @param {Object} input - Carries `query`, `model`, `config`, `env`, `maxUses`, `domains`.
 * @returns {Promise<Object>} The value returned by `generateText`.
 * @throws {Error} When the provider exposes no web search tool factory.
 */
async function anthropicWebSearch(input) {
  const { generateText } = await import("ai");
  const { createAnthropic } = await import("@ai-sdk/anthropic");
  const settings = providerSettings(input.config, "anthropic");
  const anthropic = createAnthropic({
    apiKey: input.env[settings.api_key_env],
    baseURL: settings.base_url
  });
  const factory = anthropic.tools?.webSearch_20250305 ?? anthropic.tools?.webSearch;
  if (!factory) {
    throw new Error("Anthropic provider does not expose a web search tool.");
  }
  const model = anthropic(input.model);
  const prompt = input.query;
  const searchOptions = { maxUses: input.maxUses };
  // Only restrict domains when the caller supplied some.
  if (input.domains.length > 0) {
    searchOptions.allowedDomains = input.domains;
  }
  return generateText({
    model,
    prompt,
    tools: { web_search: factory(searchOptions) }
  });
}
17659
/**
 * Files web search sources into the knowledge database as manifest items.
 *
 * Does nothing unless `options.fileResults` is truthy and at least one source
 * exists. Each item's text is the title, snippet and URL (whichever are
 * present) joined by newlines; its hash doubles as the revision.
 *
 * @param {Object} options - Carries `fileResults`, `dbPath`, `query`, `safetyPolicy`.
 * @param {Array<Object>} sources - Sources with `url`, `title`, `snippet`, `provider_metadata`.
 * @param {string} now - Timestamp stored on each item and parsed into a Date for ingestion.
 * @returns {Promise<number>} `sources_upserted` from the ingestion result, or 0 when skipped.
 */
async function fileWebSources(options, sources, now) {
  const shouldFile = options.fileResults && sources.length !== 0;
  if (!shouldFile) {
    return 0;
  }
  const items = [];
  for (const source of sources) {
    const parts = [source.title, source.snippet, source.url].filter(Boolean);
    const text = parts.join("\n");
    const digest = stableHash(text);
    items.push({
      source_ref: source.url,
      name: source.title ?? source.url,
      url: source.url,
      mime: "text/plain",
      hash: digest,
      revision: digest,
      status: "active",
      updated_at: now,
      permissions: { mode: "read_only", allowed_purposes: ["knowledge_answer", "knowledge_index"] },
      metadata: {
        source_ref: source.url,
        content_source: "provider_web_search",
        provider_metadata: source.provider_metadata
      },
      extracted_text: text
    });
  }
  const ingestion = await ingestOpenFilesManifestItems({
    dbPath: options.dbPath,
    items,
    sourceLabel: `web-search:${options.query}`,
    readAction: "provider_web_search_file_results",
    safetyPolicy: options.safetyPolicy,
    now: new Date(now)
  });
  return ingestion.sources_upserted;
}
17694
/**
 * Runs a provider-native (or fake) web search and records it in the run ledger.
 *
 * Flow: validate the query, clamp `limit` to 1..20 and `maxUses` to 1..10,
 * resolve provider and model, enforce the safety policy and credentials for
 * real searches, insert a "running" row into `runs` plus an audit event, run
 * the search, optionally file the sources, then mark the run "completed" and
 * record a run event and provider usage.
 *
 * If the provider call or source filing throws, the run row is now updated to
 * "failed" with the error message before the original error is rethrown.
 * Previously the row stayed "running" forever.
 * NOTE(review): assumes `runs.status` accepts the value "failed" - confirm
 * against the schema and other run types.
 *
 * @param {Object} options - Carries `query`, `dbPath`, `config`, and optional
 *   `env`, `now`, `limit`, `maxUses`, `domains`, `modelRef`, `provider`,
 *   `fake`, `fileResults`, `safetyPolicy`.
 * @returns {Promise<Object>} `{ run_id, query, provider, model, answer,
 *   sources, filed_sources, usage, warnings }`.
 * @throws {Error} When the query is blank or the provider has no native web search.
 */
async function runProviderWebSearch(options) {
  const query = options.query.trim();
  if (!query)
    throw new Error("Web search query is required.");
  const env = options.env ?? process.env;
  const now = (options.now ?? new Date()).toISOString();
  const limit = Math.max(1, Math.min(options.limit ?? 5, 20));
  const maxUses = Math.max(1, Math.min(options.maxUses ?? 3, 10));
  const domains = options.domains ?? [];
  const modelRef = resolveModelRef(options.modelRef ?? (options.provider ? `${options.provider}:${providerSettings(options.config, options.provider).default_model}` : "default"), options.config);
  const parsed = parseModelRef(modelRef);
  const provider = options.provider ?? parsed.provider;
  // Fall back to the provider's default model when the ref names a different provider.
  const model = parsed.provider === provider ? parsed.model : providerSettings(options.config, provider).default_model;
  const runId = `run_${randomUUID7()}`;
  if (!options.fake && options.safetyPolicy)
    assertWebSearchAllowed(options.safetyPolicy);
  if (!options.fake && provider !== "openai" && provider !== "anthropic") {
    throw new Error(`Provider ${provider} does not expose native web search yet.`);
  }
  if (!options.fake)
    assertProviderCredentials(provider, options.config, env);
  migrateKnowledgeDb(options.dbPath);
  const db = openKnowledgeDb(options.dbPath);
  try {
    db.run(`INSERT INTO runs (id, type, prompt, status, provider, model, metadata_json, created_at, updated_at)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`, [
      runId,
      "provider-web-search",
      query,
      "running",
      provider,
      model,
      JSON.stringify({ domains, max_uses: maxUses, fake: options.fake === true }),
      now,
      now
    ]);
    recordAuditEvent(db, {
      event_type: "source_read",
      action: options.fake ? "fake_provider_web_search" : "provider_web_search",
      target_uri: query,
      decision: "allow",
      metadata: { provider, model, domains, max_uses: maxUses },
      created_at: now
    });
  } finally {
    db.close();
  }
  let answer = "";
  let sources = [];
  let usage = { input_tokens: estimateTokens2(query), output_tokens: 0, cost_usd: 0 };
  let filedSources = 0;
  const warnings = [];
  try {
    if (options.fake) {
      sources = fakeSources(query, limit);
      answer = `Fake web search answer for: ${query}`;
      usage.output_tokens = estimateTokens2(answer);
    } else {
      const searchInput = { query, model, config: options.config, env, maxUses, domains };
      const result = provider === "openai" ? await openAiWebSearch(searchInput) : await anthropicWebSearch(searchInput);
      answer = result.text;
      const collected = new Map();
      collectSources(result.sources, collected);
      collectSources(result.toolResults, collected);
      sources = Array.from(collected.values()).slice(0, limit);
      const normalized = normalizeAiSdkUsage({
        provider,
        model,
        usage: result.usage,
        providerMetadata: result.providerMetadata
      });
      usage = {
        input_tokens: normalized.input_tokens,
        output_tokens: normalized.output_tokens,
        cost_usd: normalized.cost_usd
      };
    }
    filedSources = await fileWebSources(options, sources, now);
  } catch (error) {
    try {
      const failedDb = openKnowledgeDb(options.dbPath);
      try {
        failedDb.run(`UPDATE runs SET status = ?, metadata_json = ?, updated_at = ? WHERE id = ?`, [
          "failed",
          JSON.stringify({
            domains,
            max_uses: maxUses,
            fake: options.fake === true,
            error: error instanceof Error ? error.message : String(error)
          }),
          now,
          runId
        ]);
      } finally {
        failedDb.close();
      }
    } catch {
      // Recording the failure is best-effort; the original error below is what callers need.
    }
    throw error;
  }
  const writeDb = openKnowledgeDb(options.dbPath);
  try {
    writeDb.run(`UPDATE runs SET status = ?, metadata_json = ?, updated_at = ? WHERE id = ?`, [
      "completed",
      JSON.stringify({ domains, max_uses: maxUses, sources: sources.length, filed_sources: filedSources, fake: options.fake === true }),
      now,
      runId
    ]);
    writeDb.run(`INSERT INTO run_events (id, run_id, level, event, metadata_json, created_at)
VALUES (?, ?, ?, ?, ?, ?)`, [
      `evt_${randomUUID7()}`,
      runId,
      "info",
      "provider_web_search_completed",
      JSON.stringify({ sources: sources.length, filed_sources: filedSources }),
      now
    ]);
    recordProviderUsage(writeDb, {
      run_id: runId,
      provider,
      model,
      input_tokens: usage.input_tokens,
      output_tokens: usage.output_tokens,
      cost_usd: usage.cost_usd,
      metadata: { web_search: true, sources: sources.length, filed_sources: filedSources },
      created_at: now
    });
  } finally {
    writeDb.close();
  }
  if (sources.length === 0)
    warnings.push("no_web_sources_returned");
  return {
    run_id: runId,
    query,
    provider,
    model,
    answer,
    sources,
    filed_sources: filedSources,
    usage,
    warnings
  };
}
17813
+
17814
+ // src/storage-contract.ts
17815
+ import { createHash as createHash9, randomUUID as randomUUID8 } from "crypto";
17353
17816
  var GENERATED_ARTIFACTS = [
17354
17817
  {
17355
17818
  kind: "schema",
@@ -17385,7 +17848,7 @@ var GENERATED_ARTIFACTS = [
17385
17848
  function hashArtifactBody(body) {
17386
17849
  const bytes = typeof body === "string" ? Buffer.from(body) : Buffer.from(body);
17387
17850
  return {
17388
- hash: `sha256:${createHash7("sha256").update(bytes).digest("hex")}`,
17851
+ hash: `sha256:${createHash9("sha256").update(bytes).digest("hex")}`,
17389
17852
  size_bytes: bytes.byteLength
17390
17853
  };
17391
17854
  }
@@ -17510,7 +17973,7 @@ function recordStorageObjects(db, objects, now = new Date) {
17510
17973
  `);
17511
17974
  const insert = db.transaction((entries) => {
17512
17975
  for (const entry of entries) {
17513
- statement.run(randomUUID6(), entry.uri, entry.kind, entry.content_type ?? null, entry.hash ?? null, entry.size_bytes ?? null, JSON.stringify({
17976
+ statement.run(randomUUID8(), entry.uri, entry.kind, entry.content_type ?? null, entry.hash ?? null, entry.size_bytes ?? null, JSON.stringify({
17514
17977
  key: entry.key,
17515
17978
  ...entry.metadata ?? {}
17516
17979
  }), timestamp, timestamp);
@@ -17520,15 +17983,15 @@ function recordStorageObjects(db, objects, now = new Date) {
17520
17983
  }
17521
17984
 
17522
17985
  // src/wiki-layout.ts
17523
- import { createHash as createHash8 } from "crypto";
17986
+ import { createHash as createHash10 } from "crypto";
17524
17987
  function todayParts(now) {
17525
17988
  const year = String(now.getUTCFullYear());
17526
17989
  const month = String(now.getUTCMonth() + 1).padStart(2, "0");
17527
17990
  const day = String(now.getUTCDate()).padStart(2, "0");
17528
17991
  return { year, month, day };
17529
17992
  }
17530
- function stableId5(prefix, value) {
17531
- return `${prefix}_${createHash8("sha256").update(value).digest("hex").slice(0, 20)}`;
17993
+ function stableId6(prefix, value) {
17994
+ return `${prefix}_${createHash10("sha256").update(value).digest("hex").slice(0, 20)}`;
17532
17995
  }
17533
17996
  function estimateTokenCount2(text) {
17534
17997
  const words = text.trim().split(/\s+/).filter(Boolean).length;
@@ -17646,7 +18109,7 @@ function provenanceFor(artifact) {
17646
18109
  }
17647
18110
  function recordWikiChunk(db, pageId, title, artifact, body, now) {
17648
18111
  const provenance = provenanceFor(artifact);
17649
- const chunkId = stableId5("chk", `${pageId}\x00${artifact.hash ?? artifact.uri}`);
18112
+ const chunkId = stableId6("chk", `${pageId}\x00${artifact.hash ?? artifact.uri}`);
17650
18113
  const existing = db.query("SELECT id FROM chunks WHERE wiki_page_id = ?").all(pageId);
17651
18114
  for (const row of existing)
17652
18115
  db.run("DELETE FROM chunks_fts WHERE chunk_id = ?", [row.id]);
@@ -17682,7 +18145,7 @@ function recordWikiLayoutCatalog(db, artifacts, now = new Date) {
17682
18145
  artifact_uri = excluded.artifact_uri,
17683
18146
  metadata_json = excluded.metadata_json,
17684
18147
  updated_at = excluded.updated_at`, [
17685
- stableId5("idx", "root:indexes/root.md"),
18148
+ stableId6("idx", "root:indexes/root.md"),
17686
18149
  "root",
17687
18150
  "root",
17688
18151
  rootIndex.uri,
@@ -17697,7 +18160,7 @@ function recordWikiLayoutCatalog(db, artifacts, now = new Date) {
17697
18160
  ]);
17698
18161
  }
17699
18162
  if (wikiReadme) {
17700
- const wikiPageId = stableId5("wiki", "wiki/README.md");
18163
+ const wikiPageId = stableId6("wiki", "wiki/README.md");
17701
18164
  db.run(`INSERT INTO wiki_pages (id, path, title, artifact_uri, content_hash, status, metadata_json, created_at, updated_at)
17702
18165
  VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
17703
18166
  ON CONFLICT(path) DO UPDATE SET
@@ -17843,6 +18306,30 @@ class KnowledgeService {
17843
18306
  safetyPolicy: this.safetyPolicy()
17844
18307
  });
17845
18308
  }
18309
+ reindexHealth(options = {}) {
18310
+ const workspace = this.ensureWorkspace();
18311
+ return reindexHealth({
18312
+ ...options,
18313
+ dbPath: workspace.knowledgeDbPath,
18314
+ config: this.config()
18315
+ });
18316
+ }
18317
+ enqueueReindex(options = {}) {
18318
+ const workspace = this.ensureWorkspace();
18319
+ return enqueueMissingEmbeddings({
18320
+ ...options,
18321
+ dbPath: workspace.knowledgeDbPath,
18322
+ config: this.config()
18323
+ });
18324
+ }
18325
+ async refreshEmbeddings(options = {}) {
18326
+ const workspace = this.ensureWorkspace();
18327
+ return refreshEmbeddingIndex({
18328
+ ...options,
18329
+ dbPath: workspace.knowledgeDbPath,
18330
+ config: this.config()
18331
+ });
18332
+ }
17846
18333
  providerStatus(env = process.env) {
17847
18334
  return providerStatus(this.config(), env);
17848
18335
  }
@@ -17893,6 +18380,15 @@ class KnowledgeService {
17893
18380
  config: this.config()
17894
18381
  });
17895
18382
  }
18383
+ async webSearch(options) {
18384
+ const workspace = this.ensureWorkspace();
18385
+ return runProviderWebSearch({
18386
+ ...options,
18387
+ dbPath: workspace.knowledgeDbPath,
18388
+ config: this.config(),
18389
+ safetyPolicy: this.safetyPolicy()
18390
+ });
18391
+ }
17896
18392
  }
17897
18393
  function createKnowledgeService(options = {}) {
17898
18394
  return new KnowledgeService(options);
@@ -18027,6 +18523,47 @@ function buildServer() {
18027
18523
  return errorText(error48 instanceof Error ? error48.message : String(error48));
18028
18524
  }
18029
18525
  });
18526
+ registerTool(server, "ok_reindex_status", "Reindex status", "Inspect missing embeddings, queued jobs, stale revisions, and vector index health", {
18527
+ scope: scopeField,
18528
+ model: exports_external.string().optional().describe("Embedding model ref, default openai:text-embedding-3-small"),
18529
+ dimensions: exports_external.number().optional().describe("Embedding dimensions for deterministic fake mode"),
18530
+ fake: exports_external.boolean().optional().describe("Use deterministic fake embeddings for local tests")
18531
+ }, async ({ scope, model, dimensions, fake }) => {
18532
+ const service = createKnowledgeService({ scope });
18533
+ try {
18534
+ return jsonText({ ok: true, ...service.reindexHealth({ modelRef: model, dimensions, fake }) });
18535
+ } catch (error48) {
18536
+ return errorText(error48 instanceof Error ? error48.message : String(error48));
18537
+ }
18538
+ });
18539
+ registerTool(server, "ok_reindex_enqueue", "Enqueue reindex work", "Queue missing embedding refresh jobs for indexed source chunks", {
18540
+ scope: scopeField,
18541
+ model: exports_external.string().optional().describe("Embedding model ref, default openai:text-embedding-3-small"),
18542
+ dimensions: exports_external.number().optional().describe("Embedding dimensions for deterministic fake mode"),
18543
+ fake: exports_external.boolean().optional().describe("Use deterministic fake embeddings for local tests")
18544
+ }, async ({ scope, model, dimensions, fake }) => {
18545
+ const service = createKnowledgeService({ scope });
18546
+ try {
18547
+ return jsonText({ ok: true, ...service.enqueueReindex({ modelRef: model, dimensions, fake }) });
18548
+ } catch (error48) {
18549
+ return errorText(error48 instanceof Error ? error48.message : String(error48));
18550
+ }
18551
+ });
18552
+ registerTool(server, "ok_reindex_embeddings", "Refresh embedding index", "Run incremental or full embedding refresh jobs with run-ledger tracking", {
18553
+ scope: scopeField,
18554
+ full: exports_external.boolean().optional().describe("Delete and rebuild all embedding/vector rows first"),
18555
+ limit: exports_external.number().optional().describe("Maximum chunks to embed"),
18556
+ model: exports_external.string().optional().describe("Embedding model ref, default openai:text-embedding-3-small"),
18557
+ dimensions: exports_external.number().optional().describe("Embedding dimensions for deterministic fake mode"),
18558
+ fake: exports_external.boolean().optional().describe("Use deterministic fake embeddings for local tests")
18559
+ }, async ({ scope, full, limit, model, dimensions, fake }) => {
18560
+ const service = createKnowledgeService({ scope });
18561
+ try {
18562
+ return jsonText({ ok: true, ...await service.refreshEmbeddings({ full, limit, modelRef: model, dimensions, fake }) });
18563
+ } catch (error48) {
18564
+ return errorText(error48 instanceof Error ? error48.message : String(error48));
18565
+ }
18566
+ });
18030
18567
  registerTool(server, "ok_semantic_search", "Semantic search", "Search the local vector index and return cited chunks with provenance", {
18031
18568
  scope: scopeField,
18032
18569
  query: exports_external.string().describe("Semantic query"),
@@ -18092,6 +18629,23 @@ function buildServer() {
18092
18629
  return errorText(error48 instanceof Error ? error48.message : String(error48));
18093
18630
  }
18094
18631
  });
18632
+ registerTool(server, "ok_web_search", "Provider web search", "Run safety-gated provider-native web search and return citations/sources", {
18633
+ scope: scopeField,
18634
+ query: exports_external.string().describe("Web search query"),
18635
+ limit: exports_external.number().optional().describe("Maximum sources"),
18636
+ provider: exports_external.enum(["openai", "anthropic", "deepseek"]).optional().describe("Provider override"),
18637
+ model: exports_external.string().optional().describe("Model alias/ref"),
18638
+ domains: exports_external.array(exports_external.string()).optional().describe("Allowed domains"),
18639
+ fake: exports_external.boolean().optional().describe("Use deterministic fake web results"),
18640
+ file_results: exports_external.boolean().optional().describe("File web snippets as web source refs")
18641
+ }, async ({ scope, query, limit, provider, model, domains, fake, file_results }) => {
18642
+ const service = createKnowledgeService({ scope });
18643
+ try {
18644
+ return jsonText({ ok: true, ...await service.webSearch({ query, limit, provider, modelRef: model, domains, fake, fileResults: file_results }) });
18645
+ } catch (error48) {
18646
+ return errorText(error48 instanceof Error ? error48.message : String(error48));
18647
+ }
18648
+ });
18095
18649
  registerTool(server, "ok_add", "Add a knowledge item", "Add a new item to the knowledge store", {
18096
18650
  title: exports_external.string().describe("Item title"),
18097
18651
  content: exports_external.string().describe("Item content/body"),