@hasna/knowledge 0.2.14 → 0.2.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -13660,7 +13660,7 @@ import { existsSync as existsSync7, readFileSync as readFileSync7, writeFileSync
13660
13660
  // package.json
13661
13661
  var package_default = {
13662
13662
  name: "@hasna/knowledge",
13663
- version: "0.2.14",
13663
+ version: "0.2.16",
13664
13664
  description: "Agent-friendly local knowledge CLI with JSON output, pagination, and safe destructive actions",
13665
13665
  type: "module",
13666
13666
  bin: {
@@ -16393,8 +16393,600 @@ async function ingestSourceRef(options) {
16393
16393
  };
16394
16394
  }
16395
16395
 
16396
+ // src/retrieval.ts
16397
+ import { createHash as createHash6 } from "crypto";
16398
+
16399
+ // src/search.ts
16400
/**
 * Decode a JSON string, accepting only a plain (non-array) object.
 *
 * @param {string|null|undefined} value - Raw JSON text.
 * @returns {object} The decoded object, or `{}` for falsy input, invalid JSON,
 *   or JSON that decodes to anything other than a non-array object.
 */
function parseJsonObject3(value) {
  if (!value) {
    return {};
  }
  let decoded;
  try {
    decoded = JSON.parse(value);
  } catch {
    // Malformed metadata is treated as "no metadata".
    return {};
  }
  const isPlainObject = Boolean(decoded) && typeof decoded === "object" && !Array.isArray(decoded);
  return isPlainObject ? decoded : {};
}
16410
/**
 * Return the first non-empty string found in `metadata` under any of `keys`.
 *
 * @param {object} metadata - Parsed metadata object.
 * @param {string[]} keys - Candidate keys, checked in order.
 * @returns {string|null} The first non-empty string value, or `null`.
 */
function metadataString3(metadata, keys) {
  const isNonEmptyString = (candidate) => typeof candidate === "string" && candidate.length > 0;
  const found = Array.from(keys, (key) => metadata[key]).find(isNonEmptyString);
  return found ?? null;
}
16418
/**
 * Return the first finite number found in `metadata` under any of `keys`.
 *
 * @param {object} metadata - Parsed metadata object.
 * @param {string[]} keys - Candidate keys, checked in order.
 * @returns {number|null} The first finite numeric value (0 included), or `null`.
 */
function metadataNumber3(metadata, keys) {
  const isFiniteNumber = (candidate) => typeof candidate === "number" && Number.isFinite(candidate);
  const found = Array.from(keys, (key) => metadata[key]).find(isFiniteNumber);
  return found ?? null;
}
16426
/**
 * Remove duplicates from `values`, keeping first-occurrence order.
 *
 * @param {Iterable<*>} values
 * @returns {Array<*>} A new array with each distinct value once.
 */
function unique(values) {
  return [...new Set(values)];
}
16429
/**
 * Split a query into at most 16 distinct, lowercased, NFKC-normalized terms.
 * A term is a run of Unicode letters, digits, or underscores.
 *
 * @param {string} query
 * @returns {string[]} Distinct terms in first-occurrence order.
 */
function queryTerms(query) {
  const folded = query.normalize("NFKC").toLowerCase();
  const tokens = folded.match(/[\p{L}\p{N}_]+/gu) ?? [];
  const nonEmpty = tokens.filter((token) => token.length > 0);
  return unique(nonEmpty).slice(0, 16);
}
16433
/**
 * Build a full-text MATCH expression that prefix-matches any of the terms.
 *
 * @param {string[]} terms - Already-tokenized query terms.
 * @returns {string|null} e.g. `"foo* OR bar*"`, or `null` when there are no terms.
 */
function ftsQueryForTerms(terms) {
  if (terms.length === 0) {
    return null;
  }
  const prefixed = [];
  for (const term of terms) {
    prefixed.push(`${term}*`);
  }
  return prefixed.join(" OR ");
}
16438
/**
 * Backslash-escape the characters that are special in a SQL LIKE pattern
 * (`\`, `%`, `_`) so the term is matched literally.
 *
 * @param {string} term
 * @returns {string} The escaped term.
 */
function escapeLikeTerm(term) {
  // "$1" re-inserts the matched character after the added backslash.
  return term.replace(/([\\%_])/g, "\\$1");
}
16441
/**
 * Produce the bound parameters for a catalog LIKE query: each term's
 * `%term%` pattern repeated once per searched field, term by term.
 *
 * @param {string[]} terms - Query terms.
 * @param {number} fieldsPerTerm - Number of columns each term is compared against.
 * @returns {string[]} Flat parameter list.
 */
function likeParams(terms, fieldsPerTerm) {
  return terms.flatMap((term) => {
    const pattern = `%${escapeLikeTerm(term)}%`;
    return Array.from({ length: fieldsPerTerm }).fill(pattern);
  });
}
16444
/**
 * Convert a full-text rank and a result position into a keyword score in [0, 1].
 *
 * The rank passed in comes from `bm25(chunks_fts)`, where a better match has a
 * numerically smaller (more negative) value. The score therefore has to grow
 * with the magnitude of the rank. The previous `1 / (1 + |rank|)` mapping
 * shrank as the match improved, so weak hits could outscore strong ones.
 *
 * @param {number} rank - bm25 rank of the row; non-finite values contribute 0.
 * @param {number} index - Zero-based position of the row in rank order.
 * @returns {number} The larger of the rank-based and position-based scores,
 *   rounded by `roundScore`.
 */
function scoreFromRank(rank, index) {
  const magnitude = Number.isFinite(rank) ? Math.abs(rank) : 0;
  // Monotone in match quality: 0 for no signal, approaching 1 for strong matches.
  const rankScore = magnitude / (1 + magnitude);
  // Position floor keeps the ordering meaningful when bm25 magnitudes are tiny.
  const orderScore = 1 / (1 + index);
  return roundScore(Math.max(rankScore, orderScore));
}
16449
/**
 * Score a catalog entry by the fraction of query terms found in its text.
 *
 * @param {string} haystack - Lowercased text to search in.
 * @param {string[]} terms - Query terms.
 * @returns {number} 0 when there are no terms or no term matches; otherwise
 *   `0.35 + 0.5 * (matched / total)`, capped at 0.85 and rounded.
 */
function catalogScore(haystack, terms) {
  const total = terms.length;
  if (total === 0) {
    return 0;
  }
  let hits = 0;
  for (const term of terms) {
    if (haystack.includes(term)) {
      hits += 1;
    }
  }
  return hits === 0 ? 0 : roundScore(Math.min(0.85, 0.35 + hits / total * 0.5));
}
16457
/**
 * Map a similarity value from the [-1, 1] range onto [0, 1], clamping
 * anything outside that range, and round it.
 *
 * @param {number} score - Raw similarity score.
 * @returns {number} Rescaled score in [0, 1].
 */
function semanticScore(score) {
  const rescaled = (score + 1) / 2;
  const clamped = Math.max(0, Math.min(1, rescaled));
  return roundScore(clamped);
}
16460
/**
 * Round a score to six decimal places.
 *
 * @param {number} score
 * @returns {number} The rounded value.
 */
function roundScore(score) {
  const sixPlaces = score.toFixed(6);
  return Number.parseFloat(sixPlaces);
}
16463
/**
 * Blend the per-signal scores of a result into one score capped at 1.
 *
 * Weights: keyword 0.55, semantic 0.4, catalog 0.35, plus a 0.05 boost when
 * the citation carries a chunk id. Missing signals count as 0.
 *
 * @param {{keyword?: number, semantic?: number, catalog?: number}} scores
 * @param {{chunk_id?: string}|null|undefined} citation
 * @returns {number} Rounded combined score.
 */
function combinedScore(scores, citation) {
  const keywordPart = (scores.keyword ?? 0) * 0.55;
  const semanticPart = (scores.semantic ?? 0) * 0.4;
  const catalogPart = (scores.catalog ?? 0) * 0.35;
  const boost = citation?.chunk_id ? 0.05 : 0;
  return roundScore(Math.min(1, keywordPart + semanticPart + catalogPart + boost));
}
16470
/**
 * Return the `provenance` entry of a metadata object when it is a plain
 * (non-array) object.
 *
 * @param {object} metadata - Parsed metadata.
 * @returns {object|null} The stored provenance object, or `null`.
 */
function existingProvenance(metadata) {
  const candidate = metadata.provenance;
  if (!candidate || typeof candidate !== "object" || Array.isArray(candidate)) {
    return null;
  }
  return candidate;
}
16474
/**
 * Resolve the provenance record for a chunk row.
 *
 * Provenance already stored in the chunk metadata wins. Otherwise, when the
 * row has a source revision id or a source URI, a record is built through
 * `sourceProvenance`, preferring row columns and falling back to metadata.
 *
 * @param {object} row - Chunk row as selected by the search queries.
 * @returns {object|null} Provenance record, or `null` when none can be derived.
 */
function provenanceForChunk2(row) {
  const meta = parseJsonObject3(row.chunk_metadata_json);
  const stored = existingProvenance(meta);
  if (stored) {
    return stored;
  }
  const hasSourceLink = Boolean(row.source_revision_id) || Boolean(row.source_uri);
  if (!hasSourceLink) {
    return null;
  }
  const textFromMeta = (key) => metadataString3(meta, [key]);
  const numberFromMeta = (key) => metadataNumber3(meta, [key]);
  return sourceProvenance({
    source_ref: textFromMeta("source_ref"),
    source_uri: row.source_uri ?? textFromMeta("source_uri"),
    source_kind: row.source_kind ?? textFromMeta("source_kind"),
    source_revision_id: row.source_revision_id,
    revision: row.revision ?? textFromMeta("revision"),
    hash: row.hash ?? textFromMeta("hash"),
    chunk_id: row.chunk_id,
    start_offset: row.start_offset ?? numberFromMeta("start_offset"),
    end_offset: row.end_offset ?? numberFromMeta("end_offset"),
    status: textFromMeta("status"),
    resolver: "open-files-read-only"
  });
}
16495
/**
 * Run the full-text query against `chunks_fts`, joining each hit with its
 * chunk, source revision, source, and wiki page rows, best bm25 rank first.
 *
 * @param {object} db - Database handle exposing `query(sql).all(...params)`.
 * @param {string|null} ftsQuery - MATCH expression; falsy means "no keyword search".
 * @param {number} limit - Maximum number of rows.
 * @returns {object[]} Matching rows, or `[]` when `ftsQuery` is falsy.
 */
function selectFtsChunks(db, ftsQuery, limit) {
  const sql = `SELECT
      chunks_fts.chunk_id,
      c.kind AS chunk_kind,
      c.wiki_page_id,
      c.text,
      c.token_count,
      c.start_offset,
      c.end_offset,
      c.metadata_json AS chunk_metadata_json,
      c.source_revision_id,
      sr.revision,
      sr.hash,
      s.uri AS source_uri,
      s.kind AS source_kind,
      s.title AS source_title,
      wp.path AS wiki_path,
      wp.title AS wiki_title,
      wp.artifact_uri AS wiki_artifact_uri,
      wp.content_hash AS wiki_content_hash,
      wp.status AS wiki_status,
      wp.metadata_json AS wiki_metadata_json,
      bm25(chunks_fts) AS rank
    FROM chunks_fts
    JOIN chunks c ON c.id = chunks_fts.chunk_id
    LEFT JOIN source_revisions sr ON sr.id = c.source_revision_id
    LEFT JOIN sources s ON s.id = sr.source_id
    LEFT JOIN wiki_pages wp ON wp.id = c.wiki_page_id
    WHERE chunks_fts MATCH ?
    ORDER BY rank ASC
    LIMIT ?`;
  return ftsQuery ? db.query(sql).all(ftsQuery, limit) : [];
}
16529
/**
 * Build the WHERE fragment for a catalog LIKE search: one parenthesized
 * group per term, each OR-ing a case-folded LIKE over every field.
 *
 * @param {string[]} fields - Column names to search.
 * @param {string[]} terms - Query terms (only their count is used here).
 * @returns {string} SQL fragment, or the always-false `"1 = 0"` when there are no terms.
 */
function catalogWhere(fields, terms) {
  if (terms.length === 0) {
    return "1 = 0";
  }
  // The group is identical for every term; only the bound parameters differ.
  const perField = fields.map((field) => `lower(COALESCE(${field}, '')) LIKE ? ESCAPE '\\'`);
  const group = `(${perField.join(" OR ")})`;
  return terms.map(() => group).join(" OR ");
}
16535
/**
 * Find active wiki pages whose path, title, artifact URI, or metadata
 * contains any query term, most recently updated first.
 *
 * @param {object} db - Database handle exposing `query(sql).all(...params)`.
 * @param {string[]} terms - Query terms.
 * @param {number} limit - Maximum number of rows.
 * @returns {object[]} Matching `wiki_pages` rows.
 */
function selectWikiPages(db, terms, limit) {
  const searchedColumns = ["path", "title", "artifact_uri", "metadata_json"];
  const condition = catalogWhere(searchedColumns, terms);
  const params = likeParams(terms, searchedColumns.length);
  const statement = db.query(`SELECT id, path, title, artifact_uri, content_hash, status, metadata_json
    FROM wiki_pages
    WHERE status = 'active' AND (${condition})
    ORDER BY updated_at DESC
    LIMIT ?`);
  return statement.all(...params, limit);
}
16543
/**
 * Find knowledge indexes whose kind, name, shard key, artifact URI, or
 * metadata contains any query term, most recently updated first.
 *
 * @param {object} db - Database handle exposing `query(sql).all(...params)`.
 * @param {string[]} terms - Query terms.
 * @param {number} limit - Maximum number of rows.
 * @returns {object[]} Matching `knowledge_indexes` rows.
 */
function selectKnowledgeIndexes(db, terms, limit) {
  const searchedColumns = ["kind", "name", "shard_key", "artifact_uri", "metadata_json"];
  const condition = catalogWhere(searchedColumns, terms);
  const params = likeParams(terms, searchedColumns.length);
  const statement = db.query(`SELECT id, kind, name, artifact_uri, shard_key, metadata_json
    FROM knowledge_indexes
    WHERE ${condition}
    ORDER BY updated_at DESC
    LIMIT ?`);
  return statement.all(...params, limit);
}
16551
/**
 * Shape a full-text chunk row into a search result entry.
 *
 * A row with a `wiki_page_id` becomes a `wiki_chunk` (with an artifact
 * block); any other row becomes a `source_chunk`. Source details prefer row
 * columns and fall back to the chunk metadata.
 *
 * @param {object} row - Row returned by `selectFtsChunks`.
 * @param {number} keywordScore - Keyword score already computed for the row.
 * @returns {object} Result entry with `score` derived via `combinedScore`.
 */
function chunkResult(row, keywordScore) {
  const meta = parseJsonObject3(row.chunk_metadata_json);
  const provenance = provenanceForChunk2(row);
  const sourceRef = metadataString3(meta, ["source_ref"]);
  const sourceUri = row.source_uri ?? metadataString3(meta, ["source_uri"]);
  const fromWiki = Boolean(row.wiki_page_id);

  let source = null;
  if (sourceUri || sourceRef) {
    source = {
      uri: sourceUri,
      ref: sourceRef,
      kind: row.source_kind ?? metadataString3(meta, ["source_kind"]),
      revision: row.revision ?? metadataString3(meta, ["revision"]),
      hash: row.hash ?? metadataString3(meta, ["hash"])
    };
  }

  let artifact = null;
  if (fromWiki) {
    artifact = {
      uri: row.wiki_artifact_uri,
      path: row.wiki_path,
      hash: row.wiki_content_hash,
      shard_key: row.wiki_path
    };
  }

  const scores = { keyword: keywordScore };
  const citation = {
    chunk_id: row.chunk_id,
    start_offset: row.start_offset,
    end_offset: row.end_offset
  };

  return {
    kind: fromWiki ? "wiki_chunk" : "source_chunk",
    id: row.chunk_id,
    title: fromWiki ? row.wiki_title : row.source_title,
    text: row.text,
    score: combinedScore(scores, citation),
    scores,
    source,
    citation,
    artifact,
    provenance,
    reasons: ["keyword_match"]
  };
}
16588
/**
 * Shape a `wiki_pages` catalog row into a search result entry.
 *
 * The catalog score is computed over the lowercased concatenation of the
 * page's path, title, artifact URI, and raw metadata JSON.
 *
 * @param {object} row - Row returned by `selectWikiPages`.
 * @param {string[]} terms - Query terms.
 * @returns {object} `wiki_page` result with no text, source, or citation.
 */
function wikiPageResult(row, terms) {
  const meta = parseJsonObject3(row.metadata_json);
  const haystack = `${row.path} ${row.title} ${row.artifact_uri ?? ""} ${row.metadata_json}`.toLowerCase();
  const scores = { catalog: catalogScore(haystack, terms) };
  const citation = null;
  return {
    kind: "wiki_page",
    id: row.id,
    title: row.title,
    text: null,
    score: combinedScore(scores, citation),
    scores,
    source: null,
    citation,
    artifact: {
      uri: row.artifact_uri,
      path: row.path,
      hash: row.content_hash,
      shard_key: row.path
    },
    provenance: existingProvenance(meta),
    reasons: ["wiki_catalog_match"]
  };
}
16612
/**
 * Shape a `knowledge_indexes` catalog row into a search result entry.
 *
 * The catalog score is computed over the lowercased concatenation of the
 * index's kind, name, shard key, artifact URI, and raw metadata JSON. The
 * artifact path and hash are read from the metadata (`artifact_key`,
 * `content_hash`).
 *
 * @param {object} row - Row returned by `selectKnowledgeIndexes`.
 * @param {string[]} terms - Query terms.
 * @returns {object} `knowledge_index` result with no text, source, or citation.
 */
function indexResult(row, terms) {
  const meta = parseJsonObject3(row.metadata_json);
  const haystack = `${row.kind} ${row.name} ${row.shard_key ?? ""} ${row.artifact_uri ?? ""} ${row.metadata_json}`.toLowerCase();
  const scores = { catalog: catalogScore(haystack, terms) };
  const citation = null;
  return {
    kind: "knowledge_index",
    id: row.id,
    title: row.name,
    text: null,
    score: combinedScore(scores, citation),
    scores,
    source: null,
    citation,
    artifact: {
      uri: row.artifact_uri,
      path: metadataString3(meta, ["artifact_key"]),
      hash: metadataString3(meta, ["content_hash"]),
      shard_key: row.shard_key
    },
    provenance: existingProvenance(meta),
    reasons: ["index_catalog_match"]
  };
}
16636
/**
 * Add a result to the accumulator, folding it into an existing entry that has
 * the same `kind:id` key.
 *
 * On a collision, each signal keeps the larger of the two scores (a zero
 * maximum becomes `undefined`), reasons are unioned, nullish descriptive
 * fields on the existing entry are filled from the new one, and the combined
 * score is recomputed. The existing entry is mutated in place.
 *
 * @param {Map<string, object>} results - Accumulator keyed by `kind:id`.
 * @param {object} entry - Result entry to add.
 * @returns {void}
 */
function mergeResult(results, entry) {
  const mapKey = `${entry.kind}:${entry.id}`;
  const current = results.get(mapKey);
  if (!current) {
    results.set(mapKey, entry);
    return;
  }
  const strongest = (signal) => Math.max(current.scores[signal] ?? 0, entry.scores[signal] ?? 0) || undefined;
  current.scores = {
    keyword: strongest("keyword"),
    semantic: strongest("semantic"),
    catalog: strongest("catalog")
  };
  current.reasons = unique([...current.reasons, ...entry.reasons]);
  for (const field of ["text", "title", "source", "citation", "artifact", "provenance"]) {
    current[field] = current[field] ?? entry[field];
  }
  current.score = combinedScore(current.scores, current.citation);
}
16657
/**
 * Sort results in place: highest score first, ties broken by result kind
 * (source chunk, wiki chunk, wiki page, knowledge index) and then by id.
 *
 * @param {object[]} results - Result entries; this array is reordered.
 * @returns {object[]} The same array, sorted.
 */
function sortResults(results) {
  const rankOfKind = {
    source_chunk: 0,
    wiki_chunk: 1,
    wiki_page: 2,
    knowledge_index: 3
  };
  return results.sort((left, right) => {
    if (left.score === right.score) {
      const kindDelta = rankOfKind[left.kind] - rankOfKind[right.kind];
      return kindDelta || left.id.localeCompare(right.id);
    }
    return right.score - left.score;
  });
}
16670
/**
 * Run keyword, catalog, and (optionally) semantic search over the knowledge
 * database and merge the hits into one ranked list.
 *
 * Semantic search runs when `options.semantic` or `options.fake` is `true`,
 * or when `options.modelRef` is set. A semantic failure is reported in
 * `warnings` rather than thrown.
 *
 * @param {object} options
 * @param {string} options.query - Search text; must be non-blank.
 * @param {number} [options.limit=10] - Maximum results, clamped to 1..100 and
 *   truncated to an integer.
 * @param {string} options.dbPath - Path of the knowledge database.
 * @returns {Promise<object>} Query echo, effective limit, mode flags, semantic
 *   provider details, per-phase counts, warnings, and the merged results.
 * @throws {Error} When the trimmed query is empty.
 */
async function hybridSearch(options) {
  const query = options.query.trim();
  if (!query)
    throw new Error("Search query is required.");
  // The limit is bound into SQL `LIMIT ?`, which rejects values that are not
  // losslessly convertible to an integer, so normalize it up front.
  const requestedLimit = Number(options.limit ?? 10);
  const limit = Number.isNaN(requestedLimit) ? 10 : Math.trunc(Math.max(1, Math.min(requestedLimit, 100)));
  const terms = queryTerms(query);
  const ftsQuery = ftsQueryForTerms(terms);
  const semanticEnabled = options.semantic === true || options.fake === true || Boolean(options.modelRef);
  const warnings = [];
  let semanticProvider = null;
  let semanticModel = null;
  let semanticDimensions = null;
  let keywordCount = 0;
  let catalogCount = 0;
  let semanticCount = 0;
  const merged = new Map();
  migrateKnowledgeDb(options.dbPath);
  const db = openKnowledgeDb(options.dbPath);
  try {
    // Over-fetch keyword hits so merging and re-sorting still has enough candidates.
    const ftsRows = selectFtsChunks(db, ftsQuery, Math.max(limit * 3, 20));
    keywordCount = ftsRows.length;
    ftsRows.forEach((row, index) => mergeResult(merged, chunkResult(row, scoreFromRank(row.rank, index))));
    const wikiRows = selectWikiPages(db, terms, Math.max(limit, 10));
    const indexRows = selectKnowledgeIndexes(db, terms, Math.max(limit, 10));
    catalogCount = wikiRows.length + indexRows.length;
    wikiRows.forEach((row) => mergeResult(merged, wikiPageResult(row, terms)));
    indexRows.forEach((row) => mergeResult(merged, indexResult(row, terms)));
  } finally {
    db.close();
  }
  if (semanticEnabled) {
    try {
      const semantic = await searchVectorIndex({
        dbPath: options.dbPath,
        query,
        limit: Math.max(limit * 3, 20),
        config: options.config,
        env: options.env,
        modelRef: options.modelRef,
        dimensions: options.dimensions,
        fake: options.fake,
        batchSize: options.batchSize,
        maxParallelCalls: options.maxParallelCalls
      });
      semanticProvider = semantic.provider;
      semanticModel = semantic.model;
      semanticDimensions = semantic.dimensions;
      semanticCount = semantic.results.length;
      for (const row of semantic.results) {
        // If the keyword phase already found this chunk as a wiki chunk, reuse
        // that kind so both hits merge into one entry instead of two.
        const kind = merged.has(`wiki_chunk:${row.chunk_id}`) ? "wiki_chunk" : "source_chunk";
        const result = {
          kind,
          id: row.chunk_id,
          title: null,
          text: row.text,
          score: 0,
          scores: { semantic: semanticScore(row.score) },
          source: {
            uri: row.source_uri,
            ref: row.source_ref,
            kind: row.provenance?.source_kind ?? null,
            revision: row.revision,
            hash: row.hash
          },
          citation: {
            chunk_id: row.chunk_id,
            start_offset: row.provenance?.start_offset ?? null,
            end_offset: row.provenance?.end_offset ?? null
          },
          artifact: null,
          provenance: row.provenance,
          reasons: ["semantic_match"]
        };
        result.score = combinedScore(result.scores, result.citation);
        mergeResult(merged, result);
      }
    } catch (error48) {
      // Semantic search is best-effort: keyword and catalog results are still returned.
      warnings.push(`semantic_search_failed: ${error48 instanceof Error ? error48.message : String(error48)}`);
    }
  }
  const results = sortResults(Array.from(merged.values())).slice(0, limit);
  return {
    query,
    limit,
    mode: {
      keyword: true,
      catalog: true,
      semantic: semanticEnabled
    },
    semantic_provider: semanticProvider,
    semantic_model: semanticModel,
    semantic_dimensions: semanticDimensions,
    counts: {
      keyword_results: keywordCount,
      catalog_results: catalogCount,
      semantic_results: semanticCount,
      merged_results: results.length
    },
    warnings,
    results
  };
}
16771
+
16772
+ // src/retrieval.ts
16773
/**
 * Derive a deterministic identifier: `<prefix>_` followed by the first 20 hex
 * characters of the SHA-256 digest of `value`.
 *
 * @param {string} prefix - Identifier prefix.
 * @param {string} value - Data to hash.
 * @returns {string} The stable identifier.
 */
function stableId4(prefix, value) {
  const digest = createHash6("sha256").update(value).digest("hex");
  const shortDigest = digest.slice(0, 20);
  return `${prefix}_${shortDigest}`;
}
16776
/**
 * Canonicalize a query: NFKC-normalize, trim, collapse internal whitespace
 * runs to single spaces, and lowercase.
 *
 * @param {string} query
 * @returns {string} The normalized query.
 */
function normalizeQuery(query) {
  const trimmed = query.normalize("NFKC").trim();
  // After trimming there is no leading or trailing whitespace, so splitting
  // on whitespace runs and re-joining collapses only the internal gaps.
  const collapsed = trimmed.split(/\s+/).join(" ");
  return collapsed.toLowerCase();
}
16779
/**
 * Extract at most 16 distinct terms (runs of Unicode letters, digits, or
 * underscores) from the normalized form of a query.
 *
 * @param {string} query
 * @returns {string[]} Distinct terms in first-occurrence order.
 */
function queryTerms2(query) {
  const tokens = normalizeQuery(query).match(/[\p{L}\p{N}_]+/gu) ?? [];
  const distinct = [...new Set(tokens)];
  return distinct.slice(0, 16);
}
16782
/**
 * Build the comparable text of a result: its title and text joined by a
 * space, NFKC-normalized and lowercased.
 *
 * The text is NFKC-normalized because the query terms it is compared against
 * are produced from an NFKC-normalized query. Without it, compatibility forms
 * (full-width letters, ligatures) in the result would never match.
 *
 * @param {{title?: string|null, text?: string|null}} result
 * @returns {string} Comparable text; empty when both fields are missing.
 */
function textForResult(result) {
  return [result.title, result.text].filter(Boolean).join(" ").normalize("NFKC").toLowerCase();
}
16785
/**
 * Fraction of query terms that literally appear in a result's title or text.
 *
 * @param {object} result - Search result.
 * @param {string[]} terms - Query terms.
 * @returns {number} Value in [0, 1] rounded to six decimals; 0 when there are no terms.
 */
function exactScore(result, terms) {
  const total = terms.length;
  if (total === 0) {
    return 0;
  }
  const haystack = textForResult(result);
  let hits = 0;
  for (const term of terms) {
    if (haystack.includes(term)) {
      hits += 1;
    }
  }
  return Number((hits / total).toFixed(6));
}
16792
/**
 * Decide whether a provenance record allows a result to be used.
 *
 * Missing provenance and provenance without a read-only flag are accepted.
 * When a flag is present (`read_only` is checked before `read_only_sources`),
 * it must be exactly `true`.
 *
 * @param {object|null|undefined} provenance
 * @returns {boolean} `false` only when the first flag present is not `true`.
 */
function hasReadOnlyProvenance(provenance) {
  if (!provenance) {
    return true;
  }
  for (const flag of ["read_only", "read_only_sources"]) {
    if (flag in provenance) {
      return provenance[flag] === true;
    }
  }
  return true;
}
16801
/**
 * Report whether a provenance record marks its source as stale.
 *
 * A truthy `stale` property wins. Otherwise, when a `status` property is
 * present, the decision is delegated to `isStaleStatus`.
 *
 * @param {object|null|undefined} provenance
 * @returns {boolean} `false` for missing provenance or when neither property applies.
 */
function isStale(provenance) {
  if (!provenance) {
    return false;
  }
  const flaggedStale = "stale" in provenance && Boolean(provenance.stale);
  if (flaggedStale) {
    return true;
  }
  return "status" in provenance ? isStaleStatus(provenance.status) : false;
}
16810
/**
 * Estimate how fresh a result is, from the strongest evidence available.
 *
 * - 0: provenance marks the result stale.
 * - 1: the source carries a hash or a revision.
 * - 0.85: the artifact carries a hash.
 * - 0.75: provenance lists at least one source ref.
 * - 0.55: none of the above.
 *
 * Provenance can come straight from stored JSON metadata, so `source_refs` is
 * only trusted when it really is an array. The previous code read `.length`
 * on whatever was stored and threw on `null`.
 *
 * @param {object} result - Search result.
 * @returns {number} Freshness score in [0, 1].
 */
function freshnessScore(result) {
  if (isStale(result.provenance))
    return 0;
  if (result.source?.hash || result.source?.revision)
    return 1;
  if (result.artifact?.hash)
    return 0.85;
  const sourceRefs = result.provenance?.source_refs;
  if (Array.isArray(sourceRefs) && sourceRefs.length > 0)
    return 0.75;
  return 0.55;
}
16821
/**
 * Rate how well a result can be cited.
 *
 * - 1: it has a chunk id plus a source or artifact URI.
 * - 0.75: its provenance has a truthy `citation_required`.
 * - 0.65: it has an artifact URI only.
 * - 0.35: otherwise.
 *
 * @param {object} result - Search result.
 * @returns {number} Citation score.
 */
function citationScore(result) {
  const hasLocation = Boolean(result.source?.uri || result.artifact?.uri);
  if (result.citation?.chunk_id && hasLocation) {
    return 1;
  }
  const { provenance } = result;
  if (provenance && "citation_required" in provenance && provenance.citation_required) {
    return 0.75;
  }
  return result.artifact?.uri ? 0.65 : 0.35;
}
16830
/**
 * Fixed authority weight per result kind.
 *
 * @param {{kind: string}} result
 * @returns {number} 0.85 for wiki chunks, 0.8 for source chunks, 0.65 for
 *   wiki pages, and 0.55 for anything else.
 */
function authorityScore(result) {
  switch (result.kind) {
    case "wiki_chunk":
      return 0.85;
    case "source_chunk":
      return 0.8;
    case "wiki_page":
      return 0.65;
    default:
      return 0.55;
  }
}
16839
/**
 * Re-score a search result from its base score plus exact-term, citation,
 * freshness, and authority signals.
 *
 * Weights: base 0.65, exact 0.1, citation 0.1, freshness 0.1, authority 0.05,
 * with the sum capped at 1. Reason tags are added when exact > 0.5,
 * citation >= 0.75, or freshness >= 0.85.
 *
 * @param {object} result - Result produced by the hybrid search.
 * @param {string[]} terms - Query terms.
 * @returns {object} Copy of the result with the new `score`, extended
 *   `reasons`, and a `rerank` breakdown that includes `final_score`.
 */
function rerank(result, terms) {
  const breakdown = {
    base_score: result.score,
    exact_score: exactScore(result, terms),
    citation_score: citationScore(result),
    freshness_score: freshnessScore(result),
    authority_score: authorityScore(result)
  };
  const weighted = breakdown.base_score * 0.65 + breakdown.exact_score * 0.1 + breakdown.citation_score * 0.1 + breakdown.freshness_score * 0.1 + breakdown.authority_score * 0.05;
  const finalScore = Number(Math.min(1, weighted).toFixed(6));

  const reasonTags = new Set(result.reasons);
  if (breakdown.exact_score > 0.5) {
    reasonTags.add("exact_term");
  }
  if (breakdown.citation_score >= 0.75) {
    reasonTags.add("cited_source");
  }
  if (breakdown.freshness_score >= 0.85) {
    reasonTags.add("fresh_source");
  }

  return {
    ...result,
    score: finalScore,
    reasons: [...reasonTags],
    rerank: { ...breakdown, final_score: finalScore }
  };
}
16865
/**
 * Produce a whitespace-normalized quote from a result's text (or, when the
 * text is null/undefined, its title), truncated to at most `maxChars`.
 *
 * The previous version kept `maxChars - 1` characters and then appended the
 * three-character ellipsis, so truncated quotes overshot the limit by two.
 * The ellipsis is now counted against the budget, and a high surrogate left
 * dangling by the cut is dropped.
 *
 * @param {{text?: string|null, title?: string|null}} result
 * @param {number} maxChars - Maximum length in UTF-16 code units. Values below
 *   3 cannot fit the ellipsis; a truncated quote is then just `"..."`.
 * @returns {string|null} The quote, or `null` when there is nothing to quote.
 */
function quoteFor(result, maxChars) {
  const source = result.text ?? result.title;
  if (!source)
    return null;
  const normalized = source.replace(/\s+/g, " ").trim();
  if (normalized.length <= maxChars)
    return normalized;
  const ellipsis = "...";
  const budget = Math.max(0, maxChars - ellipsis.length);
  let head = normalized.slice(0, budget);
  // Do not end on the first half of a surrogate pair.
  if (/[\uD800-\uDBFF]$/.test(head))
    head = head.slice(0, -1);
  return `${head.trim()}${ellipsis}`;
}
16872
/**
 * Build the citation record for a result.
 *
 * The citation id is a stable hash of the result kind, id, source URI, and
 * artifact URI. Missing source, artifact, and citation fields become `null`.
 * The hash prefers the source hash over the artifact hash, and the quote is
 * limited to 500 characters.
 *
 * @param {object} result - Reranked search result.
 * @returns {object} Citation record.
 */
function citationFor(result) {
  const { source, artifact, citation } = result;
  const identity = `${result.kind}\x00${result.id}\x00${source?.uri ?? ""}\x00${artifact?.uri ?? ""}`;
  return {
    id: stableId4("cite", identity),
    result_id: result.id,
    kind: result.kind,
    source_uri: source?.uri ?? null,
    source_ref: source?.ref ?? null,
    artifact_uri: artifact?.uri ?? null,
    artifact_path: artifact?.path ?? null,
    revision: source?.revision ?? null,
    hash: source?.hash ?? artifact?.hash ?? null,
    chunk_id: citation?.chunk_id ?? null,
    start_offset: citation?.start_offset ?? null,
    end_offset: citation?.end_offset ?? null,
    quote: quoteFor(result, 500),
    provenance: result.provenance
  };
}
16891
/**
 * Build the context excerpt for a result, linked to its citation.
 *
 * @param {object} result - Reranked search result.
 * @param {{id: string}} citation - Citation record for the same result.
 * @param {number} contextChars - Maximum excerpt length.
 * @returns {object|null} Excerpt record, or `null` when the result has no quotable text.
 */
function excerptFor(result, citation, contextChars) {
  const body = quoteFor(result, contextChars);
  if (!body) {
    return null;
  }
  const excerptId = stableId4("excerpt", `${result.kind}\x00${result.id}`);
  return {
    id: excerptId,
    result_id: result.id,
    citation_id: citation.id,
    kind: result.kind,
    text: body,
    score: result.score
  };
}
16904
/**
 * Build a comma-separated list of `?` SQL placeholders, one per value.
 *
 * @param {Array<*>} values - Values that will be bound.
 * @returns {string} e.g. `"?, ?, ?"`; empty for an empty array.
 */
function placeholders(values) {
  const marks = new Array(values.length).fill("?");
  return marks.join(", ");
}
16907
/**
 * Load stored citations and wiki backlinks related to a set of results.
 *
 * Citations are looked up by the results' chunk ids and, for `wiki_page`
 * results, by page id. Backlinks are looked up for wiki pages in either
 * direction. Each query returns at most 50 rows.
 *
 * Ids are de-duplicated before being bound, and a citation row matched by
 * both the chunk query and the page query is returned once. Previously it
 * appeared twice.
 *
 * @param {string} dbPath - Path of the knowledge database.
 * @param {object[]} results - Search results.
 * @returns {{citations: object[], backlinks: object[]}} Related rows; both
 *   empty (and no database opened) when no result has a chunk id or is a wiki page.
 */
function loadGraphEvidence(dbPath, results) {
  const chunkIds = [...new Set(results.map((result) => result.citation?.chunk_id).filter((id) => Boolean(id)))];
  const wikiPageIds = [...new Set(results.filter((result) => result.kind === "wiki_page").map((result) => result.id))];
  const citations = [];
  const backlinks = [];
  if (chunkIds.length === 0 && wikiPageIds.length === 0)
    return { citations, backlinks };
  const seenCitationIds = new Set();
  const addCitations = (rows) => {
    for (const row of rows) {
      if (seenCitationIds.has(row.id))
        continue;
      seenCitationIds.add(row.id);
      citations.push(row);
    }
  };
  const db = openKnowledgeDb(dbPath);
  try {
    if (chunkIds.length > 0) {
      addCitations(db.query(`SELECT id, wiki_page_id, chunk_id, source_uri, quote, start_offset, end_offset
        FROM citations
        WHERE chunk_id IN (${placeholders(chunkIds)})
        ORDER BY created_at DESC
        LIMIT 50`).all(...chunkIds));
    }
    if (wikiPageIds.length > 0) {
      addCitations(db.query(`SELECT id, wiki_page_id, chunk_id, source_uri, quote, start_offset, end_offset
        FROM citations
        WHERE wiki_page_id IN (${placeholders(wikiPageIds)})
        ORDER BY created_at DESC
        LIMIT 50`).all(...wikiPageIds));
      backlinks.push(...db.query(`SELECT from_page_id, to_page_id, label
        FROM wiki_backlinks
        WHERE from_page_id IN (${placeholders(wikiPageIds)}) OR to_page_id IN (${placeholders(wikiPageIds)})
        LIMIT 50`).all(...wikiPageIds, ...wikiPageIds));
    }
  } finally {
    db.close();
  }
  return { citations, backlinks };
}
16939
/**
 * Build a context pack for a query: run the hybrid search, drop results that
 * are not read-only or are stale, rerank the rest, and attach citations,
 * excerpts, related graph rows, and human-readable notes.
 *
 * @param {object} options - Options accepted by `hybridSearch`, plus:
 * @param {number} [options.contextChars=1200] - Maximum excerpt length,
 *   clamped to 200..4000. A value that is not a number falls back to the
 *   default; previously NaN passed through the clamp and reduced every
 *   excerpt to `"..."`.
 * @returns {Promise<object>} Query, normalized query, timestamp, mode,
 *   warnings, search counts, reranked results, citations, excerpts, graph
 *   evidence, and permission/freshness notes.
 */
async function retrieveKnowledgeContext(options) {
  const requestedChars = Number(options.contextChars ?? 1200);
  const contextChars = Number.isNaN(requestedChars) ? 1200 : Math.max(200, Math.min(requestedChars, 4000));
  const search = await hybridSearch(options);
  const terms = queryTerms2(search.query);
  const warnings = [...search.warnings];
  const permissionNotes = new Set();
  const freshnessNotes = new Set();
  const filtered = search.results.filter((result) => {
    // The permission check runs first, so a result failing both checks is
    // reported as permission-filtered only.
    if (!hasReadOnlyProvenance(result.provenance)) {
      warnings.push(`permission_filtered: ${result.kind}:${result.id}`);
      permissionNotes.add("Dropped a result because provenance was not read-only.");
      return false;
    }
    if (isStale(result.provenance)) {
      warnings.push(`stale_filtered: ${result.kind}:${result.id}`);
      freshnessNotes.add("Dropped a stale result whose source status requires reindexing.");
      return false;
    }
    return true;
  });
  const results = filtered.map((result) => rerank(result, terms)).sort((a, b) => b.score - a.score || a.id.localeCompare(b.id)).slice(0, search.limit);
  const citations = results.map(citationFor);
  // Citations are index-aligned with results; excerpts skip results without text.
  const excerpts = results.map((result, index) => excerptFor(result, citations[index], contextChars)).filter((entry) => Boolean(entry));
  for (const result of results) {
    if (result.provenance && "read_only" in result.provenance && result.provenance.read_only) {
      permissionNotes.add("All source-backed excerpts are read-only and citation-required.");
    }
    if (result.rerank.freshness_score >= 0.85) {
      freshnessNotes.add("Fresh source revision/hash or artifact hash is present for top context.");
    }
  }
  return {
    query: search.query,
    normalized_query: normalizeQuery(search.query),
    created_at: new Date().toISOString(),
    mode: search.mode,
    warnings,
    search_counts: search.counts,
    results,
    citations,
    excerpts,
    graph: loadGraphEvidence(options.dbPath, results),
    notes: {
      permissions: Array.from(permissionNotes),
      freshness: Array.from(freshnessNotes)
    }
  };
}
16987
+
16396
16988
  // src/storage-contract.ts
16397
- import { createHash as createHash6, randomUUID as randomUUID4 } from "crypto";
16989
+ import { createHash as createHash7, randomUUID as randomUUID4 } from "crypto";
16398
16990
  var GENERATED_ARTIFACTS = [
16399
16991
  {
16400
16992
  kind: "schema",
@@ -16430,7 +17022,7 @@ var GENERATED_ARTIFACTS = [
16430
17022
  function hashArtifactBody(body) {
16431
17023
  const bytes = typeof body === "string" ? Buffer.from(body) : Buffer.from(body);
16432
17024
  return {
16433
- hash: `sha256:${createHash6("sha256").update(bytes).digest("hex")}`,
17025
+ hash: `sha256:${createHash7("sha256").update(bytes).digest("hex")}`,
16434
17026
  size_bytes: bytes.byteLength
16435
17027
  };
16436
17028
  }
@@ -16565,15 +17157,19 @@ function recordStorageObjects(db, objects, now = new Date) {
16565
17157
  }
16566
17158
 
16567
17159
  // src/wiki-layout.ts
16568
- import { createHash as createHash7 } from "crypto";
17160
+ import { createHash as createHash8 } from "crypto";
16569
17161
  function todayParts(now) {
16570
17162
  const year = String(now.getUTCFullYear());
16571
17163
  const month = String(now.getUTCMonth() + 1).padStart(2, "0");
16572
17164
  const day = String(now.getUTCDate()).padStart(2, "0");
16573
17165
  return { year, month, day };
16574
17166
  }
16575
- function stableId4(prefix, value) {
16576
- return `${prefix}_${createHash7("sha256").update(value).digest("hex").slice(0, 20)}`;
17167
/**
 * Derive a deterministic identifier: `<prefix>_` followed by the first 20 hex
 * characters of the SHA-256 digest of `value`.
 *
 * @param {string} prefix - Identifier prefix.
 * @param {string} value - Data to hash.
 * @returns {string} The stable identifier.
 */
function stableId5(prefix, value) {
  const digest = createHash8("sha256").update(value).digest("hex");
  const shortDigest = digest.slice(0, 20);
  return `${prefix}_${shortDigest}`;
}
17170
/**
 * Estimate the token count of a text as 1.25 tokens per
 * whitespace-separated word, rounded up, with a minimum of 1.
 *
 * @param {string} text
 * @returns {number} Estimated token count (always >= 1).
 */
function estimateTokenCount2(text) {
  const wordCount = (text.trim().match(/\S+/g) ?? []).length;
  const estimate = Math.ceil(wordCount * 1.25);
  return Math.max(1, estimate);
}
16578
17174
  function agentSchemaTemplate() {
16579
17175
  return `# Knowledge Agent Schema v1
@@ -16685,6 +17281,33 @@ function provenanceFor(artifact) {
16685
17281
  artifact_key: artifact.key
16686
17282
  });
16687
17283
  }
17284
/**
 * Replace the indexed chunk of a wiki page with one chunk holding `body`.
 *
 * Existing chunks of the page are removed from both `chunks_fts` and
 * `chunks`. A single chunk (kind `"wiki"`, ordinal 0, offsets 0..body.length)
 * is then inserted together with its full-text row. The chunk id is derived
 * from the page id and the artifact hash, or the artifact URI when there is
 * no hash.
 *
 * @param {object} db - Database handle exposing `query(sql).all()` and `run(sql, params)`.
 * @param {string} pageId - Wiki page id.
 * @param {string} title - Page title stored in the full-text row.
 * @param {object} artifact - Artifact descriptor (`key`, `uri`, optional `hash`).
 * @param {string} body - Page text to index.
 * @param {string} now - Timestamp stored as `created_at`.
 * @returns {void}
 */
function recordWikiChunk(db, pageId, title, artifact, body, now) {
  const provenance = provenanceFor(artifact);
  const chunkId = stableId5("chk", `${pageId}\x00${artifact.hash ?? artifact.uri}`);

  // Full-text rows are removed per chunk id before the chunk rows themselves.
  const staleChunks = db.query("SELECT id FROM chunks WHERE wiki_page_id = ?").all(pageId);
  staleChunks.forEach((stale) => {
    db.run("DELETE FROM chunks_fts WHERE chunk_id = ?", [stale.id]);
  });
  db.run("DELETE FROM chunks WHERE wiki_page_id = ?", [pageId]);

  const metadataJson = JSON.stringify({
    artifact_key: artifact.key,
    artifact_uri: artifact.uri,
    content_hash: artifact.hash ?? null,
    provenance
  });
  const chunkParams = [
    chunkId,
    pageId,
    "wiki",
    0,
    body,
    estimateTokenCount2(body),
    0,
    body.length,
    metadataJson,
    now
  ];
  db.run(`INSERT INTO chunks (id, wiki_page_id, kind, ordinal, text, token_count, start_offset, end_offset, metadata_json, created_at)
    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`, chunkParams);
  db.run("INSERT INTO chunks_fts (chunk_id, text, title, source_uri) VALUES (?, ?, ?, ?)", [chunkId, body, title, artifact.uri]);
}
16688
17311
  function recordWikiLayoutCatalog(db, artifacts, now = new Date) {
16689
17312
  const timestamp = now.toISOString();
16690
17313
  const rootIndex = artifacts.find((artifact) => artifact.key.endsWith("indexes/root.md"));
@@ -16696,7 +17319,7 @@ function recordWikiLayoutCatalog(db, artifacts, now = new Date) {
16696
17319
  artifact_uri = excluded.artifact_uri,
16697
17320
  metadata_json = excluded.metadata_json,
16698
17321
  updated_at = excluded.updated_at`, [
16699
- stableId4("idx", "root:indexes/root.md"),
17322
+ stableId5("idx", "root:indexes/root.md"),
16700
17323
  "root",
16701
17324
  "root",
16702
17325
  rootIndex.uri,
@@ -16711,6 +17334,7 @@ function recordWikiLayoutCatalog(db, artifacts, now = new Date) {
16711
17334
  ]);
16712
17335
  }
16713
17336
  if (wikiReadme) {
17337
+ const wikiPageId = stableId5("wiki", "wiki/README.md");
16714
17338
  db.run(`INSERT INTO wiki_pages (id, path, title, artifact_uri, content_hash, status, metadata_json, created_at, updated_at)
16715
17339
  VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
16716
17340
  ON CONFLICT(path) DO UPDATE SET
@@ -16720,7 +17344,7 @@ function recordWikiLayoutCatalog(db, artifacts, now = new Date) {
16720
17344
  status = excluded.status,
16721
17345
  metadata_json = excluded.metadata_json,
16722
17346
  updated_at = excluded.updated_at`, [
16723
- stableId4("wiki", "wiki/README.md"),
17347
+ wikiPageId,
16724
17348
  "wiki/README.md",
16725
17349
  "Wiki",
16726
17350
  wikiReadme.uri,
@@ -16733,6 +17357,7 @@ function recordWikiLayoutCatalog(db, artifacts, now = new Date) {
16733
17357
  timestamp,
16734
17358
  timestamp
16735
17359
  ]);
17360
+ recordWikiChunk(db, wikiPageId, "Wiki", wikiReadme, wikiReadmeTemplate(), timestamp);
16736
17361
  }
16737
17362
  }
16738
17363
 
@@ -16881,6 +17506,22 @@ class KnowledgeService {
16881
17506
  config: this.config()
16882
17507
  });
16883
17508
  }
17509
+ async search(options) {
17510
+ const workspace = this.ensureWorkspace();
17511
+ return hybridSearch({
17512
+ ...options,
17513
+ dbPath: workspace.knowledgeDbPath,
17514
+ config: this.config()
17515
+ });
17516
+ }
17517
+ async retrieveContext(options) {
17518
+ const workspace = this.ensureWorkspace();
17519
+ return retrieveKnowledgeContext({
17520
+ ...options,
17521
+ dbPath: workspace.knowledgeDbPath,
17522
+ config: this.config()
17523
+ });
17524
+ }
16884
17525
  }
16885
17526
  function createKnowledgeService(options = {}) {
16886
17527
  return new KnowledgeService(options);
@@ -17030,6 +17671,38 @@ function buildServer() {
17030
17671
  return errorText(error48 instanceof Error ? error48.message : String(error48));
17031
17672
  }
17032
17673
  });
17674
+ registerTool(server, "ok_search", "Hybrid knowledge search", "Search source chunks, generated wiki pages, sharded indexes, and optional semantic vectors", {
17675
+ scope: scopeField,
17676
+ query: exports_external.string().describe("Search query"),
17677
+ limit: exports_external.number().optional().describe("Maximum results"),
17678
+ semantic: exports_external.boolean().optional().describe("Include vector semantic results"),
17679
+ model: exports_external.string().optional().describe("Embedding model ref, default openai:text-embedding-3-small"),
17680
+ dimensions: exports_external.number().optional().describe("Embedding dimensions for deterministic fake mode"),
17681
+ fake: exports_external.boolean().optional().describe("Use deterministic fake embeddings for local tests")
17682
+ }, async ({ scope, query, limit, semantic, model, dimensions, fake }) => {
17683
+ const service = createKnowledgeService({ scope });
17684
+ try {
17685
+ return jsonText({ ok: true, ...await service.search({ query, limit, semantic, modelRef: model, dimensions, fake }) });
17686
+ } catch (error48) {
17687
+ return errorText(error48 instanceof Error ? error48.message : String(error48));
17688
+ }
17689
+ });
17690
+ registerTool(server, "knowledge_search", "Knowledge context search", "Return a reranked citation context pack for agent prompts", {
17691
+ scope: scopeField,
17692
+ query: exports_external.string().describe("Search query or prompt"),
17693
+ limit: exports_external.number().optional().describe("Maximum context results"),
17694
+ semantic: exports_external.boolean().optional().describe("Include vector semantic results"),
17695
+ model: exports_external.string().optional().describe("Embedding model ref, default openai:text-embedding-3-small"),
17696
+ dimensions: exports_external.number().optional().describe("Embedding dimensions for deterministic fake mode"),
17697
+ fake: exports_external.boolean().optional().describe("Use deterministic fake embeddings for local tests")
17698
+ }, async ({ scope, query, limit, semantic, model, dimensions, fake }) => {
17699
+ const service = createKnowledgeService({ scope });
17700
+ try {
17701
+ return jsonText({ ok: true, ...await service.retrieveContext({ query, limit, semantic, modelRef: model, dimensions, fake }) });
17702
+ } catch (error48) {
17703
+ return errorText(error48 instanceof Error ? error48.message : String(error48));
17704
+ }
17705
+ });
17033
17706
  registerTool(server, "ok_add", "Add a knowledge item", "Add a new item to the knowledge store", {
17034
17707
  title: exports_external.string().describe("Item title"),
17035
17708
  content: exports_external.string().describe("Item content/body"),