@hasna/knowledge 0.2.15 → 0.2.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -97,6 +97,7 @@ open-knowledge embeddings search "company wiki policy" --scope project --json
97
97
  # Hybrid search over source chunks, generated wiki pages, indexes, and optional vectors
98
98
  open-knowledge search "company wiki policy" --scope project --json
99
99
  open-knowledge search "company wiki policy" --scope project --semantic --json
100
+ open-knowledge search "company wiki policy" --scope project --context --json
100
101
  ```
101
102
 
102
103
  ## Commands
@@ -245,6 +246,7 @@ permission/path/delete metadata, and records a local run ledger.
245
246
  ```bash
246
247
  open-knowledge search <query> [--scope project] [--limit <n>] [--json]
247
248
  open-knowledge search <query> --semantic [--model openai:text-embedding-3-small] [--scope project] [--json]
249
+ open-knowledge search <query> --context [--semantic] [--scope project] [--json]
248
250
  ```
249
251
  Run hybrid search over `chunks_fts`, generated wiki chunks, wiki/index catalog
250
252
  rows, and optional vector results. The default path is local-only keyword and
@@ -252,6 +254,11 @@ catalog search. `--semantic` embeds the query and merges vector results from
252
254
  `vector_index_entries`, preserving source refs, artifact URIs, citations,
253
255
  revision/hash metadata, and provenance in each structured result.
254
256
 
257
+ `--context` returns a reranked context pack for agents: selected excerpts,
258
+ assembled citations, freshness and permission notes, graph evidence from
259
+ `citations`/`wiki_backlinks`, and final rerank scores. This is the shape future
260
+ `knowledge <prompt>` flows should send to a model instead of raw search rows.
261
+
255
262
  ### safety
256
263
  ```bash
257
264
  open-knowledge safety status [--scope project] [--json]
@@ -331,7 +338,9 @@ The MCP server exposes item tools (`ok_add`, `ok_list`, `ok_get`, `ok_update`,
331
338
  `ok_storage_status`), provider/embedding tools (`ok_provider_status`,
332
339
  `ok_provider_models`, `ok_embeddings_status`, `ok_embeddings_index`,
333
340
  `ok_semantic_search`), hybrid retrieval (`ok_search`), and source-ref
334
- parsing/resolution (`ok_parse_source_ref`, `ok_resolve_source`).
341
+ parsing/resolution (`ok_parse_source_ref`, `ok_resolve_source`). The
342
+ `knowledge_search` MCP tool returns reranked citation context packs for agent
343
+ prompts.
335
344
 
336
345
  ## Source And Artifact Boundary
337
346
 
@@ -13660,7 +13660,7 @@ import { existsSync as existsSync7, readFileSync as readFileSync7, writeFileSync
13660
13660
  // package.json
13661
13661
  var package_default = {
13662
13662
  name: "@hasna/knowledge",
13663
- version: "0.2.15",
13663
+ version: "0.2.16",
13664
13664
  description: "Agent-friendly local knowledge CLI with JSON output, pagination, and safe destructive actions",
13665
13665
  type: "module",
13666
13666
  bin: {
@@ -16393,6 +16393,9 @@ async function ingestSourceRef(options) {
16393
16393
  };
16394
16394
  }
16395
16395
 
16396
+ // src/retrieval.ts
16397
+ import { createHash as createHash6 } from "crypto";
16398
+
16396
16399
  // src/search.ts
16397
16400
  function parseJsonObject3(value) {
16398
16401
  if (!value)
@@ -16766,8 +16769,224 @@ async function hybridSearch(options) {
16766
16769
  };
16767
16770
  }
16768
16771
 
16772
+ // src/retrieval.ts
16773
/**
 * Build a deterministic identifier of the form `<prefix>_<20 hex chars>`.
 *
 * The suffix is the first 20 hexadecimal characters of the SHA-256 digest of
 * `value`, so equal inputs always yield equal identifiers.
 *
 * @param {string} prefix - Label placed in front of the underscore.
 * @param {string} value - Content that is hashed to form the suffix.
 * @returns {string} The combined identifier.
 */
function stableId4(prefix, value) {
  const digestHex = createHash6("sha256").update(value).digest("hex");
  const shortDigest = digestHex.slice(0, 20);
  return `${prefix}_${shortDigest}`;
}
16776
/**
 * Canonicalise a query string so equivalent spellings compare equal.
 *
 * Steps, in order: Unicode NFKC normalization, trimming of both ends,
 * collapsing every whitespace run to a single space, lower-casing.
 *
 * @param {string} query - Raw query text.
 * @returns {string} The normalized query.
 */
function normalizeQuery(query) {
  const compatible = query.normalize("NFKC");
  const singleSpaced = compatible.trim().replace(/\s+/g, " ");
  return singleSpaced.toLowerCase();
}
16779
/**
 * Split a query into its distinct search terms.
 *
 * The query is normalized first; a term is a maximal run of Unicode letters,
 * Unicode digits, or underscores. Duplicates are removed keeping the first
 * occurrence, and at most the first 16 distinct terms are returned.
 *
 * @param {string} query - Raw query text.
 * @returns {string[]} Distinct terms in order of first appearance (max 16).
 */
function queryTerms2(query) {
  const tokens = normalizeQuery(query).match(/[\p{L}\p{N}_]+/gu);
  if (tokens === null) {
    return [];
  }
  const distinct = [...new Set(tokens)];
  return distinct.slice(0, 16);
}
16782
/**
 * Build the lower-cased haystack used for exact-term matching.
 *
 * Joins the result's title and text (skipping falsy parts) with a single
 * space. The text is NFKC-normalized before lower-casing because query terms
 * are NFKC-normalized too; without this, compatibility forms such as
 * full-width letters or ligatures in the indexed text could never match.
 *
 * @param {{ title?: string | null, text?: string | null }} result - Search result.
 * @returns {string} Normalized, lower-cased text; empty string when both parts are missing.
 */
function textForResult(result) {
  const parts = [result.title, result.text].filter(Boolean);
  return parts.join(" ").normalize("NFKC").toLowerCase();
}
16785
/**
 * Fraction of query terms that occur as substrings of the result's text.
 *
 * @param {object} result - Search result; its haystack comes from `textForResult`.
 * @param {string[]} terms - Query terms to look for.
 * @returns {number} A value in [0, 1] rounded to six decimals; 0 when `terms` is empty.
 */
function exactScore(result, terms) {
  const total = terms.length;
  if (total === 0) {
    return 0;
  }
  const haystack = textForResult(result);
  let hits = 0;
  for (const term of terms) {
    if (haystack.includes(term)) {
      hits += 1;
    }
  }
  return Number((hits / total).toFixed(6));
}
16792
/**
 * Decide whether a result's provenance marks it as read-only.
 *
 * Missing provenance, or provenance carrying neither flag, counts as
 * read-only. When `read_only` is present it decides the outcome; otherwise
 * `read_only_sources` does. A flag only counts when it is strictly `true`.
 *
 * @param {object | null | undefined} provenance - Provenance record of a result.
 * @returns {boolean} True when the result may be treated as read-only.
 */
function hasReadOnlyProvenance(provenance) {
  if (!provenance) {
    return true;
  }
  // The first flag that exists wins, in this priority order.
  for (const flag of ["read_only", "read_only_sources"]) {
    if (flag in provenance) {
      return provenance[flag] === true;
    }
  }
  return true;
}
16801
/**
 * Decide whether a result's provenance marks it as stale.
 *
 * A truthy `stale` property wins outright. Otherwise, when a `status`
 * property exists, the decision is delegated to `isStaleStatus` (defined
 * elsewhere in this file). Missing provenance is never stale.
 *
 * @param {object | null | undefined} provenance - Provenance record of a result.
 * @returns {boolean} True when the result should be treated as stale.
 */
function isStale(provenance) {
  if (!provenance) {
    return false;
  }
  const flaggedStale = "stale" in provenance && Boolean(provenance.stale);
  if (flaggedStale) {
    return true;
  }
  return "status" in provenance ? isStaleStatus(provenance.status) : false;
}
16810
/**
 * Score how trustworthy a result's freshness metadata is.
 *
 * Tiers, checked in order:
 *   0     - provenance is stale
 *   1     - the source carries a hash or a revision
 *   0.85  - the artifact carries a hash
 *   0.75  - provenance lists at least one source ref
 *   0.55  - nothing verifiable is attached
 *
 * @param {object} result - Search result with optional `source`, `artifact`, `provenance`.
 * @returns {number} One of the tier values above.
 */
function freshnessScore(result) {
  if (isStale(result.provenance)) {
    return 0;
  }
  if (result.source?.hash || result.source?.revision) {
    return 1;
  }
  if (result.artifact?.hash) {
    return 0.85;
  }
  // `source_refs` may be present but null/undefined; read its length
  // defensively instead of throwing a TypeError.
  const sourceRefCount = result.provenance?.source_refs?.length ?? 0;
  if (sourceRefCount > 0) {
    return 0.75;
  }
  return 0.55;
}
16821
/**
 * Score how well a result can be cited.
 *
 * Tiers, checked in order:
 *   1     - a citation chunk id plus a source or artifact URI
 *   0.75  - provenance flags the result as citation-required
 *   0.65  - only an artifact URI is available
 *   0.35  - nothing citable is attached
 *
 * @param {object} result - Search result with optional `citation`, `source`, `artifact`, `provenance`.
 * @returns {number} One of the tier values above.
 */
function citationScore(result) {
  const { citation, source, artifact, provenance } = result;
  const hasLocation = Boolean(source?.uri || artifact?.uri);
  if (citation?.chunk_id && hasLocation) {
    return 1;
  }
  const citationRequired =
    Boolean(provenance) && "citation_required" in provenance && Boolean(provenance.citation_required);
  if (citationRequired) {
    return 0.75;
  }
  return artifact?.uri ? 0.65 : 0.35;
}
16830
/**
 * Map a result kind to a fixed authority weight.
 *
 * @param {{ kind: string }} result - Search result.
 * @returns {number} 0.85 for wiki chunks, 0.8 for source chunks, 0.65 for
 *   wiki pages, and 0.55 for every other kind.
 */
function authorityScore(result) {
  switch (result.kind) {
    case "wiki_chunk":
      return 0.85;
    case "source_chunk":
      return 0.8;
    case "wiki_page":
      return 0.65;
    default:
      return 0.55;
  }
}
16839
/**
 * Re-score a search result by blending its base score with exact-term,
 * citation, freshness, and authority signals.
 *
 * Weights: base 0.65, exact 0.1, citation 0.1, freshness 0.1, authority 0.05.
 * The blended value is capped at 1 and rounded to six decimals.
 *
 * @param {object} result - Search result carrying `score` and optionally `reasons`.
 * @param {string[]} terms - Query terms used for the exact-match signal.
 * @returns {object} A copy of `result` with the new `score`, the extended
 *   `reasons` list, and a `rerank` object holding every signal plus `final_score`.
 */
function rerank(result, terms) {
  const signals = {
    base_score: result.score,
    exact_score: exactScore(result, terms),
    citation_score: citationScore(result),
    freshness_score: freshnessScore(result),
    authority_score: authorityScore(result)
  };
  const blended = signals.base_score * 0.65 + signals.exact_score * 0.1 + signals.citation_score * 0.1 + signals.freshness_score * 0.1 + signals.authority_score * 0.05;
  const finalScore = Number(Math.min(1, blended).toFixed(6));

  // Existing reasons keep their order; each new label is appended at most once.
  const reasonSet = new Set(result.reasons);
  const earnedLabels = [
    [signals.exact_score > 0.5, "exact_term"],
    [signals.citation_score >= 0.75, "cited_source"],
    [signals.freshness_score >= 0.85, "fresh_source"]
  ];
  for (const [earned, label] of earnedLabels) {
    if (earned) {
      reasonSet.add(label);
    }
  }

  return {
    ...result,
    score: finalScore,
    reasons: [...reasonSet],
    rerank: { ...signals, final_score: finalScore }
  };
}
16865
/**
 * Produce a single-line quote for a result, at most `maxChars` long.
 *
 * Uses the result's text, or its title when text is null/undefined.
 * Whitespace runs are collapsed to one space and the ends are trimmed.
 * When the quote has to be shortened, room for the three-character `...`
 * marker is reserved inside the budget, and a surrogate pair is never cut
 * in half.
 *
 * @param {{ text?: string | null, title?: string | null }} result - Search result.
 * @param {number} maxChars - Maximum length of the returned string in UTF-16 code units.
 * @returns {string | null} The quote, or null when there is no text and no title.
 */
function quoteFor(result, maxChars) {
  const source = result.text ?? result.title;
  if (!source) {
    return null;
  }
  const normalized = source.replace(/\s+/g, " ").trim();
  if (normalized.length <= maxChars) {
    return normalized;
  }
  const marker = "...";
  if (maxChars <= marker.length) {
    // Budget too small for any text: return as much of the marker as fits.
    return marker.slice(0, Math.max(0, maxChars));
  }
  let head = normalized.slice(0, maxChars - marker.length);
  // A trailing high surrogate means the cut landed inside an astral character.
  if (/[\uD800-\uDBFF]$/.test(head)) {
    head = head.slice(0, -1);
  }
  return `${head.trim()}${marker}`;
}
16872
/**
 * Assemble the citation record for a reranked result.
 *
 * The citation id is derived deterministically from the result's kind, id,
 * source URI, and artifact URI (NUL-separated). Every optional field falls
 * back to null, and the quote is limited to 500 characters via `quoteFor`.
 *
 * @param {object} result - Search result with optional `source`, `artifact`, `citation`, `provenance`.
 * @returns {object} Citation record referencing the result by `result_id`.
 */
function citationFor(result) {
  const { source, artifact, citation } = result;
  const sourceUri = source?.uri ?? null;
  const artifactUri = artifact?.uri ?? null;
  const identity = `${result.kind}\x00${result.id}\x00${sourceUri ?? ""}\x00${artifactUri ?? ""}`;
  return {
    id: stableId4("cite", identity),
    result_id: result.id,
    kind: result.kind,
    source_uri: sourceUri,
    source_ref: source?.ref ?? null,
    artifact_uri: artifactUri,
    artifact_path: artifact?.path ?? null,
    revision: source?.revision ?? null,
    hash: source?.hash ?? artifact?.hash ?? null,
    chunk_id: citation?.chunk_id ?? null,
    start_offset: citation?.start_offset ?? null,
    end_offset: citation?.end_offset ?? null,
    quote: quoteFor(result, 500),
    provenance: result.provenance
  };
}
16891
/**
 * Build the excerpt entry that pairs a result's text with its citation.
 *
 * @param {object} result - Reranked search result.
 * @param {{ id: string }} citation - Citation record built for the same result.
 * @param {number} contextChars - Maximum excerpt length handed to `quoteFor`.
 * @returns {object | null} The excerpt, or null when the result yields no text.
 */
function excerptFor(result, citation, contextChars) {
  const body = quoteFor(result, contextChars);
  return body
    ? {
        id: stableId4("excerpt", `${result.kind}\x00${result.id}`),
        result_id: result.id,
        citation_id: citation.id,
        kind: result.kind,
        text: body,
        score: result.score
      }
    : null;
}
16904
/**
 * Render one SQL positional placeholder per value, e.g. `?, ?, ?`.
 *
 * @param {unknown[]} values - Dense array of values to be bound; only its length matters.
 * @returns {string} Comma-separated `?` markers; empty string for an empty array.
 */
function placeholders(values) {
  const marks = new Array(values.length).fill("?");
  return marks.join(", ");
}
16907
/**
 * Load citation and backlink rows related to the given results.
 *
 * Citations are looked up by the results' citation chunk ids and by the ids
 * of `wiki_page` results; backlinks are looked up for `wiki_page` ids in
 * either direction. Each query is capped at 50 rows. Ids are de-duplicated
 * before binding, and a citation row returned by both citation queries is
 * reported once. The database is opened only when there is something to
 * look up and is always closed afterwards.
 *
 * @param {string} dbPath - Path handed to `openKnowledgeDb`.
 * @param {object[]} results - Reranked results.
 * @returns {{ citations: object[], backlinks: object[] }} Rows found; both lists may be empty.
 */
function loadGraphEvidence(dbPath, results) {
  const chunkIds = [...new Set(results.map((result) => result.citation?.chunk_id).filter((id) => Boolean(id)))];
  const wikiPageIds = [...new Set(results.filter((result) => result.kind === "wiki_page").map((result) => result.id))];
  const citations = [];
  const backlinks = [];
  if (chunkIds.length === 0 && wikiPageIds.length === 0) {
    return { citations, backlinks };
  }

  // A citation can match both the chunk query and the wiki-page query.
  const seenCitationIds = new Set();
  const addCitations = (rows) => {
    for (const row of rows) {
      if (row.id != null) {
        if (seenCitationIds.has(row.id)) {
          continue;
        }
        seenCitationIds.add(row.id);
      }
      citations.push(row);
    }
  };

  const db = openKnowledgeDb(dbPath);
  try {
    if (chunkIds.length > 0) {
      addCitations(db.query(`SELECT id, wiki_page_id, chunk_id, source_uri, quote, start_offset, end_offset
        FROM citations
        WHERE chunk_id IN (${placeholders(chunkIds)})
        ORDER BY created_at DESC
        LIMIT 50`).all(...chunkIds));
    }
    if (wikiPageIds.length > 0) {
      addCitations(db.query(`SELECT id, wiki_page_id, chunk_id, source_uri, quote, start_offset, end_offset
        FROM citations
        WHERE wiki_page_id IN (${placeholders(wikiPageIds)})
        ORDER BY created_at DESC
        LIMIT 50`).all(...wikiPageIds));
      // The id list is bound twice: once per IN clause.
      backlinks.push(...db.query(`SELECT from_page_id, to_page_id, label
        FROM wiki_backlinks
        WHERE from_page_id IN (${placeholders(wikiPageIds)}) OR to_page_id IN (${placeholders(wikiPageIds)})
        LIMIT 50`).all(...wikiPageIds, ...wikiPageIds));
    }
  } finally {
    db.close();
  }
  return { citations, backlinks };
}
16939
/**
 * Run a hybrid search and turn its rows into a reranked context pack.
 *
 * Pipeline: hybrid search -> drop results whose provenance is not read-only
 * or is stale (each drop is recorded as a warning and a note) -> rerank ->
 * sort by score descending, ties by id -> keep `search.limit` results ->
 * build citations, excerpts, and graph evidence.
 *
 * @param {object} options - Search options forwarded unchanged to `hybridSearch`.
 * @param {number} [options.contextChars=1200] - Excerpt length budget, clamped
 *   to [200, 4000]; a value that is not a number falls back to 1200.
 * @param {string} options.dbPath - Knowledge database path, used for graph evidence.
 * @returns {Promise<object>} Context pack with `query`, `normalized_query`,
 *   `created_at`, `mode`, `warnings`, `search_counts`, `results`, `citations`,
 *   `excerpts`, `graph`, and `notes`.
 */
async function retrieveKnowledgeContext(options) {
  // Without the NaN guard a non-numeric budget flows through Math.min/Math.max
  // as NaN and every excerpt collapses to the bare truncation marker.
  const requestedChars = Number(options.contextChars ?? 1200);
  const contextChars = Number.isNaN(requestedChars) ? 1200 : Math.max(200, Math.min(requestedChars, 4000));

  const search = await hybridSearch(options);
  const terms = queryTerms2(search.query);
  const warnings = [...search.warnings];
  const permissionNotes = new Set();
  const freshnessNotes = new Set();

  // Explicit loop: dropping a result also records why it was dropped.
  const eligible = [];
  for (const result of search.results) {
    if (!hasReadOnlyProvenance(result.provenance)) {
      warnings.push(`permission_filtered: ${result.kind}:${result.id}`);
      permissionNotes.add("Dropped a result because provenance was not read-only.");
      continue;
    }
    if (isStale(result.provenance)) {
      warnings.push(`stale_filtered: ${result.kind}:${result.id}`);
      freshnessNotes.add("Dropped a stale result whose source status requires reindexing.");
      continue;
    }
    eligible.push(result);
  }

  const results = eligible
    .map((result) => rerank(result, terms))
    .sort((a, b) => b.score - a.score || a.id.localeCompare(b.id))
    .slice(0, search.limit);
  const citations = results.map((result) => citationFor(result));
  // Citations are index-aligned with results; results without text yield no excerpt.
  const excerpts = results
    .map((result, index) => excerptFor(result, citations[index], contextChars))
    .filter((entry) => Boolean(entry));

  for (const result of results) {
    if (result.provenance && "read_only" in result.provenance && result.provenance.read_only) {
      permissionNotes.add("All source-backed excerpts are read-only and citation-required.");
    }
    if (result.rerank.freshness_score >= 0.85) {
      freshnessNotes.add("Fresh source revision/hash or artifact hash is present for top context.");
    }
  }

  return {
    query: search.query,
    normalized_query: normalizeQuery(search.query),
    created_at: new Date().toISOString(),
    mode: search.mode,
    warnings,
    search_counts: search.counts,
    results,
    citations,
    excerpts,
    graph: loadGraphEvidence(options.dbPath, results),
    notes: {
      permissions: Array.from(permissionNotes),
      freshness: Array.from(freshnessNotes)
    }
  };
}
16987
+
16769
16988
  // src/storage-contract.ts
16770
- import { createHash as createHash6, randomUUID as randomUUID4 } from "crypto";
16989
+ import { createHash as createHash7, randomUUID as randomUUID4 } from "crypto";
16771
16990
  var GENERATED_ARTIFACTS = [
16772
16991
  {
16773
16992
  kind: "schema",
@@ -16803,7 +17022,7 @@ var GENERATED_ARTIFACTS = [
16803
17022
  function hashArtifactBody(body) {
16804
17023
  const bytes = typeof body === "string" ? Buffer.from(body) : Buffer.from(body);
16805
17024
  return {
16806
- hash: `sha256:${createHash6("sha256").update(bytes).digest("hex")}`,
17025
+ hash: `sha256:${createHash7("sha256").update(bytes).digest("hex")}`,
16807
17026
  size_bytes: bytes.byteLength
16808
17027
  };
16809
17028
  }
@@ -16938,15 +17157,15 @@ function recordStorageObjects(db, objects, now = new Date) {
16938
17157
  }
16939
17158
 
16940
17159
  // src/wiki-layout.ts
16941
- import { createHash as createHash7 } from "crypto";
17160
+ import { createHash as createHash8 } from "crypto";
16942
17161
  function todayParts(now) {
16943
17162
  const year = String(now.getUTCFullYear());
16944
17163
  const month = String(now.getUTCMonth() + 1).padStart(2, "0");
16945
17164
  const day = String(now.getUTCDate()).padStart(2, "0");
16946
17165
  return { year, month, day };
16947
17166
  }
16948
- function stableId4(prefix, value) {
16949
- return `${prefix}_${createHash7("sha256").update(value).digest("hex").slice(0, 20)}`;
17167
/**
 * Build a deterministic identifier of the form `<prefix>_<20 hex chars>`.
 *
 * The suffix is the first 20 hexadecimal characters of the SHA-256 digest of
 * `value`, so equal inputs always yield equal identifiers.
 *
 * @param {string} prefix - Label placed in front of the underscore.
 * @param {string} value - Content that is hashed to form the suffix.
 * @returns {string} The combined identifier.
 */
function stableId5(prefix, value) {
  const hasher = createHash8("sha256");
  hasher.update(value);
  const suffix = hasher.digest("hex").slice(0, 20);
  return `${prefix}_${suffix}`;
}
  }
16951
17170
  function estimateTokenCount2(text) {
16952
17171
  const words = text.trim().split(/\s+/).filter(Boolean).length;
@@ -17064,7 +17283,7 @@ function provenanceFor(artifact) {
17064
17283
  }
17065
17284
  function recordWikiChunk(db, pageId, title, artifact, body, now) {
17066
17285
  const provenance = provenanceFor(artifact);
17067
- const chunkId = stableId4("chk", `${pageId}\x00${artifact.hash ?? artifact.uri}`);
17286
+ const chunkId = stableId5("chk", `${pageId}\x00${artifact.hash ?? artifact.uri}`);
17068
17287
  const existing = db.query("SELECT id FROM chunks WHERE wiki_page_id = ?").all(pageId);
17069
17288
  for (const row of existing)
17070
17289
  db.run("DELETE FROM chunks_fts WHERE chunk_id = ?", [row.id]);
@@ -17100,7 +17319,7 @@ function recordWikiLayoutCatalog(db, artifacts, now = new Date) {
17100
17319
  artifact_uri = excluded.artifact_uri,
17101
17320
  metadata_json = excluded.metadata_json,
17102
17321
  updated_at = excluded.updated_at`, [
17103
- stableId4("idx", "root:indexes/root.md"),
17322
+ stableId5("idx", "root:indexes/root.md"),
17104
17323
  "root",
17105
17324
  "root",
17106
17325
  rootIndex.uri,
@@ -17115,7 +17334,7 @@ function recordWikiLayoutCatalog(db, artifacts, now = new Date) {
17115
17334
  ]);
17116
17335
  }
17117
17336
  if (wikiReadme) {
17118
- const wikiPageId = stableId4("wiki", "wiki/README.md");
17337
+ const wikiPageId = stableId5("wiki", "wiki/README.md");
17119
17338
  db.run(`INSERT INTO wiki_pages (id, path, title, artifact_uri, content_hash, status, metadata_json, created_at, updated_at)
17120
17339
  VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
17121
17340
  ON CONFLICT(path) DO UPDATE SET
@@ -17295,6 +17514,14 @@ class KnowledgeService {
17295
17514
  config: this.config()
17296
17515
  });
17297
17516
  }
17517
+ async retrieveContext(options) {
17518
+ const workspace = this.ensureWorkspace();
17519
+ return retrieveKnowledgeContext({
17520
+ ...options,
17521
+ dbPath: workspace.knowledgeDbPath,
17522
+ config: this.config()
17523
+ });
17524
+ }
17298
17525
  }
17299
17526
  function createKnowledgeService(options = {}) {
17300
17527
  return new KnowledgeService(options);
@@ -17460,6 +17687,22 @@ function buildServer() {
17460
17687
  return errorText(error48 instanceof Error ? error48.message : String(error48));
17461
17688
  }
17462
17689
  });
17690
+ registerTool(server, "knowledge_search", "Knowledge context search", "Return a reranked citation context pack for agent prompts", {
17691
+ scope: scopeField,
17692
+ query: exports_external.string().describe("Search query or prompt"),
17693
+ limit: exports_external.number().optional().describe("Maximum context results"),
17694
+ semantic: exports_external.boolean().optional().describe("Include vector semantic results"),
17695
+ model: exports_external.string().optional().describe("Embedding model ref, default openai:text-embedding-3-small"),
17696
+ dimensions: exports_external.number().optional().describe("Embedding dimensions for deterministic fake mode"),
17697
+ fake: exports_external.boolean().optional().describe("Use deterministic fake embeddings for local tests")
17698
+ }, async ({ scope, query, limit, semantic, model, dimensions, fake }) => {
17699
+ const service = createKnowledgeService({ scope });
17700
+ try {
17701
+ return jsonText({ ok: true, ...await service.retrieveContext({ query, limit, semantic, modelRef: model, dimensions, fake }) });
17702
+ } catch (error48) {
17703
+ return errorText(error48 instanceof Error ? error48.message : String(error48));
17704
+ }
17705
+ });
17463
17706
  registerTool(server, "ok_add", "Add a knowledge item", "Add a new item to the knowledge store", {
17464
17707
  title: exports_external.string().describe("Item title"),
17465
17708
  content: exports_external.string().describe("Item content/body"),