@hasna/knowledge 0.2.15 → 0.2.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -223,8 +223,11 @@ generated metadata rows, not raw source bytes, and pins each row to `open-files`
223
223
  provenance: source ref/URI, revision/hash, chunk offsets, token count, provider,
224
224
  model, dimensions, status, and timestamps. The structured `search` contract
225
225
  merges keyword FTS, wiki/index catalog hits, generated wiki chunks, and optional
226
- vector results so the local SQLite index can later move to pgvector or a managed
227
- hosted vector store without changing CLI/MCP result shape.
226
+ vector results. `open-knowledge search --context` and MCP `knowledge_search`
227
+ turn those rows into reranked citation context packs with selected excerpts,
228
+ freshness and permission notes, graph evidence, and final rerank scores. The
229
+ local SQLite index can later move to pgvector or a managed hosted vector store
230
+ without changing CLI/MCP result shape.
228
231
 
229
232
  ## Agent Workflow
230
233
 
@@ -49,14 +49,15 @@ The current local command surface is:
49
49
  ```bash
50
50
  open-knowledge search "company wiki policy" --scope project --json
51
51
  open-knowledge search "company wiki policy" --scope project --semantic --json
52
+ open-knowledge search "company wiki policy" --scope project --context --json
52
53
  open-knowledge embeddings index --scope project --model openai:text-embedding-3-small
53
54
  open-knowledge embeddings search "company wiki policy" --scope project --json
54
55
  ```
55
56
 
56
57
  `search` is the structured hybrid layer for agents. `embeddings search` is the
57
58
  lower-level vector-only command. MCP exposes the same capability through
58
- `ok_search`, `ok_embeddings_status`, `ok_embeddings_index`, and
59
- `ok_semantic_search`. Deterministic `--fake` embeddings exist for tests and
59
+ `ok_search`, `knowledge_search`, `ok_embeddings_status`, `ok_embeddings_index`,
60
+ and `ok_semantic_search`. Deterministic `--fake` embeddings exist for tests and
60
61
  offline verification only.
61
62
 
62
63
  ## Hosted Indexes
@@ -132,6 +133,12 @@ They should receive context packs:
132
133
 
133
134
  This keeps agent prompts stable while the retrieval internals evolve.
134
135
 
136
+ The local context-pack implementation is available through
137
+ `open-knowledge search --context` and MCP `knowledge_search`. It reranks merged
138
+ search rows using exact-term coverage, citation availability, source freshness,
139
+ and source/wiki authority, then emits excerpts and citation objects that preserve
140
+ source refs, artifact URIs, revision/hash metadata, offsets, and provenance.
141
+
135
142
  ## Reindexing
136
143
 
137
144
  Reindexing is driven by source revisions:
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@hasna/knowledge",
3
- "version": "0.2.15",
3
+ "version": "0.2.16",
4
4
  "description": "Agent-friendly local knowledge CLI with JSON output, pagination, and safe destructive actions",
5
5
  "type": "module",
6
6
  "bin": {
package/src/cli.ts CHANGED
@@ -50,6 +50,7 @@ interface Flags {
50
50
  model?: string;
51
51
  dimensions?: number;
52
52
  semantic?: boolean;
53
+ context?: boolean;
53
54
  noColor?: boolean;
54
55
  scope?: string;
55
56
  olderThan?: number;
@@ -103,6 +104,7 @@ function parseArgs(argv: string[]): ParseResult {
103
104
  case '--model': flags.model = argv[i + 1]; i += 1; break;
104
105
  case '--dimensions': flags.dimensions = Number(argv[i + 1]); i += 1; break;
105
106
  case '--semantic': flags.semantic = true; break;
107
+ case '--context': flags.context = true; break;
106
108
  case '--fake': flags.fake = true; break;
107
109
  case '--no-color': flags.noColor = true; break;
108
110
  case '--scope': flags.scope = argv[i + 1]; i += 1; break;
@@ -177,7 +179,7 @@ Commands:
177
179
  ingest manifest <file|s3://> Ingest an open-files manifest into knowledge.db
178
180
  ingest source <source-ref> Ingest a read-only source ref into knowledge.db
179
181
  reindex outbox <file|s3://> Consume open-files change events and invalidate chunks
180
- search <query> Hybrid search sources, wiki pages, and indexes
182
+ search <query> Hybrid search sources, wiki pages, indexes, or context
181
183
  embeddings status|index|search Build/query local vector embeddings
182
184
  providers status|models|check Inspect AI SDK provider config and credentials
183
185
  safety status|check|approve|audit|redact
@@ -190,6 +192,7 @@ Global Options:
190
192
  --model <provider:model> AI/embedding model ref
191
193
  --dimensions <n> Embedding dimensions for local/fake providers
192
194
  --semantic Include vector semantic results in search
195
+ --context Return a reranked citation context pack for search
193
196
  --fake Use deterministic fake embeddings for local tests
194
197
  --scope local|global|project Store scope (default: global ~/.hasna/apps/knowledge/)
195
198
  --no-color Disable color output
@@ -251,7 +254,7 @@ function printCommandHelp(command: string): void {
251
254
  if (command === 'source') { console.log('Usage: open-knowledge source resolve <source-ref> [--purpose knowledge_answer|knowledge_index] [--limit <n>] [--scope local|global|project] [--json]'); return; }
252
255
  if (command === 'ingest') { console.log('Usage: open-knowledge ingest manifest <file|s3://bucket/key> | source <source-ref> [--purpose knowledge_index] [--scope local|global|project] [--json]'); return; }
253
256
  if (command === 'reindex') { console.log('Usage: open-knowledge reindex outbox <file|s3://bucket/key> [--scope local|global|project] [--json]'); return; }
254
- if (command === 'search') { console.log('Usage: open-knowledge search <query> [--semantic] [--model openai:text-embedding-3-small] [--limit <n>] [--dimensions <n>] [--fake] [--scope local|global|project] [--json]'); return; }
257
+ if (command === 'search') { console.log('Usage: open-knowledge search <query> [--context] [--semantic] [--model openai:text-embedding-3-small] [--limit <n>] [--dimensions <n>] [--fake] [--scope local|global|project] [--json]'); return; }
255
258
  if (command === 'embeddings') { console.log('Usage: open-knowledge embeddings status|index|search [query] [--model openai:text-embedding-3-small] [--limit <n>] [--dimensions <n>] [--fake] [--scope local|global|project] [--json]'); return; }
256
259
  if (command === 'providers') { console.log('Usage: open-knowledge providers status|models|check [provider|model-alias] [--scope local|global|project] [--json]'); return; }
257
260
  if (command === 'safety') { console.log('Usage: open-knowledge safety status|check|approve|audit|redact [args] [--scope local|global|project] [--json]'); return; }
@@ -299,11 +302,11 @@ async function run(argv: string[]): Promise<void> {
299
302
  if (flags.completions) {
300
303
  const shell = flags.completions;
301
304
  if (shell === 'bash') {
302
- console.log(`_open_knowledge() { local cur; cur="${"$"}{COMP_WORDS[COMP_CWORD]}"; COMPREPLY=($(compgen -W "add list get update archive restore upsert untag delete export prune dedupe stats paths storage db wiki source ingest reindex search embeddings providers safety help ls rm edit unarchive --json --yes --help --version --desc --page --limit --search --sort --id --store --title --content --url --tag --format --completions --purpose --model --dimensions --semantic --fake --no-color --scope --archived --include-archived" -- "$cur")); }; complete -F _open_knowledge open-knowledge`);
305
+ console.log(`_open_knowledge() { local cur; cur="${"$"}{COMP_WORDS[COMP_CWORD]}"; COMPREPLY=($(compgen -W "add list get update archive restore upsert untag delete export prune dedupe stats paths storage db wiki source ingest reindex search embeddings providers safety help ls rm edit unarchive --json --yes --help --version --desc --page --limit --search --sort --id --store --title --content --url --tag --format --completions --purpose --model --dimensions --semantic --context --fake --no-color --scope --archived --include-archived" -- "$cur")); }; complete -F _open_knowledge open-knowledge`);
303
306
  } else if (shell === 'zsh') {
304
- console.log(`#compdef open-knowledge\n_open_knowledge() { _arguments -C "1: :(add list get update archive restore upsert untag delete export prune dedupe stats paths storage db wiki source ingest reindex search embeddings providers safety help ls rm edit unarchive)" "(--json)--json" "(--yes)-y" "(--help)--help" "(--version)--version" "(--desc)--desc" "(--archived)--archived" "(--include-archived)--include-archived" "(--semantic)--semantic" "(--fake)--fake" "(-p --page)"{-p,--page}"[page number]:number:" "(-l --limit)"{-l,--limit}"[items per page]:number:" "(-s --search)"{-s,--search}"[search text]:text:" "(--sort)--sort"\{created,title\}:" "(--id)--id[item id]:id:" "(--store)--store[store path]:path:" "(--title)--title[new title]:" "(--content)--content[new content]:" "(--url)--url[source url]:" "(-t --tag)"{-t,--tag}"[tag]:tag:" "(--format)--format[json|jsonl]:" "(--completions)--completions[output completions]:shell:(bash zsh fish):" "(--purpose)--purpose[purpose]:" "(--model)--model[model ref]:" "(--dimensions)--dimensions[embedding dimensions]:number:" "(--no-color)--no-color[disable color]" "(--scope)--scope"\{local,global,project\}:" }; _open_knowledge`);
307
+ console.log(`#compdef open-knowledge\n_open_knowledge() { _arguments -C "1: :(add list get update archive restore upsert untag delete export prune dedupe stats paths storage db wiki source ingest reindex search embeddings providers safety help ls rm edit unarchive)" "(--json)--json" "(--yes)-y" "(--help)--help" "(--version)--version" "(--desc)--desc" "(--archived)--archived" "(--include-archived)--include-archived" "(--semantic)--semantic" "(--context)--context" "(--fake)--fake" "(-p --page)"{-p,--page}"[page number]:number:" "(-l --limit)"{-l,--limit}"[items per page]:number:" "(-s --search)"{-s,--search}"[search text]:text:" "(--sort)--sort"\{created,title\}:" "(--id)--id[item id]:id:" "(--store)--store[store path]:path:" "(--title)--title[new title]:" "(--content)--content[new content]:" "(--url)--url[source url]:" "(-t --tag)"{-t,--tag}"[tag]:tag:" "(--format)--format[json|jsonl]:" "(--completions)--completions[output completions]:shell:(bash zsh fish):" "(--purpose)--purpose[purpose]:" "(--model)--model[model ref]:" "(--dimensions)--dimensions[embedding dimensions]:number:" "(--no-color)--no-color[disable color]" "(--scope)--scope"\{local,global,project\}:" }; _open_knowledge`);
305
308
  } else if (shell === 'fish') {
306
- console.log(`complete -c open-knowledge -f; complete -c open-knowledge -a "add list get update archive restore upsert untag delete export prune dedupe stats paths storage db wiki source ingest reindex search embeddings providers safety help ls rm edit unarchive"; complete -c open-knowledge -l json; complete -c open-knowledge -l yes -s y; complete -c open-knowledge -l help -s h; complete -c open-knowledge -l version -s v; complete -c open-knowledge -l desc; complete -c open-knowledge -l archived; complete -c open-knowledge -l include-archived; complete -c open-knowledge -l semantic; complete -c open-knowledge -l fake; complete -c open-knowledge -s p -l page; complete -c open-knowledge -s l -l limit; complete -c open-knowledge -s s -l search; complete -c open-knowledge -l sort; complete -c open-knowledge -l id; complete -c open-knowledge -l store; complete -c open-knowledge -l title; complete -c open-knowledge -l content; complete -c open-knowledge -l url; complete -c open-knowledge -s t -l tag; complete -c open-knowledge -l format; complete -c open-knowledge -l completions; complete -c open-knowledge -l purpose; complete -c open-knowledge -l model; complete -c open-knowledge -l dimensions; complete -c open-knowledge -l no-color; complete -c open-knowledge -l scope -a "local global project"`);
309
+ console.log(`complete -c open-knowledge -f; complete -c open-knowledge -a "add list get update archive restore upsert untag delete export prune dedupe stats paths storage db wiki source ingest reindex search embeddings providers safety help ls rm edit unarchive"; complete -c open-knowledge -l json; complete -c open-knowledge -l yes -s y; complete -c open-knowledge -l help -s h; complete -c open-knowledge -l version -s v; complete -c open-knowledge -l desc; complete -c open-knowledge -l archived; complete -c open-knowledge -l include-archived; complete -c open-knowledge -l semantic; complete -c open-knowledge -l context; complete -c open-knowledge -l fake; complete -c open-knowledge -s p -l page; complete -c open-knowledge -s l -l limit; complete -c open-knowledge -s s -l search; complete -c open-knowledge -l sort; complete -c open-knowledge -l id; complete -c open-knowledge -l store; complete -c open-knowledge -l title; complete -c open-knowledge -l content; complete -c open-knowledge -l url; complete -c open-knowledge -s t -l tag; complete -c open-knowledge -l format; complete -c open-knowledge -l completions; complete -c open-knowledge -l purpose; complete -c open-knowledge -l model; complete -c open-knowledge -l dimensions; complete -c open-knowledge -l no-color; complete -c open-knowledge -l scope -a "local global project"`);
307
310
  } else {
308
311
  throw new Error("Invalid --completions value. Use 'bash', 'zsh', or 'fish'.");
309
312
  }
@@ -586,6 +589,18 @@ async function run(argv: string[]): Promise<void> {
586
589
  if (command === 'search') {
587
590
  const query = positional.slice(1).join(' ');
588
591
  if (!query) throw new Error('Usage: open-knowledge search <query>');
592
+ if (flags.context) {
593
+ const context = await service.retrieveContext({
594
+ query,
595
+ limit: flags.limit,
596
+ semantic: flags.semantic,
597
+ modelRef: flags.model,
598
+ dimensions: flags.dimensions,
599
+ fake: flags.fake,
600
+ });
601
+ output({ ok: true, ...context, message: `${context.excerpts.length} context excerpt(s)` }, flags.json);
602
+ return;
603
+ }
589
604
  const result = await service.search({
590
605
  query,
591
606
  limit: flags.limit,
package/src/mcp.js CHANGED
@@ -186,6 +186,23 @@ export function buildServer() {
186
186
  }
187
187
  });
188
188
 
189
+ registerTool(server, 'knowledge_search', 'Knowledge context search', 'Return a reranked citation context pack for agent prompts', {
190
+ scope: scopeField,
191
+ query: z.string().describe('Search query or prompt'),
192
+ limit: z.number().optional().describe('Maximum context results'),
193
+ semantic: z.boolean().optional().describe('Include vector semantic results'),
194
+ model: z.string().optional().describe('Embedding model ref, default openai:text-embedding-3-small'),
195
+ dimensions: z.number().optional().describe('Embedding dimensions for deterministic fake mode'),
196
+ fake: z.boolean().optional().describe('Use deterministic fake embeddings for local tests'),
197
+ }, async ({ scope, query, limit, semantic, model, dimensions, fake }) => {
198
+ const service = createKnowledgeService({ scope });
199
+ try {
200
+ return jsonText({ ok: true, ...await service.retrieveContext({ query, limit, semantic, modelRef: model, dimensions, fake }) });
201
+ } catch (error) {
202
+ return errorText(error instanceof Error ? error.message : String(error));
203
+ }
204
+ });
205
+
189
206
  registerTool(server, 'ok_add', 'Add a knowledge item', 'Add a new item to the knowledge store', {
190
207
  title: z.string().describe('Item title'),
191
208
  content: z.string().describe('Item content/body'),
@@ -0,0 +1,326 @@
1
+ import { createHash } from 'node:crypto';
2
+ import { openKnowledgeDb } from './knowledge-db';
3
+ import { isStaleStatus } from './provenance';
4
+ import { hybridSearch, type HybridSearchEntry, type HybridSearchOptions, type HybridSearchResult, type SearchProvenance } from './search';
5
+
6
/**
 * Options accepted by `retrieveKnowledgeContext`: every hybrid-search option
 * plus an optional excerpt size.
 */
export interface RetrievalOptions extends HybridSearchOptions {
  /**
   * Requested excerpt length in characters. The retriever defaults this to
   * 1200 and clamps it to the 200–4000 range.
   */
  contextChars?: number;
}
9
+
10
/**
 * A hybrid search entry annotated with the component scores that produced its
 * reranked `score`.
 */
export interface RerankedSearchEntry extends HybridSearchEntry {
  rerank: {
    /** Score reported by hybrid search before reranking. */
    base_score: number;
    /** Weighted combination of all components, capped at 1; mirrors `score`. */
    final_score: number;
    /** Fraction of query terms found in the entry's title/text. */
    exact_score: number;
    /** How well the entry can be cited (chunk id plus source/artifact URI scores highest). */
    citation_score: number;
    /** Freshness signal derived from staleness and revision/hash availability. */
    freshness_score: number;
    /** Fixed weight assigned per entry kind. */
    authority_score: number;
  };
}
20
+
21
/** Citation object emitted for each reranked result in a context pack. */
export interface RetrievalCitation {
  /** Stable id hashed from the result kind, result id, source URI and artifact URI. */
  id: string;
  /** Id of the search result this citation belongs to. */
  result_id: string;
  kind: HybridSearchEntry['kind'];
  source_uri: string | null;
  source_ref: string | null;
  artifact_uri: string | null;
  artifact_path: string | null;
  /** Source revision, when the result carries one. */
  revision: string | null;
  /** Source hash, falling back to the artifact hash. */
  hash: string | null;
  chunk_id: string | null;
  start_offset: number | null;
  end_offset: number | null;
  /** Whitespace-collapsed snippet of the result text (or title), truncated around 500 characters. */
  quote: string | null;
  /** Provenance copied unchanged from the search result. */
  provenance: SearchProvenance | null;
}
37
+
38
/** Prompt-ready excerpt produced from a reranked result that has quotable text. */
export interface RetrievalExcerpt {
  /** Stable id hashed from the result kind and result id. */
  id: string;
  /** Id of the search result the excerpt was taken from. */
  result_id: string;
  /** Id of the citation generated for the same result. */
  citation_id: string | null;
  kind: HybridSearchEntry['kind'];
  /** Whitespace-collapsed text, truncated to the configured context size. */
  text: string;
  /** Reranked score of the originating result. */
  score: number;
}
46
+
47
/**
 * Graph rows loaded from the knowledge database for the selected results:
 * stored citations and wiki backlinks.
 */
export interface RetrievalGraphEvidence {
  /** Rows from the `citations` table matching result chunk ids or wiki page ids. */
  citations: {
    id: string;
    chunk_id: string | null;
    wiki_page_id: string | null;
    source_uri: string;
    quote: string | null;
    start_offset: number | null;
    end_offset: number | null;
  }[];
  /** Rows from the `wiki_backlinks` table that touch a selected wiki page on either side. */
  backlinks: {
    from_page_id: string;
    to_page_id: string;
    label: string | null;
  }[];
}
63
+
64
/** Complete result of a context retrieval: reranked rows plus citation material. */
export interface KnowledgeContextPack {
  /** Query string as reported by the hybrid search result. */
  query: string;
  /** NFKC-normalized, whitespace-collapsed, lower-cased form of `query`. */
  normalized_query: string;
  /** ISO-8601 timestamp taken when the pack was assembled. */
  created_at: string;
  mode: HybridSearchResult['mode'];
  /** Search warnings plus one `permission_filtered:` / `stale_filtered:` entry per dropped result. */
  warnings: string[];
  search_counts: HybridSearchResult['counts'];
  /** Reranked results, highest score first, truncated to the search limit. */
  results: RerankedSearchEntry[];
  /** One citation per entry in `results`, in the same order. */
  citations: RetrievalCitation[];
  /** Excerpts for the results that have quotable text. */
  excerpts: RetrievalExcerpt[];
  graph: RetrievalGraphEvidence;
  notes: {
    /** Human-readable notes about permission filtering and read-only sources. */
    permissions: string[];
    /** Human-readable notes about stale results and fresh revisions/hashes. */
    freshness: string[];
  };
}
80
+
81
/** Column set selected from the `citations` table when loading graph evidence. */
interface CitationRow {
  id: string;
  /** Wiki page the citation is attached to, if any. */
  wiki_page_id: string | null;
  /** Chunk the citation points at, if any. */
  chunk_id: string | null;
  source_uri: string;
  quote: string | null;
  start_offset: number | null;
  end_offset: number | null;
}
90
+
91
/** Column set selected from the `wiki_backlinks` table when loading graph evidence. */
interface BacklinkRow {
  /** Id of the page containing the link. */
  from_page_id: string;
  /** Id of the page being linked to. */
  to_page_id: string;
  label: string | null;
}
96
+
97
/**
 * Builds a deterministic identifier of the form `<prefix>_<digest>`, where the
 * digest is the first 20 hex characters of the SHA-256 hash of `value`.
 *
 * @param prefix Label placed before the underscore.
 * @param value Text that is hashed.
 * @returns The same identifier for the same `prefix`/`value` pair on every call.
 */
function stableId(prefix: string, value: string): string {
  const hasher = createHash('sha256');
  hasher.update(value);
  const digest = hasher.digest('hex').substring(0, 20);
  return `${prefix}_${digest}`;
}
100
+
101
/**
 * Canonicalizes a query: Unicode NFKC normalization, surrounding whitespace
 * removed, inner whitespace runs collapsed to one space, then lower-cased.
 */
function normalizeQuery(query: string): string {
  const canonical = query.normalize('NFKC');
  const collapsed = canonical.trim().replace(/\s+/g, ' ');
  return collapsed.toLowerCase();
}
104
+
105
/**
 * Splits a query into its distinct terms (runs of letters, digits and
 * underscores taken from the normalized query), keeping first-seen order and
 * returning at most 16 of them.
 */
function queryTerms(query: string): string[] {
  const tokens = normalizeQuery(query).match(/[\p{L}\p{N}_]+/gu);
  if (tokens === null) return [];
  const distinct = [...new Set(tokens)];
  return distinct.slice(0, 16);
}
108
+
109
/**
 * Returns the searchable text of a result: its non-empty title and text joined
 * by a single space, NFKC-normalized and lower-cased.
 *
 * Query terms are produced from an NFKC-normalized query, so the text they are
 * matched against is normalized the same way; otherwise compatibility forms
 * such as full-width letters would never register as a term hit.
 */
function textForResult(result: HybridSearchEntry): string {
  return [result.title, result.text]
    .filter(Boolean)
    .join(' ')
    .normalize('NFKC')
    .toLowerCase();
}
112
+
113
/**
 * Measures query-term coverage: the fraction of `terms` that occur as
 * substrings of the result's searchable text, rounded to six decimals.
 * An empty term list scores 0.
 */
function exactScore(result: HybridSearchEntry, terms: string[]): number {
  if (!terms.length) return 0;
  const haystack = textForResult(result);
  let hits = 0;
  for (const term of terms) {
    if (haystack.includes(term)) hits += 1;
  }
  return Number((hits / terms.length).toFixed(6));
}
119
+
120
/**
 * Reports whether a result may be used as read-only context.
 *
 * Missing provenance, or provenance without either flag, counts as read-only.
 * When `read_only` is present it decides the outcome (it must be exactly
 * `true`); otherwise `read_only_sources` is checked the same way.
 */
function hasReadOnlyProvenance(provenance: SearchProvenance | null): boolean {
  if (provenance) {
    if ('read_only' in provenance) return provenance.read_only === true;
    if ('read_only_sources' in provenance) return provenance.read_only_sources === true;
  }
  return true;
}
126
+
127
/**
 * Reports whether provenance marks a result as stale: either a truthy `stale`
 * flag, or a `status` value that `isStaleStatus` classifies as stale.
 * Missing provenance is treated as not stale.
 */
function isStale(provenance: SearchProvenance | null): boolean {
  if (provenance) {
    if ('stale' in provenance && provenance.stale) return true;
    if ('status' in provenance) return isStaleStatus(provenance.status);
  }
  return false;
}
133
+
134
/**
 * Scores how trustworthy a result's freshness is, from 0 to 1:
 * stale → 0; source hash or revision → 1; artifact hash → 0.85;
 * provenance listing at least one source ref → 0.75; otherwise 0.55.
 */
function freshnessScore(result: HybridSearchEntry): number {
  const { provenance, source, artifact } = result;
  if (isStale(provenance)) return 0;
  if (source?.hash || source?.revision) return 1;
  if (artifact?.hash) return 0.85;
  const listsSourceRefs = provenance && 'source_refs' in provenance && provenance.source_refs.length > 0;
  return listsSourceRefs ? 0.75 : 0.55;
}
141
+
142
/**
 * Scores how citable a result is:
 * chunk id plus a source or artifact URI → 1; provenance flagged
 * `citation_required` → 0.75; artifact URI only → 0.65; otherwise 0.35.
 */
function citationScore(result: HybridSearchEntry): number {
  const { citation, source, artifact, provenance } = result;
  const hasLocation = Boolean(source?.uri || artifact?.uri);
  if (citation?.chunk_id && hasLocation) return 1;
  const citationRequired = provenance && 'citation_required' in provenance && provenance.citation_required;
  if (citationRequired) return 0.75;
  return artifact?.uri ? 0.65 : 0.35;
}
148
+
149
/**
 * Maps a result kind to its fixed authority weight: wiki chunks 0.85,
 * source chunks 0.8, wiki pages 0.65, every other kind 0.55.
 */
function authorityScore(result: HybridSearchEntry): number {
  switch (result.kind) {
    case 'wiki_chunk':
      return 0.85;
    case 'source_chunk':
      return 0.8;
    case 'wiki_page':
      return 0.65;
    default:
      return 0.55;
  }
}
155
+
156
/**
 * Recomputes a result's score from its hybrid-search score and four quality
 * signals, and records every component under `rerank`.
 *
 * Weights: base 0.65, exact-term 0.1, citation 0.1, freshness 0.1,
 * authority 0.05. The sum is capped at 1 and rounded to six decimals.
 * The reasons `exact_term`, `cited_source` and `fresh_source` are appended
 * when the matching signal passes its threshold.
 */
function rerank(result: HybridSearchEntry, terms: string[]): RerankedSearchEntry {
  const base = result.score;
  const exact = exactScore(result, terms);
  const citation = citationScore(result);
  const freshness = freshnessScore(result);
  const authority = authorityScore(result);

  const weighted = base * 0.65 + exact * 0.1 + citation * 0.1 + freshness * 0.1 + authority * 0.05;
  const finalScore = Number(Math.min(1, weighted).toFixed(6));

  // A Set keeps the reasons already on the result and avoids duplicates.
  const reasons = new Set(result.reasons);
  if (exact > 0.5) reasons.add('exact_term');
  if (citation >= 0.75) reasons.add('cited_source');
  if (freshness >= 0.85) reasons.add('fresh_source');

  return {
    ...result,
    score: finalScore,
    reasons: [...reasons],
    rerank: {
      base_score: base,
      exact_score: exact,
      citation_score: citation,
      freshness_score: freshness,
      authority_score: authority,
      final_score: finalScore,
    },
  };
}
185
+
186
/**
 * Produces a single-line snippet for a result.
 *
 * The result text is used when it contains anything other than whitespace;
 * otherwise the title is used. Whitespace runs are collapsed to one space.
 * Snippets longer than `maxChars` are cut and suffixed with `...`; the
 * ellipsis is counted against the limit, so for `maxChars >= 3` the returned
 * string is never longer than `maxChars`.
 *
 * @param result Search entry supplying `text` and `title`.
 * @param maxChars Maximum snippet length in UTF-16 code units.
 * @returns The snippet, or `null` when neither text nor title has content.
 */
function quoteFor(result: HybridSearchEntry, maxChars: number): string | null {
  const collapse = (value: string | null | undefined): string =>
    (value ?? '').replace(/\s+/g, ' ').trim();

  const normalized = collapse(result.text) || collapse(result.title);
  if (!normalized) return null;
  if (normalized.length <= maxChars) return normalized;

  const ellipsis = '...';
  const budget = Math.max(0, Math.floor(maxChars) - ellipsis.length);
  let head = normalized.slice(0, budget);

  // Never end on a lone high surrogate: that would emit half of a character.
  const lastUnit = head.charCodeAt(head.length - 1);
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) head = head.slice(0, -1);

  return `${head.trimEnd()}${ellipsis}`;
}
192
+
193
/**
 * Builds the citation object for a reranked result.
 *
 * The citation id is a stable hash of the result kind, result id, source URI
 * and artifact URI (NUL-separated). Absent source, artifact or citation
 * fields are reported as `null`, and the quote is limited to about 500
 * characters.
 */
function citationFor(result: RerankedSearchEntry): RetrievalCitation {
  const { source, artifact, citation } = result;
  const sourceUri = source?.uri ?? null;
  const artifactUri = artifact?.uri ?? null;
  const seed = `${result.kind}\u0000${result.id}\u0000${sourceUri ?? ''}\u0000${artifactUri ?? ''}`;

  return {
    id: stableId('cite', seed),
    result_id: result.id,
    kind: result.kind,
    source_uri: sourceUri,
    source_ref: source?.ref ?? null,
    artifact_uri: artifactUri,
    artifact_path: artifact?.path ?? null,
    revision: source?.revision ?? null,
    // Prefer the source hash; fall back to the artifact hash.
    hash: source?.hash ?? artifact?.hash ?? null,
    chunk_id: citation?.chunk_id ?? null,
    start_offset: citation?.start_offset ?? null,
    end_offset: citation?.end_offset ?? null,
    quote: quoteFor(result, 500),
    provenance: result.provenance,
  };
}
212
+
213
/**
 * Builds the prompt excerpt for a reranked result, linked to its citation.
 *
 * @param result Reranked entry to excerpt.
 * @param citation Citation already generated for the same entry.
 * @param contextChars Maximum excerpt length passed to the snippet builder.
 * @returns The excerpt, or `null` when the result has no quotable text.
 */
function excerptFor(result: RerankedSearchEntry, citation: RetrievalCitation, contextChars: number): RetrievalExcerpt | null {
  const text = quoteFor(result, contextChars);
  return text
    ? {
        id: stableId('excerpt', `${result.kind}\u0000${result.id}`),
        result_id: result.id,
        citation_id: citation.id,
        kind: result.kind,
        text,
        score: result.score,
      }
    : null;
}
225
+
226
/**
 * Returns one SQL `?` placeholder per element of `values`, separated by
 * `, `, for use inside an `IN (...)` list. An empty array yields `''`.
 */
function placeholders(values: unknown[]): string {
  const marks = Array.from({ length: values.length }, () => '?');
  return marks.join(', ');
}
229
+
230
/**
 * Loads stored citations and wiki backlinks related to the selected results.
 *
 * Citations are looked up by the chunk ids the results cite and by the ids of
 * `wiki_page` results; backlinks are looked up for `wiki_page` results on
 * either side of the link. Each query returns at most 50 rows.
 *
 * Ids are de-duplicated before being bound, so repeated results do not inflate
 * the parameter list, and a citation matched by both the chunk query and the
 * wiki-page query is returned once.
 *
 * @param dbPath Path of the knowledge database to open.
 * @param results Reranked results whose evidence should be loaded.
 * @returns Matching citation and backlink rows; both arrays are empty (and
 *          the database is not opened) when no result has a chunk id or is a
 *          wiki page.
 */
function loadGraphEvidence(dbPath: string, results: RerankedSearchEntry[]): RetrievalGraphEvidence {
  const chunkIds = Array.from(new Set(
    results.map((result) => result.citation?.chunk_id).filter((id): id is string => Boolean(id)),
  ));
  const wikiPageIds = Array.from(new Set(
    results.filter((result) => result.kind === 'wiki_page').map((result) => result.id),
  ));
  const backlinks: BacklinkRow[] = [];
  if (chunkIds.length === 0 && wikiPageIds.length === 0) return { citations: [], backlinks };

  // Keyed by citation id — assumes `citations.id` is unique per row (TODO confirm against the schema).
  const citationsById = new Map<string, CitationRow>();
  const collect = (rows: CitationRow[]): void => {
    for (const row of rows) {
      if (!citationsById.has(row.id)) citationsById.set(row.id, row);
    }
  };

  const db = openKnowledgeDb(dbPath);
  try {
    if (chunkIds.length > 0) {
      collect(db.query<CitationRow, string[]>(
        `SELECT id, wiki_page_id, chunk_id, source_uri, quote, start_offset, end_offset
         FROM citations
         WHERE chunk_id IN (${placeholders(chunkIds)})
         ORDER BY created_at DESC
         LIMIT 50`,
      ).all(...chunkIds));
    }
    if (wikiPageIds.length > 0) {
      collect(db.query<CitationRow, string[]>(
        `SELECT id, wiki_page_id, chunk_id, source_uri, quote, start_offset, end_offset
         FROM citations
         WHERE wiki_page_id IN (${placeholders(wikiPageIds)})
         ORDER BY created_at DESC
         LIMIT 50`,
      ).all(...wikiPageIds));
      // The id list is bound twice: once for each side of the backlink.
      backlinks.push(...db.query<BacklinkRow, string[]>(
        `SELECT from_page_id, to_page_id, label
         FROM wiki_backlinks
         WHERE from_page_id IN (${placeholders(wikiPageIds)}) OR to_page_id IN (${placeholders(wikiPageIds)})
         LIMIT 50`,
      ).all(...wikiPageIds, ...wikiPageIds));
    }
  } finally {
    // Always release the handle, even when a query throws.
    db.close();
  }
  return { citations: Array.from(citationsById.values()), backlinks };
}
268
+
269
/**
 * Runs a hybrid search and turns the rows into a context pack.
 *
 * Steps: run the search; drop results whose provenance is not read-only or is
 * stale (each drop adds a warning and a note); rerank the rest; sort by
 * descending score with result id as tie-breaker; keep the first
 * `search.limit` entries; then build a citation and, where text exists, an
 * excerpt for each kept entry and load graph evidence from the database.
 *
 * @param options Hybrid-search options plus an optional `contextChars`
 *                (default 1200, clamped to 200–4000).
 * @returns The assembled context pack.
 */
export async function retrieveKnowledgeContext(options: RetrievalOptions): Promise<KnowledgeContextPack> {
  const requestedChars = options.contextChars ?? 1200;
  const contextChars = Math.max(200, Math.min(requestedChars, 4000));
  const search = await hybridSearch(options);
  const terms = queryTerms(search.query);
  const warnings = [...search.warnings];
  const permissionNotes = new Set<string>();
  const freshnessNotes = new Set<string>();

  // Permission is checked before staleness, so a result failing both is reported as permission-filtered.
  const eligible: Array<(typeof search.results)[number]> = [];
  for (const candidate of search.results) {
    if (!hasReadOnlyProvenance(candidate.provenance)) {
      warnings.push(`permission_filtered: ${candidate.kind}:${candidate.id}`);
      permissionNotes.add('Dropped a result because provenance was not read-only.');
      continue;
    }
    if (isStale(candidate.provenance)) {
      warnings.push(`stale_filtered: ${candidate.kind}:${candidate.id}`);
      freshnessNotes.add('Dropped a stale result whose source status requires reindexing.');
      continue;
    }
    eligible.push(candidate);
  }

  const ranked = eligible.map((entry) => rerank(entry, terms));
  ranked.sort((a, b) => b.score - a.score || a.id.localeCompare(b.id));
  const results = ranked.slice(0, search.limit);

  const citations: RetrievalCitation[] = [];
  const excerpts: RetrievalExcerpt[] = [];
  for (const entry of results) {
    const citation = citationFor(entry);
    citations.push(citation);

    const excerpt = excerptFor(entry, citation, contextChars);
    if (excerpt) excerpts.push(excerpt);

    if (entry.provenance && 'read_only' in entry.provenance && entry.provenance.read_only) {
      permissionNotes.add('All source-backed excerpts are read-only and citation-required.');
    }
    if (entry.rerank.freshness_score >= 0.85) {
      freshnessNotes.add('Fresh source revision/hash or artifact hash is present for top context.');
    }
  }

  // The timestamp is taken before the database is read for graph evidence.
  const createdAt = new Date().toISOString();
  const graph = loadGraphEvidence(options.dbPath, results);

  return {
    query: search.query,
    normalized_query: normalizeQuery(search.query),
    created_at: createdAt,
    mode: search.mode,
    warnings,
    search_counts: search.counts,
    results,
    citations,
    excerpts,
    graph,
    notes: {
      permissions: [...permissionNotes],
      freshness: [...freshnessNotes],
    },
  };
}
package/src/service.ts CHANGED
@@ -12,6 +12,7 @@ import { ingestOpenFilesManifest } from './manifest-ingest';
12
12
  import { ingestSourceRef } from './source-ingest';
13
13
  import { resolveOpenFilesSource } from './source-resolver';
14
14
  import { providerStatus, listModelRegistry, type ProviderStatusResult, type ModelRegistryEntry } from './providers';
15
+ import { retrieveKnowledgeContext, type RetrievalOptions } from './retrieval';
15
16
  import { hybridSearch, type HybridSearchOptions } from './search';
16
17
  import { resolveSafetyPolicy } from './safety';
17
18
  import {
@@ -224,6 +225,15 @@ export class KnowledgeService {
224
225
  config: this.config(),
225
226
  });
226
227
  }
228
+
229
+ async retrieveContext(options: Omit<RetrievalOptions, 'dbPath' | 'config'>) {
230
+ const workspace = this.ensureWorkspace();
231
+ return retrieveKnowledgeContext({
232
+ ...options,
233
+ dbPath: workspace.knowledgeDbPath,
234
+ config: this.config(),
235
+ });
236
+ }
227
237
  }
228
238
 
229
239
  export function createKnowledgeService(options: KnowledgeServiceOptions = {}): KnowledgeService {