@hasna/knowledge 0.2.27 → 0.2.28

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58)
  1. package/README.md +41 -0
  2. package/bin/open-knowledge-mcp.js +15 -7
  3. package/bin/open-knowledge.js +17 -17
  4. package/dist/agent.d.ts +35 -0
  5. package/dist/artifact-store.d.ts +63 -0
  6. package/dist/auth.d.ts +35 -0
  7. package/dist/embeddings.d.ts +77 -0
  8. package/dist/index.d.ts +20 -0
  9. package/dist/index.js +5709 -0
  10. package/dist/knowledge-db.d.ts +27 -0
  11. package/dist/manifest-ingest.d.ts +35 -0
  12. package/dist/outbox-consume.d.ts +25 -0
  13. package/dist/provenance.d.ts +50 -0
  14. package/dist/providers.d.ts +89 -0
  15. package/dist/reindex.d.ts +37 -0
  16. package/dist/remote-client.d.ts +108 -0
  17. package/dist/retrieval.d.ts +71 -0
  18. package/dist/safety.d.ts +70 -0
  19. package/dist/sdk.d.ts +72 -0
  20. package/dist/search.d.ts +65 -0
  21. package/dist/service.d.ts +117 -0
  22. package/dist/source-ingest.d.ts +18 -0
  23. package/dist/source-ref.d.ts +30 -0
  24. package/dist/source-resolver.d.ts +92 -0
  25. package/dist/storage-contract.d.ts +106 -0
  26. package/dist/web-search.d.ts +40 -0
  27. package/dist/wiki-compiler.d.ts +67 -0
  28. package/dist/wiki-layout.d.ts +23 -0
  29. package/dist/workspace.d.ts +111 -0
  30. package/package.json +15 -7
  31. package/src/agent.ts +0 -367
  32. package/src/artifact-store.ts +0 -184
  33. package/src/auth.ts +0 -123
  34. package/src/cli.ts +0 -1184
  35. package/src/embeddings.ts +0 -516
  36. package/src/knowledge-db.ts +0 -354
  37. package/src/manifest-ingest.ts +0 -515
  38. package/src/mcp-http.js +0 -110
  39. package/src/mcp.js +0 -1503
  40. package/src/outbox-consume.ts +0 -463
  41. package/src/provenance.ts +0 -93
  42. package/src/providers.ts +0 -308
  43. package/src/reindex.ts +0 -260
  44. package/src/remote-client.ts +0 -268
  45. package/src/retrieval.ts +0 -326
  46. package/src/safety.ts +0 -265
  47. package/src/schema.js +0 -25
  48. package/src/search.ts +0 -510
  49. package/src/service.ts +0 -443
  50. package/src/source-ingest.ts +0 -268
  51. package/src/source-ref.ts +0 -104
  52. package/src/source-resolver.ts +0 -436
  53. package/src/storage-contract.ts +0 -346
  54. package/src/store.ts +0 -113
  55. package/src/web-search.ts +0 -330
  56. package/src/wiki-compiler.ts +0 -711
  57. package/src/wiki-layout.ts +0 -251
  58. package/src/workspace.ts +0 -251
@@ -1,268 +0,0 @@
1
- import { getKnowledgeApiKey, resolveKnowledgeApiUrl } from './auth';
2
- import type { KnowledgeConfig } from './workspace';
3
-
4
/** Version stamped into the `contract_version` field of registry and run payloads. */
export const REMOTE_KNOWLEDGE_CONTRACT_VERSION = 1 as const;
5
-
6
/** Status values declared for a remote knowledge run. */
export type RemoteKnowledgeRunStatus = 'queued' | 'running' | 'completed' | 'failed' | 'canceled';
7
-
8
/**
 * Source-side portion of the registry contract.
 * The literal-typed fields admit exactly one value each.
 */
export interface RemoteKnowledgeSourceContract {
  owner: 'open-files';
  preferred_ref: 'open-files';
  /** Source URI schemes the service accepts. */
  allowed_schemes: string[];
  /** Typed as the literal `false`; no other value is permitted. */
  raw_source_bytes_stored_in_open_knowledge: false;
}
14
-
15
/** Artifact-side portion of the registry contract. */
export interface RemoteKnowledgeArtifactContract {
  storage_type: 'local' | 's3' | 'managed';
  /** URI prefix for artifacts, or `null` when none is configured. */
  uri_prefix: string | null;
  /** Typed as the literal `true`; no other value is permitted. */
  generated_only: true;
}
20
-
21
/** Self-description payload served by the knowledge registry endpoint. */
export interface RemoteKnowledgeRegistryContract {
  contract_version: typeof REMOTE_KNOWLEDGE_CONTRACT_VERSION;
  service: 'open-knowledge';
  mode: 'local' | 'hosted';
  /** Capability tags advertised by the service. */
  capabilities: string[];
  /** Endpoint paths, keyed by operation. */
  endpoints: {
    registry: string;
    search: string;
    ask: string;
    build: string;
    sync: string;
    run_status: string;
    run_logs: string;
    run_artifacts: string;
  };
  source_contract: RemoteKnowledgeSourceContract;
  artifact_contract: RemoteKnowledgeArtifactContract;
}
39
-
40
/**
 * Normalized description of a single remote run.
 * Only `contract_version` is required; every other field is optional.
 */
export interface RemoteKnowledgeRunContract {
  contract_version: typeof REMOTE_KNOWLEDGE_CONTRACT_VERSION;
  id?: string;
  type?: 'search' | 'ask' | 'build' | 'sync' | 'artifact' | 'status';
  /** A known status, or any other string the server reports. */
  status?: RemoteKnowledgeRunStatus | string;
  query?: string;
  prompt?: string;
  output_preview?: unknown;
  citations?: unknown[];
  artifacts?: unknown[];
  usage?: Record<string, unknown>;
  created_at?: string;
  started_at?: string;
  completed_at?: string;
  duration_ms?: number;
  error_code?: string;
  error_message?: string;
  error?: string;
  details?: unknown;
}
60
-
61
/** Request body for the remote search endpoint. */
export interface RemoteKnowledgeSearchRequest {
  query: string;
  limit?: number;
  semantic?: boolean;
  source_refs?: string[];
}
67
-
68
/** Request body for the ask and build endpoints: a search request plus a prompt. */
export interface RemoteKnowledgePromptRequest extends RemoteKnowledgeSearchRequest {
  prompt: string;
  generate?: boolean;
  approve_write?: boolean;
}
73
-
74
/** Request body for the remote sync endpoint; every field is optional. */
export interface RemoteKnowledgeSyncRequest {
  source_refs?: string[];
  artifact_prefix?: string;
  mode?: 'pull' | 'push' | 'both';
}
79
-
80
/** One entry in the array returned by the run-logs endpoint; all fields are optional. */
export interface RemoteKnowledgeLogEntry {
  id?: string;
  run_id?: string;
  level?: string;
  event?: string;
  metadata?: Record<string, unknown>;
  created_at?: string;
}
88
-
89
/** One entry in the array returned by the run-artifacts endpoint; all fields are optional. */
export interface RemoteKnowledgeArtifact {
  id?: string;
  uri?: string;
  key?: string;
  kind?: string;
  content_type?: string;
  hash?: string;
  size_bytes?: number;
  metadata?: Record<string, unknown>;
}
99
-
100
/** Type guard: `true` for any non-null object that is not an array. */
function isRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === 'object' && value !== null && !Array.isArray(value);
}
103
-
104
/** Returns `record[key]` when it is a string, otherwise `undefined`. */
function stringValue(record: Record<string, unknown>, key: string): string | undefined {
  const candidate = record[key];
  return typeof candidate === 'string' ? candidate : undefined;
}
108
-
109
/** Returns `record[key]` when it is a finite number; `NaN` and infinities yield `undefined`. */
function numberValue(record: Record<string, unknown>, key: string): number | undefined {
  const candidate = record[key];
  return typeof candidate === 'number' && Number.isFinite(candidate) ? candidate : undefined;
}
113
-
114
/** Returns `record[key]` when it is an array, otherwise `undefined`. */
function arrayValue(record: Record<string, unknown>, key: string): unknown[] | undefined {
  const candidate = record[key];
  return Array.isArray(candidate) ? candidate : undefined;
}
118
-
119
/**
 * Coerces an arbitrary payload into a `RemoteKnowledgeRunContract`.
 *
 * A payload that is not a plain object is treated as empty. Each typed field is
 * taken from the payload only when it has the expected runtime type; otherwise
 * the matching `fallback` value is used. `output_preview` and `details` are
 * copied verbatim whenever the payload has them as own properties, even if the
 * value is `null` or `undefined`.
 *
 * @param payload  Untrusted value, typically parsed JSON.
 * @param fallback Values used for fields the payload does not supply.
 * @returns A contract whose `contract_version` is always the current version.
 */
export function normalizeRemoteKnowledgeRunContract(payload: unknown, fallback?: Partial<RemoteKnowledgeRunContract>): RemoteKnowledgeRunContract {
  const record = isRecord(payload) ? payload : {};
  const text = (key: string): string | undefined => stringValue(record, key);
  const list = (key: string): unknown[] | undefined => arrayValue(record, key);
  const hasOwn = (key: string): boolean => Object.prototype.hasOwnProperty.call(record, key);

  return {
    contract_version: REMOTE_KNOWLEDGE_CONTRACT_VERSION,
    id: text('id') ?? fallback?.id,
    // NOTE(review): the string is not checked against the declared union, so an
    // unrecognised `type` from the server passes through unchanged.
    type: (text('type') as RemoteKnowledgeRunContract['type'] | undefined) ?? fallback?.type,
    status: text('status') ?? fallback?.status,
    query: text('query') ?? fallback?.query,
    prompt: text('prompt') ?? fallback?.prompt,
    output_preview: hasOwn('output_preview') ? record.output_preview : fallback?.output_preview,
    citations: list('citations') ?? fallback?.citations,
    artifacts: list('artifacts') ?? fallback?.artifacts,
    usage: isRecord(record.usage) ? record.usage : fallback?.usage,
    created_at: text('created_at') ?? fallback?.created_at,
    started_at: text('started_at') ?? fallback?.started_at,
    completed_at: text('completed_at') ?? fallback?.completed_at,
    duration_ms: numberValue(record, 'duration_ms') ?? fallback?.duration_ms,
    error_code: text('error_code') ?? fallback?.error_code,
    error_message: text('error_message') ?? fallback?.error_message,
    error: text('error') ?? fallback?.error,
    details: hasOwn('details') ? record.details : fallback?.details,
  };
}
142
-
143
/**
 * Builds the registry contract advertised by the service.
 *
 * The capability list and endpoint paths are fixed. Only the mode, the accepted
 * source schemes, and the artifact storage settings come from `input`.
 *
 * @param input.mode              Whether the service runs locally or hosted.
 * @param input.sourceSchemes     Schemes copied into `source_contract.allowed_schemes`.
 * @param input.storageType       Artifact storage backend.
 * @param input.artifactUriPrefix Artifact URI prefix, or `null`.
 */
export function knowledgeRegistryContract(input: {
  mode: 'local' | 'hosted';
  sourceSchemes: string[];
  storageType: 'local' | 's3' | 'managed';
  artifactUriPrefix: string | null;
}): RemoteKnowledgeRegistryContract {
  const { mode, sourceSchemes, storageType, artifactUriPrefix } = input;
  const runBase = '/api/v1/knowledge/runs/{run_id}';
  return {
    contract_version: REMOTE_KNOWLEDGE_CONTRACT_VERSION,
    service: 'open-knowledge',
    mode,
    capabilities: [
      'registry',
      'search',
      'ask',
      'build',
      'sync',
      'status',
      'logs',
      'artifacts',
      'open-files-source-refs',
      's3-generated-artifacts',
    ],
    endpoints: {
      registry: '/api/v1/knowledge/registry',
      search: '/api/v1/knowledge/search',
      ask: '/api/v1/knowledge/ask',
      build: '/api/v1/knowledge/build',
      sync: '/api/v1/knowledge/sync',
      run_status: runBase,
      run_logs: `${runBase}/logs`,
      run_artifacts: `${runBase}/artifacts`,
    },
    source_contract: {
      owner: 'open-files',
      preferred_ref: 'open-files',
      // Copy so later mutation of the caller's array cannot alter the contract.
      allowed_schemes: [...sourceSchemes],
      raw_source_bytes_stored_in_open_knowledge: false,
    },
    artifact_contract: {
      storage_type: storageType,
      uri_prefix: artifactUriPrefix,
      generated_only: true,
    },
  };
}
188
-
189
/**
 * Thin HTTP client for the remote knowledge API.
 *
 * Every request carries a bearer token and a JSON content type. The run-producing
 * calls (`search`, `ask`, `build`, `sync`) do not inspect the HTTP status: whatever
 * JSON body comes back is passed through `normalizeRemoteKnowledgeRunContract`.
 * The run-inspection calls (`runStatus`, `runLogs`, `runArtifacts`) return
 * `null` or an empty array on a non-OK response.
 */
export class RemoteKnowledgeClient {
  constructor(
    private readonly apiKey: string,
    private readonly apiUrl: string,
  ) {}

  /**
   * Creates a client from configuration and environment.
   * @returns `null` when no API key is available.
   */
  static fromConfig(config?: KnowledgeConfig, env: Record<string, string | undefined> = process.env): RemoteKnowledgeClient | null {
    const key = getKnowledgeApiKey(env);
    if (!key.apiKey) return null;
    return new RemoteKnowledgeClient(key.apiKey, resolveKnowledgeApiUrl(config, env));
  }

  /** Sends an authenticated request to `path` relative to the API base URL. */
  private async request(path: string, options: RequestInit = {}): Promise<Response> {
    const headers = new Headers({
      Authorization: `Bearer ${this.apiKey}`,
      'Content-Type': 'application/json',
    });
    // Going through Headers honours every HeadersInit form (plain object, tuple
    // array, Headers instance) and overrides the defaults case-insensitively.
    new Headers(options.headers).forEach((value, name) => headers.set(name, value));
    // Trim trailing slashes so a base URL ending in "/" does not yield "//api/...".
    const base = this.apiUrl.replace(/\/+$/, '');
    return fetch(`${base}${path}`, { ...options, headers });
  }

  /** POSTs `body` as JSON and normalizes the JSON response into a run contract. */
  private async postRun(path: string, body: unknown, fallback: Partial<RemoteKnowledgeRunContract>): Promise<RemoteKnowledgeRunContract> {
    const response = await this.request(path, {
      method: 'POST',
      body: JSON.stringify(body),
    });
    return normalizeRemoteKnowledgeRunContract(await response.json(), fallback);
  }

  /** Fetches the registry contract; the response body is returned without validation. */
  async registry(): Promise<RemoteKnowledgeRegistryContract> {
    const response = await this.request('/api/v1/knowledge/registry');
    return response.json() as Promise<RemoteKnowledgeRegistryContract>;
  }

  /** Runs a remote search; `type` and `query` fall back to the request when absent from the reply. */
  async search(request: RemoteKnowledgeSearchRequest): Promise<RemoteKnowledgeRunContract> {
    return this.postRun('/api/v1/knowledge/search', request, { type: 'search', query: request.query });
  }

  /** Runs a remote ask; `type` and `prompt` fall back to the request when absent from the reply. */
  async ask(request: RemoteKnowledgePromptRequest): Promise<RemoteKnowledgeRunContract> {
    return this.postRun('/api/v1/knowledge/ask', request, { type: 'ask', prompt: request.prompt });
  }

  /** Runs a remote build; `type` and `prompt` fall back to the request when absent from the reply. */
  async build(request: RemoteKnowledgePromptRequest): Promise<RemoteKnowledgeRunContract> {
    return this.postRun('/api/v1/knowledge/build', request, { type: 'build', prompt: request.prompt });
  }

  /** Runs a remote sync; an omitted request is sent as an empty object. */
  async sync(request: RemoteKnowledgeSyncRequest = {}): Promise<RemoteKnowledgeRunContract> {
    return this.postRun('/api/v1/knowledge/sync', request, { type: 'sync' });
  }

  /** Fetches a run by id; returns `null` when the response is not OK. */
  async runStatus(runId: string): Promise<RemoteKnowledgeRunContract | null> {
    const response = await this.request(`/api/v1/knowledge/runs/${encodeURIComponent(runId)}`);
    if (!response.ok) return null;
    return normalizeRemoteKnowledgeRunContract(await response.json(), { id: runId, type: 'status' });
  }

  /** Fetches a run's log entries; returns `[]` on a non-OK response or a non-array body. */
  async runLogs(runId: string): Promise<RemoteKnowledgeLogEntry[]> {
    const response = await this.request(`/api/v1/knowledge/runs/${encodeURIComponent(runId)}/logs`);
    if (!response.ok) return [];
    const payload: unknown = await response.json();
    return Array.isArray(payload) ? (payload as RemoteKnowledgeLogEntry[]) : [];
  }

  /** Fetches a run's artifacts; returns `[]` on a non-OK response or a non-array body. */
  async runArtifacts(runId: string): Promise<RemoteKnowledgeArtifact[]> {
    const response = await this.request(`/api/v1/knowledge/runs/${encodeURIComponent(runId)}/artifacts`);
    if (!response.ok) return [];
    const payload: unknown = await response.json();
    return Array.isArray(payload) ? (payload as RemoteKnowledgeArtifact[]) : [];
  }
}
package/src/retrieval.ts DELETED
@@ -1,326 +0,0 @@
1
- import { createHash } from 'node:crypto';
2
- import { openKnowledgeDb } from './knowledge-db';
3
- import { isStaleStatus } from './provenance';
4
- import { hybridSearch, type HybridSearchEntry, type HybridSearchOptions, type HybridSearchResult, type SearchProvenance } from './search';
5
-
6
/** Hybrid-search options plus an optional per-excerpt character budget. */
export interface RetrievalOptions extends HybridSearchOptions {
  /** Requested maximum characters per excerpt. */
  contextChars?: number;
}
9
-
10
/** A search entry together with the component scores used to rerank it. */
export interface RerankedSearchEntry extends HybridSearchEntry {
  rerank: {
    base_score: number;
    final_score: number;
    exact_score: number;
    citation_score: number;
    freshness_score: number;
    authority_score: number;
  };
}
20
-
21
/** Citation record for one search result; fields that are not available are `null`. */
export interface RetrievalCitation {
  id: string;
  /** Id of the search result this citation was built from. */
  result_id: string;
  kind: HybridSearchEntry['kind'];
  source_uri: string | null;
  source_ref: string | null;
  artifact_uri: string | null;
  artifact_path: string | null;
  revision: string | null;
  hash: string | null;
  chunk_id: string | null;
  start_offset: number | null;
  end_offset: number | null;
  quote: string | null;
  provenance: SearchProvenance | null;
}
37
-
38
/** Text excerpt taken from one search result, linked to its citation. */
export interface RetrievalExcerpt {
  id: string;
  result_id: string;
  citation_id: string | null;
  kind: HybridSearchEntry['kind'];
  text: string;
  score: number;
}
46
-
47
/** Citation and backlink rows related to a set of search results. */
export interface RetrievalGraphEvidence {
  citations: Array<{
    id: string;
    chunk_id: string | null;
    wiki_page_id: string | null;
    source_uri: string;
    quote: string | null;
    start_offset: number | null;
    end_offset: number | null;
  }>;
  backlinks: Array<{
    from_page_id: string;
    to_page_id: string;
    label: string | null;
  }>;
}
63
-
64
/** Everything a retrieval call returns: reranked results plus citations, excerpts, graph evidence and notes. */
export interface KnowledgeContextPack {
  query: string;
  normalized_query: string;
  /** ISO-8601 timestamp of when the pack was assembled. */
  created_at: string;
  mode: HybridSearchResult['mode'];
  warnings: string[];
  search_counts: HybridSearchResult['counts'];
  results: RerankedSearchEntry[];
  citations: RetrievalCitation[];
  excerpts: RetrievalExcerpt[];
  graph: RetrievalGraphEvidence;
  notes: {
    permissions: string[];
    freshness: string[];
  };
}
80
-
81
/** Row shape selected from the `citations` table. */
interface CitationRow {
  id: string;
  wiki_page_id: string | null;
  chunk_id: string | null;
  source_uri: string;
  quote: string | null;
  start_offset: number | null;
  end_offset: number | null;
}
90
-
91
/** Row shape selected from the `wiki_backlinks` table. */
interface BacklinkRow {
  from_page_id: string;
  to_page_id: string;
  label: string | null;
}
96
-
97
/**
 * Derives a deterministic id: `prefix`, an underscore, then the first 20 hex
 * characters of the SHA-256 digest of `value`.
 */
function stableId(prefix: string, value: string): string {
  const digest = createHash('sha256').update(value).digest('hex');
  return `${prefix}_${digest.slice(0, 20)}`;
}
100
-
101
/** Canonical query form: NFKC-normalized, trimmed, whitespace runs collapsed to one space, lowercased. */
function normalizeQuery(query: string): string {
  const compact = query.normalize('NFKC').trim().replace(/\s+/g, ' ');
  return compact.toLowerCase();
}
104
-
105
/**
 * Splits a query into distinct terms: runs of letters, digits or underscores
 * taken from the normalized query, in first-seen order, capped at 16.
 */
function queryTerms(query: string): string[] {
  const tokens = normalizeQuery(query).match(/[\p{L}\p{N}_]+/gu) ?? [];
  return Array.from(new Set(tokens)).slice(0, 16);
}
108
-
109
/**
 * Builds the haystack used for exact-term matching: title and text joined by a
 * space, empty parts skipped.
 *
 * The haystack goes through the same NFKC-then-lowercase folding as the query.
 * Without it, compatibility forms (for example full-width letters) in the
 * content could never match the normalized query terms.
 */
function textForResult(result: HybridSearchEntry): string {
  const parts = [result.title, result.text].filter(Boolean);
  return parts.join(' ').normalize('NFKC').toLowerCase();
}
112
-
113
/**
 * Fraction of `terms` that occur as substrings of the result's title and text,
 * rounded to six decimals. Returns 0 when there are no terms.
 */
function exactScore(result: HybridSearchEntry, terms: string[]): number {
  if (terms.length === 0) return 0;
  const haystack = textForResult(result);
  let hits = 0;
  for (const term of terms) {
    if (haystack.includes(term)) hits += 1;
  }
  return Number((hits / terms.length).toFixed(6));
}
119
-
120
/**
 * Decides whether a result's provenance permits read-only use.
 *
 * Missing provenance, or provenance with neither flag, counts as read-only.
 * When `read_only` is present it decides alone; `read_only_sources` is consulted
 * only when `read_only` is absent. A present flag must be exactly `true`.
 */
function hasReadOnlyProvenance(provenance: SearchProvenance | null): boolean {
  if (!provenance) return true;
  if ('read_only' in provenance) return provenance.read_only === true;
  if ('read_only_sources' in provenance) return provenance.read_only_sources === true;
  return true;
}
126
-
127
/**
 * Reports whether provenance marks a result as stale: either a truthy `stale`
 * flag, or a `status` that `isStaleStatus` classifies as stale. Missing
 * provenance is never stale.
 */
function isStale(provenance: SearchProvenance | null): boolean {
  if (!provenance) return false;
  if ('stale' in provenance && provenance.stale) return true;
  return 'status' in provenance ? isStaleStatus(provenance.status) : false;
}
133
-
134
/**
 * Freshness heuristic, first match wins:
 * stale provenance gives 0; a source hash or revision gives 1; an artifact hash
 * gives 0.85; non-empty provenance `source_refs` gives 0.75; anything else 0.55.
 */
function freshnessScore(result: HybridSearchEntry): number {
  const { provenance, source, artifact } = result;
  if (isStale(provenance)) return 0;
  if (source?.hash || source?.revision) return 1;
  if (artifact?.hash) return 0.85;
  if (provenance && 'source_refs' in provenance && provenance.source_refs.length > 0) return 0.75;
  return 0.55;
}
141
-
142
/**
 * Citation-quality heuristic, first match wins:
 * a chunk id plus a source or artifact URI gives 1; provenance flagged
 * `citation_required` gives 0.75; an artifact URI alone gives 0.65; otherwise 0.35.
 */
function citationScore(result: HybridSearchEntry): number {
  const { citation, source, artifact, provenance } = result;
  if (citation?.chunk_id && (source?.uri || artifact?.uri)) return 1;
  if (provenance && 'citation_required' in provenance && provenance.citation_required) return 0.75;
  if (artifact?.uri) return 0.65;
  return 0.35;
}
148
-
149
/** Fixed weight per result kind: wiki chunk 0.85, source chunk 0.8, wiki page 0.65, any other kind 0.55. */
function authorityScore(result: HybridSearchEntry): number {
  switch (result.kind) {
    case 'wiki_chunk':
      return 0.85;
    case 'source_chunk':
      return 0.8;
    case 'wiki_page':
      return 0.65;
    default:
      return 0.55;
  }
}
155
-
156
/**
 * Re-scores one search result.
 *
 * The final score is a weighted blend of base search score (0.65), exact-term
 * match (0.1), citation quality (0.1), freshness (0.1) and authority (0.05).
 * It is capped at 1 and rounded to six decimals. Reason tags are appended when
 * the exact score exceeds 0.5, the citation score is at least 0.75, or the
 * freshness score is at least 0.85.
 *
 * @returns A copy of `result` with `score` and `reasons` replaced and a `rerank` breakdown attached.
 */
function rerank(result: HybridSearchEntry, terms: string[]): RerankedSearchEntry {
  const scores = {
    base_score: result.score,
    exact_score: exactScore(result, terms),
    citation_score: citationScore(result),
    freshness_score: freshnessScore(result),
    authority_score: authorityScore(result),
  };
  const blended =
    scores.base_score * 0.65 +
    scores.exact_score * 0.1 +
    scores.citation_score * 0.1 +
    scores.freshness_score * 0.1 +
    scores.authority_score * 0.05;
  const finalScore = Number(Math.min(1, blended).toFixed(6));

  // A Set keeps existing reasons and avoids duplicating a tag already present.
  const reasons = new Set(result.reasons);
  if (scores.exact_score > 0.5) reasons.add('exact_term');
  if (scores.citation_score >= 0.75) reasons.add('cited_source');
  if (scores.freshness_score >= 0.85) reasons.add('fresh_source');

  return {
    ...result,
    score: finalScore,
    reasons: Array.from(reasons),
    rerank: {
      ...scores,
      final_score: finalScore,
    },
  };
}
185
-
186
/**
 * Produces a single-line quote from a result's text, or from its title when
 * text is null or undefined.
 *
 * Whitespace runs are collapsed to one space. When the collapsed text is longer
 * than `maxChars` it is cut and suffixed with `...`, and the suffix counts
 * toward the budget so the result stays within `maxChars`. The exception is
 * `maxChars` below 3, where the bare `...` is returned.
 *
 * @returns The quote, or `null` when there is nothing to quote.
 */
function quoteFor(result: HybridSearchEntry, maxChars: number): string | null {
  const source = result.text ?? result.title;
  if (!source) return null;
  const normalized = source.replace(/\s+/g, ' ').trim();
  if (normalized.length <= maxChars) return normalized;

  const ellipsis = '...';
  let head = normalized.slice(0, Math.max(0, maxChars - ellipsis.length));
  // Do not leave half of a surrogate pair at the cut point.
  if (/[\uD800-\uDBFF]$/.test(head)) head = head.slice(0, -1);
  return `${head.trim()}${ellipsis}`;
}
192
-
193
/**
 * Builds the citation record for a reranked result.
 *
 * The id is derived from the result kind, result id, source URI and artifact
 * URI, so the same result always yields the same citation id. The quote is
 * limited to 500 characters. Unavailable fields are `null`.
 */
function citationFor(result: RerankedSearchEntry): RetrievalCitation {
  const { source, artifact, citation } = result;
  const idSeed = [result.kind, result.id, source?.uri ?? '', artifact?.uri ?? ''].join('\u0000');
  return {
    id: stableId('cite', idSeed),
    result_id: result.id,
    kind: result.kind,
    source_uri: source?.uri ?? null,
    source_ref: source?.ref ?? null,
    artifact_uri: artifact?.uri ?? null,
    artifact_path: artifact?.path ?? null,
    revision: source?.revision ?? null,
    // Prefer the source hash; fall back to the artifact hash.
    hash: source?.hash ?? artifact?.hash ?? null,
    chunk_id: citation?.chunk_id ?? null,
    start_offset: citation?.start_offset ?? null,
    end_offset: citation?.end_offset ?? null,
    quote: quoteFor(result, 500),
    provenance: result.provenance,
  };
}
212
-
213
/**
 * Builds the excerpt for a reranked result, limited to `contextChars` characters.
 *
 * @returns The excerpt linked to `citation`, or `null` when the result has no quotable text.
 */
function excerptFor(result: RerankedSearchEntry, citation: RetrievalCitation, contextChars: number): RetrievalExcerpt | null {
  const text = quoteFor(result, contextChars);
  if (!text) return null;
  const { kind, id: resultId, score } = result;
  return {
    id: stableId('excerpt', `${kind}\u0000${resultId}`),
    result_id: resultId,
    citation_id: citation.id,
    kind,
    text,
    score,
  };
}
225
-
226
/** Returns one `?` per value, comma-separated, for use inside a SQL `IN (...)` list. */
function placeholders(values: unknown[]): string {
  return Array.from(values, () => '?').join(', ');
}
229
-
230
/**
 * Loads citation and backlink rows related to the given results.
 *
 * Citations are looked up by the results' chunk ids and by the ids of results
 * of kind `wiki_page`. Backlinks are looked up for those wiki pages in either
 * direction. Each query returns at most 50 rows. The database is not opened
 * when there is nothing to look up, and is always closed before returning.
 *
 * A citation matched by both lookups is returned once.
 */
function loadGraphEvidence(dbPath: string, results: RerankedSearchEntry[]): RetrievalGraphEvidence {
  // De-duplicate ids so the IN (...) lists carry no redundant parameters.
  const chunkIds = Array.from(new Set(
    results.map((result) => result.citation?.chunk_id).filter((id): id is string => Boolean(id)),
  ));
  const wikiPageIds = Array.from(new Set(
    results.filter((result) => result.kind === 'wiki_page').map((result) => result.id),
  ));
  const citations: CitationRow[] = [];
  const backlinks: BacklinkRow[] = [];
  if (chunkIds.length === 0 && wikiPageIds.length === 0) return { citations, backlinks };

  const seenCitationIds = new Set<string>();
  const addCitations = (rows: CitationRow[]): void => {
    for (const row of rows) {
      if (seenCitationIds.has(row.id)) continue;
      seenCitationIds.add(row.id);
      citations.push(row);
    }
  };

  const db = openKnowledgeDb(dbPath);
  try {
    if (chunkIds.length > 0) {
      addCitations(db.query<CitationRow, string[]>(
        `SELECT id, wiki_page_id, chunk_id, source_uri, quote, start_offset, end_offset
         FROM citations
         WHERE chunk_id IN (${placeholders(chunkIds)})
         ORDER BY created_at DESC
         LIMIT 50`,
      ).all(...chunkIds));
    }
    if (wikiPageIds.length > 0) {
      addCitations(db.query<CitationRow, string[]>(
        `SELECT id, wiki_page_id, chunk_id, source_uri, quote, start_offset, end_offset
         FROM citations
         WHERE wiki_page_id IN (${placeholders(wikiPageIds)})
         ORDER BY created_at DESC
         LIMIT 50`,
      ).all(...wikiPageIds));
      // The id list is bound twice: once for each IN (...) clause.
      backlinks.push(...db.query<BacklinkRow, string[]>(
        `SELECT from_page_id, to_page_id, label
         FROM wiki_backlinks
         WHERE from_page_id IN (${placeholders(wikiPageIds)}) OR to_page_id IN (${placeholders(wikiPageIds)})
         LIMIT 50`,
      ).all(...wikiPageIds, ...wikiPageIds));
    }
  } finally {
    db.close();
  }
  return { citations, backlinks };
}
268
-
269
/**
 * Runs a hybrid search and packages the outcome as a context pack.
 *
 * Steps: search; drop results whose provenance is not read-only or is stale,
 * recording a warning and a note for each; rerank the rest; sort by score
 * descending, then id; keep `search.limit` entries; build citations, excerpts
 * and graph evidence for what remains.
 *
 * `contextChars` defaults to 1200 and is clamped to the range 200 to 4000. A
 * `NaN` value is treated as unset, because it would otherwise pass through the
 * clamp and reduce every excerpt to a bare ellipsis.
 */
export async function retrieveKnowledgeContext(options: RetrievalOptions): Promise<KnowledgeContextPack> {
  const requestedChars = options.contextChars;
  const effectiveChars = requestedChars === undefined || Number.isNaN(requestedChars) ? 1200 : requestedChars;
  const contextChars = Math.max(200, Math.min(effectiveChars, 4000));

  const search = await hybridSearch(options);
  const terms = queryTerms(search.query);
  const warnings = [...search.warnings];
  const permissionNotes = new Set<string>();
  const freshnessNotes = new Set<string>();

  const filtered = search.results.filter((result) => {
    if (!hasReadOnlyProvenance(result.provenance)) {
      warnings.push(`permission_filtered: ${result.kind}:${result.id}`);
      permissionNotes.add('Dropped a result because provenance was not read-only.');
      return false;
    }
    if (isStale(result.provenance)) {
      warnings.push(`stale_filtered: ${result.kind}:${result.id}`);
      freshnessNotes.add('Dropped a stale result whose source status requires reindexing.');
      return false;
    }
    return true;
  });

  const results = filtered
    .map((result) => rerank(result, terms))
    .sort((a, b) => b.score - a.score || a.id.localeCompare(b.id))
    .slice(0, search.limit);

  // citations[i] belongs to results[i]; excerpts rely on that pairing.
  const citations = results.map(citationFor);
  const excerpts = results
    .map((result, index) => excerptFor(result, citations[index], contextChars))
    .filter((entry): entry is RetrievalExcerpt => Boolean(entry));

  for (const result of results) {
    const { provenance } = result;
    if (provenance && 'read_only' in provenance && provenance.read_only) {
      permissionNotes.add('All source-backed excerpts are read-only and citation-required.');
    }
    if (result.rerank.freshness_score >= 0.85) {
      freshnessNotes.add('Fresh source revision/hash or artifact hash is present for top context.');
    }
  }

  return {
    query: search.query,
    normalized_query: normalizeQuery(search.query),
    created_at: new Date().toISOString(),
    mode: search.mode,
    warnings,
    search_counts: search.counts,
    results,
    citations,
    excerpts,
    graph: loadGraphEvidence(options.dbPath, results),
    notes: {
      permissions: Array.from(permissionNotes),
      freshness: Array.from(freshnessNotes),
    },
  };
}