@hasna/knowledge 0.2.26 → 0.2.28

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. package/README.md +61 -0
  2. package/bin/open-knowledge-mcp.js +85 -9
  3. package/bin/open-knowledge.js +86 -86
  4. package/dist/agent.d.ts +35 -0
  5. package/dist/artifact-store.d.ts +63 -0
  6. package/dist/auth.d.ts +35 -0
  7. package/dist/embeddings.d.ts +77 -0
  8. package/dist/index.d.ts +20 -0
  9. package/dist/index.js +5709 -0
  10. package/dist/knowledge-db.d.ts +27 -0
  11. package/dist/manifest-ingest.d.ts +35 -0
  12. package/dist/outbox-consume.d.ts +25 -0
  13. package/dist/provenance.d.ts +50 -0
  14. package/dist/providers.d.ts +89 -0
  15. package/dist/reindex.d.ts +37 -0
  16. package/dist/remote-client.d.ts +108 -0
  17. package/dist/retrieval.d.ts +71 -0
  18. package/dist/safety.d.ts +70 -0
  19. package/dist/sdk.d.ts +72 -0
  20. package/dist/search.d.ts +65 -0
  21. package/dist/service.d.ts +117 -0
  22. package/dist/source-ingest.d.ts +18 -0
  23. package/dist/source-ref.d.ts +30 -0
  24. package/dist/source-resolver.d.ts +92 -0
  25. package/dist/storage-contract.d.ts +106 -0
  26. package/dist/web-search.d.ts +40 -0
  27. package/dist/wiki-compiler.d.ts +67 -0
  28. package/dist/wiki-layout.d.ts +23 -0
  29. package/dist/workspace.d.ts +111 -0
  30. package/docs/architecture/ai-native-knowledge-base.md +24 -0
  31. package/docs/architecture/hosted-wrapper-responsibilities.md +8 -0
  32. package/docs/canonical-secrets-bootstrap-2026-06-08.md +127 -0
  33. package/package.json +15 -7
  34. package/src/agent.ts +0 -367
  35. package/src/artifact-store.ts +0 -184
  36. package/src/auth.ts +0 -123
  37. package/src/cli.ts +0 -1181
  38. package/src/embeddings.ts +0 -516
  39. package/src/knowledge-db.ts +0 -354
  40. package/src/manifest-ingest.ts +0 -515
  41. package/src/mcp-http.js +0 -110
  42. package/src/mcp.js +0 -1503
  43. package/src/outbox-consume.ts +0 -463
  44. package/src/provenance.ts +0 -93
  45. package/src/providers.ts +0 -308
  46. package/src/reindex.ts +0 -260
  47. package/src/remote-client.ts +0 -268
  48. package/src/retrieval.ts +0 -326
  49. package/src/safety.ts +0 -265
  50. package/src/schema.js +0 -25
  51. package/src/search.ts +0 -510
  52. package/src/service.ts +0 -432
  53. package/src/source-ingest.ts +0 -268
  54. package/src/source-ref.ts +0 -104
  55. package/src/source-resolver.ts +0 -436
  56. package/src/storage-contract.ts +0 -293
  57. package/src/store.ts +0 -113
  58. package/src/web-search.ts +0 -330
  59. package/src/wiki-compiler.ts +0 -711
  60. package/src/wiki-layout.ts +0 -251
  61. package/src/workspace.ts +0 -213
package/src/web-search.ts DELETED
@@ -1,330 +0,0 @@
1
- import { createHash, randomUUID } from 'node:crypto';
2
- import { migrateKnowledgeDb, openKnowledgeDb } from './knowledge-db';
3
- import { ingestOpenFilesManifestItems } from './manifest-ingest';
4
- import {
5
- assertProviderCredentials,
6
- normalizeAiSdkUsage,
7
- parseModelRef,
8
- providerSettings,
9
- recordProviderUsage,
10
- resolveModelRef,
11
- type AiProviderId,
12
- } from './providers';
13
- import { assertWebSearchAllowed, recordAuditEvent, type SafetyPolicy } from './safety';
14
- import type { KnowledgeConfig } from './workspace';
15
-
16
- export interface WebSearchOptions { // Inputs accepted by runProviderWebSearch.
17
- dbPath: string; // Knowledge database path; it is migrated and opened by runProviderWebSearch.
18
- query: string; // Search text; it is trimmed, and a query that is empty after trimming is rejected.
19
- config?: KnowledgeConfig; // Optional workspace config forwarded to provider settings and model resolution.
20
- safetyPolicy?: SafetyPolicy; // When set and not in fake mode, checked with assertWebSearchAllowed; also forwarded when filing results.
21
- modelRef?: string; // Model reference passed to resolveModelRef; falls back to the provider's default model, else 'default'.
22
- provider?: AiProviderId; // Explicit provider; takes precedence over the provider parsed from the model reference.
23
- limit?: number; // Maximum number of sources returned; defaults to 5 and is clamped to 1..20.
24
- maxUses?: number; // Web-search tool-use cap; defaults to 3, clamped to 1..10; only the Anthropic tool receives it.
25
- domains?: string[]; // Allowed-domain list handed to the provider tool; defaults to an empty list (no restriction).
26
- fake?: boolean; // Skip the provider call and return deterministic fixture sources instead.
27
- fileResults?: boolean; // When true, returned sources are ingested via ingestOpenFilesManifestItems.
28
- env?: Record<string, string | undefined>; // Environment used for credential lookup; defaults to process.env.
29
- now?: Date; // Clock override used for every timestamp written; defaults to the current time.
30
- }
31
-
32
- export interface WebSearchSource { // One web source extracted from a provider response (or a fake fixture).
33
- url: string; // First non-empty string among the record's url, uri, and sourceUrl fields; also the de-duplication key.
34
- title: string | null; // Taken from the record's title, else name; null when neither is a non-empty string.
35
- snippet: string | null; // Taken from the record's snippet, else text, else description; null when none is present.
36
- provider_metadata: Record<string, unknown>; // The raw record the source was extracted from, kept unmodified.
37
- }
38
-
39
- export interface WebSearchResult { // Value returned by runProviderWebSearch.
40
- run_id: string; // Id of the row inserted into the runs table: `run_` followed by a random UUID.
41
- query: string; // The trimmed query that was actually searched.
42
- provider: string; // Provider that handled (or, in fake mode, would have handled) the search.
43
- model: string; // Model name resolved for that provider.
44
- answer: string; // Text returned by the provider, or a fixed placeholder sentence in fake mode.
45
- sources: WebSearchSource[]; // De-duplicated sources, truncated to the requested limit.
46
- filed_sources: number; // Count reported by ingestion as sources_upserted; 0 when filing is disabled or there are no sources.
47
- usage: { // Token and cost accounting also written through recordProviderUsage.
48
- input_tokens: number; // Provider-reported value; in fake mode a word-count-based estimate of the query.
49
- output_tokens: number; // Provider-reported value; in fake mode a word-count-based estimate of the answer.
50
- cost_usd: number; // Provider-normalized cost; always 0 in fake mode.
51
- };
52
- warnings: string[]; // Non-fatal notices; the visible code only emits 'no_web_sources_returned'.
53
- }
54
-
55
/**
 * Produces a deterministic content fingerprint for `value`.
 *
 * @param value - Text to fingerprint.
 * @returns The lowercase hex SHA-256 digest of `value`, prefixed with `sha256:`.
 */
function stableHash(value: string): string {
  const hasher = createHash('sha256');
  hasher.update(value);
  const hexDigest = hasher.digest('hex');
  return 'sha256:' + hexDigest;
}
58
-
59
/**
 * Rough token estimate derived from a whitespace-delimited word count.
 *
 * @param text - Text to size.
 * @returns `ceil(words * 1.25)`, never less than 1 (even for empty or blank text).
 */
function estimateTokens(text: string): number {
  // Each run of non-whitespace characters counts as one word.
  const wordCount = text.match(/\S+/g)?.length ?? 0;
  const estimate = Math.ceil(wordCount * 1.25);
  return estimate < 1 ? 1 : estimate;
}
63
-
64
/**
 * Views an unknown value as a string-keyed record.
 *
 * @param value - Anything.
 * @returns `value` itself when it is a non-null, non-array object; otherwise a fresh empty object.
 */
function asRecord(value: unknown): Record<string, unknown> {
  if (typeof value !== 'object' || value === null || Array.isArray(value)) {
    return {};
  }
  return value as Record<string, unknown>;
}
67
-
68
/**
 * Narrows an unknown value to a non-empty string.
 *
 * @param value - Anything.
 * @returns `value` when it is a string with at least one character (whitespace counts); otherwise `null`.
 */
function asString(value: unknown): string | null {
  if (typeof value !== 'string') return null;
  return value === '' ? null : value;
}
71
-
72
/**
 * Tries to interpret one provider payload entry as a web source.
 *
 * @param value - Candidate entry; anything that is not a plain object is treated as empty.
 * @returns A source when the entry carries a non-empty `url`, `uri`, or `sourceUrl` string
 *          (checked in that order); otherwise `null`. The entry itself is kept as
 *          `provider_metadata`.
 */
function sourceFromRecord(value: unknown): WebSearchSource | null {
  const fields = asRecord(value);

  // Returns the first listed field that holds a non-empty string.
  const firstString = (...keys: string[]): string | null => {
    for (const key of keys) {
      const found = asString(fields[key]);
      if (found !== null) return found;
    }
    return null;
  };

  const url = firstString('url', 'uri', 'sourceUrl');
  if (url === null) return null;

  const title = firstString('title', 'name');
  const snippet = firstString('snippet', 'text', 'description');
  return { url, title, snippet, provider_metadata: fields };
}
83
-
84
/**
 * Walks a provider payload and gathers every web source found in it.
 *
 * Arrays are visited element by element. For an object, the object itself is tried as a
 * source and then its `sources`, `results`, `citations`, `annotations`, and `output`
 * fields are visited recursively.
 *
 * @param value - Payload fragment to scan.
 * @param output - Accumulator keyed by URL; a later source with the same URL replaces the earlier one.
 */
function collectSources(value: unknown, output: Map<string, WebSearchSource>): void {
  if (Array.isArray(value)) {
    for (let index = 0; index < value.length; index += 1) {
      collectSources(value[index], output);
    }
    return;
  }

  const direct = sourceFromRecord(value);
  if (direct !== null) output.set(direct.url, direct);

  const container = asRecord(value);
  const nestedKeys = ['sources', 'results', 'citations', 'annotations', 'output'];
  for (const key of nestedKeys) {
    const nested = container[key];
    if (!nested) continue;
    collectSources(nested, output);
  }
}
96
-
97
/**
 * Builds deterministic fixture sources for fake (offline) searches.
 *
 * @param query - Query text echoed into each snippet.
 * @param limit - Requested number of sources; at most three fixtures are produced.
 * @returns Fixtures ranked from 1, each pointing at an `example.com` URL.
 */
function fakeSources(query: string, limit: number): WebSearchSource[] {
  const count = Math.min(limit, 3);
  const fixtures: WebSearchSource[] = [];
  for (let rank = 1; rank <= count; rank += 1) {
    fixtures.push({
      url: `https://example.com/knowledge-web-${rank}`,
      title: `Fake web source ${rank}`,
      snippet: `Deterministic web-search fixture for "${query}"`,
      provider_metadata: { fake: true, rank },
    });
  }
  return fixtures;
}
105
-
106
/**
 * Runs one web-search-backed generation through the AI SDK's OpenAI provider.
 *
 * The API key is read from `input.env` under the env-var name given by the provider
 * settings, and the `web_search` tool is forced via `toolChoice`.
 *
 * @param input.query - Prompt sent to the model.
 * @param input.model - OpenAI model name.
 * @param input.config - Optional workspace config used to look up provider settings.
 * @param input.env - Environment map holding the API key.
 * @param input.maxUses - Accepted for signature parity with the Anthropic variant; not used here.
 * @param input.domains - When non-empty, restricts the search to these domains.
 * @returns The `generateText` result.
 * @throws Error when the installed OpenAI provider exposes no `tools.webSearch` factory.
 */
async function openAiWebSearch(input: {
  query: string;
  model: string;
  config?: KnowledgeConfig;
  env: Record<string, string | undefined>;
  maxUses: number;
  domains: string[];
}) {
  const { generateText } = await import('ai');
  const { createOpenAI } = await import('@ai-sdk/openai');
  const settings = providerSettings(input.config, 'openai');
  // Cast kept from the original: the tool factory is probed at runtime rather than
  // relied on through the provider's static types.
  const openai = createOpenAI({
    apiKey: input.env[settings.api_key_env],
    baseURL: settings.base_url,
  }) as any;
  const webSearch = openai.tools?.webSearch;
  if (!webSearch) throw new Error('OpenAI provider does not expose tools.webSearch.');
  return generateText({
    model: openai(input.model),
    prompt: input.query,
    tools: {
      web_search: webSearch({
        externalWebAccess: true,
        searchContextSize: 'medium',
        // The OpenAI web search tool takes its domain allow-list under `filters`;
        // a top-level `allowedDomains` key is not part of its arguments, so the
        // restriction would not be applied.
        ...(input.domains.length > 0 ? { filters: { allowedDomains: input.domains } } : {}),
      }),
    },
    toolChoice: { type: 'tool', toolName: 'web_search' },
  });
}
136
-
137
/**
 * Runs one web-search-backed generation through the AI SDK's Anthropic provider.
 *
 * Prefers the versioned `webSearch_20250305` tool factory and falls back to `webSearch`.
 *
 * @param input.query - Prompt sent to the model.
 * @param input.model - Anthropic model name.
 * @param input.config - Optional workspace config used to look up provider settings.
 * @param input.env - Environment map holding the API key.
 * @param input.maxUses - Cap on web-search tool invocations.
 * @param input.domains - When non-empty, restricts the search to these domains.
 * @returns The `generateText` result.
 * @throws Error when the installed Anthropic provider exposes no web search tool factory.
 */
async function anthropicWebSearch(input: {
  query: string;
  model: string;
  config?: KnowledgeConfig;
  env: Record<string, string | undefined>;
  maxUses: number;
  domains: string[];
}) {
  const { generateText } = await import('ai');
  const { createAnthropic } = await import('@ai-sdk/anthropic');

  const providerConfig = providerSettings(input.config, 'anthropic');
  const apiKey = input.env[providerConfig.api_key_env];
  const client = createAnthropic({ apiKey, baseURL: providerConfig.base_url }) as any;

  const toolFactory = client.tools?.webSearch_20250305 ?? client.tools?.webSearch;
  if (!toolFactory) {
    throw new Error('Anthropic provider does not expose a web search tool.');
  }

  // The allow-list key is only present when at least one domain was requested.
  const toolOptions: Record<string, unknown> = { maxUses: input.maxUses };
  if (input.domains.length > 0) {
    toolOptions.allowedDomains = input.domains;
  }

  return generateText({
    model: client(input.model),
    prompt: input.query,
    tools: { web_search: toolFactory(toolOptions) },
  });
}
165
-
166
/**
 * Files web search sources into the knowledge database as read-only manifest items.
 *
 * Each source becomes one item whose extracted text is the title, snippet, and URL joined
 * by newlines (missing parts skipped); the SHA-256 of that text serves as both hash and
 * revision.
 *
 * @param options - Search options; `fileResults` gates the whole operation, while `dbPath`,
 *                  `query`, and `safetyPolicy` are forwarded to ingestion.
 * @param sources - Sources to file.
 * @param now - ISO timestamp stamped on every item and used as the ingestion clock.
 * @returns The number of sources ingestion reports as upserted; 0 when filing is disabled
 *          or `sources` is empty.
 */
async function fileWebSources(options: WebSearchOptions, sources: WebSearchSource[], now: string): Promise<number> {
  if (!options.fileResults || sources.length === 0) return 0;
  const items = sources.map((source) => {
    const text = [source.title, source.snippet, source.url].filter(Boolean).join('\n');
    const hash = stableHash(text);
    return {
      source_ref: source.url,
      name: source.title ?? source.url,
      url: source.url,
      mime: 'text/plain',
      hash,
      revision: hash,
      status: 'active',
      updated_at: now,
      permissions: { mode: 'read_only', allowed_purposes: ['knowledge_answer', 'knowledge_index'] },
      metadata: {
        source_ref: source.url,
        content_source: 'provider_web_search',
        provider_metadata: source.provider_metadata,
      },
      extracted_text: text,
    };
  });
  const result = await ingestOpenFilesManifestItems({
    dbPath: options.dbPath,
    items,
    // Trimmed so the label matches the query recorded on the run row, which is
    // stored trimmed; otherwise " foo " and "foo" would produce different labels.
    sourceLabel: `web-search:${options.query.trim()}`,
    readAction: 'provider_web_search_file_results',
    safetyPolicy: options.safetyPolicy,
    now: new Date(now),
  });
  return result.sources_upserted;
}
199
-
200
/**
 * Clamps an optional numeric option to an integer within `[min, max]`.
 * Missing or NaN input yields `fallback`; fractional input is truncated toward zero.
 */
function clampWebSearchInteger(value: number | undefined, fallback: number, min: number, max: number): number {
  const base = value == null || Number.isNaN(value) ? fallback : Math.trunc(value);
  return Math.max(min, Math.min(base, max));
}

/**
 * Best-effort: flags a run row as failed so it is not left in `running` after an error.
 * Never throws, so the caller can rethrow the original error unchanged.
 */
function markWebSearchRunFailed(
  dbPath: string,
  runId: string,
  metadata: Record<string, unknown>,
  error: unknown,
  now: string,
): void {
  try {
    const db = openKnowledgeDb(dbPath);
    try {
      const message = error instanceof Error ? error.message : String(error);
      // NOTE(review): assumes the runs table accepts a 'failed' status — confirm against the schema.
      db.run(
        `UPDATE runs SET status = ?, metadata_json = ?, updated_at = ? WHERE id = ?`,
        ['failed', JSON.stringify({ ...metadata, error: message }), now, runId],
      );
    } finally {
      db.close();
    }
  } catch {
    // Deliberately ignored: failing to record the failure must not mask the original error.
  }
}

/**
 * Runs a provider-native web search (OpenAI or Anthropic), records the run, an audit
 * event, and provider usage in the knowledge database, and optionally files the
 * returned sources.
 *
 * In fake mode no provider is contacted and no credentials or safety checks are
 * required; deterministic fixture sources are returned instead.
 *
 * A single timestamp (`options.now`, else the time of the call) is used for every row
 * written, including the completion update.
 *
 * @param options - See {@link WebSearchOptions}. `limit` defaults to 5 (clamped to 1..20)
 *                  and `maxUses` to 3 (clamped to 1..10); NaN falls back to the default.
 * @returns The answer, the de-duplicated sources (at most `limit`), usage figures, and
 *          warnings (`no_web_sources_returned` when no source was found).
 * @throws Error when the query is empty after trimming, when a non-fake search targets a
 *         provider other than `openai`/`anthropic`, or when a safety/credential assertion,
 *         the provider call, or result filing fails. If the provider call or filing
 *         fails, the run row is marked `failed` (best-effort) before the error is rethrown.
 */
export async function runProviderWebSearch(options: WebSearchOptions): Promise<WebSearchResult> {
  const query = options.query.trim();
  if (!query) throw new Error('Web search query is required.');
  const env = options.env ?? process.env;
  const now = (options.now ?? new Date()).toISOString();
  const limit = clampWebSearchInteger(options.limit, 5, 1, 20);
  const maxUses = clampWebSearchInteger(options.maxUses, 3, 1, 10);
  const domains = options.domains ?? [];
  const modelRef = resolveModelRef(
    options.modelRef ??
      (options.provider
        ? `${options.provider}:${providerSettings(options.config, options.provider).default_model}`
        : 'default'),
    options.config,
  );
  const parsed = parseModelRef(modelRef);
  // An explicit provider wins; if it disagrees with the model reference, that
  // provider's default model is used instead of the referenced model.
  const provider = options.provider ?? parsed.provider;
  const model = parsed.provider === provider ? parsed.model : providerSettings(options.config, provider).default_model;
  const runId = `run_${randomUUID()}`;

  // All gate checks run before anything is written to the database.
  if (!options.fake && options.safetyPolicy) assertWebSearchAllowed(options.safetyPolicy);
  if (!options.fake && provider !== 'openai' && provider !== 'anthropic') {
    throw new Error(`Provider ${provider} does not expose native web search yet.`);
  }
  if (!options.fake) assertProviderCredentials(provider, options.config, env);

  migrateKnowledgeDb(options.dbPath);
  const db = openKnowledgeDb(options.dbPath);
  try {
    db.run(
      `INSERT INTO runs (id, type, prompt, status, provider, model, metadata_json, created_at, updated_at)
       VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`,
      [
        runId,
        'provider-web-search',
        query,
        'running',
        provider,
        model,
        JSON.stringify({ domains, max_uses: maxUses, fake: options.fake === true }),
        now,
        now,
      ],
    );
    recordAuditEvent(db, {
      event_type: 'source_read',
      action: options.fake ? 'fake_provider_web_search' : 'provider_web_search',
      target_uri: query,
      decision: 'allow',
      metadata: { provider, model, domains, max_uses: maxUses },
      created_at: now,
    });
  } finally {
    // The connection is released before the (potentially slow) provider call.
    db.close();
  }

  let answer = '';
  let sources: WebSearchSource[] = [];
  let usage = { input_tokens: estimateTokens(query), output_tokens: 0, cost_usd: 0 };
  let filedSources = 0;
  const warnings: string[] = [];
  try {
    if (options.fake) {
      sources = fakeSources(query, limit);
      answer = `Fake web search answer for: ${query}`;
      usage.output_tokens = estimateTokens(answer);
    } else {
      const result = provider === 'openai'
        ? await openAiWebSearch({ query, model, config: options.config, env, maxUses, domains })
        : await anthropicWebSearch({ query, model, config: options.config, env, maxUses, domains });
      answer = result.text;
      const collected = new Map<string, WebSearchSource>();
      collectSources((result as any).sources, collected);
      collectSources((result as any).toolResults, collected);
      sources = Array.from(collected.values()).slice(0, limit);
      const normalized = normalizeAiSdkUsage({
        provider,
        model,
        usage: (result as any).usage,
        providerMetadata: (result as any).providerMetadata,
      });
      usage = {
        input_tokens: normalized.input_tokens,
        output_tokens: normalized.output_tokens,
        cost_usd: normalized.cost_usd,
      };
    }
    filedSources = await fileWebSources(options, sources, now);
  } catch (error: unknown) {
    // Without this the run row inserted above would stay in 'running' indefinitely.
    markWebSearchRunFailed(
      options.dbPath,
      runId,
      { domains, max_uses: maxUses, fake: options.fake === true },
      error,
      now,
    );
    throw error;
  }

  const writeDb = openKnowledgeDb(options.dbPath);
  try {
    writeDb.run(
      `UPDATE runs SET status = ?, metadata_json = ?, updated_at = ? WHERE id = ?`,
      [
        'completed',
        JSON.stringify({ domains, max_uses: maxUses, sources: sources.length, filed_sources: filedSources, fake: options.fake === true }),
        now,
        runId,
      ],
    );
    writeDb.run(
      `INSERT INTO run_events (id, run_id, level, event, metadata_json, created_at)
       VALUES (?, ?, ?, ?, ?, ?)`,
      [
        `evt_${randomUUID()}`,
        runId,
        'info',
        'provider_web_search_completed',
        JSON.stringify({ sources: sources.length, filed_sources: filedSources }),
        now,
      ],
    );
    recordProviderUsage(writeDb, {
      run_id: runId,
      provider,
      model,
      input_tokens: usage.input_tokens,
      output_tokens: usage.output_tokens,
      cost_usd: usage.cost_usd,
      metadata: { web_search: true, sources: sources.length, filed_sources: filedSources },
      created_at: now,
    });
  } finally {
    writeDb.close();
  }

  if (sources.length === 0) warnings.push('no_web_sources_returned');
  return {
    run_id: runId,
    query,
    provider,
    model,
    answer,
    sources,
    filed_sources: filedSources,
    usage,
    warnings,
  };
}