@hasna/knowledge 0.2.27 → 0.2.29

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. package/README.md +140 -99
  2. package/bin/{open-knowledge-mcp.js → knowledge-mcp.js} +22 -15
  3. package/bin/{open-knowledge.js → knowledge.js} +5 -5
  4. package/dist/agent.d.ts +35 -0
  5. package/dist/artifact-store.d.ts +63 -0
  6. package/dist/auth.d.ts +35 -0
  7. package/dist/embeddings.d.ts +77 -0
  8. package/dist/index.d.ts +20 -0
  9. package/dist/index.js +5709 -0
  10. package/dist/knowledge-db.d.ts +27 -0
  11. package/dist/manifest-ingest.d.ts +35 -0
  12. package/dist/outbox-consume.d.ts +25 -0
  13. package/dist/provenance.d.ts +50 -0
  14. package/dist/providers.d.ts +89 -0
  15. package/dist/reindex.d.ts +37 -0
  16. package/dist/remote-client.d.ts +108 -0
  17. package/dist/retrieval.d.ts +71 -0
  18. package/dist/safety.d.ts +70 -0
  19. package/dist/sdk.d.ts +72 -0
  20. package/dist/search.d.ts +65 -0
  21. package/dist/service.d.ts +117 -0
  22. package/dist/source-ingest.d.ts +18 -0
  23. package/dist/source-ref.d.ts +30 -0
  24. package/dist/source-resolver.d.ts +92 -0
  25. package/dist/storage-contract.d.ts +106 -0
  26. package/dist/web-search.d.ts +40 -0
  27. package/dist/wiki-compiler.d.ts +67 -0
  28. package/dist/wiki-layout.d.ts +23 -0
  29. package/dist/workspace.d.ts +111 -0
  30. package/docs/architecture/ai-native-knowledge-base.md +16 -16
  31. package/docs/architecture/hosted-wrapper-responsibilities.md +5 -5
  32. package/docs/architecture/hybrid-semantic-search.md +12 -12
  33. package/docs/canonical-secrets-bootstrap-2026-06-08.md +1 -1
  34. package/docs/examples/company-wiki-workflow.md +19 -19
  35. package/docs/migration/json-to-sqlite.md +17 -17
  36. package/package.json +17 -10
  37. package/src/agent.ts +0 -367
  38. package/src/artifact-store.ts +0 -184
  39. package/src/auth.ts +0 -123
  40. package/src/cli.ts +0 -1184
  41. package/src/embeddings.ts +0 -516
  42. package/src/knowledge-db.ts +0 -354
  43. package/src/manifest-ingest.ts +0 -515
  44. package/src/mcp-http.js +0 -110
  45. package/src/mcp.js +0 -1503
  46. package/src/outbox-consume.ts +0 -463
  47. package/src/provenance.ts +0 -93
  48. package/src/providers.ts +0 -308
  49. package/src/reindex.ts +0 -260
  50. package/src/remote-client.ts +0 -268
  51. package/src/retrieval.ts +0 -326
  52. package/src/safety.ts +0 -265
  53. package/src/schema.js +0 -25
  54. package/src/search.ts +0 -510
  55. package/src/service.ts +0 -443
  56. package/src/source-ingest.ts +0 -268
  57. package/src/source-ref.ts +0 -104
  58. package/src/source-resolver.ts +0 -436
  59. package/src/storage-contract.ts +0 -346
  60. package/src/store.ts +0 -113
  61. package/src/web-search.ts +0 -330
  62. package/src/wiki-compiler.ts +0 -711
  63. package/src/wiki-layout.ts +0 -251
  64. package/src/workspace.ts +0 -251
package/src/web-search.ts DELETED
@@ -1,330 +0,0 @@
1
- import { createHash, randomUUID } from 'node:crypto';
2
- import { migrateKnowledgeDb, openKnowledgeDb } from './knowledge-db';
3
- import { ingestOpenFilesManifestItems } from './manifest-ingest';
4
- import {
5
- assertProviderCredentials,
6
- normalizeAiSdkUsage,
7
- parseModelRef,
8
- providerSettings,
9
- recordProviderUsage,
10
- resolveModelRef,
11
- type AiProviderId,
12
- } from './providers';
13
- import { assertWebSearchAllowed, recordAuditEvent, type SafetyPolicy } from './safety';
14
- import type { KnowledgeConfig } from './workspace';
15
-
16
/**
 * Input accepted by `runProviderWebSearch`.
 *
 * Only `dbPath` and `query` are required; every other field has a default
 * that is applied inside `runProviderWebSearch`.
 */
export interface WebSearchOptions {
  /** Database path handed to `migrateKnowledgeDb` and `openKnowledgeDb`. */
  dbPath: string;
  /** Search prompt. It is trimmed before use and must not be empty after trimming. */
  query: string;

  /** Workspace configuration forwarded to the provider/model resolution helpers. */
  config?: KnowledgeConfig;
  /**
   * When set and `fake` is not, it is checked with `assertWebSearchAllowed`
   * before a provider is contacted. It is also forwarded to manifest ingestion
   * when results are filed.
   */
  safetyPolicy?: SafetyPolicy;

  /** Model reference resolved through `resolveModelRef`; falls back to the provider default or `'default'`. */
  modelRef?: string;
  /** Explicit provider; takes precedence over the provider parsed from the model reference. */
  provider?: AiProviderId;

  /** Maximum number of sources returned. Defaults to 5 and is clamped to 1..20. */
  limit?: number;
  /** Search-tool usage budget. Defaults to 3 and is clamped to 1..10. */
  maxUses?: number;
  /** Domain allow-list passed to the provider tool; an empty or missing list means no restriction is sent. */
  domains?: string[];

  /** Use deterministic fixture sources instead of calling a provider (safety and credential checks are skipped). */
  fake?: boolean;
  /** When truthy, returned sources are ingested through `ingestOpenFilesManifestItems`. */
  fileResults?: boolean;

  /** Environment used for credential lookup; defaults to `process.env`. */
  env?: Record<string, string | undefined>;
  /** Clock override; its ISO string is used for every timestamp written by the run. Defaults to `new Date()`. */
  now?: Date;
}
31
-
32
/** One web source extracted from a provider response (or generated as a fixture). */
export interface WebSearchSource {
  /** Source address; also used as the de-duplication key when sources are collected. */
  url: string;
  /** Human-readable title, or `null` when the provider record carried none. */
  title: string | null;
  /** Short excerpt or description, or `null` when the provider record carried none. */
  snippet: string | null;
  /** The raw provider record the source was derived from. */
  provider_metadata: Record<string, unknown>;
}
38
-
39
/** Outcome returned by `runProviderWebSearch`. */
export interface WebSearchResult {
  /** Identifier of the row written to the `runs` table (`run_` followed by a UUID). */
  run_id: string;
  /** The trimmed query that was searched. */
  query: string;
  /** Provider that served (or would have served) the search. */
  provider: string;
  /** Model name used for the run. */
  model: string;
  /** Text answer produced by the provider, or the fixture answer for fake runs. */
  answer: string;
  /** Sources found, capped at the effective `limit`. */
  sources: WebSearchSource[];
  /** Number of sources upserted by ingestion; 0 when filing is disabled or there were no sources. */
  filed_sources: number;
  /** Token and cost accounting; word-count estimates for fake runs, normalized provider usage otherwise. */
  usage: {
    input_tokens: number;
    output_tokens: number;
    cost_usd: number;
  };
  /** Non-fatal notices, e.g. `no_web_sources_returned` when `sources` is empty. */
  warnings: string[];
}
54
-
55
/** Returns the SHA-256 digest of `value` as lowercase hex, prefixed with `sha256:`. */
function stableHash(value: string): string {
  const hasher = createHash('sha256');
  hasher.update(value);
  const hex = hasher.digest('hex');
  return 'sha256:' + hex;
}
58
-
59
/**
 * Rough token estimate: 1.25 tokens per whitespace-separated word, rounded up.
 * Never returns less than 1, even for empty or whitespace-only text.
 */
function estimateTokens(text: string): number {
  let wordCount = 0;
  for (const piece of text.trim().split(/\s+/)) {
    // Splitting an empty string yields one empty piece; it must not count as a word.
    if (piece) wordCount += 1;
  }
  const estimate = Math.ceil(wordCount * 1.25);
  return estimate < 1 ? 1 : estimate;
}
63
-
64
/**
 * Views `value` as a string-keyed record.
 * Anything that is not a non-null, non-array object yields a fresh empty object.
 */
function asRecord(value: unknown): Record<string, unknown> {
  if (!value || typeof value !== 'object' || Array.isArray(value)) {
    return {};
  }
  return value as Record<string, unknown>;
}
67
-
68
/** Returns `value` when it is a non-empty string; otherwise `null`. */
function asString(value: unknown): string | null {
  if (typeof value !== 'string') return null;
  return value === '' ? null : value;
}
71
-
72
/**
 * Builds a `WebSearchSource` from a loosely shaped provider record.
 *
 * The address is taken from the first non-empty string among `url`, `uri` and
 * `sourceUrl`; without one the value is not a source and `null` is returned.
 * Title falls back from `title` to `name`; snippet from `snippet` to `text` to
 * `description`. The record itself is kept as `provider_metadata`.
 */
function sourceFromRecord(value: unknown): WebSearchSource | null {
  const record = asRecord(value);

  // First non-empty string among the given keys, checked in order.
  const firstString = (...keys: string[]): string | null => {
    for (const key of keys) {
      const found = asString(record[key]);
      if (found !== null) return found;
    }
    return null;
  };

  const url = firstString('url', 'uri', 'sourceUrl');
  if (url === null) return null;

  const title = firstString('title', 'name');
  const snippet = firstString('snippet', 'text', 'description');
  return { url, title, snippet, provider_metadata: record };
}
83
-
84
/**
 * Walks an arbitrary provider payload depth-first and records every source it
 * finds in `output`, keyed by URL (a later source with the same URL replaces
 * the earlier one).
 *
 * Arrays are traversed element by element. For any other value, the value
 * itself is tried as a source first, then its `sources`, `results`,
 * `citations`, `annotations` and `output` properties are searched in that order.
 */
function collectSources(value: unknown, output: Map<string, WebSearchSource>): void {
  if (Array.isArray(value)) {
    for (const item of value) {
      collectSources(item, output);
    }
    return;
  }

  const candidate = sourceFromRecord(value);
  if (candidate !== null) {
    output.set(candidate.url, candidate);
  }

  const container = asRecord(value);
  const nestedKeys = ['sources', 'results', 'citations', 'annotations', 'output'];
  for (const nestedKey of nestedKeys) {
    const nested = container[nestedKey];
    if (nested) {
      collectSources(nested, output);
    }
  }
}
96
-
97
/**
 * Produces deterministic fixture sources for fake runs.
 * At most three are generated, fewer when `limit` is smaller.
 */
function fakeSources(query: string, limit: number): WebSearchSource[] {
  const count = Math.min(limit, 3);
  const fixtures: WebSearchSource[] = [];
  for (let rank = 1; rank <= count; rank += 1) {
    fixtures.push({
      url: `https://example.com/knowledge-web-${rank}`,
      title: `Fake web source ${rank}`,
      snippet: `Deterministic web-search fixture for "${query}"`,
      provider_metadata: { fake: true, rank },
    });
  }
  return fixtures;
}
105
-
106
/**
 * Runs a single OpenAI generation with the provider's web search tool forced on.
 *
 * The AI SDK packages are imported lazily so they are only loaded when a real
 * (non-fake) OpenAI search is performed.
 *
 * @param input.query   Prompt sent to the model.
 * @param input.model   OpenAI model name.
 * @param input.config  Workspace config used to look up OpenAI provider settings.
 * @param input.env     Environment from which the API key is read.
 * @param input.maxUses Accepted for signature parity with the Anthropic variant; it is
 *                      not forwarded because no usage-budget option is passed to this tool.
 * @param input.domains Domain allow-list; an empty list sends no restriction.
 * @returns The `generateText` result.
 * @throws Error when the created provider does not expose `tools.webSearch`.
 */
async function openAiWebSearch(input: {
  query: string;
  model: string;
  config?: KnowledgeConfig;
  env: Record<string, string | undefined>;
  maxUses: number;
  domains: string[];
}) {
  const { generateText } = await import('ai');
  const { createOpenAI } = await import('@ai-sdk/openai');
  const settings = providerSettings(input.config, 'openai');
  const openai = createOpenAI({
    apiKey: input.env[settings.api_key_env],
    baseURL: settings.base_url,
  }) as any;
  const webSearch = openai.tools?.webSearch;
  if (!webSearch) throw new Error('OpenAI provider does not expose tools.webSearch.');
  return generateText({
    model: openai(input.model),
    prompt: input.query,
    tools: {
      web_search: webSearch({
        externalWebAccess: true,
        searchContextSize: 'medium',
        // The OpenAI web search tool takes its allow-list under `filters`; a top-level
        // `allowedDomains` is not one of its arguments, so the restriction was dropped.
        // NOTE(review): confirm against the installed @ai-sdk/openai version.
        ...(input.domains.length > 0 ? { filters: { allowedDomains: input.domains } } : {}),
      }),
    },
    // Force the model to call the search tool instead of answering from memory.
    toolChoice: { type: 'tool', toolName: 'web_search' },
  });
}
136
-
137
/**
 * Runs a single Anthropic generation with the provider's web search tool available.
 *
 * The AI SDK packages are imported lazily. The dated `webSearch_20250305` tool
 * factory is preferred, with `webSearch` as the fallback.
 *
 * @param input.query   Prompt sent to the model.
 * @param input.model   Anthropic model name.
 * @param input.config  Workspace config used to look up Anthropic provider settings.
 * @param input.env     Environment from which the API key is read.
 * @param input.maxUses Search budget forwarded to the tool.
 * @param input.domains Domain allow-list; an empty list sends no restriction.
 * @returns The `generateText` result.
 * @throws Error when the created provider exposes neither tool factory.
 */
async function anthropicWebSearch(input: {
  query: string;
  model: string;
  config?: KnowledgeConfig;
  env: Record<string, string | undefined>;
  maxUses: number;
  domains: string[];
}) {
  const aiSdk = await import('ai');
  const anthropicSdk = await import('@ai-sdk/anthropic');

  const providerConfig = providerSettings(input.config, 'anthropic');
  const apiKey = input.env[providerConfig.api_key_env];
  const anthropic = anthropicSdk.createAnthropic({ apiKey, baseURL: providerConfig.base_url }) as any;

  const createSearchTool = anthropic.tools?.webSearch_20250305 ?? anthropic.tools?.webSearch;
  if (!createSearchTool) {
    throw new Error('Anthropic provider does not expose a web search tool.');
  }

  const toolOptions: Record<string, unknown> = { maxUses: input.maxUses };
  if (input.domains.length > 0) {
    toolOptions.allowedDomains = input.domains;
  }

  return aiSdk.generateText({
    model: anthropic(input.model),
    prompt: input.query,
    tools: { web_search: createSearchTool(toolOptions) },
  });
}
165
-
166
/**
 * Ingests web search sources into the knowledge database as manifest items.
 *
 * Does nothing (and returns 0) unless `options.fileResults` is truthy and at
 * least one source is present. Each source becomes a read-only plain-text item
 * whose text is its title, snippet and URL joined by newlines; the SHA-256 of
 * that text is used as both hash and revision.
 *
 * @param options Search options; `dbPath`, `query`, `safetyPolicy` and `fileResults` are read.
 * @param sources Sources to file.
 * @param now     ISO timestamp stamped on every item and passed to ingestion.
 * @returns The `sources_upserted` count reported by ingestion.
 */
async function fileWebSources(options: WebSearchOptions, sources: WebSearchSource[], now: string): Promise<number> {
  const filingEnabled = Boolean(options.fileResults);
  if (!filingEnabled || sources.length === 0) {
    return 0;
  }

  const items = [];
  for (const entry of sources) {
    // Missing title or snippet are simply left out of the extracted text.
    const parts: string[] = [];
    for (const part of [entry.title, entry.snippet, entry.url]) {
      if (part) parts.push(part);
    }
    const text = parts.join('\n');
    const digest = stableHash(text);
    items.push({
      source_ref: entry.url,
      name: entry.title ?? entry.url,
      url: entry.url,
      mime: 'text/plain',
      hash: digest,
      revision: digest,
      status: 'active',
      updated_at: now,
      permissions: { mode: 'read_only', allowed_purposes: ['knowledge_answer', 'knowledge_index'] },
      metadata: {
        source_ref: entry.url,
        content_source: 'provider_web_search',
        provider_metadata: entry.provider_metadata,
      },
      extracted_text: text,
    });
  }

  const ingestion = await ingestOpenFilesManifestItems({
    dbPath: options.dbPath,
    items,
    sourceLabel: `web-search:${options.query}`,
    readAction: 'provider_web_search_file_results',
    safetyPolicy: options.safetyPolicy,
    now: new Date(now),
  });
  return ingestion.sources_upserted;
}
199
-
200
/**
 * Best-effort: flips a run that was inserted as `running` to `failed` and stores
 * the error message in its metadata.
 *
 * Any error raised while recording the failure is deliberately swallowed so it
 * can never replace the original error the caller is about to rethrow.
 *
 * NOTE(review): the `failed` status value assumes `runs.status` accepts it —
 * confirm against the schema.
 */
function markWebSearchRunFailed(
  dbPath: string,
  runId: string,
  metadata: Record<string, unknown>,
  error: unknown,
  now: string,
): void {
  try {
    const db = openKnowledgeDb(dbPath);
    try {
      const message = error instanceof Error ? error.message : String(error);
      db.run(
        `UPDATE runs SET status = ?, metadata_json = ?, updated_at = ? WHERE id = ?`,
        ['failed', JSON.stringify({ ...metadata, error: message }), now, runId],
      );
    } finally {
      db.close();
    }
  } catch {
    // Intentionally ignored: failure bookkeeping must not mask the original error.
  }
}

/**
 * Performs a provider-native web search and records it in the knowledge database.
 *
 * Steps, in order:
 *  1. Validate the query and resolve limits, provider and model.
 *  2. Unless `fake` is set: enforce the safety policy (when given), require an
 *     `openai` or `anthropic` provider, and check provider credentials.
 *  3. Migrate the database, insert a `runs` row with status `running` and write
 *     a `source_read` audit event.
 *  4. Obtain the answer and sources (fixtures for fake runs, otherwise the provider call).
 *  5. Optionally file the sources, then mark the run `completed`, add a
 *     `provider_web_search_completed` run event and record provider usage.
 *
 * If step 4 or the filing part of step 5 throws, the run row is marked `failed`
 * (best effort) instead of being left in `running`, and the original error is rethrown.
 *
 * All timestamps written use the single `now` captured at the start of the call.
 *
 * @param options See `WebSearchOptions`.
 * @returns The answer, the sources (capped at the effective limit), usage and warnings.
 * @throws Error when the trimmed query is empty, or when a non-fake run targets a
 *   provider other than `openai` / `anthropic`. Errors from the safety check,
 *   credential check, provider call, ingestion and database writes propagate unchanged.
 */
export async function runProviderWebSearch(options: WebSearchOptions): Promise<WebSearchResult> {
  const query = options.query.trim();
  if (!query) throw new Error('Web search query is required.');
  const env = options.env ?? process.env;
  const now = (options.now ?? new Date()).toISOString();
  const limit = Math.max(1, Math.min(options.limit ?? 5, 20));
  const maxUses = Math.max(1, Math.min(options.maxUses ?? 3, 10));
  const domains = options.domains ?? [];

  // An explicit model ref wins; otherwise use the explicit provider's default model,
  // and only then the generic 'default' ref.
  const requestedRef = options.modelRef
    ?? (options.provider ? `${options.provider}:${providerSettings(options.config, options.provider).default_model}` : 'default');
  const modelRef = resolveModelRef(requestedRef, options.config);
  const parsed = parseModelRef(modelRef);
  const provider = options.provider ?? parsed.provider;
  // If the explicit provider disagrees with the model ref, the ref's model name is
  // not used; the explicit provider's default model is used instead.
  const model = parsed.provider === provider ? parsed.model : providerSettings(options.config, provider).default_model;
  const runId = `run_${randomUUID()}`;

  if (!options.fake && options.safetyPolicy) assertWebSearchAllowed(options.safetyPolicy);
  if (!options.fake && provider !== 'openai' && provider !== 'anthropic') {
    throw new Error(`Provider ${provider} does not expose native web search yet.`);
  }
  if (!options.fake) assertProviderCredentials(provider, options.config, env);

  const runMetadata = { domains, max_uses: maxUses, fake: options.fake === true };

  migrateKnowledgeDb(options.dbPath);
  const db = openKnowledgeDb(options.dbPath);
  try {
    db.run(
      `INSERT INTO runs (id, type, prompt, status, provider, model, metadata_json, created_at, updated_at)
       VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`,
      [
        runId,
        'provider-web-search',
        query,
        'running',
        provider,
        model,
        JSON.stringify(runMetadata),
        now,
        now,
      ],
    );
    recordAuditEvent(db, {
      event_type: 'source_read',
      action: options.fake ? 'fake_provider_web_search' : 'provider_web_search',
      target_uri: query,
      decision: 'allow',
      metadata: { provider, model, domains, max_uses: maxUses },
      created_at: now,
    });
  } finally {
    // The connection is not held open across the provider call.
    db.close();
  }

  let answer = '';
  let sources: WebSearchSource[] = [];
  let usage = { input_tokens: estimateTokens(query), output_tokens: 0, cost_usd: 0 };
  const warnings: string[] = [];
  let filedSources = 0;
  try {
    if (options.fake) {
      sources = fakeSources(query, limit);
      answer = `Fake web search answer for: ${query}`;
      usage.output_tokens = estimateTokens(answer);
    } else {
      const result = provider === 'openai'
        ? await openAiWebSearch({ query, model, config: options.config, env, maxUses, domains })
        : await anthropicWebSearch({ query, model, config: options.config, env, maxUses, domains });
      answer = result.text;
      // Sources may appear both in `sources` and inside tool results; the Map de-duplicates by URL.
      const collected = new Map<string, WebSearchSource>();
      collectSources((result as any).sources, collected);
      collectSources((result as any).toolResults, collected);
      sources = Array.from(collected.values()).slice(0, limit);
      const normalized = normalizeAiSdkUsage({
        provider,
        model,
        usage: (result as any).usage,
        providerMetadata: (result as any).providerMetadata,
      });
      usage = {
        input_tokens: normalized.input_tokens,
        output_tokens: normalized.output_tokens,
        cost_usd: normalized.cost_usd,
      };
    }

    filedSources = await fileWebSources(options, sources, now);
  } catch (error: unknown) {
    // Without this the run row would stay in `running` forever after a provider or ingestion failure.
    markWebSearchRunFailed(options.dbPath, runId, runMetadata, error, now);
    throw error;
  }

  const writeDb = openKnowledgeDb(options.dbPath);
  try {
    writeDb.run(
      `UPDATE runs SET status = ?, metadata_json = ?, updated_at = ? WHERE id = ?`,
      [
        'completed',
        JSON.stringify({ domains, max_uses: maxUses, sources: sources.length, filed_sources: filedSources, fake: options.fake === true }),
        now,
        runId,
      ],
    );
    writeDb.run(
      `INSERT INTO run_events (id, run_id, level, event, metadata_json, created_at)
       VALUES (?, ?, ?, ?, ?, ?)`,
      [
        `evt_${randomUUID()}`,
        runId,
        'info',
        'provider_web_search_completed',
        JSON.stringify({ sources: sources.length, filed_sources: filedSources }),
        now,
      ],
    );
    recordProviderUsage(writeDb, {
      run_id: runId,
      provider,
      model,
      input_tokens: usage.input_tokens,
      output_tokens: usage.output_tokens,
      cost_usd: usage.cost_usd,
      metadata: { web_search: true, sources: sources.length, filed_sources: filedSources },
      created_at: now,
    });
  } finally {
    writeDb.close();
  }

  if (sources.length === 0) warnings.push('no_web_sources_returned');
  return {
    run_id: runId,
    query,
    provider,
    model,
    answer,
    sources,
    filed_sources: filedSources,
    usage,
    warnings,
  };
}