@hasna/knowledge 0.2.27 → 0.2.28

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58)
  1. package/README.md +41 -0
  2. package/bin/open-knowledge-mcp.js +15 -7
  3. package/bin/open-knowledge.js +17 -17
  4. package/dist/agent.d.ts +35 -0
  5. package/dist/artifact-store.d.ts +63 -0
  6. package/dist/auth.d.ts +35 -0
  7. package/dist/embeddings.d.ts +77 -0
  8. package/dist/index.d.ts +20 -0
  9. package/dist/index.js +5709 -0
  10. package/dist/knowledge-db.d.ts +27 -0
  11. package/dist/manifest-ingest.d.ts +35 -0
  12. package/dist/outbox-consume.d.ts +25 -0
  13. package/dist/provenance.d.ts +50 -0
  14. package/dist/providers.d.ts +89 -0
  15. package/dist/reindex.d.ts +37 -0
  16. package/dist/remote-client.d.ts +108 -0
  17. package/dist/retrieval.d.ts +71 -0
  18. package/dist/safety.d.ts +70 -0
  19. package/dist/sdk.d.ts +72 -0
  20. package/dist/search.d.ts +65 -0
  21. package/dist/service.d.ts +117 -0
  22. package/dist/source-ingest.d.ts +18 -0
  23. package/dist/source-ref.d.ts +30 -0
  24. package/dist/source-resolver.d.ts +92 -0
  25. package/dist/storage-contract.d.ts +106 -0
  26. package/dist/web-search.d.ts +40 -0
  27. package/dist/wiki-compiler.d.ts +67 -0
  28. package/dist/wiki-layout.d.ts +23 -0
  29. package/dist/workspace.d.ts +111 -0
  30. package/package.json +15 -7
  31. package/src/agent.ts +0 -367
  32. package/src/artifact-store.ts +0 -184
  33. package/src/auth.ts +0 -123
  34. package/src/cli.ts +0 -1184
  35. package/src/embeddings.ts +0 -516
  36. package/src/knowledge-db.ts +0 -354
  37. package/src/manifest-ingest.ts +0 -515
  38. package/src/mcp-http.js +0 -110
  39. package/src/mcp.js +0 -1503
  40. package/src/outbox-consume.ts +0 -463
  41. package/src/provenance.ts +0 -93
  42. package/src/providers.ts +0 -308
  43. package/src/reindex.ts +0 -260
  44. package/src/remote-client.ts +0 -268
  45. package/src/retrieval.ts +0 -326
  46. package/src/safety.ts +0 -265
  47. package/src/schema.js +0 -25
  48. package/src/search.ts +0 -510
  49. package/src/service.ts +0 -443
  50. package/src/source-ingest.ts +0 -268
  51. package/src/source-ref.ts +0 -104
  52. package/src/source-resolver.ts +0 -436
  53. package/src/storage-contract.ts +0 -346
  54. package/src/store.ts +0 -113
  55. package/src/web-search.ts +0 -330
  56. package/src/wiki-compiler.ts +0 -711
  57. package/src/wiki-layout.ts +0 -251
  58. package/src/workspace.ts +0 -251
package/src/web-search.ts DELETED
@@ -1,330 +0,0 @@
1
- import { createHash, randomUUID } from 'node:crypto';
2
- import { migrateKnowledgeDb, openKnowledgeDb } from './knowledge-db';
3
- import { ingestOpenFilesManifestItems } from './manifest-ingest';
4
- import {
5
- assertProviderCredentials,
6
- normalizeAiSdkUsage,
7
- parseModelRef,
8
- providerSettings,
9
- recordProviderUsage,
10
- resolveModelRef,
11
- type AiProviderId,
12
- } from './providers';
13
- import { assertWebSearchAllowed, recordAuditEvent, type SafetyPolicy } from './safety';
14
- import type { KnowledgeConfig } from './workspace';
15
-
16
/**
 * Input accepted by `runProviderWebSearch`.
 */
export interface WebSearchOptions {
  /** Path of the knowledge database that is migrated, opened and written to for the run. */
  dbPath: string;
  /** Search prompt; it is trimmed and must not be empty after trimming. */
  query: string;
  /** Workspace configuration forwarded to provider/model resolution. */
  config?: KnowledgeConfig;
  /** When supplied and the run is not fake, checked with `assertWebSearchAllowed` before searching. */
  safetyPolicy?: SafetyPolicy;
  /** Explicit model reference; when omitted the provider default (or `'default'`) is resolved. */
  modelRef?: string;
  /** Provider override; takes precedence over the provider parsed from the model reference. */
  provider?: AiProviderId;
  /** Maximum number of sources returned; defaults to 5 and is clamped to 1..20. */
  limit?: number;
  /** Search-use budget handed to the provider tool; defaults to 3 and is clamped to 1..10. */
  maxUses?: number;
  /** Domain allow-list handed to the provider tool; defaults to an empty list. */
  domains?: string[];
  /** When true, no provider is called and deterministic fixture sources are returned. */
  fake?: boolean;
  /** When true, returned sources are ingested into the knowledge database. */
  fileResults?: boolean;
  /** Environment used for credential lookup; defaults to `process.env`. */
  env?: Record<string, string | undefined>;
  /** Clock override; its ISO string is used for every timestamp written by the run. */
  now?: Date;
}
31
-
32
/**
 * One web source extracted from a provider response (or generated as a fixture).
 */
export interface WebSearchSource {
  /** Address of the source; also used as the de-duplication key while collecting. */
  url: string;
  /** Human-readable title, or `null` when the provider record carried none. */
  title: string | null;
  /** Short excerpt or description, or `null` when the provider record carried none. */
  snippet: string | null;
  /** The raw record the source was built from, kept for traceability. */
  provider_metadata: Record<string, unknown>;
}
38
-
39
/**
 * Outcome of `runProviderWebSearch`.
 */
export interface WebSearchResult {
  /** Identifier of the `runs` row created for this search (`run_<uuid>`). */
  run_id: string;
  /** The trimmed query that was searched. */
  query: string;
  /** Provider that served (or would have served) the search. */
  provider: string;
  /** Model used for the search. */
  model: string;
  /** Text answer produced by the provider, or the fixture answer for fake runs. */
  answer: string;
  /** Collected sources, at most `limit` entries. */
  sources: WebSearchSource[];
  /** Number of sources upserted into the knowledge database (0 unless `fileResults` is set). */
  filed_sources: number;
  /** Token and cost accounting recorded for the run. */
  usage: {
    input_tokens: number;
    output_tokens: number;
    cost_usd: number;
  };
  /** Non-fatal notices; contains `no_web_sources_returned` when no source was found. */
  warnings: string[];
}
54
-
55
/**
 * Fingerprints a string: the SHA-256 digest of `value` in hex, prefixed with `sha256:`.
 *
 * @param value Text to hash.
 * @returns The prefixed hex digest.
 */
function stableHash(value: string): string {
  const hasher = createHash('sha256');
  hasher.update(value);
  const hex = hasher.digest('hex');
  return 'sha256:' + hex;
}
58
-
59
/**
 * Rough token estimate: 1.25 tokens per whitespace-separated word, rounded up,
 * and never less than 1 (so empty text still counts as one token).
 *
 * @param text Text to measure.
 * @returns The estimated token count.
 */
function estimateTokens(text: string): number {
  let wordCount = 0;
  for (const piece of text.trim().split(/\s+/)) {
    // Splitting an empty string yields one empty piece; it is not a word.
    if (piece) wordCount += 1;
  }
  const estimate = Math.ceil(wordCount * 1.25);
  return estimate < 1 ? 1 : estimate;
}
63
-
64
/**
 * Views `value` as a string-keyed record.
 *
 * @param value Anything.
 * @returns `value` itself when it is a non-null, non-array object; otherwise a fresh empty object.
 */
function asRecord(value: unknown): Record<string, unknown> {
  if (!value || typeof value !== 'object' || Array.isArray(value)) return {};
  return value as Record<string, unknown>;
}
67
-
68
/**
 * Narrows `value` to a non-empty string.
 *
 * @param value Anything.
 * @returns The string when `value` is a string with at least one character; otherwise `null`.
 */
function asString(value: unknown): string | null {
  if (typeof value !== 'string') return null;
  return value.length === 0 ? null : value;
}
71
-
72
/**
 * Builds a `WebSearchSource` from a loosely-shaped provider record.
 *
 * The address is taken from the first non-empty string among `url`, `uri` and
 * `sourceUrl`; the title from `title` or `name`; the snippet from `snippet`,
 * `text` or `description`. The whole record is kept as `provider_metadata`.
 *
 * @param value Candidate record (anything that is not a plain object yields `null`).
 * @returns The source, or `null` when no address could be found.
 */
function sourceFromRecord(value: unknown): WebSearchSource | null {
  const fields = asRecord(value);
  // Returns the first listed field holding a non-empty string, checked in order.
  const firstString = (...keys: string[]): string | null => {
    for (const key of keys) {
      const found = asString(fields[key]);
      if (found !== null) return found;
    }
    return null;
  };

  const link = firstString('url', 'uri', 'sourceUrl');
  if (link === null) return null;

  return {
    url: link,
    title: firstString('title', 'name'),
    snippet: firstString('snippet', 'text', 'description'),
    provider_metadata: fields,
  };
}
83
-
84
/**
 * Walks a provider response fragment and gathers every source it can find.
 *
 * Arrays are walked element by element. For an object, the object itself is
 * tried as a source and then its `sources`, `results`, `citations`,
 * `annotations` and `output` members are walked recursively. Sources are keyed
 * by URL, so a later record with the same URL replaces an earlier one.
 *
 * @param value Fragment to inspect.
 * @param output Map (URL → source) that receives the findings.
 */
function collectSources(value: unknown, output: Map<string, WebSearchSource>): void {
  if (Array.isArray(value)) {
    for (let index = 0; index < value.length; index += 1) {
      collectSources(value[index], output);
    }
    return;
  }

  const candidate = sourceFromRecord(value);
  if (candidate !== null) output.set(candidate.url, candidate);

  const container = asRecord(value);
  const nestedKeys = ['sources', 'results', 'citations', 'annotations', 'output'];
  for (const key of nestedKeys) {
    const nested = container[key];
    if (nested) collectSources(nested, output);
  }
}
96
-
97
/**
 * Produces deterministic fixture sources for fake (offline) runs.
 *
 * @param query Query echoed into each fixture snippet.
 * @param limit Requested number of sources; at most three fixtures are produced.
 * @returns The fixtures, ranked from 1.
 */
function fakeSources(query: string, limit: number): WebSearchSource[] {
  const count = Math.min(limit, 3);
  const fixtures: WebSearchSource[] = [];
  for (let rank = 1; rank <= count; rank += 1) {
    fixtures.push({
      url: `https://example.com/knowledge-web-${rank}`,
      title: `Fake web source ${rank}`,
      snippet: `Deterministic web-search fixture for "${query}"`,
      provider_metadata: { fake: true, rank },
    });
  }
  return fixtures;
}
105
-
106
/**
 * Runs one web-search-backed generation through the OpenAI provider of the AI SDK.
 *
 * The API key is read from `input.env` under the variable named by the provider
 * settings, and the `web_search` tool is forced via `toolChoice`.
 *
 * @param input.query Prompt sent to the model.
 * @param input.model OpenAI model identifier.
 * @param input.config Workspace configuration used to look up provider settings.
 * @param input.env Environment used for the API-key lookup.
 * @param input.maxUses Accepted for signature parity with the Anthropic variant;
 *   it is not forwarded to the OpenAI tool here.
 * @param input.domains Domain allow-list; applied only when non-empty.
 * @returns The `generateText` result.
 * @throws Error when the provider instance does not expose `tools.webSearch`.
 */
async function openAiWebSearch(input: {
  query: string;
  model: string;
  config?: KnowledgeConfig;
  env: Record<string, string | undefined>;
  maxUses: number;
  domains: string[];
}) {
  const { generateText } = await import('ai');
  const { createOpenAI } = await import('@ai-sdk/openai');
  const settings = providerSettings(input.config, 'openai');
  const openai = createOpenAI({
    apiKey: input.env[settings.api_key_env],
    baseURL: settings.base_url,
  }) as any;
  const webSearch = openai.tools?.webSearch;
  if (!webSearch) throw new Error('OpenAI provider does not expose tools.webSearch.');
  return generateText({
    model: openai(input.model),
    prompt: input.query,
    tools: {
      web_search: webSearch({
        externalWebAccess: true,
        searchContextSize: 'medium',
        // The OpenAI web-search tool takes its domain allow-list under `filters`;
        // a top-level `allowedDomains` (the Anthropic tool's shape) is not part of
        // its options, so the restriction would not reach the provider.
        ...(input.domains.length > 0 ? { filters: { allowedDomains: input.domains } } : {}),
      }),
    },
    toolChoice: { type: 'tool', toolName: 'web_search' },
  });
}
136
-
137
/**
 * Runs one web-search-backed generation through the Anthropic provider of the AI SDK.
 *
 * The API key is read from `input.env` under the variable named by the provider
 * settings. The versioned `webSearch_20250305` tool is preferred, with
 * `webSearch` as the fallback.
 *
 * @param input.query Prompt sent to the model.
 * @param input.model Anthropic model identifier.
 * @param input.config Workspace configuration used to look up provider settings.
 * @param input.env Environment used for the API-key lookup.
 * @param input.maxUses Search-use budget handed to the tool.
 * @param input.domains Domain allow-list; applied only when non-empty.
 * @returns The `generateText` result.
 * @throws Error when the provider instance exposes neither web-search tool.
 */
async function anthropicWebSearch(input: {
  query: string;
  model: string;
  config?: KnowledgeConfig;
  env: Record<string, string | undefined>;
  maxUses: number;
  domains: string[];
}) {
  const aiSdk = await import('ai');
  const anthropicSdk = await import('@ai-sdk/anthropic');
  const { api_key_env: keyVariable, base_url: baseURL } = providerSettings(input.config, 'anthropic');
  const client = anthropicSdk.createAnthropic({
    apiKey: input.env[keyVariable],
    baseURL,
  }) as any;

  const toolFactory = client.tools?.webSearch_20250305 ?? client.tools?.webSearch;
  if (!toolFactory) throw new Error('Anthropic provider does not expose a web search tool.');

  const toolOptions: Record<string, unknown> = { maxUses: input.maxUses };
  if (input.domains.length > 0) toolOptions.allowedDomains = input.domains;

  return aiSdk.generateText({
    model: client(input.model),
    prompt: input.query,
    tools: { web_search: toolFactory(toolOptions) },
  });
}
165
-
166
/**
 * Ingests web-search sources into the knowledge database as manifest items.
 *
 * Nothing is filed unless `options.fileResults` is set and at least one source
 * exists. Each source becomes a read-only `text/plain` item whose text is the
 * title, snippet and URL joined by newlines; the hash of that text serves as
 * both `hash` and `revision`.
 *
 * @param options Search options (supplies `dbPath`, `query`, `safetyPolicy`, `fileResults`).
 * @param sources Sources to file.
 * @param now ISO timestamp stamped on every item and passed to the ingester.
 * @returns The number of sources the ingester reports as upserted.
 */
async function fileWebSources(options: WebSearchOptions, sources: WebSearchSource[], now: string): Promise<number> {
  if (!options.fileResults) return 0;
  if (sources.length === 0) return 0;

  const toManifestItem = (source: WebSearchSource) => {
    const parts = [source.title, source.snippet, source.url].filter(Boolean);
    const text = parts.join('\n');
    const fingerprint = stableHash(text);
    return {
      source_ref: source.url,
      name: source.title ?? source.url,
      url: source.url,
      mime: 'text/plain',
      hash: fingerprint,
      revision: fingerprint,
      status: 'active',
      updated_at: now,
      permissions: { mode: 'read_only', allowed_purposes: ['knowledge_answer', 'knowledge_index'] },
      metadata: {
        source_ref: source.url,
        content_source: 'provider_web_search',
        provider_metadata: source.provider_metadata,
      },
      extracted_text: text,
    };
  };

  const ingestion = await ingestOpenFilesManifestItems({
    dbPath: options.dbPath,
    items: sources.map(toManifestItem),
    sourceLabel: `web-search:${options.query}`,
    readAction: 'provider_web_search_file_results',
    safetyPolicy: options.safetyPolicy,
    now: new Date(now),
  });
  return ingestion.sources_upserted;
}
199
-
200
/**
 * Executes a provider-native web search and records it in the knowledge database.
 *
 * Steps, in order:
 *  1. Validate the query and resolve provider, model, `limit` (default 5, 1..20)
 *     and `maxUses` (default 3, 1..10).
 *  2. Unless `options.fake` is set: enforce the safety policy (when one is given),
 *     require an `openai` or `anthropic` provider, and check credentials.
 *  3. Migrate the database, insert a `runs` row with status `running` and an audit event.
 *  4. Run the search (or build fixture sources for fake runs) and collect sources
 *     from the result's `sources` and `toolResults`.
 *  5. Optionally file the sources, then mark the run `completed`, add a run event
 *     and record provider usage.
 *
 * If step 4 or the filing fails, the run row is marked `failed` on a best-effort
 * basis and the original error is rethrown, so the row does not stay `running`.
 *
 * @param options Search options; see `WebSearchOptions`.
 * @returns The answer, collected sources, usage figures and warnings.
 * @throws Error when the query is empty, the provider has no native web search,
 *   or any called helper/provider throws.
 */
export async function runProviderWebSearch(options: WebSearchOptions): Promise<WebSearchResult> {
  const query = options.query.trim();
  if (!query) throw new Error('Web search query is required.');
  const env = options.env ?? process.env;
  // A single timestamp is used for every row written by this run.
  const now = (options.now ?? new Date()).toISOString();
  const limit = Math.max(1, Math.min(options.limit ?? 5, 20));
  const maxUses = Math.max(1, Math.min(options.maxUses ?? 3, 10));
  const domains = options.domains ?? [];
  const modelRef = resolveModelRef(
    options.modelRef
      ?? (options.provider
        ? `${options.provider}:${providerSettings(options.config, options.provider).default_model}`
        : 'default'),
    options.config,
  );
  const parsed = parseModelRef(modelRef);
  // An explicit provider wins; if it differs from the parsed one, use its default model.
  const provider = options.provider ?? parsed.provider;
  const model = parsed.provider === provider ? parsed.model : providerSettings(options.config, provider).default_model;
  const runId = `run_${randomUUID()}`;

  if (!options.fake && options.safetyPolicy) assertWebSearchAllowed(options.safetyPolicy);
  if (!options.fake && provider !== 'openai' && provider !== 'anthropic') {
    throw new Error(`Provider ${provider} does not expose native web search yet.`);
  }
  if (!options.fake) assertProviderCredentials(provider, options.config, env);

  migrateKnowledgeDb(options.dbPath);
  const db = openKnowledgeDb(options.dbPath);
  try {
    db.run(
      `INSERT INTO runs (id, type, prompt, status, provider, model, metadata_json, created_at, updated_at)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`,
      [
        runId,
        'provider-web-search',
        query,
        'running',
        provider,
        model,
        JSON.stringify({ domains, max_uses: maxUses, fake: options.fake === true }),
        now,
        now,
      ],
    );
    recordAuditEvent(db, {
      event_type: 'source_read',
      action: options.fake ? 'fake_provider_web_search' : 'provider_web_search',
      target_uri: query,
      decision: 'allow',
      metadata: { provider, model, domains, max_uses: maxUses },
      created_at: now,
    });
  } finally {
    db.close();
  }

  let answer = '';
  let sources: WebSearchSource[] = [];
  let usage = { input_tokens: estimateTokens(query), output_tokens: 0, cost_usd: 0 };
  const warnings: string[] = [];
  let filedSources = 0;
  try {
    if (options.fake) {
      sources = fakeSources(query, limit);
      answer = `Fake web search answer for: ${query}`;
      usage.output_tokens = estimateTokens(answer);
    } else {
      const result = provider === 'openai'
        ? await openAiWebSearch({ query, model, config: options.config, env, maxUses, domains })
        : await anthropicWebSearch({ query, model, config: options.config, env, maxUses, domains });
      answer = result.text;
      // Sources may appear both at the top level and inside tool results; the map de-duplicates by URL.
      const collected = new Map<string, WebSearchSource>();
      collectSources((result as any).sources, collected);
      collectSources((result as any).toolResults, collected);
      sources = Array.from(collected.values()).slice(0, limit);
      const normalized = normalizeAiSdkUsage({
        provider,
        model,
        usage: (result as any).usage,
        providerMetadata: (result as any).providerMetadata,
      });
      usage = {
        input_tokens: normalized.input_tokens,
        output_tokens: normalized.output_tokens,
        cost_usd: normalized.cost_usd,
      };
    }

    filedSources = await fileWebSources(options, sources, now);
  } catch (error: unknown) {
    // Without this the row inserted above would stay 'running' forever.
    // NOTE(review): assumes the runs table accepts a 'failed' status — confirm against the schema.
    // The update is best-effort: any failure here is ignored so the original error is what propagates.
    try {
      const failureDb = openKnowledgeDb(options.dbPath);
      try {
        failureDb.run(
          `UPDATE runs SET status = ?, metadata_json = ?, updated_at = ? WHERE id = ?`,
          [
            'failed',
            JSON.stringify({
              domains,
              max_uses: maxUses,
              fake: options.fake === true,
              error: error instanceof Error ? error.message : String(error),
            }),
            now,
            runId,
          ],
        );
      } finally {
        failureDb.close();
      }
    } catch {
      // Deliberately ignored; see the note above.
    }
    throw error;
  }

  const writeDb = openKnowledgeDb(options.dbPath);
  try {
    writeDb.run(
      `UPDATE runs SET status = ?, metadata_json = ?, updated_at = ? WHERE id = ?`,
      [
        'completed',
        JSON.stringify({ domains, max_uses: maxUses, sources: sources.length, filed_sources: filedSources, fake: options.fake === true }),
        now,
        runId,
      ],
    );
    writeDb.run(
      `INSERT INTO run_events (id, run_id, level, event, metadata_json, created_at)
VALUES (?, ?, ?, ?, ?, ?)`,
      [
        `evt_${randomUUID()}`,
        runId,
        'info',
        'provider_web_search_completed',
        JSON.stringify({ sources: sources.length, filed_sources: filedSources }),
        now,
      ],
    );
    recordProviderUsage(writeDb, {
      run_id: runId,
      provider,
      model,
      input_tokens: usage.input_tokens,
      output_tokens: usage.output_tokens,
      cost_usd: usage.cost_usd,
      metadata: { web_search: true, sources: sources.length, filed_sources: filedSources },
      created_at: now,
    });
  } finally {
    writeDb.close();
  }

  if (sources.length === 0) warnings.push('no_web_sources_returned');
  return {
    run_id: runId,
    query,
    provider,
    model,
    answer,
    sources,
    filed_sources: filedSources,
    usage,
    warnings,
  };
}