@hasna/knowledge 0.2.17 → 0.2.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -255,6 +255,13 @@ returns a local citation draft by default, optionally calls AI SDK generation vi
255
255
  `--generate`, records `runs`, `run_events`, and `provider_usage`, and only
256
256
  proposes durable wiki updates until the wiki compile/write task owns writes.
257
257
 
258
+ Provider-native web search is exposed separately as
259
+ `open-knowledge web search <query>` and MCP `ok_web_search`. Real network access
260
+ is safety-gated; OpenAI and Anthropic use provider web-search tools through AI
261
+ SDK, while DeepSeek remains a future fallback/external-search path. Returned web
262
+ snippets can optionally be filed as read-only `web` source refs for later local
263
+ search and citation.
264
+
258
265
  ## Provider Registry
259
266
 
260
267
  AI provider setup is BYOK and AI SDK v6 based. The local provider layer tracks:
@@ -142,6 +142,11 @@ source refs, artifact URIs, revision/hash metadata, offsets, and provenance.
142
142
  and MCP `knowledge_ask` wrap this context pack in a run ledger and return a
143
143
  citation draft or explicit AI SDK generated answer.
144
144
 
145
+ Provider-native web search lives beside local retrieval. `open-knowledge web
146
+ search` and MCP `ok_web_search` are safety-gated, capture provider sources, and
147
+ can file snippets as read-only `web` source refs so later local retrieval treats
148
+ them like other cited sources.
149
+
145
150
  ## Reindexing
146
151
 
147
152
  Reindexing is driven by source revisions:
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@hasna/knowledge",
3
- "version": "0.2.17",
3
+ "version": "0.2.18",
4
4
  "description": "Agent-friendly local knowledge CLI with JSON output, pagination, and safe destructive actions",
5
5
  "type": "module",
6
6
  "bin": {
package/src/cli.ts CHANGED
@@ -54,6 +54,9 @@ interface Flags {
54
54
  context?: boolean;
55
55
  generate?: boolean;
56
56
  approveWrite?: boolean;
57
+ provider?: string;
58
+ domain?: string[];
59
+ fileResults?: boolean;
57
60
  noColor?: boolean;
58
61
  scope?: string;
59
62
  olderThan?: number;
@@ -68,7 +71,7 @@ interface ParseResult {
68
71
  flags: Flags;
69
72
  }
70
73
 
71
- const COMMANDS = ['add', 'list', 'get', 'delete', 'update', 'archive', 'restore', 'upsert', 'untag', 'export', 'prune', 'dedupe', 'stats', 'paths', 'storage', 'db', 'wiki', 'source', 'ingest', 'reindex', 'search', 'ask', 'build', 'embeddings', 'providers', 'safety', 'help'];
74
+ const COMMANDS = ['add', 'list', 'get', 'delete', 'update', 'archive', 'restore', 'upsert', 'untag', 'export', 'prune', 'dedupe', 'stats', 'paths', 'storage', 'db', 'wiki', 'source', 'ingest', 'reindex', 'search', 'web', 'ask', 'build', 'embeddings', 'providers', 'safety', 'help'];
72
75
  const COMMAND_ALIASES: Record<string, string> = {
73
76
  ls: 'list',
74
77
  rm: 'delete',
@@ -111,6 +114,9 @@ function parseArgs(argv: string[]): ParseResult {
111
114
  case '--context': flags.context = true; break;
112
115
  case '--generate': flags.generate = true; break;
113
116
  case '--approve-write': flags.approveWrite = true; break;
117
+ case '--provider': flags.provider = argv[i + 1]; i += 1; break;
118
+ case '--domain': flags.domain = [...(flags.domain ?? []), argv[i + 1]]; i += 1; break;
119
+ case '--file-results': flags.fileResults = true; break;
114
120
  case '--fake': flags.fake = true; break;
115
121
  case '--no-color': flags.noColor = true; break;
116
122
  case '--scope': flags.scope = argv[i + 1]; i += 1; break;
@@ -190,6 +196,7 @@ Commands:
190
196
  ingest source <source-ref> Ingest a read-only source ref into knowledge.db
191
197
  reindex outbox <file|s3://> Consume open-files change events and invalidate chunks
192
198
  search <query> Hybrid search sources, wiki pages, indexes, or context
199
+ web search <query> Provider-native web search with citations
193
200
  ask|build <prompt> Build a read-only citation answer/context pack
194
201
  embeddings status|index|search Build/query local vector embeddings
195
202
  providers status|models|check Inspect AI SDK provider config and credentials
@@ -206,6 +213,9 @@ Global Options:
206
213
  --context Return a reranked citation context pack for search
207
214
  --generate Call AI SDK text generation for ask/build
208
215
  --approve-write Record approval intent for future durable wiki writes
216
+ --provider <name> Provider override for web search
217
+ --domain <domain> Restrict provider web search to a domain
218
+ --file-results File web snippets as web source refs
209
219
  --fake Use deterministic fake embeddings for local tests
210
220
  --scope local|global|project Store scope (default: global ~/.hasna/apps/knowledge/)
211
221
  --no-color Disable color output
@@ -268,6 +278,7 @@ function printCommandHelp(command: string): void {
268
278
  if (command === 'ingest') { console.log('Usage: open-knowledge ingest manifest <file|s3://bucket/key> | source <source-ref> [--purpose knowledge_index] [--scope local|global|project] [--json]'); return; }
269
279
  if (command === 'reindex') { console.log('Usage: open-knowledge reindex outbox <file|s3://bucket/key> [--scope local|global|project] [--json]'); return; }
270
280
  if (command === 'search') { console.log('Usage: open-knowledge search <query> [--context] [--semantic] [--model openai:text-embedding-3-small] [--limit <n>] [--dimensions <n>] [--fake] [--scope local|global|project] [--json]'); return; }
281
+ if (command === 'web') { console.log('Usage: open-knowledge web search <query> [--provider openai|anthropic] [--model provider:model] [--domain <domain>] [--file-results] [--fake] [--scope local|global|project] [--json]'); return; }
271
282
  if (command === 'ask' || command === 'build' || command === 'knowledge') { console.log('Usage: open-knowledge ask|build <prompt> [--generate] [--semantic] [--model default|provider:model] [--approve-write] [--scope local|global|project] [--json]'); return; }
272
283
  if (command === 'embeddings') { console.log('Usage: open-knowledge embeddings status|index|search [query] [--model openai:text-embedding-3-small] [--limit <n>] [--dimensions <n>] [--fake] [--scope local|global|project] [--json]'); return; }
273
284
  if (command === 'providers') { console.log('Usage: open-knowledge providers status|models|check [provider|model-alias] [--scope local|global|project] [--json]'); return; }
@@ -316,11 +327,11 @@ async function run(argv: string[]): Promise<void> {
316
327
  if (flags.completions) {
317
328
  const shell = flags.completions;
318
329
  if (shell === 'bash') {
319
- console.log(`_open_knowledge() { local cur; cur="${"$"}{COMP_WORDS[COMP_CWORD]}"; COMPREPLY=($(compgen -W "add list get update archive restore upsert untag delete export prune dedupe stats paths storage db wiki source ingest reindex search ask build embeddings providers safety help ls rm edit unarchive knowledge --json --yes --help --version --desc --page --limit --search --sort --id --store --title --content --url --tag --format --completions --purpose --model --dimensions --semantic --context --generate --approve-write --fake --no-color --scope --archived --include-archived" -- "$cur")); }; complete -F _open_knowledge open-knowledge`);
330
+ console.log(`_open_knowledge() { local cur; cur="${"$"}{COMP_WORDS[COMP_CWORD]}"; COMPREPLY=($(compgen -W "add list get update archive restore upsert untag delete export prune dedupe stats paths storage db wiki source ingest reindex search web ask build embeddings providers safety help ls rm edit unarchive knowledge --json --yes --help --version --desc --page --limit --search --sort --id --store --title --content --url --tag --format --completions --purpose --model --dimensions --semantic --context --generate --approve-write --provider --domain --file-results --fake --no-color --scope --archived --include-archived" -- "$cur")); }; complete -F _open_knowledge open-knowledge`);
320
331
  } else if (shell === 'zsh') {
321
- console.log(`#compdef open-knowledge\n_open_knowledge() { _arguments -C "1: :(add list get update archive restore upsert untag delete export prune dedupe stats paths storage db wiki source ingest reindex search ask build embeddings providers safety help ls rm edit unarchive knowledge)" "(--json)--json" "(--yes)-y" "(--help)--help" "(--version)--version" "(--desc)--desc" "(--archived)--archived" "(--include-archived)--include-archived" "(--semantic)--semantic" "(--context)--context" "(--generate)--generate" "(--approve-write)--approve-write" "(--fake)--fake" "(-p --page)"{-p,--page}"[page number]:number:" "(-l --limit)"{-l,--limit}"[items per page]:number:" "(-s --search)"{-s,--search}"[search text]:text:" "(--sort)--sort"\{created,title\}:" "(--id)--id[item id]:id:" "(--store)--store[store path]:path:" "(--title)--title[new title]:" "(--content)--content[new content]:" "(--url)--url[source url]:" "(-t --tag)"{-t,--tag}"[tag]:tag:" "(--format)--format[json|jsonl]:" "(--completions)--completions[output completions]:shell:(bash zsh fish):" "(--purpose)--purpose[purpose]:" "(--model)--model[model ref]:" "(--dimensions)--dimensions[embedding dimensions]:number:" "(--no-color)--no-color[disable color]" "(--scope)--scope"\{local,global,project\}:" }; _open_knowledge`);
332
+ console.log(`#compdef open-knowledge\n_open_knowledge() { _arguments -C "1: :(add list get update archive restore upsert untag delete export prune dedupe stats paths storage db wiki source ingest reindex search web ask build embeddings providers safety help ls rm edit unarchive knowledge)" "(--json)--json" "(--yes)-y" "(--help)--help" "(--version)--version" "(--desc)--desc" "(--archived)--archived" "(--include-archived)--include-archived" "(--semantic)--semantic" "(--context)--context" "(--generate)--generate" "(--approve-write)--approve-write" "(--file-results)--file-results" "(--fake)--fake" "(-p --page)"{-p,--page}"[page number]:number:" "(-l --limit)"{-l,--limit}"[items per page]:number:" "(-s --search)"{-s,--search}"[search text]:text:" "(--sort)--sort"\{created,title\}:" "(--id)--id[item id]:id:" "(--store)--store[store path]:path:" "(--title)--title[new title]:" "(--content)--content[new content]:" "(--url)--url[source url]:" "(-t --tag)"{-t,--tag}"[tag]:tag:" "(--format)--format[json|jsonl]:" "(--completions)--completions[output completions]:shell:(bash zsh fish):" "(--purpose)--purpose[purpose]:" "(--model)--model[model ref]:" "(--dimensions)--dimensions[embedding dimensions]:number:" "(--provider)--provider[provider]:" "(--domain)--domain[domain]:" "(--no-color)--no-color[disable color]" "(--scope)--scope"\{local,global,project\}:" }; _open_knowledge`);
322
333
  } else if (shell === 'fish') {
323
- console.log(`complete -c open-knowledge -f; complete -c open-knowledge -a "add list get update archive restore upsert untag delete export prune dedupe stats paths storage db wiki source ingest reindex search ask build embeddings providers safety help ls rm edit unarchive knowledge"; complete -c open-knowledge -l json; complete -c open-knowledge -l yes -s y; complete -c open-knowledge -l help -s h; complete -c open-knowledge -l version -s v; complete -c open-knowledge -l desc; complete -c open-knowledge -l archived; complete -c open-knowledge -l include-archived; complete -c open-knowledge -l semantic; complete -c open-knowledge -l context; complete -c open-knowledge -l generate; complete -c open-knowledge -l approve-write; complete -c open-knowledge -l fake; complete -c open-knowledge -s p -l page; complete -c open-knowledge -s l -l limit; complete -c open-knowledge -s s -l search; complete -c open-knowledge -l sort; complete -c open-knowledge -l id; complete -c open-knowledge -l store; complete -c open-knowledge -l title; complete -c open-knowledge -l content; complete -c open-knowledge -l url; complete -c open-knowledge -s t -l tag; complete -c open-knowledge -l format; complete -c open-knowledge -l completions; complete -c open-knowledge -l purpose; complete -c open-knowledge -l model; complete -c open-knowledge -l dimensions; complete -c open-knowledge -l no-color; complete -c open-knowledge -l scope -a "local global project"`);
334
+ console.log(`complete -c open-knowledge -f; complete -c open-knowledge -a "add list get update archive restore upsert untag delete export prune dedupe stats paths storage db wiki source ingest reindex search web ask build embeddings providers safety help ls rm edit unarchive knowledge"; complete -c open-knowledge -l json; complete -c open-knowledge -l yes -s y; complete -c open-knowledge -l help -s h; complete -c open-knowledge -l version -s v; complete -c open-knowledge -l desc; complete -c open-knowledge -l archived; complete -c open-knowledge -l include-archived; complete -c open-knowledge -l semantic; complete -c open-knowledge -l context; complete -c open-knowledge -l generate; complete -c open-knowledge -l approve-write; complete -c open-knowledge -l provider; complete -c open-knowledge -l domain; complete -c open-knowledge -l file-results; complete -c open-knowledge -l fake; complete -c open-knowledge -s p -l page; complete -c open-knowledge -s l -l limit; complete -c open-knowledge -s s -l search; complete -c open-knowledge -l sort; complete -c open-knowledge -l id; complete -c open-knowledge -l store; complete -c open-knowledge -l title; complete -c open-knowledge -l content; complete -c open-knowledge -l url; complete -c open-knowledge -s t -l tag; complete -c open-knowledge -l format; complete -c open-knowledge -l completions; complete -c open-knowledge -l purpose; complete -c open-knowledge -l model; complete -c open-knowledge -l dimensions; complete -c open-knowledge -l no-color; complete -c open-knowledge -l scope -a "local global project"`);
324
335
  } else {
325
336
  throw new Error("Invalid --completions value. Use 'bash', 'zsh', or 'fish'.");
326
337
  }
@@ -632,6 +643,24 @@ async function run(argv: string[]): Promise<void> {
632
643
  return;
633
644
  }
634
645
 
646
+ if (command === 'web') {
647
+ const action = positional[1] ?? 'search';
648
+ if (action !== 'search') throw new Error("Invalid web action. Use 'search'.");
649
+ const query = positional.slice(2).join(' ');
650
+ if (!query) throw new Error('Usage: open-knowledge web search <query>');
651
+ const result = await service.webSearch({
652
+ query,
653
+ limit: flags.limit,
654
+ modelRef: flags.model,
655
+ provider: flags.provider as AiProviderId | undefined,
656
+ domains: flags.domain,
657
+ fake: flags.fake,
658
+ fileResults: flags.fileResults,
659
+ });
660
+ output({ ok: true, ...result, message: `${result.sources.length} web source(s)` }, flags.json);
661
+ return;
662
+ }
663
+
635
664
  if (command === 'ask' || command === 'build') {
636
665
  const prompt = positional.slice(commandArgOffset).join(' ');
637
666
  if (!prompt) throw new Error('Usage: open-knowledge ask <prompt>');
package/src/mcp.js CHANGED
@@ -222,6 +222,24 @@ export function buildServer() {
222
222
  }
223
223
  });
224
224
 
225
+ registerTool(server, 'ok_web_search', 'Provider web search', 'Run safety-gated provider-native web search and return citations/sources', {
226
+ scope: scopeField,
227
+ query: z.string().describe('Web search query'),
228
+ limit: z.number().optional().describe('Maximum sources'),
229
+ provider: z.enum(['openai', 'anthropic', 'deepseek']).optional().describe('Provider override'),
230
+ model: z.string().optional().describe('Model alias/ref'),
231
+ domains: z.array(z.string()).optional().describe('Allowed domains'),
232
+ fake: z.boolean().optional().describe('Use deterministic fake web results'),
233
+ file_results: z.boolean().optional().describe('File web snippets as web source refs'),
234
+ }, async ({ scope, query, limit, provider, model, domains, fake, file_results }) => {
235
+ const service = createKnowledgeService({ scope });
236
+ try {
237
+ return jsonText({ ok: true, ...await service.webSearch({ query, limit, provider, modelRef: model, domains, fake, fileResults: file_results }) });
238
+ } catch (error) {
239
+ return errorText(error instanceof Error ? error.message : String(error));
240
+ }
241
+ });
242
+
225
243
  registerTool(server, 'ok_add', 'Add a knowledge item', 'Add a new item to the knowledge store', {
226
244
  title: z.string().describe('Item title'),
227
245
  content: z.string().describe('Item content/body'),
package/src/service.ts CHANGED
@@ -16,6 +16,7 @@ import { providerStatus, listModelRegistry, type ProviderStatusResult, type Mode
16
16
  import { retrieveKnowledgeContext, type RetrievalOptions } from './retrieval';
17
17
  import { hybridSearch, type HybridSearchOptions } from './search';
18
18
  import { resolveSafetyPolicy } from './safety';
19
+ import { runProviderWebSearch, type WebSearchOptions } from './web-search';
19
20
  import {
20
21
  recordStorageObjects,
21
22
  resolveStorageContract,
@@ -244,6 +245,16 @@ export class KnowledgeService {
244
245
  config: this.config(),
245
246
  });
246
247
  }
248
+
249
/**
 * Runs a provider web search against this service's workspace.
 *
 * The caller supplies the search-specific options; `dbPath`, `config`, and
 * `safetyPolicy` are filled in from the workspace, `this.config()`, and
 * `this.safetyPolicy()` and always override anything spread in from `options`.
 *
 * @param options Web-search options without the workspace-derived fields.
 * @returns Whatever `runProviderWebSearch` resolves to for those options.
 */
async webSearch(
  options: Omit<WebSearchOptions, 'dbPath' | 'config' | 'safetyPolicy'>,
): ReturnType<typeof runProviderWebSearch> {
  const workspace = this.ensureWorkspace();
  return runProviderWebSearch({
    ...options,
    dbPath: workspace.knowledgeDbPath,
    config: this.config(),
    safetyPolicy: this.safetyPolicy(),
  });
}
247
258
  }
248
259
 
249
260
  export function createKnowledgeService(options: KnowledgeServiceOptions = {}): KnowledgeService {
@@ -0,0 +1,330 @@
1
+ import { createHash, randomUUID } from 'node:crypto';
2
+ import { migrateKnowledgeDb, openKnowledgeDb } from './knowledge-db';
3
+ import { ingestOpenFilesManifestItems } from './manifest-ingest';
4
+ import {
5
+ assertProviderCredentials,
6
+ normalizeAiSdkUsage,
7
+ parseModelRef,
8
+ providerSettings,
9
+ recordProviderUsage,
10
+ resolveModelRef,
11
+ type AiProviderId,
12
+ } from './providers';
13
+ import { assertWebSearchAllowed, recordAuditEvent, type SafetyPolicy } from './safety';
14
+ import type { KnowledgeConfig } from './workspace';
15
+
16
/** Inputs accepted by `runProviderWebSearch`. */
export interface WebSearchOptions {
  /** Path of the knowledge database that receives the run ledger rows and any filed sources. */
  dbPath: string;
  /** Search query; surrounding whitespace is trimmed and an empty result is rejected. */
  query: string;
  /** Knowledge configuration used to resolve provider settings and model refs. */
  config?: KnowledgeConfig;
  /** When present and `fake` is not set, checked with `assertWebSearchAllowed` before any provider call. */
  safetyPolicy?: SafetyPolicy;
  /** Model alias/ref; when omitted, the provider's default model (or the `default` ref) is resolved. */
  modelRef?: string;
  /** Provider override; takes precedence over the provider parsed from `modelRef`. */
  provider?: AiProviderId;
  /** Maximum number of sources returned (default 5, clamped to 1–20). */
  limit?: number;
  /** Maximum tool uses (default 3, clamped to 1–10). */
  maxUses?: number;
  /** Domains forwarded to the provider tool as an allow-list when non-empty. */
  domains?: string[];
  /** Skip the safety gate, credential check, and provider call; return deterministic fixture sources. */
  fake?: boolean;
  /** File the returned sources into the knowledge database as read-only source refs. */
  fileResults?: boolean;
  /** Environment used for credential lookup; defaults to `process.env`. */
  env?: Record<string, string | undefined>;
  /** Clock override for the timestamps written to the ledger; defaults to the current time. */
  now?: Date;
}
31
+
32
/** One web source extracted from a provider response (or generated in fake mode). */
export interface WebSearchSource {
  /** Source URL; also the de-duplication key while collecting sources. */
  url: string;
  /** Source title, or `null` when the provider record carried none. */
  title: string | null;
  /** Snippet/description text, or `null` when the provider record carried none. */
  snippet: string | null;
  /** The raw provider record the source was extracted from. */
  provider_metadata: Record<string, unknown>;
}
38
+
39
/** Result returned by `runProviderWebSearch`. */
export interface WebSearchResult {
  /** Identifier of the `runs` ledger row created for this search. */
  run_id: string;
  /** The trimmed query that was searched. */
  query: string;
  /** Provider that served (or, in fake mode, was attributed) the search. */
  provider: string;
  /** Model used for the search. */
  model: string;
  /** Answer text produced by the provider (or the fake fixture answer). */
  answer: string;
  /** Sources returned, capped at the effective limit. */
  sources: WebSearchSource[];
  /** Number of sources filed into the knowledge database; 0 when filing was not requested. */
  filed_sources: number;
  /** Token and cost accounting recorded for the run. */
  usage: {
    input_tokens: number;
    output_tokens: number;
    cost_usd: number;
  };
  /** Non-fatal notices, e.g. `no_web_sources_returned`. */
  warnings: string[];
}
54
+
55
/**
 * Hashes `value` with SHA-256.
 *
 * @returns The lowercase hex digest prefixed with `sha256:`.
 */
function stableHash(value: string): string {
  const digest = createHash('sha256').update(value).digest('hex');
  return `sha256:${digest}`;
}
58
+
59
/**
 * Rough token estimate for `text`: 1.25 tokens per whitespace-separated word,
 * rounded up, and never less than 1 (so empty text still counts as one token).
 */
function estimateTokens(text: string): number {
  const wordCount = text
    .trim()
    .split(/\s+/)
    .filter(Boolean).length;
  return Math.max(1, Math.ceil(wordCount * 1.25));
}
63
+
64
/**
 * Narrows an unknown value to a plain record.
 *
 * @returns `value` itself when it is a non-null, non-array object; otherwise a fresh empty object.
 */
function asRecord(value: unknown): Record<string, unknown> {
  if (value !== null && typeof value === 'object' && !Array.isArray(value)) {
    return value as Record<string, unknown>;
  }
  return {};
}
67
+
68
/**
 * Narrows an unknown value to a non-empty string.
 *
 * @returns `value` when it is a string with at least one character; otherwise `null`.
 */
function asString(value: unknown): string | null {
  if (typeof value !== 'string') return null;
  return value.length > 0 ? value : null;
}
71
+
72
/**
 * Builds a `WebSearchSource` from one provider record.
 *
 * The URL is read from `url`, then `uri`, then `sourceUrl`; the title from
 * `title`, then `name`; the snippet from `snippet`, then `text`, then
 * `description`. Empty strings count as missing.
 *
 * @returns The source, with the record itself as `provider_metadata`, or
 *   `null` when `value` is not a record or has no usable URL.
 */
function sourceFromRecord(value: unknown): WebSearchSource | null {
  const record = asRecord(value);
  const url = asString(record.url) ?? asString(record.uri) ?? asString(record.sourceUrl);
  if (url === null) return null;

  const title = asString(record.title) ?? asString(record.name);
  const snippet = asString(record.snippet) ?? asString(record.text) ?? asString(record.description);
  return { url, title, snippet, provider_metadata: record };
}
83
+
84
/**
 * Walks a provider payload and collects every web source found in it into
 * `output`, keyed by URL.
 *
 * Arrays are walked element by element. For a record, the record itself is
 * tried as a source and then its `sources`, `results`, `citations`,
 * `annotations`, and `output` properties are walked recursively.
 *
 * When the same URL is seen more than once the entries are merged: the most
 * recent non-null `title`/`snippet` wins and `provider_metadata` keys are
 * combined, so a bare citation (URL only) can no longer erase the title or
 * snippet captured from an earlier, richer record. The first occurrence keeps
 * its position in `output`.
 *
 * @param value Payload fragment to inspect; non-objects are ignored.
 * @param output Map that receives the collected sources (mutated in place).
 * @param seen Objects already visited; guards against cyclic payloads. Callers
 *   normally omit it.
 */
function collectSources(
  value: unknown,
  output: Map<string, WebSearchSource>,
  seen: WeakSet<object> = new WeakSet<object>(),
): void {
  if (value === null || typeof value !== 'object') return;
  // A payload that references itself would otherwise recurse until the stack overflows.
  if (seen.has(value)) return;
  seen.add(value);

  if (Array.isArray(value)) {
    for (const entry of value) collectSources(entry, output, seen);
    return;
  }

  const source = sourceFromRecord(value);
  if (source) {
    const existing = output.get(source.url);
    output.set(
      source.url,
      existing
        ? {
            url: source.url,
            title: source.title ?? existing.title,
            snippet: source.snippet ?? existing.snippet,
            provider_metadata: { ...existing.provider_metadata, ...source.provider_metadata },
          }
        : source,
    );
  }

  const record = asRecord(value);
  for (const key of ['sources', 'results', 'citations', 'annotations', 'output']) {
    const nested = record[key];
    if (nested) collectSources(nested, output, seen);
  }
}
96
+
97
/**
 * Builds deterministic fixture sources for fake-mode searches.
 *
 * @param query Query echoed into each snippet.
 * @param limit Requested number of sources; at most 3 are ever produced.
 * @returns `min(limit, 3)` sources with 1-based ranks and `example.com` URLs.
 */
function fakeSources(query: string, limit: number): WebSearchSource[] {
  const count = Math.min(limit, 3);
  return Array.from({ length: count }, (_, index) => {
    const rank = index + 1;
    return {
      url: `https://example.com/knowledge-web-${rank}`,
      title: `Fake web source ${rank}`,
      snippet: `Deterministic web-search fixture for "${query}"`,
      provider_metadata: { fake: true, rank },
    };
  });
}
105
+
106
/**
 * Runs one OpenAI-backed web search through the AI SDK.
 *
 * The `ai` and `@ai-sdk/openai` packages are imported lazily, the API key is
 * read from `input.env` using the env-var name in the provider settings, and
 * the model is forced to call the `web_search` tool.
 *
 * `input.maxUses` is accepted for signature parity with the Anthropic variant
 * but is not read here.
 *
 * @throws Error when the created provider exposes no `tools.webSearch` factory.
 * @returns The `generateText` result.
 */
async function openAiWebSearch(input: {
  query: string;
  model: string;
  config?: KnowledgeConfig;
  env: Record<string, string | undefined>;
  maxUses: number;
  domains: string[];
}) {
  const { generateText } = await import('ai');
  const { createOpenAI } = await import('@ai-sdk/openai');
  const settings = providerSettings(input.config, 'openai');
  // Cast to `any` because the tool factory is looked up dynamically below.
  const openai = createOpenAI({
    apiKey: input.env[settings.api_key_env],
    baseURL: settings.base_url,
  }) as any;
  const webSearch = openai.tools?.webSearch;
  if (!webSearch) throw new Error('OpenAI provider does not expose tools.webSearch.');

  // The OpenAI web-search tool takes its domain allow-list under `filters`;
  // a top-level `allowedDomains` key is not part of the tool's arguments, so
  // the restriction would not reach the provider.
  const domainFilter = input.domains.length > 0 ? { filters: { allowedDomains: input.domains } } : {};

  return generateText({
    model: openai(input.model),
    prompt: input.query,
    tools: {
      web_search: webSearch({
        externalWebAccess: true,
        searchContextSize: 'medium',
        ...domainFilter,
      }),
    },
    toolChoice: { type: 'tool', toolName: 'web_search' },
  });
}
136
+
137
/**
 * Runs one Anthropic-backed web search through the AI SDK.
 *
 * The `ai` and `@ai-sdk/anthropic` packages are imported lazily and the API
 * key is read from `input.env` using the env-var name in the provider
 * settings. The versioned `webSearch_20250305` tool factory is preferred, with
 * `webSearch` as the fallback. Unlike the OpenAI variant, no tool choice is
 * forced.
 *
 * @throws Error when the created provider exposes neither tool factory.
 * @returns The `generateText` result.
 */
async function anthropicWebSearch(input: {
  query: string;
  model: string;
  config?: KnowledgeConfig;
  env: Record<string, string | undefined>;
  maxUses: number;
  domains: string[];
}) {
  const { generateText } = await import('ai');
  const { createAnthropic } = await import('@ai-sdk/anthropic');
  const settings = providerSettings(input.config, 'anthropic');
  // Cast to `any` because the tool factory is looked up dynamically below.
  const anthropic = createAnthropic({
    apiKey: input.env[settings.api_key_env],
    baseURL: settings.base_url,
  }) as any;
  const factory = anthropic.tools?.webSearch_20250305 ?? anthropic.tools?.webSearch;
  if (!factory) throw new Error('Anthropic provider does not expose a web search tool.');

  const domainFilter = input.domains.length > 0 ? { allowedDomains: input.domains } : {};

  return generateText({
    model: anthropic(input.model),
    prompt: input.query,
    tools: {
      web_search: factory({
        maxUses: input.maxUses,
        ...domainFilter,
      }),
    },
  });
}
165
+
166
/**
 * Files web sources into the knowledge database as read-only manifest items.
 *
 * Each source becomes one item whose extracted text is the title, snippet, and
 * URL (missing parts skipped) joined by newlines; the SHA-256 of that text is
 * used as both hash and revision.
 *
 * The ingest label uses the trimmed query so it matches the prompt stored on
 * the run ledger row.
 *
 * @param options Search options; only `fileResults`, `dbPath`, `query`, and `safetyPolicy` are read.
 * @param sources Sources to file.
 * @param now ISO timestamp stamped on every item and passed to the ingest call.
 * @returns The `sources_upserted` count reported by the ingest call, or 0 when
 *   filing was not requested or there is nothing to file.
 */
async function fileWebSources(options: WebSearchOptions, sources: WebSearchSource[], now: string): Promise<number> {
  if (!options.fileResults || sources.length === 0) return 0;

  const items = sources.map((source) => {
    const text = [source.title, source.snippet, source.url].filter(Boolean).join('\n');
    const hash = stableHash(text);
    return {
      source_ref: source.url,
      name: source.title ?? source.url,
      url: source.url,
      mime: 'text/plain',
      hash,
      revision: hash,
      status: 'active',
      updated_at: now,
      permissions: { mode: 'read_only', allowed_purposes: ['knowledge_answer', 'knowledge_index'] },
      metadata: {
        source_ref: source.url,
        content_source: 'provider_web_search',
        provider_metadata: source.provider_metadata,
      },
      extracted_text: text,
    };
  });

  const result = await ingestOpenFilesManifestItems({
    dbPath: options.dbPath,
    items,
    sourceLabel: `web-search:${options.query.trim()}`,
    readAction: 'provider_web_search_file_results',
    safetyPolicy: options.safetyPolicy,
    now: new Date(now),
  });
  return result.sources_upserted;
}
199
+
200
/** Clamps `value` to `[min, max]`, using `fallback` when it is undefined or not a finite number. */
function clampWebSearchOption(value: number | undefined, fallback: number, min: number, max: number): number {
  const candidate = value !== undefined && Number.isFinite(value) ? value : fallback;
  return Math.max(min, Math.min(candidate, max));
}

/**
 * Best-effort bookkeeping for a web-search run that threw: flips the run row
 * to `failed` and appends an error event. Never throws, so the caller can
 * always rethrow the original error.
 */
function recordWebSearchFailure(dbPath: string, runId: string, error: unknown, now: string): void {
  const message = error instanceof Error ? error.message : String(error);
  try {
    const db = openKnowledgeDb(dbPath);
    try {
      // NOTE(review): assumes the runs table accepts a `failed` status and
      // run_events an `error` level — confirm against the schema.
      db.run(`UPDATE runs SET status = ?, updated_at = ? WHERE id = ?`, ['failed', now, runId]);
      db.run(
        `INSERT INTO run_events (id, run_id, level, event, metadata_json, created_at)
         VALUES (?, ?, ?, ?, ?, ?)`,
        [`evt_${randomUUID()}`, runId, 'error', 'provider_web_search_failed', JSON.stringify({ error: message }), now],
      );
    } finally {
      db.close();
    }
  } catch {
    // Deliberately ignored: a ledger failure must not mask the original error.
  }
}

/**
 * Runs a provider-native web search and records it in the run ledger.
 *
 * Steps, in order:
 * 1. Trim and validate the query; clamp `limit` (default 5, 1–20) and
 *    `maxUses` (default 3, 1–10); resolve provider and model.
 * 2. Unless `fake` is set: check the safety policy (when one is given), reject
 *    providers other than `openai`/`anthropic`, and assert credentials.
 * 3. Migrate the database, insert a `running` row into `runs`, and record an
 *    audit event.
 * 4. Produce the answer and sources, either fixtures in fake mode or the
 *    provider call, with sources collected from the result's `sources` and
 *    `toolResults` and capped at `limit`.
 * 5. Optionally file the sources, then mark the run `completed`, append a
 *    completion event, and record provider usage.
 *
 * If step 4 or 5 throws, the run row is marked `failed` (best effort) and the
 * original error is rethrown, so a run is not left in `running` after an error.
 *
 * @throws Error when the query is empty, the provider has no native web
 *   search, or any gate, provider call, or ledger write fails.
 * @returns The answer, sources, usage, and run id; `warnings` contains
 *   `no_web_sources_returned` when no source was found.
 */
export async function runProviderWebSearch(options: WebSearchOptions): Promise<WebSearchResult> {
  const query = options.query.trim();
  if (!query) throw new Error('Web search query is required.');
  const env = options.env ?? process.env;
  const now = (options.now ?? new Date()).toISOString();
  // A NaN limit (e.g. from an unparsable CLI value) would otherwise survive
  // Math.min/Math.max and slice the source list down to nothing.
  const limit = clampWebSearchOption(options.limit, 5, 1, 20);
  const maxUses = clampWebSearchOption(options.maxUses, 3, 1, 10);
  // Drop non-string or blank entries so they never reach the provider tool.
  const domains = (options.domains ?? []).filter(
    (domain): domain is string => typeof domain === 'string' && domain.trim().length > 0,
  );
  const requestedRef =
    options.modelRef ??
    (options.provider
      ? `${options.provider}:${providerSettings(options.config, options.provider).default_model}`
      : 'default');
  const modelRef = resolveModelRef(requestedRef, options.config);
  const parsed = parseModelRef(modelRef);
  const provider = options.provider ?? parsed.provider;
  // When the provider override disagrees with the model ref, use that provider's default model.
  const model = parsed.provider === provider ? parsed.model : providerSettings(options.config, provider).default_model;
  const runId = `run_${randomUUID()}`;

  if (!options.fake && options.safetyPolicy) assertWebSearchAllowed(options.safetyPolicy);
  if (!options.fake && provider !== 'openai' && provider !== 'anthropic') {
    throw new Error(`Provider ${provider} does not expose native web search yet.`);
  }
  if (!options.fake) assertProviderCredentials(provider, options.config, env);

  migrateKnowledgeDb(options.dbPath);
  const db = openKnowledgeDb(options.dbPath);
  try {
    db.run(
      `INSERT INTO runs (id, type, prompt, status, provider, model, metadata_json, created_at, updated_at)
       VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`,
      [
        runId,
        'provider-web-search',
        query,
        'running',
        provider,
        model,
        JSON.stringify({ domains, max_uses: maxUses, fake: options.fake === true }),
        now,
        now,
      ],
    );
    recordAuditEvent(db, {
      event_type: 'source_read',
      action: options.fake ? 'fake_provider_web_search' : 'provider_web_search',
      target_uri: query,
      decision: 'allow',
      metadata: { provider, model, domains, max_uses: maxUses },
      created_at: now,
    });
  } finally {
    db.close();
  }

  try {
    let answer = '';
    let sources: WebSearchSource[] = [];
    let usage = { input_tokens: estimateTokens(query), output_tokens: 0, cost_usd: 0 };
    const warnings: string[] = [];

    if (options.fake) {
      sources = fakeSources(query, limit);
      answer = `Fake web search answer for: ${query}`;
      usage.output_tokens = estimateTokens(answer);
    } else {
      const result =
        provider === 'openai'
          ? await openAiWebSearch({ query, model, config: options.config, env, maxUses, domains })
          : await anthropicWebSearch({ query, model, config: options.config, env, maxUses, domains });
      answer = result.text;
      const collected = new Map<string, WebSearchSource>();
      collectSources((result as any).sources, collected);
      collectSources((result as any).toolResults, collected);
      sources = Array.from(collected.values()).slice(0, limit);
      const normalized = normalizeAiSdkUsage({
        provider,
        model,
        usage: (result as any).usage,
        providerMetadata: (result as any).providerMetadata,
      });
      usage = {
        input_tokens: normalized.input_tokens,
        output_tokens: normalized.output_tokens,
        cost_usd: normalized.cost_usd,
      };
    }

    const filedSources = await fileWebSources(options, sources, now);
    const writeDb = openKnowledgeDb(options.dbPath);
    try {
      writeDb.run(`UPDATE runs SET status = ?, metadata_json = ?, updated_at = ? WHERE id = ?`, [
        'completed',
        JSON.stringify({
          domains,
          max_uses: maxUses,
          sources: sources.length,
          filed_sources: filedSources,
          fake: options.fake === true,
        }),
        now,
        runId,
      ]);
      writeDb.run(
        `INSERT INTO run_events (id, run_id, level, event, metadata_json, created_at)
         VALUES (?, ?, ?, ?, ?, ?)`,
        [
          `evt_${randomUUID()}`,
          runId,
          'info',
          'provider_web_search_completed',
          JSON.stringify({ sources: sources.length, filed_sources: filedSources }),
          now,
        ],
      );
      recordProviderUsage(writeDb, {
        run_id: runId,
        provider,
        model,
        input_tokens: usage.input_tokens,
        output_tokens: usage.output_tokens,
        cost_usd: usage.cost_usd,
        metadata: { web_search: true, sources: sources.length, filed_sources: filedSources },
        created_at: now,
      });
    } finally {
      writeDb.close();
    }

    if (sources.length === 0) warnings.push('no_web_sources_returned');
    return {
      run_id: runId,
      query,
      provider,
      model,
      answer,
      sources,
      filed_sources: filedSources,
      usage,
      warnings,
    };
  } catch (error) {
    recordWebSearchFailure(options.dbPath, runId, error, now);
    throw error;
  }
}