@hasna/knowledge 0.2.17 → 0.2.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -102,6 +102,9 @@ open-knowledge search "company wiki policy" --scope project --context --json
102
102
  # Build a citation answer/context draft for a prompt
103
103
  open-knowledge ask "How do we cite handbook policy?" --scope project --json
104
104
  knowledge "How do we cite handbook policy?" --scope project --json
105
+
106
+ # Provider-native web search, safety-gated for real network access
107
+ HASNA_KNOWLEDGE_WEB_SEARCH=1 open-knowledge web search "latest AI SDK web search" --provider openai --json
105
108
  ```
106
109
 
107
110
  ## Commands
@@ -276,6 +279,18 @@ pack, returns a local citation draft by default, records a run ledger in
276
279
  the flow deterministic for local tests. `--approve-write` records approval
277
280
  intent, but durable wiki writes remain deferred to the wiki compile/write task.
278
281
 
282
+ ### web
283
+ ```bash
284
+ open-knowledge web search <query> [--provider openai|anthropic] [--model provider:model] [--domain <domain>] [--file-results] [--fake] [--scope project] [--json]
285
+ ```
286
+ Run provider-native hosted web search and return cited web sources. Real network
287
+ search is disabled unless `safety.network.web_search_enabled=true` or
288
+ `HASNA_KNOWLEDGE_WEB_SEARCH=1` is set. OpenAI uses the AI SDK OpenAI
289
+ `tools.webSearch` path; Anthropic uses its provider web-search tool when
290
+ available. `--file-results` stores returned snippets as read-only `web` source
291
+ refs in `knowledge.db` so later local search can cite them. `--fake` returns
292
+ deterministic offline sources for tests.
293
+
279
294
  ### safety
280
295
  ```bash
281
296
  open-knowledge safety status [--scope project] [--json]
@@ -358,7 +373,8 @@ The MCP server exposes item tools (`ok_add`, `ok_list`, `ok_get`, `ok_update`,
358
373
  parsing/resolution (`ok_parse_source_ref`, `ok_resolve_source`). The
359
374
  `knowledge_search` MCP tool returns reranked citation context packs for agent
360
375
  prompts, and `knowledge_ask` runs the same prompt flow exposed by
361
- `open-knowledge ask`.
376
+ `open-knowledge ask`. `ok_web_search` exposes safety-gated provider web search
377
+ to MCP clients.
362
378
 
363
379
  ## Source And Artifact Boundary
364
380
 
@@ -13660,7 +13660,7 @@ import { existsSync as existsSync7, readFileSync as readFileSync7, writeFileSync
13660
13660
  // package.json
13661
13661
  var package_default = {
13662
13662
  name: "@hasna/knowledge",
13663
- version: "0.2.17",
13663
+ version: "0.2.18",
13664
13664
  description: "Agent-friendly local knowledge CLI with JSON output, pagination, and safe destructive actions",
13665
13665
  type: "module",
13666
13666
  bin: {
@@ -17348,8 +17348,259 @@ async function ingestSourceRef(options) {
17348
17348
  };
17349
17349
  }
17350
17350
 
17351
- // src/storage-contract.ts
17351
+ // src/web-search.ts
17352
17352
  import { createHash as createHash7, randomUUID as randomUUID6 } from "crypto";
17353
/**
 * Fingerprint a value with SHA-256.
 *
 * @param {string} value - Data fed to the hasher (callers in this file pass text).
 * @returns {string} The lowercase hex digest prefixed with `sha256:`.
 */
function stableHash(value) {
  const hasher = createHash7("sha256");
  hasher.update(value);
  const hexDigest = hasher.digest("hex");
  return "sha256:" + hexDigest;
}
17356
/**
 * Rough token estimate for a piece of text: 1.25 tokens per
 * whitespace-separated word, rounded up, and never less than 1.
 *
 * @param {string} text - Text to measure.
 * @returns {number} Estimated token count (>= 1).
 */
function estimateTokens2(text) {
  // Runs of non-whitespace are exactly the words left after trim + split.
  const wordCount = (text.match(/\S+/g) ?? []).length;
  const estimate = Math.ceil(wordCount * 1.25);
  return estimate < 1 ? 1 : estimate;
}
17360
/**
 * Coerce an arbitrary value to a plain-object view.
 *
 * @param {unknown} value - Anything.
 * @returns {object} `value` itself when it is a non-null, non-array object;
 *   otherwise a fresh empty object.
 */
function asRecord(value) {
  if (!value)
    return {};
  if (typeof value !== "object")
    return {};
  if (Array.isArray(value))
    return {};
  return value;
}
17363
/**
 * Narrow a value to a non-empty string.
 *
 * @param {unknown} value - Anything.
 * @returns {string|null} The value when it is a string with at least one
 *   character; `null` for empty strings and every non-string.
 */
function asString3(value) {
  if (typeof value !== "string")
    return null;
  return value.length === 0 ? null : value;
}
17366
/**
 * Try to read a web source out of a loosely-shaped provider record.
 *
 * The URL is taken from `url`, `uri` or `sourceUrl` (first non-empty string
 * wins); the title from `title` or `name`; the snippet from `snippet`, `text`
 * or `description`.
 *
 * @param {unknown} value - Candidate record.
 * @returns {{url: string, title: string|null, snippet: string|null, provider_metadata: object}|null}
 *   The normalized source, or `null` when no URL field is present.
 */
function sourceFromRecord(value) {
  const fields = asRecord(value);
  // First listed key holding a non-empty string, or null when none does.
  const firstString = (...keys) => {
    for (const key of keys) {
      const text = asString3(fields[key]);
      if (text !== null)
        return text;
    }
    return null;
  };
  const link = firstString("url", "uri", "sourceUrl");
  if (link === null)
    return null;
  return {
    url: link,
    title: firstString("title", "name"),
    snippet: firstString("snippet", "text", "description"),
    provider_metadata: fields
  };
}
17378
/**
 * Walk a provider response and gather every web source found in it.
 *
 * Arrays are traversed element by element. For objects, the object itself is
 * tried as a source and then the container keys `sources`, `results`,
 * `citations`, `annotations` and `output` are searched recursively.
 *
 * Sources are de-duplicated by URL. When the same URL shows up again, the
 * newer entry wins field by field, but a missing title/snippet on the newer
 * entry no longer erases one that an earlier entry supplied.
 *
 * @param {unknown} value - Provider payload (array, object, or anything else).
 * @param {Map<string, object>} output - Accumulator keyed by source URL; mutated in place.
 * @returns {void}
 */
function collectSources(value, output) {
  if (Array.isArray(value)) {
    for (const entry of value)
      collectSources(entry, output);
    return;
  }
  const source = sourceFromRecord(value);
  if (source) {
    const previous = output.get(source.url);
    output.set(source.url, previous ? {
      ...source,
      // Keep what an earlier sighting knew when this one has nothing to offer.
      title: source.title ?? previous.title,
      snippet: source.snippet ?? previous.snippet
    } : source);
  }
  const record2 = asRecord(value);
  for (const key of ["sources", "results", "citations", "annotations", "output"]) {
    if (record2[key])
      collectSources(record2[key], output);
  }
}
17393
/**
 * Build deterministic offline web sources for fake-mode runs.
 *
 * @param {string} query - Query echoed into each snippet.
 * @param {number} limit - Requested number of sources; at most 3 are produced.
 * @returns {Array<{url: string, title: string, snippet: string, provider_metadata: {fake: true, rank: number}}>}
 */
function fakeSources(query, limit) {
  const count = Math.min(limit, 3);
  const fixtures = [];
  for (let rank = 1; rank <= count; rank += 1) {
    fixtures.push({
      url: `https://example.com/knowledge-web-${rank}`,
      title: `Fake web source ${rank}`,
      snippet: `Deterministic web-search fixture for "${query}"`,
      provider_metadata: { fake: true, rank }
    });
  }
  return fixtures;
}
17401
/**
 * Run a hosted web search through the AI SDK OpenAI provider.
 *
 * The `web_search` tool is forced via `toolChoice`, so the model always
 * searches before answering.
 *
 * @param {object} input
 * @param {string} input.query - Prompt sent to the model.
 * @param {string} input.model - Model id handed to the OpenAI provider.
 * @param {object} input.config - Config passed to `providerSettings`.
 * @param {object} input.env - Environment map the API key is read from.
 * @param {string[]} [input.domains] - Optional allow-list of domains.
 * @returns {Promise<object>} The `generateText` result.
 * @throws {Error} When the installed provider has no `tools.webSearch`.
 */
async function openAiWebSearch(input) {
  const { generateText } = await import("ai");
  const { createOpenAI } = await import("@ai-sdk/openai");
  const settings = providerSettings(input.config, "openai");
  const openai = createOpenAI({
    apiKey: input.env[settings.api_key_env],
    baseURL: settings.base_url
  });
  const webSearch = openai.tools?.webSearch;
  if (!webSearch)
    throw new Error("OpenAI provider does not expose tools.webSearch.");
  const domains = input.domains ?? [];
  return generateText({
    model: openai(input.model),
    prompt: input.query,
    tools: {
      web_search: webSearch({
        externalWebAccess: true,
        searchContextSize: "medium",
        // The OpenAI web-search tool takes its domain allow-list under
        // `filters`; a top-level `allowedDomains` is not part of its argument
        // schema, so the restriction would never reach the API.
        ...domains.length > 0 ? { filters: { allowedDomains: domains } } : {}
      })
    },
    toolChoice: { type: "tool", toolName: "web_search" }
  });
}
17425
/**
 * Run a hosted web search through the AI SDK Anthropic provider.
 *
 * Prefers the versioned `webSearch_20250305` tool factory and falls back to
 * an unversioned `webSearch` factory when that is what the provider exposes.
 *
 * @param {object} input
 * @param {string} input.query - Prompt sent to the model.
 * @param {string} input.model - Model id handed to the Anthropic provider.
 * @param {object} input.config - Config passed to `providerSettings`.
 * @param {object} input.env - Environment map the API key is read from.
 * @param {number} input.maxUses - Value forwarded as the tool's `maxUses`.
 * @param {string[]} input.domains - Allow-list of domains; ignored when empty.
 * @returns {Promise<object>} The `generateText` result.
 * @throws {Error} When the provider exposes no web search tool.
 */
async function anthropicWebSearch(input) {
  const aiSdk = await import("ai");
  const anthropicSdk = await import("@ai-sdk/anthropic");
  const { generateText } = aiSdk;
  const { createAnthropic } = anthropicSdk;
  const { api_key_env: apiKeyEnv, base_url: baseURL } = providerSettings(input.config, "anthropic");
  const client = createAnthropic({ apiKey: input.env[apiKeyEnv], baseURL });
  const toolFactory = client.tools?.webSearch_20250305 ?? client.tools?.webSearch;
  if (!toolFactory)
    throw new Error("Anthropic provider does not expose a web search tool.");
  const toolOptions = { maxUses: input.maxUses };
  if (input.domains.length > 0)
    toolOptions.allowedDomains = input.domains;
  return generateText({
    model: client(input.model),
    prompt: input.query,
    tools: { web_search: toolFactory(toolOptions) }
  });
}
17447
/**
 * Persist web search sources as read-only source refs, when asked to.
 *
 * Does nothing (and returns 0) unless `options.fileResults` is truthy and at
 * least one source is given. Each source becomes a manifest item whose text
 * is the title, snippet and URL joined by newlines (missing parts skipped);
 * the hash of that text serves as both `hash` and `revision`.
 *
 * @param {object} options - Search options; reads `fileResults`, `dbPath`,
 *   `query` and `safetyPolicy`.
 * @param {Array<object>} sources - Normalized web sources.
 * @param {string} now - Timestamp string stored on each item and parsed into
 *   a `Date` for the ingest call.
 * @returns {Promise<number>} `sources_upserted` as reported by the ingest call.
 */
async function fileWebSources(options, sources, now) {
  const shouldFile = options.fileResults && sources.length > 0;
  if (!shouldFile)
    return 0;
  const items = [];
  for (const source of sources) {
    const { url, title, snippet, provider_metadata: providerMetadata } = source;
    const text = [title, snippet, url].filter(Boolean).join("\n");
    const digest = stableHash(text);
    items.push({
      source_ref: url,
      name: title ?? url,
      url,
      mime: "text/plain",
      hash: digest,
      revision: digest,
      status: "active",
      updated_at: now,
      permissions: { mode: "read_only", allowed_purposes: ["knowledge_answer", "knowledge_index"] },
      metadata: {
        source_ref: url,
        content_source: "provider_web_search",
        provider_metadata: providerMetadata
      },
      extracted_text: text
    });
  }
  const { sources_upserted: upserted } = await ingestOpenFilesManifestItems({
    dbPath: options.dbPath,
    items,
    sourceLabel: `web-search:${options.query}`,
    readAction: "provider_web_search_file_results",
    safetyPolicy: options.safetyPolicy,
    now: new Date(now)
  });
  return upserted;
}
17482
/**
 * Run a provider-native web search, record it in the run ledger, and return
 * the answer together with its cited sources.
 *
 * Flow: validate and clamp options, resolve provider/model, apply the safety
 * and credential gates (skipped in fake mode), insert a "running" run row and
 * an audit event, perform the search (or build fake fixtures), optionally
 * file the sources, then mark the run "completed" and record provider usage.
 * If the search or the filing step throws, the run is marked "failed" and the
 * original error is rethrown, so no run is left in "running" indefinitely.
 *
 * @param {object} options
 * @param {string} options.query - Search query; trimmed, must be non-empty.
 * @param {string} options.dbPath - Path of the knowledge database.
 * @param {object} options.config - Provider configuration.
 * @param {object} [options.env] - Environment map; defaults to `process.env`.
 * @param {Date} [options.now] - Clock override; defaults to the current time.
 * @param {number} [options.limit] - Max sources returned (default 5, clamped to 1..20).
 * @param {number} [options.maxUses] - Max tool uses (default 3, clamped to 1..10).
 * @param {string[]} [options.domains] - Allowed domains.
 * @param {string} [options.modelRef] - Model alias or `provider:model` ref.
 * @param {string} [options.provider] - Provider override.
 * @param {boolean} [options.fake] - Return deterministic fixtures, no network.
 * @param {boolean} [options.fileResults] - Store returned sources as source refs.
 * @param {object} [options.safetyPolicy] - Policy checked before real searches.
 * @returns {Promise<{run_id: string, query: string, provider: string, model: string,
 *   answer: string, sources: object[], filed_sources: number, usage: object,
 *   warnings: string[]}>}
 * @throws {Error} On an empty query, an unsupported provider, a failed
 *   safety/credential gate, or any provider/ingest failure.
 */
async function runProviderWebSearch(options) {
  const query = options.query.trim();
  if (!query)
    throw new Error("Web search query is required.");
  // Clamp to [min, max] as a whole number. A NaN input (e.g. an unparsable
  // CLI flag) would otherwise survive Math.min/Math.max and silently produce
  // zero sources, so it falls back to the default instead.
  const clampInteger = (value, fallback, min, max) => {
    const clamped = Math.max(min, Math.min(value ?? fallback, max));
    return Number.isNaN(clamped) ? fallback : Math.trunc(clamped);
  };
  const env = options.env ?? process.env;
  const now = (options.now ?? new Date()).toISOString();
  const limit = clampInteger(options.limit, 5, 1, 20);
  const maxUses = clampInteger(options.maxUses, 3, 1, 10);
  const domains = options.domains ?? [];
  const modelRef = resolveModelRef(options.modelRef ?? (options.provider ? `${options.provider}:${providerSettings(options.config, options.provider).default_model}` : "default"), options.config);
  const parsed = parseModelRef(modelRef);
  const provider = options.provider ?? parsed.provider;
  // A model ref that belongs to a different provider than the requested one
  // is discarded in favour of the requested provider's default model.
  const model = parsed.provider === provider ? parsed.model : providerSettings(options.config, provider).default_model;
  const runId = `run_${randomUUID6()}`;
  // NOTE(review): the safety gate only runs when a policy object is supplied;
  // a direct caller that omits `safetyPolicy` is not gated here — confirm
  // that is intended.
  if (!options.fake && options.safetyPolicy)
    assertWebSearchAllowed(options.safetyPolicy);
  if (!options.fake && provider !== "openai" && provider !== "anthropic") {
    throw new Error(`Provider ${provider} does not expose native web search yet.`);
  }
  if (!options.fake)
    assertProviderCredentials(provider, options.config, env);
  migrateKnowledgeDb(options.dbPath);
  const db = openKnowledgeDb(options.dbPath);
  try {
    db.run(`INSERT INTO runs (id, type, prompt, status, provider, model, metadata_json, created_at, updated_at)
       VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`, [
      runId,
      "provider-web-search",
      query,
      "running",
      provider,
      model,
      JSON.stringify({ domains, max_uses: maxUses, fake: options.fake === true }),
      now,
      now
    ]);
    recordAuditEvent(db, {
      event_type: "source_read",
      action: options.fake ? "fake_provider_web_search" : "provider_web_search",
      target_uri: query,
      decision: "allow",
      metadata: { provider, model, domains, max_uses: maxUses },
      created_at: now
    });
  } finally {
    db.close();
  }
  let answer = "";
  let sources = [];
  let usage = { input_tokens: estimateTokens2(query), output_tokens: 0, cost_usd: 0 };
  let filedSources = 0;
  const warnings = [];
  try {
    if (options.fake) {
      sources = fakeSources(query, limit);
      answer = `Fake web search answer for: ${query}`;
      usage.output_tokens = estimateTokens2(answer);
    } else {
      const result = provider === "openai" ? await openAiWebSearch({ query, model, config: options.config, env, maxUses, domains }) : await anthropicWebSearch({ query, model, config: options.config, env, maxUses, domains });
      answer = result.text;
      const collected = new Map();
      collectSources(result.sources, collected);
      collectSources(result.toolResults, collected);
      sources = Array.from(collected.values()).slice(0, limit);
      const normalized = normalizeAiSdkUsage({
        provider,
        model,
        usage: result.usage,
        providerMetadata: result.providerMetadata
      });
      usage = {
        input_tokens: normalized.input_tokens,
        output_tokens: normalized.output_tokens,
        cost_usd: normalized.cost_usd
      };
    }
    filedSources = await fileWebSources(options, sources, now);
  } catch (error) {
    // Close out the ledger entry so a failed search is not left "running".
    // NOTE(review): assumes the schema accepts status "failed" on runs and
    // level "error" on run_events — confirm against the migrations.
    const message = error instanceof Error ? error.message : String(error);
    try {
      const failDb = openKnowledgeDb(options.dbPath);
      try {
        failDb.run(`UPDATE runs SET status = ?, metadata_json = ?, updated_at = ? WHERE id = ?`, [
          "failed",
          JSON.stringify({ domains, max_uses: maxUses, fake: options.fake === true, error: message }),
          now,
          runId
        ]);
        failDb.run(`INSERT INTO run_events (id, run_id, level, event, metadata_json, created_at)
           VALUES (?, ?, ?, ?, ?, ?)`, [
          `evt_${randomUUID6()}`,
          runId,
          "error",
          "provider_web_search_failed",
          JSON.stringify({ error: message }),
          now
        ]);
      } finally {
        failDb.close();
      }
    } catch {
      // Best effort only: a bookkeeping failure must not mask the real error.
    }
    throw error;
  }
  const writeDb = openKnowledgeDb(options.dbPath);
  try {
    writeDb.run(`UPDATE runs SET status = ?, metadata_json = ?, updated_at = ? WHERE id = ?`, [
      "completed",
      JSON.stringify({ domains, max_uses: maxUses, sources: sources.length, filed_sources: filedSources, fake: options.fake === true }),
      now,
      runId
    ]);
    writeDb.run(`INSERT INTO run_events (id, run_id, level, event, metadata_json, created_at)
       VALUES (?, ?, ?, ?, ?, ?)`, [
      `evt_${randomUUID6()}`,
      runId,
      "info",
      "provider_web_search_completed",
      JSON.stringify({ sources: sources.length, filed_sources: filedSources }),
      now
    ]);
    recordProviderUsage(writeDb, {
      run_id: runId,
      provider,
      model,
      input_tokens: usage.input_tokens,
      output_tokens: usage.output_tokens,
      cost_usd: usage.cost_usd,
      metadata: { web_search: true, sources: sources.length, filed_sources: filedSources },
      created_at: now
    });
  } finally {
    writeDb.close();
  }
  if (sources.length === 0)
    warnings.push("no_web_sources_returned");
  return {
    run_id: runId,
    query,
    provider,
    model,
    answer,
    sources,
    filed_sources: filedSources,
    usage,
    warnings
  };
}
17601
+
17602
+ // src/storage-contract.ts
17603
+ import { createHash as createHash8, randomUUID as randomUUID7 } from "crypto";
17353
17604
  var GENERATED_ARTIFACTS = [
17354
17605
  {
17355
17606
  kind: "schema",
@@ -17385,7 +17636,7 @@ var GENERATED_ARTIFACTS = [
17385
17636
  function hashArtifactBody(body) {
17386
17637
  const bytes = typeof body === "string" ? Buffer.from(body) : Buffer.from(body);
17387
17638
  return {
17388
- hash: `sha256:${createHash7("sha256").update(bytes).digest("hex")}`,
17639
+ hash: `sha256:${createHash8("sha256").update(bytes).digest("hex")}`,
17389
17640
  size_bytes: bytes.byteLength
17390
17641
  };
17391
17642
  }
@@ -17510,7 +17761,7 @@ function recordStorageObjects(db, objects, now = new Date) {
17510
17761
  `);
17511
17762
  const insert = db.transaction((entries) => {
17512
17763
  for (const entry of entries) {
17513
- statement.run(randomUUID6(), entry.uri, entry.kind, entry.content_type ?? null, entry.hash ?? null, entry.size_bytes ?? null, JSON.stringify({
17764
+ statement.run(randomUUID7(), entry.uri, entry.kind, entry.content_type ?? null, entry.hash ?? null, entry.size_bytes ?? null, JSON.stringify({
17514
17765
  key: entry.key,
17515
17766
  ...entry.metadata ?? {}
17516
17767
  }), timestamp, timestamp);
@@ -17520,7 +17771,7 @@ function recordStorageObjects(db, objects, now = new Date) {
17520
17771
  }
17521
17772
 
17522
17773
  // src/wiki-layout.ts
17523
- import { createHash as createHash8 } from "crypto";
17774
+ import { createHash as createHash9 } from "crypto";
17524
17775
  function todayParts(now) {
17525
17776
  const year = String(now.getUTCFullYear());
17526
17777
  const month = String(now.getUTCMonth() + 1).padStart(2, "0");
@@ -17528,7 +17779,7 @@ function todayParts(now) {
17528
17779
  return { year, month, day };
17529
17780
  }
17530
17781
  function stableId5(prefix, value) {
17531
- return `${prefix}_${createHash8("sha256").update(value).digest("hex").slice(0, 20)}`;
17782
+ return `${prefix}_${createHash9("sha256").update(value).digest("hex").slice(0, 20)}`;
17532
17783
  }
17533
17784
  function estimateTokenCount2(text) {
17534
17785
  const words = text.trim().split(/\s+/).filter(Boolean).length;
@@ -17893,6 +18144,15 @@ class KnowledgeService {
17893
18144
  config: this.config()
17894
18145
  });
17895
18146
  }
18147
+ async webSearch(options) {
18148
+ const workspace = this.ensureWorkspace();
18149
+ return runProviderWebSearch({
18150
+ ...options,
18151
+ dbPath: workspace.knowledgeDbPath,
18152
+ config: this.config(),
18153
+ safetyPolicy: this.safetyPolicy()
18154
+ });
18155
+ }
17896
18156
  }
17897
18157
  function createKnowledgeService(options = {}) {
17898
18158
  return new KnowledgeService(options);
@@ -18092,6 +18352,23 @@ function buildServer() {
18092
18352
  return errorText(error48 instanceof Error ? error48.message : String(error48));
18093
18353
  }
18094
18354
  });
18355
+ registerTool(server, "ok_web_search", "Provider web search", "Run safety-gated provider-native web search and return citations/sources", {
18356
+ scope: scopeField,
18357
+ query: exports_external.string().describe("Web search query"),
18358
+ limit: exports_external.number().optional().describe("Maximum sources"),
18359
+ provider: exports_external.enum(["openai", "anthropic", "deepseek"]).optional().describe("Provider override"),
18360
+ model: exports_external.string().optional().describe("Model alias/ref"),
18361
+ domains: exports_external.array(exports_external.string()).optional().describe("Allowed domains"),
18362
+ fake: exports_external.boolean().optional().describe("Use deterministic fake web results"),
18363
+ file_results: exports_external.boolean().optional().describe("File web snippets as web source refs")
18364
+ }, async ({ scope, query, limit, provider, model, domains, fake, file_results }) => {
18365
+ const service = createKnowledgeService({ scope });
18366
+ try {
18367
+ return jsonText({ ok: true, ...await service.webSearch({ query, limit, provider, modelRef: model, domains, fake, fileResults: file_results }) });
18368
+ } catch (error48) {
18369
+ return errorText(error48 instanceof Error ? error48.message : String(error48));
18370
+ }
18371
+ });
18095
18372
  registerTool(server, "ok_add", "Add a knowledge item", "Add a new item to the knowledge store", {
18096
18373
  title: exports_external.string().describe("Item title"),
18097
18374
  content: exports_external.string().describe("Item content/body"),