@hasna/knowledge 0.2.17 → 0.2.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +17 -1
- package/bin/open-knowledge-mcp.js +283 -6
- package/bin/open-knowledge.js +64 -57
- package/docs/architecture/ai-native-knowledge-base.md +7 -0
- package/docs/architecture/hybrid-semantic-search.md +5 -0
- package/package.json +1 -1
- package/src/cli.ts +33 -4
- package/src/mcp.js +18 -0
- package/src/service.ts +11 -0
- package/src/web-search.ts +330 -0
package/README.md
CHANGED
|
@@ -102,6 +102,9 @@ open-knowledge search "company wiki policy" --scope project --context --json
|
|
|
102
102
|
# Build a citation answer/context draft for a prompt
|
|
103
103
|
open-knowledge ask "How do we cite handbook policy?" --scope project --json
|
|
104
104
|
knowledge "How do we cite handbook policy?" --scope project --json
|
|
105
|
+
|
|
106
|
+
# Provider-native web search, safety-gated for real network access
|
|
107
|
+
HASNA_KNOWLEDGE_WEB_SEARCH=1 open-knowledge web search "latest AI SDK web search" --provider openai --json
|
|
105
108
|
```
|
|
106
109
|
|
|
107
110
|
## Commands
|
|
@@ -276,6 +279,18 @@ pack, returns a local citation draft by default, records a run ledger in
|
|
|
276
279
|
the flow deterministic for local tests. `--approve-write` records approval
|
|
277
280
|
intent, but durable wiki writes remain deferred to the wiki compile/write task.
|
|
278
281
|
|
|
282
|
+
### web
|
|
283
|
+
```bash
|
|
284
|
+
open-knowledge web search <query> [--provider openai|anthropic] [--model provider:model] [--domain <domain>] [--file-results] [--scope project] [--json]
|
|
285
|
+
```
|
|
286
|
+
Run provider-native hosted web search and return cited web sources. Real network
|
|
287
|
+
search is disabled unless `safety.network.web_search_enabled=true` or
|
|
288
|
+
`HASNA_KNOWLEDGE_WEB_SEARCH=1` is set. OpenAI uses the AI SDK OpenAI
|
|
289
|
+
`tools.webSearch` path; Anthropic uses its provider web-search tool when
|
|
290
|
+
available. `--file-results` stores returned snippets as read-only `web` source
|
|
291
|
+
refs in `knowledge.db` so later local search can cite them. `--fake` returns
|
|
292
|
+
deterministic offline sources for tests.
|
|
293
|
+
|
|
279
294
|
### safety
|
|
280
295
|
```bash
|
|
281
296
|
open-knowledge safety status [--scope project] [--json]
|
|
@@ -358,7 +373,8 @@ The MCP server exposes item tools (`ok_add`, `ok_list`, `ok_get`, `ok_update`,
|
|
|
358
373
|
parsing/resolution (`ok_parse_source_ref`, `ok_resolve_source`). The
|
|
359
374
|
`knowledge_search` MCP tool returns reranked citation context packs for agent
|
|
360
375
|
prompts, and `knowledge_ask` runs the same prompt flow exposed by
|
|
361
|
-
`open-knowledge ask`.
|
|
376
|
+
`open-knowledge ask`. `ok_web_search` exposes safety-gated provider web search
|
|
377
|
+
to MCP clients.
|
|
362
378
|
|
|
363
379
|
## Source And Artifact Boundary
|
|
364
380
|
|
|
@@ -13660,7 +13660,7 @@ import { existsSync as existsSync7, readFileSync as readFileSync7, writeFileSync
|
|
|
13660
13660
|
// package.json
|
|
13661
13661
|
var package_default = {
|
|
13662
13662
|
name: "@hasna/knowledge",
|
|
13663
|
-
version: "0.2.17",
|
|
13663
|
+
version: "0.2.18",
|
|
13664
13664
|
description: "Agent-friendly local knowledge CLI with JSON output, pagination, and safe destructive actions",
|
|
13665
13665
|
type: "module",
|
|
13666
13666
|
bin: {
|
|
@@ -17348,8 +17348,259 @@ async function ingestSourceRef(options) {
|
|
|
17348
17348
|
};
|
|
17349
17349
|
}
|
|
17350
17350
|
|
|
17351
|
-
// src/storage-contract.ts
|
|
17351
|
+
// src/web-search.ts
|
|
17352
17352
|
import { createHash as createHash7, randomUUID as randomUUID6 } from "crypto";
|
|
17353
|
+
function stableHash(value) {
|
|
17354
|
+
return `sha256:${createHash7("sha256").update(value).digest("hex")}`;
|
|
17355
|
+
}
|
|
17356
|
+
function estimateTokens2(text) {
|
|
17357
|
+
const words = text.trim().split(/\s+/).filter(Boolean).length;
|
|
17358
|
+
return Math.max(1, Math.ceil(words * 1.25));
|
|
17359
|
+
}
|
|
17360
|
+
function asRecord(value) {
|
|
17361
|
+
return value && typeof value === "object" && !Array.isArray(value) ? value : {};
|
|
17362
|
+
}
|
|
17363
|
+
function asString3(value) {
|
|
17364
|
+
return typeof value === "string" && value.length > 0 ? value : null;
|
|
17365
|
+
}
|
|
17366
|
+
function sourceFromRecord(value) {
|
|
17367
|
+
const record2 = asRecord(value);
|
|
17368
|
+
const url2 = asString3(record2.url) ?? asString3(record2.uri) ?? asString3(record2.sourceUrl);
|
|
17369
|
+
if (!url2)
|
|
17370
|
+
return null;
|
|
17371
|
+
return {
|
|
17372
|
+
url: url2,
|
|
17373
|
+
title: asString3(record2.title) ?? asString3(record2.name),
|
|
17374
|
+
snippet: asString3(record2.snippet) ?? asString3(record2.text) ?? asString3(record2.description),
|
|
17375
|
+
provider_metadata: record2
|
|
17376
|
+
};
|
|
17377
|
+
}
|
|
17378
|
+
function collectSources(value, output) {
|
|
17379
|
+
if (Array.isArray(value)) {
|
|
17380
|
+
for (const entry of value)
|
|
17381
|
+
collectSources(entry, output);
|
|
17382
|
+
return;
|
|
17383
|
+
}
|
|
17384
|
+
const source = sourceFromRecord(value);
|
|
17385
|
+
if (source)
|
|
17386
|
+
output.set(source.url, source);
|
|
17387
|
+
const record2 = asRecord(value);
|
|
17388
|
+
for (const key of ["sources", "results", "citations", "annotations", "output"]) {
|
|
17389
|
+
if (record2[key])
|
|
17390
|
+
collectSources(record2[key], output);
|
|
17391
|
+
}
|
|
17392
|
+
}
|
|
17393
|
+
function fakeSources(query, limit) {
|
|
17394
|
+
return Array.from({ length: Math.min(limit, 3) }, (_, index) => ({
|
|
17395
|
+
url: `https://example.com/knowledge-web-${index + 1}`,
|
|
17396
|
+
title: `Fake web source ${index + 1}`,
|
|
17397
|
+
snippet: `Deterministic web-search fixture for "${query}"`,
|
|
17398
|
+
provider_metadata: { fake: true, rank: index + 1 }
|
|
17399
|
+
}));
|
|
17400
|
+
}
|
|
17401
|
+
async function openAiWebSearch(input) {
|
|
17402
|
+
const { generateText } = await import("ai");
|
|
17403
|
+
const { createOpenAI } = await import("@ai-sdk/openai");
|
|
17404
|
+
const settings = providerSettings(input.config, "openai");
|
|
17405
|
+
const openai = createOpenAI({
|
|
17406
|
+
apiKey: input.env[settings.api_key_env],
|
|
17407
|
+
baseURL: settings.base_url
|
|
17408
|
+
});
|
|
17409
|
+
const webSearch = openai.tools?.webSearch;
|
|
17410
|
+
if (!webSearch)
|
|
17411
|
+
throw new Error("OpenAI provider does not expose tools.webSearch.");
|
|
17412
|
+
return generateText({
|
|
17413
|
+
model: openai(input.model),
|
|
17414
|
+
prompt: input.query,
|
|
17415
|
+
tools: {
|
|
17416
|
+
web_search: webSearch({
|
|
17417
|
+
externalWebAccess: true,
|
|
17418
|
+
searchContextSize: "medium",
|
|
17419
|
+
...input.domains.length > 0 ? { allowedDomains: input.domains } : {}
|
|
17420
|
+
})
|
|
17421
|
+
},
|
|
17422
|
+
toolChoice: { type: "tool", toolName: "web_search" }
|
|
17423
|
+
});
|
|
17424
|
+
}
|
|
17425
|
+
async function anthropicWebSearch(input) {
|
|
17426
|
+
const { generateText } = await import("ai");
|
|
17427
|
+
const { createAnthropic } = await import("@ai-sdk/anthropic");
|
|
17428
|
+
const settings = providerSettings(input.config, "anthropic");
|
|
17429
|
+
const anthropic = createAnthropic({
|
|
17430
|
+
apiKey: input.env[settings.api_key_env],
|
|
17431
|
+
baseURL: settings.base_url
|
|
17432
|
+
});
|
|
17433
|
+
const factory = anthropic.tools?.webSearch_20250305 ?? anthropic.tools?.webSearch;
|
|
17434
|
+
if (!factory)
|
|
17435
|
+
throw new Error("Anthropic provider does not expose a web search tool.");
|
|
17436
|
+
return generateText({
|
|
17437
|
+
model: anthropic(input.model),
|
|
17438
|
+
prompt: input.query,
|
|
17439
|
+
tools: {
|
|
17440
|
+
web_search: factory({
|
|
17441
|
+
maxUses: input.maxUses,
|
|
17442
|
+
...input.domains.length > 0 ? { allowedDomains: input.domains } : {}
|
|
17443
|
+
})
|
|
17444
|
+
}
|
|
17445
|
+
});
|
|
17446
|
+
}
|
|
17447
|
+
async function fileWebSources(options, sources, now) {
|
|
17448
|
+
if (!options.fileResults || sources.length === 0)
|
|
17449
|
+
return 0;
|
|
17450
|
+
const items = sources.map((source) => {
|
|
17451
|
+
const text = [source.title, source.snippet, source.url].filter(Boolean).join(`
|
|
17452
|
+
`);
|
|
17453
|
+
const hash2 = stableHash(text);
|
|
17454
|
+
return {
|
|
17455
|
+
source_ref: source.url,
|
|
17456
|
+
name: source.title ?? source.url,
|
|
17457
|
+
url: source.url,
|
|
17458
|
+
mime: "text/plain",
|
|
17459
|
+
hash: hash2,
|
|
17460
|
+
revision: hash2,
|
|
17461
|
+
status: "active",
|
|
17462
|
+
updated_at: now,
|
|
17463
|
+
permissions: { mode: "read_only", allowed_purposes: ["knowledge_answer", "knowledge_index"] },
|
|
17464
|
+
metadata: {
|
|
17465
|
+
source_ref: source.url,
|
|
17466
|
+
content_source: "provider_web_search",
|
|
17467
|
+
provider_metadata: source.provider_metadata
|
|
17468
|
+
},
|
|
17469
|
+
extracted_text: text
|
|
17470
|
+
};
|
|
17471
|
+
});
|
|
17472
|
+
const result = await ingestOpenFilesManifestItems({
|
|
17473
|
+
dbPath: options.dbPath,
|
|
17474
|
+
items,
|
|
17475
|
+
sourceLabel: `web-search:${options.query}`,
|
|
17476
|
+
readAction: "provider_web_search_file_results",
|
|
17477
|
+
safetyPolicy: options.safetyPolicy,
|
|
17478
|
+
now: new Date(now)
|
|
17479
|
+
});
|
|
17480
|
+
return result.sources_upserted;
|
|
17481
|
+
}
|
|
17482
|
+
async function runProviderWebSearch(options) {
|
|
17483
|
+
const query = options.query.trim();
|
|
17484
|
+
if (!query)
|
|
17485
|
+
throw new Error("Web search query is required.");
|
|
17486
|
+
const env = options.env ?? process.env;
|
|
17487
|
+
const now = (options.now ?? new Date).toISOString();
|
|
17488
|
+
const limit = Math.max(1, Math.min(options.limit ?? 5, 20));
|
|
17489
|
+
const maxUses = Math.max(1, Math.min(options.maxUses ?? 3, 10));
|
|
17490
|
+
const domains = options.domains ?? [];
|
|
17491
|
+
const modelRef = resolveModelRef(options.modelRef ?? (options.provider ? `${options.provider}:${providerSettings(options.config, options.provider).default_model}` : "default"), options.config);
|
|
17492
|
+
const parsed = parseModelRef(modelRef);
|
|
17493
|
+
const provider = options.provider ?? parsed.provider;
|
|
17494
|
+
const model = parsed.provider === provider ? parsed.model : providerSettings(options.config, provider).default_model;
|
|
17495
|
+
const runId = `run_${randomUUID6()}`;
|
|
17496
|
+
if (!options.fake && options.safetyPolicy)
|
|
17497
|
+
assertWebSearchAllowed(options.safetyPolicy);
|
|
17498
|
+
if (!options.fake && provider !== "openai" && provider !== "anthropic") {
|
|
17499
|
+
throw new Error(`Provider ${provider} does not expose native web search yet.`);
|
|
17500
|
+
}
|
|
17501
|
+
if (!options.fake)
|
|
17502
|
+
assertProviderCredentials(provider, options.config, env);
|
|
17503
|
+
migrateKnowledgeDb(options.dbPath);
|
|
17504
|
+
const db = openKnowledgeDb(options.dbPath);
|
|
17505
|
+
try {
|
|
17506
|
+
db.run(`INSERT INTO runs (id, type, prompt, status, provider, model, metadata_json, created_at, updated_at)
|
|
17507
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`, [
|
|
17508
|
+
runId,
|
|
17509
|
+
"provider-web-search",
|
|
17510
|
+
query,
|
|
17511
|
+
"running",
|
|
17512
|
+
provider,
|
|
17513
|
+
model,
|
|
17514
|
+
JSON.stringify({ domains, max_uses: maxUses, fake: options.fake === true }),
|
|
17515
|
+
now,
|
|
17516
|
+
now
|
|
17517
|
+
]);
|
|
17518
|
+
recordAuditEvent(db, {
|
|
17519
|
+
event_type: "source_read",
|
|
17520
|
+
action: options.fake ? "fake_provider_web_search" : "provider_web_search",
|
|
17521
|
+
target_uri: query,
|
|
17522
|
+
decision: "allow",
|
|
17523
|
+
metadata: { provider, model, domains, max_uses: maxUses },
|
|
17524
|
+
created_at: now
|
|
17525
|
+
});
|
|
17526
|
+
} finally {
|
|
17527
|
+
db.close();
|
|
17528
|
+
}
|
|
17529
|
+
let answer = "";
|
|
17530
|
+
let sources = [];
|
|
17531
|
+
let usage = { input_tokens: estimateTokens2(query), output_tokens: 0, cost_usd: 0 };
|
|
17532
|
+
const warnings = [];
|
|
17533
|
+
if (options.fake) {
|
|
17534
|
+
sources = fakeSources(query, limit);
|
|
17535
|
+
answer = `Fake web search answer for: ${query}`;
|
|
17536
|
+
usage.output_tokens = estimateTokens2(answer);
|
|
17537
|
+
} else {
|
|
17538
|
+
const result = provider === "openai" ? await openAiWebSearch({ query, model, config: options.config, env, maxUses, domains }) : await anthropicWebSearch({ query, model, config: options.config, env, maxUses, domains });
|
|
17539
|
+
answer = result.text;
|
|
17540
|
+
const collected = new Map;
|
|
17541
|
+
collectSources(result.sources, collected);
|
|
17542
|
+
collectSources(result.toolResults, collected);
|
|
17543
|
+
sources = Array.from(collected.values()).slice(0, limit);
|
|
17544
|
+
const normalized = normalizeAiSdkUsage({
|
|
17545
|
+
provider,
|
|
17546
|
+
model,
|
|
17547
|
+
usage: result.usage,
|
|
17548
|
+
providerMetadata: result.providerMetadata
|
|
17549
|
+
});
|
|
17550
|
+
usage = {
|
|
17551
|
+
input_tokens: normalized.input_tokens,
|
|
17552
|
+
output_tokens: normalized.output_tokens,
|
|
17553
|
+
cost_usd: normalized.cost_usd
|
|
17554
|
+
};
|
|
17555
|
+
}
|
|
17556
|
+
const filedSources = await fileWebSources(options, sources, now);
|
|
17557
|
+
const writeDb = openKnowledgeDb(options.dbPath);
|
|
17558
|
+
try {
|
|
17559
|
+
writeDb.run(`UPDATE runs SET status = ?, metadata_json = ?, updated_at = ? WHERE id = ?`, [
|
|
17560
|
+
"completed",
|
|
17561
|
+
JSON.stringify({ domains, max_uses: maxUses, sources: sources.length, filed_sources: filedSources, fake: options.fake === true }),
|
|
17562
|
+
now,
|
|
17563
|
+
runId
|
|
17564
|
+
]);
|
|
17565
|
+
writeDb.run(`INSERT INTO run_events (id, run_id, level, event, metadata_json, created_at)
|
|
17566
|
+
VALUES (?, ?, ?, ?, ?, ?)`, [
|
|
17567
|
+
`evt_${randomUUID6()}`,
|
|
17568
|
+
runId,
|
|
17569
|
+
"info",
|
|
17570
|
+
"provider_web_search_completed",
|
|
17571
|
+
JSON.stringify({ sources: sources.length, filed_sources: filedSources }),
|
|
17572
|
+
now
|
|
17573
|
+
]);
|
|
17574
|
+
recordProviderUsage(writeDb, {
|
|
17575
|
+
run_id: runId,
|
|
17576
|
+
provider,
|
|
17577
|
+
model,
|
|
17578
|
+
input_tokens: usage.input_tokens,
|
|
17579
|
+
output_tokens: usage.output_tokens,
|
|
17580
|
+
cost_usd: usage.cost_usd,
|
|
17581
|
+
metadata: { web_search: true, sources: sources.length, filed_sources: filedSources },
|
|
17582
|
+
created_at: now
|
|
17583
|
+
});
|
|
17584
|
+
} finally {
|
|
17585
|
+
writeDb.close();
|
|
17586
|
+
}
|
|
17587
|
+
if (sources.length === 0)
|
|
17588
|
+
warnings.push("no_web_sources_returned");
|
|
17589
|
+
return {
|
|
17590
|
+
run_id: runId,
|
|
17591
|
+
query,
|
|
17592
|
+
provider,
|
|
17593
|
+
model,
|
|
17594
|
+
answer,
|
|
17595
|
+
sources,
|
|
17596
|
+
filed_sources: filedSources,
|
|
17597
|
+
usage,
|
|
17598
|
+
warnings
|
|
17599
|
+
};
|
|
17600
|
+
}
|
|
17601
|
+
|
|
17602
|
+
// src/storage-contract.ts
|
|
17603
|
+
import { createHash as createHash8, randomUUID as randomUUID7 } from "crypto";
|
|
17353
17604
|
var GENERATED_ARTIFACTS = [
|
|
17354
17605
|
{
|
|
17355
17606
|
kind: "schema",
|
|
@@ -17385,7 +17636,7 @@ var GENERATED_ARTIFACTS = [
|
|
|
17385
17636
|
function hashArtifactBody(body) {
|
|
17386
17637
|
const bytes = typeof body === "string" ? Buffer.from(body) : Buffer.from(body);
|
|
17387
17638
|
return {
|
|
17388
|
-
hash: `sha256:${createHash7("sha256").update(bytes).digest("hex")}`,
|
|
17639
|
+
hash: `sha256:${createHash8("sha256").update(bytes).digest("hex")}`,
|
|
17389
17640
|
size_bytes: bytes.byteLength
|
|
17390
17641
|
};
|
|
17391
17642
|
}
|
|
@@ -17510,7 +17761,7 @@ function recordStorageObjects(db, objects, now = new Date) {
|
|
|
17510
17761
|
`);
|
|
17511
17762
|
const insert = db.transaction((entries) => {
|
|
17512
17763
|
for (const entry of entries) {
|
|
17513
|
-
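statement.run(randomUUID6(), entry.uri, entry.kind, entry.content_type ?? null, entry.hash ?? null, entry.size_bytes ?? null, JSON.stringify({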
|
|
17764
|
+
statement.run(randomUUID7(), entry.uri, entry.kind, entry.content_type ?? null, entry.hash ?? null, entry.size_bytes ?? null, JSON.stringify({
|
|
17514
17765
|
key: entry.key,
|
|
17515
17766
|
...entry.metadata ?? {}
|
|
17516
17767
|
}), timestamp, timestamp);
|
|
@@ -17520,7 +17771,7 @@ function recordStorageObjects(db, objects, now = new Date) {
|
|
|
17520
17771
|
}
|
|
17521
17772
|
|
|
17522
17773
|
// src/wiki-layout.ts
|
|
17523
|
-
import { createHash as createHash8 } from "crypto";
|
|
17774
|
+
import { createHash as createHash9 } from "crypto";
|
|
17524
17775
|
function todayParts(now) {
|
|
17525
17776
|
const year = String(now.getUTCFullYear());
|
|
17526
17777
|
const month = String(now.getUTCMonth() + 1).padStart(2, "0");
|
|
@@ -17528,7 +17779,7 @@ function todayParts(now) {
|
|
|
17528
17779
|
return { year, month, day };
|
|
17529
17780
|
}
|
|
17530
17781
|
function stableId5(prefix, value) {
|
|
17531
|
-
return `${prefix}_${createHash8("sha256").update(value).digest("hex").slice(0, 20)}`;
|
|
17782
|
+
return `${prefix}_${createHash9("sha256").update(value).digest("hex").slice(0, 20)}`;
|
|
17532
17783
|
}
|
|
17533
17784
|
function estimateTokenCount2(text) {
|
|
17534
17785
|
const words = text.trim().split(/\s+/).filter(Boolean).length;
|
|
@@ -17893,6 +18144,15 @@ class KnowledgeService {
|
|
|
17893
18144
|
config: this.config()
|
|
17894
18145
|
});
|
|
17895
18146
|
}
|
|
18147
|
+
async webSearch(options) {
|
|
18148
|
+
const workspace = this.ensureWorkspace();
|
|
18149
|
+
return runProviderWebSearch({
|
|
18150
|
+
...options,
|
|
18151
|
+
dbPath: workspace.knowledgeDbPath,
|
|
18152
|
+
config: this.config(),
|
|
18153
|
+
safetyPolicy: this.safetyPolicy()
|
|
18154
|
+
});
|
|
18155
|
+
}
|
|
17896
18156
|
}
|
|
17897
18157
|
function createKnowledgeService(options = {}) {
|
|
17898
18158
|
return new KnowledgeService(options);
|
|
@@ -18092,6 +18352,23 @@ function buildServer() {
|
|
|
18092
18352
|
return errorText(error48 instanceof Error ? error48.message : String(error48));
|
|
18093
18353
|
}
|
|
18094
18354
|
});
|
|
18355
|
+
registerTool(server, "ok_web_search", "Provider web search", "Run safety-gated provider-native web search and return citations/sources", {
|
|
18356
|
+
scope: scopeField,
|
|
18357
|
+
query: exports_external.string().describe("Web search query"),
|
|
18358
|
+
limit: exports_external.number().optional().describe("Maximum sources"),
|
|
18359
|
+
provider: exports_external.enum(["openai", "anthropic", "deepseek"]).optional().describe("Provider override"),
|
|
18360
|
+
model: exports_external.string().optional().describe("Model alias/ref"),
|
|
18361
|
+
domains: exports_external.array(exports_external.string()).optional().describe("Allowed domains"),
|
|
18362
|
+
fake: exports_external.boolean().optional().describe("Use deterministic fake web results"),
|
|
18363
|
+
file_results: exports_external.boolean().optional().describe("File web snippets as web source refs")
|
|
18364
|
+
}, async ({ scope, query, limit, provider, model, domains, fake, file_results }) => {
|
|
18365
|
+
const service = createKnowledgeService({ scope });
|
|
18366
|
+
try {
|
|
18367
|
+
return jsonText({ ok: true, ...await service.webSearch({ query, limit, provider, modelRef: model, domains, fake, fileResults: file_results }) });
|
|
18368
|
+
} catch (error48) {
|
|
18369
|
+
return errorText(error48 instanceof Error ? error48.message : String(error48));
|
|
18370
|
+
}
|
|
18371
|
+
});
|
|
18095
18372
|
registerTool(server, "ok_add", "Add a knowledge item", "Add a new item to the knowledge store", {
|
|
18096
18373
|
title: exports_external.string().describe("Item title"),
|
|
18097
18374
|
content: exports_external.string().describe("Item content/body"),
|