@hasna/knowledge 0.2.14 → 0.2.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +17 -2
- package/bin/open-knowledge-mcp.js +432 -2
- package/bin/open-knowledge.js +80 -40
- package/docs/architecture/ai-native-knowledge-base.md +7 -5
- package/docs/architecture/hybrid-semantic-search.md +17 -11
- package/package.json +1 -1
- package/src/cli.ts +24 -4
- package/src/mcp.js +17 -0
- package/src/search.ts +510 -0
- package/src/service.ts +10 -0
- package/src/wiki-layout.ts +41 -1
|
@@ -217,12 +217,14 @@ filters must be applied before agent context is assembled.
|
|
|
217
217
|
|
|
218
218
|
The first local semantic-search implementation indexes derived chunks with
|
|
219
219
|
`open-knowledge embeddings index` and queries them with
|
|
220
|
+
`open-knowledge search --semantic` or the lower-level
|
|
220
221
|
`open-knowledge embeddings search`. It stores OpenAI embedding vectors as
|
|
221
|
-
generated metadata rows, not raw source bytes, and pins each row to
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
222
|
+
generated metadata rows, not raw source bytes, and pins each row to `open-files`
|
|
223
|
+
provenance: source ref/URI, revision/hash, chunk offsets, token count, provider,
|
|
224
|
+
model, dimensions, status, and timestamps. The structured `search` contract
|
|
225
|
+
merges keyword FTS, wiki/index catalog hits, generated wiki chunks, and optional
|
|
226
|
+
vector results so the local SQLite index can later move to pgvector or a managed
|
|
227
|
+
hosted vector store without changing CLI/MCP result shape.
|
|
226
228
|
|
|
227
229
|
## Agent Workflow
|
|
228
230
|
|
|
@@ -47,13 +47,17 @@ contracts.
|
|
|
47
47
|
The current local command surface is:
|
|
48
48
|
|
|
49
49
|
```bash
|
|
50
|
+
open-knowledge search "company wiki policy" --scope project --json
|
|
51
|
+
open-knowledge search "company wiki policy" --scope project --semantic --json
|
|
50
52
|
open-knowledge embeddings index --scope project --model openai:text-embedding-3-small
|
|
51
53
|
open-knowledge embeddings search "company wiki policy" --scope project --json
|
|
52
54
|
```
|
|
53
55
|
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
56
|
+
`search` is the structured hybrid layer for agents. `embeddings search` is the
|
|
57
|
+
lower-level vector-only command. MCP exposes the same capability through
|
|
58
|
+
`ok_search`, `ok_embeddings_status`, `ok_embeddings_index`, and
|
|
59
|
+
`ok_semantic_search`. Deterministic `--fake` embeddings exist for tests and
|
|
60
|
+
offline verification only.
|
|
57
61
|
|
|
58
62
|
## Hosted Indexes
|
|
59
63
|
|
|
@@ -72,16 +76,18 @@ unauthorized content.
|
|
|
72
76
|
|
|
73
77
|
1. Normalize the query.
|
|
74
78
|
2. Embed the query if a semantic-capable provider is configured.
|
|
75
|
-
3. Run keyword FTS over source chunks and wiki chunks.
|
|
76
|
-
4.
|
|
77
|
-
5.
|
|
78
|
-
|
|
79
|
+
3. Run keyword FTS over source chunks and generated wiki chunks.
|
|
80
|
+
4. Search wiki page and machine-readable index catalog rows.
|
|
81
|
+
5. Run vector search over source chunks and wiki pages when semantic mode is
|
|
82
|
+
requested.
|
|
83
|
+
6. Expand candidate pages through backlinks and citations.
|
|
84
|
+
7. Drop stale candidates whose source revision/hash no longer matches
|
|
79
85
|
`open-files`.
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
86
|
+
8. Apply permission filters.
|
|
87
|
+
9. Merge and dedupe by source revision, wiki page, citation, and text hash.
|
|
88
|
+
10. Rerank by relevance, exact-match score, semantic score, freshness, citation
|
|
83
89
|
quality, and wiki authority.
|
|
84
|
-
|
|
90
|
+
11. Return structured results with source refs, citation spans, page refs,
|
|
85
91
|
scores, and reason codes.
|
|
86
92
|
|
|
87
93
|
## Result Shape
|
package/package.json
CHANGED
package/src/cli.ts
CHANGED
|
@@ -49,6 +49,7 @@ interface Flags {
|
|
|
49
49
|
purpose?: string;
|
|
50
50
|
model?: string;
|
|
51
51
|
dimensions?: number;
|
|
52
|
+
semantic?: boolean;
|
|
52
53
|
noColor?: boolean;
|
|
53
54
|
scope?: string;
|
|
54
55
|
olderThan?: number;
|
|
@@ -63,7 +64,7 @@ interface ParseResult {
|
|
|
63
64
|
flags: Flags;
|
|
64
65
|
}
|
|
65
66
|
|
|
66
|
-
const COMMANDS = ['add', 'list', 'get', 'delete', 'update', 'archive', 'restore', 'upsert', 'untag', 'export', 'prune', 'dedupe', 'stats', 'paths', 'storage', 'db', 'wiki', 'source', 'ingest', 'reindex', 'embeddings', 'providers', 'safety', 'help'];
|
|
67
|
+
const COMMANDS = ['add', 'list', 'get', 'delete', 'update', 'archive', 'restore', 'upsert', 'untag', 'export', 'prune', 'dedupe', 'stats', 'paths', 'storage', 'db', 'wiki', 'source', 'ingest', 'reindex', 'search', 'embeddings', 'providers', 'safety', 'help'];
|
|
67
68
|
const COMMAND_ALIASES: Record<string, string> = {
|
|
68
69
|
ls: 'list',
|
|
69
70
|
rm: 'delete',
|
|
@@ -101,6 +102,7 @@ function parseArgs(argv: string[]): ParseResult {
|
|
|
101
102
|
case '--purpose': flags.purpose = argv[i + 1]; i += 1; break;
|
|
102
103
|
case '--model': flags.model = argv[i + 1]; i += 1; break;
|
|
103
104
|
case '--dimensions': flags.dimensions = Number(argv[i + 1]); i += 1; break;
|
|
105
|
+
case '--semantic': flags.semantic = true; break;
|
|
104
106
|
case '--fake': flags.fake = true; break;
|
|
105
107
|
case '--no-color': flags.noColor = true; break;
|
|
106
108
|
case '--scope': flags.scope = argv[i + 1]; i += 1; break;
|
|
@@ -175,6 +177,7 @@ Commands:
|
|
|
175
177
|
ingest manifest <file|s3://> Ingest an open-files manifest into knowledge.db
|
|
176
178
|
ingest source <source-ref> Ingest a read-only source ref into knowledge.db
|
|
177
179
|
reindex outbox <file|s3://> Consume open-files change events and invalidate chunks
|
|
180
|
+
search <query> Hybrid search sources, wiki pages, and indexes
|
|
178
181
|
embeddings status|index|search Build/query local vector embeddings
|
|
179
182
|
providers status|models|check Inspect AI SDK provider config and credentials
|
|
180
183
|
safety status|check|approve|audit|redact
|
|
@@ -186,6 +189,7 @@ Global Options:
|
|
|
186
189
|
--purpose <name> Read-only source purpose (default: knowledge_answer)
|
|
187
190
|
--model <provider:model> AI/embedding model ref
|
|
188
191
|
--dimensions <n> Embedding dimensions for local/fake providers
|
|
192
|
+
--semantic Include vector semantic results in search
|
|
189
193
|
--fake Use deterministic fake embeddings for local tests
|
|
190
194
|
--scope local|global|project Store scope (default: global ~/.hasna/apps/knowledge/)
|
|
191
195
|
--no-color Disable color output
|
|
@@ -247,6 +251,7 @@ function printCommandHelp(command: string): void {
|
|
|
247
251
|
if (command === 'source') { console.log('Usage: open-knowledge source resolve <source-ref> [--purpose knowledge_answer|knowledge_index] [--limit <n>] [--scope local|global|project] [--json]'); return; }
|
|
248
252
|
if (command === 'ingest') { console.log('Usage: open-knowledge ingest manifest <file|s3://bucket/key> | source <source-ref> [--purpose knowledge_index] [--scope local|global|project] [--json]'); return; }
|
|
249
253
|
if (command === 'reindex') { console.log('Usage: open-knowledge reindex outbox <file|s3://bucket/key> [--scope local|global|project] [--json]'); return; }
|
|
254
|
+
if (command === 'search') { console.log('Usage: open-knowledge search <query> [--semantic] [--model openai:text-embedding-3-small] [--limit <n>] [--dimensions <n>] [--fake] [--scope local|global|project] [--json]'); return; }
|
|
250
255
|
if (command === 'embeddings') { console.log('Usage: open-knowledge embeddings status|index|search [query] [--model openai:text-embedding-3-small] [--limit <n>] [--dimensions <n>] [--fake] [--scope local|global|project] [--json]'); return; }
|
|
251
256
|
if (command === 'providers') { console.log('Usage: open-knowledge providers status|models|check [provider|model-alias] [--scope local|global|project] [--json]'); return; }
|
|
252
257
|
if (command === 'safety') { console.log('Usage: open-knowledge safety status|check|approve|audit|redact [args] [--scope local|global|project] [--json]'); return; }
|
|
@@ -294,11 +299,11 @@ async function run(argv: string[]): Promise<void> {
|
|
|
294
299
|
if (flags.completions) {
|
|
295
300
|
const shell = flags.completions;
|
|
296
301
|
if (shell === 'bash') {
|
|
297
|
-
console.log(`_open_knowledge() { local cur; cur="${"$"}{COMP_WORDS[COMP_CWORD]}"; COMPREPLY=($(compgen -W "add list get update archive restore upsert untag delete export prune dedupe stats paths storage db wiki source ingest reindex embeddings providers safety help ls rm edit unarchive --json --yes --help --version --desc --page --limit --search --sort --id --store --title --content --url --tag --format --completions --purpose --model --dimensions --fake --no-color --scope --archived --include-archived" -- "$cur")); }; complete -F _open_knowledge open-knowledge`);
|
|
302
|
+
console.log(`_open_knowledge() { local cur; cur="${"$"}{COMP_WORDS[COMP_CWORD]}"; COMPREPLY=($(compgen -W "add list get update archive restore upsert untag delete export prune dedupe stats paths storage db wiki source ingest reindex search embeddings providers safety help ls rm edit unarchive --json --yes --help --version --desc --page --limit --search --sort --id --store --title --content --url --tag --format --completions --purpose --model --dimensions --semantic --fake --no-color --scope --archived --include-archived" -- "$cur")); }; complete -F _open_knowledge open-knowledge`);
|
|
298
303
|
} else if (shell === 'zsh') {
|
|
299
|
-
console.log(`#compdef open-knowledge\n_open_knowledge() { _arguments -C "1: :(add list get update archive restore upsert untag delete export prune dedupe stats paths storage db wiki source ingest reindex embeddings providers safety help ls rm edit unarchive)" "(--json)--json" "(--yes)-y" "(--help)--help" "(--version)--version" "(--desc)--desc" "(--archived)--archived" "(--include-archived)--include-archived" "(--fake)--fake" "(-p --page)"{-p,--page}"[page number]:number:" "(-l --limit)"{-l,--limit}"[items per page]:number:" "(-s --search)"{-s,--search}"[search text]:text:" "(--sort)--sort"\{created,title\}:" "(--id)--id[item id]:id:" "(--store)--store[store path]:path:" "(--title)--title[new title]:" "(--content)--content[new content]:" "(--url)--url[source url]:" "(-t --tag)"{-t,--tag}"[tag]:tag:" "(--format)--format[json|jsonl]:" "(--completions)--completions[output completions]:shell:(bash zsh fish):" "(--purpose)--purpose[purpose]:" "(--model)--model[model ref]:" "(--dimensions)--dimensions[embedding dimensions]:number:" "(--no-color)--no-color[disable color]" "(--scope)--scope"\{local,global,project\}:" }; _open_knowledge`);
|
|
304
|
+
console.log(`#compdef open-knowledge\n_open_knowledge() { _arguments -C "1: :(add list get update archive restore upsert untag delete export prune dedupe stats paths storage db wiki source ingest reindex search embeddings providers safety help ls rm edit unarchive)" "(--json)--json" "(--yes)-y" "(--help)--help" "(--version)--version" "(--desc)--desc" "(--archived)--archived" "(--include-archived)--include-archived" "(--semantic)--semantic" "(--fake)--fake" "(-p --page)"{-p,--page}"[page number]:number:" "(-l --limit)"{-l,--limit}"[items per page]:number:" "(-s --search)"{-s,--search}"[search text]:text:" "(--sort)--sort"\{created,title\}:" "(--id)--id[item id]:id:" "(--store)--store[store path]:path:" "(--title)--title[new title]:" "(--content)--content[new content]:" "(--url)--url[source url]:" "(-t --tag)"{-t,--tag}"[tag]:tag:" "(--format)--format[json|jsonl]:" "(--completions)--completions[output completions]:shell:(bash zsh fish):" "(--purpose)--purpose[purpose]:" "(--model)--model[model ref]:" "(--dimensions)--dimensions[embedding dimensions]:number:" "(--no-color)--no-color[disable color]" "(--scope)--scope"\{local,global,project\}:" }; _open_knowledge`);
|
|
300
305
|
} else if (shell === 'fish') {
|
|
301
|
-
console.log(`complete -c open-knowledge -f; complete -c open-knowledge -a "add list get update archive restore upsert untag delete export prune dedupe stats paths storage db wiki source ingest reindex embeddings providers safety help ls rm edit unarchive"; complete -c open-knowledge -l json; complete -c open-knowledge -l yes -s y; complete -c open-knowledge -l help -s h; complete -c open-knowledge -l version -s v; complete -c open-knowledge -l desc; complete -c open-knowledge -l archived; complete -c open-knowledge -l include-archived; complete -c open-knowledge -l fake; complete -c open-knowledge -s p -l page; complete -c open-knowledge -s l -l limit; complete -c open-knowledge -s s -l search; complete -c open-knowledge -l sort; complete -c open-knowledge -l id; complete -c open-knowledge -l store; complete -c open-knowledge -l title; complete -c open-knowledge -l content; complete -c open-knowledge -l url; complete -c open-knowledge -s t -l tag; complete -c open-knowledge -l format; complete -c open-knowledge -l completions; complete -c open-knowledge -l purpose; complete -c open-knowledge -l model; complete -c open-knowledge -l dimensions; complete -c open-knowledge -l no-color; complete -c open-knowledge -l scope -a "local global project"`);
|
|
306
|
+
console.log(`complete -c open-knowledge -f; complete -c open-knowledge -a "add list get update archive restore upsert untag delete export prune dedupe stats paths storage db wiki source ingest reindex search embeddings providers safety help ls rm edit unarchive"; complete -c open-knowledge -l json; complete -c open-knowledge -l yes -s y; complete -c open-knowledge -l help -s h; complete -c open-knowledge -l version -s v; complete -c open-knowledge -l desc; complete -c open-knowledge -l archived; complete -c open-knowledge -l include-archived; complete -c open-knowledge -l semantic; complete -c open-knowledge -l fake; complete -c open-knowledge -s p -l page; complete -c open-knowledge -s l -l limit; complete -c open-knowledge -s s -l search; complete -c open-knowledge -l sort; complete -c open-knowledge -l id; complete -c open-knowledge -l store; complete -c open-knowledge -l title; complete -c open-knowledge -l content; complete -c open-knowledge -l url; complete -c open-knowledge -s t -l tag; complete -c open-knowledge -l format; complete -c open-knowledge -l completions; complete -c open-knowledge -l purpose; complete -c open-knowledge -l model; complete -c open-knowledge -l dimensions; complete -c open-knowledge -l no-color; complete -c open-knowledge -l scope -a "local global project"`);
|
|
302
307
|
} else {
|
|
303
308
|
throw new Error("Invalid --completions value. Use 'bash', 'zsh', or 'fish'.");
|
|
304
309
|
}
|
|
@@ -578,6 +583,21 @@ async function run(argv: string[]): Promise<void> {
|
|
|
578
583
|
throw new Error("Invalid embeddings action. Use 'status', 'index', or 'search'.");
|
|
579
584
|
}
|
|
580
585
|
|
|
586
|
+
if (command === 'search') {
|
|
587
|
+
const query = positional.slice(1).join(' ');
|
|
588
|
+
if (!query) throw new Error('Usage: open-knowledge search <query>');
|
|
589
|
+
const result = await service.search({
|
|
590
|
+
query,
|
|
591
|
+
limit: flags.limit,
|
|
592
|
+
semantic: flags.semantic,
|
|
593
|
+
modelRef: flags.model,
|
|
594
|
+
dimensions: flags.dimensions,
|
|
595
|
+
fake: flags.fake,
|
|
596
|
+
});
|
|
597
|
+
output({ ok: true, ...result, message: `${result.results.length} search result(s)` }, flags.json);
|
|
598
|
+
return;
|
|
599
|
+
}
|
|
600
|
+
|
|
581
601
|
if (command === 'providers') {
|
|
582
602
|
const action = positional[1] ?? 'status';
|
|
583
603
|
if (action === 'status') {
|
package/src/mcp.js
CHANGED
|
@@ -169,6 +169,23 @@ export function buildServer() {
|
|
|
169
169
|
}
|
|
170
170
|
});
|
|
171
171
|
|
|
172
|
+
registerTool(server, 'ok_search', 'Hybrid knowledge search', 'Search source chunks, generated wiki pages, sharded indexes, and optional semantic vectors', {
|
|
173
|
+
scope: scopeField,
|
|
174
|
+
query: z.string().describe('Search query'),
|
|
175
|
+
limit: z.number().optional().describe('Maximum results'),
|
|
176
|
+
semantic: z.boolean().optional().describe('Include vector semantic results'),
|
|
177
|
+
model: z.string().optional().describe('Embedding model ref, default openai:text-embedding-3-small'),
|
|
178
|
+
dimensions: z.number().optional().describe('Embedding dimensions for deterministic fake mode'),
|
|
179
|
+
fake: z.boolean().optional().describe('Use deterministic fake embeddings for local tests'),
|
|
180
|
+
}, async ({ scope, query, limit, semantic, model, dimensions, fake }) => {
|
|
181
|
+
const service = createKnowledgeService({ scope });
|
|
182
|
+
try {
|
|
183
|
+
return jsonText({ ok: true, ...await service.search({ query, limit, semantic, modelRef: model, dimensions, fake }) });
|
|
184
|
+
} catch (error) {
|
|
185
|
+
return errorText(error instanceof Error ? error.message : String(error));
|
|
186
|
+
}
|
|
187
|
+
});
|
|
188
|
+
|
|
172
189
|
registerTool(server, 'ok_add', 'Add a knowledge item', 'Add a new item to the knowledge store', {
|
|
173
190
|
title: z.string().describe('Item title'),
|
|
174
191
|
content: z.string().describe('Item content/body'),
|