@hasna/knowledge 0.2.14 → 0.2.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -217,12 +217,14 @@ filters must be applied before agent context is assembled.
217
217
 
218
218
  The first local semantic-search implementation indexes derived chunks with
219
219
  `open-knowledge embeddings index` and queries them with
220
+ `open-knowledge search --semantic` or the lower-level
220
221
  `open-knowledge embeddings search`. It stores OpenAI embedding vectors as
221
- generated metadata rows, not raw source bytes, and pins each row to
222
- `open-files` provenance: source ref/URI, revision/hash, chunk offsets, token
223
- count, provider, model, dimensions, status, and timestamps. This lets the local
224
- SQLite index later move to pgvector or a managed hosted vector store without
225
- changing the CLI/MCP result contract.
222
+ generated metadata rows, not raw source bytes, and pins each row to `open-files`
223
+ provenance: source ref/URI, revision/hash, chunk offsets, token count, provider,
224
+ model, dimensions, status, and timestamps. The structured `search` contract
225
+ merges keyword FTS, wiki/index catalog hits, generated wiki chunks, and optional
226
+ vector results so the local SQLite index can later move to pgvector or a managed
227
+ hosted vector store without changing the CLI/MCP result shape.
226
228
 
227
229
  ## Agent Workflow
228
230
 
@@ -47,13 +47,17 @@ contracts.
47
47
  The current local command surface is:
48
48
 
49
49
  ```bash
50
+ open-knowledge search "company wiki policy" --scope project --json
51
+ open-knowledge search "company wiki policy" --scope project --semantic --json
50
52
  open-knowledge embeddings index --scope project --model openai:text-embedding-3-small
51
53
  open-knowledge embeddings search "company wiki policy" --scope project --json
52
54
  ```
53
55
 
54
- MCP exposes the same capability through `ok_embeddings_status`,
55
- `ok_embeddings_index`, and `ok_semantic_search`. Deterministic `--fake`
56
- embeddings exist for tests and offline verification only.
56
+ `search` is the structured hybrid layer for agents. `embeddings search` is the
57
+ lower-level vector-only command. MCP exposes the same capability through
58
+ `ok_search`, `ok_embeddings_status`, `ok_embeddings_index`, and
59
+ `ok_semantic_search`. Deterministic `--fake` embeddings exist for tests and
60
+ offline verification only.
57
61
 
58
62
  ## Hosted Indexes
59
63
 
@@ -72,16 +76,18 @@ unauthorized content.
72
76
 
73
77
  1. Normalize the query.
74
78
  2. Embed the query if a semantic-capable provider is configured.
75
- 3. Run keyword FTS over source chunks and wiki chunks.
76
- 4. Run vector search over source chunks and wiki pages.
77
- 5. Expand candidate pages through backlinks and citations.
78
- 6. Drop stale candidates whose source revision/hash no longer matches
79
+ 3. Run keyword FTS over source chunks and generated wiki chunks.
80
+ 4. Search wiki page and machine-readable index catalog rows.
81
+ 5. Run vector search over source chunks and wiki pages when semantic mode is
82
+ requested.
83
+ 6. Expand candidate pages through backlinks and citations.
84
+ 7. Drop stale candidates whose source revision/hash no longer matches
79
85
  `open-files`.
80
- 7. Apply permission filters.
81
- 8. Merge and dedupe by source revision, wiki page, citation, and text hash.
82
- 9. Rerank by relevance, exact-match score, semantic score, freshness, citation
86
+ 8. Apply permission filters.
87
+ 9. Merge and dedupe by source revision, wiki page, citation, and text hash.
88
+ 10. Rerank by relevance, exact-match score, semantic score, freshness, citation
83
89
  quality, and wiki authority.
84
- 10. Return structured results with source refs, citation spans, page refs,
90
+ 11. Return structured results with source refs, citation spans, page refs,
85
91
  scores, and reason codes.
86
92
 
87
93
  ## Result Shape
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@hasna/knowledge",
3
- "version": "0.2.14",
3
+ "version": "0.2.15",
4
4
  "description": "Agent-friendly local knowledge CLI with JSON output, pagination, and safe destructive actions",
5
5
  "type": "module",
6
6
  "bin": {
package/src/cli.ts CHANGED
@@ -49,6 +49,7 @@ interface Flags {
49
49
  purpose?: string;
50
50
  model?: string;
51
51
  dimensions?: number;
52
+ semantic?: boolean;
52
53
  noColor?: boolean;
53
54
  scope?: string;
54
55
  olderThan?: number;
@@ -63,7 +64,7 @@ interface ParseResult {
63
64
  flags: Flags;
64
65
  }
65
66
 
66
- const COMMANDS = ['add', 'list', 'get', 'delete', 'update', 'archive', 'restore', 'upsert', 'untag', 'export', 'prune', 'dedupe', 'stats', 'paths', 'storage', 'db', 'wiki', 'source', 'ingest', 'reindex', 'embeddings', 'providers', 'safety', 'help'];
67
+ const COMMANDS = ['add', 'list', 'get', 'delete', 'update', 'archive', 'restore', 'upsert', 'untag', 'export', 'prune', 'dedupe', 'stats', 'paths', 'storage', 'db', 'wiki', 'source', 'ingest', 'reindex', 'search', 'embeddings', 'providers', 'safety', 'help'];
67
68
  const COMMAND_ALIASES: Record<string, string> = {
68
69
  ls: 'list',
69
70
  rm: 'delete',
@@ -101,6 +102,7 @@ function parseArgs(argv: string[]): ParseResult {
101
102
  case '--purpose': flags.purpose = argv[i + 1]; i += 1; break;
102
103
  case '--model': flags.model = argv[i + 1]; i += 1; break;
103
104
  case '--dimensions': flags.dimensions = Number(argv[i + 1]); i += 1; break;
105
+ case '--semantic': flags.semantic = true; break;
104
106
  case '--fake': flags.fake = true; break;
105
107
  case '--no-color': flags.noColor = true; break;
106
108
  case '--scope': flags.scope = argv[i + 1]; i += 1; break;
@@ -175,6 +177,7 @@ Commands:
175
177
  ingest manifest <file|s3://> Ingest an open-files manifest into knowledge.db
176
178
  ingest source <source-ref> Ingest a read-only source ref into knowledge.db
177
179
  reindex outbox <file|s3://> Consume open-files change events and invalidate chunks
180
+ search <query> Hybrid search sources, wiki pages, and indexes
178
181
  embeddings status|index|search Build/query local vector embeddings
179
182
  providers status|models|check Inspect AI SDK provider config and credentials
180
183
  safety status|check|approve|audit|redact
@@ -186,6 +189,7 @@ Global Options:
186
189
  --purpose <name> Read-only source purpose (default: knowledge_answer)
187
190
  --model <provider:model> AI/embedding model ref
188
191
  --dimensions <n> Embedding dimensions for local/fake providers
192
+ --semantic Include vector semantic results in search
189
193
  --fake Use deterministic fake embeddings for local tests
190
194
  --scope local|global|project Store scope (default: global ~/.hasna/apps/knowledge/)
191
195
  --no-color Disable color output
@@ -247,6 +251,7 @@ function printCommandHelp(command: string): void {
247
251
  if (command === 'source') { console.log('Usage: open-knowledge source resolve <source-ref> [--purpose knowledge_answer|knowledge_index] [--limit <n>] [--scope local|global|project] [--json]'); return; }
248
252
  if (command === 'ingest') { console.log('Usage: open-knowledge ingest manifest <file|s3://bucket/key> | source <source-ref> [--purpose knowledge_index] [--scope local|global|project] [--json]'); return; }
249
253
  if (command === 'reindex') { console.log('Usage: open-knowledge reindex outbox <file|s3://bucket/key> [--scope local|global|project] [--json]'); return; }
254
+ if (command === 'search') { console.log('Usage: open-knowledge search <query> [--semantic] [--model openai:text-embedding-3-small] [--limit <n>] [--dimensions <n>] [--fake] [--scope local|global|project] [--json]'); return; }
250
255
  if (command === 'embeddings') { console.log('Usage: open-knowledge embeddings status|index|search [query] [--model openai:text-embedding-3-small] [--limit <n>] [--dimensions <n>] [--fake] [--scope local|global|project] [--json]'); return; }
251
256
  if (command === 'providers') { console.log('Usage: open-knowledge providers status|models|check [provider|model-alias] [--scope local|global|project] [--json]'); return; }
252
257
  if (command === 'safety') { console.log('Usage: open-knowledge safety status|check|approve|audit|redact [args] [--scope local|global|project] [--json]'); return; }
@@ -294,11 +299,11 @@ async function run(argv: string[]): Promise<void> {
294
299
  if (flags.completions) {
295
300
  const shell = flags.completions;
296
301
  if (shell === 'bash') {
297
- console.log(`_open_knowledge() { local cur; cur="${"$"}{COMP_WORDS[COMP_CWORD]}"; COMPREPLY=($(compgen -W "add list get update archive restore upsert untag delete export prune dedupe stats paths storage db wiki source ingest reindex embeddings providers safety help ls rm edit unarchive --json --yes --help --version --desc --page --limit --search --sort --id --store --title --content --url --tag --format --completions --purpose --model --dimensions --fake --no-color --scope --archived --include-archived" -- "$cur")); }; complete -F _open_knowledge open-knowledge`);
302
+ console.log(`_open_knowledge() { local cur; cur="${"$"}{COMP_WORDS[COMP_CWORD]}"; COMPREPLY=($(compgen -W "add list get update archive restore upsert untag delete export prune dedupe stats paths storage db wiki source ingest reindex search embeddings providers safety help ls rm edit unarchive --json --yes --help --version --desc --page --limit --search --sort --id --store --title --content --url --tag --format --completions --purpose --model --dimensions --semantic --fake --no-color --scope --archived --include-archived" -- "$cur")); }; complete -F _open_knowledge open-knowledge`);
298
303
  } else if (shell === 'zsh') {
299
- console.log(`#compdef open-knowledge\n_open_knowledge() { _arguments -C "1: :(add list get update archive restore upsert untag delete export prune dedupe stats paths storage db wiki source ingest reindex embeddings providers safety help ls rm edit unarchive)" "(--json)--json" "(--yes)-y" "(--help)--help" "(--version)--version" "(--desc)--desc" "(--archived)--archived" "(--include-archived)--include-archived" "(--fake)--fake" "(-p --page)"{-p,--page}"[page number]:number:" "(-l --limit)"{-l,--limit}"[items per page]:number:" "(-s --search)"{-s,--search}"[search text]:text:" "(--sort)--sort"\{created,title\}:" "(--id)--id[item id]:id:" "(--store)--store[store path]:path:" "(--title)--title[new title]:" "(--content)--content[new content]:" "(--url)--url[source url]:" "(-t --tag)"{-t,--tag}"[tag]:tag:" "(--format)--format[json|jsonl]:" "(--completions)--completions[output completions]:shell:(bash zsh fish):" "(--purpose)--purpose[purpose]:" "(--model)--model[model ref]:" "(--dimensions)--dimensions[embedding dimensions]:number:" "(--no-color)--no-color[disable color]" "(--scope)--scope"\{local,global,project\}:" }; _open_knowledge`);
304
+ console.log(`#compdef open-knowledge\n_open_knowledge() { _arguments -C "1: :(add list get update archive restore upsert untag delete export prune dedupe stats paths storage db wiki source ingest reindex search embeddings providers safety help ls rm edit unarchive)" "(--json)--json" "(--yes)-y" "(--help)--help" "(--version)--version" "(--desc)--desc" "(--archived)--archived" "(--include-archived)--include-archived" "(--semantic)--semantic" "(--fake)--fake" "(-p --page)"{-p,--page}"[page number]:number:" "(-l --limit)"{-l,--limit}"[items per page]:number:" "(-s --search)"{-s,--search}"[search text]:text:" "(--sort)--sort"\{created,title\}:" "(--id)--id[item id]:id:" "(--store)--store[store path]:path:" "(--title)--title[new title]:" "(--content)--content[new content]:" "(--url)--url[source url]:" "(-t --tag)"{-t,--tag}"[tag]:tag:" "(--format)--format[json|jsonl]:" "(--completions)--completions[output completions]:shell:(bash zsh fish):" "(--purpose)--purpose[purpose]:" "(--model)--model[model ref]:" "(--dimensions)--dimensions[embedding dimensions]:number:" "(--no-color)--no-color[disable color]" "(--scope)--scope"\{local,global,project\}:" }; _open_knowledge`);
300
305
  } else if (shell === 'fish') {
301
- console.log(`complete -c open-knowledge -f; complete -c open-knowledge -a "add list get update archive restore upsert untag delete export prune dedupe stats paths storage db wiki source ingest reindex embeddings providers safety help ls rm edit unarchive"; complete -c open-knowledge -l json; complete -c open-knowledge -l yes -s y; complete -c open-knowledge -l help -s h; complete -c open-knowledge -l version -s v; complete -c open-knowledge -l desc; complete -c open-knowledge -l archived; complete -c open-knowledge -l include-archived; complete -c open-knowledge -l fake; complete -c open-knowledge -s p -l page; complete -c open-knowledge -s l -l limit; complete -c open-knowledge -s s -l search; complete -c open-knowledge -l sort; complete -c open-knowledge -l id; complete -c open-knowledge -l store; complete -c open-knowledge -l title; complete -c open-knowledge -l content; complete -c open-knowledge -l url; complete -c open-knowledge -s t -l tag; complete -c open-knowledge -l format; complete -c open-knowledge -l completions; complete -c open-knowledge -l purpose; complete -c open-knowledge -l model; complete -c open-knowledge -l dimensions; complete -c open-knowledge -l no-color; complete -c open-knowledge -l scope -a "local global project"`);
306
+ console.log(`complete -c open-knowledge -f; complete -c open-knowledge -a "add list get update archive restore upsert untag delete export prune dedupe stats paths storage db wiki source ingest reindex search embeddings providers safety help ls rm edit unarchive"; complete -c open-knowledge -l json; complete -c open-knowledge -l yes -s y; complete -c open-knowledge -l help -s h; complete -c open-knowledge -l version -s v; complete -c open-knowledge -l desc; complete -c open-knowledge -l archived; complete -c open-knowledge -l include-archived; complete -c open-knowledge -l semantic; complete -c open-knowledge -l fake; complete -c open-knowledge -s p -l page; complete -c open-knowledge -s l -l limit; complete -c open-knowledge -s s -l search; complete -c open-knowledge -l sort; complete -c open-knowledge -l id; complete -c open-knowledge -l store; complete -c open-knowledge -l title; complete -c open-knowledge -l content; complete -c open-knowledge -l url; complete -c open-knowledge -s t -l tag; complete -c open-knowledge -l format; complete -c open-knowledge -l completions; complete -c open-knowledge -l purpose; complete -c open-knowledge -l model; complete -c open-knowledge -l dimensions; complete -c open-knowledge -l no-color; complete -c open-knowledge -l scope -a "local global project"`);
302
307
  } else {
303
308
  throw new Error("Invalid --completions value. Use 'bash', 'zsh', or 'fish'.");
304
309
  }
@@ -578,6 +583,21 @@ async function run(argv: string[]): Promise<void> {
578
583
  throw new Error("Invalid embeddings action. Use 'status', 'index', or 'search'.");
579
584
  }
580
585
 
586
+ if (command === 'search') {
587
+ const query = positional.slice(1).join(' ');
588
+ if (!query) throw new Error('Usage: open-knowledge search <query>');
589
+ const result = await service.search({
590
+ query,
591
+ limit: flags.limit,
592
+ semantic: flags.semantic,
593
+ modelRef: flags.model,
594
+ dimensions: flags.dimensions,
595
+ fake: flags.fake,
596
+ });
597
+ output({ ok: true, ...result, message: `${result.results.length} search result(s)` }, flags.json);
598
+ return;
599
+ }
600
+
581
601
  if (command === 'providers') {
582
602
  const action = positional[1] ?? 'status';
583
603
  if (action === 'status') {
package/src/mcp.js CHANGED
@@ -169,6 +169,23 @@ export function buildServer() {
169
169
  }
170
170
  });
171
171
 
172
+ registerTool(server, 'ok_search', 'Hybrid knowledge search', 'Search source chunks, generated wiki pages, sharded indexes, and optional semantic vectors', {
173
+ scope: scopeField,
174
+ query: z.string().describe('Search query'),
175
+ limit: z.number().optional().describe('Maximum results'),
176
+ semantic: z.boolean().optional().describe('Include vector semantic results'),
177
+ model: z.string().optional().describe('Embedding model ref, default openai:text-embedding-3-small'),
178
+ dimensions: z.number().optional().describe('Embedding dimensions for deterministic fake mode'),
179
+ fake: z.boolean().optional().describe('Use deterministic fake embeddings for local tests'),
180
+ }, async ({ scope, query, limit, semantic, model, dimensions, fake }) => {
181
+ const service = createKnowledgeService({ scope });
182
+ try {
183
+ return jsonText({ ok: true, ...await service.search({ query, limit, semantic, modelRef: model, dimensions, fake }) });
184
+ } catch (error) {
185
+ return errorText(error instanceof Error ? error.message : String(error));
186
+ }
187
+ });
188
+
172
189
  registerTool(server, 'ok_add', 'Add a knowledge item', 'Add a new item to the knowledge store', {
173
190
  title: z.string().describe('Item title'),
174
191
  content: z.string().describe('Item content/body'),