@hasna/knowledge 0.2.13 → 0.2.15

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in the public registry.
@@ -215,6 +215,17 @@ Local mode should start with SQLite FTS and a local vector-index option. Hosted
215
215
  mode can use Postgres with pgvector or a managed vector index. Permission
216
216
  filters must be applied before agent context is assembled.
217
217
 
218
+ The first local semantic-search implementation indexes derived chunks with
219
+ `open-knowledge embeddings index` and queries them with
220
+ `open-knowledge search --semantic` or the lower-level
221
+ `open-knowledge embeddings search`. It stores OpenAI embedding vectors as
222
+ generated metadata rows, not raw source bytes, and pins each row to `open-files`
223
+ provenance: source ref/URI, revision/hash, chunk offsets, token count, provider,
224
+ model, dimensions, status, and timestamps. The structured `search` contract
225
+ merges keyword FTS, wiki/index catalog hits, generated wiki chunks, and optional
226
+ vector results so the local SQLite index can later move to pgvector or a managed
227
+ hosted vector store without changing CLI/MCP result shape.
228
+
218
229
  ## Agent Workflow
219
230
 
220
231
  The target user flow is:
@@ -32,6 +32,9 @@ Local mode starts with SQLite:
32
32
  - `chunks_fts` provides keyword search.
33
33
  - `chunk_embeddings` stores embedding vectors as JSON until a local vector
34
34
  extension is chosen.
35
+ - `vector_index_entries` stores searchable embedding rows with provider/model,
36
+ dimensions, source revision/hash, chunk offsets, status, timestamps, and
37
+ provenance metadata.
35
38
  - `wiki_pages`, `wiki_backlinks`, and `citations` provide graph and provenance
36
39
  signals.
37
40
  - `knowledge_indexes` tracks generated machine-readable shards.
@@ -41,6 +44,21 @@ implementation. The retrieval interface should hide it so a later vector
41
44
  extension or pgvector backend can replace storage without changing CLI/MCP
42
45
  contracts.
43
46
 
47
+ The current local command surface is:
48
+
49
+ ```bash
50
+ open-knowledge search "company wiki policy" --scope project --json
51
+ open-knowledge search "company wiki policy" --scope project --semantic --json
52
+ open-knowledge embeddings index --scope project --model openai:text-embedding-3-small
53
+ open-knowledge embeddings search "company wiki policy" --scope project --json
54
+ ```
55
+
56
+ `search` is the structured hybrid layer for agents. `embeddings search` is the
57
+ lower-level vector-only command. MCP exposes the same capability through
58
+ `ok_search`, `ok_embeddings_status`, `ok_embeddings_index`, and
59
+ `ok_semantic_search`. Deterministic `--fake` embeddings exist for tests and
60
+ offline verification only.
61
+
44
62
  ## Hosted Indexes
45
63
 
46
64
  Hosted mode may use:
@@ -58,16 +76,18 @@ unauthorized content.
58
76
 
59
77
  1. Normalize the query.
60
78
  2. Embed the query if a semantic-capable provider is configured.
61
- 3. Run keyword FTS over source chunks and wiki chunks.
62
- 4. Run vector search over source chunks and wiki pages.
63
- 5. Expand candidate pages through backlinks and citations.
64
- 6. Drop stale candidates whose source revision/hash no longer matches
79
+ 3. Run keyword FTS over source chunks and generated wiki chunks.
80
+ 4. Search wiki page and machine-readable index catalog rows.
81
+ 5. Run vector search over source chunks and wiki pages when semantic mode is
82
+ requested.
83
+ 6. Expand candidate pages through backlinks and citations.
84
+ 7. Drop stale candidates whose source revision/hash no longer matches
65
85
  `open-files`.
66
- 7. Apply permission filters.
67
- 8. Merge and dedupe by source revision, wiki page, citation, and text hash.
68
- 9. Rerank by relevance, exact-match score, semantic score, freshness, citation
86
+ 8. Apply permission filters.
87
+ 9. Merge and dedupe by source revision, wiki page, citation, and text hash.
88
+ 10. Rerank by relevance, exact-match score, semantic score, freshness, citation
69
89
  quality, and wiki authority.
70
- 10. Return structured results with source refs, citation spans, page refs,
90
+ 11. Return structured results with source refs, citation spans, page refs,
71
91
  scores, and reason codes.
72
92
 
73
93
  ## Result Shape
@@ -120,6 +140,9 @@ Reindexing is driven by source revisions:
120
140
  stale.
121
141
  - If a source is deleted or access changes, affected chunks must be hidden or
122
142
  removed before future retrieval.
143
+ - Local outbox consumption deletes stale `chunk_embeddings` and
144
+ `vector_index_entries` rows for deleted revisions, so semantic search cannot return
145
+ removed source chunks.
123
146
  - Wiki pages should track the source revisions they cite so lint can flag stale
124
147
  pages.
125
148
  - Embedding refresh jobs should be idempotent and checkpointed in `runs` and
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@hasna/knowledge",
3
- "version": "0.2.13",
3
+ "version": "0.2.15",
4
4
  "description": "Agent-friendly local knowledge CLI with JSON output, pagination, and safe destructive actions",
5
5
  "type": "module",
6
6
  "bin": {
package/src/cli.ts CHANGED
@@ -47,10 +47,14 @@ interface Flags {
47
47
  format?: string;
48
48
  completions?: string;
49
49
  purpose?: string;
50
+ model?: string;
51
+ dimensions?: number;
52
+ semantic?: boolean;
50
53
  noColor?: boolean;
51
54
  scope?: string;
52
55
  olderThan?: number;
53
56
  empty?: boolean;
57
+ fake?: boolean;
54
58
  archived?: boolean;
55
59
  includeArchived?: boolean;
56
60
  }
@@ -60,7 +64,7 @@ interface ParseResult {
60
64
  flags: Flags;
61
65
  }
62
66
 
63
- const COMMANDS = ['add', 'list', 'get', 'delete', 'update', 'archive', 'restore', 'upsert', 'untag', 'export', 'prune', 'dedupe', 'stats', 'paths', 'storage', 'db', 'wiki', 'source', 'ingest', 'reindex', 'providers', 'safety', 'help'];
67
+ const COMMANDS = ['add', 'list', 'get', 'delete', 'update', 'archive', 'restore', 'upsert', 'untag', 'export', 'prune', 'dedupe', 'stats', 'paths', 'storage', 'db', 'wiki', 'source', 'ingest', 'reindex', 'search', 'embeddings', 'providers', 'safety', 'help'];
64
68
  const COMMAND_ALIASES: Record<string, string> = {
65
69
  ls: 'list',
66
70
  rm: 'delete',
@@ -96,6 +100,10 @@ function parseArgs(argv: string[]): ParseResult {
96
100
  case '--format': flags.format = argv[i + 1]; i += 1; break;
97
101
  case '--completions': flags.completions = argv[i + 1]; i += 1; break;
98
102
  case '--purpose': flags.purpose = argv[i + 1]; i += 1; break;
103
+ case '--model': flags.model = argv[i + 1]; i += 1; break;
104
+ case '--dimensions': flags.dimensions = Number(argv[i + 1]); i += 1; break;
105
+ case '--semantic': flags.semantic = true; break;
106
+ case '--fake': flags.fake = true; break;
99
107
  case '--no-color': flags.noColor = true; break;
100
108
  case '--scope': flags.scope = argv[i + 1]; i += 1; break;
101
109
  case '--older-than': flags.olderThan = Number(argv[i + 1]); i += 1; break;
@@ -169,6 +177,8 @@ Commands:
169
177
  ingest manifest <file|s3://> Ingest an open-files manifest into knowledge.db
170
178
  ingest source <source-ref> Ingest a read-only source ref into knowledge.db
171
179
  reindex outbox <file|s3://> Consume open-files change events and invalidate chunks
180
+ search <query> Hybrid search sources, wiki pages, and indexes
181
+ embeddings status|index|search Build/query local vector embeddings
172
182
  providers status|models|check Inspect AI SDK provider config and credentials
173
183
  safety status|check|approve|audit|redact
174
184
  help [command] Show help
@@ -177,6 +187,10 @@ Global Options:
177
187
  --json Output JSON
178
188
  --store <path> Override store path
179
189
  --purpose <name> Read-only source purpose (default: knowledge_answer)
190
+ --model <provider:model> AI/embedding model ref
191
+ --dimensions <n> Embedding dimensions for local/fake providers
192
+ --semantic Include vector semantic results in search
193
+ --fake Use deterministic fake embeddings for local tests
180
194
  --scope local|global|project Store scope (default: global ~/.hasna/apps/knowledge/)
181
195
  --no-color Disable color output
182
196
  --completions <shell> Output completions for bash|zsh|fish
@@ -237,6 +251,8 @@ function printCommandHelp(command: string): void {
237
251
  if (command === 'source') { console.log('Usage: open-knowledge source resolve <source-ref> [--purpose knowledge_answer|knowledge_index] [--limit <n>] [--scope local|global|project] [--json]'); return; }
238
252
  if (command === 'ingest') { console.log('Usage: open-knowledge ingest manifest <file|s3://bucket/key> | source <source-ref> [--purpose knowledge_index] [--scope local|global|project] [--json]'); return; }
239
253
  if (command === 'reindex') { console.log('Usage: open-knowledge reindex outbox <file|s3://bucket/key> [--scope local|global|project] [--json]'); return; }
254
+ if (command === 'search') { console.log('Usage: open-knowledge search <query> [--semantic] [--model openai:text-embedding-3-small] [--limit <n>] [--dimensions <n>] [--fake] [--scope local|global|project] [--json]'); return; }
255
+ if (command === 'embeddings') { console.log('Usage: open-knowledge embeddings status|index|search [query] [--model openai:text-embedding-3-small] [--limit <n>] [--dimensions <n>] [--fake] [--scope local|global|project] [--json]'); return; }
240
256
  if (command === 'providers') { console.log('Usage: open-knowledge providers status|models|check [provider|model-alias] [--scope local|global|project] [--json]'); return; }
241
257
  if (command === 'safety') { console.log('Usage: open-knowledge safety status|check|approve|audit|redact [args] [--scope local|global|project] [--json]'); return; }
242
258
  printGlobalHelp();
@@ -283,11 +299,11 @@ async function run(argv: string[]): Promise<void> {
283
299
  if (flags.completions) {
284
300
  const shell = flags.completions;
285
301
  if (shell === 'bash') {
286
- console.log(`_open_knowledge() { local cur; cur="${"$"}{COMP_WORDS[COMP_CWORD]}"; COMPREPLY=($(compgen -W "add list get update archive restore upsert untag delete export prune dedupe stats paths storage db wiki source ingest reindex providers safety help ls rm edit unarchive --json --yes --help --version --desc --page --limit --search --sort --id --store --title --content --url --tag --format --completions --purpose --no-color --scope --archived --include-archived" -- "$cur")); }; complete -F _open_knowledge open-knowledge`);
302
+ console.log(`_open_knowledge() { local cur; cur="${"$"}{COMP_WORDS[COMP_CWORD]}"; COMPREPLY=($(compgen -W "add list get update archive restore upsert untag delete export prune dedupe stats paths storage db wiki source ingest reindex search embeddings providers safety help ls rm edit unarchive --json --yes --help --version --desc --page --limit --search --sort --id --store --title --content --url --tag --format --completions --purpose --model --dimensions --semantic --fake --no-color --scope --archived --include-archived" -- "$cur")); }; complete -F _open_knowledge open-knowledge`);
287
303
  } else if (shell === 'zsh') {
288
- console.log(`#compdef open-knowledge\n_open_knowledge() { _arguments -C "1: :(add list get update archive restore upsert untag delete export prune dedupe stats paths storage db wiki source ingest reindex providers safety help ls rm edit unarchive)" "(--json)--json" "(--yes)-y" "(--help)--help" "(--version)--version" "(--desc)--desc" "(--archived)--archived" "(--include-archived)--include-archived" "(-p --page)"{-p,--page}"[page number]:number:" "(-l --limit)"{-l,--limit}"[items per page]:number:" "(-s --search)"{-s,--search}"[search text]:text:" "(--sort)--sort"\{created,title\}:" "(--id)--id[item id]:id:" "(--store)--store[store path]:path:" "(--title)--title[new title]:" "(--content)--content[new content]:" "(--url)--url[source url]:" "(-t --tag)"{-t,--tag}"[tag]:tag:" "(--format)--format[json|jsonl]:" "(--completions)--completions[output completions]:shell:(bash zsh fish):" "(--purpose)--purpose[purpose]:" "(--no-color)--no-color[disable color]" "(--scope)--scope"\{local,global,project\}:" }; _open_knowledge`);
304
+ console.log(`#compdef open-knowledge\n_open_knowledge() { _arguments -C "1: :(add list get update archive restore upsert untag delete export prune dedupe stats paths storage db wiki source ingest reindex search embeddings providers safety help ls rm edit unarchive)" "(--json)--json" "(--yes)-y" "(--help)--help" "(--version)--version" "(--desc)--desc" "(--archived)--archived" "(--include-archived)--include-archived" "(--semantic)--semantic" "(--fake)--fake" "(-p --page)"{-p,--page}"[page number]:number:" "(-l --limit)"{-l,--limit}"[items per page]:number:" "(-s --search)"{-s,--search}"[search text]:text:" "(--sort)--sort"\{created,title\}:" "(--id)--id[item id]:id:" "(--store)--store[store path]:path:" "(--title)--title[new title]:" "(--content)--content[new content]:" "(--url)--url[source url]:" "(-t --tag)"{-t,--tag}"[tag]:tag:" "(--format)--format[json|jsonl]:" "(--completions)--completions[output completions]:shell:(bash zsh fish):" "(--purpose)--purpose[purpose]:" "(--model)--model[model ref]:" "(--dimensions)--dimensions[embedding dimensions]:number:" "(--no-color)--no-color[disable color]" "(--scope)--scope"\{local,global,project\}:" }; _open_knowledge`);
289
305
  } else if (shell === 'fish') {
290
- console.log(`complete -c open-knowledge -f; complete -c open-knowledge -a "add list get update archive restore upsert untag delete export prune dedupe stats paths storage db wiki source ingest reindex providers safety help ls rm edit unarchive"; complete -c open-knowledge -l json; complete -c open-knowledge -l yes -s y; complete -c open-knowledge -l help -s h; complete -c open-knowledge -l version -s v; complete -c open-knowledge -l desc; complete -c open-knowledge -l archived; complete -c open-knowledge -l include-archived; complete -c open-knowledge -s p -l page; complete -c open-knowledge -s l -l limit; complete -c open-knowledge -s s -l search; complete -c open-knowledge -l sort; complete -c open-knowledge -l id; complete -c open-knowledge -l store; complete -c open-knowledge -l title; complete -c open-knowledge -l content; complete -c open-knowledge -l url; complete -c open-knowledge -s t -l tag; complete -c open-knowledge -l format; complete -c open-knowledge -l completions; complete -c open-knowledge -l purpose; complete -c open-knowledge -l no-color; complete -c open-knowledge -l scope -a "local global project"`);
306
+ console.log(`complete -c open-knowledge -f; complete -c open-knowledge -a "add list get update archive restore upsert untag delete export prune dedupe stats paths storage db wiki source ingest reindex search embeddings providers safety help ls rm edit unarchive"; complete -c open-knowledge -l json; complete -c open-knowledge -l yes -s y; complete -c open-knowledge -l help -s h; complete -c open-knowledge -l version -s v; complete -c open-knowledge -l desc; complete -c open-knowledge -l archived; complete -c open-knowledge -l include-archived; complete -c open-knowledge -l semantic; complete -c open-knowledge -l fake; complete -c open-knowledge -s p -l page; complete -c open-knowledge -s l -l limit; complete -c open-knowledge -s s -l search; complete -c open-knowledge -l sort; complete -c open-knowledge -l id; complete -c open-knowledge -l store; complete -c open-knowledge -l title; complete -c open-knowledge -l content; complete -c open-knowledge -l url; complete -c open-knowledge -s t -l tag; complete -c open-knowledge -l format; complete -c open-knowledge -l completions; complete -c open-knowledge -l purpose; complete -c open-knowledge -l model; complete -c open-knowledge -l dimensions; complete -c open-knowledge -l no-color; complete -c open-knowledge -l scope -a "local global project"`);
291
307
  } else {
292
308
  throw new Error("Invalid --completions value. Use 'bash', 'zsh', or 'fish'.");
293
309
  }
@@ -534,6 +550,54 @@ async function run(argv: string[]): Promise<void> {
534
550
  return;
535
551
  }
536
552
 
553
+ if (command === 'embeddings') {
554
+ const action = positional[1] ?? 'status';
555
+ if (action === 'status') {
556
+ const result = service.embeddingStatus();
557
+ output({ ok: true, ...result, message: `${result.total_vector_entries} vector index entries` }, flags.json);
558
+ return;
559
+ }
560
+ if (action === 'index') {
561
+ const result = await service.indexEmbeddings({
562
+ limit: flags.limit,
563
+ modelRef: flags.model,
564
+ dimensions: flags.dimensions,
565
+ fake: flags.fake,
566
+ });
567
+ output({ ok: true, ...result, message: `Embedded ${result.chunks_embedded} chunk(s)` }, flags.json);
568
+ return;
569
+ }
570
+ if (action === 'search') {
571
+ const query = positional.slice(2).join(' ');
572
+ if (!query) throw new Error('Usage: open-knowledge embeddings search <query>');
573
+ const result = await service.semanticSearch({
574
+ query,
575
+ limit: flags.limit,
576
+ modelRef: flags.model,
577
+ dimensions: flags.dimensions,
578
+ fake: flags.fake,
579
+ });
580
+ output({ ok: true, ...result, message: `${result.results.length} semantic result(s)` }, flags.json);
581
+ return;
582
+ }
583
+ throw new Error("Invalid embeddings action. Use 'status', 'index', or 'search'.");
584
+ }
585
+
586
+ if (command === 'search') {
587
+ const query = positional.slice(1).join(' ');
588
+ if (!query) throw new Error('Usage: open-knowledge search <query>');
589
+ const result = await service.search({
590
+ query,
591
+ limit: flags.limit,
592
+ semantic: flags.semantic,
593
+ modelRef: flags.model,
594
+ dimensions: flags.dimensions,
595
+ fake: flags.fake,
596
+ });
597
+ output({ ok: true, ...result, message: `${result.results.length} search result(s)` }, flags.json);
598
+ return;
599
+ }
600
+
537
601
  if (command === 'providers') {
538
602
  const action = positional[1] ?? 'status';
539
603
  if (action === 'status') {