@hasna/knowledge 0.2.13 → 0.2.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -215,6 +215,15 @@ Local mode should start with SQLite FTS and a local vector-index option. Hosted
215
215
  mode can use Postgres with pgvector or a managed vector index. Permission
216
216
  filters must be applied before agent context is assembled.
217
217
 
218
+ The first local semantic-search implementation indexes derived chunks with
219
+ `open-knowledge embeddings index` and queries them with
220
+ `open-knowledge embeddings search`. It stores OpenAI embedding vectors as
221
+ generated metadata rows, not raw source bytes, and pins each row to
222
+ `open-files` provenance: source ref/URI, revision/hash, chunk offsets, token
223
+ count, provider, model, dimensions, status, and timestamps. This lets the local
224
+ SQLite index later move to pgvector or a managed hosted vector store without
225
+ changing the CLI/MCP result contract.
226
+
218
227
  ## Agent Workflow
219
228
 
220
229
  The target user flow is:
@@ -32,6 +32,9 @@ Local mode starts with SQLite:
32
32
  - `chunks_fts` provides keyword search.
33
33
  - `chunk_embeddings` stores embedding vectors as JSON until a local vector
34
34
  extension is chosen.
35
+ - `vector_index_entries` stores searchable embedding rows with provider/model,
36
+ dimensions, source revision/hash, chunk offsets, status, timestamps, and
37
+ provenance metadata.
35
38
  - `wiki_pages`, `wiki_backlinks`, and `citations` provide graph and provenance
36
39
  signals.
37
40
  - `knowledge_indexes` tracks generated machine-readable shards.
@@ -41,6 +44,17 @@ implementation. The retrieval interface should hide it so a later vector
41
44
  extension or pgvector backend can replace storage without changing CLI/MCP
42
45
  contracts.
43
46
 
47
+ The current local command surface is:
48
+
49
+ ```bash
50
+ open-knowledge embeddings index --scope project --model openai:text-embedding-3-small
51
+ open-knowledge embeddings search "company wiki policy" --scope project --json
52
+ ```
53
+
54
+ MCP exposes the same capability through `ok_embeddings_status`,
55
+ `ok_embeddings_index`, and `ok_semantic_search`. Deterministic `--fake`
56
+ embeddings exist for tests and offline verification only.
57
+
44
58
  ## Hosted Indexes
45
59
 
46
60
  Hosted mode may use:
@@ -120,6 +134,9 @@ Reindexing is driven by source revisions:
120
134
  stale.
121
135
  - If a source is deleted or access changes, affected chunks must be hidden or
122
136
  removed before future retrieval.
137
+ - Local outbox consumption deletes stale `chunk_embeddings` and
138
+ `vector_index_entries` for deleted revisions, so semantic search cannot return
139
+ removed source chunks.
123
140
  - Wiki pages should track the source revisions they cite so lint can flag stale
124
141
  pages.
125
142
  - Embedding refresh jobs should be idempotent and checkpointed in `runs` and
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@hasna/knowledge",
3
- "version": "0.2.13",
3
+ "version": "0.2.14",
4
4
  "description": "Agent-friendly local knowledge CLI with JSON output, pagination, and safe destructive actions",
5
5
  "type": "module",
6
6
  "bin": {
package/src/cli.ts CHANGED
@@ -47,10 +47,13 @@ interface Flags {
47
47
  format?: string;
48
48
  completions?: string;
49
49
  purpose?: string;
50
+ model?: string;
51
+ dimensions?: number;
50
52
  noColor?: boolean;
51
53
  scope?: string;
52
54
  olderThan?: number;
53
55
  empty?: boolean;
56
+ fake?: boolean;
54
57
  archived?: boolean;
55
58
  includeArchived?: boolean;
56
59
  }
@@ -60,7 +63,7 @@ interface ParseResult {
60
63
  flags: Flags;
61
64
  }
62
65
 
63
- const COMMANDS = ['add', 'list', 'get', 'delete', 'update', 'archive', 'restore', 'upsert', 'untag', 'export', 'prune', 'dedupe', 'stats', 'paths', 'storage', 'db', 'wiki', 'source', 'ingest', 'reindex', 'providers', 'safety', 'help'];
66
+ const COMMANDS = ['add', 'list', 'get', 'delete', 'update', 'archive', 'restore', 'upsert', 'untag', 'export', 'prune', 'dedupe', 'stats', 'paths', 'storage', 'db', 'wiki', 'source', 'ingest', 'reindex', 'embeddings', 'providers', 'safety', 'help'];
64
67
  const COMMAND_ALIASES: Record<string, string> = {
65
68
  ls: 'list',
66
69
  rm: 'delete',
@@ -96,6 +99,9 @@ function parseArgs(argv: string[]): ParseResult {
96
99
  case '--format': flags.format = argv[i + 1]; i += 1; break;
97
100
  case '--completions': flags.completions = argv[i + 1]; i += 1; break;
98
101
  case '--purpose': flags.purpose = argv[i + 1]; i += 1; break;
102
+ case '--model': flags.model = argv[i + 1]; i += 1; break;
103
+ case '--dimensions': flags.dimensions = Number(argv[i + 1]); i += 1; break;
104
+ case '--fake': flags.fake = true; break;
99
105
  case '--no-color': flags.noColor = true; break;
100
106
  case '--scope': flags.scope = argv[i + 1]; i += 1; break;
101
107
  case '--older-than': flags.olderThan = Number(argv[i + 1]); i += 1; break;
@@ -169,6 +175,7 @@ Commands:
169
175
  ingest manifest <file|s3://> Ingest an open-files manifest into knowledge.db
170
176
  ingest source <source-ref> Ingest a read-only source ref into knowledge.db
171
177
  reindex outbox <file|s3://> Consume open-files change events and invalidate chunks
178
+ embeddings status|index|search Build/query local vector embeddings
172
179
  providers status|models|check Inspect AI SDK provider config and credentials
173
180
  safety status|check|approve|audit|redact
174
181
  help [command] Show help
@@ -177,6 +184,9 @@ Global Options:
177
184
  --json Output JSON
178
185
  --store <path> Override store path
179
186
  --purpose <name> Read-only source purpose (default: knowledge_answer)
187
+ --model <provider:model> AI/embedding model ref
188
+ --dimensions <n> Embedding dimensions for local/fake providers
189
+ --fake Use deterministic fake embeddings for local tests
180
190
  --scope local|global|project Store scope (default: global ~/.hasna/apps/knowledge/)
181
191
  --no-color Disable color output
182
192
  --completions <shell> Output completions for bash|zsh|fish
@@ -237,6 +247,7 @@ function printCommandHelp(command: string): void {
237
247
  if (command === 'source') { console.log('Usage: open-knowledge source resolve <source-ref> [--purpose knowledge_answer|knowledge_index] [--limit <n>] [--scope local|global|project] [--json]'); return; }
238
248
  if (command === 'ingest') { console.log('Usage: open-knowledge ingest manifest <file|s3://bucket/key> | source <source-ref> [--purpose knowledge_index] [--scope local|global|project] [--json]'); return; }
239
249
  if (command === 'reindex') { console.log('Usage: open-knowledge reindex outbox <file|s3://bucket/key> [--scope local|global|project] [--json]'); return; }
250
+ if (command === 'embeddings') { console.log('Usage: open-knowledge embeddings status|index|search [query] [--model openai:text-embedding-3-small] [--limit <n>] [--dimensions <n>] [--fake] [--scope local|global|project] [--json]'); return; }
240
251
  if (command === 'providers') { console.log('Usage: open-knowledge providers status|models|check [provider|model-alias] [--scope local|global|project] [--json]'); return; }
241
252
  if (command === 'safety') { console.log('Usage: open-knowledge safety status|check|approve|audit|redact [args] [--scope local|global|project] [--json]'); return; }
242
253
  printGlobalHelp();
@@ -283,11 +294,11 @@ async function run(argv: string[]): Promise<void> {
283
294
  if (flags.completions) {
284
295
  const shell = flags.completions;
285
296
  if (shell === 'bash') {
286
- console.log(`_open_knowledge() { local cur; cur="${"$"}{COMP_WORDS[COMP_CWORD]}"; COMPREPLY=($(compgen -W "add list get update archive restore upsert untag delete export prune dedupe stats paths storage db wiki source ingest reindex providers safety help ls rm edit unarchive --json --yes --help --version --desc --page --limit --search --sort --id --store --title --content --url --tag --format --completions --purpose --no-color --scope --archived --include-archived" -- "$cur")); }; complete -F _open_knowledge open-knowledge`);
297
+ console.log(`_open_knowledge() { local cur; cur="${"$"}{COMP_WORDS[COMP_CWORD]}"; COMPREPLY=($(compgen -W "add list get update archive restore upsert untag delete export prune dedupe stats paths storage db wiki source ingest reindex embeddings providers safety help ls rm edit unarchive --json --yes --help --version --desc --page --limit --search --sort --id --store --title --content --url --tag --format --completions --purpose --model --dimensions --fake --no-color --scope --archived --include-archived" -- "$cur")); }; complete -F _open_knowledge open-knowledge`);
287
298
  } else if (shell === 'zsh') {
288
- console.log(`#compdef open-knowledge\n_open_knowledge() { _arguments -C "1: :(add list get update archive restore upsert untag delete export prune dedupe stats paths storage db wiki source ingest reindex providers safety help ls rm edit unarchive)" "(--json)--json" "(--yes)-y" "(--help)--help" "(--version)--version" "(--desc)--desc" "(--archived)--archived" "(--include-archived)--include-archived" "(-p --page)"{-p,--page}"[page number]:number:" "(-l --limit)"{-l,--limit}"[items per page]:number:" "(-s --search)"{-s,--search}"[search text]:text:" "(--sort)--sort"\{created,title\}:" "(--id)--id[item id]:id:" "(--store)--store[store path]:path:" "(--title)--title[new title]:" "(--content)--content[new content]:" "(--url)--url[source url]:" "(-t --tag)"{-t,--tag}"[tag]:tag:" "(--format)--format[json|jsonl]:" "(--completions)--completions[output completions]:shell:(bash zsh fish):" "(--purpose)--purpose[purpose]:" "(--no-color)--no-color[disable color]" "(--scope)--scope"\{local,global,project\}:" }; _open_knowledge`);
299
+ console.log(`#compdef open-knowledge\n_open_knowledge() { _arguments -C "1: :(add list get update archive restore upsert untag delete export prune dedupe stats paths storage db wiki source ingest reindex embeddings providers safety help ls rm edit unarchive)" "(--json)--json" "(--yes)-y" "(--help)--help" "(--version)--version" "(--desc)--desc" "(--archived)--archived" "(--include-archived)--include-archived" "(--fake)--fake" "(-p --page)"{-p,--page}"[page number]:number:" "(-l --limit)"{-l,--limit}"[items per page]:number:" "(-s --search)"{-s,--search}"[search text]:text:" "(--sort)--sort"\{created,title\}:" "(--id)--id[item id]:id:" "(--store)--store[store path]:path:" "(--title)--title[new title]:" "(--content)--content[new content]:" "(--url)--url[source url]:" "(-t --tag)"{-t,--tag}"[tag]:tag:" "(--format)--format[json|jsonl]:" "(--completions)--completions[output completions]:shell:(bash zsh fish):" "(--purpose)--purpose[purpose]:" "(--model)--model[model ref]:" "(--dimensions)--dimensions[embedding dimensions]:number:" "(--no-color)--no-color[disable color]" "(--scope)--scope"\{local,global,project\}:" }; _open_knowledge`);
289
300
  } else if (shell === 'fish') {
290
- console.log(`complete -c open-knowledge -f; complete -c open-knowledge -a "add list get update archive restore upsert untag delete export prune dedupe stats paths storage db wiki source ingest reindex providers safety help ls rm edit unarchive"; complete -c open-knowledge -l json; complete -c open-knowledge -l yes -s y; complete -c open-knowledge -l help -s h; complete -c open-knowledge -l version -s v; complete -c open-knowledge -l desc; complete -c open-knowledge -l archived; complete -c open-knowledge -l include-archived; complete -c open-knowledge -s p -l page; complete -c open-knowledge -s l -l limit; complete -c open-knowledge -s s -l search; complete -c open-knowledge -l sort; complete -c open-knowledge -l id; complete -c open-knowledge -l store; complete -c open-knowledge -l title; complete -c open-knowledge -l content; complete -c open-knowledge -l url; complete -c open-knowledge -s t -l tag; complete -c open-knowledge -l format; complete -c open-knowledge -l completions; complete -c open-knowledge -l purpose; complete -c open-knowledge -l no-color; complete -c open-knowledge -l scope -a "local global project"`);
301
+ console.log(`complete -c open-knowledge -f; complete -c open-knowledge -a "add list get update archive restore upsert untag delete export prune dedupe stats paths storage db wiki source ingest reindex embeddings providers safety help ls rm edit unarchive"; complete -c open-knowledge -l json; complete -c open-knowledge -l yes -s y; complete -c open-knowledge -l help -s h; complete -c open-knowledge -l version -s v; complete -c open-knowledge -l desc; complete -c open-knowledge -l archived; complete -c open-knowledge -l include-archived; complete -c open-knowledge -l fake; complete -c open-knowledge -s p -l page; complete -c open-knowledge -s l -l limit; complete -c open-knowledge -s s -l search; complete -c open-knowledge -l sort; complete -c open-knowledge -l id; complete -c open-knowledge -l store; complete -c open-knowledge -l title; complete -c open-knowledge -l content; complete -c open-knowledge -l url; complete -c open-knowledge -s t -l tag; complete -c open-knowledge -l format; complete -c open-knowledge -l completions; complete -c open-knowledge -l purpose; complete -c open-knowledge -l model; complete -c open-knowledge -l dimensions; complete -c open-knowledge -l no-color; complete -c open-knowledge -l scope -a "local global project"`);
291
302
  } else {
292
303
  throw new Error("Invalid --completions value. Use 'bash', 'zsh', or 'fish'.");
293
304
  }
@@ -534,6 +545,39 @@ async function run(argv: string[]): Promise<void> {
534
545
  return;
535
546
  }
536
547
 
548
+ if (command === 'embeddings') {
549
+ const action = positional[1] ?? 'status';
550
+ if (action === 'status') {
551
+ const result = service.embeddingStatus();
552
+ output({ ok: true, ...result, message: `${result.total_vector_entries} vector index entries` }, flags.json);
553
+ return;
554
+ }
555
+ if (action === 'index') {
556
+ const result = await service.indexEmbeddings({
557
+ limit: flags.limit,
558
+ modelRef: flags.model,
559
+ dimensions: flags.dimensions,
560
+ fake: flags.fake,
561
+ });
562
+ output({ ok: true, ...result, message: `Embedded ${result.chunks_embedded} chunk(s)` }, flags.json);
563
+ return;
564
+ }
565
+ if (action === 'search') {
566
+ const query = positional.slice(2).join(' ');
567
+ if (!query) throw new Error('Usage: open-knowledge embeddings search <query>');
568
+ const result = await service.semanticSearch({
569
+ query,
570
+ limit: flags.limit,
571
+ modelRef: flags.model,
572
+ dimensions: flags.dimensions,
573
+ fake: flags.fake,
574
+ });
575
+ output({ ok: true, ...result, message: `${result.results.length} semantic result(s)` }, flags.json);
576
+ return;
577
+ }
578
+ throw new Error("Invalid embeddings action. Use 'status', 'index', or 'search'.");
579
+ }
580
+
537
581
  if (command === 'providers') {
538
582
  const action = positional[1] ?? 'status';
539
583
  if (action === 'status') {