@gmickel/gno 0.36.0 → 0.38.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +199 -98
- package/assets/skill/SKILL.md +22 -0
- package/assets/skill/cli-reference.md +5 -1
- package/package.json +8 -1
- package/src/cli/commands/ask.ts +25 -7
- package/src/cli/commands/collection/add.ts +6 -0
- package/src/cli/commands/doctor.ts +17 -0
- package/src/cli/commands/embed.ts +2 -3
- package/src/cli/commands/query.ts +21 -6
- package/src/cli/commands/search.ts +3 -0
- package/src/cli/commands/vsearch.ts +10 -3
- package/src/cli/format/search-results.ts +58 -1
- package/src/cli/program.ts +40 -0
- package/src/collection/add.ts +1 -0
- package/src/collection/index.ts +2 -0
- package/src/collection/types.ts +13 -0
- package/src/collection/update.ts +93 -0
- package/src/config/types.ts +14 -0
- package/src/converters/mime.ts +9 -0
- package/src/ingestion/chunker.ts +186 -5
- package/src/ingestion/sync.ts +2 -1
- package/src/ingestion/types.ts +2 -1
- package/src/llm/registry.ts +75 -2
- package/src/mcp/tools/query.ts +17 -8
- package/src/mcp/tools/vsearch.ts +7 -3
- package/src/sdk/client.ts +34 -6
- package/src/sdk/embed.ts +7 -3
- package/src/sdk/types.ts +1 -0
- package/src/serve/public/components/CollectionModelDialog.tsx +397 -0
- package/src/serve/public/pages/Collections.tsx +96 -7
- package/src/serve/routes/api.ts +154 -17
- package/src/serve/server.ts +18 -1
- package/src/store/sqlite/adapter.ts +199 -25
package/README.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# GNO
|
|
2
2
|
|
|
3
|
-
**
|
|
3
|
+
**Local search, retrieval, and synthesis for the files you actually work in.**
|
|
4
4
|
|
|
5
5
|
[](https://www.npmjs.com/package/@gmickel/gno)
|
|
6
6
|
[](./LICENSE)
|
|
@@ -12,7 +12,57 @@
|
|
|
12
12
|
|
|
13
13
|

|
|
14
14
|
|
|
15
|
-
GNO is a local knowledge engine
|
|
15
|
+
GNO is a local knowledge engine for notes, code, PDFs, Office docs, meeting transcripts, and reference material. It gives you fast keyword search, semantic retrieval, grounded answers with citations, wiki-style linking, and a real workspace UI, while keeping the whole stack local by default.
|
|
16
|
+
|
|
17
|
+
Use it when:
|
|
18
|
+
|
|
19
|
+
- your notes live in more than one folder
|
|
20
|
+
- your important knowledge is split across Markdown, code, PDFs, and Office files
|
|
21
|
+
- you want one retrieval layer that works from the CLI, browser, MCP, and a Bun/TypeScript SDK
|
|
22
|
+
- you want better local context for agents without shipping your docs to a cloud API
|
|
23
|
+
|
|
24
|
+
### What GNO Gives You
|
|
25
|
+
|
|
26
|
+
- **Fast local search**: BM25 for exact hits, vectors for concepts, hybrid for best quality
|
|
27
|
+
- **Real retrieval surfaces**: CLI, Web UI, REST API, MCP, SDK
|
|
28
|
+
- **Local-first answers**: grounded synthesis with citations when you want answers, raw retrieval when you do not
|
|
29
|
+
- **Connected knowledge**: backlinks, related notes, graph view, cross-collection navigation
|
|
30
|
+
- **Operational fit**: daemon mode, model presets, remote GPU backends, safe config/state on disk
|
|
31
|
+
|
|
32
|
+
### One-Minute Tour
|
|
33
|
+
|
|
34
|
+
```bash
|
|
35
|
+
# Install
|
|
36
|
+
bun install -g @gmickel/gno
|
|
37
|
+
|
|
38
|
+
# Add a few collections
|
|
39
|
+
gno init ~/notes --name notes
|
|
40
|
+
gno collection add ~/work/docs --name work-docs --pattern "**/*.{md,pdf,docx}"
|
|
41
|
+
gno collection add ~/work/gno/src --name gno-code --pattern "**/*.{ts,tsx,js,jsx}"
|
|
42
|
+
|
|
43
|
+
# Add context so retrieval results come back with the right framing
|
|
44
|
+
gno context add "notes:" "Personal notes, journal entries, and long-form ideas"
|
|
45
|
+
gno context add "work-docs:" "Architecture docs, runbooks, RFCs, meeting notes"
|
|
46
|
+
gno context add "gno-code:" "Source code for the GNO application"
|
|
47
|
+
|
|
48
|
+
# Index + embed
|
|
49
|
+
gno update --yes
|
|
50
|
+
gno embed
|
|
51
|
+
|
|
52
|
+
# Search in the way that fits the question
|
|
53
|
+
gno search "DEC-0054" # exact keyword / identifier
|
|
54
|
+
gno vsearch "retry failed jobs with backoff" # natural-language semantic lookup
|
|
55
|
+
gno query "JWT refresh token rotation" --explain # hybrid retrieval with score traces
|
|
56
|
+
|
|
57
|
+
# Retrieve documents or export context for an agent
|
|
58
|
+
gno get "gno://work-docs/architecture/auth.md"
|
|
59
|
+
gno multi-get "gno-code/**/*.ts" --max-bytes 30000 --md
|
|
60
|
+
gno query "deployment process" --all --files --min-score 0.35
|
|
61
|
+
|
|
62
|
+
# Run the workspace
|
|
63
|
+
gno serve
|
|
64
|
+
gno daemon
|
|
65
|
+
```
|
|
16
66
|
|
|
17
67
|
---
|
|
18
68
|
|
|
@@ -35,45 +85,15 @@ GNO is a local knowledge engine that turns your documents into a searchable, con
|
|
|
35
85
|
|
|
36
86
|
---
|
|
37
87
|
|
|
38
|
-
## What's New
|
|
39
|
-
|
|
40
|
-
- **GNO Desktop Beta**: first mac-first desktop beta shell with deep-link routing, singleton handoff, and the same onboarding/search/edit flows as `gno serve`
|
|
41
|
-
- **Desktop Onboarding Polish**: guided setup now covers folders, presets, model readiness, indexing, connectors, import preview, app tabs, file actions, and recovery without drift between web and desktop
|
|
42
|
-
- **Default Preset Upgrade**: `slim-tuned` is now the built-in default, using the fine-tuned retrieval expansion model while keeping the same embed, rerank, and answer stack as `slim`
|
|
43
|
-
- **Workspace UI Polish**: richer scholarly-dusk presentation across dashboard, tabs, search, ask, footer, and global styling without introducing external font or asset dependencies
|
|
44
|
-
|
|
45
|
-
## What's New in v0.30
|
|
46
|
-
|
|
47
|
-
- **Headless Daemon Mode**: `gno daemon` keeps your index fresh continuously without opening the Web UI
|
|
48
|
-
- **CLI Concurrency Hardening**: read-only commands no longer trip transient `database is locked` errors when they overlap with `gno update`
|
|
49
|
-
- **Web/Desktop UI Polish**: sharper workspace styling across dashboard, tabs, search, ask, and footer surfaces
|
|
50
|
-
|
|
51
|
-
## What's New in v0.31
|
|
52
|
-
|
|
53
|
-
- **Windows Desktop Beta Artifact**: release flow now includes a packaged `windows-x64` desktop beta zip, not just source-level support claims
|
|
54
|
-
- **Packaged Runtime Proof**: Windows desktop packaging validates bundled Bun + staged GNO runtime + FTS5 + vendored snowball + `sqlite-vec`
|
|
55
|
-
- **Scoped Index Fix**: `gno index <collection>` now embeds only that collection instead of accidentally burning through unrelated backlog from other collections
|
|
56
|
-
- **CLI Reporting Fix**: long embed runs now report sane durations instead of bogus sub-second summaries
|
|
88
|
+
## What's New
|
|
57
89
|
|
|
58
|
-
|
|
90
|
+
> Latest release: [v0.37.0](./CHANGELOG.md#0370---2026-04-06)
|
|
91
|
+
> Full release history: [CHANGELOG.md](./CHANGELOG.md)
|
|
59
92
|
|
|
60
|
-
- **
|
|
61
|
-
- **
|
|
62
|
-
- **
|
|
63
|
-
|
|
64
|
-
### v0.23
|
|
65
|
-
|
|
66
|
-
- **SDK / Library Mode**: package-root importable SDK with `createGnoClient(...)` for direct retrieval, document access, and indexing flows
|
|
67
|
-
- **Inline Config Support**: embed GNO in another app without writing YAML config files
|
|
68
|
-
- **Programmatic Indexing**: call `update`, `embed`, and `index` directly from Bun/TypeScript
|
|
69
|
-
- **Docs & Website**: dedicated SDK guide, feature page, homepage section, and architecture docs
|
|
70
|
-
|
|
71
|
-
### v0.22
|
|
72
|
-
|
|
73
|
-
- **Promoted Slim Retrieval Model**: published `slim-retrieval-v1` on Hugging Face for direct `hf:` installation in GNO
|
|
74
|
-
- **Fine-Tuning Workflow**: local MLX LoRA training, portable GGUF export, automatic checkpoint selection, promotion bundles, and repeatable benchmark comparisons
|
|
75
|
-
- **Autonomous Search Harness**: bounded candidate search with early-stop guards, repeated incumbent confirmation, and promotion targets
|
|
76
|
-
- **Public Docs & Site**: fine-tuned model docs and feature pages now point at the published HF model and the `slim-tuned` preset
|
|
93
|
+
- **Retrieval Quality Upgrade**: stronger BM25 lexical handling, code-aware chunking, terminal result hyperlinks, and per-collection model overrides
|
|
94
|
+
- **Code Embedding Benchmarks**: new benchmark workflow across canonical, real-GNO, and pinned OSS slices for comparing alternate embedding models
|
|
95
|
+
- **Recommended Code Embed Model**: docs and benchmark pages now point to `Qwen3-Embedding-0.6B-GGUF` as the current code-specialist embedding option
|
|
96
|
+
- **Regression Fixes**: tightened phrase/negation/hyphen/underscore BM25 behavior, cleaned non-TTY hyperlink output, improved `gno doctor` chunking visibility, and fixed the embedding autoresearch harness
|
|
77
97
|
|
|
78
98
|
### Fine-Tuned Model Quick Use
|
|
79
99
|
|
|
@@ -100,58 +120,6 @@ gno query "ECONNREFUSED 127.0.0.1:5432" --thorough
|
|
|
100
120
|
|
|
101
121
|
> Full guide: [Fine-Tuned Models](https://gno.sh/docs/FINE-TUNED-MODELS/) · [Feature page](https://gno.sh/features/fine-tuned-models/)
|
|
102
122
|
|
|
103
|
-
## What's New in v0.21
|
|
104
|
-
|
|
105
|
-
- **Ask CLI Query Modes**: `gno ask` now accepts repeatable `--query-mode term|intent|hyde` entries, matching the existing Ask API and Web controls
|
|
106
|
-
|
|
107
|
-
### v0.20
|
|
108
|
-
|
|
109
|
-
- **Improved Model Init Fallbacks**: upgraded `node-llama-cpp` to `3.17.1` and switched to `build: "autoAttempt"` for better backend selection/fallback behavior
|
|
110
|
-
|
|
111
|
-
### v0.19
|
|
112
|
-
|
|
113
|
-
- **Exclusion Filters**: explicit `exclude` controls across CLI, API, Web, and MCP to hard-prune unwanted docs by title/path/body text
|
|
114
|
-
- **Ask Query-Mode Parity**: Ask now supports structured `term` / `intent` / `hyde` controls in both API and Web UI
|
|
115
|
-
|
|
116
|
-
### v0.18
|
|
117
|
-
|
|
118
|
-
- **Intent Steering**: optional `intent` control for ambiguous queries across CLI, API, Web, and MCP query flows
|
|
119
|
-
- **Rerank Controls**: `candidateLimit` lets you tune rerank cost vs. recall on slower or memory-constrained machines
|
|
120
|
-
- **Stability**: query expansion now uses a bounded configurable context size (`models.expandContextSize`, default `2048`)
|
|
121
|
-
- **Rerank Efficiency**: identical chunk texts are deduplicated before scoring and expanded back out deterministically
|
|
122
|
-
|
|
123
|
-
### v0.17
|
|
124
|
-
|
|
125
|
-
- **Structured Query Modes**: `term`, `intent`, and `hyde` controls across CLI, API, MCP, and Web
|
|
126
|
-
- **Temporal Retrieval Upgrades**: `since`/`until`, date-range parsing, and recency sorting with frontmatter-date fallback
|
|
127
|
-
- **Web Retrieval UX Polish**: richer advanced controls in Search and Ask (collection/date/category/author/tags + query modes)
|
|
128
|
-
- **Metadata-Aware Retrieval**: ingestion now materializes document metadata/date fields for better filtering and ranking
|
|
129
|
-
- **Migration Reliability**: SQLite-compatible migration path for existing indexes (including older SQLite engines)
|
|
130
|
-
|
|
131
|
-
### v0.15
|
|
132
|
-
|
|
133
|
-
- **HTTP Backends**: Offload embedding, reranking, and generation to remote GPU servers
|
|
134
|
-
- Simple URI config: `http://host:port/path#modelname`
|
|
135
|
-
- Works with llama-server, Ollama, LocalAI, vLLM
|
|
136
|
-
- Run GNO on lightweight machines while GPU inference runs on your network
|
|
137
|
-
|
|
138
|
-
### v0.13
|
|
139
|
-
|
|
140
|
-
- **Knowledge Graph**: Interactive force-directed visualization of document connections
|
|
141
|
-
- **Graph with Similarity**: See semantic similarity as golden edges (not just wiki/markdown links)
|
|
142
|
-
- **CLI**: `gno graph` command with collection filtering and similarity options
|
|
143
|
-
- **Web UI**: `/graph` page with zoom, pan, collection filter, similarity toggle
|
|
144
|
-
- **MCP**: `gno_graph` tool for AI agents to explore document relationships
|
|
145
|
-
- **REST API**: `/api/graph` endpoint with full query parameters
|
|
146
|
-
|
|
147
|
-
### v0.12
|
|
148
|
-
|
|
149
|
-
- **Note Linking**: Wiki-style `[[links]]`, backlinks, and AI-powered related notes
|
|
150
|
-
- **Tag System**: Filter searches by frontmatter tags with `--tags-any`/`--tags-all`
|
|
151
|
-
- **Web UI**: Outgoing links panel, backlinks panel, related notes sidebar
|
|
152
|
-
- **CLI**: `gno links`, `gno backlinks`, `gno similar` commands
|
|
153
|
-
- **MCP**: `gno_links`, `gno_backlinks`, `gno_similar` tools
|
|
154
|
-
|
|
155
123
|
---
|
|
156
124
|
|
|
157
125
|
## Quick Start
|
|
@@ -265,6 +233,14 @@ headless. In v0.30 it is foreground-only and does not expose built-in
|
|
|
265
233
|
|
|
266
234
|
Embed GNO directly in another Bun or TypeScript app. No CLI subprocesses. No local server required.
|
|
267
235
|
|
|
236
|
+
Install:
|
|
237
|
+
|
|
238
|
+
```bash
|
|
239
|
+
bun add @gmickel/gno
|
|
240
|
+
```
|
|
241
|
+
|
|
242
|
+
Minimal client:
|
|
243
|
+
|
|
268
244
|
```ts
|
|
269
245
|
import { createDefaultConfig, createGnoClient } from "@gmickel/gno";
|
|
270
246
|
|
|
@@ -295,6 +271,43 @@ console.log(results.results[0]?.uri);
|
|
|
295
271
|
await client.close();
|
|
296
272
|
```
|
|
297
273
|
|
|
274
|
+
More SDK examples:
|
|
275
|
+
|
|
276
|
+
```ts
|
|
277
|
+
import { createGnoClient } from "@gmickel/gno";
|
|
278
|
+
|
|
279
|
+
const client = await createGnoClient({
|
|
280
|
+
configPath: "/Users/me/.config/gno/index.yml",
|
|
281
|
+
});
|
|
282
|
+
|
|
283
|
+
// Fast exact search
|
|
284
|
+
const bm25 = await client.search("DEC-0054", {
|
|
285
|
+
collection: "work-docs",
|
|
286
|
+
});
|
|
287
|
+
|
|
288
|
+
// Semantic code lookup
|
|
289
|
+
const semantic = await client.vsearch("retry failed jobs with backoff", {
|
|
290
|
+
collection: "gno-code",
|
|
291
|
+
});
|
|
292
|
+
|
|
293
|
+
// Hybrid retrieval with explicit intent
|
|
294
|
+
const hybrid = await client.query("token refresh", {
|
|
295
|
+
collection: "work-docs",
|
|
296
|
+
intent: "JWT refresh token rotation in our auth stack",
|
|
297
|
+
candidateLimit: 12,
|
|
298
|
+
});
|
|
299
|
+
|
|
300
|
+
// Fetch content directly
|
|
301
|
+
const doc = await client.get("gno://work-docs/auth/refresh.md");
|
|
302
|
+
const bundle = await client.multiGet(["gno-code/**/*.ts"], { maxBytes: 25000 });
|
|
303
|
+
|
|
304
|
+
// Indexing / embedding
|
|
305
|
+
await client.update({ collection: "work-docs" });
|
|
306
|
+
await client.embed({ collection: "gno-code" });
|
|
307
|
+
|
|
308
|
+
await client.close();
|
|
309
|
+
```
|
|
310
|
+
|
|
298
311
|
Core SDK surface:
|
|
299
312
|
|
|
300
313
|
- `createGnoClient({ config | configPath, dbPath? })`
|
|
@@ -303,12 +316,6 @@ Core SDK surface:
|
|
|
303
316
|
- `update`, `embed`, `index`
|
|
304
317
|
- `close`
|
|
305
318
|
|
|
306
|
-
Install in an app:
|
|
307
|
-
|
|
308
|
-
```bash
|
|
309
|
-
bun add @gmickel/gno
|
|
310
|
-
```
|
|
311
|
-
|
|
312
319
|
Full guide: [SDK docs](https://gno.sh/docs/SDK/)
|
|
313
320
|
|
|
314
321
|
---
|
|
@@ -338,6 +345,31 @@ gno ask "what did we decide" --answer # AI synthesis
|
|
|
338
345
|
|
|
339
346
|
Output formats: `--json`, `--files`, `--csv`, `--md`, `--xml`
|
|
340
347
|
|
|
348
|
+
### Common CLI Recipes
|
|
349
|
+
|
|
350
|
+
```bash
|
|
351
|
+
# Search one collection
|
|
352
|
+
gno search "PostgreSQL connection pool" --collection work-docs
|
|
353
|
+
|
|
354
|
+
# Export retrieval results for an agent
|
|
355
|
+
gno query "authentication flow" --json -n 10
|
|
356
|
+
gno query "deployment rollback" --all --files --min-score 0.4
|
|
357
|
+
|
|
358
|
+
# Retrieve a document by URI or docid
|
|
359
|
+
gno get "gno://work-docs/runbooks/deploy.md"
|
|
360
|
+
gno get "#abc123"
|
|
361
|
+
|
|
362
|
+
# Fetch many documents at once
|
|
363
|
+
gno multi-get "work-docs/**/*.md" --max-bytes 20000 --md
|
|
364
|
+
|
|
365
|
+
# Inspect how the hybrid rank was assembled
|
|
366
|
+
gno query "refresh token rotation" --explain
|
|
367
|
+
|
|
368
|
+
# Work with filters
|
|
369
|
+
gno query "meeting notes" --since "last month" --category "meeting,notes"
|
|
370
|
+
gno search "incident review" --tags-all "status/active,team/platform"
|
|
371
|
+
```
|
|
372
|
+
|
|
341
373
|
### Retrieval V2 Controls
|
|
342
374
|
|
|
343
375
|
Existing query calls still work. Retrieval v2 adds optional structured intent control and deeper explain output.
|
|
@@ -382,6 +414,20 @@ gno skill install --scope user
|
|
|
382
414
|
|
|
383
415
|
Then ask your agent: _"Search my notes for the auth discussion"_
|
|
384
416
|
|
|
417
|
+
Agent-friendly CLI examples:
|
|
418
|
+
|
|
419
|
+
```bash
|
|
420
|
+
# Structured retrieval output for an agent
|
|
421
|
+
gno query "authentication" --json -n 10
|
|
422
|
+
|
|
423
|
+
# File list for downstream retrieval
|
|
424
|
+
gno query "error handling" --all --files --min-score 0.35
|
|
425
|
+
|
|
426
|
+
# Full document content when the agent already knows the ref
|
|
427
|
+
gno get "gno://work-docs/api-reference.md" --full
|
|
428
|
+
gno multi-get "work-docs/**/*.md" --md --max-bytes 30000
|
|
429
|
+
```
|
|
430
|
+
|
|
385
431
|
[Skill setup guide →](https://gno.sh/docs/integrations/skills/)
|
|
386
432
|
|
|
387
433
|
### MCP Server
|
|
@@ -655,7 +701,7 @@ See:
|
|
|
655
701
|
Offload inference to a GPU server on your network:
|
|
656
702
|
|
|
657
703
|
```yaml
|
|
658
|
-
# ~/.config/gno/
|
|
704
|
+
# ~/.config/gno/index.yml
|
|
659
705
|
models:
|
|
660
706
|
activePreset: remote-gpu
|
|
661
707
|
presets:
|
|
@@ -715,6 +761,61 @@ bun run eval:hybrid:delta
|
|
|
715
761
|
- Benchmark guide: [evals/README.md](./evals/README.md)
|
|
716
762
|
- Latest baseline snapshot: [evals/fixtures/hybrid-baseline/latest.json](./evals/fixtures/hybrid-baseline/latest.json)
|
|
717
763
|
|
|
764
|
+
### Code Embedding Benchmark Harness
|
|
765
|
+
|
|
766
|
+
GNO also has a dedicated harness for comparing alternate embedding models on code retrieval without touching product defaults:
|
|
767
|
+
|
|
768
|
+
```bash
|
|
769
|
+
# Establish the current incumbent baseline
|
|
770
|
+
bun run bench:code-embeddings --candidate bge-m3-incumbent --write
|
|
771
|
+
|
|
772
|
+
# Add candidate model URIs to the search space, then inspect them
|
|
773
|
+
bun run research:embeddings:autonomous:list-search-candidates
|
|
774
|
+
|
|
775
|
+
# Benchmark one candidate explicitly
|
|
776
|
+
bun run research:embeddings:autonomous:run-candidate bge-m3-incumbent
|
|
777
|
+
|
|
778
|
+
# Or let the bounded search harness walk the remaining candidates later
|
|
779
|
+
bun run research:embeddings:autonomous:search --dry-run
|
|
780
|
+
```
|
|
781
|
+
|
|
782
|
+
See [research/embeddings/README.md](./research/embeddings/README.md).
|
|
783
|
+
|
|
784
|
+
If a model turns out to be better specifically for code, the intended user story is:
|
|
785
|
+
|
|
786
|
+
- keep the default global preset for mixed prose/docs collections
|
|
787
|
+
- use per-collection `models.embed` overrides for code collections
|
|
788
|
+
|
|
789
|
+
That lets GNO stay sane by default while still giving power users a clean path to code-specialist retrieval.
|
|
790
|
+
|
|
791
|
+
Current code-focused recommendation:
|
|
792
|
+
|
|
793
|
+
```yaml
|
|
794
|
+
collections:
|
|
795
|
+
- name: gno-code
|
|
796
|
+
path: /Users/you/work/gno/src
|
|
797
|
+
pattern: "**/*.{ts,tsx,js,jsx,go,rs,py,swift,c}"
|
|
798
|
+
models:
|
|
799
|
+
embed: "hf:Qwen/Qwen3-Embedding-0.6B-GGUF/Qwen3-Embedding-0.6B-Q8_0.gguf"
|
|
800
|
+
```
|
|
801
|
+
|
|
802
|
+
GNO treats that override like any other model URI:
|
|
803
|
+
|
|
804
|
+
- auto-downloads on first use by default
|
|
805
|
+
- manual-only if `GNO_NO_AUTO_DOWNLOAD=1`
|
|
806
|
+
- offline-safe if the model is already cached
|
|
807
|
+
|
|
808
|
+
Why this is the current recommendation:
|
|
809
|
+
|
|
810
|
+
- matches `bge-m3` on the tiny canonical benchmark
|
|
811
|
+
- significantly beats `bge-m3` on the real GNO `src/serve` code slice
|
|
812
|
+
- also beats `bge-m3` on a pinned public-OSS code slice
|
|
813
|
+
|
|
814
|
+
Trade-off:
|
|
815
|
+
|
|
816
|
+
- Qwen is slower to embed than `bge-m3`
|
|
817
|
+
- use it where code retrieval quality matters, not necessarily as the global default for every collection
|
|
818
|
+
|
|
718
819
|
---
|
|
719
820
|
|
|
720
821
|
## License
|
package/assets/skill/SKILL.md
CHANGED
|
@@ -177,6 +177,28 @@ gno embed # Embed only (if already synced)
|
|
|
177
177
|
|
|
178
178
|
MCP `gno.sync` and `gno.capture` do NOT auto-embed. Use CLI for embedding.
|
|
179
179
|
|
|
180
|
+
## Collection-specific embedding models
|
|
181
|
+
|
|
182
|
+
Collections can override the global embedding model with `models.embed`.
|
|
183
|
+
|
|
184
|
+
CLI path:
|
|
185
|
+
|
|
186
|
+
```bash
|
|
187
|
+
gno collection add ~/work/gno/src \
|
|
188
|
+
--name gno-code \
|
|
189
|
+
--embed-model "hf:Qwen/Qwen3-Embedding-0.6B-GGUF/Qwen3-Embedding-0.6B-Q8_0.gguf"
|
|
190
|
+
```
|
|
191
|
+
|
|
192
|
+
Good default guidance:
|
|
193
|
+
|
|
194
|
+
- keep the global preset for mixed notes/docs collections
|
|
195
|
+
- use a collection-specific embed override for code-heavy collections when benchmark guidance says so
|
|
196
|
+
- after changing an embed model on an existing populated collection, run:
|
|
197
|
+
|
|
198
|
+
```bash
|
|
199
|
+
gno embed --collection gno-code
|
|
200
|
+
```
|
|
201
|
+
|
|
180
202
|
## Reference Documentation
|
|
181
203
|
|
|
182
204
|
| Topic | File |
|
|
package/assets/skill/cli-reference.md
CHANGED
|
@@ -42,7 +42,11 @@ gno init [<path>] [options]
|
|
|
42
42
|
gno collection add <path> --name <name> [options]
|
|
43
43
|
```
|
|
44
44
|
|
|
45
|
-
Options same as `init`.
|
|
45
|
+
Options same as `init`, plus:
|
|
46
|
+
|
|
47
|
+
| Option | Description |
|
|
48
|
+
| --------------------- | ---------------------------------------------------- |
|
|
49
|
+
| `--embed-model <uri>` | Initial collection-specific embedding model override |
|
|
46
50
|
|
|
47
51
|
### gno collection list
|
|
48
52
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@gmickel/gno",
|
|
3
|
-
"version": "0.36.0",
|
|
3
|
+
"version": "0.38.0",
|
|
4
4
|
"description": "Local semantic search for your documents. Index Markdown, PDF, and Office files with hybrid BM25 + vector search.",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"embeddings",
|
|
@@ -69,6 +69,8 @@
|
|
|
69
69
|
"eval:hybrid": "bun --bun evalite evals/hybrid.eval.ts",
|
|
70
70
|
"eval:hybrid:baseline": "bun scripts/hybrid-benchmark.ts --write",
|
|
71
71
|
"eval:hybrid:delta": "bun scripts/hybrid-benchmark.ts --delta",
|
|
72
|
+
"bench:code-embeddings": "bun scripts/code-embedding-benchmark.ts",
|
|
73
|
+
"bench:code-embeddings:write": "bun scripts/code-embedding-benchmark.ts --write",
|
|
72
74
|
"eval:retrieval-candidates": "bun scripts/retrieval-candidate-benchmark.ts",
|
|
73
75
|
"eval:retrieval-candidates:write": "bun scripts/retrieval-candidate-benchmark.ts --write",
|
|
74
76
|
"eval:watch": "bun --bun evalite watch",
|
|
@@ -83,6 +85,11 @@
|
|
|
83
85
|
"research:finetune:autonomous:confirm-winner": "bun research/finetune/autonomous/scripts/confirm-winner.ts",
|
|
84
86
|
"research:finetune:autonomous:check-promotion-targets": "bun research/finetune/autonomous/scripts/check-promotion-targets.ts",
|
|
85
87
|
"research:finetune:validate": "bun research/finetune/scripts/validate-sandbox.ts",
|
|
88
|
+
"research:embeddings:autonomous:list-search-candidates": "bun research/embeddings/autonomous/scripts/list-search-candidates.ts",
|
|
89
|
+
"research:embeddings:autonomous:run-candidate": "bun research/embeddings/autonomous/scripts/run-candidate.ts",
|
|
90
|
+
"research:embeddings:autonomous:leaderboard": "bun research/embeddings/autonomous/scripts/leaderboard.ts",
|
|
91
|
+
"research:embeddings:autonomous:confirm-winner": "bun research/embeddings/autonomous/scripts/confirm-winner.ts",
|
|
92
|
+
"research:embeddings:autonomous:search": "bun research/embeddings/autonomous/scripts/search.ts",
|
|
86
93
|
"research:finetune:qmd-import:legacy": "bun research/finetune/scripts/import-qmd-training.ts",
|
|
87
94
|
"research:finetune:mlx:build-dataset": "bun research/finetune/scripts/build-mlx-dataset.ts",
|
|
88
95
|
"research:finetune:build-variant-dataset": "bun research/finetune/scripts/build-variant-dataset.ts",
|
package/src/cli/commands/ask.ts
CHANGED
|
@@ -14,7 +14,7 @@ import type { AskOptions, AskResult, Citation } from "../../pipeline/types";
|
|
|
14
14
|
|
|
15
15
|
import { LlmAdapter } from "../../llm/nodeLlamaCpp/adapter";
|
|
16
16
|
import { resolveDownloadPolicy } from "../../llm/policy";
|
|
17
|
-
import { getActivePreset } from "../../llm/registry";
|
|
17
|
+
import { resolveModelUri } from "../../llm/registry";
|
|
18
18
|
import {
|
|
19
19
|
generateGroundedAnswer,
|
|
20
20
|
processAnswerResult,
|
|
@@ -90,7 +90,6 @@ export async function ask(
|
|
|
90
90
|
let rerankPort: RerankPort | null = null;
|
|
91
91
|
|
|
92
92
|
try {
|
|
93
|
-
const preset = getActivePreset(config);
|
|
94
93
|
const llm = new LlmAdapter(config);
|
|
95
94
|
|
|
96
95
|
// Resolve download policy from env/flags
|
|
@@ -106,7 +105,12 @@ export async function ask(
|
|
|
106
105
|
: undefined;
|
|
107
106
|
|
|
108
107
|
// Create embedding port
|
|
109
|
-
const embedUri =
|
|
108
|
+
const embedUri = resolveModelUri(
|
|
109
|
+
config,
|
|
110
|
+
"embed",
|
|
111
|
+
options.embedModel,
|
|
112
|
+
options.collection
|
|
113
|
+
);
|
|
110
114
|
const embedResult = await llm.createEmbeddingPort(embedUri, {
|
|
111
115
|
policy,
|
|
112
116
|
onProgress: downloadProgress
|
|
@@ -119,8 +123,12 @@ export async function ask(
|
|
|
119
123
|
|
|
120
124
|
// Create expansion port when expansion is enabled.
|
|
121
125
|
if (!options.noExpand && !options.queryModes?.length) {
|
|
122
|
-
const expandUri =
|
|
123
|
-
|
|
126
|
+
const expandUri = resolveModelUri(
|
|
127
|
+
config,
|
|
128
|
+
"expand",
|
|
129
|
+
options.expandModel ?? options.genModel,
|
|
130
|
+
options.collection
|
|
131
|
+
);
|
|
124
132
|
const genResult = await llm.createExpansionPort(expandUri, {
|
|
125
133
|
policy,
|
|
126
134
|
onProgress: downloadProgress
|
|
@@ -134,7 +142,12 @@ export async function ask(
|
|
|
134
142
|
|
|
135
143
|
// Create answer generation port when answers are requested.
|
|
136
144
|
if (options.answer) {
|
|
137
|
-
const genUri =
|
|
145
|
+
const genUri = resolveModelUri(
|
|
146
|
+
config,
|
|
147
|
+
"gen",
|
|
148
|
+
options.genModel,
|
|
149
|
+
options.collection
|
|
150
|
+
);
|
|
138
151
|
const genResult = await llm.createGenerationPort(genUri, {
|
|
139
152
|
policy,
|
|
140
153
|
onProgress: downloadProgress
|
|
@@ -148,7 +161,12 @@ export async function ask(
|
|
|
148
161
|
|
|
149
162
|
// Create rerank port (unless --fast or --no-rerank)
|
|
150
163
|
if (!options.noRerank) {
|
|
151
|
-
const rerankUri =
|
|
164
|
+
const rerankUri = resolveModelUri(
|
|
165
|
+
config,
|
|
166
|
+
"rerank",
|
|
167
|
+
options.rerankModel,
|
|
168
|
+
options.collection
|
|
169
|
+
);
|
|
152
170
|
const rerankResult = await llm.createRerankPort(rerankUri, {
|
|
153
171
|
policy,
|
|
154
172
|
onProgress: downloadProgress
|
|
package/src/cli/commands/collection/add.ts
CHANGED
|
@@ -12,6 +12,7 @@ import {
|
|
|
12
12
|
import { CliError } from "../../errors";
|
|
13
13
|
|
|
14
14
|
interface AddOptions {
|
|
15
|
+
embedModel?: string;
|
|
15
16
|
name?: string;
|
|
16
17
|
pattern?: string;
|
|
17
18
|
include?: string;
|
|
@@ -51,6 +52,11 @@ export async function collectionAdd(
|
|
|
51
52
|
pattern: options.pattern,
|
|
52
53
|
include: options.include,
|
|
53
54
|
exclude: options.exclude,
|
|
55
|
+
models: options.embedModel
|
|
56
|
+
? {
|
|
57
|
+
embed: options.embedModel,
|
|
58
|
+
}
|
|
59
|
+
: undefined,
|
|
54
60
|
updateCmd: options.update,
|
|
55
61
|
});
|
|
56
62
|
|
|
package/src/cli/commands/doctor.ts
CHANGED
|
@@ -14,6 +14,7 @@ import type { Config } from "../../config/types";
|
|
|
14
14
|
|
|
15
15
|
import { getIndexDbPath, getModelsCachePath } from "../../app/constants";
|
|
16
16
|
import { getConfigPaths, isInitialized, loadConfig } from "../../config";
|
|
17
|
+
import { getCodeChunkingStatus } from "../../ingestion/chunker";
|
|
17
18
|
import { ModelCache } from "../../llm/cache";
|
|
18
19
|
import { getActivePreset } from "../../llm/registry";
|
|
19
20
|
import { loadFts5Snowball } from "../../store/sqlite/fts5-snowball";
|
|
@@ -122,6 +123,19 @@ async function checkModels(config: Config): Promise<DoctorCheck[]> {
|
|
|
122
123
|
return checks;
|
|
123
124
|
}
|
|
124
125
|
|
|
126
|
+
function checkCodeChunking(): DoctorCheck {
|
|
127
|
+
const status = getCodeChunkingStatus();
|
|
128
|
+
return {
|
|
129
|
+
name: "code-chunking",
|
|
130
|
+
status: "ok",
|
|
131
|
+
message: `${status.mode} structural chunking for ${status.supportedExtensions.join(", ")}`,
|
|
132
|
+
details: [
|
|
133
|
+
"Unsupported extensions fall back to the default markdown chunker.",
|
|
134
|
+
"Chunking mode is automatic-only in the first pass.",
|
|
135
|
+
],
|
|
136
|
+
};
|
|
137
|
+
}
|
|
138
|
+
|
|
125
139
|
async function checkNodeLlamaCpp(): Promise<DoctorCheck> {
|
|
126
140
|
try {
|
|
127
141
|
const { getLlama } = await import("node-llama-cpp");
|
|
@@ -319,6 +333,9 @@ export async function doctor(
|
|
|
319
333
|
const sqliteChecks = await checkSqliteExtensions();
|
|
320
334
|
checks.push(...sqliteChecks);
|
|
321
335
|
|
|
336
|
+
// Code chunking capability
|
|
337
|
+
checks.push(checkCodeChunking());
|
|
338
|
+
|
|
322
339
|
// Determine overall health
|
|
323
340
|
const hasErrors = checks.some((c) => c.status === "error");
|
|
324
341
|
|
|
package/src/cli/commands/embed.ts
CHANGED
|
@@ -19,7 +19,7 @@ import {
|
|
|
19
19
|
} from "../../config";
|
|
20
20
|
import { LlmAdapter } from "../../llm/nodeLlamaCpp/adapter";
|
|
21
21
|
import { resolveDownloadPolicy } from "../../llm/policy";
|
|
22
|
-
import { getActivePreset } from "../../llm/registry";
|
|
22
|
+
import { resolveModelUri } from "../../llm/registry";
|
|
23
23
|
import { formatDocForEmbedding } from "../../pipeline/contextual";
|
|
24
24
|
import { SqliteAdapter } from "../../store/sqlite/adapter";
|
|
25
25
|
import { err, ok } from "../../store/types";
|
|
@@ -271,8 +271,7 @@ async function initEmbedContext(
|
|
|
271
271
|
return { ok: false, error: `Collection not found: ${collection}` };
|
|
272
272
|
}
|
|
273
273
|
|
|
274
|
-
const preset = getActivePreset(config);
|
|
275
|
-
const modelUri = model ?? preset.embed;
|
|
274
|
+
const modelUri = resolveModelUri(config, "embed", model, collection);
|
|
276
275
|
|
|
277
276
|
const store = new SqliteAdapter();
|
|
278
277
|
const dbPath = getIndexDbPath();
|
|
package/src/cli/commands/query.ts
CHANGED
|
@@ -14,7 +14,7 @@ import type { HybridSearchOptions, SearchResults } from "../../pipeline/types";
|
|
|
14
14
|
|
|
15
15
|
import { LlmAdapter } from "../../llm/nodeLlamaCpp/adapter";
|
|
16
16
|
import { resolveDownloadPolicy } from "../../llm/policy";
|
|
17
|
-
import { getActivePreset } from "../../llm/registry";
|
|
17
|
+
import { resolveModelUri } from "../../llm/registry";
|
|
18
18
|
import { type HybridSearchDeps, searchHybrid } from "../../pipeline/hybrid";
|
|
19
19
|
import {
|
|
20
20
|
createVectorIndexPort,
|
|
@@ -58,6 +58,7 @@ export interface QueryFormatOptions {
|
|
|
58
58
|
format: "terminal" | "json" | "files" | "csv" | "md" | "xml";
|
|
59
59
|
full?: boolean;
|
|
60
60
|
lineNumbers?: boolean;
|
|
61
|
+
terminalLinks?: import("../format/search-results").FormatOptions["terminalLinks"];
|
|
61
62
|
}
|
|
62
63
|
|
|
63
64
|
export type QueryResult =
|
|
@@ -97,7 +98,6 @@ export async function query(
|
|
|
97
98
|
let rerankPort: RerankPort | null = null;
|
|
98
99
|
|
|
99
100
|
try {
|
|
100
|
-
const preset = getActivePreset(config);
|
|
101
101
|
const llm = new LlmAdapter(config);
|
|
102
102
|
|
|
103
103
|
// Resolve download policy from env/flags
|
|
@@ -113,7 +113,12 @@ export async function query(
|
|
|
113
113
|
: undefined;
|
|
114
114
|
|
|
115
115
|
// Create embedding port (for vector search)
|
|
116
|
-
const embedUri =
|
|
116
|
+
const embedUri = resolveModelUri(
|
|
117
|
+
config,
|
|
118
|
+
"embed",
|
|
119
|
+
options.embedModel,
|
|
120
|
+
options.collection
|
|
121
|
+
);
|
|
117
122
|
const embedResult = await llm.createEmbeddingPort(embedUri, {
|
|
118
123
|
policy,
|
|
119
124
|
onProgress: downloadProgress
|
|
@@ -127,8 +132,12 @@ export async function query(
|
|
|
127
132
|
// Create expansion port - optional.
|
|
128
133
|
// Skip when structured query modes are provided.
|
|
129
134
|
if (!options.noExpand && !options.queryModes?.length) {
|
|
130
|
-
const expandUri =
|
|
131
|
-
|
|
135
|
+
const expandUri = resolveModelUri(
|
|
136
|
+
config,
|
|
137
|
+
"expand",
|
|
138
|
+
options.expandModel ?? options.genModel,
|
|
139
|
+
options.collection
|
|
140
|
+
);
|
|
132
141
|
const genResult = await llm.createExpansionPort(expandUri, {
|
|
133
142
|
policy,
|
|
134
143
|
onProgress: downloadProgress
|
|
@@ -142,7 +151,12 @@ export async function query(
|
|
|
142
151
|
|
|
143
152
|
// Create rerank port - optional
|
|
144
153
|
if (!options.noRerank) {
|
|
145
|
-
const rerankUri =
|
|
154
|
+
const rerankUri = resolveModelUri(
|
|
155
|
+
config,
|
|
156
|
+
"rerank",
|
|
157
|
+
options.rerankModel,
|
|
158
|
+
options.collection
|
|
159
|
+
);
|
|
146
160
|
const rerankResult = await llm.createRerankPort(rerankUri, {
|
|
147
161
|
policy,
|
|
148
162
|
onProgress: downloadProgress
|
|
@@ -260,5 +274,6 @@ export function formatQuery(
|
|
|
260
274
|
format: options.format,
|
|
261
275
|
full: options.full,
|
|
262
276
|
lineNumbers: options.lineNumbers,
|
|
277
|
+
terminalLinks: options.terminalLinks,
|
|
263
278
|
});
|
|
264
279
|
}
|