@gmickel/gno 0.37.0 → 0.39.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -85,45 +85,15 @@ gno daemon
85
85
 
86
86
  ---
87
87
 
88
- ## What's New in v0.29
88
+ ## What's New
89
89
 
90
- - **GNO Desktop Beta**: first mac-first desktop beta shell with deep-link routing, singleton handoff, and the same onboarding/search/edit flows as `gno serve`
91
- - **Desktop Onboarding Polish**: guided setup now covers folders, presets, model readiness, indexing, connectors, import preview, app tabs, file actions, and recovery without drift between web and desktop
92
- - **Default Preset Upgrade**: `slim-tuned` is now the built-in default, using the fine-tuned retrieval expansion model while keeping the same embed, rerank, and answer stack as `slim`
93
- - **Workspace UI Polish**: richer scholarly-dusk presentation across dashboard, tabs, search, ask, footer, and global styling without introducing external font or asset dependencies
90
+ > Latest release: [v0.37.0](./CHANGELOG.md#0370---2026-04-06)
91
+ > Full release history: [CHANGELOG.md](./CHANGELOG.md)
94
92
 
95
- ## What's New in v0.30
96
-
97
- - **Headless Daemon Mode**: `gno daemon` keeps your index fresh continuously without opening the Web UI
98
- - **CLI Concurrency Hardening**: read-only commands no longer trip transient `database is locked` errors when they overlap with `gno update`
99
- - **Web/Desktop UI Polish**: sharper workspace styling across dashboard, tabs, search, ask, and footer surfaces
100
-
101
- ## What's New in v0.31
102
-
103
- - **Windows Desktop Beta Artifact**: release flow now includes a packaged `windows-x64` desktop beta zip, not just source-level support claims
104
- - **Packaged Runtime Proof**: Windows desktop packaging validates bundled Bun + staged GNO runtime + FTS5 + vendored snowball + `sqlite-vec`
105
- - **Scoped Index Fix**: `gno index <collection>` now embeds only that collection instead of accidentally burning through unrelated backlog from other collections
106
- - **CLI Reporting Fix**: long embed runs now report sane durations instead of bogus sub-second summaries
107
-
108
- ### v0.24
109
-
110
- - **Structured Query Documents**: first-class multi-line query syntax using `term:`, `intent:`, and `hyde:`
111
- - **Cross-Surface Rollout**: works across CLI, API, MCP, SDK, and Web Search/Ask
112
- - **Portable Retrieval Prompts**: save/share advanced retrieval intent as one text payload instead of repeated flags or JSON arrays
113
-
114
- ### v0.23
115
-
116
- - **SDK / Library Mode**: package-root importable SDK with `createGnoClient(...)` for direct retrieval, document access, and indexing flows
117
- - **Inline Config Support**: embed GNO in another app without writing YAML config files
118
- - **Programmatic Indexing**: call `update`, `embed`, and `index` directly from Bun/TypeScript
119
- - **Docs & Website**: dedicated SDK guide, feature page, homepage section, and architecture docs
120
-
121
- ### v0.22
122
-
123
- - **Promoted Slim Retrieval Model**: published `slim-retrieval-v1` on Hugging Face for direct `hf:` installation in GNO
124
- - **Fine-Tuning Workflow**: local MLX LoRA training, portable GGUF export, automatic checkpoint selection, promotion bundles, and repeatable benchmark comparisons
125
- - **Autonomous Search Harness**: bounded candidate search with early-stop guards, repeated incumbent confirmation, and promotion targets
126
- - **Public Docs & Site**: fine-tuned model docs and feature pages now point at the published HF model and the `slim-tuned` preset
93
+ - **Retrieval Quality Upgrade**: stronger BM25 lexical handling, code-aware chunking, terminal result hyperlinks, and per-collection model overrides
94
+ - **Code Embedding Benchmarks**: new benchmark workflow across canonical, real-GNO, and pinned OSS slices for comparing alternate embedding models
95
+ - **Default Embed Model**: built-in presets now use `Qwen3-Embedding-0.6B-GGUF` after it beat `bge-m3` on both code and multilingual prose benchmark lanes
96
+ - **Regression Fixes**: tightened phrase/negation/hyphen/underscore BM25 behavior, cleaned non-TTY hyperlink output, improved `gno doctor` chunking visibility, and fixed the embedding autoresearch harness
127
97
 
128
98
  ### Fine-Tuned Model Quick Use
129
99
 
@@ -133,7 +103,7 @@ models:
133
103
  presets:
134
104
  - id: slim-tuned
135
105
  name: GNO Slim Tuned
136
- embed: hf:gpustack/bge-m3-GGUF/bge-m3-Q4_K_M.gguf
106
+ embed: hf:Qwen/Qwen3-Embedding-0.6B-GGUF/Qwen3-Embedding-0.6B-Q8_0.gguf
137
107
  rerank: hf:ggml-org/Qwen3-Reranker-0.6B-Q8_0-GGUF/qwen3-reranker-0.6b-q8_0.gguf
138
108
  expand: hf:guiltylemon/gno-expansion-slim-retrieval-v1/gno-expansion-auto-entity-lock-default-mix-lr95-f16.gguf
139
109
  gen: hf:unsloth/Qwen3-1.7B-GGUF/Qwen3-1.7B-Q4_K_M.gguf
@@ -150,58 +120,6 @@ gno query "ECONNREFUSED 127.0.0.1:5432" --thorough
150
120
 
151
121
  > Full guide: [Fine-Tuned Models](https://gno.sh/docs/FINE-TUNED-MODELS/) · [Feature page](https://gno.sh/features/fine-tuned-models/)
152
122
 
153
- ## What's New in v0.21
154
-
155
- - **Ask CLI Query Modes**: `gno ask` now accepts repeatable `--query-mode term|intent|hyde` entries, matching the existing Ask API and Web controls
156
-
157
- ### v0.20
158
-
159
- - **Improved Model Init Fallbacks**: upgraded `node-llama-cpp` to `3.17.1` and switched to `build: "autoAttempt"` for better backend selection/fallback behavior
160
-
161
- ### v0.19
162
-
163
- - **Exclusion Filters**: explicit `exclude` controls across CLI, API, Web, and MCP to hard-prune unwanted docs by title/path/body text
164
- - **Ask Query-Mode Parity**: Ask now supports structured `term` / `intent` / `hyde` controls in both API and Web UI
165
-
166
- ### v0.18
167
-
168
- - **Intent Steering**: optional `intent` control for ambiguous queries across CLI, API, Web, and MCP query flows
169
- - **Rerank Controls**: `candidateLimit` lets you tune rerank cost vs. recall on slower or memory-constrained machines
170
- - **Stability**: query expansion now uses a bounded configurable context size (`models.expandContextSize`, default `2048`)
171
- - **Rerank Efficiency**: identical chunk texts are deduplicated before scoring and expanded back out deterministically
172
-
173
- ### v0.17
174
-
175
- - **Structured Query Modes**: `term`, `intent`, and `hyde` controls across CLI, API, MCP, and Web
176
- - **Temporal Retrieval Upgrades**: `since`/`until`, date-range parsing, and recency sorting with frontmatter-date fallback
177
- - **Web Retrieval UX Polish**: richer advanced controls in Search and Ask (collection/date/category/author/tags + query modes)
178
- - **Metadata-Aware Retrieval**: ingestion now materializes document metadata/date fields for better filtering and ranking
179
- - **Migration Reliability**: SQLite-compatible migration path for existing indexes (including older SQLite engines)
180
-
181
- ### v0.15
182
-
183
- - **HTTP Backends**: Offload embedding, reranking, and generation to remote GPU servers
184
- - Simple URI config: `http://host:port/path#modelname`
185
- - Works with llama-server, Ollama, LocalAI, vLLM
186
- - Run GNO on lightweight machines while GPU inference runs on your network
187
-
188
- ### v0.13
189
-
190
- - **Knowledge Graph**: Interactive force-directed visualization of document connections
191
- - **Graph with Similarity**: See semantic similarity as golden edges (not just wiki/markdown links)
192
- - **CLI**: `gno graph` command with collection filtering and similarity options
193
- - **Web UI**: `/graph` page with zoom, pan, collection filter, similarity toggle
194
- - **MCP**: `gno_graph` tool for AI agents to explore document relationships
195
- - **REST API**: `/api/graph` endpoint with full query parameters
196
-
197
- ### v0.12
198
-
199
- - **Note Linking**: Wiki-style `[[links]]`, backlinks, and AI-powered related notes
200
- - **Tag System**: Filter searches by frontmatter tags with `--tags-any`/`--tags-all`
201
- - **Web UI**: Outgoing links panel, backlinks panel, related notes sidebar
202
- - **CLI**: `gno links`, `gno backlinks`, `gno similar` commands
203
- - **MCP**: `gno_links`, `gno_backlinks`, `gno_similar` tools
204
-
205
123
  ---
206
124
 
207
125
  ## Quick Start
@@ -741,11 +659,11 @@ graph TD
741
659
 
742
660
  Models auto-download on first use to `~/.cache/gno/models/`. For deterministic startup, set `GNO_NO_AUTO_DOWNLOAD=1` and use `gno models pull` explicitly. Alternatively, offload to a GPU server on your network using HTTP backends.
743
661
 
744
- | Model | Purpose | Size |
745
- | :------------------ | :------------------------------------ | :----------- |
746
- | bge-m3 | Embeddings (1024-dim, multilingual) | ~500MB |
747
- | Qwen3-Reranker-0.6B | Cross-encoder reranking (32K context) | ~700MB |
748
- | Qwen/SmolLM | Query expansion + AI answers | ~600MB-1.2GB |
662
+ | Model | Purpose | Size |
663
+ | :------------------- | :------------------------------------ | :----------- |
664
+ | Qwen3-Embedding-0.6B | Embeddings (multilingual) | ~640MB |
665
+ | Qwen3-Reranker-0.6B | Cross-encoder reranking (32K context) | ~700MB |
666
+ | Qwen/SmolLM | Query expansion + AI answers | ~600MB-1.2GB |
749
667
 
750
668
  ### Model Presets
751
669
 
@@ -896,7 +814,28 @@ Why this is the current recommendation:
896
814
  Trade-off:
897
815
 
898
816
  - Qwen is slower to embed than `bge-m3`
899
- - use it where code retrieval quality matters, not necessarily as the global default for every collection
817
+ - existing users upgrading to the new default may need to run `gno embed` again so vector and hybrid retrieval catch up
818
+
819
+ ### General Multilingual Embedding Benchmark
820
+
821
+ GNO also now has a separate public-docs benchmark lane for normal markdown/prose
822
+ collections:
823
+
824
+ ```bash
825
+ bun run bench:general-embeddings --candidate bge-m3-incumbent --write
826
+ bun run bench:general-embeddings --candidate qwen3-embedding-0.6b --write
827
+ ```
828
+
829
+ Current signal on the public multilingual FastAPI-docs fixture:
830
+
831
+ - `bge-m3`: vector nDCG@10 `0.350`, hybrid nDCG@10 `0.642`
832
+ - `Qwen3-Embedding-0.6B-GGUF`: vector nDCG@10 `0.859`, hybrid nDCG@10 `0.947`
833
+
834
+ Interpretation:
835
+
836
+ - Qwen is now the strongest general multilingual embedding model we have tested
837
+ - built-in presets now use Qwen by default
838
+ - existing users may need to run `gno embed` again after upgrading so current collections catch up
900
839
 
901
840
  ---
902
841
 
@@ -177,6 +177,35 @@ gno embed # Embed only (if already synced)
177
177
 
178
178
  MCP `gno.sync` and `gno.capture` do NOT auto-embed. Use CLI for embedding.
179
179
 
180
+ ## Collection-specific embedding models
181
+
182
+ Collections can override the global embedding model with `models.embed`.
183
+
184
+ CLI path:
185
+
186
+ ```bash
187
+ gno collection add ~/work/gno/src \
188
+ --name gno-code \
189
+ --embed-model "hf:Qwen/Qwen3-Embedding-0.6B-GGUF/Qwen3-Embedding-0.6B-Q8_0.gguf"
190
+ ```
191
+
192
+ Good default guidance:
193
+
194
+ - keep the global preset for mixed notes/docs collections
195
+ - use a collection-specific embed override for code-heavy collections when benchmark guidance says so
196
+ - after changing an embed model on an existing populated collection, run:
197
+
198
+ ```bash
199
+ gno embed --collection gno-code
200
+ ```
201
+
202
+ If you want to remove old vectors after switching:
203
+
204
+ ```bash
205
+ gno collection clear-embeddings gno-code # stale models only
206
+ gno collection clear-embeddings gno-code --all # remove everything, then re-embed
207
+ ```
208
+
180
209
  ## Reference Documentation
181
210
 
182
211
  | Topic | File |
@@ -42,7 +42,11 @@ gno init [<path>] [options]
42
42
  gno collection add <path> --name <name> [options]
43
43
  ```
44
44
 
45
- Options same as `init`.
45
+ Options same as `init`, plus:
46
+
47
+ | Option | Description |
48
+ | --------------------- | ---------------------------------------------------- |
49
+ | `--embed-model <uri>` | Initial collection-specific embedding model override |
46
50
 
47
51
  ### gno collection list
48
52
 
@@ -62,6 +66,12 @@ gno collection remove <name>
62
66
  gno collection rename <old> <new>
63
67
  ```
64
68
 
69
+ ### gno collection clear-embeddings
70
+
71
+ ```bash
72
+ gno collection clear-embeddings <name> [--all] [--json]
73
+ ```
74
+
65
75
  ## Indexing
66
76
 
67
77
  ### gno update
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@gmickel/gno",
3
- "version": "0.37.0",
3
+ "version": "0.39.0",
4
4
  "description": "Local semantic search for your documents. Index Markdown, PDF, and Office files with hybrid BM25 + vector search.",
5
5
  "keywords": [
6
6
  "embeddings",
@@ -71,6 +71,8 @@
71
71
  "eval:hybrid:delta": "bun scripts/hybrid-benchmark.ts --delta",
72
72
  "bench:code-embeddings": "bun scripts/code-embedding-benchmark.ts",
73
73
  "bench:code-embeddings:write": "bun scripts/code-embedding-benchmark.ts --write",
74
+ "bench:general-embeddings": "bun scripts/general-embedding-benchmark.ts",
75
+ "bench:general-embeddings:write": "bun scripts/general-embedding-benchmark.ts --write",
74
76
  "eval:retrieval-candidates": "bun scripts/retrieval-candidate-benchmark.ts",
75
77
  "eval:retrieval-candidates:write": "bun scripts/retrieval-candidate-benchmark.ts --write",
76
78
  "eval:watch": "bun --bun evalite watch",
@@ -12,6 +12,7 @@ import {
12
12
  import { CliError } from "../../errors";
13
13
 
14
14
  interface AddOptions {
15
+ embedModel?: string;
15
16
  name?: string;
16
17
  pattern?: string;
17
18
  include?: string;
@@ -51,6 +52,11 @@ export async function collectionAdd(
51
52
  pattern: options.pattern,
52
53
  include: options.include,
53
54
  exclude: options.exclude,
55
+ models: options.embedModel
56
+ ? {
57
+ embed: options.embedModel,
58
+ }
59
+ : undefined,
54
60
  updateCmd: options.update,
55
61
  });
56
62
 
@@ -0,0 +1,83 @@
1
+ /**
2
+ * gno collection clear-embeddings - Remove stale or all embeddings for a collection.
3
+ */
4
+
5
+ import { getIndexDbPath } from "../../../app/constants";
6
+ import { loadConfig } from "../../../config";
7
+ import { resolveModelUri } from "../../../llm/registry";
8
+ import { SqliteAdapter } from "../../../store/sqlite/adapter";
9
+ import { CliError } from "../../errors";
10
+
11
/** Flags accepted by `gno collection clear-embeddings`. */
interface ClearEmbeddingsOptions {
  /** Selects "all" mode instead of the default "stale" mode. */
  all?: boolean;
  /** Print the raw result as pretty-printed JSON instead of text lines. */
  json?: boolean;
}

/**
 * Clear embeddings for a single configured collection and print a summary.
 *
 * @param name - Collection name; compared lower-cased against configured names.
 * @param options - Mode and output flags (see ClearEmbeddingsOptions).
 * @throws CliError "RUNTIME" if the config cannot be loaded, the store cannot
 *   be opened, or the clear operation reports failure.
 * @throws CliError "VALIDATION" if no configured collection matches `name`.
 */
export async function collectionClearEmbeddings(
  name: string,
  options: ClearEmbeddingsOptions = {}
): Promise<void> {
  const loaded = await loadConfig();
  if (!loaded.ok) {
    const reason = loaded.error.message;
    throw new CliError("RUNTIME", `Failed to load config: ${reason}`);
  }
  const config = loaded.value;

  const collection = config.collections.find(
    (entry) => entry.name === name.toLowerCase()
  );
  if (!collection) {
    throw new CliError("VALIDATION", `Collection not found: ${name}`);
  }

  const store = new SqliteAdapter();
  const opened = await store.open(getIndexDbPath(), config.ftsTokenizer);
  if (!opened.ok) {
    throw new CliError("RUNTIME", opened.error.message);
  }

  // From here on the store is open; the finally block closes it on every path,
  // including the early return taken for JSON output.
  try {
    const mode = options.all ? "all" : "stale";
    // Resolve the embed model for this specific collection (name passed as the
    // fourth argument) and hand it to the store as the active model.
    const activeModel = resolveModelUri(
      config,
      "embed",
      undefined,
      collection.name
    );
    const cleared = await store.clearEmbeddingsForCollection(collection.name, {
      mode,
      activeModel,
    });
    if (!cleared.ok) {
      throw new CliError("RUNTIME", cleared.error.message);
    }
    const summary = cleared.value;

    if (options.json) {
      process.stdout.write(`${JSON.stringify(summary, null, 2)}\n`);
      return;
    }

    const report: string[] = [];
    report.push(
      `Cleared ${summary.deletedVectors} embedding(s) for ${summary.collection}.`
    );
    report.push(`Mode: ${summary.mode}`);
    if (summary.deletedModels.length > 0) {
      report.push(`Models: ${summary.deletedModels.join(", ")}`);
    }
    if (summary.protectedSharedVectors > 0) {
      report.push(
        `Retained ${summary.protectedSharedVectors} shared vector(s) still referenced by other active collections.`
      );
    }
    if (mode === "all") {
      // Everything was removed, so point the user at the re-embed command.
      report.push(`Run: gno embed --collection ${summary.collection}`);
    }
    process.stdout.write(`${report.join("\n")}\n`);
  } finally {
    await store.close();
  }
}
@@ -5,6 +5,7 @@
5
5
  */
6
6
 
7
7
  export { collectionAdd } from "./add";
8
+ export { collectionClearEmbeddings } from "./clear-embeddings";
8
9
  export { collectionList } from "./list";
9
10
  export { collectionRemove } from "./remove";
10
11
  export { collectionRename } from "./rename";
@@ -7,7 +7,7 @@
7
7
 
8
8
  import { createDefaultConfig, loadConfig } from "../../../config";
9
9
  import { saveConfig } from "../../../config/saver";
10
- import { getPreset, listPresets } from "../../../llm/registry";
10
+ import { getPreset, listPresets, resolveModelUri } from "../../../llm/registry";
11
11
 
12
12
  // ─────────────────────────────────────────────────────────────────────────────
13
13
  // Types
@@ -19,7 +19,12 @@ export interface ModelsUseOptions {
19
19
  }
20
20
 
21
21
  export type ModelsUseResult =
22
- | { success: true; preset: string; name: string }
22
+ | {
23
+ success: true;
24
+ preset: string;
25
+ name: string;
26
+ embedModelChanged: boolean;
27
+ }
23
28
  | { success: false; error: string };
24
29
 
25
30
  // ─────────────────────────────────────────────────────────────────────────────
@@ -36,6 +41,7 @@ export async function modelsUse(
36
41
  // Load existing config or create default
37
42
  const configResult = await loadConfig(options.configPath);
38
43
  const config = configResult.ok ? configResult.value : createDefaultConfig();
44
+ const previousEmbedModel = resolveModelUri(config, "embed");
39
45
 
40
46
  // Check if preset exists
41
47
  const preset = getPreset(config, presetId);
@@ -72,7 +78,12 @@ export async function modelsUse(
72
78
  };
73
79
  }
74
80
 
75
- return { success: true, preset: presetId, name: preset.name };
81
+ return {
82
+ success: true,
83
+ preset: presetId,
84
+ name: preset.name,
85
+ embedModelChanged: previousEmbedModel !== preset.embed,
86
+ };
76
87
  }
77
88
 
78
89
  /**
@@ -82,5 +93,9 @@ export function formatModelsUse(result: ModelsUseResult): string {
82
93
  if (!result.success) {
83
94
  return `Error: ${result.error}`;
84
95
  }
85
- return `Switched to preset: ${result.preset} (${result.name})`;
96
+ const lines = [`Switched to preset: ${result.preset} (${result.name})`];
97
+ if (result.embedModelChanged) {
98
+ lines.push("Embedding model changed. Run: gno embed");
99
+ }
100
+ return lines.join("\n");
86
101
  }
@@ -9,6 +9,7 @@ import type { IndexStatus } from "../../store/types";
9
9
 
10
10
  import { getIndexDbPath } from "../../app/constants";
11
11
  import { getConfigPaths, isInitialized, loadConfig } from "../../config";
12
+ import { resolveModelUri } from "../../llm/registry";
12
13
  import { SqliteAdapter } from "../../store/sqlite/adapter";
13
14
 
14
15
  /**
@@ -148,7 +149,9 @@ export async function status(
148
149
  }
149
150
 
150
151
  try {
151
- const statusResult = await store.getStatus();
152
+ const statusResult = await store.getStatus({
153
+ embedModel: resolveModelUri(config, "embed"),
154
+ });
152
155
  if (!statusResult.ok) {
153
156
  return { success: false, error: statusResult.error.message };
154
157
  }
@@ -1264,10 +1264,12 @@ function wireManagementCommands(program: Command): void {
1264
1264
  .option("--pattern <glob>", "file matching pattern")
1265
1265
  .option("--include <exts>", "extension allowlist (CSV)")
1266
1266
  .option("--exclude <patterns>", "exclude patterns (CSV)")
1267
+ .option("--embed-model <uri>", "collection-specific embedding model URI")
1267
1268
  .option("--update <cmd>", "shell command to run before indexing")
1268
1269
  .action(async (path: string, cmdOpts: Record<string, unknown>) => {
1269
1270
  const { collectionAdd } = await import("./commands/collection");
1270
1271
  await collectionAdd(path, {
1272
+ embedModel: cmdOpts.embedModel as string | undefined,
1271
1273
  name: cmdOpts.name as string,
1272
1274
  pattern: cmdOpts.pattern as string | undefined,
1273
1275
  include: cmdOpts.include as string | undefined,
@@ -1308,6 +1310,20 @@ function wireManagementCommands(program: Command): void {
1308
1310
  await collectionRename(oldName, newName);
1309
1311
  });
1310
1312
 
1313
+ collectionCmd
1314
+ .command("clear-embeddings <name>")
1315
+ .description("Clear stale or all embeddings for a collection")
1316
+ .option("--all", "remove all embeddings for the collection")
1317
+ .option("--json", "JSON output")
1318
+ .action(async (name: string, cmdOpts: Record<string, unknown>) => {
1319
+ const { collectionClearEmbeddings } =
1320
+ await import("./commands/collection");
1321
+ await collectionClearEmbeddings(name, {
1322
+ all: Boolean(cmdOpts.all),
1323
+ json: Boolean(cmdOpts.json),
1324
+ });
1325
+ });
1326
+
1311
1327
  // context subcommands
1312
1328
  const contextCmd = program
1313
1329
  .command("context")
@@ -97,6 +97,7 @@ export async function addCollection(
97
97
  pattern: input.pattern ?? DEFAULT_PATTERN,
98
98
  include: includeList,
99
99
  exclude: excludeList,
100
+ models: input.models,
100
101
  updateCmd: input.updateCmd,
101
102
  };
102
103
 
@@ -7,6 +7,7 @@
7
7
 
8
8
  export { addCollection } from "./add";
9
9
  export { removeCollection } from "./remove";
10
+ export { updateCollection } from "./update";
10
11
  export type {
11
12
  AddCollectionInput,
12
13
  CollectionError,
@@ -14,4 +15,5 @@ export type {
14
15
  CollectionSuccess,
15
16
  RemoveCollectionInput,
16
17
  RenameCollectionInput,
18
+ UpdateCollectionInput,
17
19
  } from "./types";
@@ -5,6 +5,7 @@
5
5
  */
6
6
 
7
7
  import type { Collection, Config } from "../config/types";
8
+ import type { ModelType } from "../llm/types";
8
9
 
9
10
  /**
10
11
  * Input for adding a collection.
@@ -22,6 +23,8 @@ export interface AddCollectionInput {
22
23
  exclude?: string[] | string;
23
24
  /** Update command to run before sync */
24
25
  updateCmd?: string;
26
+ /** Optional initial model overrides */
27
+ models?: Partial<Record<ModelType, string>>;
25
28
  }
26
29
 
27
30
  /**
@@ -42,6 +45,16 @@ export interface RenameCollectionInput {
42
45
  newName: string;
43
46
  }
44
47
 
48
+ /**
49
+ * Input for updating a collection.
50
+ */
51
+ export interface UpdateCollectionInput {
52
+ /** Collection name (case-insensitive) */
53
+ name: string;
54
+ /** Partial model override patch; null clears one role */
55
+ models?: Partial<Record<ModelType, string | null>>;
56
+ }
57
+
45
58
  /**
46
59
  * Successful collection operation result.
47
60
  */
@@ -0,0 +1,93 @@
1
+ /**
2
+ * Update collection core logic.
3
+ * Pure function that returns an updated copy of config (the input is not mutated) - caller handles I/O.
4
+ *
5
+ * @module src/collection/update
6
+ */
7
+
8
+ import type {
9
+ Collection,
10
+ CollectionModelOverrides,
11
+ Config,
12
+ } from "../config/types";
13
+ import type { CollectionResult, UpdateCollectionInput } from "./types";
14
+
15
+ import { CollectionSchema } from "../config";
16
+
17
/**
 * Drop unset roles from a model override patch.
 *
 * Entries whose value is `undefined` or `null` are removed. Returns
 * `undefined` when the input is falsy or when nothing remains after filtering.
 */
function normalizeOverrides(
  models?: UpdateCollectionInput["models"]
): CollectionModelOverrides | undefined {
  if (!models) {
    return undefined;
  }

  const kept: [string, string][] = [];
  for (const [role, uri] of Object.entries(models)) {
    if (uri === undefined || uri === null) {
      continue;
    }
    kept.push([role, uri]);
  }

  return kept.length === 0
    ? undefined
    : (Object.fromEntries(kept) as CollectionModelOverrides);
}
33
+
34
/**
 * Apply a model-override patch to one collection and return the new config.
 *
 * The collection is looked up by lower-cased name. When `input.models` is
 * truthy it is spread over the collection's current overrides and the merged
 * object is passed through `normalizeOverrides`, which drops `null`/`undefined`
 * roles. The resulting collection is validated with `CollectionSchema` before
 * it replaces the old entry in a copied collections array.
 *
 * @returns `{ ok: true, config, collection }` on success, or
 *   `{ ok: false, code, message }` with code "NOT_FOUND" or "VALIDATION".
 */
export function updateCollection(
  config: Config,
  input: UpdateCollectionInput
): CollectionResult {
  const collectionName = input.name.toLowerCase();
  const position = config.collections.findIndex(
    (candidate) => candidate.name === collectionName
  );

  // A missing index and an empty slot are reported identically.
  const existing = position < 0 ? undefined : config.collections[position];
  if (!existing) {
    return {
      ok: false,
      code: "NOT_FOUND",
      message: `Collection "${collectionName}" not found`,
    };
  }

  const mergedModels = input.models
    ? normalizeOverrides({ ...existing.models, ...input.models })
    : existing.models;

  const candidate: Collection = { ...existing, models: mergedModels };
  const parsed = CollectionSchema.safeParse(candidate);
  if (!parsed.success) {
    return {
      ok: false,
      code: "VALIDATION",
      message: `Invalid collection: ${parsed.error.issues[0]?.message ?? "unknown error"}`,
    };
  }

  // Copy the array so the caller's config object is left untouched.
  const collections = Array.from(config.collections);
  collections[position] = parsed.data;

  return {
    ok: true,
    config: { ...config, collections },
    collection: parsed.data,
  };
}
@@ -190,7 +190,7 @@ export const DEFAULT_MODEL_PRESETS: ModelPreset[] = [
190
190
  {
191
191
  id: "slim-tuned",
192
192
  name: "GNO Slim Tuned (Default, ~1GB)",
193
- embed: "hf:gpustack/bge-m3-GGUF/bge-m3-Q4_K_M.gguf",
193
+ embed: "hf:Qwen/Qwen3-Embedding-0.6B-GGUF/Qwen3-Embedding-0.6B-Q8_0.gguf",
194
194
  rerank:
195
195
  "hf:ggml-org/Qwen3-Reranker-0.6B-Q8_0-GGUF/qwen3-reranker-0.6b-q8_0.gguf",
196
196
  expand:
@@ -200,7 +200,7 @@ export const DEFAULT_MODEL_PRESETS: ModelPreset[] = [
200
200
  {
201
201
  id: "slim",
202
202
  name: "Slim (~1GB)",
203
- embed: "hf:gpustack/bge-m3-GGUF/bge-m3-Q4_K_M.gguf",
203
+ embed: "hf:Qwen/Qwen3-Embedding-0.6B-GGUF/Qwen3-Embedding-0.6B-Q8_0.gguf",
204
204
  rerank:
205
205
  "hf:ggml-org/Qwen3-Reranker-0.6B-Q8_0-GGUF/qwen3-reranker-0.6b-q8_0.gguf",
206
206
  expand: "hf:unsloth/Qwen3-1.7B-GGUF/Qwen3-1.7B-Q4_K_M.gguf",
@@ -209,7 +209,7 @@ export const DEFAULT_MODEL_PRESETS: ModelPreset[] = [
209
209
  {
210
210
  id: "balanced",
211
211
  name: "Balanced (~2GB)",
212
- embed: "hf:gpustack/bge-m3-GGUF/bge-m3-Q4_K_M.gguf",
212
+ embed: "hf:Qwen/Qwen3-Embedding-0.6B-GGUF/Qwen3-Embedding-0.6B-Q8_0.gguf",
213
213
  rerank:
214
214
  "hf:ggml-org/Qwen3-Reranker-0.6B-Q8_0-GGUF/qwen3-reranker-0.6b-q8_0.gguf",
215
215
  expand:
@@ -219,7 +219,7 @@ export const DEFAULT_MODEL_PRESETS: ModelPreset[] = [
219
219
  {
220
220
  id: "quality",
221
221
  name: "Quality (Best Answers, ~2.5GB)",
222
- embed: "hf:gpustack/bge-m3-GGUF/bge-m3-Q4_K_M.gguf",
222
+ embed: "hf:Qwen/Qwen3-Embedding-0.6B-GGUF/Qwen3-Embedding-0.6B-Q8_0.gguf",
223
223
  rerank:
224
224
  "hf:ggml-org/Qwen3-Reranker-0.6B-Q8_0-GGUF/qwen3-reranker-0.6b-q8_0.gguf",
225
225
  expand: