@gmickel/gno 0.39.0 → 0.40.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +14 -1
- package/assets/skill/SKILL.md +7 -0
- package/assets/skill/cli-reference.md +6 -0
- package/package.json +1 -1
- package/src/cli/program.ts +28 -19
- package/src/embed/backlog.ts +3 -1
- package/src/mcp/tools/clear-collection-embeddings.ts +96 -0
- package/src/mcp/tools/embed.ts +23 -6
- package/src/mcp/tools/index-cmd.ts +9 -5
- package/src/mcp/tools/index.ts +26 -2
- package/src/sdk/embed.ts +2 -1
package/README.md
CHANGED
|
@@ -87,7 +87,7 @@ gno daemon
|
|
|
87
87
|
|
|
88
88
|
## What's New
|
|
89
89
|
|
|
90
|
-
> Latest release: [v0.
|
|
90
|
+
> Latest release: [v0.39.1](./CHANGELOG.md#0391---2026-04-06)
|
|
91
91
|
> Full release history: [CHANGELOG.md](./CHANGELOG.md)
|
|
92
92
|
|
|
93
93
|
- **Retrieval Quality Upgrade**: stronger BM25 lexical handling, code-aware chunking, terminal result hyperlinks, and per-collection model overrides
|
|
@@ -95,6 +95,19 @@ gno daemon
|
|
|
95
95
|
- **Default Embed Model**: built-in presets now use `Qwen3-Embedding-0.6B-GGUF` after it beat `bge-m3` on both code and multilingual prose benchmark lanes
|
|
96
96
|
- **Regression Fixes**: tightened phrase/negation/hyphen/underscore BM25 behavior, cleaned non-TTY hyperlink output, improved `gno doctor` chunking visibility, and fixed the embedding autoresearch harness
|
|
97
97
|
|
|
98
|
+
### Upgrading Existing Collections
|
|
99
|
+
|
|
100
|
+
If you already had collections indexed before the default embed-model switch to
|
|
101
|
+
`Qwen3-Embedding-0.6B-GGUF`, run:
|
|
102
|
+
|
|
103
|
+
```bash
|
|
104
|
+
gno models pull --embed
|
|
105
|
+
gno embed
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
That regenerates embeddings for the new default model. Old vectors are kept
|
|
109
|
+
until you explicitly clear stale embeddings.
|
|
110
|
+
|
|
98
111
|
### Fine-Tuned Model Quick Use
|
|
99
112
|
|
|
100
113
|
```yaml
|
package/assets/skill/SKILL.md
CHANGED
|
@@ -173,6 +173,9 @@ If you edit/create files that should be searchable via vector search:
|
|
|
173
173
|
gno index # Full re-index (sync + embed)
|
|
174
174
|
# or
|
|
175
175
|
gno embed # Embed only (if already synced)
|
|
176
|
+
gno embed travel # Embed one collection only
|
|
177
|
+
# or
|
|
178
|
+
gno embed --collection travel
|
|
176
179
|
```
|
|
177
180
|
|
|
178
181
|
MCP `gno_sync` and `gno_capture` do NOT auto-embed. Use the CLI (or the MCP `gno_embed` tool) for embedding.
|
|
@@ -206,6 +209,10 @@ gno collection clear-embeddings gno-code # stale models only
|
|
|
206
209
|
gno collection clear-embeddings gno-code --all # remove everything, then re-embed
|
|
207
210
|
```
|
|
208
211
|
|
|
212
|
+
MCP-equivalent write tool:
|
|
213
|
+
|
|
214
|
+
- `gno_clear_collection_embeddings`
|
|
215
|
+
|
|
209
216
|
## Reference Documentation
|
|
210
217
|
|
|
211
218
|
| Topic | File |
|
|
@@ -72,6 +72,12 @@ gno collection rename <old> <new>
|
|
|
72
72
|
gno collection clear-embeddings <name> [--all] [--json]
|
|
73
73
|
```
|
|
74
74
|
|
|
75
|
+
### gno embed
|
|
76
|
+
|
|
77
|
+
```bash
|
|
78
|
+
gno embed [collection] [--collection <name>] [--force] [--model <uri>] [--batch-size <n>] [--dry-run]
|
|
79
|
+
```
|
|
80
|
+
|
|
75
81
|
## Indexing
|
|
76
82
|
|
|
77
83
|
### gno update
|
package/package.json
CHANGED
package/src/cli/program.ts
CHANGED
|
@@ -1495,34 +1495,43 @@ function wireManagementCommands(program: Command): void {
|
|
|
1495
1495
|
|
|
1496
1496
|
// embed - Generate embeddings
|
|
1497
1497
|
program
|
|
1498
|
-
.command("embed")
|
|
1498
|
+
.command("embed [collection]")
|
|
1499
1499
|
.description("Generate embeddings for indexed documents")
|
|
1500
|
+
.option("--collection <name>", "restrict to one collection")
|
|
1500
1501
|
.option("--model <uri>", "embedding model URI")
|
|
1501
1502
|
.option("--batch-size <num>", "batch size", "32")
|
|
1502
1503
|
.option("--force", "regenerate all embeddings")
|
|
1503
1504
|
.option("--dry-run", "show what would be done")
|
|
1504
1505
|
.option("--json", "JSON output")
|
|
1505
|
-
.action(
|
|
1506
|
-
|
|
1507
|
-
|
|
1506
|
+
.action(
|
|
1507
|
+
async (
|
|
1508
|
+
collectionArg: string | undefined,
|
|
1509
|
+
cmdOpts: Record<string, unknown>
|
|
1510
|
+
) => {
|
|
1511
|
+
const globals = getGlobals();
|
|
1512
|
+
const format = getFormat(cmdOpts);
|
|
1508
1513
|
|
|
1509
|
-
|
|
1510
|
-
|
|
1511
|
-
|
|
1512
|
-
|
|
1513
|
-
|
|
1514
|
-
|
|
1515
|
-
|
|
1516
|
-
|
|
1517
|
-
|
|
1518
|
-
|
|
1519
|
-
|
|
1514
|
+
const { embed, formatEmbed } = await import("./commands/embed");
|
|
1515
|
+
const collection =
|
|
1516
|
+
collectionArg ?? (cmdOpts.collection as string | undefined);
|
|
1517
|
+
const opts = {
|
|
1518
|
+
collection,
|
|
1519
|
+
model: cmdOpts.model as string | undefined,
|
|
1520
|
+
batchSize: parsePositiveInt("batch-size", cmdOpts.batchSize),
|
|
1521
|
+
force: Boolean(cmdOpts.force),
|
|
1522
|
+
dryRun: Boolean(cmdOpts.dryRun),
|
|
1523
|
+
yes: globals.yes,
|
|
1524
|
+
json: format === "json",
|
|
1525
|
+
verbose: globals.verbose,
|
|
1526
|
+
};
|
|
1527
|
+
const result = await embed(opts);
|
|
1520
1528
|
|
|
1521
|
-
|
|
1522
|
-
|
|
1529
|
+
if (!result.success) {
|
|
1530
|
+
throw new CliError("RUNTIME", result.error ?? "Embed failed");
|
|
1531
|
+
}
|
|
1532
|
+
process.stdout.write(`${formatEmbed(result, opts)}\n`);
|
|
1523
1533
|
}
|
|
1524
|
-
|
|
1525
|
-
});
|
|
1534
|
+
);
|
|
1526
1535
|
|
|
1527
1536
|
// cleanup - Clean stale data
|
|
1528
1537
|
program
|
package/src/embed/backlog.ts
CHANGED
|
@@ -25,6 +25,7 @@ export interface EmbedBacklogDeps {
|
|
|
25
25
|
statsPort: VectorStatsPort;
|
|
26
26
|
embedPort: EmbeddingPort;
|
|
27
27
|
vectorIndex: VectorIndexPort;
|
|
28
|
+
collection?: string;
|
|
28
29
|
modelUri: string;
|
|
29
30
|
batchSize?: number;
|
|
30
31
|
}
|
|
@@ -52,7 +53,7 @@ interface Cursor {
|
|
|
52
53
|
export async function embedBacklog(
|
|
53
54
|
deps: EmbedBacklogDeps
|
|
54
55
|
): Promise<StoreResult<EmbedBacklogResult>> {
|
|
55
|
-
const { statsPort, embedPort, vectorIndex, modelUri } = deps;
|
|
56
|
+
const { statsPort, embedPort, vectorIndex, modelUri, collection } = deps;
|
|
56
57
|
const batchSize = deps.batchSize ?? 32;
|
|
57
58
|
|
|
58
59
|
let embedded = 0;
|
|
@@ -65,6 +66,7 @@ export async function embedBacklog(
|
|
|
65
66
|
const batchResult = await statsPort.getBacklog(modelUri, {
|
|
66
67
|
limit: batchSize,
|
|
67
68
|
after: cursor,
|
|
69
|
+
collection,
|
|
68
70
|
});
|
|
69
71
|
|
|
70
72
|
if (!batchResult.ok) {
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* MCP gno_clear_collection_embeddings tool.
|
|
3
|
+
*
|
|
4
|
+
* @module src/mcp/tools/clear-collection-embeddings
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import type { ToolContext } from "../server";
|
|
8
|
+
|
|
9
|
+
import { MCP_ERRORS } from "../../core/errors";
|
|
10
|
+
import { withWriteLock } from "../../core/file-lock";
|
|
11
|
+
import { resolveModelUri } from "../../llm/registry";
|
|
12
|
+
import { runTool, type ToolResult } from "./index";
|
|
13
|
+
|
|
14
|
+
/** Arguments accepted by the gno_clear_collection_embeddings tool handler. */
interface ClearCollectionEmbeddingsInput {
|
|
15
|
+
// Collection name; the handler matches it exactly against `ctx.collections`.
collection: string;
|
|
16
|
+
// Cleanup mode; the handler falls back to "stale" when this is omitted.
mode?: "stale" | "all";
|
|
17
|
+
}
|
|
18
|
+
|
|
19
|
+
/**
 * Outcome of a clear-embeddings run, as consumed by `formatResult` to build
 * the tool's text output.
 */
interface ClearCollectionEmbeddingsResult {
|
|
20
|
+
// Collection name, printed on the "Collection:" line.
collection: string;
|
|
21
|
+
// Count printed on the "Deleted vectors:" line.
deletedVectors: number;
|
|
22
|
+
// Model identifiers printed (comma-separated) on the "Models:" line when non-empty.
deletedModels: string[];
|
|
23
|
+
// Cleanup mode that was applied, printed on the "Mode:" line.
mode: "stale" | "all";
|
|
24
|
+
// Printed only when greater than zero.
// NOTE(review): presumably vectors kept because they are shared with other
// collections — confirm against the store implementation.
protectedSharedVectors: number;
|
|
25
|
+
// Optional free-text line appended at the end of the text output.
note?: string;
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
/**
 * Render a clear-embeddings result as newline-separated plain text.
 *
 * The collection, mode and deleted-vector count are always printed; the
 * models, protected-vector and note lines are added only when present.
 *
 * @param result - Outcome to render.
 * @returns The lines joined with "\n" (no trailing newline).
 */
function formatResult(result: ClearCollectionEmbeddingsResult): string {
|
|
29
|
+
const lines = [
|
|
30
|
+
`Collection: ${result.collection}`,
|
|
31
|
+
`Mode: ${result.mode}`,
|
|
32
|
+
`Deleted vectors: ${result.deletedVectors}`,
|
|
33
|
+
];
|
|
34
|
+
// Skip the "Models:" line entirely when no model entries were deleted.
if (result.deletedModels.length > 0) {
|
|
35
|
+
lines.push(`Models: ${result.deletedModels.join(", ")}`);
|
|
36
|
+
}
|
|
37
|
+
// A zero count is omitted rather than printed.
if (result.protectedSharedVectors > 0) {
|
|
38
|
+
lines.push(`Protected shared vectors: ${result.protectedSharedVectors}`);
|
|
39
|
+
}
|
|
40
|
+
// Truthiness check: an undefined or empty note adds no line.
if (result.note) {
|
|
41
|
+
lines.push(result.note);
|
|
42
|
+
}
|
|
43
|
+
return lines.join("\n");
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
/**
 * MCP handler for `gno_clear_collection_embeddings`: removes stale or all
 * embeddings for one collection via `ctx.store.clearEmbeddingsForCollection`.
 *
 * The work is wrapped in `runTool` (with `formatResult` passed as its last
 * argument) and the store call runs inside `withWriteLock`.
 *
 * The callback throws when write tools are disabled, when the collection name
 * is not found in `ctx.collections`, or when the store reports a failure.
 * NOTE(review): how `runTool` surfaces those errors to the MCP client is not
 * visible here — confirm in `./index`.
 *
 * @param args - Collection name and optional cleanup mode (defaults to "stale").
 * @param ctx - Tool context supplying collections, config, store and lock path.
 * @returns The promise returned by `runTool`.
 */
export function handleClearCollectionEmbeddings(
|
|
47
|
+
args: ClearCollectionEmbeddingsInput,
|
|
48
|
+
ctx: ToolContext
|
|
49
|
+
): Promise<ToolResult> {
|
|
50
|
+
return runTool(
|
|
51
|
+
ctx,
|
|
52
|
+
"gno_clear_collection_embeddings",
|
|
53
|
+
async () => {
|
|
54
|
+
// This tool mutates the store, so refuse unless write tools are enabled.
if (!ctx.enableWrite) {
|
|
55
|
+
throw new Error("Write tools disabled. Start MCP with --enable-write.");
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
// Exact-name lookup; the input is used as given (no trimming or normalization).
const collection = ctx.collections.find(
|
|
59
|
+
(item) => item.name === args.collection
|
|
60
|
+
);
|
|
61
|
+
if (!collection) {
|
|
62
|
+
throw new Error(
|
|
63
|
+
`${MCP_ERRORS.NOT_FOUND.code}: Collection not found: ${args.collection}`
|
|
64
|
+
);
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
// Fall back to "stale" when the caller did not specify a mode.
const mode = args.mode ?? "stale";
|
|
68
|
+
// Perform the deletion while holding the write lock at ctx.writeLockPath.
return withWriteLock(ctx.writeLockPath, async () => {
|
|
69
|
+
const result = await ctx.store.clearEmbeddingsForCollection(
|
|
70
|
+
collection.name,
|
|
71
|
+
{
|
|
72
|
+
mode,
|
|
73
|
+
// Embed model URI resolved from config for this collection.
// NOTE(review): presumably the model whose vectors count as non-stale —
// confirm against clearEmbeddingsForCollection.
activeModel: resolveModelUri(
|
|
74
|
+
ctx.config,
|
|
75
|
+
"embed",
|
|
76
|
+
undefined,
|
|
77
|
+
collection.name
|
|
78
|
+
),
|
|
79
|
+
}
|
|
80
|
+
);
|
|
81
|
+
// Convert a failed store result into a thrown "CODE: message" error.
if (!result.ok) {
|
|
82
|
+
throw new Error(`${result.error.code}: ${result.error.message}`);
|
|
83
|
+
}
|
|
84
|
+
|
|
85
|
+
return {
|
|
86
|
+
...result.value,
|
|
87
|
+
// Only mode "all" gets a follow-up hint naming the tools that rebuild embeddings.
note:
|
|
88
|
+
mode === "all"
|
|
89
|
+
? `Run gno_embed or gno_index for ${collection.name} to rebuild embeddings.`
|
|
90
|
+
: undefined,
|
|
91
|
+
};
|
|
92
|
+
});
|
|
93
|
+
},
|
|
94
|
+
formatResult
|
|
95
|
+
);
|
|
96
|
+
}
|
package/src/mcp/tools/embed.ts
CHANGED
|
@@ -11,14 +11,16 @@ import { acquireWriteLock, type WriteLockHandle } from "../../core/file-lock";
|
|
|
11
11
|
import { JobError } from "../../core/job-manager";
|
|
12
12
|
import { embedBacklog } from "../../embed";
|
|
13
13
|
import { LlmAdapter } from "../../llm/nodeLlamaCpp/adapter";
|
|
14
|
-
import {
|
|
14
|
+
import { resolveModelUri } from "../../llm/registry";
|
|
15
15
|
import {
|
|
16
16
|
createVectorIndexPort,
|
|
17
17
|
createVectorStatsPort,
|
|
18
18
|
} from "../../store/vector";
|
|
19
19
|
import { runTool, type ToolResult } from "./index";
|
|
20
20
|
|
|
21
|
-
|
|
21
|
+
/** Arguments accepted by the gno_embed tool handler. */
interface EmbedInput {
|
|
22
|
+
// Optional collection name; the handler trims it and, when it is non-empty,
// requires an exact match in `ctx.collections`.
collection?: string;
|
|
23
|
+
}
|
|
22
24
|
|
|
23
25
|
interface EmbedResultOutput {
|
|
24
26
|
jobId: string;
|
|
@@ -57,9 +59,23 @@ export function handleEmbed(
|
|
|
57
59
|
);
|
|
58
60
|
}
|
|
59
61
|
|
|
60
|
-
|
|
61
|
-
const
|
|
62
|
-
|
|
62
|
+
const requestedCollection = args.collection?.trim();
|
|
63
|
+
const collection = requestedCollection
|
|
64
|
+
? ctx.collections.find((item) => item.name === requestedCollection)
|
|
65
|
+
: null;
|
|
66
|
+
|
|
67
|
+
if (requestedCollection && !collection) {
|
|
68
|
+
throw new Error(
|
|
69
|
+
`${MCP_ERRORS.NOT_FOUND.code}: Collection not found: ${requestedCollection}`
|
|
70
|
+
);
|
|
71
|
+
}
|
|
72
|
+
|
|
73
|
+
const modelUri = resolveModelUri(
|
|
74
|
+
ctx.config,
|
|
75
|
+
"embed",
|
|
76
|
+
undefined,
|
|
77
|
+
collection?.name
|
|
78
|
+
);
|
|
63
79
|
|
|
64
80
|
const jobId = await ctx.jobManager.startTypedJobWithLock(
|
|
65
81
|
"embed",
|
|
@@ -74,7 +90,7 @@ export function handleEmbed(
|
|
|
74
90
|
if (!embedResult.ok) {
|
|
75
91
|
throw new Error(
|
|
76
92
|
`MODEL_NOT_FOUND: Embedding model not cached. ` +
|
|
77
|
-
`Model: ${modelUri}
|
|
93
|
+
`Model: ${modelUri}. ` +
|
|
78
94
|
`Run 'gno models pull embed' first.`
|
|
79
95
|
);
|
|
80
96
|
}
|
|
@@ -108,6 +124,7 @@ export function handleEmbed(
|
|
|
108
124
|
statsPort,
|
|
109
125
|
embedPort,
|
|
110
126
|
vectorIndex,
|
|
127
|
+
collection: collection?.name,
|
|
111
128
|
modelUri,
|
|
112
129
|
batchSize: 32,
|
|
113
130
|
});
|
|
@@ -14,7 +14,7 @@ import { normalizeCollectionName } from "../../core/validation";
|
|
|
14
14
|
import { embedBacklog } from "../../embed";
|
|
15
15
|
import { defaultSyncService } from "../../ingestion";
|
|
16
16
|
import { LlmAdapter } from "../../llm/nodeLlamaCpp/adapter";
|
|
17
|
-
import {
|
|
17
|
+
import { resolveModelUri } from "../../llm/registry";
|
|
18
18
|
import {
|
|
19
19
|
createVectorIndexPort,
|
|
20
20
|
createVectorStatsPort,
|
|
@@ -101,9 +101,12 @@ export function handleIndex(
|
|
|
101
101
|
runUpdateCmd: false,
|
|
102
102
|
};
|
|
103
103
|
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
104
|
+
const modelUri = resolveModelUri(
|
|
105
|
+
ctx.config,
|
|
106
|
+
"embed",
|
|
107
|
+
undefined,
|
|
108
|
+
collection?.name
|
|
109
|
+
);
|
|
107
110
|
|
|
108
111
|
const jobId = await ctx.jobManager.startTypedJobWithLock(
|
|
109
112
|
"index",
|
|
@@ -137,7 +140,7 @@ export function handleIndex(
|
|
|
137
140
|
if (!embedResult.ok) {
|
|
138
141
|
throw new Error(
|
|
139
142
|
`MODEL_NOT_FOUND: Embedding model not cached. ` +
|
|
140
|
-
`Model: ${modelUri}
|
|
143
|
+
`Model: ${modelUri}. ` +
|
|
141
144
|
`Run 'gno models pull embed' first.`
|
|
142
145
|
);
|
|
143
146
|
}
|
|
@@ -171,6 +174,7 @@ export function handleIndex(
|
|
|
171
174
|
statsPort,
|
|
172
175
|
embedPort,
|
|
173
176
|
vectorIndex,
|
|
177
|
+
collection: collection?.name,
|
|
174
178
|
modelUri,
|
|
175
179
|
batchSize: 32,
|
|
176
180
|
});
|
package/src/mcp/tools/index.ts
CHANGED
|
@@ -13,6 +13,7 @@ import type { ToolContext } from "../server";
|
|
|
13
13
|
import { normalizeTag } from "../../core/tags";
|
|
14
14
|
import { handleAddCollection } from "./add-collection";
|
|
15
15
|
import { handleCapture } from "./capture";
|
|
16
|
+
import { handleClearCollectionEmbeddings } from "./clear-collection-embeddings";
|
|
16
17
|
import { handleEmbed } from "./embed";
|
|
17
18
|
import { handleGet } from "./get";
|
|
18
19
|
import { handleIndex } from "./index-cmd";
|
|
@@ -213,7 +214,12 @@ const syncInputSchema = z.object({
|
|
|
213
214
|
.describe("Run the collection's configured update command before syncing"),
|
|
214
215
|
});
|
|
215
216
|
|
|
216
|
-
const embedInputSchema = z.object({
|
|
217
|
+
// Input schema for the gno_embed tool; its `.shape` is passed to `server.tool`
// when the tool is registered.
const embedInputSchema = z.object({
|
|
218
|
+
// Optional: leaving it out means no collection restriction is requested.
collection: z
|
|
219
|
+
.string()
|
|
220
|
+
.optional()
|
|
221
|
+
.describe("Collection name to embed. Omit to embed all collections"),
|
|
222
|
+
});
|
|
217
223
|
|
|
218
224
|
const indexInputSchema = z.object({
|
|
219
225
|
collection: z
|
|
@@ -230,6 +236,17 @@ const removeCollectionInputSchema = z.object({
|
|
|
230
236
|
.describe("Collection name to remove"),
|
|
231
237
|
});
|
|
232
238
|
|
|
239
|
+
// Input schema for the gno_clear_collection_embeddings tool; its `.shape` is
// passed to `server.tool` when the tool is registered.
const clearCollectionEmbeddingsInputSchema = z.object({
|
|
240
|
+
// Required and must be a non-empty string.
collection: z
|
|
241
|
+
.string()
|
|
242
|
+
.min(1, "Collection cannot be empty")
|
|
243
|
+
.describe("Collection name to clean"),
|
|
244
|
+
// Restricted to "stale" or "all"; "stale" is applied when the field is absent.
mode: z
|
|
245
|
+
.enum(["stale", "all"])
|
|
246
|
+
.default("stale")
|
|
247
|
+
.describe("Cleanup mode: stale models only, or all embeddings"),
|
|
248
|
+
});
|
|
249
|
+
|
|
233
250
|
const createFolderInputSchema = z.object({
|
|
234
251
|
collection: z.string().min(1, "Collection cannot be empty"),
|
|
235
252
|
name: z.string().min(1, "Folder name cannot be empty"),
|
|
@@ -799,7 +816,7 @@ export function registerTools(server: McpServer, ctx: ToolContext): void {
|
|
|
799
816
|
|
|
800
817
|
server.tool(
|
|
801
818
|
"gno_embed",
|
|
802
|
-
"Generate vector embeddings for all unembedded chunks. Async: returns a job ID. Poll with gno_job_status.",
|
|
819
|
+
"Generate vector embeddings for all unembedded chunks, optionally scoped to one collection. Async: returns a job ID. Poll with gno_job_status.",
|
|
803
820
|
embedInputSchema.shape,
|
|
804
821
|
(args) => handleEmbed(args, ctx)
|
|
805
822
|
);
|
|
@@ -818,6 +835,13 @@ export function registerTools(server: McpServer, ctx: ToolContext): void {
|
|
|
818
835
|
(args) => handleRemoveCollection(args, ctx)
|
|
819
836
|
);
|
|
820
837
|
|
|
838
|
+
server.tool(
|
|
839
|
+
"gno_clear_collection_embeddings",
|
|
840
|
+
"Remove stale or all embeddings for one collection.",
|
|
841
|
+
clearCollectionEmbeddingsInputSchema.shape,
|
|
842
|
+
(args) => handleClearCollectionEmbeddings(args, ctx)
|
|
843
|
+
);
|
|
844
|
+
|
|
821
845
|
server.tool(
|
|
822
846
|
"gno_create_folder",
|
|
823
847
|
"Create a folder inside an existing collection.",
|
package/src/sdk/embed.ts
CHANGED
|
@@ -202,7 +202,7 @@ export async function runEmbed(
|
|
|
202
202
|
|
|
203
203
|
const backlogResult = force
|
|
204
204
|
? await getActiveChunkCount(db)
|
|
205
|
-
: await stats.countBacklog(modelUri);
|
|
205
|
+
: await stats.countBacklog(modelUri, { collection: options.collection });
|
|
206
206
|
if (!backlogResult.ok) {
|
|
207
207
|
throw sdkError("STORE", backlogResult.error.message, {
|
|
208
208
|
cause: backlogResult.error.cause,
|
|
@@ -264,6 +264,7 @@ export async function runEmbed(
|
|
|
264
264
|
statsPort: stats,
|
|
265
265
|
embedPort,
|
|
266
266
|
vectorIndex,
|
|
267
|
+
collection: options.collection,
|
|
267
268
|
modelUri,
|
|
268
269
|
batchSize,
|
|
269
270
|
});
|