npm - @gmickel/gno - Versions diffs - 1.3.1 → 1.4.1 - Mend

@gmickel/gno 1.3.1 → 1.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

package/README.md +2 -0
package/assets/skill/SKILL.md +12 -0
package/package.json +68 -65
package/src/bench/fixture.ts +247 -0
package/src/bench/metrics.ts +137 -0
package/src/bench/types.ts +96 -0
package/src/cli/commands/bench.ts +280 -0
package/src/cli/commands/doctor.ts +4 -1
package/src/cli/options.ts +2 -0
package/src/cli/program.ts +52 -0
package/src/mcp/tools/index.ts +53 -21
package/src/serve/public/globals.built.css +2 -2
package/src/serve/server.ts +2 -2
package/src/types/wasm.d.ts +4 -0

package/src/cli/commands/bench.ts ADDED Viewed

@@ -0,0 +1,280 @@
+/**
+ * gno bench command implementation.
+ * Runs retrieval benchmarks from user fixtures.
+ *
+ * @module src/cli/commands/bench
+ */
+import type {
+  BenchCase,
+  BenchCaseResult,
+  BenchMode,
+  BenchModeResult,
+  BenchOptions,
+  BenchResult,
+} from "../../bench/types";
+import type { SearchResult } from "../../pipeline/types";
+import { loadBenchFixture, normalizeBenchRef } from "../../bench/fixture";
+import { averageMetrics, computeRetrievalMetrics } from "../../bench/metrics";
+import { DEFAULT_THOROUGH_CANDIDATE_LIMIT } from "../../core/depth-policy";
+import { query } from "./query";
+import { search } from "./search";
+import { vsearch } from "./vsearch";
+/**
+ * Round a number to a fixed count of decimal places.
+ *
+ * @param value - Number to round.
+ * @param places - Decimal places to keep (defaults to 2).
+ * @returns The value after a `toFixed` round trip back to a number.
+ */
+function round(value: number, places = 2): number {
+  const fixed = value.toFixed(places);
+  return Number(fixed);
+}
+/**
+ * Summarize per-case latencies as p50, p95 and mean, each rounded via `round`.
+ *
+ * Percentiles use the nearest-rank method on an ascending copy of the samples,
+ * so the input array is never reordered. An empty input yields all zeros.
+ *
+ * @param values - Latency samples in milliseconds.
+ * @returns Rounded p50, p95 and mean latency.
+ */
+function summarizeLatency(values: number[]): BenchModeResult["latency"] {
+  const count = values.length;
+  if (count === 0) {
+    return { p50Ms: 0, p95Ms: 0, meanMs: 0 };
+  }
+  const ascending = values.slice().sort((left, right) => left - right);
+  const lastIndex = count - 1;
+  const nearestRank = (pct: number): number => {
+    const rank = Math.ceil((pct / 100) * count) - 1;
+    // Clamp so tiny sample sets still resolve to a valid position.
+    const clamped = Math.min(lastIndex, Math.max(0, rank));
+    return ascending[clamped] ?? 0;
+  };
+  let total = 0;
+  for (const sample of values) {
+    total += sample;
+  }
+  return {
+    p50Ms: round(nearestRank(50)),
+    p95Ms: round(nearestRank(95)),
+    meanMs: round(total / count),
+  };
+}
+/**
+ * Collect every identifier a fixture may use to refer to a search result.
+ *
+ * Gathers the docid, the URI, the URI passed through `normalizeBenchRef`,
+ * the source-relative path and the title, skipping any that are falsy.
+ *
+ * @param result - Search result to describe.
+ * @returns The set of non-empty reference strings for the result.
+ */
+function resultRefs(result: SearchResult): Set<string> {
+  const candidates = [
+    result.docid,
+    result.uri,
+    normalizeBenchRef(result.uri),
+    result.source.relPath,
+    result.title,
+  ];
+  const refs = new Set<string>();
+  for (const candidate of candidates) {
+    if (candidate) {
+      refs.add(candidate);
+    }
+  }
+  return refs;
+}
+/**
+ * List the expected references found within the first `k` results.
+ *
+ * Each result contributes at most one hit: the first expected reference
+ * (in fixture order, after `normalizeBenchRef`) that appears among the
+ * result's refs. Every expected reference is reported once, in the order it
+ * was first matched.
+ *
+ * @param results - Ranked search results.
+ * @param expected - Expected references from the fixture.
+ * @param k - Number of leading results to inspect.
+ * @returns Normalized expected references that were hit.
+ */
+function findHits(
+  results: SearchResult[],
+  expected: string[],
+  k: number
+): string[] {
+  const wanted = new Set(expected.map(normalizeBenchRef));
+  const seen = new Set<string>();
+  const hits: string[] = [];
+  for (const result of results.slice(0, k)) {
+    const refs = resultRefs(result);
+    for (const ref of wanted) {
+      if (!refs.has(ref)) {
+        continue;
+      }
+      // Only the first matching expected ref counts for this result.
+      if (ref && !seen.has(ref)) {
+        seen.add(ref);
+        hits.push(ref);
+      }
+      break;
+    }
+  }
+  return hits;
+}
+/**
+ * Project ranked results onto their source-relative paths.
+ *
+ * @param results - Ranked search results.
+ * @returns One relative path per result, in ranking order.
+ */
+function topDocs(results: SearchResult[]): string[] {
+  const paths: string[] = [];
+  for (const { source } of results) {
+    paths.push(source.relPath);
+  }
+  return paths;
+}
+/**
+ * Label each ranked result for metric computation.
+ *
+ * A result that matches an expected reference is labelled with that
+ * (normalized) reference; any other result keeps its source-relative path.
+ *
+ * @param results - Ranked search results.
+ * @param expected - Expected references from the fixture.
+ * @returns One label per result, in ranking order.
+ */
+function rankedMetricDocs(
+  results: SearchResult[],
+  expected: string[]
+): string[] {
+  const wanted = new Set(expected.map(normalizeBenchRef));
+  const labels: string[] = [];
+  for (const result of results) {
+    const refs = resultRefs(result);
+    let label = result.source.relPath;
+    for (const ref of wanted) {
+      if (refs.has(ref)) {
+        label = ref;
+        break;
+      }
+    }
+    labels.push(label);
+  }
+  return labels;
+}
+/**
+ * Run one benchmark case against one retrieval mode and score the outcome.
+ *
+ * Dispatches to `search` for "bm25", `vsearch` for "vector" and `query` for
+ * every other mode type, timing only the retrieval call. A failed retrieval
+ * is scored against an empty output and carries the reported error.
+ *
+ * @param input - Mode, case, metric cutoff, optional fixture-level candidate
+ *   limit, and the CLI options that select config and index.
+ * @returns The per-case result including hits, top docs, metrics and latency.
+ */
+async function runModeCase(input: {
+  mode: BenchMode;
+  benchCase: BenchCase;
+  topK: number;
+  candidateLimit?: number;
+  options: BenchOptions;
+}): Promise<BenchCaseResult> {
+  const { mode, benchCase, topK, options } = input;
+  const { expected, judgments } = benchCase;
+  // Options shared by all three retrieval commands.
+  const sharedOptions = {
+    configPath: options.configPath,
+    indexName: options.indexName,
+    collection: benchCase.collection,
+    limit: mode.limit ?? topK,
+    json: true,
+  };
+  // Precedence: mode setting, then fixture setting, then the thorough default.
+  const candidateLimit =
+    mode.candidateLimit ??
+    input.candidateLimit ??
+    (mode.depth === "thorough" ? DEFAULT_THOROUGH_CANDIDATE_LIMIT : undefined);
+  const startedAt = performance.now();
+  const queryModes = benchCase.queryModes ?? mode.queryModes;
+  let result:
+    | Awaited<ReturnType<typeof search>>
+    | Awaited<ReturnType<typeof vsearch>>
+    | Awaited<ReturnType<typeof query>>;
+  switch (mode.type) {
+    case "bm25":
+      result = await search(benchCase.query, sharedOptions);
+      break;
+    case "vector":
+      result = await vsearch(benchCase.query, sharedOptions);
+      break;
+    default:
+      result = await query(benchCase.query, {
+        ...sharedOptions,
+        candidateLimit,
+        noExpand: mode.noExpand,
+        noRerank: mode.noRerank,
+        queryModes,
+      });
+  }
+  const latencyMs = round(performance.now() - startedAt);
+  const scoreAgainst = (output: string[]) =>
+    computeRetrievalMetrics({ output, expected, judgments, k: topK });
+  // Fields common to the success and failure shapes.
+  const base = {
+    id: benchCase.id,
+    query: benchCase.query,
+    topK,
+    expected,
+  };
+  if (!result.success) {
+    return {
+      ...base,
+      hits: [],
+      topDocs: [],
+      metrics: scoreAgainst([]),
+      latencyMs,
+      error: result.error,
+    };
+  }
+  const ranked = result.data.results;
+  const docs = topDocs(ranked);
+  const metricDocs = rankedMetricDocs(ranked, expected);
+  const hits = findHits(ranked, expected, topK);
+  return {
+    ...base,
+    hits,
+    topDocs: docs.slice(0, topK),
+    metrics: scoreAgainst(metricDocs),
+    latencyMs,
+  };
+}
+/**
+ * Execute the gno bench command.
+ *
+ * Loads the fixture, then runs every fixture query under every fixture mode
+ * sequentially, aggregating metrics and latency per mode.
+ *
+ * @param fixturePath - Path of the benchmark fixture to load.
+ * @param options - CLI options forwarded to fixture loading and retrieval.
+ * @returns A validation failure when the fixture cannot be loaded, otherwise
+ *   the per-mode benchmark report.
+ */
+export async function bench(
+  fixturePath: string,
+  options: BenchOptions = {}
+): Promise<BenchResult> {
+  const loaded = await loadBenchFixture(fixturePath, options);
+  if (!loaded.ok) {
+    return { success: false, error: loaded.error, isValidation: true };
+  }
+  const { fixture } = loaded;
+  const { queries, candidateLimit } = fixture;
+  const modeResults: BenchModeResult[] = [];
+  for (const mode of fixture.modes) {
+    const cases: BenchCaseResult[] = [];
+    let failures = 0;
+    for (const benchCase of queries) {
+      const outcome = await runModeCase({
+        mode,
+        benchCase,
+        topK: benchCase.topK ?? fixture.topK,
+        candidateLimit,
+        options,
+      });
+      if (outcome.error) {
+        failures += 1;
+      }
+      cases.push(outcome);
+    }
+    const queryCount = cases.length;
+    modeResults.push({
+      name: mode.name,
+      type: mode.type,
+      // A mode is marked failed only when every one of its cases errored.
+      status: failures === queryCount ? "failed" : "ok",
+      queryCount,
+      failures,
+      metrics: averageMetrics(cases.map((entry) => entry.metrics)),
+      latency: summarizeLatency(cases.map((entry) => entry.latencyMs)),
+      cases,
+    });
+  }
+  const generatedAt = new Date().toISOString();
+  return {
+    success: true,
+    data: {
+      fixture: {
+        path: fixturePath,
+        name: fixture.metadata?.name,
+        version: fixture.version,
+        queryCount: queries.length,
+        topK: fixture.topK,
+      },
+      generatedAt,
+      modes: modeResults,
+      meta: {
+        indexName: options.indexName ?? "default",
+        collection: fixture.collection,
+      },
+    },
+  };
+}
+/**
+ * Render a bench result for the terminal or as JSON.
+ *
+ * Failures become either a `BENCH_FAILED` JSON envelope or an `Error: …`
+ * line. Successful runs become pretty-printed JSON or a pipe-delimited
+ * summary table with one row per mode.
+ *
+ * @param result - Outcome returned by `bench`.
+ * @param options - `json` selects JSON output instead of the text table.
+ * @returns The formatted output string.
+ */
+export function formatBench(
+  result: BenchResult,
+  options: { json?: boolean }
+): string {
+  if (!result.success) {
+    return options.json
+      ? JSON.stringify({
+          error: { code: "BENCH_FAILED", message: result.error },
+        })
+      : `Error: ${result.error}`;
+  }
+  if (options.json) {
+    return JSON.stringify(result.data, null, 2);
+  }
+  const { fixture, modes } = result.data;
+  // Mode names are free text; an unescaped "|" would add columns to the row.
+  const escapeCell = (text: string): string => text.replace(/\|/g, "\\|");
+  const lines = [
+    `Bench: ${fixture.name ?? fixture.path}`,
+    `Queries: ${fixture.queryCount}  Top K: ${fixture.topK}`,
+    "",
+    "| Mode | Status | Precision@K | Recall@K | F1@K | MRR | nDCG@K | p95 ms | Failures |",
+    "| ---- | ------ | ----------- | -------- | ---- | --- | ------ | ------ | -------- |",
+  ];
+  for (const mode of modes) {
+    const { metrics } = mode;
+    const cells = [
+      escapeCell(mode.name),
+      mode.status,
+      metrics.precisionAtK.toFixed(3),
+      metrics.recallAtK.toFixed(3),
+      metrics.f1AtK.toFixed(3),
+      metrics.mrr.toFixed(3),
+      metrics.ndcgAtK.toFixed(3),
+      mode.latency.p95Ms.toFixed(2),
+      mode.failures,
+    ];
+    lines.push(`| ${cells.join(" | ")} |`);
+  }
+  return lines.join("\n");
+}

package/src/cli/commands/doctor.ts CHANGED Viewed

@@ -268,8 +268,11 @@ async function checkSqliteExtensions(): Promise<DoctorCheck[]> {
   let vecMessage: string;
   if (sqliteVecAvailable) {
+    const formattedVersion = sqliteVecVersion.startsWith("v")
+      ? sqliteVecVersion
+      : `v${sqliteVecVersion}`;
     vecMessage = sqliteVecVersion
-      ? `sqlite-vec loaded (v${sqliteVecVersion})`
+      ? `sqlite-vec loaded (${formattedVersion})`
       : "sqlite-vec loaded";
   } else if (mode === "unavailable") {
     vecMessage =

package/src/cli/options.ts CHANGED Viewed

@@ -22,6 +22,7 @@ export const CMD = {
   search: "search",
   vsearch: "vsearch",
   query: "query",
+  bench: "bench",
   ask: "ask",
   get: "get",
   multiGet: "multi-get",
@@ -45,6 +46,7 @@ const FORMAT_SUPPORT: Record<CommandId, OutputFormat[]> = {
   [CMD.search]: ["terminal", "json", "files", "csv", "md", "xml"],
   [CMD.vsearch]: ["terminal", "json", "files", "csv", "md", "xml"],
   [CMD.query]: ["terminal", "json", "files", "csv", "md", "xml"],
+  [CMD.bench]: ["terminal", "json"],
   [CMD.ask]: ["terminal", "json", "md"],
   [CMD.get]: ["terminal", "json", "md"],
   [CMD.multiGet]: ["terminal", "json", "files", "md"],

package/src/cli/program.ts CHANGED Viewed

@@ -677,6 +677,58 @@ function wireSearchCommands(program: Command): void {
       await writeOutput(output, format);
     });
+  // bench - Retrieval benchmark fixture runner
+  program
+    .command("bench <fixture>")
+    .description("Run retrieval quality benchmarks from a fixture")
+    .option("-c, --collection <name>", "override fixture collection")
+    .option("-k, --top-k <num>", "override top-k metric cutoff")
+    .option(
+      "--mode <name>",
+      "benchmark mode (repeatable): bm25, vector, hybrid, fast, no-rerank, thorough",
+      (value: string, previous: string[] = []) => [...previous, value],
+      []
+    )
+    .option("-C, --candidate-limit <num>", "max candidates passed to reranking")
+    .option("--json", "JSON output")
+    .action(async (fixture: string, cmdOpts: Record<string, unknown>) => {
+      const format = getFormat(cmdOpts);
+      assertFormatSupported(CMD.bench, format);
+      const globals = getGlobals();
+      const topK = cmdOpts.topK
+        ? parsePositiveInt("top-k", cmdOpts.topK)
+        : undefined;
+      const candidateLimit = cmdOpts.candidateLimit
+        ? parsePositiveInt("candidate-limit", cmdOpts.candidateLimit)
+        : undefined;
+      const { bench, formatBench } = await import("./commands/bench");
+      const result = await bench(fixture, {
+        configPath: globals.config,
+        indexName: globals.index,
+        collection: cmdOpts.collection as string | undefined,
+        topK,
+        candidateLimit,
+        modes:
+          Array.isArray(cmdOpts.mode) && cmdOpts.mode.length > 0
+            ? (cmdOpts.mode as string[])
+            : undefined,
+        json: format === "json",
+      });
+      if (!result.success) {
+        throw new CliError(
+          result.isValidation ? "VALIDATION" : "RUNTIME",
+          result.error
+        );
+      }
+      await writeOutput(
+        formatBench(result, { json: format === "json" }),
+        format
+      );
+    });
   // ask - Human-friendly query with grounded answer
   program
     .command("ask <query>")

package/src/mcp/tools/index.ts CHANGED Viewed

@@ -53,6 +53,20 @@ export function normalizeTagFilters(tags?: string[]): string[] | undefined {
   return [...new Set(tags.map(normalizeTag))];
 }
+/**
+ * Human-readable descriptions for the MCP tools, keyed by short tool name.
+ *
+ * `registerTools` passes these as the description argument when registering
+ * gno_search, gno_vsearch, gno_query, gno_get, gno_multi_get and gno_status.
+ * Declared `as const` so each entry keeps its literal string type.
+ */
+export const MCP_TOOL_DESCRIPTIONS = {
+  // BM25 keyword lookup (gno_search).
+  search:
+    "BM25 keyword search. Fast exact-term lookup for names, identifiers, error text, and known phrases. Results include uri/docid and line when available; use gno_get with fromLine/lineCount or gno_multi_get for full context. Use gno_query when wording is uncertain.",
+  // Vector semantic lookup (gno_vsearch).
+  vsearch:
+    "Vector semantic search. Finds conceptually similar docs with different wording. Best after embeddings are current; use intent to disambiguate short terms. Use gno_query for default hybrid retrieval.",
+  // Hybrid retrieval (gno_query).
+  query:
+    "Hybrid search (BM25 + vector + optional expansion/reranking). Recommended default. Use intent for ambiguous terms, queryModes to combine term/intent/hyde strategies, fast=true for quick lookup, thorough=true when recall matters, and candidateLimit to trade latency for coverage.",
+  // Single-document retrieval (gno_get).
+  get: "Retrieve one document by gno:// URI, docid (#abc123), or collection/path. After search results include line, pass fromLine and lineCount to fetch only the relevant range before expanding to the full document.",
+  // Batch document retrieval (gno_multi_get).
+  multiGet:
+    "Retrieve multiple documents by refs array or glob pattern. Use after gno_search/gno_query to batch top result URIs/docids; set maxBytes and lineNumbers to control context size.",
+  // Index health report (gno_status).
+  status:
+    "Get index health: collection count, document count, chunk count, embedding backlog, and per-collection stats. Check first when vector/hybrid results look stale or unavailable.",
+} as const;
 // ─────────────────────────────────────────────────────────────────────────────
 // Shared Input Schemas
 // ─────────────────────────────────────────────────────────────────────────────
@@ -61,7 +75,9 @@ const searchInputSchema = z.object({
   query: z
     .string()
     .min(1, "Query cannot be empty")
-    .describe("Search query text"),
+    .describe(
+      "Exact keyword, identifier, filename, error text, or phrase to match with BM25"
+    ),
   collection: z
     .string()
     .optional()
@@ -89,7 +105,7 @@ const searchInputSchema = z.object({
     .string()
     .optional()
     .describe(
-      "Disambiguating context for ambiguous queries (e.g. 'programming language' when query is 'python')"
+      "Disambiguating context for ambiguous queries; not searched directly (e.g. 'programming language' when query is 'python')"
     ),
   exclude: z
     .array(z.string())
@@ -274,7 +290,9 @@ const vsearchInputSchema = z.object({
   query: z
     .string()
     .min(1, "Query cannot be empty")
-    .describe("Search query text (matched by semantic meaning, not keywords)"),
+    .describe(
+      "Natural-language concept to match semantically; use gno_search for exact error text or identifiers"
+    ),
   collection: z
     .string()
     .optional()
@@ -299,7 +317,9 @@ const vsearchInputSchema = z.object({
   intent: z
     .string()
     .optional()
-    .describe("Disambiguating context for the query"),
+    .describe(
+      "Disambiguating context for ambiguous terms; steers snippet choice without becoming the searched text"
+    ),
   exclude: z
     .array(z.string())
     .optional()
@@ -325,20 +345,24 @@ const queryModeInputSchema = z.object({
   mode: z
     .enum(["term", "intent", "hyde"])
     .describe(
-      "Retrieval strategy: 'term' (keyword match), 'intent' (disambiguation), 'hyde' (hypothetical document for semantic matching)"
+      "Retrieval strategy: 'term' for exact lexical anchors, 'intent' for disambiguation, 'hyde' for one hypothetical answer/document to improve semantic matching"
     ),
   text: z
     .string()
     .trim()
     .min(1, "Query mode text cannot be empty")
-    .describe("Text for this query mode"),
+    .describe(
+      "Text for this query mode; keep term modes concise and hyde modes answer-shaped"
+    ),
 });
 export const queryInputSchema = z.object({
   query: z
     .string()
     .min(1, "Query cannot be empty")
-    .describe("Search query text"),
+    .describe(
+      "Primary user query; combine with intent or queryModes for ambiguous requests"
+    ),
   collection: z
     .string()
     .optional()
@@ -366,7 +390,7 @@ export const queryInputSchema = z.object({
     .string()
     .optional()
     .describe(
-      "Disambiguating context (e.g. 'programming language' when query is 'python')"
+      "Disambiguating context (e.g. 'programming language' when query is 'python'); steers expansion, rerank, and snippet choice"
     ),
   candidateLimit: z
     .number()
@@ -375,7 +399,7 @@ export const queryInputSchema = z.object({
     .max(100)
     .optional()
     .describe(
-      "Max candidates passed to reranking stage (higher = better recall, slower)"
+      "Max candidates passed to reranking stage; raise when top results miss relevant docs, lower for latency"
     ),
   exclude: z
     .array(z.string())
@@ -397,7 +421,7 @@ export const queryInputSchema = z.object({
   queryModes: z
     .array(queryModeInputSchema)
     .describe(
-      "Structured query modes to combine multiple retrieval strategies. Max one 'hyde' entry."
+      "Structured query modes for typed retrieval: combine term anchors, intent disambiguation, and at most one hyde hypothetical document"
     )
     .superRefine((entries, ctx) => {
       const hydeCount = entries.filter((entry) => entry.mode === "hyde").length;
@@ -417,7 +441,7 @@ export const queryInputSchema = z.object({
     .boolean()
     .default(false)
     .describe(
-      "Enable query expansion for best recall (~5-8s). Use when default returns no results"
+      "Enable query expansion for best recall (~5-8s). Use for broad research or when default results miss likely docs"
     ),
   expand: z
     .boolean()
@@ -443,13 +467,17 @@ const getInputSchema = z.object({
     .int()
     .min(1)
     .optional()
-    .describe("Start reading from this line number"),
+    .describe(
+      "Start reading from this line number; use the line returned by search/query results"
+    ),
   lineCount: z
     .number()
     .int()
     .min(1)
     .optional()
-    .describe("Number of lines to return (from fromLine)"),
+    .describe(
+      "Number of lines to return from fromLine; prefer a small range before fetching full docs"
+    ),
   lineNumbers: z
     .boolean()
     .default(true)
@@ -461,7 +489,9 @@ const multiGetInputSchema = z.object({
     .array(z.string())
     .min(1)
     .optional()
-    .describe("Array of document references (URIs or docids)"),
+    .describe(
+      "Array of document references from search/query results (gno:// URIs or docids)"
+    ),
   pattern: z
     .string()
     .optional()
@@ -471,7 +501,9 @@ const multiGetInputSchema = z.object({
     .int()
     .min(1)
     .default(10_240)
-    .describe("Max bytes per document (truncates longer docs)"),
+    .describe(
+      "Max bytes per document; lower this when batching many top search results"
+    ),
   lineNumbers: z
     .boolean()
     .default(true)
@@ -717,42 +749,42 @@ export function registerTools(server: McpServer, ctx: ToolContext): void {
   // Tool IDs use underscores (MCP pattern: ^[a-zA-Z0-9_-]{1,64}$)
   server.tool(
     "gno_search",
-    "BM25 keyword search. Instant, best for exact terms. Use gno_query for better quality.",
+    MCP_TOOL_DESCRIPTIONS.search,
     searchInputSchema.shape,
     (args) => handleSearch(args, ctx)
   );
   server.tool(
     "gno_vsearch",
-    "Vector semantic search. Finds conceptually similar docs even with different wording. Use gno_query for best results.",
+    MCP_TOOL_DESCRIPTIONS.vsearch,
     vsearchInputSchema.shape,
     (args) => handleVsearch(args, ctx)
   );
   server.tool(
     "gno_query",
-    "Hybrid search (BM25 + vector + reranking). Best quality, recommended default. Use fast=true for speed, thorough=true for best recall.",
+    MCP_TOOL_DESCRIPTIONS.query,
     queryInputSchema.shape,
     (args) => handleQuery(args, ctx)
   );
   server.tool(
     "gno_get",
-    "Retrieve a single document's full content by URI (gno://collection/path), docid (#abc123), or collection/path.",
+    MCP_TOOL_DESCRIPTIONS.get,
     getInputSchema.shape,
     (args) => handleGet(args, ctx)
   );
   server.tool(
     "gno_multi_get",
-    "Retrieve multiple documents by refs array or glob pattern. Use maxBytes to control truncation.",
+    MCP_TOOL_DESCRIPTIONS.multiGet,
     multiGetInputSchema.shape,
     (args) => handleMultiGet(args, ctx)
   );
   server.tool(
     "gno_status",
-    "Get index health: collection count, document count, chunk count, embedding backlog, and per-collection stats.",
+    MCP_TOOL_DESCRIPTIONS.status,
     statusInputSchema.shape,
     (args) => handleStatus(args, ctx)
   );