npm - sweet-search - Versions diffs - 2.3.0 → 2.4.2 - Mend

sweet-search 2.3.0 → 2.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5)

package/core/infrastructure/native-inference.js +13 -2
package/core/skills/sweet-index/SKILL.md +92 -0
package/package.json +11 -10
package/scripts/init.js +74 -15
package/scripts/uninstall.js +99 -2

package/core/infrastructure/native-inference.js CHANGED

@@ -30,8 +30,18 @@
  *                                                before addon loads so
  *                                                the Rust dtype policy
  *                                                picks BF16/F16/F32 by
- *                                                compute capability.
+ *                                                compute capability and
+ *                                                model family.
  *                                                See mod.rs::optimal_dtype
+ *   SWEET_SEARCH_NATIVE_DTYPE=f32|bf16|f16       Global dtype preference.
+ *                                                On CUDA, BF16 is used for
+ *                                                embeddings on Ampere+ but
+ *                                                LI remains F32 for quality.
+ *   SWEET_SEARCH_NATIVE_EMBED_DTYPE=f32|bf16|f16 Per-model diagnostic
+ *                                                override for embeddings.
+ *   SWEET_SEARCH_NATIVE_LI_DTYPE=f32|bf16|f16    Per-model diagnostic
+ *                                                override; BF16/F16 LI is
+ *                                                known to drift on CUDA.
  *   CANDLE_METAL_COMPUTE_PER_BUFFER=<N>        — candle default 50 (tuned)
  *   CANDLE_METAL_COMMAND_POOL_SIZE=<N>         — candle default 5 (tuned)
  */
@@ -87,7 +97,8 @@ export function pickCascadeDirForDevice(deviceKind, cascadeDirOverride, resolveC
 /**
  * Ensure `SWEET_SEARCH_CUDA_COMPUTE_CAP` is set for the current process
  * before the addon loads a CUDA model. The Rust `optimal_dtype` reads
- * this env var to pick BF16 on Ampere+ and F16/F32 on older GPUs.
+ * this env var to pick BF16 for the embedding model on Ampere+ while
+ * keeping ModernBERT LI on F32 unless explicitly overridden.
  *
  * Idempotent: honors an already-set value (useful for forcing a dtype
  * tier in benchmarks) and silently no-ops when there is no NVIDIA GPU.

package/core/skills/sweet-index/SKILL.md ADDED

@@ -0,0 +1,92 @@
+---
+name: sweet-index
+description: "Use when (re)indexing a Sweet Search project. Runs the full-profile indexer with GPU model prewarming (CoreML cascade on M3+, candle Metal on M1/M2, ORT CPU elsewhere), kills ORT CPU models during indexing to avoid memory contention, and rewarms them for query readiness on completion. Incremental runs under 20 files stay on ORT CPU."
+category: developer-tooling
+priority: high
+tokenEstimate: 600
+agents: []
+implementation_status: active
+optimization_version: 1.0
+last_optimized: 2026-04-17
+dependencies: [sweet-search]
+quick_reference_card: true
+tags: [indexing, embeddings, late-interaction, gpu, coreml, metal, ort, prewarming, sweet-search]
+trust_tier: 1
+---
+# /sweet-index — Index the Codebase
+<default_to_action>
+When the user invokes `/sweet-index`, run the full-profile indexing command
+immediately. Do not ask clarifying questions — the indexer is idempotent, safe
+to re-run, and handles incremental vs full reindex automatically.
+</default_to_action>
+## What this does
+Runs `core/indexing/index-codebase-v21.js` with the `--full` flag so every
+artifact is rebuilt from scratch. The indexer itself manages the model
+lifecycle end-to-end:
+1. **Kill resident ORT CPU models** — prevents memory contention and mutex
+   fighting with the GPU models about to be loaded.
+2. **Detect best backend** via `hardware-capability.js` —
+   `coreml-cascade` on M3+ Apple Silicon, `candle-metal` on M1/M2,
+   `candle-cpu` elsewhere.
+3. **Load GPU models + warmup forward pass** — compiles Metal pipelines,
+   CoreML variant bundles, and BLAS thread pools so the first indexing
+   batch pays no cold-start cost.
+4. **Index the codebase** — code graph, vector embeddings, HNSW,
+   late-interaction index, quantized artifacts, sparse-gram index.
+5. **Kill GPU models** — releases Metal queues and Neural Engine.
+6. **Load + warmup ORT CPU models** — both embedding and LI get one dummy
+   forward pass so the first query after indexing is warm.
+On small-changeset incremental runs (under 20 files), the indexer skips the
+GPU swap entirely — the load/warmup overhead would dwarf the actual work.
+## Usage
+```bash
+node core/indexing/index-codebase-v21.js --full
+```
+Or via npm script:
+```bash
+npm run index:full
+```
+## What to report
+After the command completes, pick out these lines from stderr:
+- `GPU index pool armed (<backend>)` → confirms which backend was used
+- `embed=<load>+<warm>ms, li=<load>+<warm>ms` → prewarm timings
+- `CPU models warmed for queries: load=…ms, warm=…ms (embed=ok, li=ok)` →
+  confirms ORT CPU is armed for subsequent searches
+- `INDEXING COMPLETE (FULL)` with `Duration`, `Files indexed`, `Entities`,
+  `Relationships` → headline stats
+## Flags (full list)
+| Flag | Purpose |
+|------|---------|
+| `--full` | Full reindex — rebuild everything. Always armed via this skill. |
+| `--no-late-interaction` | Skip LI index (faster, lower quality). Rarely wanted. |
+| `--late-interaction-pool=N` | Token pooling factor (2 halves tokens). |
+| `--vectors-only` | Skip code graph — breaks GraphRAG. Avoid. |
+| `--graph-only` | Only build code graph, skip vectors. |
+| `--verbose` / `-v` | Force per-phase progress output. |
+Do **not** pass `--sqlite-fast` — it disables fsync between phases and is
+only safe for benchmarking on throwaway state.
+## When not to use
+- **Single-file edits**: the indexer auto-detects small changesets and stays
+  on CPU, so `/sweet-index` is still safe, but a watcher-triggered
+  incremental run is cheaper.
+- **Queries feel slow**: usually means the ORT CPU models are not loaded.
+  Run `/sweet-index` once to rewarm them, or restart the sweet-search
+  server.

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "sweet-search",
-  "version": "2.3.0",
+  "version": "2.4.2",
   "description": "Sweet Search - SOTA Hybrid Code Search Engine with WASM CatBoost Query Router, Semantic/Lexical/Structural Search, and Multilingual Support",
   "type": "module",
   "main": "core/search/sweet-search.js",
@@ -13,7 +13,7 @@
   "author": "Marko Sladojevic <marko@panonit.com> (https://panonit.com)",
   "repository": {
     "type": "git",
-    "url": "https://github.com/panonitorg/sweet-search"
+    "url": "git+https://github.com/panonitorg/sweet-search.git"
   },
   "bugs": {
     "url": "https://github.com/panonitorg/sweet-search/issues"
@@ -34,8 +34,8 @@
     "panonit"
   ],
   "bin": {
-    "sweet-search": "./core/cli.js",
-    "sweet-search-mcp": "./mcp/server.js"
+    "sweet-search": "core/cli.js",
+    "sweet-search-mcp": "mcp/server.js"
   },
   "files": [
     "core/*.js",
@@ -48,6 +48,7 @@
     "core/vocabulary/",
     "core/vector-store/",
     "core/query/",
+    "core/skills/",
     "mcp/",
     "scripts/benchmark-harness.js",
     "scripts/init.js",
@@ -139,12 +140,12 @@
     "vitest": "^4.0.16"
   },
   "optionalDependencies": {
-    "@sweet-search/native-darwin-arm64": "2.3.0",
-    "@sweet-search/native-darwin-x64": "2.3.0",
-    "@sweet-search/native-linux-arm64-gnu": "2.3.0",
-    "@sweet-search/native-linux-arm64-gnu-cuda": "2.3.0",
-    "@sweet-search/native-linux-x64-gnu": "2.3.0",
-    "@sweet-search/native-linux-x64-gnu-cuda": "2.3.0"
+    "@sweet-search/native-darwin-arm64": "2.4.2",
+    "@sweet-search/native-darwin-x64": "2.4.2",
+    "@sweet-search/native-linux-arm64-gnu": "2.4.2",
+    "@sweet-search/native-linux-arm64-gnu-cuda": "2.4.2",
+    "@sweet-search/native-linux-x64-gnu": "2.4.2",
+    "@sweet-search/native-linux-x64-gnu-cuda": "2.4.2"
   },
   "engines": {
     "node": ">=18.0.0"

package/scripts/init.js CHANGED

@@ -277,7 +277,7 @@ export async function downloadModelsForProfile(profile, options = {}) {
 function printReport(report) {
   const {
     profile, maxsimTier, routerType, models, verification, runtimeDownloads,
-    capability, cascadeReport, dedupReport, prewarmHookReport,
+    capability, cascadeReport, dedupReport, prewarmHookReport, skillReport,
   } = report;
   console.log('');
@@ -359,6 +359,16 @@ function printReport(report) {
     // 'skipped' is silent — explicit user opt-out.
   }
+  if (skillReport) {
+    if (skillReport.status === 'installed') {
+      console.log(`  /sweet-index skill:   installed → ${skillReport.skillPath}`);
+    } else if (skillReport.status === 'already-installed') {
+      console.log(`  /sweet-index skill:   already installed`);
+    } else if (skillReport.status === 'error') {
+      console.log(`  /sweet-index skill:   ERROR — ${skillReport.detail}`);
+    }
+  }
   console.log(`  Runtime downloads:    ${runtimeDownloads}`);
   const passedCount = verification.checks.filter(c => c.status === 'pass').length;
@@ -535,6 +545,54 @@ export function registerPrewarmSessionStartHook({
   };
 }
+// ---------------------------------------------------------------------------
+// /sweet-index skill installation
+// ---------------------------------------------------------------------------
+// The skill is shipped inside the npm tarball at core/skills/sweet-index/SKILL.md
+// (see package.json::files). Init copies it into the project's
+// .claude/skills/sweet-index/ — creating the .claude tree if absent — so users
+// who haven't yet adopted Claude Code still get the skill available the moment
+// they do. Per-project install (not global ~/.claude) so different projects
+// can pin different sweet-search versions without skill drift.
+//
+// Returns `{ status, detail, skillPath? }` for the init report:
+//   installed         — copied SKILL.md (new install)
+//   already-installed — destination existed, left untouched (idempotent)
+//   error             — copy failed; init continues (never blocks)
+export function installSweetIndexSkill({ projectRoot, packageRoot } = {}) {
+  const skillDir = join(projectRoot, '.claude', 'skills', 'sweet-index');
+  const skillDest = join(skillDir, 'SKILL.md');
+  const skillSrc = join(packageRoot, 'core', 'skills', 'sweet-index', 'SKILL.md');
+  if (!existsSync(skillSrc)) {
+    return {
+      status: 'error',
+      detail: `skill source missing in package: ${skillSrc} (re-install sweet-search)`,
+    };
+  }
+  if (existsSync(skillDest)) {
+    return {
+      status: 'already-installed',
+      detail: skillDir,
+      skillPath: skillDest,
+    };
+  }
+  try {
+    mkdirSync(skillDir, { recursive: true });
+    copyFileSync(skillSrc, skillDest);
+    return {
+      status: 'installed',
+      detail: skillDir,
+      skillPath: skillDest,
+    };
+  } catch (err) {
+    return { status: 'error', detail: err.message };
+  }
+}
 // ---------------------------------------------------------------------------
 // Help text
 // ---------------------------------------------------------------------------
@@ -876,20 +934,20 @@ export async function runInit(args) {
     process.stderr.write(`[init] Warning: Could not install index-maintainer: ${e.message}\n`);
   }
-  // 11. Install /sweet-index skill
-  try {
-    const skillDir = join(projectRoot, '.claude', 'skills', 'sweet-index');
-    const skillDest = join(skillDir, 'SKILL.md');
-    const skillSrc = join(PACKAGE_ROOT, 'core', 'skills', 'sweet-index', 'SKILL.md');
-    if (!existsSync(skillDest)) {
-      mkdirSync(skillDir, { recursive: true });
-      copyFileSync(skillSrc, skillDest);
-      process.stderr.write(`[init] Installed /sweet-index skill to ${skillDir}\n`);
-    } else {
-      process.stderr.write(`[init] /sweet-index skill already installed\n`);
-    }
-  } catch (e) {
-    process.stderr.write(`[init] Warning: Could not install /sweet-index skill: ${e.message}\n`);
+  // 11. Install /sweet-index skill — always, even if .claude/ doesn't exist.
+  //     Users who haven't adopted Claude Code yet still get the skill in place
+  //     the moment they do; we treat the skill as part of the product, not a
+  //     Claude-Code-conditional add-on.
+  const skillReport = installSweetIndexSkill({
+    projectRoot,
+    packageRoot: PACKAGE_ROOT,
+  });
+  if (skillReport.status === 'installed') {
+    process.stderr.write(`[init] Installed /sweet-index skill to ${skillReport.detail}\n`);
+  } else if (skillReport.status === 'already-installed') {
+    process.stderr.write(`[init] /sweet-index skill already installed\n`);
+  } else if (skillReport.status === 'error') {
+    process.stderr.write(`[init] Warning: Could not install /sweet-index skill: ${skillReport.detail}\n`);
   }
   // 11.5. Register Claude Code SessionStart daemon-prewarm hook.
@@ -920,6 +978,7 @@ export async function runInit(args) {
     cascadeReport,
     dedupReport,
     prewarmHookReport,
+    skillReport,
   });
 }

package/scripts/uninstall.js CHANGED

@@ -11,7 +11,7 @@
  *   sweet-search uninstall [--dry-run] [--keep-models] [--purge] [--force]
  */
-import { existsSync, readdirSync, readFileSync, renameSync, rmSync, statSync, unlinkSync, writeFileSync } from 'node:fs';
+import { existsSync, readdirSync, readFileSync, renameSync, rmdirSync, rmSync, statSync, unlinkSync, writeFileSync } from 'node:fs';
 import { dirname, join } from 'node:path';
 import { execSync } from 'node:child_process';
 import { fileURLToPath } from 'node:url';
@@ -213,6 +213,65 @@ export function stopRunningDaemon({
   return result;
 }
+/**
+ * Remove the sweet-search /sweet-index skill from `.claude/skills/sweet-index/`.
+ * Only removes the directory we created — leaves `.claude/skills/` and `.claude/`
+ * untouched even if they're empty afterwards, because the user may add other
+ * skills/hooks/settings to `.claude/` over time and we don't own that root.
+ *
+ * Returns `{ status, detail, skillPath? }`:
+ *   not-found  — directory absent (nothing to do)
+ *   removed    — rm -rf on the sweet-index/ subtree succeeded
+ *   dry-run    — found the directory but skipped the delete
+ *   error      — rm failed (permissions, etc.); uninstall continues
+ */
+export function removeSweetIndexSkill(projectRoot, { dryRun = false } = {}) {
+  const skillDir = join(projectRoot, '.claude', 'skills', 'sweet-index');
+  if (!existsSync(skillDir)) {
+    return { status: 'not-found', detail: 'no .claude/skills/sweet-index/' };
+  }
+  if (dryRun) {
+    return { status: 'dry-run', detail: skillDir, skillPath: skillDir };
+  }
+  try {
+    rmSync(skillDir, { recursive: true, force: true });
+    return { status: 'removed', detail: skillDir, skillPath: skillDir };
+  } catch (err) {
+    return { status: 'error', detail: err.message };
+  }
+}
+/**
+ * Best-effort cleanup of empty parent directories left behind after rm -rf'ing
+ * the per-model cache dirs and the CoreML cascade root.
+ *
+ * Walks up from `start` toward `stopAt` (exclusive) and removes each directory
+ * iff it's empty. rmdirSync naturally fails on non-empty dirs, so this is
+ * inherently safe — we never delete a directory that has files we didn't put
+ * there. Stops at the first non-empty dir or when `stopAt` is reached.
+ *
+ * Used to clean ~/.cache/sweet-search/{models,coreml-cascade}/ → ~/.cache/sweet-search/
+ * after their contents are removed. Without this, uninstall leaves an empty
+ * sweet-search directory dangling under the user's cache root.
+ */
+function pruneEmptyAncestors(start, stopAt) {
+  let dir = start;
+  while (dir && dir !== stopAt && dir !== dirname(dir)) {
+    if (!existsSync(dir)) {
+      dir = dirname(dir);
+      continue;
+    }
+    try {
+      const entries = readdirSync(dir);
+      if (entries.length > 0) return; // non-empty — stop walking
+      rmdirSync(dir);
+    } catch {
+      return; // permission / race / non-empty — stop walking
+    }
+    dir = dirname(dir);
+  }
+}
 /**
  * Remove the sweet-search-owned SessionStart entry from `.claude/settings.json`,
  * preserving every other hook, permission, and top-level key. Detection is
@@ -309,9 +368,12 @@ What gets removed:
   - CoreML variant cascade (if built) — includes ~1.8 GB of .mlpackage
     artifacts AND the sibling .mlmodelc compiled cache files next to
     each variant. Skipped by --keep-models.
+  - .claude/skills/sweet-index/ (the per-project /sweet-index skill copy)
+  - daemon-prewarm SessionStart entry inside .claude/settings.json
 What is NOT removed:
   - User source code, indexes, or database files outside .sweet-search/
+  - .claude/ itself or any other hooks/skills/settings the user owns
   - The npm package itself (unless --purge)
 `);
 }
@@ -369,8 +431,13 @@ export async function runUninstall(args) {
   const hookPreview = removePrewarmSessionStartHook(projectRoot, { dryRun: true });
   const hasHookEntry = hookPreview.status === 'dry-run';
+  // Check for the /sweet-index skill so we can report it even when
+  // .sweet-search/ was already deleted by hand.
+  const skillPreview = removeSweetIndexSkill(projectRoot, { dryRun: true });
+  const hasSkillEntry = skillPreview.status === 'dry-run';
   // Nothing to remove?
-  if (removals.length === 0 && !hasHookEntry) {
+  if (removals.length === 0 && !hasHookEntry && !hasSkillEntry) {
     console.log('Nothing to remove — Sweet Search is not initialized in this project.');
     return;
   }
@@ -387,6 +454,9 @@ export async function runUninstall(args) {
   if (hasHookEntry) {
     console.log(`    daemon-prewarm SessionStart hook in .claude/settings.json`);
   }
+  if (hasSkillEntry) {
+    console.log(`    /sweet-index skill (.claude/skills/sweet-index/)`);
+  }
   console.log(`  Total: ${formatBytes(totalBytes)}`);
   if (parsed.keepModels) {
     console.log('  Model cache: kept (--keep-models)');
@@ -398,6 +468,10 @@ export async function runUninstall(args) {
     if (dryHook.status === 'dry-run') {
       console.log(`  Would also remove: prewarm SessionStart hook (.claude/settings.json — ${dryHook.detail})`);
     }
+    const drySkill = removeSweetIndexSkill(projectRoot, { dryRun: true });
+    if (drySkill.status === 'dry-run') {
+      console.log(`  Would also remove: /sweet-index skill (${drySkill.detail})`);
+    }
     console.log('Dry run — nothing was removed.');
     return;
   }
@@ -430,6 +504,29 @@ export async function runUninstall(args) {
     }
   }
+  // Prune empty parent directories left behind under the model cache root
+  // (~/.cache/sweet-search/{models,coreml-cascade}/ → ~/.cache/sweet-search/).
+  // rmdirSync naturally fails on non-empty dirs, so this only deletes
+  // directories we've effectively emptied. Stops before $HOME/.cache.
+  if (!parsed.keepModels) {
+    const cacheRoot = resolveModelCacheRoot();          // .../sweet-search/models
+    const sweetSearchCacheRoot = dirname(cacheRoot);    // .../sweet-search
+    const userCacheRoot = dirname(sweetSearchCacheRoot); // .../.cache (do not touch)
+    pruneEmptyAncestors(cacheRoot, userCacheRoot);
+  }
+  // Remove the per-project /sweet-index skill init copied into .claude/.
+  // Non-fatal — a failure here just leaves the SKILL.md stub behind.
+  const skillResult = removeSweetIndexSkill(projectRoot, { dryRun: parsed.dryRun });
+  if (skillResult.status === 'removed') {
+    console.log(`  Removed: /sweet-index skill (${skillResult.detail})`);
+    removed++;
+  } else if (skillResult.status === 'error') {
+    console.log(`  Failed to remove /sweet-index skill: ${skillResult.detail}`);
+    kept++;
+  }
+  // 'not-found' and 'dry-run' are silent in the main output.
   // Reverse the Claude Code daemon-prewarm SessionStart entry init added to
   // .claude/settings.json. Non-fatal — a failure here doesn't leave the
   // user in a worse state than before uninstall ran.