npm - tea-rags - Versions diffs - 1.19.0 → 1.19.1 - Mend

tea-rags 1.19.0 → 1.19.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

package/benchmarks/benchmark-embeddings.mjs +148 -0
package/benchmarks/lib/benchmarks.mjs +726 -0
package/benchmarks/lib/cleanup.mjs +40 -0
package/benchmarks/lib/colors.mjs +52 -0
package/benchmarks/lib/config.mjs +115 -0
package/benchmarks/lib/embedding-calibration.mjs +508 -0
package/benchmarks/lib/estimator.mjs +93 -0
package/benchmarks/lib/files.mjs +94 -0
package/benchmarks/lib/output.mjs +218 -0
package/benchmarks/lib/provider.mjs +66 -0
package/benchmarks/lib/smart-stepping.mjs +186 -0
package/benchmarks/lib/stopping.mjs +79 -0
package/benchmarks/tune.mjs +753 -0
package/package.json +2 -1

package/benchmarks/benchmark-embeddings.mjs ADDED Viewed

@@ -0,0 +1,148 @@
+#!/usr/bin/env node
+/**
+ * Embedding Diagnostic Benchmark
+ *
+ * Automatically calibrates EMBEDDING_BATCH_SIZE and EMBEDDING_CONCURRENCY
+ * using a three-phase plateau-detection algorithm.
+ *
+ * Phase 1: Find batch size plateau (CONCURRENCY=1)
+ * Phase 2: Test concurrency on plateau batches
+ * Phase 3: Select robust configuration (within 2% of max, prefer lower concurrency/batch)
+ *
+ * Run: npm run benchmark-embeddings
+ */
+import { c, printBox } from "./lib/colors.mjs";
+import { AVG_LOC_PER_CHUNK, config, MEDIAN_CODE_CHUNK_SIZE } from "./lib/config.mjs";
+import { calibrateEmbeddings } from "./lib/embedding-calibration.mjs";
+import { checkProviderConnectivity, createEmbeddingProvider } from "./lib/provider.mjs";
+/**
+ * Format a duration given in milliseconds as a short human-readable string.
+ *
+ * - below one second: whole milliseconds, e.g. "250ms"
+ * - below one minute: seconds with one decimal, e.g. "12.3s"
+ * - otherwise: minutes and whole seconds, e.g. "2m 5s"
+ *
+ * Rounding is taken into account before the unit is chosen, so a value never
+ * renders as "1000ms", "60.0s" or "1m 60s"; it rolls over into the next unit.
+ *
+ * @param {number} ms - Duration in milliseconds.
+ * @returns {string} The formatted duration, or "n/a" when `ms` is not a finite number.
+ */
+function formatTime(ms) {
+  // Infinity/NaN (e.g. an estimate divided by a zero throughput) would otherwise print "Infinitym NaNs".
+  if (!Number.isFinite(ms)) return "n/a";
+  // 999.5 and above would round to "1000ms", so hand those to the seconds branch.
+  if (ms < 999.5) return `${Math.round(ms)}ms`;
+  // 59950 and above can render as "60.0s" through toFixed(1), so hand those to the minutes branch.
+  if (ms < 59950) return `${(ms / 1000).toFixed(1)}s`;
+  // Round to whole seconds first and split afterwards, so the seconds part stays within 0-59.
+  const totalSeconds = Math.round(ms / 1000);
+  const minutes = Math.floor(totalSeconds / 60);
+  const seconds = totalSeconds % 60;
+  return `${minutes}m ${seconds}s`;
+}
+/**
+ * Script entry point.
+ *
+ * Prints the active configuration, checks that the embedding provider is
+ * reachable (exiting with status 1 when the check fails), runs the
+ * calibration, and then reports the selected EMBEDDING_BATCH_SIZE and
+ * EMBEDDING_CONCURRENCY, the reasoning for the detected setup type, export
+ * lines for the environment, rough indexing-time estimates, and run stats.
+ */
+async function main() {
+  // Small print helpers; colours are read at call time, exactly like inline templates.
+  const heading = (title) => console.log(`${c.bold}${title}${c.reset}`);
+  const bullet = (text) => console.log(`  ${c.dim}•${c.reset} ${text}`);
+  const check = (text) => console.log(`  ${c.green}✓${c.reset} ${text}`);
+  const exportLine = (name, value) => console.log(`  ${c.cyan}export ${name}=${value}${c.reset}`);
+  console.clear();
+  printBox("EMBEDDING CALIBRATION BENCHMARK", "Three-phase plateau detection");
+  // Echo the settings this run will use.
+  heading("Configuration:");
+  console.log(`  ${c.dim}Ollama:${c.reset}        ${config.EMBEDDING_BASE_URL}`);
+  console.log(`  ${c.dim}Model:${c.reset}         ${config.EMBEDDING_MODEL}`);
+  console.log(`  ${c.dim}Chunk size:${c.reset}    ${MEDIAN_CODE_CHUNK_SIZE} chars (median from production)`);
+  console.log();
+  // Connectivity gate: report the failure and exit before doing any work.
+  process.stdout.write(`${c.dim}Checking embedding provider...${c.reset} `);
+  const connectivity = await checkProviderConnectivity();
+  if (!connectivity.ok) {
+    console.log(`${c.red}FAILED${c.reset}`);
+    console.log(`\n${c.red}Error:${c.reset} ${connectivity.error}`);
+    process.exit(1);
+  }
+  console.log(`${c.green}OK${c.reset}`);
+  // Build the provider and show what we got.
+  const created = await createEmbeddingProvider();
+  const embedder = created.provider;
+  const providerLabel = created.name;
+  check(`Embedding provider: ${providerLabel}`);
+  check(`Vector dimension: ${embedder.getDimensions()}`);
+  console.log();
+  // Calibrate (verbose progress is printed by the calibration itself).
+  const result = await calibrateEmbeddings(embedder, { verbose: true });
+  // ========== OUTPUT ==========
+  console.log();
+  // Classify the setup: ONNX provider, remote endpoint, or local endpoint.
+  const isOnnx = providerLabel === "onnx";
+  const pointsAtLoopback = (url) => url.includes("localhost") || url.includes("127.0.0.1");
+  // The base URL is only inspected for non-ONNX providers.
+  const isRemote = !isOnnx && !pointsAtLoopback(config.EMBEDDING_BASE_URL);
+  let setupIcon;
+  let setupName;
+  if (isOnnx) {
+    setupIcon = "⚡";
+    setupName = "Local ONNX";
+  } else if (isRemote) {
+    setupIcon = "🌐";
+    setupName = "Remote GPU";
+  } else {
+    setupIcon = "🏠";
+    setupName = "Local GPU";
+  }
+  printBox(`${setupIcon} ${setupName.toUpperCase()} - OPTIMAL CONFIGURATION`, "");
+  // Headline numbers.
+  const batchSize = result.EMBEDDING_BATCH_SIZE;
+  const concurrency = result.EMBEDDING_CONCURRENCY;
+  const throughput = result.throughput_chunks_per_sec;
+  console.log(`  ${c.bold}EMBEDDING_BATCH_SIZE${c.reset}   = ${c.green}${c.bold}${batchSize}${c.reset}`);
+  console.log(`  ${c.bold}EMBEDDING_CONCURRENCY${c.reset}  = ${c.green}${c.bold}${concurrency}${c.reset}`);
+  console.log();
+  console.log(`  ${c.bold}Throughput:${c.reset} ${c.cyan}${throughput} chunks/s${c.reset}`);
+  console.log();
+  // Rationale, tailored to the detected setup type.
+  heading("Why this configuration?");
+  if (isOnnx) {
+    bullet(`Local ONNX runtime (${providerLabel})`);
+    bullet("In-process inference, no network overhead");
+    if (batchSize <= 16) {
+      bullet("Small batches optimal for ONNX memory management");
+    }
+  } else if (isRemote) {
+    bullet(`Remote GPU detected (${config.EMBEDDING_BASE_URL})`);
+    bullet("Lower batch + higher concurrency hides network latency");
+    bullet("While one batch transfers, GPU processes another");
+    if (concurrency > 1) {
+      bullet(`CONCURRENCY=${concurrency} overlaps network I/O with GPU compute`);
+    }
+  } else {
+    bullet("Local GPU detected (minimal network latency)");
+    bullet("Higher batch + lower concurrency minimizes overhead");
+    if (concurrency === 1) {
+      bullet("CONCURRENCY=1 indicates GPU-bound workload");
+    }
+  }
+  console.log();
+  // Copy-paste friendly environment settings.
+  heading("Add to your environment:");
+  console.log();
+  exportLine("EMBEDDING_BATCH_SIZE", batchSize);
+  exportLine("EMBEDDING_CONCURRENCY", concurrency);
+  console.log();
+  // Rough estimates: LoC -> chunks -> whole seconds at the measured throughput.
+  heading("Estimated indexing times:");
+  const sampleProjects = [
+    ["10K LoC", 10_000],
+    ["100K LoC", 100_000],
+    ["1M LoC", 1_000_000],
+    ["VS Code (3.5M)", 3_500_000],
+  ];
+  sampleProjects.forEach(([label, loc]) => {
+    const chunkCount = Math.ceil(loc / AVG_LOC_PER_CHUNK);
+    const etaSeconds = Math.ceil(chunkCount / throughput);
+    console.log(`  ${c.dim}${label.padEnd(20)}${c.reset} ${c.bold}${formatTime(etaSeconds * 1000)}${c.reset}`);
+  });
+  console.log();
+  // Run statistics.
+  console.log(`${c.dim}────────────────────────────────────────${c.reset}`);
+  const stableCount = result.stable_configs_count;
+  const discardedCount = result.discarded_configs_count;
+  console.log(`${c.dim}Configs tested: ${stableCount} stable, ${discardedCount} discarded${c.reset}`);
+  console.log(`${c.bold}Total benchmark time: ${formatTime(result.calibration_time_ms)}${c.reset}`);
+  // Shut the provider down when it offers terminate() (original note: ONNX keeps a socket alive).
+  const canTerminate = "terminate" in embedder && typeof embedder.terminate === "function";
+  if (canTerminate) {
+    await embedder.terminate();
+  }
+}
+// Run the benchmark; on any unhandled rejection print the message and stack, then exit with status 1.
+main().catch((failure) => {
+  const prefix = `${c.red}Fatal error:${c.reset}`;
+  console.error(prefix, failure.message);
+  console.error(failure.stack);
+  process.exit(1);
+});