nodebench-mcp 2.17.0 → 2.18.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57):
  1. package/LICENSE +21 -0
  2. package/NODEBENCH_AGENTS.md +2 -2
  3. package/README.md +514 -82
  4. package/dist/__tests__/analytics.test.d.ts +11 -0
  5. package/dist/__tests__/analytics.test.js +546 -0
  6. package/dist/__tests__/analytics.test.js.map +1 -0
  7. package/dist/__tests__/dynamicLoading.test.d.ts +1 -0
  8. package/dist/__tests__/dynamicLoading.test.js +278 -0
  9. package/dist/__tests__/dynamicLoading.test.js.map +1 -0
  10. package/dist/__tests__/evalHarness.test.js +1 -1
  11. package/dist/__tests__/evalHarness.test.js.map +1 -1
  12. package/dist/__tests__/helpers/answerMatch.js +22 -22
  13. package/dist/__tests__/presetRealWorldBench.test.js +9 -0
  14. package/dist/__tests__/presetRealWorldBench.test.js.map +1 -1
  15. package/dist/__tests__/tools.test.js +1 -1
  16. package/dist/__tests__/toolsetGatingEval.test.js +9 -1
  17. package/dist/__tests__/toolsetGatingEval.test.js.map +1 -1
  18. package/dist/analytics/index.d.ts +10 -0
  19. package/dist/analytics/index.js +11 -0
  20. package/dist/analytics/index.js.map +1 -0
  21. package/dist/analytics/projectDetector.d.ts +19 -0
  22. package/dist/analytics/projectDetector.js +259 -0
  23. package/dist/analytics/projectDetector.js.map +1 -0
  24. package/dist/analytics/schema.d.ts +57 -0
  25. package/dist/analytics/schema.js +157 -0
  26. package/dist/analytics/schema.js.map +1 -0
  27. package/dist/analytics/smartPreset.d.ts +63 -0
  28. package/dist/analytics/smartPreset.js +300 -0
  29. package/dist/analytics/smartPreset.js.map +1 -0
  30. package/dist/analytics/toolTracker.d.ts +59 -0
  31. package/dist/analytics/toolTracker.js +163 -0
  32. package/dist/analytics/toolTracker.js.map +1 -0
  33. package/dist/analytics/usageStats.d.ts +64 -0
  34. package/dist/analytics/usageStats.js +252 -0
  35. package/dist/analytics/usageStats.js.map +1 -0
  36. package/dist/db.js +359 -321
  37. package/dist/db.js.map +1 -1
  38. package/dist/index.d.ts +2 -1
  39. package/dist/index.js +652 -89
  40. package/dist/index.js.map +1 -1
  41. package/dist/tools/architectTools.js +13 -13
  42. package/dist/tools/critterTools.js +14 -14
  43. package/dist/tools/parallelAgentTools.js +176 -176
  44. package/dist/tools/patternTools.js +11 -11
  45. package/dist/tools/progressiveDiscoveryTools.d.ts +5 -1
  46. package/dist/tools/progressiveDiscoveryTools.js +111 -19
  47. package/dist/tools/progressiveDiscoveryTools.js.map +1 -1
  48. package/dist/tools/researchWritingTools.js +42 -42
  49. package/dist/tools/rssTools.js +396 -396
  50. package/dist/tools/toolRegistry.d.ts +17 -0
  51. package/dist/tools/toolRegistry.js +65 -17
  52. package/dist/tools/toolRegistry.js.map +1 -1
  53. package/dist/tools/voiceBridgeTools.js +498 -498
  54. package/dist/toolsetRegistry.d.ts +10 -0
  55. package/dist/toolsetRegistry.js +84 -0
  56. package/dist/toolsetRegistry.js.map +1 -0
  57. package/package.json +4 -4
@@ -105,6 +105,23 @@ export declare function hybridSearch(query: string, tools: Array<{
105
105
  explain?: boolean;
106
106
  /** Pre-computed query embedding vector for semantic search (passed from async caller) */
107
107
  embeddingQueryVec?: Float32Array;
108
+ /** If true, search ALL_REGISTRY_ENTRIES (full 175-tool registry) regardless of loaded preset.
109
+ * Needed for dynamic loading: discover_tools must find unloaded tools to suggest load_toolset. */
110
+ searchFullRegistry?: boolean;
111
+ /** Ablation flags: disable individual strategies to measure their contribution */
112
+ ablation?: {
113
+ disableSynonyms?: boolean;
114
+ disableFuzzy?: boolean;
115
+ disableTagCoverage?: boolean;
116
+ disableTfIdf?: boolean;
117
+ disableNgram?: boolean;
118
+ disableBigram?: boolean;
119
+ disableDense?: boolean;
120
+ disableDomainBoost?: boolean;
121
+ disableTraceEdges?: boolean;
122
+ disablePrefix?: boolean;
123
+ disableEmbedding?: boolean;
124
+ };
108
125
  }): SearchResult[];
109
126
  /** Available search modes for discover_tools */
110
127
  export declare const SEARCH_MODES: SearchMode[];
@@ -1097,7 +1097,7 @@ const REGISTRY_ENTRIES = [
1097
1097
  {
1098
1098
  name: "call_llm",
1099
1099
  category: "llm",
1100
- tags: ["llm", "call", "generate", "prompt", "gemini", "openai", "anthropic", "gpt", "claude"],
1100
+ tags: ["llm", "call", "generate", "prompt", "gemini", "openai", "anthropic", "gpt", "claude", "model", "ai", "inference", "completion", "analyze", "text"],
1101
1101
  quickRef: {
1102
1102
  nextAction: "LLM response received. Validate output quality. Use for analysis, generation, or judgment tasks.",
1103
1103
  nextTools: ["extract_structured_data", "record_learning"],
@@ -1107,7 +1107,7 @@ const REGISTRY_ENTRIES = [
1107
1107
  {
1108
1108
  name: "extract_structured_data",
1109
1109
  category: "llm",
1110
- tags: ["extract", "structured", "data", "json", "parse", "schema", "llm"],
1110
+ tags: ["extract", "structured", "data", "json", "parse", "schema", "llm", "model", "ai", "transform", "output"],
1111
1111
  quickRef: {
1112
1112
  nextAction: "Structured data extracted. Validate against expected schema. Use for downstream processing.",
1113
1113
  nextTools: ["record_eval_result", "record_learning"],
@@ -1117,7 +1117,7 @@ const REGISTRY_ENTRIES = [
1117
1117
  {
1118
1118
  name: "benchmark_models",
1119
1119
  category: "llm",
1120
- tags: ["benchmark", "models", "compare", "latency", "quality", "cost", "llm"],
1120
+ tags: ["benchmark", "models", "compare", "latency", "quality", "cost", "llm", "ai", "gpt", "claude", "gemini", "evaluate"],
1121
1121
  quickRef: {
1122
1122
  nextAction: "Benchmark complete. Compare models on quality, latency, and cost. Record winner with record_learning.",
1123
1123
  nextTools: ["record_learning", "call_llm"],
@@ -2110,6 +2110,7 @@ export function getToolComplexity(toolName) {
2110
2110
  }
2111
2111
  // ── Synonym / semantic expansion map ──────────────────────────────────────
2112
2112
  const SYNONYM_MAP = {
2113
+ // ── Existing technical synonyms ──
2113
2114
  verify: ["validate", "check", "confirm", "test", "assert", "ensure", "correct"],
2114
2115
  test: ["verify", "validate", "check", "assert", "spec", "expect"],
2115
2116
  search: ["find", "discover", "lookup", "query", "locate", "browse"],
@@ -2118,7 +2119,7 @@ const SYNONYM_MAP = {
2118
2119
  setup: ["bootstrap", "init", "configure", "scaffold", "create"],
2119
2120
  fix: ["resolve", "repair", "debug", "patch", "correct"],
2120
2121
  deploy: ["ship", "publish", "release", "launch", "ci", "cd", "pipeline"],
2121
- analyze: ["inspect", "review", "examine", "audit", "scan"],
2122
+ analyze: ["inspect", "review", "examine", "audit", "scan", "screenshot"],
2122
2123
  monitor: ["watch", "observe", "track", "follow"],
2123
2124
  security: ["vulnerability", "audit", "cve", "secret", "credential", "leak", "exposure"],
2124
2125
  benchmark: ["measure", "evaluate", "score", "grade", "performance", "capability"],
@@ -2131,7 +2132,7 @@ const SYNONYM_MAP = {
2131
2132
  ui: ["frontend", "visual", "screenshot", "responsive", "layout", "css", "component"],
2132
2133
  llm: ["model", "ai", "generate", "prompt", "gpt", "claude", "gemini"],
2133
2134
  migrate: ["upgrade", "update", "port", "convert", "transition", "refactor"],
2134
- review: ["inspect", "audit", "pr", "pull-request", "feedback", "critique"],
2135
+ review: ["inspect", "audit", "pr", "pull-request", "feedback", "critique", "merge"],
2135
2136
  performance: ["speed", "latency", "optimize", "fast", "slow", "bottleneck"],
2136
2137
  data: ["csv", "xlsx", "json", "pdf", "file", "parse", "extract", "spreadsheet"],
2137
2138
  paper: ["academic", "research", "write", "publish", "neurips", "icml", "arxiv", "section"],
@@ -2147,6 +2148,31 @@ const SYNONYM_MAP = {
2147
2148
  why: ["purpose", "reason", "intentionality", "motivation", "goal", "critter"],
2148
2149
  purpose: ["why", "reason", "intentionality", "motivation", "goal", "critter"],
2149
2150
  reflect: ["think", "pause", "reconsider", "intentionality", "metacognition", "critter"],
2151
+ // ── New user natural language expansions (ablation-driven) ──
2152
+ website: ["seo", "url", "web", "fetch", "page", "lighthouse", "performance"],
2153
+ webpage: ["seo", "url", "web", "fetch", "page", "html"],
2154
+ fast: ["seo", "performance", "speed", "latency", "lighthouse"],
2155
+ slow: ["seo", "performance", "speed", "latency", "lighthouse", "bottleneck"],
2156
+ inbox: ["email", "read_emails", "send_email", "messages"],
2157
+ email: ["send_email", "read_emails", "inbox", "messages", "smtp", "imap"],
2158
+ ai: ["llm", "model", "prompt", "generate", "gpt", "claude", "gemini", "call_llm"],
2159
+ summarize: ["llm", "extract", "generate", "analyze", "call_llm"],
2160
+ bugs: ["scan", "code", "analysis", "dependencies", "vulnerabilities", "debug"],
2161
+ readme: ["documentation", "generate", "report", "markdown", "document"],
2162
+ compiles: ["closed_loop", "build", "test", "verify", "compile"],
2163
+ works: ["test", "verify", "closed_loop", "flywheel", "quality", "check"],
2164
+ commits: ["git", "commit", "messages", "conventional", "pr"],
2165
+ push: ["git", "commit", "merge", "pr", "deploy"],
2166
+ merge: ["git", "pr", "review", "checklist", "enforce"],
2167
+ open: ["read", "file", "csv", "json", "parse", "load"],
2168
+ look: ["read", "analyze", "inspect", "view", "examine", "fetch"],
2169
+ good: ["quality", "gate", "check", "validate", "analysis"],
2170
+ screenshot: ["analyze", "capture", "vision", "ui", "responsive", "visual"],
2171
+ run: ["test", "execute", "closed_loop", "quality", "cli"],
2172
+ check: ["verify", "validate", "audit", "scan", "review", "gate", "test"],
2173
+ help: ["generate", "create", "scaffold", "analyze", "recommend"],
2174
+ computer: ["llm", "ai", "model", "analyze", "extract"],
2175
+ text: ["extract", "parse", "read", "llm", "structured", "analyze"],
2150
2176
  };
2151
2177
  // ── TF-IDF: compute inverse document frequency for tags ───────────────────
2152
2178
  let _idfCache = null;
@@ -2458,6 +2484,7 @@ export function hybridSearch(query, tools, options) {
2458
2484
  const explain = options?.explain ?? false;
2459
2485
  const mode = options?.mode ?? "hybrid";
2460
2486
  const idf = computeIDF();
2487
+ const ab = options?.ablation ?? {};
2461
2488
  // Regex mode: compile pattern, match against name+description
2462
2489
  let regexPattern = null;
2463
2490
  if (mode === "regex") {
@@ -2516,7 +2543,16 @@ export function hybridSearch(query, tools, options) {
2516
2543
  }
2517
2544
  }
2518
2545
  const toolScores = new Map();
2519
- for (const tool of tools) {
2546
+ // When searchFullRegistry is enabled, search ALL registry entries (not just loaded tools).
2547
+ // This lets discover_tools find unloaded tools and suggest load_toolset.
2548
+ const toolDescMap = new Map(tools.map(t => [t.name, t.description]));
2549
+ const searchList = options?.searchFullRegistry
2550
+ ? ALL_REGISTRY_ENTRIES.map(e => ({
2551
+ name: e.name,
2552
+ description: toolDescMap.get(e.name) ?? `${e.tags.join(" ")} ${e.category} ${e.phase}`,
2553
+ }))
2554
+ : tools;
2555
+ for (const tool of searchList) {
2520
2556
  const entry = TOOL_REGISTRY.get(tool.name);
2521
2557
  if (!entry)
2522
2558
  continue;
@@ -2557,7 +2593,7 @@ export function hybridSearch(query, tools, options) {
2557
2593
  }
2558
2594
  }
2559
2595
  // ── MODE: prefix ──
2560
- if (mode === "hybrid" || mode === "prefix") {
2596
+ if ((mode === "hybrid" || mode === "prefix") && !ab.disablePrefix) {
2561
2597
  for (const word of queryWords) {
2562
2598
  if (nameLower.startsWith(word)) {
2563
2599
  score += 20;
@@ -2589,7 +2625,7 @@ export function hybridSearch(query, tools, options) {
2589
2625
  // Tag exact match (weighted by TF-IDF)
2590
2626
  if (entry.tags.includes(word)) {
2591
2627
  const idfWeight = idf.get(word) ?? 3;
2592
- const tagScore = Math.round(10 * (idfWeight / 3));
2628
+ const tagScore = ab.disableTfIdf ? 10 : Math.round(10 * (idfWeight / 3));
2593
2629
  score += tagScore;
2594
2630
  reasons.push(`keyword:tag(${word},idf=${idfWeight.toFixed(1)})`);
2595
2631
  }
@@ -2614,9 +2650,21 @@ export function hybridSearch(query, tools, options) {
2614
2650
  score += 12;
2615
2651
  reasons.push(`keyword:methodology(${entry.quickRef.methodology})`);
2616
2652
  }
2653
+ // ── TAG COVERAGE BONUS: reward tools where many query words hit tags ──
2654
+ // If 60%+ of query words match tags, that's a strong relevance signal.
2655
+ if (queryWords.length >= 3 && !ab.disableTagCoverage) {
2656
+ const tagSet = new Set(entry.tags);
2657
+ const hits = queryWords.filter(w => tagSet.has(w)).length;
2658
+ const coverage = hits / queryWords.length;
2659
+ if (coverage >= 0.6) {
2660
+ const coverageBonus = Math.round(coverage * hits * 5);
2661
+ score += coverageBonus;
2662
+ reasons.push(`tag_coverage:${hits}/${queryWords.length}(${(coverage * 100).toFixed(0)}%,+${coverageBonus})`);
2663
+ }
2664
+ }
2617
2665
  }
2618
2666
  // ── SEMANTIC: synonym expansion (only score expanded words, not original) ──
2619
- if (mode === "hybrid" || mode === "semantic") {
2667
+ if ((mode === "hybrid" || mode === "semantic") && !ab.disableSynonyms) {
2620
2668
  for (const syn of expandedWords) {
2621
2669
  if (queryWords.includes(syn))
2622
2670
  continue; // skip original words
@@ -2635,7 +2683,7 @@ export function hybridSearch(query, tools, options) {
2635
2683
  }
2636
2684
  }
2637
2685
  // ── FUZZY: Levenshtein distance for typo tolerance ──
2638
- if (mode === "hybrid" || mode === "fuzzy") {
2686
+ if ((mode === "hybrid" || mode === "fuzzy") && !ab.disableFuzzy) {
2639
2687
  for (const word of queryWords) {
2640
2688
  if (word.length < 4)
2641
2689
  continue; // skip short words for fuzzy
@@ -2662,7 +2710,7 @@ export function hybridSearch(query, tools, options) {
2662
2710
  }
2663
2711
  }
2664
2712
  // ── N-GRAM: trigram similarity ──
2665
- if (mode === "hybrid" || mode === "fuzzy") {
2713
+ if ((mode === "hybrid" || mode === "fuzzy") && !ab.disableNgram) {
2666
2714
  for (const word of queryWords) {
2667
2715
  if (word.length < 4)
2668
2716
  continue;
@@ -2683,7 +2731,7 @@ export function hybridSearch(query, tools, options) {
2683
2731
  }
2684
2732
  }
2685
2733
  // ── BIGRAM: phrase matching ──
2686
- if (queryBigrams.length > 0) {
2734
+ if (queryBigrams.length > 0 && !ab.disableBigram) {
2687
2735
  for (const bigram of queryBigrams) {
2688
2736
  if (allText.includes(bigram)) {
2689
2737
  score += 15;
@@ -2692,7 +2740,7 @@ export function hybridSearch(query, tools, options) {
2692
2740
  }
2693
2741
  }
2694
2742
  // ── DENSE: TF-IDF cosine similarity (query vec pre-computed above) ──
2695
- if (denseQueryVec && denseDocVectors) {
2743
+ if (denseQueryVec && denseDocVectors && !ab.disableDense) {
2696
2744
  const docVec = denseDocVectors.get(tool.name);
2697
2745
  if (docVec) {
2698
2746
  const sim = cosineSimilarity(denseQueryVec, docVec);
@@ -2704,7 +2752,7 @@ export function hybridSearch(query, tools, options) {
2704
2752
  }
2705
2753
  }
2706
2754
  // ── EMBEDDING: Agent-as-a-Graph bipartite RRF (ranks pre-computed above) ──
2707
- if (embToolRanks && embDomainRanks) {
2755
+ if (embToolRanks && embDomainRanks && !ab.disableEmbedding) {
2708
2756
  const toolRank = embToolRanks.get(tool.name);
2709
2757
  if (toolRank) {
2710
2758
  const rrfScore = Math.round(WRRF_ALPHA_T * 1000 / (WRRF_K + toolRank));
@@ -2744,18 +2792,18 @@ export function hybridSearch(query, tools, options) {
2744
2792
  neighbors.forEach((n) => traceBoostTargets.add(n));
2745
2793
  }
2746
2794
  const results = [];
2747
- for (const tool of tools) {
2795
+ for (const tool of searchList) {
2748
2796
  const entry = TOOL_REGISTRY.get(tool.name);
2749
2797
  const scored = toolScores.get(tool.name);
2750
2798
  if (!entry || !scored)
2751
2799
  continue;
2752
- const domainBoost = getDomainBoost(entry.category, topCategories);
2800
+ const domainBoost = ab.disableDomainBoost ? 0 : getDomainBoost(entry.category, topCategories);
2753
2801
  if (domainBoost > 0) {
2754
2802
  scored.score += domainBoost;
2755
2803
  scored.reasons.push(`domain_boost:+${domainBoost}`);
2756
2804
  }
2757
2805
  // Execution trace edge: boost tools that frequently co-occur with top results
2758
- if (traceBoostTargets.has(tool.name) && !topToolNames.includes(tool.name)) {
2806
+ if (traceBoostTargets.has(tool.name) && !topToolNames.includes(tool.name) && !ab.disableTraceEdges) {
2759
2807
  scored.score += TRACE_EDGE_BOOST;
2760
2808
  scored.reasons.push(`trace_edge:+${TRACE_EDGE_BOOST}`);
2761
2809
  }