nodebench-mcp 2.17.0 → 2.18.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/NODEBENCH_AGENTS.md +2 -2
- package/README.md +516 -82
- package/dist/__tests__/analytics.test.d.ts +11 -0
- package/dist/__tests__/analytics.test.js +546 -0
- package/dist/__tests__/analytics.test.js.map +1 -0
- package/dist/__tests__/dynamicLoading.test.d.ts +1 -0
- package/dist/__tests__/dynamicLoading.test.js +278 -0
- package/dist/__tests__/dynamicLoading.test.js.map +1 -0
- package/dist/__tests__/evalHarness.test.js +1 -1
- package/dist/__tests__/evalHarness.test.js.map +1 -1
- package/dist/__tests__/helpers/answerMatch.js +22 -22
- package/dist/__tests__/presetRealWorldBench.test.js +9 -0
- package/dist/__tests__/presetRealWorldBench.test.js.map +1 -1
- package/dist/__tests__/tools.test.js +1 -1
- package/dist/__tests__/toolsetGatingEval.test.js +9 -1
- package/dist/__tests__/toolsetGatingEval.test.js.map +1 -1
- package/dist/analytics/index.d.ts +10 -0
- package/dist/analytics/index.js +11 -0
- package/dist/analytics/index.js.map +1 -0
- package/dist/analytics/projectDetector.d.ts +19 -0
- package/dist/analytics/projectDetector.js +259 -0
- package/dist/analytics/projectDetector.js.map +1 -0
- package/dist/analytics/schema.d.ts +57 -0
- package/dist/analytics/schema.js +157 -0
- package/dist/analytics/schema.js.map +1 -0
- package/dist/analytics/smartPreset.d.ts +63 -0
- package/dist/analytics/smartPreset.js +300 -0
- package/dist/analytics/smartPreset.js.map +1 -0
- package/dist/analytics/toolTracker.d.ts +59 -0
- package/dist/analytics/toolTracker.js +163 -0
- package/dist/analytics/toolTracker.js.map +1 -0
- package/dist/analytics/usageStats.d.ts +64 -0
- package/dist/analytics/usageStats.js +252 -0
- package/dist/analytics/usageStats.js.map +1 -0
- package/dist/db.js +359 -321
- package/dist/db.js.map +1 -1
- package/dist/index.d.ts +2 -1
- package/dist/index.js +652 -89
- package/dist/index.js.map +1 -1
- package/dist/tools/architectTools.js +13 -13
- package/dist/tools/critterTools.js +14 -14
- package/dist/tools/parallelAgentTools.js +176 -176
- package/dist/tools/patternTools.js +11 -11
- package/dist/tools/progressiveDiscoveryTools.d.ts +5 -1
- package/dist/tools/progressiveDiscoveryTools.js +111 -19
- package/dist/tools/progressiveDiscoveryTools.js.map +1 -1
- package/dist/tools/researchWritingTools.js +42 -42
- package/dist/tools/rssTools.js +396 -396
- package/dist/tools/toolRegistry.d.ts +17 -0
- package/dist/tools/toolRegistry.js +65 -17
- package/dist/tools/toolRegistry.js.map +1 -1
- package/dist/tools/voiceBridgeTools.js +498 -498
- package/dist/toolsetRegistry.d.ts +10 -0
- package/dist/toolsetRegistry.js +84 -0
- package/dist/toolsetRegistry.js.map +1 -0
- package/package.json +4 -4
|
@@ -105,6 +105,23 @@ export declare function hybridSearch(query: string, tools: Array<{
|
|
|
105
105
|
explain?: boolean;
|
|
106
106
|
/** Pre-computed query embedding vector for semantic search (passed from async caller) */
|
|
107
107
|
embeddingQueryVec?: Float32Array;
|
|
108
|
+
/** If true, search ALL_REGISTRY_ENTRIES (full 175-tool registry) regardless of loaded preset.
|
|
109
|
+
* Needed for dynamic loading: discover_tools must find unloaded tools to suggest load_toolset. */
|
|
110
|
+
searchFullRegistry?: boolean;
|
|
111
|
+
/** Ablation flags: disable individual strategies to measure their contribution */
|
|
112
|
+
ablation?: {
|
|
113
|
+
disableSynonyms?: boolean;
|
|
114
|
+
disableFuzzy?: boolean;
|
|
115
|
+
disableTagCoverage?: boolean;
|
|
116
|
+
disableTfIdf?: boolean;
|
|
117
|
+
disableNgram?: boolean;
|
|
118
|
+
disableBigram?: boolean;
|
|
119
|
+
disableDense?: boolean;
|
|
120
|
+
disableDomainBoost?: boolean;
|
|
121
|
+
disableTraceEdges?: boolean;
|
|
122
|
+
disablePrefix?: boolean;
|
|
123
|
+
disableEmbedding?: boolean;
|
|
124
|
+
};
|
|
108
125
|
}): SearchResult[];
|
|
109
126
|
/** Available search modes for discover_tools */
|
|
110
127
|
export declare const SEARCH_MODES: SearchMode[];
|
|
@@ -1097,7 +1097,7 @@ const REGISTRY_ENTRIES = [
|
|
|
1097
1097
|
{
|
|
1098
1098
|
name: "call_llm",
|
|
1099
1099
|
category: "llm",
|
|
1100
|
-
tags: ["llm", "call", "generate", "prompt", "gemini", "openai", "anthropic", "gpt", "claude"],
|
|
1100
|
+
tags: ["llm", "call", "generate", "prompt", "gemini", "openai", "anthropic", "gpt", "claude", "model", "ai", "inference", "completion", "analyze", "text"],
|
|
1101
1101
|
quickRef: {
|
|
1102
1102
|
nextAction: "LLM response received. Validate output quality. Use for analysis, generation, or judgment tasks.",
|
|
1103
1103
|
nextTools: ["extract_structured_data", "record_learning"],
|
|
@@ -1107,7 +1107,7 @@ const REGISTRY_ENTRIES = [
|
|
|
1107
1107
|
{
|
|
1108
1108
|
name: "extract_structured_data",
|
|
1109
1109
|
category: "llm",
|
|
1110
|
-
tags: ["extract", "structured", "data", "json", "parse", "schema", "llm"],
|
|
1110
|
+
tags: ["extract", "structured", "data", "json", "parse", "schema", "llm", "model", "ai", "transform", "output"],
|
|
1111
1111
|
quickRef: {
|
|
1112
1112
|
nextAction: "Structured data extracted. Validate against expected schema. Use for downstream processing.",
|
|
1113
1113
|
nextTools: ["record_eval_result", "record_learning"],
|
|
@@ -1117,7 +1117,7 @@ const REGISTRY_ENTRIES = [
|
|
|
1117
1117
|
{
|
|
1118
1118
|
name: "benchmark_models",
|
|
1119
1119
|
category: "llm",
|
|
1120
|
-
tags: ["benchmark", "models", "compare", "latency", "quality", "cost", "llm"],
|
|
1120
|
+
tags: ["benchmark", "models", "compare", "latency", "quality", "cost", "llm", "ai", "gpt", "claude", "gemini", "evaluate"],
|
|
1121
1121
|
quickRef: {
|
|
1122
1122
|
nextAction: "Benchmark complete. Compare models on quality, latency, and cost. Record winner with record_learning.",
|
|
1123
1123
|
nextTools: ["record_learning", "call_llm"],
|
|
@@ -2110,6 +2110,7 @@ export function getToolComplexity(toolName) {
|
|
|
2110
2110
|
}
|
|
2111
2111
|
// ── Synonym / semantic expansion map ──────────────────────────────────────
|
|
2112
2112
|
const SYNONYM_MAP = {
|
|
2113
|
+
// ── Existing technical synonyms ──
|
|
2113
2114
|
verify: ["validate", "check", "confirm", "test", "assert", "ensure", "correct"],
|
|
2114
2115
|
test: ["verify", "validate", "check", "assert", "spec", "expect"],
|
|
2115
2116
|
search: ["find", "discover", "lookup", "query", "locate", "browse"],
|
|
@@ -2118,7 +2119,7 @@ const SYNONYM_MAP = {
|
|
|
2118
2119
|
setup: ["bootstrap", "init", "configure", "scaffold", "create"],
|
|
2119
2120
|
fix: ["resolve", "repair", "debug", "patch", "correct"],
|
|
2120
2121
|
deploy: ["ship", "publish", "release", "launch", "ci", "cd", "pipeline"],
|
|
2121
|
-
analyze: ["inspect", "review", "examine", "audit", "scan"],
|
|
2122
|
+
analyze: ["inspect", "review", "examine", "audit", "scan", "screenshot"],
|
|
2122
2123
|
monitor: ["watch", "observe", "track", "follow"],
|
|
2123
2124
|
security: ["vulnerability", "audit", "cve", "secret", "credential", "leak", "exposure"],
|
|
2124
2125
|
benchmark: ["measure", "evaluate", "score", "grade", "performance", "capability"],
|
|
@@ -2131,7 +2132,7 @@ const SYNONYM_MAP = {
|
|
|
2131
2132
|
ui: ["frontend", "visual", "screenshot", "responsive", "layout", "css", "component"],
|
|
2132
2133
|
llm: ["model", "ai", "generate", "prompt", "gpt", "claude", "gemini"],
|
|
2133
2134
|
migrate: ["upgrade", "update", "port", "convert", "transition", "refactor"],
|
|
2134
|
-
review: ["inspect", "audit", "pr", "pull-request", "feedback", "critique"],
|
|
2135
|
+
review: ["inspect", "audit", "pr", "pull-request", "feedback", "critique", "merge"],
|
|
2135
2136
|
performance: ["speed", "latency", "optimize", "fast", "slow", "bottleneck"],
|
|
2136
2137
|
data: ["csv", "xlsx", "json", "pdf", "file", "parse", "extract", "spreadsheet"],
|
|
2137
2138
|
paper: ["academic", "research", "write", "publish", "neurips", "icml", "arxiv", "section"],
|
|
@@ -2147,6 +2148,31 @@ const SYNONYM_MAP = {
|
|
|
2147
2148
|
why: ["purpose", "reason", "intentionality", "motivation", "goal", "critter"],
|
|
2148
2149
|
purpose: ["why", "reason", "intentionality", "motivation", "goal", "critter"],
|
|
2149
2150
|
reflect: ["think", "pause", "reconsider", "intentionality", "metacognition", "critter"],
|
|
2151
|
+
// ── New user natural language expansions (ablation-driven) ──
|
|
2152
|
+
website: ["seo", "url", "web", "fetch", "page", "lighthouse", "performance"],
|
|
2153
|
+
webpage: ["seo", "url", "web", "fetch", "page", "html"],
|
|
2154
|
+
fast: ["seo", "performance", "speed", "latency", "lighthouse"],
|
|
2155
|
+
slow: ["seo", "performance", "speed", "latency", "lighthouse", "bottleneck"],
|
|
2156
|
+
inbox: ["email", "read_emails", "send_email", "messages"],
|
|
2157
|
+
email: ["send_email", "read_emails", "inbox", "messages", "smtp", "imap"],
|
|
2158
|
+
ai: ["llm", "model", "prompt", "generate", "gpt", "claude", "gemini", "call_llm"],
|
|
2159
|
+
summarize: ["llm", "extract", "generate", "analyze", "call_llm"],
|
|
2160
|
+
bugs: ["scan", "code", "analysis", "dependencies", "vulnerabilities", "debug"],
|
|
2161
|
+
readme: ["documentation", "generate", "report", "markdown", "document"],
|
|
2162
|
+
compiles: ["closed_loop", "build", "test", "verify", "compile"],
|
|
2163
|
+
works: ["test", "verify", "closed_loop", "flywheel", "quality", "check"],
|
|
2164
|
+
commits: ["git", "commit", "messages", "conventional", "pr"],
|
|
2165
|
+
push: ["git", "commit", "merge", "pr", "deploy"],
|
|
2166
|
+
merge: ["git", "pr", "review", "checklist", "enforce"],
|
|
2167
|
+
open: ["read", "file", "csv", "json", "parse", "load"],
|
|
2168
|
+
look: ["read", "analyze", "inspect", "view", "examine", "fetch"],
|
|
2169
|
+
good: ["quality", "gate", "check", "validate", "analysis"],
|
|
2170
|
+
screenshot: ["analyze", "capture", "vision", "ui", "responsive", "visual"],
|
|
2171
|
+
run: ["test", "execute", "closed_loop", "quality", "cli"],
|
|
2172
|
+
check: ["verify", "validate", "audit", "scan", "review", "gate", "test"],
|
|
2173
|
+
help: ["generate", "create", "scaffold", "analyze", "recommend"],
|
|
2174
|
+
computer: ["llm", "ai", "model", "analyze", "extract"],
|
|
2175
|
+
text: ["extract", "parse", "read", "llm", "structured", "analyze"],
|
|
2150
2176
|
};
|
|
2151
2177
|
// ── TF-IDF: compute inverse document frequency for tags ───────────────────
|
|
2152
2178
|
let _idfCache = null;
|
|
@@ -2458,6 +2484,7 @@ export function hybridSearch(query, tools, options) {
|
|
|
2458
2484
|
const explain = options?.explain ?? false;
|
|
2459
2485
|
const mode = options?.mode ?? "hybrid";
|
|
2460
2486
|
const idf = computeIDF();
|
|
2487
|
+
const ab = options?.ablation ?? {};
|
|
2461
2488
|
// Regex mode: compile pattern, match against name+description
|
|
2462
2489
|
let regexPattern = null;
|
|
2463
2490
|
if (mode === "regex") {
|
|
@@ -2516,7 +2543,16 @@ export function hybridSearch(query, tools, options) {
|
|
|
2516
2543
|
}
|
|
2517
2544
|
}
|
|
2518
2545
|
const toolScores = new Map();
|
|
2519
|
-
for (const tool of tools) {
|
|
2546
|
+
// When searchFullRegistry is enabled, search ALL registry entries (not just loaded tools).
|
|
2547
|
+
// This lets discover_tools find unloaded tools and suggest load_toolset.
|
|
2548
|
+
const toolDescMap = new Map(tools.map(t => [t.name, t.description]));
|
|
2549
|
+
const searchList = options?.searchFullRegistry
|
|
2550
|
+
? ALL_REGISTRY_ENTRIES.map(e => ({
|
|
2551
|
+
name: e.name,
|
|
2552
|
+
description: toolDescMap.get(e.name) ?? `${e.tags.join(" ")} ${e.category} ${e.phase}`,
|
|
2553
|
+
}))
|
|
2554
|
+
: tools;
|
|
2555
|
+
for (const tool of searchList) {
|
|
2520
2556
|
const entry = TOOL_REGISTRY.get(tool.name);
|
|
2521
2557
|
if (!entry)
|
|
2522
2558
|
continue;
|
|
@@ -2557,7 +2593,7 @@ export function hybridSearch(query, tools, options) {
|
|
|
2557
2593
|
}
|
|
2558
2594
|
}
|
|
2559
2595
|
// ── MODE: prefix ──
|
|
2560
|
-
if (mode === "hybrid" || mode === "prefix") {
|
|
2596
|
+
if ((mode === "hybrid" || mode === "prefix") && !ab.disablePrefix) {
|
|
2561
2597
|
for (const word of queryWords) {
|
|
2562
2598
|
if (nameLower.startsWith(word)) {
|
|
2563
2599
|
score += 20;
|
|
@@ -2589,7 +2625,7 @@ export function hybridSearch(query, tools, options) {
|
|
|
2589
2625
|
// Tag exact match (weighted by TF-IDF)
|
|
2590
2626
|
if (entry.tags.includes(word)) {
|
|
2591
2627
|
const idfWeight = idf.get(word) ?? 3;
|
|
2592
|
-
const tagScore = Math.round(10 * (idfWeight / 3));
|
|
2628
|
+
const tagScore = ab.disableTfIdf ? 10 : Math.round(10 * (idfWeight / 3));
|
|
2593
2629
|
score += tagScore;
|
|
2594
2630
|
reasons.push(`keyword:tag(${word},idf=${idfWeight.toFixed(1)})`);
|
|
2595
2631
|
}
|
|
@@ -2614,9 +2650,21 @@ export function hybridSearch(query, tools, options) {
|
|
|
2614
2650
|
score += 12;
|
|
2615
2651
|
reasons.push(`keyword:methodology(${entry.quickRef.methodology})`);
|
|
2616
2652
|
}
|
|
2653
|
+
// ── TAG COVERAGE BONUS: reward tools where many query words hit tags ──
|
|
2654
|
+
// If 60%+ of query words match tags, that's a strong relevance signal.
|
|
2655
|
+
if (queryWords.length >= 3 && !ab.disableTagCoverage) {
|
|
2656
|
+
const tagSet = new Set(entry.tags);
|
|
2657
|
+
const hits = queryWords.filter(w => tagSet.has(w)).length;
|
|
2658
|
+
const coverage = hits / queryWords.length;
|
|
2659
|
+
if (coverage >= 0.6) {
|
|
2660
|
+
const coverageBonus = Math.round(coverage * hits * 5);
|
|
2661
|
+
score += coverageBonus;
|
|
2662
|
+
reasons.push(`tag_coverage:${hits}/${queryWords.length}(${(coverage * 100).toFixed(0)}%,+${coverageBonus})`);
|
|
2663
|
+
}
|
|
2664
|
+
}
|
|
2617
2665
|
}
|
|
2618
2666
|
// ── SEMANTIC: synonym expansion (only score expanded words, not original) ──
|
|
2619
|
-
if (mode === "hybrid" || mode === "semantic") {
|
|
2667
|
+
if ((mode === "hybrid" || mode === "semantic") && !ab.disableSynonyms) {
|
|
2620
2668
|
for (const syn of expandedWords) {
|
|
2621
2669
|
if (queryWords.includes(syn))
|
|
2622
2670
|
continue; // skip original words
|
|
@@ -2635,7 +2683,7 @@ export function hybridSearch(query, tools, options) {
|
|
|
2635
2683
|
}
|
|
2636
2684
|
}
|
|
2637
2685
|
// ── FUZZY: Levenshtein distance for typo tolerance ──
|
|
2638
|
-
if (mode === "hybrid" || mode === "fuzzy") {
|
|
2686
|
+
if ((mode === "hybrid" || mode === "fuzzy") && !ab.disableFuzzy) {
|
|
2639
2687
|
for (const word of queryWords) {
|
|
2640
2688
|
if (word.length < 4)
|
|
2641
2689
|
continue; // skip short words for fuzzy
|
|
@@ -2662,7 +2710,7 @@ export function hybridSearch(query, tools, options) {
|
|
|
2662
2710
|
}
|
|
2663
2711
|
}
|
|
2664
2712
|
// ── N-GRAM: trigram similarity ──
|
|
2665
|
-
if (mode === "hybrid" || mode === "fuzzy") {
|
|
2713
|
+
if ((mode === "hybrid" || mode === "fuzzy") && !ab.disableNgram) {
|
|
2666
2714
|
for (const word of queryWords) {
|
|
2667
2715
|
if (word.length < 4)
|
|
2668
2716
|
continue;
|
|
@@ -2683,7 +2731,7 @@ export function hybridSearch(query, tools, options) {
|
|
|
2683
2731
|
}
|
|
2684
2732
|
}
|
|
2685
2733
|
// ── BIGRAM: phrase matching ──
|
|
2686
|
-
if (queryBigrams.length > 0) {
|
|
2734
|
+
if (queryBigrams.length > 0 && !ab.disableBigram) {
|
|
2687
2735
|
for (const bigram of queryBigrams) {
|
|
2688
2736
|
if (allText.includes(bigram)) {
|
|
2689
2737
|
score += 15;
|
|
@@ -2692,7 +2740,7 @@ export function hybridSearch(query, tools, options) {
|
|
|
2692
2740
|
}
|
|
2693
2741
|
}
|
|
2694
2742
|
// ── DENSE: TF-IDF cosine similarity (query vec pre-computed above) ──
|
|
2695
|
-
if (denseQueryVec && denseDocVectors) {
|
|
2743
|
+
if (denseQueryVec && denseDocVectors && !ab.disableDense) {
|
|
2696
2744
|
const docVec = denseDocVectors.get(tool.name);
|
|
2697
2745
|
if (docVec) {
|
|
2698
2746
|
const sim = cosineSimilarity(denseQueryVec, docVec);
|
|
@@ -2704,7 +2752,7 @@ export function hybridSearch(query, tools, options) {
|
|
|
2704
2752
|
}
|
|
2705
2753
|
}
|
|
2706
2754
|
// ── EMBEDDING: Agent-as-a-Graph bipartite RRF (ranks pre-computed above) ──
|
|
2707
|
-
if (embToolRanks && embDomainRanks) {
|
|
2755
|
+
if (embToolRanks && embDomainRanks && !ab.disableEmbedding) {
|
|
2708
2756
|
const toolRank = embToolRanks.get(tool.name);
|
|
2709
2757
|
if (toolRank) {
|
|
2710
2758
|
const rrfScore = Math.round(WRRF_ALPHA_T * 1000 / (WRRF_K + toolRank));
|
|
@@ -2744,18 +2792,18 @@ export function hybridSearch(query, tools, options) {
|
|
|
2744
2792
|
neighbors.forEach((n) => traceBoostTargets.add(n));
|
|
2745
2793
|
}
|
|
2746
2794
|
const results = [];
|
|
2747
|
-
for (const tool of tools) {
|
|
2795
|
+
for (const tool of searchList) {
|
|
2748
2796
|
const entry = TOOL_REGISTRY.get(tool.name);
|
|
2749
2797
|
const scored = toolScores.get(tool.name);
|
|
2750
2798
|
if (!entry || !scored)
|
|
2751
2799
|
continue;
|
|
2752
|
-
const domainBoost = getDomainBoost(entry.category, topCategories);
|
|
2800
|
+
const domainBoost = ab.disableDomainBoost ? 0 : getDomainBoost(entry.category, topCategories);
|
|
2753
2801
|
if (domainBoost > 0) {
|
|
2754
2802
|
scored.score += domainBoost;
|
|
2755
2803
|
scored.reasons.push(`domain_boost:+${domainBoost}`);
|
|
2756
2804
|
}
|
|
2757
2805
|
// Execution trace edge: boost tools that frequently co-occur with top results
|
|
2758
|
-
if (traceBoostTargets.has(tool.name) && !topToolNames.includes(tool.name)) {
|
|
2806
|
+
if (traceBoostTargets.has(tool.name) && !topToolNames.includes(tool.name) && !ab.disableTraceEdges) {
|
|
2759
2807
|
scored.score += TRACE_EDGE_BOOST;
|
|
2760
2808
|
scored.reasons.push(`trace_edge:+${TRACE_EDGE_BOOST}`);
|
|
2761
2809
|
}
|