nodebench-mcp 2.14.2 → 2.17.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. package/NODEBENCH_AGENTS.md +3 -3
  2. package/README.md +9 -9
  3. package/dist/__tests__/architectComplex.test.d.ts +1 -0
  4. package/dist/__tests__/architectComplex.test.js +375 -0
  5. package/dist/__tests__/architectComplex.test.js.map +1 -0
  6. package/dist/__tests__/architectSmoke.test.d.ts +1 -0
  7. package/dist/__tests__/architectSmoke.test.js +92 -0
  8. package/dist/__tests__/architectSmoke.test.js.map +1 -0
  9. package/dist/__tests__/critterCalibrationEval.d.ts +8 -0
  10. package/dist/__tests__/critterCalibrationEval.js +370 -0
  11. package/dist/__tests__/critterCalibrationEval.js.map +1 -0
  12. package/dist/__tests__/embeddingProvider.test.d.ts +1 -0
  13. package/dist/__tests__/embeddingProvider.test.js +86 -0
  14. package/dist/__tests__/embeddingProvider.test.js.map +1 -0
  15. package/dist/__tests__/evalHarness.test.js +6 -1
  16. package/dist/__tests__/evalHarness.test.js.map +1 -1
  17. package/dist/__tests__/gaiaCapabilityAudioEval.test.js +1 -1
  18. package/dist/__tests__/gaiaCapabilityAudioEval.test.js.map +1 -1
  19. package/dist/__tests__/gaiaCapabilityEval.test.js +759 -28
  20. package/dist/__tests__/gaiaCapabilityEval.test.js.map +1 -1
  21. package/dist/__tests__/gaiaCapabilityFilesEval.test.js +1 -1
  22. package/dist/__tests__/gaiaCapabilityFilesEval.test.js.map +1 -1
  23. package/dist/__tests__/gaiaCapabilityMediaEval.test.js +558 -4
  24. package/dist/__tests__/gaiaCapabilityMediaEval.test.js.map +1 -1
  25. package/dist/__tests__/presetRealWorldBench.test.js +2 -2
  26. package/dist/__tests__/presetRealWorldBench.test.js.map +1 -1
  27. package/dist/__tests__/tools.test.js +1016 -8
  28. package/dist/__tests__/tools.test.js.map +1 -1
  29. package/dist/__tests__/toolsetGatingEval.test.js +3 -3
  30. package/dist/__tests__/toolsetGatingEval.test.js.map +1 -1
  31. package/dist/db.js +64 -0
  32. package/dist/db.js.map +1 -1
  33. package/dist/index.js +76 -9
  34. package/dist/index.js.map +1 -1
  35. package/dist/tools/architectTools.d.ts +15 -0
  36. package/dist/tools/architectTools.js +304 -0
  37. package/dist/tools/architectTools.js.map +1 -0
  38. package/dist/tools/critterTools.d.ts +21 -0
  39. package/dist/tools/critterTools.js +230 -0
  40. package/dist/tools/critterTools.js.map +1 -0
  41. package/dist/tools/emailTools.d.ts +15 -0
  42. package/dist/tools/emailTools.js +664 -0
  43. package/dist/tools/emailTools.js.map +1 -0
  44. package/dist/tools/embeddingProvider.d.ts +67 -0
  45. package/dist/tools/embeddingProvider.js +299 -0
  46. package/dist/tools/embeddingProvider.js.map +1 -0
  47. package/dist/tools/metaTools.js +660 -0
  48. package/dist/tools/metaTools.js.map +1 -1
  49. package/dist/tools/progressiveDiscoveryTools.js +24 -7
  50. package/dist/tools/progressiveDiscoveryTools.js.map +1 -1
  51. package/dist/tools/reconTools.js +83 -33
  52. package/dist/tools/reconTools.js.map +1 -1
  53. package/dist/tools/rssTools.d.ts +8 -0
  54. package/dist/tools/rssTools.js +833 -0
  55. package/dist/tools/rssTools.js.map +1 -0
  56. package/dist/tools/toolRegistry.d.ts +30 -2
  57. package/dist/tools/toolRegistry.js +424 -25
  58. package/dist/tools/toolRegistry.js.map +1 -1
  59. package/package.json +14 -3
@@ -9,6 +9,7 @@
9
9
  * 2. Append quickRefs to every tool response (so agents always know what to do next)
10
10
  * 3. Build tool chains (recommended sequences for common workflows)
11
11
  */
12
+ import { isEmbeddingReady, embeddingSearch } from "./embeddingProvider.js";
12
13
  // ── Registry: every tool mapped with metadata ────────────────────────────
13
14
  const REGISTRY_ENTRIES = [
14
15
  // ═══ VERIFICATION ═══
@@ -1494,6 +1495,17 @@ const REGISTRY_ENTRIES = [
1494
1495
  },
1495
1496
  phase: "meta",
1496
1497
  },
1498
+ {
1499
+ name: "check_mcp_setup",
1500
+ category: "meta",
1501
+ tags: ["setup", "wizard", "diagnostic", "config", "env", "api-key", "onboarding", "health-check", "status", "readiness"],
1502
+ quickRef: {
1503
+ nextAction: "Review the readiness report. Configure missing domains by following setupInstructions. Re-run to verify.",
1504
+ nextTools: ["check_email_setup", "discover_tools", "getMethodology"],
1505
+ tip: "Run this FIRST when starting with NodeBench MCP. Checks all env vars, API keys, npm packages, and servers across every domain.",
1506
+ },
1507
+ phase: "meta",
1508
+ },
1497
1509
  // ── Image solver tools (GAIA media lane) ──────────────────────────────
1498
1510
  {
1499
1511
  name: "solve_red_green_deviation_average_from_image",
@@ -1824,6 +1836,154 @@ const REGISTRY_ENTRIES = [
1824
1836
  },
1825
1837
  phase: "research",
1826
1838
  },
1839
+ // ═══ CRITTER (pre-action intentionality check) ═══
1840
+ {
1841
+ name: "critter_check",
1842
+ category: "critter",
1843
+ tags: ["intentionality", "why", "who", "purpose", "audience", "reflection", "scope", "pre-action", "metacognition"],
1844
+ quickRef: {
1845
+ nextAction: "Answered critter check. If verdict is 'proceed', continue with your task. If 'reconsider', sharpen answers and re-run.",
1846
+ nextTools: ["save_session_note", "start_verification_cycle", "run_recon"],
1847
+ methodology: "agent_contract",
1848
+ tip: "Call at the start of any non-trivial task. Prevents scope creep and aimless exploration.",
1849
+ },
1850
+ phase: "research",
1851
+ },
1852
+ // ═══ EMAIL (SMTP send, IMAP read, draft replies) ═══
1853
+ {
1854
+ name: "send_email",
1855
+ category: "email",
1856
+ tags: ["email", "smtp", "send", "gmail", "notification", "alert", "digest", "report"],
1857
+ quickRef: {
1858
+ nextAction: "Email sent. Log the action with save_session_note and continue with your workflow.",
1859
+ nextTools: ["save_session_note", "record_learning", "build_research_digest"],
1860
+ methodology: "agent_contract",
1861
+ tip: "Requires EMAIL_USER and EMAIL_PASS env vars. For Gmail, use an App Password. Supports html parameter for rich emails.",
1862
+ },
1863
+ phase: "implement",
1864
+ },
1865
+ {
1866
+ name: "read_emails",
1867
+ category: "email",
1868
+ tags: ["email", "imap", "read", "inbox", "gmail", "fetch", "messages", "unread"],
1869
+ quickRef: {
1870
+ nextAction: "Emails retrieved. Review subjects/content, then draft_email_reply for actionable items or save_session_note to persist context.",
1871
+ nextTools: ["draft_email_reply", "save_session_note", "extract_structured_data"],
1872
+ methodology: "agent_contract",
1873
+ tip: "Requires EMAIL_USER and EMAIL_PASS env vars. Use folder param for specific mailboxes (INBOX default). Limit controls count.",
1874
+ },
1875
+ phase: "research",
1876
+ },
1877
+ {
1878
+ name: "draft_email_reply",
1879
+ category: "email",
1880
+ tags: ["email", "reply", "draft", "compose", "response", "assistant"],
1881
+ quickRef: {
1882
+ nextAction: "Draft generated. Review the draft, then send_email to deliver or edit and re-draft.",
1883
+ nextTools: ["send_email", "save_session_note"],
1884
+ methodology: "agent_contract",
1885
+ tip: "Generates a professional reply draft from original email context. Always review before sending.",
1886
+ },
1887
+ phase: "implement",
1888
+ },
1889
+ // ═══ RSS (subscribe, fetch, digest feeds) ═══
1890
+ {
1891
+ name: "add_rss_source",
1892
+ category: "rss",
1893
+ tags: ["rss", "atom", "feed", "subscribe", "source", "monitor", "research", "news"],
1894
+ quickRef: {
1895
+ nextAction: "RSS source registered. Call fetch_rss_feeds to pull articles, then build_research_digest for a summary.",
1896
+ nextTools: ["fetch_rss_feeds", "build_research_digest", "save_session_note"],
1897
+ methodology: "research_digest",
1898
+ tip: "Validates the feed URL on add. Use category param to group sources for filtered digests.",
1899
+ },
1900
+ phase: "research",
1901
+ },
1902
+ {
1903
+ name: "fetch_rss_feeds",
1904
+ category: "rss",
1905
+ tags: ["rss", "atom", "feed", "fetch", "articles", "news", "update", "pull"],
1906
+ quickRef: {
1907
+ nextAction: "Feeds fetched. New articles stored in SQLite. Call build_research_digest to generate a summary of new items.",
1908
+ nextTools: ["build_research_digest", "save_session_note", "record_learning"],
1909
+ methodology: "research_digest",
1910
+ tip: "Deduplicates automatically — same article won't be stored twice. Fetches all registered sources if no URLs specified.",
1911
+ },
1912
+ phase: "research",
1913
+ },
1914
+ {
1915
+ name: "build_research_digest",
1916
+ category: "rss",
1917
+ tags: ["rss", "digest", "summary", "research", "newsletter", "report", "markdown", "html"],
1918
+ quickRef: {
1919
+ nextAction: "Digest generated. Use send_email with html format to distribute, or save_session_note to persist the digest.",
1920
+ nextTools: ["send_email", "save_session_note", "record_learning"],
1921
+ methodology: "research_digest",
1922
+ tip: "Marks articles as seen after digest — next call only shows truly new content. Use format='html' for email-ready output.",
1923
+ },
1924
+ phase: "implement",
1925
+ },
1926
+ // ═══ SETUP WIZARDS ═══
1927
+ {
1928
+ name: "check_email_setup",
1929
+ category: "email",
1930
+ tags: ["email", "setup", "wizard", "diagnostic", "config", "smtp", "imap", "onboarding", "gmail", "outlook"],
1931
+ quickRef: {
1932
+ nextAction: "Setup check complete. If ready, try send_email or read_emails. If not, follow the setup instructions.",
1933
+ nextTools: ["send_email", "read_emails", "get_workflow_chain"],
1934
+ methodology: "agent_contract",
1935
+ tip: "Run this FIRST before using any email tools. Tests SMTP/IMAP connections and generates MCP config snippets.",
1936
+ },
1937
+ phase: "research",
1938
+ },
1939
+ {
1940
+ name: "scaffold_research_pipeline",
1941
+ category: "rss",
1942
+ tags: ["rss", "scaffold", "pipeline", "project", "cron", "automation", "digest", "email", "standalone", "setup", "wizard"],
1943
+ quickRef: {
1944
+ nextAction: "Pipeline scaffolded. Save the generated files, configure .env, add feeds, and run.",
1945
+ nextTools: ["save_session_note", "check_email_setup"],
1946
+ methodology: "research_digest",
1947
+ tip: "Generates a ZERO-dependency standalone Node.js project. Copy files, add feeds, run. No nodebench-mcp needed at runtime.",
1948
+ },
1949
+ phase: "implement",
1950
+ },
1951
+ // ═══════════════════════════════════════════
1952
+ // ARCHITECT — Structural code analysis
1953
+ // ═══════════════════════════════════════════
1954
+ {
1955
+ name: "scan_capabilities",
1956
+ category: "architect",
1957
+ tags: ["structural-analysis", "capability-scan", "code-patterns", "regex-scan", "react", "backend", "state-management", "layout", "interaction", "rendering"],
1958
+ quickRef: {
1959
+ nextAction: "Review the capability report. Use verify_concept_support to check if a specific concept is implemented.",
1960
+ nextTools: ["verify_concept_support", "generate_implementation_plan", "save_session_note"],
1961
+ tip: "Pure regex analysis — no LLM needed, instant results. Scans React hooks, layout patterns, interaction handlers, rendering, and backend patterns.",
1962
+ },
1963
+ phase: "research",
1964
+ },
1965
+ {
1966
+ name: "verify_concept_support",
1967
+ category: "architect",
1968
+ tags: ["concept-verification", "gap-analysis", "structural-analysis", "regex-scan", "implementation-check", "progress-tracking"],
1969
+ quickRef: {
1970
+ nextAction: "If gaps found, use generate_implementation_plan to build a plan. If fully implemented, move on.",
1971
+ nextTools: ["generate_implementation_plan", "scan_capabilities", "record_learning"],
1972
+ tip: "Define required signatures from web research, then verify against code. Results persisted to SQLite for tracking progress.",
1973
+ },
1974
+ phase: "research",
1975
+ },
1976
+ {
1977
+ name: "generate_implementation_plan",
1978
+ category: "architect",
1979
+ tags: ["implementation-plan", "gap-analysis", "code-generation", "structural-analysis", "concept-verification", "strategy"],
1980
+ quickRef: {
1981
+ nextAction: "Follow the step-by-step plan. After each step, re-run verify_concept_support to track progress.",
1982
+ nextTools: ["verify_concept_support", "scan_capabilities", "start_verification_cycle"],
1983
+ tip: "Pass current_context from scan_capabilities to get conflict-aware injection strategies.",
1984
+ },
1985
+ phase: "implement",
1986
+ },
1827
1987
  ];
1828
1988
  // ── Exported lookup structures ───────────────────────────────────────────
1829
1989
  /** Map of tool name → registry entry for O(1) lookup */
@@ -1877,6 +2037,10 @@ const CATEGORY_COMPLEXITY = {
1877
2037
  git_workflow: "medium",
1878
2038
  seo: "medium",
1879
2039
  voice_bridge: "medium",
2040
+ critter: "low",
2041
+ email: "medium",
2042
+ rss: "low",
2043
+ architect: "low",
1880
2044
  };
1881
2045
  /** Per-tool complexity overrides (when category default is wrong) */
1882
2046
  const TOOL_COMPLEXITY_OVERRIDES = {
@@ -1946,7 +2110,7 @@ export function getToolComplexity(toolName) {
1946
2110
  }
1947
2111
  // ── Synonym / semantic expansion map ──────────────────────────────────────
1948
2112
  const SYNONYM_MAP = {
1949
- verify: ["validate", "check", "confirm", "test", "assert", "ensure"],
2113
+ verify: ["validate", "check", "confirm", "test", "assert", "ensure", "correct"],
1950
2114
  test: ["verify", "validate", "check", "assert", "spec", "expect"],
1951
2115
  search: ["find", "discover", "lookup", "query", "locate", "browse"],
1952
2116
  find: ["search", "discover", "lookup", "locate"],
@@ -1958,7 +2122,7 @@ const SYNONYM_MAP = {
1958
2122
  monitor: ["watch", "observe", "track", "follow"],
1959
2123
  security: ["vulnerability", "audit", "cve", "secret", "credential", "leak", "exposure"],
1960
2124
  benchmark: ["measure", "evaluate", "score", "grade", "performance", "capability"],
1961
- parallel: ["multi-agent", "coordinate", "team", "concurrent", "distributed"],
2125
+ parallel: ["multi-agent", "coordinate", "team", "concurrent", "distributed", "multiple"],
1962
2126
  document: ["doc", "documentation", "readme", "agents-md", "report"],
1963
2127
  research: ["recon", "investigate", "discover", "explore", "gather"],
1964
2128
  quality: ["gate", "check", "validate", "standard", "rule"],
@@ -1970,10 +2134,19 @@ const SYNONYM_MAP = {
1970
2134
  review: ["inspect", "audit", "pr", "pull-request", "feedback", "critique"],
1971
2135
  performance: ["speed", "latency", "optimize", "fast", "slow", "bottleneck"],
1972
2136
  data: ["csv", "xlsx", "json", "pdf", "file", "parse", "extract", "spreadsheet"],
1973
- paper: ["academic", "research", "write", "publish", "neurips", "icml", "arxiv"],
2137
+ paper: ["academic", "research", "write", "publish", "neurips", "icml", "arxiv", "section"],
1974
2138
  start: ["begin", "init", "kick-off", "launch", "bootstrap", "new"],
1975
2139
  report: ["generate", "summary", "output", "export", "document"],
1976
2140
  clean: ["cleanup", "prune", "remove", "delete", "stale", "orphan"],
2141
+ remember: ["save", "record", "persist", "store", "note", "session"],
2142
+ save: ["remember", "record", "persist", "store", "note", "keep"],
2143
+ wrong: ["investigate", "debug", "diagnose", "error", "issue", "problem", "fail"],
2144
+ correct: ["verify", "validate", "check", "ensure", "confirm"],
2145
+ write: ["paper", "section", "draft", "compose", "author", "document"],
2146
+ task: ["claim", "assign", "work", "agent", "parallel", "concurrent"],
2147
+ why: ["purpose", "reason", "intentionality", "motivation", "goal", "critter"],
2148
+ purpose: ["why", "reason", "intentionality", "motivation", "goal", "critter"],
2149
+ reflect: ["think", "pause", "reconsider", "intentionality", "metacognition", "critter"],
1977
2150
  };
1978
2151
  // ── TF-IDF: compute inverse document frequency for tags ───────────────────
1979
2152
  let _idfCache = null;
@@ -2042,7 +2215,7 @@ function ngramSimilarity(a, b, n = 3) {
2042
2215
  }
2043
2216
  // ── Dense search: TF-IDF cosine similarity on full text ──────────────────
2044
2217
  /** Lowercase the text and return every maximal run of a-z or underscore characters; digits, punctuation and non-ASCII letters act as separators. Returns [] when nothing matches. */
2045
- function tokenize(text) {
2218
+ export function tokenize(text) {
2046
2219
  return text.toLowerCase().match(/[a-z_]+/g) ?? [];
2047
2220
  }
2048
2221
  /** Build a TF vector: word → frequency */
@@ -2059,7 +2232,7 @@ function termFreq(tokens) {
2059
2232
  /** Pre-computed document TF-IDF vectors for dense search (lazy init) */
2060
2233
  let _denseVectorsCache = null;
2061
2234
  let _denseIDFCache = null;
2062
- function buildDenseIndex() {
2235
+ export function buildDenseIndex() {
2063
2236
  if (_denseVectorsCache && _denseIDFCache)
2064
2237
  return { vectors: _denseVectorsCache, idf: _denseIDFCache };
2065
2238
  // Build corpus: each tool's full text (name + tags + description + category)
@@ -2120,6 +2293,137 @@ const DOMAIN_CLUSTERS = {
2120
2293
  writing: ["research_writing", "documentation"],
2121
2294
  measurement: ["eval", "benchmark", "self_eval"],
2122
2295
  };
2296
+ // ── Execution trace edges — co-occurrence mining from tool_call_log ────────
2297
+ // Based on Agent-as-a-Graph (arxiv:2511.18194): execution trace edges
2298
+ // mine sequential co-occurrence patterns to discover implicit tool relationships.
2299
+ let _cooccurrenceCache = null;
2300
+ let _cooccurrenceCacheTime = 0;
2301
+ const COOCCURRENCE_TTL_MS = 60_000; // refresh every 60s
2302
+ /** Agent-as-a-Graph wRRF constants (arxiv:2511.18194).
2303
+ *
2304
+ * Paper optimal: α_A=1.5 (the paper's agent-node weight, applied here as the domain weight α_D), α_T=1.0, K=60. Ablation confirmed this beats 5 alternatives
2305
+ * even for single-server tool retrieval (Recall@5=0.625 vs 0.583 for α_D=0.6/K=20).
2306
+ *
2307
+ * Key finding: K and α_D are coupled. K=60 dampens scores enough that α_D=1.5 lifts
2308
+ * category siblings gently. K=20 with α_D=1.5 overshoots (domain boost drowns lexical).
2309
+ * The paper's full parameter set is internally consistent — don't cherry-pick.
2310
+ *
2311
+ * Max embedding contribution at rank 1: α_T * 1000/(60+1) ≈ 16 pts (tool),
2312
+ * α_D * 1000/(60+1) ≈ 25 pts (domain). These slot into the additive scoring system
2313
+ * alongside keyword (3-50), fuzzy (4-12), dense (0-40) as a moderate signal.
2314
+ *
2315
+ * Validated via 6-config ablation grid: see tools.test.ts "wRRF α ratio ablation". */
2316
+ let WRRF_ALPHA_T = 1.0; // tool weight — direct embedding match
2317
+ let WRRF_ALPHA_D = 1.5; // domain weight — upward traversal boost (paper optimal)
2318
+ let WRRF_K = 60; // RRF smoothing constant (paper optimal)
2319
+ /** Bonus score for tools that frequently co-occur with top-ranked results.
2320
+ * Calibrated to lift borderline tools ~1-2 positions without overriding strong lexical matches.
2321
+ * At +4, a tool needs ≥8 points of lexical evidence to appear in results at all (score > 0),
2322
+ * then trace edges nudge it up. Compare: keyword:desc = +3, semantic:tag = +6, domain_boost = +5. */
2323
+ const TRACE_EDGE_BOOST = 4;
2324
+ // DB accessor injected at init time to avoid circular import (toolRegistry is pure ESM)
2325
+ let _dbAccessor = null;
2326
+ /** Inject the DB accessor (a function returning the database handle) — called once from index.ts at startup. getCooccurrenceEdges invokes it lazily and returns an empty edge map while none is set. */
2327
+ export function _setDbAccessor(accessor) {
2328
+ _dbAccessor = accessor;
2329
+ }
2330
+ /**
2331
+ * Mine co-occurrence patterns from tool_call_log rows created in the last 7 days.
2332
+ * Returns a Map of toolName → up to 10 co-occurring tool names, most frequent first; the result is cached for COOCCURRENCE_TTL_MS.
2333
+ *
2334
+ * Approach: group rows by session_id in created_at order, then pair each call with
2335
+ * the next 5 calls of that session (counted in both directions, at least 2 hits required). No self-join.
2336
+ */
2337
+ function getCooccurrenceEdges() {
2338
+ const now = Date.now();
2339
+ if (_cooccurrenceCache && now - _cooccurrenceCacheTime < COOCCURRENCE_TTL_MS) {
2340
+ return _cooccurrenceCache;
2341
+ }
2342
+ const edges = new Map();
2343
+ if (!_dbAccessor) {
2344
+ _cooccurrenceCache = edges;
2345
+ _cooccurrenceCacheTime = now;
2346
+ return edges;
2347
+ }
2348
+ try {
2349
+ const db = _dbAccessor();
2350
+ // Pull tool calls from the last 7 days, ordered by session and then by creation time
2351
+ const rows = db.prepare(`
2352
+ SELECT session_id, tool_name
2353
+ FROM tool_call_log
2354
+ WHERE created_at > datetime('now', '-7 days')
2355
+ ORDER BY session_id, created_at ASC
2356
+ `).all();
2357
+ // Group tool names by session, preserving call order
2358
+ const sessions = new Map();
2359
+ for (const row of rows) {
2360
+ const list = sessions.get(row.session_id) ?? [];
2361
+ list.push(row.tool_name);
2362
+ sessions.set(row.session_id, list);
2363
+ }
2364
+ // Count co-occurrences: each call is paired with up to the 5 calls that follow it (repeats of the same tool are skipped)
2365
+ const pairCounts = new Map();
2366
+ for (const [, sequence] of sessions) {
2367
+ for (let i = 0; i < sequence.length; i++) {
2368
+ const toolA = sequence[i];
2369
+ for (let j = i + 1; j < Math.min(i + 6, sequence.length); j++) {
2370
+ const toolB = sequence[j];
2371
+ if (toolA === toolB)
2372
+ continue;
2373
+ const key = `${toolA}\0${toolB}`;
2374
+ pairCounts.set(key, (pairCounts.get(key) ?? 0) + 1);
2375
+ // Bidirectional: also count the reversed pair so the edge can be looked up from either tool
2376
+ const keyR = `${toolB}\0${toolA}`;
2377
+ pairCounts.set(keyR, (pairCounts.get(keyR) ?? 0) + 1);
2378
+ }
2379
+ }
2380
+ }
2381
+ // Keep pairs seen at least twice, most frequent first, so each neighbor list fills in descending count order
2382
+ const sorted = [...pairCounts.entries()]
2383
+ .filter(([, cnt]) => cnt >= 2)
2384
+ .sort((a, b) => b[1] - a[1]);
2385
+ for (const [key] of sorted) {
2386
+ const [toolA, toolB] = key.split("\0");
2387
+ const list = edges.get(toolA) ?? [];
2388
+ if (list.length < 10) {
2389
+ list.push(toolB);
2390
+ edges.set(toolA, list);
2391
+ }
2392
+ }
2393
+ }
2394
+ catch {
2395
+ // Any failure (e.g. no DB or table not yet created) degrades gracefully: whatever is in edges, typically nothing, is still cached below
2396
+ }
2397
+ _cooccurrenceCache = edges;
2398
+ _cooccurrenceCacheTime = now;
2399
+ return edges;
2400
+ }
2401
+ /** Clear the cached co-occurrence edges and their timestamp so the next getCooccurrenceEdges() call rebuilds them — for testing only. */
2402
+ export function _resetCooccurrenceCache() {
2403
+ _cooccurrenceCache = null;
2404
+ _cooccurrenceCacheTime = 0;
2405
+ }
2406
+ /** Inject co-occurrence edges directly, bypassing the database — for testing only. hybridSearch reads the value with .get(toolName) and iterates each neighbor list with forEach. */
2407
+ export function _setCooccurrenceForTesting(edges) {
2408
+ _cooccurrenceCache = edges;
2409
+ _cooccurrenceCacheTime = Date.now() + 999_999_999; // about 11.6 days in the future, so the TTL check keeps serving this map (effectively never expires during a test run)
2410
+ }
2411
+ /** Override wRRF weights — for ablation testing only. Reads params.alphaT, params.alphaD and params.k; any field left undefined keeps its current value.
2412
+ * Lets tests compare the declared defaults (α_T=1.0, α_D=1.5, K=60) against alternatives such as α_D=0.6, K=20. */
2413
+ export function _setWrrfParamsForTesting(params) {
2414
+ if (params.alphaT !== undefined)
2415
+ WRRF_ALPHA_T = params.alphaT;
2416
+ if (params.alphaD !== undefined)
2417
+ WRRF_ALPHA_D = params.alphaD;
2418
+ if (params.k !== undefined)
2419
+ WRRF_K = params.k;
2420
+ }
2421
+ /** Restore the wRRF weights to their declared defaults (α_T=1.0, α_D=1.5, K=60) — for ablation testing only. */
2422
+ export function _resetWrrfParamsForTesting() {
2423
+ WRRF_ALPHA_T = 1.0;
2424
+ WRRF_ALPHA_D = 1.5;
2425
+ WRRF_K = 60;
2426
+ }
2123
2427
  function getDomainBoost(category, topCategories) {
2124
2428
  for (const [, cluster] of Object.entries(DOMAIN_CLUSTERS)) {
2125
2429
  if (cluster.includes(category) && cluster.some((c) => topCategories.has(c) && c !== category)) {
@@ -2129,7 +2433,7 @@ function getDomainBoost(category, topCategories) {
2129
2433
  return 0;
2130
2434
  }
2131
2435
  /**
2132
- * Multi-modal hybrid search engine.
2436
+ * Multi-modal hybrid search engine with Agent-as-a-Graph bipartite retrieval.
2133
2437
  *
2134
2438
  * Search modes (all run in parallel, scores merged):
2135
2439
  * - **keyword**: Exact and partial word matching on name, tags, description, category
@@ -2141,6 +2445,11 @@ function getDomainBoost(category, topCategories) {
2141
2445
  * - **regex**: Pass a regex pattern to match against tool names/descriptions
2142
2446
  * - **bigram**: Two-word phrase matching (e.g., "quality gate" matched as phrase)
2143
2447
  * - **domain boost**: Related categories get boosted when top results cluster
2448
+ * - **embedding**: Neural embedding with type-specific wRRF (tool α_T + domain α_D nodes)
2449
+ * - **graph traversal**: Upward traversal from tools → domains → sibling tools
2450
+ * - **trace edges**: Execution co-occurrence mining from tool_call_log (dynamic graph edges)
2451
+ *
2452
+ * Graph architecture based on arxiv:2511.18194 (Agent-as-a-Graph).
2144
2453
  */
2145
2454
  export function hybridSearch(query, tools, options) {
2146
2455
  const queryLower = query.toLowerCase().trim();
@@ -2171,6 +2480,41 @@ export function hybridSearch(query, tools, options) {
2171
2480
  syns.forEach((s) => expandedWords.add(s));
2172
2481
  }
2173
2482
  }
2483
+ // ── Pre-compute query-invariant data ONCE before the per-tool loop ──
2484
+ // Dense: TF-IDF query vector (query-invariant — don't recompute per tool)
2485
+ let denseQueryVec = null;
2486
+ let denseDocVectors = null;
2487
+ if (mode === "dense" || mode === "hybrid") {
2488
+ const { vectors, idf: denseIdf } = buildDenseIndex();
2489
+ const queryTokens = tokenize(queryLower);
2490
+ if (queryTokens.length > 0) {
2491
+ const queryTf = termFreq(queryTokens);
2492
+ denseQueryVec = new Map();
2493
+ for (const [term, tfVal] of queryTf) {
2494
+ denseQueryVec.set(term, tfVal * (denseIdf.get(term) ?? 1));
2495
+ }
2496
+ denseDocVectors = vectors;
2497
+ }
2498
+ }
2499
+ // Embedding: pre-split ranks by node type (query-invariant — don't recompute per tool)
2500
+ let embToolRanks = null;
2501
+ let embDomainRanks = null;
2502
+ if ((mode === "embedding" || mode === "hybrid") && isEmbeddingReady() && options?.embeddingQueryVec) {
2503
+ const vecResults = embeddingSearch(options.embeddingQueryVec, 50);
2504
+ embToolRanks = new Map();
2505
+ embDomainRanks = new Map();
2506
+ let toolIdx = 0, domainIdx = 0;
2507
+ for (const r of vecResults) {
2508
+ if (r.nodeType === "domain") {
2509
+ domainIdx++;
2510
+ embDomainRanks.set(r.name.replace("domain:", ""), domainIdx);
2511
+ }
2512
+ else {
2513
+ toolIdx++;
2514
+ embToolRanks.set(r.name, toolIdx);
2515
+ }
2516
+ }
2517
+ }
2174
2518
  const toolScores = new Map();
2175
2519
  for (const tool of tools) {
2176
2520
  const entry = TOOL_REGISTRY.get(tool.name);
@@ -2347,27 +2691,35 @@ export function hybridSearch(query, tools, options) {
2347
2691
  }
2348
2692
  }
2349
2693
  }
2350
- // ── DENSE: TF-IDF cosine similarity on full text ──
2351
- if (mode === "dense" || mode === "hybrid") {
2352
- const { vectors, idf: denseIdf } = buildDenseIndex();
2353
- const queryTokens = tokenize(queryLower);
2354
- if (queryTokens.length > 0) {
2355
- const queryTf = termFreq(queryTokens);
2356
- const queryVec = new Map();
2357
- for (const [term, tfVal] of queryTf) {
2358
- queryVec.set(term, tfVal * (denseIdf.get(term) ?? 1));
2359
- }
2360
- const docVec = vectors.get(tool.name);
2361
- if (docVec) {
2362
- const sim = cosineSimilarity(queryVec, docVec);
2363
- if (sim > 0.05) {
2364
- const denseScore = Math.round(sim * 40);
2365
- score += denseScore;
2366
- reasons.push(`dense:cosine(sim=${sim.toFixed(3)},+${denseScore})`);
2367
- }
2694
+ // ── DENSE: TF-IDF cosine similarity (query vec pre-computed above) ──
2695
+ if (denseQueryVec && denseDocVectors) {
2696
+ const docVec = denseDocVectors.get(tool.name);
2697
+ if (docVec) {
2698
+ const sim = cosineSimilarity(denseQueryVec, docVec);
2699
+ if (sim > 0.05) {
2700
+ const denseScore = Math.round(sim * 40);
2701
+ score += denseScore;
2702
+ reasons.push(`dense:cosine(sim=${sim.toFixed(3)},+${denseScore})`);
2368
2703
  }
2369
2704
  }
2370
2705
  }
2706
+ // ── EMBEDDING: Agent-as-a-Graph bipartite RRF (ranks pre-computed above) ──
2707
+ if (embToolRanks && embDomainRanks) {
2708
+ const toolRank = embToolRanks.get(tool.name);
2709
+ if (toolRank) {
2710
+ const rrfScore = Math.round(WRRF_ALPHA_T * 1000 / (WRRF_K + toolRank));
2711
+ score += rrfScore;
2712
+ reasons.push(`embedding:tool_rrf(rank=${toolRank},+${rrfScore})`);
2713
+ }
2714
+ // Upward traversal: if this tool's domain matched, boost it (sibling expansion)
2715
+ const toolCategory = entry.category;
2716
+ const domainRank = embDomainRanks.get(toolCategory);
2717
+ if (domainRank) {
2718
+ const domainRrf = Math.round(WRRF_ALPHA_D * 1000 / (WRRF_K + domainRank));
2719
+ score += domainRrf;
2720
+ reasons.push(`embedding:domain_rrf(${toolCategory},rank=${domainRank},+${domainRrf})`);
2721
+ }
2722
+ }
2371
2723
  if (score > 0) {
2372
2724
  toolScores.set(tool.name, { score, reasons });
2373
2725
  }
@@ -2380,6 +2732,17 @@ export function hybridSearch(query, tools, options) {
2380
2732
  if (entry)
2381
2733
  topCategories.add(entry.category);
2382
2734
  }
2735
+ // ── Execution trace edges (2nd pass) — co-occurrence boost ──
2736
+ // Agent-as-a-Graph: mine tool_call_log for sequential co-occurrence.
2737
+ // If a top-ranked tool frequently co-occurs with another tool, boost the sibling.
2738
+ const cooccurrence = getCooccurrenceEdges();
2739
+ const topToolNames = sortedPrelim.slice(0, 5).map(([name]) => name);
2740
+ const traceBoostTargets = new Set();
2741
+ for (const topTool of topToolNames) {
2742
+ const neighbors = cooccurrence.get(topTool);
2743
+ if (neighbors)
2744
+ neighbors.forEach((n) => traceBoostTargets.add(n));
2745
+ }
2383
2746
  const results = [];
2384
2747
  for (const tool of tools) {
2385
2748
  const entry = TOOL_REGISTRY.get(tool.name);
@@ -2391,6 +2754,11 @@ export function hybridSearch(query, tools, options) {
2391
2754
  scored.score += domainBoost;
2392
2755
  scored.reasons.push(`domain_boost:+${domainBoost}`);
2393
2756
  }
2757
+ // Execution trace edge: boost tools that frequently co-occur with top results
2758
+ if (traceBoostTargets.has(tool.name) && !topToolNames.includes(tool.name)) {
2759
+ scored.score += TRACE_EDGE_BOOST;
2760
+ scored.reasons.push(`trace_edge:+${TRACE_EDGE_BOOST}`);
2761
+ }
2394
2762
  results.push({
2395
2763
  name: tool.name,
2396
2764
  description: tool.description,
@@ -2406,7 +2774,7 @@ export function hybridSearch(query, tools, options) {
2406
2774
  return results.slice(0, limit);
2407
2775
  }
2408
2776
  /** Available search modes for discover_tools */
2409
- export const SEARCH_MODES = ["hybrid", "fuzzy", "regex", "prefix", "semantic", "exact", "dense"];
2777
+ export const SEARCH_MODES = ["hybrid", "fuzzy", "regex", "prefix", "semantic", "exact", "dense", "embedding"];
2410
2778
  /** Pre-built workflow chains for common tasks */
2411
2779
  export const WORKFLOW_CHAINS = {
2412
2780
  new_feature: {
@@ -2736,5 +3104,36 @@ export const WORKFLOW_CHAINS = {
2736
3104
  { tool: "record_learning", action: "Record voice pipeline implementation patterns" },
2737
3105
  ],
2738
3106
  },
3107
+ intentionality_check: {
3108
+ name: "Intentionality Check (Critter)",
3109
+ description: "Pause before action — articulate why and who, then proceed with clarity",
3110
+ steps: [
3111
+ { tool: "critter_check", action: "Answer: Why are you doing this? Who is it for? Score your intentionality" },
3112
+ { tool: "save_session_note", action: "Persist the critter check so it survives context compaction" },
3113
+ { tool: "run_recon", action: "Gather context now that purpose is clear" },
3114
+ ],
3115
+ },
3116
+ research_digest: {
3117
+ name: "Automated Research Digest",
3118
+ description: "Subscribe to RSS/Atom feeds, fetch new articles, build a digest, and optionally email it",
3119
+ steps: [
3120
+ { tool: "add_rss_source", action: "Register RSS/Atom feed URLs for topics of interest (arXiv, blogs, news)" },
3121
+ { tool: "fetch_rss_feeds", action: "Pull latest articles from all registered sources — new items stored in SQLite" },
3122
+ { tool: "build_research_digest", action: "Generate a categorized digest of new (unseen) articles in markdown, json, or html" },
3123
+ { tool: "send_email", action: "Email the html digest to yourself or your team for daily/weekly review" },
3124
+ { tool: "save_session_note", action: "Persist key findings so they survive context compaction" },
3125
+ { tool: "record_learning", action: "Record insights from noteworthy articles for the knowledge base" },
3126
+ ],
3127
+ },
3128
+ email_assistant: {
3129
+ name: "Email Draft Assistant",
3130
+ description: "Read inbox, draft professional replies, review, and send — all from the agent",
3131
+ steps: [
3132
+ { tool: "read_emails", action: "Fetch recent/unread emails from IMAP inbox to understand what needs attention" },
3133
+ { tool: "draft_email_reply", action: "Generate a professional reply draft from original email context and your instructions" },
3134
+ { tool: "send_email", action: "Send the reviewed and approved draft reply" },
3135
+ { tool: "save_session_note", action: "Log sent emails so you have an audit trail that survives compaction" },
3136
+ ],
3137
+ },
2739
3138
  };
2740
3139
  //# sourceMappingURL=toolRegistry.js.map