nodebench-mcp 2.58.0 → 2.60.0

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
@@ -3921,6 +3921,106 @@ const REGISTRY_ENTRIES = [
3921
3921
  phase: "research",
3922
3922
  complexity: "medium",
3923
3923
  },
3924
+ // ═══ BENCHMARK TOOLS ═══
3925
+ {
3926
+ name: "run_benchmark_batch",
3927
+ category: "benchmark",
3928
+ tags: ["benchmark", "batch", "eval", "suite", "run"],
3929
+ quickRef: { nextAction: "Batch complete. Use get_benchmark_report for results.", nextTools: ["get_benchmark_report", "get_benchmark_history"], methodology: "ai_flywheel", tip: "Runs a batch of benchmark scenarios." },
3930
+ phase: "test",
3931
+ complexity: "medium",
3932
+ },
3933
+ {
3934
+ name: "get_benchmark_history",
3935
+ category: "benchmark",
3936
+ tags: ["benchmark", "history", "results", "trend"],
3937
+ quickRef: { nextAction: "Review history. Use get_benchmark_report for detailed analysis.", nextTools: ["get_benchmark_report", "run_benchmark_batch"], methodology: "ai_flywheel", tip: "Returns prior benchmark run results." },
3938
+ phase: "research",
3939
+ complexity: "low",
3940
+ },
3941
+ {
3942
+ name: "get_benchmark_report",
3943
+ category: "benchmark",
3944
+ tags: ["benchmark", "report", "summary", "analysis"],
3945
+ quickRef: { nextAction: "Report ready. Review pass rates and regressions.", nextTools: ["run_benchmark_batch", "get_benchmark_history"], methodology: "ai_flywheel", tip: "Generates a formatted benchmark report." },
3946
+ phase: "research",
3947
+ complexity: "low",
3948
+ },
3949
+ // ═══ DOGFOOD JUDGE BATCH ═══
3950
+ {
3951
+ name: "judge_tool_output",
3952
+ category: "dogfood_judge",
3953
+ tags: ["judge", "eval", "llm", "boolean", "criteria", "output"],
3954
+ quickRef: { nextAction: "Output judged. Use get_judge_history to track trends.", nextTools: ["get_judge_history", "run_judge_loop"], methodology: "ai_flywheel", tip: "Uses Gemini 3.1 Flash Lite to judge a tool output against boolean criteria." },
3955
+ phase: "verify",
3956
+ complexity: "medium",
3957
+ },
3958
+ {
3959
+ name: "run_judge_loop",
3960
+ category: "dogfood_judge",
3961
+ tags: ["judge", "loop", "flywheel", "self-improving", "eval"],
3962
+ quickRef: { nextAction: "Judge loop complete. Review diagnosis and improvements.", nextTools: ["get_judge_history", "judge_tool_output"], methodology: "ai_flywheel", tip: "Self-improving eval loop: run → diagnose → fix → grow corpus → re-run." },
3963
+ phase: "verify",
3964
+ complexity: "high",
3965
+ },
3966
+ {
3967
+ name: "get_judge_history",
3968
+ category: "dogfood_judge",
3969
+ tags: ["judge", "history", "trend", "regression", "eval"],
3970
+ quickRef: { nextAction: "History loaded. Compare runs to detect regressions.", nextTools: ["run_judge_loop", "judge_tool_output"], methodology: "ai_flywheel", tip: "Returns prior LLM judge eval runs with pass rates and deltas." },
3971
+ phase: "research",
3972
+ complexity: "low",
3973
+ },
3974
+ {
3975
+ name: "run_dogfood_batch_with_judge",
3976
+ category: "dogfood_judge",
3977
+ tags: ["dogfood", "batch", "judge", "eval", "flywheel"],
3978
+ quickRef: { nextAction: "Batch judged. Use get_judge_history for trends.", nextTools: ["get_judge_history", "run_judge_loop"], methodology: "ai_flywheel", tip: "Runs dogfood scenarios then judges outputs with LLM." },
3979
+ phase: "verify",
3980
+ complexity: "high",
3981
+ },
3982
+ // ═══ SESSION MEMORY / CONTEXT ═══
3983
+ {
3984
+ name: "get_context_bundle",
3985
+ category: "session_memory",
3986
+ tags: ["context", "bundle", "session", "memory", "recovery", "compaction"],
3987
+ quickRef: { nextAction: "Context bundle ready. Inject into prompt for continuity.", nextTools: ["inject_context_into_prompt", "summarize_session"], methodology: "founder", tip: "Gathers session context into a reusable bundle for handoff or recovery." },
3988
+ phase: "utility",
3989
+ complexity: "low",
3990
+ },
3991
+ {
3992
+ name: "inject_context_into_prompt",
3993
+ category: "session_memory",
3994
+ tags: ["context", "inject", "prompt", "session", "memory", "compaction"],
3995
+ quickRef: { nextAction: "Context injected. Resume work with full continuity.", nextTools: ["get_context_bundle", "summarize_session"], methodology: "founder", tip: "Injects a context bundle into the current prompt for post-compaction recovery." },
3996
+ phase: "utility",
3997
+ complexity: "low",
3998
+ },
3999
+ // ═══ ENTITY ENRICHMENT ═══
4000
+ {
4001
+ name: "enrich_entity",
4002
+ category: "founder",
4003
+ tags: ["entity", "enrichment", "company", "profile", "web", "search"],
4004
+ quickRef: { nextAction: "Entity enriched. Use detect_contradictions to validate.", nextTools: ["detect_contradictions", "founder_deep_context_gather"], methodology: "founder", tip: "Enriches an entity profile with web search data and structured extraction." },
4005
+ phase: "research",
4006
+ complexity: "medium",
4007
+ },
4008
+ {
4009
+ name: "detect_contradictions",
4010
+ category: "founder",
4011
+ tags: ["contradiction", "detection", "entity", "validation", "truth"],
4012
+ quickRef: { nextAction: "Contradictions detected. Review and resolve or flag.", nextTools: ["enrich_entity", "flag_important_change"], methodology: "founder", tip: "Scans entity data for contradictory claims or stale facts." },
4013
+ phase: "verify",
4014
+ complexity: "medium",
4015
+ },
4016
+ {
4017
+ name: "ingest_upload",
4018
+ category: "founder",
4019
+ tags: ["ingest", "upload", "document", "notes", "pdf", "context"],
4020
+ quickRef: { nextAction: "Upload ingested. Use founder_deep_context_gather to build packet.", nextTools: ["founder_deep_context_gather", "enrich_entity"], methodology: "founder", tip: "Ingests uploaded documents (notes, PDFs, transcripts) into entity context." },
4021
+ phase: "research",
4022
+ complexity: "medium",
4023
+ },
3924
4024
  // ═══ CAUSAL MEMORY ═══
3925
4025
  {
3926
4026
  name: "record_event",