@jokerized/getresearchdone 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/plugin.json +103 -0
- package/README.md +211 -0
- package/agents/grd-baseline-assessor.md +684 -0
- package/agents/grd-code-reviewer.md +300 -0
- package/agents/grd-codebase-mapper.md +355 -0
- package/agents/grd-critique-agent.md +119 -0
- package/agents/grd-debugger.md +519 -0
- package/agents/grd-deep-diver.md +737 -0
- package/agents/grd-eval-planner.md +913 -0
- package/agents/grd-eval-reporter.md +717 -0
- package/agents/grd-executor.md +683 -0
- package/agents/grd-feasibility-analyst.md +624 -0
- package/agents/grd-integration-checker.md +367 -0
- package/agents/grd-knowledge-miner.md +81 -0
- package/agents/grd-migrator.md +88 -0
- package/agents/grd-phase-researcher.md +697 -0
- package/agents/grd-plan-checker.md +443 -0
- package/agents/grd-planner.md +1532 -0
- package/agents/grd-product-owner.md +562 -0
- package/agents/grd-project-researcher.md +513 -0
- package/agents/grd-research-synthesizer.md +273 -0
- package/agents/grd-roadmapper.md +798 -0
- package/agents/grd-surveyor.md +566 -0
- package/agents/grd-verifier.md +893 -0
- package/bin/gd.js +4 -0
- package/bin/gd.ts +227 -0
- package/bin/grd-manifest.js +4 -0
- package/bin/grd-manifest.ts +286 -0
- package/bin/grd-mcp-server.js +4 -0
- package/bin/grd-mcp-server.ts +124 -0
- package/bin/grd-tools.js +4 -0
- package/bin/grd-tools.ts +2471 -0
- package/bin/postinstall.js +4 -0
- package/bin/postinstall.ts +80 -0
- package/commands/add-phase.md +123 -0
- package/commands/add-todo.md +87 -0
- package/commands/assess-baseline.md +289 -0
- package/commands/autopilot.md +100 -0
- package/commands/autoplan.md +55 -0
- package/commands/check-todos.md +87 -0
- package/commands/compare-methods.md +262 -0
- package/commands/complete-milestone.md +225 -0
- package/commands/debug.md +372 -0
- package/commands/deep-dive.md +288 -0
- package/commands/discover.md +281 -0
- package/commands/discuss-phase.md +188 -0
- package/commands/discuss.md +55 -0
- package/commands/eval-report.md +310 -0
- package/commands/evolve.md +79 -0
- package/commands/execute-phase.md +1017 -0
- package/commands/feasibility.md +292 -0
- package/commands/help.md +407 -0
- package/commands/init.md +1508 -0
- package/commands/insert-phase.md +113 -0
- package/commands/iterate.md +327 -0
- package/commands/list-phase-assumptions.md +217 -0
- package/commands/long-term-roadmap.md +202 -0
- package/commands/map-codebase.md +111 -0
- package/commands/migrate.md +159 -0
- package/commands/new-milestone.md +169 -0
- package/commands/pause-work.md +83 -0
- package/commands/plan-milestone-gaps.md +373 -0
- package/commands/plan-phase.md +655 -0
- package/commands/principles.md +328 -0
- package/commands/product-plan.md +319 -0
- package/commands/progress.md +481 -0
- package/commands/quick.md +167 -0
- package/commands/reapply-patches.md +154 -0
- package/commands/remove-phase.md +97 -0
- package/commands/requirement.md +96 -0
- package/commands/resume-project.md +113 -0
- package/commands/settings.md +1144 -0
- package/commands/survey.md +242 -0
- package/commands/sync.md +246 -0
- package/commands/tracker-setup.md +322 -0
- package/commands/update.md +202 -0
- package/commands/verify-phase.md +335 -0
- package/commands/verify-work.md +701 -0
- package/commands/wireup.md +29 -0
- package/dist/bin/gd.d.ts +3 -0
- package/dist/bin/gd.d.ts.map +1 -0
- package/dist/bin/gd.js +178 -0
- package/dist/bin/gd.js.map +1 -0
- package/dist/bin/grd-manifest.d.ts +3 -0
- package/dist/bin/grd-manifest.d.ts.map +1 -0
- package/dist/bin/grd-manifest.js +202 -0
- package/dist/bin/grd-manifest.js.map +1 -0
- package/dist/bin/grd-mcp-server.d.ts +3 -0
- package/dist/bin/grd-mcp-server.d.ts.map +1 -0
- package/dist/bin/grd-mcp-server.js +71 -0
- package/dist/bin/grd-mcp-server.js.map +1 -0
- package/dist/bin/grd-tools.d.ts +3 -0
- package/dist/bin/grd-tools.d.ts.map +1 -0
- package/dist/bin/grd-tools.js +1680 -0
- package/dist/bin/grd-tools.js.map +1 -0
- package/dist/bin/postinstall.d.ts +3 -0
- package/dist/bin/postinstall.d.ts.map +1 -0
- package/dist/bin/postinstall.js +61 -0
- package/dist/bin/postinstall.js.map +1 -0
- package/dist/lib/autopilot-milestone.d.ts +2 -0
- package/dist/lib/autopilot-milestone.d.ts.map +1 -0
- package/dist/lib/autopilot-milestone.js +94 -0
- package/dist/lib/autopilot-milestone.js.map +1 -0
- package/dist/lib/autopilot-pipeline.d.ts +2 -0
- package/dist/lib/autopilot-pipeline.d.ts.map +1 -0
- package/dist/lib/autopilot-pipeline.js +830 -0
- package/dist/lib/autopilot-pipeline.js.map +1 -0
- package/dist/lib/autopilot-waves.d.ts +2 -0
- package/dist/lib/autopilot-waves.d.ts.map +1 -0
- package/dist/lib/autopilot-waves.js +266 -0
- package/dist/lib/autopilot-waves.js.map +1 -0
- package/dist/lib/autopilot.d.ts +2 -0
- package/dist/lib/autopilot.d.ts.map +1 -0
- package/dist/lib/autopilot.js +1314 -0
- package/dist/lib/autopilot.js.map +1 -0
- package/dist/lib/autoplan.d.ts +2 -0
- package/dist/lib/autoplan.d.ts.map +1 -0
- package/dist/lib/autoplan.js +198 -0
- package/dist/lib/autoplan.js.map +1 -0
- package/dist/lib/autoresearch.d.ts +2 -0
- package/dist/lib/autoresearch.d.ts.map +1 -0
- package/dist/lib/autoresearch.js +626 -0
- package/dist/lib/autoresearch.js.map +1 -0
- package/dist/lib/backend.d.ts +2 -0
- package/dist/lib/backend.d.ts.map +1 -0
- package/dist/lib/backend.js +1036 -0
- package/dist/lib/backend.js.map +1 -0
- package/dist/lib/benchmark.d.ts +99 -0
- package/dist/lib/benchmark.d.ts.map +1 -0
- package/dist/lib/benchmark.js +278 -0
- package/dist/lib/benchmark.js.map +1 -0
- package/dist/lib/citations.d.ts +2 -0
- package/dist/lib/citations.d.ts.map +1 -0
- package/dist/lib/citations.js +642 -0
- package/dist/lib/citations.js.map +1 -0
- package/dist/lib/cleanup.d.ts +2 -0
- package/dist/lib/cleanup.d.ts.map +1 -0
- package/dist/lib/cleanup.js +1222 -0
- package/dist/lib/cleanup.js.map +1 -0
- package/dist/lib/cli/adapters.d.ts +10 -0
- package/dist/lib/cli/adapters.d.ts.map +1 -0
- package/dist/lib/cli/adapters.js +27 -0
- package/dist/lib/cli/adapters.js.map +1 -0
- package/dist/lib/cli/agent.d.ts +17 -0
- package/dist/lib/cli/agent.d.ts.map +1 -0
- package/dist/lib/cli/agent.js +53 -0
- package/dist/lib/cli/agent.js.map +1 -0
- package/dist/lib/cli/index.d.ts +21 -0
- package/dist/lib/cli/index.d.ts.map +1 -0
- package/dist/lib/cli/index.js +264 -0
- package/dist/lib/cli/index.js.map +1 -0
- package/dist/lib/cli/output.d.ts +20 -0
- package/dist/lib/cli/output.d.ts.map +1 -0
- package/dist/lib/cli/output.js +22 -0
- package/dist/lib/cli/output.js.map +1 -0
- package/dist/lib/cli/scan-dispatch.d.ts +9 -0
- package/dist/lib/cli/scan-dispatch.d.ts.map +1 -0
- package/dist/lib/cli/scan-dispatch.js +107 -0
- package/dist/lib/cli/scan-dispatch.js.map +1 -0
- package/dist/lib/cli/tools.d.ts +16 -0
- package/dist/lib/cli/tools.d.ts.map +1 -0
- package/dist/lib/cli/tools.js +168 -0
- package/dist/lib/cli/tools.js.map +1 -0
- package/dist/lib/commands/_dashboard-parsers.d.ts +2 -0
- package/dist/lib/commands/_dashboard-parsers.d.ts.map +1 -0
- package/dist/lib/commands/_dashboard-parsers.js +192 -0
- package/dist/lib/commands/_dashboard-parsers.js.map +1 -0
- package/dist/lib/commands/analysis.d.ts +2 -0
- package/dist/lib/commands/analysis.d.ts.map +1 -0
- package/dist/lib/commands/analysis.js +1418 -0
- package/dist/lib/commands/analysis.js.map +1 -0
- package/dist/lib/commands/assumptions.d.ts +2 -0
- package/dist/lib/commands/assumptions.d.ts.map +1 -0
- package/dist/lib/commands/assumptions.js +166 -0
- package/dist/lib/commands/assumptions.js.map +1 -0
- package/dist/lib/commands/blame.d.ts +2 -0
- package/dist/lib/commands/blame.d.ts.map +1 -0
- package/dist/lib/commands/blame.js +133 -0
- package/dist/lib/commands/blame.js.map +1 -0
- package/dist/lib/commands/budget.d.ts +2 -0
- package/dist/lib/commands/budget.d.ts.map +1 -0
- package/dist/lib/commands/budget.js +100 -0
- package/dist/lib/commands/budget.js.map +1 -0
- package/dist/lib/commands/check-plans.d.ts +2 -0
- package/dist/lib/commands/check-plans.d.ts.map +1 -0
- package/dist/lib/commands/check-plans.js +190 -0
- package/dist/lib/commands/check-plans.js.map +1 -0
- package/dist/lib/commands/config.d.ts +2 -0
- package/dist/lib/commands/config.d.ts.map +1 -0
- package/dist/lib/commands/config.js +188 -0
- package/dist/lib/commands/config.js.map +1 -0
- package/dist/lib/commands/dashboard.d.ts +2 -0
- package/dist/lib/commands/dashboard.d.ts.map +1 -0
- package/dist/lib/commands/dashboard.js +466 -0
- package/dist/lib/commands/dashboard.js.map +1 -0
- package/dist/lib/commands/estimate.d.ts +2 -0
- package/dist/lib/commands/estimate.d.ts.map +1 -0
- package/dist/lib/commands/estimate.js +148 -0
- package/dist/lib/commands/estimate.js.map +1 -0
- package/dist/lib/commands/eval-diff.d.ts +2 -0
- package/dist/lib/commands/eval-diff.d.ts.map +1 -0
- package/dist/lib/commands/eval-diff.js +213 -0
- package/dist/lib/commands/eval-diff.js.map +1 -0
- package/dist/lib/commands/freshness.d.ts +2 -0
- package/dist/lib/commands/freshness.d.ts.map +1 -0
- package/dist/lib/commands/freshness.js +163 -0
- package/dist/lib/commands/freshness.js.map +1 -0
- package/dist/lib/commands/health.d.ts +2 -0
- package/dist/lib/commands/health.d.ts.map +1 -0
- package/dist/lib/commands/health.js +435 -0
- package/dist/lib/commands/health.js.map +1 -0
- package/dist/lib/commands/index.d.ts +2 -0
- package/dist/lib/commands/index.d.ts.map +1 -0
- package/dist/lib/commands/index.js +128 -0
- package/dist/lib/commands/index.js.map +1 -0
- package/dist/lib/commands/install.d.ts +56 -0
- package/dist/lib/commands/install.d.ts.map +1 -0
- package/dist/lib/commands/install.js +214 -0
- package/dist/lib/commands/install.js.map +1 -0
- package/dist/lib/commands/knowhow-aggregator.d.ts +2 -0
- package/dist/lib/commands/knowhow-aggregator.d.ts.map +1 -0
- package/dist/lib/commands/knowhow-aggregator.js +279 -0
- package/dist/lib/commands/knowhow-aggregator.js.map +1 -0
- package/dist/lib/commands/knowledge-search.d.ts +2 -0
- package/dist/lib/commands/knowledge-search.d.ts.map +1 -0
- package/dist/lib/commands/knowledge-search.js +113 -0
- package/dist/lib/commands/knowledge-search.js.map +1 -0
- package/dist/lib/commands/long-term-roadmap.d.ts +2 -0
- package/dist/lib/commands/long-term-roadmap.d.ts.map +1 -0
- package/dist/lib/commands/long-term-roadmap.js +272 -0
- package/dist/lib/commands/long-term-roadmap.js.map +1 -0
- package/dist/lib/commands/patterns.d.ts +91 -0
- package/dist/lib/commands/patterns.d.ts.map +1 -0
- package/dist/lib/commands/patterns.js +391 -0
- package/dist/lib/commands/patterns.js.map +1 -0
- package/dist/lib/commands/phase-info.d.ts +2 -0
- package/dist/lib/commands/phase-info.d.ts.map +1 -0
- package/dist/lib/commands/phase-info.js +509 -0
- package/dist/lib/commands/phase-info.js.map +1 -0
- package/dist/lib/commands/plan-lint.d.ts +56 -0
- package/dist/lib/commands/plan-lint.d.ts.map +1 -0
- package/dist/lib/commands/plan-lint.js +481 -0
- package/dist/lib/commands/plan-lint.js.map +1 -0
- package/dist/lib/commands/plan-phase.d.ts +53 -0
- package/dist/lib/commands/plan-phase.d.ts.map +1 -0
- package/dist/lib/commands/plan-phase.js +288 -0
- package/dist/lib/commands/plan-phase.js.map +1 -0
- package/dist/lib/commands/progress.d.ts +2 -0
- package/dist/lib/commands/progress.d.ts.map +1 -0
- package/dist/lib/commands/progress.js +266 -0
- package/dist/lib/commands/progress.js.map +1 -0
- package/dist/lib/commands/quality.d.ts +2 -0
- package/dist/lib/commands/quality.d.ts.map +1 -0
- package/dist/lib/commands/quality.js +80 -0
- package/dist/lib/commands/quality.js.map +1 -0
- package/dist/lib/commands/rollback.d.ts +2 -0
- package/dist/lib/commands/rollback.d.ts.map +1 -0
- package/dist/lib/commands/rollback.js +145 -0
- package/dist/lib/commands/rollback.js.map +1 -0
- package/dist/lib/commands/scan.d.ts +25 -0
- package/dist/lib/commands/scan.d.ts.map +1 -0
- package/dist/lib/commands/scan.js +28 -0
- package/dist/lib/commands/scan.js.map +1 -0
- package/dist/lib/commands/search.d.ts +2 -0
- package/dist/lib/commands/search.d.ts.map +1 -0
- package/dist/lib/commands/search.js +212 -0
- package/dist/lib/commands/search.js.map +1 -0
- package/dist/lib/commands/select-candidate.d.ts +128 -0
- package/dist/lib/commands/select-candidate.d.ts.map +1 -0
- package/dist/lib/commands/select-candidate.js +518 -0
- package/dist/lib/commands/select-candidate.js.map +1 -0
- package/dist/lib/commands/singularity.d.ts +2 -0
- package/dist/lib/commands/singularity.d.ts.map +1 -0
- package/dist/lib/commands/singularity.js +185 -0
- package/dist/lib/commands/singularity.js.map +1 -0
- package/dist/lib/commands/slug-timestamp.d.ts +2 -0
- package/dist/lib/commands/slug-timestamp.d.ts.map +1 -0
- package/dist/lib/commands/slug-timestamp.js +54 -0
- package/dist/lib/commands/slug-timestamp.js.map +1 -0
- package/dist/lib/commands/tail.d.ts +2 -0
- package/dist/lib/commands/tail.d.ts.map +1 -0
- package/dist/lib/commands/tail.js +100 -0
- package/dist/lib/commands/tail.js.map +1 -0
- package/dist/lib/commands/todo.d.ts +2 -0
- package/dist/lib/commands/todo.d.ts.map +1 -0
- package/dist/lib/commands/todo.js +200 -0
- package/dist/lib/commands/todo.js.map +1 -0
- package/dist/lib/commands/watch.d.ts +2 -0
- package/dist/lib/commands/watch.d.ts.map +1 -0
- package/dist/lib/commands/watch.js +72 -0
- package/dist/lib/commands/watch.js.map +1 -0
- package/dist/lib/complexity.d.ts +55 -0
- package/dist/lib/complexity.d.ts.map +1 -0
- package/dist/lib/complexity.js +80 -0
- package/dist/lib/complexity.js.map +1 -0
- package/dist/lib/context/agents.d.ts +2 -0
- package/dist/lib/context/agents.d.ts.map +1 -0
- package/dist/lib/context/agents.js +344 -0
- package/dist/lib/context/agents.js.map +1 -0
- package/dist/lib/context/base.d.ts +2 -0
- package/dist/lib/context/base.d.ts.map +1 -0
- package/dist/lib/context/base.js +81 -0
- package/dist/lib/context/base.js.map +1 -0
- package/dist/lib/context/execute.d.ts +2 -0
- package/dist/lib/context/execute.d.ts.map +1 -0
- package/dist/lib/context/execute.js +753 -0
- package/dist/lib/context/execute.js.map +1 -0
- package/dist/lib/context/index.d.ts +2 -0
- package/dist/lib/context/index.d.ts.map +1 -0
- package/dist/lib/context/index.js +88 -0
- package/dist/lib/context/index.js.map +1 -0
- package/dist/lib/context/progress.d.ts +2 -0
- package/dist/lib/context/progress.d.ts.map +1 -0
- package/dist/lib/context/progress.js +178 -0
- package/dist/lib/context/progress.js.map +1 -0
- package/dist/lib/context/project.d.ts +2 -0
- package/dist/lib/context/project.d.ts.map +1 -0
- package/dist/lib/context/project.js +413 -0
- package/dist/lib/context/project.js.map +1 -0
- package/dist/lib/context/research.d.ts +2 -0
- package/dist/lib/context/research.d.ts.map +1 -0
- package/dist/lib/context/research.js +466 -0
- package/dist/lib/context/research.js.map +1 -0
- package/dist/lib/dead-ends.d.ts +28 -0
- package/dist/lib/dead-ends.d.ts.map +1 -0
- package/dist/lib/dead-ends.js +451 -0
- package/dist/lib/dead-ends.js.map +1 -0
- package/dist/lib/deps.d.ts +2 -0
- package/dist/lib/deps.d.ts.map +1 -0
- package/dist/lib/deps.js +630 -0
- package/dist/lib/deps.js.map +1 -0
- package/dist/lib/discussion.d.ts +2 -0
- package/dist/lib/discussion.d.ts.map +1 -0
- package/dist/lib/discussion.js +1041 -0
- package/dist/lib/discussion.js.map +1 -0
- package/dist/lib/drift.d.ts +36 -0
- package/dist/lib/drift.d.ts.map +1 -0
- package/dist/lib/drift.js +481 -0
- package/dist/lib/drift.js.map +1 -0
- package/dist/lib/evolve/_dimensions-features.d.ts +2 -0
- package/dist/lib/evolve/_dimensions-features.d.ts.map +1 -0
- package/dist/lib/evolve/_dimensions-features.js +369 -0
- package/dist/lib/evolve/_dimensions-features.js.map +1 -0
- package/dist/lib/evolve/_dimensions.d.ts +2 -0
- package/dist/lib/evolve/_dimensions.d.ts.map +1 -0
- package/dist/lib/evolve/_dimensions.js +358 -0
- package/dist/lib/evolve/_dimensions.js.map +1 -0
- package/dist/lib/evolve/_product-ideation.d.ts +2 -0
- package/dist/lib/evolve/_product-ideation.d.ts.map +1 -0
- package/dist/lib/evolve/_product-ideation.js +281 -0
- package/dist/lib/evolve/_product-ideation.js.map +1 -0
- package/dist/lib/evolve/_prompts.d.ts +2 -0
- package/dist/lib/evolve/_prompts.d.ts.map +1 -0
- package/dist/lib/evolve/_prompts.js +153 -0
- package/dist/lib/evolve/_prompts.js.map +1 -0
- package/dist/lib/evolve/cli.d.ts +2 -0
- package/dist/lib/evolve/cli.d.ts.map +1 -0
- package/dist/lib/evolve/cli.js +224 -0
- package/dist/lib/evolve/cli.js.map +1 -0
- package/dist/lib/evolve/discovery.d.ts +2 -0
- package/dist/lib/evolve/discovery.d.ts.map +1 -0
- package/dist/lib/evolve/discovery.js +391 -0
- package/dist/lib/evolve/discovery.js.map +1 -0
- package/dist/lib/evolve/index.d.ts +2 -0
- package/dist/lib/evolve/index.d.ts.map +1 -0
- package/dist/lib/evolve/index.js +88 -0
- package/dist/lib/evolve/index.js.map +1 -0
- package/dist/lib/evolve/orchestrator.d.ts +2 -0
- package/dist/lib/evolve/orchestrator.d.ts.map +1 -0
- package/dist/lib/evolve/orchestrator.js +851 -0
- package/dist/lib/evolve/orchestrator.js.map +1 -0
- package/dist/lib/evolve/scoring.d.ts +2 -0
- package/dist/lib/evolve/scoring.d.ts.map +1 -0
- package/dist/lib/evolve/scoring.js +118 -0
- package/dist/lib/evolve/scoring.js.map +1 -0
- package/dist/lib/evolve/state.d.ts +2 -0
- package/dist/lib/evolve/state.d.ts.map +1 -0
- package/dist/lib/evolve/state.js +264 -0
- package/dist/lib/evolve/state.js.map +1 -0
- package/dist/lib/evolve/types.d.ts +249 -0
- package/dist/lib/evolve/types.d.ts.map +1 -0
- package/dist/lib/evolve/types.js +3 -0
- package/dist/lib/evolve/types.js.map +1 -0
- package/dist/lib/frontmatter.d.ts +2 -0
- package/dist/lib/frontmatter.d.ts.map +1 -0
- package/dist/lib/frontmatter.js +513 -0
- package/dist/lib/frontmatter.js.map +1 -0
- package/dist/lib/gates.d.ts +2 -0
- package/dist/lib/gates.d.ts.map +1 -0
- package/dist/lib/gates.js +578 -0
- package/dist/lib/gates.js.map +1 -0
- package/dist/lib/genome.d.ts +10 -0
- package/dist/lib/genome.d.ts.map +1 -0
- package/dist/lib/genome.js +368 -0
- package/dist/lib/genome.js.map +1 -0
- package/dist/lib/got.d.ts +2 -0
- package/dist/lib/got.d.ts.map +1 -0
- package/dist/lib/got.js +280 -0
- package/dist/lib/got.js.map +1 -0
- package/dist/lib/invariants.d.ts +2 -0
- package/dist/lib/invariants.d.ts.map +1 -0
- package/dist/lib/invariants.js +298 -0
- package/dist/lib/invariants.js.map +1 -0
- package/dist/lib/knowledge.d.ts +2 -0
- package/dist/lib/knowledge.d.ts.map +1 -0
- package/dist/lib/knowledge.js +658 -0
- package/dist/lib/knowledge.js.map +1 -0
- package/dist/lib/long-term-roadmap.d.ts +2 -0
- package/dist/lib/long-term-roadmap.d.ts.map +1 -0
- package/dist/lib/long-term-roadmap.js +602 -0
- package/dist/lib/long-term-roadmap.js.map +1 -0
- package/dist/lib/markdown-split.d.ts +2 -0
- package/dist/lib/markdown-split.d.ts.map +1 -0
- package/dist/lib/markdown-split.js +199 -0
- package/dist/lib/markdown-split.js.map +1 -0
- package/dist/lib/mcp-server.d.ts +2 -0
- package/dist/lib/mcp-server.d.ts.map +1 -0
- package/dist/lib/mcp-server.js +2424 -0
- package/dist/lib/mcp-server.js.map +1 -0
- package/dist/lib/metrics.d.ts +16 -0
- package/dist/lib/metrics.d.ts.map +1 -0
- package/dist/lib/metrics.js +48 -0
- package/dist/lib/metrics.js.map +1 -0
- package/dist/lib/overstory.d.ts +2 -0
- package/dist/lib/overstory.d.ts.map +1 -0
- package/dist/lib/overstory.js +211 -0
- package/dist/lib/overstory.js.map +1 -0
- package/dist/lib/parallel.d.ts +2 -0
- package/dist/lib/parallel.d.ts.map +1 -0
- package/dist/lib/parallel.js +349 -0
- package/dist/lib/parallel.js.map +1 -0
- package/dist/lib/paths.d.ts +2 -0
- package/dist/lib/paths.d.ts.map +1 -0
- package/dist/lib/paths.js +254 -0
- package/dist/lib/paths.js.map +1 -0
- package/dist/lib/phase-complete-llm.d.ts +22 -0
- package/dist/lib/phase-complete-llm.d.ts.map +1 -0
- package/dist/lib/phase-complete-llm.js +331 -0
- package/dist/lib/phase-complete-llm.js.map +1 -0
- package/dist/lib/phase-complete.d.ts +46 -0
- package/dist/lib/phase-complete.d.ts.map +1 -0
- package/dist/lib/phase-complete.js +278 -0
- package/dist/lib/phase-complete.js.map +1 -0
- package/dist/lib/phase-io.d.ts +2 -0
- package/dist/lib/phase-io.d.ts.map +1 -0
- package/dist/lib/phase-io.js +126 -0
- package/dist/lib/phase-io.js.map +1 -0
- package/dist/lib/phase.d.ts +2 -0
- package/dist/lib/phase.d.ts.map +1 -0
- package/dist/lib/phase.js +1344 -0
- package/dist/lib/phase.js.map +1 -0
- package/dist/lib/plan-tournament.d.ts +63 -0
- package/dist/lib/plan-tournament.d.ts.map +1 -0
- package/dist/lib/plan-tournament.js +353 -0
- package/dist/lib/plan-tournament.js.map +1 -0
- package/dist/lib/refinement.d.ts +74 -0
- package/dist/lib/refinement.d.ts.map +1 -0
- package/dist/lib/refinement.js +283 -0
- package/dist/lib/refinement.js.map +1 -0
- package/dist/lib/requirements.d.ts +2 -0
- package/dist/lib/requirements.d.ts.map +1 -0
- package/dist/lib/requirements.js +355 -0
- package/dist/lib/requirements.js.map +1 -0
- package/dist/lib/research-bundle.d.ts +2 -0
- package/dist/lib/research-bundle.d.ts.map +1 -0
- package/dist/lib/research-bundle.js +246 -0
- package/dist/lib/research-bundle.js.map +1 -0
- package/dist/lib/roadmap.d.ts +2 -0
- package/dist/lib/roadmap.d.ts.map +1 -0
- package/dist/lib/roadmap.js +541 -0
- package/dist/lib/roadmap.js.map +1 -0
- package/dist/lib/sample.d.ts +16 -0
- package/dist/lib/sample.d.ts.map +1 -0
- package/dist/lib/sample.js +20 -0
- package/dist/lib/sample.js.map +1 -0
- package/dist/lib/scaffold.d.ts +2 -0
- package/dist/lib/scaffold.d.ts.map +1 -0
- package/dist/lib/scaffold.js +355 -0
- package/dist/lib/scaffold.js.map +1 -0
- package/dist/lib/scan/_utils.d.ts +11 -0
- package/dist/lib/scan/_utils.d.ts.map +1 -0
- package/dist/lib/scan/_utils.js +36 -0
- package/dist/lib/scan/_utils.js.map +1 -0
- package/dist/lib/scan/base64.d.ts +15 -0
- package/dist/lib/scan/base64.d.ts.map +1 -0
- package/dist/lib/scan/base64.js +66 -0
- package/dist/lib/scan/base64.js.map +1 -0
- package/dist/lib/scan/ignorefile.d.ts +30 -0
- package/dist/lib/scan/ignorefile.d.ts.map +1 -0
- package/dist/lib/scan/ignorefile.js +101 -0
- package/dist/lib/scan/ignorefile.js.map +1 -0
- package/dist/lib/scan/injection.d.ts +14 -0
- package/dist/lib/scan/injection.d.ts.map +1 -0
- package/dist/lib/scan/injection.js +39 -0
- package/dist/lib/scan/injection.js.map +1 -0
- package/dist/lib/scan/patterns.d.ts +17 -0
- package/dist/lib/scan/patterns.d.ts.map +1 -0
- package/dist/lib/scan/patterns.js +123 -0
- package/dist/lib/scan/patterns.js.map +1 -0
- package/dist/lib/scan/strip-markdown.d.ts +7 -0
- package/dist/lib/scan/strip-markdown.d.ts.map +1 -0
- package/dist/lib/scan/strip-markdown.js +38 -0
- package/dist/lib/scan/strip-markdown.js.map +1 -0
- package/dist/lib/scan/types.d.ts +23 -0
- package/dist/lib/scan/types.d.ts.map +1 -0
- package/dist/lib/scan/types.js +3 -0
- package/dist/lib/scan/types.js.map +1 -0
- package/dist/lib/scheduler-wait.d.ts +2 -0
- package/dist/lib/scheduler-wait.d.ts.map +1 -0
- package/dist/lib/scheduler-wait.js +59 -0
- package/dist/lib/scheduler-wait.js.map +1 -0
- package/dist/lib/scheduler.d.ts +254 -0
- package/dist/lib/scheduler.d.ts.map +1 -0
- package/dist/lib/scheduler.js +1147 -0
- package/dist/lib/scheduler.js.map +1 -0
- package/dist/lib/state.d.ts +2 -0
- package/dist/lib/state.d.ts.map +1 -0
- package/dist/lib/state.js +744 -0
- package/dist/lib/state.js.map +1 -0
- package/dist/lib/think.d.ts +18 -0
- package/dist/lib/think.d.ts.map +1 -0
- package/dist/lib/think.js +317 -0
- package/dist/lib/think.js.map +1 -0
- package/dist/lib/tracker.d.ts +2 -0
- package/dist/lib/tracker.d.ts.map +1 -0
- package/dist/lib/tracker.js +1121 -0
- package/dist/lib/tracker.js.map +1 -0
- package/dist/lib/types.d.ts +1514 -0
- package/dist/lib/types.d.ts.map +1 -0
- package/dist/lib/types.js +4 -0
- package/dist/lib/types.js.map +1 -0
- package/dist/lib/utils.d.ts +2 -0
- package/dist/lib/utils.d.ts.map +1 -0
- package/dist/lib/utils.js +1363 -0
- package/dist/lib/utils.js.map +1 -0
- package/dist/lib/verify.d.ts +2 -0
- package/dist/lib/verify.d.ts.map +1 -0
- package/dist/lib/verify.js +1153 -0
- package/dist/lib/verify.js.map +1 -0
- package/dist/lib/wireup/autofix.d.ts +2 -0
- package/dist/lib/wireup/autofix.d.ts.map +1 -0
- package/dist/lib/wireup/autofix.js +188 -0
- package/dist/lib/wireup/autofix.js.map +1 -0
- package/dist/lib/wireup/cli.d.ts +2 -0
- package/dist/lib/wireup/cli.d.ts.map +1 -0
- package/dist/lib/wireup/cli.js +194 -0
- package/dist/lib/wireup/cli.js.map +1 -0
- package/dist/lib/wireup/detection.d.ts +47 -0
- package/dist/lib/wireup/detection.d.ts.map +1 -0
- package/dist/lib/wireup/detection.js +410 -0
- package/dist/lib/wireup/detection.js.map +1 -0
- package/dist/lib/wireup/discovery.d.ts +2 -0
- package/dist/lib/wireup/discovery.d.ts.map +1 -0
- package/dist/lib/wireup/discovery.js +934 -0
- package/dist/lib/wireup/discovery.js.map +1 -0
- package/dist/lib/wireup/execution.d.ts +2 -0
- package/dist/lib/wireup/execution.d.ts.map +1 -0
- package/dist/lib/wireup/execution.js +573 -0
- package/dist/lib/wireup/execution.js.map +1 -0
- package/dist/lib/wireup/index.d.ts +2 -0
- package/dist/lib/wireup/index.d.ts.map +1 -0
- package/dist/lib/wireup/index.js +85 -0
- package/dist/lib/wireup/index.js.map +1 -0
- package/dist/lib/wireup/orchestrator.d.ts +2 -0
- package/dist/lib/wireup/orchestrator.d.ts.map +1 -0
- package/dist/lib/wireup/orchestrator.js +366 -0
- package/dist/lib/wireup/orchestrator.js.map +1 -0
- package/dist/lib/wireup/report.d.ts +47 -0
- package/dist/lib/wireup/report.d.ts.map +1 -0
- package/dist/lib/wireup/report.js +201 -0
- package/dist/lib/wireup/report.js.map +1 -0
- package/dist/lib/wireup/scenarios.d.ts +2 -0
- package/dist/lib/wireup/scenarios.d.ts.map +1 -0
- package/dist/lib/wireup/scenarios.js +516 -0
- package/dist/lib/wireup/scenarios.js.map +1 -0
- package/dist/lib/wireup/state.d.ts +2 -0
- package/dist/lib/wireup/state.d.ts.map +1 -0
- package/dist/lib/wireup/state.js +102 -0
- package/dist/lib/wireup/state.js.map +1 -0
- package/dist/lib/wireup/types.d.ts +376 -0
- package/dist/lib/wireup/types.d.ts.map +1 -0
- package/dist/lib/wireup/types.js +3 -0
- package/dist/lib/wireup/types.js.map +1 -0
- package/dist/lib/worktree.d.ts +2 -0
- package/dist/lib/worktree.d.ts.map +1 -0
- package/dist/lib/worktree.js +999 -0
- package/dist/lib/worktree.js.map +1 -0
- package/lib/autopilot-milestone.ts +136 -0
- package/lib/autopilot-pipeline.ts +1179 -0
- package/lib/autopilot-waves.ts +361 -0
- package/lib/autopilot.ts +1874 -0
- package/lib/autoplan.ts +280 -0
- package/lib/autoresearch.js +4 -0
- package/lib/autoresearch.ts +886 -0
- package/lib/backend.ts +1252 -0
- package/lib/benchmark.ts +341 -0
- package/lib/citations.ts +760 -0
- package/lib/cleanup.ts +1588 -0
- package/lib/cli/adapters.ts +41 -0
- package/lib/cli/agent.ts +83 -0
- package/lib/cli/index.ts +273 -0
- package/lib/cli/output.ts +33 -0
- package/lib/cli/scan-dispatch.ts +130 -0
- package/lib/cli/tools.ts +198 -0
- package/lib/commands/_dashboard-parsers.ts +275 -0
- package/lib/commands/analysis.ts +1851 -0
- package/lib/commands/assumptions.ts +232 -0
- package/lib/commands/blame.ts +174 -0
- package/lib/commands/budget.ts +148 -0
- package/lib/commands/check-plans.ts +233 -0
- package/lib/commands/config.ts +287 -0
- package/lib/commands/dashboard.ts +680 -0
- package/lib/commands/estimate.ts +204 -0
- package/lib/commands/eval-diff.ts +252 -0
- package/lib/commands/freshness.ts +213 -0
- package/lib/commands/health.ts +607 -0
- package/lib/commands/index.ts +266 -0
- package/lib/commands/install.ts +307 -0
- package/lib/commands/knowhow-aggregator.ts +345 -0
- package/lib/commands/knowledge-search.ts +153 -0
- package/lib/commands/long-term-roadmap.ts +390 -0
- package/lib/commands/patterns.ts +465 -0
- package/lib/commands/phase-info.ts +698 -0
- package/lib/commands/plan-lint.ts +546 -0
- package/lib/commands/plan-phase.ts +375 -0
- package/lib/commands/progress.ts +319 -0
- package/lib/commands/quality.ts +138 -0
- package/lib/commands/rollback.ts +195 -0
- package/lib/commands/scan.ts +72 -0
- package/lib/commands/search.ts +300 -0
- package/lib/commands/select-candidate.ts +687 -0
- package/lib/commands/singularity.ts +222 -0
- package/lib/commands/slug-timestamp.ts +74 -0
- package/lib/commands/tail.ts +129 -0
- package/lib/commands/todo.ts +273 -0
- package/lib/commands/watch.ts +80 -0
- package/lib/complexity.ts +117 -0
- package/lib/context/agents.ts +505 -0
- package/lib/context/base.ts +123 -0
- package/lib/context/execute.ts +977 -0
- package/lib/context/index.ts +110 -0
- package/lib/context/progress.ts +278 -0
- package/lib/context/project.ts +531 -0
- package/lib/context/research.ts +646 -0
- package/lib/dead-ends.ts +506 -0
- package/lib/deps.ts +773 -0
- package/lib/discussion.ts +1275 -0
- package/lib/drift.ts +519 -0
- package/lib/evolve/_dimensions-features.ts +525 -0
- package/lib/evolve/_dimensions.ts +511 -0
- package/lib/evolve/_product-ideation.ts +405 -0
- package/lib/evolve/_prompts.ts +178 -0
- package/lib/evolve/cli.ts +330 -0
- package/lib/evolve/discovery.ts +571 -0
- package/lib/evolve/index.ts +105 -0
- package/lib/evolve/orchestrator.ts +1139 -0
- package/lib/evolve/scoring.ts +167 -0
- package/lib/evolve/state.ts +330 -0
- package/lib/evolve/types.ts +290 -0
- package/lib/frontmatter.ts +615 -0
- package/lib/gates.ts +695 -0
- package/lib/genome.ts +402 -0
- package/lib/got.js +4 -0
- package/lib/got.ts +361 -0
- package/lib/invariants.ts +378 -0
- package/lib/knowledge.ts +768 -0
- package/lib/long-term-roadmap.ts +806 -0
- package/lib/markdown-split.ts +273 -0
- package/lib/mcp-server.ts +3292 -0
- package/lib/metrics.ts +49 -0
- package/lib/overstory.ts +270 -0
- package/lib/parallel.ts +570 -0
- package/lib/paths.ts +293 -0
- package/lib/phase-complete-llm.ts +376 -0
- package/lib/phase-complete.ts +366 -0
- package/lib/phase-io.ts +101 -0
- package/lib/phase.ts +1981 -0
- package/lib/plan-tournament.ts +426 -0
- package/lib/refinement.ts +349 -0
- package/lib/requirements.ts +469 -0
- package/lib/research-bundle.ts +300 -0
- package/lib/roadmap.ts +775 -0
- package/lib/scaffold.ts +480 -0
- package/lib/scan/_utils.ts +37 -0
- package/lib/scan/base64.ts +90 -0
- package/lib/scan/ignorefile.ts +109 -0
- package/lib/scan/injection.ts +67 -0
- package/lib/scan/patterns.ts +139 -0
- package/lib/scan/strip-markdown.ts +39 -0
- package/lib/scan/types.ts +28 -0
- package/lib/scheduler-wait.ts +58 -0
- package/lib/scheduler.ts +1370 -0
- package/lib/state.ts +1000 -0
- package/lib/think.ts +365 -0
- package/lib/tracker.ts +1591 -0
- package/lib/types.ts +1663 -0
- package/lib/utils.ts +1479 -0
- package/lib/verify.ts +1434 -0
- package/lib/wireup/autofix.ts +241 -0
- package/lib/wireup/cli.ts +278 -0
- package/lib/wireup/detection.ts +542 -0
- package/lib/wireup/discovery.ts +1063 -0
- package/lib/wireup/execution.ts +686 -0
- package/lib/wireup/index.ts +117 -0
- package/lib/wireup/orchestrator.ts +519 -0
- package/lib/wireup/report.ts +286 -0
- package/lib/wireup/scenarios.ts +616 -0
- package/lib/wireup/state.ts +139 -0
- package/lib/wireup/types.ts +436 -0
- package/lib/worktree.ts +1309 -0
- package/package.json +67 -0
package/lib/benchmark.ts
ADDED
|
@@ -0,0 +1,341 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* GRD Benchmark -- Corpus management and composite scoring for the evaluation framework.
|
|
5
|
+
*
|
|
6
|
+
* Provides operations for loading/saving a benchmark corpus of research papers and
|
|
7
|
+
* computing composite quality scores from semantic and trainability dimensions.
|
|
8
|
+
*
|
|
9
|
+
* Category taxonomy adapted from NERFIFY-BENCH Figure 7 classification of papers
|
|
10
|
+
* by implementation difficulty.
|
|
11
|
+
*
|
|
12
|
+
* @module benchmark
|
|
13
|
+
*/
|
|
14
|
+
|
|
15
|
+
const fs = require('fs') as typeof import('fs');
|
|
16
|
+
const path = require('path') as typeof import('path');
|
|
17
|
+
|
|
18
|
+
import type {
|
|
19
|
+
BenchmarkEntry,
|
|
20
|
+
BenchmarkResult,
|
|
21
|
+
ScoringRubric,
|
|
22
|
+
IntegrationCategory,
|
|
23
|
+
SemanticScore,
|
|
24
|
+
TrainabilityMetrics,
|
|
25
|
+
} from './types';
|
|
26
|
+
|
|
27
|
+
/**
|
|
28
|
+
* Load all benchmark entries from a corpus directory.
|
|
29
|
+
* Returns BenchmarkEntry[] sorted by added_at descending (newest first).
|
|
30
|
+
* Returns [] if the directory does not exist or is empty.
|
|
31
|
+
*/
|
|
32
|
+
export function loadCorpus(corpusDir: string): BenchmarkEntry[] {
|
|
33
|
+
if (!fs.existsSync(corpusDir)) {
|
|
34
|
+
return [];
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
const files = fs.readdirSync(corpusDir).filter((f: string) => f.endsWith('.json'));
|
|
38
|
+
|
|
39
|
+
const entries: BenchmarkEntry[] = [];
|
|
40
|
+
for (const file of files) {
|
|
41
|
+
const filePath = path.join(corpusDir, file);
|
|
42
|
+
const raw = fs.readFileSync(filePath, 'utf8');
|
|
43
|
+
try {
|
|
44
|
+
const parsed = JSON.parse(raw) as BenchmarkEntry;
|
|
45
|
+
entries.push(parsed);
|
|
46
|
+
} catch (_err) {
|
|
47
|
+
process.stderr.write(`[benchmark] Warning: failed to parse JSON in ${filePath}\n`);
|
|
48
|
+
}
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
// Sort by added_at descending (newest first)
|
|
52
|
+
entries.sort((a, b) => {
|
|
53
|
+
if (a.added_at > b.added_at) return -1;
|
|
54
|
+
if (a.added_at < b.added_at) return 1;
|
|
55
|
+
return 0;
|
|
56
|
+
});
|
|
57
|
+
|
|
58
|
+
return entries;
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
/**
|
|
62
|
+
* Save a single BenchmarkEntry as JSON to corpusDir/{id}.json.
|
|
63
|
+
* Creates the directory if it does not exist.
|
|
64
|
+
*/
|
|
65
|
+
export function saveCorpusEntry(corpusDir: string, entry: BenchmarkEntry): void {
|
|
66
|
+
fs.mkdirSync(corpusDir, { recursive: true });
|
|
67
|
+
const filePath = path.join(corpusDir, `${entry.id}.json`);
|
|
68
|
+
fs.writeFileSync(filePath, JSON.stringify(entry, null, 2), 'utf8');
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
/**
|
|
72
|
+
* Compute a weighted composite score from semantic and trainability inputs.
|
|
73
|
+
*
|
|
74
|
+
* Semantic sub-score: average of novelty_capture, api_surface_match, algorithmic_fidelity.
|
|
75
|
+
* Trainability sub-score: weighted sum — build_success (0.4), runtime_stable (0.3), convergence_detected (0.3).
|
|
76
|
+
* Composite = (semantic_sub * semantic_weight + trainability_sub * trainability_weight) * category_adjustment.
|
|
77
|
+
*
|
|
78
|
+
* @param semantic - Semantic sub-scores from an evaluator
|
|
79
|
+
* @param trainability - Build/run/convergence metrics
|
|
80
|
+
* @param rubric - Weight distribution and category multipliers
|
|
81
|
+
* @param category - The paper's integration category
|
|
82
|
+
* @returns Composite score clamped to [0, 1]
|
|
83
|
+
*/
|
|
84
|
+
export function scoreComposite(
|
|
85
|
+
semantic: SemanticScore,
|
|
86
|
+
trainability: TrainabilityMetrics,
|
|
87
|
+
rubric: ScoringRubric,
|
|
88
|
+
category: IntegrationCategory,
|
|
89
|
+
): number {
|
|
90
|
+
const semanticSub =
|
|
91
|
+
(semantic.novelty_capture + semantic.api_surface_match + semantic.algorithmic_fidelity) / 3;
|
|
92
|
+
|
|
93
|
+
const trainabilitySub =
|
|
94
|
+
(trainability.build_success ? 1 : 0) * 0.4 +
|
|
95
|
+
(trainability.runtime_stable ? 1 : 0) * 0.3 +
|
|
96
|
+
(trainability.convergence_detected ? 1 : 0) * 0.3;
|
|
97
|
+
|
|
98
|
+
const composite =
|
|
99
|
+
(semanticSub * rubric.semantic_weight + trainabilitySub * rubric.trainability_weight) *
|
|
100
|
+
rubric.category_adjustments[category];
|
|
101
|
+
|
|
102
|
+
return Math.min(1, Math.max(0, composite));
|
|
103
|
+
}
|
|
104
|
+
|
|
105
|
+
/**
|
|
106
|
+
* Create the default ScoringRubric with NERFIFY-BENCH-inspired weights.
|
|
107
|
+
*
|
|
108
|
+
* semantic_weight=0.6 because semantic fidelity matters most for paper-to-code synthesis.
|
|
109
|
+
* trainability_weight=0.4.
|
|
110
|
+
* Category adjustments penalize harder-to-implement categories:
|
|
111
|
+
* - directly-integrable: 1.0 (no penalty)
|
|
112
|
+
* - requires-external-models: 0.85
|
|
113
|
+
* - out-of-scope: 0.5 (heaviest penalty — capabilities beyond code synthesis)
|
|
114
|
+
* - novelty-coverage: 0.9
|
|
115
|
+
*/
|
|
116
|
+
export function createDefaultRubric(): ScoringRubric {
|
|
117
|
+
return {
|
|
118
|
+
semantic_weight: 0.6,
|
|
119
|
+
trainability_weight: 0.4,
|
|
120
|
+
category_adjustments: {
|
|
121
|
+
'directly-integrable': 1.0,
|
|
122
|
+
'requires-external-models': 0.85,
|
|
123
|
+
'out-of-scope': 0.5,
|
|
124
|
+
'novelty-coverage': 0.9,
|
|
125
|
+
},
|
|
126
|
+
};
|
|
127
|
+
}
|
|
128
|
+
|
|
129
|
+
/**
|
|
130
|
+
* Format a markdown benchmark report from results and corpus entries.
|
|
131
|
+
*
|
|
132
|
+
* Output structure:
|
|
133
|
+
* ## Benchmark Results
|
|
134
|
+
* {N} entries evaluated
|
|
135
|
+
* | Paper | Category | Semantic | Trainability | Composite |
|
|
136
|
+
* | ... rows ... |
|
|
137
|
+
* | **Average** | — | — | — | {avg} |
|
|
138
|
+
*
|
|
139
|
+
* @param results - Array of scored benchmark results
|
|
140
|
+
* @param entries - Corpus entries for title/category lookup
|
|
141
|
+
* @returns Markdown string
|
|
142
|
+
*/
|
|
143
|
+
/**
|
|
144
|
+
* Classify a BenchmarkEntry into an IntegrationCategory using NERFIFY-BENCH Figure 7-inspired taxonomy.
|
|
145
|
+
*
|
|
146
|
+
* Priority order (first match wins):
|
|
147
|
+
* 1. out-of-scope: hardware-specific, proprietary-data, or closed-source
|
|
148
|
+
* 2. requires-external-models: pretrained, foundation-model, external-weights, or fine-tuned
|
|
149
|
+
* 3. novelty-coverage: novel-loss, novel-architecture, or novel-representation
|
|
150
|
+
* 4. directly-integrable: default (no indicator tags found)
|
|
151
|
+
*
|
|
152
|
+
* Tag matching is case-insensitive.
|
|
153
|
+
*/
|
|
154
|
+
export function classifyEntry(entry: BenchmarkEntry): IntegrationCategory {
|
|
155
|
+
const lowerTags = entry.tags.map((t) => t.toLowerCase());
|
|
156
|
+
|
|
157
|
+
const hasTag = (tag: string): boolean => lowerTags.includes(tag.toLowerCase());
|
|
158
|
+
|
|
159
|
+
// Priority 1: out-of-scope
|
|
160
|
+
if (hasTag('hardware-specific') || hasTag('proprietary-data') || hasTag('closed-source')) {
|
|
161
|
+
return 'out-of-scope';
|
|
162
|
+
}
|
|
163
|
+
|
|
164
|
+
// Priority 2: requires-external-models
|
|
165
|
+
if (
|
|
166
|
+
hasTag('pretrained') ||
|
|
167
|
+
hasTag('foundation-model') ||
|
|
168
|
+
hasTag('external-weights') ||
|
|
169
|
+
hasTag('fine-tuned')
|
|
170
|
+
) {
|
|
171
|
+
return 'requires-external-models';
|
|
172
|
+
}
|
|
173
|
+
|
|
174
|
+
// Priority 3: novelty-coverage
|
|
175
|
+
if (hasTag('novel-loss') || hasTag('novel-architecture') || hasTag('novel-representation')) {
|
|
176
|
+
return 'novelty-coverage';
|
|
177
|
+
}
|
|
178
|
+
|
|
179
|
+
// Default: directly-integrable
|
|
180
|
+
return 'directly-integrable';
|
|
181
|
+
}
|
|
182
|
+
|
|
183
|
+
/**
|
|
184
|
+
* Parse a structured evaluation summary string into a SemanticScore.
|
|
185
|
+
*
|
|
186
|
+
* Looks for lines matching:
|
|
187
|
+
* novelty_capture: X.XX
|
|
188
|
+
* api_surface_match: X.XX
|
|
189
|
+
* algorithmic_fidelity: X.XX
|
|
190
|
+
* notes: ...
|
|
191
|
+
*
|
|
192
|
+
* Values are clamped to [0, 1]. Missing fields default to 0/empty string.
|
|
193
|
+
*/
|
|
194
|
+
export function scoreSemanticFromSummary(summary: string): SemanticScore {
|
|
195
|
+
const clamp = (v: number): number => Math.min(1, Math.max(0, v));
|
|
196
|
+
|
|
197
|
+
const parseField = (fieldName: string): number => {
|
|
198
|
+
const match = summary.match(new RegExp(`${fieldName}:\\s*([\\d.eE+\\-]+)`));
|
|
199
|
+
if (!match) return 0;
|
|
200
|
+
const parsed = parseFloat(match[1]);
|
|
201
|
+
return isNaN(parsed) ? 0 : clamp(parsed);
|
|
202
|
+
};
|
|
203
|
+
|
|
204
|
+
const notesMatch = summary.match(/^notes:\s*(.+)$/m);
|
|
205
|
+
const notes = notesMatch ? notesMatch[1].trim() : '';
|
|
206
|
+
|
|
207
|
+
return {
|
|
208
|
+
novelty_capture: parseField('novelty_capture'),
|
|
209
|
+
api_surface_match: parseField('api_surface_match'),
|
|
210
|
+
algorithmic_fidelity: parseField('algorithmic_fidelity'),
|
|
211
|
+
notes,
|
|
212
|
+
};
|
|
213
|
+
}
|
|
214
|
+
|
|
215
|
+
/**
|
|
216
|
+
* Assess trainability metrics from build/run execution outputs.
|
|
217
|
+
*
|
|
218
|
+
* build_success: buildOutput non-empty AND has no error indicators
|
|
219
|
+
* runtime_stable: runOutput non-empty AND has no crash indicators
|
|
220
|
+
* convergence_detected: runOutput contains convergence indicators
|
|
221
|
+
* execution_time_ms: passed through
|
|
222
|
+
* error_log: stderr content trimmed (empty string if blank)
|
|
223
|
+
*/
|
|
224
|
+
export function assessTrainability(
|
|
225
|
+
buildOutput: string,
|
|
226
|
+
runOutput: string,
|
|
227
|
+
stderr: string,
|
|
228
|
+
executionTimeMs: number,
|
|
229
|
+
): TrainabilityMetrics {
|
|
230
|
+
const buildErrorIndicators = ['error', 'Error', 'FAILED', 'failed to compile'];
|
|
231
|
+
const crashIndicators = ['SIGKILL', 'SIGSEGV', 'heap out of memory', 'fatal error', 'unhandled exception'];
|
|
232
|
+
const convergenceIndicators = ['converged', 'loss decreased', 'metric improved'];
|
|
233
|
+
|
|
234
|
+
const hasBuildError = buildErrorIndicators.some((indicator) => buildOutput.includes(indicator));
|
|
235
|
+
const build_success = buildOutput.length > 0 && !hasBuildError;
|
|
236
|
+
|
|
237
|
+
const hasCrash = crashIndicators.some((indicator) => runOutput.includes(indicator));
|
|
238
|
+
const runtime_stable = runOutput.length > 0 && !hasCrash;
|
|
239
|
+
|
|
240
|
+
const convergence_detected = convergenceIndicators.some((indicator) => runOutput.includes(indicator));
|
|
241
|
+
|
|
242
|
+
return {
|
|
243
|
+
build_success,
|
|
244
|
+
runtime_stable,
|
|
245
|
+
convergence_detected,
|
|
246
|
+
execution_time_ms: executionTimeMs,
|
|
247
|
+
error_log: stderr ? stderr.trim() : '',
|
|
248
|
+
};
|
|
249
|
+
}
|
|
250
|
+
|
|
251
|
+
/**
|
|
252
|
+
* Orchestrate the full evaluation pipeline for a single benchmark entry.
|
|
253
|
+
*
|
|
254
|
+
* Steps:
|
|
255
|
+
* 1. Classify: uses entry.category as-is (caller is responsible for classification)
|
|
256
|
+
* 2. Score semantic: parse semanticSummary via scoreSemanticFromSummary
|
|
257
|
+
* 3. Assess trainability: parse build/run outputs via assessTrainability
|
|
258
|
+
* 4. Compute composite: call scoreComposite with default rubric
|
|
259
|
+
* 5. Return BenchmarkResult with all fields populated
|
|
260
|
+
*/
|
|
261
|
+
export function evaluateEntry(
|
|
262
|
+
entry: BenchmarkEntry,
|
|
263
|
+
semanticSummary: string,
|
|
264
|
+
buildOutput: string,
|
|
265
|
+
runOutput: string,
|
|
266
|
+
stderr: string,
|
|
267
|
+
executionTimeMs: number,
|
|
268
|
+
rubricVersion: string,
|
|
269
|
+
evaluator: string,
|
|
270
|
+
): BenchmarkResult {
|
|
271
|
+
const category = entry.category;
|
|
272
|
+
const semantic = scoreSemanticFromSummary(semanticSummary);
|
|
273
|
+
const trainability = assessTrainability(buildOutput, runOutput, stderr, executionTimeMs);
|
|
274
|
+
const rubric = createDefaultRubric();
|
|
275
|
+
const composite_score = scoreComposite(semantic, trainability, rubric, category);
|
|
276
|
+
|
|
277
|
+
return {
|
|
278
|
+
entry_id: entry.id,
|
|
279
|
+
semantic,
|
|
280
|
+
trainability,
|
|
281
|
+
composite_score,
|
|
282
|
+
rubric_version: rubricVersion,
|
|
283
|
+
evaluated_at: new Date().toISOString(),
|
|
284
|
+
evaluator,
|
|
285
|
+
};
|
|
286
|
+
}
|
|
287
|
+
|
|
288
|
+
export function formatBenchmarkReport(
|
|
289
|
+
results: BenchmarkResult[],
|
|
290
|
+
entries: BenchmarkEntry[],
|
|
291
|
+
): string {
|
|
292
|
+
const entryMap = new Map<string, BenchmarkEntry>(entries.map((e) => [e.id, e]));
|
|
293
|
+
|
|
294
|
+
const lines: string[] = [];
|
|
295
|
+
lines.push('## Benchmark Results');
|
|
296
|
+
lines.push('');
|
|
297
|
+
lines.push(`${results.length} entries evaluated`);
|
|
298
|
+
lines.push('');
|
|
299
|
+
lines.push('| Paper | Category | Semantic | Trainability | Composite |');
|
|
300
|
+
lines.push('|-------|----------|----------|--------------|-----------|');
|
|
301
|
+
|
|
302
|
+
let totalComposite = 0;
|
|
303
|
+
for (const result of results) {
|
|
304
|
+
const entry = entryMap.get(result.entry_id);
|
|
305
|
+
const title = entry ? entry.title : result.entry_id;
|
|
306
|
+
const category = entry ? entry.category : '—';
|
|
307
|
+
|
|
308
|
+
const semanticAvg =
|
|
309
|
+
(result.semantic.novelty_capture +
|
|
310
|
+
result.semantic.api_surface_match +
|
|
311
|
+
result.semantic.algorithmic_fidelity) /
|
|
312
|
+
3;
|
|
313
|
+
const trainabilityLabel =
|
|
314
|
+
result.trainability.build_success && result.trainability.runtime_stable ? 'PASS' : 'FAIL';
|
|
315
|
+
const compositeFormatted = result.composite_score.toFixed(2);
|
|
316
|
+
|
|
317
|
+
lines.push(
|
|
318
|
+
`| ${title} | ${category} | ${semanticAvg.toFixed(2)} | ${trainabilityLabel} | ${compositeFormatted} |`,
|
|
319
|
+
);
|
|
320
|
+
totalComposite += result.composite_score;
|
|
321
|
+
}
|
|
322
|
+
|
|
323
|
+
if (results.length > 0) {
|
|
324
|
+
const avg = totalComposite / results.length;
|
|
325
|
+
lines.push(`| **Average** | — | — | — | ${avg.toFixed(2)} |`);
|
|
326
|
+
}
|
|
327
|
+
|
|
328
|
+
return lines.join('\n');
|
|
329
|
+
}
|
|
330
|
+
|
|
331
|
+
module.exports = {
|
|
332
|
+
loadCorpus,
|
|
333
|
+
saveCorpusEntry,
|
|
334
|
+
scoreComposite,
|
|
335
|
+
createDefaultRubric,
|
|
336
|
+
formatBenchmarkReport,
|
|
337
|
+
classifyEntry,
|
|
338
|
+
scoreSemanticFromSummary,
|
|
339
|
+
assessTrainability,
|
|
340
|
+
evaluateEntry,
|
|
341
|
+
};
|