@nathapp/nax 0.18.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.gitlab-ci.yml +96 -0
- package/BRIEF.md +140 -0
- package/CHANGELOG.md +60 -0
- package/CLAUDE.md +159 -0
- package/README.md +373 -0
- package/US-007-IMPLEMENTATION.md +139 -0
- package/bin/nax.ts +930 -0
- package/biome.json +14 -0
- package/bun.lock +168 -0
- package/bunfig.toml +11 -0
- package/docs/20260216-fix-plan-context-review.md +56 -0
- package/docs/20260216-relentless-vs-ngent-comparison.md +208 -0
- package/docs/20260216-v02-plan.md +136 -0
- package/docs/20260216-v02-review.md +685 -0
- package/docs/20260217-dogfood-findings.md +56 -0
- package/docs/20260217-p2-plus-plan.md +117 -0
- package/docs/20260217-partial-fixes-plan.md +62 -0
- package/docs/20260217-plan-analyze-spec.md +117 -0
- package/docs/20260217-post-impl-review.md +1137 -0
- package/docs/20260217-quick-wins-plan.md +66 -0
- package/docs/20260217-split-runner-plan.md +75 -0
- package/docs/20260217-v03-impl-plan.md +80 -0
- package/docs/20260217-v03-post-impl-review.md +589 -0
- package/docs/20260217-v04-impl-plan.md +86 -0
- package/docs/20260217-v05-post-impl-review.md +850 -0
- package/docs/20260217-v06-post-impl-review.md +817 -0
- package/docs/20260218-adr003-port-plan.md +151 -0
- package/docs/20260218-review-adr003-verification.md +175 -0
- package/docs/20260219-fix-plan-bug16-19.md +79 -0
- package/docs/20260219-fix-plan-bug20-22.md +114 -0
- package/docs/20260219-plan-llm-routing.md +116 -0
- package/docs/20260219-review-bug20-22-fixes.md +135 -0
- package/docs/20260219-routing-baseline-keyword.md +63 -0
- package/docs/20260220-plan-structured-logging-p1.md +80 -0
- package/docs/20260220-plan-structured-logging-p2.md +37 -0
- package/docs/20260220-review-llm-routing.md +180 -0
- package/docs/20260220-review-post-fix-llm-routing.md +70 -0
- package/docs/20260221-fix-plan-relevantfiles-split.md +101 -0
- package/docs/20260221-fix-plan-routing-mode.md +125 -0
- package/docs/20260221-review-v0.9-implementation.md +379 -0
- package/docs/20260222-fix-plan-v091-routing-isolation.md +197 -0
- package/docs/20260223-fix-plan-prompt-audit.md +62 -0
- package/docs/20260224-nax-roadmap-phases.md +189 -0
- package/docs/20260225-phase2-llm-service-layer.md +401 -0
- package/docs/20260225-review-v0.10.1.md +187 -0
- package/docs/20260303-v010-implementation-plan.md +165 -0
- package/docs/CLAUDE.md.bak +191 -0
- package/docs/ROADMAP.md +165 -0
- package/docs/SPEC-rectification.md +0 -0
- package/docs/SPEC.md +324 -0
- package/docs/US-001-plugin-loading-verification.md +152 -0
- package/docs/architecture-analysis.md +1076 -0
- package/docs/bugs/BUG-21-escalation-null-attempts.md +48 -0
- package/docs/bugs-from-dogfood-run-c.md +243 -0
- package/docs/code-review-20260228.md +612 -0
- package/docs/code-review-v0.15.0.md +629 -0
- package/docs/hook-lifecycle-test-plan.md +149 -0
- package/docs/releases/v0.11.0-and-earlier.md +20 -0
- package/docs/releases/v0.12.0.md +15 -0
- package/docs/releases/v0.13.0.md +14 -0
- package/docs/releases/v0.14.0.md +20 -0
- package/docs/releases/v0.14.1.md +36 -0
- package/docs/releases/v0.14.2.md +51 -0
- package/docs/releases/v0.14.3.md +174 -0
- package/docs/releases/v0.14.4.md +94 -0
- package/docs/releases/v0.15.0.md +502 -0
- package/docs/releases/v0.15.1.md +170 -0
- package/docs/releases/v0.15.3.md +193 -0
- package/docs/specs/status-file-v0.10.1.md +812 -0
- package/docs/v0.10-global-config.md +206 -0
- package/docs/v0.10-plugin-system.md +415 -0
- package/docs/v0.10-prompt-optimizer.md +234 -0
- package/docs/v0.3-spec.md +244 -0
- package/docs/v0.4-spec.md +140 -0
- package/docs/v0.5-spec.md +237 -0
- package/docs/v0.6-spec.md +371 -0
- package/docs/v0.7-spec.md +177 -0
- package/docs/v0.8-llm-routing.md +206 -0
- package/docs/v0.8-structured-logging.md +132 -0
- package/docs/v0.9.3-prompt-audit.md +112 -0
- package/examples/plugins/console-reporter/index.test.ts +207 -0
- package/examples/plugins/console-reporter/index.ts +110 -0
- package/nax/config.json +147 -0
- package/nax/features/bugfix-v0171/prd.json +52 -0
- package/nax/features/config-management/prd.json +108 -0
- package/nax/features/config-management/progress.txt +5 -0
- package/nax/features/diagnose/acceptance.test.ts +412 -0
- package/nax/features/diagnose/prd.json +41 -0
- package/nax/features/orchestration-fixes/prd.json +89 -0
- package/nax/features/orchestration-fixes/progress.txt +1 -0
- package/nax/features/plugin-integration/US-007-VERIFICATION.md +259 -0
- package/nax/features/plugin-integration/prd.json +208 -0
- package/nax/features/plugin-integration/progress.txt +5 -0
- package/nax/features/precheck/prd.json +205 -0
- package/nax/features/precheck/progress.txt +15 -0
- package/nax/features/structured-logging/prd.json +199 -0
- package/nax/features/unlock/prd.json +36 -0
- package/package.json +47 -0
- package/src/acceptance/fix-generator.ts +348 -0
- package/src/acceptance/generator.ts +282 -0
- package/src/acceptance/index.ts +30 -0
- package/src/acceptance/types.ts +79 -0
- package/src/agents/claude-decompose.ts +169 -0
- package/src/agents/claude-plan.ts +139 -0
- package/src/agents/claude.ts +324 -0
- package/src/agents/cost.ts +268 -0
- package/src/agents/index.ts +13 -0
- package/src/agents/registry.ts +48 -0
- package/src/agents/types-extended.ts +133 -0
- package/src/agents/types.ts +113 -0
- package/src/agents/validation.ts +69 -0
- package/src/analyze/classifier.ts +305 -0
- package/src/analyze/index.ts +16 -0
- package/src/analyze/scanner.ts +175 -0
- package/src/analyze/types.ts +51 -0
- package/src/cli/accept.ts +108 -0
- package/src/cli/analyze-parser.ts +284 -0
- package/src/cli/analyze.ts +207 -0
- package/src/cli/config.ts +561 -0
- package/src/cli/constitution.ts +109 -0
- package/src/cli/diagnose-analysis.ts +159 -0
- package/src/cli/diagnose-formatter.ts +87 -0
- package/src/cli/diagnose.ts +203 -0
- package/src/cli/generate.ts +127 -0
- package/src/cli/index.ts +37 -0
- package/src/cli/init.ts +188 -0
- package/src/cli/interact.ts +295 -0
- package/src/cli/plan.ts +198 -0
- package/src/cli/plugins.ts +111 -0
- package/src/cli/prompts.ts +295 -0
- package/src/cli/runs.ts +174 -0
- package/src/cli/status-cost.ts +151 -0
- package/src/cli/status-features.ts +338 -0
- package/src/cli/status.ts +13 -0
- package/src/commands/common.ts +171 -0
- package/src/commands/diagnose.ts +17 -0
- package/src/commands/index.ts +8 -0
- package/src/commands/logs.ts +384 -0
- package/src/commands/precheck.ts +86 -0
- package/src/commands/unlock.ts +96 -0
- package/src/config/defaults.ts +160 -0
- package/src/config/index.ts +22 -0
- package/src/config/loader.ts +121 -0
- package/src/config/merger.ts +147 -0
- package/src/config/path-security.ts +121 -0
- package/src/config/paths.ts +27 -0
- package/src/config/schema.ts +56 -0
- package/src/config/schemas.ts +286 -0
- package/src/config/types.ts +423 -0
- package/src/config/validate.ts +103 -0
- package/src/constitution/generator.ts +191 -0
- package/src/constitution/generators/aider.ts +41 -0
- package/src/constitution/generators/claude.ts +35 -0
- package/src/constitution/generators/cursor.ts +36 -0
- package/src/constitution/generators/opencode.ts +38 -0
- package/src/constitution/generators/types.ts +33 -0
- package/src/constitution/generators/windsurf.ts +36 -0
- package/src/constitution/index.ts +10 -0
- package/src/constitution/loader.ts +133 -0
- package/src/constitution/types.ts +31 -0
- package/src/context/auto-detect.ts +227 -0
- package/src/context/builder.ts +246 -0
- package/src/context/elements.ts +83 -0
- package/src/context/formatter.ts +107 -0
- package/src/context/generator.ts +129 -0
- package/src/context/generators/aider.ts +34 -0
- package/src/context/generators/claude.ts +28 -0
- package/src/context/generators/cursor.ts +28 -0
- package/src/context/generators/opencode.ts +30 -0
- package/src/context/generators/windsurf.ts +28 -0
- package/src/context/greenfield.ts +114 -0
- package/src/context/index.ts +33 -0
- package/src/context/injector.ts +279 -0
- package/src/context/test-scanner.ts +370 -0
- package/src/context/types.ts +98 -0
- package/src/errors.ts +67 -0
- package/src/execution/batching.ts +157 -0
- package/src/execution/crash-recovery.ts +373 -0
- package/src/execution/escalation/escalation.ts +44 -0
- package/src/execution/escalation/index.ts +13 -0
- package/src/execution/escalation/tier-escalation.ts +295 -0
- package/src/execution/escalation/tier-outcome.ts +158 -0
- package/src/execution/helpers.ts +38 -0
- package/src/execution/index.ts +45 -0
- package/src/execution/lifecycle/acceptance-loop.ts +272 -0
- package/src/execution/lifecycle/headless-formatter.ts +85 -0
- package/src/execution/lifecycle/index.ts +12 -0
- package/src/execution/lifecycle/parallel-lifecycle.ts +101 -0
- package/src/execution/lifecycle/precheck-runner.ts +140 -0
- package/src/execution/lifecycle/run-cleanup.ts +81 -0
- package/src/execution/lifecycle/run-completion.ts +129 -0
- package/src/execution/lifecycle/run-initialization.ts +141 -0
- package/src/execution/lifecycle/run-lifecycle.ts +312 -0
- package/src/execution/lifecycle/run-setup.ts +204 -0
- package/src/execution/lifecycle/story-hooks.ts +38 -0
- package/src/execution/lifecycle/story-size-prompts.ts +123 -0
- package/src/execution/lock.ts +115 -0
- package/src/execution/parallel-executor.ts +216 -0
- package/src/execution/parallel.ts +400 -0
- package/src/execution/pid-registry.ts +280 -0
- package/src/execution/pipeline-result-handler.ts +388 -0
- package/src/execution/post-verify-rectification.ts +188 -0
- package/src/execution/post-verify.ts +274 -0
- package/src/execution/progress.ts +25 -0
- package/src/execution/prompts.ts +127 -0
- package/src/execution/queue-handler.ts +109 -0
- package/src/execution/rectification.ts +13 -0
- package/src/execution/runner.ts +377 -0
- package/src/execution/sequential-executor.ts +388 -0
- package/src/execution/status-file.ts +264 -0
- package/src/execution/status-writer.ts +139 -0
- package/src/execution/story-context.ts +229 -0
- package/src/execution/test-output-parser.ts +14 -0
- package/src/execution/verification.ts +72 -0
- package/src/hooks/index.ts +2 -0
- package/src/hooks/runner.ts +286 -0
- package/src/hooks/types.ts +67 -0
- package/src/interaction/chain.ts +154 -0
- package/src/interaction/index.ts +60 -0
- package/src/interaction/init.ts +83 -0
- package/src/interaction/plugins/auto.ts +217 -0
- package/src/interaction/plugins/cli.ts +300 -0
- package/src/interaction/plugins/telegram.ts +384 -0
- package/src/interaction/plugins/webhook.ts +258 -0
- package/src/interaction/state.ts +171 -0
- package/src/interaction/triggers.ts +229 -0
- package/src/interaction/types.ts +163 -0
- package/src/logger/formatters.ts +84 -0
- package/src/logger/index.ts +16 -0
- package/src/logger/logger.ts +298 -0
- package/src/logger/types.ts +48 -0
- package/src/logging/formatter.ts +355 -0
- package/src/logging/index.ts +22 -0
- package/src/logging/types.ts +93 -0
- package/src/metrics/aggregator.ts +190 -0
- package/src/metrics/index.ts +14 -0
- package/src/metrics/tracker.ts +200 -0
- package/src/metrics/types.ts +109 -0
- package/src/optimizer/index.ts +62 -0
- package/src/optimizer/noop.optimizer.ts +24 -0
- package/src/optimizer/rule-based.optimizer.ts +248 -0
- package/src/optimizer/types.ts +53 -0
- package/src/pipeline/events.ts +130 -0
- package/src/pipeline/index.ts +19 -0
- package/src/pipeline/runner.ts +161 -0
- package/src/pipeline/stages/acceptance.ts +197 -0
- package/src/pipeline/stages/completion.ts +99 -0
- package/src/pipeline/stages/constitution.ts +63 -0
- package/src/pipeline/stages/context.ts +117 -0
- package/src/pipeline/stages/execution.ts +194 -0
- package/src/pipeline/stages/index.ts +62 -0
- package/src/pipeline/stages/optimizer.ts +74 -0
- package/src/pipeline/stages/prompt.ts +57 -0
- package/src/pipeline/stages/queue-check.ts +103 -0
- package/src/pipeline/stages/review.ts +181 -0
- package/src/pipeline/stages/routing.ts +81 -0
- package/src/pipeline/stages/verify.ts +100 -0
- package/src/pipeline/types.ts +167 -0
- package/src/plugins/index.ts +31 -0
- package/src/plugins/loader.ts +287 -0
- package/src/plugins/registry.ts +168 -0
- package/src/plugins/types.ts +327 -0
- package/src/plugins/validator.ts +352 -0
- package/src/prd/index.ts +172 -0
- package/src/prd/types.ts +202 -0
- package/src/precheck/checks-blockers.ts +391 -0
- package/src/precheck/checks-warnings.ts +142 -0
- package/src/precheck/checks.ts +30 -0
- package/src/precheck/index.ts +247 -0
- package/src/precheck/story-size-gate.ts +144 -0
- package/src/precheck/types.ts +31 -0
- package/src/queue/index.ts +2 -0
- package/src/queue/manager.ts +254 -0
- package/src/queue/types.ts +54 -0
- package/src/review/index.ts +8 -0
- package/src/review/runner.ts +172 -0
- package/src/review/types.ts +66 -0
- package/src/routing/builder.ts +81 -0
- package/src/routing/chain.ts +74 -0
- package/src/routing/index.ts +16 -0
- package/src/routing/loader.ts +58 -0
- package/src/routing/router.ts +303 -0
- package/src/routing/strategies/adaptive.ts +215 -0
- package/src/routing/strategies/index.ts +8 -0
- package/src/routing/strategies/keyword.ts +163 -0
- package/src/routing/strategies/llm-prompts.ts +209 -0
- package/src/routing/strategies/llm.ts +235 -0
- package/src/routing/strategies/manual.ts +50 -0
- package/src/routing/strategy.ts +99 -0
- package/src/tdd/cleanup.ts +111 -0
- package/src/tdd/index.ts +23 -0
- package/src/tdd/isolation.ts +123 -0
- package/src/tdd/orchestrator.ts +383 -0
- package/src/tdd/prompts.ts +270 -0
- package/src/tdd/rectification-gate.ts +183 -0
- package/src/tdd/session-runner.ts +179 -0
- package/src/tdd/types.ts +81 -0
- package/src/tdd/verdict.ts +271 -0
- package/src/tui/App.tsx +265 -0
- package/src/tui/components/AgentPanel.tsx +75 -0
- package/src/tui/components/CostOverlay.tsx +118 -0
- package/src/tui/components/HelpOverlay.tsx +107 -0
- package/src/tui/components/StatusBar.tsx +63 -0
- package/src/tui/components/StoriesPanel.tsx +177 -0
- package/src/tui/hooks/useKeyboard.ts +142 -0
- package/src/tui/hooks/useLayout.ts +137 -0
- package/src/tui/hooks/usePipelineEvents.ts +183 -0
- package/src/tui/hooks/usePty.ts +194 -0
- package/src/tui/index.tsx +38 -0
- package/src/tui/types.ts +76 -0
- package/src/utils/git.ts +83 -0
- package/src/utils/queue-writer.ts +54 -0
- package/src/verification/executor.ts +235 -0
- package/src/verification/gate.ts +207 -0
- package/src/verification/index.ts +12 -0
- package/src/verification/parser.ts +230 -0
- package/src/verification/rectification.ts +108 -0
- package/src/verification/types.ts +113 -0
- package/src/worktree/dispatcher.ts +65 -0
- package/src/worktree/index.ts +2 -0
- package/src/worktree/manager.ts +187 -0
- package/src/worktree/merge.ts +301 -0
- package/src/worktree/types.ts +4 -0
- package/test/TEST_COVERAGE_US001.md +217 -0
- package/test/TEST_COVERAGE_US003.md +84 -0
- package/test/TEST_COVERAGE_US005.md +86 -0
- package/test/US-002-orchestrator.test.ts +246 -0
- package/test/acceptance/cm-003-default-view.test.ts +194 -0
- package/test/execution/pid-registry.test.ts +240 -0
- package/test/execution/post-verify.test.ts +224 -0
- package/test/helpers/timeout.ts +42 -0
- package/test/integration/US-002-TEST-SUMMARY.md +107 -0
- package/test/integration/US-003-TEST-SUMMARY.md +149 -0
- package/test/integration/US-004-TEST-SUMMARY.md +106 -0
- package/test/integration/US-005-TEST-SUMMARY.md +138 -0
- package/test/integration/US-007-TEST-SUMMARY.md +100 -0
- package/test/integration/agent-validation.test.ts +439 -0
- package/test/integration/analyze-integration.test.ts +261 -0
- package/test/integration/analyze-scanner.test.ts +131 -0
- package/test/integration/cli-config-default-edge-cases.test.ts +222 -0
- package/test/integration/cli-config-default-view.test.ts +229 -0
- package/test/integration/cli-config-diff.test.ts +460 -0
- package/test/integration/cli-config.test.ts +736 -0
- package/test/integration/cli-diagnose.test.ts +592 -0
- package/test/integration/cli-logs.test.ts +314 -0
- package/test/integration/cli-plugins.test.ts +678 -0
- package/test/integration/cli-precheck.test.ts +371 -0
- package/test/integration/cli-run-headless.test.ts +173 -0
- package/test/integration/cli.test.ts +75 -0
- package/test/integration/config/merger.test.ts +465 -0
- package/test/integration/config/paths.test.ts +51 -0
- package/test/integration/config-loader.test.ts +265 -0
- package/test/integration/config.test.ts +444 -0
- package/test/integration/context-integration.test.ts +702 -0
- package/test/integration/context-provider-injection.test.ts +506 -0
- package/test/integration/context-verification-integration.test.ts +295 -0
- package/test/integration/e2e.test.ts +896 -0
- package/test/integration/execution.test.ts +625 -0
- package/test/integration/helpers.test.ts +295 -0
- package/test/integration/hooks.test.ts +361 -0
- package/test/integration/interaction-chain-pipeline.test.ts +464 -0
- package/test/integration/isolation.test.ts +143 -0
- package/test/integration/logger.test.ts +461 -0
- package/test/integration/parallel.test.ts +250 -0
- package/test/integration/path-security.test.ts +173 -0
- package/test/integration/pipeline-acceptance.test.ts +302 -0
- package/test/integration/pipeline-events.test.ts +475 -0
- package/test/integration/pipeline.test.ts +658 -0
- package/test/integration/plan.test.ts +157 -0
- package/test/integration/plugin-routing.test.ts +921 -0
- package/test/integration/plugins/config-integration.test.ts +172 -0
- package/test/integration/plugins/config-resolution.test.ts +522 -0
- package/test/integration/plugins/loader.test.ts +641 -0
- package/test/integration/plugins/registry.test.ts +746 -0
- package/test/integration/plugins/validator.test.ts +563 -0
- package/test/integration/prd-pause.test.ts +205 -0
- package/test/integration/prd-resolvers.test.ts +185 -0
- package/test/integration/precheck-integration.test.ts +468 -0
- package/test/integration/precheck.test.ts +805 -0
- package/test/integration/progress.test.ts +34 -0
- package/test/integration/rectification-flow.test.ts +512 -0
- package/test/integration/reporter-lifecycle.test.ts +860 -0
- package/test/integration/review-config-commands.test.ts +319 -0
- package/test/integration/review-config-schema.test.ts +116 -0
- package/test/integration/review-plugin-integration.test.ts +722 -0
- package/test/integration/review.test.ts +149 -0
- package/test/integration/routing-stage-bug-021.test.ts +274 -0
- package/test/integration/routing-stage-greenfield.test.ts +286 -0
- package/test/integration/runner-config-plugins.test.ts +461 -0
- package/test/integration/runner-fixes.test.ts +399 -0
- package/test/integration/runner-plugin-integration.test.ts +543 -0
- package/test/integration/runner.test.ts +1679 -0
- package/test/integration/s5-greenfield-fallback.test.ts +297 -0
- package/test/integration/status-file-integration.test.ts +325 -0
- package/test/integration/status-file.test.ts +379 -0
- package/test/integration/status-writer.test.ts +345 -0
- package/test/integration/story-id-in-events.test.ts +273 -0
- package/test/integration/tdd-cleanup.test.ts +246 -0
- package/test/integration/tdd-orchestrator.test.ts +1762 -0
- package/test/integration/test-scanner.test.ts +403 -0
- package/test/integration/verification-asset-check.test.ts +142 -0
- package/test/integration/verify-stage.test.ts +275 -0
- package/test/integration/worktree/manager.test.ts +218 -0
- package/test/integration/worktree/merge.test.ts +341 -0
- package/test/manual/logging-formatter-demo.ts +158 -0
- package/test/ui/tui-agent-panel.test.tsx +99 -0
- package/test/ui/tui-controls.test.ts +334 -0
- package/test/ui/tui-cost-and-pty.test.ts +189 -0
- package/test/ui/tui-layout.test.ts +378 -0
- package/test/ui/tui-pty-integration.test.tsx +159 -0
- package/test/ui/tui-stories.test.ts +332 -0
- package/test/unit/acceptance.test.ts +186 -0
- package/test/unit/agent-stderr-capture.test.ts +146 -0
- package/test/unit/analyze-classifier.test.ts +215 -0
- package/test/unit/analyze.test.ts +224 -0
- package/test/unit/auto-detect.test.ts +249 -0
- package/test/unit/cli-status.test.ts +417 -0
- package/test/unit/commands/common.test.ts +320 -0
- package/test/unit/commands/logs.test.ts +416 -0
- package/test/unit/commands/unlock.test.ts +319 -0
- package/test/unit/constitution-generators.test.ts +160 -0
- package/test/unit/constitution.test.ts +209 -0
- package/test/unit/context.test.ts +1722 -0
- package/test/unit/cost.test.ts +231 -0
- package/test/unit/crash-recovery.test.ts +308 -0
- package/test/unit/escalation.test.ts +126 -0
- package/test/unit/execution-logging-stderr.test.ts +156 -0
- package/test/unit/execution-stage.test.ts +122 -0
- package/test/unit/fix-generator.test.ts +275 -0
- package/test/unit/formatters.test.ts +469 -0
- package/test/unit/greenfield.test.ts +179 -0
- package/test/unit/helpers.test.ts +317 -0
- package/test/unit/interaction/human-review-trigger.test.ts +164 -0
- package/test/unit/interaction-network-failures.test.ts +389 -0
- package/test/unit/interaction-plugins.test.ts +164 -0
- package/test/unit/isolation.test.ts +134 -0
- package/test/unit/logging/formatter.test.ts +455 -0
- package/test/unit/merge.test.ts +268 -0
- package/test/unit/metrics.test.ts +276 -0
- package/test/unit/optimizer/noop.optimizer.test.ts +125 -0
- package/test/unit/optimizer/rule-based.optimizer.test.ts +358 -0
- package/test/unit/prd-auto-default.test.ts +290 -0
- package/test/unit/prd-failure-category.test.ts +176 -0
- package/test/unit/prd-get-next-story.test.ts +186 -0
- package/test/unit/precheck-checks.test.ts +840 -0
- package/test/unit/precheck-story-size-gate.test.ts +287 -0
- package/test/unit/precheck-types.test.ts +142 -0
- package/test/unit/prompts.test.ts +475 -0
- package/test/unit/queue.test.ts +237 -0
- package/test/unit/rectification.test.ts +284 -0
- package/test/unit/registry.test.ts +287 -0
- package/test/unit/routing.test.ts +937 -0
- package/test/unit/run-lifecycle.test.ts +140 -0
- package/test/unit/storyid-events.test.ts +224 -0
- package/test/unit/tdd-verdict.test.ts +492 -0
- package/test/unit/test-output-parser.test.ts +377 -0
- package/test/unit/verdict.test.ts +324 -0
- package/test/unit/worktree-manager.test.ts +158 -0
- package/tsconfig.json +27 -0
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Logging formatter module
|
|
3
|
+
*
|
|
4
|
+
* Provides human-friendly log formatting with multiple verbosity levels
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
export {
|
|
8
|
+
formatLogEntry,
|
|
9
|
+
formatRunSummary,
|
|
10
|
+
formatTimestamp,
|
|
11
|
+
formatDuration,
|
|
12
|
+
formatCost,
|
|
13
|
+
type FormattedEntry,
|
|
14
|
+
} from "./formatter.js";
|
|
15
|
+
export {
|
|
16
|
+
EMOJI,
|
|
17
|
+
type VerbosityMode,
|
|
18
|
+
type FormatterOptions,
|
|
19
|
+
type RunSummary,
|
|
20
|
+
type StoryStartData,
|
|
21
|
+
type StageResultData,
|
|
22
|
+
} from "./types.js";
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Logging formatter types for human-readable output
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import type { LogEntry } from "../logger/types.js";
|
|
6
|
+
|
|
7
|
+
/**
 * How much detail the log formatter should emit.
 *
 * One of the four literal mode names below; the formatter options select
 * exactly one of them.
 */
export type VerbosityMode =
  | "quiet"
  | "normal"
  | "verbose"
  | "json";
|
|
11
|
+
|
|
12
|
+
/**
 * Symbol table used to decorate formatted log output.
 *
 * Declared `as const` so every entry keeps its literal string type and the
 * object is read-only at the type level.
 */
export const EMOJI = {
  // --- outcome / status markers ---
  success: "✓",
  failure: "✗",
  warning: "⚠",
  info: "ℹ",
  skip: "⊘",

  // --- pipeline stage and process markers ---
  routing: "🎯",
  execution: "⚙️",
  review: "🔍",
  tdd: "🔄",
  agent: "🤖",
  cost: "💰",
  duration: "⏱️",

  // --- story progress markers ---
  storyStart: "▶",
  storyComplete: "●",
  retry: "↻",
} as const;
|
|
37
|
+
|
|
38
|
+
/**
 * Totals describing one run, as consumed by the run-summary formatter.
 */
export interface RunSummary {
  /** Number of stories in the run. */
  total: number;
  /** Number of stories that passed. */
  passed: number;
  /** Number of stories that failed. */
  failed: number;
  /** Number of stories that were skipped. */
  skipped: number;
  /** Duration of the whole run, in milliseconds. */
  durationMs: number;
  /** Cost of the whole run, in dollars. */
  totalCost: number;
  /** Timestamp at which the run started. */
  startedAt: string;
  /** Timestamp at which the run completed; may be omitted. */
  completedAt?: string;
}
|
|
59
|
+
|
|
60
|
+
/**
 * Payload attached to a "story started" log event.
 */
export interface StoryStartData {
  /** Identifier of the story being started. */
  storyId: string;
  /** Title of the story. */
  title: string;
  /** Complexity label for the story, when known. */
  complexity?: string;
  /** Model tier label for the story, when known. */
  modelTier?: string;
  /** Attempt number for this start — NOTE(review): base (0 or 1) is not visible here; confirm with the emitter. */
  attempt?: number;
}
|
|
70
|
+
|
|
71
|
+
/**
 * Payload attached to a "stage finished" log event.
 */
export interface StageResultData {
  /** Name of the stage that produced this result. */
  stage: string;
  /** Whether the stage succeeded. */
  success: boolean;
  /** Follow-up action attached to the result, if any. */
  action?:
    | "continue"
    | "skip"
    | "fail"
    | "escalate"
    | "pause";
  /** Free-text explanation accompanying the result. */
  reason?: string;
  /** Cost attributed to the stage — presumably dollars, matching RunSummary.totalCost; confirm. */
  cost?: number;
  /** Stage duration in milliseconds. */
  durationMs?: number;
}
|
|
82
|
+
|
|
83
|
+
/**
 * Settings that control how log entries are rendered.
 */
export interface FormatterOptions {
  /** Verbosity mode to render with. */
  mode: VerbosityMode;
  /** Enable color/emoji decoration (default: true). */
  useColor?: boolean;
  /** Timezone used when formatting timestamps (default: system timezone). */
  timezone?: string;
}
|
|
@@ -0,0 +1,190 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Metrics Aggregator
|
|
3
|
+
*
|
|
4
|
+
* Calculates aggregate metrics across all runs.
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import type { AggregateMetrics, RunMetrics, StoryMetrics } from "./types";
|
|
8
|
+
|
|
9
|
+
/**
 * Summarize how each model performed across the given stories.
 *
 * For every distinct `modelUsed` value this accumulates the number of
 * attempts, the number of successful stories and the total cost, then derives:
 * - `passRate`: successful stories divided by total attempts (a per-attempt
 *   rate, not a per-story rate); 0 when there were no attempts.
 * - `avgCost`: total cost divided by successful stories; 0 when no story
 *   succeeded, even if cost was incurred.
 *
 * @param stories - Flat list of story metrics from all runs
 * @returns Efficiency record keyed by model name
 */
function summarizeModelEfficiency(stories: StoryMetrics[]): AggregateMetrics["modelEfficiency"] {
  const totalsByModel = new Map<string, { attempts: number; successes: number; totalCost: number }>();

  for (const story of stories) {
    const totals = totalsByModel.get(story.modelUsed) ?? { attempts: 0, successes: 0, totalCost: 0 };
    totals.attempts += story.attempts;
    if (story.success) {
      totals.successes += 1;
    }
    totals.totalCost += story.cost;
    totalsByModel.set(story.modelUsed, totals);
  }

  const efficiency: AggregateMetrics["modelEfficiency"] = {};
  for (const [modelKey, totals] of totalsByModel) {
    efficiency[modelKey] = {
      attempts: totals.attempts,
      successes: totals.successes,
      passRate: totals.attempts > 0 ? totals.successes / totals.attempts : 0,
      avgCost: totals.successes > 0 ? totals.totalCost / totals.successes : 0,
      totalCost: totals.totalCost,
    };
  }
  return efficiency;
}

/**
 * Summarize how well each predicted complexity matched the tier actually used.
 *
 * For every distinct `complexity` value this counts the stories, tallies
 * their `finalTier` values and counts a mismatch whenever a story's initial
 * `modelTier` differs from its `finalTier`.
 *
 * @param stories - Flat list of story metrics from all runs
 * @returns Accuracy record keyed by complexity label
 */
function summarizeComplexityAccuracy(stories: StoryMetrics[]): AggregateMetrics["complexityAccuracy"] {
  const statsByComplexity = new Map<
    string,
    { predicted: number; tierCounts: Map<string, number>; mismatches: number }
  >();

  for (const story of stories) {
    const stats = statsByComplexity.get(story.complexity) ?? {
      predicted: 0,
      tierCounts: new Map<string, number>(),
      mismatches: 0,
    };

    stats.predicted += 1;
    stats.tierCounts.set(story.finalTier, (stats.tierCounts.get(story.finalTier) ?? 0) + 1);

    // Initial tier differing from the final tier means the story was escalated.
    if (story.modelTier !== story.finalTier) {
      stats.mismatches += 1;
    }

    statsByComplexity.set(story.complexity, stats);
  }

  const accuracy: AggregateMetrics["complexityAccuracy"] = {};
  for (const [complexity, stats] of statsByComplexity) {
    // Strict `>` over insertion-ordered Map entries: on a tie, the tier seen first wins.
    let highestCount = 0;
    let mostCommonTier = "unknown";
    for (const [tier, count] of stats.tierCounts) {
      if (count > highestCount) {
        highestCount = count;
        mostCommonTier = tier;
      }
    }

    accuracy[complexity] = {
      predicted: stats.predicted,
      actualTierUsed: mostCommonTier,
      mismatchRate: stats.predicted > 0 ? stats.mismatches / stats.predicted : 0,
    };
  }
  return accuracy;
}

/**
 * Calculate aggregate metrics across all runs.
 *
 * Computed values:
 * - `totalCost`: sum of each run's `totalCost` (not of the per-story costs)
 * - `firstPassRate`: share of stories flagged `firstPassSuccess`
 * - `escalationRate`: share of stories that needed more than one attempt
 * - `avgCostPerStory` / `avgCostPerFeature`: `totalCost` divided by the number
 *   of stories / runs respectively (each run is counted as one feature)
 * - `modelEfficiency`: per-model attempts, successes, pass rate and cost
 * - `complexityAccuracy`: per-complexity count, most common final tier and
 *   mismatch rate
 *
 * Every ratio falls back to 0 when its denominator is 0.
 *
 * @param runs - Array of all historical run metrics
 * @returns Aggregate metrics; all-zero/empty when `runs` is empty
 *
 * @example
 * ```ts
 * const runs = await loadRunMetrics(workdir);
 * const aggregate = calculateAggregateMetrics(runs);
 * console.log(`First pass rate: ${(aggregate.firstPassRate * 100).toFixed(1)}%`);
 * console.log(`Avg cost per story: $${aggregate.avgCostPerStory.toFixed(4)}`);
 * ```
 */
export function calculateAggregateMetrics(runs: RunMetrics[]): AggregateMetrics {
  if (runs.length === 0) {
    return {
      totalRuns: 0,
      totalCost: 0,
      totalStories: 0,
      firstPassRate: 0,
      escalationRate: 0,
      avgCostPerStory: 0,
      avgCostPerFeature: 0,
      modelEfficiency: {},
      complexityAccuracy: {},
    };
  }

  // Flatten all story metrics so per-story statistics span every run.
  const allStories: StoryMetrics[] = runs.flatMap((run) => run.stories);

  const totalRuns = runs.length;
  const totalCost = runs.reduce((sum, run) => sum + run.totalCost, 0);
  const totalStories = allStories.length;

  const firstPassSuccesses = allStories.filter((s) => s.firstPassSuccess).length;
  const escalatedStories = allStories.filter((s) => s.attempts > 1).length;

  return {
    totalRuns,
    totalCost,
    totalStories,
    firstPassRate: totalStories > 0 ? firstPassSuccesses / totalStories : 0,
    escalationRate: totalStories > 0 ? escalatedStories / totalStories : 0,
    avgCostPerStory: totalStories > 0 ? totalCost / totalStories : 0,
    avgCostPerFeature: totalRuns > 0 ? totalCost / totalRuns : 0,
    modelEfficiency: summarizeModelEfficiency(allStories),
    complexityAccuracy: summarizeComplexityAccuracy(allStories),
  };
}
|
|
167
|
+
|
|
168
|
+
/**
 * Get the last run metrics from the list.
 *
 * The list is treated as chronological (runs are appended as they finish),
 * so the final element is the most recent run.
 *
 * @param runs - Array of all run metrics
 * @returns Most recent run, or null if there are no runs or the final slot
 *   holds no value
 *
 * @example
 * ```ts
 * const runs = await loadRunMetrics(workdir);
 * const lastRun = getLastRun(runs);
 * if (lastRun) {
 *   console.log(`Last run: ${lastRun.feature} (${lastRun.storiesCompleted}/${lastRun.totalStories} stories)`);
 * }
 * ```
 */
export function getLastRun(runs: RunMetrics[]): RunMetrics | null {
  // Indexing yields undefined for an empty array or an empty final slot;
  // normalize to null so the declared `RunMetrics | null` contract always
  // holds (and the lookup type-checks under noUncheckedIndexedAccess).
  return runs[runs.length - 1] ?? null;
}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Metrics Tracking
|
|
3
|
+
*
|
|
4
|
+
* Per-story and per-run cost tracking for data-driven routing optimization.
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
export type { StoryMetrics, RunMetrics, AggregateMetrics } from "./types";
|
|
8
|
+
export {
|
|
9
|
+
collectStoryMetrics,
|
|
10
|
+
collectBatchMetrics,
|
|
11
|
+
saveRunMetrics,
|
|
12
|
+
loadRunMetrics,
|
|
13
|
+
} from "./tracker";
|
|
14
|
+
export { calculateAggregateMetrics, getLastRun } from "./aggregator";
|
|
@@ -0,0 +1,200 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Metrics Tracker
|
|
3
|
+
*
|
|
4
|
+
* Collects and persists per-story and per-run metrics.
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import { existsSync } from "node:fs";
|
|
8
|
+
import path from "node:path";
|
|
9
|
+
import { resolveModel } from "../config/schema";
|
|
10
|
+
import { getLogger } from "../logger";
|
|
11
|
+
import type { PipelineContext } from "../pipeline/types";
|
|
12
|
+
import type { RunMetrics, StoryMetrics } from "./types";
|
|
13
|
+
|
|
14
|
+
/**
 * Collect metrics for a single story execution.
 *
 * Extracts timing, cost, model usage, and escalation data from the pipeline context.
 * The reported `modelUsed` is resolved from the story's *final* tier, so a story
 * that escalated is attributed to the model of the tier it ended on rather than
 * the tier it was initially routed to.
 *
 * @param ctx - Pipeline context with execution results
 * @param storyStartTime - Story start timestamp (ISO string)
 * @returns Story metrics object; `completedAt` is stamped with the current time
 *
 * @example
 * ```ts
 * const metrics = collectStoryMetrics(ctx, "2026-02-17T10:30:00.000Z");
 * // {
 * //   storyId: "US-001",
 * //   complexity: "medium",
 * //   modelTier: "balanced",
 * //   modelUsed: "claude-sonnet-4.5",
 * //   attempts: 1,
 * //   finalTier: "balanced",
 * //   success: true,
 * //   cost: 0.0234,
 * //   durationMs: 45000,
 * //   firstPassSuccess: true,
 * //   ...
 * // }
 * ```
 */
export function collectStoryMetrics(ctx: PipelineContext, storyStartTime: string): StoryMetrics {
  const story = ctx.story;
  const routing = ctx.routing;
  const agentResult = ctx.agentResult;

  // Read the escalation history once; a story that never escalated may not carry the field.
  const escalations = story.escalations ?? [];
  const escalationCount = escalations.length;

  // Attempt count as tracked on the story, never reported as less than 1.
  const attempts = Math.max(1, story.attempts || 1);

  // Final tier = target of the last escalation, or the initial routing tier if none happened.
  const finalTier = escalationCount > 0 ? escalations[escalationCount - 1].toTier : routing.modelTier;

  // First pass success = succeeded with no escalations
  const firstPassSuccess = agentResult?.success === true && escalationCount === 0;

  // Resolve the model name from the tier the story finished on. Using the initial
  // tier here would report the wrong model for every escalated story.
  const modelEntry = ctx.config.models[finalTier];
  const modelDef = modelEntry ? resolveModel(modelEntry) : null;
  // Fall back to the tier name when the config has no usable entry for it.
  const modelUsed = modelDef?.model || finalTier;

  return {
    storyId: story.id,
    complexity: routing.complexity,
    modelTier: routing.modelTier,
    modelUsed,
    attempts,
    finalTier,
    success: agentResult?.success || false,
    cost: agentResult?.estimatedCost || 0,
    durationMs: agentResult?.durationMs || 0,
    firstPassSuccess,
    startedAt: storyStartTime,
    completedAt: new Date().toISOString(),
  };
}
|
|
76
|
+
|
|
77
|
+
/**
 * Build per-story metrics for stories that were executed together as one batch.
 *
 * The batch reports a single cost and duration; both are split evenly across
 * the stories (total divided by story count). Every story is recorded as a
 * successful, first-pass, single-attempt execution on the routed tier.
 *
 * @param ctx - Pipeline context with batch execution results
 * @param storyStartTime - Batch start timestamp (ISO string)
 * @returns One metrics entry per story in the batch, in batch order
 *
 * @example
 * ```ts
 * const batchMetrics = collectBatchMetrics(ctx, "2026-02-17T10:30:00.000Z");
 * // [
 * //   { storyId: "US-001", cost: 0.0078, ... },
 * //   { storyId: "US-002", cost: 0.0078, ... },
 * //   { storyId: "US-003", cost: 0.0078, ... },
 * // ]
 * ```
 */
export function collectBatchMetrics(ctx: PipelineContext, storyStartTime: string): StoryMetrics[] {
  const { stories, routing, agentResult, config } = ctx;
  const { complexity, modelTier } = routing;

  // Even share of the batch totals (missing totals count as 0).
  const storyCount = stories.length;
  const costShare = (agentResult?.estimatedCost || 0) / storyCount;
  const durationShare = (agentResult?.durationMs || 0) / storyCount;

  // Model name for the routed tier, falling back to the tier name itself.
  const tierEntry = config.models[modelTier];
  const resolved = tierEntry ? resolveModel(tierEntry) : null;
  const modelUsed = resolved?.model || modelTier;

  const collected: StoryMetrics[] = [];
  for (const batchStory of stories) {
    collected.push({
      storyId: batchStory.id,
      complexity,
      modelTier,
      modelUsed,
      attempts: 1, // stories inside a batch are never escalated one by one
      finalTier: modelTier,
      success: true, // a successful batch means every story in it succeeded
      cost: costShare,
      durationMs: durationShare,
      firstPassSuccess: true, // batches only ever count as first-pass
      startedAt: storyStartTime,
      completedAt: new Date().toISOString(),
    });
  }
  return collected;
}
|
|
126
|
+
|
|
127
|
+
/**
 * Save run metrics to nax/metrics.json.
 *
 * Appends the run metrics to the existing metrics file (or creates it if missing).
 * Each run is a separate entry in the JSON array.
 *
 * If the existing file cannot be parsed, or does not contain a JSON array, a
 * warning is logged (including the parse error, when there is one) and the
 * file is rewritten containing only the new run.
 *
 * @param workdir - Project root directory
 * @param runMetrics - Run metrics to persist
 * @returns Resolves once the metrics file has been written
 *
 * @example
 * ```ts
 * await saveRunMetrics("/home/user/project", {
 *   runId: "run-20260217-103045",
 *   feature: "auth-system",
 *   totalCost: 0.1234,
 *   stories: [...],
 *   ...
 * });
 * ```
 */
export async function saveRunMetrics(workdir: string, runMetrics: RunMetrics): Promise<void> {
  const metricsPath = path.join(workdir, "nax", "metrics.json");
  const logger = getLogger();

  let allMetrics: RunMetrics[] = [];

  // Load existing metrics if file exists
  if (existsSync(metricsPath)) {
    try {
      const content: unknown = await Bun.file(metricsPath).json();
      if (Array.isArray(content)) {
        allMetrics = content;
      } else {
        // Valid JSON of the wrong shape: say so instead of discarding it silently.
        logger.warn("metrics", "Metrics file is not a JSON array, starting fresh", { metricsPath });
      }
    } catch (err) {
      // Keep the cause in the log entry so a corrupted history can be diagnosed.
      logger.warn("metrics", "Could not parse metrics file, starting fresh", {
        metricsPath,
        error: err instanceof Error ? err.message : String(err),
      });
    }
  }

  // Append new run
  allMetrics.push(runMetrics);

  // Write back
  await Bun.write(metricsPath, JSON.stringify(allMetrics, null, 2));
}
|
|
171
|
+
|
|
172
|
+
/**
 * Load all run metrics from nax/metrics.json.
 *
 * Never throws for a missing or unreadable history: a missing file, a file
 * that fails to parse, or a file whose JSON is not an array all yield an
 * empty list. The latter two cases are logged as warnings (with the parse
 * error, when there is one).
 *
 * @param workdir - Project root directory
 * @returns Array of run metrics, or empty array if the file doesn't exist or is unusable
 *
 * @example
 * ```ts
 * const runs = await loadRunMetrics("/home/user/project");
 * console.log(`Total runs: ${runs.length}`);
 * ```
 */
export async function loadRunMetrics(workdir: string): Promise<RunMetrics[]> {
  const metricsPath = path.join(workdir, "nax", "metrics.json");

  if (!existsSync(metricsPath)) {
    return [];
  }

  try {
    const content: unknown = await Bun.file(metricsPath).json();
    if (Array.isArray(content)) {
      return content;
    }
    // Valid JSON of the wrong shape: report it rather than hiding it behind an empty list.
    getLogger().warn("metrics", "Metrics file is not a JSON array", { metricsPath });
    return [];
  } catch (err) {
    const logger = getLogger();
    // Keep the cause in the log entry so a corrupted history can be diagnosed.
    logger.warn("metrics", "Could not parse metrics file", {
      metricsPath,
      error: err instanceof Error ? err.message : String(err),
    });
    return [];
  }
}
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Metrics Tracking Types
|
|
3
|
+
*
|
|
4
|
+
* Structured cost and performance metrics for tracking agent execution.
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
/**
 * Per-story execution metrics.
 *
 * One record describes the outcome of executing a single story: how it was
 * routed, which tier it ended on, and what it cost.
 */
export interface StoryMetrics {
  /** Story ID */
  storyId: string;
  /** Complexity classification taken from the routing decision */
  complexity: string;
  /** Initial model tier chosen by routing */
  modelTier: string;
  /** Model name (e.g., "claude-sonnet-4.5"); the tracker falls back to a tier name when the config has no model for the tier */
  modelUsed: string;
  /** Number of attempts; the tracker never records less than 1 */
  attempts: number;
  /** Tier the story ended on: target of the last escalation, or the initial tier when none occurred */
  finalTier: string;
  /** Whether the story succeeded */
  success: boolean;
  /** Cost attributed to this story; for batched stories this is an equal share of the batch cost */
  cost: number;
  /** Duration in milliseconds; for batched stories this is an equal share of the batch duration */
  durationMs: number;
  /** Whether it succeeded without any escalation */
  firstPassSuccess: boolean;
  /** Timestamp when started (ISO string) */
  startedAt: string;
  /** Timestamp when the metrics were collected (ISO string) */
  completedAt: string;
}
|
|
36
|
+
|
|
37
|
+
/**
 * Per-run execution metrics.
 *
 * One object of this shape is appended to the JSON array in nax/metrics.json
 * for every run that is saved.
 */
export interface RunMetrics {
  /** Unique run ID (e.g., "run-20260217-103045") */
  runId: string;
  /** Feature name (e.g., "auth-system") */
  feature: string;
  /** Run start timestamp */
  startedAt: string;
  /** Run completion timestamp */
  completedAt: string;
  /** Total cost for the run */
  totalCost: number;
  /** Total number of stories in the run */
  totalStories: number;
  /** Number of stories completed successfully */
  storiesCompleted: number;
  /** Number of stories that failed */
  storiesFailed: number;
  /** Total duration in milliseconds */
  totalDurationMs: number;
  /** Per-story metrics, one entry per executed story */
  stories: StoryMetrics[];
}
|
|
62
|
+
|
|
63
|
+
/**
 * Aggregate metrics across all runs.
 *
 * Summary statistics computed over the full run history.
 */
export interface AggregateMetrics {
  /** Total number of runs */
  totalRuns: number;
  /** Total cost across all runs */
  totalCost: number;
  /** Total stories across all runs */
  totalStories: number;
  /**
   * Share of stories passing on first attempt.
   * NOTE(review): described as a percentage, but the other rates in this type are 0-1 fractions — confirm the scale.
   */
  firstPassRate: number;
  /**
   * Share of stories needing escalation.
   * NOTE(review): described as a percentage, but the other rates in this type are 0-1 fractions — confirm the scale.
   */
  escalationRate: number;
  /** Average cost per story */
  avgCostPerStory: number;
  /** Average cost per feature run */
  avgCostPerFeature: number;
  /** Per-model efficiency metrics, keyed by model */
  modelEfficiency: Record<
    string,
    {
      /** Total attempts with this model */
      attempts: number;
      /** Successful attempts */
      successes: number;
      /** Success rate (0-1) */
      passRate: number;
      /** Average cost per story */
      avgCost: number;
      /** Total cost for this model */
      totalCost: number;
    }
  >;
  /** Complexity prediction accuracy, keyed by complexity classification */
  complexityAccuracy: Record<
    string,
    {
      /** Number of stories predicted at this complexity */
      predicted: number;
      /** Most common final tier used ("unknown" when no tier was counted) */
      actualTierUsed: string;
      /** Fraction (0-1) of these stories whose final tier differed from their initial tier */
      mismatchRate: number;
    }
  >;
}
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Prompt Optimizer
|
|
3
|
+
*
|
|
4
|
+
* Exports optimizer types, implementations, and factory function.
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
export type {
|
|
8
|
+
IPromptOptimizer,
|
|
9
|
+
PromptOptimizerInput,
|
|
10
|
+
PromptOptimizerResult,
|
|
11
|
+
} from "./types.js";
|
|
12
|
+
export { estimateTokens } from "./types.js";
|
|
13
|
+
export { NoopOptimizer } from "./noop.optimizer.js";
|
|
14
|
+
export { RuleBasedOptimizer } from "./rule-based.optimizer.js";
|
|
15
|
+
|
|
16
|
+
import type { NaxConfig } from "../config/schema.js";
|
|
17
|
+
import type { PluginRegistry } from "../plugins/registry.js";
|
|
18
|
+
import { NoopOptimizer } from "./noop.optimizer.js";
|
|
19
|
+
import { RuleBasedOptimizer } from "./rule-based.optimizer.js";
|
|
20
|
+
import type { IPromptOptimizer } from "./types.js";
|
|
21
|
+
|
|
22
|
+
/**
 * Choose the prompt optimizer for the current run.
 *
 * When the optimizer is not enabled in the config, a NoopOptimizer is returned
 * straight away. Otherwise candidates are considered in this order:
 * 1. The first optimizer supplied by the plugin registry, if there is one
 * 2. The built-in strategy named in the config ("rule-based" or "noop")
 * 3. NoopOptimizer, with a console warning, for an unrecognized strategy
 *
 * @param config - Nax configuration
 * @param pluginRegistry - Plugin registry (optional, for plugin-provided optimizers)
 * @returns Resolved optimizer instance
 */
export function resolveOptimizer(config: NaxConfig, pluginRegistry?: PluginRegistry): IPromptOptimizer {
  const settings = config.optimizer;

  // Optimization switched off (or not configured at all): nothing to resolve.
  if (!settings?.enabled) {
    return new NoopOptimizer();
  }

  // Plugin-provided optimizers share the built-in interface and take precedence.
  const fromPlugins = pluginRegistry ? pluginRegistry.getOptimizers() : [];
  if (fromPlugins.length > 0) {
    return fromPlugins[0];
  }

  // Built-in strategies; an unset strategy behaves like "noop".
  const strategy = settings.strategy ?? "noop";
  if (strategy === "rule-based") {
    return new RuleBasedOptimizer();
  }
  if (strategy !== "noop") {
    // Unrecognized strategy name: warn, then fall through to the noop fallback.
    console.warn(`[nax] Unknown optimizer strategy '${strategy}', using noop`);
  }
  return new NoopOptimizer();
}
|