voratiq 0.1.0-beta.2 → 0.1.0-beta.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +41 -29
- package/dist/agents/launch/chat.d.ts +23 -0
- package/dist/agents/launch/chat.js +44 -0
- package/dist/agents/launch/environment.d.ts +8 -0
- package/dist/{commands/run/agents/workspace-prep.js → agents/launch/environment.js} +5 -27
- package/dist/agents/launch/prompt.d.ts +6 -0
- package/dist/agents/launch/prompt.js +12 -0
- package/dist/agents/launch/provider-state.d.ts +39 -0
- package/dist/agents/launch/provider-state.js +103 -0
- package/dist/agents/runtime/auth.d.ts +27 -0
- package/dist/agents/runtime/auth.js +72 -0
- package/dist/agents/runtime/chat.d.ts +5 -0
- package/dist/agents/runtime/chat.js +7 -0
- package/dist/agents/runtime/errors.d.ts +27 -0
- package/dist/agents/runtime/errors.js +51 -0
- package/dist/{commands/run/agents → agents/runtime}/failures.d.ts +0 -1
- package/dist/agents/runtime/failures.js +136 -0
- package/dist/agents/runtime/harness.d.ts +2 -0
- package/dist/agents/runtime/harness.js +119 -0
- package/dist/{commands/run/agents/sandbox-launcher.d.ts → agents/runtime/launcher.d.ts} +18 -6
- package/dist/{commands/run/agents/sandbox-launcher.js → agents/runtime/launcher.js} +17 -39
- package/dist/{commands/run/agents/workspace-prep.d.ts → agents/runtime/manifest.d.ts} +6 -6
- package/dist/agents/runtime/manifest.js +34 -0
- package/dist/agents/runtime/policy.d.ts +32 -0
- package/dist/agents/runtime/policy.js +240 -0
- package/dist/agents/runtime/registry.d.ts +4 -0
- package/dist/agents/runtime/registry.js +54 -0
- package/dist/{commands/run → agents/runtime}/sandbox.d.ts +8 -2
- package/dist/{commands/run → agents/runtime}/sandbox.js +28 -67
- package/dist/agents/runtime/shim/run-agent-shim.d.ts +1 -0
- package/dist/agents/runtime/shim/run-agent-shim.js +276 -0
- package/dist/agents/runtime/types.d.ts +91 -0
- package/dist/{commands/run/agents → agents/runtime}/watchdog.d.ts +4 -3
- package/dist/{commands/run/agents → agents/runtime}/watchdog.js +155 -26
- package/dist/auth/providers/codex.js +7 -2
- package/dist/auth/providers/gemini.js +14 -6
- package/dist/auth/providers/types.d.ts +1 -0
- package/dist/auth/providers/utils.d.ts +0 -1
- package/dist/auth/providers/utils.js +1 -49
- package/dist/bin.js +369 -71
- package/dist/cli/apply.d.ts +4 -0
- package/dist/cli/apply.js +28 -9
- package/dist/cli/auto.d.ts +32 -0
- package/dist/cli/auto.js +232 -0
- package/dist/cli/contract.d.ts +328 -0
- package/dist/cli/contract.js +480 -0
- package/dist/cli/errors.d.ts +3 -0
- package/dist/cli/errors.js +21 -3
- package/dist/cli/init.d.ts +5 -0
- package/dist/cli/init.js +34 -6
- package/dist/cli/list.d.ts +6 -4
- package/dist/cli/list.js +39 -16
- package/dist/cli/mcp.d.ts +2 -0
- package/dist/cli/mcp.js +16 -0
- package/dist/cli/message.d.ts +28 -0
- package/dist/cli/message.js +147 -0
- package/dist/cli/operator-envelope.d.ts +180 -0
- package/dist/cli/operator-envelope.js +425 -0
- package/dist/cli/output.d.ts +15 -1
- package/dist/cli/output.js +153 -5
- package/dist/cli/prune.d.ts +7 -3
- package/dist/cli/prune.js +57 -12
- package/dist/cli/reduce.d.ts +29 -0
- package/dist/cli/reduce.js +211 -0
- package/dist/cli/root-launcher.d.ts +4 -0
- package/dist/cli/root-launcher.js +15 -0
- package/dist/cli/run.d.ts +27 -1
- package/dist/cli/run.js +108 -16
- package/dist/cli/spec.d.ts +31 -0
- package/dist/cli/spec.js +180 -0
- package/dist/cli/verify.d.ts +35 -0
- package/dist/cli/verify.js +297 -0
- package/dist/commands/apply/command.d.ts +2 -0
- package/dist/commands/apply/command.js +145 -6
- package/dist/commands/apply/errors.d.ts +43 -4
- package/dist/commands/apply/errors.js +100 -22
- package/dist/commands/apply/types.d.ts +2 -1
- package/dist/commands/auto/command.d.ts +145 -0
- package/dist/commands/auto/command.js +433 -0
- package/dist/commands/auto/errors.d.ts +19 -0
- package/dist/commands/auto/errors.js +19 -0
- package/dist/commands/auto/validation.d.ts +14 -0
- package/dist/commands/auto/validation.js +90 -0
- package/dist/commands/fetch.d.ts +2 -2
- package/dist/commands/fetch.js +4 -4
- package/dist/commands/init/agents.d.ts +2 -1
- package/dist/commands/init/agents.js +66 -63
- package/dist/commands/init/command.js +300 -16
- package/dist/commands/init/types.d.ts +18 -7
- package/dist/commands/interactive/lifecycle.d.ts +15 -0
- package/dist/commands/interactive/lifecycle.js +141 -0
- package/dist/commands/list/command.d.ts +10 -3
- package/dist/commands/list/command.js +597 -40
- package/dist/commands/message/command.d.ts +23 -0
- package/dist/commands/message/command.js +215 -0
- package/dist/commands/message/errors.d.ts +9 -0
- package/dist/commands/message/errors.js +20 -0
- package/dist/commands/message/lifecycle.d.ts +14 -0
- package/dist/commands/message/lifecycle.js +128 -0
- package/dist/commands/prune/command.d.ts +2 -1
- package/dist/commands/prune/command.js +61 -10
- package/dist/commands/prune/errors.d.ts +1 -1
- package/dist/commands/prune/errors.js +5 -5
- package/dist/commands/prune/types.d.ts +21 -0
- package/dist/commands/reduce/command.d.ts +26 -0
- package/dist/commands/reduce/command.js +145 -0
- package/dist/commands/reduce/errors.d.ts +17 -0
- package/dist/commands/reduce/errors.js +32 -0
- package/dist/commands/reduce/targets.d.ts +11 -0
- package/dist/commands/reduce/targets.js +271 -0
- package/dist/commands/root-launcher/command.d.ts +31 -0
- package/dist/commands/root-launcher/command.js +233 -0
- package/dist/commands/run/command.d.ts +6 -1
- package/dist/commands/run/command.js +70 -40
- package/dist/commands/run/lifecycle.d.ts +7 -5
- package/dist/commands/run/lifecycle.js +45 -23
- package/dist/commands/run/record-init.d.ts +4 -1
- package/dist/commands/run/record-init.js +5 -2
- package/dist/commands/run/shim/run-agent-shim.d.ts +2 -1
- package/dist/commands/run/shim/run-agent-shim.js +4 -219
- package/dist/commands/run/validation.d.ts +2 -3
- package/dist/commands/run/validation.js +54 -25
- package/dist/commands/shared/max-parallel.d.ts +5 -0
- package/dist/commands/shared/max-parallel.js +15 -0
- package/dist/commands/shared/preview.d.ts +10 -0
- package/dist/commands/shared/preview.js +60 -0
- package/dist/commands/shared/resolve-reduction-competitors.d.ts +15 -0
- package/dist/commands/shared/resolve-reduction-competitors.js +13 -0
- package/dist/commands/shared/resolve-stage-competitors.d.ts +19 -0
- package/dist/commands/shared/resolve-stage-competitors.js +171 -0
- package/dist/commands/shared/session-id.d.ts +1 -0
- package/dist/commands/shared/session-id.js +1 -0
- package/dist/commands/spec/command.d.ts +22 -0
- package/dist/commands/spec/command.js +330 -0
- package/dist/commands/spec/errors.d.ts +11 -0
- package/dist/commands/spec/errors.js +23 -0
- package/dist/commands/verify/agents.d.ts +8 -0
- package/dist/commands/verify/agents.js +29 -0
- package/dist/commands/verify/command.d.ts +23 -0
- package/dist/commands/verify/command.js +168 -0
- package/dist/commands/verify/lifecycle.d.ts +14 -0
- package/dist/commands/verify/lifecycle.js +229 -0
- package/dist/commands/verify/max-parallel.d.ts +7 -0
- package/dist/commands/verify/max-parallel.js +15 -0
- package/dist/commands/verify/targets.d.ts +18 -0
- package/dist/commands/verify/targets.js +420 -0
- package/dist/competition/command-adapter.d.ts +35 -0
- package/dist/competition/command-adapter.js +20 -0
- package/dist/competition/core.d.ts +41 -0
- package/dist/competition/core.js +181 -0
- package/dist/competition/shared/extra-context.d.ts +14 -0
- package/dist/competition/shared/extra-context.js +100 -0
- package/dist/competition/shared/preflight.d.ts +11 -0
- package/dist/competition/shared/preflight.js +39 -0
- package/dist/competition/shared/prompt-helpers.d.ts +16 -0
- package/dist/competition/shared/prompt-helpers.js +27 -0
- package/dist/competition/shared/prune.d.ts +1 -0
- package/dist/competition/shared/prune.js +4 -0
- package/dist/competition/shared/sandbox-policy.d.ts +9 -0
- package/dist/competition/shared/sandbox-policy.js +7 -0
- package/dist/competition/shared/teardown.d.ts +36 -0
- package/dist/competition/shared/teardown.js +101 -0
- package/dist/configs/agents/defaults.d.ts +31 -2
- package/dist/configs/agents/defaults.js +346 -30
- package/dist/configs/agents/errors.js +14 -11
- package/dist/configs/agents/loader.d.ts +11 -1
- package/dist/configs/agents/loader.js +71 -4
- package/dist/configs/agents/types.js +2 -2
- package/dist/configs/environment/detect.js +9 -4
- package/dist/configs/environment/errors.js +4 -4
- package/dist/configs/environment/loader.d.ts +1 -1
- package/dist/configs/environment/loader.js +3 -3
- package/dist/configs/orchestration/bootstrap.d.ts +16 -0
- package/dist/configs/orchestration/bootstrap.js +122 -0
- package/dist/configs/orchestration/errors.d.ts +15 -0
- package/dist/configs/orchestration/errors.js +28 -0
- package/dist/configs/orchestration/loader.d.ts +9 -0
- package/dist/configs/orchestration/loader.js +148 -0
- package/dist/configs/orchestration/types.d.ts +102 -0
- package/dist/configs/orchestration/types.js +65 -0
- package/dist/configs/sandbox/defaults.js +14 -4
- package/dist/configs/sandbox/errors.d.ts +1 -1
- package/dist/configs/sandbox/errors.js +1 -1
- package/dist/configs/sandbox/loader.js +6 -4
- package/dist/configs/sandbox/schemas.js +4 -2
- package/dist/configs/settings/loader.d.ts +7 -0
- package/dist/configs/settings/loader.js +81 -0
- package/dist/configs/settings/types.d.ts +47 -0
- package/dist/configs/settings/types.js +23 -0
- package/dist/configs/verification/errors.d.ts +11 -0
- package/dist/configs/verification/errors.js +21 -0
- package/dist/configs/verification/loader.d.ts +8 -0
- package/dist/configs/verification/loader.js +43 -0
- package/dist/configs/verification/methods.d.ts +35 -0
- package/dist/configs/verification/methods.js +41 -0
- package/dist/configs/verification/programmatic-defaults.d.ts +10 -0
- package/dist/configs/verification/programmatic-defaults.js +42 -0
- package/dist/configs/verification/programmatic-detect.d.ts +10 -0
- package/dist/configs/{evals/detect.js → verification/programmatic-detect.js} +22 -33
- package/dist/configs/verification/types.d.ts +49 -0
- package/dist/configs/verification/types.js +45 -0
- package/dist/contracts/list.d.ts +207 -0
- package/dist/contracts/list.js +154 -0
- package/dist/domain/interactive/model/types.d.ts +104 -0
- package/dist/domain/interactive/model/types.js +83 -0
- package/dist/domain/interactive/persistence/adapter.d.ts +39 -0
- package/dist/domain/interactive/persistence/adapter.js +144 -0
- package/dist/domain/interactive/prompt.d.ts +3 -0
- package/dist/domain/interactive/prompt.js +7 -0
- package/dist/domain/message/competition/adapter.d.ts +36 -0
- package/dist/domain/message/competition/adapter.js +197 -0
- package/dist/domain/message/competition/prompt.d.ts +8 -0
- package/dist/domain/message/competition/prompt.js +29 -0
- package/dist/domain/message/model/mutators.d.ts +17 -0
- package/dist/domain/message/model/mutators.js +107 -0
- package/dist/domain/message/model/types.d.ts +100 -0
- package/dist/domain/message/model/types.js +87 -0
- package/dist/domain/message/persistence/adapter.d.ts +43 -0
- package/dist/domain/message/persistence/adapter.js +124 -0
- package/dist/domain/reduce/competition/adapter.d.ts +42 -0
- package/dist/domain/reduce/competition/adapter.js +826 -0
- package/dist/domain/reduce/competition/output-validation.d.ts +4 -0
- package/dist/domain/reduce/competition/output-validation.js +18 -0
- package/dist/domain/reduce/competition/prompt.d.ts +10 -0
- package/dist/domain/reduce/competition/prompt.js +96 -0
- package/dist/domain/reduce/competition/reduction.d.ts +9 -0
- package/dist/domain/reduce/competition/reduction.js +32 -0
- package/dist/domain/reduce/model/types.d.ts +122 -0
- package/dist/domain/reduce/model/types.js +84 -0
- package/dist/domain/reduce/persistence/adapter.d.ts +43 -0
- package/dist/domain/reduce/persistence/adapter.js +126 -0
- package/dist/domain/run/competition/adapter.d.ts +30 -0
- package/dist/domain/run/competition/adapter.js +39 -0
- package/dist/domain/run/competition/agent-execution.d.ts +20 -0
- package/dist/domain/run/competition/agent-execution.js +45 -0
- package/dist/domain/run/competition/agent-preparation.d.ts +12 -0
- package/dist/domain/run/competition/agent-preparation.js +24 -0
- package/dist/domain/run/competition/agents/artifacts.d.ts +17 -0
- package/dist/domain/run/competition/agents/artifacts.js +173 -0
- package/dist/{commands/run → domain/run/competition}/agents/lifecycle.d.ts +3 -3
- package/dist/{commands/run → domain/run/competition}/agents/lifecycle.js +84 -64
- package/dist/domain/run/competition/agents/post-processing.d.ts +12 -0
- package/dist/domain/run/competition/agents/post-processing.js +4 -0
- package/dist/domain/run/competition/agents/preparation.js +64 -0
- package/dist/{commands/run → domain/run/competition}/agents/run-context.d.ts +9 -16
- package/dist/{commands/run → domain/run/competition}/agents/run-context.js +22 -70
- package/dist/{commands/run → domain/run/competition}/agents/types.d.ts +10 -13
- package/dist/domain/run/competition/agents/workspace.d.ts +21 -0
- package/dist/domain/run/competition/agents/workspace.js +47 -0
- package/dist/{commands/run → domain/run/competition}/errors.d.ts +8 -1
- package/dist/{commands/run → domain/run/competition}/errors.js +39 -9
- package/dist/{commands/run → domain/run/competition}/phases.d.ts +1 -2
- package/dist/domain/run/competition/phases.js +1 -0
- package/dist/domain/run/competition/prompt.d.ts +7 -0
- package/dist/domain/run/competition/prompt.js +27 -0
- package/dist/{commands/run → domain/run/competition}/reports.d.ts +5 -3
- package/dist/{commands/run → domain/run/competition}/reports.js +7 -19
- package/dist/domain/run/competition/termination-state.d.ts +4 -0
- package/dist/domain/run/competition/termination-state.js +12 -0
- package/dist/{records → domain/run/model}/enhanced.d.ts +6 -7
- package/dist/{records → domain/run/model}/enhanced.js +11 -11
- package/dist/{records → domain/run/model}/errors.d.ts +1 -1
- package/dist/{records → domain/run/model}/errors.js +5 -5
- package/dist/{records → domain/run/model}/mutators.d.ts +4 -3
- package/dist/{records → domain/run/model}/mutators.js +58 -36
- package/dist/domain/run/model/types.d.ts +376 -0
- package/dist/domain/run/model/types.js +192 -0
- package/dist/{records/persistence.d.ts → domain/run/persistence/adapter.d.ts} +9 -3
- package/dist/domain/run/persistence/adapter.js +340 -0
- package/dist/domain/run/persistence/error-mapping.d.ts +2 -0
- package/dist/domain/run/persistence/error-mapping.js +17 -0
- package/dist/domain/shared/lifecycle.d.ts +54 -0
- package/dist/domain/shared/lifecycle.js +165 -0
- package/dist/domain/shared/token-usage.d.ts +21 -0
- package/dist/domain/shared/token-usage.js +38 -0
- package/dist/domain/spec/competition/adapter.d.ts +31 -0
- package/dist/domain/spec/competition/adapter.js +196 -0
- package/dist/domain/spec/competition/prompt.d.ts +11 -0
- package/dist/domain/spec/competition/prompt.js +44 -0
- package/dist/domain/spec/model/output.d.ts +13 -0
- package/dist/domain/spec/model/output.js +36 -0
- package/dist/domain/spec/model/types.d.ts +98 -0
- package/dist/domain/spec/model/types.js +84 -0
- package/dist/domain/spec/persistence/adapter.d.ts +51 -0
- package/dist/domain/spec/persistence/adapter.js +140 -0
- package/dist/domain/verify/blinding/aliases.d.ts +7 -0
- package/dist/domain/verify/blinding/aliases.js +23 -0
- package/dist/domain/verify/competition/adapter.d.ts +54 -0
- package/dist/domain/verify/competition/adapter.js +444 -0
- package/dist/domain/verify/competition/artifacts.d.ts +6 -0
- package/dist/domain/verify/competition/artifacts.js +7 -0
- package/dist/domain/verify/competition/blinding.d.ts +24 -0
- package/dist/domain/verify/competition/blinding.js +109 -0
- package/dist/domain/verify/competition/finalize.d.ts +11 -0
- package/dist/domain/verify/competition/finalize.js +65 -0
- package/dist/domain/verify/competition/programmatic.d.ts +15 -0
- package/dist/domain/verify/competition/programmatic.js +352 -0
- package/dist/domain/verify/competition/prompt.d.ts +19 -0
- package/dist/domain/verify/competition/prompt.js +63 -0
- package/dist/domain/verify/competition/rubric.d.ts +23 -0
- package/dist/domain/verify/competition/rubric.js +77 -0
- package/dist/domain/verify/competition/shared-layout.d.ts +121 -0
- package/dist/domain/verify/competition/shared-layout.js +365 -0
- package/dist/domain/verify/competition/target.d.ts +47 -0
- package/dist/domain/verify/competition/target.js +1 -0
- package/dist/domain/verify/model/mutators.d.ts +16 -0
- package/dist/domain/verify/model/mutators.js +126 -0
- package/dist/domain/verify/model/types.d.ts +408 -0
- package/dist/domain/verify/model/types.js +289 -0
- package/dist/domain/verify/persistence/adapter.d.ts +43 -0
- package/dist/domain/verify/persistence/adapter.js +126 -0
- package/dist/domain/verify/programmatic/runner.d.ts +22 -0
- package/dist/domain/verify/programmatic/runner.js +209 -0
- package/dist/domain/verify/rubric-result.d.ts +28 -0
- package/dist/domain/verify/rubric-result.js +121 -0
- package/dist/extra-context/contract.d.ts +17 -0
- package/dist/extra-context/contract.js +60 -0
- package/dist/interactive/index.d.ts +2 -0
- package/dist/interactive/index.js +1 -0
- package/dist/interactive/providers/launch.d.ts +23 -0
- package/dist/interactive/providers/launch.js +203 -0
- package/dist/interactive/providers/mcp.d.ts +13 -0
- package/dist/interactive/providers/mcp.js +547 -0
- package/dist/interactive/providers/shared.d.ts +2 -0
- package/dist/interactive/providers/shared.js +1 -0
- package/dist/interactive/providers.d.ts +3 -0
- package/dist/interactive/providers.js +3 -0
- package/dist/interactive/records.d.ts +2 -0
- package/dist/interactive/records.js +1 -0
- package/dist/interactive/substrate.d.ts +21 -0
- package/dist/interactive/substrate.js +522 -0
- package/dist/interactive/types.d.ts +101 -0
- package/dist/interactive/types.js +1 -0
- package/dist/mcp/server.d.ts +88 -0
- package/dist/mcp/server.js +790 -0
- package/dist/persistence/error-mapping.d.ts +19 -0
- package/dist/persistence/error-mapping.js +44 -0
- package/dist/persistence/errors.d.ts +26 -0
- package/dist/persistence/errors.js +49 -0
- package/dist/persistence/extra-context.d.ts +9 -0
- package/dist/persistence/extra-context.js +60 -0
- package/dist/{records → persistence}/history-lock.js +2 -2
- package/dist/persistence/record-path-schema.d.ts +3 -0
- package/dist/persistence/record-path-schema.js +16 -0
- package/dist/persistence/session-store.d.ts +92 -0
- package/dist/persistence/session-store.js +412 -0
- package/dist/policy/auto.d.ts +13 -0
- package/dist/policy/auto.js +22 -0
- package/dist/policy/index.d.ts +5 -0
- package/dist/policy/index.js +5 -0
- package/dist/policy/resolution.d.ts +6 -0
- package/dist/policy/resolution.js +23 -0
- package/dist/policy/result.d.ts +53 -0
- package/dist/policy/result.js +15 -0
- package/dist/policy/selector.d.ts +11 -0
- package/dist/policy/selector.js +57 -0
- package/dist/policy/verification.d.ts +77 -0
- package/dist/policy/verification.js +365 -0
- package/dist/policy/verifier-selection.d.ts +13 -0
- package/dist/policy/verifier-selection.js +78 -0
- package/dist/preflight/branch.d.ts +9 -0
- package/dist/preflight/branch.js +48 -0
- package/dist/preflight/errors.d.ts +3 -0
- package/dist/preflight/errors.js +10 -3
- package/dist/preflight/index.d.ts +13 -0
- package/dist/preflight/index.js +43 -8
- package/dist/render/interactions/confirmation.js +4 -2
- package/dist/render/transcripts/apply.js +9 -10
- package/dist/render/transcripts/auto.d.ts +27 -0
- package/dist/render/transcripts/auto.js +21 -0
- package/dist/render/transcripts/init.d.ts +4 -15
- package/dist/render/transcripts/init.js +71 -72
- package/dist/render/transcripts/list.d.ts +10 -1
- package/dist/render/transcripts/list.js +121 -15
- package/dist/render/transcripts/message.d.ts +72 -0
- package/dist/render/transcripts/message.js +362 -0
- package/dist/render/transcripts/prune.d.ts +7 -2
- package/dist/render/transcripts/prune.js +64 -17
- package/dist/render/transcripts/reduce.d.ts +74 -0
- package/dist/render/transcripts/reduce.js +395 -0
- package/dist/render/transcripts/root-launcher.d.ts +19 -0
- package/dist/render/transcripts/root-launcher.js +40 -0
- package/dist/render/transcripts/run.d.ts +35 -6
- package/dist/render/transcripts/run.js +241 -165
- package/dist/render/transcripts/shared.d.ts +2 -0
- package/dist/render/transcripts/shared.js +11 -4
- package/dist/render/transcripts/spec.d.ts +74 -0
- package/dist/render/transcripts/spec.js +394 -0
- package/dist/render/transcripts/stage-progress.d.ts +22 -0
- package/dist/render/transcripts/stage-progress.js +6 -0
- package/dist/render/transcripts/update-check.d.ts +2 -0
- package/dist/render/transcripts/update-check.js +22 -0
- package/dist/render/transcripts/verify.d.ts +74 -0
- package/dist/render/transcripts/verify.js +409 -0
- package/dist/render/utils/agents.d.ts +10 -9
- package/dist/render/utils/agents.js +30 -82
- package/dist/render/utils/badges.d.ts +3 -20
- package/dist/render/utils/badges.js +3 -36
- package/dist/render/utils/duration.d.ts +12 -0
- package/dist/render/utils/duration.js +37 -0
- package/dist/render/utils/interactive-frame.d.ts +6 -0
- package/dist/render/utils/interactive-frame.js +38 -0
- package/dist/render/utils/records.js +4 -4
- package/dist/render/utils/runs.d.ts +3 -9
- package/dist/render/utils/runs.js +16 -48
- package/dist/render/utils/stage-output.d.ts +20 -0
- package/dist/render/utils/stage-output.js +44 -0
- package/dist/render/utils/timezone.d.ts +2 -0
- package/dist/render/utils/timezone.js +42 -0
- package/dist/render/utils/transcript-shell.d.ts +66 -0
- package/dist/render/utils/transcript-shell.js +155 -0
- package/dist/render/utils/transcript.d.ts +7 -1
- package/dist/render/utils/transcript.js +12 -2
- package/dist/render/utils/wrap.d.ts +1 -0
- package/dist/render/utils/wrap.js +20 -0
- package/dist/status/colors.d.ts +2 -3
- package/dist/status/colors.js +3 -3
- package/dist/status/index.d.ts +108 -8
- package/dist/status/index.js +164 -5
- package/dist/update-check/checker.d.ts +24 -0
- package/dist/update-check/checker.js +130 -0
- package/dist/update-check/prompt.d.ts +25 -0
- package/dist/update-check/prompt.js +62 -0
- package/dist/update-check/semver.d.ts +17 -0
- package/dist/update-check/semver.js +36 -0
- package/dist/update-check/state-path.d.ts +8 -0
- package/dist/update-check/state-path.js +18 -0
- package/dist/utils/binaries.js +14 -8
- package/dist/utils/errors.d.ts +3 -1
- package/dist/utils/errors.js +3 -1
- package/dist/utils/git.d.ts +10 -0
- package/dist/utils/git.js +15 -3
- package/dist/utils/output.d.ts +5 -1
- package/dist/utils/output.js +4 -2
- package/dist/utils/process.d.ts +2 -1
- package/dist/utils/process.js +7 -3
- package/dist/utils/session-id.d.ts +1 -0
- package/dist/utils/session-id.js +22 -0
- package/dist/utils/slug.d.ts +2 -0
- package/dist/utils/slug.js +15 -0
- package/dist/utils/voratiq-cli-target.d.ts +9 -0
- package/dist/utils/voratiq-cli-target.js +58 -0
- package/dist/workspace/agents.d.ts +13 -16
- package/dist/workspace/agents.js +22 -147
- package/dist/workspace/chat/artifacts.d.ts +9 -0
- package/dist/workspace/chat/artifacts.js +82 -12
- package/dist/workspace/chat/native-usage.d.ts +13 -0
- package/dist/workspace/chat/native-usage.js +60 -0
- package/dist/workspace/chat/sources.d.ts +9 -5
- package/dist/workspace/chat/sources.js +89 -23
- package/dist/workspace/chat/token-usage-result.d.ts +23 -0
- package/dist/workspace/chat/token-usage-result.js +7 -0
- package/dist/workspace/chat/usage-extractor.d.ts +30 -0
- package/dist/workspace/chat/usage-extractor.js +461 -0
- package/dist/workspace/chat/usage-mappings.d.ts +20 -0
- package/dist/workspace/chat/usage-mappings.js +136 -0
- package/dist/workspace/credential-guard.js +1 -1
- package/dist/workspace/dependencies.js +4 -4
- package/dist/workspace/errors.d.ts +5 -0
- package/dist/workspace/errors.js +13 -3
- package/dist/workspace/layout.d.ts +17 -6
- package/dist/workspace/layout.js +51 -32
- package/dist/workspace/promotion.d.ts +32 -0
- package/dist/workspace/promotion.js +34 -0
- package/dist/workspace/prune.d.ts +1 -1
- package/dist/workspace/run.d.ts +1 -3
- package/dist/workspace/run.js +6 -15
- package/dist/workspace/setup.d.ts +8 -0
- package/dist/workspace/setup.js +359 -56
- package/dist/workspace/shim.js +1 -1
- package/dist/workspace/structure.d.ts +91 -26
- package/dist/workspace/structure.js +227 -43
- package/dist/workspace/templates.d.ts +9 -3
- package/dist/workspace/templates.js +26 -15
- package/dist/workspace/verification-defaults.d.ts +12 -0
- package/dist/workspace/verification-defaults.js +1017 -0
- package/package.json +30 -24
- package/dist/cli/review.d.ts +0 -12
- package/dist/cli/review.js +0 -33
- package/dist/commands/errors.d.ts +0 -4
- package/dist/commands/errors.js +0 -7
- package/dist/commands/init/evals.d.ts +0 -4
- package/dist/commands/init/evals.js +0 -219
- package/dist/commands/review/command.d.ts +0 -10
- package/dist/commands/review/command.js +0 -26
- package/dist/commands/run/agent-execution.d.ts +0 -19
- package/dist/commands/run/agent-execution.js +0 -63
- package/dist/commands/run/agents/auth-stage.d.ts +0 -23
- package/dist/commands/run/agents/auth-stage.js +0 -108
- package/dist/commands/run/agents/chat-preserver.d.ts +0 -9
- package/dist/commands/run/agents/chat-preserver.js +0 -35
- package/dist/commands/run/agents/eval-runner.d.ts +0 -19
- package/dist/commands/run/agents/eval-runner.js +0 -27
- package/dist/commands/run/agents/failures.js +0 -32
- package/dist/commands/run/agents/preparation.js +0 -123
- package/dist/commands/run/agents.d.ts +0 -14
- package/dist/commands/run/agents.js +0 -47
- package/dist/commands/run/prompts.d.ts +0 -4
- package/dist/commands/run/prompts.js +0 -16
- package/dist/commands/run/sandbox-registry.d.ts +0 -4
- package/dist/commands/run/sandbox-registry.js +0 -54
- package/dist/configs/evals/defaults.d.ts +0 -8
- package/dist/configs/evals/defaults.js +0 -28
- package/dist/configs/evals/detect.d.ts +0 -10
- package/dist/configs/evals/errors.d.ts +0 -16
- package/dist/configs/evals/errors.js +0 -29
- package/dist/configs/evals/loader.d.ts +0 -9
- package/dist/configs/evals/loader.js +0 -46
- package/dist/configs/evals/types.d.ts +0 -42
- package/dist/configs/evals/types.js +0 -74
- package/dist/evals/runner.d.ts +0 -16
- package/dist/evals/runner.js +0 -132
- package/dist/records/persistence.js +0 -469
- package/dist/records/types.d.ts +0 -255
- package/dist/records/types.js +0 -160
- package/dist/render/transcripts/review.d.ts +0 -2
- package/dist/render/transcripts/review.js +0 -36
- /package/dist/{commands/run → agents/runtime}/shim/agent-manifest.d.ts +0 -0
- /package/dist/{commands/run → agents/runtime}/shim/agent-manifest.js +0 -0
- /package/dist/{commands/run → agents/runtime/shim}/argv.d.ts +0 -0
- /package/dist/{commands/run → agents/runtime/shim}/argv.js +0 -0
- /package/dist/{commands/run/agents → agents/runtime}/types.js +0 -0
- /package/dist/{commands/run → domain/run/competition}/agents/preparation.d.ts +0 -0
- /package/dist/{commands/run/phases.js → domain/run/competition/agents/types.js} +0 -0
- /package/dist/{commands/run → domain/run/model}/id.d.ts +0 -0
- /package/dist/{commands/run → domain/run/model}/id.js +0 -0
- /package/dist/{records → persistence}/history-lock.d.ts +0 -0
|
@@ -1,19 +0,0 @@
|
|
|
1
|
-
import type { EnvironmentConfig } from "../../../configs/environment/types.js";
|
|
2
|
-
import type { AgentEvalResult, EvalDefinition } from "../../../configs/evals/types.js";
|
|
3
|
-
import { type ArtifactCollectionResult, type SandboxPersona } from "../../../workspace/agents.js";
|
|
4
|
-
import type { AgentWorkspacePaths } from "../../../workspace/layout.js";
|
|
5
|
-
export interface EvalRunInput {
|
|
6
|
-
evalPlan: readonly EvalDefinition[];
|
|
7
|
-
workspacePaths: AgentWorkspacePaths;
|
|
8
|
-
baseRevisionSha: string;
|
|
9
|
-
root: string;
|
|
10
|
-
manifestEnv: Record<string, string>;
|
|
11
|
-
environment: EnvironmentConfig;
|
|
12
|
-
persona: SandboxPersona;
|
|
13
|
-
}
|
|
14
|
-
export interface EvalRunResult {
|
|
15
|
-
artifacts: ArtifactCollectionResult;
|
|
16
|
-
evaluations: AgentEvalResult[];
|
|
17
|
-
warnings: string[];
|
|
18
|
-
}
|
|
19
|
-
export declare function runPostProcessingAndEvaluations(input: EvalRunInput): Promise<EvalRunResult>;
|
|
@@ -1,27 +0,0 @@
|
|
|
1
|
-
import { executeEvaluations } from "../../../evals/runner.js";
|
|
2
|
-
import { collectAgentArtifacts, } from "../../../workspace/agents.js";
|
|
3
|
-
export async function runPostProcessingAndEvaluations(input) {
|
|
4
|
-
const { evalPlan, workspacePaths, baseRevisionSha, root, manifestEnv, environment, persona, } = input;
|
|
5
|
-
const artifacts = await collectAgentArtifacts({
|
|
6
|
-
baseRevisionSha,
|
|
7
|
-
workspacePath: workspacePaths.workspacePath,
|
|
8
|
-
summaryPath: workspacePaths.summaryPath,
|
|
9
|
-
diffPath: workspacePaths.diffPath,
|
|
10
|
-
root,
|
|
11
|
-
environment,
|
|
12
|
-
persona,
|
|
13
|
-
});
|
|
14
|
-
const evalOutcome = await executeEvaluations({
|
|
15
|
-
evaluations: evalPlan,
|
|
16
|
-
cwd: workspacePaths.workspacePath,
|
|
17
|
-
root,
|
|
18
|
-
logsDirectory: workspacePaths.evalsDirPath,
|
|
19
|
-
env: manifestEnv,
|
|
20
|
-
environment,
|
|
21
|
-
});
|
|
22
|
-
return {
|
|
23
|
-
artifacts,
|
|
24
|
-
evaluations: evalOutcome.results,
|
|
25
|
-
warnings: evalOutcome.warnings,
|
|
26
|
-
};
|
|
27
|
-
}
|
|
@@ -1,32 +0,0 @@
|
|
|
1
|
-
import { readFile } from "node:fs/promises";
|
|
2
|
-
import { CLAUDE_OAUTH_RELOGIN_HINT, CLAUDE_PROVIDER_ID, } from "../../../auth/providers/claude/constants.js";
|
|
3
|
-
const CLAUDE_FAILURE_PATTERNS = [
|
|
4
|
-
/Please run \/login/i,
|
|
5
|
-
/OAuth token has expired/i,
|
|
6
|
-
];
|
|
7
|
-
export async function detectAgentProcessFailureDetail(input) {
|
|
8
|
-
if (input.provider !== CLAUDE_PROVIDER_ID) {
|
|
9
|
-
return undefined;
|
|
10
|
-
}
|
|
11
|
-
const combinedLogs = await readCombinedLogs(input.stdoutPath, input.stderrPath);
|
|
12
|
-
if (combinedLogs &&
|
|
13
|
-
CLAUDE_FAILURE_PATTERNS.some((pattern) => pattern.test(combinedLogs))) {
|
|
14
|
-
return CLAUDE_OAUTH_RELOGIN_HINT;
|
|
15
|
-
}
|
|
16
|
-
return undefined;
|
|
17
|
-
}
|
|
18
|
-
async function readCombinedLogs(stdoutPath, stderrPath) {
|
|
19
|
-
const [stdout, stderr] = await Promise.all([
|
|
20
|
-
safeRead(stdoutPath),
|
|
21
|
-
safeRead(stderrPath),
|
|
22
|
-
]);
|
|
23
|
-
return `${stdout}\n${stderr}`;
|
|
24
|
-
}
|
|
25
|
-
async function safeRead(path) {
|
|
26
|
-
try {
|
|
27
|
-
return await readFile(path, "utf8");
|
|
28
|
-
}
|
|
29
|
-
catch {
|
|
30
|
-
return "";
|
|
31
|
-
}
|
|
32
|
-
}
|
|
@@ -1,123 +0,0 @@
|
|
|
1
|
-
import { ensureWorkspaceError, prepareAgentWorkspace, } from "../../../workspace/agents.js";
|
|
2
|
-
import { buildAgentWorkspacePaths } from "../../../workspace/layout.js";
|
|
3
|
-
import { MissingAgentProviderError, RunCommandError } from "../errors.js";
|
|
4
|
-
import { registerStagedSandboxContext, teardownRegisteredSandboxContext, } from "../sandbox-registry.js";
|
|
5
|
-
import { stageAgentAuth } from "./auth-stage.js";
|
|
6
|
-
import { captureAgentChatTranscripts } from "./chat-preserver.js";
|
|
7
|
-
import { AgentRunContext } from "./run-context.js";
|
|
8
|
-
import { configureSandboxSettings } from "./sandbox-launcher.js";
|
|
9
|
-
import { writeAgentManifest } from "./workspace-prep.js";
|
|
10
|
-
export async function prepareAgentForExecution(context) {
|
|
11
|
-
const { agent, baseRevisionSha, runId, root, evalPlan, environment } = context;
|
|
12
|
-
const workspacePaths = buildAgentWorkspacePaths({
|
|
13
|
-
root,
|
|
14
|
-
runId,
|
|
15
|
-
agentId: agent.id,
|
|
16
|
-
});
|
|
17
|
-
const startedAt = new Date().toISOString();
|
|
18
|
-
const agentContext = new AgentRunContext({
|
|
19
|
-
agent,
|
|
20
|
-
runId,
|
|
21
|
-
startedAt,
|
|
22
|
-
evalPlan,
|
|
23
|
-
});
|
|
24
|
-
try {
|
|
25
|
-
await prepareAgentWorkspace({
|
|
26
|
-
paths: workspacePaths,
|
|
27
|
-
baseRevisionSha,
|
|
28
|
-
root,
|
|
29
|
-
agentId: agent.id,
|
|
30
|
-
runId,
|
|
31
|
-
environment,
|
|
32
|
-
});
|
|
33
|
-
}
|
|
34
|
-
catch (error) {
|
|
35
|
-
return {
|
|
36
|
-
status: "failed",
|
|
37
|
-
result: await agentContext.failWith(ensureWorkspaceFailure(error)),
|
|
38
|
-
};
|
|
39
|
-
}
|
|
40
|
-
let authContext;
|
|
41
|
-
let manifestEnv = {};
|
|
42
|
-
try {
|
|
43
|
-
const staged = await stageAgentAuth({
|
|
44
|
-
agent,
|
|
45
|
-
agentRoot: workspacePaths.agentRoot,
|
|
46
|
-
runId,
|
|
47
|
-
root,
|
|
48
|
-
});
|
|
49
|
-
authContext = staged.context;
|
|
50
|
-
registerStagedSandboxContext(authContext);
|
|
51
|
-
manifestEnv = staged.env;
|
|
52
|
-
}
|
|
53
|
-
catch (error) {
|
|
54
|
-
return {
|
|
55
|
-
status: "failed",
|
|
56
|
-
result: await agentContext.failWith(ensureWorkspaceFailure(error)),
|
|
57
|
-
};
|
|
58
|
-
}
|
|
59
|
-
try {
|
|
60
|
-
manifestEnv = await writeAgentManifest({
|
|
61
|
-
agent,
|
|
62
|
-
workspacePaths,
|
|
63
|
-
env: manifestEnv,
|
|
64
|
-
environment,
|
|
65
|
-
});
|
|
66
|
-
}
|
|
67
|
-
catch (error) {
|
|
68
|
-
await captureAgentChatTranscripts({
|
|
69
|
-
agent,
|
|
70
|
-
agentContext,
|
|
71
|
-
agentRoot: workspacePaths.agentRoot,
|
|
72
|
-
reason: "pre-run",
|
|
73
|
-
});
|
|
74
|
-
await teardownRegisteredSandboxContext(authContext);
|
|
75
|
-
return {
|
|
76
|
-
status: "failed",
|
|
77
|
-
result: await agentContext.failWith(ensureWorkspaceFailure(error)),
|
|
78
|
-
};
|
|
79
|
-
}
|
|
80
|
-
try {
|
|
81
|
-
const providerId = agent.provider;
|
|
82
|
-
if (!providerId) {
|
|
83
|
-
throw new MissingAgentProviderError(agent.id);
|
|
84
|
-
}
|
|
85
|
-
await configureSandboxSettings({
|
|
86
|
-
workspacePaths,
|
|
87
|
-
providerId,
|
|
88
|
-
root,
|
|
89
|
-
});
|
|
90
|
-
}
|
|
91
|
-
catch (error) {
|
|
92
|
-
await captureAgentChatTranscripts({
|
|
93
|
-
agent,
|
|
94
|
-
agentContext,
|
|
95
|
-
agentRoot: workspacePaths.agentRoot,
|
|
96
|
-
reason: "pre-run",
|
|
97
|
-
});
|
|
98
|
-
await teardownRegisteredSandboxContext(authContext);
|
|
99
|
-
return {
|
|
100
|
-
status: "failed",
|
|
101
|
-
result: await agentContext.failWith(ensureWorkspaceFailure(error)),
|
|
102
|
-
};
|
|
103
|
-
}
|
|
104
|
-
return {
|
|
105
|
-
status: "ready",
|
|
106
|
-
prepared: {
|
|
107
|
-
agent,
|
|
108
|
-
agentContext,
|
|
109
|
-
workspacePaths,
|
|
110
|
-
runtimeManifestPath: workspacePaths.runtimeManifestPath,
|
|
111
|
-
baseRevisionSha,
|
|
112
|
-
root,
|
|
113
|
-
runId,
|
|
114
|
-
evalPlan,
|
|
115
|
-
environment,
|
|
116
|
-
manifestEnv,
|
|
117
|
-
authContext,
|
|
118
|
-
},
|
|
119
|
-
};
|
|
120
|
-
}
|
|
121
|
-
function ensureWorkspaceFailure(error) {
|
|
122
|
-
return error instanceof RunCommandError ? error : ensureWorkspaceError(error);
|
|
123
|
-
}
|
|
@@ -1,14 +0,0 @@
|
|
|
1
|
-
import type { AgentDefinition } from "../../configs/agents/types.js";
|
|
2
|
-
import type { EnvironmentConfig } from "../../configs/environment/types.js";
|
|
3
|
-
import type { EvalDefinition } from "../../configs/evals/types.js";
|
|
4
|
-
import type { AgentPreparationResult, PreparedAgentExecution } from "./agents/types.js";
|
|
5
|
-
import type { AgentExecutionResult } from "./reports.js";
|
|
6
|
-
export declare function prepareAgents(options: {
|
|
7
|
-
agents: readonly AgentDefinition[];
|
|
8
|
-
baseRevisionSha: string;
|
|
9
|
-
runId: string;
|
|
10
|
-
root: string;
|
|
11
|
-
evalPlan: readonly EvalDefinition[];
|
|
12
|
-
environment: EnvironmentConfig;
|
|
13
|
-
}): Promise<AgentPreparationResult>;
|
|
14
|
-
export declare function runAgentsWithLimit(agents: PreparedAgentExecution[], limit: number): Promise<AgentExecutionResult[]>;
|
|
@@ -1,47 +0,0 @@
|
|
|
1
|
-
import { runPreparedAgent } from "./agents/lifecycle.js";
|
|
2
|
-
import { prepareAgentForExecution } from "./agents/preparation.js";
|
|
3
|
-
export async function prepareAgents(options) {
|
|
4
|
-
const { agents, baseRevisionSha, runId, root, evalPlan, environment } = options;
|
|
5
|
-
const ready = [];
|
|
6
|
-
const failures = [];
|
|
7
|
-
for (const agent of agents) {
|
|
8
|
-
const preparation = await prepareAgentForExecution({
|
|
9
|
-
agent,
|
|
10
|
-
baseRevisionSha,
|
|
11
|
-
runId,
|
|
12
|
-
root,
|
|
13
|
-
evalPlan,
|
|
14
|
-
environment,
|
|
15
|
-
});
|
|
16
|
-
if (preparation.status === "ready") {
|
|
17
|
-
ready.push(preparation.prepared);
|
|
18
|
-
}
|
|
19
|
-
else {
|
|
20
|
-
failures.push(preparation.result);
|
|
21
|
-
}
|
|
22
|
-
}
|
|
23
|
-
return { ready, failures };
|
|
24
|
-
}
|
|
25
|
-
export async function runAgentsWithLimit(agents, limit) {
|
|
26
|
-
if (agents.length === 0) {
|
|
27
|
-
return [];
|
|
28
|
-
}
|
|
29
|
-
const workerCount = Math.max(1, Math.min(limit, agents.length));
|
|
30
|
-
const results = new Array(agents.length);
|
|
31
|
-
let nextIndex = 0;
|
|
32
|
-
async function worker() {
|
|
33
|
-
while (true) {
|
|
34
|
-
const current = nextIndex++;
|
|
35
|
-
if (current >= agents.length) {
|
|
36
|
-
return;
|
|
37
|
-
}
|
|
38
|
-
results[current] = await runPreparedAgent(agents[current]);
|
|
39
|
-
}
|
|
40
|
-
}
|
|
41
|
-
const workers = [];
|
|
42
|
-
for (let index = 0; index < workerCount; index += 1) {
|
|
43
|
-
workers.push(worker());
|
|
44
|
-
}
|
|
45
|
-
await Promise.all(workers);
|
|
46
|
-
return results;
|
|
47
|
-
}
|
|
@@ -1,16 +0,0 @@
|
|
|
1
|
-
export function buildAgentPrompt(options) {
|
|
2
|
-
const { specContent } = options;
|
|
3
|
-
const lines = [
|
|
4
|
-
"Implement the following task:",
|
|
5
|
-
"",
|
|
6
|
-
"```",
|
|
7
|
-
specContent.trimEnd(),
|
|
8
|
-
"```",
|
|
9
|
-
"",
|
|
10
|
-
"Constraints:",
|
|
11
|
-
"- You are running headlessly. Never pause for user interaction.",
|
|
12
|
-
"- You are sandboxed. If an operation is blocked, skip it and continue.",
|
|
13
|
-
"- When finished, write a 1-2 sentence summary to `.summary.txt` (in the workspace root).",
|
|
14
|
-
];
|
|
15
|
-
return `${lines.join("\n")}\n`;
|
|
16
|
-
}
|
|
@@ -1,4 +0,0 @@
|
|
|
1
|
-
import type { StagedAuthContext } from "./agents/auth-stage.js";
|
|
2
|
-
export declare function registerStagedSandboxContext(context: StagedAuthContext): void;
|
|
3
|
-
export declare function teardownRegisteredSandboxContext(context: StagedAuthContext | undefined): Promise<void>;
|
|
4
|
-
export declare function teardownRunSandboxes(runId: string | undefined): Promise<void>;
|
|
@@ -1,54 +0,0 @@
|
|
|
1
|
-
import { teardownAuthContext } from "./agents/auth-stage.js";
|
|
2
|
-
const sandboxRegistry = new Map();
|
|
3
|
-
export function registerStagedSandboxContext(context) {
|
|
4
|
-
const contexts = sandboxRegistry.get(context.runId);
|
|
5
|
-
if (contexts) {
|
|
6
|
-
contexts.add(context);
|
|
7
|
-
return;
|
|
8
|
-
}
|
|
9
|
-
sandboxRegistry.set(context.runId, new Set([context]));
|
|
10
|
-
}
|
|
11
|
-
export async function teardownRegisteredSandboxContext(context) {
|
|
12
|
-
if (!context) {
|
|
13
|
-
return;
|
|
14
|
-
}
|
|
15
|
-
await teardownAuthContext(context);
|
|
16
|
-
removeContextFromRegistry(context);
|
|
17
|
-
}
|
|
18
|
-
export async function teardownRunSandboxes(runId) {
|
|
19
|
-
if (!runId) {
|
|
20
|
-
return;
|
|
21
|
-
}
|
|
22
|
-
const contexts = sandboxRegistry.get(runId);
|
|
23
|
-
if (!contexts || contexts.size === 0) {
|
|
24
|
-
sandboxRegistry.delete(runId);
|
|
25
|
-
return;
|
|
26
|
-
}
|
|
27
|
-
const failures = [];
|
|
28
|
-
const stagedContexts = Array.from(contexts);
|
|
29
|
-
for (const context of stagedContexts) {
|
|
30
|
-
try {
|
|
31
|
-
await teardownAuthContext(context);
|
|
32
|
-
removeContextFromRegistry(context);
|
|
33
|
-
}
|
|
34
|
-
catch (error) {
|
|
35
|
-
failures.push(error);
|
|
36
|
-
}
|
|
37
|
-
}
|
|
38
|
-
if (failures.length === 1) {
|
|
39
|
-
throw failures[0];
|
|
40
|
-
}
|
|
41
|
-
if (failures.length > 1) {
|
|
42
|
-
throw new AggregateError(failures, `Failed to teardown ${failures.length} sandbox contexts for run ${runId}`);
|
|
43
|
-
}
|
|
44
|
-
}
|
|
45
|
-
function removeContextFromRegistry(context) {
|
|
46
|
-
const contexts = sandboxRegistry.get(context.runId);
|
|
47
|
-
if (!contexts) {
|
|
48
|
-
return;
|
|
49
|
-
}
|
|
50
|
-
contexts.delete(context);
|
|
51
|
-
if (contexts.size === 0) {
|
|
52
|
-
sandboxRegistry.delete(context.runId);
|
|
53
|
-
}
|
|
54
|
-
}
|
|
@@ -1,8 +0,0 @@
|
|
|
1
|
-
export interface EvalDefault {
|
|
2
|
-
readonly slug: string;
|
|
3
|
-
readonly command?: string;
|
|
4
|
-
}
|
|
5
|
-
export declare const DEFAULT_EVAL_TEMPLATE_HEADER: readonly string[];
|
|
6
|
-
export declare const DEFAULT_EVAL_DEFAULTS: readonly EvalDefault[];
|
|
7
|
-
export declare function serializeEvalDefaults(defaults: readonly EvalDefault[]): string[];
|
|
8
|
-
export declare function listEvalDefaults(): EvalDefault[];
|
|
@@ -1,28 +0,0 @@
|
|
|
1
|
-
export const DEFAULT_EVAL_TEMPLATE_HEADER = [
|
|
2
|
-
"# Eval commands run after each agent finishes its run.",
|
|
3
|
-
"# For each slug, provide the command that runs that eval.",
|
|
4
|
-
"# Leave any entry blank (or delete it entirely) to skip that eval.",
|
|
5
|
-
'# Example: format: "npm run format:check"',
|
|
6
|
-
"",
|
|
7
|
-
];
|
|
8
|
-
export const DEFAULT_EVAL_DEFAULTS = [
|
|
9
|
-
{ slug: "format" },
|
|
10
|
-
{ slug: "lint" },
|
|
11
|
-
{ slug: "typecheck" },
|
|
12
|
-
{ slug: "tests" },
|
|
13
|
-
];
|
|
14
|
-
export function serializeEvalDefaults(defaults) {
|
|
15
|
-
const lines = [...DEFAULT_EVAL_TEMPLATE_HEADER];
|
|
16
|
-
for (const { slug, command } of defaults) {
|
|
17
|
-
if (command && command.length > 0) {
|
|
18
|
-
lines.push(`${slug}: ${JSON.stringify(command)}`);
|
|
19
|
-
}
|
|
20
|
-
else {
|
|
21
|
-
lines.push(`${slug}:`);
|
|
22
|
-
}
|
|
23
|
-
}
|
|
24
|
-
return lines;
|
|
25
|
-
}
|
|
26
|
-
export function listEvalDefaults() {
|
|
27
|
-
return DEFAULT_EVAL_DEFAULTS.map((definition) => ({ ...definition }));
|
|
28
|
-
}
|
|
@@ -1,10 +0,0 @@
|
|
|
1
|
-
import { type EnvironmentConfig } from "../environment/types.js";
|
|
2
|
-
import type { EvalSlug } from "./types.js";
|
|
3
|
-
export declare const CANONICAL_EVAL_SLUGS: EvalSlug[];
|
|
4
|
-
export interface EvalSuggestion {
|
|
5
|
-
source: "node" | "python";
|
|
6
|
-
commands: Map<EvalSlug, string>;
|
|
7
|
-
notes: string[];
|
|
8
|
-
warnings: string[];
|
|
9
|
-
}
|
|
10
|
-
export declare function detectEvalSuggestions(root: string, environment: EnvironmentConfig): Promise<EvalSuggestion[]>;
|
|
@@ -1,16 +0,0 @@
|
|
|
1
|
-
import { WorkspaceSetupError } from "../../workspace/errors.js";
|
|
2
|
-
declare const DEFAULT_EVALS_ERROR_CONTEXT: "Invalid evals.yaml";
|
|
3
|
-
export { DEFAULT_EVALS_ERROR_CONTEXT };
|
|
4
|
-
export declare class EvalsError extends WorkspaceSetupError {
|
|
5
|
-
constructor(message: string);
|
|
6
|
-
}
|
|
7
|
-
export declare class EvalsConfigError extends EvalsError {
|
|
8
|
-
constructor(message: string);
|
|
9
|
-
}
|
|
10
|
-
export declare class MissingEvalsConfigError extends EvalsConfigError {
|
|
11
|
-
readonly filePath: string;
|
|
12
|
-
constructor(filePath: string);
|
|
13
|
-
}
|
|
14
|
-
export declare class EvalsYamlParseError extends EvalsConfigError {
|
|
15
|
-
constructor(message: string);
|
|
16
|
-
}
|
|
@@ -1,29 +0,0 @@
|
|
|
1
|
-
import { WorkspaceSetupError } from "../../workspace/errors.js";
|
|
2
|
-
const DEFAULT_EVALS_ERROR_CONTEXT = "Invalid evals.yaml";
|
|
3
|
-
export { DEFAULT_EVALS_ERROR_CONTEXT };
|
|
4
|
-
export class EvalsError extends WorkspaceSetupError {
|
|
5
|
-
constructor(message) {
|
|
6
|
-
super(message);
|
|
7
|
-
this.name = "EvalsError";
|
|
8
|
-
}
|
|
9
|
-
}
|
|
10
|
-
export class EvalsConfigError extends EvalsError {
|
|
11
|
-
constructor(message) {
|
|
12
|
-
super(message);
|
|
13
|
-
this.name = "EvalsConfigError";
|
|
14
|
-
}
|
|
15
|
-
}
|
|
16
|
-
export class MissingEvalsConfigError extends EvalsConfigError {
|
|
17
|
-
filePath;
|
|
18
|
-
constructor(filePath) {
|
|
19
|
-
super(`Missing eval configuration file at ${filePath}.`);
|
|
20
|
-
this.filePath = filePath;
|
|
21
|
-
this.name = "MissingEvalsConfigError";
|
|
22
|
-
}
|
|
23
|
-
}
|
|
24
|
-
export class EvalsYamlParseError extends EvalsConfigError {
|
|
25
|
-
constructor(message) {
|
|
26
|
-
super(message);
|
|
27
|
-
this.name = "EvalsYamlParseError";
|
|
28
|
-
}
|
|
29
|
-
}
|
|
@@ -1,9 +0,0 @@
|
|
|
1
|
-
import { type EvalDefinition, type EvalsConfig } from "./types.js";
|
|
2
|
-
export declare function readEvalsConfig(content: string): EvalsConfig;
|
|
3
|
-
export interface LoadEvalConfigOptions {
|
|
4
|
-
root?: string;
|
|
5
|
-
filePath?: string;
|
|
6
|
-
readFile?: (path: string) => string;
|
|
7
|
-
}
|
|
8
|
-
export declare function loadEvalConfig(options?: LoadEvalConfigOptions): EvalsConfig;
|
|
9
|
-
export declare function buildEvalDefinitions(config: EvalsConfig): EvalDefinition[];
|
|
@@ -1,46 +0,0 @@
|
|
|
1
|
-
import { ZodError } from "zod";
|
|
2
|
-
import { parseYamlDocument, } from "../../utils/yaml-reader.js";
|
|
3
|
-
import { resolveWorkspacePath, VORATIQ_EVALS_FILE, } from "../../workspace/structure.js";
|
|
4
|
-
import { createConfigLoader } from "../shared/loader-factory.js";
|
|
5
|
-
import { formatYamlErrorMessage } from "../shared/yaml-error-formatter.js";
|
|
6
|
-
import { DEFAULT_EVALS_ERROR_CONTEXT, EvalsConfigError, EvalsYamlParseError, MissingEvalsConfigError, } from "./errors.js";
|
|
7
|
-
import { evalsConfigSchema, } from "./types.js";
|
|
8
|
-
export function readEvalsConfig(content) {
|
|
9
|
-
const parsed = parseYamlDocument(content, {
|
|
10
|
-
formatError: formatEvalsYamlError,
|
|
11
|
-
});
|
|
12
|
-
try {
|
|
13
|
-
const config = evalsConfigSchema.parse(parsed);
|
|
14
|
-
return config;
|
|
15
|
-
}
|
|
16
|
-
catch (error) {
|
|
17
|
-
if (error instanceof ZodError) {
|
|
18
|
-
const detail = error.issues
|
|
19
|
-
.map((issue) => issue.message)
|
|
20
|
-
.join("; ")
|
|
21
|
-
.trim();
|
|
22
|
-
throw new EvalsConfigError(`${DEFAULT_EVALS_ERROR_CONTEXT}: ${detail || "invalid mapping"}`);
|
|
23
|
-
}
|
|
24
|
-
throw error;
|
|
25
|
-
}
|
|
26
|
-
}
|
|
27
|
-
function formatEvalsYamlError(detail) {
|
|
28
|
-
const message = formatYamlErrorMessage(detail, {
|
|
29
|
-
context: DEFAULT_EVALS_ERROR_CONTEXT,
|
|
30
|
-
});
|
|
31
|
-
return new EvalsYamlParseError(message);
|
|
32
|
-
}
|
|
33
|
-
const loadEvalConfigInternal = createConfigLoader({
|
|
34
|
-
resolveFilePath: (root, options) => options.filePath ?? resolveWorkspacePath(root, VORATIQ_EVALS_FILE),
|
|
35
|
-
selectReadFile: (options) => options.readFile,
|
|
36
|
-
handleMissing: ({ filePath }) => {
|
|
37
|
-
throw new MissingEvalsConfigError(filePath);
|
|
38
|
-
},
|
|
39
|
-
parse: (content) => readEvalsConfig(content),
|
|
40
|
-
});
|
|
41
|
-
export function loadEvalConfig(options = {}) {
|
|
42
|
-
return loadEvalConfigInternal(options);
|
|
43
|
-
}
|
|
44
|
-
export function buildEvalDefinitions(config) {
|
|
45
|
-
return config.map(({ slug, command }) => ({ slug, command }));
|
|
46
|
-
}
|
|
@@ -1,42 +0,0 @@
|
|
|
1
|
-
import { z } from "zod";
|
|
2
|
-
import { EVAL_STATUS_VALUES } from "../../status/index.js";
|
|
3
|
-
export declare const evalSlugSchema: z.ZodString;
|
|
4
|
-
export type EvalSlug = z.infer<typeof evalSlugSchema>;
|
|
5
|
-
export declare const evalStatusSchema: z.ZodEnum<{
|
|
6
|
-
succeeded: "succeeded";
|
|
7
|
-
failed: "failed";
|
|
8
|
-
errored: "errored";
|
|
9
|
-
skipped: "skipped";
|
|
10
|
-
}>;
|
|
11
|
-
export type EvalStatus = (typeof EVAL_STATUS_VALUES)[number];
|
|
12
|
-
export declare const evalCommandSchema: z.ZodPipe<z.ZodUnion<readonly [z.ZodString, z.ZodNull, z.ZodUndefined]>, z.ZodTransform<string | undefined, string | null | undefined>>;
|
|
13
|
-
export interface EvalCommandEntry {
|
|
14
|
-
slug: EvalSlug;
|
|
15
|
-
command?: string;
|
|
16
|
-
}
|
|
17
|
-
export type EvalsConfig = ReadonlyArray<EvalCommandEntry>;
|
|
18
|
-
export declare const evalsConfigSchema: z.ZodPipe<z.ZodRecord<z.ZodString, z.ZodPipe<z.ZodUnion<readonly [z.ZodString, z.ZodNull, z.ZodUndefined]>, z.ZodTransform<string | undefined, string | null | undefined>>>, z.ZodTransform<EvalCommandEntry[], Record<string, string | undefined>>>;
|
|
19
|
-
export declare const agentEvalResultSchema: z.ZodObject<{
|
|
20
|
-
slug: z.ZodString;
|
|
21
|
-
status: z.ZodEnum<{
|
|
22
|
-
succeeded: "succeeded";
|
|
23
|
-
failed: "failed";
|
|
24
|
-
errored: "errored";
|
|
25
|
-
skipped: "skipped";
|
|
26
|
-
}>;
|
|
27
|
-
command: z.ZodOptional<z.ZodString>;
|
|
28
|
-
exitCode: z.ZodOptional<z.ZodNullable<z.ZodNumber>>;
|
|
29
|
-
logPath: z.ZodOptional<z.ZodString>;
|
|
30
|
-
error: z.ZodOptional<z.ZodString>;
|
|
31
|
-
}, z.core.$strip>;
|
|
32
|
-
export type AgentEvalResult = z.infer<typeof agentEvalResultSchema>;
|
|
33
|
-
export interface EvalDefinition {
|
|
34
|
-
slug: EvalSlug;
|
|
35
|
-
command?: string;
|
|
36
|
-
}
|
|
37
|
-
/**
|
|
38
|
-
* Normalizes an eval command by trimming whitespace and converting empty strings to undefined.
|
|
39
|
-
* Exported for reuse in init scaffolding and other contexts.
|
|
40
|
-
*/
|
|
41
|
-
export declare function normalizeEvalCommand(command: string | undefined): string | undefined;
|
|
42
|
-
export declare function sanitizeSlugForFilename(slug: string): string;
|
|
@@ -1,74 +0,0 @@
|
|
|
1
|
-
import { z } from "zod";
|
|
2
|
-
import { evalStatusSchema as sharedEvalStatusSchema, } from "../../status/index.js";
|
|
3
|
-
const EVAL_SLUG_PATTERN = /^[a-z0-9]+(?:[.-][a-z0-9]+)*$/u;
|
|
4
|
-
export const evalSlugSchema = z
|
|
5
|
-
.string()
|
|
6
|
-
.min(1, "eval slug is required")
|
|
7
|
-
.regex(EVAL_SLUG_PATTERN, "eval slug must contain only lowercase letters, numbers, dots, or hyphens");
|
|
8
|
-
export const evalStatusSchema = sharedEvalStatusSchema;
|
|
9
|
-
export const evalCommandSchema = z
|
|
10
|
-
.union([z.string(), z.null(), z.undefined()])
|
|
11
|
-
.transform((value) => {
|
|
12
|
-
if (value === null || value === undefined) {
|
|
13
|
-
return undefined;
|
|
14
|
-
}
|
|
15
|
-
const trimmed = value.trim();
|
|
16
|
-
return trimmed.length > 0 ? trimmed : undefined;
|
|
17
|
-
});
|
|
18
|
-
export const evalsConfigSchema = z
|
|
19
|
-
.record(z.string(), evalCommandSchema)
|
|
20
|
-
.superRefine((value, ctx) => {
|
|
21
|
-
const seen = new Set();
|
|
22
|
-
for (const rawSlug of Object.keys(value)) {
|
|
23
|
-
const trimmedSlug = rawSlug.trim();
|
|
24
|
-
const parsed = evalSlugSchema.safeParse(trimmedSlug);
|
|
25
|
-
if (!parsed.success) {
|
|
26
|
-
const message = parsed.error.issues[0]?.message ?? "invalid eval slug";
|
|
27
|
-
ctx.addIssue({
|
|
28
|
-
code: z.ZodIssueCode.custom,
|
|
29
|
-
path: [rawSlug],
|
|
30
|
-
message,
|
|
31
|
-
});
|
|
32
|
-
continue;
|
|
33
|
-
}
|
|
34
|
-
if (seen.has(parsed.data)) {
|
|
35
|
-
ctx.addIssue({
|
|
36
|
-
code: z.ZodIssueCode.custom,
|
|
37
|
-
path: [rawSlug],
|
|
38
|
-
message: `Duplicate eval slug "${parsed.data}"`,
|
|
39
|
-
});
|
|
40
|
-
}
|
|
41
|
-
seen.add(parsed.data);
|
|
42
|
-
}
|
|
43
|
-
})
|
|
44
|
-
.transform((value) => {
|
|
45
|
-
const entries = [];
|
|
46
|
-
for (const [rawSlug, rawCommand] of Object.entries(value)) {
|
|
47
|
-
const slug = evalSlugSchema.parse(rawSlug.trim());
|
|
48
|
-
const normalizedCommand = normalizeEvalCommand(rawCommand);
|
|
49
|
-
entries.push({ slug, command: normalizedCommand });
|
|
50
|
-
}
|
|
51
|
-
return entries;
|
|
52
|
-
});
|
|
53
|
-
export const agentEvalResultSchema = z.object({
|
|
54
|
-
slug: evalSlugSchema,
|
|
55
|
-
status: evalStatusSchema,
|
|
56
|
-
command: z.string().optional(),
|
|
57
|
-
exitCode: z.number().nullable().optional(),
|
|
58
|
-
logPath: z.string().optional(),
|
|
59
|
-
error: z.string().optional(),
|
|
60
|
-
});
|
|
61
|
-
/**
|
|
62
|
-
* Normalizes an eval command by trimming whitespace and converting empty strings to undefined.
|
|
63
|
-
* Exported for reuse in init scaffolding and other contexts.
|
|
64
|
-
*/
|
|
65
|
-
export function normalizeEvalCommand(command) {
|
|
66
|
-
if (command === undefined) {
|
|
67
|
-
return undefined;
|
|
68
|
-
}
|
|
69
|
-
const trimmed = command.trim();
|
|
70
|
-
return trimmed.length > 0 ? trimmed : undefined;
|
|
71
|
-
}
|
|
72
|
-
export function sanitizeSlugForFilename(slug) {
|
|
73
|
-
return slug.replace(/[^a-z0-9.-]/gu, "-");
|
|
74
|
-
}
|
package/dist/evals/runner.d.ts
DELETED
|
@@ -1,16 +0,0 @@
|
|
|
1
|
-
import { type EnvironmentConfig } from "../configs/environment/types.js";
|
|
2
|
-
import type { AgentEvalResult, EvalDefinition } from "../configs/evals/types.js";
|
|
3
|
-
interface ExecuteEvaluationsOptions {
|
|
4
|
-
evaluations: readonly EvalDefinition[];
|
|
5
|
-
cwd: string;
|
|
6
|
-
root: string;
|
|
7
|
-
logsDirectory: string;
|
|
8
|
-
env?: NodeJS.ProcessEnv;
|
|
9
|
-
environment: EnvironmentConfig;
|
|
10
|
-
}
|
|
11
|
-
export interface ExecuteEvaluationsResult {
|
|
12
|
-
results: AgentEvalResult[];
|
|
13
|
-
warnings: string[];
|
|
14
|
-
}
|
|
15
|
-
export declare function executeEvaluations(options: ExecuteEvaluationsOptions): Promise<ExecuteEvaluationsResult>;
|
|
16
|
-
export {};
|