@oscharko-dev/keiko 0.1.0-beta.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +202 -0
- package/NOTICE +7 -0
- package/README.md +621 -0
- package/TRADEMARKS.md +41 -0
- package/dist/audit/aggregate.d.ts +5 -0
- package/dist/audit/aggregate.js +25 -0
- package/dist/audit/build.d.ts +2 -0
- package/dist/audit/build.js +224 -0
- package/dist/audit/errors.d.ts +25 -0
- package/dist/audit/errors.js +39 -0
- package/dist/audit/index-api.d.ts +14 -0
- package/dist/audit/index-api.js +131 -0
- package/dist/audit/index.d.ts +12 -0
- package/dist/audit/index.js +17 -0
- package/dist/audit/persist.d.ts +8 -0
- package/dist/audit/persist.js +40 -0
- package/dist/audit/redaction.d.ts +3 -0
- package/dist/audit/redaction.js +61 -0
- package/dist/audit/report.d.ts +18 -0
- package/dist/audit/report.js +50 -0
- package/dist/audit/retention.d.ts +3 -0
- package/dist/audit/retention.js +95 -0
- package/dist/audit/runid.d.ts +1 -0
- package/dist/audit/runid.js +29 -0
- package/dist/audit/side-file.d.ts +12 -0
- package/dist/audit/side-file.js +82 -0
- package/dist/audit/store.d.ts +12 -0
- package/dist/audit/store.js +198 -0
- package/dist/audit/types.d.ts +188 -0
- package/dist/audit/types.js +8 -0
- package/dist/audit/workflow-evidence.d.ts +27 -0
- package/dist/audit/workflow-evidence.js +145 -0
- package/dist/cli/context.d.ts +2 -0
- package/dist/cli/context.js +102 -0
- package/dist/cli/evaluate.d.ts +7 -0
- package/dist/cli/evaluate.js +207 -0
- package/dist/cli/evidence.d.ts +8 -0
- package/dist/cli/evidence.js +88 -0
- package/dist/cli/gateway-config.d.ts +10 -0
- package/dist/cli/gateway-config.js +12 -0
- package/dist/cli/gen-tests.d.ts +7 -0
- package/dist/cli/gen-tests.js +208 -0
- package/dist/cli/index.d.ts +2 -0
- package/dist/cli/index.js +14 -0
- package/dist/cli/investigate.d.ts +8 -0
- package/dist/cli/investigate.js +242 -0
- package/dist/cli/models.d.ts +3 -0
- package/dist/cli/models.js +64 -0
- package/dist/cli/run.d.ts +7 -0
- package/dist/cli/run.js +187 -0
- package/dist/cli/runner.d.ts +6 -0
- package/dist/cli/runner.js +83 -0
- package/dist/cli/ui.d.ts +31 -0
- package/dist/cli/ui.js +240 -0
- package/dist/cli/verify.d.ts +2 -0
- package/dist/cli/verify.js +103 -0
- package/dist/evaluations/fixtures/bug-investigation/happy-path.d.ts +2 -0
- package/dist/evaluations/fixtures/bug-investigation/happy-path.js +66 -0
- package/dist/evaluations/fixtures/bug-investigation/investigation-only.d.ts +2 -0
- package/dist/evaluations/fixtures/bug-investigation/investigation-only.js +39 -0
- package/dist/evaluations/fixtures/bug-investigation/unsafe-action.d.ts +2 -0
- package/dist/evaluations/fixtures/bug-investigation/unsafe-action.js +37 -0
- package/dist/evaluations/fixtures/index.d.ts +7 -0
- package/dist/evaluations/fixtures/index.js +35 -0
- package/dist/evaluations/fixtures/support.d.ts +5 -0
- package/dist/evaluations/fixtures/support.js +42 -0
- package/dist/evaluations/fixtures/unit-tests/happy-path.d.ts +2 -0
- package/dist/evaluations/fixtures/unit-tests/happy-path.js +40 -0
- package/dist/evaluations/fixtures/unit-tests/retry-then-accept.d.ts +2 -0
- package/dist/evaluations/fixtures/unit-tests/retry-then-accept.js +39 -0
- package/dist/evaluations/fixtures/unit-tests/unsafe-action.d.ts +2 -0
- package/dist/evaluations/fixtures/unit-tests/unsafe-action.js +32 -0
- package/dist/evaluations/index.d.ts +12 -0
- package/dist/evaluations/index.js +12 -0
- package/dist/evaluations/manifest-check.d.ts +1 -0
- package/dist/evaluations/manifest-check.js +48 -0
- package/dist/evaluations/model-provider.d.ts +12 -0
- package/dist/evaluations/model-provider.js +26 -0
- package/dist/evaluations/render.d.ts +2 -0
- package/dist/evaluations/render.js +59 -0
- package/dist/evaluations/runner-support.d.ts +27 -0
- package/dist/evaluations/runner-support.js +163 -0
- package/dist/evaluations/runner.d.ts +20 -0
- package/dist/evaluations/runner.js +174 -0
- package/dist/evaluations/scorer.d.ts +14 -0
- package/dist/evaluations/scorer.js +131 -0
- package/dist/evaluations/scripted-model.d.ts +6 -0
- package/dist/evaluations/scripted-model.js +26 -0
- package/dist/evaluations/surface-parity.d.ts +2 -0
- package/dist/evaluations/surface-parity.js +184 -0
- package/dist/evaluations/types.d.ts +74 -0
- package/dist/evaluations/types.js +16 -0
- package/dist/gateway/capabilities.d.ts +11 -0
- package/dist/gateway/capabilities.data.d.ts +2 -0
- package/dist/gateway/capabilities.data.js +203 -0
- package/dist/gateway/capabilities.js +41 -0
- package/dist/gateway/config.d.ts +15 -0
- package/dist/gateway/config.js +154 -0
- package/dist/gateway/errors.d.ts +72 -0
- package/dist/gateway/errors.js +82 -0
- package/dist/gateway/gateway.d.ts +19 -0
- package/dist/gateway/gateway.js +94 -0
- package/dist/gateway/index.d.ts +10 -0
- package/dist/gateway/index.js +11 -0
- package/dist/gateway/model-selection.d.ts +9 -0
- package/dist/gateway/model-selection.js +36 -0
- package/dist/gateway/normalize.d.ts +7 -0
- package/dist/gateway/normalize.js +93 -0
- package/dist/gateway/openai-adapter.d.ts +20 -0
- package/dist/gateway/openai-adapter.js +263 -0
- package/dist/gateway/redaction.d.ts +1 -0
- package/dist/gateway/redaction.js +51 -0
- package/dist/gateway/resilience.d.ts +24 -0
- package/dist/gateway/resilience.js +166 -0
- package/dist/gateway/types.d.ts +108 -0
- package/dist/gateway/types.js +2 -0
- package/dist/harness/adapters.d.ts +23 -0
- package/dist/harness/adapters.js +38 -0
- package/dist/harness/context.d.ts +33 -0
- package/dist/harness/context.js +21 -0
- package/dist/harness/emitter.d.ts +15 -0
- package/dist/harness/emitter.js +72 -0
- package/dist/harness/errors.d.ts +21 -0
- package/dist/harness/errors.js +39 -0
- package/dist/harness/executor.d.ts +3 -0
- package/dist/harness/executor.js +211 -0
- package/dist/harness/fingerprint.d.ts +6 -0
- package/dist/harness/fingerprint.js +43 -0
- package/dist/harness/index.d.ts +9 -0
- package/dist/harness/index.js +13 -0
- package/dist/harness/loop.d.ts +3 -0
- package/dist/harness/loop.js +159 -0
- package/dist/harness/patcher.d.ts +4 -0
- package/dist/harness/patcher.js +49 -0
- package/dist/harness/planner.d.ts +3 -0
- package/dist/harness/planner.js +21 -0
- package/dist/harness/ports.d.ts +61 -0
- package/dist/harness/ports.js +4 -0
- package/dist/harness/session.d.ts +25 -0
- package/dist/harness/session.js +116 -0
- package/dist/harness/sinks.d.ts +30 -0
- package/dist/harness/sinks.js +72 -0
- package/dist/harness/tasks/explain-plan.d.ts +3 -0
- package/dist/harness/tasks/explain-plan.js +29 -0
- package/dist/harness/tasks/generate-unit-tests.d.ts +3 -0
- package/dist/harness/tasks/generate-unit-tests.js +28 -0
- package/dist/harness/tasks/investigate-bug.d.ts +3 -0
- package/dist/harness/tasks/investigate-bug.js +31 -0
- package/dist/harness/tasks/policy.d.ts +11 -0
- package/dist/harness/tasks/policy.js +22 -0
- package/dist/harness/tasks/verify.d.ts +3 -0
- package/dist/harness/tasks/verify.js +16 -0
- package/dist/harness/types.d.ts +270 -0
- package/dist/harness/types.js +33 -0
- package/dist/index.d.ts +11 -0
- package/dist/index.js +36 -0
- package/dist/sdk/index.d.ts +9 -0
- package/dist/sdk/index.js +37 -0
- package/dist/sdk/run-agent.d.ts +16 -0
- package/dist/sdk/run-agent.js +56 -0
- package/dist/tools/browser/cdp-client.d.ts +35 -0
- package/dist/tools/browser/cdp-client.js +218 -0
- package/dist/tools/browser/errors.d.ts +25 -0
- package/dist/tools/browser/errors.js +55 -0
- package/dist/tools/browser/index.d.ts +5 -0
- package/dist/tools/browser/index.js +6 -0
- package/dist/tools/browser/session.d.ts +44 -0
- package/dist/tools/browser/session.js +748 -0
- package/dist/tools/browser/types.d.ts +48 -0
- package/dist/tools/browser/types.js +2 -0
- package/dist/tools/browser/validators.d.ts +5 -0
- package/dist/tools/browser/validators.js +97 -0
- package/dist/tools/errors.d.ts +59 -0
- package/dist/tools/errors.js +94 -0
- package/dist/tools/exec.d.ts +42 -0
- package/dist/tools/exec.js +327 -0
- package/dist/tools/index.d.ts +11 -0
- package/dist/tools/index.js +14 -0
- package/dist/tools/patch-content.d.ts +10 -0
- package/dist/tools/patch-content.js +126 -0
- package/dist/tools/patch-normalize.d.ts +1 -0
- package/dist/tools/patch-normalize.js +80 -0
- package/dist/tools/patch-parse.d.ts +8 -0
- package/dist/tools/patch-parse.js +201 -0
- package/dist/tools/patch.d.ts +18 -0
- package/dist/tools/patch.js +403 -0
- package/dist/tools/registry.d.ts +36 -0
- package/dist/tools/registry.js +231 -0
- package/dist/tools/sandbox.d.ts +8 -0
- package/dist/tools/sandbox.js +121 -0
- package/dist/tools/schemas.d.ts +2 -0
- package/dist/tools/schemas.js +51 -0
- package/dist/tools/terminal-policy.d.ts +9 -0
- package/dist/tools/terminal-policy.js +313 -0
- package/dist/tools/types.d.ts +99 -0
- package/dist/tools/types.js +103 -0
- package/dist/tools/writer.d.ts +7 -0
- package/dist/tools/writer.js +20 -0
- package/dist/ui/browser.d.ts +10 -0
- package/dist/ui/browser.js +231 -0
- package/dist/ui/chat-handlers.d.ts +4 -0
- package/dist/ui/chat-handlers.js +281 -0
- package/dist/ui/csp-hashes.json +17 -0
- package/dist/ui/csp.d.ts +2 -0
- package/dist/ui/csp.js +66 -0
- package/dist/ui/deps.d.ts +34 -0
- package/dist/ui/deps.js +137 -0
- package/dist/ui/evidence.d.ts +27 -0
- package/dist/ui/evidence.js +142 -0
- package/dist/ui/files-deny.d.ts +2 -0
- package/dist/ui/files-deny.js +12 -0
- package/dist/ui/files.d.ts +65 -0
- package/dist/ui/files.js +492 -0
- package/dist/ui/headers.d.ts +2 -0
- package/dist/ui/headers.js +21 -0
- package/dist/ui/host-check.d.ts +2 -0
- package/dist/ui/host-check.js +58 -0
- package/dist/ui/index.d.ts +20 -0
- package/dist/ui/index.js +23 -0
- package/dist/ui/load-csp.d.ts +1 -0
- package/dist/ui/load-csp.js +28 -0
- package/dist/ui/read-handlers.d.ts +8 -0
- package/dist/ui/read-handlers.js +247 -0
- package/dist/ui/routes.d.ts +36 -0
- package/dist/ui/routes.js +129 -0
- package/dist/ui/run-engine.d.ts +20 -0
- package/dist/ui/run-engine.js +345 -0
- package/dist/ui/run-handlers.d.ts +8 -0
- package/dist/ui/run-handlers.js +431 -0
- package/dist/ui/run-request.d.ts +13 -0
- package/dist/ui/run-request.js +219 -0
- package/dist/ui/runs.d.ts +43 -0
- package/dist/ui/runs.js +92 -0
- package/dist/ui/server.d.ts +11 -0
- package/dist/ui/server.js +143 -0
- package/dist/ui/sink.d.ts +27 -0
- package/dist/ui/sink.js +80 -0
- package/dist/ui/sse.d.ts +7 -0
- package/dist/ui/sse.js +27 -0
- package/dist/ui/static/404.html +1 -0
- package/dist/ui/static/_next/static/ca-A01hy9W98aRvMZKdAw/_buildManifest.js +1 -0
- package/dist/ui/static/_next/static/ca-A01hy9W98aRvMZKdAw/_ssgManifest.js +1 -0
- package/dist/ui/static/_next/static/chunks/255-d47fd57964443afe.js +1 -0
- package/dist/ui/static/_next/static/chunks/4-be1fef693af8e088.js +1 -0
- package/dist/ui/static/_next/static/chunks/4bd1b696-c023c6e3521b1417.js +1 -0
- package/dist/ui/static/_next/static/chunks/app/_not-found/page-75825b09bcecad97.js +1 -0
- package/dist/ui/static/_next/static/chunks/app/launch/page-9c86a13c29884245.js +1 -0
- package/dist/ui/static/_next/static/chunks/app/layout-bdea63fe87947d50.js +1 -0
- package/dist/ui/static/_next/static/chunks/app/page-4168c12c68b7a853.js +1 -0
- package/dist/ui/static/_next/static/chunks/framework-a6e0b7e30f98059a.js +1 -0
- package/dist/ui/static/_next/static/chunks/main-778a50aebff02192.js +1 -0
- package/dist/ui/static/_next/static/chunks/main-app-30679af7240d63e9.js +1 -0
- package/dist/ui/static/_next/static/chunks/pages/_app-7d307437aca18ad4.js +1 -0
- package/dist/ui/static/_next/static/chunks/pages/_error-cb2a52f75f2162e2.js +1 -0
- package/dist/ui/static/_next/static/chunks/polyfills-42372ed130431b0a.js +1 -0
- package/dist/ui/static/_next/static/chunks/webpack-4a462cecab786e93.js +1 -0
- package/dist/ui/static/_next/static/css/be7cb54d5c5673b6.css +1 -0
- package/dist/ui/static/assets/editors/goland.svg +35 -0
- package/dist/ui/static/assets/editors/intellij.svg +39 -0
- package/dist/ui/static/assets/editors/pycharm.svg +58 -0
- package/dist/ui/static/assets/editors/rustrover.svg +19 -0
- package/dist/ui/static/assets/editors/vscode.svg +1 -0
- package/dist/ui/static/assets/editors/webstorm.svg +21 -0
- package/dist/ui/static/assets/icons/anthropic.svg +1 -0
- package/dist/ui/static/assets/icons/brave.svg +1 -0
- package/dist/ui/static/assets/icons/css3.svg +1 -0
- package/dist/ui/static/assets/icons/docker.svg +1 -0
- package/dist/ui/static/assets/icons/git.svg +1 -0
- package/dist/ui/static/assets/icons/github.svg +1 -0
- package/dist/ui/static/assets/icons/go.svg +1 -0
- package/dist/ui/static/assets/icons/gradle.svg +1 -0
- package/dist/ui/static/assets/icons/grafana.svg +1 -0
- package/dist/ui/static/assets/icons/graphql.svg +1 -0
- package/dist/ui/static/assets/icons/html5.svg +1 -0
- package/dist/ui/static/assets/icons/image.svg +1 -0
- package/dist/ui/static/assets/icons/java.svg +1 -0
- package/dist/ui/static/assets/icons/javascript.svg +1 -0
- package/dist/ui/static/assets/icons/json.svg +1 -0
- package/dist/ui/static/assets/icons/kafka.svg +1 -0
- package/dist/ui/static/assets/icons/kubernetes.svg +1 -0
- package/dist/ui/static/assets/icons/linear.svg +1 -0
- package/dist/ui/static/assets/icons/markdown.svg +1 -0
- package/dist/ui/static/assets/icons/nginx.svg +1 -0
- package/dist/ui/static/assets/icons/nodejs.svg +1 -0
- package/dist/ui/static/assets/icons/notion.svg +1 -0
- package/dist/ui/static/assets/icons/openai.svg +1 -0
- package/dist/ui/static/assets/icons/playwright.svg +1 -0
- package/dist/ui/static/assets/icons/postgresql.svg +1 -0
- package/dist/ui/static/assets/icons/prometheus.svg +1 -0
- package/dist/ui/static/assets/icons/properties.svg +1 -0
- package/dist/ui/static/assets/icons/puppeteer.svg +1 -0
- package/dist/ui/static/assets/icons/python.svg +1 -0
- package/dist/ui/static/assets/icons/react.svg +1 -0
- package/dist/ui/static/assets/icons/redis.svg +1 -0
- package/dist/ui/static/assets/icons/rust.svg +1 -0
- package/dist/ui/static/assets/icons/sentry.svg +1 -0
- package/dist/ui/static/assets/icons/slack.svg +1 -0
- package/dist/ui/static/assets/icons/spring.svg +1 -0
- package/dist/ui/static/assets/icons/typescript.svg +1 -0
- package/dist/ui/static/assets/icons/upstash.svg +1 -0
- package/dist/ui/static/assets/icons/yaml.svg +1 -0
- package/dist/ui/static/assets/keiko-logo.svg +10 -0
- package/dist/ui/static/index.html +1 -0
- package/dist/ui/static/index.txt +19 -0
- package/dist/ui/static/keiko-logo.svg +10 -0
- package/dist/ui/static/launch.html +1 -0
- package/dist/ui/static/launch.txt +19 -0
- package/dist/ui/static.d.ts +3 -0
- package/dist/ui/static.js +72 -0
- package/dist/ui/store/chats.d.ts +14 -0
- package/dist/ui/store/chats.js +110 -0
- package/dist/ui/store/db.d.ts +6 -0
- package/dist/ui/store/db.js +182 -0
- package/dist/ui/store/errors.d.ts +12 -0
- package/dist/ui/store/errors.js +30 -0
- package/dist/ui/store/index.d.ts +6 -0
- package/dist/ui/store/index.js +6 -0
- package/dist/ui/store/messages.d.ts +5 -0
- package/dist/ui/store/messages.js +137 -0
- package/dist/ui/store/paths.d.ts +4 -0
- package/dist/ui/store/paths.js +69 -0
- package/dist/ui/store/projects.d.ts +7 -0
- package/dist/ui/store/projects.js +61 -0
- package/dist/ui/store/schema.d.ts +3 -0
- package/dist/ui/store/schema.js +77 -0
- package/dist/ui/store/types.d.ts +80 -0
- package/dist/ui/store/types.js +3 -0
- package/dist/ui/store/validation.d.ts +4 -0
- package/dist/ui/store/validation.js +72 -0
- package/dist/ui/store-handlers.d.ts +16 -0
- package/dist/ui/store-handlers.js +465 -0
- package/dist/ui/terminal-errors.d.ts +21 -0
- package/dist/ui/terminal-errors.js +45 -0
- package/dist/ui/terminal-evidence.d.ts +20 -0
- package/dist/ui/terminal-evidence.js +65 -0
- package/dist/ui/terminal-routes.d.ts +9 -0
- package/dist/ui/terminal-routes.js +219 -0
- package/dist/ui/terminal.d.ts +67 -0
- package/dist/ui/terminal.js +835 -0
- package/dist/verification/classify.d.ts +10 -0
- package/dist/verification/classify.js +53 -0
- package/dist/verification/detect.d.ts +4 -0
- package/dist/verification/detect.js +81 -0
- package/dist/verification/errors.d.ts +11 -0
- package/dist/verification/errors.js +21 -0
- package/dist/verification/index.d.ts +17 -0
- package/dist/verification/index.js +13 -0
- package/dist/verification/limits.d.ts +3 -0
- package/dist/verification/limits.js +40 -0
- package/dist/verification/monitor.d.ts +4 -0
- package/dist/verification/monitor.js +58 -0
- package/dist/verification/orchestrator.d.ts +16 -0
- package/dist/verification/orchestrator.js +363 -0
- package/dist/verification/plan.d.ts +9 -0
- package/dist/verification/plan.js +125 -0
- package/dist/verification/summary.d.ts +40 -0
- package/dist/verification/summary.js +67 -0
- package/dist/verification/types.d.ts +63 -0
- package/dist/verification/types.js +13 -0
- package/dist/workflows/bug-investigation/context.d.ts +7 -0
- package/dist/workflows/bug-investigation/context.js +119 -0
- package/dist/workflows/bug-investigation/descriptor.d.ts +3 -0
- package/dist/workflows/bug-investigation/descriptor.js +46 -0
- package/dist/workflows/bug-investigation/emit.d.ts +12 -0
- package/dist/workflows/bug-investigation/emit.js +35 -0
- package/dist/workflows/bug-investigation/events.d.ts +81 -0
- package/dist/workflows/bug-investigation/events.js +9 -0
- package/dist/workflows/bug-investigation/failure-parse.d.ts +3 -0
- package/dist/workflows/bug-investigation/failure-parse.js +154 -0
- package/dist/workflows/bug-investigation/guard.d.ts +2 -0
- package/dist/workflows/bug-investigation/guard.js +69 -0
- package/dist/workflows/bug-investigation/index.d.ts +7 -0
- package/dist/workflows/bug-investigation/index.js +13 -0
- package/dist/workflows/bug-investigation/internal.d.ts +37 -0
- package/dist/workflows/bug-investigation/internal.js +64 -0
- package/dist/workflows/bug-investigation/model-loop.d.ts +4 -0
- package/dist/workflows/bug-investigation/model-loop.js +223 -0
- package/dist/workflows/bug-investigation/parse.d.ts +3 -0
- package/dist/workflows/bug-investigation/parse.js +123 -0
- package/dist/workflows/bug-investigation/prompt.d.ts +4 -0
- package/dist/workflows/bug-investigation/prompt.js +107 -0
- package/dist/workflows/bug-investigation/report.d.ts +23 -0
- package/dist/workflows/bug-investigation/report.js +151 -0
- package/dist/workflows/bug-investigation/stages.d.ts +13 -0
- package/dist/workflows/bug-investigation/stages.js +242 -0
- package/dist/workflows/bug-investigation/types.d.ts +91 -0
- package/dist/workflows/bug-investigation/types.js +14 -0
- package/dist/workflows/bug-investigation/verify-stage.d.ts +10 -0
- package/dist/workflows/bug-investigation/verify-stage.js +91 -0
- package/dist/workflows/bug-investigation/workflow.d.ts +2 -0
- package/dist/workflows/bug-investigation/workflow.js +74 -0
- package/dist/workflows/descriptor.d.ts +20 -0
- package/dist/workflows/descriptor.js +8 -0
- package/dist/workflows/index.d.ts +3 -0
- package/dist/workflows/index.js +2 -0
- package/dist/workflows/unit-tests/context.d.ts +7 -0
- package/dist/workflows/unit-tests/context.js +129 -0
- package/dist/workflows/unit-tests/conventions.d.ts +4 -0
- package/dist/workflows/unit-tests/conventions.js +87 -0
- package/dist/workflows/unit-tests/descriptor.d.ts +4 -0
- package/dist/workflows/unit-tests/descriptor.js +43 -0
- package/dist/workflows/unit-tests/emit.d.ts +12 -0
- package/dist/workflows/unit-tests/emit.js +35 -0
- package/dist/workflows/unit-tests/events.d.ts +78 -0
- package/dist/workflows/unit-tests/events.js +7 -0
- package/dist/workflows/unit-tests/index.d.ts +6 -0
- package/dist/workflows/unit-tests/index.js +10 -0
- package/dist/workflows/unit-tests/internal.d.ts +35 -0
- package/dist/workflows/unit-tests/internal.js +43 -0
- package/dist/workflows/unit-tests/model-loop.d.ts +4 -0
- package/dist/workflows/unit-tests/model-loop.js +95 -0
- package/dist/workflows/unit-tests/parse.d.ts +6 -0
- package/dist/workflows/unit-tests/parse.js +68 -0
- package/dist/workflows/unit-tests/prompt.d.ts +4 -0
- package/dist/workflows/unit-tests/prompt.js +71 -0
- package/dist/workflows/unit-tests/report.d.ts +21 -0
- package/dist/workflows/unit-tests/report.js +90 -0
- package/dist/workflows/unit-tests/stages.d.ts +9 -0
- package/dist/workflows/unit-tests/stages.js +155 -0
- package/dist/workflows/unit-tests/types.d.ts +70 -0
- package/dist/workflows/unit-tests/types.js +11 -0
- package/dist/workflows/unit-tests/verify-stage.d.ts +9 -0
- package/dist/workflows/unit-tests/verify-stage.js +56 -0
- package/dist/workflows/unit-tests/workflow.d.ts +2 -0
- package/dist/workflows/unit-tests/workflow.js +58 -0
- package/dist/workspace/contextPack.d.ts +9 -0
- package/dist/workspace/contextPack.js +94 -0
- package/dist/workspace/detect.d.ts +3 -0
- package/dist/workspace/detect.js +135 -0
- package/dist/workspace/discovery.d.ts +9 -0
- package/dist/workspace/discovery.js +167 -0
- package/dist/workspace/errors.d.ts +39 -0
- package/dist/workspace/errors.js +66 -0
- package/dist/workspace/fs.d.ts +21 -0
- package/dist/workspace/fs.js +36 -0
- package/dist/workspace/ignore.d.ts +14 -0
- package/dist/workspace/ignore.js +176 -0
- package/dist/workspace/index.d.ts +11 -0
- package/dist/workspace/index.js +13 -0
- package/dist/workspace/paths.d.ts +2 -0
- package/dist/workspace/paths.js +38 -0
- package/dist/workspace/realpath.d.ts +7 -0
- package/dist/workspace/realpath.js +72 -0
- package/dist/workspace/retrieval.d.ts +9 -0
- package/dist/workspace/retrieval.js +74 -0
- package/dist/workspace/summary.d.ts +3 -0
- package/dist/workspace/summary.js +54 -0
- package/dist/workspace/types.d.ts +103 -0
- package/dist/workspace/types.js +27 -0
- package/package.json +58 -0
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
// All Evidence* interfaces, the retention/redaction config, the injectable deps, and the frozen
|
|
2
|
+
// EVIDENCE_SCHEMA_VERSION / DEFAULT_RETENTION tables (ADR-0010 D2/D6/D10). No runtime logic beyond
|
|
3
|
+
// the two frozen tables. Everything here is plain-JSON, deeply readonly, and JSON-serializable:
|
|
4
|
+
// timestamps are epoch-ms numbers sourced from events/RunResult, never Date objects.
|
|
5
|
+
// The evidence schema discriminant — distinct from the harness event `schemaVersion`. A breaking
// change is introduced as a NEW union member ("2") rather than by mutating "1" (ADR-0010 D2).
export const EVIDENCE_SCHEMA_VERSION = "1";
// Default retention table. It is frozen at runtime so the "frozen table" contract stated in this
// module's header holds for plain-JavaScript consumers as well, not only through the TypeScript
// readonly types: an accidental write can no longer change the default for every later reader.
export const DEFAULT_RETENTION = Object.freeze({ maxRuns: 50 });
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
import { type EvidenceReport } from "./report.js";
|
|
2
|
+
import { type EvidenceManifest } from "./types.js";
|
|
3
|
+
import type { EvidenceStore } from "./store.js";
|
|
4
|
+
import type { EnvSource } from "../gateway/index.js";
|
|
5
|
+
/** The workflow kinds that can be folded into an evidence manifest. */
export type WorkflowRunKind =
    | "unit-tests"
    | "bug-investigation";
|
|
6
|
+
/** The terminal states a workflow run can end in. */
export type WorkflowTerminalStatus =
    | "completed"
    | "cancelled"
    | "failed";
|
|
7
|
+
/** Dependencies handed to `persistWorkflowEvidence` when it writes a manifest. */
export interface EvidencePersistContext {
    /** Store the serialized, redacted manifest is written through. */
    readonly store: EvidenceStore;
    /** Environment source passed to the audit redactor. */
    readonly env: EnvSource;
    /** Extra secret strings for the redactor; treated as an empty list when omitted. */
    readonly additionalSecrets?: ReadonlyArray<string> | undefined;
}
|
|
12
|
+
/** Identity and timing of a terminated workflow run, as recorded in the manifest's `run` section. */
export interface WorkflowRunIdentity {
    readonly runId: string;
    readonly fingerprint: string;
    /** Model identifier; also used to resolve the manifest's cost class. */
    readonly modelId: string;
    /** Workflow kind; mapped to the manifest's task type. */
    readonly kind: WorkflowRunKind;
    /** Terminal status; recorded as the run outcome. */
    readonly status: WorkflowTerminalStatus;
    /** Start timestamp (number); the duration is `finishedAt - startedAt`, clamped at 0. */
    readonly startedAt: number;
    /** Finish timestamp (number). */
    readonly finishedAt: number;
    /** When defined, a `context` section is added to the manifest. */
    readonly workspaceRoot?: string | undefined;
}
|
|
22
|
+
/** Minimal shape required of a buffered workflow event: only its `type` discriminant. */
export interface WorkflowEventLike {
    /** Event type name, e.g. `workflow:model:call:completed`. */
    readonly type: string;
}
|
|
25
|
+
/**
 * Sums the usage fields of the model-call completion events into usage totals.
 *
 * @param events Buffered workflow events; events of other types are ignored.
 * @returns Prompt/completion token totals, request count and total latency.
 */
export declare function foldWorkflowUsage(
    events: ReadonlyArray<WorkflowEventLike>
): EvidenceManifest["usageTotals"];
|
|
26
|
+
/**
 * Builds the (not yet redacted) evidence manifest for a terminated workflow run.
 *
 * @param identity Run identity and timing.
 * @param events Buffered workflow events, used for the usage totals.
 * @param report The workflow's report; verification and patch metadata are extracted when present.
 */
export declare function buildWorkflowManifest(
    identity: WorkflowRunIdentity,
    events: ReadonlyArray<WorkflowEventLike>,
    report: unknown
): EvidenceManifest;
|
|
27
|
+
/**
 * Builds the workflow manifest, redacts it, writes it through `ctx.store` and returns the
 * resulting evidence report. Errors are not caught here; they surface to the caller.
 *
 * Note the argument order: `report` comes BEFORE `events`, unlike `buildWorkflowManifest`.
 *
 * @param identity Run identity and timing.
 * @param report The workflow's report.
 * @param events Buffered workflow events.
 * @param ctx Store, environment source and optional additional secrets.
 */
export declare function persistWorkflowEvidence(
    identity: WorkflowRunIdentity,
    report: unknown,
    events: ReadonlyArray<WorkflowEventLike>,
    ctx: EvidencePersistContext
): EvidenceReport;
|
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
// Shared workflow→EvidenceManifest mapping (ADR-0010 + ADR-0011 AC5 + ADR-0012 D9/C2). This is the
|
|
2
|
+
// PURE, surface-agnostic core that folds a terminated workflow run (its typed report + buffered
|
|
3
|
+
// events) into a redacted, versioned EvidenceManifest and writes it through the #10 EvidenceStore.
|
|
4
|
+
//
|
|
5
|
+
// It was extracted from src/ui/evidence.ts so BOTH the UI BFF and the evaluation harness build the
|
|
6
|
+
// manifest from one implementation. The dependency direction is preserved: this module lives in the
|
|
7
|
+
// audit layer and imports only audit + harness + gateway primitives. It defines its own narrow
|
|
8
|
+
// `WorkflowRunKind` / `WorkflowTerminalStatus` so it never depends on src/ui types. The UI
|
|
9
|
+
// re-exports it (behaviour-preserving); the evaluation runner imports it directly.
|
|
10
|
+
//
|
|
11
|
+
import { buildEvidenceReport } from "./report.js";
|
|
12
|
+
import { resolveCostClass } from "./aggregate.js";
|
|
13
|
+
import { createAuditRedactor, deepRedactStrings } from "./redaction.js";
|
|
14
|
+
import { EVIDENCE_SCHEMA_VERSION } from "./types.js";
|
|
15
|
+
import { HARNESS_VERSION } from "../harness/index.js";
|
|
16
|
+
// Lookup table from a workflow run kind to the task type recorded in the manifest's `run` section.
const KIND_TO_TASK_TYPE = Object.fromEntries([
    ["unit-tests", "generate-unit-tests"],
    ["bug-investigation", "investigate-bug"],
]);
|
|
20
|
+
// Sums usage across the buffered model-call completion events (`workflow:model:call:completed`
// and `bug:model:call:completed`). These workflow events carry their token/latency fields at the
// TOP level (not under `usage` like the harness event), so the four dimensions are accumulated
// here directly instead of going through the audit `aggregateUsage` fold.
export function foldWorkflowUsage(events) {
    const totals = { promptTokens: 0, completionTokens: 0, requestCount: 0, totalLatencyMs: 0 };
    for (const candidate of events) {
        switch (candidate.type) {
            case "workflow:model:call:completed":
            case "bug:model:call:completed":
                // Missing or non-finite fields contribute 0, but the call is still counted.
                totals.promptTokens += numberOf(candidate.promptTokens);
                totals.completionTokens += numberOf(candidate.completionTokens);
                totals.totalLatencyMs += numberOf(candidate.latencyMs);
                totals.requestCount += 1;
                break;
            default:
                // Every other event type is irrelevant to usage accounting.
                break;
        }
    }
    return totals;
}
|
|
41
|
+
// Returns `value` when it is a finite number; every other input (wrong type, NaN, ±Infinity)
// counts as 0.
function numberOf(value) {
    if (typeof value !== "number") {
        return 0;
    }
    return Number.isFinite(value) ? value : 0;
}
|
|
44
|
+
// Assembles the (not yet redacted) evidence manifest for a terminated workflow run. The sections
// are built in the same order in which they appear in the returned object, so the serialized key
// order is: schema version, run, model, usage totals, optional context, then the remaining fields.
export function buildWorkflowManifest(identity, events, report) {
    const run = {
        runId: identity.runId,
        fingerprint: identity.fingerprint,
        harnessVersion: HARNESS_VERSION,
        taskType: KIND_TO_TASK_TYPE[identity.kind],
        outcome: identity.status,
        startedAt: identity.startedAt,
        finishedAt: identity.finishedAt,
        // Clamp so a finish timestamp earlier than the start never yields a negative duration.
        durationMs: Math.max(0, identity.finishedAt - identity.startedAt),
    };
    const model = { modelId: identity.modelId, costClass: resolveCostClass(identity.modelId) };
    const usageTotals = foldWorkflowUsage(events);
    // The `context` key is omitted entirely (not set to undefined) when there is no workspace root.
    const context = contextOf(identity.workspaceRoot);
    const contextSection = context === undefined ? {} : { context };
    return {
        evidenceSchemaVersion: EVIDENCE_SCHEMA_VERSION,
        run,
        model,
        usageTotals,
        ...contextSection,
        stateTransitions: [],
        toolCalls: [],
        commandExecutions: [],
        verification: verificationOf(report),
        patch: patchOf(report),
        failure: undefined,
    };
}
|
|
70
|
+
// Produces the manifest `context` section for a workspace root: the root itself plus zeroed
// counters and an empty entry list. Yields `undefined` when no workspace root was supplied.
function contextOf(workspaceRoot) {
    return workspaceRoot === undefined
        ? undefined
        : {
            workspaceRoot,
            totalCandidates: 0,
            usedBytes: 0,
            budgetBytes: 0,
            droppedForBudget: 0,
            entries: [],
        };
}
|
|
83
|
+
// Builds the workflow manifest, redacts it, writes it through the store and returns the structured
// EvidenceReport. Nothing is caught here: errors intentionally surface to the caller so UI and
// evaluation paths cannot silently claim a terminal run without a durable evidence artifact.
export function persistWorkflowEvidence(identity, report, events, ctx) {
    const manifest = buildWorkflowManifest(identity, events, report);
    const secrets = ctx.additionalSecrets ?? [];
    const scrub = createAuditRedactor({ additionalSecrets: secrets }, ctx.env);
    const safeManifest = deepRedactStrings(manifest, scrub);
    // The store key is the run id taken from the REDACTED manifest, as is the serialized payload.
    const { runId } = safeManifest.run;
    const location = ctx.store.put(runId, JSON.stringify(safeManifest));
    return buildEvidenceReport(safeManifest, location);
}
|
|
93
|
+
// True for non-null, non-array objects; false for primitives, null, arrays and functions.
function isRecord(value) {
    if (value === null || Array.isArray(value)) {
        return false;
    }
    return typeof value === "object";
}
|
|
96
|
+
// Pulls the verification audit summary out of a workflow report, if there is one. It looks at
// `report.verificationSummary` first and falls back to `report.verified.verification`. The summary
// is already in the audit shape; the deep redact in persist re-scrubs every string leaf for
// defense in depth.
function verificationOf(report) {
    if (isRecord(report)) {
        const candidate = report.verificationSummary ?? verifiedVerificationOf(report);
        if (isRecord(candidate)) {
            return candidate;
        }
    }
    return undefined;
}
|
|
105
|
+
// Returns `report.verified.verification` when `report.verified` is a record, otherwise undefined.
function verifiedVerificationOf(report) {
    const { verified } = report;
    if (!isRecord(verified)) {
        return undefined;
    }
    return verified.verification;
}
|
|
109
|
+
// Derives patch metadata (counts/bytes only, never the raw diff) from a workflow report.
// unit-tests reports carry `addedTestFiles`; bug-investigation reports carry `changedFiles`.
// Yields undefined when the report is not a record or shows neither a diff nor any changed file.
function patchOf(report) {
    if (!isRecord(report)) {
        return undefined;
    }
    const diff = report.proposedDiff;
    const hasDiff = typeof diff === "string" && diff.length > 0;
    const fileCount = changedFileCount(report);
    if (hasDiff || fileCount !== 0) {
        return {
            proposed: hasDiff,
            applied: patchApplied(report),
            targetFileCount: fileCount,
            // Size of the diff in UTF-8 bytes; 0 when the report has no string diff.
            patchBytes: typeof diff === "string" ? Buffer.byteLength(diff, "utf8") : 0,
            changedFiles: fileCount,
            created: 0,
            deleted: 0,
        };
    }
    return undefined;
}
|
|
131
|
+
// A patch counts as applied when the report status is "completed" or "fix-applied", or when
// `report.verified` is a record whose `patchApplied` flag is exactly `true`.
function patchApplied(report) {
    const appliedStatuses = ["completed", "fix-applied"];
    if (appliedStatuses.includes(report.status)) {
        return true;
    }
    const outcome = report.verified;
    if (!isRecord(outcome)) {
        return false;
    }
    return outcome.patchApplied === true;
}
|
|
138
|
+
// Counts the files a report touched: the length of `addedTestFiles` when that is an array,
// otherwise the length of `changedFiles` when that is an array, otherwise 0.
function changedFileCount(report) {
    for (const key of ["addedTestFiles", "changedFiles"]) {
        const files = report[key];
        if (Array.isArray(files)) {
            return files.length;
        }
    }
    return 0;
}
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
// `keiko context` — a DRY-RUN-BY-CONSTRUCTION repository-context summary. It detects a
|
|
2
|
+
// workspace, builds a redacted structured summary (and a context pack when --task/--budget is
|
|
3
|
+
// given), and prints a human table or JSON. It NEVER constructs an agent session and NEVER
|
|
4
|
+
// calls a model: there is no import of the harness/gateway run path in this file.
|
|
5
|
+
import { detectWorkspace } from "../workspace/detect.js";
|
|
6
|
+
import { discoverWithStats } from "../workspace/discovery.js";
|
|
7
|
+
import { buildContextPackFromFiles } from "../workspace/contextPack.js";
|
|
8
|
+
import { buildWorkspaceSummary } from "../workspace/summary.js";
|
|
9
|
+
import { WorkspaceError } from "../workspace/errors.js";
|
|
10
|
+
import { DEFAULT_CONTEXT_REQUEST } from "../workspace/types.js";
|
|
11
|
+
// Usage text for `keiko context`; runContextCli writes it to stderr when argument parsing fails.
const USAGE = `Usage:
|
|
12
|
+
keiko context [--dir PATH] [--task TEXT] [--budget BYTES] [--json]
|
|
13
|
+
|
|
14
|
+
Detects the workspace, prints a redacted context summary, and (with --task or
|
|
15
|
+
--budget) a deterministic context pack. Dry-run by construction: no model is called.
|
|
16
|
+
`;
|
|
17
|
+
// Looks up a `--flag value` pair in `args`, using the first occurrence of the flag. Yields
// `undefined` when the flag is absent, `null` when it is present but the next argument is missing
// or is itself a `--` flag (a usage error), and otherwise the argument that follows the flag.
function flagValue(args, name) {
    const position = args.indexOf(name);
    if (position < 0) {
        return undefined;
    }
    const next = args[position + 1];
    if (next === undefined || next.startsWith("--")) {
        return null;
    }
    return next;
}
|
|
27
|
+
// Parses the `keiko context` arguments into `{ dir, task, budget, json }`. Returns `null` (usage
// error) when --dir/--task/--budget is present without a value, or when --budget is not a positive
// decimal integer without leading zeros that fits in a safe integer. `dir` defaults to ".".
function parseArgs(args) {
    const [dirArg, taskArg, budgetArg] = ["--dir", "--task", "--budget"].map((flag) => flagValue(args, flag));
    if ([dirArg, taskArg, budgetArg].includes(null)) {
        return null;
    }
    let budget;
    if (budgetArg !== undefined) {
        if (!/^[1-9][0-9]*$/.test(budgetArg)) {
            return null;
        }
        budget = Number.parseInt(budgetArg, 10);
        // A digit string can still exceed the safe-integer range once parsed.
        if (!Number.isSafeInteger(budget)) {
            return null;
        }
    }
    return { dir: dirArg ?? ".", task: taskArg, budget, json: args.includes("--json") };
}
|
|
43
|
+
// Detects the workspace at `parsed.dir`, discovers its files with the default discovery settings,
// and builds the workspace summary. A context pack is built and included only when a task or a
// budget was supplied; the budget falls back to the default request's `budgetBytes`.
function buildSummary(parsed) {
    const workspace = detectWorkspace(parsed.dir);
    const { files, stats } = discoverWithStats(workspace, DEFAULT_CONTEXT_REQUEST.discovery);
    const { task, budget } = parsed;
    let pack;
    if (task !== undefined || budget !== undefined) {
        const request = {
            ...DEFAULT_CONTEXT_REQUEST,
            task,
            budgetBytes: budget ?? DEFAULT_CONTEXT_REQUEST.budgetBytes,
        };
        pack = buildContextPackFromFiles(workspace, request, files);
    }
    return buildWorkspaceSummary(workspace, pack, stats);
}
|
|
57
|
+
function renderContext(summary, io) {
|
|
58
|
+
const context = summary.context;
|
|
59
|
+
if (context === undefined) {
|
|
60
|
+
return;
|
|
61
|
+
}
|
|
62
|
+
io.out(`Context: used=${String(context.usedBytes)}/${String(context.budgetBytes)} bytes, dropped=${String(context.droppedForBudget)}\n`);
|
|
63
|
+
io.out("PATH\tREASON\tSIZE\tEXCERPT-BYTES\tTRUNCATED\n");
|
|
64
|
+
for (const entry of context.entries) {
|
|
65
|
+
io.out(`${entry.path}\t${entry.selectionReason}\t${String(entry.sizeBytes)}\t${String(entry.excerptBytes)}\t${entry.truncated ? "yes" : "no"}\n`);
|
|
66
|
+
}
|
|
67
|
+
}
|
|
68
|
+
// Prints the human-readable workspace summary, one `Label: value` line per field, followed by the
// context-pack section. A missing name/version and empty source/test directory lists are shown as
// "(none)".
function renderText(summary, io) {
    const field = (label, value) => io.out(`${label}: ${value}\n`);
    field("Workspace", summary.root);
    field("Name", summary.name ?? "(none)");
    field("Version", summary.version ?? "(none)");
    field("Framework", summary.testFramework);
    field("Sources", summary.sourceDirs.join(", ") || "(none)");
    field("Tests", summary.testDirs.join(", ") || "(none)");
    field("Languages", summary.languages.join(", "));
    field("Counts", `discovered=${String(summary.counts.discovered)} denied=${String(summary.counts.denied)} ignored=${String(summary.counts.ignored)}`);
    renderContext(summary, io);
}
|
|
79
|
+
// Entry point for `keiko context`. Returns the exit code: 2 when the arguments do not parse (usage
// is written to stderr), 1 when building the summary raises a WorkspaceError (reported with its
// code), and 0 after printing the summary as JSON (--json) or text. Other errors propagate.
export function runContextCli(args, io) {
    const parsed = parseArgs(args);
    if (parsed === null) {
        io.err(USAGE);
        return 2;
    }
    try {
        const summary = buildSummary(parsed);
        if (!parsed.json) {
            renderText(summary, io);
        }
        else {
            io.out(`${JSON.stringify(summary, null, 2)}\n`);
        }
        return 0;
    }
    catch (error) {
        if (!(error instanceof WorkspaceError)) {
            throw error;
        }
        io.err(`Error [${error.code}]: ${error.message}\n`);
        return 1;
    }
}
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
import type { EnvSource } from "../gateway/config.js";
|
|
2
|
+
import { type EvalRunnerDeps } from "../evaluations/index.js";
|
|
3
|
+
import type { CliIo } from "./runner.js";
|
|
4
|
+
/**
 * Optional injectable dependencies for `runEvaluateCli`. `runner` carries overrides that are
 * forwarded to the evaluation runner.
 */
export interface EvaluateDeps {
|
|
5
|
+
readonly runner?: EvalRunnerDeps | undefined;
|
|
6
|
+
}
|
|
7
|
+
/**
 * Runs the `keiko evaluate` command.
 *
 * @param args - Command-line arguments for the subcommand.
 * @param io - Output/error sink used instead of writing to the process streams directly.
 * @param env - Optional environment source.
 * @param deps - Optional injected dependencies.
 * @returns A promise resolving to the process exit code.
 */
export declare function runEvaluateCli(args: readonly string[], io: CliIo, env?: EnvSource, deps?: EvaluateDeps): Promise<number>;
|
|
@@ -0,0 +1,207 @@
|
|
|
1
|
+
// `keiko evaluate` — runs the evaluation harness (ADR-0012 D10). Offline (default, deterministic, no
|
|
2
|
+
// network) replays each fixture's scripted transcript; --live builds a GatewayModelPort and fails
|
|
3
|
+
// CLOSED (exit 1, names the required env vars) when no config/credentials resolve — it NEVER silently
|
|
4
|
+
// falls back to offline. Dry-run-safe by construction: fixtures choose their own apply mode. Mirrors
|
|
5
|
+
// runGenTestsCli structurally (injected CliIo + deps, testable without process.*). Exit 0 when all
|
|
6
|
+
// applicable dimensions pass AND surface parity passes; 1 on dimension/parity failure or runtime
|
|
7
|
+
// error; 2 on usage error (unknown flag, mutual exclusion, unknown suite/fixture name).
|
|
8
|
+
import { writeFileSync } from "node:fs";
|
|
9
|
+
import { GatewayError } from "../gateway/errors.js";
|
|
10
|
+
import { redact } from "../gateway/redaction.js";
|
|
11
|
+
import { createAuditRedactor, deepRedactStrings } from "../audit/index.js";
|
|
12
|
+
import { fixtureByName, fixturesForSuite, isSuiteName, renderEvalSummary, runEvaluationSuite, } from "../evaluations/index.js";
|
|
13
|
+
// Usage text for `keiko evaluate`; printed to stdout for --help and to stderr on usage errors.
const USAGE = `Usage:
|
|
14
|
+
keiko evaluate [--suite <unit-tests|bug-investigation|all>] [--fixture <name>]
|
|
15
|
+
[--live] [--model <id>] [--config PATH] [--json] [--output <path>]
|
|
16
|
+
|
|
17
|
+
Runs the evaluation harness against the built-in fixtures. Offline by default
|
|
18
|
+
(deterministic, no network); pass --live to evaluate against a configured model.
|
|
19
|
+
--suite and --fixture are mutually exclusive.
|
|
20
|
+
`;
|
|
21
|
+
// Reads the argument following the first occurrence of `name`. Returns `undefined` when the flag
// is absent, and `null` when the flag is present but the following argument is missing or is
// another `--` flag.
function flagValue(args, name) {
    const at = args.indexOf(name);
    if (at === -1) {
        return undefined;
    }
    const next = args[at + 1];
    return next !== undefined && !next.startsWith("--") ? next : null;
}
|
|
29
|
+
// Flags that consume the following argument as their value.
const VALUE_FLAGS = ["--suite", "--fixture", "--model", "--config", "--output"];
|
|
30
|
+
// Switch flags that take no value.
const BOOLEAN_FLAGS = ["--live", "--json"];
|
|
31
|
+
// Collects the value of every flag in VALUE_FLAGS into an object keyed by flag name (absent flags
// map to `undefined`). Returns `null` as soon as any flag is present without a value.
function readValueFlags(args) {
    const pairs = VALUE_FLAGS.map((flag) => [flag, flagValue(args, flag)]);
    if (pairs.some(([, found]) => found === null)) {
        return null;
    }
    return Object.fromEntries(pairs);
}
|
|
42
|
+
// True when `value` is one of the flags listed in VALUE_FLAGS.
function isValueFlag(value) {
    return VALUE_FLAGS.some((flag) => flag === value);
}
|
|
45
|
+
// True when `value` is one of the flags listed in BOOLEAN_FLAGS.
function isBooleanFlag(value) {
    return BOOLEAN_FLAGS.some((flag) => flag === value);
}
|
|
48
|
+
// Walks the arguments left to right and returns a description of the first usage problem: a value
// flag without a value, an unknown `--` flag, or a stray positional argument. Returns `undefined`
// when every argument is a known flag or the value of one.
function findUsageError(args) {
    let cursor = 0;
    while (cursor < args.length) {
        const token = args[cursor];
        cursor += 1;
        if (token === undefined || isBooleanFlag(token)) {
            continue;
        }
        if (!isValueFlag(token)) {
            return token.startsWith("--") ? `unknown flag ${token}` : `unexpected argument ${token}`;
        }
        const following = args[cursor];
        if (following === undefined || following.startsWith("--")) {
            return `missing value for ${token}`;
        }
        // Skip over the value that belongs to this flag.
        cursor += 1;
    }
    return undefined;
}
|
|
69
|
+
// Builds the parsed option object for `keiko evaluate` from the raw arguments. Returns `null` when
// a value flag is present without its value.
function parseArgs(args) {
    const flagValues = readValueFlags(args);
    if (flagValues === null) {
        return null;
    }
    const isSet = (flag) => args.includes(flag);
    const { "--suite": suite, "--fixture": fixture, "--model": model, "--config": config, "--output": output, } = flagValues;
    return { suite, fixture, live: isSet("--live"), model, config, json: isSet("--json"), output };
}
|
|
84
|
+
// Chooses the fixtures to run from --suite / --fixture. Returns `{ fixtures }` on success or
// `{ usageError }` when both flags are given, the fixture name is unknown, or the suite name is
// unknown. With neither flag the "all" suite is used.
function selectFixtures(parsed) {
    const { suite: requestedSuite, fixture: requestedFixture } = parsed;
    if (requestedFixture !== undefined) {
        if (requestedSuite !== undefined) {
            return { usageError: "Error: --suite and --fixture are mutually exclusive.\n" };
        }
        const match = fixtureByName(requestedFixture);
        if (match === undefined) {
            return { usageError: `Error: unknown fixture "${requestedFixture}".\n` };
        }
        return { fixtures: [match] };
    }
    const suiteName = requestedSuite ?? "all";
    return isSuiteName(suiteName)
        ? { fixtures: fixturesForSuite(suiteName) }
        : { usageError: `Error: unknown suite "${suiteName}".\n` };
}
|
|
101
|
+
// Returns the scorecard to serialize. Offline scorecards are static harness text and are returned
// unchanged. In live mode every string in the scorecard is passed through the audit redactor
// (seeded with the KEIKO API-key values from `env`), so model content that leaked into report
// fields is scrubbed the same way as at evidence-persist time.
function redactedScorecard(scorecard, live, env) {
    if (live) {
        const scrub = createAuditRedactor({ additionalSecrets: keikoApiKeySecrets(env) }, env);
        return deepRedactStrings(scorecard, scrub);
    }
    return scorecard;
}
|
|
111
|
+
// True for the environment variable names that hold KEIKO API keys: `KEIKO_DEFAULT_API_KEY`, or
// any name that starts with `KEIKO_MODEL_` and ends with `_API_KEY`.
function isKeikoApiKeyEnvName(name) {
    return (name === "KEIKO_DEFAULT_API_KEY" ||
        (name.startsWith("KEIKO_MODEL_") && name.endsWith("_API_KEY")));
}
// Collects the values of all KEIKO API-key environment variables so they can be handed to the
// redactor as additional secrets. Unset and empty values are skipped: an empty string cannot leak
// anything, and an empty "secret" is a hazardous pattern to give a string redactor.
function keikoApiKeySecrets(env) {
    const secrets = [];
    for (const [name, value] of Object.entries(env)) {
        if (value !== undefined && value !== "" && isKeikoApiKeyEnvName(name)) {
            secrets.push(value);
        }
    }
    return secrets;
}
|
|
124
|
+
function writeScorecard(path, output) {
|
|
125
|
+
writeFileSync(path, `${JSON.stringify(output, null, 2)}\n`, { encoding: "utf8", flag: "wx" });
|
|
126
|
+
}
|
|
127
|
+
// Publishes an evaluation result. The scorecard is first passed through `redactedScorecard`
// (a no-op offline, deep redaction in live mode) and ONLY that redacted copy is used afterwards:
// it is written to --output when given, then printed as JSON (--json) or as the human-readable
// summary.
function emit(scorecard, parsed, io, env) {
    const output = redactedScorecard(scorecard, parsed.live, env);
    if (parsed.output !== undefined) {
        writeScorecard(parsed.output, output);
    }
    if (parsed.json) {
        io.out(`${JSON.stringify(output, null, 2)}\n`);
        return;
    }
    // Render the text summary from the redacted copy as well. Rendering the raw scorecard here
    // would bypass live-mode redaction on the human-readable path.
    io.out(`${renderEvalSummary(output)}\n`);
}
|
|
138
|
+
// Maps a scorecard to the process exit code: 0 only when surface parity passed and no dimension
// reports a failure; 1 otherwise.
function exitCodeFor(scorecard) {
    const passed = scorecard.surfaceParity.allPassed &&
        scorecard.dimensions.every((dimension) => !(dimension.failCount > 0));
    return passed ? 0 : 1;
}
|
|
145
|
+
// Entry point for `keiko evaluate`. --help prints usage and exits 0. Any usage problem (bad or
// unknown flag, missing value, conflicting or unknown suite/fixture) is reported on stderr with
// exit code 2. Otherwise the selected fixtures are run and the suite's exit code is returned.
export async function runEvaluateCli(args, io, env = {}, deps = {}) {
    if (args.includes("--help")) {
        io.out(USAGE);
        return 0;
    }
    const usageFailure = (message) => {
        io.err(message);
        return 2;
    };
    const problem = findUsageError(args);
    if (problem !== undefined) {
        return usageFailure(`Error: ${problem}.\n${USAGE}`);
    }
    const parsed = parseArgs(args);
    if (parsed === null) {
        return usageFailure(USAGE);
    }
    const selection = selectFixtures(parsed);
    if ("usageError" in selection) {
        return usageFailure(selection.usageError);
    }
    return runSuite(parsed, selection.fixtures, io, env, deps);
}
|
|
167
|
+
// Runs the evaluation suite for the chosen fixtures, emits the scorecard, and returns the exit
// code derived from it. An EEXIST failure is reported as "output file already exists" (exit 1);
// every other error is delegated to handleRunError.
async function runSuite(parsed, fixtures, io, env, deps) {
    try {
        const modelOverride = parsed.model === undefined ? {} : { modelIdOverride: parsed.model };
        const configOverride = parsed.config === undefined ? {} : { configPath: parsed.config };
        const request = {
            mode: parsed.live ? "live" : "offline",
            fixtures,
            ...modelOverride,
            ...configOverride,
        };
        // Date.now is the default clock so a real run reports the actual current time; anything in
        // deps.runner (for example a fixed `now` in tests) overrides these defaults.
        const runnerDeps = { env, now: Date.now, ...deps.runner };
        const scorecard = await runEvaluationSuite(request, runnerDeps);
        emit(scorecard, parsed, io, env);
        return exitCodeFor(scorecard);
    }
    catch (error) {
        if (!isOutputAlreadyExistsError(error)) {
            return handleRunError(error, parsed, io);
        }
        io.err(`Error: output file already exists: ${parsed.output ?? "<unknown>"}\n`);
        return 1;
    }
}
|
|
189
|
+
// True when `error` is a non-null object whose `code` property is "EEXIST".
function isOutputAlreadyExistsError(error) {
    if (error === null || typeof error !== "object") {
        return false;
    }
    return "code" in error && error.code === "EEXIST";
}
|
|
195
|
+
// Fail-closed handling for run errors. A GatewayError (including ConfigInvalidError) means no
// resolvable config or credentials: its redacted message is written to stderr, followed in live
// mode by a hint naming --config / KEIKO_CONFIG_FILE, and 1 is returned. There is never a silent
// fallback to offline. Any other error is rethrown.
function handleRunError(error, parsed, io) {
    if (!(error instanceof GatewayError)) {
        throw error;
    }
    const detail = redact(error.message);
    const liveHint = parsed.live
        ? "Live evaluation requires a configured provider. Pass --config PATH or set KEIKO_CONFIG_FILE.\n"
        : "";
    io.err(`Error: model gateway configuration problem — ${detail}\n${liveHint}`);
    return 1;
}
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
import { type EvidenceStore } from "../audit/store.js";
|
|
2
|
+
import type { EnvSource } from "../gateway/config.js";
|
|
3
|
+
import type { CliIo } from "./runner.js";
|
|
4
|
+
/**
 * Optional injectable dependencies for `runEvidenceCli`: an evidence store to read from and an
 * environment source.
 */
export interface EvidenceCliDeps {
|
|
5
|
+
readonly store?: EvidenceStore | undefined;
|
|
6
|
+
readonly env?: EnvSource | undefined;
|
|
7
|
+
}
|
|
8
|
+
/**
 * Runs the `keiko evidence` command.
 *
 * @param args - Command-line arguments for the subcommand.
 * @param io - Output/error sink used instead of writing to the process streams directly.
 * @param deps - Optional injected dependencies.
 * @returns The process exit code (returned synchronously).
 */
export declare function runEvidenceCli(args: readonly string[], io: CliIo, deps?: EvidenceCliDeps): number;
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
// `keiko evidence` — inspects previously written evidence manifests (ADR-0010 D9). `list` prints the
|
|
2
|
+
// EvidenceListEntry[] (text or --json); `show <runId>` prints one EvidenceReport / full manifest
|
|
3
|
+
// (--json). It reads ONLY the contained base dir via the EvidenceStore (default $KEIKO_EVIDENCE_DIR
|
|
4
|
+
// or ./.keiko/evidence, overridable with --evidence-dir). Because manifests are redacted by
|
|
5
|
+
// construction there is no un-redaction path. Exit 0 on success, 1 on a missing runId / read error,
|
|
6
|
+
// 2 on usage (unknown or missing subcommand, invalid runId). Tests inject an in-memory store via deps
|
|
7
|
+
// so no disk is touched.
|
|
8
|
+
import { buildEvidenceReport, renderEvidenceReport } from "../audit/report.js";
|
|
9
|
+
import { listEvidence, loadEvidence } from "../audit/index-api.js";
|
|
10
|
+
import { createNodeEvidenceStore, resolveEvidenceDir } from "../audit/store.js";
|
|
11
|
+
import { AuditError, InvalidRunIdError } from "../audit/errors.js";
|
|
12
|
+
// Usage text for `keiko evidence`; written to stderr on usage errors.
const USAGE = `Usage:
|
|
13
|
+
keiko evidence list [--evidence-dir PATH] [--json]
|
|
14
|
+
keiko evidence show <runId> [--evidence-dir PATH] [--json]
|
|
15
|
+
|
|
16
|
+
Lists or shows redacted evidence manifests written by \`keiko run\`. Reads only the
|
|
17
|
+
evidence base dir (default $KEIKO_EVIDENCE_DIR or ./.keiko/evidence; override with --evidence-dir).
|
|
18
|
+
`;
|
|
19
|
+
// Returns the argument following the first occurrence of `name`, or `undefined` when the flag is
// absent or is not followed by a value (missing, or another `--` flag).
function flagValue(args, name) {
    const at = args.indexOf(name);
    const candidate = at === -1 ? undefined : args[at + 1];
    return candidate !== undefined && !candidate.startsWith("--") ? candidate : undefined;
}
|
|
27
|
+
// Picks the evidence store to read from: the injected `deps.store` when provided, otherwise a
// Node-backed store at the directory resolved from --evidence-dir and `deps.env`.
function resolveStore(args, deps) {
    const injected = deps.store;
    if (injected !== undefined) {
        return injected;
    }
    const baseDir = resolveEvidenceDir(flagValue(args, "--evidence-dir"), deps.env);
    return createNodeEvidenceStore(baseDir);
}
|
|
33
|
+
// Renders the evidence list as text: one newline-terminated line per entry (run id, task type,
// outcome, start and finish times), or a fixed notice when there are no entries.
function renderListText(entries) {
    if (entries.length === 0) {
        return "No evidence manifests found.\n";
    }
    let text = "";
    for (const entry of entries) {
        text += `${entry.runId} ${entry.taskType} ${entry.outcome} started=${String(entry.startedAt)} finished=${String(entry.finishedAt)}\n`;
    }
    return text;
}
|
|
40
|
+
// Handles `keiko evidence list`: prints the store's entries as JSON or as text and returns 0.
function runList(store, json, io) {
    const entries = listEvidence(store);
    if (json) {
        io.out(`${JSON.stringify(entries, null, 2)}\n`);
    }
    else {
        io.out(renderListText(entries));
    }
    return 0;
}
|
|
45
|
+
// Handles `keiko evidence show <runId>`. Returns 1 with a message on stderr when no manifest is
// stored for the run id; otherwise prints the full manifest as JSON (--json) or the rendered
// evidence report, and returns 0. The report's location comes from the store's optional
// `location` method, falling back to `<runId>.json`.
function runShow(store, runId, json, io) {
    const manifest = loadEvidence(store, runId);
    if (manifest === undefined) {
        io.err(`keiko evidence: no manifest for runId: ${runId}\n`);
        return 1;
    }
    const rendered = json
        ? `${JSON.stringify(manifest, null, 2)}\n`
        : renderEvidenceReport(buildEvidenceReport(manifest, store.location?.(runId) ?? `${runId}.json`));
    io.out(rendered);
    return 0;
}
|
|
58
|
+
// Reports an AuditError on stderr and converts it to an exit code: 2 (usage) for an invalid run
// id, 1 (runtime) for any other audit/read failure. The message is printed as-is because audit
// errors are already redacted when they are constructed.
function exitForAuditError(error, io) {
    io.err(`keiko evidence: ${error.message}\n`);
    if (error instanceof InvalidRunIdError) {
        return 2;
    }
    return 1;
}
|
|
64
|
+
// Entry point for `keiko evidence`. Returns the exit code:
//   2 for usage errors: missing or unknown subcommand, `show` without a <runId>, `--evidence-dir`
//     without a PATH, or an invalid run id (InvalidRunIdError);
//   1 when the manifest is missing or another AuditError occurs;
//   0 on success.
// Errors that are not AuditErrors propagate to the caller.
export function runEvidenceCli(args, io, deps = {}) {
    const sub = args[0];
    if (sub !== "list" && sub !== "show") {
        io.err(sub === undefined ? USAGE : `keiko evidence: unknown subcommand: ${sub}\n${USAGE}`);
        return 2;
    }
    let runId;
    if (sub === "show") {
        runId = args[1];
        if (runId === undefined || runId.startsWith("--")) {
            io.err(`keiko evidence: show requires a <runId>.\n${USAGE}`);
            return 2;
        }
    }
    // A bare `--evidence-dir` used to fall back silently to the default directory, so the command
    // read a different location than the one the user meant to name. Treat it as a usage error.
    const dirFlagIndex = args.indexOf("--evidence-dir");
    if (dirFlagIndex !== -1) {
        const dirValue = args[dirFlagIndex + 1];
        if (dirValue === undefined || dirValue.startsWith("--")) {
            io.err(`keiko evidence: --evidence-dir requires a PATH.\n${USAGE}`);
            return 2;
        }
    }
    const json = args.includes("--json");
    try {
        const store = resolveStore(args, deps);
        return sub === "list" ? runList(store, json, io) : runShow(store, runId, json, io);
    }
    catch (error) {
        if (error instanceof AuditError) {
            return exitForAuditError(error, io);
        }
        throw error;
    }
}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
import type { EnvSource } from "../gateway/index.js";
|
|
2
|
+
/**
 * Outcome of resolving the gateway config file location, discriminated by `kind`:
 * "path" carries the resolved `path`; "missing-value" and "not-configured" carry no data.
 */
export type ConfigPathResolution = {
|
|
3
|
+
readonly kind: "path";
|
|
4
|
+
readonly path: string;
|
|
5
|
+
} | {
|
|
6
|
+
readonly kind: "missing-value";
|
|
7
|
+
} | {
|
|
8
|
+
readonly kind: "not-configured";
|
|
9
|
+
};
|
|
10
|
+
/**
 * Resolves the config file location from the command-line arguments and the environment.
 *
 * @param args - Command-line arguments to inspect.
 * @param env - Environment source to consult.
 * @returns The resolution outcome as a `ConfigPathResolution`.
 */
export declare function resolveConfigPathFromArgs(args: readonly string[], env: EnvSource): ConfigPathResolution;
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
// Resolves where the gateway config file lives. An explicit `--config` flag always wins: with a
// value it yields { kind: "path" }, without one (missing, or followed by another `--` flag) it
// yields { kind: "missing-value" } and the environment is NOT consulted. Without the flag,
// `env.KEIKO_CONFIG_FILE` is used when defined; otherwise the result is { kind: "not-configured" }.
export function resolveConfigPathFromArgs(args, env) {
    const flagIndex = args.indexOf("--config");
    if (flagIndex === -1) {
        const fromEnv = env.KEIKO_CONFIG_FILE;
        return fromEnv === undefined
            ? { kind: "not-configured" }
            : { kind: "path", path: fromEnv };
    }
    const value = args[flagIndex + 1];
    if (value !== undefined && !value.startsWith("--")) {
        return { kind: "path", path: value };
    }
    return { kind: "missing-value" };
}
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
import { type EnvSource } from "../gateway/config.js";
|
|
2
|
+
import type { ModelPort } from "../harness/ports.js";
|
|
3
|
+
import type { CliIo } from "./runner.js";
|
|
4
|
+
/**
 * Optional injectable dependencies for `runGenTestsCli`. `model` is an optional `ModelPort`;
 * NOTE(review): presumably a stand-in for the real model port in tests — confirm against the
 * implementation.
 */
export interface GenTestsDeps {
|
|
5
|
+
readonly model?: ModelPort | undefined;
|
|
6
|
+
}
|
|
7
|
+
/**
 * Runs the gen-tests CLI command.
 *
 * @param args - Command-line arguments for the subcommand.
 * @param io - Output/error sink used instead of writing to the process streams directly.
 * @param env - Optional environment source.
 * @param deps - Optional injected dependencies.
 * @returns A promise resolving to a number; NOTE(review): presumably the process exit code, as
 *   with the other CLI entry points — confirm against the implementation.
 */
export declare function runGenTestsCli(args: readonly string[], io: CliIo, env?: EnvSource, deps?: GenTestsDeps): Promise<number>;
|