@oscharko-dev/keiko 0.1.0-beta.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +202 -0
- package/NOTICE +7 -0
- package/README.md +621 -0
- package/TRADEMARKS.md +41 -0
- package/dist/audit/aggregate.d.ts +5 -0
- package/dist/audit/aggregate.js +25 -0
- package/dist/audit/build.d.ts +2 -0
- package/dist/audit/build.js +224 -0
- package/dist/audit/errors.d.ts +25 -0
- package/dist/audit/errors.js +39 -0
- package/dist/audit/index-api.d.ts +14 -0
- package/dist/audit/index-api.js +131 -0
- package/dist/audit/index.d.ts +12 -0
- package/dist/audit/index.js +17 -0
- package/dist/audit/persist.d.ts +8 -0
- package/dist/audit/persist.js +40 -0
- package/dist/audit/redaction.d.ts +3 -0
- package/dist/audit/redaction.js +61 -0
- package/dist/audit/report.d.ts +18 -0
- package/dist/audit/report.js +50 -0
- package/dist/audit/retention.d.ts +3 -0
- package/dist/audit/retention.js +95 -0
- package/dist/audit/runid.d.ts +1 -0
- package/dist/audit/runid.js +29 -0
- package/dist/audit/side-file.d.ts +12 -0
- package/dist/audit/side-file.js +82 -0
- package/dist/audit/store.d.ts +12 -0
- package/dist/audit/store.js +198 -0
- package/dist/audit/types.d.ts +188 -0
- package/dist/audit/types.js +8 -0
- package/dist/audit/workflow-evidence.d.ts +27 -0
- package/dist/audit/workflow-evidence.js +145 -0
- package/dist/cli/context.d.ts +2 -0
- package/dist/cli/context.js +102 -0
- package/dist/cli/evaluate.d.ts +7 -0
- package/dist/cli/evaluate.js +207 -0
- package/dist/cli/evidence.d.ts +8 -0
- package/dist/cli/evidence.js +88 -0
- package/dist/cli/gateway-config.d.ts +10 -0
- package/dist/cli/gateway-config.js +12 -0
- package/dist/cli/gen-tests.d.ts +7 -0
- package/dist/cli/gen-tests.js +208 -0
- package/dist/cli/index.d.ts +2 -0
- package/dist/cli/index.js +14 -0
- package/dist/cli/investigate.d.ts +8 -0
- package/dist/cli/investigate.js +242 -0
- package/dist/cli/models.d.ts +3 -0
- package/dist/cli/models.js +64 -0
- package/dist/cli/run.d.ts +7 -0
- package/dist/cli/run.js +187 -0
- package/dist/cli/runner.d.ts +6 -0
- package/dist/cli/runner.js +83 -0
- package/dist/cli/ui.d.ts +31 -0
- package/dist/cli/ui.js +240 -0
- package/dist/cli/verify.d.ts +2 -0
- package/dist/cli/verify.js +103 -0
- package/dist/evaluations/fixtures/bug-investigation/happy-path.d.ts +2 -0
- package/dist/evaluations/fixtures/bug-investigation/happy-path.js +66 -0
- package/dist/evaluations/fixtures/bug-investigation/investigation-only.d.ts +2 -0
- package/dist/evaluations/fixtures/bug-investigation/investigation-only.js +39 -0
- package/dist/evaluations/fixtures/bug-investigation/unsafe-action.d.ts +2 -0
- package/dist/evaluations/fixtures/bug-investigation/unsafe-action.js +37 -0
- package/dist/evaluations/fixtures/index.d.ts +7 -0
- package/dist/evaluations/fixtures/index.js +35 -0
- package/dist/evaluations/fixtures/support.d.ts +5 -0
- package/dist/evaluations/fixtures/support.js +42 -0
- package/dist/evaluations/fixtures/unit-tests/happy-path.d.ts +2 -0
- package/dist/evaluations/fixtures/unit-tests/happy-path.js +40 -0
- package/dist/evaluations/fixtures/unit-tests/retry-then-accept.d.ts +2 -0
- package/dist/evaluations/fixtures/unit-tests/retry-then-accept.js +39 -0
- package/dist/evaluations/fixtures/unit-tests/unsafe-action.d.ts +2 -0
- package/dist/evaluations/fixtures/unit-tests/unsafe-action.js +32 -0
- package/dist/evaluations/index.d.ts +12 -0
- package/dist/evaluations/index.js +12 -0
- package/dist/evaluations/manifest-check.d.ts +1 -0
- package/dist/evaluations/manifest-check.js +48 -0
- package/dist/evaluations/model-provider.d.ts +12 -0
- package/dist/evaluations/model-provider.js +26 -0
- package/dist/evaluations/render.d.ts +2 -0
- package/dist/evaluations/render.js +59 -0
- package/dist/evaluations/runner-support.d.ts +27 -0
- package/dist/evaluations/runner-support.js +163 -0
- package/dist/evaluations/runner.d.ts +20 -0
- package/dist/evaluations/runner.js +174 -0
- package/dist/evaluations/scorer.d.ts +14 -0
- package/dist/evaluations/scorer.js +131 -0
- package/dist/evaluations/scripted-model.d.ts +6 -0
- package/dist/evaluations/scripted-model.js +26 -0
- package/dist/evaluations/surface-parity.d.ts +2 -0
- package/dist/evaluations/surface-parity.js +184 -0
- package/dist/evaluations/types.d.ts +74 -0
- package/dist/evaluations/types.js +16 -0
- package/dist/gateway/capabilities.d.ts +11 -0
- package/dist/gateway/capabilities.data.d.ts +2 -0
- package/dist/gateway/capabilities.data.js +203 -0
- package/dist/gateway/capabilities.js +41 -0
- package/dist/gateway/config.d.ts +15 -0
- package/dist/gateway/config.js +154 -0
- package/dist/gateway/errors.d.ts +72 -0
- package/dist/gateway/errors.js +82 -0
- package/dist/gateway/gateway.d.ts +19 -0
- package/dist/gateway/gateway.js +94 -0
- package/dist/gateway/index.d.ts +10 -0
- package/dist/gateway/index.js +11 -0
- package/dist/gateway/model-selection.d.ts +9 -0
- package/dist/gateway/model-selection.js +36 -0
- package/dist/gateway/normalize.d.ts +7 -0
- package/dist/gateway/normalize.js +93 -0
- package/dist/gateway/openai-adapter.d.ts +20 -0
- package/dist/gateway/openai-adapter.js +263 -0
- package/dist/gateway/redaction.d.ts +1 -0
- package/dist/gateway/redaction.js +51 -0
- package/dist/gateway/resilience.d.ts +24 -0
- package/dist/gateway/resilience.js +166 -0
- package/dist/gateway/types.d.ts +108 -0
- package/dist/gateway/types.js +2 -0
- package/dist/harness/adapters.d.ts +23 -0
- package/dist/harness/adapters.js +38 -0
- package/dist/harness/context.d.ts +33 -0
- package/dist/harness/context.js +21 -0
- package/dist/harness/emitter.d.ts +15 -0
- package/dist/harness/emitter.js +72 -0
- package/dist/harness/errors.d.ts +21 -0
- package/dist/harness/errors.js +39 -0
- package/dist/harness/executor.d.ts +3 -0
- package/dist/harness/executor.js +211 -0
- package/dist/harness/fingerprint.d.ts +6 -0
- package/dist/harness/fingerprint.js +43 -0
- package/dist/harness/index.d.ts +9 -0
- package/dist/harness/index.js +13 -0
- package/dist/harness/loop.d.ts +3 -0
- package/dist/harness/loop.js +159 -0
- package/dist/harness/patcher.d.ts +4 -0
- package/dist/harness/patcher.js +49 -0
- package/dist/harness/planner.d.ts +3 -0
- package/dist/harness/planner.js +21 -0
- package/dist/harness/ports.d.ts +61 -0
- package/dist/harness/ports.js +4 -0
- package/dist/harness/session.d.ts +25 -0
- package/dist/harness/session.js +116 -0
- package/dist/harness/sinks.d.ts +30 -0
- package/dist/harness/sinks.js +72 -0
- package/dist/harness/tasks/explain-plan.d.ts +3 -0
- package/dist/harness/tasks/explain-plan.js +29 -0
- package/dist/harness/tasks/generate-unit-tests.d.ts +3 -0
- package/dist/harness/tasks/generate-unit-tests.js +28 -0
- package/dist/harness/tasks/investigate-bug.d.ts +3 -0
- package/dist/harness/tasks/investigate-bug.js +31 -0
- package/dist/harness/tasks/policy.d.ts +11 -0
- package/dist/harness/tasks/policy.js +22 -0
- package/dist/harness/tasks/verify.d.ts +3 -0
- package/dist/harness/tasks/verify.js +16 -0
- package/dist/harness/types.d.ts +270 -0
- package/dist/harness/types.js +33 -0
- package/dist/index.d.ts +11 -0
- package/dist/index.js +36 -0
- package/dist/sdk/index.d.ts +9 -0
- package/dist/sdk/index.js +37 -0
- package/dist/sdk/run-agent.d.ts +16 -0
- package/dist/sdk/run-agent.js +56 -0
- package/dist/tools/browser/cdp-client.d.ts +35 -0
- package/dist/tools/browser/cdp-client.js +218 -0
- package/dist/tools/browser/errors.d.ts +25 -0
- package/dist/tools/browser/errors.js +55 -0
- package/dist/tools/browser/index.d.ts +5 -0
- package/dist/tools/browser/index.js +6 -0
- package/dist/tools/browser/session.d.ts +44 -0
- package/dist/tools/browser/session.js +748 -0
- package/dist/tools/browser/types.d.ts +48 -0
- package/dist/tools/browser/types.js +2 -0
- package/dist/tools/browser/validators.d.ts +5 -0
- package/dist/tools/browser/validators.js +97 -0
- package/dist/tools/errors.d.ts +59 -0
- package/dist/tools/errors.js +94 -0
- package/dist/tools/exec.d.ts +42 -0
- package/dist/tools/exec.js +327 -0
- package/dist/tools/index.d.ts +11 -0
- package/dist/tools/index.js +14 -0
- package/dist/tools/patch-content.d.ts +10 -0
- package/dist/tools/patch-content.js +126 -0
- package/dist/tools/patch-normalize.d.ts +1 -0
- package/dist/tools/patch-normalize.js +80 -0
- package/dist/tools/patch-parse.d.ts +8 -0
- package/dist/tools/patch-parse.js +201 -0
- package/dist/tools/patch.d.ts +18 -0
- package/dist/tools/patch.js +403 -0
- package/dist/tools/registry.d.ts +36 -0
- package/dist/tools/registry.js +231 -0
- package/dist/tools/sandbox.d.ts +8 -0
- package/dist/tools/sandbox.js +121 -0
- package/dist/tools/schemas.d.ts +2 -0
- package/dist/tools/schemas.js +51 -0
- package/dist/tools/terminal-policy.d.ts +9 -0
- package/dist/tools/terminal-policy.js +313 -0
- package/dist/tools/types.d.ts +99 -0
- package/dist/tools/types.js +103 -0
- package/dist/tools/writer.d.ts +7 -0
- package/dist/tools/writer.js +20 -0
- package/dist/ui/browser.d.ts +10 -0
- package/dist/ui/browser.js +231 -0
- package/dist/ui/chat-handlers.d.ts +4 -0
- package/dist/ui/chat-handlers.js +281 -0
- package/dist/ui/csp-hashes.json +17 -0
- package/dist/ui/csp.d.ts +2 -0
- package/dist/ui/csp.js +66 -0
- package/dist/ui/deps.d.ts +34 -0
- package/dist/ui/deps.js +137 -0
- package/dist/ui/evidence.d.ts +27 -0
- package/dist/ui/evidence.js +142 -0
- package/dist/ui/files-deny.d.ts +2 -0
- package/dist/ui/files-deny.js +12 -0
- package/dist/ui/files.d.ts +65 -0
- package/dist/ui/files.js +492 -0
- package/dist/ui/headers.d.ts +2 -0
- package/dist/ui/headers.js +21 -0
- package/dist/ui/host-check.d.ts +2 -0
- package/dist/ui/host-check.js +58 -0
- package/dist/ui/index.d.ts +20 -0
- package/dist/ui/index.js +23 -0
- package/dist/ui/load-csp.d.ts +1 -0
- package/dist/ui/load-csp.js +28 -0
- package/dist/ui/read-handlers.d.ts +8 -0
- package/dist/ui/read-handlers.js +247 -0
- package/dist/ui/routes.d.ts +36 -0
- package/dist/ui/routes.js +129 -0
- package/dist/ui/run-engine.d.ts +20 -0
- package/dist/ui/run-engine.js +345 -0
- package/dist/ui/run-handlers.d.ts +8 -0
- package/dist/ui/run-handlers.js +431 -0
- package/dist/ui/run-request.d.ts +13 -0
- package/dist/ui/run-request.js +219 -0
- package/dist/ui/runs.d.ts +43 -0
- package/dist/ui/runs.js +92 -0
- package/dist/ui/server.d.ts +11 -0
- package/dist/ui/server.js +143 -0
- package/dist/ui/sink.d.ts +27 -0
- package/dist/ui/sink.js +80 -0
- package/dist/ui/sse.d.ts +7 -0
- package/dist/ui/sse.js +27 -0
- package/dist/ui/static/404.html +1 -0
- package/dist/ui/static/_next/static/ca-A01hy9W98aRvMZKdAw/_buildManifest.js +1 -0
- package/dist/ui/static/_next/static/ca-A01hy9W98aRvMZKdAw/_ssgManifest.js +1 -0
- package/dist/ui/static/_next/static/chunks/255-d47fd57964443afe.js +1 -0
- package/dist/ui/static/_next/static/chunks/4-be1fef693af8e088.js +1 -0
- package/dist/ui/static/_next/static/chunks/4bd1b696-c023c6e3521b1417.js +1 -0
- package/dist/ui/static/_next/static/chunks/app/_not-found/page-75825b09bcecad97.js +1 -0
- package/dist/ui/static/_next/static/chunks/app/launch/page-9c86a13c29884245.js +1 -0
- package/dist/ui/static/_next/static/chunks/app/layout-bdea63fe87947d50.js +1 -0
- package/dist/ui/static/_next/static/chunks/app/page-4168c12c68b7a853.js +1 -0
- package/dist/ui/static/_next/static/chunks/framework-a6e0b7e30f98059a.js +1 -0
- package/dist/ui/static/_next/static/chunks/main-778a50aebff02192.js +1 -0
- package/dist/ui/static/_next/static/chunks/main-app-30679af7240d63e9.js +1 -0
- package/dist/ui/static/_next/static/chunks/pages/_app-7d307437aca18ad4.js +1 -0
- package/dist/ui/static/_next/static/chunks/pages/_error-cb2a52f75f2162e2.js +1 -0
- package/dist/ui/static/_next/static/chunks/polyfills-42372ed130431b0a.js +1 -0
- package/dist/ui/static/_next/static/chunks/webpack-4a462cecab786e93.js +1 -0
- package/dist/ui/static/_next/static/css/be7cb54d5c5673b6.css +1 -0
- package/dist/ui/static/assets/editors/goland.svg +35 -0
- package/dist/ui/static/assets/editors/intellij.svg +39 -0
- package/dist/ui/static/assets/editors/pycharm.svg +58 -0
- package/dist/ui/static/assets/editors/rustrover.svg +19 -0
- package/dist/ui/static/assets/editors/vscode.svg +1 -0
- package/dist/ui/static/assets/editors/webstorm.svg +21 -0
- package/dist/ui/static/assets/icons/anthropic.svg +1 -0
- package/dist/ui/static/assets/icons/brave.svg +1 -0
- package/dist/ui/static/assets/icons/css3.svg +1 -0
- package/dist/ui/static/assets/icons/docker.svg +1 -0
- package/dist/ui/static/assets/icons/git.svg +1 -0
- package/dist/ui/static/assets/icons/github.svg +1 -0
- package/dist/ui/static/assets/icons/go.svg +1 -0
- package/dist/ui/static/assets/icons/gradle.svg +1 -0
- package/dist/ui/static/assets/icons/grafana.svg +1 -0
- package/dist/ui/static/assets/icons/graphql.svg +1 -0
- package/dist/ui/static/assets/icons/html5.svg +1 -0
- package/dist/ui/static/assets/icons/image.svg +1 -0
- package/dist/ui/static/assets/icons/java.svg +1 -0
- package/dist/ui/static/assets/icons/javascript.svg +1 -0
- package/dist/ui/static/assets/icons/json.svg +1 -0
- package/dist/ui/static/assets/icons/kafka.svg +1 -0
- package/dist/ui/static/assets/icons/kubernetes.svg +1 -0
- package/dist/ui/static/assets/icons/linear.svg +1 -0
- package/dist/ui/static/assets/icons/markdown.svg +1 -0
- package/dist/ui/static/assets/icons/nginx.svg +1 -0
- package/dist/ui/static/assets/icons/nodejs.svg +1 -0
- package/dist/ui/static/assets/icons/notion.svg +1 -0
- package/dist/ui/static/assets/icons/openai.svg +1 -0
- package/dist/ui/static/assets/icons/playwright.svg +1 -0
- package/dist/ui/static/assets/icons/postgresql.svg +1 -0
- package/dist/ui/static/assets/icons/prometheus.svg +1 -0
- package/dist/ui/static/assets/icons/properties.svg +1 -0
- package/dist/ui/static/assets/icons/puppeteer.svg +1 -0
- package/dist/ui/static/assets/icons/python.svg +1 -0
- package/dist/ui/static/assets/icons/react.svg +1 -0
- package/dist/ui/static/assets/icons/redis.svg +1 -0
- package/dist/ui/static/assets/icons/rust.svg +1 -0
- package/dist/ui/static/assets/icons/sentry.svg +1 -0
- package/dist/ui/static/assets/icons/slack.svg +1 -0
- package/dist/ui/static/assets/icons/spring.svg +1 -0
- package/dist/ui/static/assets/icons/typescript.svg +1 -0
- package/dist/ui/static/assets/icons/upstash.svg +1 -0
- package/dist/ui/static/assets/icons/yaml.svg +1 -0
- package/dist/ui/static/assets/keiko-logo.svg +10 -0
- package/dist/ui/static/index.html +1 -0
- package/dist/ui/static/index.txt +19 -0
- package/dist/ui/static/keiko-logo.svg +10 -0
- package/dist/ui/static/launch.html +1 -0
- package/dist/ui/static/launch.txt +19 -0
- package/dist/ui/static.d.ts +3 -0
- package/dist/ui/static.js +72 -0
- package/dist/ui/store/chats.d.ts +14 -0
- package/dist/ui/store/chats.js +110 -0
- package/dist/ui/store/db.d.ts +6 -0
- package/dist/ui/store/db.js +182 -0
- package/dist/ui/store/errors.d.ts +12 -0
- package/dist/ui/store/errors.js +30 -0
- package/dist/ui/store/index.d.ts +6 -0
- package/dist/ui/store/index.js +6 -0
- package/dist/ui/store/messages.d.ts +5 -0
- package/dist/ui/store/messages.js +137 -0
- package/dist/ui/store/paths.d.ts +4 -0
- package/dist/ui/store/paths.js +69 -0
- package/dist/ui/store/projects.d.ts +7 -0
- package/dist/ui/store/projects.js +61 -0
- package/dist/ui/store/schema.d.ts +3 -0
- package/dist/ui/store/schema.js +77 -0
- package/dist/ui/store/types.d.ts +80 -0
- package/dist/ui/store/types.js +3 -0
- package/dist/ui/store/validation.d.ts +4 -0
- package/dist/ui/store/validation.js +72 -0
- package/dist/ui/store-handlers.d.ts +16 -0
- package/dist/ui/store-handlers.js +465 -0
- package/dist/ui/terminal-errors.d.ts +21 -0
- package/dist/ui/terminal-errors.js +45 -0
- package/dist/ui/terminal-evidence.d.ts +20 -0
- package/dist/ui/terminal-evidence.js +65 -0
- package/dist/ui/terminal-routes.d.ts +9 -0
- package/dist/ui/terminal-routes.js +219 -0
- package/dist/ui/terminal.d.ts +67 -0
- package/dist/ui/terminal.js +835 -0
- package/dist/verification/classify.d.ts +10 -0
- package/dist/verification/classify.js +53 -0
- package/dist/verification/detect.d.ts +4 -0
- package/dist/verification/detect.js +81 -0
- package/dist/verification/errors.d.ts +11 -0
- package/dist/verification/errors.js +21 -0
- package/dist/verification/index.d.ts +17 -0
- package/dist/verification/index.js +13 -0
- package/dist/verification/limits.d.ts +3 -0
- package/dist/verification/limits.js +40 -0
- package/dist/verification/monitor.d.ts +4 -0
- package/dist/verification/monitor.js +58 -0
- package/dist/verification/orchestrator.d.ts +16 -0
- package/dist/verification/orchestrator.js +363 -0
- package/dist/verification/plan.d.ts +9 -0
- package/dist/verification/plan.js +125 -0
- package/dist/verification/summary.d.ts +40 -0
- package/dist/verification/summary.js +67 -0
- package/dist/verification/types.d.ts +63 -0
- package/dist/verification/types.js +13 -0
- package/dist/workflows/bug-investigation/context.d.ts +7 -0
- package/dist/workflows/bug-investigation/context.js +119 -0
- package/dist/workflows/bug-investigation/descriptor.d.ts +3 -0
- package/dist/workflows/bug-investigation/descriptor.js +46 -0
- package/dist/workflows/bug-investigation/emit.d.ts +12 -0
- package/dist/workflows/bug-investigation/emit.js +35 -0
- package/dist/workflows/bug-investigation/events.d.ts +81 -0
- package/dist/workflows/bug-investigation/events.js +9 -0
- package/dist/workflows/bug-investigation/failure-parse.d.ts +3 -0
- package/dist/workflows/bug-investigation/failure-parse.js +154 -0
- package/dist/workflows/bug-investigation/guard.d.ts +2 -0
- package/dist/workflows/bug-investigation/guard.js +69 -0
- package/dist/workflows/bug-investigation/index.d.ts +7 -0
- package/dist/workflows/bug-investigation/index.js +13 -0
- package/dist/workflows/bug-investigation/internal.d.ts +37 -0
- package/dist/workflows/bug-investigation/internal.js +64 -0
- package/dist/workflows/bug-investigation/model-loop.d.ts +4 -0
- package/dist/workflows/bug-investigation/model-loop.js +223 -0
- package/dist/workflows/bug-investigation/parse.d.ts +3 -0
- package/dist/workflows/bug-investigation/parse.js +123 -0
- package/dist/workflows/bug-investigation/prompt.d.ts +4 -0
- package/dist/workflows/bug-investigation/prompt.js +107 -0
- package/dist/workflows/bug-investigation/report.d.ts +23 -0
- package/dist/workflows/bug-investigation/report.js +151 -0
- package/dist/workflows/bug-investigation/stages.d.ts +13 -0
- package/dist/workflows/bug-investigation/stages.js +242 -0
- package/dist/workflows/bug-investigation/types.d.ts +91 -0
- package/dist/workflows/bug-investigation/types.js +14 -0
- package/dist/workflows/bug-investigation/verify-stage.d.ts +10 -0
- package/dist/workflows/bug-investigation/verify-stage.js +91 -0
- package/dist/workflows/bug-investigation/workflow.d.ts +2 -0
- package/dist/workflows/bug-investigation/workflow.js +74 -0
- package/dist/workflows/descriptor.d.ts +20 -0
- package/dist/workflows/descriptor.js +8 -0
- package/dist/workflows/index.d.ts +3 -0
- package/dist/workflows/index.js +2 -0
- package/dist/workflows/unit-tests/context.d.ts +7 -0
- package/dist/workflows/unit-tests/context.js +129 -0
- package/dist/workflows/unit-tests/conventions.d.ts +4 -0
- package/dist/workflows/unit-tests/conventions.js +87 -0
- package/dist/workflows/unit-tests/descriptor.d.ts +4 -0
- package/dist/workflows/unit-tests/descriptor.js +43 -0
- package/dist/workflows/unit-tests/emit.d.ts +12 -0
- package/dist/workflows/unit-tests/emit.js +35 -0
- package/dist/workflows/unit-tests/events.d.ts +78 -0
- package/dist/workflows/unit-tests/events.js +7 -0
- package/dist/workflows/unit-tests/index.d.ts +6 -0
- package/dist/workflows/unit-tests/index.js +10 -0
- package/dist/workflows/unit-tests/internal.d.ts +35 -0
- package/dist/workflows/unit-tests/internal.js +43 -0
- package/dist/workflows/unit-tests/model-loop.d.ts +4 -0
- package/dist/workflows/unit-tests/model-loop.js +95 -0
- package/dist/workflows/unit-tests/parse.d.ts +6 -0
- package/dist/workflows/unit-tests/parse.js +68 -0
- package/dist/workflows/unit-tests/prompt.d.ts +4 -0
- package/dist/workflows/unit-tests/prompt.js +71 -0
- package/dist/workflows/unit-tests/report.d.ts +21 -0
- package/dist/workflows/unit-tests/report.js +90 -0
- package/dist/workflows/unit-tests/stages.d.ts +9 -0
- package/dist/workflows/unit-tests/stages.js +155 -0
- package/dist/workflows/unit-tests/types.d.ts +70 -0
- package/dist/workflows/unit-tests/types.js +11 -0
- package/dist/workflows/unit-tests/verify-stage.d.ts +9 -0
- package/dist/workflows/unit-tests/verify-stage.js +56 -0
- package/dist/workflows/unit-tests/workflow.d.ts +2 -0
- package/dist/workflows/unit-tests/workflow.js +58 -0
- package/dist/workspace/contextPack.d.ts +9 -0
- package/dist/workspace/contextPack.js +94 -0
- package/dist/workspace/detect.d.ts +3 -0
- package/dist/workspace/detect.js +135 -0
- package/dist/workspace/discovery.d.ts +9 -0
- package/dist/workspace/discovery.js +167 -0
- package/dist/workspace/errors.d.ts +39 -0
- package/dist/workspace/errors.js +66 -0
- package/dist/workspace/fs.d.ts +21 -0
- package/dist/workspace/fs.js +36 -0
- package/dist/workspace/ignore.d.ts +14 -0
- package/dist/workspace/ignore.js +176 -0
- package/dist/workspace/index.d.ts +11 -0
- package/dist/workspace/index.js +13 -0
- package/dist/workspace/paths.d.ts +2 -0
- package/dist/workspace/paths.js +38 -0
- package/dist/workspace/realpath.d.ts +7 -0
- package/dist/workspace/realpath.js +72 -0
- package/dist/workspace/retrieval.d.ts +9 -0
- package/dist/workspace/retrieval.js +74 -0
- package/dist/workspace/summary.d.ts +3 -0
- package/dist/workspace/summary.js +54 -0
- package/dist/workspace/types.d.ts +103 -0
- package/dist/workspace/types.js +27 -0
- package/package.json +58 -0
|
@@ -0,0 +1,270 @@
|
|
|
1
|
+
/**
 * Every state name a harness run can be in, as a union of string literals.
 * The last four members ("completed", "cancelled", "failed", "limit-exceeded")
 * are the same literals that make up `TerminalState`.
 */
export type HarnessStateName = "intake" | "planning" | "context-selection" | "model-call" | "tool-call" | "patch-proposal" | "verification" | "reporting" | "completed" | "cancelled" | "failed" | "limit-exceeded";
|
|
2
|
+
/**
 * The four state literals "completed", "cancelled", "failed" and
 * "limit-exceeded". Each of them is also a member of `HarnessStateName`.
 * NOTE(review): presumably the states in which a run has ended — confirm
 * against the state machine implementation.
 */
export type TerminalState = "completed" | "cancelled" | "failed" | "limit-exceeded";
|
|
3
|
+
/**
 * Read-only set of harness state names. Only the type is declared here; the
 * members are defined in the implementation module.
 * NOTE(review): presumably holds exactly the `TerminalState` literals, but the
 * element type is the wider `HarnessStateName` — confirm against the .js file.
 */
export declare const TERMINAL_STATES: ReadonlySet<HarnessStateName>;
|
|
4
|
+
/**
 * One move between harness states: the state being left (`from`), the state
 * being entered (`to`), and a free-text `reason`. All fields are read-only.
 */
export interface StateTransition {
|
|
5
|
+
readonly from: HarnessStateName;
|
|
6
|
+
readonly to: HarnessStateName;
|
|
7
|
+
readonly reason: string;
|
|
8
|
+
}
|
|
9
|
+
/**
 * Numeric ceilings for a harness run. Every field is a read-only number.
 * NOTE(review): units are implied by the names only (bytes for `*Bytes`,
 * milliseconds for `maxWallTimeMs`) — confirm against the enforcing code.
 * Each field appears to have a matching `LIMIT_*` entry in `HARNESS_CODES`.
 */
export interface HarnessLimits {
|
|
10
|
+
readonly maxIterations: number;
|
|
11
|
+
readonly maxModelCalls: number;
|
|
12
|
+
readonly maxToolCalls: number;
|
|
13
|
+
readonly maxCommandExecutions: number;
|
|
14
|
+
readonly maxContextBytes: number;
|
|
15
|
+
readonly maxPatchBytes: number;
|
|
16
|
+
readonly maxWallTimeMs: number;
|
|
17
|
+
readonly maxFailureAttempts: number;
|
|
18
|
+
}
|
|
19
|
+
/**
 * A `HarnessLimits` constant. Only the type is declared here; the concrete
 * numbers live in the implementation module and are not visible in this file.
 */
export declare const DEFAULT_LIMITS: HarnessLimits;
|
|
20
|
+
/**
 * The four task kinds, as string literals. The same literals are used as the
 * `taskType` discriminant of the `TaskInput` union.
 */
export type TaskType = "generate-unit-tests" | "investigate-bug" | "explain-plan" | "verify";
|
|
21
|
+
/**
 * Input payload paired with `taskType: "generate-unit-tests"` in `TaskInput`.
 * `filePath` is required; `targetFunction` and `context` are optional and may
 * also be passed explicitly as `undefined`.
 */
export interface GenerateUnitTestsInput {
|
|
22
|
+
readonly filePath: string;
|
|
23
|
+
readonly targetFunction?: string | undefined;
|
|
24
|
+
readonly context?: string | undefined;
|
|
25
|
+
}
|
|
26
|
+
/**
 * Input payload paired with `taskType: "investigate-bug"` in `TaskInput`.
 * `description` is required; `filePaths` (a read-only string array) and
 * `context` are optional and may also be passed explicitly as `undefined`.
 */
export interface InvestigateBugInput {
|
|
27
|
+
readonly description: string;
|
|
28
|
+
readonly filePaths?: readonly string[] | undefined;
|
|
29
|
+
readonly context?: string | undefined;
|
|
30
|
+
}
|
|
31
|
+
/**
 * Input payload paired with `taskType: "explain-plan"` in `TaskInput`.
 * `filePath` is required; `question` and `context` are optional and may also
 * be passed explicitly as `undefined`.
 */
export interface ExplainPlanInput {
|
|
32
|
+
readonly filePath: string;
|
|
33
|
+
readonly question?: string | undefined;
|
|
34
|
+
readonly context?: string | undefined;
|
|
35
|
+
}
|
|
36
|
+
/**
 * Input payload paired with `taskType: "verify"` in `TaskInput`.
 * `workspaceRoot` is required; `targetFiles` (a read-only string array) is
 * optional and may also be passed explicitly as `undefined`.
 */
export interface VerifyInput {
|
|
37
|
+
readonly workspaceRoot: string;
|
|
38
|
+
readonly targetFiles?: readonly string[] | undefined;
|
|
39
|
+
}
|
|
40
|
+
/**
 * Discriminated union of task requests. The literal `taskType` selects which
 * payload type `input` carries, so narrowing on `taskType` narrows `input`:
 * "generate-unit-tests" -> GenerateUnitTestsInput, "investigate-bug" ->
 * InvestigateBugInput, "explain-plan" -> ExplainPlanInput, "verify" ->
 * VerifyInput.
 */
export type TaskInput = {
|
|
41
|
+
readonly taskType: "generate-unit-tests";
|
|
42
|
+
readonly input: GenerateUnitTestsInput;
|
|
43
|
+
} | {
|
|
44
|
+
readonly taskType: "investigate-bug";
|
|
45
|
+
readonly input: InvestigateBugInput;
|
|
46
|
+
} | {
|
|
47
|
+
readonly taskType: "explain-plan";
|
|
48
|
+
readonly input: ExplainPlanInput;
|
|
49
|
+
} | {
|
|
50
|
+
readonly taskType: "verify";
|
|
51
|
+
readonly input: VerifyInput;
|
|
52
|
+
};
|
|
53
|
+
/**
 * Numeric tallies for a run. Unlike most interfaces in this file the fields
 * are not `readonly`, so the type permits in-place updates.
 * `browserNavigations` has no same-named ceiling in `HarnessLimits`.
 */
export interface RunCounters {
|
|
54
|
+
iterations: number;
|
|
55
|
+
modelCalls: number;
|
|
56
|
+
toolCalls: number;
|
|
57
|
+
commandExecutions: number;
|
|
58
|
+
failureAttempts: number;
|
|
59
|
+
browserNavigations: number;
|
|
60
|
+
}
|
|
61
|
+
/**
 * Outcome label stored on `RunResult.outcome`. It is spelled with the same
 * four literals as `TerminalState`, but declared as a separate alias.
 */
export type RunOutcome = "completed" | "cancelled" | "failed" | "limit-exceeded";
|
|
62
|
+
/**
 * Read-only summary of a run: identifiers, outcome, task type, optional
 * `report` / `patchDiff` / `failure`, start and finish times, and the list of
 * events. `startedAt` and `finishedAt` are numbers here, whereas
 * `RunManifest.startedAt` is a string.
 * NOTE(review): the numeric times are presumably epoch milliseconds — confirm.
 * `HarnessEvent` is declared outside the portion of the file shown here.
 */
export interface RunResult {
|
|
63
|
+
readonly runId: string;
|
|
64
|
+
readonly fingerprint: string;
|
|
65
|
+
readonly outcome: RunOutcome;
|
|
66
|
+
readonly taskType: TaskType;
|
|
67
|
+
readonly report?: string | undefined;
|
|
68
|
+
readonly patchDiff?: string | undefined;
|
|
69
|
+
readonly failure?: HarnessFailure | undefined;
|
|
70
|
+
readonly startedAt: number;
|
|
71
|
+
readonly finishedAt: number;
|
|
72
|
+
readonly events: readonly HarnessEvent[];
|
|
73
|
+
}
|
|
74
|
+
/**
 * Read-only record of a run's configuration and events: identifiers, harness
 * version, task type and full task input, limits, model id, working
 * directory, the `dryRun` flag, start time and event list.
 * `startedAt` is a string here, whereas `RunResult.startedAt` is a number.
 * NOTE(review): the string format (e.g. ISO 8601) is not visible — confirm.
 */
export interface RunManifest {
|
|
75
|
+
readonly runId: string;
|
|
76
|
+
readonly fingerprint: string;
|
|
77
|
+
readonly harnessVersion: string;
|
|
78
|
+
readonly taskType: TaskType;
|
|
79
|
+
readonly taskInput: TaskInput;
|
|
80
|
+
readonly limits: HarnessLimits;
|
|
81
|
+
readonly modelId: string;
|
|
82
|
+
readonly workingDirectory: string;
|
|
83
|
+
readonly dryRun: boolean;
|
|
84
|
+
readonly startedAt: string;
|
|
85
|
+
readonly events: readonly HarnessEvent[];
|
|
86
|
+
}
|
|
87
|
+
/**
 * Table of harness code strings, typed as literals so that `HarnessCode` can
 * be derived from the values. Eight `LIMIT_*` entries are followed by
 * `MODEL_ERROR`, `TOOL_ERROR` and `INTERNAL`.
 * Note that the key `LIMIT_COMMAND_EXEC` is abbreviated while its value,
 * "HARNESS_LIMIT_COMMAND_EXECUTIONS", is not; every other value is the key
 * prefixed with "HARNESS_".
 */
export declare const HARNESS_CODES: {
|
|
88
|
+
readonly LIMIT_ITERATIONS: "HARNESS_LIMIT_ITERATIONS";
|
|
89
|
+
readonly LIMIT_MODEL_CALLS: "HARNESS_LIMIT_MODEL_CALLS";
|
|
90
|
+
readonly LIMIT_TOOL_CALLS: "HARNESS_LIMIT_TOOL_CALLS";
|
|
91
|
+
readonly LIMIT_COMMAND_EXEC: "HARNESS_LIMIT_COMMAND_EXECUTIONS";
|
|
92
|
+
readonly LIMIT_CONTEXT_SIZE: "HARNESS_LIMIT_CONTEXT_SIZE";
|
|
93
|
+
readonly LIMIT_PATCH_SIZE: "HARNESS_LIMIT_PATCH_SIZE";
|
|
94
|
+
readonly LIMIT_WALL_TIME: "HARNESS_LIMIT_WALL_TIME";
|
|
95
|
+
readonly LIMIT_FAILURE_ATTEMPTS: "HARNESS_LIMIT_FAILURE_ATTEMPTS";
|
|
96
|
+
readonly MODEL_ERROR: "HARNESS_MODEL_ERROR";
|
|
97
|
+
readonly TOOL_ERROR: "HARNESS_TOOL_ERROR";
|
|
98
|
+
readonly INTERNAL: "HARNESS_INTERNAL";
|
|
99
|
+
};
|
|
100
|
+
/**
 * Union of the string-literal values of `HARNESS_CODES`, derived with an
 * indexed access so it stays in sync when entries are added to the table.
 */
export type HarnessCode = (typeof HARNESS_CODES)[keyof typeof HARNESS_CODES];
|
|
101
|
+
/**
 * Failure description: `category` is one of the `HarnessCode` strings,
 * `message` is required text, and `detail` is optional text that may also be
 * passed explicitly as `undefined`.
 */
export interface HarnessFailure {
|
|
102
|
+
readonly category: HarnessCode;
|
|
103
|
+
readonly message: string;
|
|
104
|
+
readonly detail?: string | undefined;
|
|
105
|
+
}
|
|
106
|
+
/**
 * Fields shared by every event interface in this file that extends it. Not
 * exported. `schemaVersion` is pinned to the literal "1".
 * NOTE(review): `seq` is presumably a per-run sequence number and `ts` a
 * timestamp; neither the ordering guarantee nor the time unit is visible
 * here — confirm against the emitter.
 */
interface BaseEvent {
|
|
107
|
+
readonly schemaVersion: "1";
|
|
108
|
+
readonly runId: string;
|
|
109
|
+
readonly fingerprint: string;
|
|
110
|
+
readonly seq: number;
|
|
111
|
+
readonly ts: number;
|
|
112
|
+
}
|
|
113
|
+
/**
 * Event tagged "run:started". Adds the task type, model id and the
 * `HarnessLimits` in effect to the `BaseEvent` fields.
 */
export interface RunStartedEvent extends BaseEvent {
|
|
114
|
+
readonly type: "run:started";
|
|
115
|
+
readonly taskType: TaskType;
|
|
116
|
+
readonly modelId: string;
|
|
117
|
+
readonly limits: HarnessLimits;
|
|
118
|
+
}
|
|
119
|
+
/**
 * Event tagged "state:transition". Carries the same `from` / `to` / `reason`
 * fields as `StateTransition`, in addition to the `BaseEvent` fields.
 */
export interface StateTransitionEvent extends BaseEvent {
|
|
120
|
+
readonly type: "state:transition";
|
|
121
|
+
readonly from: HarnessStateName;
|
|
122
|
+
readonly to: HarnessStateName;
|
|
123
|
+
readonly reason: string;
|
|
124
|
+
}
|
|
125
|
+
/**
 * Event tagged "model:call:started". Records the model id together with two
 * numbers, `messageCount` and `contextBytes`; no message content is part of
 * this type.
 */
export interface ModelCallStartedEvent extends BaseEvent {
|
|
126
|
+
readonly type: "model:call:started";
|
|
127
|
+
readonly modelId: string;
|
|
128
|
+
readonly messageCount: number;
|
|
129
|
+
readonly contextBytes: number;
|
|
130
|
+
}
|
|
131
|
+
/**
 * Event tagged "model:call:completed". Records the model id, a `finishReason`
 * string, the number of tool calls, and a nested read-only `usage` object
 * holding the request id, prompt/completion token counts and latency.
 * NOTE(review): `latencyMs` is presumably milliseconds — confirm.
 */
export interface ModelCallCompletedEvent extends BaseEvent {
|
|
132
|
+
readonly type: "model:call:completed";
|
|
133
|
+
readonly modelId: string;
|
|
134
|
+
readonly finishReason: string;
|
|
135
|
+
readonly toolCallCount: number;
|
|
136
|
+
readonly usage: {
|
|
137
|
+
readonly requestId: string;
|
|
138
|
+
readonly promptTokens: number;
|
|
139
|
+
readonly completionTokens: number;
|
|
140
|
+
readonly latencyMs: number;
|
|
141
|
+
};
|
|
142
|
+
}
|
|
143
|
+
/**
 * Event tagged "model:call:failed". Records the model id plus an `errorCode`
 * and `message`. `errorCode` is typed as a plain string, not as `HarnessCode`.
 */
export interface ModelCallFailedEvent extends BaseEvent {
|
|
144
|
+
readonly type: "model:call:failed";
|
|
145
|
+
readonly modelId: string;
|
|
146
|
+
readonly errorCode: string;
|
|
147
|
+
readonly message: string;
|
|
148
|
+
}
|
|
149
|
+
/**
 * Event tagged "tool:call:started". Identifies the call by `toolName` and
 * `toolCallId`; tool arguments are not part of this type.
 */
export interface ToolCallStartedEvent extends BaseEvent {
|
|
150
|
+
readonly type: "tool:call:started";
|
|
151
|
+
readonly toolName: string;
|
|
152
|
+
readonly toolCallId: string;
|
|
153
|
+
}
|
|
154
|
+
/**
 * Event tagged "tool:call:completed". Carries the same `toolName` /
 * `toolCallId` pair as the started event plus a numeric `durationMs`.
 * NOTE(review): presumably milliseconds, per the field name — confirm.
 */
export interface ToolCallCompletedEvent extends BaseEvent {
|
|
155
|
+
readonly type: "tool:call:completed";
|
|
156
|
+
readonly toolName: string;
|
|
157
|
+
readonly toolCallId: string;
|
|
158
|
+
readonly durationMs: number;
|
|
159
|
+
}
|
|
160
|
+
/**
 * Event tagged "tool:call:failed". Carries the `toolName` / `toolCallId` pair
 * plus an `errorCode` and `message`. `errorCode` is typed as a plain string,
 * not as `HarnessCode`.
 */
export interface ToolCallFailedEvent extends BaseEvent {
|
|
161
|
+
readonly type: "tool:call:failed";
|
|
162
|
+
readonly toolName: string;
|
|
163
|
+
readonly toolCallId: string;
|
|
164
|
+
readonly errorCode: string;
|
|
165
|
+
readonly message: string;
|
|
166
|
+
}
|
|
167
|
+
/**
 * Event tagged "command:executed". Records the executable string, the number
 * of arguments (a count only; the argument values are not part of this type),
 * the exit code, a timeout flag and a duration.
 * `exitCode` may be `null`.
 * NOTE(review): presumably `null` means the process ended without an exit
 * code (e.g. killed on timeout) — confirm against the executor.
 */
export interface CommandExecutedEvent extends BaseEvent {
|
|
168
|
+
readonly type: "command:executed";
|
|
169
|
+
readonly executable: string;
|
|
170
|
+
readonly argCount: number;
|
|
171
|
+
readonly exitCode: number | null;
|
|
172
|
+
readonly timedOut: boolean;
|
|
173
|
+
readonly durationMs: number;
|
|
174
|
+
}
|
|
175
|
+
/**
 * Event tagged "sandbox:configured". Records sandbox settings: a read-only
 * list of environment variable names (`envAllowlist`), a `network` mode that
 * is either "inherit" or "none", an output size cap, a timeout, a termination
 * grace period, and whether a working directory was requested (a boolean
 * only; the path itself is not part of this type).
 * NOTE(review): units are implied by the `Bytes` / `Ms` suffixes — confirm.
 */
export interface SandboxConfiguredEvent extends BaseEvent {
|
|
176
|
+
readonly type: "sandbox:configured";
|
|
177
|
+
readonly envAllowlist: readonly string[];
|
|
178
|
+
readonly network: "inherit" | "none";
|
|
179
|
+
readonly maxOutputBytes: number;
|
|
180
|
+
readonly timeoutMs: number;
|
|
181
|
+
readonly terminationGraceMs: number;
|
|
182
|
+
readonly cwdRequested: boolean;
|
|
183
|
+
}
|
|
184
|
+
/**
 * Event tagged "patch:applied". Carries three numbers only: `changedFiles`,
 * `created` and `deleted`. File paths and diff text are not part of this type.
 * NOTE(review): whether `changedFiles` includes the created/deleted files is
 * not visible here — confirm against the patch tool.
 */
export interface PatchAppliedEvent extends BaseEvent {
|
|
185
|
+
readonly type: "patch:applied";
|
|
186
|
+
readonly changedFiles: number;
|
|
187
|
+
readonly created: number;
|
|
188
|
+
readonly deleted: number;
|
|
189
|
+
}
|
|
190
|
+
/**
 * Event tagged "reasoning:trace". Records the harness state (`phase`) the
 * trace belongs to and a `rationale` string; `modelResponse` is optional and
 * may also be passed explicitly as `undefined`.
 */
export interface ReasoningTraceEvent extends BaseEvent {
|
|
191
|
+
readonly type: "reasoning:trace";
|
|
192
|
+
readonly phase: HarnessStateName;
|
|
193
|
+
readonly rationale: string;
|
|
194
|
+
readonly modelResponse?: string | undefined;
|
|
195
|
+
}
|
|
196
|
+
/**
 * Event tagged "patch:proposed". Unlike `PatchAppliedEvent`, this event
 * carries the patch text itself (`diff`) along with the target file path and
 * the patch size as a number.
 * NOTE(review): `patchBytes` is presumably the byte length of `diff` — confirm.
 */
export interface PatchProposedEvent extends BaseEvent {
|
|
197
|
+
readonly type: "patch:proposed";
|
|
198
|
+
readonly targetFile: string;
|
|
199
|
+
readonly patchBytes: number;
|
|
200
|
+
readonly diff: string;
|
|
201
|
+
}
|
|
202
|
+
/**
 * Event tagged "verification:result". Carries a boolean `passed` flag and a
 * required `detail` string.
 */
export interface VerificationResultEvent extends BaseEvent {
|
|
203
|
+
readonly type: "verification:result";
|
|
204
|
+
readonly passed: boolean;
|
|
205
|
+
readonly detail: string;
|
|
206
|
+
}
|
|
207
|
+
/**
 * Event tagged "run:completed". `report` is required here (it is optional on
 * `RunResult`); `patchDiff` is optional and may also be passed explicitly as
 * `undefined`.
 */
export interface RunCompletedEvent extends BaseEvent {
|
|
208
|
+
readonly type: "run:completed";
|
|
209
|
+
readonly report: string;
|
|
210
|
+
readonly patchDiff?: string | undefined;
|
|
211
|
+
}
|
|
212
|
+
/**
 * Event tagged "run:cancelled". `atState` names a harness state and is
 * required; `reason` is optional and may also be passed explicitly as
 * `undefined`.
 * NOTE(review): `atState` is presumably the state the run was in when it was
 * cancelled — confirm against the emitter.
 */
export interface RunCancelledEvent extends BaseEvent {
|
|
213
|
+
readonly type: "run:cancelled";
|
|
214
|
+
readonly reason?: string | undefined;
|
|
215
|
+
readonly atState: HarnessStateName;
|
|
216
|
+
}
|
|
217
|
+
/**
 * Event tagged "run:failed". Carries a required `HarnessFailure` and the
 * harness state named by `atState`.
 * NOTE(review): `atState` is presumably the state the run was in when it
 * failed — confirm against the emitter.
 */
export interface RunFailedEvent extends BaseEvent {
|
|
218
|
+
readonly type: "run:failed";
|
|
219
|
+
readonly failure: HarnessFailure;
|
|
220
|
+
readonly atState: HarnessStateName;
|
|
221
|
+
}
|
|
222
|
+
/**
 * The four reasons a browser session close can be labelled with. Used as the
 * type of `BrowserSessionClosedEvent.reason`.
 */
export type BrowserSessionCloseReason = "explicit" | "process-exit" | "chrome-disconnected" | "idle-timeout";
|
|
223
|
+
/**
 * Browser event discriminated by `type: "browser:session-opened"`.
 * Carries the `sessionId` string, a numeric `cdpPort` and a `targetId` string.
 */
export interface BrowserSessionOpenedEvent extends BaseEvent {
|
|
224
|
+
readonly type: "browser:session-opened";
|
|
225
|
+
readonly sessionId: string;
|
|
226
|
+
readonly cdpPort: number;
|
|
227
|
+
readonly targetId: string;
|
|
228
|
+
}
|
|
229
|
+
/**
 * Browser event discriminated by `type: "browser:navigated"`.
 * Carries the `sessionId`, an `originOnly` string and `httpStatus`, which is a number or `null`.
 * NOTE(review): `originOnly` presumably holds just the URL origin (no path/query) — confirm.
 */
export interface BrowserNavigatedEvent extends BaseEvent {
|
|
230
|
+
readonly type: "browser:navigated";
|
|
231
|
+
readonly sessionId: string;
|
|
232
|
+
readonly originOnly: string;
|
|
233
|
+
readonly httpStatus: number | null;
|
|
234
|
+
}
|
|
235
|
+
/**
 * Browser event discriminated by `type: "browser:screenshot-captured"`.
 * Carries the `sessionId`, a numeric `captureSeq`, a boolean `persisted` flag, the
 * `viewportPx` dimensions (`width` / `height`) and an optional `path` string.
 * NOTE(review): `path` is presumably only set when `persisted` is true — confirm.
 */
export interface BrowserScreenshotCapturedEvent extends BaseEvent {
|
|
236
|
+
readonly type: "browser:screenshot-captured";
|
|
237
|
+
readonly sessionId: string;
|
|
238
|
+
readonly captureSeq: number;
|
|
239
|
+
readonly persisted: boolean;
|
|
240
|
+
readonly viewportPx: {
|
|
241
|
+
readonly width: number;
|
|
242
|
+
readonly height: number;
|
|
243
|
+
};
|
|
244
|
+
readonly path?: string | undefined;
|
|
245
|
+
}
|
|
246
|
+
/**
 * Browser event discriminated by `type: "browser:page-content-captured"`.
 * Carries the `sessionId`, a numeric `captureSeq` and a numeric `byteLength`; the captured
 * content itself is not a field of this event.
 */
export interface BrowserPageContentCapturedEvent extends BaseEvent {
|
|
247
|
+
readonly type: "browser:page-content-captured";
|
|
248
|
+
readonly sessionId: string;
|
|
249
|
+
readonly captureSeq: number;
|
|
250
|
+
readonly byteLength: number;
|
|
251
|
+
}
|
|
252
|
+
/**
 * Browser event discriminated by `type: "browser:session-closed"`.
 * Carries the `sessionId` and a `reason` drawn from BrowserSessionCloseReason.
 */
export interface BrowserSessionClosedEvent extends BaseEvent {
|
|
253
|
+
readonly type: "browser:session-closed";
|
|
254
|
+
readonly sessionId: string;
|
|
255
|
+
readonly reason: BrowserSessionCloseReason;
|
|
256
|
+
}
|
|
257
|
+
/**
 * Browser event discriminated by `type: "browser:trust-warning"`.
 * Carries the `sessionId` and a `warning` string.
 */
export interface BrowserTrustWarningEvent extends BaseEvent {
|
|
258
|
+
readonly type: "browser:trust-warning";
|
|
259
|
+
readonly sessionId: string;
|
|
260
|
+
readonly warning: string;
|
|
261
|
+
}
|
|
262
|
+
/**
 * Browser event discriminated by `type: "browser:error"`.
 * Carries the `sessionId`, a string `code` and a string `message`.
 */
export interface BrowserErrorEvent extends BaseEvent {
|
|
263
|
+
readonly type: "browser:error";
|
|
264
|
+
readonly sessionId: string;
|
|
265
|
+
readonly code: string;
|
|
266
|
+
readonly message: string;
|
|
267
|
+
}
|
|
268
|
+
/** Union of the seven `browser:*` event interfaces; narrow on the `type` literal of each member. */
export type BrowserEvent = BrowserSessionOpenedEvent | BrowserNavigatedEvent | BrowserScreenshotCapturedEvent | BrowserPageContentCapturedEvent | BrowserSessionClosedEvent | BrowserTrustWarningEvent | BrowserErrorEvent;
|
|
269
|
+
/** Union of every harness event interface, including the whole BrowserEvent union as its last member. */
export type HarnessEvent = RunStartedEvent | StateTransitionEvent | ModelCallStartedEvent | ModelCallCompletedEvent | ModelCallFailedEvent | ToolCallStartedEvent | ToolCallCompletedEvent | ToolCallFailedEvent | CommandExecutedEvent | SandboxConfiguredEvent | PatchAppliedEvent | ReasoningTraceEvent | PatchProposedEvent | VerificationResultEvent | RunCompletedEvent | RunCancelledEvent | RunFailedEvent | BrowserEvent;
|
|
270
|
+
export {};
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
// All harness interfaces, states, events, limits, and task types. No runtime code
|
|
2
|
+
// other than the frozen constant tables (DEFAULT_LIMITS, HARNESS_CODES, TERMINAL_STATES)
|
|
3
|
+
// that the type layer needs to expose as values. Mirrors the ADR-0003 types.ts precedent.
|
|
4
|
+
// State names grouped under TERMINAL_STATES, in their original insertion order.
const terminalStateNames = ["completed", "cancelled", "failed", "limit-exceeded"];
/** Set of the four state names listed above; use `TERMINAL_STATES.has(name)` for membership checks. */
export const TERMINAL_STATES = new Set(terminalStateNames);
|
|
10
|
+
/**
 * Default numeric limits for a harness run.
 *
 * The table is frozen so that no importer can mutate the shared defaults in place;
 * callers that need different values spread it into a new object
 * (`{ ...DEFAULT_LIMITS, ...overrides }`), which still works on a frozen source.
 */
export const DEFAULT_LIMITS = Object.freeze({
    maxIterations: 10,
    maxModelCalls: 20,
    maxToolCalls: 30,
    maxCommandExecutions: 10,
    maxContextBytes: 512_000,
    maxPatchBytes: 65_536,
    maxWallTimeMs: 300_000,
    maxFailureAttempts: 3,
});
|
|
20
|
+
// ─── Failure taxonomy ─────────────────────────────────────────────────────────
|
|
21
|
+
/**
 * Failure-code table: maps short keys to the `HARNESS_*` code strings.
 *
 * Frozen so the shared table cannot be modified by an importer at runtime. Note that
 * `LIMIT_COMMAND_EXEC` maps to the longer string `"HARNESS_LIMIT_COMMAND_EXECUTIONS"`.
 */
export const HARNESS_CODES = Object.freeze({
    LIMIT_ITERATIONS: "HARNESS_LIMIT_ITERATIONS",
    LIMIT_MODEL_CALLS: "HARNESS_LIMIT_MODEL_CALLS",
    LIMIT_TOOL_CALLS: "HARNESS_LIMIT_TOOL_CALLS",
    LIMIT_COMMAND_EXEC: "HARNESS_LIMIT_COMMAND_EXECUTIONS",
    LIMIT_CONTEXT_SIZE: "HARNESS_LIMIT_CONTEXT_SIZE",
    LIMIT_PATCH_SIZE: "HARNESS_LIMIT_PATCH_SIZE",
    LIMIT_WALL_TIME: "HARNESS_LIMIT_WALL_TIME",
    LIMIT_FAILURE_ATTEMPTS: "HARNESS_LIMIT_FAILURE_ATTEMPTS",
    MODEL_ERROR: "HARNESS_MODEL_ERROR",
    TOOL_ERROR: "HARNESS_TOOL_ERROR",
    INTERNAL: "HARNESS_INTERNAL",
});
|
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
export { SDK_VERSION, runAgent, type SdkAgentConfig, type SdkEvidenceOptions, } from "./sdk/index.js";
|
|
2
|
+
export * from "./harness/index.js";
|
|
3
|
+
export * from "./gateway/index.js";
|
|
4
|
+
export * from "./workspace/index.js";
|
|
5
|
+
export * from "./verification/index.js";
|
|
6
|
+
export { summarizeForAudit } from "./workspace/index.js";
|
|
7
|
+
export { summarizeForAudit as summarizeVerificationForAudit } from "./verification/index.js";
|
|
8
|
+
export { generateUnitTests, renderMarkdownReport as renderUnitTestReport, UNIT_TEST_WORKFLOW_DESCRIPTOR, DEFAULT_WORKFLOW_LIMITS, detectConventions, isTestPath, type AddedTestFile, type FileNamingStyle, type TestConventions, type UnitTestTarget, type UnitTestWorkflowDeps, type UnitTestWorkflowInput, type UnitTestWorkflowReport, type WorkflowDescriptor, type WorkflowEvent, type WorkflowEventSink, type WorkflowInputSpec, type WorkflowLimits, type WorkflowStatus, } from "./workflows/index.js";
|
|
9
|
+
export { investigateBug, renderBugMarkdownReport as renderBugInvestigationReport, BUG_INVESTIGATION_WORKFLOW_DESCRIPTOR, DEFAULT_BUG_WORKFLOW_LIMITS, isSensitivePath, isElevatedReviewPath, parseFailureEvidence, type BugInvestigationDeps, type BugInvestigationEvent, type BugInvestigationInput, type BugInvestigationReport, type BugReportInput, type BugWorkflowEventSink, type BugWorkflowLimits, type BugWorkflowStatus, type ChangedFile, type FailureEvidence, type FailureFrame, type Hypothesis, type VerifiedFindings, } from "./workflows/index.js";
|
|
10
|
+
export { buildEvidenceManifest, persistEvidence, createAuditRedactor, createNodeEvidenceStore, createInMemoryEvidenceStore, aggregateUsage, resolveCostClass, listEvidence, loadEvidence, applyRetention, buildEvidenceReport, renderEvidenceReport, assertValidRunId, EVIDENCE_SCHEMA_VERSION, DEFAULT_RETENTION, type AuditRedactionConfig, type EvidenceBuildInput, type EvidenceCommandExecution, type EvidenceDeps, type EvidenceListEntry, type EvidenceManifest, type EvidenceModel, type EvidencePatch, type EvidenceReasoningEntry, type EvidenceReport, type EvidenceRunIdentity, type EvidenceStateTransition, type EvidenceStore, type EvidenceToolCall, type EvidenceUsageTotals, type EvidenceVerificationResult, type RetentionPolicy, } from "./audit/index.js";
|
|
11
|
+
export { runEvaluationSuite, createScriptedModelPort, EVAL_SCORECARD_SCHEMA_VERSION, type ScriptedModelPort, type EvalScorecard, type EvaluationFixture, type EvaluationDimension, type EvaluationMode, type DimensionResult, type DimensionOutcome, type ScorecardEntry, type ScorecardSummary, type SurfaceParityResult, type FixtureRunResult, type FixtureOracle, type WorkflowKind, } from "./evaluations/index.js";
|
package/dist/index.js
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
// Package-root boundary: the public surface is the SDK entry points (SDK_VERSION, runAgent), the
|
|
2
|
+
// agent harness, the model gateway, and the layers re-exported below. The harness barrel already
|
|
3
|
+
// re-exports the session/run API the SDK surfaces, so SDK names are pulled explicitly to avoid duplicate star re-exports.
|
|
4
|
+
export { SDK_VERSION, runAgent, } from "./sdk/index.js";
|
|
5
|
+
export * from "./harness/index.js";
|
|
6
|
+
export * from "./gateway/index.js";
|
|
7
|
+
export * from "./workspace/index.js";
|
|
8
|
+
export * from "./verification/index.js";
|
|
9
|
+
// Both the workspace and verification barrels expose a `summarizeForAudit`. An explicit re-export
|
|
10
|
+
// takes precedence over the two star exports and resolves the ambiguity at the package root: the
|
|
11
|
+
// canonical root `summarizeForAudit` is the workspace one (established by ADR-0005), and the
|
|
12
|
+
// verification audit projection is additionally surfaced under an unambiguous alias. Inside
|
|
13
|
+
// ./verification/index.js the function keeps its layer-local name `summarizeForAudit` (ADR-0007).
|
|
14
|
+
export { summarizeForAudit } from "./workspace/index.js";
|
|
15
|
+
export { summarizeForAudit as summarizeVerificationForAudit } from "./verification/index.js";
|
|
16
|
+
// Reviewable developer-assist workflows (ADR-0008). Exported explicitly rather than via `export *`
|
|
17
|
+
// because the workflow event family reuses the harness event-type NAMES (ModelCallStartedEvent,
|
|
18
|
+
// ModelCallCompletedEvent) by structural convention — a star re-export would collide with the
|
|
19
|
+
// harness ones already surfaced above. The WorkflowEvent union is surfaced; the two name-colliding
|
|
20
|
+
// member interfaces are reachable via that union.
|
|
21
|
+
export { generateUnitTests, renderMarkdownReport as renderUnitTestReport, UNIT_TEST_WORKFLOW_DESCRIPTOR, DEFAULT_WORKFLOW_LIMITS, detectConventions, isTestPath, } from "./workflows/index.js";
|
|
22
|
+
// Bug-investigation workflow (ADR-0009). Exported explicitly (not via `export *`) for the same
|
|
23
|
+
// reason as the unit-test workflow: the event family reuses harness event-type NAMES by structural
|
|
24
|
+
// convention, so a star re-export would collide with the harness ones surfaced above. The Markdown
|
|
25
|
+
// renderer is aliased to renderBugInvestigationReport (mirroring renderUnitTestReport) so the two
|
|
26
|
+
// workflow renderers do not collide at the package root.
|
|
27
|
+
export { investigateBug, renderBugMarkdownReport as renderBugInvestigationReport, BUG_INVESTIGATION_WORKFLOW_DESCRIPTOR, DEFAULT_BUG_WORKFLOW_LIMITS, isSensitivePath, isElevatedReviewPath, parseFailureEvidence, } from "./workflows/index.js";
|
|
28
|
+
// Audit ledger / evidence manifests (ADR-0010). Exported explicitly (not via `export *`) to keep the
|
|
29
|
+
// public surface auditable, matching the workflow precedent above. None of these names collides with
|
|
30
|
+
// an existing root export; in particular the layer does NOT export a bare `summarizeForAudit` or
|
|
31
|
+
// `redact` (it composes them internally), so the canonical root `summarizeForAudit` is unaffected.
|
|
32
|
+
export { buildEvidenceManifest, persistEvidence, createAuditRedactor, createNodeEvidenceStore, createInMemoryEvidenceStore, aggregateUsage, resolveCostClass, listEvidence, loadEvidence, applyRetention, buildEvidenceReport, renderEvidenceReport, assertValidRunId, EVIDENCE_SCHEMA_VERSION, DEFAULT_RETENTION, } from "./audit/index.js";
|
|
33
|
+
// Wave 1 evaluation harness (ADR-0012 D11). Exported explicitly (not via `export *`) to keep the
|
|
34
|
+
// public surface auditable, matching the workflow/audit precedent above. None of these names
|
|
35
|
+
// collides with an existing root export.
|
|
36
|
+
export { runEvaluationSuite, createScriptedModelPort, EVAL_SCORECARD_SCHEMA_VERSION, } from "./evaluations/index.js";
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
export declare const SDK_VERSION = "0.1.0-beta.0";
|
|
2
|
+
export { createSession, type AgentConfig, type AgentSession, type HarnessDeps, type RunResult, type TaskInput, type TaskType, } from "../harness/index.js";
|
|
3
|
+
export { runAgent, type SdkAgentConfig, type SdkEvidenceOptions } from "./run-agent.js";
|
|
4
|
+
export { buildWorkspaceSummary, detectWorkspace, summarizeForAudit, type AuditEntry, type AuditSummary, type ContextEntrySummary, type ContextPackSummary, type WorkspaceInfo, type WorkspaceSummary, } from "../workspace/index.js";
|
|
5
|
+
export { buildVerificationPlan, buildVerificationSummary, classifyOutcome, detectScripts, renderMarkdownSummary, resolveTargetedTests, runVerification, summarizeForAudit as summarizeVerificationForAudit, DEFAULT_VERIFICATION_LIMITS, type ResourceLimitDecision, type VerificationAuditSummary, type VerificationDeps, type VerificationKind, type VerificationPlan, type VerificationReport, type VerificationResourceLimits, type VerificationResult, type VerificationStatus, type VerificationStep, type VerificationSummary, } from "../verification/index.js";
|
|
6
|
+
export { generateUnitTests, renderMarkdownReport, UNIT_TEST_WORKFLOW_DESCRIPTOR, DEFAULT_WORKFLOW_LIMITS, detectConventions, isTestPath, type AddedTestFile, type FileNamingStyle, type TestConventions, type UnitTestTarget, type UnitTestWorkflowDeps, type UnitTestWorkflowInput, type UnitTestWorkflowReport, type WorkflowDescriptor, type WorkflowEvent, type WorkflowEventSink, type WorkflowInputSpec, type WorkflowLimits, type WorkflowStatus, } from "../workflows/index.js";
|
|
7
|
+
export { investigateBug, renderBugMarkdownReport as renderBugInvestigationReport, BUG_INVESTIGATION_WORKFLOW_DESCRIPTOR, DEFAULT_BUG_WORKFLOW_LIMITS, isSensitivePath, isElevatedReviewPath, parseFailureEvidence, type BugInvestigationDeps, type BugInvestigationEvent, type BugInvestigationInput, type BugInvestigationReport, type BugReportInput, type BugWorkflowEventSink, type BugWorkflowLimits, type BugWorkflowStatus, type ChangedFile, type FailureEvidence, type FailureFrame, type Hypothesis, type VerifiedFindings, } from "../workflows/index.js";
|
|
8
|
+
export { buildEvidenceManifest, persistEvidence, createAuditRedactor, createNodeEvidenceStore, createInMemoryEvidenceStore, aggregateUsage, resolveCostClass, listEvidence, loadEvidence, applyRetention, buildEvidenceReport, renderEvidenceReport, assertValidRunId, EVIDENCE_SCHEMA_VERSION, DEFAULT_RETENTION, type AuditRedactionConfig, type EvidenceBuildInput, type EvidenceCommandExecution, type EvidenceDeps, type EvidenceListEntry, type EvidenceManifest, type EvidenceModel, type EvidencePatch, type EvidenceReasoningEntry, type EvidenceReport, type EvidenceRunIdentity, type EvidenceStateTransition, type EvidenceStore, type EvidenceToolCall, type EvidenceUsageTotals, type EvidenceVerificationResult, type RetentionPolicy, } from "../audit/index.js";
|
|
9
|
+
export { runEvaluationSuite, createScriptedModelPort, EVAL_SCORECARD_SCHEMA_VERSION, type ScriptedModelPort, type EvalScorecard, type EvaluationFixture, type EvaluationDimension, type EvaluationMode, type DimensionResult, type DimensionOutcome, type ScorecardEntry, type ScorecardSummary, type SurfaceParityResult, type FixtureRunResult, type FixtureOracle, type WorkflowKind, } from "../evaluations/index.js";
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
// Single-sourced package version; CLI and SDK both read this to avoid drift.
|
|
2
|
+
export const SDK_VERSION = "0.1.0-beta.0";
|
|
3
|
+
// The typed agent surface. AgentConfig, the session factory, the run result, and the
|
|
4
|
+
// session handle all live in the harness module (ADR-0004); the SDK re-exports them so
|
|
5
|
+
// callers import the agent API from one place.
|
|
6
|
+
export { createSession, } from "../harness/index.js";
|
|
7
|
+
export { runAgent } from "./run-agent.js";
|
|
8
|
+
// Safe workspace context surface (ADR-0005). The only file-read path is the
|
|
9
|
+
// boundary-checked one; no export returns raw arbitrary file content.
|
|
10
|
+
export { buildWorkspaceSummary, detectWorkspace, summarizeForAudit, } from "../workspace/index.js";
|
|
11
|
+
// Verification orchestrator surface (ADR-0007). Verification reuses the #6 command boundary
|
|
12
|
+
// unchanged; these are the plan/run/summary entry points and their JSON-serializable shapes
|
|
13
|
+
// (the stable contract the #10 audit ledger persists). The audit projection is exposed under an
|
|
14
|
+
// explicit alias because the workspace surface already owns `summarizeForAudit`.
|
|
15
|
+
export { buildVerificationPlan, buildVerificationSummary, classifyOutcome, detectScripts, renderMarkdownSummary, resolveTargetedTests, runVerification, summarizeForAudit as summarizeVerificationForAudit, DEFAULT_VERIFICATION_LIMITS, } from "../verification/index.js";
|
|
16
|
+
// Reviewable developer-assist workflows (ADR-0008). The unit-test generation workflow is the first
|
|
17
|
+
// programmatic workflow surface: generateUnitTests is the single entry, the descriptor lets a UI
|
|
18
|
+
// (#13) render the workflow without the implementation, and the WorkflowEvent union plus report
|
|
19
|
+
// types are the stable contract the #10 audit ledger persists.
|
|
20
|
+
export { generateUnitTests, renderMarkdownReport, UNIT_TEST_WORKFLOW_DESCRIPTOR, DEFAULT_WORKFLOW_LIMITS, detectConventions, isTestPath, } from "../workflows/index.js";
|
|
21
|
+
// Bug-investigation workflow (ADR-0009). The second programmatic workflow surface: investigateBug
|
|
22
|
+
// is the single entry, BUG_INVESTIGATION_WORKFLOW_DESCRIPTOR lets a UI (#13) render it without the
|
|
23
|
+
// implementation, and the BugInvestigationReport (with its structural verified/hypothesis split)
|
|
24
|
+
// plus the BugInvestigationEvent union are the stable contract the #10 audit ledger persists. The
|
|
25
|
+
// Markdown renderer is aliased to avoid colliding with the unit-test workflow's renderMarkdownReport.
|
|
26
|
+
export { investigateBug, renderBugMarkdownReport as renderBugInvestigationReport, BUG_INVESTIGATION_WORKFLOW_DESCRIPTOR, DEFAULT_BUG_WORKFLOW_LIMITS, isSensitivePath, isElevatedReviewPath, parseFailureEvidence, } from "../workflows/index.js";
|
|
27
|
+
// Audit ledger / evidence manifests (ADR-0010). The first persistent-artifact surface: persistEvidence
|
|
28
|
+
// builds → redacts-by-construction → writes a redacted, versioned EvidenceManifest, and listEvidence /
|
|
29
|
+
// loadEvidence are the #13 UI seam. Exported via an explicit named block (not `export *`) to keep the
|
|
30
|
+
// surface auditable; none of these names collides with an existing layer export (the layer does NOT
|
|
31
|
+
// export a bare `summarizeForAudit` or `redact` — it composes them internally).
|
|
32
|
+
export { buildEvidenceManifest, persistEvidence, createAuditRedactor, createNodeEvidenceStore, createInMemoryEvidenceStore, aggregateUsage, resolveCostClass, listEvidence, loadEvidence, applyRetention, buildEvidenceReport, renderEvidenceReport, assertValidRunId, EVIDENCE_SCHEMA_VERSION, DEFAULT_RETENTION, } from "../audit/index.js";
|
|
33
|
+
// Wave 1 evaluation harness (ADR-0012 D11). The deterministic offline runner, the product-code
|
|
34
|
+
// scripted-model replay port, and the versioned scorecard schema, exported via an explicit named
|
|
35
|
+
// block (no `export *`). ScriptedModelPort is surfaced so external callers can build replay tooling
|
|
36
|
+
// without the full runner. No name collides with an existing SDK export.
|
|
37
|
+
export { runEvaluationSuite, createScriptedModelPort, EVAL_SCORECARD_SCHEMA_VERSION, } from "../evaluations/index.js";
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
import type { AuditRedactionConfig, BuildOptions, RetentionPolicy } from "../audit/types.js";
|
|
2
|
+
import type { EvidenceStore } from "../audit/store.js";
|
|
3
|
+
import { type AgentConfig, type AgentSession, type HarnessDeps, type TaskInput } from "../harness/index.js";
|
|
4
|
+
import type { EnvSource } from "../gateway/index.js";
|
|
5
|
+
/**
 * Optional evidence-persistence settings accepted by the SDK `runAgent` wrapper.
 * As consumed by run-agent.js: `write === false` skips persistence entirely; `store` and
 * `env` are forwarded as the dependency object of `persistEvidence`; `retention` is passed
 * as its third argument; `redaction` and `options` are copied onto the build input when set.
 * Every field may be omitted or explicitly `undefined`.
 */
export interface SdkEvidenceOptions {
|
|
6
|
+
readonly write?: boolean | undefined;
|
|
7
|
+
readonly store?: EvidenceStore | undefined;
|
|
8
|
+
readonly env?: EnvSource | undefined;
|
|
9
|
+
readonly retention?: RetentionPolicy | undefined;
|
|
10
|
+
readonly redaction?: AuditRedactionConfig | undefined;
|
|
11
|
+
readonly options?: BuildOptions | undefined;
|
|
12
|
+
}
|
|
13
|
+
/** AgentConfig extended with an optional `evidence` block (see SdkEvidenceOptions). */
export interface SdkAgentConfig extends AgentConfig {
|
|
14
|
+
readonly evidence?: SdkEvidenceOptions | undefined;
|
|
15
|
+
}
|
|
16
|
+
/**
 * SDK entry point: creates a harness session for `task` and returns it with a `result`
 * promise that triggers evidence persistence (unless `config.evidence.write` is `false`)
 * before yielding the run result.
 */
export declare function runAgent(task: TaskInput, config: SdkAgentConfig, deps: HarnessDeps): AgentSession;
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
// SDK-level runAgent wrapper. The harness createSession remains the deterministic core; this wrapper
|
|
2
|
+
// adds the #10 SDK contract that completed SDK runs persist a redacted EvidenceManifest by default.
|
|
3
|
+
import { persistEvidence } from "../audit/persist.js";
|
|
4
|
+
import { createSession, DEFAULT_LIMITS, HARNESS_VERSION, } from "../harness/index.js";
|
|
5
|
+
/**
 * Returns the limits recorded for a run: DEFAULT_LIMITS overlaid with `config.limits`.
 * Keys present in `config.limits` win; a missing `config.limits` yields a copy of the defaults.
 */
function resolveLimits(config) {
    const overrides = config.limits;
    const effective = { ...DEFAULT_LIMITS, ...overrides };
    return effective;
}
|
|
8
|
+
/**
 * Returns `config.dryRun`, defaulting to `true` when it is `undefined` or `null`.
 * An explicit `false` is preserved.
 */
function resolveDryRun(config) {
    const { dryRun } = config;
    if (dryRun === undefined || dryRun === null) {
        return true;
    }
    return dryRun;
}
|
|
11
|
+
/**
 * Assembles the run-manifest object handed to the evidence builder.
 * Identity fields (`runId`, `fingerprint`, `events`) come from `result`; `limits` and
 * `dryRun` are resolved from `config` with their defaults; `startedAt` is
 * `result.startedAt` converted to an ISO-8601 string.
 */
function buildRunManifest(task, config, result) {
    const limits = resolveLimits(config);
    const dryRun = resolveDryRun(config);
    const startedAt = new Date(result.startedAt).toISOString();
    // Key order is kept identical to the original so serialized manifests do not change.
    const manifest = {
        runId: result.runId,
        fingerprint: result.fingerprint,
        harnessVersion: HARNESS_VERSION,
        taskType: task.taskType,
        taskInput: task,
        limits,
        modelId: config.model,
        workingDirectory: config.workingDirectory,
        dryRun,
        startedAt,
        events: result.events,
    };
    return manifest;
}
|
|
26
|
+
/**
 * Builds the input object for `persistEvidence`: the run `result`, its manifest, and —
 * only when they are not `undefined` — the caller's `redaction` and `options` settings.
 */
function evidenceBuildInput(task, config, result, evidence) {
    const manifest = buildRunManifest(task, config, result);
    // Undefined entries are dropped so the keys are absent rather than present-but-undefined.
    const optionalParts = Object.fromEntries([
        ["redaction", evidence?.redaction],
        ["options", evidence?.options],
    ].filter(([, value]) => value !== undefined));
    return { result, manifest, ...optionalParts };
}
|
|
34
|
+
/**
 * Picks the dependency overrides (`store`, `env`) out of the evidence options.
 * A key is included only when its value is not `undefined`; a missing `evidence`
 * yields an empty object.
 */
function evidenceDeps(evidence) {
    const candidates = [
        ["store", evidence?.store],
        ["env", evidence?.env],
    ];
    return Object.fromEntries(candidates.filter(([, value]) => value !== undefined));
}
|
|
40
|
+
/**
 * Persists evidence for a finished run unless the caller opted out with
 * `config.evidence.write === false`. Any other value of `write` (including a missing
 * `evidence` block) means "write".
 * NOTE(review): the return value of `persistEvidence` is discarded — if it is
 * asynchronous, its completion and failures are not observed here; confirm.
 */
function persistRunEvidence(task, config, result) {
    const { evidence } = config;
    const writeDisabled = evidence?.write === false;
    if (!writeDisabled) {
        const buildInput = evidenceBuildInput(task, config, result, evidence);
        const deps = evidenceDeps(evidence);
        persistEvidence(buildInput, deps, evidence?.retention);
    }
}
|
|
47
|
+
/**
 * SDK wrapper around `createSession`.
 * Returns a shallow copy of the session whose `result` promise first runs evidence
 * persistence for the resolved run result and then yields that same result. An exception
 * thrown while persisting rejects the returned `result` promise.
 */
export function runAgent(task, config, deps) {
    const session = createSession(task, config, deps);
    const resultWithEvidence = session.result.then((runResult) => {
        persistRunEvidence(task, config, runResult);
        return runResult;
    });
    return { ...session, result: resultWithEvidence };
}
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
import { WebSocket } from "ws";
|
|
2
|
+
/**
 * Read-only set of strings.
 * NOTE(review): presumably the CDP method names the client is allowed to send — confirm in the implementation.
 */
export declare const PERMITTED_CDP_METHODS: ReadonlySet<string>;
|
|
3
|
+
/**
 * Optional construction settings for CdpClient: a `timeoutMs` number and a
 * `socketFactory` that builds the `ws` WebSocket for a given URL.
 * NOTE(review): `socketFactory` looks like a seam for injecting a test socket — confirm.
 */
export interface CdpClientOptions {
|
|
4
|
+
readonly timeoutMs?: number;
|
|
5
|
+
readonly socketFactory?: (url: string) => WebSocket;
|
|
6
|
+
}
|
|
7
|
+
/**
 * Callback type registered through `CdpClient.onEvent`. Receives an object with the
 * event `method` name, untyped `params`, and an optional `sessionId`.
 */
export type CdpEventListener = (event: {
|
|
8
|
+
readonly method: string;
|
|
9
|
+
readonly params: unknown;
|
|
10
|
+
readonly sessionId?: string;
|
|
11
|
+
}) => void;
|
|
12
|
+
/** Callback type registered through `CdpClient.onClose`; receives a `reason` string. */
export type CdpCloseListener = (reason: string) => void;
|
|
13
|
+
/**
 * Declaration of the CDP client class.
 * Constructed from a URL and optional CdpClientOptions. Public surface: `connect()`
 * (promise), `onEvent` / `onClose` (each returns a function, presumably an unsubscribe —
 * confirm), `send(method, params?, sessionId?)` (promise of the typed reply), `close()`,
 * `isClosed()` and `closeCause()` (string or `undefined`). All other members are private.
 */
export declare class CdpClient {
|
|
14
|
+
private readonly socket;
|
|
15
|
+
private readonly timeoutMs;
|
|
16
|
+
private readonly pending;
|
|
17
|
+
private readonly listeners;
|
|
18
|
+
private readonly closeListeners;
|
|
19
|
+
private nextId;
|
|
20
|
+
private connectPromise;
|
|
21
|
+
private closed;
|
|
22
|
+
private closeReason;
|
|
23
|
+
constructor(url: string, options?: CdpClientOptions);
|
|
24
|
+
connect(): Promise<void>;
|
|
25
|
+
onEvent(listener: CdpEventListener): () => void;
|
|
26
|
+
onClose(listener: CdpCloseListener): () => void;
|
|
27
|
+
send<T = unknown>(method: string, params?: Record<string, unknown>, sessionId?: string): Promise<T>;
|
|
28
|
+
close(): void;
|
|
29
|
+
isClosed(): boolean;
|
|
30
|
+
closeCause(): string | undefined;
|
|
31
|
+
private handleMessage;
|
|
32
|
+
private dispatchEvent;
|
|
33
|
+
private resolvePending;
|
|
34
|
+
private handleSocketClosed;
|
|
35
|
+
}
|