@vauban-org/agent-sdk 0.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CONTRACT.md +6968 -0
- package/README.md +205 -0
- package/dist/adapters/hitl/memory-state-store.d.ts +23 -0
- package/dist/adapters/hitl/memory-state-store.d.ts.map +1 -0
- package/dist/adapters/hitl/memory-state-store.js +72 -0
- package/dist/adapters/hitl/memory-state-store.js.map +1 -0
- package/dist/adapters/hitl/postgres-state-store.d.ts +29 -0
- package/dist/adapters/hitl/postgres-state-store.d.ts.map +1 -0
- package/dist/adapters/hitl/postgres-state-store.js +103 -0
- package/dist/adapters/hitl/postgres-state-store.js.map +1 -0
- package/dist/adapters/llm/anthropic-direct.d.ts +28 -0
- package/dist/adapters/llm/anthropic-direct.d.ts.map +1 -0
- package/dist/adapters/llm/anthropic-direct.js +163 -0
- package/dist/adapters/llm/anthropic-direct.js.map +1 -0
- package/dist/adapters/llm/cascade.d.ts +27 -0
- package/dist/adapters/llm/cascade.d.ts.map +1 -0
- package/dist/adapters/llm/cascade.js +88 -0
- package/dist/adapters/llm/cascade.js.map +1 -0
- package/dist/adapters/llm/litellm.d.ts +28 -0
- package/dist/adapters/llm/litellm.d.ts.map +1 -0
- package/dist/adapters/llm/litellm.js +239 -0
- package/dist/adapters/llm/litellm.js.map +1 -0
- package/dist/adapters/messaging/console.d.ts +32 -0
- package/dist/adapters/messaging/console.d.ts.map +1 -0
- package/dist/adapters/messaging/console.js +47 -0
- package/dist/adapters/messaging/console.js.map +1 -0
- package/dist/adapters/messaging/discord.d.ts +34 -0
- package/dist/adapters/messaging/discord.d.ts.map +1 -0
- package/dist/adapters/messaging/discord.js +86 -0
- package/dist/adapters/messaging/discord.js.map +1 -0
- package/dist/adapters/messaging/mcp.d.ts +53 -0
- package/dist/adapters/messaging/mcp.d.ts.map +1 -0
- package/dist/adapters/messaging/mcp.js +43 -0
- package/dist/adapters/messaging/mcp.js.map +1 -0
- package/dist/adapters/messaging/slack.d.ts +21 -0
- package/dist/adapters/messaging/slack.d.ts.map +1 -0
- package/dist/adapters/messaging/slack.js +71 -0
- package/dist/adapters/messaging/slack.js.map +1 -0
- package/dist/adapters/messaging/telegram.d.ts +31 -0
- package/dist/adapters/messaging/telegram.d.ts.map +1 -0
- package/dist/adapters/messaging/telegram.js +85 -0
- package/dist/adapters/messaging/telegram.js.map +1 -0
- package/dist/adapters/registry/memory.d.ts +40 -0
- package/dist/adapters/registry/memory.d.ts.map +1 -0
- package/dist/adapters/registry/memory.js +71 -0
- package/dist/adapters/registry/memory.js.map +1 -0
- package/dist/adapters/registry/postgres.d.ts +39 -0
- package/dist/adapters/registry/postgres.d.ts.map +1 -0
- package/dist/adapters/registry/postgres.js +124 -0
- package/dist/adapters/registry/postgres.js.map +1 -0
- package/dist/agents/index.d.ts +2 -0
- package/dist/agents/index.d.ts.map +1 -0
- package/dist/agents/index.js +3 -0
- package/dist/agents/index.js.map +1 -0
- package/dist/agents/trading.d.ts +99 -0
- package/dist/agents/trading.d.ts.map +1 -0
- package/dist/agents/trading.js +39 -0
- package/dist/agents/trading.js.map +1 -0
- package/dist/auth/errors.d.ts +38 -0
- package/dist/auth/errors.d.ts.map +1 -0
- package/dist/auth/errors.js +57 -0
- package/dist/auth/errors.js.map +1 -0
- package/dist/auth/nonce-store.d.ts +52 -0
- package/dist/auth/nonce-store.d.ts.map +1 -0
- package/dist/auth/nonce-store.js +55 -0
- package/dist/auth/nonce-store.js.map +1 -0
- package/dist/auth/sign-event.d.ts +28 -0
- package/dist/auth/sign-event.d.ts.map +1 -0
- package/dist/auth/sign-event.js +42 -0
- package/dist/auth/sign-event.js.map +1 -0
- package/dist/auth/verify-event.d.ts +36 -0
- package/dist/auth/verify-event.d.ts.map +1 -0
- package/dist/auth/verify-event.js +78 -0
- package/dist/auth/verify-event.js.map +1 -0
- package/dist/boot/init-sdk.d.ts +69 -0
- package/dist/boot/init-sdk.d.ts.map +1 -0
- package/dist/boot/init-sdk.js +53 -0
- package/dist/boot/init-sdk.js.map +1 -0
- package/dist/boot/load-agent-context.d.ts +82 -0
- package/dist/boot/load-agent-context.d.ts.map +1 -0
- package/dist/boot/load-agent-context.js +90 -0
- package/dist/boot/load-agent-context.js.map +1 -0
- package/dist/boot/load-recent-memory.d.ts +47 -0
- package/dist/boot/load-recent-memory.d.ts.map +1 -0
- package/dist/boot/load-recent-memory.js +56 -0
- package/dist/boot/load-recent-memory.js.map +1 -0
- package/dist/budget/budget-state.d.ts +88 -0
- package/dist/budget/budget-state.d.ts.map +1 -0
- package/dist/budget/budget-state.js +172 -0
- package/dist/budget/budget-state.js.map +1 -0
- package/dist/budget/index.d.ts +3 -0
- package/dist/budget/index.d.ts.map +1 -0
- package/dist/budget/index.js +2 -0
- package/dist/budget/index.js.map +1 -0
- package/dist/clients/agents.d.ts +40 -0
- package/dist/clients/agents.d.ts.map +1 -0
- package/dist/clients/agents.js +85 -0
- package/dist/clients/agents.js.map +1 -0
- package/dist/clients/index.d.ts +7 -0
- package/dist/clients/index.d.ts.map +1 -0
- package/dist/clients/index.js +7 -0
- package/dist/clients/index.js.map +1 -0
- package/dist/clients/messaging-adapters.d.ts +51 -0
- package/dist/clients/messaging-adapters.d.ts.map +1 -0
- package/dist/clients/messaging-adapters.js +118 -0
- package/dist/clients/messaging-adapters.js.map +1 -0
- package/dist/clients/pipelines.d.ts +43 -0
- package/dist/clients/pipelines.d.ts.map +1 -0
- package/dist/clients/pipelines.js +96 -0
- package/dist/clients/pipelines.js.map +1 -0
- package/dist/compute/strategies/best-of-n.d.ts +46 -0
- package/dist/compute/strategies/best-of-n.d.ts.map +1 -0
- package/dist/compute/strategies/best-of-n.js +112 -0
- package/dist/compute/strategies/best-of-n.js.map +1 -0
- package/dist/compute/strategies/bon-mav.d.ts +65 -0
- package/dist/compute/strategies/bon-mav.d.ts.map +1 -0
- package/dist/compute/strategies/bon-mav.js +154 -0
- package/dist/compute/strategies/bon-mav.js.map +1 -0
- package/dist/compute/strategies/single-shot.d.ts +17 -0
- package/dist/compute/strategies/single-shot.d.ts.map +1 -0
- package/dist/compute/strategies/single-shot.js +36 -0
- package/dist/compute/strategies/single-shot.js.map +1 -0
- package/dist/compute/types.d.ts +30 -0
- package/dist/compute/types.d.ts.map +1 -0
- package/dist/compute/types.js +8 -0
- package/dist/compute/types.js.map +1 -0
- package/dist/compute/verifier.d.ts +55 -0
- package/dist/compute/verifier.d.ts.map +1 -0
- package/dist/compute/verifier.js +35 -0
- package/dist/compute/verifier.js.map +1 -0
- package/dist/compute/with-compute.d.ts +50 -0
- package/dist/compute/with-compute.d.ts.map +1 -0
- package/dist/compute/with-compute.js +69 -0
- package/dist/compute/with-compute.js.map +1 -0
- package/dist/constitution/axioms.d.ts +65 -0
- package/dist/constitution/axioms.d.ts.map +1 -0
- package/dist/constitution/axioms.js +496 -0
- package/dist/constitution/axioms.js.map +1 -0
- package/dist/constitution/gate.d.ts +47 -0
- package/dist/constitution/gate.d.ts.map +1 -0
- package/dist/constitution/gate.js +143 -0
- package/dist/constitution/gate.js.map +1 -0
- package/dist/constitution/index.d.ts +20 -0
- package/dist/constitution/index.d.ts.map +1 -0
- package/dist/constitution/index.js +15 -0
- package/dist/constitution/index.js.map +1 -0
- package/dist/constitution/scorer.d.ts +67 -0
- package/dist/constitution/scorer.d.ts.map +1 -0
- package/dist/constitution/scorer.js +111 -0
- package/dist/constitution/scorer.js.map +1 -0
- package/dist/constitution/signal.d.ts +41 -0
- package/dist/constitution/signal.d.ts.map +1 -0
- package/dist/constitution/signal.js +61 -0
- package/dist/constitution/signal.js.map +1 -0
- package/dist/constitution/types.d.ts +295 -0
- package/dist/constitution/types.d.ts.map +1 -0
- package/dist/constitution/types.js +59 -0
- package/dist/constitution/types.js.map +1 -0
- package/dist/counterfactual/index.d.ts +11 -0
- package/dist/counterfactual/index.d.ts.map +1 -0
- package/dist/counterfactual/index.js +10 -0
- package/dist/counterfactual/index.js.map +1 -0
- package/dist/counterfactual/replay-with-alt.d.ts +137 -0
- package/dist/counterfactual/replay-with-alt.d.ts.map +1 -0
- package/dist/counterfactual/replay-with-alt.js +191 -0
- package/dist/counterfactual/replay-with-alt.js.map +1 -0
- package/dist/dataops/index.d.ts +5 -0
- package/dist/dataops/index.d.ts.map +1 -0
- package/dist/dataops/index.js +3 -0
- package/dist/dataops/index.js.map +1 -0
- package/dist/dataops/starkscan.d.ts +13 -0
- package/dist/dataops/starkscan.d.ts.map +1 -0
- package/dist/dataops/starkscan.js +12 -0
- package/dist/dataops/starkscan.js.map +1 -0
- package/dist/dataops/voyager.d.ts +58 -0
- package/dist/dataops/voyager.d.ts.map +1 -0
- package/dist/dataops/voyager.js +146 -0
- package/dist/dataops/voyager.js.map +1 -0
- package/dist/deprecation.d.ts +49 -0
- package/dist/deprecation.d.ts.map +1 -0
- package/dist/deprecation.js +84 -0
- package/dist/deprecation.js.map +1 -0
- package/dist/durable/bullmq-runner.d.ts +138 -0
- package/dist/durable/bullmq-runner.d.ts.map +1 -0
- package/dist/durable/bullmq-runner.js +378 -0
- package/dist/durable/bullmq-runner.js.map +1 -0
- package/dist/durable/index.d.ts +3 -0
- package/dist/durable/index.d.ts.map +1 -0
- package/dist/durable/index.js +2 -0
- package/dist/durable/index.js.map +1 -0
- package/dist/economy/circuit-breaker.d.ts +37 -0
- package/dist/economy/circuit-breaker.d.ts.map +1 -0
- package/dist/economy/circuit-breaker.js +104 -0
- package/dist/economy/circuit-breaker.js.map +1 -0
- package/dist/economy/economy-router.d.ts +103 -0
- package/dist/economy/economy-router.d.ts.map +1 -0
- package/dist/economy/economy-router.js +176 -0
- package/dist/economy/economy-router.js.map +1 -0
- package/dist/economy/index.d.ts +16 -0
- package/dist/economy/index.d.ts.map +1 -0
- package/dist/economy/index.js +11 -0
- package/dist/economy/index.js.map +1 -0
- package/dist/economy/outcome-tracker.d.ts +90 -0
- package/dist/economy/outcome-tracker.d.ts.map +1 -0
- package/dist/economy/outcome-tracker.js +172 -0
- package/dist/economy/outcome-tracker.js.map +1 -0
- package/dist/economy/router.d.ts +97 -0
- package/dist/economy/router.d.ts.map +1 -0
- package/dist/economy/router.js +181 -0
- package/dist/economy/router.js.map +1 -0
- package/dist/economy/tier-policy.d.ts +46 -0
- package/dist/economy/tier-policy.d.ts.map +1 -0
- package/dist/economy/tier-policy.js +76 -0
- package/dist/economy/tier-policy.js.map +1 -0
- package/dist/errors.d.ts +102 -0
- package/dist/errors.d.ts.map +1 -0
- package/dist/errors.js +135 -0
- package/dist/errors.js.map +1 -0
- package/dist/evals/datasets/index.d.ts +27 -0
- package/dist/evals/datasets/index.d.ts.map +1 -0
- package/dist/evals/datasets/index.js +279 -0
- package/dist/evals/datasets/index.js.map +1 -0
- package/dist/evals/harness.d.ts +52 -0
- package/dist/evals/harness.d.ts.map +1 -0
- package/dist/evals/harness.js +108 -0
- package/dist/evals/harness.js.map +1 -0
- package/dist/evals/index.d.ts +5 -0
- package/dist/evals/index.d.ts.map +1 -0
- package/dist/evals/index.js +3 -0
- package/dist/evals/index.js.map +1 -0
- package/dist/factory/agent-factory.d.ts +86 -0
- package/dist/factory/agent-factory.d.ts.map +1 -0
- package/dist/factory/agent-factory.js +119 -0
- package/dist/factory/agent-factory.js.map +1 -0
- package/dist/hitl/api.d.ts +38 -0
- package/dist/hitl/api.d.ts.map +1 -0
- package/dist/hitl/api.js +58 -0
- package/dist/hitl/api.js.map +1 -0
- package/dist/hitl/approval-channel.d.ts +84 -0
- package/dist/hitl/approval-channel.d.ts.map +1 -0
- package/dist/hitl/approval-channel.js +59 -0
- package/dist/hitl/approval-channel.js.map +1 -0
- package/dist/hitl/callback-handlers.d.ts +21 -0
- package/dist/hitl/callback-handlers.d.ts.map +1 -0
- package/dist/hitl/callback-handlers.js +30 -0
- package/dist/hitl/callback-handlers.js.map +1 -0
- package/dist/hitl/index.d.ts +3 -0
- package/dist/hitl/index.d.ts.map +1 -0
- package/dist/hitl/index.js +2 -0
- package/dist/hitl/index.js.map +1 -0
- package/dist/hitl/slack.d.ts +76 -0
- package/dist/hitl/slack.d.ts.map +1 -0
- package/dist/hitl/slack.js +243 -0
- package/dist/hitl/slack.js.map +1 -0
- package/dist/hitl/telegram.d.ts +74 -0
- package/dist/hitl/telegram.d.ts.map +1 -0
- package/dist/hitl/telegram.js +227 -0
- package/dist/hitl/telegram.js.map +1 -0
- package/dist/index.d.ts +173 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +147 -0
- package/dist/index.js.map +1 -0
- package/dist/llm/parse-structured-output.d.ts +55 -0
- package/dist/llm/parse-structured-output.d.ts.map +1 -0
- package/dist/llm/parse-structured-output.js +80 -0
- package/dist/llm/parse-structured-output.js.map +1 -0
- package/dist/loop/index.d.ts +7 -0
- package/dist/loop/index.d.ts.map +1 -0
- package/dist/loop/index.js +5 -0
- package/dist/loop/index.js.map +1 -0
- package/dist/loop/minimal-loop.d.ts +81 -0
- package/dist/loop/minimal-loop.d.ts.map +1 -0
- package/dist/loop/minimal-loop.js +318 -0
- package/dist/loop/minimal-loop.js.map +1 -0
- package/dist/loop/sdk-loop.d.ts +103 -0
- package/dist/loop/sdk-loop.d.ts.map +1 -0
- package/dist/loop/sdk-loop.js +344 -0
- package/dist/loop/sdk-loop.js.map +1 -0
- package/dist/mcp/index.d.ts +38 -0
- package/dist/mcp/index.d.ts.map +1 -0
- package/dist/mcp/index.js +113 -0
- package/dist/mcp/index.js.map +1 -0
- package/dist/metrics/create-agent-metrics.d.ts +78 -0
- package/dist/metrics/create-agent-metrics.d.ts.map +1 -0
- package/dist/metrics/create-agent-metrics.js +115 -0
- package/dist/metrics/create-agent-metrics.js.map +1 -0
- package/dist/orchestration/bulkhead.d.ts +70 -0
- package/dist/orchestration/bulkhead.d.ts.map +1 -0
- package/dist/orchestration/bulkhead.js +99 -0
- package/dist/orchestration/bulkhead.js.map +1 -0
- package/dist/orchestration/idempotency.d.ts +52 -0
- package/dist/orchestration/idempotency.d.ts.map +1 -0
- package/dist/orchestration/idempotency.js +93 -0
- package/dist/orchestration/idempotency.js.map +1 -0
- package/dist/orchestration/index.d.ts +16 -0
- package/dist/orchestration/index.d.ts.map +1 -0
- package/dist/orchestration/index.js +15 -0
- package/dist/orchestration/index.js.map +1 -0
- package/dist/orchestration/ooda/agent-config-loader.d.ts +67 -0
- package/dist/orchestration/ooda/agent-config-loader.d.ts.map +1 -0
- package/dist/orchestration/ooda/agent-config-loader.js +65 -0
- package/dist/orchestration/ooda/agent-config-loader.js.map +1 -0
- package/dist/orchestration/ooda/agent.d.ts +54 -0
- package/dist/orchestration/ooda/agent.d.ts.map +1 -0
- package/dist/orchestration/ooda/agent.js +730 -0
- package/dist/orchestration/ooda/agent.js.map +1 -0
- package/dist/orchestration/ooda/audit-log.d.ts +26 -0
- package/dist/orchestration/ooda/audit-log.d.ts.map +1 -0
- package/dist/orchestration/ooda/audit-log.js +41 -0
- package/dist/orchestration/ooda/audit-log.js.map +1 -0
- package/dist/orchestration/ooda/brain-context.d.ts +121 -0
- package/dist/orchestration/ooda/brain-context.d.ts.map +1 -0
- package/dist/orchestration/ooda/brain-context.js +149 -0
- package/dist/orchestration/ooda/brain-context.js.map +1 -0
- package/dist/orchestration/ooda/child-agent.d.ts +55 -0
- package/dist/orchestration/ooda/child-agent.d.ts.map +1 -0
- package/dist/orchestration/ooda/child-agent.js +16 -0
- package/dist/orchestration/ooda/child-agent.js.map +1 -0
- package/dist/orchestration/ooda/cron-schedule.d.ts +26 -0
- package/dist/orchestration/ooda/cron-schedule.d.ts.map +1 -0
- package/dist/orchestration/ooda/cron-schedule.js +94 -0
- package/dist/orchestration/ooda/cron-schedule.js.map +1 -0
- package/dist/orchestration/ooda/debate.d.ts +68 -0
- package/dist/orchestration/ooda/debate.d.ts.map +1 -0
- package/dist/orchestration/ooda/debate.js +39 -0
- package/dist/orchestration/ooda/debate.js.map +1 -0
- package/dist/orchestration/ooda/economic-observer.d.ts +50 -0
- package/dist/orchestration/ooda/economic-observer.d.ts.map +1 -0
- package/dist/orchestration/ooda/economic-observer.js +36 -0
- package/dist/orchestration/ooda/economic-observer.js.map +1 -0
- package/dist/orchestration/ooda/errors.d.ts +21 -0
- package/dist/orchestration/ooda/errors.d.ts.map +1 -0
- package/dist/orchestration/ooda/errors.js +30 -0
- package/dist/orchestration/ooda/errors.js.map +1 -0
- package/dist/orchestration/ooda/execution-mode-guard.d.ts +24 -0
- package/dist/orchestration/ooda/execution-mode-guard.d.ts.map +1 -0
- package/dist/orchestration/ooda/execution-mode-guard.js +31 -0
- package/dist/orchestration/ooda/execution-mode-guard.js.map +1 -0
- package/dist/orchestration/ooda/factory.d.ts +12 -0
- package/dist/orchestration/ooda/factory.d.ts.map +1 -0
- package/dist/orchestration/ooda/factory.js +27 -0
- package/dist/orchestration/ooda/factory.js.map +1 -0
- package/dist/orchestration/ooda/guardrails.d.ts +65 -0
- package/dist/orchestration/ooda/guardrails.d.ts.map +1 -0
- package/dist/orchestration/ooda/guardrails.js +127 -0
- package/dist/orchestration/ooda/guardrails.js.map +1 -0
- package/dist/orchestration/ooda/guards/always-on.d.ts +15 -0
- package/dist/orchestration/ooda/guards/always-on.d.ts.map +1 -0
- package/dist/orchestration/ooda/guards/always-on.js +19 -0
- package/dist/orchestration/ooda/guards/always-on.js.map +1 -0
- package/dist/orchestration/ooda/guards/business-hours.d.ts +46 -0
- package/dist/orchestration/ooda/guards/business-hours.d.ts.map +1 -0
- package/dist/orchestration/ooda/guards/business-hours.js +78 -0
- package/dist/orchestration/ooda/guards/business-hours.js.map +1 -0
- package/dist/orchestration/ooda/guards/index.d.ts +14 -0
- package/dist/orchestration/ooda/guards/index.d.ts.map +1 -0
- package/dist/orchestration/ooda/guards/index.js +14 -0
- package/dist/orchestration/ooda/guards/index.js.map +1 -0
- package/dist/orchestration/ooda/guards/redis-circuit-breaker.d.ts +84 -0
- package/dist/orchestration/ooda/guards/redis-circuit-breaker.d.ts.map +1 -0
- package/dist/orchestration/ooda/guards/redis-circuit-breaker.js +96 -0
- package/dist/orchestration/ooda/guards/redis-circuit-breaker.js.map +1 -0
- package/dist/orchestration/ooda/guards/rth-session.d.ts +27 -0
- package/dist/orchestration/ooda/guards/rth-session.d.ts.map +1 -0
- package/dist/orchestration/ooda/guards/rth-session.js +95 -0
- package/dist/orchestration/ooda/guards/rth-session.js.map +1 -0
- package/dist/orchestration/ooda/handoff.d.ts +82 -0
- package/dist/orchestration/ooda/handoff.d.ts.map +1 -0
- package/dist/orchestration/ooda/handoff.js +86 -0
- package/dist/orchestration/ooda/handoff.js.map +1 -0
- package/dist/orchestration/ooda/hitl-gate.d.ts +57 -0
- package/dist/orchestration/ooda/hitl-gate.d.ts.map +1 -0
- package/dist/orchestration/ooda/hitl-gate.js +107 -0
- package/dist/orchestration/ooda/hitl-gate.js.map +1 -0
- package/dist/orchestration/ooda/index.d.ts +48 -0
- package/dist/orchestration/ooda/index.d.ts.map +1 -0
- package/dist/orchestration/ooda/index.js +33 -0
- package/dist/orchestration/ooda/index.js.map +1 -0
- package/dist/orchestration/ooda/inner-monologue.d.ts +69 -0
- package/dist/orchestration/ooda/inner-monologue.d.ts.map +1 -0
- package/dist/orchestration/ooda/inner-monologue.js +69 -0
- package/dist/orchestration/ooda/inner-monologue.js.map +1 -0
- package/dist/orchestration/ooda/learn.d.ts +60 -0
- package/dist/orchestration/ooda/learn.d.ts.map +1 -0
- package/dist/orchestration/ooda/learn.js +94 -0
- package/dist/orchestration/ooda/learn.js.map +1 -0
- package/dist/orchestration/ooda/multimodal.d.ts +91 -0
- package/dist/orchestration/ooda/multimodal.d.ts.map +1 -0
- package/dist/orchestration/ooda/multimodal.js +88 -0
- package/dist/orchestration/ooda/multimodal.js.map +1 -0
- package/dist/orchestration/ooda/phase-routing.d.ts +41 -0
- package/dist/orchestration/ooda/phase-routing.d.ts.map +1 -0
- package/dist/orchestration/ooda/phase-routing.js +43 -0
- package/dist/orchestration/ooda/phase-routing.js.map +1 -0
- package/dist/orchestration/ooda/plan-execute.d.ts +55 -0
- package/dist/orchestration/ooda/plan-execute.d.ts.map +1 -0
- package/dist/orchestration/ooda/plan-execute.js +41 -0
- package/dist/orchestration/ooda/plan-execute.js.map +1 -0
- package/dist/orchestration/ooda/react-loop.d.ts +96 -0
- package/dist/orchestration/ooda/react-loop.d.ts.map +1 -0
- package/dist/orchestration/ooda/react-loop.js +90 -0
- package/dist/orchestration/ooda/react-loop.js.map +1 -0
- package/dist/orchestration/ooda/reflexion.d.ts +56 -0
- package/dist/orchestration/ooda/reflexion.d.ts.map +1 -0
- package/dist/orchestration/ooda/reflexion.js +58 -0
- package/dist/orchestration/ooda/reflexion.js.map +1 -0
- package/dist/orchestration/ooda/resource-limits.d.ts +63 -0
- package/dist/orchestration/ooda/resource-limits.d.ts.map +1 -0
- package/dist/orchestration/ooda/resource-limits.js +107 -0
- package/dist/orchestration/ooda/resource-limits.js.map +1 -0
- package/dist/orchestration/ooda/run-step-persistence.d.ts +50 -0
- package/dist/orchestration/ooda/run-step-persistence.d.ts.map +1 -0
- package/dist/orchestration/ooda/run-step-persistence.js +89 -0
- package/dist/orchestration/ooda/run-step-persistence.js.map +1 -0
- package/dist/orchestration/ooda/skills.d.ts +58 -0
- package/dist/orchestration/ooda/skills.d.ts.map +1 -0
- package/dist/orchestration/ooda/skills.js +19 -0
- package/dist/orchestration/ooda/skills.js.map +1 -0
- package/dist/orchestration/ooda/structured-output.d.ts +46 -0
- package/dist/orchestration/ooda/structured-output.d.ts.map +1 -0
- package/dist/orchestration/ooda/structured-output.js +60 -0
- package/dist/orchestration/ooda/structured-output.js.map +1 -0
- package/dist/orchestration/ooda/types.d.ts +463 -0
- package/dist/orchestration/ooda/types.d.ts.map +1 -0
- package/dist/orchestration/ooda/types.js +38 -0
- package/dist/orchestration/ooda/types.js.map +1 -0
- package/dist/otel/attributes.d.ts +71 -0
- package/dist/otel/attributes.d.ts.map +1 -0
- package/dist/otel/attributes.js +56 -0
- package/dist/otel/attributes.js.map +1 -0
- package/dist/otel/index.d.ts +5 -0
- package/dist/otel/index.d.ts.map +1 -0
- package/dist/otel/index.js +5 -0
- package/dist/otel/index.js.map +1 -0
- package/dist/otel/ingest.d.ts +99 -0
- package/dist/otel/ingest.d.ts.map +1 -0
- package/dist/otel/ingest.js +192 -0
- package/dist/otel/ingest.js.map +1 -0
- package/dist/otel/trace-context.d.ts +48 -0
- package/dist/otel/trace-context.d.ts.map +1 -0
- package/dist/otel/trace-context.js +137 -0
- package/dist/otel/trace-context.js.map +1 -0
- package/dist/otel/types.d.ts +54 -0
- package/dist/otel/types.d.ts.map +1 -0
- package/dist/otel/types.js +12 -0
- package/dist/otel/types.js.map +1 -0
- package/dist/outcomes/client.d.ts +81 -0
- package/dist/outcomes/client.d.ts.map +1 -0
- package/dist/outcomes/client.js +98 -0
- package/dist/outcomes/client.js.map +1 -0
- package/dist/outcomes/compute-roi.d.ts +40 -0
- package/dist/outcomes/compute-roi.d.ts.map +1 -0
- package/dist/outcomes/compute-roi.js +58 -0
- package/dist/outcomes/compute-roi.js.map +1 -0
- package/dist/outcomes/index.d.ts +4 -0
- package/dist/outcomes/index.d.ts.map +1 -0
- package/dist/outcomes/index.js +4 -0
- package/dist/outcomes/index.js.map +1 -0
- package/dist/outcomes/meter.d.ts +73 -0
- package/dist/outcomes/meter.d.ts.map +1 -0
- package/dist/outcomes/meter.js +140 -0
- package/dist/outcomes/meter.js.map +1 -0
- package/dist/outcomes/trajectory.d.ts +53 -0
- package/dist/outcomes/trajectory.d.ts.map +1 -0
- package/dist/outcomes/trajectory.js +110 -0
- package/dist/outcomes/trajectory.js.map +1 -0
- package/dist/outcomes/types.d.ts +93 -0
- package/dist/outcomes/types.d.ts.map +1 -0
- package/dist/outcomes/types.js +8 -0
- package/dist/outcomes/types.js.map +1 -0
- package/dist/patterns/_shared/brain-logger.d.ts +12 -0
- package/dist/patterns/_shared/brain-logger.d.ts.map +1 -0
- package/dist/patterns/_shared/brain-logger.js +6 -0
- package/dist/patterns/_shared/brain-logger.js.map +1 -0
- package/dist/patterns/circuit-breaker/index.d.ts +3 -0
- package/dist/patterns/circuit-breaker/index.d.ts.map +1 -0
- package/dist/patterns/circuit-breaker/index.js +2 -0
- package/dist/patterns/circuit-breaker/index.js.map +1 -0
- package/dist/patterns/circuit-breaker/session-cb.d.ts +3 -0
- package/dist/patterns/circuit-breaker/session-cb.d.ts.map +1 -0
- package/dist/patterns/circuit-breaker/session-cb.js +175 -0
- package/dist/patterns/circuit-breaker/session-cb.js.map +1 -0
- package/dist/patterns/circuit-breaker/types.d.ts +68 -0
- package/dist/patterns/circuit-breaker/types.d.ts.map +1 -0
- package/dist/patterns/circuit-breaker/types.js +2 -0
- package/dist/patterns/circuit-breaker/types.js.map +1 -0
- package/dist/patterns/escalation/index.d.ts +3 -0
- package/dist/patterns/escalation/index.d.ts.map +1 -0
- package/dist/patterns/escalation/index.js +2 -0
- package/dist/patterns/escalation/index.js.map +1 -0
- package/dist/patterns/escalation/pyramid.d.ts +3 -0
- package/dist/patterns/escalation/pyramid.d.ts.map +1 -0
- package/dist/patterns/escalation/pyramid.js +86 -0
- package/dist/patterns/escalation/pyramid.js.map +1 -0
- package/dist/patterns/escalation/types.d.ts +46 -0
- package/dist/patterns/escalation/types.d.ts.map +1 -0
- package/dist/patterns/escalation/types.js +2 -0
- package/dist/patterns/escalation/types.js.map +1 -0
- package/dist/patterns/quality-gate/gate.d.ts +3 -0
- package/dist/patterns/quality-gate/gate.d.ts.map +1 -0
- package/dist/patterns/quality-gate/gate.js +73 -0
- package/dist/patterns/quality-gate/gate.js.map +1 -0
- package/dist/patterns/quality-gate/index.d.ts +3 -0
- package/dist/patterns/quality-gate/index.d.ts.map +1 -0
- package/dist/patterns/quality-gate/index.js +2 -0
- package/dist/patterns/quality-gate/index.js.map +1 -0
- package/dist/patterns/quality-gate/types.d.ts +41 -0
- package/dist/patterns/quality-gate/types.d.ts.map +1 -0
- package/dist/patterns/quality-gate/types.js +2 -0
- package/dist/patterns/quality-gate/types.js.map +1 -0
- package/dist/permissions/capability-gate.d.ts +57 -0
- package/dist/permissions/capability-gate.d.ts.map +1 -0
- package/dist/permissions/capability-gate.js +26 -0
- package/dist/permissions/capability-gate.js.map +1 -0
- package/dist/permissions/index.d.ts +3 -0
- package/dist/permissions/index.d.ts.map +1 -0
- package/dist/permissions/index.js +2 -0
- package/dist/permissions/index.js.map +1 -0
- package/dist/permissions/renewal-manager.d.ts +49 -0
- package/dist/permissions/renewal-manager.d.ts.map +1 -0
- package/dist/permissions/renewal-manager.js +60 -0
- package/dist/permissions/renewal-manager.js.map +1 -0
- package/dist/permissions/sdk-permissions.d.ts +48 -0
- package/dist/permissions/sdk-permissions.d.ts.map +1 -0
- package/dist/permissions/sdk-permissions.js +94 -0
- package/dist/permissions/sdk-permissions.js.map +1 -0
- package/dist/ports/agent-registry.d.ts +95 -0
- package/dist/ports/agent-registry.d.ts.map +1 -0
- package/dist/ports/agent-registry.js +16 -0
- package/dist/ports/agent-registry.js.map +1 -0
- package/dist/ports/brain.d.ts +147 -0
- package/dist/ports/brain.d.ts.map +1 -0
- package/dist/ports/brain.js +105 -0
- package/dist/ports/brain.js.map +1 -0
- package/dist/ports/db.d.ts +8 -0
- package/dist/ports/db.d.ts.map +1 -0
- package/dist/ports/db.js +2 -0
- package/dist/ports/db.js.map +1 -0
- package/dist/ports/event-bus.d.ts +167 -0
- package/dist/ports/event-bus.d.ts.map +1 -0
- package/dist/ports/event-bus.js +25 -0
- package/dist/ports/event-bus.js.map +1 -0
- package/dist/ports/eviction-policy.d.ts +92 -0
- package/dist/ports/eviction-policy.d.ts.map +1 -0
- package/dist/ports/eviction-policy.js +120 -0
- package/dist/ports/eviction-policy.js.map +1 -0
- package/dist/ports/hitl.contract.test.d.ts +9 -0
- package/dist/ports/hitl.contract.test.d.ts.map +1 -0
- package/dist/ports/hitl.contract.test.js +100 -0
- package/dist/ports/hitl.contract.test.js.map +1 -0
- package/dist/ports/hitl.d.ts +91 -0
- package/dist/ports/hitl.d.ts.map +1 -0
- package/dist/ports/hitl.js +76 -0
- package/dist/ports/hitl.js.map +1 -0
- package/dist/ports/index.d.ts +19 -0
- package/dist/ports/index.d.ts.map +1 -0
- package/dist/ports/index.js +4 -0
- package/dist/ports/index.js.map +1 -0
- package/dist/ports/key-provider.d.ts +113 -0
- package/dist/ports/key-provider.d.ts.map +1 -0
- package/dist/ports/key-provider.js +119 -0
- package/dist/ports/key-provider.js.map +1 -0
- package/dist/ports/llm-provider.contract.d.ts +24 -0
- package/dist/ports/llm-provider.contract.d.ts.map +1 -0
- package/dist/ports/llm-provider.contract.js +111 -0
- package/dist/ports/llm-provider.contract.js.map +1 -0
- package/dist/ports/llm-provider.d.ts +63 -0
- package/dist/ports/llm-provider.d.ts.map +1 -0
- package/dist/ports/llm-provider.js +13 -0
- package/dist/ports/llm-provider.js.map +1 -0
- package/dist/ports/logger.d.ts +18 -0
- package/dist/ports/logger.d.ts.map +1 -0
- package/dist/ports/logger.js +10 -0
- package/dist/ports/logger.js.map +1 -0
- package/dist/ports/messaging.contract.test.d.ts +22 -0
- package/dist/ports/messaging.contract.test.d.ts.map +1 -0
- package/dist/ports/messaging.contract.test.js +296 -0
- package/dist/ports/messaging.contract.test.js.map +1 -0
- package/dist/ports/messaging.d.ts +47 -0
- package/dist/ports/messaging.d.ts.map +1 -0
- package/dist/ports/messaging.js +24 -0
- package/dist/ports/messaging.js.map +1 -0
- package/dist/ports/outcome.d.ts +25 -0
- package/dist/ports/outcome.d.ts.map +1 -0
- package/dist/ports/outcome.js +9 -0
- package/dist/ports/outcome.js.map +1 -0
- package/dist/ports/timestamp.d.ts +53 -0
- package/dist/ports/timestamp.d.ts.map +1 -0
- package/dist/ports/timestamp.js +28 -0
- package/dist/ports/timestamp.js.map +1 -0
- package/dist/privacy/channel.d.ts +137 -0
- package/dist/privacy/channel.d.ts.map +1 -0
- package/dist/privacy/channel.js +196 -0
- package/dist/privacy/channel.js.map +1 -0
- package/dist/privacy/composition.d.ts +140 -0
- package/dist/privacy/composition.d.ts.map +1 -0
- package/dist/privacy/composition.js +182 -0
- package/dist/privacy/composition.js.map +1 -0
- package/dist/privacy/delegation.d.ts +87 -0
- package/dist/privacy/delegation.d.ts.map +1 -0
- package/dist/privacy/delegation.js +67 -0
- package/dist/privacy/delegation.js.map +1 -0
- package/dist/privacy/index.d.ts +25 -0
- package/dist/privacy/index.d.ts.map +1 -0
- package/dist/privacy/index.js +21 -0
- package/dist/privacy/index.js.map +1 -0
- package/dist/privacy/nullifier.d.ts +59 -0
- package/dist/privacy/nullifier.d.ts.map +1 -0
- package/dist/privacy/nullifier.js +70 -0
- package/dist/privacy/nullifier.js.map +1 -0
- package/dist/privacy/poseidon-felt252.d.ts +35 -0
- package/dist/privacy/poseidon-felt252.d.ts.map +1 -0
- package/dist/privacy/poseidon-felt252.js +93 -0
- package/dist/privacy/poseidon-felt252.js.map +1 -0
- package/dist/privacy/uar.d.ts +170 -0
- package/dist/privacy/uar.d.ts.map +1 -0
- package/dist/privacy/uar.js +150 -0
- package/dist/privacy/uar.js.map +1 -0
- package/dist/prompts/build-orient-prompt.d.ts +41 -0
- package/dist/prompts/build-orient-prompt.d.ts.map +1 -0
- package/dist/prompts/build-orient-prompt.js +40 -0
- package/dist/prompts/build-orient-prompt.js.map +1 -0
- package/dist/proof/chain.d.ts +90 -0
- package/dist/proof/chain.d.ts.map +1 -0
- package/dist/proof/chain.js +177 -0
- package/dist/proof/chain.js.map +1 -0
- package/dist/proof/fallback-adapter.d.ts +50 -0
- package/dist/proof/fallback-adapter.d.ts.map +1 -0
- package/dist/proof/fallback-adapter.js +99 -0
- package/dist/proof/fallback-adapter.js.map +1 -0
- package/dist/proof/index.d.ts +35 -0
- package/dist/proof/index.d.ts.map +1 -0
- package/dist/proof/index.js +161 -0
- package/dist/proof/index.js.map +1 -0
- package/dist/proof/load.d.ts +27 -0
- package/dist/proof/load.d.ts.map +1 -0
- package/dist/proof/load.js +44 -0
- package/dist/proof/load.js.map +1 -0
- package/dist/proof/otel.d.ts +25 -0
- package/dist/proof/otel.d.ts.map +1 -0
- package/dist/proof/otel.js +57 -0
- package/dist/proof/otel.js.map +1 -0
- package/dist/proof/receipt-queue.d.ts +97 -0
- package/dist/proof/receipt-queue.d.ts.map +1 -0
- package/dist/proof/receipt-queue.js +131 -0
- package/dist/proof/receipt-queue.js.map +1 -0
- package/dist/proof/sha256.d.ts +27 -0
- package/dist/proof/sha256.d.ts.map +1 -0
- package/dist/proof/sha256.js +65 -0
- package/dist/proof/sha256.js.map +1 -0
- package/dist/proof/types.d.ts +55 -0
- package/dist/proof/types.d.ts.map +1 -0
- package/dist/proof/types.js +10 -0
- package/dist/proof/types.js.map +1 -0
- package/dist/proof/verify.d.ts +28 -0
- package/dist/proof/verify.d.ts.map +1 -0
- package/dist/proof/verify.js +70 -0
- package/dist/proof/verify.js.map +1 -0
- package/dist/registry/agent-capability.d.ts +29 -0
- package/dist/registry/agent-capability.d.ts.map +1 -0
- package/dist/registry/agent-capability.js +21 -0
- package/dist/registry/agent-capability.js.map +1 -0
- package/dist/registry/agent-card.d.ts +86 -0
- package/dist/registry/agent-card.d.ts.map +1 -0
- package/dist/registry/agent-card.js +80 -0
- package/dist/registry/agent-card.js.map +1 -0
- package/dist/registry/agent-ids.d.ts +30 -0
- package/dist/registry/agent-ids.d.ts.map +1 -0
- package/dist/registry/agent-ids.js +46 -0
- package/dist/registry/agent-ids.js.map +1 -0
- package/dist/registry/agent-registry.d.ts +104 -0
- package/dist/registry/agent-registry.d.ts.map +1 -0
- package/dist/registry/agent-registry.js +277 -0
- package/dist/registry/agent-registry.js.map +1 -0
- package/dist/registry/index.d.ts +7 -0
- package/dist/registry/index.d.ts.map +1 -0
- package/dist/registry/index.js +4 -0
- package/dist/registry/index.js.map +1 -0
- package/dist/replay/clock.d.ts +69 -0
- package/dist/replay/clock.d.ts.map +1 -0
- package/dist/replay/clock.js +84 -0
- package/dist/replay/clock.js.map +1 -0
- package/dist/replay/cross-cycle.d.ts +143 -0
- package/dist/replay/cross-cycle.d.ts.map +1 -0
- package/dist/replay/cross-cycle.js +271 -0
- package/dist/replay/cross-cycle.js.map +1 -0
- package/dist/replay/http-vcr.d.ts +46 -0
- package/dist/replay/http-vcr.d.ts.map +1 -0
- package/dist/replay/http-vcr.js +87 -0
- package/dist/replay/http-vcr.js.map +1 -0
- package/dist/replay/index.d.ts +18 -0
- package/dist/replay/index.d.ts.map +1 -0
- package/dist/replay/index.js +18 -0
- package/dist/replay/index.js.map +1 -0
- package/dist/replay/llm-cache.d.ts +126 -0
- package/dist/replay/llm-cache.d.ts.map +1 -0
- package/dist/replay/llm-cache.js +114 -0
- package/dist/replay/llm-cache.js.map +1 -0
- package/dist/replay/random.d.ts +114 -0
- package/dist/replay/random.d.ts.map +1 -0
- package/dist/replay/random.js +183 -0
- package/dist/replay/random.js.map +1 -0
- package/dist/replay/replay.d.ts +160 -0
- package/dist/replay/replay.d.ts.map +1 -0
- package/dist/replay/replay.js +115 -0
- package/dist/replay/replay.js.map +1 -0
- package/dist/resilience/bulkhead.d.ts +37 -0
- package/dist/resilience/bulkhead.d.ts.map +1 -0
- package/dist/resilience/bulkhead.js +65 -0
- package/dist/resilience/bulkhead.js.map +1 -0
- package/dist/resilience/circuit-breaker.d.ts +56 -0
- package/dist/resilience/circuit-breaker.d.ts.map +1 -0
- package/dist/resilience/circuit-breaker.js +93 -0
- package/dist/resilience/circuit-breaker.js.map +1 -0
- package/dist/resilience/idempotent.d.ts +61 -0
- package/dist/resilience/idempotent.d.ts.map +1 -0
- package/dist/resilience/idempotent.js +103 -0
- package/dist/resilience/idempotent.js.map +1 -0
- package/dist/resilience/index.d.ts +28 -0
- package/dist/resilience/index.d.ts.map +1 -0
- package/dist/resilience/index.js +25 -0
- package/dist/resilience/index.js.map +1 -0
- package/dist/router/index.d.ts +3 -0
- package/dist/router/index.d.ts.map +1 -0
- package/dist/router/index.js +2 -0
- package/dist/router/index.js.map +1 -0
- package/dist/router/provider-router.d.ts +61 -0
- package/dist/router/provider-router.d.ts.map +1 -0
- package/dist/router/provider-router.js +214 -0
- package/dist/router/provider-router.js.map +1 -0
- package/dist/runs/health.d.ts +24 -0
- package/dist/runs/health.d.ts.map +1 -0
- package/dist/runs/health.js +52 -0
- package/dist/runs/health.js.map +1 -0
- package/dist/runs/index.d.ts +4 -0
- package/dist/runs/index.d.ts.map +1 -0
- package/dist/runs/index.js +4 -0
- package/dist/runs/index.js.map +1 -0
- package/dist/runs/subscribe.d.ts +34 -0
- package/dist/runs/subscribe.d.ts.map +1 -0
- package/dist/runs/subscribe.js +228 -0
- package/dist/runs/subscribe.js.map +1 -0
- package/dist/runs/types.d.ts +53 -0
- package/dist/runs/types.d.ts.map +1 -0
- package/dist/runs/types.js +11 -0
- package/dist/runs/types.js.map +1 -0
- package/dist/safety/immune.d.ts +62 -0
- package/dist/safety/immune.d.ts.map +1 -0
- package/dist/safety/immune.js +109 -0
- package/dist/safety/immune.js.map +1 -0
- package/dist/safety/index.d.ts +3 -0
- package/dist/safety/index.d.ts.map +1 -0
- package/dist/safety/index.js +2 -0
- package/dist/safety/index.js.map +1 -0
- package/dist/safety/sanitize.d.ts +71 -0
- package/dist/safety/sanitize.d.ts.map +1 -0
- package/dist/safety/sanitize.js +139 -0
- package/dist/safety/sanitize.js.map +1 -0
- package/dist/sdk-permission-mapping.d.ts +41 -0
- package/dist/sdk-permission-mapping.d.ts.map +1 -0
- package/dist/sdk-permission-mapping.js +96 -0
- package/dist/sdk-permission-mapping.js.map +1 -0
- package/dist/skills/_otel.d.ts +2 -0
- package/dist/skills/_otel.d.ts.map +1 -0
- package/dist/skills/_otel.js +29 -0
- package/dist/skills/_otel.js.map +1 -0
- package/dist/skills/alpaca-quote.d.ts +30 -0
- package/dist/skills/alpaca-quote.d.ts.map +1 -0
- package/dist/skills/alpaca-quote.js +70 -0
- package/dist/skills/alpaca-quote.js.map +1 -0
- package/dist/skills/base-skills.d.ts +30 -0
- package/dist/skills/base-skills.d.ts.map +1 -0
- package/dist/skills/base-skills.js +26 -0
- package/dist/skills/base-skills.js.map +1 -0
- package/dist/skills/brain-query.d.ts +34 -0
- package/dist/skills/brain-query.d.ts.map +1 -0
- package/dist/skills/brain-query.js +46 -0
- package/dist/skills/brain-query.js.map +1 -0
- package/dist/skills/brain-store.d.ts +44 -0
- package/dist/skills/brain-store.d.ts.map +1 -0
- package/dist/skills/brain-store.js +55 -0
- package/dist/skills/brain-store.js.map +1 -0
- package/dist/skills/calendar-check.d.ts +37 -0
- package/dist/skills/calendar-check.d.ts.map +1 -0
- package/dist/skills/calendar-check.js +87 -0
- package/dist/skills/calendar-check.js.map +1 -0
- package/dist/skills/cboe-vix-spot.d.ts +24 -0
- package/dist/skills/cboe-vix-spot.d.ts.map +1 -0
- package/dist/skills/cboe-vix-spot.js +70 -0
- package/dist/skills/cboe-vix-spot.js.map +1 -0
- package/dist/skills/errors.d.ts +33 -0
- package/dist/skills/errors.d.ts.map +1 -0
- package/dist/skills/errors.js +50 -0
- package/dist/skills/errors.js.map +1 -0
- package/dist/skills/hitl-request.d.ts +39 -0
- package/dist/skills/hitl-request.d.ts.map +1 -0
- package/dist/skills/hitl-request.js +55 -0
- package/dist/skills/hitl-request.js.map +1 -0
- package/dist/skills/http-fetch.d.ts +32 -0
- package/dist/skills/http-fetch.d.ts.map +1 -0
- package/dist/skills/http-fetch.js +82 -0
- package/dist/skills/http-fetch.js.map +1 -0
- package/dist/skills/index.d.ts +46 -0
- package/dist/skills/index.d.ts.map +1 -0
- package/dist/skills/index.js +32 -0
- package/dist/skills/index.js.map +1 -0
- package/dist/skills/record-outcome.d.ts +49 -0
- package/dist/skills/record-outcome.d.ts.map +1 -0
- package/dist/skills/record-outcome.js +104 -0
- package/dist/skills/record-outcome.js.map +1 -0
- package/dist/skills/run-sql-query.d.ts +33 -0
- package/dist/skills/run-sql-query.d.ts.map +1 -0
- package/dist/skills/run-sql-query.js +78 -0
- package/dist/skills/run-sql-query.js.map +1 -0
- package/dist/skills/send-email.d.ts +39 -0
- package/dist/skills/send-email.d.ts.map +1 -0
- package/dist/skills/send-email.js +100 -0
- package/dist/skills/send-email.js.map +1 -0
- package/dist/skills/skill-ledger.d.ts +62 -0
- package/dist/skills/skill-ledger.d.ts.map +1 -0
- package/dist/skills/skill-ledger.js +40 -0
- package/dist/skills/skill-ledger.js.map +1 -0
- package/dist/skills/skill-registry-builder.d.ts +91 -0
- package/dist/skills/skill-registry-builder.d.ts.map +1 -0
- package/dist/skills/skill-registry-builder.js +173 -0
- package/dist/skills/skill-registry-builder.js.map +1 -0
- package/dist/skills/slack-notify.d.ts +43 -0
- package/dist/skills/slack-notify.d.ts.map +1 -0
- package/dist/skills/slack-notify.js +70 -0
- package/dist/skills/slack-notify.js.map +1 -0
- package/dist/skills/starknet-balance.d.ts +34 -0
- package/dist/skills/starknet-balance.d.ts.map +1 -0
- package/dist/skills/starknet-balance.js +91 -0
- package/dist/skills/starknet-balance.js.map +1 -0
- package/dist/skills/telegram-notify.d.ts +44 -0
- package/dist/skills/telegram-notify.d.ts.map +1 -0
- package/dist/skills/telegram-notify.js +71 -0
- package/dist/skills/telegram-notify.js.map +1 -0
- package/dist/skills/web-search.d.ts +32 -0
- package/dist/skills/web-search.d.ts.map +1 -0
- package/dist/skills/web-search.js +84 -0
- package/dist/skills/web-search.js.map +1 -0
- package/dist/templates/complex-agent.d.ts +105 -0
- package/dist/templates/complex-agent.d.ts.map +1 -0
- package/dist/templates/complex-agent.js +94 -0
- package/dist/templates/complex-agent.js.map +1 -0
- package/dist/templates/index.d.ts +17 -0
- package/dist/templates/index.d.ts.map +1 -0
- package/dist/templates/index.js +14 -0
- package/dist/templates/index.js.map +1 -0
- package/dist/templates/reasoning-agent.d.ts +160 -0
- package/dist/templates/reasoning-agent.d.ts.map +1 -0
- package/dist/templates/reasoning-agent.js +119 -0
- package/dist/templates/reasoning-agent.js.map +1 -0
- package/dist/templates/simple-agent.d.ts +108 -0
- package/dist/templates/simple-agent.d.ts.map +1 -0
- package/dist/templates/simple-agent.js +84 -0
- package/dist/templates/simple-agent.js.map +1 -0
- package/dist/testing/brain-conformance.d.ts +45 -0
- package/dist/testing/brain-conformance.d.ts.map +1 -0
- package/dist/testing/brain-conformance.js +68 -0
- package/dist/testing/brain-conformance.js.map +1 -0
- package/dist/testing/chaos.d.ts +126 -0
- package/dist/testing/chaos.d.ts.map +1 -0
- package/dist/testing/chaos.js +176 -0
- package/dist/testing/chaos.js.map +1 -0
- package/dist/testing/contracts/brain-port.contract.d.ts +23 -0
- package/dist/testing/contracts/brain-port.contract.d.ts.map +1 -0
- package/dist/testing/contracts/brain-port.contract.js +79 -0
- package/dist/testing/contracts/brain-port.contract.js.map +1 -0
- package/dist/testing/contracts/economic-observer.contract.d.ts +23 -0
- package/dist/testing/contracts/economic-observer.contract.d.ts.map +1 -0
- package/dist/testing/contracts/economic-observer.contract.js +71 -0
- package/dist/testing/contracts/economic-observer.contract.js.map +1 -0
- package/dist/testing/contracts/event-bus.contract.d.ts +30 -0
- package/dist/testing/contracts/event-bus.contract.d.ts.map +1 -0
- package/dist/testing/contracts/event-bus.contract.js +216 -0
- package/dist/testing/contracts/event-bus.contract.js.map +1 -0
- package/dist/testing/db-conformance.d.ts +22 -0
- package/dist/testing/db-conformance.d.ts.map +1 -0
- package/dist/testing/db-conformance.js +25 -0
- package/dist/testing/db-conformance.js.map +1 -0
- package/dist/testing/eval.d.ts +61 -0
- package/dist/testing/eval.d.ts.map +1 -0
- package/dist/testing/eval.js +98 -0
- package/dist/testing/eval.js.map +1 -0
- package/dist/testing/index.d.ts +32 -0
- package/dist/testing/index.d.ts.map +1 -0
- package/dist/testing/index.js +38 -0
- package/dist/testing/index.js.map +1 -0
- package/dist/testing/integration-harness.d.ts +45 -0
- package/dist/testing/integration-harness.d.ts.map +1 -0
- package/dist/testing/integration-harness.js +57 -0
- package/dist/testing/integration-harness.js.map +1 -0
- package/dist/testing/logger-conformance.d.ts +16 -0
- package/dist/testing/logger-conformance.d.ts.map +1 -0
- package/dist/testing/logger-conformance.js +43 -0
- package/dist/testing/logger-conformance.js.map +1 -0
- package/dist/testing/outcome-conformance.d.ts +17 -0
- package/dist/testing/outcome-conformance.d.ts.map +1 -0
- package/dist/testing/outcome-conformance.js +50 -0
- package/dist/testing/outcome-conformance.js.map +1 -0
- package/dist/testing/runner.d.ts +19 -0
- package/dist/testing/runner.d.ts.map +1 -0
- package/dist/testing/runner.js +9 -0
- package/dist/testing/runner.js.map +1 -0
- package/dist/testing/test-brain-port.d.ts +25 -0
- package/dist/testing/test-brain-port.d.ts.map +1 -0
- package/dist/testing/test-brain-port.js +54 -0
- package/dist/testing/test-brain-port.js.map +1 -0
- package/dist/testing/test-child-agent.d.ts +32 -0
- package/dist/testing/test-child-agent.d.ts.map +1 -0
- package/dist/testing/test-child-agent.js +59 -0
- package/dist/testing/test-child-agent.js.map +1 -0
- package/dist/testing/test-event-bus.d.ts +57 -0
- package/dist/testing/test-event-bus.d.ts.map +1 -0
- package/dist/testing/test-event-bus.js +191 -0
- package/dist/testing/test-event-bus.js.map +1 -0
- package/dist/tools/index.d.ts +5 -0
- package/dist/tools/index.d.ts.map +1 -0
- package/dist/tools/index.js +4 -0
- package/dist/tools/index.js.map +1 -0
- package/dist/tools/registry.d.ts +20 -0
- package/dist/tools/registry.d.ts.map +1 -0
- package/dist/tools/registry.js +102 -0
- package/dist/tools/registry.js.map +1 -0
- package/dist/tools/types.d.ts +85 -0
- package/dist/tools/types.d.ts.map +1 -0
- package/dist/tools/types.js +13 -0
- package/dist/tools/types.js.map +1 -0
- package/dist/tools/zod-to-json-schema.d.ts +11 -0
- package/dist/tools/zod-to-json-schema.d.ts.map +1 -0
- package/dist/tools/zod-to-json-schema.js +92 -0
- package/dist/tools/zod-to-json-schema.js.map +1 -0
- package/dist/trace/canonical.d.ts +37 -0
- package/dist/trace/canonical.d.ts.map +1 -0
- package/dist/trace/canonical.js +139 -0
- package/dist/trace/canonical.js.map +1 -0
- package/dist/trace/policy.d.ts +103 -0
- package/dist/trace/policy.d.ts.map +1 -0
- package/dist/trace/policy.js +137 -0
- package/dist/trace/policy.js.map +1 -0
- package/dist/trace/schema.d.ts +195 -0
- package/dist/trace/schema.d.ts.map +1 -0
- package/dist/trace/schema.js +18 -0
- package/dist/trace/schema.js.map +1 -0
- package/dist/trace/strict-pii-detector.d.ts +28 -0
- package/dist/trace/strict-pii-detector.d.ts.map +1 -0
- package/dist/trace/strict-pii-detector.js +144 -0
- package/dist/trace/strict-pii-detector.js.map +1 -0
- package/dist/tracing/traced-port.d.ts +50 -0
- package/dist/tracing/traced-port.d.ts.map +1 -0
- package/dist/tracing/traced-port.js +108 -0
- package/dist/tracing/traced-port.js.map +1 -0
- package/dist/tracking/agent-run-tracker.d.ts +63 -0
- package/dist/tracking/agent-run-tracker.d.ts.map +1 -0
- package/dist/tracking/agent-run-tracker.js +132 -0
- package/dist/tracking/agent-run-tracker.js.map +1 -0
- package/dist/tracking/gen-ai.d.ts +54 -0
- package/dist/tracking/gen-ai.d.ts.map +1 -0
- package/dist/tracking/gen-ai.js +101 -0
- package/dist/tracking/gen-ai.js.map +1 -0
- package/dist/tracking/index.d.ts +4 -0
- package/dist/tracking/index.d.ts.map +1 -0
- package/dist/tracking/index.js +3 -0
- package/dist/tracking/index.js.map +1 -0
- package/dist/types/agent.d.ts +84 -0
- package/dist/types/agent.d.ts.map +1 -0
- package/dist/types/agent.js +13 -0
- package/dist/types/agent.js.map +1 -0
- package/dist/types/agent.test.d.ts +11 -0
- package/dist/types/agent.test.d.ts.map +1 -0
- package/dist/types/agent.test.js +144 -0
- package/dist/types/agent.test.js.map +1 -0
- package/dist/types/escalation-mapping.d.ts +30 -0
- package/dist/types/escalation-mapping.d.ts.map +1 -0
- package/dist/types/escalation-mapping.js +32 -0
- package/dist/types/escalation-mapping.js.map +1 -0
- package/docs/byom.md +106 -0
- package/docs/contributing.md +139 -0
- package/docs/economy.md +159 -0
- package/docs/getting-started.md +179 -0
- package/docs/index.md +57 -0
- package/docs/migration/0.15-to-0.17.md +227 -0
- package/docs/papers/bon-mav-notes.md +36 -0
- package/docs/patterns.md +138 -0
- package/docs/platforms.md +137 -0
- package/docs/ports/agent-registry.md +100 -0
- package/docs/ports/event-bus.md +131 -0
- package/docs/ports/hitl.md +105 -0
- package/docs/ports/llm-provider.md +125 -0
- package/docs/ports/messaging.md +94 -0
- package/docs/templates.md +165 -0
- package/package.json +160 -0
- package/src/adapters/hitl/memory-state-store.ts +96 -0
- package/src/adapters/hitl/postgres-state-store.ts +147 -0
- package/src/adapters/llm/anthropic-direct.ts +212 -0
- package/src/adapters/llm/cascade.ts +109 -0
- package/src/adapters/llm/litellm.ts +300 -0
- package/src/adapters/messaging/console.ts +73 -0
- package/src/adapters/messaging/discord.ts +125 -0
- package/src/adapters/messaging/mcp.ts +84 -0
- package/src/adapters/messaging/slack.ts +109 -0
- package/src/adapters/messaging/telegram.ts +134 -0
- package/src/adapters/registry/memory.ts +84 -0
- package/src/adapters/registry/postgres.ts +166 -0
- package/src/agents/index.ts +2 -0
- package/src/agents/trading.ts +114 -0
- package/src/auth/errors.ts +66 -0
- package/src/auth/nonce-store.ts +79 -0
- package/src/auth/sign-event.ts +51 -0
- package/src/auth/verify-event.ts +112 -0
- package/src/boot/init-sdk.ts +92 -0
- package/src/boot/load-agent-context.ts +164 -0
- package/src/boot/load-recent-memory.ts +92 -0
- package/src/budget/budget-state.ts +234 -0
- package/src/budget/index.ts +11 -0
- package/src/clients/agents.ts +140 -0
- package/src/clients/index.ts +6 -0
- package/src/clients/messaging-adapters.ts +173 -0
- package/src/clients/pipelines.ts +160 -0
- package/src/compute/strategies/best-of-n.ts +162 -0
- package/src/compute/strategies/bon-mav.ts +221 -0
- package/src/compute/strategies/single-shot.ts +45 -0
- package/src/compute/types.ts +36 -0
- package/src/compute/verifier.ts +84 -0
- package/src/compute/with-compute.ts +108 -0
- package/src/constitution/axioms.ts +548 -0
- package/src/constitution/gate.ts +180 -0
- package/src/constitution/index.ts +53 -0
- package/src/constitution/scorer.ts +170 -0
- package/src/constitution/signal.ts +114 -0
- package/src/constitution/types.ts +195 -0
- package/src/counterfactual/index.ts +20 -0
- package/src/counterfactual/replay-with-alt.ts +357 -0
- package/src/dataops/index.ts +4 -0
- package/src/dataops/starkscan.ts +13 -0
- package/src/dataops/voyager.ts +215 -0
- package/src/deprecation.ts +102 -0
- package/src/durable/bullmq-runner.ts +560 -0
- package/src/durable/index.ts +11 -0
- package/src/economy/circuit-breaker.ts +130 -0
- package/src/economy/economy-router.ts +242 -0
- package/src/economy/index.ts +28 -0
- package/src/economy/outcome-tracker.ts +228 -0
- package/src/economy/router.ts +246 -0
- package/src/economy/tier-policy.ts +116 -0
- package/src/errors.ts +159 -0
- package/src/evals/datasets/index.ts +343 -0
- package/src/evals/harness.ts +171 -0
- package/src/evals/index.ts +4 -0
- package/src/factory/agent-factory.ts +261 -0
- package/src/hitl/api.ts +78 -0
- package/src/hitl/approval-channel.ts +128 -0
- package/src/hitl/callback-handlers.ts +61 -0
- package/src/hitl/index.ts +9 -0
- package/src/hitl/slack.ts +358 -0
- package/src/hitl/telegram.ts +342 -0
- package/src/index.ts +710 -0
- package/src/llm/parse-structured-output.ts +116 -0
- package/src/loop/index.ts +26 -0
- package/src/loop/minimal-loop.ts +448 -0
- package/src/loop/sdk-loop.ts +505 -0
- package/src/mcp/index.ts +146 -0
- package/src/metrics/create-agent-metrics.ts +231 -0
- package/src/orchestration/bulkhead.ts +145 -0
- package/src/orchestration/idempotency.ts +137 -0
- package/src/orchestration/index.ts +24 -0
- package/src/orchestration/ooda/agent-config-loader.ts +118 -0
- package/src/orchestration/ooda/agent.ts +1009 -0
- package/src/orchestration/ooda/audit-log.ts +57 -0
- package/src/orchestration/ooda/brain-context.ts +269 -0
- package/src/orchestration/ooda/child-agent.ts +60 -0
- package/src/orchestration/ooda/cron-schedule.ts +115 -0
- package/src/orchestration/ooda/debate.ts +100 -0
- package/src/orchestration/ooda/economic-observer.ts +77 -0
- package/src/orchestration/ooda/errors.ts +32 -0
- package/src/orchestration/ooda/execution-mode-guard.ts +37 -0
- package/src/orchestration/ooda/factory.ts +48 -0
- package/src/orchestration/ooda/guardrails.ts +172 -0
- package/src/orchestration/ooda/guards/always-on.ts +21 -0
- package/src/orchestration/ooda/guards/business-hours.ts +134 -0
- package/src/orchestration/ooda/guards/cme-holidays-2026.json +1 -0
- package/src/orchestration/ooda/guards/index.ts +27 -0
- package/src/orchestration/ooda/guards/redis-circuit-breaker.ts +172 -0
- package/src/orchestration/ooda/guards/rth-session.ts +133 -0
- package/src/orchestration/ooda/handoff.ts +150 -0
- package/src/orchestration/ooda/hitl-gate.ts +166 -0
- package/src/orchestration/ooda/index.ts +163 -0
- package/src/orchestration/ooda/inner-monologue.ts +99 -0
- package/src/orchestration/ooda/learn.ts +146 -0
- package/src/orchestration/ooda/multimodal.ts +174 -0
- package/src/orchestration/ooda/phase-routing.ts +74 -0
- package/src/orchestration/ooda/plan-execute.ts +87 -0
- package/src/orchestration/ooda/react-loop.ts +192 -0
- package/src/orchestration/ooda/reflexion.ts +98 -0
- package/src/orchestration/ooda/resource-limits.ts +123 -0
- package/src/orchestration/ooda/run-step-persistence.ts +137 -0
- package/src/orchestration/ooda/skills.ts +60 -0
- package/src/orchestration/ooda/structured-output.ts +93 -0
- package/src/orchestration/ooda/types.ts +508 -0
- package/src/otel/attributes.ts +104 -0
- package/src/otel/index.ts +4 -0
- package/src/otel/ingest.ts +294 -0
- package/src/otel/trace-context.ts +197 -0
- package/src/otel/types.ts +49 -0
- package/src/outcomes/client.ts +194 -0
- package/src/outcomes/compute-roi.ts +89 -0
- package/src/outcomes/index.ts +3 -0
- package/src/outcomes/meter.ts +217 -0
- package/src/outcomes/trajectory.ts +170 -0
- package/src/outcomes/types.ts +88 -0
- package/src/patterns/_shared/brain-logger.ts +18 -0
- package/src/patterns/circuit-breaker/index.ts +8 -0
- package/src/patterns/circuit-breaker/session-cb.ts +198 -0
- package/src/patterns/circuit-breaker/types.ts +72 -0
- package/src/patterns/escalation/index.ts +8 -0
- package/src/patterns/escalation/pyramid.ts +125 -0
- package/src/patterns/escalation/types.ts +62 -0
- package/src/patterns/quality-gate/gate.ts +102 -0
- package/src/patterns/quality-gate/index.ts +8 -0
- package/src/patterns/quality-gate/types.ts +45 -0
- package/src/permissions/capability-gate.ts +63 -0
- package/src/permissions/index.ts +11 -0
- package/src/permissions/renewal-manager.ts +89 -0
- package/src/permissions/sdk-permissions.ts +124 -0
- package/src/ports/agent-registry.ts +104 -0
- package/src/ports/brain.ts +255 -0
- package/src/ports/db.ts +7 -0
- package/src/ports/event-bus.ts +199 -0
- package/src/ports/eviction-policy.ts +179 -0
- package/src/ports/hitl.contract.test.ts +131 -0
- package/src/ports/hitl.ts +153 -0
- package/src/ports/index.ts +48 -0
- package/src/ports/key-provider.ts +179 -0
- package/src/ports/llm-provider.contract.ts +132 -0
- package/src/ports/llm-provider.ts +68 -0
- package/src/ports/logger.ts +23 -0
- package/src/ports/messaging.contract.test.ts +422 -0
- package/src/ports/messaging.ts +53 -0
- package/src/ports/outcome.ts +26 -0
- package/src/ports/timestamp.ts +64 -0
- package/src/privacy/channel.ts +283 -0
- package/src/privacy/composition.ts +289 -0
- package/src/privacy/delegation.ts +128 -0
- package/src/privacy/index.ts +64 -0
- package/src/privacy/nullifier.ts +79 -0
- package/src/privacy/poseidon-felt252.ts +104 -0
- package/src/privacy/uar.ts +234 -0
- package/src/prompts/build-orient-prompt.ts +78 -0
- package/src/proof/chain.ts +249 -0
- package/src/proof/fallback-adapter.ts +117 -0
- package/src/proof/index.ts +207 -0
- package/src/proof/load.ts +61 -0
- package/src/proof/otel.ts +74 -0
- package/src/proof/receipt-queue.ts +235 -0
- package/src/proof/sha256.ts +80 -0
- package/src/proof/types.ts +58 -0
- package/src/proof/verify.ts +89 -0
- package/src/registry/agent-capability.ts +48 -0
- package/src/registry/agent-card.ts +162 -0
- package/src/registry/agent-ids.ts +56 -0
- package/src/registry/agent-registry.ts +378 -0
- package/src/registry/index.ts +23 -0
- package/src/replay/clock.ts +104 -0
- package/src/replay/cross-cycle.ts +406 -0
- package/src/replay/http-vcr.ts +123 -0
- package/src/replay/index.ts +62 -0
- package/src/replay/llm-cache.ts +199 -0
- package/src/replay/random.ts +249 -0
- package/src/replay/replay.ts +271 -0
- package/src/resilience/bulkhead.ts +95 -0
- package/src/resilience/circuit-breaker.ts +129 -0
- package/src/resilience/idempotent.ts +126 -0
- package/src/resilience/index.ts +51 -0
- package/src/router/index.ts +11 -0
- package/src/router/provider-router.ts +319 -0
- package/src/runs/health.ts +84 -0
- package/src/runs/index.ts +3 -0
- package/src/runs/subscribe.ts +263 -0
- package/src/runs/types.ts +63 -0
- package/src/safety/immune.ts +145 -0
- package/src/safety/index.ts +6 -0
- package/src/safety/sanitize.ts +183 -0
- package/src/sdk-permission-mapping.ts +123 -0
- package/src/skills/_otel.ts +31 -0
- package/src/skills/alpaca-quote.ts +82 -0
- package/src/skills/base-skills.ts +38 -0
- package/src/skills/brain-query.ts +62 -0
- package/src/skills/brain-store.ts +69 -0
- package/src/skills/calendar-check.ts +111 -0
- package/src/skills/cboe-vix-spot.ts +89 -0
- package/src/skills/errors.ts +59 -0
- package/src/skills/hitl-request.ts +68 -0
- package/src/skills/http-fetch.ts +92 -0
- package/src/skills/index.ts +71 -0
- package/src/skills/record-outcome.ts +125 -0
- package/src/skills/run-sql-query.ts +87 -0
- package/src/skills/send-email.ts +111 -0
- package/src/skills/skill-ledger.ts +92 -0
- package/src/skills/skill-registry-builder.ts +264 -0
- package/src/skills/slack-notify.ts +90 -0
- package/src/skills/starknet-balance.ts +106 -0
- package/src/skills/telegram-notify.ts +88 -0
- package/src/skills/web-search.ts +108 -0
- package/src/templates/complex-agent.ts +187 -0
- package/src/templates/index.ts +27 -0
- package/src/templates/reasoning-agent.ts +251 -0
- package/src/templates/simple-agent.ts +173 -0
- package/src/testing/brain-conformance.ts +105 -0
- package/src/testing/chaos.ts +266 -0
- package/src/testing/contracts/brain-port.contract.ts +91 -0
- package/src/testing/contracts/economic-observer.contract.ts +87 -0
- package/src/testing/contracts/event-bus.contract.ts +324 -0
- package/src/testing/db-conformance.ts +43 -0
- package/src/testing/eval.ts +161 -0
- package/src/testing/index.ts +59 -0
- package/src/testing/integration-harness.ts +75 -0
- package/src/testing/logger-conformance.ts +57 -0
- package/src/testing/outcome-conformance.ts +62 -0
- package/src/testing/runner.ts +19 -0
- package/src/testing/test-brain-port.ts +69 -0
- package/src/testing/test-child-agent.ts +70 -0
- package/src/testing/test-event-bus.ts +247 -0
- package/src/tools/index.ts +11 -0
- package/src/tools/registry.ts +122 -0
- package/src/tools/types.ts +114 -0
- package/src/tools/zod-to-json-schema.ts +112 -0
- package/src/trace/canonical.ts +158 -0
- package/src/trace/policy.ts +242 -0
- package/src/trace/schema.ts +245 -0
- package/src/trace/strict-pii-detector.ts +156 -0
- package/src/tracing/traced-port.ts +149 -0
- package/src/tracking/agent-run-tracker.ts +228 -0
- package/src/tracking/gen-ai.ts +153 -0
- package/src/tracking/index.ts +19 -0
- package/src/types/agent.ts +93 -0
- package/src/types/escalation-mapping.ts +48 -0
|
@@ -0,0 +1,343 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Synthetic eval dataset — 30 tasks, product-agnostic.
|
|
3
|
+
* Sprint-579: eval-dataset-30.
|
|
4
|
+
*
|
|
5
|
+
* Categories: math (6), code (6), classification (6),
|
|
6
|
+
* summarization (6), reasoning (6).
|
|
7
|
+
* Ground truth labels manually validated.
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
/** Task families covered by the synthetic dataset (six tasks per family). */
export type EvalCategory = "math" | "code" | "classification" | "summarization" | "reasoning";
|
|
16
|
+
|
|
17
|
+
/** Difficulty tier assigned to a task. */
export type EvalDifficulty =
  | "easy"
  | "medium"
  | "hard";
|
|
18
|
+
|
|
19
|
+
/**
 * One entry of the eval dataset: a prompt together with the label the
 * model's answer is scored against.
 */
export interface EvalTask {
  id: string;
  category: EvalCategory;
  difficulty: EvalDifficulty;
  /** Text handed to the model being evaluated. */
  prompt: string;
  /** Expected answer: a single value, or a list of accepted alternatives. */
  groundTruth: string | string[];
  /** Optional id of the judge function used for non-exact-match scoring. */
  scorer?:
    | "exact"
    | "numeric_tolerance"
    | "contains"
    | "code_output";
  /** Tolerance consumed by the `numeric_tolerance` scorer. */
  tolerance?: number;
}
|
|
32
|
+
|
|
33
|
+
// ─── Math (6) ────────────────────────────────────────────────────────────────
|
|
34
|
+
|
|
35
|
+
/**
 * Math tasks (6): two easy, two medium, two hard.
 *
 * Every `groundTruth` below has been re-derived from its prompt:
 *   math-001  17 × 23 = 391
 *   math-002  √144 = 12
 *   math-003  80 km/h × 2.75 h = 220 km
 *   math-004  1 + 3 + … + 19 = 10² = 100
 *   math-005  3·4² − 2·4 + 1 = 41
 *   math-006  2(l + w) = 56 with l = 2w  →  w = 28/3, l = 56/3,
 *             area = 1568/9 ≈ 174.22 cm²
 */
const mathTasks: EvalTask[] = [
  {
    id: "math-001",
    category: "math",
    difficulty: "easy",
    prompt: "What is 17 × 23?",
    groundTruth: "391",
    scorer: "exact",
  },
  {
    id: "math-002",
    category: "math",
    difficulty: "easy",
    prompt: "What is the square root of 144?",
    groundTruth: "12",
    scorer: "exact",
  },
  {
    id: "math-003",
    category: "math",
    difficulty: "medium",
    prompt:
      "A train travels at 80 km/h. How many kilometers does it cover in 2 hours and 45 minutes?",
    groundTruth: "220",
    scorer: "numeric_tolerance",
    tolerance: 0.5,
  },
  {
    id: "math-004",
    category: "math",
    difficulty: "medium",
    prompt:
      "What is the sum of the first 10 positive odd numbers?",
    groundTruth: "100",
    scorer: "exact",
  },
  {
    id: "math-005",
    category: "math",
    difficulty: "hard",
    prompt:
      "If f(x) = 3x² − 2x + 1, what is f(4)?",
    groundTruth: "41",
    scorer: "exact",
  },
  {
    id: "math-006",
    category: "math",
    difficulty: "hard",
    prompt:
      "A rectangle has a perimeter of 56 cm and a length that is twice its width. What is the area in cm²?",
    // The previous label "196" is the area of a 14 × 14 square with the same
    // perimeter, not of the 2:1 rectangle the prompt describes. The correct
    // area is 1568/9 ≈ 174.22; "174.2" also matches "174.22" / "174.222…"
    // under the substring-based "contains" scorer this task already uses.
    groundTruth: ["174.2", "1568/9"],
    scorer: "contains",
  },
];
|
|
90
|
+
|
|
91
|
+
// ─── Code generation (6) ─────────────────────────────────────────────────────
|
|
92
|
+
|
|
93
|
+
/**
 * Code-generation tasks: two each at easy, medium and hard.
 *
 * All of them use the "contains" scorer, so the ground truth is a marker
 * string expected somewhere in a correct answer (a function name, a key API,
 * or an example result); nothing is executed.
 */
const codeTasks: EvalTask[] = [
  {
    id: "code-001",
    category: "code",
    difficulty: "easy",
    prompt:
      "Write a Python function `is_palindrome(s: str) -> bool` that returns True if the string is a palindrome (case-insensitive, ignore spaces).",
    groundTruth: "is_palindrome",
    scorer: "contains",
  },
  {
    id: "code-002",
    category: "code",
    difficulty: "easy",
    prompt:
      "Write a TypeScript function `sum(arr: number[]): number` that returns the sum of all elements.",
    // A plain accumulation loop is as correct as `reduce`; accept either.
    groundTruth: ["reduce", "+="],
    scorer: "contains",
  },
  {
    id: "code-003",
    category: "code",
    difficulty: "medium",
    prompt:
      "Write a Python function `fibonacci(n: int) -> list[int]` that returns the first n Fibonacci numbers. fibonacci(8) should return [0,1,1,2,3,5,8,13].",
    // The scorer does not normalise whitespace, so accept both Python's
    // repr spacing and the compact form used in the prompt itself.
    groundTruth: ["[0, 1, 1, 2, 3, 5, 8, 13]", "[0,1,1,2,3,5,8,13]"],
    scorer: "contains",
  },
  {
    id: "code-004",
    category: "code",
    difficulty: "medium",
    prompt:
      "Write a TypeScript function `groupBy<T>(arr: T[], keyFn: (item: T) => string): Record<string, T[]>` that groups array elements by a key function.",
    groundTruth: "Record",
    scorer: "contains",
  },
  {
    id: "code-005",
    category: "code",
    difficulty: "hard",
    prompt:
      "Write a Python function `lru_cache_impl(capacity: int)` that returns an LRU cache object with `get(key)` and `put(key, value)` methods. Use an OrderedDict.",
    groundTruth: "OrderedDict",
    scorer: "contains",
  },
  {
    id: "code-006",
    category: "code",
    difficulty: "hard",
    prompt:
      "Implement a TypeScript `debounce<T extends (...args: unknown[]) => void>(fn: T, delay: number): T` function.",
    groundTruth: "setTimeout",
    scorer: "contains",
  },
];
|
|
149
|
+
|
|
150
|
+
// ─── Classification (6) ──────────────────────────────────────────────────────
|
|
151
|
+
|
|
152
|
+
/**
 * Single-label classification tasks: two each at easy, medium and hard.
 *
 * All of them use the case-insensitive "contains" scorer, so the correct
 * label must not be a substring of any incorrect label offered in the prompt.
 * class-002 therefore offers SPAM / LEGITIMATE rather than "spam" / "not
 * spam", where the wrong answer would itself contain the ground truth.
 */
const classificationTasks: EvalTask[] = [
  {
    id: "class-001",
    category: "classification",
    difficulty: "easy",
    prompt:
      "Classify the sentiment of this text as POSITIVE, NEGATIVE, or NEUTRAL:\n\"The product arrived on time and works perfectly.\"",
    groundTruth: "POSITIVE",
    scorer: "contains",
  },
  {
    id: "class-002",
    category: "classification",
    difficulty: "easy",
    prompt:
      "Classify the following email as SPAM or LEGITIMATE:\n\"Congratulations! You've been selected for a $1,000 gift card. Click here to claim.\"",
    groundTruth: "SPAM",
    scorer: "contains",
  },
  {
    id: "class-003",
    category: "classification",
    difficulty: "medium",
    prompt:
      "Classify this news headline into one of: POLITICS, TECHNOLOGY, SPORTS, FINANCE, HEALTH.\n\"Central bank raises interest rates by 25 basis points amid inflation concerns.\"",
    groundTruth: "FINANCE",
    scorer: "contains",
  },
  {
    id: "class-004",
    category: "classification",
    difficulty: "medium",
    prompt:
      "Is this code comment a TODO, a FIXME, or neither?\n\"// This should be refactored once the API stabilizes.\"",
    groundTruth: ["TODO", "todo"],
    scorer: "contains",
  },
  {
    id: "class-005",
    category: "classification",
    difficulty: "hard",
    prompt:
      "Classify the programming language: `let x: i32 = 42; fn main() { println!(\"{}\", x); }`",
    groundTruth: "Rust",
    scorer: "contains",
  },
  {
    id: "class-006",
    category: "classification",
    difficulty: "hard",
    prompt:
      "Classify the logical fallacy in: \"We should trust Dr. Smith's diet advice — he has a PhD in biochemistry.\"\nChoose from: AD_HOMINEM, APPEAL_TO_AUTHORITY, STRAW_MAN, FALSE_DICHOTOMY",
    groundTruth: "APPEAL_TO_AUTHORITY",
    scorer: "contains",
  },
];
|
|
208
|
+
|
|
209
|
+
// ─── Summarization (6) ───────────────────────────────────────────────────────
|
|
210
|
+
|
|
211
|
+
/**
 * Summarization tasks: two each at easy, medium and hard.
 *
 * Each row is [id, difficulty, prompt, accepted keywords]. Every task shares
 * the "summarization" category and the "contains" scorer, so a summary passes
 * when it mentions at least one of the listed keywords.
 */
const summarizationTasks: EvalTask[] = (
  [
    [
      "sum-001",
      "easy",
      "Summarize in one sentence:\n\"The Eiffel Tower is a wrought-iron lattice tower in Paris, France. It was designed by Gustave Eiffel and built between 1887 and 1889 as the entrance arch to the 1889 World's Fair. It stands 330 metres tall and attracts millions of tourists each year.\"",
      ["Eiffel", "Paris", "1889"],
    ],
    [
      "sum-002",
      "easy",
      "Extract the key action from this sentence in 5 words or fewer:\n\"The board unanimously voted to approve the merger with Acme Corp after reviewing the Q3 financials.\"",
      ["approved", "merger", "voted"],
    ],
    [
      "sum-003",
      "medium",
      "Summarize the following code in one sentence describing what it does:\n```python\ndef fn(lst):\n return [x for x in lst if x % 2 == 0]\n```",
      ["even", "filter"],
    ],
    [
      "sum-004",
      "medium",
      "Give a one-line TL;DR for this commit message:\n\"fix: prevent null pointer exception in UserService.getProfile when user has no associated organization by adding optional chaining and a default empty object fallback\"",
      ["null", "UserService", "optional"],
    ],
    [
      "sum-005",
      "hard",
      "Summarize the main thesis of this argument in one sentence:\n\"Critics of universal basic income argue that giving people money without work requirements removes the incentive to be productive, may lead to inflation as more money chases the same goods, and could be prohibitively expensive for governments to sustain in the long term.\"",
      ["incentive", "inflation", "expensive"],
    ],
    [
      "sum-006",
      "hard",
      "In ≤20 words, capture the decision made in this ADR:\n\"We chose PostgreSQL over MongoDB because our data has clear relational structure, we need ACID guarantees for financial transactions, and the team has strong PostgreSQL expertise.\"",
      ["PostgreSQL", "MongoDB"],
    ],
  ] as Array<[string, EvalDifficulty, string, string[]]>
).map(
  ([id, difficulty, prompt, groundTruth]): EvalTask => ({
    id,
    category: "summarization",
    difficulty,
    prompt,
    groundTruth,
    scorer: "contains",
  }),
);
|
|
267
|
+
|
|
268
|
+
// ─── Reasoning multi-step (6) ────────────────────────────────────────────────
|
|
269
|
+
|
|
270
|
+
/**
 * Multi-step reasoning tasks: two each at easy, medium and hard.
 *
 * All of them use the case-insensitive "contains" scorer, so each accepted
 * keyword must be specific enough not to occur in an arbitrary response.
 * reason-006 keys on the temperature trick (a bulb that is off but still
 * warm); the former keyword "on" was dropped because it is a substring of
 * almost any English answer ("one", "control", "don't"...).
 */
const reasoningTasks: EvalTask[] = [
  {
    id: "reason-001",
    category: "reasoning",
    difficulty: "easy",
    prompt:
      "Alice is taller than Bob. Bob is taller than Carol. Is Alice taller than Carol? Answer YES or NO.",
    groundTruth: "YES",
    scorer: "contains",
  },
  {
    id: "reason-002",
    category: "reasoning",
    difficulty: "easy",
    prompt:
      "A store sells apples for €1.50 each and gives a 10% discount if you buy 5 or more. How much does it cost to buy exactly 6 apples?",
    // 6 × 1.50 = 9.00, minus 10% = 8.10 (decimal point or decimal comma).
    groundTruth: ["8.10", "8,10"],
    scorer: "contains",
  },
  {
    id: "reason-003",
    category: "reasoning",
    difficulty: "medium",
    prompt:
      "You have a 3-litre jug and a 5-litre jug. You need exactly 4 litres. Describe the minimum number of steps needed. How many steps?",
    groundTruth: ["6", "7"],
    scorer: "contains",
  },
  {
    id: "reason-004",
    category: "reasoning",
    difficulty: "medium",
    prompt:
      "If all Bloops are Razzies, and all Razzies are Lazzies, are all Bloops definitely Lazzies? Answer YES or NO and give one-line reasoning.",
    groundTruth: "YES",
    scorer: "contains",
  },
  {
    id: "reason-005",
    category: "reasoning",
    difficulty: "hard",
    prompt:
      "A function has O(n log n) time complexity. If it takes 10ms for n=1000, approximately how many milliseconds will it take for n=1,000,000? Round to the nearest integer.",
    // n grows 1000× and log n doubles, so 10 ms × 2000 = 20 000 ms.
    groundTruth: ["20000", "20,000"],
    scorer: "contains",
  },
  {
    id: "reason-006",
    category: "reasoning",
    difficulty: "hard",
    prompt:
      "Three switches control three bulbs in the next room. You can flip switches as many times as you like, but you can only enter the room once. Describe a strategy to identify which switch controls which bulb.",
    groundTruth: ["heat", "warm", "hot"],
    scorer: "contains",
  },
];
|
|
326
|
+
|
|
327
|
+
// ─── Full dataset ─────────────────────────────────────────────────────────────
|
|
328
|
+
|
|
329
|
+
/**
 * The complete built-in dataset: the five category lists joined in the order
 * math, code, classification, summarization, reasoning.
 */
export const EVAL_DATASET_30: EvalTask[] = ([] as EvalTask[]).concat(
  mathTasks,
  codeTasks,
  classificationTasks,
  summarizationTasks,
  reasoningTasks,
);
|
|
336
|
+
|
|
337
|
+
/**
 * Select the tasks of EVAL_DATASET_30 that belong to one category.
 *
 * @param category - Category to keep.
 * @returns A new array, in dataset order, of the tasks whose `category`
 *   strictly equals `category`; empty when none match.
 */
export function getTasksByCategory(category: EvalCategory): EvalTask[] {
  const selected: EvalTask[] = [];
  for (const task of EVAL_DATASET_30) {
    if (task.category === category) {
      selected.push(task);
    }
  }
  return selected;
}
|
|
340
|
+
|
|
341
|
+
/**
 * Select the tasks of EVAL_DATASET_30 that have one difficulty level.
 *
 * @param difficulty - Difficulty to keep.
 * @returns A new array, in dataset order, of the tasks whose `difficulty`
 *   strictly equals `difficulty`; empty when none match.
 */
export function getTasksByDifficulty(difficulty: EvalDifficulty): EvalTask[] {
  const selected: EvalTask[] = [];
  for (const task of EVAL_DATASET_30) {
    if (task.difficulty === difficulty) {
      selected.push(task);
    }
  }
  return selected;
}
|
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Eval harness — strategy × dataset → {accuracy, cost, latency, rationale}.
|
|
3
|
+
*
|
|
4
|
+
* Sprint-579: eval-harness. Product-agnostic; consumer apps supply their own datasets.
|
|
5
|
+
*
|
|
6
|
+
* Usage:
|
|
7
|
+
* const result = await evalSuite(singleShotStrategy, EVAL_DATASET_30);
|
|
8
|
+
* console.log(result.accuracy); // 0.73
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
import type { EvalTask } from "./datasets/index.js";
|
|
12
|
+
|
|
13
|
+
// ─── Strategy ────────────────────────────────────────────────────────────────
|
|
14
|
+
|
|
15
|
+
/** What a strategy returns for a single prompt. */
export interface StrategyOutput {
|
|
16
|
+
/** Answer text; this is the string evalSuite passes to scoreTask. */
output: string;
|
|
17
|
+
/** Cost of the call in USD; evalSuite counts it as 0 when omitted. */
costUsd?: number;
|
|
18
|
+
/** Self-reported latency in ms; when omitted, evalSuite uses its own wall-clock measurement of the run() call. */
latencyMs?: number;
|
|
19
|
+
/** NOTE(review): not read by the harness in this module; presumably alternative outputs from multi-sample strategies (confirm with callers). */
candidates?: string[];
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
/** A named way of answering a prompt, evaluated by evalSuite. */
export interface EvalStrategy {
|
|
23
|
+
/** Label copied into EvalSuiteResult.strategy. */
name: string;
|
|
24
|
+
/** Called by evalSuite with each task's prompt; a rejection is caught and recorded on that task's result. */
run(prompt: string): Promise<StrategyOutput>;
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
// ─── Results ─────────────────────────────────────────────────────────────────
|
|
28
|
+
|
|
29
|
+
/** Outcome of running one task through a strategy. */
export interface EvalTaskResult {
|
|
30
|
+
/** Copied from the task's `id`. */
taskId: string;
|
|
31
|
+
/** Copied from the task's `category`. */
category: string;
|
|
32
|
+
/** Copied from the task's `difficulty`. */
difficulty: string;
|
|
33
|
+
/** True when `score` is at least 1. */
passed: boolean;
|
|
34
|
+
/** The strategy's output text; the empty string when strategy.run threw. */
output: string;
|
|
35
|
+
/** Value returned by scoreTask for this output (the built-in scorers return 0 or 1). */
score: number;
|
|
36
|
+
/** Latency in ms: the strategy's own figure when it reports one, otherwise the harness's wall-clock measurement. */
latencyMs: number;
|
|
37
|
+
/** Cost in USD reported by the strategy, or 0 when it reported none. */
costUsd: number;
|
|
38
|
+
/** Free-text explanation; evalSuite fills it with the error message when strategy.run threw. */
rationale?: string;
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
/** Aggregate outcome of running one strategy over a whole dataset. */
export interface EvalSuiteResult {
|
|
42
|
+
/** The strategy's `name`. */
strategy: string;
|
|
43
|
+
/** Passed tasks divided by total tasks, in [0, 1]; 0 for an empty dataset. */
accuracy: number;
|
|
44
|
+
/** Sum of the per-task costs in USD. */
costUsd: number;
|
|
45
|
+
/** Median of the per-task latencies in ms (the lower of the two middle values for an even count); 0 for an empty dataset. */
latencyMs: number;
|
|
46
|
+
/** Per-task results, in dataset order. */
rationale: EvalTaskResult[];
|
|
47
|
+
/** Counters and timing for the run as a whole. */
meta: {
|
|
48
|
+
/** Number of tasks in the dataset. */
totalTasks: number;
|
|
49
|
+
/** Number of tasks whose result has `passed` set. */
passedTasks: number;
|
|
50
|
+
/** totalTasks minus passedTasks. */
failedTasks: number;
|
|
51
|
+
/** Wall-clock duration of the whole evalSuite call in ms. */
durationMs: number;
|
|
52
|
+
};
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
// ─── Scorers ─────────────────────────────────────────────────────────────────
|
|
56
|
+
|
|
57
|
+
/**
 * Exact-match scorer.
 *
 * Compares the output with each accepted answer after trimming surrounding
 * whitespace and lower-casing both sides.
 *
 * @param output - Text produced by the strategy.
 * @param groundTruth - One accepted answer, or a list of them.
 * @returns 1 when the normalised output equals any accepted answer, else 0.
 */
function scoreExact(output: string, groundTruth: string | string[]): number {
  const canonical = (text: string): string => text.trim().toLowerCase();
  const candidate = canonical(output);
  const accepted = Array.isArray(groundTruth) ? groundTruth : [groundTruth];
  for (const answer of accepted) {
    if (canonical(answer) === candidate) {
      return 1;
    }
  }
  return 0;
}
|
|
62
|
+
|
|
63
|
+
/**
 * Substring scorer.
 *
 * Lower-cases the output and checks whether it contains any accepted answer
 * (each trimmed and lower-cased) as a substring.
 *
 * @param output - Text produced by the strategy.
 * @param groundTruth - One accepted answer, or a list of them.
 * @returns 1 when at least one accepted answer occurs in the output, else 0.
 */
function scoreContains(output: string, groundTruth: string | string[]): number {
  const haystack = output.toLowerCase();
  const needles = Array.isArray(groundTruth) ? groundTruth : [groundTruth];
  for (const needle of needles) {
    if (haystack.includes(needle.trim().toLowerCase())) {
      return 1;
    }
  }
  return 0;
}
|
|
68
|
+
|
|
69
|
+
/**
 * Numeric scorer with an absolute tolerance.
 *
 * Extracts every number that appears in the output and in the ground truth
 * and passes when any output number lies within `tolerance` of any
 * ground-truth number. Numbers are parsed one token at a time, so a verbose
 * answer such as "220 km in 2.75 hours" yields 220 and 2.75 rather than the
 * digits of both glued together into a single meaningless value.
 *
 * Recognised tokens: an optional leading minus sign, an integer part that may
 * use comma thousands separators ("1,234"), and an optional decimal fraction.
 * A hyphen directly after a digit ("2-3") is treated as a range separator,
 * not as a sign.
 *
 * @param output - Text produced by the strategy.
 * @param groundTruth - One accepted answer, or a list of them.
 * @param tolerance - Maximum allowed absolute difference (inclusive).
 * @returns 1 when a number in the output matches within tolerance, else 0
 *   (including when the output contains no number at all).
 */
function scoreNumericTolerance(
  output: string,
  groundTruth: string | string[],
  tolerance: number
): number {
  const numberToken = /(?<![\d.])-?(?:\d{1,3}(?:,\d{3})+|\d+)(?:\.\d+)?|(?<![\d.])-?\.\d+/g;

  // Parse every numeric token in `text`, dropping thousands separators.
  const numbersIn = (text: string): number[] =>
    (text.match(numberToken) ?? [])
      .map((token) => Number.parseFloat(token.replace(/,/g, "")))
      .filter((value) => Number.isFinite(value));

  const observed = numbersIn(output);
  if (observed.length === 0) return 0;

  const truths = Array.isArray(groundTruth) ? groundTruth : [groundTruth];
  const matched = truths.some((gt) =>
    numbersIn(gt).some((expected) =>
      observed.some((value) => Math.abs(value - expected) <= tolerance)
    )
  );
  return matched ? 1 : 0;
}
|
|
84
|
+
|
|
85
|
+
/**
 * Score one output against a task using the scorer the task names.
 *
 * @param task - Task supplying `groundTruth`, and optionally `scorer` and
 *   `tolerance`.
 * @param output - Text produced by the strategy.
 * @returns The selected scorer's result (0 or 1).
 */
export function scoreTask(task: EvalTask, output: string): number {
  const scorer = task.scorer ?? "contains";

  if (scorer === "exact") {
    return scoreExact(output, task.groundTruth);
  }
  if (scorer === "numeric_tolerance") {
    // A task without an explicit tolerance requires an exact numeric match.
    return scoreNumericTolerance(output, task.groundTruth, task.tolerance ?? 0);
  }

  // "contains", the "code_output" placeholder (falls back to contains for
  // now) and any unrecognised scorer name all use substring matching.
  return scoreContains(output, task.groundTruth);
}
|
|
100
|
+
|
|
101
|
+
// ─── Main harness ─────────────────────────────────────────────────────────────
|
|
102
|
+
|
|
103
|
+
/**
 * Run a strategy against a dataset of eval tasks.
 *
 * Tasks are run one at a time, in dataset order. A failure on one task never
 * aborts the run: if `strategy.run` rejects, or resolves without a string
 * `output`, the task is scored against the empty string and the reason is
 * stored in that task's `rationale`.
 *
 * @param strategy - Strategy under evaluation; `run` receives each prompt.
 * @param dataset - Tasks to evaluate; may be empty.
 * @returns Accuracy (0-1), total cost in USD, median latency in ms (the lower
 *   of the two middle values for an even task count), per-task results, and
 *   run metadata. For an empty dataset accuracy and latency are 0.
 */
export async function evalSuite(
  strategy: EvalStrategy,
  dataset: EvalTask[]
): Promise<EvalSuiteResult> {
  const t0 = Date.now();
  const results: EvalTaskResult[] = [];

  for (const task of dataset) {
    const taskT0 = Date.now();
    let output = "";
    let costUsd = 0;
    let latencyMs = 0;
    let rationale: string | undefined;

    try {
      const stratOut = await strategy.run(task.prompt);
      // Scorers call string methods on the output, so a missing or non-string
      // value must be caught here rather than crash the whole suite.
      if (typeof stratOut?.output === "string") {
        output = stratOut.output;
      } else {
        rationale = "strategy.run returned a non-string output";
      }
      costUsd = stratOut?.costUsd ?? 0;
      // Prefer the strategy's own timing; otherwise measure the call here.
      latencyMs = stratOut?.latencyMs ?? Date.now() - taskT0;
    } catch (err) {
      output = "";
      rationale = `strategy.run threw: ${err instanceof Error ? err.message : String(err)}`;
      latencyMs = Date.now() - taskT0;
    }

    const score = scoreTask(task, output);

    results.push({
      taskId: task.id,
      category: task.category,
      difficulty: task.difficulty,
      passed: score >= 1,
      output,
      score,
      latencyMs,
      costUsd,
      rationale,
    });
  }

  const passed = results.filter((r) => r.passed).length;
  const accuracy = dataset.length > 0 ? passed / dataset.length : 0;
  const totalCost = results.reduce((s, r) => s + r.costUsd, 0);

  // Lower median: index floor((n - 1) / 2) of the ascending latencies.
  const sortedLatencies = results.map((r) => r.latencyMs).sort((a, b) => a - b);
  const medianLatency =
    sortedLatencies.length > 0
      ? (sortedLatencies[Math.floor((sortedLatencies.length - 1) / 2)] ?? 0)
      : 0;

  return {
    strategy: strategy.name,
    accuracy,
    costUsd: totalCost,
    latencyMs: medianLatency,
    rationale: results,
    meta: {
      totalTasks: dataset.length,
      passedTasks: passed,
      failedTasks: dataset.length - passed,
      durationMs: Date.now() - t0,
    },
  };
}
|
|
@@ -0,0 +1,4 @@
|
|
|
1
|
+
export { evalSuite, scoreTask } from "./harness.js";
|
|
2
|
+
export type { EvalStrategy, StrategyOutput, EvalTaskResult, EvalSuiteResult } from "./harness.js";
|
|
3
|
+
export { EVAL_DATASET_30, getTasksByCategory, getTasksByDifficulty } from "./datasets/index.js";
|
|
4
|
+
export type { EvalTask, EvalCategory, EvalDifficulty } from "./datasets/index.js";
|