@amodalai/core 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/dist/.last_build +0 -0
- package/dist/src/amodal-config.d.ts +194 -0
- package/dist/src/amodal-config.js +326 -0
- package/dist/src/amodal-config.js.map +1 -0
- package/dist/src/audit/audit-logger.d.ts +52 -0
- package/dist/src/audit/audit-logger.js +137 -0
- package/dist/src/audit/audit-logger.js.map +1 -0
- package/dist/src/audit/audit-outputs.d.ts +34 -0
- package/dist/src/audit/audit-outputs.js +73 -0
- package/dist/src/audit/audit-outputs.js.map +1 -0
- package/dist/src/audit/audit-redact.d.ts +14 -0
- package/dist/src/audit/audit-redact.js +55 -0
- package/dist/src/audit/audit-redact.js.map +1 -0
- package/dist/src/audit/audit-types.d.ts +122 -0
- package/dist/src/audit/audit-types.js +64 -0
- package/dist/src/audit/audit-types.js.map +1 -0
- package/dist/src/audit/index.d.ts +10 -0
- package/dist/src/audit/index.js +10 -0
- package/dist/src/audit/index.js.map +1 -0
- package/dist/src/eval/eval-cost.d.ts +33 -0
- package/dist/src/eval/eval-cost.js +73 -0
- package/dist/src/eval/eval-cost.js.map +1 -0
- package/dist/src/eval/eval-diff.d.ts +11 -0
- package/dist/src/eval/eval-diff.js +97 -0
- package/dist/src/eval/eval-diff.js.map +1 -0
- package/dist/src/eval/eval-formatter.d.ts +23 -0
- package/dist/src/eval/eval-formatter.js +221 -0
- package/dist/src/eval/eval-formatter.js.map +1 -0
- package/dist/src/eval/eval-judge.d.ts +26 -0
- package/dist/src/eval/eval-judge.js +76 -0
- package/dist/src/eval/eval-judge.js.map +1 -0
- package/dist/src/eval/eval-run-builder.d.ts +25 -0
- package/dist/src/eval/eval-run-builder.js +78 -0
- package/dist/src/eval/eval-run-builder.js.map +1 -0
- package/dist/src/eval/eval-runner.d.ts +36 -0
- package/dist/src/eval/eval-runner.js +92 -0
- package/dist/src/eval/eval-runner.js.map +1 -0
- package/dist/src/eval/eval-session-provider.d.ts +40 -0
- package/dist/src/eval/eval-session-provider.js +46 -0
- package/dist/src/eval/eval-session-provider.js.map +1 -0
- package/dist/src/eval/eval-types.d.ts +146 -0
- package/dist/src/eval/eval-types.js +7 -0
- package/dist/src/eval/eval-types.js.map +1 -0
- package/dist/src/eval/experiment-runner.d.ts +16 -0
- package/dist/src/eval/experiment-runner.js +73 -0
- package/dist/src/eval/experiment-runner.js.map +1 -0
- package/dist/src/eval/experiment-types.d.ts +56 -0
- package/dist/src/eval/experiment-types.js +7 -0
- package/dist/src/eval/experiment-types.js.map +1 -0
- package/dist/src/eval/index.d.ts +22 -0
- package/dist/src/eval/index.js +18 -0
- package/dist/src/eval/index.js.map +1 -0
- package/dist/src/eval/multi-model-runner.d.ts +42 -0
- package/dist/src/eval/multi-model-runner.js +70 -0
- package/dist/src/eval/multi-model-runner.js.map +1 -0
- package/dist/src/eval/platform-eval-client.d.ts +105 -0
- package/dist/src/eval/platform-eval-client.js +155 -0
- package/dist/src/eval/platform-eval-client.js.map +1 -0
- package/dist/src/index.d.ts +41 -0
- package/dist/src/index.js +68 -0
- package/dist/src/index.js.map +1 -0
- package/dist/src/knowledge/index.d.ts +11 -0
- package/dist/src/knowledge/index.js +12 -0
- package/dist/src/knowledge/index.js.map +1 -0
- package/dist/src/knowledge/kb-formatter.d.ts +15 -0
- package/dist/src/knowledge/kb-formatter.js +78 -0
- package/dist/src/knowledge/kb-formatter.js.map +1 -0
- package/dist/src/knowledge/kb-index.d.ts +27 -0
- package/dist/src/knowledge/kb-index.js +66 -0
- package/dist/src/knowledge/kb-index.js.map +1 -0
- package/dist/src/knowledge/kb-types.d.ts +54 -0
- package/dist/src/knowledge/kb-types.js +7 -0
- package/dist/src/knowledge/kb-types.js.map +1 -0
- package/dist/src/knowledge/knowledge-store.d.ts +56 -0
- package/dist/src/knowledge/knowledge-store.js +141 -0
- package/dist/src/knowledge/knowledge-store.js.map +1 -0
- package/dist/src/knowledge/load-knowledge.d.ts +30 -0
- package/dist/src/knowledge/load-knowledge.js +161 -0
- package/dist/src/knowledge/load-knowledge.js.map +1 -0
- package/dist/src/knowledge/propose-kb-update.d.ts +32 -0
- package/dist/src/knowledge/propose-kb-update.js +148 -0
- package/dist/src/knowledge/propose-kb-update.js.map +1 -0
- package/dist/src/knowledge/propose-knowledge.d.ts +34 -0
- package/dist/src/knowledge/propose-knowledge.js +163 -0
- package/dist/src/knowledge/propose-knowledge.js.map +1 -0
- package/dist/src/mcp/index.d.ts +7 -0
- package/dist/src/mcp/index.js +7 -0
- package/dist/src/mcp/index.js.map +1 -0
- package/dist/src/mcp/mcp-manager.d.ts +96 -0
- package/dist/src/mcp/mcp-manager.js +192 -0
- package/dist/src/mcp/mcp-manager.js.map +1 -0
- package/dist/src/packages/config-deps.d.ts +20 -0
- package/dist/src/packages/config-deps.js +77 -0
- package/dist/src/packages/config-deps.js.map +1 -0
- package/dist/src/packages/env-file.d.ts +28 -0
- package/dist/src/packages/env-file.js +143 -0
- package/dist/src/packages/env-file.js.map +1 -0
- package/dist/src/packages/frontmatter.d.ts +28 -0
- package/dist/src/packages/frontmatter.js +77 -0
- package/dist/src/packages/frontmatter.js.map +1 -0
- package/dist/src/packages/index.d.ts +16 -0
- package/dist/src/packages/index.js +17 -0
- package/dist/src/packages/index.js.map +1 -0
- package/dist/src/packages/lock-file.d.ts +35 -0
- package/dist/src/packages/lock-file.js +117 -0
- package/dist/src/packages/lock-file.js.map +1 -0
- package/dist/src/packages/manifest-reader.d.ts +19 -0
- package/dist/src/packages/manifest-reader.js +91 -0
- package/dist/src/packages/manifest-reader.js.map +1 -0
- package/dist/src/packages/merge-engine.d.ts +63 -0
- package/dist/src/packages/merge-engine.js +357 -0
- package/dist/src/packages/merge-engine.js.map +1 -0
- package/dist/src/packages/npm-context.d.ts +56 -0
- package/dist/src/packages/npm-context.js +235 -0
- package/dist/src/packages/npm-context.js.map +1 -0
- package/dist/src/packages/npm-registry.d.ts +35 -0
- package/dist/src/packages/npm-registry.js +107 -0
- package/dist/src/packages/npm-registry.js.map +1 -0
- package/dist/src/packages/package-error.d.ts +16 -0
- package/dist/src/packages/package-error.js +17 -0
- package/dist/src/packages/package-error.js.map +1 -0
- package/dist/src/packages/package-types.d.ts +477 -0
- package/dist/src/packages/package-types.js +156 -0
- package/dist/src/packages/package-types.js.map +1 -0
- package/dist/src/packages/resolver.d.ts +41 -0
- package/dist/src/packages/resolver.js +353 -0
- package/dist/src/packages/resolver.js.map +1 -0
- package/dist/src/platform/config-builder.d.ts +24 -0
- package/dist/src/platform/config-builder.js +70 -0
- package/dist/src/platform/config-builder.js.map +1 -0
- package/dist/src/platform/index.d.ts +8 -0
- package/dist/src/platform/index.js +9 -0
- package/dist/src/platform/index.js.map +1 -0
- package/dist/src/platform/platform-client.d.ts +160 -0
- package/dist/src/platform/platform-client.js +486 -0
- package/dist/src/platform/platform-client.js.map +1 -0
- package/dist/src/platform/platform-types.d.ts +81 -0
- package/dist/src/platform/platform-types.js +18 -0
- package/dist/src/platform/platform-types.js.map +1 -0
- package/dist/src/providers/content-generator/google-to-llm.d.ts +87 -0
- package/dist/src/providers/content-generator/google-to-llm.js +226 -0
- package/dist/src/providers/content-generator/google-to-llm.js.map +1 -0
- package/dist/src/providers/content-generator/index.d.ts +10 -0
- package/dist/src/providers/content-generator/index.js +9 -0
- package/dist/src/providers/content-generator/index.js.map +1 -0
- package/dist/src/providers/content-generator/llm-to-google.d.ts +59 -0
- package/dist/src/providers/content-generator/llm-to-google.js +178 -0
- package/dist/src/providers/content-generator/llm-to-google.js.map +1 -0
- package/dist/src/providers/content-generator/multi-provider-content-generator.d.ts +61 -0
- package/dist/src/providers/content-generator/multi-provider-content-generator.js +144 -0
- package/dist/src/providers/content-generator/multi-provider-content-generator.js.map +1 -0
- package/dist/src/providers/runtime/anthropic-provider.d.ts +18 -0
- package/dist/src/providers/runtime/anthropic-provider.js +253 -0
- package/dist/src/providers/runtime/anthropic-provider.js.map +1 -0
- package/dist/src/providers/runtime/azure-provider.d.ts +25 -0
- package/dist/src/providers/runtime/azure-provider.js +206 -0
- package/dist/src/providers/runtime/azure-provider.js.map +1 -0
- package/dist/src/providers/runtime/bedrock-provider.d.ts +22 -0
- package/dist/src/providers/runtime/bedrock-provider.js +276 -0
- package/dist/src/providers/runtime/bedrock-provider.js.map +1 -0
- package/dist/src/providers/runtime/failover-provider.d.ts +30 -0
- package/dist/src/providers/runtime/failover-provider.js +124 -0
- package/dist/src/providers/runtime/failover-provider.js.map +1 -0
- package/dist/src/providers/runtime/google-provider.d.ts +17 -0
- package/dist/src/providers/runtime/google-provider.js +239 -0
- package/dist/src/providers/runtime/google-provider.js.map +1 -0
- package/dist/src/providers/runtime/index.d.ts +16 -0
- package/dist/src/providers/runtime/index.js +16 -0
- package/dist/src/providers/runtime/index.js.map +1 -0
- package/dist/src/providers/runtime/openai-provider.d.ts +21 -0
- package/dist/src/providers/runtime/openai-provider.js +266 -0
- package/dist/src/providers/runtime/openai-provider.js.map +1 -0
- package/dist/src/providers/runtime/provider-errors.d.ts +39 -0
- package/dist/src/providers/runtime/provider-errors.js +50 -0
- package/dist/src/providers/runtime/provider-errors.js.map +1 -0
- package/dist/src/providers/runtime/provider-factory.d.ts +19 -0
- package/dist/src/providers/runtime/provider-factory.js +45 -0
- package/dist/src/providers/runtime/provider-factory.js.map +1 -0
- package/dist/src/providers/runtime/runtime-provider-types.d.ts +63 -0
- package/dist/src/providers/runtime/runtime-provider-types.js +7 -0
- package/dist/src/providers/runtime/runtime-provider-types.js.map +1 -0
- package/dist/src/providers/runtime/streaming-types.d.ts +40 -0
- package/dist/src/providers/runtime/streaming-types.js +7 -0
- package/dist/src/providers/runtime/streaming-types.js.map +1 -0
- package/dist/src/repo/config-schema.d.ts +238 -0
- package/dist/src/repo/config-schema.js +155 -0
- package/dist/src/repo/config-schema.js.map +1 -0
- package/dist/src/repo/connection-schemas.d.ts +449 -0
- package/dist/src/repo/connection-schemas.js +109 -0
- package/dist/src/repo/connection-schemas.js.map +1 -0
- package/dist/src/repo/connection-types.d.ts +29 -0
- package/dist/src/repo/connection-types.js +7 -0
- package/dist/src/repo/connection-types.js.map +1 -0
- package/dist/src/repo/drift-detector.d.ts +26 -0
- package/dist/src/repo/drift-detector.js +66 -0
- package/dist/src/repo/drift-detector.js.map +1 -0
- package/dist/src/repo/graphql-drift-detector.d.ts +27 -0
- package/dist/src/repo/graphql-drift-detector.js +66 -0
- package/dist/src/repo/graphql-drift-detector.js.map +1 -0
- package/dist/src/repo/graphql-parser.d.ts +30 -0
- package/dist/src/repo/graphql-parser.js +125 -0
- package/dist/src/repo/graphql-parser.js.map +1 -0
- package/dist/src/repo/graphql-surface-parser.d.ts +20 -0
- package/dist/src/repo/graphql-surface-parser.js +74 -0
- package/dist/src/repo/graphql-surface-parser.js.map +1 -0
- package/dist/src/repo/index.d.ts +30 -0
- package/dist/src/repo/index.js +29 -0
- package/dist/src/repo/index.js.map +1 -0
- package/dist/src/repo/local-reader.d.ts +10 -0
- package/dist/src/repo/local-reader.js +299 -0
- package/dist/src/repo/local-reader.js.map +1 -0
- package/dist/src/repo/openapi-parser.d.ts +35 -0
- package/dist/src/repo/openapi-parser.js +93 -0
- package/dist/src/repo/openapi-parser.js.map +1 -0
- package/dist/src/repo/parsers.d.ts +91 -0
- package/dist/src/repo/parsers.js +454 -0
- package/dist/src/repo/parsers.js.map +1 -0
- package/dist/src/repo/platform-reader.d.ts +10 -0
- package/dist/src/repo/platform-reader.js +206 -0
- package/dist/src/repo/platform-reader.js.map +1 -0
- package/dist/src/repo/repo-loader.d.ts +14 -0
- package/dist/src/repo/repo-loader.js +25 -0
- package/dist/src/repo/repo-loader.js.map +1 -0
- package/dist/src/repo/repo-types.d.ts +159 -0
- package/dist/src/repo/repo-types.js +17 -0
- package/dist/src/repo/repo-types.js.map +1 -0
- package/dist/src/repo/spec-syncer.d.ts +30 -0
- package/dist/src/repo/spec-syncer.js +85 -0
- package/dist/src/repo/spec-syncer.js.map +1 -0
- package/dist/src/repo/store-loader.d.ts +19 -0
- package/dist/src/repo/store-loader.js +94 -0
- package/dist/src/repo/store-loader.js.map +1 -0
- package/dist/src/repo/store-schemas.d.ts +313 -0
- package/dist/src/repo/store-schemas.js +103 -0
- package/dist/src/repo/store-schemas.js.map +1 -0
- package/dist/src/repo/store-tool-schema.d.ts +29 -0
- package/dist/src/repo/store-tool-schema.js +103 -0
- package/dist/src/repo/store-tool-schema.js.map +1 -0
- package/dist/src/repo/store-types.d.ts +91 -0
- package/dist/src/repo/store-types.js +7 -0
- package/dist/src/repo/store-types.js.map +1 -0
- package/dist/src/repo/surface-parser.d.ts +17 -0
- package/dist/src/repo/surface-parser.js +75 -0
- package/dist/src/repo/surface-parser.js.map +1 -0
- package/dist/src/repo/tool-loader.d.ts +33 -0
- package/dist/src/repo/tool-loader.js +240 -0
- package/dist/src/repo/tool-loader.js.map +1 -0
- package/dist/src/repo/tool-types.d.ts +205 -0
- package/dist/src/repo/tool-types.js +61 -0
- package/dist/src/repo/tool-types.js.map +1 -0
- package/dist/src/roles/index.d.ts +7 -0
- package/dist/src/roles/index.js +8 -0
- package/dist/src/roles/index.js.map +1 -0
- package/dist/src/roles/role-filter.d.ts +33 -0
- package/dist/src/roles/role-filter.js +55 -0
- package/dist/src/roles/role-filter.js.map +1 -0
- package/dist/src/roles/role-types.d.ts +76 -0
- package/dist/src/roles/role-types.js +38 -0
- package/dist/src/roles/role-types.js.map +1 -0
- package/dist/src/runtime/connection-bridge.d.ts +19 -0
- package/dist/src/runtime/connection-bridge.js +103 -0
- package/dist/src/runtime/connection-bridge.js.map +1 -0
- package/dist/src/runtime/context-compiler.d.ts +35 -0
- package/dist/src/runtime/context-compiler.js +183 -0
- package/dist/src/runtime/context-compiler.js.map +1 -0
- package/dist/src/runtime/default-prompt.d.ts +28 -0
- package/dist/src/runtime/default-prompt.js +71 -0
- package/dist/src/runtime/default-prompt.js.map +1 -0
- package/dist/src/runtime/explore-tool.d.ts +96 -0
- package/dist/src/runtime/explore-tool.js +111 -0
- package/dist/src/runtime/explore-tool.js.map +1 -0
- package/dist/src/runtime/index.d.ts +26 -0
- package/dist/src/runtime/index.js +19 -0
- package/dist/src/runtime/index.js.map +1 -0
- package/dist/src/runtime/output-pipeline.d.ts +62 -0
- package/dist/src/runtime/output-pipeline.js +69 -0
- package/dist/src/runtime/output-pipeline.js.map +1 -0
- package/dist/src/runtime/plan-mode.d.ts +39 -0
- package/dist/src/runtime/plan-mode.js +81 -0
- package/dist/src/runtime/plan-mode.js.map +1 -0
- package/dist/src/runtime/preference-client.d.ts +39 -0
- package/dist/src/runtime/preference-client.js +70 -0
- package/dist/src/runtime/preference-client.js.map +1 -0
- package/dist/src/runtime/preference-detector.d.ts +22 -0
- package/dist/src/runtime/preference-detector.js +95 -0
- package/dist/src/runtime/preference-detector.js.map +1 -0
- package/dist/src/runtime/request-integration.d.ts +18 -0
- package/dist/src/runtime/request-integration.js +36 -0
- package/dist/src/runtime/request-integration.js.map +1 -0
- package/dist/src/runtime/runtime-types.d.ts +48 -0
- package/dist/src/runtime/runtime-types.js +7 -0
- package/dist/src/runtime/runtime-types.js.map +1 -0
- package/dist/src/runtime/session-setup.d.ts +53 -0
- package/dist/src/runtime/session-setup.js +90 -0
- package/dist/src/runtime/session-setup.js.map +1 -0
- package/dist/src/runtime/telemetry-client.d.ts +39 -0
- package/dist/src/runtime/telemetry-client.js +87 -0
- package/dist/src/runtime/telemetry-client.js.map +1 -0
- package/dist/src/runtime/telemetry-hooks.d.ts +47 -0
- package/dist/src/runtime/telemetry-hooks.js +115 -0
- package/dist/src/runtime/telemetry-hooks.js.map +1 -0
- package/dist/src/runtime/token-allocator.d.ts +34 -0
- package/dist/src/runtime/token-allocator.js +86 -0
- package/dist/src/runtime/token-allocator.js.map +1 -0
- package/dist/src/runtime/user-context.d.ts +49 -0
- package/dist/src/runtime/user-context.js +135 -0
- package/dist/src/runtime/user-context.js.map +1 -0
- package/dist/src/sdk.d.ts +57 -0
- package/dist/src/sdk.js +377 -0
- package/dist/src/sdk.js.map +1 -0
- package/dist/src/security/action-gate.d.ts +23 -0
- package/dist/src/security/action-gate.js +78 -0
- package/dist/src/security/action-gate.js.map +1 -0
- package/dist/src/security/field-scrubber.d.ts +27 -0
- package/dist/src/security/field-scrubber.js +152 -0
- package/dist/src/security/field-scrubber.js.map +1 -0
- package/dist/src/security/index.d.ts +14 -0
- package/dist/src/security/index.js +15 -0
- package/dist/src/security/index.js.map +1 -0
- package/dist/src/security/leak-detector.d.ts +23 -0
- package/dist/src/security/leak-detector.js +51 -0
- package/dist/src/security/leak-detector.js.map +1 -0
- package/dist/src/security/output-guard.d.ts +33 -0
- package/dist/src/security/output-guard.js +118 -0
- package/dist/src/security/output-guard.js.map +1 -0
- package/dist/src/security/pattern-scanner.d.ts +19 -0
- package/dist/src/security/pattern-scanner.js +66 -0
- package/dist/src/security/pattern-scanner.js.map +1 -0
- package/dist/src/security/scope-checker.d.ts +27 -0
- package/dist/src/security/scope-checker.js +52 -0
- package/dist/src/security/scope-checker.js.map +1 -0
- package/dist/src/security/scrub-tracker.d.ts +21 -0
- package/dist/src/security/scrub-tracker.js +39 -0
- package/dist/src/security/scrub-tracker.js.map +1 -0
- package/dist/src/security/security-types.d.ts +69 -0
- package/dist/src/security/security-types.js +17 -0
- package/dist/src/security/security-types.js.map +1 -0
- package/dist/src/security/threshold-evaluator.d.ts +13 -0
- package/dist/src/security/threshold-evaluator.js +44 -0
- package/dist/src/security/threshold-evaluator.js.map +1 -0
- package/dist/src/snapshot/index.d.ts +8 -0
- package/dist/src/snapshot/index.js +9 -0
- package/dist/src/snapshot/index.js.map +1 -0
- package/dist/src/snapshot/snapshot-builder.d.ts +37 -0
- package/dist/src/snapshot/snapshot-builder.js +152 -0
- package/dist/src/snapshot/snapshot-builder.js.map +1 -0
- package/dist/src/snapshot/snapshot-loader.d.ts +29 -0
- package/dist/src/snapshot/snapshot-loader.js +188 -0
- package/dist/src/snapshot/snapshot-loader.js.map +1 -0
- package/dist/src/snapshot/snapshot-types.d.ts +1940 -0
- package/dist/src/snapshot/snapshot-types.js +145 -0
- package/dist/src/snapshot/snapshot-types.js.map +1 -0
- package/dist/src/stores/index.d.ts +6 -0
- package/dist/src/stores/index.js +7 -0
- package/dist/src/stores/index.js.map +1 -0
- package/dist/src/stores/store-backend.d.ts +135 -0
- package/dist/src/stores/store-backend.js +7 -0
- package/dist/src/stores/store-backend.js.map +1 -0
- package/dist/src/templates/connections.d.ts +14 -0
- package/dist/src/templates/connections.js +7 -0
- package/dist/src/templates/connections.js.map +1 -0
- package/dist/src/templates/index.d.ts +7 -0
- package/dist/src/templates/index.js +8 -0
- package/dist/src/templates/index.js.map +1 -0
- package/dist/src/templates/template-resolver.d.ts +45 -0
- package/dist/src/templates/template-resolver.js +121 -0
- package/dist/src/templates/template-resolver.js.map +1 -0
- package/dist/src/tool-context.d.ts +33 -0
- package/dist/src/tool-context.js +7 -0
- package/dist/src/tool-context.js.map +1 -0
- package/dist/src/tool-registration.d.ts +14 -0
- package/dist/src/tool-registration.js +51 -0
- package/dist/src/tool-registration.js.map +1 -0
- package/dist/src/tools/amodal-tool-errors.d.ts +24 -0
- package/dist/src/tools/amodal-tool-errors.js +28 -0
- package/dist/src/tools/amodal-tool-errors.js.map +1 -0
- package/dist/src/tools/amodal-tool-names.d.ts +14 -0
- package/dist/src/tools/amodal-tool-names.js +15 -0
- package/dist/src/tools/amodal-tool-names.js.map +1 -0
- package/dist/src/tools/chain-tool-registry.d.ts +20 -0
- package/dist/src/tools/chain-tool-registry.js +49 -0
- package/dist/src/tools/chain-tool-registry.js.map +1 -0
- package/dist/src/tools/chain-tool-types.d.ts +190 -0
- package/dist/src/tools/chain-tool-types.js +50 -0
- package/dist/src/tools/chain-tool-types.js.map +1 -0
- package/dist/src/tools/chain-tool.d.ts +34 -0
- package/dist/src/tools/chain-tool.js +294 -0
- package/dist/src/tools/chain-tool.js.map +1 -0
- package/dist/src/tools/custom-tool-registrar.d.ts +8 -0
- package/dist/src/tools/custom-tool-registrar.js +10 -0
- package/dist/src/tools/custom-tool-registrar.js.map +1 -0
- package/dist/src/tools/definitions/amodal-tools.d.ts +9 -0
- package/dist/src/tools/definitions/amodal-tools.js +192 -0
- package/dist/src/tools/definitions/amodal-tools.js.map +1 -0
- package/dist/src/tools/function-tool-registry.d.ts +22 -0
- package/dist/src/tools/function-tool-registry.js +45 -0
- package/dist/src/tools/function-tool-registry.js.map +1 -0
- package/dist/src/tools/function-tool-types.d.ts +76 -0
- package/dist/src/tools/function-tool-types.js +27 -0
- package/dist/src/tools/function-tool-types.js.map +1 -0
- package/dist/src/tools/function-tool.d.ts +34 -0
- package/dist/src/tools/function-tool.js +97 -0
- package/dist/src/tools/function-tool.js.map +1 -0
- package/dist/src/tools/http-tool-registry.d.ts +20 -0
- package/dist/src/tools/http-tool-registry.js +34 -0
- package/dist/src/tools/http-tool-registry.js.map +1 -0
- package/dist/src/tools/http-tool-types.d.ts +92 -0
- package/dist/src/tools/http-tool-types.js +44 -0
- package/dist/src/tools/http-tool-types.js.map +1 -0
- package/dist/src/tools/http-tool.d.ts +32 -0
- package/dist/src/tools/http-tool.js +176 -0
- package/dist/src/tools/http-tool.js.map +1 -0
- package/dist/src/tools/merge-template.d.ts +33 -0
- package/dist/src/tools/merge-template.js +65 -0
- package/dist/src/tools/merge-template.js.map +1 -0
- package/dist/src/tools/request-tool-types.d.ts +46 -0
- package/dist/src/tools/request-tool-types.js +20 -0
- package/dist/src/tools/request-tool-types.js.map +1 -0
- package/dist/src/tools/request-tool.d.ts +29 -0
- package/dist/src/tools/request-tool.js +268 -0
- package/dist/src/tools/request-tool.js.map +1 -0
- package/dist/src/tools/store-query-tool.d.ts +29 -0
- package/dist/src/tools/store-query-tool.js +82 -0
- package/dist/src/tools/store-query-tool.js.map +1 -0
- package/dist/src/tools/store-write-tool.d.ts +26 -0
- package/dist/src/tools/store-write-tool.js +84 -0
- package/dist/src/tools/store-write-tool.js.map +1 -0
- package/dist/src/tools/tool-definition-types.d.ts +21 -0
- package/dist/src/tools/tool-definition-types.js +7 -0
- package/dist/src/tools/tool-definition-types.js.map +1 -0
- package/dist/src/tools/tool-utils.d.ts +29 -0
- package/dist/src/tools/tool-utils.js +66 -0
- package/dist/src/tools/tool-utils.js.map +1 -0
- package/dist/src/versions/bundle-loader.d.ts +37 -0
- package/dist/src/versions/bundle-loader.js +99 -0
- package/dist/src/versions/bundle-loader.js.map +1 -0
- package/dist/src/versions/dependency-manager.d.ts +54 -0
- package/dist/src/versions/dependency-manager.js +132 -0
- package/dist/src/versions/dependency-manager.js.map +1 -0
- package/dist/src/versions/handler-loader.d.ts +27 -0
- package/dist/src/versions/handler-loader.js +62 -0
- package/dist/src/versions/handler-loader.js.map +1 -0
- package/dist/src/versions/index.d.ts +10 -0
- package/dist/src/versions/index.js +11 -0
- package/dist/src/versions/index.js.map +1 -0
- package/dist/src/versions/version-bundle-types.d.ts +1278 -0
- package/dist/src/versions/version-bundle-types.js +207 -0
- package/dist/src/versions/version-bundle-types.js.map +1 -0
- package/dist/src/versions/version-manager.d.ts +89 -0
- package/dist/src/versions/version-manager.js +124 -0
- package/dist/src/versions/version-manager.js.map +1 -0
- package/dist/src/widgets/index.d.ts +8 -0
- package/dist/src/widgets/index.js +8 -0
- package/dist/src/widgets/index.js.map +1 -0
- package/dist/src/widgets/present-tool.d.ts +21 -0
- package/dist/src/widgets/present-tool.js +107 -0
- package/dist/src/widgets/present-tool.js.map +1 -0
- package/dist/src/widgets/widget-types.d.ts +78 -0
- package/dist/src/widgets/widget-types.js +28 -0
- package/dist/src/widgets/widget-types.js.map +1 -0
- package/dist/tsconfig.tsbuildinfo +1 -0
- package/package.json +50 -0
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @license
|
|
3
|
+
* Copyright 2026 Amodal Labs, Inc.
|
|
4
|
+
* SPDX-License-Identifier: MIT
|
|
5
|
+
*/
|
|
6
|
+
import { judgeAllAssertions } from './eval-judge.js';
|
|
7
|
+
import { computeEvalCost, aggregateRunCost } from './eval-cost.js';
|
|
8
|
+
/**
 * Execute every eval in `repo.evals` (optionally narrowed by `options.filter`),
 * one case after another.
 *
 * Yields an `eval_start` event before and an `eval_complete` event after each
 * case, followed by a single `suite_complete` event. The aggregated suite
 * result is the generator's return value; it is not yielded as an event.
 *
 * @param repo - Loaded repo; only `repo.evals` is read here.
 * @param options - Run options. `filter` is matched as a case-insensitive
 *   substring against each eval's `name` and `title`; `model` and `gitSha` are
 *   copied into the suite result; the whole object is forwarded to
 *   `runSingleEval`.
 * @returns The suite result: per-case results, pass/fail counts, total
 *   duration, aggregated cost (only when at least one case reported a cost)
 *   and an ISO-8601 timestamp.
 */
export async function* runEvalSuite(repo, options) {
    const suiteStartedAt = Date.now();
    let selected = repo.evals;
    if (options.filter) {
        const needle = options.filter.toLowerCase();
        const matches = (text) => text.toLowerCase().includes(needle);
        // `title` is only inspected when `name` does not already match.
        selected = selected.filter((candidate) => matches(candidate.name) || matches(candidate.title));
    }
    const results = [];
    const costs = [];
    let index = 0;
    while (index < selected.length) {
        const current = selected[index];
        const position = index + 1; // progress events are 1-based
        yield { type: 'eval_start', evalName: current.name, current: position, total: selected.length };
        const outcome = await runSingleEval(current, options);
        results.push(outcome);
        if (outcome.cost) {
            costs.push(outcome.cost);
        }
        yield { type: 'eval_complete', evalName: current.name, passed: outcome.passed, current: position, total: selected.length };
        index += 1;
    }
    // Cost stays undefined when no case produced a cost figure.
    let totalCost;
    if (costs.length > 0) {
        totalCost = aggregateRunCost(costs);
    }
    const passedCount = results.filter((entry) => entry.passed).length;
    const failedCount = results.length - passedCount;
    const suiteResult = {
        results,
        totalPassed: passedCount,
        totalFailed: failedCount,
        totalSkipped: 0,
        totalDurationMs: Date.now() - suiteStartedAt,
        totalCost,
        model: options.model,
        gitSha: options.gitSha,
        timestamp: new Date().toISOString(),
    };
    yield { type: 'suite_complete' };
    return suiteResult;
}
|
|
46
|
+
/**
 * Run one eval case and judge its assertions.
 *
 * Sends `ev.query` (with `ev.setup.tenant`) to `options.queryProvider`, hands
 * the response text to `judgeAllAssertions` together with `ev.assertions` and
 * `options.judgeProvider`, and computes a cost when both token usage and
 * `options.model` are available.
 *
 * Errors thrown while querying or judging are caught and reported as a failed
 * result: every assertion is marked failed with the error message as its
 * reason, and the message is also stored in `error`.
 *
 * @param ev - The eval case (reads `query`, `setup.tenant`, `assertions`).
 * @param options - Run options (reads `queryProvider`, `judgeProvider`, `model`).
 * @returns The per-case result, including `durationMs` measured from entry.
 */
async function runSingleEval(ev, options) {
    const start = Date.now();
    try {
        const answer = await options.queryProvider.query(ev.query, ev.setup.tenant);
        const { response, toolCalls, usage } = answer;
        // The judge only sees text, so tool calls are appended as a structured
        // summary; this lets assertions about tool usage be evaluated.
        let judgeInput = response;
        if (toolCalls.length > 0) {
            const describeCall = (call) => `- ${call.name}(${JSON.stringify(call.parameters)})`;
            const summaryLines = toolCalls.map(describeCall);
            judgeInput += `\n\n## Tool Calls Made\n${summaryLines.join('\n')}`;
        }
        const assertions = await judgeAllAssertions(judgeInput, ev.assertions, options.judgeProvider);
        // The case passes only when no assertion failed.
        const passed = !assertions.some((verdict) => !verdict.passed);
        let cost;
        if (usage && options.model) {
            cost = computeEvalCost(usage.inputTokens, usage.outputTokens, options.model.model);
        }
        else {
            cost = undefined;
        }
        return {
            eval: ev,
            response,
            toolCalls,
            assertions,
            passed,
            durationMs: Date.now() - start,
            cost,
        };
    }
    catch (err) {
        const message = err instanceof Error ? err.message : String(err);
        const failedAssertions = ev.assertions.map((assertion) => ({
            text: assertion.text,
            negated: assertion.negated,
            passed: false,
            reason: `Eval execution error: ${message}`,
        }));
        return {
            eval: ev,
            response: '',
            toolCalls: [],
            assertions: failedAssertions,
            passed: false,
            durationMs: Date.now() - start,
            error: message,
        };
    }
}
|
|
92
|
+
//# sourceMappingURL=eval-runner.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"eval-runner.js","sourceRoot":"","sources":["../../../src/eval/eval-runner.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAMH,OAAO,EAAC,kBAAkB,EAAC,MAAM,iBAAiB,CAAC;AACnD,OAAO,EAAC,eAAe,EAAE,gBAAgB,EAAC,MAAM,gBAAgB,CAAC;AAqBjE;;;GAGG;AACH,MAAM,CAAC,KAAK,SAAS,CAAC,CAAC,YAAY,CACjC,IAAgB,EAChB,OAA0B;IAE1B,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IAC7B,IAAI,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC;IAEvB,IAAI,OAAO,CAAC,MAAM,EAAE,CAAC;QACnB,MAAM,OAAO,GAAG,OAAO,CAAC,MAAM,CAAC,WAAW,EAAE,CAAC;QAC7C,KAAK,GAAG,KAAK,CAAC,MAAM,CAClB,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,KAAK,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,OAAO,CAAC,CACzF,CAAC;IACJ,CAAC;IAED,MAAM,OAAO,GAAiB,EAAE,CAAC;IACjC,MAAM,YAAY,GAAmB,EAAE,CAAC;IAExC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACtC,MAAM,EAAE,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;QACpB,MAAM,EAAC,IAAI,EAAE,YAAY,EAAE,QAAQ,EAAE,EAAE,CAAC,IAAI,EAAE,OAAO,EAAE,CAAC,GAAG,CAAC,EAAE,KAAK,EAAE,KAAK,CAAC,MAAM,EAAC,CAAC;QAEnF,MAAM,MAAM,GAAG,MAAM,aAAa,CAAC,EAAE,EAAE,OAAO,CAAC,CAAC;QAChD,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QACrB,IAAI,MAAM,CAAC,IAAI,EAAE,CAAC;YAChB,YAAY,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;QACjC,CAAC;QAED,MAAM,EAAC,IAAI,EAAE,eAAe,EAAE,QAAQ,EAAE,EAAE,CAAC,IAAI,EAAE,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,EAAE,CAAC,GAAG,CAAC,EAAE,KAAK,EAAE,KAAK,CAAC,MAAM,EAAC,CAAC;IAC/G,CAAC;IAED,MAAM,SAAS,GAAG,YAAY,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,gBAAgB,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;IAEvF,MAAM,WAAW,GAAoB;QACnC,OAAO;QACP,WAAW,EAAE,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,MAAM;QACnD,WAAW,EAAE,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,MAAM;QACpD,YAAY,EAAE,CAAC;QACf,eAAe,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS;QACvC,SAAS;QACT,KAAK,EAAE,OAAO,CAAC,KAAK;QACpB,MAAM,EAAE,OAAO,CAAC,MAAM;QACtB,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;KACpC,CAAC;IAEF,MAAM,EAAC,IAAI,EAAE,gBAAgB,EAAC,CAAC;IAC/B,OAAO,WAAW,CAAC;AACrB,CAAC;AAED,KAAK,UAAU,aAA
a,CAC1B,EAAc,EACd,OAA0B;IAE1B,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IAEzB,IAAI,CAAC;QACH,MAAM,EAAC,QAAQ,EAAE,SAAS,EAAE,KAAK,EAAC,GAAG,MAAM,OAAO,CAAC,aAAa,CAAC,KAAK,CACpE,EAAE,CAAC,KAAK,EACR,EAAE,CAAC,KAAK,CAAC,MAAM,CAChB,CAAC;QAEF,sEAAsE;QACtE,iEAAiE;QACjE,8DAA8D;QAC9D,IAAI,gBAAgB,GAAG,QAAQ,CAAC;QAChC,IAAI,SAAS,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACzB,MAAM,WAAW,GAAG,SAAS;iBAC1B,GAAG,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,KAAK,EAAE,CAAC,IAAI,IAAI,IAAI,CAAC,SAAS,CAAC,EAAE,CAAC,UAAU,CAAC,GAAG,CAAC;iBAC7D,IAAI,CAAC,IAAI,CAAC,CAAC;YACd,gBAAgB,IAAI,2BAA2B,WAAW,EAAE,CAAC;QAC/D,CAAC;QAED,MAAM,UAAU,GAAG,MAAM,kBAAkB,CACzC,gBAAgB,EAChB,EAAE,CAAC,UAAU,EACb,OAAO,CAAC,aAAa,CACtB,CAAC;QAEF,MAAM,MAAM,GAAG,UAAU,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC;QAEjD,MAAM,IAAI,GAAG,KAAK,IAAI,OAAO,CAAC,KAAK;YACjC,CAAC,CAAC,eAAe,CAAC,KAAK,CAAC,WAAW,EAAE,KAAK,CAAC,YAAY,EAAE,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC;YAC7E,CAAC,CAAC,SAAS,CAAC;QAEd,OAAO;YACL,IAAI,EAAE,EAAE;YACR,QAAQ;YACR,SAAS;YACT,UAAU;YACV,MAAM;YACN,UAAU,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK;YAC9B,IAAI;SACL,CAAC;IACJ,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,OAAO;YACL,IAAI,EAAE,EAAE;YACR,QAAQ,EAAE,EAAE;YACZ,SAAS,EAAE,EAAE;YACb,UAAU,EAAE,EAAE,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;gBACpC,IAAI,EAAE,CAAC,CAAC,IAAI;gBACZ,OAAO,EAAE,CAAC,CAAC,OAAO;gBAClB,MAAM,EAAE,KAAK;gBACb,MAAM,EAAE,yBAAyB,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE;aACpF,CAAC,CAAC;YACH,MAAM,EAAE,KAAK;YACb,UAAU,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK;YAC9B,KAAK,EAAE,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC;SACxD,CAAC;IACJ,CAAC;AACH,CAAC"}
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @license
|
|
3
|
+
* Copyright 2026 Amodal Labs, Inc.
|
|
4
|
+
* SPDX-License-Identifier: MIT
|
|
5
|
+
*/
|
|
6
|
+
import type { EvalQueryProvider } from './eval-runner.js';
|
|
7
|
+
import type { ModelConfig } from '../repo/config-schema.js';
|
|
8
|
+
import type { LLMToolDefinition } from '../providers/runtime/runtime-provider-types.js';
|
|
9
|
+
/**
 * Settings accepted by the SessionEvalQueryProvider constructor.
 * Only `modelConfig` is required; every other field is optional.
 */
export interface SessionEvalProviderOptions {
    /** Model configuration handed to the runtime provider factory. */
    modelConfig: ModelConfig;
    /** System prompt sent with every query. */
    systemPrompt?: string;
    /** Tool definitions made available to the model. */
    tools?: Array<LLMToolDefinition>;
    /** Value forwarded as `maxTokens` on each LLM call. */
    maxTokens?: number;
}
|
|
18
|
+
/**
 * EvalQueryProvider implementation built on the runtime provider layer.
 * Each `query` call performs one standalone LLM request and reports the
 * token usage returned for it.
 */
export declare class SessionEvalQueryProvider implements EvalQueryProvider {
    private readonly provider;
    private readonly systemPrompt;
    private readonly tools;
    private readonly model;
    private readonly maxTokens;
    constructor(options: SessionEvalProviderOptions);
    /**
     * Sends `message` to the model as a single user turn.
     *
     * @param message - User message text.
     * @param _tenantId - Optional tenant identifier (underscore-prefixed; not
     *   part of the returned data).
     * @returns The response text, the tool calls requested by the model, and
     *   token usage when available.
     */
    query(message: string, _tenantId?: string): Promise<{
        response: string;
        toolCalls: {
            name: string;
            parameters: Record<string, unknown>;
        }[];
        usage?: {
            inputTokens: number;
            outputTokens: number;
        };
    }>;
}
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @license
|
|
3
|
+
* Copyright 2026 Amodal Labs, Inc.
|
|
4
|
+
* SPDX-License-Identifier: MIT
|
|
5
|
+
*/
|
|
6
|
+
import { createRuntimeProvider } from '../providers/runtime/provider-factory.js';
|
|
7
|
+
/**
 * EvalQueryProvider backed by the runtime provider infrastructure.
 * Every query is sent as its own single-turn LLM call, and the usage object
 * reported by the provider is handed back to the caller unchanged.
 */
export class SessionEvalQueryProvider {
    provider;
    systemPrompt;
    tools;
    model;
    maxTokens;
    /**
     * @param options - Provider settings. `modelConfig` is required; the
     *   system prompt, tool list and token limit fall back to defaults when
     *   they are null or undefined.
     */
    constructor(options) {
        const { modelConfig } = options;
        this.provider = createRuntimeProvider(modelConfig);
        this.systemPrompt = options.systemPrompt ?? 'You are a helpful assistant.';
        this.tools = options.tools ?? [];
        this.model = modelConfig.model;
        this.maxTokens = options.maxTokens ?? 4096;
    }
    /**
     * Sends `message` as a single user turn and splits the reply into text
     * and tool calls.
     *
     * @param message - User message text.
     * @param _tenantId - Accepted for interface compatibility; not used here.
     * @returns `response` (all text blocks concatenated in order),
     *   `toolCalls` (one entry per `tool_use` block) and the provider's
     *   `usage` value.
     */
    async query(message, _tenantId) {
        const request = {
            model: this.model,
            systemPrompt: this.systemPrompt,
            messages: [{ role: 'user', content: message }],
            tools: this.tools,
            maxTokens: this.maxTokens,
        };
        const reply = await this.provider.chat(request);
        const textParts = [];
        const toolCalls = [];
        // One pass over the content blocks; blocks of any other type are ignored.
        reply.content.forEach((block) => {
            if (block.type === 'text') {
                textParts.push(block.text);
            }
            else if (block.type === 'tool_use') {
                toolCalls.push({ name: block.name, parameters: block.input });
            }
        });
        return {
            response: textParts.join(''),
            toolCalls,
            usage: reply.usage,
        };
    }
}
|
|
46
|
+
//# sourceMappingURL=eval-session-provider.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"eval-session-provider.js","sourceRoot":"","sources":["../../../src/eval/eval-session-provider.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAKH,OAAO,EAAC,qBAAqB,EAAC,MAAM,0CAA0C,CAAC;AAY/E;;;GAGG;AACH,MAAM,OAAO,wBAAwB;IAClB,QAAQ,CAAkB;IAC1B,YAAY,CAAS;IACrB,KAAK,CAAsB;IAC3B,KAAK,CAAS;IACd,SAAS,CAAS;IAEnC,YAAY,OAAmC;QAC7C,IAAI,CAAC,QAAQ,GAAG,qBAAqB,CAAC,OAAO,CAAC,WAAW,CAAC,CAAC;QAC3D,IAAI,CAAC,YAAY,GAAG,OAAO,CAAC,YAAY,IAAI,8BAA8B,CAAC;QAC3E,IAAI,CAAC,KAAK,GAAG,OAAO,CAAC,KAAK,IAAI,EAAE,CAAC;QACjC,IAAI,CAAC,KAAK,GAAG,OAAO,CAAC,WAAW,CAAC,KAAK,CAAC;QACvC,IAAI,CAAC,SAAS,GAAG,OAAO,CAAC,SAAS,IAAI,IAAI,CAAC;IAC7C,CAAC;IAED,KAAK,CAAC,KAAK,CACT,OAAe,EACf,SAAkB;QAMlB,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC;YACtC,KAAK,EAAE,IAAI,CAAC,KAAK;YACjB,YAAY,EAAE,IAAI,CAAC,YAAY;YAC/B,QAAQ,EAAE,CAAC,EAAC,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,OAAO,EAAC,CAAC;YAC5C,KAAK,EAAE,IAAI,CAAC,KAAK;YACjB,SAAS,EAAE,IAAI,CAAC,SAAS;SAC1B,CAAC,CAAC;QAEH,MAAM,YAAY,GAAG,MAAM,CAAC,OAAO;aAChC,MAAM,CAAC,CAAC,CAAC,EAAqC,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,MAAM,CAAC;aACnE,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC;aAClB,IAAI,CAAC,EAAE,CAAC,CAAC;QAEZ,MAAM,SAAS,GAAG,MAAM,CAAC,OAAO;aAC7B,MAAM,CAAC,CAAC,CAAC,EAAqF,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,UAAU,CAAC;aACvH,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,EAAC,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,UAAU,EAAE,CAAC,CAAC,KAAK,EAAC,CAAC,CAAC,CAAC;QAErD,OAAO;YACL,QAAQ,EAAE,YAAY;YACtB,SAAS;YACT,KAAK,EAAE,MAAM,CAAC,KAAK;SACpB,CAAC;IACJ,CAAC;CACF"}
|
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @license
|
|
3
|
+
* Copyright 2026 Amodal Labs, Inc.
|
|
4
|
+
* SPDX-License-Identifier: MIT
|
|
5
|
+
*/
|
|
6
|
+
import type { LoadedEval } from '../repo/repo-types.js';
|
|
7
|
+
/**
 * Outcome of judging one assertion.
 */
export interface AssertionResult {
    /** Assertion text. */
    text: string;
    /** Whether the assertion is negated. */
    negated: boolean;
    /** Whether the assertion passed. */
    passed: boolean;
    /** Explanation attached to the verdict. */
    reason: string;
}
|
|
16
|
+
/**
 * Token counts and estimated cost, used for a single eval case as well as
 * for an aggregated run.
 */
export interface EvalCostInfo {
    inputTokens: number;
    outputTokens: number;
    totalTokens: number;
    /** Estimated cost in "micros" (presumably millionths of a currency unit — confirm against eval-cost). */
    estimatedCostMicros: number;
}
|
|
25
|
+
/**
 * Identifies the model used for an eval run.
 */
export interface EvalModelInfo {
    /** Provider name. */
    provider: string;
    /** Model name. */
    model: string;
}
|
|
32
|
+
/**
 * Outcome of executing one eval case.
 */
export interface EvalResult {
    /** The eval definition this result belongs to. */
    eval: LoadedEval;
    /** Response text produced for the eval query. */
    response: string;
    /** Tool calls recorded for the eval query. */
    toolCalls: {
        name: string;
        parameters: Record<string, unknown>;
    }[];
    /** Per-assertion verdicts. */
    assertions: AssertionResult[];
    /** Overall verdict for the case. */
    passed: boolean;
    /** Duration of the case in milliseconds. */
    durationMs: number;
    /** Token/cost information, when available. */
    cost?: EvalCostInfo;
    /** Error message, when one was recorded. */
    error?: string;
}
|
|
48
|
+
/**
 * Aggregate outcome of running a whole eval suite.
 */
export interface EvalSuiteResult {
    /** One entry per executed eval case. */
    results: EvalResult[];
    /** Number of passing cases; `runExperiment` divides this by `results.length` to get a pass rate. */
    totalPassed: number;
    totalFailed: number;
    totalSkipped: number;
    /** Duration of the whole suite in milliseconds. */
    totalDurationMs: number;
    /** Aggregated cost, when available. */
    totalCost?: EvalCostInfo;
    model?: EvalModelInfo;
    gitSha?: string;
    /** Timestamp string (format not visible from this declaration). */
    timestamp: string;
}
|
|
62
|
+
/**
 * A persisted suite result that later runs can be compared against.
 */
export interface EvalBaseline {
    id: string;
    orgId: string;
    gitSha: string;
    /** Flags the baseline as a production one. */
    isProduction: boolean;
    /** The stored suite result. */
    result: EvalSuiteResult;
    /** Creation timestamp string. */
    createdAt: string;
}
|
|
73
|
+
/**
 * How a single eval (or assertion) compares between two results.
 */
export type EvalDiffStatus =
    | 'unchanged'
    | 'improved'
    | 'regressed'
    | 'new'
    | 'removed';
|
|
77
|
+
/**
 * Comparison of one eval between the current results and a baseline.
 */
export interface EvalDiff {
    evalName: string;
    status: EvalDiffStatus;
    /** Current verdict; may be `null`. */
    currentPassed: boolean | null;
    /** Baseline verdict; may be `null`. */
    baselinePassed: boolean | null;
    /** Per-assertion comparison entries. */
    assertionChanges: {
        text: string;
        currentPassed: boolean | null;
        baselinePassed: boolean | null;
        status: EvalDiffStatus;
    }[];
}
|
|
92
|
+
/**
 * Progress notification produced while evals are executing.
 */
export interface EvalProgress {
    /** Which stage of the run this event reports. */
    type:
        | 'eval_start'
        | 'eval_complete'
        | 'suite_complete';
    evalName?: string;
    passed?: boolean;
    current?: number;
    total?: number;
}
|
|
102
|
+
/**
 * Full record of one eval run, including the model used and cost figures.
 */
export interface EvalRunRecord {
    id: string;
    orgId: string;
    appId?: string;
    model: EvalModelInfo;
    /** Suite result for this run. */
    suite: EvalSuiteResult;
    /** Cost entries per case (presumably aligned with `suite.results` — confirm against eval-run-builder). */
    perCaseCosts: EvalCostInfo[];
    totalCost: EvalCostInfo;
    gitSha?: string;
    label?: string;
    /** What initiated the run. */
    triggeredBy:
        | 'manual'
        | 'ci'
        | 'automation';
    /** Creation timestamp string. */
    createdAt: string;
}
|
|
118
|
+
/**
 * Side-by-side comparison of two eval runs.
 */
export interface EvalRunComparison {
    runA: EvalRunRecord;
    runB: EvalRunRecord;
    /** Per-eval differences between the two runs. */
    diff: EvalDiff[];
    /** Cost differences, overall and per case. */
    costDelta: {
        totalMicros: number;
        perCase: number[];
    };
    /** Pass-rate and duration differences. */
    qualityDelta: {
        passRateDelta: number;
        durationDeltaMs: number;
    };
}
|
|
134
|
+
/**
 * One data point in a time series of eval runs, intended for trend charts.
 */
export interface EvalTrendPoint {
    runId: string;
    label?: string;
    gitSha?: string;
    model: EvalModelInfo;
    passRate: number;
    totalCostMicros: number;
    avgDurationMs: number;
    /** Timestamp string for the point. */
    timestamp: string;
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"eval-types.js","sourceRoot":"","sources":["../../../src/eval/eval-types.ts"],"names":[],"mappings":"AAAA;;;;GAIG"}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @license
|
|
3
|
+
* Copyright 2026 Amodal Labs, Inc.
|
|
4
|
+
* SPDX-License-Identifier: MIT
|
|
5
|
+
*/
|
|
6
|
+
import type { AmodalRepo } from '../repo/repo-types.js';
|
|
7
|
+
import type { ExperimentConfig, ExperimentEvalResult, ExperimentAssignment, ExperimentDeployment } from './experiment-types.js';
|
|
8
|
+
import type { EvalRunnerOptions } from './eval-runner.js';
|
|
9
|
+
/**
 * Runs the eval suite for an experiment's control and variant arms and
 * returns both suite results together with their comparison.
 *
 * @param repo - Repository whose evals are executed.
 * @param config - Experiment configuration.
 * @param evalOptions - Eval runner options; `gitSha` is excluded from the accepted type.
 */
export declare function runExperiment(
    repo: AmodalRepo,
    config: ExperimentConfig,
    evalOptions: Omit<EvalRunnerOptions, 'gitSha'>,
): Promise<ExperimentEvalResult>;
|
|
13
|
+
/**
 * Picks the control or variant arm for a session according to the
 * deployment's traffic split.
 *
 * @param deployment - The deployed experiment.
 */
export declare function assignExperiment(
    deployment: ExperimentDeployment,
): ExperimentAssignment;
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @license
|
|
3
|
+
* Copyright 2026 Amodal Labs, Inc.
|
|
4
|
+
* SPDX-License-Identifier: MIT
|
|
5
|
+
*/
|
|
6
|
+
import { runEvalSuite } from './eval-runner.js';
|
|
7
|
+
/**
 * Runs the eval suite once for the control arm and once for the variant arm,
 * then compares pass rate and mean case duration.
 *
 * NOTE: both arms are currently executed with the same `repo` and
 * `evalOptions`; `config` only contributes the experiment name. Applying the
 * variant's config changes is not implemented here.
 *
 * @param repo - Repository passed to `runEvalSuite`.
 * @param config - Experiment configuration; only `name` is read.
 * @param evalOptions - Options passed to `runEvalSuite` for both arms.
 * @returns Both suite results plus a comparison whose `winner` is decided on
 *   pass rate alone (`'tie'` when the rates are equal).
 */
export async function runExperiment(repo, config, evalOptions) {
    // Advances a suite generator to completion, discarding yielded progress
    // events, and resolves with the generator's return value.
    const finish = async (suite) => {
        let step = await suite.next();
        while (!step.done) {
            step = await suite.next();
        }
        return step.value;
    };
    // Fraction of cases that passed; 0 when the suite had no cases.
    const passRateOf = (suiteResult) => {
        const caseCount = suiteResult.results.length;
        return caseCount > 0 ? suiteResult.totalPassed / caseCount : 0;
    };
    // Mean per-case duration in ms; 0 when the suite had no cases.
    const meanDurationOf = (suiteResult) => {
        const cases = suiteResult.results;
        if (cases.length > 0) {
            return cases.reduce((total, item) => total + item.durationMs, 0) / cases.length;
        }
        return 0;
    };
    // The arms run one after the other: control first, then variant.
    const controlResult = await finish(runEvalSuite(repo, evalOptions));
    // Run variant (in a real implementation, this would apply config changes)
    const variantResult = await finish(runEvalSuite(repo, evalOptions));
    const controlPassRate = passRateOf(controlResult);
    const variantPassRate = passRateOf(variantResult);
    const controlAvgDuration = meanDurationOf(controlResult);
    const variantAvgDuration = meanDurationOf(variantResult);
    const winner = variantPassRate > controlPassRate
        ? 'variant'
        : controlPassRate > variantPassRate
            ? 'control'
            : 'tie';
    return {
        experimentName: config.name,
        control: controlResult,
        variant: variantResult,
        comparison: {
            controlPassRate,
            variantPassRate,
            controlAvgDuration,
            variantAvgDuration,
            winner,
        },
    };
}
|
|
63
|
+
/**
 * Randomly places a session in the control or variant arm.
 *
 * A uniform draw in [0, 100) is compared with `deployment.trafficPercent`:
 * draws below the threshold go to the variant, all others to control. A
 * threshold of 0 therefore always yields control, and 100 always the variant.
 *
 * @param deployment - Deployed experiment providing `id` and `trafficPercent`.
 * @returns The experiment id together with the chosen arm.
 */
export function assignExperiment(deployment) {
    const draw = Math.random() * 100;
    const assignment = {
        experimentId: deployment.id,
        variant: 'control',
    };
    if (draw < deployment.trafficPercent) {
        assignment.variant = 'variant';
    }
    return assignment;
}
|
|
73
|
+
//# sourceMappingURL=experiment-runner.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"experiment-runner.js","sourceRoot":"","sources":["../../../src/eval/experiment-runner.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAKH,OAAO,EAAC,YAAY,EAAC,MAAM,kBAAkB,CAAC;AAE9C;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,aAAa,CACjC,IAAgB,EAChB,MAAwB,EACxB,WAA8C;IAE9C,cAAc;IACd,MAAM,UAAU,GAAG,YAAY,CAAC,IAAI,EAAE,WAAW,CAAC,CAAC;IACnD,IAAI,aAAa,CAAC;IAClB,OAAO,IAAI,EAAE,CAAC;QACZ,MAAM,IAAI,GAAG,MAAM,UAAU,CAAC,IAAI,EAAE,CAAC;QACrC,IAAI,IAAI,CAAC,IAAI,EAAE,CAAC;YACd,aAAa,GAAG,IAAI,CAAC,KAAK,CAAC;YAC3B,MAAM;QACR,CAAC;IACH,CAAC;IAED,0EAA0E;IAC1E,MAAM,UAAU,GAAG,YAAY,CAAC,IAAI,EAAE,WAAW,CAAC,CAAC;IACnD,IAAI,aAAa,CAAC;IAClB,OAAO,IAAI,EAAE,CAAC;QACZ,MAAM,IAAI,GAAG,MAAM,UAAU,CAAC,IAAI,EAAE,CAAC;QACrC,IAAI,IAAI,CAAC,IAAI,EAAE,CAAC;YACd,aAAa,GAAG,IAAI,CAAC,KAAK,CAAC;YAC3B,MAAM;QACR,CAAC;IACH,CAAC;IAED,MAAM,eAAe,GAAG,aAAa,CAAC,OAAO,CAAC,MAAM,GAAG,CAAC;QACtD,CAAC,CAAC,aAAa,CAAC,WAAW,GAAG,aAAa,CAAC,OAAO,CAAC,MAAM;QAC1D,CAAC,CAAC,CAAC,CAAC;IACN,MAAM,eAAe,GAAG,aAAa,CAAC,OAAO,CAAC,MAAM,GAAG,CAAC;QACtD,CAAC,CAAC,aAAa,CAAC,WAAW,GAAG,aAAa,CAAC,OAAO,CAAC,MAAM;QAC1D,CAAC,CAAC,CAAC,CAAC;IAEN,MAAM,kBAAkB,GAAG,aAAa,CAAC,OAAO,CAAC,MAAM,GAAG,CAAC;QACzD,CAAC,CAAC,aAAa,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,UAAU,EAAE,CAAC,CAAC,GAAG,aAAa,CAAC,OAAO,CAAC,MAAM;QAChG,CAAC,CAAC,CAAC,CAAC;IACN,MAAM,kBAAkB,GAAG,aAAa,CAAC,OAAO,CAAC,MAAM,GAAG,CAAC;QACzD,CAAC,CAAC,aAAa,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,UAAU,EAAE,CAAC,CAAC,GAAG,aAAa,CAAC,OAAO,CAAC,MAAM;QAChG,CAAC,CAAC,CAAC,CAAC;IAEN,IAAI,MAAM,GAAkC,KAAK,CAAC;IAClD,IAAI,eAAe,GAAG,eAAe,EAAE,CAAC;QACtC,MAAM,GAAG,SAAS,CAAC;IACrB,CAAC;SAAM,IAAI,eAAe,GAAG,eAAe,EAAE,CAAC;QAC7C,MAAM,GAAG,SAAS,CAAC;IACrB,CAAC;IAED,OAAO;QACL,cAAc,EAAE,MAAM,CAAC,IAAI;QAC3B,OAAO,EAAE,aAAa;QACtB,OAAO,EAAE,aAAa;QACtB,UAAU,EAAE;YACV,eAAe;YACf,eAAe;YACf,kBAAkB;YAClB,kBAAkB;YAClB,MAAM;SACP;KACF,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,gBAAgB,CAC9B,UAAgC;IAEhC,MAAM,IAAI,GAAG,IAAI,CAAC,MAAM,EAAE,GAAG,GA
AG,CAAC;IACjC,OAAO;QACL,YAAY,EAAE,UAAU,CAAC,EAAE;QAC3B,OAAO,EAAE,IAAI,GAAG,UAAU,CAAC,cAAc,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,SAAS;KAClE,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @license
|
|
3
|
+
* Copyright 2026 Amodal Labs, Inc.
|
|
4
|
+
* SPDX-License-Identifier: MIT
|
|
5
|
+
*/
|
|
6
|
+
import type { EvalSuiteResult } from './eval-types.js';
|
|
7
|
+
/**
 * One model-config change to apply for the variant arm.
 */
export interface ExperimentChange {
    /** Location of the setting to change (path syntax is not defined in this file). */
    path: string;
    /** Replacement value for that setting. */
    value: unknown;
}
|
|
14
|
+
/**
 * Definition of an A/B experiment.
 */
export interface ExperimentConfig {
    /** Experiment name; `runExperiment` reports it as `experimentName`. */
    name: string;
    description?: string;
    controlConfig: Record<string, unknown>;
    variantConfig: Record<string, unknown>;
    /** Config changes associated with the variant. */
    changes: Array<ExperimentChange>;
    /** Traffic share for the variant (presumably 0-100, as in ExperimentDeployment — confirm). */
    trafficPercent: number;
}
|
|
25
|
+
/**
 * Outcome of an eval-based experiment: both suite results and their comparison.
 */
export interface ExperimentEvalResult {
    experimentName: string;
    /** Suite result of the control arm. */
    control: EvalSuiteResult;
    /** Suite result of the variant arm. */
    variant: EvalSuiteResult;
    /** Headline numbers for the two arms. */
    comparison: {
        controlPassRate: number;
        variantPassRate: number;
        controlAvgDuration: number;
        variantAvgDuration: number;
        /** Arm with the higher pass rate, or `'tie'`. */
        winner:
            | 'control'
            | 'variant'
            | 'tie';
    };
}
|
|
40
|
+
/**
 * An experiment that has been deployed with a traffic split.
 */
export interface ExperimentDeployment {
    /** Deployment id; `assignExperiment` returns it as `experimentId`. */
    id: string;
    name: string;
    controlConfig: Record<string, unknown>;
    variantConfig: Record<string, unknown>;
    /** Variant share on a 0-100 scale (`assignExperiment` compares it with a draw in [0, 100)). */
    trafficPercent: number;
}
|
|
50
|
+
/**
 * The arm a session was placed in for an active experiment.
 */
export interface ExperimentAssignment {
    experimentId: string;
    /** Chosen arm. */
    variant:
        | 'control'
        | 'variant';
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"experiment-types.js","sourceRoot":"","sources":["../../../src/eval/experiment-types.ts"],"names":[],"mappings":"AAAA;;;;GAIG"}
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @license
|
|
3
|
+
* Copyright 2026 Amodal Labs, Inc.
|
|
4
|
+
* SPDX-License-Identifier: MIT
|
|
5
|
+
*/
|
|
6
|
+
export * from './eval-types.js';
|
|
7
|
+
export { judgeAssertion, judgeAllAssertions } from './eval-judge.js';
|
|
8
|
+
export type { JudgeProvider } from './eval-judge.js';
|
|
9
|
+
export { runEvalSuite } from './eval-runner.js';
|
|
10
|
+
export type { EvalQueryProvider, EvalRunnerOptions } from './eval-runner.js';
|
|
11
|
+
export { diffEvalResults } from './eval-diff.js';
|
|
12
|
+
export { formatEvalTable, formatDiffTable, formatComparisonTable, formatEvalMarkdown } from './eval-formatter.js';
|
|
13
|
+
export { PlatformEvalClient } from './platform-eval-client.js';
|
|
14
|
+
export type { EvalRunSummary, PlatformEvalComparison } from './platform-eval-client.js';
|
|
15
|
+
export * from './experiment-types.js';
|
|
16
|
+
export { runExperiment, assignExperiment } from './experiment-runner.js';
|
|
17
|
+
export { MODEL_PRICING, getModelPricing, computeEvalCost, aggregateRunCost, formatCostMicros } from './eval-cost.js';
|
|
18
|
+
export { buildEvalRun, compareRuns, buildTrendPoints } from './eval-run-builder.js';
|
|
19
|
+
export { SessionEvalQueryProvider } from './eval-session-provider.js';
|
|
20
|
+
export type { SessionEvalProviderOptions } from './eval-session-provider.js';
|
|
21
|
+
export { runMultiModelEval } from './multi-model-runner.js';
|
|
22
|
+
export type { MultiModelProgress, MultiModelEvalOptions } from './multi-model-runner.js';
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @license
|
|
3
|
+
* Copyright 2026 Amodal Labs, Inc.
|
|
4
|
+
* SPDX-License-Identifier: MIT
|
|
5
|
+
*/
|
|
6
|
+
export * from './eval-types.js';
|
|
7
|
+
export { judgeAssertion, judgeAllAssertions } from './eval-judge.js';
|
|
8
|
+
export { runEvalSuite } from './eval-runner.js';
|
|
9
|
+
export { diffEvalResults } from './eval-diff.js';
|
|
10
|
+
export { formatEvalTable, formatDiffTable, formatComparisonTable, formatEvalMarkdown } from './eval-formatter.js';
|
|
11
|
+
export { PlatformEvalClient } from './platform-eval-client.js';
|
|
12
|
+
export * from './experiment-types.js';
|
|
13
|
+
export { runExperiment, assignExperiment } from './experiment-runner.js';
|
|
14
|
+
export { MODEL_PRICING, getModelPricing, computeEvalCost, aggregateRunCost, formatCostMicros } from './eval-cost.js';
|
|
15
|
+
export { buildEvalRun, compareRuns, buildTrendPoints } from './eval-run-builder.js';
|
|
16
|
+
export { SessionEvalQueryProvider } from './eval-session-provider.js';
|
|
17
|
+
export { runMultiModelEval } from './multi-model-runner.js';
|
|
18
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/eval/index.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,cAAc,iBAAiB,CAAC;AAChC,OAAO,EAAC,cAAc,EAAE,kBAAkB,EAAC,MAAM,iBAAiB,CAAC;AAEnE,OAAO,EAAC,YAAY,EAAC,MAAM,kBAAkB,CAAC;AAE9C,OAAO,EAAC,eAAe,EAAC,MAAM,gBAAgB,CAAC;AAC/C,OAAO,EAAC,eAAe,EAAE,eAAe,EAAE,qBAAqB,EAAE,kBAAkB,EAAC,MAAM,qBAAqB,CAAC;AAChH,OAAO,EAAC,kBAAkB,EAAC,MAAM,2BAA2B,CAAC;AAE7D,cAAc,uBAAuB,CAAC;AACtC,OAAO,EAAC,aAAa,EAAE,gBAAgB,EAAC,MAAM,wBAAwB,CAAC;AACvE,OAAO,EAAC,aAAa,EAAE,eAAe,EAAE,eAAe,EAAE,gBAAgB,EAAE,gBAAgB,EAAC,MAAM,gBAAgB,CAAC;AACnH,OAAO,EAAC,YAAY,EAAE,WAAW,EAAE,gBAAgB,EAAC,MAAM,uBAAuB,CAAC;AAClF,OAAO,EAAC,wBAAwB,EAAC,MAAM,4BAA4B,CAAC;AAEpE,OAAO,EAAC,iBAAiB,EAAC,MAAM,yBAAyB,CAAC"}
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @license
|
|
3
|
+
* Copyright 2026 Amodal Labs, Inc.
|
|
4
|
+
* SPDX-License-Identifier: MIT
|
|
5
|
+
*/
|
|
6
|
+
import type { AmodalRepo } from '../repo/repo-types.js';
|
|
7
|
+
import type { ModelConfig } from '../repo/config-schema.js';
|
|
8
|
+
import type { EvalRunRecord, EvalModelInfo } from './eval-types.js';
|
|
9
|
+
import type { JudgeProvider } from './eval-judge.js';
|
|
10
|
+
import type { LLMToolDefinition } from '../providers/runtime/runtime-provider-types.js';
|
|
11
|
+
/**
 * Progress notification produced during a multi-model eval run.
 */
export interface MultiModelProgress {
    /** Which stage of the multi-model run this event reports. */
    type:
        | 'model_start'
        | 'model_complete'
        | 'all_complete';
    model?: EvalModelInfo;
    currentModel?: number;
    totalModels?: number;
    passRate?: number;
    costMicros?: number;
}
|
|
22
|
+
/**
 * Settings for running one eval suite against several models.
 */
export interface MultiModelEvalOptions {
    /** Models to evaluate. */
    models: Array<ModelConfig>;
    /** Judge used to evaluate assertions. */
    judgeProvider: JudgeProvider;
    orgId: string;
    appId?: string;
    gitSha?: string;
    label?: string;
    /** What initiated the run. */
    triggeredBy?:
        | 'manual'
        | 'ci'
        | 'automation';
    /** Optional eval filter string. */
    filter?: string;
    systemPrompt?: string;
    tools?: Array<LLMToolDefinition>;
    maxTokens?: number;
}
|
|
38
|
+
/**
 * Executes one eval suite against each of the configured models.
 * Progress events are yielded while the run proceeds; the generator's return
 * value is one EvalRunRecord per model.
 *
 * @param repo - Repository whose evals are executed.
 * @param options - Models, judge and run metadata.
 */
export declare function runMultiModelEval(
    repo: AmodalRepo,
    options: MultiModelEvalOptions,
): AsyncGenerator<MultiModelProgress, EvalRunRecord[]>;
|