@agntk/core 0.3.3 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/advanced/index.d.ts +13 -22
- package/dist/advanced/index.d.ts.map +1 -1
- package/dist/advanced/index.js +10 -55
- package/dist/advanced/index.js.map +1 -1
- package/dist/agent.d.ts +2 -35
- package/dist/agent.d.ts.map +1 -1
- package/dist/agent.js +113 -111
- package/dist/agent.js.map +1 -1
- package/dist/config/defaults.d.ts +1 -5
- package/dist/config/defaults.d.ts.map +1 -1
- package/dist/config/defaults.js +1 -11
- package/dist/config/defaults.js.map +1 -1
- package/dist/config/index.d.ts +5 -1
- package/dist/config/index.d.ts.map +1 -1
- package/dist/config/index.js +7 -7
- package/dist/config/index.js.map +1 -1
- package/dist/config/loader.d.ts +0 -40
- package/dist/config/loader.d.ts.map +1 -1
- package/dist/config/loader.js +3 -80
- package/dist/config/loader.js.map +1 -1
- package/dist/config/schema.d.ts +117 -788
- package/dist/config/schema.d.ts.map +1 -1
- package/dist/config/schema.js +52 -63
- package/dist/config/schema.js.map +1 -1
- package/dist/constants.d.ts +0 -11
- package/dist/constants.d.ts.map +1 -1
- package/dist/constants.js +0 -17
- package/dist/constants.js.map +1 -1
- package/dist/evals/assertions.d.ts +0 -35
- package/dist/evals/assertions.d.ts.map +1 -1
- package/dist/evals/assertions.js +0 -51
- package/dist/evals/assertions.js.map +1 -1
- package/dist/evals/index.d.ts +0 -5
- package/dist/evals/index.d.ts.map +1 -1
- package/dist/evals/index.js +0 -5
- package/dist/evals/index.js.map +1 -1
- package/dist/evals/runner.d.ts +0 -27
- package/dist/evals/runner.d.ts.map +1 -1
- package/dist/evals/runner.js +3 -45
- package/dist/evals/runner.js.map +1 -1
- package/dist/evals/types.d.ts +0 -14
- package/dist/evals/types.d.ts.map +1 -1
- package/dist/evals/types.js +0 -3
- package/dist/evals/types.js.map +1 -1
- package/dist/guardrails/built-ins.d.ts +0 -28
- package/dist/guardrails/built-ins.d.ts.map +1 -1
- package/dist/guardrails/built-ins.js +21 -47
- package/dist/guardrails/built-ins.js.map +1 -1
- package/dist/guardrails/index.d.ts +1 -4
- package/dist/guardrails/index.d.ts.map +1 -1
- package/dist/guardrails/index.js +1 -4
- package/dist/guardrails/index.js.map +1 -1
- package/dist/guardrails/runner.d.ts +0 -30
- package/dist/guardrails/runner.d.ts.map +1 -1
- package/dist/guardrails/runner.js +1 -46
- package/dist/guardrails/runner.js.map +1 -1
- package/dist/guardrails/types.d.ts +0 -35
- package/dist/guardrails/types.d.ts.map +1 -1
- package/dist/guardrails/types.js +4 -7
- package/dist/guardrails/types.js.map +1 -1
- package/dist/index.d.ts +4 -12
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +2 -31
- package/dist/index.js.map +1 -1
- package/dist/memory/loader.d.ts +0 -17
- package/dist/memory/loader.d.ts.map +1 -1
- package/dist/memory/loader.js +5 -27
- package/dist/memory/loader.js.map +1 -1
- package/dist/memory/store.d.ts +8 -16
- package/dist/memory/store.d.ts.map +1 -1
- package/dist/memory/store.js +101 -32
- package/dist/memory/store.js.map +1 -1
- package/dist/memory/types.d.ts +10 -30
- package/dist/memory/types.d.ts.map +1 -1
- package/dist/memory/types.js +0 -6
- package/dist/memory/types.js.map +1 -1
- package/dist/models.d.ts +9 -32
- package/dist/models.d.ts.map +1 -1
- package/dist/models.js +25 -68
- package/dist/models.js.map +1 -1
- package/dist/observability/index.d.ts +0 -3
- package/dist/observability/index.d.ts.map +1 -1
- package/dist/observability/index.js +0 -3
- package/dist/observability/index.js.map +1 -1
- package/dist/observability/langfuse.d.ts +0 -38
- package/dist/observability/langfuse.d.ts.map +1 -1
- package/dist/observability/langfuse.js +6 -55
- package/dist/observability/langfuse.js.map +1 -1
- package/dist/observability/types.d.ts +0 -21
- package/dist/observability/types.d.ts.map +1 -1
- package/dist/observability/types.js +0 -3
- package/dist/observability/types.js.map +1 -1
- package/dist/presets/tool-preset-registry.d.ts +0 -21
- package/dist/presets/tool-preset-registry.d.ts.map +1 -1
- package/dist/presets/tool-preset-registry.js +15 -34
- package/dist/presets/tool-preset-registry.js.map +1 -1
- package/dist/presets/tools.d.ts +41 -39
- package/dist/presets/tools.d.ts.map +1 -1
- package/dist/presets/tools.js +46 -40
- package/dist/presets/tools.js.map +1 -1
- package/dist/prompts/context.d.ts +0 -4
- package/dist/prompts/context.d.ts.map +1 -1
- package/dist/prompts/context.js +21 -33
- package/dist/prompts/context.js.map +1 -1
- package/dist/prompts/template.d.ts +0 -11
- package/dist/prompts/template.d.ts.map +1 -1
- package/dist/prompts/template.js +4 -14
- package/dist/prompts/template.js.map +1 -1
- package/dist/provider-resolver.d.ts +0 -29
- package/dist/provider-resolver.d.ts.map +1 -1
- package/dist/provider-resolver.js +17 -61
- package/dist/provider-resolver.js.map +1 -1
- package/dist/reflection.d.ts +0 -34
- package/dist/reflection.d.ts.map +1 -1
- package/dist/reflection.js +0 -40
- package/dist/reflection.js.map +1 -1
- package/dist/skills/index.d.ts +0 -3
- package/dist/skills/index.d.ts.map +1 -1
- package/dist/skills/index.js +0 -3
- package/dist/skills/index.js.map +1 -1
- package/dist/skills/loader.d.ts +0 -69
- package/dist/skills/loader.d.ts.map +1 -1
- package/dist/skills/loader.js +38 -130
- package/dist/skills/loader.js.map +1 -1
- package/dist/skills/types.d.ts +0 -36
- package/dist/skills/types.d.ts.map +1 -1
- package/dist/skills/types.js +0 -4
- package/dist/skills/types.js.map +1 -1
- package/dist/system-detect.d.ts +0 -57
- package/dist/system-detect.d.ts.map +1 -1
- package/dist/system-detect.js +9 -114
- package/dist/system-detect.js.map +1 -1
- package/dist/tools/approval.d.ts +0 -47
- package/dist/tools/approval.d.ts.map +1 -1
- package/dist/tools/approval.js +0 -46
- package/dist/tools/approval.js.map +1 -1
- package/dist/tools/ast-grep/cli.d.ts.map +1 -1
- package/dist/tools/ast-grep/cli.js +10 -4
- package/dist/tools/ast-grep/cli.js.map +1 -1
- package/dist/tools/ast-grep/constants.d.ts.map +1 -1
- package/dist/tools/ast-grep/constants.js +7 -14
- package/dist/tools/ast-grep/constants.js.map +1 -1
- package/dist/tools/ast-grep/downloader.d.ts.map +1 -1
- package/dist/tools/ast-grep/downloader.js +1 -4
- package/dist/tools/ast-grep/downloader.js.map +1 -1
- package/dist/tools/ast-grep/index.d.ts +2 -2
- package/dist/tools/ast-grep/index.d.ts.map +1 -1
- package/dist/tools/ast-grep/index.js +1 -1
- package/dist/tools/ast-grep/index.js.map +1 -1
- package/dist/tools/ast-grep/tools.d.ts +4 -4
- package/dist/tools/ast-grep/tools.d.ts.map +1 -1
- package/dist/tools/ast-grep/tools.js +2 -8
- package/dist/tools/ast-grep/tools.js.map +1 -1
- package/dist/tools/browser/index.d.ts +4 -4
- package/dist/tools/browser/index.d.ts.map +1 -1
- package/dist/tools/browser/index.js +2 -3
- package/dist/tools/browser/index.js.map +1 -1
- package/dist/tools/browser/stream.d.ts +0 -32
- package/dist/tools/browser/stream.d.ts.map +1 -1
- package/dist/tools/browser/stream.js +0 -53
- package/dist/tools/browser/stream.js.map +1 -1
- package/dist/tools/browser/tool.d.ts +10 -15
- package/dist/tools/browser/tool.d.ts.map +1 -1
- package/dist/tools/browser/tool.js +2 -26
- package/dist/tools/browser/tool.js.map +1 -1
- package/dist/tools/browser/types.d.ts +31 -156
- package/dist/tools/browser/types.d.ts.map +1 -1
- package/dist/tools/browser/types.js +22 -17
- package/dist/tools/browser/types.js.map +1 -1
- package/dist/tools/deep-reasoning/constants.d.ts +0 -6
- package/dist/tools/deep-reasoning/constants.d.ts.map +1 -1
- package/dist/tools/deep-reasoning/constants.js +0 -6
- package/dist/tools/deep-reasoning/constants.js.map +1 -1
- package/dist/tools/deep-reasoning/engine.d.ts.map +1 -1
- package/dist/tools/deep-reasoning/engine.js +0 -1
- package/dist/tools/deep-reasoning/engine.js.map +1 -1
- package/dist/tools/deep-reasoning/index.d.ts +4 -0
- package/dist/tools/deep-reasoning/index.d.ts.map +1 -1
- package/dist/tools/deep-reasoning/index.js +4 -3
- package/dist/tools/deep-reasoning/index.js.map +1 -1
- package/dist/tools/deep-reasoning/tools.d.ts.map +1 -1
- package/dist/tools/deep-reasoning/tools.js +8 -1
- package/dist/tools/deep-reasoning/tools.js.map +1 -1
- package/dist/tools/deep-reasoning/types.d.ts +5 -30
- package/dist/tools/deep-reasoning/types.d.ts.map +1 -1
- package/dist/tools/deep-reasoning/types.js +10 -1
- package/dist/tools/deep-reasoning/types.js.map +1 -1
- package/dist/tools/file/index.d.ts +3 -5
- package/dist/tools/file/index.d.ts.map +1 -1
- package/dist/tools/file/index.js +2 -4
- package/dist/tools/file/index.js.map +1 -1
- package/dist/tools/file/tools.d.ts +8 -11
- package/dist/tools/file/tools.d.ts.map +1 -1
- package/dist/tools/file/tools.js +45 -59
- package/dist/tools/file/tools.js.map +1 -1
- package/dist/tools/glob/cli.d.ts.map +1 -1
- package/dist/tools/glob/cli.js +8 -6
- package/dist/tools/glob/cli.js.map +1 -1
- package/dist/tools/glob/tools.d.ts.map +1 -1
- package/dist/tools/glob/tools.js +7 -10
- package/dist/tools/glob/tools.js.map +1 -1
- package/dist/tools/grep/cli.d.ts.map +1 -1
- package/dist/tools/grep/cli.js +0 -2
- package/dist/tools/grep/cli.js.map +1 -1
- package/dist/tools/grep/constants.d.ts.map +1 -1
- package/dist/tools/grep/constants.js +3 -5
- package/dist/tools/grep/constants.js.map +1 -1
- package/dist/tools/grep/downloader.d.ts.map +1 -1
- package/dist/tools/grep/downloader.js +3 -3
- package/dist/tools/grep/downloader.js.map +1 -1
- package/dist/tools/grep/tools.d.ts +2 -2
- package/dist/tools/grep/tools.d.ts.map +1 -1
- package/dist/tools/grep/tools.js +8 -14
- package/dist/tools/grep/tools.js.map +1 -1
- package/dist/tools/index.d.ts +10 -11
- package/dist/tools/index.d.ts.map +1 -1
- package/dist/tools/index.js +9 -22
- package/dist/tools/index.js.map +1 -1
- package/dist/tools/model-retry.d.ts +0 -40
- package/dist/tools/model-retry.d.ts.map +1 -1
- package/dist/tools/model-retry.js +1 -49
- package/dist/tools/model-retry.js.map +1 -1
- package/dist/tools/plan/constants.d.ts +0 -6
- package/dist/tools/plan/constants.d.ts.map +1 -1
- package/dist/tools/plan/constants.js +9 -6
- package/dist/tools/plan/constants.js.map +1 -1
- package/dist/tools/plan/index.d.ts +1 -1
- package/dist/tools/plan/index.d.ts.map +1 -1
- package/dist/tools/plan/index.js +1 -4
- package/dist/tools/plan/index.js.map +1 -1
- package/dist/tools/plan/tools.d.ts +2 -2
- package/dist/tools/plan/tools.d.ts.map +1 -1
- package/dist/tools/plan/tools.js +10 -11
- package/dist/tools/plan/tools.js.map +1 -1
- package/dist/tools/plan/types.d.ts +22 -32
- package/dist/tools/plan/types.d.ts.map +1 -1
- package/dist/tools/plan/types.js +11 -3
- package/dist/tools/plan/types.js.map +1 -1
- package/dist/tools/progress/index.d.ts +3 -22
- package/dist/tools/progress/index.d.ts.map +1 -1
- package/dist/tools/progress/index.js +19 -34
- package/dist/tools/progress/index.js.map +1 -1
- package/dist/tools/search-skills.d.ts +0 -23
- package/dist/tools/search-skills.d.ts.map +1 -1
- package/dist/tools/search-skills.js +3 -32
- package/dist/tools/search-skills.js.map +1 -1
- package/dist/tools/shell/background.d.ts +2 -18
- package/dist/tools/shell/background.d.ts.map +1 -1
- package/dist/tools/shell/background.js +45 -48
- package/dist/tools/shell/background.js.map +1 -1
- package/dist/tools/shell/constants.d.ts +0 -3
- package/dist/tools/shell/constants.d.ts.map +1 -1
- package/dist/tools/shell/constants.js +16 -6
- package/dist/tools/shell/constants.js.map +1 -1
- package/dist/tools/shell/index.d.ts +1 -1
- package/dist/tools/shell/index.d.ts.map +1 -1
- package/dist/tools/shell/index.js +1 -1
- package/dist/tools/shell/index.js.map +1 -1
- package/dist/tools/shell/tools.d.ts +3 -3
- package/dist/tools/shell/tools.d.ts.map +1 -1
- package/dist/tools/shell/tools.js +0 -2
- package/dist/tools/shell/tools.js.map +1 -1
- package/dist/tools/shell/types.d.ts +5 -13
- package/dist/tools/shell/types.d.ts.map +1 -1
- package/dist/tools/shell/types.js +10 -1
- package/dist/tools/shell/types.js.map +1 -1
- package/dist/tools/spawn-agent/check-agent.d.ts +45 -0
- package/dist/tools/spawn-agent/check-agent.d.ts.map +1 -0
- package/dist/tools/spawn-agent/check-agent.js +84 -0
- package/dist/tools/spawn-agent/check-agent.js.map +1 -0
- package/dist/tools/spawn-agent/index.d.ts +44 -51
- package/dist/tools/spawn-agent/index.d.ts.map +1 -1
- package/dist/tools/spawn-agent/index.js +196 -84
- package/dist/tools/spawn-agent/index.js.map +1 -1
- package/dist/tools/spawn-agent/registry.d.ts +36 -0
- package/dist/tools/spawn-agent/registry.d.ts.map +1 -0
- package/dist/tools/spawn-agent/registry.js +88 -0
- package/dist/tools/spawn-agent/registry.js.map +1 -0
- package/dist/tools/utils/errors.d.ts +2 -1
- package/dist/tools/utils/errors.d.ts.map +1 -1
- package/dist/tools/utils/errors.js +2 -1
- package/dist/tools/utils/errors.js.map +1 -1
- package/dist/tools/utils/shell.d.ts +0 -11
- package/dist/tools/utils/shell.d.ts.map +1 -1
- package/dist/tools/utils/shell.js +10 -78
- package/dist/tools/utils/shell.js.map +1 -1
- package/dist/tools/utils/tool-result.d.ts +0 -3
- package/dist/tools/utils/tool-result.d.ts.map +1 -1
- package/dist/tools/utils/tool-result.js +4 -4
- package/dist/tools/utils/tool-result.js.map +1 -1
- package/dist/tools/web-search/index.d.ts +4 -0
- package/dist/tools/web-search/index.d.ts.map +1 -0
- package/dist/tools/web-search/index.js +3 -0
- package/dist/tools/web-search/index.js.map +1 -0
- package/dist/tools/web-search/tools.d.ts +17 -0
- package/dist/tools/web-search/tools.d.ts.map +1 -0
- package/dist/tools/web-search/tools.js +81 -0
- package/dist/tools/web-search/tools.js.map +1 -0
- package/dist/tools/web-search/types.d.ts +16 -0
- package/dist/tools/web-search/types.d.ts.map +1 -0
- package/dist/tools/web-search/types.js +29 -0
- package/dist/tools/web-search/types.js.map +1 -0
- package/dist/tools/workspace-middleware.d.ts +14 -0
- package/dist/tools/workspace-middleware.d.ts.map +1 -0
- package/dist/tools/workspace-middleware.js +97 -0
- package/dist/tools/workspace-middleware.js.map +1 -0
- package/dist/types/agent.d.ts +1 -66
- package/dist/types/agent.d.ts.map +1 -1
- package/dist/types/agent.js +0 -6
- package/dist/types/agent.js.map +1 -1
- package/dist/types/index.d.ts +4 -3
- package/dist/types/index.d.ts.map +1 -1
- package/dist/types/index.js +3 -3
- package/dist/types/index.js.map +1 -1
- package/dist/types/lifecycle.d.ts +0 -141
- package/dist/types/lifecycle.d.ts.map +1 -1
- package/dist/types/lifecycle.js +4 -54
- package/dist/types/lifecycle.js.map +1 -1
- package/dist/types/streaming.d.ts +0 -4
- package/dist/types/streaming.d.ts.map +1 -1
- package/dist/types/streaming.js +0 -4
- package/dist/types/streaming.js.map +1 -1
- package/dist/usage-limits.d.ts +0 -37
- package/dist/usage-limits.d.ts.map +1 -1
- package/dist/usage-limits.js +1 -40
- package/dist/usage-limits.js.map +1 -1
- package/dist/workflow/durable-tool.d.ts +0 -84
- package/dist/workflow/durable-tool.d.ts.map +1 -1
- package/dist/workflow/durable-tool.js +2 -104
- package/dist/workflow/durable-tool.js.map +1 -1
- package/dist/workflow/hooks.d.ts +0 -215
- package/dist/workflow/hooks.d.ts.map +1 -1
- package/dist/workflow/hooks.js +15 -221
- package/dist/workflow/hooks.js.map +1 -1
- package/dist/workflow/index.d.ts +0 -9
- package/dist/workflow/index.d.ts.map +1 -1
- package/dist/workflow/index.js +0 -12
- package/dist/workflow/index.js.map +1 -1
- package/dist/workflow/utils.d.ts +0 -24
- package/dist/workflow/utils.d.ts.map +1 -1
- package/dist/workflow/utils.js +1 -32
- package/dist/workflow/utils.js.map +1 -1
- package/dist/wrappers/best-of-n.d.ts +0 -35
- package/dist/wrappers/best-of-n.d.ts.map +1 -1
- package/dist/wrappers/best-of-n.js +4 -53
- package/dist/wrappers/best-of-n.js.map +1 -1
- package/dist/wrappers/refine-loop.d.ts +36 -0
- package/dist/wrappers/refine-loop.d.ts.map +1 -0
- package/dist/wrappers/refine-loop.js +135 -0
- package/dist/wrappers/refine-loop.js.map +1 -0
- package/package.json +14 -8
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"schema.d.ts","sourceRoot":"","sources":["../../src/config/schema.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"schema.d.ts","sourceRoot":"","sources":["../../src/config/schema.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAExB,eAAO,MAAM,eAAe;;;;;EAAwD,CAAC;AACrF,MAAM,MAAM,SAAS,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,eAAe,CAAC,CAAC;AAExD,eAAO,MAAM,cAAc;;;;;;EAAuE,CAAC;AACnG,MAAM,MAAM,QAAQ,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,cAAc,CAAC,CAAC;AAEtD,eAAO,MAAM,oBAAoB;;;;;iBAQ/B,CAAC;AAEH,MAAM,MAAM,cAAc,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,oBAAoB,CAAC,CAAC;AAElE,eAAO,MAAM,kBAAkB;;;;;;;;;;;;;;;;kBAUlB,CAAC;AAEd,MAAM,MAAM,YAAY,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,kBAAkB,CAAC,CAAC;AAE9D,eAAO,MAAM,gBAAgB;;;;;;;;;iBAM3B,CAAC;AAEH,MAAM,MAAM,UAAU,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,gBAAgB,CAAC,CAAC;AAE1D,eAAO,MAAM,iBAAiB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;iBAgG5B,CAAC;AAEH,MAAM,MAAM,WAAW,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,iBAAiB,CAAC,CAAC;AAE5D,eAAO,MAAM,wBAAwB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;iBAA8B,CAAC;AACpE,MAAM,MAAM,kBAAkB,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,wBAAwB,CAAC,CAAC"}
|
package/dist/config/schema.js
CHANGED
|
@@ -1,119 +1,108 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* @fileoverview Configuration schema definitions.
|
|
3
|
-
*/
|
|
4
1
|
import { z } from 'zod';
|
|
5
|
-
// ============================================================================
|
|
6
|
-
// Model Configuration
|
|
7
|
-
// ============================================================================
|
|
8
2
|
export const ModelTierSchema = z.enum(['fast', 'standard', 'reasoning', 'powerful']);
|
|
9
3
|
export const ProviderSchema = z.enum(['openrouter', 'ollama', 'openai', 'cerebras', 'agntk-free']);
|
|
10
4
|
export const CustomProviderSchema = z.object({
|
|
11
|
-
/** Base URL for the provider's OpenAI-compatible API */
|
|
12
5
|
baseURL: z.string(),
|
|
13
|
-
/** Environment variable name containing the API key */
|
|
14
6
|
apiKeyEnv: z.string(),
|
|
15
|
-
/** Optional custom headers to include in requests */
|
|
16
7
|
headers: z.record(z.string(), z.string()).optional(),
|
|
17
|
-
/** Optional tier mappings for this provider */
|
|
18
8
|
tiers: z.record(z.string(), z.string()).optional(),
|
|
19
9
|
});
|
|
20
|
-
export const ModelsConfigSchema = z
|
|
21
|
-
|
|
10
|
+
export const ModelsConfigSchema = z
|
|
11
|
+
.object({
|
|
22
12
|
defaultProvider: ProviderSchema.optional(),
|
|
23
|
-
/** Override model for each tier (uses default provider) */
|
|
24
13
|
tiers: z.record(z.string(), z.string()).optional(),
|
|
25
|
-
/** Per-provider tier mappings */
|
|
26
14
|
providers: z.record(z.string(), z.record(z.string(), z.string())).optional(),
|
|
27
|
-
/** Custom providers with baseURL and apiKeyEnv */
|
|
28
15
|
customProviders: z.record(z.string(), CustomProviderSchema).optional(),
|
|
29
|
-
})
|
|
30
|
-
|
|
31
|
-
// Role Configuration
|
|
32
|
-
// ============================================================================
|
|
16
|
+
})
|
|
17
|
+
.optional();
|
|
33
18
|
export const RoleConfigSchema = z.object({
|
|
34
|
-
/** System prompt for this role (optional if overriding built-in) */
|
|
35
19
|
systemPrompt: z.string().optional(),
|
|
36
|
-
/** Recommended model tier */
|
|
37
20
|
recommendedModel: ModelTierSchema.optional(),
|
|
38
|
-
/** Default tools to include */
|
|
39
21
|
defaultTools: z.array(z.string()).optional(),
|
|
40
22
|
});
|
|
41
|
-
// ============================================================================
|
|
42
|
-
// Full Agent Configuration
|
|
43
|
-
// ============================================================================
|
|
44
23
|
export const AgentConfigSchema = z.object({
|
|
45
|
-
/** Model configuration */
|
|
46
24
|
models: ModelsConfigSchema,
|
|
47
|
-
/** Custom role definitions */
|
|
48
25
|
roles: z.record(z.string(), RoleConfigSchema).optional(),
|
|
49
|
-
|
|
50
|
-
|
|
26
|
+
toolPresets: z
|
|
27
|
+
.record(z.string(), z.object({
|
|
51
28
|
include: z.array(z.string()).optional(),
|
|
52
29
|
exclude: z.array(z.string()).optional(),
|
|
53
30
|
description: z.string().optional(),
|
|
54
|
-
}))
|
|
55
|
-
|
|
56
|
-
templates: z
|
|
31
|
+
}))
|
|
32
|
+
.optional(),
|
|
33
|
+
templates: z
|
|
34
|
+
.object({
|
|
57
35
|
variables: z.record(z.string(), z.union([z.string(), z.number(), z.boolean()])).optional(),
|
|
58
|
-
})
|
|
59
|
-
|
|
60
|
-
memory: z
|
|
36
|
+
})
|
|
37
|
+
.optional(),
|
|
38
|
+
memory: z
|
|
39
|
+
.object({
|
|
61
40
|
adapter: z.enum(['vectra']).optional(),
|
|
62
41
|
path: z.string().optional(),
|
|
63
42
|
embedModel: z.string().optional(),
|
|
64
43
|
topK: z.number().optional(),
|
|
65
44
|
similarityThreshold: z.number().optional(),
|
|
66
|
-
})
|
|
67
|
-
|
|
68
|
-
tools: z
|
|
69
|
-
|
|
45
|
+
})
|
|
46
|
+
.optional(),
|
|
47
|
+
tools: z
|
|
48
|
+
.object({
|
|
49
|
+
shell: z
|
|
50
|
+
.object({
|
|
70
51
|
timeout: z.number().optional(),
|
|
71
52
|
maxTimeout: z.number().optional(),
|
|
72
53
|
maxCommandLength: z.number().optional(),
|
|
73
|
-
})
|
|
74
|
-
|
|
54
|
+
})
|
|
55
|
+
.optional(),
|
|
56
|
+
glob: z
|
|
57
|
+
.object({
|
|
75
58
|
timeout: z.number().optional(),
|
|
76
59
|
maxFiles: z.number().optional(),
|
|
77
60
|
maxDepth: z.number().optional(),
|
|
78
61
|
maxOutputBytes: z.number().optional(),
|
|
79
|
-
})
|
|
80
|
-
|
|
62
|
+
})
|
|
63
|
+
.optional(),
|
|
64
|
+
grep: z
|
|
65
|
+
.object({
|
|
81
66
|
timeout: z.number().optional(),
|
|
82
67
|
maxContext: z.number().optional(),
|
|
83
68
|
maxOutputBytes: z.number().optional(),
|
|
84
|
-
})
|
|
85
|
-
|
|
69
|
+
})
|
|
70
|
+
.optional(),
|
|
71
|
+
plan: z
|
|
72
|
+
.object({
|
|
86
73
|
maxSteps: z.number().optional(),
|
|
87
74
|
delegationThreshold: z.number().optional(),
|
|
88
|
-
})
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
75
|
+
})
|
|
76
|
+
.optional(),
|
|
77
|
+
})
|
|
78
|
+
.optional(),
|
|
79
|
+
server: z
|
|
80
|
+
.object({
|
|
92
81
|
port: z.number().optional(),
|
|
93
82
|
host: z.string().optional(),
|
|
94
|
-
})
|
|
95
|
-
|
|
96
|
-
client: z
|
|
83
|
+
})
|
|
84
|
+
.optional(),
|
|
85
|
+
client: z
|
|
86
|
+
.object({
|
|
97
87
|
timeout: z.number().optional(),
|
|
98
88
|
retries: z.number().optional(),
|
|
99
|
-
websocket: z
|
|
89
|
+
websocket: z
|
|
90
|
+
.object({
|
|
100
91
|
reconnectDelay: z.number().optional(),
|
|
101
92
|
maxReconnects: z.number().optional(),
|
|
102
|
-
})
|
|
103
|
-
|
|
104
|
-
|
|
93
|
+
})
|
|
94
|
+
.optional(),
|
|
95
|
+
})
|
|
96
|
+
.optional(),
|
|
105
97
|
workspaceRoot: z.string().optional(),
|
|
106
|
-
/** Default max steps */
|
|
107
98
|
maxSteps: z.number().optional(),
|
|
108
|
-
|
|
109
|
-
|
|
99
|
+
debug: z
|
|
100
|
+
.object({
|
|
110
101
|
enabled: z.boolean().optional(),
|
|
111
102
|
level: z.enum(['error', 'warn', 'info', 'debug', 'trace']).optional(),
|
|
112
103
|
file: z.string().optional(),
|
|
113
|
-
})
|
|
104
|
+
})
|
|
105
|
+
.optional(),
|
|
114
106
|
});
|
|
115
|
-
// ============================================================================
|
|
116
|
-
// Partial Config (for merging)
|
|
117
|
-
// ============================================================================
|
|
118
107
|
export const PartialAgentConfigSchema = AgentConfigSchema.partial();
|
|
119
108
|
//# sourceMappingURL=schema.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"schema.js","sourceRoot":"","sources":["../../src/config/schema.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"schema.js","sourceRoot":"","sources":["../../src/config/schema.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAExB,MAAM,CAAC,MAAM,eAAe,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC,MAAM,EAAE,UAAU,EAAE,WAAW,EAAE,UAAU,CAAC,CAAC,CAAC;AAGrF,MAAM,CAAC,MAAM,cAAc,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC,YAAY,EAAE,QAAQ,EAAE,QAAQ,EAAE,UAAU,EAAE,YAAY,CAAC,CAAC,CAAC;AAGnG,MAAM,CAAC,MAAM,oBAAoB,GAAG,CAAC,CAAC,MAAM,CAAC;IAC3C,OAAO,EAAE,CAAC,CAAC,MAAM,EAAE;IAEnB,SAAS,EAAE,CAAC,CAAC,MAAM,EAAE;IAErB,OAAO,EAAE,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC,QAAQ,EAAE;IAEpD,KAAK,EAAE,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC,QAAQ,EAAE;CACnD,CAAC,CAAC;AAIH,MAAM,CAAC,MAAM,kBAAkB,GAAG,CAAC;KAChC,MAAM,CAAC;IACN,eAAe,EAAE,cAAc,CAAC,QAAQ,EAAE;IAE1C,KAAK,EAAE,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC,QAAQ,EAAE;IAElD,SAAS,EAAE,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC,QAAQ,EAAE;IAE5E,eAAe,EAAE,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,EAAE,EAAE,oBAAoB,CAAC,CAAC,QAAQ,EAAE;CACvE,CAAC;KACD,QAAQ,EAAE,CAAC;AAId,MAAM,CAAC,MAAM,gBAAgB,GAAG,CAAC,CAAC,MAAM,CAAC;IACvC,YAAY,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE;IAEnC,gBAAgB,EAAE,eAAe,CAAC,QAAQ,EAAE;IAE5C,YAAY,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC,QAAQ,EAAE;CAC7C,CAAC,CAAC;AAIH,MAAM,CAAC,MAAM,iBAAiB,GAAG,CAAC,CAAC,MAAM,CAAC;IACxC,MAAM,EAAE,kBAAkB;IAE1B,KAAK,EAAE,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,EAAE,EAAE,gBAAgB,CAAC,CAAC,QAAQ,EAAE;IAExD,WAAW,EAAE,CAAC;SACX,MAAM,CACL,CAAC,CAAC,MAAM,EAAE,EACV,CAAC,CAAC,MAAM,CAAC;QACP,OAAO,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC,QAAQ,EAAE;QACvC,OAAO,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC,QAAQ,EAAE;QACvC,WAAW,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE;KACnC,CAAC,CACH;SACA,QAAQ,EAAE;IAEb,SAAS,EAAE,CAAC;SACT,MAAM,CAAC;QACN,SAAS,EAAE,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC,CAAC,QAAQ,EAAE;KAC3F,CAAC;SACD,QAAQ,EAAE;IAEb,MAAM,EAAE,CAAC;SACN,MAAM,CAAC;QACN,OAAO,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,QAAQ,EAAE;QACtC,IAAI,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE;QAC3B,UAAU,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE;QACjC,IAAI,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE;QAC3B,mBAAmB,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE;KAC3C,CAAC;SACD,QAAQ,EAAE;IAEb,KAAK,EAAE,CAAC;SACL,MAAM,CAAC;QACN,KAAK,EAAE,CAAC;aACL,MAAM,CAAC;YACN,OAAO,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE;YAC9B,UAAU,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE;YACjC,gBAAgB,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE;SACxC,CAAC;aACD,QAAQ,EAAE;QACb,IAAI,EAAE,CAAC;aACJ,MAAM,CAAC;YACN,OAAO,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE;YAC9B,QAAQ,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE;YAC/B,QAAQ,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE;YAC/B,cAAc,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE;SACtC,CAAC;aACD,QAAQ,EAAE;QACb,IAAI,EAAE,CAAC;aACJ,MAAM,CAAC;YACN,OAAO,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE;YAC9B,UAAU,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE;YACjC,cAAc,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE;SACtC,CAAC;aACD,QAAQ,EAAE;QACb,IAAI,EAAE,CAAC;aACJ,MAAM,CAAC;YACN,QAAQ,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE;YAC/B,mBAAmB,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE;SAC3C,CAAC;aACD,QAAQ,EAAE;KACd,CAAC;SACD,QAAQ,EAAE;IAEb,MAAM,EAAE,CAAC;SACN,MAAM,CAAC;QACN,IAAI,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE;QAC3B,IAAI,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE;KAC5B,CAAC;SACD,QAAQ,EAAE;IAEb,MAAM,EAAE,CAAC;SACN,MAAM,CAAC;QACN,OAAO,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE;QAC9B,OAAO,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE;QAC9B,SAAS,EAAE,CAAC;aACT,MAAM,CAAC;YACN,cAAc,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE;YACrC,aAAa,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE;SACrC,CAAC;aACD,QAAQ,EAAE;KACd,CAAC;SACD,QAAQ,EAAE;IAEb,aAAa,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE;IAEpC,QAAQ,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE;IAE/B,KAAK,EAAE,CAAC;SACL,MAAM,CAAC;QACN,OAAO,EAAE,CAAC,CAAC,OAAO,EAAE,CAAC,QAAQ,EAAE;QAC/B,KAAK,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,OAAO,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,OAAO,CAAC,CAAC,CAAC,QAAQ,EAAE;QACrE,IAAI,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE;KAC5B,CAAC;SACD,QAAQ,EAAE;CACd,CAAC,CAAC;AAIH,MAAM,CAAC,MAAM,wBAAwB,GAAG,iBAAiB,CAAC,OAAO,EAAE,CAAC"}
|
package/dist/constants.d.ts
CHANGED
|
@@ -1,17 +1,6 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* @agntk/core - SDK-wide Constants
|
|
3
|
-
*
|
|
4
|
-
* Centralized constants for URLs, defaults, and magic numbers.
|
|
5
|
-
* All hardcoded values should live here for easy configuration.
|
|
6
|
-
*/
|
|
7
|
-
/** Geolocation API endpoint (HTTPS) */
|
|
8
1
|
export declare const GEOLOCATION_API_URL = "https://ip-api.com/json/?fields=city,regionName,country,countryCode,timezone";
|
|
9
|
-
/** Whether geolocation fetch is enabled by default */
|
|
10
2
|
export declare const GEOLOCATION_ENABLED_DEFAULT = false;
|
|
11
|
-
/** Default browser command timeout (ms) */
|
|
12
3
|
export declare const BROWSER_DEFAULT_TIMEOUT = 30000;
|
|
13
|
-
/** Max output length from browser CLI (chars) */
|
|
14
4
|
export declare const BROWSER_MAX_OUTPUT_LENGTH = 50000;
|
|
15
|
-
/** Max buffer size for browser CLI output (bytes) */
|
|
16
5
|
export declare const BROWSER_MAX_BUFFER: number;
|
|
17
6
|
//# sourceMappingURL=constants.d.ts.map
|
package/dist/constants.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"constants.d.ts","sourceRoot":"","sources":["../src/constants.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"constants.d.ts","sourceRoot":"","sources":["../src/constants.ts"],"names":[],"mappings":"AAAA,eAAO,MAAM,mBAAmB,iFACgD,CAAC;AAEjF,eAAO,MAAM,2BAA2B,QAAQ,CAAC;AAEjD,eAAO,MAAM,uBAAuB,QAAS,CAAC;AAE9C,eAAO,MAAM,yBAAyB,QAAS,CAAC;AAEhD,eAAO,MAAM,kBAAkB,QAAkB,CAAC"}
|
package/dist/constants.js
CHANGED
|
@@ -1,23 +1,6 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* @agntk/core - SDK-wide Constants
|
|
3
|
-
*
|
|
4
|
-
* Centralized constants for URLs, defaults, and magic numbers.
|
|
5
|
-
* All hardcoded values should live here for easy configuration.
|
|
6
|
-
*/
|
|
7
|
-
// ============================================================================
|
|
8
|
-
// Geolocation
|
|
9
|
-
// ============================================================================
|
|
10
|
-
/** Geolocation API endpoint (HTTPS) */
|
|
11
1
|
export const GEOLOCATION_API_URL = 'https://ip-api.com/json/?fields=city,regionName,country,countryCode,timezone';
|
|
12
|
-
/** Whether geolocation fetch is enabled by default */
|
|
13
2
|
export const GEOLOCATION_ENABLED_DEFAULT = false;
|
|
14
|
-
// ============================================================================
|
|
15
|
-
// Browser Tool
|
|
16
|
-
// ============================================================================
|
|
17
|
-
/** Default browser command timeout (ms) */
|
|
18
3
|
export const BROWSER_DEFAULT_TIMEOUT = 30_000;
|
|
19
|
-
/** Max output length from browser CLI (chars) */
|
|
20
4
|
export const BROWSER_MAX_OUTPUT_LENGTH = 50_000;
|
|
21
|
-
/** Max buffer size for browser CLI output (bytes) */
|
|
22
5
|
export const BROWSER_MAX_BUFFER = 5 * 1024 * 1024;
|
|
23
6
|
//# sourceMappingURL=constants.js.map
|
package/dist/constants.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"constants.js","sourceRoot":"","sources":["../src/constants.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"constants.js","sourceRoot":"","sources":["../src/constants.ts"],"names":[],"mappings":"AAAA,MAAM,CAAC,MAAM,mBAAmB,GAC9B,8EAA8E,CAAC;AAEjF,MAAM,CAAC,MAAM,2BAA2B,GAAG,KAAK,CAAC;AAEjD,MAAM,CAAC,MAAM,uBAAuB,GAAG,MAAM,CAAC;AAE9C,MAAM,CAAC,MAAM,yBAAyB,GAAG,MAAM,CAAC;AAEhD,MAAM,CAAC,MAAM,kBAAkB,GAAG,CAAC,GAAG,IAAI,GAAG,IAAI,CAAC"}
|
|
@@ -1,52 +1,17 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* @agntk/core - Eval Assertions Library
|
|
3
|
-
*
|
|
4
|
-
* Built-in assertions for agent eval suites.
|
|
5
|
-
*/
|
|
6
1
|
import type { LanguageModel } from 'ai';
|
|
7
2
|
import type { Assertion, EvalAgentResult } from './types.js';
|
|
8
|
-
/**
|
|
9
|
-
* Assert that a specific tool was called at least once.
|
|
10
|
-
*/
|
|
11
3
|
export declare function toolCalled(toolName: string): Assertion;
|
|
12
|
-
/**
|
|
13
|
-
* Assert that a specific tool was NOT called.
|
|
14
|
-
*/
|
|
15
4
|
export declare function noToolCalled(toolName: string): Assertion;
|
|
16
|
-
/**
|
|
17
|
-
* Assert that a tool was called a specific number of times.
|
|
18
|
-
*/
|
|
19
5
|
export declare function toolCalledTimes(toolName: string, expectedCount: number): Assertion;
|
|
20
|
-
/**
|
|
21
|
-
* Assert that the output matches a regex pattern.
|
|
22
|
-
*/
|
|
23
6
|
export declare function outputMatches(pattern: RegExp): Assertion;
|
|
24
|
-
/**
|
|
25
|
-
* Assert that the output contains a specific string.
|
|
26
|
-
*/
|
|
27
7
|
export declare function outputContains(text: string): Assertion;
|
|
28
|
-
/**
|
|
29
|
-
* Assert the number of steps is within a range.
|
|
30
|
-
*/
|
|
31
8
|
export declare function stepCount(min: number, max?: number): Assertion;
|
|
32
|
-
/**
|
|
33
|
-
* Assert total token usage is within a budget.
|
|
34
|
-
*/
|
|
35
9
|
export declare function tokenUsage(maxTokens: number): Assertion;
|
|
36
|
-
/**
|
|
37
|
-
* Assert output quality using an LLM judge.
|
|
38
|
-
*
|
|
39
|
-
* The judge model receives the prompt, output, and criteria, then
|
|
40
|
-
* returns PASS/FAIL with reasoning.
|
|
41
|
-
*/
|
|
42
10
|
export declare function llmJudge(options: {
|
|
43
11
|
model: LanguageModel;
|
|
44
12
|
criteria: string;
|
|
45
13
|
name?: string;
|
|
46
14
|
}): Assertion;
|
|
47
|
-
/**
|
|
48
|
-
* Create a custom assertion from a function.
|
|
49
|
-
*/
|
|
50
15
|
export declare function custom(name: string, checkFn: (result: EvalAgentResult) => boolean | {
|
|
51
16
|
passed: boolean;
|
|
52
17
|
message?: string;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"assertions.d.ts","sourceRoot":"","sources":["../../src/evals/assertions.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"assertions.d.ts","sourceRoot":"","sources":["../../src/evals/assertions.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,IAAI,CAAC;AACxC,OAAO,KAAK,EAAE,SAAS,EAAE,eAAe,EAAE,MAAM,SAAS,CAAC;AAE1D,wBAAgB,UAAU,CAAC,QAAQ,EAAE,MAAM,GAAG,SAAS,CActD;AAED,wBAAgB,YAAY,CAAC,QAAQ,EAAE,MAAM,GAAG,SAAS,CAcxD;AAED,wBAAgB,eAAe,CAAC,QAAQ,EAAE,MAAM,EAAE,aAAa,EAAE,MAAM,GAAG,SAAS,CAkBlF;AAED,wBAAgB,aAAa,CAAC,OAAO,EAAE,MAAM,GAAG,SAAS,CAYxD;AAED,wBAAgB,cAAc,CAAC,IAAI,EAAE,MAAM,GAAG,SAAS,CAYtD;AAED,wBAAgB,SAAS,CAAC,GAAG,EAAE,MAAM,EAAE,GAAG,CAAC,EAAE,MAAM,GAAG,SAAS,CAc9D;AAED,wBAAgB,UAAU,CAAC,SAAS,EAAE,MAAM,GAAG,SAAS,CAavD;AAED,wBAAgB,QAAQ,CAAC,OAAO,EAAE;IAChC,KAAK,EAAE,aAAa,CAAC;IACrB,QAAQ,EAAE,MAAM,CAAC;IACjB,IAAI,CAAC,EAAE,MAAM,CAAC;CACf,GAAG,SAAS,CAoCZ;AAED,wBAAgB,MAAM,CACpB,IAAI,EAAE,MAAM,EACZ,OAAO,EAAE,CAAC,MAAM,EAAE,eAAe,KAAK,OAAO,GAAG;IAAE,MAAM,EAAE,OAAO,CAAC;IAAC,OAAO,CAAC,EAAE,MAAM,CAAA;CAAE,GACpF,SAAS,CAWX"}
|
package/dist/evals/assertions.js
CHANGED
|
@@ -1,14 +1,3 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* @agntk/core - Eval Assertions Library
|
|
3
|
-
*
|
|
4
|
-
* Built-in assertions for agent eval suites.
|
|
5
|
-
*/
|
|
6
|
-
// ============================================================================
|
|
7
|
-
// Tool Assertions
|
|
8
|
-
// ============================================================================
|
|
9
|
-
/**
|
|
10
|
-
* Assert that a specific tool was called at least once.
|
|
11
|
-
*/
|
|
12
1
|
export function toolCalled(toolName) {
|
|
13
2
|
return {
|
|
14
3
|
name: `toolCalled(${toolName})`,
|
|
@@ -22,9 +11,6 @@ export function toolCalled(toolName) {
|
|
|
22
11
|
},
|
|
23
12
|
};
|
|
24
13
|
}
|
|
25
|
-
/**
|
|
26
|
-
* Assert that a specific tool was NOT called.
|
|
27
|
-
*/
|
|
28
14
|
export function noToolCalled(toolName) {
|
|
29
15
|
return {
|
|
30
16
|
name: `noToolCalled(${toolName})`,
|
|
@@ -38,9 +24,6 @@ export function noToolCalled(toolName) {
|
|
|
38
24
|
},
|
|
39
25
|
};
|
|
40
26
|
}
|
|
41
|
-
/**
|
|
42
|
-
* Assert that a tool was called a specific number of times.
|
|
43
|
-
*/
|
|
44
27
|
export function toolCalledTimes(toolName, expectedCount) {
|
|
45
28
|
return {
|
|
46
29
|
name: `toolCalledTimes(${toolName}, ${expectedCount})`,
|
|
@@ -59,12 +42,6 @@ export function toolCalledTimes(toolName, expectedCount) {
|
|
|
59
42
|
},
|
|
60
43
|
};
|
|
61
44
|
}
|
|
62
|
-
// ============================================================================
|
|
63
|
-
// Output Assertions
|
|
64
|
-
// ============================================================================
|
|
65
|
-
/**
|
|
66
|
-
* Assert that the output matches a regex pattern.
|
|
67
|
-
*/
|
|
68
45
|
export function outputMatches(pattern) {
|
|
69
46
|
return {
|
|
70
47
|
name: `outputMatches(${pattern})`,
|
|
@@ -78,9 +55,6 @@ export function outputMatches(pattern) {
|
|
|
78
55
|
},
|
|
79
56
|
};
|
|
80
57
|
}
|
|
81
|
-
/**
|
|
82
|
-
* Assert that the output contains a specific string.
|
|
83
|
-
*/
|
|
84
58
|
export function outputContains(text) {
|
|
85
59
|
return {
|
|
86
60
|
name: `outputContains("${text.slice(0, 30)}")`,
|
|
@@ -94,12 +68,6 @@ export function outputContains(text) {
|
|
|
94
68
|
},
|
|
95
69
|
};
|
|
96
70
|
}
|
|
97
|
-
// ============================================================================
|
|
98
|
-
// Step / Usage Assertions
|
|
99
|
-
// ============================================================================
|
|
100
|
-
/**
|
|
101
|
-
* Assert the number of steps is within a range.
|
|
102
|
-
*/
|
|
103
71
|
export function stepCount(min, max) {
|
|
104
72
|
const desc = max !== undefined ? `stepCount(${min}-${max})` : `stepCount(>=${min})`;
|
|
105
73
|
return {
|
|
@@ -115,9 +83,6 @@ export function stepCount(min, max) {
|
|
|
115
83
|
},
|
|
116
84
|
};
|
|
117
85
|
}
|
|
118
|
-
/**
|
|
119
|
-
* Assert total token usage is within a budget.
|
|
120
|
-
*/
|
|
121
86
|
export function tokenUsage(maxTokens) {
|
|
122
87
|
return {
|
|
123
88
|
name: `tokenUsage(<=${maxTokens})`,
|
|
@@ -132,22 +97,12 @@ export function tokenUsage(maxTokens) {
|
|
|
132
97
|
},
|
|
133
98
|
};
|
|
134
99
|
}
|
|
135
|
-
// ============================================================================
|
|
136
|
-
// LLM Judge
|
|
137
|
-
// ============================================================================
|
|
138
|
-
/**
|
|
139
|
-
* Assert output quality using an LLM judge.
|
|
140
|
-
*
|
|
141
|
-
* The judge model receives the prompt, output, and criteria, then
|
|
142
|
-
* returns PASS/FAIL with reasoning.
|
|
143
|
-
*/
|
|
144
100
|
export function llmJudge(options) {
|
|
145
101
|
const { model, criteria, name: assertionName } = options;
|
|
146
102
|
return {
|
|
147
103
|
name: assertionName ?? `llmJudge("${criteria.slice(0, 30)}")`,
|
|
148
104
|
check: async (result) => {
|
|
149
105
|
try {
|
|
150
|
-
// Dynamic import to avoid hard dependency on ai's generateText
|
|
151
106
|
const { generateText } = await import('ai');
|
|
152
107
|
const judgeResult = await generateText({
|
|
153
108
|
model,
|
|
@@ -179,12 +134,6 @@ Respond with EXACTLY one line: "PASS" or "FAIL: <reason>"`,
|
|
|
179
134
|
},
|
|
180
135
|
};
|
|
181
136
|
}
|
|
182
|
-
// ============================================================================
|
|
183
|
-
// Custom Assertion
|
|
184
|
-
// ============================================================================
|
|
185
|
-
/**
|
|
186
|
-
* Create a custom assertion from a function.
|
|
187
|
-
*/
|
|
188
137
|
export function custom(name, checkFn) {
|
|
189
138
|
return {
|
|
190
139
|
name,
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"assertions.js","sourceRoot":"","sources":["../../src/evals/assertions.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"assertions.js","sourceRoot":"","sources":["../../src/evals/assertions.ts"],"names":[],"mappings":"AAGA,MAAM,UAAU,UAAU,CAAC,QAAgB;IACzC,OAAO;QACL,IAAI,EAAE,cAAc,QAAQ,GAAG;QAC/B,KAAK,EAAE,CAAC,MAAM,EAAE,EAAE;YAChB,MAAM,MAAM,GAAG,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,EAAE,CACxC,IAAI,CAAC,SAAS,EAAE,IAAI,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,CAAC,QAAQ,KAAK,QAAQ,CAAC,CACvD,CAAC;YACF,OAAO;gBACL,IAAI,EAAE,cAAc,QAAQ,GAAG;gBAC/B,MAAM,EAAE,MAAM;gBACd,OAAO,EAAE,MAAM,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,SAAS,QAAQ,kBAAkB;aAClE,CAAC;QACJ,CAAC;KACF,CAAC;AACJ,CAAC;AAED,MAAM,UAAU,YAAY,CAAC,QAAgB;IAC3C,OAAO;QACL,IAAI,EAAE,gBAAgB,QAAQ,GAAG;QACjC,KAAK,EAAE,CAAC,MAAM,EAAE,EAAE;YAChB,MAAM,MAAM,GAAG,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,EAAE,CACxC,IAAI,CAAC,SAAS,EAAE,IAAI,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,CAAC,QAAQ,KAAK,QAAQ,CAAC,CACvD,CAAC;YACF,OAAO;gBACL,IAAI,EAAE,gBAAgB,QAAQ,GAAG;gBACjC,MAAM,EAAE,CAAC,MAAM;gBACf,OAAO,EAAE,MAAM,CAAC,CAAC,CAAC,SAAS,QAAQ,2BAA2B,CAAC,CAAC,CAAC,SAAS;aAC3E,CAAC;QACJ,CAAC;KACF,CAAC;AACJ,CAAC;AAED,MAAM,UAAU,eAAe,CAAC,QAAgB,EAAE,aAAqB;IACrE,OAAO;QACL,IAAI,EAAE,mBAAmB,QAAQ,KAAK,aAAa,GAAG;QACtD,KAAK,EAAE,CAAC,MAAM,EAAE,EAAE;YAChB,IAAI,KAAK,GAAG,CAAC,CAAC;YACd,KAAK,MAAM,IAAI,IAAI,MAAM,CAAC,KAAK,EAAE,CAAC;gBAChC,IAAI,IAAI,CAAC,SAAS,EAAE,CAAC;oBACnB,KAAK,IAAI,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,CAAC,QAAQ,KAAK,QAAQ,CAAC,CAAC,MAAM,CAAC;gBAC1E,CAAC;YACH,CAAC;YACD,OAAO;gBACL,IAAI,EAAE,mBAAmB,QAAQ,KAAK,aAAa,GAAG;gBACtD,MAAM,EAAE,KAAK,KAAK,aAAa;gBAC/B,OAAO,EACL,KAAK,KAAK,aAAa,CAAC,CAAC,CAAC,YAAY,aAAa,eAAe,KAAK,EAAE,CAAC,CAAC,CAAC,SAAS;aACxF,CAAC;QACJ,CAAC;KACF,CAAC;AACJ,CAAC;AAED,MAAM,UAAU,aAAa,CAAC,OAAe;IAC3C,OAAO;QACL,IAAI,EAAE,iBAAiB,OAAO,GAAG;QACjC,KAAK,EAAE,CAAC,MAAM,EAAE,EAAE;YAChB,MAAM,OAAO,GAAG,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;YAC1C,OAAO;gBACL,IAAI,EAAE,iBAAiB,OAAO,GAAG;gBACjC,MAAM,EAAE,OAAO;gBACf,OAAO,EAAE,OAAO,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,wBAAwB,OAAO,EAAE;aACjE,CAAC;QACJ,CAAC;KACF,CAAC;AACJ,CAAC;AAED,MAAM,UAAU,cAAc,CAAC,IAAY;IACzC,OAAO;QACL,IAAI,EAAE,mBAAmB,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI;QAC9C,KAAK,EAAE,CAAC,MAAM,EAAE,EAAE;YAChB,MAAM,QAAQ,GAAG,MAAM,CAAC,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC;YAC5C,OAAO;gBACL,IAAI,EAAE,mBAAmB,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI;gBAC9C,MAAM,EAAE,QAAQ;gBAChB,OAAO,EAAE,QAAQ,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,4BAA4B,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,GAAG;aACjF,CAAC;QACJ,CAAC;KACF,CAAC;AACJ,CAAC;AAED,MAAM,UAAU,SAAS,CAAC,GAAW,EAAE,GAAY;IACjD,MAAM,IAAI,GAAG,GAAG,KAAK,SAAS,CAAC,CAAC,CAAC,aAAa,GAAG,IAAI,GAAG,GAAG,CAAC,CAAC,CAAC,eAAe,GAAG,GAAG,CAAC;IACpF,OAAO;QACL,IAAI,EAAE,IAAI;QACV,KAAK,EAAE,CAAC,MAAM,EAAE,EAAE;YAChB,MAAM,KAAK,GAAG,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC;YAClC,MAAM,OAAO,GAAG,GAAG,KAAK,SAAS,CAAC,CAAC,CAAC,KAAK,IAAI,GAAG,IAAI,KAAK,IAAI,GAAG,CAAC,CAAC,CAAC,KAAK,IAAI,GAAG,CAAC;YAChF,OAAO;gBACL,IAAI,EAAE,IAAI;gBACV,MAAM,EAAE,OAAO;gBACf,OAAO,EAAE,OAAO,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,cAAc,KAAK,kBAAkB,GAAG,KAAK,GAAG,IAAI,GAAG,GAAG;aAC1F,CAAC;QACJ,CAAC;KACF,CAAC;AACJ,CAAC;AAED,MAAM,UAAU,UAAU,CAAC,SAAiB;IAC1C,OAAO;QACL,IAAI,EAAE,gBAAgB,SAAS,GAAG;QAClC,KAAK,EAAE,CAAC,MAAM,EAAE,EAAE;YAChB,MAAM,KAAK,GAAG,MAAM,CAAC,UAAU,CAAC,WAAW,IAAI,CAAC,CAAC;YACjD,MAAM,YAAY,GAAG,KAAK,IAAI,SAAS,CAAC;YACxC,OAAO;gBACL,IAAI,EAAE,gBAAgB,SAAS,GAAG;gBAClC,MAAM,EAAE,YAAY;gBACpB,OAAO,EAAE,YAAY,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,gBAAgB,KAAK,mBAAmB,SAAS,EAAE;aACxF,CAAC;QACJ,CAAC;KACF,CAAC;AACJ,CAAC;AAED,MAAM,UAAU,QAAQ,CAAC,OAIxB;IACC,MAAM,EAAE,KAAK,EAAE,QAAQ,EAAE,IAAI,EAAE,aAAa,EAAE,GAAG,OAAO,CAAC;IACzD,OAAO;QACL,IAAI,EAAE,aAAa,IAAI,aAAa,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI;QAC7D,KAAK,EAAE,KAAK,EAAE,MAAM,EAAE,EAAE;YACtB,IAAI,CAAC;gBACH,MAAM,EAAE,YAAY,EAAE,GAAG,MAAM,MAAM,CAAC,IAAI,CAAC,CAAC;gBAC5C,MAAM,WAAW,GAAG,MAAM,YAAY,CAAC;oBACrC,KAAK;oBACL,MAAM,EAAE;;YAEN,QAAQ;;;EAGlB,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC;;0DAE8B;oBAChD,eAAe,EAAE,GAAG;iBACrB,CAAC,CAAC;gBAEH,MAAM,OAAO,GAAG,WAAW,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;gBACxC,MAAM,MAAM,GAAG,OAAO,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC;gBAC1C,OAAO;oBACL,IAAI,EAAE,aAAa,IAAI,aAAa,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI;oBAC7D,MAAM;oBACN,OAAO,EAAE,MAAM,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,OAAO;iBACtC,CAAC;YACJ,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACf,OAAO;oBACL,IAAI,EAAE,aAAa,IAAI,aAAa,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI;oBAC7D,MAAM,EAAE,KAAK;oBACb,OAAO,EAAE,qBAAqB,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,EAAE;iBACvF,CAAC;YACJ,CAAC;QACH,CAAC;KACF,CAAC;AACJ,CAAC;AAED,MAAM,UAAU,MAAM,CACpB,IAAY,EACZ,OAAqF;IAErF,OAAO;QACL,IAAI;QACJ,KAAK,EAAE,CAAC,MAAM,EAAE,EAAE;YAChB,MAAM,OAAO,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC;YAChC,IAAI,OAAO,OAAO,KAAK,SAAS,EAAE,CAAC;gBACjC,OAAO,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,CAAC;YACnC,CAAC;YACD,OAAO,EAAE,IAAI,EAAE,GAAG,OAAO,EAAE,CAAC;QAC9B,CAAC;KACF,CAAC;AACJ,CAAC"}
|
package/dist/evals/index.d.ts
CHANGED
|
@@ -1,8 +1,3 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* @agntk/core - Evals Framework
|
|
3
|
-
*
|
|
4
|
-
* Build eval suites to test agent behavior with assertions.
|
|
5
|
-
*/
|
|
6
1
|
export { createEvalSuite } from './runner.js';
|
|
7
2
|
export { toolCalled, noToolCalled, toolCalledTimes, outputMatches, outputContains, stepCount, tokenUsage, llmJudge, custom, } from './assertions.js';
|
|
8
3
|
export type { EvalSuiteConfig, EvalSuiteResult, EvalCaseResult, EvalCase, EvalAgentResult, Assertion, AssertionResult, EvalReporter, } from './types.js';
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/evals/index.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/evals/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,MAAM,UAAU,CAAC;AAC3C,OAAO,EACL,UAAU,EACV,YAAY,EACZ,eAAe,EACf,aAAa,EACb,cAAc,EACd,SAAS,EACT,UAAU,EACV,QAAQ,EACR,MAAM,GACP,MAAM,cAAc,CAAC;AACtB,YAAY,EACV,eAAe,EACf,eAAe,EACf,cAAc,EACd,QAAQ,EACR,eAAe,EACf,SAAS,EACT,eAAe,EACf,YAAY,GACb,MAAM,SAAS,CAAC"}
|
package/dist/evals/index.js
CHANGED
|
@@ -1,8 +1,3 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* @agntk/core - Evals Framework
|
|
3
|
-
*
|
|
4
|
-
* Build eval suites to test agent behavior with assertions.
|
|
5
|
-
*/
|
|
6
1
|
export { createEvalSuite } from './runner.js';
|
|
7
2
|
export { toolCalled, noToolCalled, toolCalledTimes, outputMatches, outputContains, stepCount, tokenUsage, llmJudge, custom, } from './assertions.js';
|
|
8
3
|
//# sourceMappingURL=index.js.map
|
package/dist/evals/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/evals/index.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/evals/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,MAAM,UAAU,CAAC;AAC3C,OAAO,EACL,UAAU,EACV,YAAY,EACZ,eAAe,EACf,aAAa,EACb,cAAc,EACd,SAAS,EACT,UAAU,EACV,QAAQ,EACR,MAAM,GACP,MAAM,cAAc,CAAC"}
|
package/dist/evals/runner.d.ts
CHANGED
|
@@ -1,34 +1,7 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* @agntk/core - Eval Suite Runner
|
|
3
|
-
*
|
|
4
|
-
* Runs eval cases with concurrency control and reporting.
|
|
5
|
-
*/
|
|
6
1
|
import type { EvalSuiteConfig, EvalSuiteResult, EvalCase } from './types.js';
|
|
7
|
-
/**
|
|
8
|
-
* Create an eval suite that can be run against an agent.
|
|
9
|
-
*
|
|
10
|
-
* @example
|
|
11
|
-
* ```typescript
|
|
12
|
-
* const suite = createEvalSuite({
|
|
13
|
-
* name: 'greeting-evals',
|
|
14
|
-
* agent: myAgent,
|
|
15
|
-
* cases: [
|
|
16
|
-
* {
|
|
17
|
-
* name: 'basic-greeting',
|
|
18
|
-
* prompt: 'Say hello',
|
|
19
|
-
* assertions: [outputContains('hello')],
|
|
20
|
-
* },
|
|
21
|
-
* ],
|
|
22
|
-
* });
|
|
23
|
-
*
|
|
24
|
-
* const results = await suite.run();
|
|
25
|
-
* console.log(`${results.passed}/${results.totalCases} passed`);
|
|
26
|
-
* ```
|
|
27
|
-
*/
|
|
28
2
|
export declare function createEvalSuite(config: EvalSuiteConfig): {
|
|
29
3
|
name: string;
|
|
30
4
|
cases: EvalCase[];
|
|
31
|
-
/** Run all eval cases and return results. */
|
|
32
5
|
run(): Promise<EvalSuiteResult>;
|
|
33
6
|
};
|
|
34
7
|
//# sourceMappingURL=runner.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"runner.d.ts","sourceRoot":"","sources":["../../src/evals/runner.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"runner.d.ts","sourceRoot":"","sources":["../../src/evals/runner.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EACV,eAAe,EACf,eAAe,EAEf,QAAQ,EAGT,MAAM,SAAS,CAAC;AAKjB,wBAAgB,eAAe,CAAC,MAAM,EAAE,eAAe;;;WActC,OAAO,CAAC,eAAe,CAAC;EA6CxC"}
|
package/dist/evals/runner.js
CHANGED
|
@@ -1,34 +1,5 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* @agntk/core - Eval Suite Runner
|
|
3
|
-
*
|
|
4
|
-
* Runs eval cases with concurrency control and reporting.
|
|
5
|
-
*/
|
|
6
1
|
import { createLogger } from '@agntk/logger';
|
|
7
2
|
const log = createLogger('@agntk/core:evals');
|
|
8
|
-
// ============================================================================
|
|
9
|
-
// Suite Factory
|
|
10
|
-
// ============================================================================
|
|
11
|
-
/**
|
|
12
|
-
* Create an eval suite that can be run against an agent.
|
|
13
|
-
*
|
|
14
|
-
* @example
|
|
15
|
-
* ```typescript
|
|
16
|
-
* const suite = createEvalSuite({
|
|
17
|
-
* name: 'greeting-evals',
|
|
18
|
-
* agent: myAgent,
|
|
19
|
-
* cases: [
|
|
20
|
-
* {
|
|
21
|
-
* name: 'basic-greeting',
|
|
22
|
-
* prompt: 'Say hello',
|
|
23
|
-
* assertions: [outputContains('hello')],
|
|
24
|
-
* },
|
|
25
|
-
* ],
|
|
26
|
-
* });
|
|
27
|
-
*
|
|
28
|
-
* const results = await suite.run();
|
|
29
|
-
* console.log(`${results.passed}/${results.totalCases} passed`);
|
|
30
|
-
* ```
|
|
31
|
-
*/
|
|
32
3
|
export function createEvalSuite(config) {
|
|
33
4
|
const { name, agent, cases, maxConcurrency = 1, reporter: reporterConfig = 'console' } = config;
|
|
34
5
|
const reporter = typeof reporterConfig === 'string'
|
|
@@ -39,16 +10,13 @@ export function createEvalSuite(config) {
|
|
|
39
10
|
return {
|
|
40
11
|
name,
|
|
41
12
|
cases,
|
|
42
|
-
/** Run all eval cases and return results. */
|
|
43
13
|
async run() {
|
|
44
14
|
const startTime = Date.now();
|
|
45
15
|
log.info('Starting eval suite', { name, caseCount: cases.length, maxConcurrency });
|
|
46
16
|
const caseResults = [];
|
|
47
|
-
// Run with concurrency control
|
|
48
17
|
const queue = [...cases];
|
|
49
18
|
const active = [];
|
|
50
19
|
while (queue.length > 0 || active.length > 0) {
|
|
51
|
-
// Fill up to maxConcurrency
|
|
52
20
|
while (active.length < maxConcurrency && queue.length > 0) {
|
|
53
21
|
const evalCase = queue.shift();
|
|
54
22
|
const promise = runCase(agent, evalCase, reporter).then((result) => {
|
|
@@ -59,7 +27,6 @@ export function createEvalSuite(config) {
|
|
|
59
27
|
});
|
|
60
28
|
active.push(promise);
|
|
61
29
|
}
|
|
62
|
-
// Wait for one to complete
|
|
63
30
|
if (active.length > 0) {
|
|
64
31
|
await Promise.race(active);
|
|
65
32
|
}
|
|
@@ -83,25 +50,21 @@ export function createEvalSuite(config) {
|
|
|
83
50
|
},
|
|
84
51
|
};
|
|
85
52
|
}
|
|
86
|
-
// ============================================================================
|
|
87
|
-
// Case Runner
|
|
88
|
-
// ============================================================================
|
|
89
53
|
async function runCase(agent, evalCase, reporter) {
|
|
90
54
|
const startTime = Date.now();
|
|
91
55
|
reporter.onCaseStart?.(evalCase.name);
|
|
92
56
|
try {
|
|
93
|
-
// Run the agent with a timeout
|
|
94
57
|
const timeout = evalCase.timeout ?? 30_000;
|
|
95
58
|
const streamResult = await Promise.race([
|
|
96
59
|
agent.stream({ prompt: evalCase.prompt }),
|
|
97
60
|
new Promise((_, reject) => setTimeout(() => reject(new Error(`Eval case timed out after ${timeout}ms`)), timeout)),
|
|
98
61
|
]);
|
|
99
|
-
// Consume the stream to get the final text
|
|
100
62
|
// eslint-disable-next-line @typescript-eslint/no-unused-vars
|
|
101
|
-
for await (const _chunk of streamResult.fullStream) {
|
|
63
|
+
for await (const _chunk of streamResult.fullStream) {
|
|
64
|
+
void 0;
|
|
65
|
+
}
|
|
102
66
|
const text = await streamResult.text;
|
|
103
67
|
const usage = await streamResult.usage;
|
|
104
|
-
// Build eval result from agent result
|
|
105
68
|
const evalResult = {
|
|
106
69
|
text: text ?? '',
|
|
107
70
|
steps: [],
|
|
@@ -113,7 +76,6 @@ async function runCase(agent, evalCase, reporter) {
|
|
|
113
76
|
}
|
|
114
77
|
: { inputTokens: 0, outputTokens: 0, totalTokens: 0 },
|
|
115
78
|
};
|
|
116
|
-
// Run all assertions
|
|
117
79
|
const assertionResults = await Promise.all(evalCase.assertions.map((assertion) => assertion.check(evalResult)));
|
|
118
80
|
const allPassed = assertionResults.every((r) => r.passed);
|
|
119
81
|
const result = {
|
|
@@ -137,9 +99,6 @@ async function runCase(agent, evalCase, reporter) {
|
|
|
137
99
|
return result;
|
|
138
100
|
}
|
|
139
101
|
}
|
|
140
|
-
// ============================================================================
|
|
141
|
-
// Built-in Reporters
|
|
142
|
-
// ============================================================================
|
|
143
102
|
function createConsoleReporter() {
|
|
144
103
|
return {
|
|
145
104
|
onCaseStart(caseName) {
|
|
@@ -167,7 +126,6 @@ function createConsoleReporter() {
|
|
|
167
126
|
function createJsonReporter() {
|
|
168
127
|
return {
|
|
169
128
|
onSuiteEnd(result) {
|
|
170
|
-
// Output CI-friendly JSON to stdout
|
|
171
129
|
const output = JSON.stringify({
|
|
172
130
|
suite: result.name,
|
|
173
131
|
total: result.totalCases,
|