@agentv/sdk 4.42.0-next.1 → 4.42.2-next.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.cts CHANGED
@@ -6501,17 +6501,21 @@ declare const KNOWN_SNAKE_CASE_KEYS: {
6501
6501
  readonly failOnError: "fail_on_error";
6502
6502
  readonly inputFiles: "input_files";
6503
6503
  readonly keepWorkspaces: "keep_workspaces";
6504
+ readonly maxCalls: "max_calls";
6504
6505
  readonly maxCostUsd: "max_cost_usd";
6505
6506
  readonly maxDurationMs: "max_duration_ms";
6506
6507
  readonly maxInput: "max_input";
6507
6508
  readonly maxLlmCalls: "max_llm_calls";
6508
6509
  readonly maxOutput: "max_output";
6510
+ readonly maxSteps: "max_steps";
6509
6511
  readonly maxTokens: "max_tokens";
6510
6512
  readonly maxToolCalls: "max_tool_calls";
6511
6513
  readonly minScore: "min_score";
6512
6514
  readonly onDependencyFailure: "on_dependency_failure";
6513
6515
  readonly onTurnFailure: "on_turn_failure";
6514
6516
  readonly outputPath: "output_path";
6517
+ readonly requiredMinScore: "required_min_score";
6518
+ readonly scoreRange: "score_range";
6515
6519
  readonly scoreRanges: "score_ranges";
6516
6520
  readonly skipDefaults: "skip_defaults";
6517
6521
  readonly targetExplorationRatio: "target_exploration_ratio";
@@ -6678,6 +6682,124 @@ declare function toEvalYamlObject<T extends EvalDefinition | DefinedEvalSuite>(d
6678
6682
  */
6679
6683
  declare function serializeEvalYaml<T extends EvalDefinition | DefinedEvalSuite>(definition: T): string;
6680
6684
 
6685
+ type GraderCommand = string | readonly string[];
6686
+ interface GraderHelperOptions {
6687
+ readonly name?: string;
6688
+ readonly weight?: number;
6689
+ readonly required?: boolean | number;
6690
+ readonly minScore?: number;
6691
+ readonly negate?: boolean;
6692
+ }
6693
+ interface GraderCommonConfig {
6694
+ readonly name?: string;
6695
+ readonly weight?: number;
6696
+ readonly required?: boolean | number;
6697
+ readonly minScore?: number;
6698
+ readonly negate?: boolean;
6699
+ }
6700
+ interface ContainsGraderConfig extends EvalAssertionConfig, GraderCommonConfig {
6701
+ readonly type: 'contains';
6702
+ readonly value: string;
6703
+ }
6704
+ interface EqualsGraderConfig extends EvalAssertionConfig, GraderCommonConfig {
6705
+ readonly type: 'equals';
6706
+ readonly value: string;
6707
+ }
6708
+ interface RegexGraderOptions extends GraderHelperOptions {
6709
+ readonly flags?: string;
6710
+ }
6711
+ interface RegexGraderConfig extends EvalAssertionConfig, GraderCommonConfig {
6712
+ readonly type: 'regex';
6713
+ readonly value: string;
6714
+ readonly flags?: string;
6715
+ }
6716
+ interface IsJsonGraderConfig extends EvalAssertionConfig, GraderCommonConfig {
6717
+ readonly type: 'is-json';
6718
+ }
6719
+ type GraderRubricOperator = 'correctness' | 'contradiction';
6720
+ interface GraderScoreRange {
6721
+ readonly scoreRange: readonly [number, number];
6722
+ readonly outcome: string;
6723
+ }
6724
+ interface GraderRubric {
6725
+ readonly id?: string;
6726
+ readonly outcome?: string;
6727
+ readonly criteria?: string;
6728
+ readonly operator?: GraderRubricOperator;
6729
+ readonly weight?: number;
6730
+ readonly required?: boolean;
6731
+ readonly minScore?: number;
6732
+ readonly requiredMinScore?: number;
6733
+ readonly scoreRanges?: readonly GraderScoreRange[];
6734
+ }
6735
+ type GraderRubricCriterion = string | GraderRubric;
6736
+ interface RubricsGraderConfig extends EvalAssertionConfig, GraderCommonConfig {
6737
+ readonly type: 'rubrics';
6738
+ readonly criteria: readonly GraderRubricCriterion[];
6739
+ }
6740
+ interface GraderPromptScriptConfig {
6741
+ readonly command: readonly string[];
6742
+ readonly config?: Readonly<Record<string, unknown>>;
6743
+ }
6744
+ interface LlmGraderOptions extends GraderHelperOptions {
6745
+ readonly prompt?: string | GraderPromptScriptConfig;
6746
+ readonly rubrics?: readonly GraderRubric[];
6747
+ readonly target?: string;
6748
+ readonly config?: Readonly<Record<string, unknown>>;
6749
+ readonly maxSteps?: number;
6750
+ readonly temperature?: number;
6751
+ readonly preprocessors?: readonly EvalPreprocessor[];
6752
+ }
6753
+ interface LlmGraderConfig extends EvalAssertionConfig, GraderCommonConfig {
6754
+ readonly type: 'llm-grader';
6755
+ readonly prompt?: string | GraderPromptScriptConfig;
6756
+ readonly rubrics?: readonly GraderRubric[];
6757
+ readonly target?: string;
6758
+ readonly config?: Readonly<Record<string, unknown>>;
6759
+ readonly maxSteps?: number;
6760
+ readonly temperature?: number;
6761
+ readonly preprocessors?: readonly EvalPreprocessor[];
6762
+ }
6763
+ interface CodeGraderTargetOptions {
6764
+ readonly maxCalls?: number;
6765
+ }
6766
+ interface CodeGraderOptions extends GraderHelperOptions {
6767
+ readonly cwd?: string;
6768
+ readonly target?: true | CodeGraderTargetOptions;
6769
+ readonly config?: Readonly<Record<string, unknown>>;
6770
+ readonly preprocessors?: readonly EvalPreprocessor[];
6771
+ }
6772
+ interface CodeGraderConfig extends EvalAssertionConfig, GraderCommonConfig {
6773
+ readonly type: 'code-grader';
6774
+ readonly command: GraderCommand;
6775
+ readonly cwd?: string;
6776
+ readonly target?: true | CodeGraderTargetOptions;
6777
+ readonly config?: Readonly<Record<string, unknown>>;
6778
+ readonly preprocessors?: readonly EvalPreprocessor[];
6779
+ }
6780
+ type GraderHelperConfig = ContainsGraderConfig | EqualsGraderConfig | RegexGraderConfig | IsJsonGraderConfig | RubricsGraderConfig | LlmGraderConfig | CodeGraderConfig;
6781
+ declare function containsGrader(value: string, options?: GraderHelperOptions): ContainsGraderConfig;
6782
+ declare function equalsGrader(value: string, options?: GraderHelperOptions): EqualsGraderConfig;
6783
+ declare function exactGrader(value: string, options?: GraderHelperOptions): EqualsGraderConfig;
6784
+ declare function regexGrader(pattern: string | RegExp, options?: RegexGraderOptions): RegexGraderConfig;
6785
+ declare function isJsonGrader(options?: GraderHelperOptions): IsJsonGraderConfig;
6786
+ declare function jsonGrader(options?: GraderHelperOptions): IsJsonGraderConfig;
6787
+ declare function rubricsGrader(criteria: readonly GraderRubricCriterion[], options?: GraderHelperOptions): RubricsGraderConfig;
6788
+ declare function llmGrader(options?: LlmGraderOptions): LlmGraderConfig;
6789
+ declare function codeGrader(command: GraderCommand, options?: CodeGraderOptions): CodeGraderConfig;
6790
+ declare const graders: Readonly<{
6791
+ contains: typeof containsGrader;
6792
+ equals: typeof equalsGrader;
6793
+ exact: typeof exactGrader;
6794
+ regex: typeof regexGrader;
6795
+ isJson: typeof isJsonGrader;
6796
+ json: typeof jsonGrader;
6797
+ rubrics: typeof rubricsGrader;
6798
+ llmGrader: typeof llmGrader;
6799
+ codeGrader: typeof codeGrader;
6800
+ }>;
6801
+ type GraderCatalog = typeof graders;
6802
+
6681
6803
  /**
6682
6804
  * Client for invoking configured targets from code-grader scripts.
6683
6805
  *
@@ -7022,4 +7144,4 @@ declare function definePromptTemplate(handler: PromptTemplateHandler): void;
7022
7144
  */
7023
7145
  declare function defineAssertion(handler: AssertionHandler): void;
7024
7146
 
7025
- export { type AssertionContext, type AssertionHandler, type AssertionScore, type AssertionType, type CodeGraderHandler, type CodeGraderInput, CodeGraderInputSchema, type CodeGraderResult, CodeGraderResultSchema, type Content, type ContentFile, ContentFileSchema, type ContentImage, ContentImageSchema, ContentSchema, type ContentText, ContentTextSchema, type DefinedEvalSuite, type EvalAssertionConfig, type EvalDefinition, type EvalDockerWorkspace, type EvalExecution, type EvalMessage, type EvalMessageContent, type EvalPreprocessor, type EvalRequires, type EvalTargetRef, type EvalTest, type EvalTrials, type EvalTurn, type EvalWorkspace, type EvalWorkspaceHook, type EvalWorkspaceHooks, type EvalWorkspaceRepo, type LowerEvalYamlValue, type Message, MessageSchema, type PromptTemplateHandler, type PromptTemplateInput, PromptTemplateInputSchema, TRACE_EVENT_TYPES, TRACE_REDACTION_LEVELS, TRACE_SOURCE_KINDS, TRACE_TOOL_STATUSES, type TargetClient, type TargetInfo, TargetInvocationError, type TargetInvokeRequest, type TargetInvokeResponse, TargetNotAvailableError, type TokenUsage, TokenUsageSchema, type ToolCall, ToolCallSchema, type Trace, type TraceArtifact, TraceArtifactSchema, type TraceBranch, TraceBranchSchema, type TraceError, TraceErrorSchema, type TraceEvent, TraceEventSchema, type TraceMessage, TraceMessageSchema, type TraceModel, TraceModelSchema, type TraceRawEvidence, TraceRawEvidenceSchema, type TraceRedactionState, TraceRedactionStateSchema, TraceSchema, type TraceSession, TraceSessionSchema, type TraceSource, type TraceSourceRef, TraceSourceRefSchema, TraceSourceSchema, type TraceSummary, TraceSummarySchema, type TraceTool, TraceToolSchema, createTargetClient, defineAssertion, defineCodeGrader, defineEval, definePromptTemplate, evalSuite, serializeEvalYaml, toEvalYamlObject };
7147
+ export { type AssertionContext, type AssertionHandler, type AssertionScore, type AssertionType, type CodeGraderConfig, type CodeGraderHandler, type CodeGraderInput, CodeGraderInputSchema, type CodeGraderOptions, type CodeGraderResult, CodeGraderResultSchema, type CodeGraderTargetOptions, type ContainsGraderConfig, type Content, type ContentFile, ContentFileSchema, type ContentImage, ContentImageSchema, ContentSchema, type ContentText, ContentTextSchema, type DefinedEvalSuite, type EqualsGraderConfig, type EvalAssertionConfig, type EvalDefinition, type EvalDockerWorkspace, type EvalExecution, type EvalMessage, type EvalMessageContent, type EvalPreprocessor, type EvalRequires, type EvalTargetRef, type EvalTest, type EvalTrials, type EvalTurn, type EvalWorkspace, type EvalWorkspaceHook, type EvalWorkspaceHooks, type EvalWorkspaceRepo, type GraderCatalog, type GraderCommand, type GraderCommonConfig, type GraderHelperConfig, type GraderHelperOptions, type GraderPromptScriptConfig, type GraderRubric, type GraderRubricCriterion, type GraderRubricOperator, type GraderScoreRange, type IsJsonGraderConfig, type LlmGraderConfig, type LlmGraderOptions, type LowerEvalYamlValue, type Message, MessageSchema, type PromptTemplateHandler, type PromptTemplateInput, PromptTemplateInputSchema, type RegexGraderConfig, type RegexGraderOptions, type RubricsGraderConfig, TRACE_EVENT_TYPES, TRACE_REDACTION_LEVELS, TRACE_SOURCE_KINDS, TRACE_TOOL_STATUSES, type TargetClient, type TargetInfo, TargetInvocationError, type TargetInvokeRequest, type TargetInvokeResponse, TargetNotAvailableError, type TokenUsage, TokenUsageSchema, type ToolCall, ToolCallSchema, type Trace, type TraceArtifact, TraceArtifactSchema, type TraceBranch, TraceBranchSchema, type TraceError, TraceErrorSchema, type TraceEvent, TraceEventSchema, type TraceMessage, TraceMessageSchema, type TraceModel, TraceModelSchema, type TraceRawEvidence, TraceRawEvidenceSchema, type TraceRedactionState, TraceRedactionStateSchema, TraceSchema, type TraceSession, TraceSessionSchema, type TraceSource, type TraceSourceRef, TraceSourceRefSchema, TraceSourceSchema, type TraceSummary, TraceSummarySchema, type TraceTool, TraceToolSchema, codeGrader, containsGrader, createTargetClient, defineAssertion, defineCodeGrader, defineEval, definePromptTemplate, equalsGrader, evalSuite, exactGrader, graders, isJsonGrader, jsonGrader, llmGrader, regexGrader, rubricsGrader, serializeEvalYaml, toEvalYamlObject };
package/dist/index.d.ts CHANGED
@@ -6501,17 +6501,21 @@ declare const KNOWN_SNAKE_CASE_KEYS: {
6501
6501
  readonly failOnError: "fail_on_error";
6502
6502
  readonly inputFiles: "input_files";
6503
6503
  readonly keepWorkspaces: "keep_workspaces";
6504
+ readonly maxCalls: "max_calls";
6504
6505
  readonly maxCostUsd: "max_cost_usd";
6505
6506
  readonly maxDurationMs: "max_duration_ms";
6506
6507
  readonly maxInput: "max_input";
6507
6508
  readonly maxLlmCalls: "max_llm_calls";
6508
6509
  readonly maxOutput: "max_output";
6510
+ readonly maxSteps: "max_steps";
6509
6511
  readonly maxTokens: "max_tokens";
6510
6512
  readonly maxToolCalls: "max_tool_calls";
6511
6513
  readonly minScore: "min_score";
6512
6514
  readonly onDependencyFailure: "on_dependency_failure";
6513
6515
  readonly onTurnFailure: "on_turn_failure";
6514
6516
  readonly outputPath: "output_path";
6517
+ readonly requiredMinScore: "required_min_score";
6518
+ readonly scoreRange: "score_range";
6515
6519
  readonly scoreRanges: "score_ranges";
6516
6520
  readonly skipDefaults: "skip_defaults";
6517
6521
  readonly targetExplorationRatio: "target_exploration_ratio";
@@ -6678,6 +6682,124 @@ declare function toEvalYamlObject<T extends EvalDefinition | DefinedEvalSuite>(d
6678
6682
  */
6679
6683
  declare function serializeEvalYaml<T extends EvalDefinition | DefinedEvalSuite>(definition: T): string;
6680
6684
 
6685
+ type GraderCommand = string | readonly string[];
6686
+ interface GraderHelperOptions {
6687
+ readonly name?: string;
6688
+ readonly weight?: number;
6689
+ readonly required?: boolean | number;
6690
+ readonly minScore?: number;
6691
+ readonly negate?: boolean;
6692
+ }
6693
+ interface GraderCommonConfig {
6694
+ readonly name?: string;
6695
+ readonly weight?: number;
6696
+ readonly required?: boolean | number;
6697
+ readonly minScore?: number;
6698
+ readonly negate?: boolean;
6699
+ }
6700
+ interface ContainsGraderConfig extends EvalAssertionConfig, GraderCommonConfig {
6701
+ readonly type: 'contains';
6702
+ readonly value: string;
6703
+ }
6704
+ interface EqualsGraderConfig extends EvalAssertionConfig, GraderCommonConfig {
6705
+ readonly type: 'equals';
6706
+ readonly value: string;
6707
+ }
6708
+ interface RegexGraderOptions extends GraderHelperOptions {
6709
+ readonly flags?: string;
6710
+ }
6711
+ interface RegexGraderConfig extends EvalAssertionConfig, GraderCommonConfig {
6712
+ readonly type: 'regex';
6713
+ readonly value: string;
6714
+ readonly flags?: string;
6715
+ }
6716
+ interface IsJsonGraderConfig extends EvalAssertionConfig, GraderCommonConfig {
6717
+ readonly type: 'is-json';
6718
+ }
6719
+ type GraderRubricOperator = 'correctness' | 'contradiction';
6720
+ interface GraderScoreRange {
6721
+ readonly scoreRange: readonly [number, number];
6722
+ readonly outcome: string;
6723
+ }
6724
+ interface GraderRubric {
6725
+ readonly id?: string;
6726
+ readonly outcome?: string;
6727
+ readonly criteria?: string;
6728
+ readonly operator?: GraderRubricOperator;
6729
+ readonly weight?: number;
6730
+ readonly required?: boolean;
6731
+ readonly minScore?: number;
6732
+ readonly requiredMinScore?: number;
6733
+ readonly scoreRanges?: readonly GraderScoreRange[];
6734
+ }
6735
+ type GraderRubricCriterion = string | GraderRubric;
6736
+ interface RubricsGraderConfig extends EvalAssertionConfig, GraderCommonConfig {
6737
+ readonly type: 'rubrics';
6738
+ readonly criteria: readonly GraderRubricCriterion[];
6739
+ }
6740
+ interface GraderPromptScriptConfig {
6741
+ readonly command: readonly string[];
6742
+ readonly config?: Readonly<Record<string, unknown>>;
6743
+ }
6744
+ interface LlmGraderOptions extends GraderHelperOptions {
6745
+ readonly prompt?: string | GraderPromptScriptConfig;
6746
+ readonly rubrics?: readonly GraderRubric[];
6747
+ readonly target?: string;
6748
+ readonly config?: Readonly<Record<string, unknown>>;
6749
+ readonly maxSteps?: number;
6750
+ readonly temperature?: number;
6751
+ readonly preprocessors?: readonly EvalPreprocessor[];
6752
+ }
6753
+ interface LlmGraderConfig extends EvalAssertionConfig, GraderCommonConfig {
6754
+ readonly type: 'llm-grader';
6755
+ readonly prompt?: string | GraderPromptScriptConfig;
6756
+ readonly rubrics?: readonly GraderRubric[];
6757
+ readonly target?: string;
6758
+ readonly config?: Readonly<Record<string, unknown>>;
6759
+ readonly maxSteps?: number;
6760
+ readonly temperature?: number;
6761
+ readonly preprocessors?: readonly EvalPreprocessor[];
6762
+ }
6763
+ interface CodeGraderTargetOptions {
6764
+ readonly maxCalls?: number;
6765
+ }
6766
+ interface CodeGraderOptions extends GraderHelperOptions {
6767
+ readonly cwd?: string;
6768
+ readonly target?: true | CodeGraderTargetOptions;
6769
+ readonly config?: Readonly<Record<string, unknown>>;
6770
+ readonly preprocessors?: readonly EvalPreprocessor[];
6771
+ }
6772
+ interface CodeGraderConfig extends EvalAssertionConfig, GraderCommonConfig {
6773
+ readonly type: 'code-grader';
6774
+ readonly command: GraderCommand;
6775
+ readonly cwd?: string;
6776
+ readonly target?: true | CodeGraderTargetOptions;
6777
+ readonly config?: Readonly<Record<string, unknown>>;
6778
+ readonly preprocessors?: readonly EvalPreprocessor[];
6779
+ }
6780
+ type GraderHelperConfig = ContainsGraderConfig | EqualsGraderConfig | RegexGraderConfig | IsJsonGraderConfig | RubricsGraderConfig | LlmGraderConfig | CodeGraderConfig;
6781
+ declare function containsGrader(value: string, options?: GraderHelperOptions): ContainsGraderConfig;
6782
+ declare function equalsGrader(value: string, options?: GraderHelperOptions): EqualsGraderConfig;
6783
+ declare function exactGrader(value: string, options?: GraderHelperOptions): EqualsGraderConfig;
6784
+ declare function regexGrader(pattern: string | RegExp, options?: RegexGraderOptions): RegexGraderConfig;
6785
+ declare function isJsonGrader(options?: GraderHelperOptions): IsJsonGraderConfig;
6786
+ declare function jsonGrader(options?: GraderHelperOptions): IsJsonGraderConfig;
6787
+ declare function rubricsGrader(criteria: readonly GraderRubricCriterion[], options?: GraderHelperOptions): RubricsGraderConfig;
6788
+ declare function llmGrader(options?: LlmGraderOptions): LlmGraderConfig;
6789
+ declare function codeGrader(command: GraderCommand, options?: CodeGraderOptions): CodeGraderConfig;
6790
+ declare const graders: Readonly<{
6791
+ contains: typeof containsGrader;
6792
+ equals: typeof equalsGrader;
6793
+ exact: typeof exactGrader;
6794
+ regex: typeof regexGrader;
6795
+ isJson: typeof isJsonGrader;
6796
+ json: typeof jsonGrader;
6797
+ rubrics: typeof rubricsGrader;
6798
+ llmGrader: typeof llmGrader;
6799
+ codeGrader: typeof codeGrader;
6800
+ }>;
6801
+ type GraderCatalog = typeof graders;
6802
+
6681
6803
  /**
6682
6804
  * Client for invoking configured targets from code-grader scripts.
6683
6805
  *
@@ -7022,4 +7144,4 @@ declare function definePromptTemplate(handler: PromptTemplateHandler): void;
7022
7144
  */
7023
7145
  declare function defineAssertion(handler: AssertionHandler): void;
7024
7146
 
7025
- export { type AssertionContext, type AssertionHandler, type AssertionScore, type AssertionType, type CodeGraderHandler, type CodeGraderInput, CodeGraderInputSchema, type CodeGraderResult, CodeGraderResultSchema, type Content, type ContentFile, ContentFileSchema, type ContentImage, ContentImageSchema, ContentSchema, type ContentText, ContentTextSchema, type DefinedEvalSuite, type EvalAssertionConfig, type EvalDefinition, type EvalDockerWorkspace, type EvalExecution, type EvalMessage, type EvalMessageContent, type EvalPreprocessor, type EvalRequires, type EvalTargetRef, type EvalTest, type EvalTrials, type EvalTurn, type EvalWorkspace, type EvalWorkspaceHook, type EvalWorkspaceHooks, type EvalWorkspaceRepo, type LowerEvalYamlValue, type Message, MessageSchema, type PromptTemplateHandler, type PromptTemplateInput, PromptTemplateInputSchema, TRACE_EVENT_TYPES, TRACE_REDACTION_LEVELS, TRACE_SOURCE_KINDS, TRACE_TOOL_STATUSES, type TargetClient, type TargetInfo, TargetInvocationError, type TargetInvokeRequest, type TargetInvokeResponse, TargetNotAvailableError, type TokenUsage, TokenUsageSchema, type ToolCall, ToolCallSchema, type Trace, type TraceArtifact, TraceArtifactSchema, type TraceBranch, TraceBranchSchema, type TraceError, TraceErrorSchema, type TraceEvent, TraceEventSchema, type TraceMessage, TraceMessageSchema, type TraceModel, TraceModelSchema, type TraceRawEvidence, TraceRawEvidenceSchema, type TraceRedactionState, TraceRedactionStateSchema, TraceSchema, type TraceSession, TraceSessionSchema, type TraceSource, type TraceSourceRef, TraceSourceRefSchema, TraceSourceSchema, type TraceSummary, TraceSummarySchema, type TraceTool, TraceToolSchema, createTargetClient, defineAssertion, defineCodeGrader, defineEval, definePromptTemplate, evalSuite, serializeEvalYaml, toEvalYamlObject };
7147
+ export { type AssertionContext, type AssertionHandler, type AssertionScore, type AssertionType, type CodeGraderConfig, type CodeGraderHandler, type CodeGraderInput, CodeGraderInputSchema, type CodeGraderOptions, type CodeGraderResult, CodeGraderResultSchema, type CodeGraderTargetOptions, type ContainsGraderConfig, type Content, type ContentFile, ContentFileSchema, type ContentImage, ContentImageSchema, ContentSchema, type ContentText, ContentTextSchema, type DefinedEvalSuite, type EqualsGraderConfig, type EvalAssertionConfig, type EvalDefinition, type EvalDockerWorkspace, type EvalExecution, type EvalMessage, type EvalMessageContent, type EvalPreprocessor, type EvalRequires, type EvalTargetRef, type EvalTest, type EvalTrials, type EvalTurn, type EvalWorkspace, type EvalWorkspaceHook, type EvalWorkspaceHooks, type EvalWorkspaceRepo, type GraderCatalog, type GraderCommand, type GraderCommonConfig, type GraderHelperConfig, type GraderHelperOptions, type GraderPromptScriptConfig, type GraderRubric, type GraderRubricCriterion, type GraderRubricOperator, type GraderScoreRange, type IsJsonGraderConfig, type LlmGraderConfig, type LlmGraderOptions, type LowerEvalYamlValue, type Message, MessageSchema, type PromptTemplateHandler, type PromptTemplateInput, PromptTemplateInputSchema, type RegexGraderConfig, type RegexGraderOptions, type RubricsGraderConfig, TRACE_EVENT_TYPES, TRACE_REDACTION_LEVELS, TRACE_SOURCE_KINDS, TRACE_TOOL_STATUSES, type TargetClient, type TargetInfo, TargetInvocationError, type TargetInvokeRequest, type TargetInvokeResponse, TargetNotAvailableError, type TokenUsage, TokenUsageSchema, type ToolCall, ToolCallSchema, type Trace, type TraceArtifact, TraceArtifactSchema, type TraceBranch, TraceBranchSchema, type TraceError, TraceErrorSchema, type TraceEvent, TraceEventSchema, type TraceMessage, TraceMessageSchema, type TraceModel, TraceModelSchema, type TraceRawEvidence, TraceRawEvidenceSchema, type TraceRedactionState, TraceRedactionStateSchema, TraceSchema, type TraceSession, TraceSessionSchema, type TraceSource, type TraceSourceRef, TraceSourceRefSchema, TraceSourceSchema, type TraceSummary, TraceSummarySchema, type TraceTool, TraceToolSchema, codeGrader, containsGrader, createTargetClient, defineAssertion, defineCodeGrader, defineEval, definePromptTemplate, equalsGrader, evalSuite, exactGrader, graders, isJsonGrader, jsonGrader, llmGrader, regexGrader, rubricsGrader, serializeEvalYaml, toEvalYamlObject };
package/dist/index.js CHANGED
@@ -246,17 +246,21 @@ var KNOWN_SNAKE_CASE_KEYS = {
246
246
  failOnError: "fail_on_error",
247
247
  inputFiles: "input_files",
248
248
  keepWorkspaces: "keep_workspaces",
249
+ maxCalls: "max_calls",
249
250
  maxCostUsd: "max_cost_usd",
250
251
  maxDurationMs: "max_duration_ms",
251
252
  maxInput: "max_input",
252
253
  maxLlmCalls: "max_llm_calls",
253
254
  maxOutput: "max_output",
255
+ maxSteps: "max_steps",
254
256
  maxTokens: "max_tokens",
255
257
  maxToolCalls: "max_tool_calls",
256
258
  minScore: "min_score",
257
259
  onDependencyFailure: "on_dependency_failure",
258
260
  onTurnFailure: "on_turn_failure",
259
261
  outputPath: "output_path",
262
+ requiredMinScore: "required_min_score",
263
+ scoreRange: "score_range",
260
264
  scoreRanges: "score_ranges",
261
265
  skipDefaults: "skip_defaults",
262
266
  targetExplorationRatio: "target_exploration_ratio",
@@ -312,6 +316,87 @@ function serializeEvalYaml(definition) {
312
316
  return stringifyYaml(toEvalYamlObject(definition), { lineWidth: 0 }).trimEnd();
313
317
  }
314
318
 
319
+ // src/graders.ts
320
+ function withCommon(config, options = {}) {
321
+ return {
322
+ ...options.name !== void 0 ? { name: options.name } : {},
323
+ ...config,
324
+ ...options.weight !== void 0 ? { weight: options.weight } : {},
325
+ ...options.required !== void 0 ? { required: options.required } : {},
326
+ ...options.minScore !== void 0 ? { minScore: options.minScore } : {},
327
+ ...options.negate !== void 0 ? { negate: options.negate } : {}
328
+ };
329
+ }
330
+ function containsGrader(value, options) {
331
+ return withCommon({ type: "contains", value }, options);
332
+ }
333
+ function equalsGrader(value, options) {
334
+ return withCommon({ type: "equals", value }, options);
335
+ }
336
+ function exactGrader(value, options) {
337
+ return equalsGrader(value, options);
338
+ }
339
+ function regexGrader(pattern, options = {}) {
340
+ const value = pattern instanceof RegExp ? pattern.source : pattern;
341
+ const flags = options.flags ?? (pattern instanceof RegExp ? pattern.flags : void 0);
342
+ return withCommon(
343
+ {
344
+ type: "regex",
345
+ value,
346
+ ...flags ? { flags } : {}
347
+ },
348
+ options
349
+ );
350
+ }
351
+ function isJsonGrader(options) {
352
+ return withCommon({ type: "is-json" }, options);
353
+ }
354
+ function jsonGrader(options) {
355
+ return isJsonGrader(options);
356
+ }
357
+ function rubricsGrader(criteria, options) {
358
+ return withCommon({ type: "rubrics", criteria }, options);
359
+ }
360
+ function llmGrader(options = {}) {
361
+ return withCommon(
362
+ {
363
+ type: "llm-grader",
364
+ ...options.prompt !== void 0 ? { prompt: options.prompt } : {},
365
+ ...options.rubrics !== void 0 ? { rubrics: options.rubrics } : {},
366
+ ...options.target !== void 0 ? { target: options.target } : {},
367
+ ...options.config !== void 0 ? { config: options.config } : {},
368
+ ...options.maxSteps !== void 0 ? { maxSteps: options.maxSteps } : {},
369
+ ...options.temperature !== void 0 ? { temperature: options.temperature } : {},
370
+ ...options.preprocessors !== void 0 ? { preprocessors: options.preprocessors } : {}
371
+ },
372
+ options
373
+ );
374
+ }
375
+ function codeGrader(command, options = {}) {
376
+ return withCommon(
377
+ {
378
+ type: "code-grader",
379
+ command,
380
+ ...options.cwd !== void 0 ? { cwd: options.cwd } : {},
381
+ ...options.target !== void 0 ? { target: options.target } : {},
382
+ ...options.config !== void 0 ? { config: options.config } : {},
383
+ ...options.preprocessors !== void 0 ? { preprocessors: options.preprocessors } : {}
384
+ },
385
+ options
386
+ );
387
+ }
388
+ var graders = Object.freeze({
389
+ contains: containsGrader,
390
+ equals: equalsGrader,
391
+ exact: exactGrader,
392
+ regex: regexGrader,
393
+ isJson: isJsonGrader,
394
+ json: jsonGrader,
395
+ rubrics: rubricsGrader,
396
+ llmGrader,
397
+ codeGrader
398
+ });
399
+
315
400
  // src/target-client.ts
316
401
  var TargetNotAvailableError = class extends Error {
317
402
  constructor(message) {
@@ -639,12 +724,22 @@ export {
639
724
  TraceSourceSchema,
640
725
  TraceSummarySchema,
641
726
  TraceToolSchema,
727
+ codeGrader,
728
+ containsGrader,
642
729
  createTargetClient,
643
730
  defineAssertion,
644
731
  defineCodeGrader,
645
732
  defineEval,
646
733
  definePromptTemplate,
734
+ equalsGrader,
647
735
  evalSuite,
736
+ exactGrader,
737
+ graders,
738
+ isJsonGrader,
739
+ jsonGrader,
740
+ llmGrader,
741
+ regexGrader,
742
+ rubricsGrader,
648
743
  serializeEvalYaml,
649
744
  toEvalYamlObject,
650
745
  z2 as z