@agentv/sdk 4.41.6 → 4.42.1-next.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +37 -3
- package/dist/index.cjs +105 -0
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +123 -1
- package/dist/index.d.ts +123 -1
- package/dist/index.js +95 -0
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.d.cts
CHANGED
|
@@ -6501,17 +6501,21 @@ declare const KNOWN_SNAKE_CASE_KEYS: {
|
|
|
6501
6501
|
readonly failOnError: "fail_on_error";
|
|
6502
6502
|
readonly inputFiles: "input_files";
|
|
6503
6503
|
readonly keepWorkspaces: "keep_workspaces";
|
|
6504
|
+
readonly maxCalls: "max_calls";
|
|
6504
6505
|
readonly maxCostUsd: "max_cost_usd";
|
|
6505
6506
|
readonly maxDurationMs: "max_duration_ms";
|
|
6506
6507
|
readonly maxInput: "max_input";
|
|
6507
6508
|
readonly maxLlmCalls: "max_llm_calls";
|
|
6508
6509
|
readonly maxOutput: "max_output";
|
|
6510
|
+
readonly maxSteps: "max_steps";
|
|
6509
6511
|
readonly maxTokens: "max_tokens";
|
|
6510
6512
|
readonly maxToolCalls: "max_tool_calls";
|
|
6511
6513
|
readonly minScore: "min_score";
|
|
6512
6514
|
readonly onDependencyFailure: "on_dependency_failure";
|
|
6513
6515
|
readonly onTurnFailure: "on_turn_failure";
|
|
6514
6516
|
readonly outputPath: "output_path";
|
|
6517
|
+
readonly requiredMinScore: "required_min_score";
|
|
6518
|
+
readonly scoreRange: "score_range";
|
|
6515
6519
|
readonly scoreRanges: "score_ranges";
|
|
6516
6520
|
readonly skipDefaults: "skip_defaults";
|
|
6517
6521
|
readonly targetExplorationRatio: "target_exploration_ratio";
|
|
@@ -6678,6 +6682,124 @@ declare function toEvalYamlObject<T extends EvalDefinition | DefinedEvalSuite>(d
|
|
|
6678
6682
|
*/
|
|
6679
6683
|
declare function serializeEvalYaml<T extends EvalDefinition | DefinedEvalSuite>(definition: T): string;
|
|
6680
6684
|
|
|
6685
|
+
type GraderCommand = string | readonly string[];
|
|
6686
|
+
interface GraderHelperOptions {
|
|
6687
|
+
readonly name?: string;
|
|
6688
|
+
readonly weight?: number;
|
|
6689
|
+
readonly required?: boolean | number;
|
|
6690
|
+
readonly minScore?: number;
|
|
6691
|
+
readonly negate?: boolean;
|
|
6692
|
+
}
|
|
6693
|
+
interface GraderCommonConfig {
|
|
6694
|
+
readonly name?: string;
|
|
6695
|
+
readonly weight?: number;
|
|
6696
|
+
readonly required?: boolean | number;
|
|
6697
|
+
readonly minScore?: number;
|
|
6698
|
+
readonly negate?: boolean;
|
|
6699
|
+
}
|
|
6700
|
+
interface ContainsGraderConfig extends EvalAssertionConfig, GraderCommonConfig {
|
|
6701
|
+
readonly type: 'contains';
|
|
6702
|
+
readonly value: string;
|
|
6703
|
+
}
|
|
6704
|
+
interface EqualsGraderConfig extends EvalAssertionConfig, GraderCommonConfig {
|
|
6705
|
+
readonly type: 'equals';
|
|
6706
|
+
readonly value: string;
|
|
6707
|
+
}
|
|
6708
|
+
interface RegexGraderOptions extends GraderHelperOptions {
|
|
6709
|
+
readonly flags?: string;
|
|
6710
|
+
}
|
|
6711
|
+
interface RegexGraderConfig extends EvalAssertionConfig, GraderCommonConfig {
|
|
6712
|
+
readonly type: 'regex';
|
|
6713
|
+
readonly value: string;
|
|
6714
|
+
readonly flags?: string;
|
|
6715
|
+
}
|
|
6716
|
+
interface IsJsonGraderConfig extends EvalAssertionConfig, GraderCommonConfig {
|
|
6717
|
+
readonly type: 'is-json';
|
|
6718
|
+
}
|
|
6719
|
+
type GraderRubricOperator = 'correctness' | 'contradiction';
|
|
6720
|
+
interface GraderScoreRange {
|
|
6721
|
+
readonly scoreRange: readonly [number, number];
|
|
6722
|
+
readonly outcome: string;
|
|
6723
|
+
}
|
|
6724
|
+
interface GraderRubric {
|
|
6725
|
+
readonly id?: string;
|
|
6726
|
+
readonly outcome?: string;
|
|
6727
|
+
readonly criteria?: string;
|
|
6728
|
+
readonly operator?: GraderRubricOperator;
|
|
6729
|
+
readonly weight?: number;
|
|
6730
|
+
readonly required?: boolean;
|
|
6731
|
+
readonly minScore?: number;
|
|
6732
|
+
readonly requiredMinScore?: number;
|
|
6733
|
+
readonly scoreRanges?: readonly GraderScoreRange[];
|
|
6734
|
+
}
|
|
6735
|
+
type GraderRubricCriterion = string | GraderRubric;
|
|
6736
|
+
interface RubricsGraderConfig extends EvalAssertionConfig, GraderCommonConfig {
|
|
6737
|
+
readonly type: 'rubrics';
|
|
6738
|
+
readonly criteria: readonly GraderRubricCriterion[];
|
|
6739
|
+
}
|
|
6740
|
+
interface GraderPromptScriptConfig {
|
|
6741
|
+
readonly command: readonly string[];
|
|
6742
|
+
readonly config?: Readonly<Record<string, unknown>>;
|
|
6743
|
+
}
|
|
6744
|
+
interface LlmGraderOptions extends GraderHelperOptions {
|
|
6745
|
+
readonly prompt?: string | GraderPromptScriptConfig;
|
|
6746
|
+
readonly rubrics?: readonly GraderRubric[];
|
|
6747
|
+
readonly target?: string;
|
|
6748
|
+
readonly config?: Readonly<Record<string, unknown>>;
|
|
6749
|
+
readonly maxSteps?: number;
|
|
6750
|
+
readonly temperature?: number;
|
|
6751
|
+
readonly preprocessors?: readonly EvalPreprocessor[];
|
|
6752
|
+
}
|
|
6753
|
+
interface LlmGraderConfig extends EvalAssertionConfig, GraderCommonConfig {
|
|
6754
|
+
readonly type: 'llm-grader';
|
|
6755
|
+
readonly prompt?: string | GraderPromptScriptConfig;
|
|
6756
|
+
readonly rubrics?: readonly GraderRubric[];
|
|
6757
|
+
readonly target?: string;
|
|
6758
|
+
readonly config?: Readonly<Record<string, unknown>>;
|
|
6759
|
+
readonly maxSteps?: number;
|
|
6760
|
+
readonly temperature?: number;
|
|
6761
|
+
readonly preprocessors?: readonly EvalPreprocessor[];
|
|
6762
|
+
}
|
|
6763
|
+
interface CodeGraderTargetOptions {
|
|
6764
|
+
readonly maxCalls?: number;
|
|
6765
|
+
}
|
|
6766
|
+
interface CodeGraderOptions extends GraderHelperOptions {
|
|
6767
|
+
readonly cwd?: string;
|
|
6768
|
+
readonly target?: true | CodeGraderTargetOptions;
|
|
6769
|
+
readonly config?: Readonly<Record<string, unknown>>;
|
|
6770
|
+
readonly preprocessors?: readonly EvalPreprocessor[];
|
|
6771
|
+
}
|
|
6772
|
+
interface CodeGraderConfig extends EvalAssertionConfig, GraderCommonConfig {
|
|
6773
|
+
readonly type: 'code-grader';
|
|
6774
|
+
readonly command: GraderCommand;
|
|
6775
|
+
readonly cwd?: string;
|
|
6776
|
+
readonly target?: true | CodeGraderTargetOptions;
|
|
6777
|
+
readonly config?: Readonly<Record<string, unknown>>;
|
|
6778
|
+
readonly preprocessors?: readonly EvalPreprocessor[];
|
|
6779
|
+
}
|
|
6780
|
+
type GraderHelperConfig = ContainsGraderConfig | EqualsGraderConfig | RegexGraderConfig | IsJsonGraderConfig | RubricsGraderConfig | LlmGraderConfig | CodeGraderConfig;
|
|
6781
|
+
declare function containsGrader(value: string, options?: GraderHelperOptions): ContainsGraderConfig;
|
|
6782
|
+
declare function equalsGrader(value: string, options?: GraderHelperOptions): EqualsGraderConfig;
|
|
6783
|
+
declare function exactGrader(value: string, options?: GraderHelperOptions): EqualsGraderConfig;
|
|
6784
|
+
declare function regexGrader(pattern: string | RegExp, options?: RegexGraderOptions): RegexGraderConfig;
|
|
6785
|
+
declare function isJsonGrader(options?: GraderHelperOptions): IsJsonGraderConfig;
|
|
6786
|
+
declare function jsonGrader(options?: GraderHelperOptions): IsJsonGraderConfig;
|
|
6787
|
+
declare function rubricsGrader(criteria: readonly GraderRubricCriterion[], options?: GraderHelperOptions): RubricsGraderConfig;
|
|
6788
|
+
declare function llmGrader(options?: LlmGraderOptions): LlmGraderConfig;
|
|
6789
|
+
declare function codeGrader(command: GraderCommand, options?: CodeGraderOptions): CodeGraderConfig;
|
|
6790
|
+
declare const graders: Readonly<{
|
|
6791
|
+
contains: typeof containsGrader;
|
|
6792
|
+
equals: typeof equalsGrader;
|
|
6793
|
+
exact: typeof exactGrader;
|
|
6794
|
+
regex: typeof regexGrader;
|
|
6795
|
+
isJson: typeof isJsonGrader;
|
|
6796
|
+
json: typeof jsonGrader;
|
|
6797
|
+
rubrics: typeof rubricsGrader;
|
|
6798
|
+
llmGrader: typeof llmGrader;
|
|
6799
|
+
codeGrader: typeof codeGrader;
|
|
6800
|
+
}>;
|
|
6801
|
+
type GraderCatalog = typeof graders;
|
|
6802
|
+
|
|
6681
6803
|
/**
|
|
6682
6804
|
* Client for invoking configured targets from code-grader scripts.
|
|
6683
6805
|
*
|
|
@@ -7022,4 +7144,4 @@ declare function definePromptTemplate(handler: PromptTemplateHandler): void;
|
|
|
7022
7144
|
*/
|
|
7023
7145
|
declare function defineAssertion(handler: AssertionHandler): void;
|
|
7024
7146
|
|
|
7025
|
-
export { type AssertionContext, type AssertionHandler, type AssertionScore, type AssertionType, type CodeGraderHandler, type CodeGraderInput, CodeGraderInputSchema, type CodeGraderResult, CodeGraderResultSchema, type Content, type ContentFile, ContentFileSchema, type ContentImage, ContentImageSchema, ContentSchema, type ContentText, ContentTextSchema, type DefinedEvalSuite, type EvalAssertionConfig, type EvalDefinition, type EvalDockerWorkspace, type EvalExecution, type EvalMessage, type EvalMessageContent, type EvalPreprocessor, type EvalRequires, type EvalTargetRef, type EvalTest, type EvalTrials, type EvalTurn, type EvalWorkspace, type EvalWorkspaceHook, type EvalWorkspaceHooks, type EvalWorkspaceRepo, type LowerEvalYamlValue, type Message, MessageSchema, type PromptTemplateHandler, type PromptTemplateInput, PromptTemplateInputSchema, TRACE_EVENT_TYPES, TRACE_REDACTION_LEVELS, TRACE_SOURCE_KINDS, TRACE_TOOL_STATUSES, type TargetClient, type TargetInfo, TargetInvocationError, type TargetInvokeRequest, type TargetInvokeResponse, TargetNotAvailableError, type TokenUsage, TokenUsageSchema, type ToolCall, ToolCallSchema, type Trace, type TraceArtifact, TraceArtifactSchema, type TraceBranch, TraceBranchSchema, type TraceError, TraceErrorSchema, type TraceEvent, TraceEventSchema, type TraceMessage, TraceMessageSchema, type TraceModel, TraceModelSchema, type TraceRawEvidence, TraceRawEvidenceSchema, type TraceRedactionState, TraceRedactionStateSchema, TraceSchema, type TraceSession, TraceSessionSchema, type TraceSource, type TraceSourceRef, TraceSourceRefSchema, TraceSourceSchema, type TraceSummary, TraceSummarySchema, type TraceTool, TraceToolSchema, createTargetClient, defineAssertion, defineCodeGrader, defineEval, definePromptTemplate, evalSuite, serializeEvalYaml, toEvalYamlObject };
|
|
7147
|
+
export { type AssertionContext, type AssertionHandler, type AssertionScore, type AssertionType, type CodeGraderConfig, type CodeGraderHandler, type CodeGraderInput, CodeGraderInputSchema, type CodeGraderOptions, type CodeGraderResult, CodeGraderResultSchema, type CodeGraderTargetOptions, type ContainsGraderConfig, type Content, type ContentFile, ContentFileSchema, type ContentImage, ContentImageSchema, ContentSchema, type ContentText, ContentTextSchema, type DefinedEvalSuite, type EqualsGraderConfig, type EvalAssertionConfig, type EvalDefinition, type EvalDockerWorkspace, type EvalExecution, type EvalMessage, type EvalMessageContent, type EvalPreprocessor, type EvalRequires, type EvalTargetRef, type EvalTest, type EvalTrials, type EvalTurn, type EvalWorkspace, type EvalWorkspaceHook, type EvalWorkspaceHooks, type EvalWorkspaceRepo, type GraderCatalog, type GraderCommand, type GraderCommonConfig, type GraderHelperConfig, type GraderHelperOptions, type GraderPromptScriptConfig, type GraderRubric, type GraderRubricCriterion, type GraderRubricOperator, type GraderScoreRange, type IsJsonGraderConfig, type LlmGraderConfig, type LlmGraderOptions, type LowerEvalYamlValue, type Message, MessageSchema, type PromptTemplateHandler, type PromptTemplateInput, PromptTemplateInputSchema, type RegexGraderConfig, type RegexGraderOptions, type RubricsGraderConfig, TRACE_EVENT_TYPES, TRACE_REDACTION_LEVELS, TRACE_SOURCE_KINDS, TRACE_TOOL_STATUSES, type TargetClient, type TargetInfo, TargetInvocationError, type TargetInvokeRequest, type TargetInvokeResponse, TargetNotAvailableError, type TokenUsage, TokenUsageSchema, type ToolCall, ToolCallSchema, type Trace, type TraceArtifact, TraceArtifactSchema, type TraceBranch, TraceBranchSchema, type TraceError, TraceErrorSchema, type TraceEvent, TraceEventSchema, type TraceMessage, TraceMessageSchema, type TraceModel, TraceModelSchema, type TraceRawEvidence, TraceRawEvidenceSchema, type TraceRedactionState, TraceRedactionStateSchema, TraceSchema, type TraceSession, TraceSessionSchema, type TraceSource, type TraceSourceRef, TraceSourceRefSchema, TraceSourceSchema, type TraceSummary, TraceSummarySchema, type TraceTool, TraceToolSchema, codeGrader, containsGrader, createTargetClient, defineAssertion, defineCodeGrader, defineEval, definePromptTemplate, equalsGrader, evalSuite, exactGrader, graders, isJsonGrader, jsonGrader, llmGrader, regexGrader, rubricsGrader, serializeEvalYaml, toEvalYamlObject };
|
package/dist/index.d.ts
CHANGED
|
@@ -6501,17 +6501,21 @@ declare const KNOWN_SNAKE_CASE_KEYS: {
|
|
|
6501
6501
|
readonly failOnError: "fail_on_error";
|
|
6502
6502
|
readonly inputFiles: "input_files";
|
|
6503
6503
|
readonly keepWorkspaces: "keep_workspaces";
|
|
6504
|
+
readonly maxCalls: "max_calls";
|
|
6504
6505
|
readonly maxCostUsd: "max_cost_usd";
|
|
6505
6506
|
readonly maxDurationMs: "max_duration_ms";
|
|
6506
6507
|
readonly maxInput: "max_input";
|
|
6507
6508
|
readonly maxLlmCalls: "max_llm_calls";
|
|
6508
6509
|
readonly maxOutput: "max_output";
|
|
6510
|
+
readonly maxSteps: "max_steps";
|
|
6509
6511
|
readonly maxTokens: "max_tokens";
|
|
6510
6512
|
readonly maxToolCalls: "max_tool_calls";
|
|
6511
6513
|
readonly minScore: "min_score";
|
|
6512
6514
|
readonly onDependencyFailure: "on_dependency_failure";
|
|
6513
6515
|
readonly onTurnFailure: "on_turn_failure";
|
|
6514
6516
|
readonly outputPath: "output_path";
|
|
6517
|
+
readonly requiredMinScore: "required_min_score";
|
|
6518
|
+
readonly scoreRange: "score_range";
|
|
6515
6519
|
readonly scoreRanges: "score_ranges";
|
|
6516
6520
|
readonly skipDefaults: "skip_defaults";
|
|
6517
6521
|
readonly targetExplorationRatio: "target_exploration_ratio";
|
|
@@ -6678,6 +6682,124 @@ declare function toEvalYamlObject<T extends EvalDefinition | DefinedEvalSuite>(d
|
|
|
6678
6682
|
*/
|
|
6679
6683
|
declare function serializeEvalYaml<T extends EvalDefinition | DefinedEvalSuite>(definition: T): string;
|
|
6680
6684
|
|
|
6685
|
+
type GraderCommand = string | readonly string[];
|
|
6686
|
+
interface GraderHelperOptions {
|
|
6687
|
+
readonly name?: string;
|
|
6688
|
+
readonly weight?: number;
|
|
6689
|
+
readonly required?: boolean | number;
|
|
6690
|
+
readonly minScore?: number;
|
|
6691
|
+
readonly negate?: boolean;
|
|
6692
|
+
}
|
|
6693
|
+
interface GraderCommonConfig {
|
|
6694
|
+
readonly name?: string;
|
|
6695
|
+
readonly weight?: number;
|
|
6696
|
+
readonly required?: boolean | number;
|
|
6697
|
+
readonly minScore?: number;
|
|
6698
|
+
readonly negate?: boolean;
|
|
6699
|
+
}
|
|
6700
|
+
interface ContainsGraderConfig extends EvalAssertionConfig, GraderCommonConfig {
|
|
6701
|
+
readonly type: 'contains';
|
|
6702
|
+
readonly value: string;
|
|
6703
|
+
}
|
|
6704
|
+
interface EqualsGraderConfig extends EvalAssertionConfig, GraderCommonConfig {
|
|
6705
|
+
readonly type: 'equals';
|
|
6706
|
+
readonly value: string;
|
|
6707
|
+
}
|
|
6708
|
+
interface RegexGraderOptions extends GraderHelperOptions {
|
|
6709
|
+
readonly flags?: string;
|
|
6710
|
+
}
|
|
6711
|
+
interface RegexGraderConfig extends EvalAssertionConfig, GraderCommonConfig {
|
|
6712
|
+
readonly type: 'regex';
|
|
6713
|
+
readonly value: string;
|
|
6714
|
+
readonly flags?: string;
|
|
6715
|
+
}
|
|
6716
|
+
interface IsJsonGraderConfig extends EvalAssertionConfig, GraderCommonConfig {
|
|
6717
|
+
readonly type: 'is-json';
|
|
6718
|
+
}
|
|
6719
|
+
type GraderRubricOperator = 'correctness' | 'contradiction';
|
|
6720
|
+
interface GraderScoreRange {
|
|
6721
|
+
readonly scoreRange: readonly [number, number];
|
|
6722
|
+
readonly outcome: string;
|
|
6723
|
+
}
|
|
6724
|
+
interface GraderRubric {
|
|
6725
|
+
readonly id?: string;
|
|
6726
|
+
readonly outcome?: string;
|
|
6727
|
+
readonly criteria?: string;
|
|
6728
|
+
readonly operator?: GraderRubricOperator;
|
|
6729
|
+
readonly weight?: number;
|
|
6730
|
+
readonly required?: boolean;
|
|
6731
|
+
readonly minScore?: number;
|
|
6732
|
+
readonly requiredMinScore?: number;
|
|
6733
|
+
readonly scoreRanges?: readonly GraderScoreRange[];
|
|
6734
|
+
}
|
|
6735
|
+
type GraderRubricCriterion = string | GraderRubric;
|
|
6736
|
+
interface RubricsGraderConfig extends EvalAssertionConfig, GraderCommonConfig {
|
|
6737
|
+
readonly type: 'rubrics';
|
|
6738
|
+
readonly criteria: readonly GraderRubricCriterion[];
|
|
6739
|
+
}
|
|
6740
|
+
interface GraderPromptScriptConfig {
|
|
6741
|
+
readonly command: readonly string[];
|
|
6742
|
+
readonly config?: Readonly<Record<string, unknown>>;
|
|
6743
|
+
}
|
|
6744
|
+
interface LlmGraderOptions extends GraderHelperOptions {
|
|
6745
|
+
readonly prompt?: string | GraderPromptScriptConfig;
|
|
6746
|
+
readonly rubrics?: readonly GraderRubric[];
|
|
6747
|
+
readonly target?: string;
|
|
6748
|
+
readonly config?: Readonly<Record<string, unknown>>;
|
|
6749
|
+
readonly maxSteps?: number;
|
|
6750
|
+
readonly temperature?: number;
|
|
6751
|
+
readonly preprocessors?: readonly EvalPreprocessor[];
|
|
6752
|
+
}
|
|
6753
|
+
interface LlmGraderConfig extends EvalAssertionConfig, GraderCommonConfig {
|
|
6754
|
+
readonly type: 'llm-grader';
|
|
6755
|
+
readonly prompt?: string | GraderPromptScriptConfig;
|
|
6756
|
+
readonly rubrics?: readonly GraderRubric[];
|
|
6757
|
+
readonly target?: string;
|
|
6758
|
+
readonly config?: Readonly<Record<string, unknown>>;
|
|
6759
|
+
readonly maxSteps?: number;
|
|
6760
|
+
readonly temperature?: number;
|
|
6761
|
+
readonly preprocessors?: readonly EvalPreprocessor[];
|
|
6762
|
+
}
|
|
6763
|
+
interface CodeGraderTargetOptions {
|
|
6764
|
+
readonly maxCalls?: number;
|
|
6765
|
+
}
|
|
6766
|
+
interface CodeGraderOptions extends GraderHelperOptions {
|
|
6767
|
+
readonly cwd?: string;
|
|
6768
|
+
readonly target?: true | CodeGraderTargetOptions;
|
|
6769
|
+
readonly config?: Readonly<Record<string, unknown>>;
|
|
6770
|
+
readonly preprocessors?: readonly EvalPreprocessor[];
|
|
6771
|
+
}
|
|
6772
|
+
interface CodeGraderConfig extends EvalAssertionConfig, GraderCommonConfig {
|
|
6773
|
+
readonly type: 'code-grader';
|
|
6774
|
+
readonly command: GraderCommand;
|
|
6775
|
+
readonly cwd?: string;
|
|
6776
|
+
readonly target?: true | CodeGraderTargetOptions;
|
|
6777
|
+
readonly config?: Readonly<Record<string, unknown>>;
|
|
6778
|
+
readonly preprocessors?: readonly EvalPreprocessor[];
|
|
6779
|
+
}
|
|
6780
|
+
type GraderHelperConfig = ContainsGraderConfig | EqualsGraderConfig | RegexGraderConfig | IsJsonGraderConfig | RubricsGraderConfig | LlmGraderConfig | CodeGraderConfig;
|
|
6781
|
+
declare function containsGrader(value: string, options?: GraderHelperOptions): ContainsGraderConfig;
|
|
6782
|
+
declare function equalsGrader(value: string, options?: GraderHelperOptions): EqualsGraderConfig;
|
|
6783
|
+
declare function exactGrader(value: string, options?: GraderHelperOptions): EqualsGraderConfig;
|
|
6784
|
+
declare function regexGrader(pattern: string | RegExp, options?: RegexGraderOptions): RegexGraderConfig;
|
|
6785
|
+
declare function isJsonGrader(options?: GraderHelperOptions): IsJsonGraderConfig;
|
|
6786
|
+
declare function jsonGrader(options?: GraderHelperOptions): IsJsonGraderConfig;
|
|
6787
|
+
declare function rubricsGrader(criteria: readonly GraderRubricCriterion[], options?: GraderHelperOptions): RubricsGraderConfig;
|
|
6788
|
+
declare function llmGrader(options?: LlmGraderOptions): LlmGraderConfig;
|
|
6789
|
+
declare function codeGrader(command: GraderCommand, options?: CodeGraderOptions): CodeGraderConfig;
|
|
6790
|
+
declare const graders: Readonly<{
|
|
6791
|
+
contains: typeof containsGrader;
|
|
6792
|
+
equals: typeof equalsGrader;
|
|
6793
|
+
exact: typeof exactGrader;
|
|
6794
|
+
regex: typeof regexGrader;
|
|
6795
|
+
isJson: typeof isJsonGrader;
|
|
6796
|
+
json: typeof jsonGrader;
|
|
6797
|
+
rubrics: typeof rubricsGrader;
|
|
6798
|
+
llmGrader: typeof llmGrader;
|
|
6799
|
+
codeGrader: typeof codeGrader;
|
|
6800
|
+
}>;
|
|
6801
|
+
type GraderCatalog = typeof graders;
|
|
6802
|
+
|
|
6681
6803
|
/**
|
|
6682
6804
|
* Client for invoking configured targets from code-grader scripts.
|
|
6683
6805
|
*
|
|
@@ -7022,4 +7144,4 @@ declare function definePromptTemplate(handler: PromptTemplateHandler): void;
|
|
|
7022
7144
|
*/
|
|
7023
7145
|
declare function defineAssertion(handler: AssertionHandler): void;
|
|
7024
7146
|
|
|
7025
|
-
export { type AssertionContext, type AssertionHandler, type AssertionScore, type AssertionType, type CodeGraderHandler, type CodeGraderInput, CodeGraderInputSchema, type CodeGraderResult, CodeGraderResultSchema, type Content, type ContentFile, ContentFileSchema, type ContentImage, ContentImageSchema, ContentSchema, type ContentText, ContentTextSchema, type DefinedEvalSuite, type EvalAssertionConfig, type EvalDefinition, type EvalDockerWorkspace, type EvalExecution, type EvalMessage, type EvalMessageContent, type EvalPreprocessor, type EvalRequires, type EvalTargetRef, type EvalTest, type EvalTrials, type EvalTurn, type EvalWorkspace, type EvalWorkspaceHook, type EvalWorkspaceHooks, type EvalWorkspaceRepo, type LowerEvalYamlValue, type Message, MessageSchema, type PromptTemplateHandler, type PromptTemplateInput, PromptTemplateInputSchema, TRACE_EVENT_TYPES, TRACE_REDACTION_LEVELS, TRACE_SOURCE_KINDS, TRACE_TOOL_STATUSES, type TargetClient, type TargetInfo, TargetInvocationError, type TargetInvokeRequest, type TargetInvokeResponse, TargetNotAvailableError, type TokenUsage, TokenUsageSchema, type ToolCall, ToolCallSchema, type Trace, type TraceArtifact, TraceArtifactSchema, type TraceBranch, TraceBranchSchema, type TraceError, TraceErrorSchema, type TraceEvent, TraceEventSchema, type TraceMessage, TraceMessageSchema, type TraceModel, TraceModelSchema, type TraceRawEvidence, TraceRawEvidenceSchema, type TraceRedactionState, TraceRedactionStateSchema, TraceSchema, type TraceSession, TraceSessionSchema, type TraceSource, type TraceSourceRef, TraceSourceRefSchema, TraceSourceSchema, type TraceSummary, TraceSummarySchema, type TraceTool, TraceToolSchema, createTargetClient, defineAssertion, defineCodeGrader, defineEval, definePromptTemplate, evalSuite, serializeEvalYaml, toEvalYamlObject };
|
|
7147
|
+
export { type AssertionContext, type AssertionHandler, type AssertionScore, type AssertionType, type CodeGraderConfig, type CodeGraderHandler, type CodeGraderInput, CodeGraderInputSchema, type CodeGraderOptions, type CodeGraderResult, CodeGraderResultSchema, type CodeGraderTargetOptions, type ContainsGraderConfig, type Content, type ContentFile, ContentFileSchema, type ContentImage, ContentImageSchema, ContentSchema, type ContentText, ContentTextSchema, type DefinedEvalSuite, type EqualsGraderConfig, type EvalAssertionConfig, type EvalDefinition, type EvalDockerWorkspace, type EvalExecution, type EvalMessage, type EvalMessageContent, type EvalPreprocessor, type EvalRequires, type EvalTargetRef, type EvalTest, type EvalTrials, type EvalTurn, type EvalWorkspace, type EvalWorkspaceHook, type EvalWorkspaceHooks, type EvalWorkspaceRepo, type GraderCatalog, type GraderCommand, type GraderCommonConfig, type GraderHelperConfig, type GraderHelperOptions, type GraderPromptScriptConfig, type GraderRubric, type GraderRubricCriterion, type GraderRubricOperator, type GraderScoreRange, type IsJsonGraderConfig, type LlmGraderConfig, type LlmGraderOptions, type LowerEvalYamlValue, type Message, MessageSchema, type PromptTemplateHandler, type PromptTemplateInput, PromptTemplateInputSchema, type RegexGraderConfig, type RegexGraderOptions, type RubricsGraderConfig, TRACE_EVENT_TYPES, TRACE_REDACTION_LEVELS, TRACE_SOURCE_KINDS, TRACE_TOOL_STATUSES, type TargetClient, type TargetInfo, TargetInvocationError, type TargetInvokeRequest, type TargetInvokeResponse, TargetNotAvailableError, type TokenUsage, TokenUsageSchema, type ToolCall, ToolCallSchema, type Trace, type TraceArtifact, TraceArtifactSchema, type TraceBranch, TraceBranchSchema, type TraceError, TraceErrorSchema, type TraceEvent, TraceEventSchema, type TraceMessage, TraceMessageSchema, type TraceModel, TraceModelSchema, type TraceRawEvidence, TraceRawEvidenceSchema, type TraceRedactionState, TraceRedactionStateSchema, TraceSchema, type TraceSession, TraceSessionSchema, type TraceSource, type TraceSourceRef, TraceSourceRefSchema, TraceSourceSchema, type TraceSummary, TraceSummarySchema, type TraceTool, TraceToolSchema, codeGrader, containsGrader, createTargetClient, defineAssertion, defineCodeGrader, defineEval, definePromptTemplate, equalsGrader, evalSuite, exactGrader, graders, isJsonGrader, jsonGrader, llmGrader, regexGrader, rubricsGrader, serializeEvalYaml, toEvalYamlObject };
|
package/dist/index.js
CHANGED
|
@@ -246,17 +246,21 @@ var KNOWN_SNAKE_CASE_KEYS = {
|
|
|
246
246
|
failOnError: "fail_on_error",
|
|
247
247
|
inputFiles: "input_files",
|
|
248
248
|
keepWorkspaces: "keep_workspaces",
|
|
249
|
+
maxCalls: "max_calls",
|
|
249
250
|
maxCostUsd: "max_cost_usd",
|
|
250
251
|
maxDurationMs: "max_duration_ms",
|
|
251
252
|
maxInput: "max_input",
|
|
252
253
|
maxLlmCalls: "max_llm_calls",
|
|
253
254
|
maxOutput: "max_output",
|
|
255
|
+
maxSteps: "max_steps",
|
|
254
256
|
maxTokens: "max_tokens",
|
|
255
257
|
maxToolCalls: "max_tool_calls",
|
|
256
258
|
minScore: "min_score",
|
|
257
259
|
onDependencyFailure: "on_dependency_failure",
|
|
258
260
|
onTurnFailure: "on_turn_failure",
|
|
259
261
|
outputPath: "output_path",
|
|
262
|
+
requiredMinScore: "required_min_score",
|
|
263
|
+
scoreRange: "score_range",
|
|
260
264
|
scoreRanges: "score_ranges",
|
|
261
265
|
skipDefaults: "skip_defaults",
|
|
262
266
|
targetExplorationRatio: "target_exploration_ratio",
|
|
@@ -312,6 +316,87 @@ function serializeEvalYaml(definition) {
|
|
|
312
316
|
return stringifyYaml(toEvalYamlObject(definition), { lineWidth: 0 }).trimEnd();
|
|
313
317
|
}
|
|
314
318
|
|
|
319
|
+
// src/graders.ts
|
|
320
|
+
function withCommon(config, options = {}) {
|
|
321
|
+
return {
|
|
322
|
+
...options.name !== void 0 ? { name: options.name } : {},
|
|
323
|
+
...config,
|
|
324
|
+
...options.weight !== void 0 ? { weight: options.weight } : {},
|
|
325
|
+
...options.required !== void 0 ? { required: options.required } : {},
|
|
326
|
+
...options.minScore !== void 0 ? { minScore: options.minScore } : {},
|
|
327
|
+
...options.negate !== void 0 ? { negate: options.negate } : {}
|
|
328
|
+
};
|
|
329
|
+
}
|
|
330
|
+
function containsGrader(value, options) {
|
|
331
|
+
return withCommon({ type: "contains", value }, options);
|
|
332
|
+
}
|
|
333
|
+
function equalsGrader(value, options) {
|
|
334
|
+
return withCommon({ type: "equals", value }, options);
|
|
335
|
+
}
|
|
336
|
+
function exactGrader(value, options) {
|
|
337
|
+
return equalsGrader(value, options);
|
|
338
|
+
}
|
|
339
|
+
function regexGrader(pattern, options = {}) {
|
|
340
|
+
const value = pattern instanceof RegExp ? pattern.source : pattern;
|
|
341
|
+
const flags = options.flags ?? (pattern instanceof RegExp ? pattern.flags : void 0);
|
|
342
|
+
return withCommon(
|
|
343
|
+
{
|
|
344
|
+
type: "regex",
|
|
345
|
+
value,
|
|
346
|
+
...flags ? { flags } : {}
|
|
347
|
+
},
|
|
348
|
+
options
|
|
349
|
+
);
|
|
350
|
+
}
|
|
351
|
+
function isJsonGrader(options) {
|
|
352
|
+
return withCommon({ type: "is-json" }, options);
|
|
353
|
+
}
|
|
354
|
+
function jsonGrader(options) {
|
|
355
|
+
return isJsonGrader(options);
|
|
356
|
+
}
|
|
357
|
+
function rubricsGrader(criteria, options) {
|
|
358
|
+
return withCommon({ type: "rubrics", criteria }, options);
|
|
359
|
+
}
|
|
360
|
+
function llmGrader(options = {}) {
|
|
361
|
+
return withCommon(
|
|
362
|
+
{
|
|
363
|
+
type: "llm-grader",
|
|
364
|
+
...options.prompt !== void 0 ? { prompt: options.prompt } : {},
|
|
365
|
+
...options.rubrics !== void 0 ? { rubrics: options.rubrics } : {},
|
|
366
|
+
...options.target !== void 0 ? { target: options.target } : {},
|
|
367
|
+
...options.config !== void 0 ? { config: options.config } : {},
|
|
368
|
+
...options.maxSteps !== void 0 ? { maxSteps: options.maxSteps } : {},
|
|
369
|
+
...options.temperature !== void 0 ? { temperature: options.temperature } : {},
|
|
370
|
+
...options.preprocessors !== void 0 ? { preprocessors: options.preprocessors } : {}
|
|
371
|
+
},
|
|
372
|
+
options
|
|
373
|
+
);
|
|
374
|
+
}
|
|
375
|
+
function codeGrader(command, options = {}) {
|
|
376
|
+
return withCommon(
|
|
377
|
+
{
|
|
378
|
+
type: "code-grader",
|
|
379
|
+
command,
|
|
380
|
+
...options.cwd !== void 0 ? { cwd: options.cwd } : {},
|
|
381
|
+
...options.target !== void 0 ? { target: options.target } : {},
|
|
382
|
+
...options.config !== void 0 ? { config: options.config } : {},
|
|
383
|
+
...options.preprocessors !== void 0 ? { preprocessors: options.preprocessors } : {}
|
|
384
|
+
},
|
|
385
|
+
options
|
|
386
|
+
);
|
|
387
|
+
}
|
|
388
|
+
var graders = Object.freeze({
|
|
389
|
+
contains: containsGrader,
|
|
390
|
+
equals: equalsGrader,
|
|
391
|
+
exact: exactGrader,
|
|
392
|
+
regex: regexGrader,
|
|
393
|
+
isJson: isJsonGrader,
|
|
394
|
+
json: jsonGrader,
|
|
395
|
+
rubrics: rubricsGrader,
|
|
396
|
+
llmGrader,
|
|
397
|
+
codeGrader
|
|
398
|
+
});
|
|
399
|
+
|
|
315
400
|
// src/target-client.ts
|
|
316
401
|
var TargetNotAvailableError = class extends Error {
|
|
317
402
|
constructor(message) {
|
|
@@ -639,12 +724,22 @@ export {
|
|
|
639
724
|
TraceSourceSchema,
|
|
640
725
|
TraceSummarySchema,
|
|
641
726
|
TraceToolSchema,
|
|
727
|
+
codeGrader,
|
|
728
|
+
containsGrader,
|
|
642
729
|
createTargetClient,
|
|
643
730
|
defineAssertion,
|
|
644
731
|
defineCodeGrader,
|
|
645
732
|
defineEval,
|
|
646
733
|
definePromptTemplate,
|
|
734
|
+
equalsGrader,
|
|
647
735
|
evalSuite,
|
|
736
|
+
exactGrader,
|
|
737
|
+
graders,
|
|
738
|
+
isJsonGrader,
|
|
739
|
+
jsonGrader,
|
|
740
|
+
llmGrader,
|
|
741
|
+
regexGrader,
|
|
742
|
+
rubricsGrader,
|
|
648
743
|
serializeEvalYaml,
|
|
649
744
|
toEvalYamlObject,
|
|
650
745
|
z2 as z
|