@lov3kaizen/agentsea-evaluate 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +339 -0
- package/dist/annotation/index.d.mts +3 -0
- package/dist/annotation/index.d.ts +3 -0
- package/dist/annotation/index.js +630 -0
- package/dist/annotation/index.mjs +22 -0
- package/dist/chunk-5JRYKRSE.mjs +2791 -0
- package/dist/chunk-EUXXIZK3.mjs +676 -0
- package/dist/chunk-NBMUSATK.mjs +596 -0
- package/dist/chunk-PAQ2TTJJ.mjs +1105 -0
- package/dist/chunk-TUMNJN2S.mjs +416 -0
- package/dist/continuous/index.d.mts +2 -0
- package/dist/continuous/index.d.ts +2 -0
- package/dist/continuous/index.js +707 -0
- package/dist/continuous/index.mjs +16 -0
- package/dist/datasets/index.d.mts +1 -0
- package/dist/datasets/index.d.ts +1 -0
- package/dist/datasets/index.js +456 -0
- package/dist/datasets/index.mjs +14 -0
- package/dist/evaluation/index.d.mts +1 -0
- package/dist/evaluation/index.d.ts +1 -0
- package/dist/evaluation/index.js +2853 -0
- package/dist/evaluation/index.mjs +78 -0
- package/dist/feedback/index.d.mts +2 -0
- package/dist/feedback/index.d.ts +2 -0
- package/dist/feedback/index.js +1158 -0
- package/dist/feedback/index.mjs +40 -0
- package/dist/index-6Pbiq7ny.d.mts +234 -0
- package/dist/index-6Pbiq7ny.d.ts +234 -0
- package/dist/index-BNTycFEA.d.mts +479 -0
- package/dist/index-BNTycFEA.d.ts +479 -0
- package/dist/index-CTYCfWfH.d.mts +543 -0
- package/dist/index-CTYCfWfH.d.ts +543 -0
- package/dist/index-Cq5LwG_3.d.mts +322 -0
- package/dist/index-Cq5LwG_3.d.ts +322 -0
- package/dist/index-bPghFsfP.d.mts +315 -0
- package/dist/index-bPghFsfP.d.ts +315 -0
- package/dist/index.d.mts +81 -0
- package/dist/index.d.ts +81 -0
- package/dist/index.js +5962 -0
- package/dist/index.mjs +429 -0
- package/package.json +102 -0
package/dist/index.d.mts
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
import { T as ThumbsCollector, F as FeedbackStoreInterface, a as ThumbsFeedback } from './index-BNTycFEA.mjs';
|
|
2
|
+
export { s as AggregationMetric, A as AggregationOptions, t as AggregationResult, O as BaseCollector, B as BaseFeedbackEntry, o as CollectCorrectionInput, p as CollectMultiCriteriaInput, n as CollectPreferenceInput, D as CollectPreferenceInputSchema, m as CollectRatingInput, l as CollectThumbsInput, z as CollectThumbsInputSchema, Y as CorrectionCollector, C as CorrectionFeedback, d as CriterionDefinition, e as CriterionRating, E as ExportFormat, u as ExportOptions, J as FeedbackAggregator, g as FeedbackCollectorOptions, f as FeedbackEntry, L as FeedbackExporter, q as FeedbackQueryOptions, r as FeedbackQueryResult, v as FeedbackStoreConfig, G as MemoryFeedbackStore, _ as MultiCriteriaCollector, k as MultiCriteriaCollectorOptions, M as MultiCriteriaFeedback, P as PreferenceChoice, y as PreferenceChoiceSchema, W as PreferenceCollector, j as PreferenceCollectorOptions, c as PreferenceFeedback, U as RatingCollector, i as RatingCollectorOptions, R as RatingFeedback, H as SQLiteFeedbackStore, S as StarRating, x as StarRatingSchema, h as ThumbsCollectorOptions, b as ThumbsRating, w as ThumbsRatingSchema, Z as createCorrectionCollector, K as createFeedbackAggregator, N as createFeedbackExporter, I as createFeedbackStore, $ as createMultiCriteriaCollector, X as createPreferenceCollector, V as createRatingCollector, Q as createThumbsCollector } from './index-BNTycFEA.mjs';
|
|
3
|
+
import { E as EvaluationPipeline, a as EvalDataset, P as PipelineEvaluationResult } from './index-CTYCfWfH.mjs';
|
|
4
|
+
export { X as Accuracy, A as AccuracyMetricConfig, W as BaseMetric, B as BaseMetricConfig, aj as CodeQualityRubric, $ as Coherence, C as CoherenceMetricConfig, al as ComparativeJudge, m as ComparativeJudgeConfig, n as ComparisonInput, o as ComparisonResult, an as ConsensusJudge, p as ConsensusJudgeConfig, a5 as ContextRelevance, d as ContextRelevanceMetricConfig, a7 as CustomMetric, e as CustomMetricConfig, t as EvalDatasetConfig, K as EvalDatasetInterface, s as EvalDatasetItem, Q as EvalRunner, N as EvalRunnerConfig, x as EvaluationError, g as EvaluationInput, u as EvaluationPipelineConfig, w as EvaluationProgress, I as EvaluationSummary, D as FailureAnalysis, G as FailureFilterOptions, a3 as Faithfulness, F as FaithfulnessMetricConfig, H as HFDatasetConfig, ak as HelpfulnessRubric, h as JudgeCriterion, q as JudgeInterface, r as JudgeResult, J as JudgeType, ae as LLMJudge, L as LLMJudgeConfig, i as LLMProviderInterface, f as MetricInterface, b as MetricResult, M as MetricType, z as MetricsSummary, v as PipelineEvaluationOptions, ai as QualityRubric, Z as Relevance, R as RelevanceMetricConfig, k as RubricConfig, ag as RubricJudge, l as RubricJudgeConfig, j as RubricLevel, S as ScoreRange, y as SingleEvaluationResult, a1 as Toxicity, c as ToxicityCategory, T as ToxicityMetricConfig, Y as createAccuracyMetric, a0 as createCoherenceMetric, am as createComparativeJudge, ao as createConsensusJudge, ad as createContainsMetric, a6 as createContextRelevanceMetric, a8 as createCustomMetric, O as createEvalDataset, U as createEvalRunner, V as createEvaluationPipeline, a4 as createFaithfulnessMetric, ac as createJSONMetric, af as createLLMJudge, aa as createLengthMetric, ab as createRegexMetric, _ as createRelevanceMetric, ah as createRubricJudge, a9 as createSimpleMetric, a2 as createToxicityMetric } from './index-CTYCfWfH.mjs';
|
|
5
|
+
export { d as Annotation, o as AnnotationEvent, n as AnnotationEventType, c as AnnotationItem, a as AnnotationItemStatus, e as AnnotationQuality, t as AnnotationQueue, h as AnnotationQueueConfig, m as AnnotationResults, p as AnnotationTask, b as AnnotationTaskConfig, A as AnnotationTaskStatus, f as Annotator, g as AnnotatorStats, i as AssignmentStrategy, B as BatchAssignment, r as BinaryClassificationSchema, j as ConsensusConfig, v as ConsensusManager, C as ConsensusMethod, k as ConsensusResult, D as Disagreement, G as GoldStandardItem, I as IAnnotationTask, P as PrioritizationType, Q as QualityControlConfig, s as QualityRatingSchema, l as QueueStats, T as TextSpanSchema, u as createAnnotationQueue, q as createAnnotationTask, w as createConsensusManager } from './index-bPghFsfP.mjs';
|
|
6
|
+
export { A as AnthropicFormatItem, b as ConversationExample, C as ConversationTurn, n as DPOFormatItem, f as DatasetBuilderConfig, a as DatasetExportFormat, m as DatasetExportOptions, v as DatasetExporter, i as DatasetFilterConfig, c as DatasetItem, g as DatasetQueryOptions, h as DatasetQueryResult, d as DatasetStats, D as DatasetType, q as DatasetValidationError, p as DatasetValidationResult, r as DatasetValidationWarning, E as ExportResult, F as FeedbackStoreRef, H as HFExportOptions, k as InstructionBuildOptions, I as InstructionExample, O as OpenAIFormatItem, j as PreferenceBuildOptions, s as PreferenceDataset, t as PreferenceDatasetBuilder, l as PreferenceDatasetInterface, P as PreferencePair, Q as QAExample, R as RLHFFormatItem, o as SFTFormatItem, e as SamplingConfig, S as SamplingStrategyType, w as createDatasetExporter, u as createPreferenceDatasetBuilder } from './index-6Pbiq7ny.mjs';
|
|
7
|
+
export { r as ABMetricResult, p as ABTest, m as ABTestConfig, q as ABTestResults, G as ABTestRunner, o as ABTestStatus, n as ABTestVariants, g as Alert, d as AlertChannelConfig, A as AlertChannelType, z as AlertManager, f as AlertManagerConfig, h as AlertNotification, e as AlertRule, B as BaselineMetrics, x as ContinuousEval, C as ContinuousEvalConfig, w as ContinuousEvalEvent, v as ContinuousEvalEventType, c as ContinuousEvalStats, D as DashboardUpdate, a as EvalInput, b as EvalOutput, E as EvaluationPipelineRef, H as HistoricalQueryOptions, i as MetricBaseline, l as MetricImprovement, k as MetricRegression, s as MetricSummary, M as MonitoringStatus, R as RegressionDetectorConfig, j as RegressionResult, S as SampleAssignment, u as ScheduleConfig, t as TimeSeries, T as TimeSeriesPoint, V as VariantConfig, I as createABTestRunner, F as createAlertManager, y as createContinuousEval } from './index-Cq5LwG_3.mjs';
|
|
8
|
+
import 'zod';
|
|
9
|
+
import 'eventemitter3';
|
|
10
|
+
|
|
11
|
+
/**
 * Options accepted by the `FeedbackMiddleware` constructor and by
 * `createFeedbackMiddleware`. Every field is optional.
 *
 * - `collector`: a `ThumbsCollector` instance.
 * - `store`: a `FeedbackStoreInterface` instance.
 * - `autoCapture`: boolean flag.
 * - `captureFields`: array of strings.
 *
 * NOTE(review): this declaration shows types only. Defaults, how `collector`
 * and `store` interact, and what `captureFields` selects from are decided by
 * the implementation — confirm there before relying on them.
 */
interface FeedbackMiddlewareOptions {
|
|
12
|
+
collector?: ThumbsCollector;
|
|
13
|
+
store?: FeedbackStoreInterface;
|
|
14
|
+
autoCapture?: boolean;
|
|
15
|
+
captureFields?: string[];
|
|
16
|
+
}
|
|
17
|
+
/**
 * One message in an agent conversation.
 *
 * `role` is limited to the literals `'user'` and `'assistant'`; `metadata`
 * is an optional bag of string-keyed values of unknown type.
 */
interface AgentMessage {
|
|
18
|
+
id: string;
|
|
19
|
+
role: 'user' | 'assistant';
|
|
20
|
+
content: string;
|
|
21
|
+
metadata?: Record<string, unknown>;
|
|
22
|
+
}
|
|
23
|
+
/**
 * Conversation data accepted by `FeedbackMiddleware.capture`: a conversation
 * identifier, an array of `AgentMessage` objects, and optional string-keyed
 * metadata of unknown value type.
 */
interface AgentContext {
|
|
24
|
+
conversationId: string;
|
|
25
|
+
messages: AgentMessage[];
|
|
26
|
+
metadata?: Record<string, unknown>;
|
|
27
|
+
}
|
|
28
|
+
/**
 * Type declaration for the feedback middleware class. The implementation is
 * not part of this declaration.
 *
 * Private members (`collector`, `autoCapture`, `captureFields`,
 * `pendingFeedback`, `cleanupPending`) are declared by name only.
 *
 * Public surface:
 * - `capture(context)` takes an `AgentContext` and returns nothing.
 * - `recordFeedback(responseId, rating, comment?, userId?)` resolves to a
 *   `ThumbsFeedback` or `null`. NOTE(review): presumably `null` means the
 *   `responseId` was not pending — confirm against the implementation.
 * - `getPendingIds()` returns an array of strings.
 * - `clearPending()` returns nothing.
 * - `getCollector()` returns a `ThumbsCollector`.
 */
declare class FeedbackMiddleware {
|
|
29
|
+
private collector;
|
|
30
|
+
private autoCapture;
|
|
31
|
+
private captureFields;
|
|
32
|
+
private pendingFeedback;
|
|
33
|
+
constructor(options: FeedbackMiddlewareOptions);
|
|
34
|
+
capture(context: AgentContext): void;
|
|
35
|
+
recordFeedback(responseId: string, rating: 'up' | 'down', comment?: string, userId?: string): Promise<ThumbsFeedback | null>;
|
|
36
|
+
getPendingIds(): string[];
|
|
37
|
+
clearPending(): void;
|
|
38
|
+
private cleanupPending;
|
|
39
|
+
getCollector(): ThumbsCollector;
|
|
40
|
+
}
|
|
41
|
+
/**
 * Factory that takes `FeedbackMiddlewareOptions` and returns a
 * `FeedbackMiddleware`.
 *
 * NOTE(review): presumably equivalent to `new FeedbackMiddleware(options)` —
 * confirm against the implementation.
 */
declare function createFeedbackMiddleware(options: FeedbackMiddlewareOptions): FeedbackMiddleware;
|
|
42
|
+
|
|
43
|
+
/**
 * One evaluation scenario: a category label, the `EvalDataset` to use, and
 * an optional numeric weight.
 *
 * NOTE(review): how `weight` is applied (presumably when combining category
 * scores) and its default are not visible in this declaration — confirm.
 */
interface EvaluationScenario {
|
|
44
|
+
category: string;
|
|
45
|
+
dataset: EvalDataset;
|
|
46
|
+
weight?: number;
|
|
47
|
+
}
|
|
48
|
+
/**
 * Options accepted by the `AgentEvaluator` constructor and by
 * `createAgentEvaluator`. Both fields are required: an `EvaluationPipeline`
 * and an array of `EvaluationScenario` objects.
 */
interface AgentEvaluatorOptions {
|
|
49
|
+
pipeline: EvaluationPipeline;
|
|
50
|
+
scenarios: EvaluationScenario[];
|
|
51
|
+
}
|
|
52
|
+
/**
 * Contract for the agent passed to `AgentEvaluator.evaluate` and
 * `AgentEvaluator.benchmark`: a single asynchronous `execute` method that
 * takes an input string plus an optional context of unknown type and
 * resolves to a string.
 */
interface AgentInterface {
|
|
53
|
+
execute(input: string, context?: unknown): Promise<string>;
|
|
54
|
+
}
|
|
55
|
+
/**
 * Shape of the value that `AgentEvaluator.evaluate` resolves to.
 *
 * - `overallScore`: a single number.
 * - `categoryScores` / `categoryResults`: string-keyed records holding a
 *   number and a `PipelineEvaluationResult` respectively.
 *   NOTE(review): keys are presumably scenario `category` names — confirm.
 * - `recommendations`: array of strings.
 * - `summary`: test counts (`totalTests`, `passed`, `failed`) and `passRate`.
 *
 * NOTE(review): the numeric scale of `overallScore` and `passRate`
 * (fraction vs. percentage) is not visible in this declaration — confirm.
 */
interface AgentEvaluationResult {
|
|
56
|
+
overallScore: number;
|
|
57
|
+
categoryScores: Record<string, number>;
|
|
58
|
+
categoryResults: Record<string, PipelineEvaluationResult>;
|
|
59
|
+
recommendations: string[];
|
|
60
|
+
summary: {
|
|
61
|
+
totalTests: number;
|
|
62
|
+
passed: number;
|
|
63
|
+
failed: number;
|
|
64
|
+
passRate: number;
|
|
65
|
+
};
|
|
66
|
+
}
|
|
67
|
+
/**
 * Type declaration for the agent evaluator class. The implementation is not
 * part of this declaration.
 *
 * Private members (`pipeline`, `scenarios`) are declared by name only.
 *
 * Public surface:
 * - `evaluate(agent)` resolves to an `AgentEvaluationResult`.
 * - `benchmark(agent, sampleSize?)` resolves to an object with a numeric
 *   `score` and a numeric `latencyMs`. NOTE(review): the default for
 *   `sampleSize` and how latency is measured are not visible here — confirm.
 * - `addScenario(scenario)` takes an `EvaluationScenario` and returns nothing.
 * - `getScenarios()` returns an array of `EvaluationScenario`.
 */
declare class AgentEvaluator {
|
|
68
|
+
private pipeline;
|
|
69
|
+
private scenarios;
|
|
70
|
+
constructor(options: AgentEvaluatorOptions);
|
|
71
|
+
evaluate(agent: AgentInterface): Promise<AgentEvaluationResult>;
|
|
72
|
+
benchmark(agent: AgentInterface, sampleSize?: number): Promise<{
|
|
73
|
+
score: number;
|
|
74
|
+
latencyMs: number;
|
|
75
|
+
}>;
|
|
76
|
+
addScenario(scenario: EvaluationScenario): void;
|
|
77
|
+
getScenarios(): EvaluationScenario[];
|
|
78
|
+
}
|
|
79
|
+
/**
 * Factory that takes `AgentEvaluatorOptions` and returns an `AgentEvaluator`.
 *
 * NOTE(review): presumably equivalent to `new AgentEvaluator(options)` —
 * confirm against the implementation.
 */
declare function createAgentEvaluator(options: AgentEvaluatorOptions): AgentEvaluator;
|
|
80
|
+
|
|
81
|
+
export { type AgentContext, type AgentEvaluationResult, AgentEvaluator, type AgentEvaluatorOptions, type AgentInterface, type AgentMessage, EvalDataset, EvaluationPipeline, type EvaluationScenario, FeedbackMiddleware, type FeedbackMiddlewareOptions, FeedbackStoreInterface, PipelineEvaluationResult, ThumbsCollector, ThumbsFeedback, createAgentEvaluator, createFeedbackMiddleware };
|
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
import { T as ThumbsCollector, F as FeedbackStoreInterface, a as ThumbsFeedback } from './index-BNTycFEA.js';
|
|
2
|
+
export { s as AggregationMetric, A as AggregationOptions, t as AggregationResult, O as BaseCollector, B as BaseFeedbackEntry, o as CollectCorrectionInput, p as CollectMultiCriteriaInput, n as CollectPreferenceInput, D as CollectPreferenceInputSchema, m as CollectRatingInput, l as CollectThumbsInput, z as CollectThumbsInputSchema, Y as CorrectionCollector, C as CorrectionFeedback, d as CriterionDefinition, e as CriterionRating, E as ExportFormat, u as ExportOptions, J as FeedbackAggregator, g as FeedbackCollectorOptions, f as FeedbackEntry, L as FeedbackExporter, q as FeedbackQueryOptions, r as FeedbackQueryResult, v as FeedbackStoreConfig, G as MemoryFeedbackStore, _ as MultiCriteriaCollector, k as MultiCriteriaCollectorOptions, M as MultiCriteriaFeedback, P as PreferenceChoice, y as PreferenceChoiceSchema, W as PreferenceCollector, j as PreferenceCollectorOptions, c as PreferenceFeedback, U as RatingCollector, i as RatingCollectorOptions, R as RatingFeedback, H as SQLiteFeedbackStore, S as StarRating, x as StarRatingSchema, h as ThumbsCollectorOptions, b as ThumbsRating, w as ThumbsRatingSchema, Z as createCorrectionCollector, K as createFeedbackAggregator, N as createFeedbackExporter, I as createFeedbackStore, $ as createMultiCriteriaCollector, X as createPreferenceCollector, V as createRatingCollector, Q as createThumbsCollector } from './index-BNTycFEA.js';
|
|
3
|
+
import { E as EvaluationPipeline, a as EvalDataset, P as PipelineEvaluationResult } from './index-CTYCfWfH.js';
|
|
4
|
+
export { X as Accuracy, A as AccuracyMetricConfig, W as BaseMetric, B as BaseMetricConfig, aj as CodeQualityRubric, $ as Coherence, C as CoherenceMetricConfig, al as ComparativeJudge, m as ComparativeJudgeConfig, n as ComparisonInput, o as ComparisonResult, an as ConsensusJudge, p as ConsensusJudgeConfig, a5 as ContextRelevance, d as ContextRelevanceMetricConfig, a7 as CustomMetric, e as CustomMetricConfig, t as EvalDatasetConfig, K as EvalDatasetInterface, s as EvalDatasetItem, Q as EvalRunner, N as EvalRunnerConfig, x as EvaluationError, g as EvaluationInput, u as EvaluationPipelineConfig, w as EvaluationProgress, I as EvaluationSummary, D as FailureAnalysis, G as FailureFilterOptions, a3 as Faithfulness, F as FaithfulnessMetricConfig, H as HFDatasetConfig, ak as HelpfulnessRubric, h as JudgeCriterion, q as JudgeInterface, r as JudgeResult, J as JudgeType, ae as LLMJudge, L as LLMJudgeConfig, i as LLMProviderInterface, f as MetricInterface, b as MetricResult, M as MetricType, z as MetricsSummary, v as PipelineEvaluationOptions, ai as QualityRubric, Z as Relevance, R as RelevanceMetricConfig, k as RubricConfig, ag as RubricJudge, l as RubricJudgeConfig, j as RubricLevel, S as ScoreRange, y as SingleEvaluationResult, a1 as Toxicity, c as ToxicityCategory, T as ToxicityMetricConfig, Y as createAccuracyMetric, a0 as createCoherenceMetric, am as createComparativeJudge, ao as createConsensusJudge, ad as createContainsMetric, a6 as createContextRelevanceMetric, a8 as createCustomMetric, O as createEvalDataset, U as createEvalRunner, V as createEvaluationPipeline, a4 as createFaithfulnessMetric, ac as createJSONMetric, af as createLLMJudge, aa as createLengthMetric, ab as createRegexMetric, _ as createRelevanceMetric, ah as createRubricJudge, a9 as createSimpleMetric, a2 as createToxicityMetric } from './index-CTYCfWfH.js';
|
|
5
|
+
export { d as Annotation, o as AnnotationEvent, n as AnnotationEventType, c as AnnotationItem, a as AnnotationItemStatus, e as AnnotationQuality, t as AnnotationQueue, h as AnnotationQueueConfig, m as AnnotationResults, p as AnnotationTask, b as AnnotationTaskConfig, A as AnnotationTaskStatus, f as Annotator, g as AnnotatorStats, i as AssignmentStrategy, B as BatchAssignment, r as BinaryClassificationSchema, j as ConsensusConfig, v as ConsensusManager, C as ConsensusMethod, k as ConsensusResult, D as Disagreement, G as GoldStandardItem, I as IAnnotationTask, P as PrioritizationType, Q as QualityControlConfig, s as QualityRatingSchema, l as QueueStats, T as TextSpanSchema, u as createAnnotationQueue, q as createAnnotationTask, w as createConsensusManager } from './index-bPghFsfP.js';
|
|
6
|
+
export { A as AnthropicFormatItem, b as ConversationExample, C as ConversationTurn, n as DPOFormatItem, f as DatasetBuilderConfig, a as DatasetExportFormat, m as DatasetExportOptions, v as DatasetExporter, i as DatasetFilterConfig, c as DatasetItem, g as DatasetQueryOptions, h as DatasetQueryResult, d as DatasetStats, D as DatasetType, q as DatasetValidationError, p as DatasetValidationResult, r as DatasetValidationWarning, E as ExportResult, F as FeedbackStoreRef, H as HFExportOptions, k as InstructionBuildOptions, I as InstructionExample, O as OpenAIFormatItem, j as PreferenceBuildOptions, s as PreferenceDataset, t as PreferenceDatasetBuilder, l as PreferenceDatasetInterface, P as PreferencePair, Q as QAExample, R as RLHFFormatItem, o as SFTFormatItem, e as SamplingConfig, S as SamplingStrategyType, w as createDatasetExporter, u as createPreferenceDatasetBuilder } from './index-6Pbiq7ny.js';
|
|
7
|
+
export { r as ABMetricResult, p as ABTest, m as ABTestConfig, q as ABTestResults, G as ABTestRunner, o as ABTestStatus, n as ABTestVariants, g as Alert, d as AlertChannelConfig, A as AlertChannelType, z as AlertManager, f as AlertManagerConfig, h as AlertNotification, e as AlertRule, B as BaselineMetrics, x as ContinuousEval, C as ContinuousEvalConfig, w as ContinuousEvalEvent, v as ContinuousEvalEventType, c as ContinuousEvalStats, D as DashboardUpdate, a as EvalInput, b as EvalOutput, E as EvaluationPipelineRef, H as HistoricalQueryOptions, i as MetricBaseline, l as MetricImprovement, k as MetricRegression, s as MetricSummary, M as MonitoringStatus, R as RegressionDetectorConfig, j as RegressionResult, S as SampleAssignment, u as ScheduleConfig, t as TimeSeries, T as TimeSeriesPoint, V as VariantConfig, I as createABTestRunner, F as createAlertManager, y as createContinuousEval } from './index-Cq5LwG_3.js';
|
|
8
|
+
import 'zod';
|
|
9
|
+
import 'eventemitter3';
|
|
10
|
+
|
|
11
|
+
/**
 * Options accepted by the `FeedbackMiddleware` constructor and by
 * `createFeedbackMiddleware`. Every field is optional.
 *
 * - `collector`: a `ThumbsCollector` instance.
 * - `store`: a `FeedbackStoreInterface` instance.
 * - `autoCapture`: boolean flag.
 * - `captureFields`: array of strings.
 *
 * NOTE(review): this declaration shows types only. Defaults, how `collector`
 * and `store` interact, and what `captureFields` selects from are decided by
 * the implementation — confirm there before relying on them.
 */
interface FeedbackMiddlewareOptions {
|
|
12
|
+
collector?: ThumbsCollector;
|
|
13
|
+
store?: FeedbackStoreInterface;
|
|
14
|
+
autoCapture?: boolean;
|
|
15
|
+
captureFields?: string[];
|
|
16
|
+
}
|
|
17
|
+
/**
 * One message in an agent conversation.
 *
 * `role` is limited to the literals `'user'` and `'assistant'`; `metadata`
 * is an optional bag of string-keyed values of unknown type.
 */
interface AgentMessage {
|
|
18
|
+
id: string;
|
|
19
|
+
role: 'user' | 'assistant';
|
|
20
|
+
content: string;
|
|
21
|
+
metadata?: Record<string, unknown>;
|
|
22
|
+
}
|
|
23
|
+
/**
 * Conversation data accepted by `FeedbackMiddleware.capture`: a conversation
 * identifier, an array of `AgentMessage` objects, and optional string-keyed
 * metadata of unknown value type.
 */
interface AgentContext {
|
|
24
|
+
conversationId: string;
|
|
25
|
+
messages: AgentMessage[];
|
|
26
|
+
metadata?: Record<string, unknown>;
|
|
27
|
+
}
|
|
28
|
+
/**
 * Type declaration for the feedback middleware class. The implementation is
 * not part of this declaration.
 *
 * Private members (`collector`, `autoCapture`, `captureFields`,
 * `pendingFeedback`, `cleanupPending`) are declared by name only.
 *
 * Public surface:
 * - `capture(context)` takes an `AgentContext` and returns nothing.
 * - `recordFeedback(responseId, rating, comment?, userId?)` resolves to a
 *   `ThumbsFeedback` or `null`. NOTE(review): presumably `null` means the
 *   `responseId` was not pending — confirm against the implementation.
 * - `getPendingIds()` returns an array of strings.
 * - `clearPending()` returns nothing.
 * - `getCollector()` returns a `ThumbsCollector`.
 */
declare class FeedbackMiddleware {
|
|
29
|
+
private collector;
|
|
30
|
+
private autoCapture;
|
|
31
|
+
private captureFields;
|
|
32
|
+
private pendingFeedback;
|
|
33
|
+
constructor(options: FeedbackMiddlewareOptions);
|
|
34
|
+
capture(context: AgentContext): void;
|
|
35
|
+
recordFeedback(responseId: string, rating: 'up' | 'down', comment?: string, userId?: string): Promise<ThumbsFeedback | null>;
|
|
36
|
+
getPendingIds(): string[];
|
|
37
|
+
clearPending(): void;
|
|
38
|
+
private cleanupPending;
|
|
39
|
+
getCollector(): ThumbsCollector;
|
|
40
|
+
}
|
|
41
|
+
/**
 * Factory that takes `FeedbackMiddlewareOptions` and returns a
 * `FeedbackMiddleware`.
 *
 * NOTE(review): presumably equivalent to `new FeedbackMiddleware(options)` —
 * confirm against the implementation.
 */
declare function createFeedbackMiddleware(options: FeedbackMiddlewareOptions): FeedbackMiddleware;
|
|
42
|
+
|
|
43
|
+
/**
 * One evaluation scenario: a category label, the `EvalDataset` to use, and
 * an optional numeric weight.
 *
 * NOTE(review): how `weight` is applied (presumably when combining category
 * scores) and its default are not visible in this declaration — confirm.
 */
interface EvaluationScenario {
|
|
44
|
+
category: string;
|
|
45
|
+
dataset: EvalDataset;
|
|
46
|
+
weight?: number;
|
|
47
|
+
}
|
|
48
|
+
/**
 * Options accepted by the `AgentEvaluator` constructor and by
 * `createAgentEvaluator`. Both fields are required: an `EvaluationPipeline`
 * and an array of `EvaluationScenario` objects.
 */
interface AgentEvaluatorOptions {
|
|
49
|
+
pipeline: EvaluationPipeline;
|
|
50
|
+
scenarios: EvaluationScenario[];
|
|
51
|
+
}
|
|
52
|
+
/**
 * Contract for the agent passed to `AgentEvaluator.evaluate` and
 * `AgentEvaluator.benchmark`: a single asynchronous `execute` method that
 * takes an input string plus an optional context of unknown type and
 * resolves to a string.
 */
interface AgentInterface {
|
|
53
|
+
execute(input: string, context?: unknown): Promise<string>;
|
|
54
|
+
}
|
|
55
|
+
/**
 * Shape of the value that `AgentEvaluator.evaluate` resolves to.
 *
 * - `overallScore`: a single number.
 * - `categoryScores` / `categoryResults`: string-keyed records holding a
 *   number and a `PipelineEvaluationResult` respectively.
 *   NOTE(review): keys are presumably scenario `category` names — confirm.
 * - `recommendations`: array of strings.
 * - `summary`: test counts (`totalTests`, `passed`, `failed`) and `passRate`.
 *
 * NOTE(review): the numeric scale of `overallScore` and `passRate`
 * (fraction vs. percentage) is not visible in this declaration — confirm.
 */
interface AgentEvaluationResult {
|
|
56
|
+
overallScore: number;
|
|
57
|
+
categoryScores: Record<string, number>;
|
|
58
|
+
categoryResults: Record<string, PipelineEvaluationResult>;
|
|
59
|
+
recommendations: string[];
|
|
60
|
+
summary: {
|
|
61
|
+
totalTests: number;
|
|
62
|
+
passed: number;
|
|
63
|
+
failed: number;
|
|
64
|
+
passRate: number;
|
|
65
|
+
};
|
|
66
|
+
}
|
|
67
|
+
/**
 * Type declaration for the agent evaluator class. The implementation is not
 * part of this declaration.
 *
 * Private members (`pipeline`, `scenarios`) are declared by name only.
 *
 * Public surface:
 * - `evaluate(agent)` resolves to an `AgentEvaluationResult`.
 * - `benchmark(agent, sampleSize?)` resolves to an object with a numeric
 *   `score` and a numeric `latencyMs`. NOTE(review): the default for
 *   `sampleSize` and how latency is measured are not visible here — confirm.
 * - `addScenario(scenario)` takes an `EvaluationScenario` and returns nothing.
 * - `getScenarios()` returns an array of `EvaluationScenario`.
 */
declare class AgentEvaluator {
|
|
68
|
+
private pipeline;
|
|
69
|
+
private scenarios;
|
|
70
|
+
constructor(options: AgentEvaluatorOptions);
|
|
71
|
+
evaluate(agent: AgentInterface): Promise<AgentEvaluationResult>;
|
|
72
|
+
benchmark(agent: AgentInterface, sampleSize?: number): Promise<{
|
|
73
|
+
score: number;
|
|
74
|
+
latencyMs: number;
|
|
75
|
+
}>;
|
|
76
|
+
addScenario(scenario: EvaluationScenario): void;
|
|
77
|
+
getScenarios(): EvaluationScenario[];
|
|
78
|
+
}
|
|
79
|
+
/**
 * Factory that takes `AgentEvaluatorOptions` and returns an `AgentEvaluator`.
 *
 * NOTE(review): presumably equivalent to `new AgentEvaluator(options)` —
 * confirm against the implementation.
 */
declare function createAgentEvaluator(options: AgentEvaluatorOptions): AgentEvaluator;
|
|
80
|
+
|
|
81
|
+
export { type AgentContext, type AgentEvaluationResult, AgentEvaluator, type AgentEvaluatorOptions, type AgentInterface, type AgentMessage, EvalDataset, EvaluationPipeline, type EvaluationScenario, FeedbackMiddleware, type FeedbackMiddlewareOptions, FeedbackStoreInterface, PipelineEvaluationResult, ThumbsCollector, ThumbsFeedback, createAgentEvaluator, createFeedbackMiddleware };
|