@crewai-ts/core 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/a2a.d.ts +1684 -0
- package/dist/a2ui-schemas.d.ts +3312 -0
- package/dist/a2ui.d.ts +379 -0
- package/dist/agent-adapters.d.ts +178 -0
- package/dist/agent-executors.d.ts +508 -0
- package/dist/agent-parser.d.ts +44 -0
- package/dist/agent-planning.d.ts +358 -0
- package/dist/agent-utils.d.ts +210 -0
- package/dist/agent.d.ts +444 -0
- package/dist/auth.d.ts +179 -0
- package/dist/config-utils.d.ts +5 -0
- package/dist/content-processor.d.ts +12 -0
- package/dist/context.d.ts +157 -0
- package/dist/converter.d.ts +97 -0
- package/dist/crew-chat.d.ts +97 -0
- package/dist/crew.d.ts +424 -0
- package/dist/decorators.d.ts +20 -0
- package/dist/env.d.ts +13 -0
- package/dist/errors.d.ts +27 -0
- package/dist/evaluators.d.ts +477 -0
- package/dist/events.d.ts +2657 -0
- package/dist/execution-utils.d.ts +85 -0
- package/dist/experimental-conversational.d.ts +181 -0
- package/dist/file-handler.d.ts +36 -0
- package/dist/file-store.d.ts +37 -0
- package/dist/files.d.ts +554 -0
- package/dist/flow-conversation.d.ts +90 -0
- package/dist/flow-definition.d.ts +195 -0
- package/dist/flow-persistence.d.ts +107 -0
- package/dist/flow-visualization.d.ts +77 -0
- package/dist/flow.d.ts +927 -0
- package/dist/formatter.d.ts +7 -0
- package/dist/guardrail.d.ts +95 -0
- package/dist/hooks.d.ts +241 -0
- package/dist/human-input.d.ts +74 -0
- package/dist/i18n.d.ts +26 -0
- package/dist/index.d.ts +99 -13004
- package/dist/input-files.d.ts +24 -0
- package/dist/input-provider.d.ts +22 -0
- package/dist/knowledge.d.ts +353 -0
- package/dist/lite-agent-output.d.ts +69 -0
- package/dist/lite-agent.d.ts +154 -0
- package/dist/llm.d.ts +630 -0
- package/dist/llms-hooks-transport.d.ts +1 -2
- package/dist/lock-store.d.ts +14 -0
- package/dist/logger.d.ts +55 -0
- package/dist/mcp.d.ts +315 -0
- package/dist/memory.d.ts +915 -0
- package/dist/metadata.d.ts +9 -0
- package/dist/misc-compat.d.ts +125 -0
- package/dist/openai-completion.d.ts +324 -0
- package/dist/outputs.d.ts +69 -0
- package/dist/planning.d.ts +60 -0
- package/dist/plus-api.d.ts +194 -0
- package/dist/project-compat.d.ts +133 -0
- package/dist/project.d.ts +221 -0
- package/dist/prompts.d.ts +66 -0
- package/dist/provider-completions.d.ts +593 -0
- package/dist/rag.d.ts +1074 -0
- package/dist/rpm.d.ts +27 -0
- package/dist/rw-lock.d.ts +21 -0
- package/dist/schema-utils.d.ts +121 -0
- package/dist/security.d.ts +66 -0
- package/dist/settings.d.ts +103 -0
- package/dist/skills.d.ts +145 -0
- package/dist/state-provider-core.d.ts +1 -1
- package/dist/state.d.ts +204 -0
- package/dist/step-execution-context.d.ts +36 -0
- package/dist/streaming.d.ts +153 -0
- package/dist/string-utils.d.ts +12 -0
- package/dist/task-output-storage.d.ts +62 -0
- package/dist/task.d.ts +305 -0
- package/dist/telemetry.d.ts +91 -0
- package/dist/token-counter-callback.d.ts +36 -0
- package/dist/tools.d.ts +563 -0
- package/dist/tracing-utils.d.ts +56 -0
- package/dist/training-converter.d.ts +36 -0
- package/dist/training-handler.d.ts +10 -0
- package/dist/types.d.ts +72 -0
- package/dist/utilities.d.ts +130 -0
- package/dist/utility-types.d.ts +10 -0
- package/dist/version.d.ts +12 -0
- package/package.json +326 -4904
- package/dist/index.d.cts +0 -13068
- package/dist/llms-hooks-transport-ChGiFBiU.d.ts +0 -233
- package/dist/llms-hooks-transport-DZlurMUQ.d.cts +0 -233
- package/dist/llms-hooks-transport.d.cts +0 -2
- package/dist/state-provider-core-Be9RKRAm.d.cts +0 -4876
- package/dist/state-provider-core-Be9RKRAm.d.ts +0 -4876
- package/dist/state-provider-core.d.cts +0 -1
|
@@ -0,0 +1,477 @@
|
|
|
1
|
+
import { Agent } from "./agent.js";
|
|
2
|
+
import { type StructuredModel } from "./converter.js";
|
|
3
|
+
import { LiteAgentExecutionCompletedEvent, TaskCompletedEvent, type EventBus } from "./events.js";
|
|
4
|
+
import { TaskOutput } from "./outputs.js";
|
|
5
|
+
import { Task } from "./task.js";
|
|
6
|
+
import type { LLM, TaskCallback } from "./types.js";
|
|
7
|
+
export declare const MetricCategory: {
|
|
8
|
+
readonly GOAL_ALIGNMENT: "goal_alignment";
|
|
9
|
+
readonly SEMANTIC_QUALITY: "semantic_quality";
|
|
10
|
+
readonly REASONING_EFFICIENCY: "reasoning_efficiency";
|
|
11
|
+
readonly TOOL_SELECTION: "tool_selection";
|
|
12
|
+
readonly PARAMETER_EXTRACTION: "parameter_extraction";
|
|
13
|
+
readonly TOOL_INVOCATION: "tool_invocation";
|
|
14
|
+
};
|
|
15
|
+
export type MetricCategory = typeof MetricCategory[keyof typeof MetricCategory];
|
|
16
|
+
export declare function metricCategoryTitle(category: MetricCategory): string;
|
|
17
|
+
export declare class EvaluationScore {
|
|
18
|
+
readonly score: number | null;
|
|
19
|
+
readonly feedback: string;
|
|
20
|
+
readonly rawResponse: string | null;
|
|
21
|
+
readonly raw_response: string | null;
|
|
22
|
+
constructor(options?: {
|
|
23
|
+
score?: number | null;
|
|
24
|
+
feedback?: string;
|
|
25
|
+
rawResponse?: string | null;
|
|
26
|
+
raw_response?: string | null;
|
|
27
|
+
});
|
|
28
|
+
toString(): string;
|
|
29
|
+
__str__(): string;
|
|
30
|
+
}
|
|
31
|
+
export declare abstract class BaseEvaluator {
|
|
32
|
+
readonly llm: LLM | string | null;
|
|
33
|
+
constructor(llm?: LLM | string | null);
|
|
34
|
+
abstract get metricCategory(): MetricCategory;
|
|
35
|
+
get metric_category(): MetricCategory;
|
|
36
|
+
abstract evaluate(agent: unknown, executionTrace: Record<string, unknown>, finalOutput: unknown, task?: Task | null): EvaluationScore | Promise<EvaluationScore>;
|
|
37
|
+
evaluate_sync(agent: unknown, execution_trace: Record<string, unknown>, final_output: unknown, task?: Task | null): EvaluationScore | Promise<EvaluationScore>;
|
|
38
|
+
}
|
|
39
|
+
export declare class ConstantScoreEvaluator extends BaseEvaluator {
|
|
40
|
+
private readonly category;
|
|
41
|
+
private readonly defaultFeedback;
|
|
42
|
+
constructor(category: MetricCategory, defaultFeedback: string, llm?: LLM | string | null);
|
|
43
|
+
get metricCategory(): MetricCategory;
|
|
44
|
+
evaluate(_agent: unknown, executionTrace: Record<string, unknown>, _finalOutput: unknown, _task?: Task | null): EvaluationScore;
|
|
45
|
+
}
|
|
46
|
+
export declare class AgentEvaluationResult {
|
|
47
|
+
readonly agentId: string;
|
|
48
|
+
readonly agent_id: string;
|
|
49
|
+
readonly taskId: string;
|
|
50
|
+
readonly task_id: string;
|
|
51
|
+
readonly metrics: Map<MetricCategory, EvaluationScore>;
|
|
52
|
+
constructor(options: {
|
|
53
|
+
agentId?: string;
|
|
54
|
+
agent_id?: string;
|
|
55
|
+
taskId?: string;
|
|
56
|
+
task_id?: string;
|
|
57
|
+
metrics?: Map<MetricCategory, EvaluationScore> | Partial<Record<MetricCategory, EvaluationScore>>;
|
|
58
|
+
});
|
|
59
|
+
}
|
|
60
|
+
export declare const AggregationStrategy: {
|
|
61
|
+
readonly SIMPLE_AVERAGE: "simple_average";
|
|
62
|
+
readonly WEIGHTED_BY_COMPLEXITY: "weighted_by_complexity";
|
|
63
|
+
readonly BEST_PERFORMANCE: "best_performance";
|
|
64
|
+
readonly WORST_PERFORMANCE: "worst_performance";
|
|
65
|
+
};
|
|
66
|
+
export type AggregationStrategy = typeof AggregationStrategy[keyof typeof AggregationStrategy];
|
|
67
|
+
export declare class AgentAggregatedEvaluationResult {
|
|
68
|
+
readonly agentId: string;
|
|
69
|
+
readonly agent_id: string;
|
|
70
|
+
readonly agentRole: string;
|
|
71
|
+
readonly agent_role: string;
|
|
72
|
+
readonly taskCount: number;
|
|
73
|
+
readonly task_count: number;
|
|
74
|
+
readonly aggregationStrategy: AggregationStrategy;
|
|
75
|
+
readonly aggregation_strategy: AggregationStrategy;
|
|
76
|
+
readonly metrics: Map<MetricCategory, EvaluationScore>;
|
|
77
|
+
readonly taskResults: string[];
|
|
78
|
+
readonly task_results: string[];
|
|
79
|
+
readonly overallScore: number | null;
|
|
80
|
+
readonly overall_score: number | null;
|
|
81
|
+
constructor(options?: {
|
|
82
|
+
agentId?: string;
|
|
83
|
+
agent_id?: string;
|
|
84
|
+
agentRole?: string;
|
|
85
|
+
agent_role?: string;
|
|
86
|
+
taskCount?: number;
|
|
87
|
+
task_count?: number;
|
|
88
|
+
aggregationStrategy?: AggregationStrategy;
|
|
89
|
+
aggregation_strategy?: AggregationStrategy;
|
|
90
|
+
metrics?: Map<MetricCategory, EvaluationScore> | Partial<Record<MetricCategory, EvaluationScore>>;
|
|
91
|
+
taskResults?: string[];
|
|
92
|
+
task_results?: string[];
|
|
93
|
+
overallScore?: number | null;
|
|
94
|
+
overall_score?: number | null;
|
|
95
|
+
});
|
|
96
|
+
toString(): string;
|
|
97
|
+
__str__(): string;
|
|
98
|
+
}
|
|
99
|
+
export declare class ExecutionState {
|
|
100
|
+
currentAgentId: string | null;
|
|
101
|
+
current_agent_id: string | null;
|
|
102
|
+
currentTaskId: string | null;
|
|
103
|
+
current_task_id: string | null;
|
|
104
|
+
traces: Record<string, unknown>;
|
|
105
|
+
iteration: number;
|
|
106
|
+
iterationsResults: Record<number, Record<string, AgentEvaluationResult[]>>;
|
|
107
|
+
iterations_results: Record<number, Record<string, AgentEvaluationResult[]>>;
|
|
108
|
+
agentEvaluators: Record<string, readonly BaseEvaluator[] | null>;
|
|
109
|
+
agent_evaluators: Record<string, readonly BaseEvaluator[] | null>;
|
|
110
|
+
}
|
|
111
|
+
export declare class AgentEvaluator {
|
|
112
|
+
readonly agents: readonly unknown[];
|
|
113
|
+
readonly evaluators: readonly BaseEvaluator[] | null;
|
|
114
|
+
readonly callback: EvaluationTraceCallback;
|
|
115
|
+
private readonly executionState;
|
|
116
|
+
readonly _execution_state: ExecutionState;
|
|
117
|
+
constructor(agents?: readonly unknown[], evaluators?: readonly BaseEvaluator[] | null);
|
|
118
|
+
_subscribe_to_events(): void;
|
|
119
|
+
_handle_task_completed(_source: unknown, event: TaskCompletedEvent | (TaskCompletedEvent & {
|
|
120
|
+
task?: unknown;
|
|
121
|
+
})): void;
|
|
122
|
+
_handle_lite_agent_completed(_source: unknown, event: LiteAgentExecutionCompletedEvent): void;
|
|
123
|
+
set_iteration(iteration: number): void;
|
|
124
|
+
reset_iterations_results(): void;
|
|
125
|
+
get_evaluation_results(): Record<string, AgentEvaluationResult[]>;
|
|
126
|
+
evaluate(options: {
|
|
127
|
+
agent: unknown;
|
|
128
|
+
task?: Task | null;
|
|
129
|
+
execution_trace?: Record<string, unknown>;
|
|
130
|
+
executionTrace?: Record<string, unknown>;
|
|
131
|
+
final_output?: unknown;
|
|
132
|
+
finalOutput?: unknown;
|
|
133
|
+
state?: ExecutionState;
|
|
134
|
+
}): AgentEvaluationResult;
|
|
135
|
+
get_agent_evaluation(strategy?: AggregationStrategy): Record<string, AgentAggregatedEvaluationResult>;
|
|
136
|
+
display_results_with_iterations(): string;
|
|
137
|
+
display_evaluation_with_feedback(): string;
|
|
138
|
+
emit_evaluation_started_event(agent_role: string, agent_id: string, task_id?: string | null): void;
|
|
139
|
+
emit_evaluation_completed_event(agent_role: string, agent_id: string, task_id?: string | null, metric_category?: MetricCategory | null, score?: EvaluationScore | null): void;
|
|
140
|
+
emit_evaluation_failed_event(agent_role: string, agent_id: string, error: unknown, task_id?: string | null): void;
|
|
141
|
+
private appendEvaluationResult;
|
|
142
|
+
}
|
|
143
|
+
export declare class ToolSelectionEvaluator extends BaseEvaluator {
|
|
144
|
+
constructor(llm?: LLM | string | null);
|
|
145
|
+
get metricCategory(): MetricCategory;
|
|
146
|
+
evaluate(agent: unknown, executionTrace: Record<string, unknown>, _finalOutput: unknown, task?: Task | null): EvaluationScore | Promise<EvaluationScore>;
|
|
147
|
+
}
|
|
148
|
+
export declare class ParameterExtractionEvaluator extends BaseEvaluator {
|
|
149
|
+
constructor(llm?: LLM | string | null);
|
|
150
|
+
get metricCategory(): MetricCategory;
|
|
151
|
+
evaluate(agent: unknown, executionTrace: Record<string, unknown>, _finalOutput: unknown, task?: Task | null): EvaluationScore | Promise<EvaluationScore>;
|
|
152
|
+
}
|
|
153
|
+
export declare class ToolInvocationEvaluator extends BaseEvaluator {
|
|
154
|
+
constructor(llm?: LLM | string | null);
|
|
155
|
+
get metricCategory(): MetricCategory;
|
|
156
|
+
evaluate(agent: unknown, executionTrace: Record<string, unknown>, _finalOutput: unknown, task?: Task | null): EvaluationScore | Promise<EvaluationScore>;
|
|
157
|
+
}
|
|
158
|
+
export declare const ReasoningPatternType: Readonly<{
|
|
159
|
+
readonly EFFICIENT: "efficient";
|
|
160
|
+
readonly LOOP: "loop";
|
|
161
|
+
readonly VERBOSE: "verbose";
|
|
162
|
+
readonly INDECISIVE: "indecisive";
|
|
163
|
+
readonly SCATTERED: "scattered";
|
|
164
|
+
}>;
|
|
165
|
+
export type ReasoningPatternType = typeof ReasoningPatternType[keyof typeof ReasoningPatternType];
|
|
166
|
+
export declare class ReasoningEfficiencyEvaluator extends BaseEvaluator {
|
|
167
|
+
constructor(llm?: LLM | string | null);
|
|
168
|
+
get metricCategory(): MetricCategory;
|
|
169
|
+
evaluate(agent: unknown, executionTrace: Record<string, unknown>, finalOutput: unknown, task?: Task | null): EvaluationScore | Promise<EvaluationScore>;
|
|
170
|
+
_detect_loops(llmCalls: readonly unknown[]): [boolean, Array<Record<string, unknown>>];
|
|
171
|
+
_calculate_text_similarity(text1: string, text2: string): number;
|
|
172
|
+
_analyze_reasoning_patterns(llmCalls: readonly unknown[]): {
|
|
173
|
+
primary_pattern: ReasoningPatternType;
|
|
174
|
+
details: string;
|
|
175
|
+
metrics: {
|
|
176
|
+
avg_length: number;
|
|
177
|
+
std_length: number;
|
|
178
|
+
length_trend: number;
|
|
179
|
+
loop_score: number;
|
|
180
|
+
};
|
|
181
|
+
};
|
|
182
|
+
_calculate_trend(values: readonly number[]): number;
|
|
183
|
+
_calculate_loop_likelihood(callLengths: readonly number[], responseTimes?: readonly number[]): number;
|
|
184
|
+
_get_call_samples(llmCalls: readonly unknown[]): string;
|
|
185
|
+
}
|
|
186
|
+
export declare class GoalAlignmentEvaluator extends BaseEvaluator {
|
|
187
|
+
get metricCategory(): MetricCategory;
|
|
188
|
+
evaluate(agent: unknown, _executionTrace: Record<string, unknown>, finalOutput: unknown, task?: Task | null): EvaluationScore | Promise<EvaluationScore>;
|
|
189
|
+
}
|
|
190
|
+
export declare class SemanticQualityEvaluator extends BaseEvaluator {
|
|
191
|
+
get metricCategory(): MetricCategory;
|
|
192
|
+
evaluate(agent: unknown, _executionTrace: Record<string, unknown>, finalOutput: unknown, task?: Task | null): EvaluationScore | Promise<EvaluationScore>;
|
|
193
|
+
}
|
|
194
|
+
export declare function create_default_evaluator(llm?: LLM | string | null): BaseEvaluator[];
|
|
195
|
+
export declare class EvaluationDisplayFormatter {
|
|
196
|
+
aggregateAgentResults(options: {
|
|
197
|
+
agentId?: string;
|
|
198
|
+
agent_id?: string;
|
|
199
|
+
agentRole?: string;
|
|
200
|
+
agent_role?: string;
|
|
201
|
+
results: readonly AgentEvaluationResult[];
|
|
202
|
+
strategy?: AggregationStrategy;
|
|
203
|
+
}): AgentAggregatedEvaluationResult;
|
|
204
|
+
_aggregate_agent_results(agentId: string, agentRole: string, results: readonly AgentEvaluationResult[], strategy?: AggregationStrategy): AgentAggregatedEvaluationResult;
|
|
205
|
+
display_evaluation_with_feedback(iterationsResults: Record<number, Record<string, AgentEvaluationResult[]>>): string;
|
|
206
|
+
display_summary_results(iterationsResults: Record<number, Record<string, AgentEvaluationResult[]>>): string;
|
|
207
|
+
private summarizeFeedbacks;
|
|
208
|
+
_summarize_feedbacks(_agentRole: string, _metric: string, feedbacks: readonly string[], _scores?: readonly (number | null)[], _strategy?: AggregationStrategy): string;
|
|
209
|
+
private formatIterations;
|
|
210
|
+
}
|
|
211
|
+
export declare class EvaluationTraceCallback {
|
|
212
|
+
readonly traces: Record<string, Record<string, unknown>>;
|
|
213
|
+
current_agent_id: string | null;
|
|
214
|
+
current_task_id: string | null;
|
|
215
|
+
current_llm_call: Record<string, unknown>;
|
|
216
|
+
private unsubscribeHandlers;
|
|
217
|
+
setupListeners(eventBus?: EventBus): void;
|
|
218
|
+
setup_listeners(eventBus?: EventBus): void;
|
|
219
|
+
disposeListeners(): void;
|
|
220
|
+
dispose_listeners(): void;
|
|
221
|
+
on_agent_started(_source: unknown, event: unknown): void;
|
|
222
|
+
on_lite_agent_started(_source: unknown, event: unknown): void;
|
|
223
|
+
on_agent_completed(_source: unknown, event: unknown): void;
|
|
224
|
+
on_lite_agent_completed(_source: unknown, event: unknown): void;
|
|
225
|
+
on_tool_completed(_source: unknown, event: unknown): void;
|
|
226
|
+
on_tool_usage_error(_source: unknown, event: unknown): void;
|
|
227
|
+
on_tool_execution_error(_source: unknown, event: unknown): void;
|
|
228
|
+
on_tool_selection_error(_source: unknown, event: unknown): void;
|
|
229
|
+
on_tool_validate_input_error(_source: unknown, event: unknown): void;
|
|
230
|
+
on_llm_call_started(_source: unknown, event: unknown): void;
|
|
231
|
+
on_llm_call_completed(_source: unknown, event: unknown): void;
|
|
232
|
+
on_agent_start(agent: unknown, task: unknown): void;
|
|
233
|
+
_init_trace(trace_key: string, trace?: Record<string, unknown>): void;
|
|
234
|
+
on_lite_agent_start(agentInfo: Record<string, unknown>): void;
|
|
235
|
+
on_agent_finish(agent: unknown, task: unknown, output: unknown): void;
|
|
236
|
+
_reset_current(): void;
|
|
237
|
+
on_lite_agent_finish(output: unknown): void;
|
|
238
|
+
get_trace(agent_id: string, task_id: string): Record<string, unknown>;
|
|
239
|
+
on_tool_use(tool: string, args: unknown, result: unknown, options?: {
|
|
240
|
+
success?: boolean;
|
|
241
|
+
error_type?: string | null;
|
|
242
|
+
}): void;
|
|
243
|
+
on_llm_call_start(messages: unknown, tools?: readonly Record<string, unknown>[] | null): void;
|
|
244
|
+
on_llm_call_end(messages: unknown, response: unknown, usage?: Record<string, unknown> | null): void;
|
|
245
|
+
private initTrace;
|
|
246
|
+
private resetCurrent;
|
|
247
|
+
private recordToolError;
|
|
248
|
+
}
|
|
249
|
+
export declare function create_evaluation_callbacks(): EvaluationTraceCallback;
|
|
250
|
+
export declare class ExperimentResult {
|
|
251
|
+
readonly identifier: string;
|
|
252
|
+
readonly inputs: Record<string, unknown>;
|
|
253
|
+
readonly score: number | Record<string, number>;
|
|
254
|
+
readonly expected_score: number | Record<string, number>;
|
|
255
|
+
readonly passed: boolean;
|
|
256
|
+
readonly agent_evaluations: Record<string, unknown> | null;
|
|
257
|
+
constructor(options: {
|
|
258
|
+
identifier: string;
|
|
259
|
+
inputs?: Record<string, unknown>;
|
|
260
|
+
score: number | Record<string, number>;
|
|
261
|
+
expected_score?: number | Record<string, number>;
|
|
262
|
+
expectedScore?: number | Record<string, number>;
|
|
263
|
+
passed?: boolean;
|
|
264
|
+
agent_evaluations?: Record<string, unknown> | null;
|
|
265
|
+
agentEvaluations?: Record<string, unknown> | null;
|
|
266
|
+
});
|
|
267
|
+
}
|
|
268
|
+
export declare class ExperimentResults {
|
|
269
|
+
readonly results: ExperimentResult[];
|
|
270
|
+
readonly metadata: Record<string, unknown>;
|
|
271
|
+
readonly timestamp: Date;
|
|
272
|
+
readonly display: ExperimentResultsDisplay;
|
|
273
|
+
constructor(results: readonly ExperimentResult[], metadata?: Record<string, unknown>);
|
|
274
|
+
to_json(filepath?: string | null): Record<string, unknown>;
|
|
275
|
+
compare_with_baseline(baseline_filepath: string, save_current?: boolean, print_summary?: boolean): Record<string, unknown>;
|
|
276
|
+
compareWithBaseline(baselineFilepath: string, saveCurrent?: boolean, printSummary?: boolean): Record<string, unknown>;
|
|
277
|
+
_compare_with_run(baselineRun: Record<string, unknown>): Record<string, unknown>;
|
|
278
|
+
compareWithRun(baselineRun: Record<string, unknown>): Record<string, unknown>;
|
|
279
|
+
}
|
|
280
|
+
export type ExperimentScore = number | Record<string, number>;
|
|
281
|
+
export declare class ExperimentRunner {
|
|
282
|
+
readonly dataset: Record<string, unknown>[];
|
|
283
|
+
evaluator: AgentEvaluator | null;
|
|
284
|
+
readonly display: ExperimentResultsDisplay;
|
|
285
|
+
constructor(dataset?: readonly Record<string, unknown>[]);
|
|
286
|
+
run(crewOrOptions?: {
|
|
287
|
+
crew?: {
|
|
288
|
+
agents?: readonly unknown[];
|
|
289
|
+
kickoff?: (options: {
|
|
290
|
+
inputs: Record<string, unknown>;
|
|
291
|
+
}) => unknown;
|
|
292
|
+
} | null;
|
|
293
|
+
agents?: readonly unknown[] | null;
|
|
294
|
+
print_summary?: boolean;
|
|
295
|
+
printSummary?: boolean;
|
|
296
|
+
} | {
|
|
297
|
+
agents?: readonly unknown[];
|
|
298
|
+
kickoff?: (options: {
|
|
299
|
+
inputs: Record<string, unknown>;
|
|
300
|
+
}) => unknown;
|
|
301
|
+
} | null, agentsArg?: readonly unknown[] | null, print_summary?: boolean): ExperimentResults;
|
|
302
|
+
_run_test_case(test_case: Record<string, unknown>, agents: readonly unknown[], crew?: {
|
|
303
|
+
kickoff?: (options: {
|
|
304
|
+
inputs: Record<string, unknown>;
|
|
305
|
+
}) => unknown;
|
|
306
|
+
} | null): ExperimentResult;
|
|
307
|
+
_extract_scores(agent_evaluations: Record<string, AgentAggregatedEvaluationResult>): ExperimentScore;
|
|
308
|
+
_assert_scores(expected: ExperimentScore, actual: ExperimentScore): boolean;
|
|
309
|
+
}
|
|
310
|
+
export type ExperimentResultsDisplayConsole = {
|
|
311
|
+
print?: (value: string) => void;
|
|
312
|
+
log?: (value: string) => void;
|
|
313
|
+
};
|
|
314
|
+
export declare class ExperimentResultsDisplay {
|
|
315
|
+
readonly console: ExperimentResultsDisplayConsole;
|
|
316
|
+
constructor(options?: {
|
|
317
|
+
console?: ExperimentResultsDisplayConsole;
|
|
318
|
+
});
|
|
319
|
+
summary(experiment_results: ExperimentResults): string;
|
|
320
|
+
comparison_summary(comparison: Record<string, unknown>, baseline_timestamp: string): string;
|
|
321
|
+
comparisonSummary(comparison: Record<string, unknown>, baselineTimestamp: string): string;
|
|
322
|
+
private formatComparisonRow;
|
|
323
|
+
private print;
|
|
324
|
+
}
|
|
325
|
+
export declare function assert_experiment_no_regression(comparison_result: Record<string, unknown>): void;
|
|
326
|
+
export declare function _get_baseline_filepath_fallback(): string;
|
|
327
|
+
export declare function assert_experiment_successfully(experiment_results: ExperimentResults, baseline_filepath?: string): void;
|
|
328
|
+
export declare function run_experiment(dataset: readonly Record<string, unknown>[], crew?: {
|
|
329
|
+
agents?: readonly unknown[];
|
|
330
|
+
kickoff?: (options: {
|
|
331
|
+
inputs: Record<string, unknown>;
|
|
332
|
+
}) => unknown;
|
|
333
|
+
} | null, agents?: readonly unknown[] | null, verbose?: boolean): ExperimentResults;
|
|
334
|
+
export type Entity = {
|
|
335
|
+
name: string;
|
|
336
|
+
type: string;
|
|
337
|
+
description: string;
|
|
338
|
+
relationships: string[];
|
|
339
|
+
};
|
|
340
|
+
export type TaskEvaluation = {
|
|
341
|
+
suggestions: string[];
|
|
342
|
+
quality: number;
|
|
343
|
+
entities: Entity[];
|
|
344
|
+
};
|
|
345
|
+
export type TrainingTaskEvaluation = {
|
|
346
|
+
suggestions: string[];
|
|
347
|
+
quality: number;
|
|
348
|
+
final_summary?: string;
|
|
349
|
+
finalSummary?: string;
|
|
350
|
+
};
|
|
351
|
+
export type TaskEvaluationPydanticOutput = {
|
|
352
|
+
quality: number;
|
|
353
|
+
};
|
|
354
|
+
export declare const Entity: {
|
|
355
|
+
new (options: {
|
|
356
|
+
readonly name: string;
|
|
357
|
+
readonly type: string;
|
|
358
|
+
readonly description: string;
|
|
359
|
+
readonly relationships: string[];
|
|
360
|
+
}): {
|
|
361
|
+
readonly name: string;
|
|
362
|
+
readonly type: string;
|
|
363
|
+
readonly description: string;
|
|
364
|
+
readonly relationships: string[];
|
|
365
|
+
};
|
|
366
|
+
};
|
|
367
|
+
export declare const TaskEvaluation: {
|
|
368
|
+
new (options: {
|
|
369
|
+
readonly suggestions: string[];
|
|
370
|
+
readonly quality: number;
|
|
371
|
+
readonly entities: Entity[];
|
|
372
|
+
}): {
|
|
373
|
+
readonly suggestions: string[];
|
|
374
|
+
readonly quality: number;
|
|
375
|
+
readonly entities: Entity[];
|
|
376
|
+
};
|
|
377
|
+
};
|
|
378
|
+
export declare const TrainingTaskEvaluation: {
|
|
379
|
+
new (options: {
|
|
380
|
+
suggestions: string[];
|
|
381
|
+
quality: number;
|
|
382
|
+
final_summary?: string;
|
|
383
|
+
finalSummary?: string;
|
|
384
|
+
}): {
|
|
385
|
+
readonly suggestions: string[];
|
|
386
|
+
readonly quality: number;
|
|
387
|
+
readonly final_summary: string;
|
|
388
|
+
readonly finalSummary: string;
|
|
389
|
+
};
|
|
390
|
+
};
|
|
391
|
+
export declare const TaskEvaluationPydanticOutput: {
|
|
392
|
+
new (options: {
|
|
393
|
+
readonly quality: number;
|
|
394
|
+
}): {
|
|
395
|
+
readonly quality: number;
|
|
396
|
+
};
|
|
397
|
+
};
|
|
398
|
+
export type EvaluationAgentLike = {
|
|
399
|
+
llm: LLM | string | null;
|
|
400
|
+
};
|
|
401
|
+
export declare const taskEvaluationModel: StructuredModel<TaskEvaluation>;
|
|
402
|
+
export declare const trainingTaskEvaluationModel: StructuredModel<TrainingTaskEvaluation>;
|
|
403
|
+
export declare const taskEvaluationPydanticOutputModel: StructuredModel<TaskEvaluationPydanticOutput>;
|
|
404
|
+
export declare class TaskEvaluator {
|
|
405
|
+
readonly llm: LLM | string | null;
|
|
406
|
+
readonly originalAgent: EvaluationAgentLike;
|
|
407
|
+
readonly original_agent: EvaluationAgentLike;
|
|
408
|
+
constructor(originalAgentOrOptions: EvaluationAgentLike | {
|
|
409
|
+
originalAgent?: EvaluationAgentLike;
|
|
410
|
+
original_agent?: EvaluationAgentLike;
|
|
411
|
+
});
|
|
412
|
+
evaluate(task: Task, output: string): Promise<TaskEvaluation>;
|
|
413
|
+
evaluate_training_data(trainingData: Record<string, unknown>, agentId: string): Promise<TrainingTaskEvaluation>;
|
|
414
|
+
evaluateTrainingData(trainingData: Record<string, unknown>, agentId: string): Promise<TrainingTaskEvaluation>;
|
|
415
|
+
}
|
|
416
|
+
export type CrewLikeForEvaluation = {
|
|
417
|
+
name?: string | null;
|
|
418
|
+
tasks: Task[];
|
|
419
|
+
taskCallback?: TaskCallback | null;
|
|
420
|
+
};
|
|
421
|
+
export declare class CrewEvaluator {
|
|
422
|
+
readonly crew: CrewLikeForEvaluation;
|
|
423
|
+
readonly llm: LLM | string | null;
|
|
424
|
+
tasksScores: Record<number, number[]>;
|
|
425
|
+
tasks_scores: Record<number, number[]>;
|
|
426
|
+
runExecutionTimes: Record<number, number[]>;
|
|
427
|
+
run_execution_times: Record<number, number[]>;
|
|
428
|
+
iteration: number;
|
|
429
|
+
private readonly previousTaskCallback;
|
|
430
|
+
constructor(crewOrOptions: CrewLikeForEvaluation | {
|
|
431
|
+
crew: CrewLikeForEvaluation;
|
|
432
|
+
evalLlm?: LLM | string | null;
|
|
433
|
+
eval_llm?: LLM | string | null;
|
|
434
|
+
openaiModelName?: string | null;
|
|
435
|
+
openai_model_name?: string | null;
|
|
436
|
+
llm?: LLM | string | null;
|
|
437
|
+
}, evalLlm?: LLM | string | null);
|
|
438
|
+
setIteration(iteration: number): void;
|
|
439
|
+
set_iteration(iteration: number): void;
|
|
440
|
+
evaluate(taskOutput: TaskOutput): Promise<void>;
|
|
441
|
+
printCrewEvaluationResult(): string;
|
|
442
|
+
print_crew_evaluation_result(): string;
|
|
443
|
+
private setupForEvaluating;
|
|
444
|
+
private evaluatorAgent;
|
|
445
|
+
static evaluationTask(evaluatorAgent: Agent, taskToEvaluate: Task, taskOutput: string): Task;
|
|
446
|
+
static _evaluation_task(evaluatorAgent: Agent, taskToEvaluate: Task, taskOutput: string): Task;
|
|
447
|
+
}
|
|
448
|
+
export declare const TaskEvaluationPydanticOutputModel: {
|
|
449
|
+
modelValidate?: (value: unknown) => TaskEvaluationPydanticOutput;
|
|
450
|
+
model_validate?: (value: unknown) => TaskEvaluationPydanticOutput;
|
|
451
|
+
modelValidateJson?: (value: string) => TaskEvaluationPydanticOutput;
|
|
452
|
+
model_validate_json?: (value: string) => TaskEvaluationPydanticOutput;
|
|
453
|
+
modelDump?: (value: TaskEvaluationPydanticOutput) => Record<string, unknown>;
|
|
454
|
+
model_dump?: (value: TaskEvaluationPydanticOutput) => Record<string, unknown>;
|
|
455
|
+
schema?: unknown;
|
|
456
|
+
name?: string;
|
|
457
|
+
};
|
|
458
|
+
export declare const TaskEvaluationModel: {
|
|
459
|
+
modelValidate?: (value: unknown) => TaskEvaluation;
|
|
460
|
+
model_validate?: (value: unknown) => TaskEvaluation;
|
|
461
|
+
modelValidateJson?: (value: string) => TaskEvaluation;
|
|
462
|
+
model_validate_json?: (value: string) => TaskEvaluation;
|
|
463
|
+
modelDump?: (value: TaskEvaluation) => Record<string, unknown>;
|
|
464
|
+
model_dump?: (value: TaskEvaluation) => Record<string, unknown>;
|
|
465
|
+
schema?: unknown;
|
|
466
|
+
name?: string;
|
|
467
|
+
};
|
|
468
|
+
export declare const TrainingTaskEvaluationModel: {
|
|
469
|
+
modelValidate?: (value: unknown) => TrainingTaskEvaluation;
|
|
470
|
+
model_validate?: (value: unknown) => TrainingTaskEvaluation;
|
|
471
|
+
modelValidateJson?: (value: string) => TrainingTaskEvaluation;
|
|
472
|
+
model_validate_json?: (value: string) => TrainingTaskEvaluation;
|
|
473
|
+
modelDump?: (value: TrainingTaskEvaluation) => Record<string, unknown>;
|
|
474
|
+
model_dump?: (value: TrainingTaskEvaluation) => Record<string, unknown>;
|
|
475
|
+
schema?: unknown;
|
|
476
|
+
name?: string;
|
|
477
|
+
};
|