@agentica/benchmark 0.44.0-dev.20260313-2 → 0.44.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,118 +1,118 @@
1
- /**
2
- * @module
3
- * This file contains the implementation of the IAgenticaCallBenchmarkEvent class.
4
- *
5
- * @author Wrtn Technologies
6
- */
7
- import type { AgenticaHistory, AgenticaTokenUsage } from "@agentica/core";
8
-
9
- import type { IAgenticaCallBenchmarkScenario } from "./IAgenticaCallBenchmarkScenario";
10
-
11
- /**
12
- * Event of LLM function selection benchmark.
13
- *
14
- * `IAgenticaCallBenchmarkEvent` is an union type of the events occurred
15
- * during the LLM function calling benchmark, representing one phase of
16
- * the benchmark testing about a scenario.
17
- *
18
- * In other words, when {@link AgenticaCallBenchmark} executes the
19
- * benchmark, it will run the benchmark will test a scenario repeately with
20
- * the given configuration {@link AgenticaCallBenchmark.IConfig.repeat}.
21
- * And in the repeated benchmark about a scenario,
22
- * `IAgenticaCallBenchmarkEvent` is one of the repeated testing.
23
- *
24
- * For reference, `IAgenticaCallBenchmarkEvent` is categorized into three
25
- * types: `success`, `failure`, and `error`. The `success` means the
26
- * benchmark testing is fully meet the expected scenario, and `failure`
27
- * means that the `selector` or `caller` agents had not selected or
28
- * called the expected operations. The last type, `error`, means that
29
- * an error had been occurred during the benchmark testing.
30
- *
31
- * @author Samchon
32
- */
33
- export type IAgenticaCallBenchmarkEvent
34
- = | IAgenticaCallBenchmarkEvent.ISuccess
35
- | IAgenticaCallBenchmarkEvent.IFailure
36
- | IAgenticaCallBenchmarkEvent.IError;
37
- export namespace IAgenticaCallBenchmarkEvent {
38
- /**
39
- * Success event type.
40
- *
41
- * The `success` event type represents that the benchmark
42
- * testing is fully meet the expected scenario.
43
- */
44
- export interface ISuccess
45
- extends IEventBase<"success"> {
46
- /**
47
- * Whether succeeded to function selection.
48
- */
49
- select: true;
50
-
51
- /**
52
- * Whether succeeded to function call.
53
- */
54
- call: true;
55
- }
56
-
57
- /**
58
- * Failure event type.
59
- *
60
- * The `failure` event type represents that the `selector`
61
- * or `caller` agents have not selected or called following the
62
- * expected scenario in the benchmark testing.
63
- */
64
- export interface IFailure
65
- extends IEventBase<"failure"> {
66
- /**
67
- * Whether succeeded to function selection.
68
- */
69
- select: boolean;
70
-
71
- /**
72
- * Whether succeeded to function call.
73
- */
74
- call: boolean;
75
- }
76
-
77
- export interface IError
78
- extends IEventBase<"error"> {
79
- /**
80
- * Error occurred during the benchmark.
81
- */
82
- error: unknown;
83
- }
84
-
85
- interface IEventBase<Type extends string> {
86
- /**
87
- * Discriminant type.
88
- */
89
- type: Type;
90
-
91
- /**
92
- * Expected scenario.
93
- */
94
- scenario: IAgenticaCallBenchmarkScenario;
95
-
96
- /**
97
- * Prompt histories.
98
- *
99
- * List of prompts occurred during the benchmark testing.
100
- */
101
- prompts: AgenticaHistory[];
102
-
103
- /**
104
- * Usage of the token during the benchmark.
105
- */
106
- usage: AgenticaTokenUsage;
107
-
108
- /**
109
- * When the benchmark testing started.
110
- */
111
- started_at: Date;
112
-
113
- /**
114
- * When the benchmark testing completed.
115
- */
116
- completed_at: Date;
117
- }
118
- }
1
+ /**
2
+ * @module
3
+ * This file contains the implementation of the IAgenticaCallBenchmarkEvent class.
4
+ *
5
+ * @author Wrtn Technologies
6
+ */
7
+ import type { AgenticaHistory, AgenticaTokenUsage } from "@agentica/core";
8
+
9
+ import type { IAgenticaCallBenchmarkScenario } from "./IAgenticaCallBenchmarkScenario";
10
+
11
+ /**
12
+ * Event of LLM function calling benchmark.
13
+ *
14
+ * `IAgenticaCallBenchmarkEvent` is a union type of the events occurred
15
+ * during the LLM function calling benchmark, representing one phase of
16
+ * the benchmark testing about a scenario.
17
+ *
18
+ * In other words, when {@link AgenticaCallBenchmark} executes the
19
+ * benchmark, it will test a scenario repeatedly with
20
+ * the given configuration {@link AgenticaCallBenchmark.IConfig.repeat}.
21
+ * And in the repeated benchmark about a scenario,
22
+ * `IAgenticaCallBenchmarkEvent` represents one of the repeated tests.
23
+ *
24
+ * For reference, `IAgenticaCallBenchmarkEvent` is categorized into three
25
+ * types: `success`, `failure`, and `error`. The `success` means the
26
+ * benchmark testing fully meets the expected scenario, and `failure`
27
+ * means that the `selector` or `caller` agents had not selected or
28
+ * called the expected operations. The last type, `error`, means that
29
+ * an error occurred during the benchmark testing.
30
+ *
31
+ * @author Samchon
32
+ */
33
+ export type IAgenticaCallBenchmarkEvent
34
+ = | IAgenticaCallBenchmarkEvent.ISuccess
35
+ | IAgenticaCallBenchmarkEvent.IFailure
36
+ | IAgenticaCallBenchmarkEvent.IError;
37
+ export namespace IAgenticaCallBenchmarkEvent {
38
+ /**
39
+ * Success event type.
40
+ *
41
+ * The `success` event type represents that the benchmark
42
+ * testing fully meets the expected scenario.
43
+ */
44
+ export interface ISuccess
45
+ extends IEventBase<"success"> {
46
+ /**
47
+ * Whether the function selection succeeded.
48
+ */
49
+ select: true;
50
+
51
+ /**
52
+ * Whether the function call succeeded.
53
+ */
54
+ call: true;
55
+ }
56
+
57
+ /**
58
+ * Failure event type.
59
+ *
60
+ * The `failure` event type represents that the `selector`
61
+ * or `caller` agents have not selected or called following the
62
+ * expected scenario in the benchmark testing.
63
+ */
64
+ export interface IFailure
65
+ extends IEventBase<"failure"> {
66
+ /**
67
+ * Whether the function selection succeeded.
68
+ */
69
+ select: boolean;
70
+
71
+ /**
72
+ * Whether the function call succeeded.
73
+ */
74
+ call: boolean;
75
+ }
76
+
77
+ export interface IError
78
+ extends IEventBase<"error"> {
79
+ /**
80
+ * Error occurred during the benchmark.
81
+ */
82
+ error: unknown;
83
+ }
84
+
85
+ interface IEventBase<Type extends string> {
86
+ /**
87
+ * Discriminant type.
88
+ */
89
+ type: Type;
90
+
91
+ /**
92
+ * Expected scenario.
93
+ */
94
+ scenario: IAgenticaCallBenchmarkScenario;
95
+
96
+ /**
97
+ * Prompt histories.
98
+ *
99
+ * List of prompts occurred during the benchmark testing.
100
+ */
101
+ prompts: AgenticaHistory[];
102
+
103
+ /**
104
+ * Usage of the token during the benchmark.
105
+ */
106
+ usage: AgenticaTokenUsage;
107
+
108
+ /**
109
+ * When the benchmark testing started.
110
+ */
111
+ started_at: Date;
112
+
113
+ /**
114
+ * When the benchmark testing completed.
115
+ */
116
+ completed_at: Date;
117
+ }
118
+ }
@@ -1,75 +1,75 @@
1
- /**
2
- * @module
3
- * This file contains the implementation of the IAgenticaCallBenchmarkResult class.
4
- *
5
- * @author Wrtn Technologies
6
- */
7
- import type { AgenticaTokenUsage } from "@agentica/core";
8
-
9
- import type { IAgenticaCallBenchmarkEvent } from "./IAgenticaCallBenchmarkEvent";
10
- import type { IAgenticaCallBenchmarkScenario } from "./IAgenticaCallBenchmarkScenario";
11
-
12
- /**
13
- * Result of the LLM function calling benchmark.
14
- *
15
- * `IAgenticaCallBenchmarkResult` is a structure representing the result
16
- * of the LLM function calling benchmark executed by the
17
- * {@link AgenticaCallBenchmark.execute execute} function.
18
- *
19
- * It contains every experiment results for each scenario, and aggregated
20
- * LLM token cost in the benchmark process.
21
- *
22
- * In each scenario, as the benchmark program experiments multiple times
23
- * about a scenario, it will contain multiple events. Also, because of the
24
- * characteristics of the LLM which is not predictable, the result can be
25
- * different in each event.
26
- *
27
- * @author Samchon
28
- */
29
- export interface IAgenticaCallBenchmarkResult {
30
- /**
31
- * Experiments for each scenario.
32
- */
33
- experiments: IAgenticaCallBenchmarkResult.IExperiment[];
34
-
35
- /**
36
- * Aggregated token usage information.
37
- */
38
- usage: AgenticaTokenUsage;
39
-
40
- /**
41
- * Start time of the benchmark.
42
- */
43
- started_at: Date;
44
-
45
- /**
46
- * End time of the benchmark.
47
- */
48
- completed_at: Date;
49
- }
50
- export namespace IAgenticaCallBenchmarkResult {
51
- /**
52
- * Experiment result about a scenario.
53
- */
54
- export interface IExperiment {
55
- /**
56
- * Scenario of the experiment.
57
- */
58
- scenario: IAgenticaCallBenchmarkScenario;
59
-
60
- /**
61
- * Events occurred during the benchmark in the scenario.
62
- *
63
- * When benchmarking a scenario, {@link AgenticaCallBenchmark} will
64
- * test a scenario multiple times with the given
65
- * {@link AgenticaCallBenchmark.IConfig.repeat repeat} count.
66
- * And the event is one of the repeated benchmark results.
67
- */
68
- events: IAgenticaCallBenchmarkEvent[];
69
-
70
- /**
71
- * LLM token usage information.
72
- */
73
- usage: AgenticaTokenUsage;
74
- }
75
- }
1
+ /**
2
+ * @module
3
+ * This file contains the implementation of the IAgenticaCallBenchmarkResult class.
4
+ *
5
+ * @author Wrtn Technologies
6
+ */
7
+ import type { AgenticaTokenUsage } from "@agentica/core";
8
+
9
+ import type { IAgenticaCallBenchmarkEvent } from "./IAgenticaCallBenchmarkEvent";
10
+ import type { IAgenticaCallBenchmarkScenario } from "./IAgenticaCallBenchmarkScenario";
11
+
12
+ /**
13
+ * Result of the LLM function calling benchmark.
14
+ *
15
+ * `IAgenticaCallBenchmarkResult` is a structure representing the result
16
+ * of the LLM function calling benchmark executed by the
17
+ * {@link AgenticaCallBenchmark.execute execute} function.
18
+ *
19
+ * It contains every experiment result for each scenario, and aggregated
20
+ * LLM token cost in the benchmark process.
21
+ *
22
+ * In each scenario, as the benchmark program experiments multiple times
23
+ * about a scenario, it will contain multiple events. Also, because of the
24
+ * characteristics of the LLM which is not predictable, the result can be
25
+ * different in each event.
26
+ *
27
+ * @author Samchon
28
+ */
29
+ export interface IAgenticaCallBenchmarkResult {
30
+ /**
31
+ * Experiments for each scenario.
32
+ */
33
+ experiments: IAgenticaCallBenchmarkResult.IExperiment[];
34
+
35
+ /**
36
+ * Aggregated token usage information.
37
+ */
38
+ usage: AgenticaTokenUsage;
39
+
40
+ /**
41
+ * Start time of the benchmark.
42
+ */
43
+ started_at: Date;
44
+
45
+ /**
46
+ * End time of the benchmark.
47
+ */
48
+ completed_at: Date;
49
+ }
50
+ export namespace IAgenticaCallBenchmarkResult {
51
+ /**
52
+ * Experiment result about a scenario.
53
+ */
54
+ export interface IExperiment {
55
+ /**
56
+ * Scenario of the experiment.
57
+ */
58
+ scenario: IAgenticaCallBenchmarkScenario;
59
+
60
+ /**
61
+ * Events occurred during the benchmark in the scenario.
62
+ *
63
+ * When benchmarking a scenario, {@link AgenticaCallBenchmark} will
64
+ * test a scenario multiple times with the given
65
+ * {@link AgenticaCallBenchmark.IConfig.repeat repeat} count.
66
+ * And the event is one of the repeated benchmark results.
67
+ */
68
+ events: IAgenticaCallBenchmarkEvent[];
69
+
70
+ /**
71
+ * LLM token usage information.
72
+ */
73
+ usage: AgenticaTokenUsage;
74
+ }
75
+ }
@@ -1,45 +1,45 @@
1
- /**
2
- * @module
3
- * This file contains the implementation of the IAgenticaCallBenchmarkScenario class.
4
- *
5
- * @author Wrtn Technologies
6
- */
7
- import type { IAgenticaBenchmarkExpected } from "./IAgenticaBenchmarkExpected";
8
-
9
- /**
10
- * Scenario of function calling.
11
- *
12
- * `IAgenticaCallBenchmarkScenario` is a data structure which
13
- * represents a function calling benchmark scenario. It contains two
14
- * properties; {@linkk text} and {@link operations}.
15
- *
16
- * The {@link text} means the conversation text from the user, and
17
- * the other {@link operations} are the expected operations that
18
- * should be selected by the `caller` agent through the {@link text}
19
- * conversation.
20
- *
21
- * @author Samchon
22
- */
23
- export interface IAgenticaCallBenchmarkScenario {
24
- /**
25
- * Name of the scenario.
26
- *
27
- * It must be unique within the benchmark scenarios.
28
- */
29
- name: string;
30
-
31
- /**
32
- * The prompt text from user.
33
- */
34
- text: string;
35
-
36
- /**
37
- * Expected function calling sequence.
38
- *
39
- * Sequence of operations (API operation or class function) that
40
- * should be called by both `selector` and `caller` agents from
41
- * the user's {@link text} conversation for the LLM
42
- * (Large Language Model) function calling.
43
- */
44
- expected: IAgenticaBenchmarkExpected;
45
- }
1
+ /**
2
+ * @module
3
+ * This file contains the implementation of the IAgenticaCallBenchmarkScenario class.
4
+ *
5
+ * @author Wrtn Technologies
6
+ */
7
+ import type { IAgenticaBenchmarkExpected } from "./IAgenticaBenchmarkExpected";
8
+
9
+ /**
10
+ * Scenario of function calling.
11
+ *
12
+ * `IAgenticaCallBenchmarkScenario` is a data structure which
13
+ * represents a function calling benchmark scenario. It contains two
14
+ * properties; {@link text} and {@link expected}.
15
+ *
16
+ * The {@link text} means the conversation text from the user, and
17
+ * the other {@link expected} describes the expected operations that
18
+ * should be selected by the `caller` agent through the {@link text}
19
+ * conversation.
20
+ *
21
+ * @author Samchon
22
+ */
23
+ export interface IAgenticaCallBenchmarkScenario {
24
+ /**
25
+ * Name of the scenario.
26
+ *
27
+ * It must be unique within the benchmark scenarios.
28
+ */
29
+ name: string;
30
+
31
+ /**
32
+ * The prompt text from user.
33
+ */
34
+ text: string;
35
+
36
+ /**
37
+ * Expected function calling sequence.
38
+ *
39
+ * Sequence of operations (API operation or class function) that
40
+ * should be called by both `selector` and `caller` agents from
41
+ * the user's {@link text} conversation for the LLM
42
+ * (Large Language Model) function calling.
43
+ */
44
+ expected: IAgenticaBenchmarkExpected;
45
+ }