@agentica/benchmark 0.44.0-dev.20260313-2 → 0.44.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,122 +1,122 @@
1
- /**
2
- * @module
3
- * This file contains the implementation of the IAgenticaSelectBenchmarkEvent class.
4
- *
5
- * @author Wrtn Technologies
6
- */
7
- import type {
8
- AgenticaAssistantMessageHistory,
9
- AgenticaOperationSelection,
10
- AgenticaTokenUsage,
11
- } from "@agentica/core";
12
-
13
- import type { IAgenticaSelectBenchmarkScenario } from "./IAgenticaSelectBenchmarkScenario";
14
-
15
- /**
16
- * Event of LLM function selection benchmark.
17
- *
18
- * `IAgenticaSelectBenchmarkEvent` is an union type of the events occurred
19
- * during the LLM function selection benchmark, representing one phase of
20
- * the benchmark testing about a scenario.
21
- *
22
- * In other words, when {@link AgenticaSelectBenchmark} executes the
23
- * benchmark, it will run the benchmark will test a scenario repeately with
24
- * the given configuration {@link AgenticaSelectBenchmark.IConfig.repeat}.
25
- * And in the repeated benchmark about a scenario,
26
- * `IAgenticaSelectBenchmarkEvent` is one of the repeated testing.
27
- *
28
- * For reference, `IAgenticaSelectBenchmarkEvent` is categorized into three
29
- * types: `success`, `failure`, and `error`. The `success` means the
30
- * benchmark testing is fully meet the expected scenario, and `failure`
31
- * means that the `selector` had not selected the expected operations. The
32
- * last type, `error`, means that an error had been occurred during the
33
- * benchmark testing.
34
- *
35
- * @author Samchon
36
- */
37
- export type IAgenticaSelectBenchmarkEvent
38
- = | IAgenticaSelectBenchmarkEvent.ISuccess
39
- | IAgenticaSelectBenchmarkEvent.IFailure
40
- | IAgenticaSelectBenchmarkEvent.IError;
41
- export namespace IAgenticaSelectBenchmarkEvent {
42
- /**
43
- * Success event type.
44
- *
45
- * The `success` event type represents that the benchmark testing is
46
- * fully meet the expected scenario.
47
- */
48
- export interface ISuccess extends IEventBase<"success"> {
49
- /**
50
- * Usage of the token during the benchmark.
51
- */
52
- usage: AgenticaTokenUsage;
53
-
54
- /**
55
- * Selected operations in the benchmark.
56
- */
57
- selected: AgenticaOperationSelection[];
58
-
59
- /**
60
- * Prompt messages from the assistant.
61
- */
62
- assistantPrompts: AgenticaAssistantMessageHistory[];
63
- }
64
-
65
- /**
66
- * Failure event type.
67
- *
68
- * The `failure` event type represents that the `selector` had not
69
- * selected the expected scenario in the benchmark testing.
70
- */
71
- export interface IFailure extends IEventBase<"failure"> {
72
- /**
73
- * Usage of the token during the benchmark.
74
- */
75
- usage: AgenticaTokenUsage;
76
-
77
- /**
78
- * Selected operations in the benchmark.
79
- */
80
- selected: AgenticaOperationSelection[];
81
-
82
- /**
83
- * Prompt messages from the assistant.
84
- */
85
- assistantPrompts: AgenticaAssistantMessageHistory[];
86
- }
87
-
88
- /**
89
- * Error event type.
90
- *
91
- * The `error` event type repsents that an error had been occurred
92
- * during the benchmark testing.
93
- */
94
- export interface IError extends IEventBase<"error"> {
95
- /**
96
- * Error occurred during the benchmark.
97
- */
98
- error: unknown;
99
- }
100
-
101
- interface IEventBase<Type extends string> {
102
- /**
103
- * Discriminant type.
104
- */
105
- type: Type;
106
-
107
- /**
108
- * Expected scenario.
109
- */
110
- scenario: IAgenticaSelectBenchmarkScenario;
111
-
112
- /**
113
- * When the benchmark testing started.
114
- */
115
- started_at: Date;
116
-
117
- /**
118
- * When the benchmark testing completed.
119
- */
120
- completed_at: Date;
121
- }
122
- }
1
+ /**
2
+ * @module
3
+ * This file contains the implementation of the IAgenticaSelectBenchmarkEvent class.
4
+ *
5
+ * @author Wrtn Technologies
6
+ */
7
+ import type {
8
+ AgenticaAssistantMessageHistory,
9
+ AgenticaOperationSelection,
10
+ AgenticaTokenUsage,
11
+ } from "@agentica/core";
12
+
13
+ import type { IAgenticaSelectBenchmarkScenario } from "./IAgenticaSelectBenchmarkScenario";
14
+
15
+ /**
16
+ * Event of LLM function selection benchmark.
17
+ *
18
+ * `IAgenticaSelectBenchmarkEvent` is a union type of the events that occurred
19
+ * during the LLM function selection benchmark, representing one phase of
20
+ * the benchmark testing about a scenario.
21
+ *
22
+ * In other words, when {@link AgenticaSelectBenchmark} executes the
23
+ * benchmark, it will test a scenario repeatedly with
24
+ * the given configuration {@link AgenticaSelectBenchmark.IConfig.repeat}.
25
+ * And in the repeated benchmark about a scenario,
26
+ * `IAgenticaSelectBenchmarkEvent` represents one of the repeated tests.
27
+ *
28
+ * For reference, `IAgenticaSelectBenchmarkEvent` is categorized into three
29
+ * types: `success`, `failure`, and `error`. The `success` means the
30
+ * benchmark testing fully meets the expected scenario, and `failure`
31
+ * means that the `selector` had not selected the expected operations. The
32
+ * last type, `error`, means that an error occurred during the
33
+ * benchmark testing.
34
+ *
35
+ * @author Samchon
36
+ */
37
+ export type IAgenticaSelectBenchmarkEvent
38
+ = | IAgenticaSelectBenchmarkEvent.ISuccess
39
+ | IAgenticaSelectBenchmarkEvent.IFailure
40
+ | IAgenticaSelectBenchmarkEvent.IError;
41
+ export namespace IAgenticaSelectBenchmarkEvent {
42
+ /**
43
+ * Success event type.
44
+ *
45
+ * The `success` event type represents that the benchmark testing
45
+ * fully meets the expected scenario.
47
+ */
48
+ export interface ISuccess extends IEventBase<"success"> {
49
+ /**
50
+ * Usage of the token during the benchmark.
51
+ */
52
+ usage: AgenticaTokenUsage;
53
+
54
+ /**
55
+ * Selected operations in the benchmark.
56
+ */
57
+ selected: AgenticaOperationSelection[];
58
+
59
+ /**
60
+ * Prompt messages from the assistant.
61
+ */
62
+ assistantPrompts: AgenticaAssistantMessageHistory[];
63
+ }
64
+
65
+ /**
66
+ * Failure event type.
67
+ *
68
+ * The `failure` event type represents that the `selector` had not
69
+ * selected the expected scenario in the benchmark testing.
70
+ */
71
+ export interface IFailure extends IEventBase<"failure"> {
72
+ /**
73
+ * Usage of the token during the benchmark.
74
+ */
75
+ usage: AgenticaTokenUsage;
76
+
77
+ /**
78
+ * Selected operations in the benchmark.
79
+ */
80
+ selected: AgenticaOperationSelection[];
81
+
82
+ /**
83
+ * Prompt messages from the assistant.
84
+ */
85
+ assistantPrompts: AgenticaAssistantMessageHistory[];
86
+ }
87
+
88
+ /**
89
+ * Error event type.
90
+ *
91
+ * The `error` event type represents that an error occurred
92
+ * during the benchmark testing.
93
+ */
94
+ export interface IError extends IEventBase<"error"> {
95
+ /**
96
+ * Error occurred during the benchmark.
97
+ */
98
+ error: unknown;
99
+ }
100
+
101
+ interface IEventBase<Type extends string> {
102
+ /**
103
+ * Discriminant type.
104
+ */
105
+ type: Type;
106
+
107
+ /**
108
+ * Expected scenario.
109
+ */
110
+ scenario: IAgenticaSelectBenchmarkScenario;
111
+
112
+ /**
113
+ * When the benchmark testing started.
114
+ */
115
+ started_at: Date;
116
+
117
+ /**
118
+ * When the benchmark testing completed.
119
+ */
120
+ completed_at: Date;
121
+ }
122
+ }
@@ -1,75 +1,75 @@
1
- /**
2
- * @module
3
- * This file contains the implementation of the IAgenticaSelectBenchmarkResult class.
4
- *
5
- * @author Wrtn Technologies
6
- */
7
- import type { AgenticaTokenUsage } from "@agentica/core";
8
-
9
- import type { IAgenticaSelectBenchmarkEvent } from "./IAgenticaSelectBenchmarkEvent";
10
- import type { IAgenticaSelectBenchmarkScenario } from "./IAgenticaSelectBenchmarkScenario";
11
-
12
- /**
13
- * Result of the LLM function selection benchmark.
14
- *
15
- * `IAgenticaSelectBenchmarkResult` is a structure representing the result
16
- * of the LLM function selection benchmark executed by the
17
- * {@link AgenticaSelectBenchmark.execute execute} function.
18
- *
19
- * It contains every experiment results for each scenario, and aggregated
20
- * LLM token cost in the benchmark process.
21
- *
22
- * In each scenario, as the benchmark program experiments multiple times
23
- * about a scenario, it will contain multiple events. Also, because of the
24
- * characteristics of the LLM which is not predictable, the result can be
25
- * different in each event.
26
- *
27
- * @author Samchon
28
- */
29
- export interface IAgenticaSelectBenchmarkResult {
30
- /**
31
- * Experiments for each scenario.
32
- */
33
- experiments: IAgenticaSelectBenchmarkResult.IExperiment[];
34
-
35
- /**
36
- * Aggregated token usage information.
37
- */
38
- usage: AgenticaTokenUsage;
39
-
40
- /**
41
- * Start time of the benchmark.
42
- */
43
- started_at: Date;
44
-
45
- /**
46
- * End time of the benchmark.
47
- */
48
- completed_at: Date;
49
- }
50
- export namespace IAgenticaSelectBenchmarkResult {
51
- /**
52
- * Experiment result about a scenario.
53
- */
54
- export interface IExperiment {
55
- /**
56
- * Expected scenario.
57
- */
58
- scenario: IAgenticaSelectBenchmarkScenario;
59
-
60
- /**
61
- * Events occurred during the benchmark in the scenario.
62
- *
63
- * When benchmarking a scenario, {@link AgenticaSelectBenchmark} will
64
- * test a scenario multiple times with the given
65
- * {@link AgenticaSelectBenchmark.IConfig.repeat repeat} count.
66
- * And the event is one of the repeated benchmark results.
67
- */
68
- events: IAgenticaSelectBenchmarkEvent[];
69
-
70
- /**
71
- * LLM token usage information.
72
- */
73
- usage: AgenticaTokenUsage;
74
- }
75
- }
1
+ /**
2
+ * @module
3
+ * This file contains the implementation of the IAgenticaSelectBenchmarkResult class.
4
+ *
5
+ * @author Wrtn Technologies
6
+ */
7
+ import type { AgenticaTokenUsage } from "@agentica/core";
8
+
9
+ import type { IAgenticaSelectBenchmarkEvent } from "./IAgenticaSelectBenchmarkEvent";
10
+ import type { IAgenticaSelectBenchmarkScenario } from "./IAgenticaSelectBenchmarkScenario";
11
+
12
+ /**
13
+ * Result of the LLM function selection benchmark.
14
+ *
15
+ * `IAgenticaSelectBenchmarkResult` is a structure representing the result
16
+ * of the LLM function selection benchmark executed by the
17
+ * {@link AgenticaSelectBenchmark.execute execute} function.
18
+ *
19
+ * It contains the experiment results for each scenario, and aggregated
20
+ * LLM token cost in the benchmark process.
21
+ *
22
+ * In each scenario, as the benchmark program experiments multiple times
23
+ * about a scenario, it will contain multiple events. Also, because of the
24
+ * characteristics of the LLM which is not predictable, the result can be
25
+ * different in each event.
26
+ *
27
+ * @author Samchon
28
+ */
29
+ export interface IAgenticaSelectBenchmarkResult {
30
+ /**
31
+ * Experiments for each scenario.
32
+ */
33
+ experiments: IAgenticaSelectBenchmarkResult.IExperiment[];
34
+
35
+ /**
36
+ * Aggregated token usage information.
37
+ */
38
+ usage: AgenticaTokenUsage;
39
+
40
+ /**
41
+ * Start time of the benchmark.
42
+ */
43
+ started_at: Date;
44
+
45
+ /**
46
+ * End time of the benchmark.
47
+ */
48
+ completed_at: Date;
49
+ }
50
+ export namespace IAgenticaSelectBenchmarkResult {
51
+ /**
52
+ * Experiment result about a scenario.
53
+ */
54
+ export interface IExperiment {
55
+ /**
56
+ * Expected scenario.
57
+ */
58
+ scenario: IAgenticaSelectBenchmarkScenario;
59
+
60
+ /**
61
+ * Events occurred during the benchmark in the scenario.
62
+ *
63
+ * When benchmarking a scenario, {@link AgenticaSelectBenchmark} will
64
+ * test a scenario multiple times with the given
65
+ * {@link AgenticaSelectBenchmark.IConfig.repeat repeat} count.
66
+ * And the event is one of the repeated benchmark results.
67
+ */
68
+ events: IAgenticaSelectBenchmarkEvent[];
69
+
70
+ /**
71
+ * LLM token usage information.
72
+ */
73
+ usage: AgenticaTokenUsage;
74
+ }
75
+ }
@@ -1,45 +1,45 @@
1
- /**
2
- * @module
3
- * This file contains the implementation of the IAgenticaSelectBenchmarkScenario class.
4
- *
5
- * @author Wrtn Technologies
6
- */
7
- import type { IAgenticaBenchmarkExpected } from "./IAgenticaBenchmarkExpected";
8
-
9
- /**
10
- * Scenario of function selection.
11
- *
12
- * `IAgenticaSelectBenchmarkScenario` is a data structure which
13
- * represents a function selection benchmark scenario. It contains two
14
- * properties; {@linkk text} and {@link operations}.
15
- *
16
- * The {@link text} means the conversation text from the user, and
17
- * the other {@link operations} are the expected operations that
18
- * should be selected by the `selector` agent through the {@link text}
19
- * conversation.
20
- *
21
- * @author Samchon
22
- */
23
- export interface IAgenticaSelectBenchmarkScenario {
24
- /**
25
- * Name of the scenario.
26
- *
27
- * It must be unique within the benchmark scenarios.
28
- */
29
- name: string;
30
-
31
- /**
32
- * The prompt text from user.
33
- */
34
- text: string;
35
-
36
- /**
37
- * Expected function selection sequence.
38
- *
39
- * Sequence of operations (API operation or class function) that
40
- * should be selected by the `selector` agent from the user's
41
- * {@link text} conversation for the LLM (Large Language Model)
42
- * function selection.
43
- */
44
- expected: IAgenticaBenchmarkExpected;
45
- }
1
+ /**
2
+ * @module
3
+ * This file contains the implementation of the IAgenticaSelectBenchmarkScenario class.
4
+ *
5
+ * @author Wrtn Technologies
6
+ */
7
+ import type { IAgenticaBenchmarkExpected } from "./IAgenticaBenchmarkExpected";
8
+
9
+ /**
10
+ * Scenario of function selection.
11
+ *
12
+ * `IAgenticaSelectBenchmarkScenario` is a data structure which
13
+ * represents a function selection benchmark scenario. It contains two
14
+ * properties; {@link text} and {@link expected}.
15
+ *
16
+ * The {@link text} means the conversation text from the user, and
17
+ * the other {@link expected} describes the expected operations that
18
+ * should be selected by the `selector` agent through the {@link text}
19
+ * conversation.
20
+ *
21
+ * @author Samchon
22
+ */
23
+ export interface IAgenticaSelectBenchmarkScenario {
24
+ /**
25
+ * Name of the scenario.
26
+ *
27
+ * It must be unique within the benchmark scenarios.
28
+ */
29
+ name: string;
30
+
31
+ /**
32
+ * The prompt text from user.
33
+ */
34
+ text: string;
35
+
36
+ /**
37
+ * Expected function selection sequence.
38
+ *
39
+ * Sequence of operations (API operation or class function) that
40
+ * should be selected by the `selector` agent from the user's
41
+ * {@link text} conversation for the LLM (Large Language Model)
42
+ * function selection.
43
+ */
44
+ expected: IAgenticaBenchmarkExpected;
45
+ }
@@ -1,16 +1,16 @@
1
- /**
2
- * @module
3
- * This file contains functions to work with MathUtil.
4
- *
5
- * @author Wrtn Technologies
6
- */
7
-
8
- export const MathUtil = {
9
- /**
10
- * Round a number to 2 decimal places.
11
- *
12
- * @param value - The number to round.
13
- * @returns The rounded number.
14
- */
15
- round: (value: number): number => Math.floor(value * 100) / 100,
16
- };
1
+ /**
2
+ * @module
3
+ * This file contains functions to work with MathUtil.
4
+ *
5
+ * @author Wrtn Technologies
6
+ */
7
+
8
+ export const MathUtil = {
9
+ /**
10
+ * Round a number to 2 decimal places.
11
+ *
12
+ * @param value - The number to round.
13
+ * @returns The rounded number.
14
+ */
15
+ round: (value: number): number => Math.floor(value * 100) / 100,
16
+ };