@agentica/benchmark 0.8.1 → 0.8.3-dev.20250227
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -21
- package/README.md +324 -324
- package/lib/index.mjs.map +1 -1
- package/lib/internal/AgenticaCallBenchmarkReporter.js.map +1 -1
- package/lib/internal/AgenticaSelectBenchmarkReporter.js.map +1 -1
- package/package.json +2 -2
- package/src/AgenticaCallBenchmark.ts +265 -265
- package/src/AgenticaSelectBenchmark.ts +254 -254
- package/src/index.ts +3 -3
- package/src/internal/AgenticaBenchmarkPredicator.ts +216 -216
- package/src/internal/AgenticaBenchmarkUtil.ts +40 -40
- package/src/internal/AgenticaCallBenchmarkReporter.ts +180 -181
- package/src/internal/AgenticaPromptReporter.ts +43 -43
- package/src/internal/AgenticaSelectBenchmarkReporter.ts +210 -212
- package/src/structures/IAgenticaBenchmarkExpected.ts +58 -58
- package/src/structures/IAgenticaCallBenchmarkEvent.ts +109 -109
- package/src/structures/IAgenticaCallBenchmarkResult.ts +69 -69
- package/src/structures/IAgenticaCallBenchmarkScenario.ts +39 -39
- package/src/structures/IAgenticaSelectBenchmarkEvent.ts +110 -110
- package/src/structures/IAgenticaSelectBenchmarkResult.ts +69 -69
- package/src/structures/IAgenticaSelectBenchmarkScenario.ts +39 -39
- package/src/utils/MathUtil.ts +3 -3
|
@@ -1,69 +1,69 @@
|
|
|
1
|
-
import { IAgenticaTokenUsage } from "@agentica/core";
|
|
2
|
-
|
|
3
|
-
import { IAgenticaSelectBenchmarkEvent } from "./IAgenticaSelectBenchmarkEvent";
|
|
4
|
-
import { IAgenticaSelectBenchmarkScenario } from "./IAgenticaSelectBenchmarkScenario";
|
|
5
|
-
|
|
6
|
-
/**
|
|
7
|
-
* Result of the LLM function selection benchmark.
|
|
8
|
-
*
|
|
9
|
-
* `IAgenticaSelectBenchmarkResult` is a structure representing the result
|
|
10
|
-
* of the LLM function selection benchmark executed by the
|
|
11
|
-
* {@link AgenticaSelectBenchmark.execute execute} function.
|
|
12
|
-
*
|
|
13
|
-
* It contains every experiment result for each scenario, and the aggregated
|
|
14
|
-
* LLM token cost in the benchmark process.
|
|
15
|
-
*
|
|
16
|
-
* In each scenario, as the benchmark program experiments multiple times
|
|
17
|
-
* about a scenario, it will contain multiple events. Also, because of the
|
|
18
|
-
* characteristics of the LLM which is not predictable, the result can be
|
|
19
|
-
* different in each event.
|
|
20
|
-
*
|
|
21
|
-
* @author Samchon
|
|
22
|
-
*/
|
|
23
|
-
export interface IAgenticaSelectBenchmarkResult {
|
|
24
|
-
/**
|
|
25
|
-
* Experiments for each scenario.
|
|
26
|
-
*/
|
|
27
|
-
experiments: IAgenticaSelectBenchmarkResult.IExperiment[];
|
|
28
|
-
|
|
29
|
-
/**
|
|
30
|
-
* Aggregated token usage information.
|
|
31
|
-
*/
|
|
32
|
-
usage: IAgenticaTokenUsage;
|
|
33
|
-
|
|
34
|
-
/**
|
|
35
|
-
* Start time of the benchmark.
|
|
36
|
-
*/
|
|
37
|
-
started_at: Date;
|
|
38
|
-
|
|
39
|
-
/**
|
|
40
|
-
* End time of the benchmark.
|
|
41
|
-
*/
|
|
42
|
-
completed_at: Date;
|
|
43
|
-
}
|
|
44
|
-
export namespace IAgenticaSelectBenchmarkResult {
|
|
45
|
-
/**
|
|
46
|
-
* Experiment result about a scenario.
|
|
47
|
-
*/
|
|
48
|
-
export interface IExperiment {
|
|
49
|
-
/**
|
|
50
|
-
* Expected scenario.
|
|
51
|
-
*/
|
|
52
|
-
scenario: IAgenticaSelectBenchmarkScenario;
|
|
53
|
-
|
|
54
|
-
/**
|
|
55
|
-
* Events that occurred during the benchmark in the scenario.
|
|
56
|
-
*
|
|
57
|
-
* When benchmarking a scenario, {@link AgenticaSelectBenchmark} will
|
|
58
|
-
* test a scenario multiple times with the given
|
|
59
|
-
* {@link AgenticaSelectBenchmark.IConfig.repeat repeat} count.
|
|
60
|
-
* And the event is one of the repeated benchmark results.
|
|
61
|
-
*/
|
|
62
|
-
events: IAgenticaSelectBenchmarkEvent[];
|
|
63
|
-
|
|
64
|
-
/**
|
|
65
|
-
* LLM token usage information.
|
|
66
|
-
*/
|
|
67
|
-
usage: IAgenticaTokenUsage;
|
|
68
|
-
}
|
|
69
|
-
}
|
|
1
|
+
import { IAgenticaTokenUsage } from "@agentica/core";
|
|
2
|
+
|
|
3
|
+
import { IAgenticaSelectBenchmarkEvent } from "./IAgenticaSelectBenchmarkEvent";
|
|
4
|
+
import { IAgenticaSelectBenchmarkScenario } from "./IAgenticaSelectBenchmarkScenario";
|
|
5
|
+
|
|
6
|
+
/**
|
|
7
|
+
* Result of the LLM function selection benchmark.
|
|
8
|
+
*
|
|
9
|
+
* `IAgenticaSelectBenchmarkResult` is a structure representing the result
|
|
10
|
+
* of the LLM function selection benchmark executed by the
|
|
11
|
+
* {@link AgenticaSelectBenchmark.execute execute} function.
|
|
12
|
+
*
|
|
13
|
+
* It contains every experiment result for each scenario, and the aggregated
|
|
14
|
+
* LLM token cost in the benchmark process.
|
|
15
|
+
*
|
|
16
|
+
* In each scenario, as the benchmark program experiments multiple times
|
|
17
|
+
* about a scenario, it will contain multiple events. Also, because of the
|
|
18
|
+
* characteristics of the LLM which is not predictable, the result can be
|
|
19
|
+
* different in each event.
|
|
20
|
+
*
|
|
21
|
+
* @author Samchon
|
|
22
|
+
*/
|
|
23
|
+
export interface IAgenticaSelectBenchmarkResult {
|
|
24
|
+
/**
|
|
25
|
+
* Experiments for each scenario.
|
|
26
|
+
*/
|
|
27
|
+
experiments: IAgenticaSelectBenchmarkResult.IExperiment[];
|
|
28
|
+
|
|
29
|
+
/**
|
|
30
|
+
* Aggregated token usage information.
|
|
31
|
+
*/
|
|
32
|
+
usage: IAgenticaTokenUsage;
|
|
33
|
+
|
|
34
|
+
/**
|
|
35
|
+
* Start time of the benchmark.
|
|
36
|
+
*/
|
|
37
|
+
started_at: Date;
|
|
38
|
+
|
|
39
|
+
/**
|
|
40
|
+
* End time of the benchmark.
|
|
41
|
+
*/
|
|
42
|
+
completed_at: Date;
|
|
43
|
+
}
|
|
44
|
+
export namespace IAgenticaSelectBenchmarkResult {
|
|
45
|
+
/**
|
|
46
|
+
* Experiment result about a scenario.
|
|
47
|
+
*/
|
|
48
|
+
export interface IExperiment {
|
|
49
|
+
/**
|
|
50
|
+
* Expected scenario.
|
|
51
|
+
*/
|
|
52
|
+
scenario: IAgenticaSelectBenchmarkScenario;
|
|
53
|
+
|
|
54
|
+
/**
|
|
55
|
+
* Events that occurred during the benchmark in the scenario.
|
|
56
|
+
*
|
|
57
|
+
* When benchmarking a scenario, {@link AgenticaSelectBenchmark} will
|
|
58
|
+
* test a scenario multiple times with the given
|
|
59
|
+
* {@link AgenticaSelectBenchmark.IConfig.repeat repeat} count.
|
|
60
|
+
* And the event is one of the repeated benchmark results.
|
|
61
|
+
*/
|
|
62
|
+
events: IAgenticaSelectBenchmarkEvent[];
|
|
63
|
+
|
|
64
|
+
/**
|
|
65
|
+
* LLM token usage information.
|
|
66
|
+
*/
|
|
67
|
+
usage: IAgenticaTokenUsage;
|
|
68
|
+
}
|
|
69
|
+
}
|
|
@@ -1,39 +1,39 @@
|
|
|
1
|
-
import { IAgenticaBenchmarkExpected } from "./IAgenticaBenchmarkExpected";
|
|
2
|
-
|
|
3
|
-
/**
|
|
4
|
-
* Scenario of function selection.
|
|
5
|
-
*
|
|
6
|
-
* `IAgenticaSelectBenchmarkScenario` is a data structure which
|
|
7
|
-
* represents a function selection benchmark scenario. It contains two
|
|
8
|
-
* properties; {@link text} and {@link expected}.
|
|
9
|
-
*
|
|
10
|
-
* The {@link text} means the conversation text from the user, and
|
|
11
|
-
* the other {@link expected} are the expected operations that
|
|
12
|
-
* should be selected by the `selector` agent through the {@link text}
|
|
13
|
-
* conversation.
|
|
14
|
-
*
|
|
15
|
-
* @author Samchon
|
|
16
|
-
*/
|
|
17
|
-
export interface IAgenticaSelectBenchmarkScenario {
|
|
18
|
-
/**
|
|
19
|
-
* Name of the scenario.
|
|
20
|
-
*
|
|
21
|
-
* It must be unique within the benchmark scenarios.
|
|
22
|
-
*/
|
|
23
|
-
name: string;
|
|
24
|
-
|
|
25
|
-
/**
|
|
26
|
-
* The prompt text from user.
|
|
27
|
-
*/
|
|
28
|
-
text: string;
|
|
29
|
-
|
|
30
|
-
/**
|
|
31
|
-
* Expected function selection sequence.
|
|
32
|
-
*
|
|
33
|
-
* Sequence of operations (API operation or class function) that
|
|
34
|
-
* should be selected by the `selector` agent from the user's
|
|
35
|
-
* {@link text} conversation for the LLM (Large Language Model)
|
|
36
|
-
* function selection.
|
|
37
|
-
*/
|
|
38
|
-
expected: IAgenticaBenchmarkExpected;
|
|
39
|
-
}
|
|
1
|
+
import { IAgenticaBenchmarkExpected } from "./IAgenticaBenchmarkExpected";
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Scenario of function selection.
|
|
5
|
+
*
|
|
6
|
+
* `IAgenticaSelectBenchmarkScenario` is a data structure which
|
|
7
|
+
* represents a function selection benchmark scenario. It contains two
|
|
8
|
+
* properties; {@link text} and {@link expected}.
|
|
9
|
+
*
|
|
10
|
+
* The {@link text} means the conversation text from the user, and
|
|
11
|
+
* the other {@link expected} are the expected operations that
|
|
12
|
+
* should be selected by the `selector` agent through the {@link text}
|
|
13
|
+
* conversation.
|
|
14
|
+
*
|
|
15
|
+
* @author Samchon
|
|
16
|
+
*/
|
|
17
|
+
export interface IAgenticaSelectBenchmarkScenario {
|
|
18
|
+
/**
|
|
19
|
+
* Name of the scenario.
|
|
20
|
+
*
|
|
21
|
+
* It must be unique within the benchmark scenarios.
|
|
22
|
+
*/
|
|
23
|
+
name: string;
|
|
24
|
+
|
|
25
|
+
/**
|
|
26
|
+
* The prompt text from user.
|
|
27
|
+
*/
|
|
28
|
+
text: string;
|
|
29
|
+
|
|
30
|
+
/**
|
|
31
|
+
* Expected function selection sequence.
|
|
32
|
+
*
|
|
33
|
+
* Sequence of operations (API operation or class function) that
|
|
34
|
+
* should be selected by the `selector` agent from the user's
|
|
35
|
+
* {@link text} conversation for the LLM (Large Language Model)
|
|
36
|
+
* function selection.
|
|
37
|
+
*/
|
|
38
|
+
expected: IAgenticaBenchmarkExpected;
|
|
39
|
+
}
|
package/src/utils/MathUtil.ts
CHANGED
|
@@ -1,3 +1,3 @@
|
|
|
1
|
-
export namespace MathUtil {
|
|
2
|
-
/**
 * Cuts a number down to two decimal places.
 *
 * The value is scaled by 100, floored with `Math.floor`, and scaled back,
 * so the result never exceeds the input (negative inputs move away from zero).
 *
 * @param value Number to truncate.
 * @returns `value` floored to a multiple of 0.01.
 */
export const round = (value: number): number => {
  const hundredths: number = Math.floor(value * 100);
  return hundredths / 100;
};
|
|
3
|
-
}
|
|
1
|
+
export namespace MathUtil {
|
|
2
|
+
/**
 * Cuts a number down to two decimal places.
 *
 * The value is scaled by 100, floored with `Math.floor`, and scaled back,
 * so the result never exceeds the input (negative inputs move away from zero).
 *
 * @param value Number to truncate.
 * @returns `value` floored to a multiple of 0.01.
 */
export const round = (value: number): number => {
  const hundredths: number = Math.floor(value * 100);
  return hundredths / 100;
};
|
|
3
|
+
}
|