@agentica/benchmark 0.8.3 → 0.9.0-dev.20250302
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -21
- package/README.md +326 -324
- package/lib/AgenticaCallBenchmark.d.ts +7 -6
- package/lib/AgenticaCallBenchmark.js.map +1 -1
- package/lib/AgenticaSelectBenchmark.d.ts +7 -6
- package/lib/AgenticaSelectBenchmark.js.map +1 -1
- package/lib/index.mjs +46 -1
- package/lib/index.mjs.map +1 -1
- package/lib/internal/AgenticaBenchmarkPredicator.d.ts +5 -4
- package/lib/internal/AgenticaBenchmarkPredicator.js +74 -2
- package/lib/internal/AgenticaBenchmarkPredicator.js.map +1 -1
- package/lib/internal/AgenticaBenchmarkUtil.d.ts +2 -1
- package/lib/internal/AgenticaBenchmarkUtil.js.map +1 -1
- package/lib/internal/AgenticaCallBenchmarkReporter.d.ts +2 -1
- package/lib/internal/AgenticaCallBenchmarkReporter.js.map +1 -1
- package/lib/internal/AgenticaPromptReporter.d.ts +2 -1
- package/lib/internal/AgenticaPromptReporter.js.map +1 -1
- package/lib/internal/AgenticaSelectBenchmarkReporter.js.map +1 -1
- package/lib/structures/IAgenticaBenchmarkExpected.d.ts +10 -9
- package/lib/structures/IAgenticaCallBenchmarkEvent.d.ts +8 -7
- package/lib/structures/IAgenticaCallBenchmarkResult.d.ts +6 -5
- package/lib/structures/IAgenticaCallBenchmarkScenario.d.ts +3 -2
- package/lib/structures/IAgenticaSelectBenchmarkEvent.d.ts +9 -8
- package/lib/structures/IAgenticaSelectBenchmarkResult.d.ts +6 -5
- package/lib/structures/IAgenticaSelectBenchmarkScenario.d.ts +3 -2
- package/package.json +5 -5
- package/src/AgenticaCallBenchmark.ts +268 -265
- package/src/AgenticaSelectBenchmark.ts +256 -254
- package/src/index.ts +3 -3
- package/src/internal/AgenticaBenchmarkPredicator.ts +224 -216
- package/src/internal/AgenticaBenchmarkUtil.ts +44 -40
- package/src/internal/AgenticaCallBenchmarkReporter.ts +183 -180
- package/src/internal/AgenticaPromptReporter.ts +46 -43
- package/src/internal/AgenticaSelectBenchmarkReporter.ts +213 -210
- package/src/structures/IAgenticaBenchmarkExpected.ts +68 -58
- package/src/structures/IAgenticaCallBenchmarkEvent.ts +113 -109
- package/src/structures/IAgenticaCallBenchmarkResult.ts +70 -69
- package/src/structures/IAgenticaCallBenchmarkScenario.ts +43 -39
- package/src/structures/IAgenticaSelectBenchmarkEvent.ts +114 -110
- package/src/structures/IAgenticaSelectBenchmarkResult.ts +72 -69
- package/src/structures/IAgenticaSelectBenchmarkScenario.ts +43 -39
- package/src/utils/MathUtil.ts +3 -3
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { IAgenticaPrompt, IAgenticaTokenUsage } from "@agentica/core";
|
|
2
|
+
import { ILlmSchema } from "@samchon/openapi";
|
|
2
3
|
import { IAgenticaCallBenchmarkScenario } from "./IAgenticaCallBenchmarkScenario";
|
|
3
4
|
/**
|
|
4
5
|
* Event of LLM function selection benchmark.
|
|
@@ -22,7 +23,7 @@ import { IAgenticaCallBenchmarkScenario } from "./IAgenticaCallBenchmarkScenario
|
|
|
22
23
|
*
|
|
23
24
|
* @author Samchon
|
|
24
25
|
*/
|
|
25
|
-
export type IAgenticaCallBenchmarkEvent = IAgenticaCallBenchmarkEvent.ISuccess | IAgenticaCallBenchmarkEvent.IFailure | IAgenticaCallBenchmarkEvent.IError
|
|
26
|
+
export type IAgenticaCallBenchmarkEvent<Model extends ILlmSchema.Model> = IAgenticaCallBenchmarkEvent.ISuccess<Model> | IAgenticaCallBenchmarkEvent.IFailure<Model> | IAgenticaCallBenchmarkEvent.IError<Model>;
|
|
26
27
|
export declare namespace IAgenticaCallBenchmarkEvent {
|
|
27
28
|
/**
|
|
28
29
|
* Success event type.
|
|
@@ -30,7 +31,7 @@ export declare namespace IAgenticaCallBenchmarkEvent {
|
|
|
30
31
|
* The `success` event type represents that the benchmark
|
|
31
32
|
* testing fully meets the expected scenario.
|
|
32
33
|
*/
|
|
33
|
-
export interface ISuccess extends IEventBase<"success"> {
|
|
34
|
+
export interface ISuccess<Model extends ILlmSchema.Model> extends IEventBase<"success", Model> {
|
|
34
35
|
/**
|
|
35
36
|
* Whether the function selection succeeded.
|
|
36
37
|
*/
|
|
@@ -47,7 +48,7 @@ export declare namespace IAgenticaCallBenchmarkEvent {
|
|
|
47
48
|
* or `caller` agents have not selected or called following the
|
|
48
49
|
* expected scenario in the benchmark testing.
|
|
49
50
|
*/
|
|
50
|
-
export interface IFailure extends IEventBase<"failure"> {
|
|
51
|
+
export interface IFailure<Model extends ILlmSchema.Model> extends IEventBase<"failure", Model> {
|
|
51
52
|
/**
|
|
52
53
|
* Whether the function selection succeeded.
|
|
53
54
|
*/
|
|
@@ -57,13 +58,13 @@ export declare namespace IAgenticaCallBenchmarkEvent {
|
|
|
57
58
|
*/
|
|
58
59
|
call: boolean;
|
|
59
60
|
}
|
|
60
|
-
export interface IError extends IEventBase<"error"> {
|
|
61
|
+
export interface IError<Model extends ILlmSchema.Model> extends IEventBase<"error", Model> {
|
|
61
62
|
/**
|
|
62
63
|
* Error occurred during the benchmark.
|
|
63
64
|
*/
|
|
64
65
|
error: unknown;
|
|
65
66
|
}
|
|
66
|
-
interface IEventBase<Type extends string> {
|
|
67
|
+
interface IEventBase<Type extends string, Model extends ILlmSchema.Model> {
|
|
67
68
|
/**
|
|
68
69
|
* Discriminant type.
|
|
69
70
|
*/
|
|
@@ -71,13 +72,13 @@ export declare namespace IAgenticaCallBenchmarkEvent {
|
|
|
71
72
|
/**
|
|
72
73
|
* Expected scenario.
|
|
73
74
|
*/
|
|
74
|
-
scenario: IAgenticaCallBenchmarkScenario
|
|
75
|
+
scenario: IAgenticaCallBenchmarkScenario<Model>;
|
|
75
76
|
/**
|
|
76
77
|
* Prompt histories.
|
|
77
78
|
*
|
|
78
79
|
* List of prompts occurred during the benchmark testing.
|
|
79
80
|
*/
|
|
80
|
-
prompts: IAgenticaPrompt[];
|
|
81
|
+
prompts: IAgenticaPrompt<Model>[];
|
|
81
82
|
/**
|
|
82
83
|
* Usage of the token during the benchmark.
|
|
83
84
|
*/
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { IAgenticaTokenUsage } from "@agentica/core";
|
|
2
|
+
import { ILlmSchema } from "@samchon/openapi";
|
|
2
3
|
import { IAgenticaCallBenchmarkEvent } from "./IAgenticaCallBenchmarkEvent";
|
|
3
4
|
import { IAgenticaCallBenchmarkScenario } from "./IAgenticaCallBenchmarkScenario";
|
|
4
5
|
/**
|
|
@@ -18,11 +19,11 @@ import { IAgenticaCallBenchmarkScenario } from "./IAgenticaCallBenchmarkScenario
|
|
|
18
19
|
*
|
|
19
20
|
* @author Samchon
|
|
20
21
|
*/
|
|
21
|
-
export interface IAgenticaCallBenchmarkResult {
|
|
22
|
+
export interface IAgenticaCallBenchmarkResult<Model extends ILlmSchema.Model> {
|
|
22
23
|
/**
|
|
23
24
|
* Experiments for each scenario.
|
|
24
25
|
*/
|
|
25
|
-
experiments: IAgenticaCallBenchmarkResult.IExperiment[];
|
|
26
|
+
experiments: IAgenticaCallBenchmarkResult.IExperiment<Model>[];
|
|
26
27
|
/**
|
|
27
28
|
* Aggregated token usage information.
|
|
28
29
|
*/
|
|
@@ -40,11 +41,11 @@ export declare namespace IAgenticaCallBenchmarkResult {
|
|
|
40
41
|
/**
|
|
41
42
|
* Experiment result about a scenario.
|
|
42
43
|
*/
|
|
43
|
-
interface IExperiment {
|
|
44
|
+
interface IExperiment<Model extends ILlmSchema.Model> {
|
|
44
45
|
/**
|
|
45
46
|
* Scenario of the experiment.
|
|
46
47
|
*/
|
|
47
|
-
scenario: IAgenticaCallBenchmarkScenario
|
|
48
|
+
scenario: IAgenticaCallBenchmarkScenario<Model>;
|
|
48
49
|
/**
|
|
49
50
|
* Events occurred during the benchmark in the scenario.
|
|
50
51
|
*
|
|
@@ -53,7 +54,7 @@ export declare namespace IAgenticaCallBenchmarkResult {
|
|
|
53
54
|
* {@link AgenticaCallBenchmark.IConfig.repeat repeat} count.
|
|
54
55
|
* And the event is one of the repeated benchmark results.
|
|
55
56
|
*/
|
|
56
|
-
events: IAgenticaCallBenchmarkEvent[];
|
|
57
|
+
events: IAgenticaCallBenchmarkEvent<Model>[];
|
|
57
58
|
/**
|
|
58
59
|
* LLM token usage information.
|
|
59
60
|
*/
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import { ILlmSchema } from "@samchon/openapi";
|
|
1
2
|
import { IAgenticaBenchmarkExpected } from "./IAgenticaBenchmarkExpected";
|
|
2
3
|
/**
|
|
3
4
|
* Scenario of function calling.
|
|
@@ -13,7 +14,7 @@ import { IAgenticaBenchmarkExpected } from "./IAgenticaBenchmarkExpected";
|
|
|
13
14
|
*
|
|
14
15
|
* @author Samchon
|
|
15
16
|
*/
|
|
16
|
-
export interface IAgenticaCallBenchmarkScenario {
|
|
17
|
+
export interface IAgenticaCallBenchmarkScenario<Model extends ILlmSchema.Model> {
|
|
17
18
|
/**
|
|
18
19
|
* Name of the scenario.
|
|
19
20
|
*
|
|
@@ -32,5 +33,5 @@ export interface IAgenticaCallBenchmarkScenario {
|
|
|
32
33
|
* the user's {@link text} conversation for the LLM
|
|
33
34
|
* (Large Language Model) function calling.
|
|
34
35
|
*/
|
|
35
|
-
expected: IAgenticaBenchmarkExpected
|
|
36
|
+
expected: IAgenticaBenchmarkExpected<Model>;
|
|
36
37
|
}
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { IAgenticaOperationSelection, IAgenticaPrompt, IAgenticaTokenUsage } from "@agentica/core";
|
|
2
|
+
import { ILlmSchema } from "@samchon/openapi";
|
|
2
3
|
import { IAgenticaSelectBenchmarkScenario } from "./IAgenticaSelectBenchmarkScenario";
|
|
3
4
|
/**
|
|
4
5
|
* Event of LLM function selection benchmark.
|
|
@@ -22,7 +23,7 @@ import { IAgenticaSelectBenchmarkScenario } from "./IAgenticaSelectBenchmarkScen
|
|
|
22
23
|
*
|
|
23
24
|
* @author Samchon
|
|
24
25
|
*/
|
|
25
|
-
export type IAgenticaSelectBenchmarkEvent = IAgenticaSelectBenchmarkEvent.ISuccess | IAgenticaSelectBenchmarkEvent.IFailure | IAgenticaSelectBenchmarkEvent.IError
|
|
26
|
+
export type IAgenticaSelectBenchmarkEvent<Model extends ILlmSchema.Model> = IAgenticaSelectBenchmarkEvent.ISuccess<Model> | IAgenticaSelectBenchmarkEvent.IFailure<Model> | IAgenticaSelectBenchmarkEvent.IError<Model>;
|
|
26
27
|
export declare namespace IAgenticaSelectBenchmarkEvent {
|
|
27
28
|
/**
|
|
28
29
|
* Success event type.
|
|
@@ -30,7 +31,7 @@ export declare namespace IAgenticaSelectBenchmarkEvent {
|
|
|
30
31
|
* The `success` event type represents that the benchmark testing
|
|
31
32
|
* fully meets the expected scenario.
|
|
32
33
|
*/
|
|
33
|
-
export interface ISuccess extends IEventBase<"success"> {
|
|
34
|
+
export interface ISuccess<Model extends ILlmSchema.Model> extends IEventBase<"success", Model> {
|
|
34
35
|
/**
|
|
35
36
|
* Usage of the token during the benchmark.
|
|
36
37
|
*/
|
|
@@ -38,7 +39,7 @@ export declare namespace IAgenticaSelectBenchmarkEvent {
|
|
|
38
39
|
/**
|
|
39
40
|
* Selected operations in the benchmark.
|
|
40
41
|
*/
|
|
41
|
-
selected: IAgenticaOperationSelection[];
|
|
42
|
+
selected: IAgenticaOperationSelection<Model>[];
|
|
42
43
|
/**
|
|
43
44
|
* Prompt messages from the assistant.
|
|
44
45
|
*/
|
|
@@ -50,7 +51,7 @@ export declare namespace IAgenticaSelectBenchmarkEvent {
|
|
|
50
51
|
* The `failure` event type represents that the `selector` had not
|
|
51
52
|
* selected the expected scenario in the benchmark testing.
|
|
52
53
|
*/
|
|
53
|
-
export interface IFailure extends IEventBase<"failure"> {
|
|
54
|
+
export interface IFailure<Model extends ILlmSchema.Model> extends IEventBase<"failure", Model> {
|
|
54
55
|
/**
|
|
55
56
|
* Usage of the token during the benchmark.
|
|
56
57
|
*/
|
|
@@ -58,19 +59,19 @@ export declare namespace IAgenticaSelectBenchmarkEvent {
|
|
|
58
59
|
/**
|
|
59
60
|
* Selected operations in the benchmark.
|
|
60
61
|
*/
|
|
61
|
-
selected: IAgenticaOperationSelection[];
|
|
62
|
+
selected: IAgenticaOperationSelection<Model>[];
|
|
62
63
|
/**
|
|
63
64
|
* Prompt messages from the assistant.
|
|
64
65
|
*/
|
|
65
66
|
assistantPrompts: IAgenticaPrompt.IText<"assistant">[];
|
|
66
67
|
}
|
|
67
|
-
export interface IError extends IEventBase<"error"> {
|
|
68
|
+
export interface IError<Model extends ILlmSchema.Model> extends IEventBase<"error", Model> {
|
|
68
69
|
/**
|
|
69
70
|
* Error occurred during the benchmark.
|
|
70
71
|
*/
|
|
71
72
|
error: unknown;
|
|
72
73
|
}
|
|
73
|
-
interface IEventBase<Type extends string> {
|
|
74
|
+
interface IEventBase<Type extends string, Model extends ILlmSchema.Model> {
|
|
74
75
|
/**
|
|
75
76
|
* Discriminant type.
|
|
76
77
|
*/
|
|
@@ -78,7 +79,7 @@ export declare namespace IAgenticaSelectBenchmarkEvent {
|
|
|
78
79
|
/**
|
|
79
80
|
* Expected scenario.
|
|
80
81
|
*/
|
|
81
|
-
scenario: IAgenticaSelectBenchmarkScenario
|
|
82
|
+
scenario: IAgenticaSelectBenchmarkScenario<Model>;
|
|
82
83
|
/**
|
|
83
84
|
* When the benchmark testing started.
|
|
84
85
|
*/
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { IAgenticaTokenUsage } from "@agentica/core";
|
|
2
|
+
import { ILlmSchema } from "@samchon/openapi";
|
|
2
3
|
import { IAgenticaSelectBenchmarkEvent } from "./IAgenticaSelectBenchmarkEvent";
|
|
3
4
|
import { IAgenticaSelectBenchmarkScenario } from "./IAgenticaSelectBenchmarkScenario";
|
|
4
5
|
/**
|
|
@@ -18,11 +19,11 @@ import { IAgenticaSelectBenchmarkScenario } from "./IAgenticaSelectBenchmarkScen
|
|
|
18
19
|
*
|
|
19
20
|
* @author Samchon
|
|
20
21
|
*/
|
|
21
|
-
export interface IAgenticaSelectBenchmarkResult {
|
|
22
|
+
export interface IAgenticaSelectBenchmarkResult<Model extends ILlmSchema.Model> {
|
|
22
23
|
/**
|
|
23
24
|
* Experiments for each scenario.
|
|
24
25
|
*/
|
|
25
|
-
experiments: IAgenticaSelectBenchmarkResult.IExperiment[];
|
|
26
|
+
experiments: IAgenticaSelectBenchmarkResult.IExperiment<Model>[];
|
|
26
27
|
/**
|
|
27
28
|
* Aggregated token usage information.
|
|
28
29
|
*/
|
|
@@ -40,11 +41,11 @@ export declare namespace IAgenticaSelectBenchmarkResult {
|
|
|
40
41
|
/**
|
|
41
42
|
* Experiment result about a scenario.
|
|
42
43
|
*/
|
|
43
|
-
interface IExperiment {
|
|
44
|
+
interface IExperiment<Model extends ILlmSchema.Model> {
|
|
44
45
|
/**
|
|
45
46
|
* Expected scenario.
|
|
46
47
|
*/
|
|
47
|
-
scenario: IAgenticaSelectBenchmarkScenario
|
|
48
|
+
scenario: IAgenticaSelectBenchmarkScenario<Model>;
|
|
48
49
|
/**
|
|
49
50
|
* Events occurred during the benchmark in the scenario.
|
|
50
51
|
*
|
|
@@ -53,7 +54,7 @@ export declare namespace IAgenticaSelectBenchmarkResult {
|
|
|
53
54
|
* {@link AgenticaSelectBenchmark.IConfig.repeat repeat} count.
|
|
54
55
|
* And the event is one of the repeated benchmark results.
|
|
55
56
|
*/
|
|
56
|
-
events: IAgenticaSelectBenchmarkEvent[];
|
|
57
|
+
events: IAgenticaSelectBenchmarkEvent<Model>[];
|
|
57
58
|
/**
|
|
58
59
|
* LLM token usage information.
|
|
59
60
|
*/
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import { ILlmSchema } from "@samchon/openapi";
|
|
1
2
|
import { IAgenticaBenchmarkExpected } from "./IAgenticaBenchmarkExpected";
|
|
2
3
|
/**
|
|
3
4
|
* Scenario of function selection.
|
|
@@ -13,7 +14,7 @@ import { IAgenticaBenchmarkExpected } from "./IAgenticaBenchmarkExpected";
|
|
|
13
14
|
*
|
|
14
15
|
* @author Samchon
|
|
15
16
|
*/
|
|
16
|
-
export interface IAgenticaSelectBenchmarkScenario {
|
|
17
|
+
export interface IAgenticaSelectBenchmarkScenario<Model extends ILlmSchema.Model> {
|
|
17
18
|
/**
|
|
18
19
|
* Name of the scenario.
|
|
19
20
|
*
|
|
@@ -32,5 +33,5 @@ export interface IAgenticaSelectBenchmarkScenario {
|
|
|
32
33
|
* {@link text} conversation for the LLM (Large Language Model)
|
|
33
34
|
* function selection.
|
|
34
35
|
*/
|
|
35
|
-
expected: IAgenticaBenchmarkExpected
|
|
36
|
+
expected: IAgenticaBenchmarkExpected<Model>;
|
|
36
37
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@agentica/benchmark",
|
|
3
|
-
"version": "0.8.3",
|
|
3
|
+
"version": "0.9.0-dev.20250302",
|
|
4
4
|
"main": "lib/index.js",
|
|
5
5
|
"description": "Agentic AI Library specialized in LLM Function Calling",
|
|
6
6
|
"scripts": {
|
|
@@ -37,11 +37,11 @@
|
|
|
37
37
|
"src"
|
|
38
38
|
],
|
|
39
39
|
"dependencies": {
|
|
40
|
-
"@agentica/core": "^0.
|
|
41
|
-
"@samchon/openapi": "^
|
|
40
|
+
"@agentica/core": "^0.9.0-dev.20250302",
|
|
41
|
+
"@samchon/openapi": "^3.0.0",
|
|
42
42
|
"openai": "^4.80.0",
|
|
43
43
|
"tstl": "^3.0.0",
|
|
44
|
-
"typia": "^
|
|
44
|
+
"typia": "^8.0.0"
|
|
45
45
|
},
|
|
46
46
|
"devDependencies": {
|
|
47
47
|
"@rollup/plugin-terser": "^0.4.4",
|
|
@@ -51,7 +51,7 @@
|
|
|
51
51
|
"rollup": "^4.34.8",
|
|
52
52
|
"ts-patch": "^3.3.0",
|
|
53
53
|
"typedoc": "^0.27.7",
|
|
54
|
-
"typescript": "~5.
|
|
54
|
+
"typescript": "~5.8.2"
|
|
55
55
|
},
|
|
56
56
|
"module": "lib/index.mjs",
|
|
57
57
|
"typings": "lib/index.d.ts"
|