@agentica/benchmark 0.8.3 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +124 -122
- package/lib/AgenticaCallBenchmark.d.ts +7 -6
- package/lib/AgenticaCallBenchmark.js.map +1 -1
- package/lib/AgenticaSelectBenchmark.d.ts +7 -6
- package/lib/AgenticaSelectBenchmark.js.map +1 -1
- package/lib/index.mjs +46 -1
- package/lib/index.mjs.map +1 -1
- package/lib/internal/AgenticaBenchmarkPredicator.d.ts +5 -4
- package/lib/internal/AgenticaBenchmarkPredicator.js +74 -2
- package/lib/internal/AgenticaBenchmarkPredicator.js.map +1 -1
- package/lib/internal/AgenticaBenchmarkUtil.d.ts +2 -1
- package/lib/internal/AgenticaBenchmarkUtil.js.map +1 -1
- package/lib/internal/AgenticaCallBenchmarkReporter.d.ts +2 -1
- package/lib/internal/AgenticaCallBenchmarkReporter.js.map +1 -1
- package/lib/internal/AgenticaPromptReporter.d.ts +2 -1
- package/lib/internal/AgenticaPromptReporter.js.map +1 -1
- package/lib/internal/AgenticaSelectBenchmarkReporter.js.map +1 -1
- package/lib/structures/IAgenticaBenchmarkExpected.d.ts +10 -9
- package/lib/structures/IAgenticaCallBenchmarkEvent.d.ts +8 -7
- package/lib/structures/IAgenticaCallBenchmarkResult.d.ts +6 -5
- package/lib/structures/IAgenticaCallBenchmarkScenario.d.ts +3 -2
- package/lib/structures/IAgenticaSelectBenchmarkEvent.d.ts +9 -8
- package/lib/structures/IAgenticaSelectBenchmarkResult.d.ts +6 -5
- package/lib/structures/IAgenticaSelectBenchmarkScenario.d.ts +3 -2
- package/package.json +5 -5
- package/src/AgenticaCallBenchmark.ts +28 -25
- package/src/AgenticaSelectBenchmark.ts +32 -30
- package/src/internal/AgenticaBenchmarkPredicator.ts +18 -10
- package/src/internal/AgenticaBenchmarkUtil.ts +5 -1
- package/src/internal/AgenticaCallBenchmarkReporter.ts +15 -12
- package/src/internal/AgenticaPromptReporter.ts +4 -1
- package/src/internal/AgenticaSelectBenchmarkReporter.ts +11 -8
- package/src/structures/IAgenticaBenchmarkExpected.ts +23 -13
- package/src/structures/IAgenticaCallBenchmarkEvent.ts +14 -10
- package/src/structures/IAgenticaCallBenchmarkResult.ts +6 -5
- package/src/structures/IAgenticaCallBenchmarkScenario.ts +6 -2
- package/src/structures/IAgenticaSelectBenchmarkEvent.ts +15 -11
- package/src/structures/IAgenticaSelectBenchmarkResult.ts +8 -5
- package/src/structures/IAgenticaSelectBenchmarkScenario.ts +6 -2
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
import { ILlmSchema } from "@samchon/openapi";
|
|
2
|
+
|
|
1
3
|
import { IAgenticaBenchmarkExpected } from "./IAgenticaBenchmarkExpected";
|
|
2
4
|
|
|
3
5
|
/**
|
|
@@ -14,7 +16,9 @@ import { IAgenticaBenchmarkExpected } from "./IAgenticaBenchmarkExpected";
|
|
|
14
16
|
*
|
|
15
17
|
* @author Samchon
|
|
16
18
|
*/
|
|
17
|
-
export interface IAgenticaCallBenchmarkScenario
|
|
19
|
+
export interface IAgenticaCallBenchmarkScenario<
|
|
20
|
+
Model extends ILlmSchema.Model,
|
|
21
|
+
> {
|
|
18
22
|
/**
|
|
19
23
|
* Name of the scenario.
|
|
20
24
|
*
|
|
@@ -35,5 +39,5 @@ export interface IAgenticaCallBenchmarkScenario {
|
|
|
35
39
|
* the user's {@link text} conversation for the LLM
|
|
36
40
|
* (Large Language Model) function calling.
|
|
37
41
|
*/
|
|
38
|
-
expected: IAgenticaBenchmarkExpected
|
|
42
|
+
expected: IAgenticaBenchmarkExpected<Model>;
|
|
39
43
|
}
|
|
@@ -3,6 +3,7 @@ import {
|
|
|
3
3
|
IAgenticaPrompt,
|
|
4
4
|
IAgenticaTokenUsage,
|
|
5
5
|
} from "@agentica/core";
|
|
6
|
+
import { ILlmSchema } from "@samchon/openapi";
|
|
6
7
|
|
|
7
8
|
import { IAgenticaSelectBenchmarkScenario } from "./IAgenticaSelectBenchmarkScenario";
|
|
8
9
|
|
|
@@ -28,10 +29,10 @@ import { IAgenticaSelectBenchmarkScenario } from "./IAgenticaSelectBenchmarkScen
|
|
|
28
29
|
*
|
|
29
30
|
* @author Samchon
|
|
30
31
|
*/
|
|
31
|
-
export type IAgenticaSelectBenchmarkEvent =
|
|
32
|
-
| IAgenticaSelectBenchmarkEvent.ISuccess
|
|
33
|
-
| IAgenticaSelectBenchmarkEvent.IFailure
|
|
34
|
-
| IAgenticaSelectBenchmarkEvent.IError
|
|
32
|
+
export type IAgenticaSelectBenchmarkEvent<Model extends ILlmSchema.Model> =
|
|
33
|
+
| IAgenticaSelectBenchmarkEvent.ISuccess<Model>
|
|
34
|
+
| IAgenticaSelectBenchmarkEvent.IFailure<Model>
|
|
35
|
+
| IAgenticaSelectBenchmarkEvent.IError<Model>;
|
|
35
36
|
export namespace IAgenticaSelectBenchmarkEvent {
|
|
36
37
|
/**
|
|
37
38
|
* Success event type.
|
|
@@ -39,7 +40,8 @@ export namespace IAgenticaSelectBenchmarkEvent {
|
|
|
39
40
|
* The `success` event type represents that the benchmark testing has
|
|
40
41
|
* fully met the expected scenario.
|
|
41
42
|
*/
|
|
42
|
-
export interface ISuccess extends
|
|
43
|
+
export interface ISuccess<Model extends ILlmSchema.Model>
|
|
44
|
+
extends IEventBase<"success", Model> {
|
|
43
45
|
/**
|
|
44
46
|
* Usage of the token during the benchmark.
|
|
45
47
|
*/
|
|
@@ -48,7 +50,7 @@ export namespace IAgenticaSelectBenchmarkEvent {
|
|
|
48
50
|
/**
|
|
49
51
|
* Selected operations in the benchmark.
|
|
50
52
|
*/
|
|
51
|
-
selected: IAgenticaOperationSelection[];
|
|
53
|
+
selected: IAgenticaOperationSelection<Model>[];
|
|
52
54
|
|
|
53
55
|
/**
|
|
54
56
|
* Prompt messages from the assistant.
|
|
@@ -62,7 +64,8 @@ export namespace IAgenticaSelectBenchmarkEvent {
|
|
|
62
64
|
* The `failure` event type represents that the `selector` had not
|
|
63
65
|
* selected the expected scenario in the benchmark testing.
|
|
64
66
|
*/
|
|
65
|
-
export interface IFailure extends
|
|
67
|
+
export interface IFailure<Model extends ILlmSchema.Model>
|
|
68
|
+
extends IEventBase<"failure", Model> {
|
|
66
69
|
/**
|
|
67
70
|
* Usage of the token during the benchmark.
|
|
68
71
|
*/
|
|
@@ -71,7 +74,7 @@ export namespace IAgenticaSelectBenchmarkEvent {
|
|
|
71
74
|
/**
|
|
72
75
|
* Selected operations in the benchmark.
|
|
73
76
|
*/
|
|
74
|
-
selected: IAgenticaOperationSelection[];
|
|
77
|
+
selected: IAgenticaOperationSelection<Model>[];
|
|
75
78
|
|
|
76
79
|
/**
|
|
77
80
|
* Prompt messages from the assistant.
|
|
@@ -79,14 +82,15 @@ export namespace IAgenticaSelectBenchmarkEvent {
|
|
|
79
82
|
assistantPrompts: IAgenticaPrompt.IText<"assistant">[];
|
|
80
83
|
}
|
|
81
84
|
|
|
82
|
-
export interface IError extends
|
|
85
|
+
export interface IError<Model extends ILlmSchema.Model>
|
|
86
|
+
extends IEventBase<"error", Model> {
|
|
83
87
|
/**
|
|
84
88
|
* Error occurred during the benchmark.
|
|
85
89
|
*/
|
|
86
90
|
error: unknown;
|
|
87
91
|
}
|
|
88
92
|
|
|
89
|
-
interface IEventBase<Type extends string> {
|
|
93
|
+
interface IEventBase<Type extends string, Model extends ILlmSchema.Model> {
|
|
90
94
|
/**
|
|
91
95
|
* Discriminant type.
|
|
92
96
|
*/
|
|
@@ -95,7 +99,7 @@ export namespace IAgenticaSelectBenchmarkEvent {
|
|
|
95
99
|
/**
|
|
96
100
|
* Expected scenario.
|
|
97
101
|
*/
|
|
98
|
-
scenario: IAgenticaSelectBenchmarkScenario
|
|
102
|
+
scenario: IAgenticaSelectBenchmarkScenario<Model>;
|
|
99
103
|
|
|
100
104
|
/**
|
|
101
105
|
* When the benchmark testing started.
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { IAgenticaTokenUsage } from "@agentica/core";
|
|
2
|
+
import { ILlmSchema } from "@samchon/openapi";
|
|
2
3
|
|
|
3
4
|
import { IAgenticaSelectBenchmarkEvent } from "./IAgenticaSelectBenchmarkEvent";
|
|
4
5
|
import { IAgenticaSelectBenchmarkScenario } from "./IAgenticaSelectBenchmarkScenario";
|
|
@@ -20,11 +21,13 @@ import { IAgenticaSelectBenchmarkScenario } from "./IAgenticaSelectBenchmarkScen
|
|
|
20
21
|
*
|
|
21
22
|
* @author Samchon
|
|
22
23
|
*/
|
|
23
|
-
export interface IAgenticaSelectBenchmarkResult
|
|
24
|
+
export interface IAgenticaSelectBenchmarkResult<
|
|
25
|
+
Model extends ILlmSchema.Model,
|
|
26
|
+
> {
|
|
24
27
|
/**
|
|
25
28
|
* Experiments for each scenario.
|
|
26
29
|
*/
|
|
27
|
-
experiments: IAgenticaSelectBenchmarkResult.IExperiment[];
|
|
30
|
+
experiments: IAgenticaSelectBenchmarkResult.IExperiment<Model>[];
|
|
28
31
|
|
|
29
32
|
/**
|
|
30
33
|
* Aggregated token usage information.
|
|
@@ -45,11 +48,11 @@ export namespace IAgenticaSelectBenchmarkResult {
|
|
|
45
48
|
/**
|
|
46
49
|
* Experiment result about a scenario.
|
|
47
50
|
*/
|
|
48
|
-
export interface IExperiment {
|
|
51
|
+
export interface IExperiment<Model extends ILlmSchema.Model> {
|
|
49
52
|
/**
|
|
50
53
|
* Expected scenario.
|
|
51
54
|
*/
|
|
52
|
-
scenario: IAgenticaSelectBenchmarkScenario
|
|
55
|
+
scenario: IAgenticaSelectBenchmarkScenario<Model>;
|
|
53
56
|
|
|
54
57
|
/**
|
|
55
58
|
* Events occurred during the benchmark in the scenario.
|
|
@@ -59,7 +62,7 @@ export namespace IAgenticaSelectBenchmarkResult {
|
|
|
59
62
|
* {@link AgenticaSelectBenchmark.IConfig.repeat repeat} count.
|
|
60
63
|
* And the event is one of the repeated benchmark results.
|
|
61
64
|
*/
|
|
62
|
-
events: IAgenticaSelectBenchmarkEvent[];
|
|
65
|
+
events: IAgenticaSelectBenchmarkEvent<Model>[];
|
|
63
66
|
|
|
64
67
|
/**
|
|
65
68
|
* LLM token usage information.
|
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
import { ILlmSchema } from "@samchon/openapi";
|
|
2
|
+
|
|
1
3
|
import { IAgenticaBenchmarkExpected } from "./IAgenticaBenchmarkExpected";
|
|
2
4
|
|
|
3
5
|
/**
|
|
@@ -14,7 +16,9 @@ import { IAgenticaBenchmarkExpected } from "./IAgenticaBenchmarkExpected";
|
|
|
14
16
|
*
|
|
15
17
|
* @author Samchon
|
|
16
18
|
*/
|
|
17
|
-
export interface IAgenticaSelectBenchmarkScenario
|
|
19
|
+
export interface IAgenticaSelectBenchmarkScenario<
|
|
20
|
+
Model extends ILlmSchema.Model,
|
|
21
|
+
> {
|
|
18
22
|
/**
|
|
19
23
|
* Name of the scenario.
|
|
20
24
|
*
|
|
@@ -35,5 +39,5 @@ export interface IAgenticaSelectBenchmarkScenario {
|
|
|
35
39
|
* {@link text} conversation for the LLM (Large Language Model)
|
|
36
40
|
* function selection.
|
|
37
41
|
*/
|
|
38
|
-
expected: IAgenticaBenchmarkExpected
|
|
42
|
+
expected: IAgenticaBenchmarkExpected<Model>;
|
|
39
43
|
}
|