@agentica/benchmark 0.8.3 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39):
  1. package/README.md +124 -122
  2. package/lib/AgenticaCallBenchmark.d.ts +7 -6
  3. package/lib/AgenticaCallBenchmark.js.map +1 -1
  4. package/lib/AgenticaSelectBenchmark.d.ts +7 -6
  5. package/lib/AgenticaSelectBenchmark.js.map +1 -1
  6. package/lib/index.mjs +46 -1
  7. package/lib/index.mjs.map +1 -1
  8. package/lib/internal/AgenticaBenchmarkPredicator.d.ts +5 -4
  9. package/lib/internal/AgenticaBenchmarkPredicator.js +74 -2
  10. package/lib/internal/AgenticaBenchmarkPredicator.js.map +1 -1
  11. package/lib/internal/AgenticaBenchmarkUtil.d.ts +2 -1
  12. package/lib/internal/AgenticaBenchmarkUtil.js.map +1 -1
  13. package/lib/internal/AgenticaCallBenchmarkReporter.d.ts +2 -1
  14. package/lib/internal/AgenticaCallBenchmarkReporter.js.map +1 -1
  15. package/lib/internal/AgenticaPromptReporter.d.ts +2 -1
  16. package/lib/internal/AgenticaPromptReporter.js.map +1 -1
  17. package/lib/internal/AgenticaSelectBenchmarkReporter.js.map +1 -1
  18. package/lib/structures/IAgenticaBenchmarkExpected.d.ts +10 -9
  19. package/lib/structures/IAgenticaCallBenchmarkEvent.d.ts +8 -7
  20. package/lib/structures/IAgenticaCallBenchmarkResult.d.ts +6 -5
  21. package/lib/structures/IAgenticaCallBenchmarkScenario.d.ts +3 -2
  22. package/lib/structures/IAgenticaSelectBenchmarkEvent.d.ts +9 -8
  23. package/lib/structures/IAgenticaSelectBenchmarkResult.d.ts +6 -5
  24. package/lib/structures/IAgenticaSelectBenchmarkScenario.d.ts +3 -2
  25. package/package.json +5 -5
  26. package/src/AgenticaCallBenchmark.ts +28 -25
  27. package/src/AgenticaSelectBenchmark.ts +32 -30
  28. package/src/internal/AgenticaBenchmarkPredicator.ts +18 -10
  29. package/src/internal/AgenticaBenchmarkUtil.ts +5 -1
  30. package/src/internal/AgenticaCallBenchmarkReporter.ts +15 -12
  31. package/src/internal/AgenticaPromptReporter.ts +4 -1
  32. package/src/internal/AgenticaSelectBenchmarkReporter.ts +11 -8
  33. package/src/structures/IAgenticaBenchmarkExpected.ts +23 -13
  34. package/src/structures/IAgenticaCallBenchmarkEvent.ts +14 -10
  35. package/src/structures/IAgenticaCallBenchmarkResult.ts +6 -5
  36. package/src/structures/IAgenticaCallBenchmarkScenario.ts +6 -2
  37. package/src/structures/IAgenticaSelectBenchmarkEvent.ts +15 -11
  38. package/src/structures/IAgenticaSelectBenchmarkResult.ts +8 -5
  39. package/src/structures/IAgenticaSelectBenchmarkScenario.ts +6 -2
@@ -1,3 +1,5 @@
1
+ import { ILlmSchema } from "@samchon/openapi";
2
+
1
3
  import { IAgenticaBenchmarkExpected } from "./IAgenticaBenchmarkExpected";
2
4
 
3
5
  /**
@@ -14,7 +16,9 @@ import { IAgenticaBenchmarkExpected } from "./IAgenticaBenchmarkExpected";
14
16
  *
15
17
  * @author Samchon
16
18
  */
17
- export interface IAgenticaCallBenchmarkScenario {
19
+ export interface IAgenticaCallBenchmarkScenario<
20
+ Model extends ILlmSchema.Model,
21
+ > {
18
22
  /**
19
23
  * Name of the scenario.
20
24
  *
@@ -35,5 +39,5 @@ export interface IAgenticaCallBenchmarkScenario {
35
39
  * the user's {@link text} conversation for the LLM
36
40
  * (Large Language Model) function calling.
37
41
  */
38
- expected: IAgenticaBenchmarkExpected;
42
+ expected: IAgenticaBenchmarkExpected<Model>;
39
43
  }
@@ -3,6 +3,7 @@ import {
3
3
  IAgenticaPrompt,
4
4
  IAgenticaTokenUsage,
5
5
  } from "@agentica/core";
6
+ import { ILlmSchema } from "@samchon/openapi";
6
7
 
7
8
  import { IAgenticaSelectBenchmarkScenario } from "./IAgenticaSelectBenchmarkScenario";
8
9
 
@@ -28,10 +29,10 @@ import { IAgenticaSelectBenchmarkScenario } from "./IAgenticaSelectBenchmarkScen
28
29
  *
29
30
  * @author Samchon
30
31
  */
31
- export type IAgenticaSelectBenchmarkEvent =
32
- | IAgenticaSelectBenchmarkEvent.ISuccess
33
- | IAgenticaSelectBenchmarkEvent.IFailure
34
- | IAgenticaSelectBenchmarkEvent.IError;
32
+ export type IAgenticaSelectBenchmarkEvent<Model extends ILlmSchema.Model> =
33
+ | IAgenticaSelectBenchmarkEvent.ISuccess<Model>
34
+ | IAgenticaSelectBenchmarkEvent.IFailure<Model>
35
+ | IAgenticaSelectBenchmarkEvent.IError<Model>;
35
36
  export namespace IAgenticaSelectBenchmarkEvent {
36
37
  /**
37
38
  * Success event type.
@@ -39,7 +40,8 @@ export namespace IAgenticaSelectBenchmarkEvent {
39
40
  * The `success` event type represents that the benchmark testing
40
41
  * fully meets the expected scenario.
41
42
  */
42
- export interface ISuccess extends IEventBase<"success"> {
43
+ export interface ISuccess<Model extends ILlmSchema.Model>
44
+ extends IEventBase<"success", Model> {
43
45
  /**
44
46
  * Usage of the token during the benchmark.
45
47
  */
@@ -48,7 +50,7 @@ export namespace IAgenticaSelectBenchmarkEvent {
48
50
  /**
49
51
  * Selected operations in the benchmark.
50
52
  */
51
- selected: IAgenticaOperationSelection[];
53
+ selected: IAgenticaOperationSelection<Model>[];
52
54
 
53
55
  /**
54
56
  * Prompt messages from the assistant.
@@ -62,7 +64,8 @@ export namespace IAgenticaSelectBenchmarkEvent {
62
64
  * The `failure` event type represents that the `selector` had not
63
65
  * selected the expected scenario in the benchmark testing.
64
66
  */
65
- export interface IFailure extends IEventBase<"failure"> {
67
+ export interface IFailure<Model extends ILlmSchema.Model>
68
+ extends IEventBase<"failure", Model> {
66
69
  /**
67
70
  * Usage of the token during the benchmark.
68
71
  */
@@ -71,7 +74,7 @@ export namespace IAgenticaSelectBenchmarkEvent {
71
74
  /**
72
75
  * Selected operations in the benchmark.
73
76
  */
74
- selected: IAgenticaOperationSelection[];
77
+ selected: IAgenticaOperationSelection<Model>[];
75
78
 
76
79
  /**
77
80
  * Prompt messages from the assistant.
@@ -79,14 +82,15 @@ export namespace IAgenticaSelectBenchmarkEvent {
79
82
  assistantPrompts: IAgenticaPrompt.IText<"assistant">[];
80
83
  }
81
84
 
82
- export interface IError extends IEventBase<"error"> {
85
+ export interface IError<Model extends ILlmSchema.Model>
86
+ extends IEventBase<"error", Model> {
83
87
  /**
84
88
  * Error occurred during the benchmark.
85
89
  */
86
90
  error: unknown;
87
91
  }
88
92
 
89
- interface IEventBase<Type extends string> {
93
+ interface IEventBase<Type extends string, Model extends ILlmSchema.Model> {
90
94
  /**
91
95
  * Discriminant type.
92
96
  */
@@ -95,7 +99,7 @@ export namespace IAgenticaSelectBenchmarkEvent {
95
99
  /**
96
100
  * Expected scenario.
97
101
  */
98
- scenario: IAgenticaSelectBenchmarkScenario;
102
+ scenario: IAgenticaSelectBenchmarkScenario<Model>;
99
103
 
100
104
  /**
101
105
  * When the benchmark testing started.
@@ -1,4 +1,5 @@
1
1
  import { IAgenticaTokenUsage } from "@agentica/core";
2
+ import { ILlmSchema } from "@samchon/openapi";
2
3
 
3
4
  import { IAgenticaSelectBenchmarkEvent } from "./IAgenticaSelectBenchmarkEvent";
4
5
  import { IAgenticaSelectBenchmarkScenario } from "./IAgenticaSelectBenchmarkScenario";
@@ -20,11 +21,13 @@ import { IAgenticaSelectBenchmarkScenario } from "./IAgenticaSelectBenchmarkScen
20
21
  *
21
22
  * @author Samchon
22
23
  */
23
- export interface IAgenticaSelectBenchmarkResult {
24
+ export interface IAgenticaSelectBenchmarkResult<
25
+ Model extends ILlmSchema.Model,
26
+ > {
24
27
  /**
25
28
  * Experiments for each scenario.
26
29
  */
27
- experiments: IAgenticaSelectBenchmarkResult.IExperiment[];
30
+ experiments: IAgenticaSelectBenchmarkResult.IExperiment<Model>[];
28
31
 
29
32
  /**
30
33
  * Aggregated token usage information.
@@ -45,11 +48,11 @@ export namespace IAgenticaSelectBenchmarkResult {
45
48
  /**
46
49
  * Experiment result about a scenario.
47
50
  */
48
- export interface IExperiment {
51
+ export interface IExperiment<Model extends ILlmSchema.Model> {
49
52
  /**
50
53
  * Expected scenario.
51
54
  */
52
- scenario: IAgenticaSelectBenchmarkScenario;
55
+ scenario: IAgenticaSelectBenchmarkScenario<Model>;
53
56
 
54
57
  /**
55
58
  * Events occurred during the benchmark in the scenario.
@@ -59,7 +62,7 @@ export namespace IAgenticaSelectBenchmarkResult {
59
62
  * {@link AgenticaSelectBenchmark.IConfig.repeat repeat} count.
60
63
  * And the event is one of the repeated benchmark results.
61
64
  */
62
- events: IAgenticaSelectBenchmarkEvent[];
65
+ events: IAgenticaSelectBenchmarkEvent<Model>[];
63
66
 
64
67
  /**
65
68
  * LLM token usage information.
@@ -1,3 +1,5 @@
1
+ import { ILlmSchema } from "@samchon/openapi";
2
+
1
3
  import { IAgenticaBenchmarkExpected } from "./IAgenticaBenchmarkExpected";
2
4
 
3
5
  /**
@@ -14,7 +16,9 @@ import { IAgenticaBenchmarkExpected } from "./IAgenticaBenchmarkExpected";
14
16
  *
15
17
  * @author Samchon
16
18
  */
17
- export interface IAgenticaSelectBenchmarkScenario {
19
+ export interface IAgenticaSelectBenchmarkScenario<
20
+ Model extends ILlmSchema.Model,
21
+ > {
18
22
  /**
19
23
  * Name of the scenario.
20
24
  *
@@ -35,5 +39,5 @@ export interface IAgenticaSelectBenchmarkScenario {
35
39
  * {@link text} conversation for the LLM (Large Language Model)
36
40
  * function selection.
37
41
  */
38
- expected: IAgenticaBenchmarkExpected;
42
+ expected: IAgenticaBenchmarkExpected<Model>;
39
43
  }