@agentica/benchmark 0.8.3 → 0.9.0-dev.20250302

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. package/LICENSE +21 -21
  2. package/README.md +326 -324
  3. package/lib/AgenticaCallBenchmark.d.ts +7 -6
  4. package/lib/AgenticaCallBenchmark.js.map +1 -1
  5. package/lib/AgenticaSelectBenchmark.d.ts +7 -6
  6. package/lib/AgenticaSelectBenchmark.js.map +1 -1
  7. package/lib/index.mjs +46 -1
  8. package/lib/index.mjs.map +1 -1
  9. package/lib/internal/AgenticaBenchmarkPredicator.d.ts +5 -4
  10. package/lib/internal/AgenticaBenchmarkPredicator.js +74 -2
  11. package/lib/internal/AgenticaBenchmarkPredicator.js.map +1 -1
  12. package/lib/internal/AgenticaBenchmarkUtil.d.ts +2 -1
  13. package/lib/internal/AgenticaBenchmarkUtil.js.map +1 -1
  14. package/lib/internal/AgenticaCallBenchmarkReporter.d.ts +2 -1
  15. package/lib/internal/AgenticaCallBenchmarkReporter.js.map +1 -1
  16. package/lib/internal/AgenticaPromptReporter.d.ts +2 -1
  17. package/lib/internal/AgenticaPromptReporter.js.map +1 -1
  18. package/lib/internal/AgenticaSelectBenchmarkReporter.js.map +1 -1
  19. package/lib/structures/IAgenticaBenchmarkExpected.d.ts +10 -9
  20. package/lib/structures/IAgenticaCallBenchmarkEvent.d.ts +8 -7
  21. package/lib/structures/IAgenticaCallBenchmarkResult.d.ts +6 -5
  22. package/lib/structures/IAgenticaCallBenchmarkScenario.d.ts +3 -2
  23. package/lib/structures/IAgenticaSelectBenchmarkEvent.d.ts +9 -8
  24. package/lib/structures/IAgenticaSelectBenchmarkResult.d.ts +6 -5
  25. package/lib/structures/IAgenticaSelectBenchmarkScenario.d.ts +3 -2
  26. package/package.json +5 -5
  27. package/src/AgenticaCallBenchmark.ts +268 -265
  28. package/src/AgenticaSelectBenchmark.ts +256 -254
  29. package/src/index.ts +3 -3
  30. package/src/internal/AgenticaBenchmarkPredicator.ts +224 -216
  31. package/src/internal/AgenticaBenchmarkUtil.ts +44 -40
  32. package/src/internal/AgenticaCallBenchmarkReporter.ts +183 -180
  33. package/src/internal/AgenticaPromptReporter.ts +46 -43
  34. package/src/internal/AgenticaSelectBenchmarkReporter.ts +213 -210
  35. package/src/structures/IAgenticaBenchmarkExpected.ts +68 -58
  36. package/src/structures/IAgenticaCallBenchmarkEvent.ts +113 -109
  37. package/src/structures/IAgenticaCallBenchmarkResult.ts +70 -69
  38. package/src/structures/IAgenticaCallBenchmarkScenario.ts +43 -39
  39. package/src/structures/IAgenticaSelectBenchmarkEvent.ts +114 -110
  40. package/src/structures/IAgenticaSelectBenchmarkResult.ts +72 -69
  41. package/src/structures/IAgenticaSelectBenchmarkScenario.ts +43 -39
  42. package/src/utils/MathUtil.ts +3 -3
@@ -1,4 +1,5 @@
1
1
  import { IAgenticaPrompt, IAgenticaTokenUsage } from "@agentica/core";
2
+ import { ILlmSchema } from "@samchon/openapi";
2
3
  import { IAgenticaCallBenchmarkScenario } from "./IAgenticaCallBenchmarkScenario";
3
4
  /**
4
5
  * Event of LLM function selection benchmark.
@@ -22,7 +23,7 @@ import { IAgenticaCallBenchmarkScenario } from "./IAgenticaCallBenchmarkScenario
22
23
  *
23
24
  * @author Samchon
24
25
  */
25
- export type IAgenticaCallBenchmarkEvent = IAgenticaCallBenchmarkEvent.ISuccess | IAgenticaCallBenchmarkEvent.IFailure | IAgenticaCallBenchmarkEvent.IError;
26
+ export type IAgenticaCallBenchmarkEvent<Model extends ILlmSchema.Model> = IAgenticaCallBenchmarkEvent.ISuccess<Model> | IAgenticaCallBenchmarkEvent.IFailure<Model> | IAgenticaCallBenchmarkEvent.IError<Model>;
26
27
  export declare namespace IAgenticaCallBenchmarkEvent {
27
28
  /**
28
29
  * Success event type.
@@ -30,7 +31,7 @@ export declare namespace IAgenticaCallBenchmarkEvent {
30
31
  * The `success` event type represents that the benchmark
31
32
  * testing is fully meet the expected scenario.
32
33
  */
33
- export interface ISuccess extends IEventBase<"success"> {
34
+ export interface ISuccess<Model extends ILlmSchema.Model> extends IEventBase<"success", Model> {
34
35
  /**
35
36
  * Whether succeeded to function selection.
36
37
  */
@@ -47,7 +48,7 @@ export declare namespace IAgenticaCallBenchmarkEvent {
47
48
  * or `caller` agents have not selected or called following the
48
49
  * expected scenario in the benchmark testing.
49
50
  */
50
- export interface IFailure extends IEventBase<"failure"> {
51
+ export interface IFailure<Model extends ILlmSchema.Model> extends IEventBase<"failure", Model> {
51
52
  /**
52
53
  * Whether succeeded to function selection.
53
54
  */
@@ -57,13 +58,13 @@ export declare namespace IAgenticaCallBenchmarkEvent {
57
58
  */
58
59
  call: boolean;
59
60
  }
60
- export interface IError extends IEventBase<"error"> {
61
+ export interface IError<Model extends ILlmSchema.Model> extends IEventBase<"error", Model> {
61
62
  /**
62
63
  * Error occurred during the benchmark.
63
64
  */
64
65
  error: unknown;
65
66
  }
66
- interface IEventBase<Type extends string> {
67
+ interface IEventBase<Type extends string, Model extends ILlmSchema.Model> {
67
68
  /**
68
69
  * Discriminant type.
69
70
  */
@@ -71,13 +72,13 @@ export declare namespace IAgenticaCallBenchmarkEvent {
71
72
  /**
72
73
  * Expected scenario.
73
74
  */
74
- scenario: IAgenticaCallBenchmarkScenario;
75
+ scenario: IAgenticaCallBenchmarkScenario<Model>;
75
76
  /**
76
77
  * Prompt histories.
77
78
  *
78
79
  * List of prompts occurred during the benchmark testing.
79
80
  */
80
- prompts: IAgenticaPrompt[];
81
+ prompts: IAgenticaPrompt<Model>[];
81
82
  /**
82
83
  * Usage of the token during the benchmark.
83
84
  */
@@ -1,4 +1,5 @@
1
1
  import { IAgenticaTokenUsage } from "@agentica/core";
2
+ import { ILlmSchema } from "@samchon/openapi";
2
3
  import { IAgenticaCallBenchmarkEvent } from "./IAgenticaCallBenchmarkEvent";
3
4
  import { IAgenticaCallBenchmarkScenario } from "./IAgenticaCallBenchmarkScenario";
4
5
  /**
@@ -18,11 +19,11 @@ import { IAgenticaCallBenchmarkScenario } from "./IAgenticaCallBenchmarkScenario
18
19
  *
19
20
  * @author Samchon
20
21
  */
21
- export interface IAgenticaCallBenchmarkResult {
22
+ export interface IAgenticaCallBenchmarkResult<Model extends ILlmSchema.Model> {
22
23
  /**
23
24
  * Experiments for each scenario.
24
25
  */
25
- experiments: IAgenticaCallBenchmarkResult.IExperiment[];
26
+ experiments: IAgenticaCallBenchmarkResult.IExperiment<Model>[];
26
27
  /**
27
28
  * Aggregated token usage information.
28
29
  */
@@ -40,11 +41,11 @@ export declare namespace IAgenticaCallBenchmarkResult {
40
41
  /**
41
42
  * Experiment result about a scenario.
42
43
  */
43
- interface IExperiment {
44
+ interface IExperiment<Model extends ILlmSchema.Model> {
44
45
  /**
45
46
  * Scenario of the experiment.
46
47
  */
47
- scenario: IAgenticaCallBenchmarkScenario;
48
+ scenario: IAgenticaCallBenchmarkScenario<Model>;
48
49
  /**
49
50
  * Events occurred during the benchmark in the scenario.
50
51
  *
@@ -53,7 +54,7 @@ export declare namespace IAgenticaCallBenchmarkResult {
53
54
  * {@link AgenticaCallBenchmark.IConfig.repeat repeat} count.
54
55
  * And the event is one of the repeated benchmark results.
55
56
  */
56
- events: IAgenticaCallBenchmarkEvent[];
57
+ events: IAgenticaCallBenchmarkEvent<Model>[];
57
58
  /**
58
59
  * LLM token usage information.
59
60
  */
@@ -1,3 +1,4 @@
1
+ import { ILlmSchema } from "@samchon/openapi";
1
2
  import { IAgenticaBenchmarkExpected } from "./IAgenticaBenchmarkExpected";
2
3
  /**
3
4
  * Scenario of function calling.
@@ -13,7 +14,7 @@ import { IAgenticaBenchmarkExpected } from "./IAgenticaBenchmarkExpected";
13
14
  *
14
15
  * @author Samchon
15
16
  */
16
- export interface IAgenticaCallBenchmarkScenario {
17
+ export interface IAgenticaCallBenchmarkScenario<Model extends ILlmSchema.Model> {
17
18
  /**
18
19
  * Name of the scenario.
19
20
  *
@@ -32,5 +33,5 @@ export interface IAgenticaCallBenchmarkScenario {
32
33
  * the user's {@link text} conversation for the LLM
33
34
  * (Large Language Model) function calling.
34
35
  */
35
- expected: IAgenticaBenchmarkExpected;
36
+ expected: IAgenticaBenchmarkExpected<Model>;
36
37
  }
@@ -1,4 +1,5 @@
1
1
  import { IAgenticaOperationSelection, IAgenticaPrompt, IAgenticaTokenUsage } from "@agentica/core";
2
+ import { ILlmSchema } from "@samchon/openapi";
2
3
  import { IAgenticaSelectBenchmarkScenario } from "./IAgenticaSelectBenchmarkScenario";
3
4
  /**
4
5
  * Event of LLM function selection benchmark.
@@ -22,7 +23,7 @@ import { IAgenticaSelectBenchmarkScenario } from "./IAgenticaSelectBenchmarkScen
22
23
  *
23
24
  * @author Samchon
24
25
  */
25
- export type IAgenticaSelectBenchmarkEvent = IAgenticaSelectBenchmarkEvent.ISuccess | IAgenticaSelectBenchmarkEvent.IFailure | IAgenticaSelectBenchmarkEvent.IError;
26
+ export type IAgenticaSelectBenchmarkEvent<Model extends ILlmSchema.Model> = IAgenticaSelectBenchmarkEvent.ISuccess<Model> | IAgenticaSelectBenchmarkEvent.IFailure<Model> | IAgenticaSelectBenchmarkEvent.IError<Model>;
26
27
  export declare namespace IAgenticaSelectBenchmarkEvent {
27
28
  /**
28
29
  * Success event type.
@@ -30,7 +31,7 @@ export declare namespace IAgenticaSelectBenchmarkEvent {
30
31
  * The `success` event type represents that the benchmark testing is
31
32
  * fully meet the expected scenario.
32
33
  */
33
- export interface ISuccess extends IEventBase<"success"> {
34
+ export interface ISuccess<Model extends ILlmSchema.Model> extends IEventBase<"success", Model> {
34
35
  /**
35
36
  * Usage of the token during the benchmark.
36
37
  */
@@ -38,7 +39,7 @@ export declare namespace IAgenticaSelectBenchmarkEvent {
38
39
  /**
39
40
  * Selected operations in the benchmark.
40
41
  */
41
- selected: IAgenticaOperationSelection[];
42
+ selected: IAgenticaOperationSelection<Model>[];
42
43
  /**
43
44
  * Prompt messages from the assistant.
44
45
  */
@@ -50,7 +51,7 @@ export declare namespace IAgenticaSelectBenchmarkEvent {
50
51
  * The `failure` event type represents that the `selector` had not
51
52
  * selected the expected scenario in the benchmark testing.
52
53
  */
53
- export interface IFailure extends IEventBase<"failure"> {
54
+ export interface IFailure<Model extends ILlmSchema.Model> extends IEventBase<"failure", Model> {
54
55
  /**
55
56
  * Usage of the token during the benchmark.
56
57
  */
@@ -58,19 +59,19 @@ export declare namespace IAgenticaSelectBenchmarkEvent {
58
59
  /**
59
60
  * Selected operations in the benchmark.
60
61
  */
61
- selected: IAgenticaOperationSelection[];
62
+ selected: IAgenticaOperationSelection<Model>[];
62
63
  /**
63
64
  * Prompt messages from the assistant.
64
65
  */
65
66
  assistantPrompts: IAgenticaPrompt.IText<"assistant">[];
66
67
  }
67
- export interface IError extends IEventBase<"error"> {
68
+ export interface IError<Model extends ILlmSchema.Model> extends IEventBase<"error", Model> {
68
69
  /**
69
70
  * Error occurred during the benchmark.
70
71
  */
71
72
  error: unknown;
72
73
  }
73
- interface IEventBase<Type extends string> {
74
+ interface IEventBase<Type extends string, Model extends ILlmSchema.Model> {
74
75
  /**
75
76
  * Discriminant type.
76
77
  */
@@ -78,7 +79,7 @@ export declare namespace IAgenticaSelectBenchmarkEvent {
78
79
  /**
79
80
  * Expected scenario.
80
81
  */
81
- scenario: IAgenticaSelectBenchmarkScenario;
82
+ scenario: IAgenticaSelectBenchmarkScenario<Model>;
82
83
  /**
83
84
  * When the benchmark testing started.
84
85
  */
@@ -1,4 +1,5 @@
1
1
  import { IAgenticaTokenUsage } from "@agentica/core";
2
+ import { ILlmSchema } from "@samchon/openapi";
2
3
  import { IAgenticaSelectBenchmarkEvent } from "./IAgenticaSelectBenchmarkEvent";
3
4
  import { IAgenticaSelectBenchmarkScenario } from "./IAgenticaSelectBenchmarkScenario";
4
5
  /**
@@ -18,11 +19,11 @@ import { IAgenticaSelectBenchmarkScenario } from "./IAgenticaSelectBenchmarkScen
18
19
  *
19
20
  * @author Samchon
20
21
  */
21
- export interface IAgenticaSelectBenchmarkResult {
22
+ export interface IAgenticaSelectBenchmarkResult<Model extends ILlmSchema.Model> {
22
23
  /**
23
24
  * Experiments for each scenario.
24
25
  */
25
- experiments: IAgenticaSelectBenchmarkResult.IExperiment[];
26
+ experiments: IAgenticaSelectBenchmarkResult.IExperiment<Model>[];
26
27
  /**
27
28
  * Aggregated token usage information.
28
29
  */
@@ -40,11 +41,11 @@ export declare namespace IAgenticaSelectBenchmarkResult {
40
41
  /**
41
42
  * Experiment result about a scenario.
42
43
  */
43
- interface IExperiment {
44
+ interface IExperiment<Model extends ILlmSchema.Model> {
44
45
  /**
45
46
  * Expected scenario.
46
47
  */
47
- scenario: IAgenticaSelectBenchmarkScenario;
48
+ scenario: IAgenticaSelectBenchmarkScenario<Model>;
48
49
  /**
49
50
  * Events occurred during the benchmark in the scenario.
50
51
  *
@@ -53,7 +54,7 @@ export declare namespace IAgenticaSelectBenchmarkResult {
53
54
  * {@link AgenticaSelectBenchmark.IConfig.repeat repeat} count.
54
55
  * And the event is one of the repeated benchmark results.
55
56
  */
56
- events: IAgenticaSelectBenchmarkEvent[];
57
+ events: IAgenticaSelectBenchmarkEvent<Model>[];
57
58
  /**
58
59
  * LLM token usage information.
59
60
  */
@@ -1,3 +1,4 @@
1
+ import { ILlmSchema } from "@samchon/openapi";
1
2
  import { IAgenticaBenchmarkExpected } from "./IAgenticaBenchmarkExpected";
2
3
  /**
3
4
  * Scenario of function selection.
@@ -13,7 +14,7 @@ import { IAgenticaBenchmarkExpected } from "./IAgenticaBenchmarkExpected";
13
14
  *
14
15
  * @author Samchon
15
16
  */
16
- export interface IAgenticaSelectBenchmarkScenario {
17
+ export interface IAgenticaSelectBenchmarkScenario<Model extends ILlmSchema.Model> {
17
18
  /**
18
19
  * Name of the scenario.
19
20
  *
@@ -32,5 +33,5 @@ export interface IAgenticaSelectBenchmarkScenario {
32
33
  * {@link text} conversation for the LLM (Large Language Model)
33
34
  * function selection.
34
35
  */
35
- expected: IAgenticaBenchmarkExpected;
36
+ expected: IAgenticaBenchmarkExpected<Model>;
36
37
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@agentica/benchmark",
3
- "version": "0.8.3",
3
+ "version": "0.9.0-dev.20250302",
4
4
  "main": "lib/index.js",
5
5
  "description": "Agentic AI Library specialized in LLM Function Calling",
6
6
  "scripts": {
@@ -37,11 +37,11 @@
37
37
  "src"
38
38
  ],
39
39
  "dependencies": {
40
- "@agentica/core": "^0.8.3",
41
- "@samchon/openapi": "^2.4.3",
40
+ "@agentica/core": "^0.9.0-dev.20250302",
41
+ "@samchon/openapi": "^3.0.0",
42
42
  "openai": "^4.80.0",
43
43
  "tstl": "^3.0.0",
44
- "typia": "^7.6.4"
44
+ "typia": "^8.0.0"
45
45
  },
46
46
  "devDependencies": {
47
47
  "@rollup/plugin-terser": "^0.4.4",
@@ -51,7 +51,7 @@
51
51
  "rollup": "^4.34.8",
52
52
  "ts-patch": "^3.3.0",
53
53
  "typedoc": "^0.27.7",
54
- "typescript": "~5.7.3"
54
+ "typescript": "~5.8.2"
55
55
  },
56
56
  "module": "lib/index.mjs",
57
57
  "typings": "lib/index.d.ts"