@agentica/benchmark 0.34.2 → 0.35.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/AgenticaCallBenchmark.d.ts +6 -7
- package/lib/AgenticaCallBenchmark.js.map +1 -1
- package/lib/AgenticaSelectBenchmark.d.ts +6 -7
- package/lib/AgenticaSelectBenchmark.js.map +1 -1
- package/lib/MicroAgenticaCallBenchmark.d.ts +6 -7
- package/lib/MicroAgenticaCallBenchmark.js.map +1 -1
- package/lib/index.mjs +1 -1
- package/lib/index.mjs.map +1 -1
- package/lib/internal/AgenticaBenchmarkPredicator.d.ts +4 -5
- package/lib/internal/AgenticaBenchmarkPredicator.js +4 -4
- package/lib/internal/AgenticaBenchmarkPredicator.js.map +1 -1
- package/lib/internal/AgenticaBenchmarkUtil.d.ts +1 -2
- package/lib/internal/AgenticaBenchmarkUtil.js.map +1 -1
- package/lib/internal/AgenticaCallBenchmarkReporter.d.ts +1 -2
- package/lib/internal/AgenticaCallBenchmarkReporter.js.map +1 -1
- package/lib/internal/AgenticaPromptReporter.d.ts +1 -2
- package/lib/internal/AgenticaPromptReporter.js.map +1 -1
- package/lib/internal/AgenticaSelectBenchmarkReporter.d.ts +1 -2
- package/lib/internal/AgenticaSelectBenchmarkReporter.js.map +1 -1
- package/lib/structures/IAgenticaBenchmarkExpected.d.ts +9 -10
- package/lib/structures/IAgenticaCallBenchmarkEvent.d.ts +7 -8
- package/lib/structures/IAgenticaCallBenchmarkResult.d.ts +5 -6
- package/lib/structures/IAgenticaCallBenchmarkScenario.d.ts +2 -3
- package/lib/structures/IAgenticaSelectBenchmarkEvent.d.ts +8 -9
- package/lib/structures/IAgenticaSelectBenchmarkResult.d.ts +5 -6
- package/lib/structures/IAgenticaSelectBenchmarkScenario.d.ts +2 -3
- package/package.json +7 -7
- package/src/AgenticaCallBenchmark.ts +18 -19
- package/src/AgenticaSelectBenchmark.ts +24 -25
- package/src/MicroAgenticaCallBenchmark.ts +18 -19
- package/src/internal/AgenticaBenchmarkPredicator.ts +11 -12
- package/src/internal/AgenticaBenchmarkUtil.ts +1 -3
- package/src/internal/AgenticaCallBenchmarkReporter.ts +7 -8
- package/src/internal/AgenticaPromptReporter.ts +1 -2
- package/src/internal/AgenticaSelectBenchmarkReporter.ts +5 -6
- package/src/structures/IAgenticaBenchmarkExpected.ts +16 -17
- package/src/structures/IAgenticaCallBenchmarkEvent.ts +13 -14
- package/src/structures/IAgenticaCallBenchmarkResult.ts +5 -6
- package/src/structures/IAgenticaCallBenchmarkScenario.ts +2 -6
- package/src/structures/IAgenticaSelectBenchmarkEvent.ts +11 -15
- package/src/structures/IAgenticaSelectBenchmarkResult.ts +5 -8
- package/src/structures/IAgenticaSelectBenchmarkScenario.ts +2 -6
|
@@ -5,7 +5,6 @@
|
|
|
5
5
|
* @author Wrtn Technologies
|
|
6
6
|
*/
|
|
7
7
|
import type { AgenticaHistory, AgenticaTokenUsage } from "@agentica/core";
|
|
8
|
-
import type { ILlmSchema } from "@samchon/openapi";
|
|
9
8
|
import type { IAgenticaCallBenchmarkScenario } from "./IAgenticaCallBenchmarkScenario";
|
|
10
9
|
/**
|
|
11
10
|
* Event of LLM function selection benchmark.
|
|
@@ -29,7 +28,7 @@ import type { IAgenticaCallBenchmarkScenario } from "./IAgenticaCallBenchmarkSce
|
|
|
29
28
|
*
|
|
30
29
|
* @author Samchon
|
|
31
30
|
*/
|
|
32
|
-
export type IAgenticaCallBenchmarkEvent
|
|
31
|
+
export type IAgenticaCallBenchmarkEvent = IAgenticaCallBenchmarkEvent.ISuccess | IAgenticaCallBenchmarkEvent.IFailure | IAgenticaCallBenchmarkEvent.IError;
|
|
33
32
|
export declare namespace IAgenticaCallBenchmarkEvent {
|
|
34
33
|
/**
|
|
35
34
|
* Success event type.
|
|
@@ -37,7 +36,7 @@ export declare namespace IAgenticaCallBenchmarkEvent {
|
|
|
37
36
|
* The `success` event type represents that the benchmark
|
|
38
37
|
* testing is fully meet the expected scenario.
|
|
39
38
|
*/
|
|
40
|
-
export interface ISuccess
|
|
39
|
+
export interface ISuccess extends IEventBase<"success"> {
|
|
41
40
|
/**
|
|
42
41
|
* Whether succeeded to function selection.
|
|
43
42
|
*/
|
|
@@ -54,7 +53,7 @@ export declare namespace IAgenticaCallBenchmarkEvent {
|
|
|
54
53
|
* or `caller` agents have not selected or called following the
|
|
55
54
|
* expected scenario in the benchmark testing.
|
|
56
55
|
*/
|
|
57
|
-
export interface IFailure
|
|
56
|
+
export interface IFailure extends IEventBase<"failure"> {
|
|
58
57
|
/**
|
|
59
58
|
* Whether succeeded to function selection.
|
|
60
59
|
*/
|
|
@@ -64,13 +63,13 @@ export declare namespace IAgenticaCallBenchmarkEvent {
|
|
|
64
63
|
*/
|
|
65
64
|
call: boolean;
|
|
66
65
|
}
|
|
67
|
-
export interface IError
|
|
66
|
+
export interface IError extends IEventBase<"error"> {
|
|
68
67
|
/**
|
|
69
68
|
* Error occurred during the benchmark.
|
|
70
69
|
*/
|
|
71
70
|
error: unknown;
|
|
72
71
|
}
|
|
73
|
-
interface IEventBase<Type extends string
|
|
72
|
+
interface IEventBase<Type extends string> {
|
|
74
73
|
/**
|
|
75
74
|
* Discriminant type.
|
|
76
75
|
*/
|
|
@@ -78,13 +77,13 @@ export declare namespace IAgenticaCallBenchmarkEvent {
|
|
|
78
77
|
/**
|
|
79
78
|
* Expected scenario.
|
|
80
79
|
*/
|
|
81
|
-
scenario: IAgenticaCallBenchmarkScenario
|
|
80
|
+
scenario: IAgenticaCallBenchmarkScenario;
|
|
82
81
|
/**
|
|
83
82
|
* Prompt histories.
|
|
84
83
|
*
|
|
85
84
|
* List of prompts occurred during the benchmark testing.
|
|
86
85
|
*/
|
|
87
|
-
prompts: AgenticaHistory
|
|
86
|
+
prompts: AgenticaHistory[];
|
|
88
87
|
/**
|
|
89
88
|
* Usage of the token during the benchmark.
|
|
90
89
|
*/
|
|
@@ -5,7 +5,6 @@
|
|
|
5
5
|
* @author Wrtn Technologies
|
|
6
6
|
*/
|
|
7
7
|
import type { AgenticaTokenUsage } from "@agentica/core";
|
|
8
|
-
import type { ILlmSchema } from "@samchon/openapi";
|
|
9
8
|
import type { IAgenticaCallBenchmarkEvent } from "./IAgenticaCallBenchmarkEvent";
|
|
10
9
|
import type { IAgenticaCallBenchmarkScenario } from "./IAgenticaCallBenchmarkScenario";
|
|
11
10
|
/**
|
|
@@ -25,11 +24,11 @@ import type { IAgenticaCallBenchmarkScenario } from "./IAgenticaCallBenchmarkSce
|
|
|
25
24
|
*
|
|
26
25
|
* @author Samchon
|
|
27
26
|
*/
|
|
28
|
-
export interface IAgenticaCallBenchmarkResult
|
|
27
|
+
export interface IAgenticaCallBenchmarkResult {
|
|
29
28
|
/**
|
|
30
29
|
* Experiments for each scenario.
|
|
31
30
|
*/
|
|
32
|
-
experiments: IAgenticaCallBenchmarkResult.IExperiment
|
|
31
|
+
experiments: IAgenticaCallBenchmarkResult.IExperiment[];
|
|
33
32
|
/**
|
|
34
33
|
* Aggregated token usage information.
|
|
35
34
|
*/
|
|
@@ -47,11 +46,11 @@ export declare namespace IAgenticaCallBenchmarkResult {
|
|
|
47
46
|
/**
|
|
48
47
|
* Experiment result about a scenario.
|
|
49
48
|
*/
|
|
50
|
-
interface IExperiment
|
|
49
|
+
interface IExperiment {
|
|
51
50
|
/**
|
|
52
51
|
* Scenario of the experiment.
|
|
53
52
|
*/
|
|
54
|
-
scenario: IAgenticaCallBenchmarkScenario
|
|
53
|
+
scenario: IAgenticaCallBenchmarkScenario;
|
|
55
54
|
/**
|
|
56
55
|
* Events occurred during the benchmark in the scenario.
|
|
57
56
|
*
|
|
@@ -60,7 +59,7 @@ export declare namespace IAgenticaCallBenchmarkResult {
|
|
|
60
59
|
* {@link AgenticaCallBenchmark.IConfig.repeat repeat} count.
|
|
61
60
|
* And the event is one of the repeated benchmark results.
|
|
62
61
|
*/
|
|
63
|
-
events: IAgenticaCallBenchmarkEvent
|
|
62
|
+
events: IAgenticaCallBenchmarkEvent[];
|
|
64
63
|
/**
|
|
65
64
|
* LLM token usage information.
|
|
66
65
|
*/
|
|
@@ -4,7 +4,6 @@
|
|
|
4
4
|
*
|
|
5
5
|
* @author Wrtn Technologies
|
|
6
6
|
*/
|
|
7
|
-
import type { ILlmSchema } from "@samchon/openapi";
|
|
8
7
|
import type { IAgenticaBenchmarkExpected } from "./IAgenticaBenchmarkExpected";
|
|
9
8
|
/**
|
|
10
9
|
* Scenario of function calling.
|
|
@@ -20,7 +19,7 @@ import type { IAgenticaBenchmarkExpected } from "./IAgenticaBenchmarkExpected";
|
|
|
20
19
|
*
|
|
21
20
|
* @author Samchon
|
|
22
21
|
*/
|
|
23
|
-
export interface IAgenticaCallBenchmarkScenario<Model extends ILlmSchema.Model> {
|
|
22
|
+
export interface IAgenticaCallBenchmarkScenario {
|
|
24
23
|
/**
|
|
25
24
|
* Name of the scenario.
|
|
26
25
|
*
|
|
@@ -39,5 +38,5 @@ export interface IAgenticaCallBenchmarkScenario<Model extends ILlmSchema.Model>
|
|
|
39
38
|
* the user's {@link text} conversation for the LLM
|
|
40
39
|
* (Large Language Model) function calling.
|
|
41
40
|
*/
|
|
42
|
-
expected: IAgenticaBenchmarkExpected
|
|
41
|
+
expected: IAgenticaBenchmarkExpected;
|
|
43
42
|
}
|
|
@@ -5,7 +5,6 @@
|
|
|
5
5
|
* @author Wrtn Technologies
|
|
6
6
|
*/
|
|
7
7
|
import type { AgenticaAssistantMessageHistory, AgenticaOperationSelection, AgenticaTokenUsage } from "@agentica/core";
|
|
8
|
-
import type { ILlmSchema } from "@samchon/openapi";
|
|
9
8
|
import type { IAgenticaSelectBenchmarkScenario } from "./IAgenticaSelectBenchmarkScenario";
|
|
10
9
|
/**
|
|
11
10
|
* Event of LLM function selection benchmark.
|
|
@@ -29,7 +28,7 @@ import type { IAgenticaSelectBenchmarkScenario } from "./IAgenticaSelectBenchmar
|
|
|
29
28
|
*
|
|
30
29
|
* @author Samchon
|
|
31
30
|
*/
|
|
32
|
-
export type IAgenticaSelectBenchmarkEvent
|
|
31
|
+
export type IAgenticaSelectBenchmarkEvent = IAgenticaSelectBenchmarkEvent.ISuccess | IAgenticaSelectBenchmarkEvent.IFailure | IAgenticaSelectBenchmarkEvent.IError;
|
|
33
32
|
export declare namespace IAgenticaSelectBenchmarkEvent {
|
|
34
33
|
/**
|
|
35
34
|
* Success event type.
|
|
@@ -37,7 +36,7 @@ export declare namespace IAgenticaSelectBenchmarkEvent {
|
|
|
37
36
|
* The `success` event type represents that the benchmark testing is
|
|
38
37
|
* fully meet the expected scenario.
|
|
39
38
|
*/
|
|
40
|
-
export interface ISuccess
|
|
39
|
+
export interface ISuccess extends IEventBase<"success"> {
|
|
41
40
|
/**
|
|
42
41
|
* Usage of the token during the benchmark.
|
|
43
42
|
*/
|
|
@@ -45,7 +44,7 @@ export declare namespace IAgenticaSelectBenchmarkEvent {
|
|
|
45
44
|
/**
|
|
46
45
|
* Selected operations in the benchmark.
|
|
47
46
|
*/
|
|
48
|
-
selected: AgenticaOperationSelection
|
|
47
|
+
selected: AgenticaOperationSelection[];
|
|
49
48
|
/**
|
|
50
49
|
* Prompt messages from the assistant.
|
|
51
50
|
*/
|
|
@@ -57,7 +56,7 @@ export declare namespace IAgenticaSelectBenchmarkEvent {
|
|
|
57
56
|
* The `failure` event type represents that the `selector` had not
|
|
58
57
|
* selected the expected scenario in the benchmark testing.
|
|
59
58
|
*/
|
|
60
|
-
export interface IFailure
|
|
59
|
+
export interface IFailure extends IEventBase<"failure"> {
|
|
61
60
|
/**
|
|
62
61
|
* Usage of the token during the benchmark.
|
|
63
62
|
*/
|
|
@@ -65,7 +64,7 @@ export declare namespace IAgenticaSelectBenchmarkEvent {
|
|
|
65
64
|
/**
|
|
66
65
|
* Selected operations in the benchmark.
|
|
67
66
|
*/
|
|
68
|
-
selected: AgenticaOperationSelection
|
|
67
|
+
selected: AgenticaOperationSelection[];
|
|
69
68
|
/**
|
|
70
69
|
* Prompt messages from the assistant.
|
|
71
70
|
*/
|
|
@@ -77,13 +76,13 @@ export declare namespace IAgenticaSelectBenchmarkEvent {
|
|
|
77
76
|
* The `error` event type represents that an error occurred
|
|
78
77
|
* during the benchmark testing.
|
|
79
78
|
*/
|
|
80
|
-
export interface IError
|
|
79
|
+
export interface IError extends IEventBase<"error"> {
|
|
81
80
|
/**
|
|
82
81
|
* Error occurred during the benchmark.
|
|
83
82
|
*/
|
|
84
83
|
error: unknown;
|
|
85
84
|
}
|
|
86
|
-
interface IEventBase<Type extends string
|
|
85
|
+
interface IEventBase<Type extends string> {
|
|
87
86
|
/**
|
|
88
87
|
* Discriminant type.
|
|
89
88
|
*/
|
|
@@ -91,7 +90,7 @@ export declare namespace IAgenticaSelectBenchmarkEvent {
|
|
|
91
90
|
/**
|
|
92
91
|
* Expected scenario.
|
|
93
92
|
*/
|
|
94
|
-
scenario: IAgenticaSelectBenchmarkScenario
|
|
93
|
+
scenario: IAgenticaSelectBenchmarkScenario;
|
|
95
94
|
/**
|
|
96
95
|
* When the benchmark testing started.
|
|
97
96
|
*/
|
|
@@ -5,7 +5,6 @@
|
|
|
5
5
|
* @author Wrtn Technologies
|
|
6
6
|
*/
|
|
7
7
|
import type { AgenticaTokenUsage } from "@agentica/core";
|
|
8
|
-
import type { ILlmSchema } from "@samchon/openapi";
|
|
9
8
|
import type { IAgenticaSelectBenchmarkEvent } from "./IAgenticaSelectBenchmarkEvent";
|
|
10
9
|
import type { IAgenticaSelectBenchmarkScenario } from "./IAgenticaSelectBenchmarkScenario";
|
|
11
10
|
/**
|
|
@@ -25,11 +24,11 @@ import type { IAgenticaSelectBenchmarkScenario } from "./IAgenticaSelectBenchmar
|
|
|
25
24
|
*
|
|
26
25
|
* @author Samchon
|
|
27
26
|
*/
|
|
28
|
-
export interface IAgenticaSelectBenchmarkResult
|
|
27
|
+
export interface IAgenticaSelectBenchmarkResult {
|
|
29
28
|
/**
|
|
30
29
|
* Experiments for each scenario.
|
|
31
30
|
*/
|
|
32
|
-
experiments: IAgenticaSelectBenchmarkResult.IExperiment
|
|
31
|
+
experiments: IAgenticaSelectBenchmarkResult.IExperiment[];
|
|
33
32
|
/**
|
|
34
33
|
* Aggregated token usage information.
|
|
35
34
|
*/
|
|
@@ -47,11 +46,11 @@ export declare namespace IAgenticaSelectBenchmarkResult {
|
|
|
47
46
|
/**
|
|
48
47
|
* Experiment result about a scenario.
|
|
49
48
|
*/
|
|
50
|
-
interface IExperiment
|
|
49
|
+
interface IExperiment {
|
|
51
50
|
/**
|
|
52
51
|
* Expected scenario.
|
|
53
52
|
*/
|
|
54
|
-
scenario: IAgenticaSelectBenchmarkScenario
|
|
53
|
+
scenario: IAgenticaSelectBenchmarkScenario;
|
|
55
54
|
/**
|
|
56
55
|
* Events occurred during the benchmark in the scenario.
|
|
57
56
|
*
|
|
@@ -60,7 +59,7 @@ export declare namespace IAgenticaSelectBenchmarkResult {
|
|
|
60
59
|
* {@link AgenticaSelectBenchmark.IConfig.repeat repeat} count.
|
|
61
60
|
* And the event is one of the repeated benchmark results.
|
|
62
61
|
*/
|
|
63
|
-
events: IAgenticaSelectBenchmarkEvent
|
|
62
|
+
events: IAgenticaSelectBenchmarkEvent[];
|
|
64
63
|
/**
|
|
65
64
|
* LLM token usage information.
|
|
66
65
|
*/
|
|
@@ -4,7 +4,6 @@
|
|
|
4
4
|
*
|
|
5
5
|
* @author Wrtn Technologies
|
|
6
6
|
*/
|
|
7
|
-
import type { ILlmSchema } from "@samchon/openapi";
|
|
8
7
|
import type { IAgenticaBenchmarkExpected } from "./IAgenticaBenchmarkExpected";
|
|
9
8
|
/**
|
|
10
9
|
* Scenario of function selection.
|
|
@@ -20,7 +19,7 @@ import type { IAgenticaBenchmarkExpected } from "./IAgenticaBenchmarkExpected";
|
|
|
20
19
|
*
|
|
21
20
|
* @author Samchon
|
|
22
21
|
*/
|
|
23
|
-
export interface IAgenticaSelectBenchmarkScenario<Model extends ILlmSchema.Model> {
|
|
22
|
+
export interface IAgenticaSelectBenchmarkScenario {
|
|
24
23
|
/**
|
|
25
24
|
* Name of the scenario.
|
|
26
25
|
*
|
|
@@ -39,5 +38,5 @@ export interface IAgenticaSelectBenchmarkScenario<Model extends ILlmSchema.Model
|
|
|
39
38
|
* {@link text} conversation for the LLM (Large Language Model)
|
|
40
39
|
* function selection.
|
|
41
40
|
*/
|
|
42
|
-
expected: IAgenticaBenchmarkExpected
|
|
41
|
+
expected: IAgenticaBenchmarkExpected;
|
|
43
42
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@agentica/benchmark",
|
|
3
|
-
"version": "0.34.2",
|
|
3
|
+
"version": "0.35.0",
|
|
4
4
|
"description": "Agentic AI Library specialized in LLM Function Calling",
|
|
5
5
|
"author": "Wrtn Technologies",
|
|
6
6
|
"license": "MIT",
|
|
@@ -35,15 +35,15 @@
|
|
|
35
35
|
"src"
|
|
36
36
|
],
|
|
37
37
|
"peerDependencies": {
|
|
38
|
-
"@agentica/core": "^0.
|
|
38
|
+
"@agentica/core": "^0.35.0"
|
|
39
39
|
},
|
|
40
40
|
"dependencies": {
|
|
41
|
-
"@samchon/openapi": "^
|
|
42
|
-
"openai": "^6.
|
|
41
|
+
"@samchon/openapi": "^6.0.0",
|
|
42
|
+
"openai": "^6.15.0",
|
|
43
43
|
"tstl": "^3.0.0",
|
|
44
|
-
"typia": "^
|
|
45
|
-
"uuid": "^
|
|
46
|
-
"@agentica/core": "^0.
|
|
44
|
+
"typia": "^11.0.0",
|
|
45
|
+
"uuid": "^13.0.0",
|
|
46
|
+
"@agentica/core": "^0.35.0"
|
|
47
47
|
},
|
|
48
48
|
"devDependencies": {
|
|
49
49
|
"@rollup/plugin-terser": "^0.4.4",
|
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
import type { Agentica } from "@agentica/core";
|
|
2
|
-
import type { ILlmSchema } from "@samchon/openapi";
|
|
3
2
|
import type { tags } from "typia";
|
|
4
3
|
|
|
5
4
|
/**
|
|
@@ -38,18 +37,18 @@ import { AgenticaCallBenchmarkReporter } from "./internal/AgenticaCallBenchmarkR
|
|
|
38
37
|
*
|
|
39
38
|
* @author Samchon
|
|
40
39
|
*/
|
|
41
|
-
export class AgenticaCallBenchmark<Model extends ILlmSchema.Model> {
|
|
42
|
-
private agent_: Agentica
|
|
43
|
-
private scenarios_: IAgenticaCallBenchmarkScenario
|
|
40
|
+
export class AgenticaCallBenchmark {
|
|
41
|
+
private agent_: Agentica;
|
|
42
|
+
private scenarios_: IAgenticaCallBenchmarkScenario[];
|
|
44
43
|
private config_: AgenticaCallBenchmark.IConfig;
|
|
45
|
-
private result_: IAgenticaCallBenchmarkResult
|
|
44
|
+
private result_: IAgenticaCallBenchmarkResult | null;
|
|
46
45
|
|
|
47
46
|
/**
|
|
48
47
|
* Initializer Constructor.
|
|
49
48
|
*
|
|
50
49
|
* @param props Properties of the selection benchmark
|
|
51
50
|
*/
|
|
52
|
-
public constructor(props: AgenticaCallBenchmark.IProps
|
|
51
|
+
public constructor(props: AgenticaCallBenchmark.IProps) {
|
|
53
52
|
this.agent_ = props.agent;
|
|
54
53
|
this.scenarios_ = props.scenarios.slice();
|
|
55
54
|
this.config_ = {
|
|
@@ -77,16 +76,16 @@ export class AgenticaCallBenchmark<Model extends ILlmSchema.Model> {
|
|
|
77
76
|
* @returns Results of the function calling benchmark
|
|
78
77
|
*/
|
|
79
78
|
public async execute(
|
|
80
|
-
listener?: (event: IAgenticaCallBenchmarkEvent
|
|
81
|
-
): Promise<IAgenticaCallBenchmarkResult
|
|
79
|
+
listener?: (event: IAgenticaCallBenchmarkEvent) => void,
|
|
80
|
+
): Promise<IAgenticaCallBenchmarkResult> {
|
|
82
81
|
const started_at: Date = new Date();
|
|
83
82
|
const semaphore: Semaphore = new Semaphore(this.config_.simultaneous);
|
|
84
83
|
const task = this.scenarios_.map(async (scenario) => {
|
|
85
|
-
const events: IAgenticaCallBenchmarkEvent
|
|
84
|
+
const events: IAgenticaCallBenchmarkEvent[]
|
|
86
85
|
= await Promise.all(
|
|
87
86
|
Array.from({ length: this.config_.repeat }).map(async () => {
|
|
88
87
|
await semaphore.acquire();
|
|
89
|
-
const e: IAgenticaCallBenchmarkEvent
|
|
88
|
+
const e: IAgenticaCallBenchmarkEvent
|
|
90
89
|
= await this.step(scenario);
|
|
91
90
|
await semaphore.release();
|
|
92
91
|
|
|
@@ -106,7 +105,7 @@ export class AgenticaCallBenchmark<Model extends ILlmSchema.Model> {
|
|
|
106
105
|
.reduce((acc, cur) => AgenticaTokenUsage.plus(acc, cur), AgenticaTokenUsage.zero()),
|
|
107
106
|
};
|
|
108
107
|
});
|
|
109
|
-
const experiments: IAgenticaCallBenchmarkResult.IExperiment
|
|
108
|
+
const experiments: IAgenticaCallBenchmarkResult.IExperiment[]
|
|
110
109
|
= await Promise.all(task);
|
|
111
110
|
return (this.result_ = {
|
|
112
111
|
experiments,
|
|
@@ -144,9 +143,9 @@ export class AgenticaCallBenchmark<Model extends ILlmSchema.Model> {
|
|
|
144
143
|
}
|
|
145
144
|
|
|
146
145
|
private async step(
|
|
147
|
-
scenario: IAgenticaCallBenchmarkScenario
|
|
148
|
-
): Promise<IAgenticaCallBenchmarkEvent
|
|
149
|
-
const agent: Agentica
|
|
146
|
+
scenario: IAgenticaCallBenchmarkScenario,
|
|
147
|
+
): Promise<IAgenticaCallBenchmarkEvent> {
|
|
148
|
+
const agent: Agentica = this.agent_.clone();
|
|
150
149
|
const started_at: Date = new Date();
|
|
151
150
|
const success = () =>
|
|
152
151
|
AgenticaBenchmarkPredicator.success({
|
|
@@ -157,7 +156,7 @@ export class AgenticaCallBenchmark<Model extends ILlmSchema.Model> {
|
|
|
157
156
|
.map(p => p.operation),
|
|
158
157
|
strict: false,
|
|
159
158
|
});
|
|
160
|
-
const out = (): IAgenticaCallBenchmarkEvent
|
|
159
|
+
const out = (): IAgenticaCallBenchmarkEvent => {
|
|
161
160
|
const select = AgenticaBenchmarkPredicator.success({
|
|
162
161
|
expected: scenario.expected,
|
|
163
162
|
operations: agent
|
|
@@ -176,7 +175,7 @@ export class AgenticaCallBenchmark<Model extends ILlmSchema.Model> {
|
|
|
176
175
|
usage: agent.getTokenUsage(),
|
|
177
176
|
started_at,
|
|
178
177
|
completed_at: new Date(),
|
|
179
|
-
} satisfies IAgenticaCallBenchmarkEvent.IFailure
|
|
178
|
+
} satisfies IAgenticaCallBenchmarkEvent.IFailure;
|
|
180
179
|
};
|
|
181
180
|
|
|
182
181
|
try {
|
|
@@ -216,16 +215,16 @@ export namespace AgenticaCallBenchmark {
|
|
|
216
215
|
/**
|
|
217
216
|
* Properties of the {@link AgenticaCallBenchmark} constructor.
|
|
218
217
|
*/
|
|
219
|
-
export interface IProps
|
|
218
|
+
export interface IProps {
|
|
220
219
|
/**
|
|
221
220
|
* AI agent instance.
|
|
222
221
|
*/
|
|
223
|
-
agent: Agentica
|
|
222
|
+
agent: Agentica;
|
|
224
223
|
|
|
225
224
|
/**
|
|
226
225
|
* List of scenarios what you expect.
|
|
227
226
|
*/
|
|
228
|
-
scenarios: IAgenticaCallBenchmarkScenario
|
|
227
|
+
scenarios: IAgenticaCallBenchmarkScenario[];
|
|
229
228
|
|
|
230
229
|
/**
|
|
231
230
|
* Configuration for the benchmark.
|
|
@@ -5,7 +5,6 @@ import type {
|
|
|
5
5
|
AgenticaHistory,
|
|
6
6
|
AgenticaOperationSelection,
|
|
7
7
|
} from "@agentica/core";
|
|
8
|
-
import type { ILlmSchema } from "@samchon/openapi";
|
|
9
8
|
import type { tags } from "typia";
|
|
10
9
|
|
|
11
10
|
/**
|
|
@@ -42,19 +41,19 @@ import { AgenticaSelectBenchmarkReporter } from "./internal/AgenticaSelectBenchm
|
|
|
42
41
|
*
|
|
43
42
|
* @author Samchon
|
|
44
43
|
*/
|
|
45
|
-
export class AgenticaSelectBenchmark<Model extends ILlmSchema.Model> {
|
|
46
|
-
private agent_: Agentica
|
|
47
|
-
private scenarios_: IAgenticaSelectBenchmarkScenario
|
|
44
|
+
export class AgenticaSelectBenchmark {
|
|
45
|
+
private agent_: Agentica;
|
|
46
|
+
private scenarios_: IAgenticaSelectBenchmarkScenario[];
|
|
48
47
|
private config_: AgenticaSelectBenchmark.IConfig;
|
|
49
|
-
private histories_: AgenticaHistory
|
|
50
|
-
private result_: IAgenticaSelectBenchmarkResult
|
|
48
|
+
private histories_: AgenticaHistory[];
|
|
49
|
+
private result_: IAgenticaSelectBenchmarkResult | null;
|
|
51
50
|
|
|
52
51
|
/**
|
|
53
52
|
* Initializer Constructor.
|
|
54
53
|
*
|
|
55
54
|
* @param props Properties of the selection benchmark
|
|
56
55
|
*/
|
|
57
|
-
public constructor(props: AgenticaSelectBenchmark.IProps
|
|
56
|
+
public constructor(props: AgenticaSelectBenchmark.IProps) {
|
|
58
57
|
this.agent_ = props.agent;
|
|
59
58
|
this.scenarios_ = props.scenarios.slice();
|
|
60
59
|
this.config_ = {
|
|
@@ -82,18 +81,18 @@ export class AgenticaSelectBenchmark<Model extends ILlmSchema.Model> {
|
|
|
82
81
|
* @returns Results of the function selection benchmark
|
|
83
82
|
*/
|
|
84
83
|
public async execute(
|
|
85
|
-
listener?: (event: IAgenticaSelectBenchmarkEvent
|
|
86
|
-
): Promise<IAgenticaSelectBenchmarkResult
|
|
84
|
+
listener?: (event: IAgenticaSelectBenchmarkEvent) => void,
|
|
85
|
+
): Promise<IAgenticaSelectBenchmarkResult> {
|
|
87
86
|
const started_at: Date = new Date();
|
|
88
87
|
const semaphore: Semaphore = new Semaphore(this.config_.simultaneous);
|
|
89
|
-
const experiments: IAgenticaSelectBenchmarkResult.IExperiment
|
|
88
|
+
const experiments: IAgenticaSelectBenchmarkResult.IExperiment[]
|
|
90
89
|
= await Promise.all(
|
|
91
90
|
this.scenarios_.map(async (scenario) => {
|
|
92
|
-
const events: IAgenticaSelectBenchmarkEvent
|
|
91
|
+
const events: IAgenticaSelectBenchmarkEvent[]
|
|
93
92
|
= await Promise.all(
|
|
94
93
|
Array.from({ length: this.config_.repeat }).map(async () => {
|
|
95
94
|
await semaphore.acquire();
|
|
96
|
-
const e: IAgenticaSelectBenchmarkEvent
|
|
95
|
+
const e: IAgenticaSelectBenchmarkEvent
|
|
97
96
|
= await this.step(scenario);
|
|
98
97
|
await semaphore.release();
|
|
99
98
|
if (listener !== undefined) {
|
|
@@ -149,13 +148,13 @@ export class AgenticaSelectBenchmark<Model extends ILlmSchema.Model> {
|
|
|
149
148
|
}
|
|
150
149
|
|
|
151
150
|
private async step(
|
|
152
|
-
scenario: IAgenticaSelectBenchmarkScenario
|
|
153
|
-
): Promise<IAgenticaSelectBenchmarkEvent
|
|
151
|
+
scenario: IAgenticaSelectBenchmarkScenario,
|
|
152
|
+
): Promise<IAgenticaSelectBenchmarkEvent> {
|
|
154
153
|
const started_at: Date = new Date();
|
|
155
154
|
try {
|
|
156
155
|
const usage: AgenticaTokenUsage = AgenticaTokenUsage.zero();
|
|
157
|
-
const historyGetters: Array<() => Promise<AgenticaHistory
|
|
158
|
-
const dispatch = async (event: AgenticaEvent
|
|
156
|
+
const historyGetters: Array<() => Promise<AgenticaHistory>> = [];
|
|
157
|
+
const dispatch = async (event: AgenticaEvent): Promise<void> => {
|
|
159
158
|
if ("toHistory" in event) {
|
|
160
159
|
if ("join" in event) {
|
|
161
160
|
historyGetters.push(async () => {
|
|
@@ -168,7 +167,7 @@ export class AgenticaSelectBenchmark<Model extends ILlmSchema.Model> {
|
|
|
168
167
|
}
|
|
169
168
|
}
|
|
170
169
|
};
|
|
171
|
-
const context: AgenticaContext
|
|
170
|
+
const context: AgenticaContext = this.agent_.getContext({
|
|
172
171
|
prompt: factory.createUserMessageHistory({
|
|
173
172
|
id: v4(),
|
|
174
173
|
created_at: started_at.toISOString(),
|
|
@@ -190,11 +189,11 @@ export class AgenticaSelectBenchmark<Model extends ILlmSchema.Model> {
|
|
|
190
189
|
stack: [],
|
|
191
190
|
ready: () => true,
|
|
192
191
|
});
|
|
193
|
-
const histories: AgenticaHistory
|
|
192
|
+
const histories: AgenticaHistory[]
|
|
194
193
|
= await Promise.all(
|
|
195
194
|
historyGetters.map(async g => g()),
|
|
196
195
|
);
|
|
197
|
-
const selected: AgenticaOperationSelection
|
|
196
|
+
const selected: AgenticaOperationSelection[] = histories
|
|
198
197
|
.filter(p => p.type === "select")
|
|
199
198
|
.map(p => p.selection);
|
|
200
199
|
return {
|
|
@@ -213,8 +212,8 @@ export class AgenticaSelectBenchmark<Model extends ILlmSchema.Model> {
|
|
|
213
212
|
started_at,
|
|
214
213
|
completed_at: new Date(),
|
|
215
214
|
} satisfies
|
|
216
|
-
| IAgenticaSelectBenchmarkEvent.ISuccess
|
|
217
|
-
| IAgenticaSelectBenchmarkEvent.IFailure
|
|
215
|
+
| IAgenticaSelectBenchmarkEvent.ISuccess
|
|
216
|
+
| IAgenticaSelectBenchmarkEvent.IFailure;
|
|
218
217
|
}
|
|
219
218
|
catch (error) {
|
|
220
219
|
return {
|
|
@@ -223,7 +222,7 @@ export class AgenticaSelectBenchmark<Model extends ILlmSchema.Model> {
|
|
|
223
222
|
error,
|
|
224
223
|
started_at,
|
|
225
224
|
completed_at: new Date(),
|
|
226
|
-
} satisfies IAgenticaSelectBenchmarkEvent.IError
|
|
225
|
+
} satisfies IAgenticaSelectBenchmarkEvent.IError;
|
|
227
226
|
}
|
|
228
227
|
}
|
|
229
228
|
}
|
|
@@ -231,16 +230,16 @@ export namespace AgenticaSelectBenchmark {
|
|
|
231
230
|
/**
|
|
232
231
|
* Properties of the {@link AgenticaSelectBenchmark} constructor.
|
|
233
232
|
*/
|
|
234
|
-
export interface IProps
|
|
233
|
+
export interface IProps {
|
|
235
234
|
/**
|
|
236
235
|
* AI agent instance.
|
|
237
236
|
*/
|
|
238
|
-
agent: Agentica
|
|
237
|
+
agent: Agentica;
|
|
239
238
|
|
|
240
239
|
/**
|
|
241
240
|
* List of scenarios what you expect.
|
|
242
241
|
*/
|
|
243
|
-
scenarios: IAgenticaSelectBenchmarkScenario
|
|
242
|
+
scenarios: IAgenticaSelectBenchmarkScenario[];
|
|
244
243
|
|
|
245
244
|
/**
|
|
246
245
|
* Configuration for the benchmark.
|
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
import type { MicroAgentica } from "@agentica/core";
|
|
2
|
-
import type { ILlmSchema } from "@samchon/openapi";
|
|
3
2
|
import type { tags } from "typia";
|
|
4
3
|
|
|
5
4
|
/**
|
|
@@ -38,18 +37,18 @@ import { AgenticaCallBenchmarkReporter } from "./internal/AgenticaCallBenchmarkR
|
|
|
38
37
|
*
|
|
39
38
|
* @author Samchon
|
|
40
39
|
*/
|
|
41
|
-
export class MicroAgenticaCallBenchmark<Model extends ILlmSchema.Model> {
|
|
42
|
-
private agent_: MicroAgentica
|
|
43
|
-
private scenarios_: IAgenticaCallBenchmarkScenario
|
|
40
|
+
export class MicroAgenticaCallBenchmark {
|
|
41
|
+
private agent_: MicroAgentica;
|
|
42
|
+
private scenarios_: IAgenticaCallBenchmarkScenario[];
|
|
44
43
|
private config_: MicroAgenticaCallBenchmark.IConfig;
|
|
45
|
-
private result_: IAgenticaCallBenchmarkResult
|
|
44
|
+
private result_: IAgenticaCallBenchmarkResult | null;
|
|
46
45
|
|
|
47
46
|
/**
|
|
48
47
|
* Initializer Constructor.
|
|
49
48
|
*
|
|
50
49
|
* @param props Properties of the selection benchmark
|
|
51
50
|
*/
|
|
52
|
-
public constructor(props: MicroAgenticaCallBenchmark.IProps
|
|
51
|
+
public constructor(props: MicroAgenticaCallBenchmark.IProps) {
|
|
53
52
|
this.agent_ = props.agent;
|
|
54
53
|
this.scenarios_ = props.scenarios.slice();
|
|
55
54
|
this.config_ = {
|
|
@@ -77,16 +76,16 @@ export class MicroAgenticaCallBenchmark<Model extends ILlmSchema.Model> {
|
|
|
77
76
|
* @returns Results of the function calling benchmark
|
|
78
77
|
*/
|
|
79
78
|
public async execute(
|
|
80
|
-
listener?: (event: IAgenticaCallBenchmarkEvent
|
|
81
|
-
): Promise<IAgenticaCallBenchmarkResult
|
|
79
|
+
listener?: (event: IAgenticaCallBenchmarkEvent) => void,
|
|
80
|
+
): Promise<IAgenticaCallBenchmarkResult> {
|
|
82
81
|
const started_at: Date = new Date();
|
|
83
82
|
const semaphore: Semaphore = new Semaphore(this.config_.simultaneous);
|
|
84
83
|
const task = this.scenarios_.map(async (scenario) => {
|
|
85
|
-
const events: IAgenticaCallBenchmarkEvent
|
|
84
|
+
const events: IAgenticaCallBenchmarkEvent[]
|
|
86
85
|
= await Promise.all(
|
|
87
86
|
Array.from({ length: this.config_.repeat }).map(async () => {
|
|
88
87
|
await semaphore.acquire();
|
|
89
|
-
const e: IAgenticaCallBenchmarkEvent
|
|
88
|
+
const e: IAgenticaCallBenchmarkEvent
|
|
90
89
|
= await this.step(scenario);
|
|
91
90
|
await semaphore.release();
|
|
92
91
|
|
|
@@ -106,7 +105,7 @@ export class MicroAgenticaCallBenchmark<Model extends ILlmSchema.Model> {
|
|
|
106
105
|
.reduce((acc, cur) => AgenticaTokenUsage.plus(acc, cur), AgenticaTokenUsage.zero()),
|
|
107
106
|
};
|
|
108
107
|
});
|
|
109
|
-
const experiments: IAgenticaCallBenchmarkResult.IExperiment
|
|
108
|
+
const experiments: IAgenticaCallBenchmarkResult.IExperiment[]
|
|
110
109
|
= await Promise.all(task);
|
|
111
110
|
return (this.result_ = {
|
|
112
111
|
experiments,
|
|
@@ -144,9 +143,9 @@ export class MicroAgenticaCallBenchmark<Model extends ILlmSchema.Model> {
|
|
|
144
143
|
}
|
|
145
144
|
|
|
146
145
|
private async step(
|
|
147
|
-
scenario: IAgenticaCallBenchmarkScenario
|
|
148
|
-
): Promise<IAgenticaCallBenchmarkEvent
|
|
149
|
-
const agent: MicroAgentica
|
|
146
|
+
scenario: IAgenticaCallBenchmarkScenario,
|
|
147
|
+
): Promise<IAgenticaCallBenchmarkEvent> {
|
|
148
|
+
const agent: MicroAgentica = this.agent_.clone();
|
|
150
149
|
const started_at: Date = new Date();
|
|
151
150
|
const success = () =>
|
|
152
151
|
AgenticaBenchmarkPredicator.success({
|
|
@@ -157,7 +156,7 @@ export class MicroAgenticaCallBenchmark<Model extends ILlmSchema.Model> {
|
|
|
157
156
|
.map(p => p.operation),
|
|
158
157
|
strict: false,
|
|
159
158
|
});
|
|
160
|
-
const out = (): IAgenticaCallBenchmarkEvent
|
|
159
|
+
const out = (): IAgenticaCallBenchmarkEvent => {
|
|
161
160
|
const select = AgenticaBenchmarkPredicator.success({
|
|
162
161
|
expected: scenario.expected,
|
|
163
162
|
operations: agent
|
|
@@ -176,7 +175,7 @@ export class MicroAgenticaCallBenchmark<Model extends ILlmSchema.Model> {
|
|
|
176
175
|
usage: agent.getTokenUsage(),
|
|
177
176
|
started_at,
|
|
178
177
|
completed_at: new Date(),
|
|
179
|
-
} satisfies IAgenticaCallBenchmarkEvent.IFailure
|
|
178
|
+
} satisfies IAgenticaCallBenchmarkEvent.IFailure;
|
|
180
179
|
};
|
|
181
180
|
|
|
182
181
|
try {
|
|
@@ -216,16 +215,16 @@ export namespace MicroAgenticaCallBenchmark {
|
|
|
216
215
|
/**
|
|
217
216
|
* Properties of the {@link MicroAgenticaCallBenchmark} constructor.
|
|
218
217
|
*/
|
|
219
|
-
export interface IProps
|
|
218
|
+
export interface IProps {
|
|
220
219
|
/**
|
|
221
220
|
* AI agent instance.
|
|
222
221
|
*/
|
|
223
|
-
agent: MicroAgentica
|
|
222
|
+
agent: MicroAgentica;
|
|
224
223
|
|
|
225
224
|
/**
|
|
226
225
|
* List of scenarios what you expect.
|
|
227
226
|
*/
|
|
228
|
-
scenarios: IAgenticaCallBenchmarkScenario
|
|
227
|
+
scenarios: IAgenticaCallBenchmarkScenario[];
|
|
229
228
|
|
|
230
229
|
/**
|
|
231
230
|
* Configuration for the benchmark.
|