@agentica/benchmark 0.34.1 → 0.35.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/AgenticaCallBenchmark.d.ts +6 -7
- package/lib/AgenticaCallBenchmark.js.map +1 -1
- package/lib/AgenticaSelectBenchmark.d.ts +6 -7
- package/lib/AgenticaSelectBenchmark.js.map +1 -1
- package/lib/MicroAgenticaCallBenchmark.d.ts +6 -7
- package/lib/MicroAgenticaCallBenchmark.js.map +1 -1
- package/lib/index.mjs +1 -1
- package/lib/index.mjs.map +1 -1
- package/lib/internal/AgenticaBenchmarkPredicator.d.ts +4 -5
- package/lib/internal/AgenticaBenchmarkPredicator.js +4 -4
- package/lib/internal/AgenticaBenchmarkPredicator.js.map +1 -1
- package/lib/internal/AgenticaBenchmarkUtil.d.ts +1 -2
- package/lib/internal/AgenticaBenchmarkUtil.js.map +1 -1
- package/lib/internal/AgenticaCallBenchmarkReporter.d.ts +1 -2
- package/lib/internal/AgenticaCallBenchmarkReporter.js.map +1 -1
- package/lib/internal/AgenticaPromptReporter.d.ts +1 -2
- package/lib/internal/AgenticaPromptReporter.js.map +1 -1
- package/lib/internal/AgenticaSelectBenchmarkReporter.d.ts +1 -2
- package/lib/internal/AgenticaSelectBenchmarkReporter.js.map +1 -1
- package/lib/structures/IAgenticaBenchmarkExpected.d.ts +9 -10
- package/lib/structures/IAgenticaCallBenchmarkEvent.d.ts +7 -8
- package/lib/structures/IAgenticaCallBenchmarkResult.d.ts +5 -6
- package/lib/structures/IAgenticaCallBenchmarkScenario.d.ts +2 -3
- package/lib/structures/IAgenticaSelectBenchmarkEvent.d.ts +8 -9
- package/lib/structures/IAgenticaSelectBenchmarkResult.d.ts +5 -6
- package/lib/structures/IAgenticaSelectBenchmarkScenario.d.ts +2 -3
- package/package.json +7 -7
- package/src/AgenticaCallBenchmark.ts +18 -19
- package/src/AgenticaSelectBenchmark.ts +24 -25
- package/src/MicroAgenticaCallBenchmark.ts +18 -19
- package/src/internal/AgenticaBenchmarkPredicator.ts +11 -12
- package/src/internal/AgenticaBenchmarkUtil.ts +1 -3
- package/src/internal/AgenticaCallBenchmarkReporter.ts +7 -8
- package/src/internal/AgenticaPromptReporter.ts +1 -2
- package/src/internal/AgenticaSelectBenchmarkReporter.ts +5 -6
- package/src/structures/IAgenticaBenchmarkExpected.ts +16 -17
- package/src/structures/IAgenticaCallBenchmarkEvent.ts +13 -14
- package/src/structures/IAgenticaCallBenchmarkResult.ts +5 -6
- package/src/structures/IAgenticaCallBenchmarkScenario.ts +2 -6
- package/src/structures/IAgenticaSelectBenchmarkEvent.ts +11 -15
- package/src/structures/IAgenticaSelectBenchmarkResult.ts +5 -8
- package/src/structures/IAgenticaSelectBenchmarkScenario.ts +2 -6
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
*/
|
|
7
7
|
|
|
8
8
|
import type { Agentica, AgenticaHistory, AgenticaOperation, MicroAgentica } from "@agentica/core";
|
|
9
|
-
import type { ILlmFunction
|
|
9
|
+
import type { ILlmFunction } from "@samchon/openapi";
|
|
10
10
|
import type OpenAI from "openai";
|
|
11
11
|
|
|
12
12
|
import typia from "typia";
|
|
@@ -50,8 +50,8 @@ interface IConsentProps {
|
|
|
50
50
|
reply: string;
|
|
51
51
|
}
|
|
52
52
|
|
|
53
|
-
async function isNext
|
|
54
|
-
const last: AgenticaHistory
|
|
53
|
+
async function isNext(agent: Agentica | MicroAgentica): Promise<string | null> {
|
|
54
|
+
const last: AgenticaHistory | undefined = agent
|
|
55
55
|
.getHistories()
|
|
56
56
|
.at(-1);
|
|
57
57
|
|
|
@@ -67,9 +67,8 @@ async function isNext<Model extends ILlmSchema.Model>(agent: Agentica<Model> | M
|
|
|
67
67
|
return null;
|
|
68
68
|
}
|
|
69
69
|
|
|
70
|
-
const consent: ILlmFunction
|
|
71
|
-
IPredicatorApplication
|
|
72
|
-
"chatgpt"
|
|
70
|
+
const consent: ILlmFunction = typia.llm.application<
|
|
71
|
+
IPredicatorApplication
|
|
73
72
|
>().functions[0]!;
|
|
74
73
|
const result: OpenAI.ChatCompletion = await llmVendor.api.chat.completions.create(
|
|
75
74
|
{
|
|
@@ -128,18 +127,18 @@ async function isNext<Model extends ILlmSchema.Model>(agent: Agentica<Model> | M
|
|
|
128
127
|
* @returns `true` if the called operations match the expected operations,
|
|
129
128
|
* otherwise `false`.
|
|
130
129
|
*/
|
|
131
|
-
export function success
|
|
130
|
+
export function success(props: {
|
|
132
131
|
/**
|
|
133
132
|
* Expected operations to be called.
|
|
134
133
|
*
|
|
135
134
|
* For 'allOf' within an 'array', the next expected element starts checking from the element that follows the last called element in 'allOf'.
|
|
136
135
|
*/
|
|
137
|
-
expected: IAgenticaBenchmarkExpected
|
|
136
|
+
expected: IAgenticaBenchmarkExpected;
|
|
138
137
|
|
|
139
138
|
/**
|
|
140
139
|
* Specified operations.
|
|
141
140
|
*/
|
|
142
|
-
operations: Array<AgenticaOperation
|
|
141
|
+
operations: Array<AgenticaOperation>;
|
|
143
142
|
|
|
144
143
|
/**
|
|
145
144
|
* If it's `false`, check the array and let it go even if there's something wrong between them.
|
|
@@ -151,7 +150,7 @@ export function success<Model extends ILlmSchema.Model>(props: {
|
|
|
151
150
|
return successInner(props).result;
|
|
152
151
|
}
|
|
153
152
|
|
|
154
|
-
function successInner
|
|
153
|
+
function successInner(props: Parameters<typeof success>[0]):
|
|
155
154
|
| {
|
|
156
155
|
result: true;
|
|
157
156
|
take: number;
|
|
@@ -160,8 +159,8 @@ function successInner<Model extends ILlmSchema.Model>(props: Parameters<typeof s
|
|
|
160
159
|
result: false;
|
|
161
160
|
} {
|
|
162
161
|
const call = (
|
|
163
|
-
expected: IAgenticaBenchmarkExpected
|
|
164
|
-
overrideOperations?: Array<AgenticaOperation
|
|
162
|
+
expected: IAgenticaBenchmarkExpected,
|
|
163
|
+
overrideOperations?: Array<AgenticaOperation>,
|
|
165
164
|
) =>
|
|
166
165
|
successInner({
|
|
167
166
|
expected,
|
|
@@ -4,8 +4,6 @@
|
|
|
4
4
|
*
|
|
5
5
|
* @author Wrtn Technologies
|
|
6
6
|
*/
|
|
7
|
-
import type { ILlmSchema } from "@samchon/openapi";
|
|
8
|
-
|
|
9
7
|
import type { IAgenticaBenchmarkExpected } from "../structures/IAgenticaBenchmarkExpected";
|
|
10
8
|
|
|
11
9
|
export const AgenticaBenchmarkUtil = {
|
|
@@ -31,7 +29,7 @@ function errorToJson<T>(error: T): T | ({
|
|
|
31
29
|
return error;
|
|
32
30
|
}
|
|
33
31
|
|
|
34
|
-
function expectedToJson
|
|
32
|
+
function expectedToJson(expected: IAgenticaBenchmarkExpected): any {
|
|
35
33
|
if (expected.type === "standalone") {
|
|
36
34
|
return {
|
|
37
35
|
type: expected.type,
|
|
@@ -5,7 +5,6 @@
|
|
|
5
5
|
* @author Wrtn Technologies
|
|
6
6
|
*/
|
|
7
7
|
import type { AgenticaTokenUsage } from "@agentica/core";
|
|
8
|
-
import type { ILlmSchema } from "@samchon/openapi";
|
|
9
8
|
|
|
10
9
|
import type { IAgenticaCallBenchmarkEvent } from "../structures/IAgenticaCallBenchmarkEvent";
|
|
11
10
|
import type { IAgenticaCallBenchmarkResult } from "../structures/IAgenticaCallBenchmarkResult";
|
|
@@ -19,9 +18,9 @@ export const AgenticaCallBenchmarkReporter = {
|
|
|
19
18
|
markdown,
|
|
20
19
|
};
|
|
21
20
|
|
|
22
|
-
export function markdown
|
|
21
|
+
export function markdown(result: IAgenticaCallBenchmarkResult): Record<string, string> {
|
|
23
22
|
return Object.fromEntries([
|
|
24
|
-
["./README.md", writeIndex
|
|
23
|
+
["./README.md", writeIndex(result)],
|
|
25
24
|
...result.experiments
|
|
26
25
|
.map<[string, string][]>(exp => [
|
|
27
26
|
[`./${exp.scenario.name}/README.md`, writeExperimentIndex(exp)],
|
|
@@ -34,8 +33,8 @@ export function markdown<Model extends ILlmSchema.Model>(result: IAgenticaCallBe
|
|
|
34
33
|
]);
|
|
35
34
|
}
|
|
36
35
|
|
|
37
|
-
function writeIndex
|
|
38
|
-
const events: IAgenticaCallBenchmarkEvent
|
|
36
|
+
function writeIndex(result: IAgenticaCallBenchmarkResult): string {
|
|
37
|
+
const events: IAgenticaCallBenchmarkEvent[] = result.experiments
|
|
39
38
|
.map(r => r.events)
|
|
40
39
|
.flat();
|
|
41
40
|
const average: number
|
|
@@ -84,7 +83,7 @@ function writeIndex<Model extends ILlmSchema.Model>(result: IAgenticaCallBenchma
|
|
|
84
83
|
].join("\n");
|
|
85
84
|
}
|
|
86
85
|
|
|
87
|
-
function writeExperimentIndex
|
|
86
|
+
function writeExperimentIndex(exp: IAgenticaCallBenchmarkResult.IExperiment): string {
|
|
88
87
|
return [
|
|
89
88
|
`# ${exp.scenario.name}`,
|
|
90
89
|
"## Summary",
|
|
@@ -123,7 +122,7 @@ function writeExperimentIndex<Model extends ILlmSchema.Model>(exp: IAgenticaCall
|
|
|
123
122
|
].join("\n");
|
|
124
123
|
}
|
|
125
124
|
|
|
126
|
-
function writeExperimentEvent
|
|
125
|
+
function writeExperimentEvent(event: IAgenticaCallBenchmarkEvent, index: number): string {
|
|
127
126
|
return [
|
|
128
127
|
`# ${index + 1}. ${event.type}`,
|
|
129
128
|
"## Summary",
|
|
@@ -181,7 +180,7 @@ function writeExperimentEvent<Model extends ILlmSchema.Model>(event: IAgenticaCa
|
|
|
181
180
|
].join("\n");
|
|
182
181
|
}
|
|
183
182
|
|
|
184
|
-
function drawStatus
|
|
183
|
+
function drawStatus(events: IAgenticaCallBenchmarkEvent[], success: (e: IAgenticaCallBenchmarkEvent) => boolean): string {
|
|
185
184
|
const count: number = Math.floor(
|
|
186
185
|
(events.filter(success).length / events.length) * 10,
|
|
187
186
|
);
|
|
@@ -5,13 +5,12 @@
|
|
|
5
5
|
* @author Wrtn Technologies
|
|
6
6
|
*/
|
|
7
7
|
import type { AgenticaHistory } from "@agentica/core";
|
|
8
|
-
import type { ILlmSchema } from "@samchon/openapi";
|
|
9
8
|
|
|
10
9
|
export const AgenticaPromptReporter = {
|
|
11
10
|
markdown,
|
|
12
11
|
};
|
|
13
12
|
|
|
14
|
-
function markdown
|
|
13
|
+
function markdown(p: AgenticaHistory): string {
|
|
15
14
|
if (p.type === "userMessage") {
|
|
16
15
|
return [`### User Input`, p.contents, ""].join("\n");
|
|
17
16
|
}
|
|
@@ -5,7 +5,6 @@
|
|
|
5
5
|
* @author Wrtn Technologies
|
|
6
6
|
*/
|
|
7
7
|
import type { AgenticaTokenUsage } from "@agentica/core";
|
|
8
|
-
import type { ILlmSchema } from "@samchon/openapi";
|
|
9
8
|
|
|
10
9
|
import type { IAgenticaSelectBenchmarkEvent } from "../structures/IAgenticaSelectBenchmarkEvent";
|
|
11
10
|
import type { IAgenticaSelectBenchmarkResult } from "../structures/IAgenticaSelectBenchmarkResult";
|
|
@@ -21,7 +20,7 @@ export const AgenticaSelectBenchmarkReporter = {
|
|
|
21
20
|
markdown,
|
|
22
21
|
};
|
|
23
22
|
|
|
24
|
-
export function markdown
|
|
23
|
+
export function markdown(result: IAgenticaSelectBenchmarkResult): Record<string, string> {
|
|
25
24
|
const iterator = [
|
|
26
25
|
["./README.md", writeIndex(result)],
|
|
27
26
|
...result.experiments
|
|
@@ -38,8 +37,8 @@ export function markdown<Model extends ILlmSchema.Model>(result: IAgenticaSelect
|
|
|
38
37
|
return Object.fromEntries(iterator);
|
|
39
38
|
}
|
|
40
39
|
|
|
41
|
-
function writeIndex
|
|
42
|
-
const events: IAgenticaSelectBenchmarkEvent
|
|
40
|
+
function writeIndex(result: IAgenticaSelectBenchmarkResult): string {
|
|
41
|
+
const events: IAgenticaSelectBenchmarkEvent[] = result.experiments
|
|
43
42
|
.map(r => r.events)
|
|
44
43
|
.flat();
|
|
45
44
|
const average: number
|
|
@@ -98,7 +97,7 @@ function writeIndex<Model extends ILlmSchema.Model>(result: IAgenticaSelectBench
|
|
|
98
97
|
].join("\n");
|
|
99
98
|
}
|
|
100
99
|
|
|
101
|
-
function writeExperimentIndex
|
|
100
|
+
function writeExperimentIndex(exp: IAgenticaSelectBenchmarkResult.IExperiment): string {
|
|
102
101
|
const aggregate: AgenticaTokenUsage.IComponent = exp.usage.aggregate;
|
|
103
102
|
return [
|
|
104
103
|
`# ${exp.scenario.name}`,
|
|
@@ -154,7 +153,7 @@ function writeExperimentIndex<Model extends ILlmSchema.Model>(exp: IAgenticaSele
|
|
|
154
153
|
].join("\n");
|
|
155
154
|
}
|
|
156
155
|
|
|
157
|
-
function writeExperimentEvent
|
|
156
|
+
function writeExperimentEvent(event: IAgenticaSelectBenchmarkEvent, index: number): string {
|
|
158
157
|
return [
|
|
159
158
|
`# ${index + 1}. ${event.type}`,
|
|
160
159
|
`## Summary`,
|
|
@@ -5,7 +5,6 @@
|
|
|
5
5
|
* @author Wrtn Technologies
|
|
6
6
|
*/
|
|
7
7
|
import type { AgenticaOperation } from "@agentica/core";
|
|
8
|
-
import type { ILlmSchema } from "@samchon/openapi";
|
|
9
8
|
|
|
10
9
|
/**
|
|
11
10
|
* Expected operation determinant.
|
|
@@ -19,22 +18,22 @@ import type { ILlmSchema } from "@samchon/openapi";
|
|
|
19
18
|
*
|
|
20
19
|
* @author Samchon
|
|
21
20
|
*/
|
|
22
|
-
export type IAgenticaBenchmarkExpected
|
|
23
|
-
| IAgenticaBenchmarkExpected.IAllOf
|
|
24
|
-
| IAgenticaBenchmarkExpected.IAnyOf
|
|
25
|
-
| IAgenticaBenchmarkExpected.IArray
|
|
26
|
-
| IAgenticaBenchmarkExpected.IStandalone
|
|
21
|
+
export type IAgenticaBenchmarkExpected =
|
|
22
|
+
| IAgenticaBenchmarkExpected.IAllOf
|
|
23
|
+
| IAgenticaBenchmarkExpected.IAnyOf
|
|
24
|
+
| IAgenticaBenchmarkExpected.IArray
|
|
25
|
+
| IAgenticaBenchmarkExpected.IStandalone;
|
|
27
26
|
|
|
28
27
|
export namespace IAgenticaBenchmarkExpected {
|
|
29
28
|
/**
|
|
30
29
|
* All of them must meet the condition, but sequence is not important.
|
|
31
30
|
*/
|
|
32
|
-
export interface IAllOf
|
|
31
|
+
export interface IAllOf {
|
|
33
32
|
type: "allOf";
|
|
34
33
|
allOf: Array<
|
|
35
34
|
Exclude<
|
|
36
|
-
IAgenticaBenchmarkExpected
|
|
37
|
-
IAgenticaBenchmarkExpected.IAllOf
|
|
35
|
+
IAgenticaBenchmarkExpected,
|
|
36
|
+
IAgenticaBenchmarkExpected.IAllOf
|
|
38
37
|
>
|
|
39
38
|
>;
|
|
40
39
|
}
|
|
@@ -42,12 +41,12 @@ export namespace IAgenticaBenchmarkExpected {
|
|
|
42
41
|
/**
|
|
43
42
|
* At least one of them must meet the condition.
|
|
44
43
|
*/
|
|
45
|
-
export interface IAnyOf
|
|
44
|
+
export interface IAnyOf {
|
|
46
45
|
type: "anyOf";
|
|
47
46
|
anyOf: Array<
|
|
48
47
|
Exclude<
|
|
49
|
-
IAgenticaBenchmarkExpected
|
|
50
|
-
IAgenticaBenchmarkExpected.IAnyOf
|
|
48
|
+
IAgenticaBenchmarkExpected,
|
|
49
|
+
IAgenticaBenchmarkExpected.IAnyOf
|
|
51
50
|
>
|
|
52
51
|
>;
|
|
53
52
|
}
|
|
@@ -55,12 +54,12 @@ export namespace IAgenticaBenchmarkExpected {
|
|
|
55
54
|
/**
|
|
56
55
|
* All of them must meet the condition, and sequence is important.
|
|
57
56
|
*/
|
|
58
|
-
export interface IArray
|
|
57
|
+
export interface IArray {
|
|
59
58
|
type: "array";
|
|
60
59
|
items: Array<
|
|
61
60
|
Exclude<
|
|
62
|
-
IAgenticaBenchmarkExpected
|
|
63
|
-
IAgenticaBenchmarkExpected.IArray
|
|
61
|
+
IAgenticaBenchmarkExpected,
|
|
62
|
+
IAgenticaBenchmarkExpected.IArray
|
|
64
63
|
>
|
|
65
64
|
>;
|
|
66
65
|
}
|
|
@@ -68,8 +67,8 @@ export namespace IAgenticaBenchmarkExpected {
|
|
|
68
67
|
/**
|
|
69
68
|
* Standalone operation.
|
|
70
69
|
*/
|
|
71
|
-
export interface IStandalone
|
|
70
|
+
export interface IStandalone {
|
|
72
71
|
type: "standalone";
|
|
73
|
-
operation: AgenticaOperation
|
|
72
|
+
operation: AgenticaOperation;
|
|
74
73
|
}
|
|
75
74
|
}
|
|
@@ -5,7 +5,6 @@
|
|
|
5
5
|
* @author Wrtn Technologies
|
|
6
6
|
*/
|
|
7
7
|
import type { AgenticaHistory, AgenticaTokenUsage } from "@agentica/core";
|
|
8
|
-
import type { ILlmSchema } from "@samchon/openapi";
|
|
9
8
|
|
|
10
9
|
import type { IAgenticaCallBenchmarkScenario } from "./IAgenticaCallBenchmarkScenario";
|
|
11
10
|
|
|
@@ -31,10 +30,10 @@ import type { IAgenticaCallBenchmarkScenario } from "./IAgenticaCallBenchmarkSce
|
|
|
31
30
|
*
|
|
32
31
|
* @author Samchon
|
|
33
32
|
*/
|
|
34
|
-
export type IAgenticaCallBenchmarkEvent
|
|
35
|
-
| IAgenticaCallBenchmarkEvent.ISuccess
|
|
36
|
-
| IAgenticaCallBenchmarkEvent.IFailure
|
|
37
|
-
| IAgenticaCallBenchmarkEvent.IError
|
|
33
|
+
export type IAgenticaCallBenchmarkEvent =
|
|
34
|
+
| IAgenticaCallBenchmarkEvent.ISuccess
|
|
35
|
+
| IAgenticaCallBenchmarkEvent.IFailure
|
|
36
|
+
| IAgenticaCallBenchmarkEvent.IError;
|
|
38
37
|
export namespace IAgenticaCallBenchmarkEvent {
|
|
39
38
|
/**
|
|
40
39
|
* Success event type.
|
|
@@ -42,8 +41,8 @@ export namespace IAgenticaCallBenchmarkEvent {
|
|
|
42
41
|
* The `success` event type represents that the benchmark
|
|
43
42
|
* testing is fully meet the expected scenario.
|
|
44
43
|
*/
|
|
45
|
-
export interface ISuccess
|
|
46
|
-
extends IEventBase<"success"
|
|
44
|
+
export interface ISuccess
|
|
45
|
+
extends IEventBase<"success"> {
|
|
47
46
|
/**
|
|
48
47
|
* Whether succeeded to function selection.
|
|
49
48
|
*/
|
|
@@ -62,8 +61,8 @@ export namespace IAgenticaCallBenchmarkEvent {
|
|
|
62
61
|
* or `caller` agents have not selected or called following the
|
|
63
62
|
* expected scenario in the benchmark testing.
|
|
64
63
|
*/
|
|
65
|
-
export interface IFailure
|
|
66
|
-
extends IEventBase<"failure"
|
|
64
|
+
export interface IFailure
|
|
65
|
+
extends IEventBase<"failure"> {
|
|
67
66
|
/**
|
|
68
67
|
* Whether succeeded to function selection.
|
|
69
68
|
*/
|
|
@@ -75,15 +74,15 @@ export namespace IAgenticaCallBenchmarkEvent {
|
|
|
75
74
|
call: boolean;
|
|
76
75
|
}
|
|
77
76
|
|
|
78
|
-
export interface IError
|
|
79
|
-
extends IEventBase<"error"
|
|
77
|
+
export interface IError
|
|
78
|
+
extends IEventBase<"error"> {
|
|
80
79
|
/**
|
|
81
80
|
* Error occurred during the benchmark.
|
|
82
81
|
*/
|
|
83
82
|
error: unknown;
|
|
84
83
|
}
|
|
85
84
|
|
|
86
|
-
interface IEventBase<Type extends string
|
|
85
|
+
interface IEventBase<Type extends string> {
|
|
87
86
|
/**
|
|
88
87
|
* Discriminant type.
|
|
89
88
|
*/
|
|
@@ -92,14 +91,14 @@ export namespace IAgenticaCallBenchmarkEvent {
|
|
|
92
91
|
/**
|
|
93
92
|
* Expected scenario.
|
|
94
93
|
*/
|
|
95
|
-
scenario: IAgenticaCallBenchmarkScenario
|
|
94
|
+
scenario: IAgenticaCallBenchmarkScenario;
|
|
96
95
|
|
|
97
96
|
/**
|
|
98
97
|
* Prompt histories.
|
|
99
98
|
*
|
|
100
99
|
* List of prompts occurred during the benchmark testing.
|
|
101
100
|
*/
|
|
102
|
-
prompts: AgenticaHistory
|
|
101
|
+
prompts: AgenticaHistory[];
|
|
103
102
|
|
|
104
103
|
/**
|
|
105
104
|
* Usage of the token during the benchmark.
|
|
@@ -5,7 +5,6 @@
|
|
|
5
5
|
* @author Wrtn Technologies
|
|
6
6
|
*/
|
|
7
7
|
import type { AgenticaTokenUsage } from "@agentica/core";
|
|
8
|
-
import type { ILlmSchema } from "@samchon/openapi";
|
|
9
8
|
|
|
10
9
|
import type { IAgenticaCallBenchmarkEvent } from "./IAgenticaCallBenchmarkEvent";
|
|
11
10
|
import type { IAgenticaCallBenchmarkScenario } from "./IAgenticaCallBenchmarkScenario";
|
|
@@ -27,11 +26,11 @@ import type { IAgenticaCallBenchmarkScenario } from "./IAgenticaCallBenchmarkSce
|
|
|
27
26
|
*
|
|
28
27
|
* @author Samchon
|
|
29
28
|
*/
|
|
30
|
-
export interface IAgenticaCallBenchmarkResult
|
|
29
|
+
export interface IAgenticaCallBenchmarkResult {
|
|
31
30
|
/**
|
|
32
31
|
* Experiments for each scenario.
|
|
33
32
|
*/
|
|
34
|
-
experiments: IAgenticaCallBenchmarkResult.IExperiment
|
|
33
|
+
experiments: IAgenticaCallBenchmarkResult.IExperiment[];
|
|
35
34
|
|
|
36
35
|
/**
|
|
37
36
|
* Aggregated token usage information.
|
|
@@ -52,11 +51,11 @@ export namespace IAgenticaCallBenchmarkResult {
|
|
|
52
51
|
/**
|
|
53
52
|
* Experiment result about a scenario.
|
|
54
53
|
*/
|
|
55
|
-
export interface IExperiment
|
|
54
|
+
export interface IExperiment {
|
|
56
55
|
/**
|
|
57
56
|
* Scenario of the experiment.
|
|
58
57
|
*/
|
|
59
|
-
scenario: IAgenticaCallBenchmarkScenario
|
|
58
|
+
scenario: IAgenticaCallBenchmarkScenario;
|
|
60
59
|
|
|
61
60
|
/**
|
|
62
61
|
* Events occurred during the benchmark in the scenario.
|
|
@@ -66,7 +65,7 @@ export namespace IAgenticaCallBenchmarkResult {
|
|
|
66
65
|
* {@link AgenticaCallBenchmark.IConfig.repeat repeat} count.
|
|
67
66
|
* And the event is one of the repeated benchmark results.
|
|
68
67
|
*/
|
|
69
|
-
events: IAgenticaCallBenchmarkEvent
|
|
68
|
+
events: IAgenticaCallBenchmarkEvent[];
|
|
70
69
|
|
|
71
70
|
/**
|
|
72
71
|
* LLM token usage information.
|
|
@@ -4,8 +4,6 @@
|
|
|
4
4
|
*
|
|
5
5
|
* @author Wrtn Technologies
|
|
6
6
|
*/
|
|
7
|
-
import type { ILlmSchema } from "@samchon/openapi";
|
|
8
|
-
|
|
9
7
|
import type { IAgenticaBenchmarkExpected } from "./IAgenticaBenchmarkExpected";
|
|
10
8
|
|
|
11
9
|
/**
|
|
@@ -22,9 +20,7 @@ import type { IAgenticaBenchmarkExpected } from "./IAgenticaBenchmarkExpected";
|
|
|
22
20
|
*
|
|
23
21
|
* @author Samchon
|
|
24
22
|
*/
|
|
25
|
-
export interface IAgenticaCallBenchmarkScenario
|
|
26
|
-
Model extends ILlmSchema.Model,
|
|
27
|
-
> {
|
|
23
|
+
export interface IAgenticaCallBenchmarkScenario {
|
|
28
24
|
/**
|
|
29
25
|
* Name of the scenario.
|
|
30
26
|
*
|
|
@@ -45,5 +41,5 @@ export interface IAgenticaCallBenchmarkScenario<
|
|
|
45
41
|
* the user's {@link text} conversation for the LLM
|
|
46
42
|
* (Large Language Model) function calling.
|
|
47
43
|
*/
|
|
48
|
-
expected: IAgenticaBenchmarkExpected
|
|
44
|
+
expected: IAgenticaBenchmarkExpected;
|
|
49
45
|
}
|
|
@@ -9,7 +9,6 @@ import type {
|
|
|
9
9
|
AgenticaOperationSelection,
|
|
10
10
|
AgenticaTokenUsage,
|
|
11
11
|
} from "@agentica/core";
|
|
12
|
-
import type { ILlmSchema } from "@samchon/openapi";
|
|
13
12
|
|
|
14
13
|
import type { IAgenticaSelectBenchmarkScenario } from "./IAgenticaSelectBenchmarkScenario";
|
|
15
14
|
|
|
@@ -35,10 +34,10 @@ import type { IAgenticaSelectBenchmarkScenario } from "./IAgenticaSelectBenchmar
|
|
|
35
34
|
*
|
|
36
35
|
* @author Samchon
|
|
37
36
|
*/
|
|
38
|
-
export type IAgenticaSelectBenchmarkEvent
|
|
39
|
-
| IAgenticaSelectBenchmarkEvent.ISuccess
|
|
40
|
-
| IAgenticaSelectBenchmarkEvent.IFailure
|
|
41
|
-
| IAgenticaSelectBenchmarkEvent.IError
|
|
37
|
+
export type IAgenticaSelectBenchmarkEvent =
|
|
38
|
+
| IAgenticaSelectBenchmarkEvent.ISuccess
|
|
39
|
+
| IAgenticaSelectBenchmarkEvent.IFailure
|
|
40
|
+
| IAgenticaSelectBenchmarkEvent.IError;
|
|
42
41
|
export namespace IAgenticaSelectBenchmarkEvent {
|
|
43
42
|
/**
|
|
44
43
|
* Success event type.
|
|
@@ -46,8 +45,7 @@ export namespace IAgenticaSelectBenchmarkEvent {
|
|
|
46
45
|
* The `success` event type represents that the benchmark testing is
|
|
47
46
|
* fully meet the expected scenario.
|
|
48
47
|
*/
|
|
49
|
-
export interface ISuccess
|
|
50
|
-
extends IEventBase<"success", Model> {
|
|
48
|
+
export interface ISuccess extends IEventBase<"success"> {
|
|
51
49
|
/**
|
|
52
50
|
* Usage of the token during the benchmark.
|
|
53
51
|
*/
|
|
@@ -56,7 +54,7 @@ export namespace IAgenticaSelectBenchmarkEvent {
|
|
|
56
54
|
/**
|
|
57
55
|
* Selected operations in the benchmark.
|
|
58
56
|
*/
|
|
59
|
-
selected: AgenticaOperationSelection
|
|
57
|
+
selected: AgenticaOperationSelection[];
|
|
60
58
|
|
|
61
59
|
/**
|
|
62
60
|
* Prompt messages from the assistant.
|
|
@@ -70,8 +68,7 @@ export namespace IAgenticaSelectBenchmarkEvent {
|
|
|
70
68
|
* The `failure` event type represents that the `selector` had not
|
|
71
69
|
* selected the expected scenario in the benchmark testing.
|
|
72
70
|
*/
|
|
73
|
-
export interface IFailure
|
|
74
|
-
extends IEventBase<"failure", Model> {
|
|
71
|
+
export interface IFailure extends IEventBase<"failure"> {
|
|
75
72
|
/**
|
|
76
73
|
* Usage of the token during the benchmark.
|
|
77
74
|
*/
|
|
@@ -80,7 +77,7 @@ export namespace IAgenticaSelectBenchmarkEvent {
|
|
|
80
77
|
/**
|
|
81
78
|
* Selected operations in the benchmark.
|
|
82
79
|
*/
|
|
83
|
-
selected: AgenticaOperationSelection
|
|
80
|
+
selected: AgenticaOperationSelection[];
|
|
84
81
|
|
|
85
82
|
/**
|
|
86
83
|
* Prompt messages from the assistant.
|
|
@@ -94,15 +91,14 @@ export namespace IAgenticaSelectBenchmarkEvent {
|
|
|
94
91
|
* The `error` event type repsents that an error had been occurred
|
|
95
92
|
* during the benchmark testing.
|
|
96
93
|
*/
|
|
97
|
-
export interface IError
|
|
98
|
-
extends IEventBase<"error", Model> {
|
|
94
|
+
export interface IError extends IEventBase<"error"> {
|
|
99
95
|
/**
|
|
100
96
|
* Error occurred during the benchmark.
|
|
101
97
|
*/
|
|
102
98
|
error: unknown;
|
|
103
99
|
}
|
|
104
100
|
|
|
105
|
-
interface IEventBase<Type extends string
|
|
101
|
+
interface IEventBase<Type extends string> {
|
|
106
102
|
/**
|
|
107
103
|
* Discriminant type.
|
|
108
104
|
*/
|
|
@@ -111,7 +107,7 @@ export namespace IAgenticaSelectBenchmarkEvent {
|
|
|
111
107
|
/**
|
|
112
108
|
* Expected scenario.
|
|
113
109
|
*/
|
|
114
|
-
scenario: IAgenticaSelectBenchmarkScenario
|
|
110
|
+
scenario: IAgenticaSelectBenchmarkScenario;
|
|
115
111
|
|
|
116
112
|
/**
|
|
117
113
|
* When the benchmark testing started.
|
|
@@ -5,7 +5,6 @@
|
|
|
5
5
|
* @author Wrtn Technologies
|
|
6
6
|
*/
|
|
7
7
|
import type { AgenticaTokenUsage } from "@agentica/core";
|
|
8
|
-
import type { ILlmSchema } from "@samchon/openapi";
|
|
9
8
|
|
|
10
9
|
import type { IAgenticaSelectBenchmarkEvent } from "./IAgenticaSelectBenchmarkEvent";
|
|
11
10
|
import type { IAgenticaSelectBenchmarkScenario } from "./IAgenticaSelectBenchmarkScenario";
|
|
@@ -27,13 +26,11 @@ import type { IAgenticaSelectBenchmarkScenario } from "./IAgenticaSelectBenchmar
|
|
|
27
26
|
*
|
|
28
27
|
* @author Samchon
|
|
29
28
|
*/
|
|
30
|
-
export interface IAgenticaSelectBenchmarkResult
|
|
31
|
-
Model extends ILlmSchema.Model,
|
|
32
|
-
> {
|
|
29
|
+
export interface IAgenticaSelectBenchmarkResult {
|
|
33
30
|
/**
|
|
34
31
|
* Experiments for each scenario.
|
|
35
32
|
*/
|
|
36
|
-
experiments: IAgenticaSelectBenchmarkResult.IExperiment
|
|
33
|
+
experiments: IAgenticaSelectBenchmarkResult.IExperiment[];
|
|
37
34
|
|
|
38
35
|
/**
|
|
39
36
|
* Aggregated token usage information.
|
|
@@ -54,11 +51,11 @@ export namespace IAgenticaSelectBenchmarkResult {
|
|
|
54
51
|
/**
|
|
55
52
|
* Experiment result about a scenario.
|
|
56
53
|
*/
|
|
57
|
-
export interface IExperiment
|
|
54
|
+
export interface IExperiment {
|
|
58
55
|
/**
|
|
59
56
|
* Expected scenario.
|
|
60
57
|
*/
|
|
61
|
-
scenario: IAgenticaSelectBenchmarkScenario
|
|
58
|
+
scenario: IAgenticaSelectBenchmarkScenario;
|
|
62
59
|
|
|
63
60
|
/**
|
|
64
61
|
* Events occurred during the benchmark in the scenario.
|
|
@@ -68,7 +65,7 @@ export namespace IAgenticaSelectBenchmarkResult {
|
|
|
68
65
|
* {@link AgenticaSelectBenchmark.IConfig.repeat repeat} count.
|
|
69
66
|
* And the event is one of the repeated benchmark results.
|
|
70
67
|
*/
|
|
71
|
-
events: IAgenticaSelectBenchmarkEvent
|
|
68
|
+
events: IAgenticaSelectBenchmarkEvent[];
|
|
72
69
|
|
|
73
70
|
/**
|
|
74
71
|
* LLM token usage information.
|
|
@@ -4,8 +4,6 @@
|
|
|
4
4
|
*
|
|
5
5
|
* @author Wrtn Technologies
|
|
6
6
|
*/
|
|
7
|
-
import type { ILlmSchema } from "@samchon/openapi";
|
|
8
|
-
|
|
9
7
|
import type { IAgenticaBenchmarkExpected } from "./IAgenticaBenchmarkExpected";
|
|
10
8
|
|
|
11
9
|
/**
|
|
@@ -22,9 +20,7 @@ import type { IAgenticaBenchmarkExpected } from "./IAgenticaBenchmarkExpected";
|
|
|
22
20
|
*
|
|
23
21
|
* @author Samchon
|
|
24
22
|
*/
|
|
25
|
-
export interface IAgenticaSelectBenchmarkScenario
|
|
26
|
-
Model extends ILlmSchema.Model,
|
|
27
|
-
> {
|
|
23
|
+
export interface IAgenticaSelectBenchmarkScenario {
|
|
28
24
|
/**
|
|
29
25
|
* Name of the scenario.
|
|
30
26
|
*
|
|
@@ -45,5 +41,5 @@ export interface IAgenticaSelectBenchmarkScenario<
|
|
|
45
41
|
* {@link text} conversation for the LLM (Large Language Model)
|
|
46
42
|
* function selection.
|
|
47
43
|
*/
|
|
48
|
-
expected: IAgenticaBenchmarkExpected
|
|
44
|
+
expected: IAgenticaBenchmarkExpected;
|
|
49
45
|
}
|