@agentica/benchmark 0.8.3 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. package/README.md +124 -122
  2. package/lib/AgenticaCallBenchmark.d.ts +7 -6
  3. package/lib/AgenticaCallBenchmark.js.map +1 -1
  4. package/lib/AgenticaSelectBenchmark.d.ts +7 -6
  5. package/lib/AgenticaSelectBenchmark.js.map +1 -1
  6. package/lib/index.mjs +46 -1
  7. package/lib/index.mjs.map +1 -1
  8. package/lib/internal/AgenticaBenchmarkPredicator.d.ts +5 -4
  9. package/lib/internal/AgenticaBenchmarkPredicator.js +74 -2
  10. package/lib/internal/AgenticaBenchmarkPredicator.js.map +1 -1
  11. package/lib/internal/AgenticaBenchmarkUtil.d.ts +2 -1
  12. package/lib/internal/AgenticaBenchmarkUtil.js.map +1 -1
  13. package/lib/internal/AgenticaCallBenchmarkReporter.d.ts +2 -1
  14. package/lib/internal/AgenticaCallBenchmarkReporter.js.map +1 -1
  15. package/lib/internal/AgenticaPromptReporter.d.ts +2 -1
  16. package/lib/internal/AgenticaPromptReporter.js.map +1 -1
  17. package/lib/internal/AgenticaSelectBenchmarkReporter.js.map +1 -1
  18. package/lib/structures/IAgenticaBenchmarkExpected.d.ts +10 -9
  19. package/lib/structures/IAgenticaCallBenchmarkEvent.d.ts +8 -7
  20. package/lib/structures/IAgenticaCallBenchmarkResult.d.ts +6 -5
  21. package/lib/structures/IAgenticaCallBenchmarkScenario.d.ts +3 -2
  22. package/lib/structures/IAgenticaSelectBenchmarkEvent.d.ts +9 -8
  23. package/lib/structures/IAgenticaSelectBenchmarkResult.d.ts +6 -5
  24. package/lib/structures/IAgenticaSelectBenchmarkScenario.d.ts +3 -2
  25. package/package.json +5 -5
  26. package/src/AgenticaCallBenchmark.ts +28 -25
  27. package/src/AgenticaSelectBenchmark.ts +32 -30
  28. package/src/internal/AgenticaBenchmarkPredicator.ts +18 -10
  29. package/src/internal/AgenticaBenchmarkUtil.ts +5 -1
  30. package/src/internal/AgenticaCallBenchmarkReporter.ts +15 -12
  31. package/src/internal/AgenticaPromptReporter.ts +4 -1
  32. package/src/internal/AgenticaSelectBenchmarkReporter.ts +11 -8
  33. package/src/structures/IAgenticaBenchmarkExpected.ts +23 -13
  34. package/src/structures/IAgenticaCallBenchmarkEvent.ts +14 -10
  35. package/src/structures/IAgenticaCallBenchmarkResult.ts +6 -5
  36. package/src/structures/IAgenticaCallBenchmarkScenario.ts +6 -2
  37. package/src/structures/IAgenticaSelectBenchmarkEvent.ts +15 -11
  38. package/src/structures/IAgenticaSelectBenchmarkResult.ts +8 -5
  39. package/src/structures/IAgenticaSelectBenchmarkScenario.ts +6 -2
@@ -1,5 +1,6 @@
1
1
  import { Agentica } from "@agentica/core";
2
2
  import { AgenticaTokenUsageAggregator } from "@agentica/core/src/internal/AgenticaTokenUsageAggregator";
3
+ import { ILlmSchema } from "@samchon/openapi";
3
4
  import { Semaphore } from "tstl";
4
5
  import { tags } from "typia";
5
6
 
@@ -29,18 +30,18 @@ import { IAgenticaCallBenchmarkScenario } from "./structures/IAgenticaCallBenchm
29
30
  *
30
31
  * @author Samchon
31
32
  */
32
- export class AgenticaCallBenchmark {
33
- private agent_: Agentica;
34
- private scenarios_: IAgenticaCallBenchmarkScenario[];
33
+ export class AgenticaCallBenchmark<Model extends ILlmSchema.Model> {
34
+ private agent_: Agentica<Model>;
35
+ private scenarios_: IAgenticaCallBenchmarkScenario<Model>[];
35
36
  private config_: AgenticaCallBenchmark.IConfig;
36
- private result_: IAgenticaCallBenchmarkResult | null;
37
+ private result_: IAgenticaCallBenchmarkResult<Model> | null;
37
38
 
38
39
  /**
39
40
  * Initializer Constructor.
40
41
  *
41
42
  * @param props Properties of the function calling benchmark
42
43
  */
43
- public constructor(props: AgenticaCallBenchmark.IProps) {
44
+ public constructor(props: AgenticaCallBenchmark.IProps<Model>) {
44
45
  this.agent_ = props.agent;
45
46
  this.scenarios_ = props.scenarios.slice();
46
47
  this.config_ = {
@@ -68,22 +69,24 @@ export class AgenticaCallBenchmark {
68
69
  * @returns Results of the function calling benchmark
69
70
  */
70
71
  public async execute(
71
- listener?: (event: IAgenticaCallBenchmarkEvent) => void,
72
- ): Promise<IAgenticaCallBenchmarkResult> {
72
+ listener?: (event: IAgenticaCallBenchmarkEvent<Model>) => void,
73
+ ): Promise<IAgenticaCallBenchmarkResult<Model>> {
73
74
  const started_at: Date = new Date();
74
75
  const semaphore: Semaphore = new Semaphore(this.config_.simultaneous);
75
- const experiments: IAgenticaCallBenchmarkResult.IExperiment[] =
76
+ const experiments: IAgenticaCallBenchmarkResult.IExperiment<Model>[] =
76
77
  await Promise.all(
77
78
  this.scenarios_.map(async (scenario) => {
78
- const events: IAgenticaCallBenchmarkEvent[] = await Promise.all(
79
- new Array(this.config_.repeat).fill(0).map(async () => {
80
- await semaphore.acquire();
81
- const e: IAgenticaCallBenchmarkEvent = await this.step(scenario);
82
- await semaphore.release();
83
- if (listener !== undefined) listener(e);
84
- return e;
85
- }),
86
- );
79
+ const events: IAgenticaCallBenchmarkEvent<Model>[] =
80
+ await Promise.all(
81
+ new Array(this.config_.repeat).fill(0).map(async () => {
82
+ await semaphore.acquire();
83
+ const e: IAgenticaCallBenchmarkEvent<Model> =
84
+ await this.step(scenario);
85
+ await semaphore.release();
86
+ if (listener !== undefined) listener(e);
87
+ return e;
88
+ }),
89
+ );
87
90
  return {
88
91
  scenario,
89
92
  events,
@@ -135,9 +138,9 @@ export class AgenticaCallBenchmark {
135
138
  }
136
139
 
137
140
  private async step(
138
- scenario: IAgenticaCallBenchmarkScenario,
139
- ): Promise<IAgenticaCallBenchmarkEvent> {
140
- const agent: Agentica = this.agent_.clone();
141
+ scenario: IAgenticaCallBenchmarkScenario<Model>,
142
+ ): Promise<IAgenticaCallBenchmarkEvent<Model>> {
143
+ const agent: Agentica<Model> = this.agent_.clone();
141
144
  const started_at: Date = new Date();
142
145
  const success = () =>
143
146
  AgenticaBenchmarkPredicator.success({
@@ -147,7 +150,7 @@ export class AgenticaCallBenchmark {
147
150
  .filter((p) => p.type === "execute"),
148
151
  strict: false,
149
152
  });
150
- const out = (): IAgenticaCallBenchmarkEvent => {
153
+ const out = (): IAgenticaCallBenchmarkEvent<Model> => {
151
154
  const select = AgenticaBenchmarkPredicator.success({
152
155
  expected: scenario.expected,
153
156
  operations: agent
@@ -167,7 +170,7 @@ export class AgenticaCallBenchmark {
167
170
  usage: agent.getTokenUsage(),
168
171
  started_at,
169
172
  completed_at: new Date(),
170
- } satisfies IAgenticaCallBenchmarkEvent.IFailure;
173
+ } satisfies IAgenticaCallBenchmarkEvent.IFailure<Model>;
171
174
  };
172
175
 
173
176
  try {
@@ -199,16 +202,16 @@ export namespace AgenticaCallBenchmark {
199
202
  /**
200
203
  * Properties of the {@link AgenticaCallBenchmark} constructor.
201
204
  */
202
- export interface IProps {
205
+ export interface IProps<Model extends ILlmSchema.Model> {
203
206
  /**
204
207
  * AI agent instance.
205
208
  */
206
- agent: Agentica;
209
+ agent: Agentica<Model>;
207
210
 
208
211
  /**
209
212
  * List of scenarios that you expect.
210
213
  */
211
- scenarios: IAgenticaCallBenchmarkScenario[];
214
+ scenarios: IAgenticaCallBenchmarkScenario<Model>[];
212
215
 
213
216
  /**
214
217
  * Configuration for the benchmark.
@@ -7,6 +7,7 @@ import {
7
7
  } from "@agentica/core";
8
8
  import { ChatGptSelectFunctionAgent } from "@agentica/core/src/chatgpt/ChatGptSelectFunctionAgent";
9
9
  import { AgenticaTokenUsageAggregator } from "@agentica/core/src/internal/AgenticaTokenUsageAggregator";
10
+ import { ILlmSchema } from "@samchon/openapi";
10
11
  import { Semaphore } from "tstl";
11
12
  import { tags } from "typia";
12
13
 
@@ -33,19 +34,19 @@ import { IAgenticaSelectBenchmarkScenario } from "./structures/IAgenticaSelectBe
33
34
  *
34
35
  * @author Samchon
35
36
  */
36
- export class AgenticaSelectBenchmark {
37
- private agent_: Agentica;
38
- private scenarios_: IAgenticaSelectBenchmarkScenario[];
37
+ export class AgenticaSelectBenchmark<Model extends ILlmSchema.Model> {
38
+ private agent_: Agentica<Model>;
39
+ private scenarios_: IAgenticaSelectBenchmarkScenario<Model>[];
39
40
  private config_: AgenticaSelectBenchmark.IConfig;
40
- private histories_: IAgenticaPrompt[];
41
- private result_: IAgenticaSelectBenchmarkResult | null;
41
+ private histories_: IAgenticaPrompt<Model>[];
42
+ private result_: IAgenticaSelectBenchmarkResult<Model> | null;
42
43
 
43
44
  /**
44
45
  * Initializer Constructor.
45
46
  *
46
47
  * @param props Properties of the selection benchmark
47
48
  */
48
- public constructor(props: AgenticaSelectBenchmark.IProps) {
49
+ public constructor(props: AgenticaSelectBenchmark.IProps<Model>) {
49
50
  this.agent_ = props.agent;
50
51
  this.scenarios_ = props.scenarios.slice();
51
52
  this.config_ = {
@@ -73,23 +74,24 @@ export class AgenticaSelectBenchmark {
73
74
  * @returns Results of the function selection benchmark
74
75
  */
75
76
  public async execute(
76
- listener?: (event: IAgenticaSelectBenchmarkEvent) => void,
77
- ): Promise<IAgenticaSelectBenchmarkResult> {
77
+ listener?: (event: IAgenticaSelectBenchmarkEvent<Model>) => void,
78
+ ): Promise<IAgenticaSelectBenchmarkResult<Model>> {
78
79
  const started_at: Date = new Date();
79
80
  const semaphore: Semaphore = new Semaphore(this.config_.simultaneous);
80
- const experiments: IAgenticaSelectBenchmarkResult.IExperiment[] =
81
+ const experiments: IAgenticaSelectBenchmarkResult.IExperiment<Model>[] =
81
82
  await Promise.all(
82
83
  this.scenarios_.map(async (scenario) => {
83
- const events: IAgenticaSelectBenchmarkEvent[] = await Promise.all(
84
- new Array(this.config_.repeat).fill(0).map(async () => {
85
- await semaphore.acquire();
86
- const e: IAgenticaSelectBenchmarkEvent =
87
- await this.step(scenario);
88
- await semaphore.release();
89
- if (listener !== undefined) listener(e);
90
- return e;
91
- }),
92
- );
84
+ const events: IAgenticaSelectBenchmarkEvent<Model>[] =
85
+ await Promise.all(
86
+ new Array(this.config_.repeat).fill(0).map(async () => {
87
+ await semaphore.acquire();
88
+ const e: IAgenticaSelectBenchmarkEvent<Model> =
89
+ await this.step(scenario);
90
+ await semaphore.release();
91
+ if (listener !== undefined) listener(e);
92
+ return e;
93
+ }),
94
+ );
93
95
  return {
94
96
  scenario,
95
97
  events,
@@ -142,12 +144,12 @@ export class AgenticaSelectBenchmark {
142
144
  }
143
145
 
144
146
  private async step(
145
- scenario: IAgenticaSelectBenchmarkScenario,
146
- ): Promise<IAgenticaSelectBenchmarkEvent> {
147
+ scenario: IAgenticaSelectBenchmarkScenario<Model>,
148
+ ): Promise<IAgenticaSelectBenchmarkEvent<Model>> {
147
149
  const started_at: Date = new Date();
148
150
  try {
149
151
  const usage: IAgenticaTokenUsage = AgenticaTokenUsageAggregator.zero();
150
- const prompts: IAgenticaPrompt[] =
152
+ const prompts: IAgenticaPrompt<Model>[] =
151
153
  await ChatGptSelectFunctionAgent.execute({
152
154
  ...this.agent_.getContext({
153
155
  prompt: {
@@ -161,8 +163,8 @@ export class AgenticaSelectBenchmark {
161
163
  stack: [],
162
164
  ready: () => true,
163
165
  dispatch: async () => {},
164
- } satisfies IAgenticaContext);
165
- const selected: IAgenticaOperationSelection[] = prompts
166
+ } satisfies IAgenticaContext<Model>);
167
+ const selected: IAgenticaOperationSelection<Model>[] = prompts
166
168
  .filter((p) => p.type === "select")
167
169
  .map((p) => p.operations)
168
170
  .flat();
@@ -185,8 +187,8 @@ export class AgenticaSelectBenchmark {
185
187
  started_at,
186
188
  completed_at: new Date(),
187
189
  } satisfies
188
- | IAgenticaSelectBenchmarkEvent.ISuccess
189
- | IAgenticaSelectBenchmarkEvent.IFailure;
190
+ | IAgenticaSelectBenchmarkEvent.ISuccess<Model>
191
+ | IAgenticaSelectBenchmarkEvent.IFailure<Model>;
190
192
  } catch (error) {
191
193
  return {
192
194
  type: "error",
@@ -194,7 +196,7 @@ export class AgenticaSelectBenchmark {
194
196
  error,
195
197
  started_at,
196
198
  completed_at: new Date(),
197
- } satisfies IAgenticaSelectBenchmarkEvent.IError;
199
+ } satisfies IAgenticaSelectBenchmarkEvent.IError<Model>;
198
200
  }
199
201
  }
200
202
  }
@@ -202,16 +204,16 @@ export namespace AgenticaSelectBenchmark {
202
204
  /**
203
205
  * Properties of the {@link AgenticaSelectBenchmark} constructor.
204
206
  */
205
- export interface IProps {
207
+ export interface IProps<Model extends ILlmSchema.Model> {
206
208
  /**
207
209
  * AI agent instance.
208
210
  */
209
- agent: Agentica;
211
+ agent: Agentica<Model>;
210
212
 
211
213
  /**
212
214
  * List of scenarios that you expect.
213
215
  */
214
- scenarios: IAgenticaSelectBenchmarkScenario[];
216
+ scenarios: IAgenticaSelectBenchmarkScenario<Model>[];
215
217
 
216
218
  /**
217
219
  * Configuration for the benchmark.
@@ -1,13 +1,17 @@
1
1
  import { Agentica, IAgenticaOperation, IAgenticaPrompt } from "@agentica/core";
2
- import { ILlmFunction } from "@samchon/openapi";
2
+ import { ILlmFunction, ILlmSchema } from "@samchon/openapi";
3
3
  import OpenAI from "openai";
4
4
  import typia from "typia";
5
5
 
6
6
  import { IAgenticaBenchmarkExpected } from "../structures/IAgenticaBenchmarkExpected";
7
7
 
8
8
  export namespace AgenticaBenchmarkPredicator {
9
- export const isNext = async (agent: Agentica): Promise<string | null> => {
10
- const last: IAgenticaPrompt | undefined = agent.getPromptHistories().at(-1);
9
+ export const isNext = async <Model extends ILlmSchema.Model>(
10
+ agent: Agentica<Model>,
11
+ ): Promise<string | null> => {
12
+ const last: IAgenticaPrompt<Model> | undefined = agent
13
+ .getPromptHistories()
14
+ .at(-1);
11
15
  if (last?.type !== "text" || last.role !== "assistant") return null;
12
16
 
13
17
  const consent: ILlmFunction<"chatgpt"> = typia.llm.application<
@@ -69,18 +73,20 @@ export namespace AgenticaBenchmarkPredicator {
69
73
  * @returns `true` if the called operations match the expected operations,
70
74
  * otherwise `false`.
71
75
  */
72
- export const success = (props: {
76
+ export const success = <Model extends ILlmSchema.Model>(props: {
73
77
  /**
74
78
  * Expected operations to be called.
75
79
  *
76
80
  * For 'allOf' within an 'array', the next expected element starts checking from the element that follows the last called element in 'allOf'.
77
81
  */
78
- expected: IAgenticaBenchmarkExpected;
82
+ expected: IAgenticaBenchmarkExpected<Model>;
79
83
 
80
84
  /**
81
85
  * Specified operations.
82
86
  */
83
- operations: Array<IAgenticaOperation | IAgenticaPrompt.IExecute>;
87
+ operations: Array<
88
+ IAgenticaOperation<Model> | IAgenticaPrompt.IExecute<Model>
89
+ >;
84
90
 
85
91
  /**
86
92
  * If it's `false`, check the array and let it go even if there's something wrong between them.
@@ -90,8 +96,8 @@ export namespace AgenticaBenchmarkPredicator {
90
96
  strict?: boolean;
91
97
  }): boolean => successInner(props).result;
92
98
 
93
- const successInner = (
94
- props: Parameters<typeof success>[0],
99
+ const successInner = <Model extends ILlmSchema.Model>(
100
+ props: Parameters<typeof success<Model>>[0],
95
101
  ):
96
102
  | {
97
103
  result: true;
@@ -101,8 +107,10 @@ export namespace AgenticaBenchmarkPredicator {
101
107
  result: false;
102
108
  } => {
103
109
  const call = (
104
- expected: IAgenticaBenchmarkExpected,
105
- overrideOperations?: Array<IAgenticaOperation | IAgenticaPrompt.IExecute>,
110
+ expected: IAgenticaBenchmarkExpected<Model>,
111
+ overrideOperations?: Array<
112
+ IAgenticaOperation<Model> | IAgenticaPrompt.IExecute<Model>
113
+ >,
106
114
  ) =>
107
115
  successInner({
108
116
  expected,
@@ -1,3 +1,5 @@
1
+ import { ILlmSchema } from "@samchon/openapi";
2
+
1
3
  import { IAgenticaBenchmarkExpected } from "../structures/IAgenticaBenchmarkExpected";
2
4
 
3
5
  export namespace AgenticaBenchmarkUtil {
@@ -12,7 +14,9 @@ export namespace AgenticaBenchmarkUtil {
12
14
  return error;
13
15
  };
14
16
 
15
- export const expectedToJson = (expected: IAgenticaBenchmarkExpected): any => {
17
+ export const expectedToJson = <Model extends ILlmSchema.Model>(
18
+ expected: IAgenticaBenchmarkExpected<Model>,
19
+ ): any => {
16
20
  if (expected.type === "standalone")
17
21
  return {
18
22
  type: expected.type,
@@ -1,4 +1,5 @@
1
1
  import { IAgenticaTokenUsage } from "@agentica/core";
2
+ import { ILlmSchema } from "@samchon/openapi";
2
3
 
3
4
  import { IAgenticaCallBenchmarkEvent } from "../structures/IAgenticaCallBenchmarkEvent";
4
5
  import { IAgenticaCallBenchmarkResult } from "../structures/IAgenticaCallBenchmarkResult";
@@ -7,11 +8,11 @@ import { AgenticaBenchmarkUtil } from "./AgenticaBenchmarkUtil";
7
8
  import { AgenticaPromptReporter } from "./AgenticaPromptReporter";
8
9
 
9
10
  export namespace AgenticaCallBenchmarkReporter {
10
- export const markdown = (
11
- result: IAgenticaCallBenchmarkResult,
11
+ export const markdown = <Model extends ILlmSchema.Model>(
12
+ result: IAgenticaCallBenchmarkResult<Model>,
12
13
  ): Record<string, string> =>
13
14
  Object.fromEntries([
14
- ["./README.md", writeIndex(result)],
15
+ ["./README.md", writeIndex<Model>(result)],
15
16
  ...result.experiments
16
17
  .map((exp) => [
17
18
  [`./${exp.scenario.name}/README.md`, writeExperimentIndex(exp)],
@@ -23,8 +24,10 @@ export namespace AgenticaCallBenchmarkReporter {
23
24
  .flat(),
24
25
  ]);
25
26
 
26
- const writeIndex = (result: IAgenticaCallBenchmarkResult): string => {
27
- const events: IAgenticaCallBenchmarkEvent[] = result.experiments
27
+ const writeIndex = <Model extends ILlmSchema.Model>(
28
+ result: IAgenticaCallBenchmarkResult<Model>,
29
+ ): string => {
30
+ const events: IAgenticaCallBenchmarkEvent<Model>[] = result.experiments
28
31
  .map((r) => r.events)
29
32
  .flat();
30
33
  const average: number =
@@ -73,8 +76,8 @@ export namespace AgenticaCallBenchmarkReporter {
73
76
  ].join("\n");
74
77
  };
75
78
 
76
- const writeExperimentIndex = (
77
- exp: IAgenticaCallBenchmarkResult.IExperiment,
79
+ const writeExperimentIndex = <Model extends ILlmSchema.Model>(
80
+ exp: IAgenticaCallBenchmarkResult.IExperiment<Model>,
78
81
  ): string => {
79
82
  return [
80
83
  `# ${exp.scenario.name}`,
@@ -114,8 +117,8 @@ export namespace AgenticaCallBenchmarkReporter {
114
117
  ].join("\n");
115
118
  };
116
119
 
117
- const writeExperimentEvent = (
118
- event: IAgenticaCallBenchmarkEvent,
120
+ const writeExperimentEvent = <Model extends ILlmSchema.Model>(
121
+ event: IAgenticaCallBenchmarkEvent<Model>,
119
122
  index: number,
120
123
  ): string => {
121
124
  return [
@@ -165,9 +168,9 @@ export namespace AgenticaCallBenchmarkReporter {
165
168
  ].join("\n");
166
169
  };
167
170
 
168
- const drawStatus = (
169
- events: IAgenticaCallBenchmarkEvent[],
170
- success: (e: IAgenticaCallBenchmarkEvent) => boolean,
171
+ const drawStatus = <Model extends ILlmSchema.Model>(
172
+ events: IAgenticaCallBenchmarkEvent<Model>[],
173
+ success: (e: IAgenticaCallBenchmarkEvent<Model>) => boolean,
171
174
  ): string => {
172
175
  const count: number = Math.floor(
173
176
  (events.filter(success).length / events.length) * 10,
@@ -1,7 +1,10 @@
1
1
  import { IAgenticaPrompt } from "@agentica/core";
2
+ import { ILlmSchema } from "@samchon/openapi";
2
3
 
3
4
  export namespace AgenticaPromptReporter {
4
- export const markdown = (p: IAgenticaPrompt): string => {
5
+ export const markdown = <Model extends ILlmSchema.Model>(
6
+ p: IAgenticaPrompt<Model>,
7
+ ): string => {
5
8
  if (p.type === "text")
6
9
  return [`### Text (${p.role})`, p.text, ""].join("\n");
7
10
  else if (p.type === "select" || p.type === "cancel")
@@ -1,4 +1,5 @@
1
1
  import { IAgenticaTokenUsage } from "@agentica/core";
2
+ import { ILlmSchema } from "@samchon/openapi";
2
3
 
3
4
  import { IAgenticaSelectBenchmarkEvent } from "../structures/IAgenticaSelectBenchmarkEvent";
4
5
  import { IAgenticaSelectBenchmarkResult } from "../structures/IAgenticaSelectBenchmarkResult";
@@ -9,8 +10,8 @@ import { AgenticaBenchmarkUtil } from "./AgenticaBenchmarkUtil";
9
10
  * @internal
10
11
  */
11
12
  export namespace AgenticaSelectBenchmarkReporter {
12
- export const markdown = (
13
- result: IAgenticaSelectBenchmarkResult,
13
+ export const markdown = <Model extends ILlmSchema.Model>(
14
+ result: IAgenticaSelectBenchmarkResult<Model>,
14
15
  ): Record<string, string> =>
15
16
  Object.fromEntries([
16
17
  ["./README.md", writeIndex(result)],
@@ -25,8 +26,10 @@ export namespace AgenticaSelectBenchmarkReporter {
25
26
  .flat(),
26
27
  ]);
27
28
 
28
- const writeIndex = (result: IAgenticaSelectBenchmarkResult): string => {
29
- const events: IAgenticaSelectBenchmarkEvent[] = result.experiments
29
+ const writeIndex = <Model extends ILlmSchema.Model>(
30
+ result: IAgenticaSelectBenchmarkResult<Model>,
31
+ ): string => {
32
+ const events: IAgenticaSelectBenchmarkEvent<Model>[] = result.experiments
30
33
  .map((r) => r.events)
31
34
  .flat();
32
35
  const average: number =
@@ -84,8 +87,8 @@ export namespace AgenticaSelectBenchmarkReporter {
84
87
  ].join("\n");
85
88
  };
86
89
 
87
- const writeExperimentIndex = (
88
- exp: IAgenticaSelectBenchmarkResult.IExperiment,
90
+ const writeExperimentIndex = <Model extends ILlmSchema.Model>(
91
+ exp: IAgenticaSelectBenchmarkResult.IExperiment<Model>,
89
92
  ): string => {
90
93
  const aggregate: IAgenticaTokenUsage.IComponent = exp.usage.aggregate;
91
94
  return [
@@ -141,8 +144,8 @@ export namespace AgenticaSelectBenchmarkReporter {
141
144
  ].join("\n");
142
145
  };
143
146
 
144
- const writeExperimentEvent = (
145
- event: IAgenticaSelectBenchmarkEvent,
147
+ const writeExperimentEvent = <Model extends ILlmSchema.Model>(
148
+ event: IAgenticaSelectBenchmarkEvent<Model>,
146
149
  index: number,
147
150
  ): string => {
148
151
  return [
@@ -1,4 +1,5 @@
1
1
  import { IAgenticaOperation } from "@agentica/core";
2
+ import { ILlmSchema } from "@samchon/openapi";
2
3
 
3
4
  /**
4
5
  * Expected operation determinant.
@@ -12,47 +13,56 @@ import { IAgenticaOperation } from "@agentica/core";
12
13
  *
13
14
  * @author Samchon
14
15
  */
15
- export type IAgenticaBenchmarkExpected =
16
- | IAgenticaBenchmarkExpected.IAllOf
17
- | IAgenticaBenchmarkExpected.IAnyOf
18
- | IAgenticaBenchmarkExpected.IArray
19
- | IAgenticaBenchmarkExpected.IStandalone;
16
+ export type IAgenticaBenchmarkExpected<Model extends ILlmSchema.Model> =
17
+ | IAgenticaBenchmarkExpected.IAllOf<Model>
18
+ | IAgenticaBenchmarkExpected.IAnyOf<Model>
19
+ | IAgenticaBenchmarkExpected.IArray<Model>
20
+ | IAgenticaBenchmarkExpected.IStandalone<Model>;
20
21
  export namespace IAgenticaBenchmarkExpected {
21
22
  /**
22
23
  * All of them must meet the condition, but sequence is not important.
23
24
  */
24
- export interface IAllOf {
25
+ export interface IAllOf<Model extends ILlmSchema.Model> {
25
26
  type: "allOf";
26
27
  allOf: Array<
27
- Exclude<IAgenticaBenchmarkExpected, IAgenticaBenchmarkExpected.IAllOf>
28
+ Exclude<
29
+ IAgenticaBenchmarkExpected<Model>,
30
+ IAgenticaBenchmarkExpected.IAllOf<Model>
31
+ >
28
32
  >;
29
33
  }
30
34
 
31
35
  /**
32
36
  * At least one of them must meet the condition.
33
37
  */
34
- export interface IAnyOf {
38
+ export interface IAnyOf<Model extends ILlmSchema.Model> {
35
39
  type: "anyOf";
36
40
  anyOf: Array<
37
- Exclude<IAgenticaBenchmarkExpected, IAgenticaBenchmarkExpected.IAnyOf>
41
+ Exclude<
42
+ IAgenticaBenchmarkExpected<Model>,
43
+ IAgenticaBenchmarkExpected.IAnyOf<Model>
44
+ >
38
45
  >;
39
46
  }
40
47
 
41
48
  /**
42
49
  * All of them must meet the condition, and sequence is important.
43
50
  */
44
- export interface IArray {
51
+ export interface IArray<Model extends ILlmSchema.Model> {
45
52
  type: "array";
46
53
  items: Array<
47
- Exclude<IAgenticaBenchmarkExpected, IAgenticaBenchmarkExpected.IArray>
54
+ Exclude<
55
+ IAgenticaBenchmarkExpected<Model>,
56
+ IAgenticaBenchmarkExpected.IArray<Model>
57
+ >
48
58
  >;
49
59
  }
50
60
 
51
61
  /**
52
62
  * Standalone operation.
53
63
  */
54
- export interface IStandalone {
64
+ export interface IStandalone<Model extends ILlmSchema.Model> {
55
65
  type: "standalone";
56
- operation: IAgenticaOperation;
66
+ operation: IAgenticaOperation<Model>;
57
67
  }
58
68
  }
@@ -1,4 +1,5 @@
1
1
  import { IAgenticaPrompt, IAgenticaTokenUsage } from "@agentica/core";
2
+ import { ILlmSchema } from "@samchon/openapi";
2
3
 
3
4
  import { IAgenticaCallBenchmarkScenario } from "./IAgenticaCallBenchmarkScenario";
4
5
 
@@ -24,10 +25,10 @@ import { IAgenticaCallBenchmarkScenario } from "./IAgenticaCallBenchmarkScenario
24
25
  *
25
26
  * @author Samchon
26
27
  */
27
- export type IAgenticaCallBenchmarkEvent =
28
- | IAgenticaCallBenchmarkEvent.ISuccess
29
- | IAgenticaCallBenchmarkEvent.IFailure
30
- | IAgenticaCallBenchmarkEvent.IError;
28
+ export type IAgenticaCallBenchmarkEvent<Model extends ILlmSchema.Model> =
29
+ | IAgenticaCallBenchmarkEvent.ISuccess<Model>
30
+ | IAgenticaCallBenchmarkEvent.IFailure<Model>
31
+ | IAgenticaCallBenchmarkEvent.IError<Model>;
31
32
  export namespace IAgenticaCallBenchmarkEvent {
32
33
  /**
33
34
  * Success event type.
@@ -35,7 +36,8 @@ export namespace IAgenticaCallBenchmarkEvent {
35
36
  * The `success` event type represents that the benchmark
36
37
  * testing fully meets the expected scenario.
37
38
  */
38
- export interface ISuccess extends IEventBase<"success"> {
39
+ export interface ISuccess<Model extends ILlmSchema.Model>
40
+ extends IEventBase<"success", Model> {
39
41
  /**
40
42
  * Whether succeeded to function selection.
41
43
  */
@@ -54,7 +56,8 @@ export namespace IAgenticaCallBenchmarkEvent {
54
56
  * or `caller` agents have not selected or called following the
55
57
  * expected scenario in the benchmark testing.
56
58
  */
57
- export interface IFailure extends IEventBase<"failure"> {
59
+ export interface IFailure<Model extends ILlmSchema.Model>
60
+ extends IEventBase<"failure", Model> {
58
61
  /**
59
62
  * Whether succeeded to function selection.
60
63
  */
@@ -66,14 +69,15 @@ export namespace IAgenticaCallBenchmarkEvent {
66
69
  call: boolean;
67
70
  }
68
71
 
69
- export interface IError extends IEventBase<"error"> {
72
+ export interface IError<Model extends ILlmSchema.Model>
73
+ extends IEventBase<"error", Model> {
70
74
  /**
71
75
  * Error occurred during the benchmark.
72
76
  */
73
77
  error: unknown;
74
78
  }
75
79
 
76
- interface IEventBase<Type extends string> {
80
+ interface IEventBase<Type extends string, Model extends ILlmSchema.Model> {
77
81
  /**
78
82
  * Discriminant type.
79
83
  */
@@ -82,14 +86,14 @@ export namespace IAgenticaCallBenchmarkEvent {
82
86
  /**
83
87
  * Expected scenario.
84
88
  */
85
- scenario: IAgenticaCallBenchmarkScenario;
89
+ scenario: IAgenticaCallBenchmarkScenario<Model>;
86
90
 
87
91
  /**
88
92
  * Prompt histories.
89
93
  *
90
94
  * List of prompts that occurred during the benchmark testing.
91
95
  */
92
- prompts: IAgenticaPrompt[];
96
+ prompts: IAgenticaPrompt<Model>[];
93
97
 
94
98
  /**
95
99
  * Usage of the token during the benchmark.
@@ -1,4 +1,5 @@
1
1
  import { IAgenticaTokenUsage } from "@agentica/core";
2
+ import { ILlmSchema } from "@samchon/openapi";
2
3
 
3
4
  import { IAgenticaCallBenchmarkEvent } from "./IAgenticaCallBenchmarkEvent";
4
5
  import { IAgenticaCallBenchmarkScenario } from "./IAgenticaCallBenchmarkScenario";
@@ -20,11 +21,11 @@ import { IAgenticaCallBenchmarkScenario } from "./IAgenticaCallBenchmarkScenario
20
21
  *
21
22
  * @author Samchon
22
23
  */
23
- export interface IAgenticaCallBenchmarkResult {
24
+ export interface IAgenticaCallBenchmarkResult<Model extends ILlmSchema.Model> {
24
25
  /**
25
26
  * Experiments for each scenario.
26
27
  */
27
- experiments: IAgenticaCallBenchmarkResult.IExperiment[];
28
+ experiments: IAgenticaCallBenchmarkResult.IExperiment<Model>[];
28
29
 
29
30
  /**
30
31
  * Aggregated token usage information.
@@ -45,11 +46,11 @@ export namespace IAgenticaCallBenchmarkResult {
45
46
  /**
46
47
  * Experiment result about a scenario.
47
48
  */
48
- export interface IExperiment {
49
+ export interface IExperiment<Model extends ILlmSchema.Model> {
49
50
  /**
50
51
  * Scenario of the experiment.
51
52
  */
52
- scenario: IAgenticaCallBenchmarkScenario;
53
+ scenario: IAgenticaCallBenchmarkScenario<Model>;
53
54
 
54
55
  /**
55
56
  * Events that occurred during the benchmark in the scenario.
@@ -59,7 +60,7 @@ export namespace IAgenticaCallBenchmarkResult {
59
60
  * {@link AgenticaCallBenchmark.IConfig.repeat repeat} count.
60
61
  * And the event is one of the repeated benchmark results.
61
62
  */
62
- events: IAgenticaCallBenchmarkEvent[];
63
+ events: IAgenticaCallBenchmarkEvent<Model>[];
63
64
 
64
65
  /**
65
66
  * LLM token usage information.