@agentica/benchmark 0.12.21 → 0.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. package/README.md +39 -33
  2. package/lib/AgenticaCallBenchmark.d.ts +12 -6
  3. package/lib/AgenticaCallBenchmark.js +24 -18
  4. package/lib/AgenticaCallBenchmark.js.map +1 -1
  5. package/lib/AgenticaSelectBenchmark.d.ts +12 -6
  6. package/lib/AgenticaSelectBenchmark.js +14 -12
  7. package/lib/AgenticaSelectBenchmark.js.map +1 -1
  8. package/lib/index.mjs +315 -236
  9. package/lib/index.mjs.map +1 -1
  10. package/lib/internal/AgenticaBenchmarkPredicator.d.ts +38 -29
  11. package/lib/internal/AgenticaBenchmarkPredicator.js +100 -84
  12. package/lib/internal/AgenticaBenchmarkPredicator.js.map +1 -1
  13. package/lib/internal/AgenticaBenchmarkUtil.d.ts +21 -6
  14. package/lib/internal/AgenticaBenchmarkUtil.js +39 -33
  15. package/lib/internal/AgenticaBenchmarkUtil.js.map +1 -1
  16. package/lib/internal/AgenticaCallBenchmarkReporter.d.ts +6 -5
  17. package/lib/internal/AgenticaCallBenchmarkReporter.js +130 -126
  18. package/lib/internal/AgenticaCallBenchmarkReporter.js.map +1 -1
  19. package/lib/internal/AgenticaPromptReporter.d.ts +13 -5
  20. package/lib/internal/AgenticaPromptReporter.js +45 -41
  21. package/lib/internal/AgenticaPromptReporter.js.map +1 -1
  22. package/lib/internal/AgenticaSelectBenchmarkReporter.d.ts +3 -1
  23. package/lib/internal/AgenticaSelectBenchmarkReporter.js +153 -150
  24. package/lib/internal/AgenticaSelectBenchmarkReporter.js.map +1 -1
  25. package/lib/structures/IAgenticaBenchmarkExpected.d.ts +8 -2
  26. package/lib/structures/IAgenticaCallBenchmarkEvent.d.ts +9 -3
  27. package/lib/structures/IAgenticaCallBenchmarkResult.d.ts +10 -4
  28. package/lib/structures/IAgenticaCallBenchmarkScenario.d.ts +8 -2
  29. package/lib/structures/IAgenticaSelectBenchmarkEvent.d.ts +9 -3
  30. package/lib/structures/IAgenticaSelectBenchmarkResult.d.ts +10 -4
  31. package/lib/structures/IAgenticaSelectBenchmarkScenario.d.ts +8 -2
  32. package/lib/utils/MathUtil.d.ts +15 -3
  33. package/lib/utils/MathUtil.js +15 -4
  34. package/lib/utils/MathUtil.js.map +1 -1
  35. package/package.json +12 -10
  36. package/src/AgenticaCallBenchmark.ts +64 -45
  37. package/src/AgenticaSelectBenchmark.ts +42 -30
  38. package/src/internal/AgenticaBenchmarkPredicator.ts +208 -186
  39. package/src/internal/AgenticaBenchmarkUtil.ts +58 -40
  40. package/src/internal/AgenticaCallBenchmarkReporter.ts +180 -182
  41. package/src/internal/AgenticaPromptReporter.ts +46 -33
  42. package/src/internal/AgenticaSelectBenchmarkReporter.ts +205 -203
  43. package/src/structures/IAgenticaBenchmarkExpected.ts +9 -2
  44. package/src/structures/IAgenticaCallBenchmarkEvent.ts +9 -3
  45. package/src/structures/IAgenticaCallBenchmarkResult.ts +10 -4
  46. package/src/structures/IAgenticaCallBenchmarkScenario.ts +8 -2
  47. package/src/structures/IAgenticaSelectBenchmarkEvent.ts +9 -3
  48. package/src/structures/IAgenticaSelectBenchmarkResult.ts +10 -4
  49. package/src/structures/IAgenticaSelectBenchmarkScenario.ts +8 -2
  50. package/src/utils/MathUtil.ts +16 -3
@@ -1,13 +1,20 @@
1
- import { Agentica, AgenticaTokenUsage } from "@agentica/core";
2
- import { ILlmSchema } from "@samchon/openapi";
3
- import { Semaphore } from "tstl";
4
- import { tags } from "typia";
1
+ /**
2
+ * @module
3
+ * This file contains the implementation of the AgenticaCallBenchmark class.
4
+ *
5
+ * @author Wrtn Technologies
6
+ */
7
+ import type { Agentica } from "@agentica/core";
8
+ import type { ILlmSchema } from "@samchon/openapi";
9
+ import type { tags } from "typia";
10
+ import type { IAgenticaCallBenchmarkEvent } from "./structures/IAgenticaCallBenchmarkEvent";
11
+ import type { IAgenticaCallBenchmarkResult } from "./structures/IAgenticaCallBenchmarkResult";
5
12
 
13
+ import type { IAgenticaCallBenchmarkScenario } from "./structures/IAgenticaCallBenchmarkScenario";
14
+ import { AgenticaTokenUsage } from "@agentica/core";
15
+ import { Semaphore } from "tstl";
6
16
  import { AgenticaBenchmarkPredicator } from "./internal/AgenticaBenchmarkPredicator";
7
17
  import { AgenticaCallBenchmarkReporter } from "./internal/AgenticaCallBenchmarkReporter";
8
- import { IAgenticaCallBenchmarkEvent } from "./structures/IAgenticaCallBenchmarkEvent";
9
- import { IAgenticaCallBenchmarkResult } from "./structures/IAgenticaCallBenchmarkResult";
10
- import { IAgenticaCallBenchmarkScenario } from "./structures/IAgenticaCallBenchmarkScenario";
11
18
 
12
19
  /**
13
20
  * LLM function calling selection benchmark.
@@ -72,37 +79,40 @@ export class AgenticaCallBenchmark<Model extends ILlmSchema.Model> {
72
79
  ): Promise<IAgenticaCallBenchmarkResult<Model>> {
73
80
  const started_at: Date = new Date();
74
81
  const semaphore: Semaphore = new Semaphore(this.config_.simultaneous);
75
- const experiments: IAgenticaCallBenchmarkResult.IExperiment<Model>[] =
76
- await Promise.all(
77
- this.scenarios_.map(async (scenario) => {
78
- const events: IAgenticaCallBenchmarkEvent<Model>[] =
79
- await Promise.all(
80
- new Array(this.config_.repeat).fill(0).map(async () => {
81
- await semaphore.acquire();
82
- const e: IAgenticaCallBenchmarkEvent<Model> =
83
- await this.step(scenario);
84
- await semaphore.release();
85
- if (listener !== undefined) listener(e);
86
- return e;
87
- }),
88
- );
89
- return {
90
- scenario,
91
- events,
92
- usage: events
93
- .filter((e) => e.type !== "error")
94
- .map((e) => e.usage)
95
- .reduce(AgenticaTokenUsage.plus, AgenticaTokenUsage.zero()),
96
- };
97
- }),
98
- );
82
+ const task = this.scenarios_.map(async (scenario) => {
83
+ const events: IAgenticaCallBenchmarkEvent<Model>[]
84
+ = await Promise.all(
85
+ Array.from({ length: this.config_.repeat }).map(async () => {
86
+ await semaphore.acquire();
87
+ const e: IAgenticaCallBenchmarkEvent<Model>
88
+ = await this.step(scenario);
89
+ await semaphore.release();
90
+
91
+ if (listener !== undefined) {
92
+ listener(e);
93
+ }
94
+
95
+ return e;
96
+ }),
97
+ );
98
+ return {
99
+ scenario,
100
+ events,
101
+ usage: events
102
+ .filter(e => e.type !== "error")
103
+ .map(e => e.usage)
104
+ .reduce((acc, cur) => AgenticaTokenUsage.plus(acc, cur), AgenticaTokenUsage.zero()),
105
+ };
106
+ });
107
+ const experiments: IAgenticaCallBenchmarkResult.IExperiment<Model>[]
108
+ = await Promise.all(task);
99
109
  return (this.result_ = {
100
110
  experiments,
101
111
  started_at,
102
112
  completed_at: new Date(),
103
113
  usage: experiments
104
- .map((p) => p.usage)
105
- .reduce(AgenticaTokenUsage.plus, AgenticaTokenUsage.zero()),
114
+ .map(p => p.usage)
115
+ .reduce((acc, cur) => AgenticaTokenUsage.plus(acc, cur), AgenticaTokenUsage.zero()),
106
116
  });
107
117
  }
108
118
 
@@ -125,8 +135,9 @@ export class AgenticaCallBenchmark<Model extends ILlmSchema.Model> {
125
135
  * @returns Dictionary of markdown files.
126
136
  */
127
137
  public report(): Record<string, string> {
128
- if (this.result_ === null)
138
+ if (this.result_ === null) {
129
139
  throw new Error("Benchmark is not executed yet.");
140
+ }
130
141
  return AgenticaCallBenchmarkReporter.markdown(this.result_);
131
142
  }
132
143
 
@@ -140,8 +151,8 @@ export class AgenticaCallBenchmark<Model extends ILlmSchema.Model> {
140
151
  expected: scenario.expected,
141
152
  operations: agent
142
153
  .getPromptHistories()
143
- .filter((p) => p.type === "execute")
144
- .map((p) => p.operation),
154
+ .filter(p => p.type === "execute")
155
+ .map(p => p.operation),
145
156
  strict: false,
146
157
  });
147
158
  const out = (): IAgenticaCallBenchmarkEvent<Model> => {
@@ -149,10 +160,10 @@ export class AgenticaCallBenchmark<Model extends ILlmSchema.Model> {
149
160
  expected: scenario.expected,
150
161
  operations: agent
151
162
  .getPromptHistories()
152
- .filter((p) => p.type === "select")
153
- .map((p) => p.selections)
163
+ .filter(p => p.type === "select")
164
+ .map(p => p.selections)
154
165
  .flat()
155
- .map((p) => p.operation),
166
+ .map(p => p.operation),
156
167
  strict: false,
157
168
  });
158
169
  const call = success();
@@ -170,17 +181,25 @@ export class AgenticaCallBenchmark<Model extends ILlmSchema.Model> {
170
181
 
171
182
  try {
172
183
  await agent.conversate(scenario.text);
173
- if (success()) return out();
184
+ if (success()) {
185
+ return out();
186
+ }
187
+
174
188
  for (let i: number = 0; i < this.config_.consent; ++i) {
175
- const next: string | null =
176
- await AgenticaBenchmarkPredicator.isNext(agent);
177
- if (next === null) break;
189
+ const next: string | null
190
+ = await AgenticaBenchmarkPredicator.isNext(agent);
191
+ if (next === null) {
192
+ break;
193
+ }
178
194
 
179
195
  await agent.conversate(next);
180
- if (success()) return out();
196
+ if (success()) {
197
+ return out();
198
+ }
181
199
  }
182
200
  return out();
183
- } catch (error) {
201
+ }
202
+ catch (error) {
184
203
  return {
185
204
  type: "error",
186
205
  scenario,
@@ -1,21 +1,29 @@
1
- import {
1
+ /**
2
+ * @module
3
+ * This file contains the implementation of the AgenticaSelectBenchmark class.
4
+ *
5
+ * @author Wrtn Technologies
6
+ */
7
+ import type {
2
8
  Agentica,
3
9
  AgenticaContext,
4
10
  AgenticaOperationSelection,
5
11
  AgenticaPrompt,
12
+ } from "@agentica/core";
13
+ import type { ILlmSchema } from "@samchon/openapi";
14
+ import type { tags } from "typia";
15
+ import type { IAgenticaSelectBenchmarkEvent } from "./structures/IAgenticaSelectBenchmarkEvent";
16
+ import type { IAgenticaSelectBenchmarkResult } from "./structures/IAgenticaSelectBenchmarkResult";
17
+ import type { IAgenticaSelectBenchmarkScenario } from "./structures/IAgenticaSelectBenchmarkScenario";
18
+
19
+ import {
6
20
  AgenticaTextPrompt,
7
21
  AgenticaTokenUsage,
8
22
  } from "@agentica/core";
9
23
  import { ChatGptSelectFunctionAgent } from "@agentica/core/src/chatgpt/ChatGptSelectFunctionAgent";
10
- import { ILlmSchema } from "@samchon/openapi";
11
24
  import { Semaphore } from "tstl";
12
- import { tags } from "typia";
13
-
14
25
  import { AgenticaBenchmarkPredicator } from "./internal/AgenticaBenchmarkPredicator";
15
26
  import { AgenticaSelectBenchmarkReporter } from "./internal/AgenticaSelectBenchmarkReporter";
16
- import { IAgenticaSelectBenchmarkEvent } from "./structures/IAgenticaSelectBenchmarkEvent";
17
- import { IAgenticaSelectBenchmarkResult } from "./structures/IAgenticaSelectBenchmarkResult";
18
- import { IAgenticaSelectBenchmarkScenario } from "./structures/IAgenticaSelectBenchmarkScenario";
19
27
 
20
28
  /**
21
29
  * LLM function calling selection benchmark.
@@ -78,17 +86,19 @@ export class AgenticaSelectBenchmark<Model extends ILlmSchema.Model> {
78
86
  ): Promise<IAgenticaSelectBenchmarkResult<Model>> {
79
87
  const started_at: Date = new Date();
80
88
  const semaphore: Semaphore = new Semaphore(this.config_.simultaneous);
81
- const experiments: IAgenticaSelectBenchmarkResult.IExperiment<Model>[] =
82
- await Promise.all(
89
+ const experiments: IAgenticaSelectBenchmarkResult.IExperiment<Model>[]
90
+ = await Promise.all(
83
91
  this.scenarios_.map(async (scenario) => {
84
- const events: IAgenticaSelectBenchmarkEvent<Model>[] =
85
- await Promise.all(
86
- new Array(this.config_.repeat).fill(0).map(async () => {
92
+ const events: IAgenticaSelectBenchmarkEvent<Model>[]
93
+ = await Promise.all(
94
+ Array.from({ length: this.config_.repeat }).map(async () => {
87
95
  await semaphore.acquire();
88
- const e: IAgenticaSelectBenchmarkEvent<Model> =
89
- await this.step(scenario);
96
+ const e: IAgenticaSelectBenchmarkEvent<Model>
97
+ = await this.step(scenario);
90
98
  await semaphore.release();
91
- if (listener !== undefined) listener(e);
99
+ if (listener !== undefined) {
100
+ listener(e);
101
+ }
92
102
  return e;
93
103
  }),
94
104
  );
@@ -96,9 +106,9 @@ export class AgenticaSelectBenchmark<Model extends ILlmSchema.Model> {
96
106
  scenario,
97
107
  events,
98
108
  usage: events
99
- .filter((e) => e.type !== "error")
100
- .map((e) => e.usage)
101
- .reduce(AgenticaTokenUsage.plus, AgenticaTokenUsage.zero()),
109
+ .filter(e => e.type !== "error")
110
+ .map(e => e.usage)
111
+ .reduce((acc, cur) => AgenticaTokenUsage.plus(acc, cur), AgenticaTokenUsage.zero()),
102
112
  };
103
113
  }),
104
114
  );
@@ -107,8 +117,8 @@ export class AgenticaSelectBenchmark<Model extends ILlmSchema.Model> {
107
117
  started_at,
108
118
  completed_at: new Date(),
109
119
  usage: experiments
110
- .map((p) => p.usage)
111
- .reduce(AgenticaTokenUsage.plus, AgenticaTokenUsage.zero()),
120
+ .map(p => p.usage)
121
+ .reduce((acc, cur) => AgenticaTokenUsage.plus(acc, cur), AgenticaTokenUsage.zero()),
112
122
  });
113
123
  }
114
124
 
@@ -132,8 +142,9 @@ export class AgenticaSelectBenchmark<Model extends ILlmSchema.Model> {
132
142
  * @returns Dictionary of markdown files.
133
143
  */
134
144
  public report(): Record<string, string> {
135
- if (this.result_ === null)
145
+ if (this.result_ === null) {
136
146
  throw new Error("Benchmark is not executed yet.");
147
+ }
137
148
  return AgenticaSelectBenchmarkReporter.markdown(this.result_);
138
149
  }
139
150
 
@@ -143,8 +154,8 @@ export class AgenticaSelectBenchmark<Model extends ILlmSchema.Model> {
143
154
  const started_at: Date = new Date();
144
155
  try {
145
156
  const usage: AgenticaTokenUsage = AgenticaTokenUsage.zero();
146
- const prompts: AgenticaPrompt<Model>[] =
147
- await ChatGptSelectFunctionAgent.execute({
157
+ const prompts: AgenticaPrompt<Model>[]
158
+ = await ChatGptSelectFunctionAgent.execute({
148
159
  ...this.agent_.getContext({
149
160
  prompt: new AgenticaTextPrompt({
150
161
  role: "user",
@@ -158,13 +169,13 @@ export class AgenticaSelectBenchmark<Model extends ILlmSchema.Model> {
158
169
  dispatch: async () => {},
159
170
  } satisfies AgenticaContext<Model>);
160
171
  const selected: AgenticaOperationSelection<Model>[] = prompts
161
- .filter((p) => p.type === "select")
162
- .map((p) => p.selections)
172
+ .filter(p => p.type === "select")
173
+ .map(p => p.selections)
163
174
  .flat();
164
175
  return {
165
176
  type: AgenticaBenchmarkPredicator.success({
166
177
  expected: scenario.expected,
167
- operations: selected.map((s) => s.operation),
178
+ operations: selected.map(s => s.operation),
168
179
  })
169
180
  ? "success"
170
181
  : "failure",
@@ -172,16 +183,17 @@ export class AgenticaSelectBenchmark<Model extends ILlmSchema.Model> {
172
183
  selected,
173
184
  usage,
174
185
  assistantPrompts: prompts
175
- .filter((p) => p.type === "text")
186
+ .filter(p => p.type === "text")
176
187
  .filter(
177
188
  (p): p is AgenticaTextPrompt<"assistant"> => p.role === "assistant",
178
189
  ),
179
190
  started_at,
180
191
  completed_at: new Date(),
181
192
  } satisfies
182
- | IAgenticaSelectBenchmarkEvent.ISuccess<Model>
183
- | IAgenticaSelectBenchmarkEvent.IFailure<Model>;
184
- } catch (error) {
193
+ | IAgenticaSelectBenchmarkEvent.ISuccess<Model>
194
+ | IAgenticaSelectBenchmarkEvent.IFailure<Model>;
195
+ }
196
+ catch (error) {
185
197
  return {
186
198
  type: "error",
187
199
  scenario,