@agentica/benchmark 0.7.0-dev.20250224-2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72) hide show
  1. package/LICENSE +21 -0
  2. package/lib/AgenticaCallBenchmark.d.ts +137 -0
  3. package/lib/AgenticaCallBenchmark.js +187 -0
  4. package/lib/AgenticaCallBenchmark.js.map +1 -0
  5. package/lib/AgenticaSelectBenchmark.d.ts +123 -0
  6. package/lib/AgenticaSelectBenchmark.js +185 -0
  7. package/lib/AgenticaSelectBenchmark.js.map +1 -0
  8. package/lib/index.d.ts +2 -0
  9. package/lib/index.js +19 -0
  10. package/lib/index.js.map +1 -0
  11. package/lib/index.mjs +449 -0
  12. package/lib/index.mjs.map +1 -0
  13. package/lib/internal/AgenticaBenchmarkPredicator.d.ts +32 -0
  14. package/lib/internal/AgenticaBenchmarkPredicator.js +179 -0
  15. package/lib/internal/AgenticaBenchmarkPredicator.js.map +1 -0
  16. package/lib/internal/AgenticaBenchmarkUtil.d.ts +5 -0
  17. package/lib/internal/AgenticaBenchmarkUtil.js +37 -0
  18. package/lib/internal/AgenticaBenchmarkUtil.js.map +1 -0
  19. package/lib/internal/AgenticaCallBenchmarkReporter.d.ts +4 -0
  20. package/lib/internal/AgenticaCallBenchmarkReporter.js +136 -0
  21. package/lib/internal/AgenticaCallBenchmarkReporter.js.map +1 -0
  22. package/lib/internal/AgenticaPromptReporter.d.ts +4 -0
  23. package/lib/internal/AgenticaPromptReporter.js +49 -0
  24. package/lib/internal/AgenticaPromptReporter.js.map +1 -0
  25. package/lib/internal/AgenticaSelectBenchmarkReporter.d.ts +1 -0
  26. package/lib/internal/AgenticaSelectBenchmarkReporter.js +172 -0
  27. package/lib/internal/AgenticaSelectBenchmarkReporter.js.map +1 -0
  28. package/lib/structures/IAgenticaBenchmarkExpected.d.ts +44 -0
  29. package/lib/structures/IAgenticaBenchmarkExpected.js +3 -0
  30. package/lib/structures/IAgenticaBenchmarkExpected.js.map +1 -0
  31. package/lib/structures/IAgenticaCallBenchmarkEvent.d.ts +95 -0
  32. package/lib/structures/IAgenticaCallBenchmarkEvent.js +3 -0
  33. package/lib/structures/IAgenticaCallBenchmarkEvent.js.map +1 -0
  34. package/lib/structures/IAgenticaCallBenchmarkResult.d.ts +62 -0
  35. package/lib/structures/IAgenticaCallBenchmarkResult.js +3 -0
  36. package/lib/structures/IAgenticaCallBenchmarkResult.js.map +1 -0
  37. package/lib/structures/IAgenticaCallBenchmarkScenario.d.ts +36 -0
  38. package/lib/structures/IAgenticaCallBenchmarkScenario.js +3 -0
  39. package/lib/structures/IAgenticaCallBenchmarkScenario.js.map +1 -0
  40. package/lib/structures/IAgenticaSelectBenchmarkEvent.d.ts +92 -0
  41. package/lib/structures/IAgenticaSelectBenchmarkEvent.js +3 -0
  42. package/lib/structures/IAgenticaSelectBenchmarkEvent.js.map +1 -0
  43. package/lib/structures/IAgenticaSelectBenchmarkResult.d.ts +62 -0
  44. package/lib/structures/IAgenticaSelectBenchmarkResult.js +3 -0
  45. package/lib/structures/IAgenticaSelectBenchmarkResult.js.map +1 -0
  46. package/lib/structures/IAgenticaSelectBenchmarkScenario.d.ts +36 -0
  47. package/lib/structures/IAgenticaSelectBenchmarkScenario.js +3 -0
  48. package/lib/structures/IAgenticaSelectBenchmarkScenario.js.map +1 -0
  49. package/lib/utils/MathUtil.d.ts +3 -0
  50. package/lib/utils/MathUtil.js +8 -0
  51. package/lib/utils/MathUtil.js.map +1 -0
  52. package/lib/utils/TokenUsageComputer.d.ts +5 -0
  53. package/lib/utils/TokenUsageComputer.js +37 -0
  54. package/lib/utils/TokenUsageComputer.js.map +1 -0
  55. package/package.json +57 -0
  56. package/src/AgenticaCallBenchmark.ts +259 -0
  57. package/src/AgenticaSelectBenchmark.ts +262 -0
  58. package/src/index.ts +3 -0
  59. package/src/internal/AgenticaBenchmarkPredicator.ts +216 -0
  60. package/src/internal/AgenticaBenchmarkUtil.ts +40 -0
  61. package/src/internal/AgenticaCallBenchmarkReporter.ts +177 -0
  62. package/src/internal/AgenticaPromptReporter.ts +43 -0
  63. package/src/internal/AgenticaSelectBenchmarkReporter.ts +212 -0
  64. package/src/structures/IAgenticaBenchmarkExpected.ts +58 -0
  65. package/src/structures/IAgenticaCallBenchmarkEvent.ts +109 -0
  66. package/src/structures/IAgenticaCallBenchmarkResult.ts +69 -0
  67. package/src/structures/IAgenticaCallBenchmarkScenario.ts +39 -0
  68. package/src/structures/IAgenticaSelectBenchmarkEvent.ts +110 -0
  69. package/src/structures/IAgenticaSelectBenchmarkResult.ts +69 -0
  70. package/src/structures/IAgenticaSelectBenchmarkScenario.ts +39 -0
  71. package/src/utils/MathUtil.ts +3 -0
  72. package/src/utils/TokenUsageComputer.ts +40 -0
package/lib/index.js ADDED
@@ -0,0 +1,19 @@
1
+ "use strict";
2
/**
 * Bind `source[key]` onto `target` under the name `alias` (defaults to `key`).
 *
 * A helper already present on `this` is reused. Otherwise, when
 * `Object.create` exists, the binding is installed through a property
 * descriptor; when it does not, the value is copied with plain assignment.
 */
var __createBinding = (this && this.__createBinding) || (function () {
    if (!Object.create) {
        return function (target, source, key, alias) {
            if (alias === undefined) alias = key;
            target[alias] = source[key];
        };
    }
    return function (target, source, key, alias) {
        if (alias === undefined) alias = key;
        var descriptor = Object.getOwnPropertyDescriptor(source, key);
        // A missing descriptor always gets replaced by a live getter; an
        // existing one is replaced depending on whether it is an accessor.
        var needsGetter = true;
        if (descriptor) {
            needsGetter = "get" in descriptor
                ? !source.__esModule
                : descriptor.writable || descriptor.configurable;
        }
        if (needsGetter) {
            descriptor = {
                enumerable: true,
                get: function () { return source[key]; }
            };
        }
        Object.defineProperty(target, alias, descriptor);
    };
})();

/**
 * Re-export every enumerable key of `source` onto `exports`, skipping
 * `"default"` and any name `exports` already owns.
 */
var __exportStar = (this && this.__exportStar) || function (source, exports) {
    for (var name in source) {
        if (name === "default") continue;
        if (Object.prototype.hasOwnProperty.call(exports, name)) continue;
        __createBinding(exports, source, name);
    }
};

// Flag the export object as an ES-module interop target, then forward both
// benchmark modules through it.
Object.defineProperty(exports, "__esModule", { value: true });
__exportStar(require("./AgenticaCallBenchmark"), exports);
__exportStar(require("./AgenticaSelectBenchmark"), exports);
19
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;AAAA,0DAAwC;AAExC,4DAA0C"}
package/lib/index.mjs ADDED
@@ -0,0 +1,449 @@
1
+ import { Semaphore } from "tstl";
2
+
3
+ import "typia";
4
+
5
+ import { ChatGptSelectFunctionAgent } from "@agentica/core/src/chatgpt/ChatGptSelectFunctionAgent";
6
+
7
+ var AgenticaBenchmarkPredicator;
8
+
9
+ (function(AgenticaBenchmarkPredicator) {
10
+ AgenticaBenchmarkPredicator.isNext = async agent => {
11
+ const last = agent.getPromptHistories().at(-1);
12
+ if (last?.type !== "text" || last.role !== "assistant") return null;
13
+ const consent = {
14
+ functions: [ {
15
+ name: "consent",
16
+ parameters: {
17
+ description: "Properties for asking the user's consent",
18
+ type: "object",
19
+ properties: {
20
+ content: {
21
+ description: "Reason of the message implying what the AI agent wants\nto do at the next step after the user's consent.",
22
+ type: "string"
23
+ },
24
+ reply: {
25
+ title: "Recommended reply message for the user",
26
+ description: "Recommended reply message for the user.\n\nThe message what AI agent wants the user to reply\naccepting the AI agent's next job suggestion.",
27
+ type: "string"
28
+ }
29
+ },
30
+ required: [ "content", "reply" ],
31
+ additionalProperties: false,
32
+ $defs: {}
33
+ },
34
+ description: "Ask user to consent for what the AI agent wants to do next.\n\nIf AI agent wants to do some function calling at next,\nbut it needs the user's consent about the function calling to do,\nthen call this tool function."
35
+ } ]
36
+ }.functions[0];
37
+ const result = await agent["props"].provider.api.chat.completions.create({
38
+ model: agent["props"].provider.model,
39
+ messages: [ {
40
+ role: "system",
41
+ content: [ "You are an helpful assistant.", "", "If what the assistant said seems like to asking for", "user's consent about some function calling at the next step,", "use the tools appropriately to step to the next." ].join("\n")
42
+ }, {
43
+ role: "assistant",
44
+ content: last.text
45
+ } ],
46
+ tools: [ {
47
+ type: "function",
48
+ function: {
49
+ name: consent.name,
50
+ description: consent.description,
51
+ parameters: consent.parameters
52
+ }
53
+ } ],
54
+ tool_choice: "required",
55
+ parallel_tool_calls: false
56
+ }, agent["props"].provider.options);
57
+ const toolCall = (result.choices[0]?.message.tool_calls ?? []).filter((tc => tc.type === "function" && tc.function.name === consent.name))?.[0];
58
+ if (toolCall === undefined) return null;
59
+ const input = JSON.parse(toolCall.function.arguments);
60
+ return (() => {
61
+ const _io0 = input => "string" === typeof input.content && "string" === typeof input.reply;
62
+ return input => "object" === typeof input && null !== input && _io0(input);
63
+ })()(input) ? input.reply : null;
64
+ };
65
+ AgenticaBenchmarkPredicator.success = props => successInner(props).result;
66
+ const successInner = props => {
67
+ const call = (expected, overrideOperations) => successInner({
68
+ expected,
69
+ operations: overrideOperations ?? props.operations,
70
+ strict: props.strict
71
+ });
72
+ switch (props.expected.type) {
73
+ case "array":
74
+ {
75
+ let take = 0;
76
+ const targetIterator = props.expected.items[Symbol.iterator]();
77
+ let targeted = targetIterator.next();
78
+ while (true) {
79
+ if (targeted.done) {
80
+ return {
81
+ result: true,
82
+ take
83
+ };
84
+ }
85
+ if (take >= props.operations.length) {
86
+ return {
87
+ result: false
88
+ };
89
+ }
90
+ const result = call(targeted.value, props.operations.slice(take));
91
+ if (!result.result) {
92
+ if (!props.strict) {
93
+ take += 1;
94
+ continue;
95
+ }
96
+ return {
97
+ result: false
98
+ };
99
+ }
100
+ take += result.take;
101
+ targeted = targetIterator.next();
102
+ }
103
+ }
104
+
105
+ case "standalone":
106
+ {
107
+ const target = props.expected.operation;
108
+ const result = props.operations.some((op => op.name === target.name));
109
+ if (result) {
110
+ return {
111
+ result,
112
+ take: 1
113
+ };
114
+ }
115
+ return {
116
+ result
117
+ };
118
+ }
119
+
120
+ case "anyOf":
121
+ for (const expected of props.expected.anyOf) {
122
+ const callResult = call(expected);
123
+ if (callResult.result) {
124
+ return callResult;
125
+ }
126
+ }
127
+ return {
128
+ result: false
129
+ };
130
+
131
+ case "allOf":
132
+ {
133
+ const result = props.expected.allOf.map((expected => call(expected)));
134
+ if (result.every((r => r.result))) {
135
+ return {
136
+ result: true,
137
+ take: result.reduce(((acc, r) => Math.max(acc, r.take)), 0)
138
+ };
139
+ }
140
+ return {
141
+ result: false
142
+ };
143
+ }
144
+ }
145
+ };
146
+ })(AgenticaBenchmarkPredicator || (AgenticaBenchmarkPredicator = {}));
147
+
148
+ var MathUtil;
149
+
150
+ (function(MathUtil) {
151
+ MathUtil.round = value => Math.floor(value * 100) / 100;
152
+ })(MathUtil || (MathUtil = {}));
153
+
154
+ var AgenticaBenchmarkUtil;
155
+
156
+ (function(AgenticaBenchmarkUtil) {
157
+ AgenticaBenchmarkUtil.errorToJson = error => {
158
+ if (error instanceof Error) return {
159
+ ...error,
160
+ name: error.name,
161
+ message: error.message,
162
+ stack: error.stack
163
+ };
164
+ return error;
165
+ };
166
+ AgenticaBenchmarkUtil.expectedToJson = expected => {
167
+ if (expected.type === "standalone") return {
168
+ type: expected.type,
169
+ operation: {
170
+ name: expected.operation.name,
171
+ description: expected.operation.function.description
172
+ }
173
+ }; else if (expected.type === "array") return {
174
+ type: expected.type,
175
+ items: expected.items.map(AgenticaBenchmarkUtil.expectedToJson)
176
+ }; else if (expected.type === "allOf") return {
177
+ type: expected.type,
178
+ allOf: expected.allOf.map(AgenticaBenchmarkUtil.expectedToJson)
179
+ }; else return {
180
+ type: expected.type,
181
+ anyOf: expected.anyOf.map(AgenticaBenchmarkUtil.expectedToJson)
182
+ };
183
+ };
184
+ })(AgenticaBenchmarkUtil || (AgenticaBenchmarkUtil = {}));
185
+
186
+ var AgenticaPromptReporter;
187
+
188
+ (function(AgenticaPromptReporter) {
189
+ AgenticaPromptReporter.markdown = p => {
190
+ if (p.type === "text") return [ `### Text (${p.role})`, p.text, "" ].join("\n"); else if (p.type === "select" || p.type === "cancel") return [ `### ${p.type === "select" ? "Select" : "Cancel"}`, ...p.operations.map((op => [ `#### ${op.name}`, ` - controller: ${op.controller.name}`, ` - function: ${op.function.name}`, ` - reason: ${op.reason}`, "", ...!!op.function.description?.length ? [ op.function.description, "" ] : [] ])).flat() ].join("\n"); else if (p.type === "describe") return [ "### Describe", ...p.executions.map((e => ` - ${e.name}`)), "", ...p.text.split("\n").map((s => `> ${s}`)), "" ].join("\n");
191
+ return [ "### Execute", ` - name: ${p.name}`, ` - controller: ${p.controller.name}`, ` - function: ${p.function.name}`, "", "```json", JSON.stringify(p.arguments, null, 2), "```", "" ].join("\n");
192
+ };
193
+ })(AgenticaPromptReporter || (AgenticaPromptReporter = {}));
194
+
195
+ var AgenticaCallBenchmarkReporter;
196
+
197
+ (function(AgenticaCallBenchmarkReporter) {
198
+ AgenticaCallBenchmarkReporter.markdown = result => Object.fromEntries([ [ "./README.md", writeIndex(result) ], ...result.experiments.map((exp => [ [ `./${exp.scenario.name}/README.md`, writeExperimentIndex(exp) ], ...exp.events.map(((event, i) => [ `./${exp.scenario.name}/${i + 1}.${event.type}.md`, writeExperimentEvent(event, i) ])) ])).flat() ]);
199
+ const writeIndex = result => {
200
+ const events = result.experiments.map((r => r.events)).flat();
201
+ const average = events.map((e => e.completed_at.getTime() - e.started_at.getTime())).reduce(((a, b) => a + b), 0) / events.length;
202
+ return [ "# LLM Function Call Benchmark", "## Summary", ` - Aggregation:`, ` - Scenarios: #${result.experiments.length.toLocaleString()}`, ` - Trial: ${events.length}`, ` - Success: ${events.filter((e => e.type === "success")).length}`, ` - Failure: ${events.filter((e => e.type === "failure")).length}`, ` - Average Time: ${MathUtil.round(average).toLocaleString()} ms`, ` - Token Usage`, ` - Total: ${result.usage.total.toLocaleString()}`, ` - Prompt`, ` - Total: ${result.usage.prompt.total.toLocaleString()}`, ` - Audio: ${result.usage.prompt.audio.toLocaleString()}`, ` - Cached: ${result.usage.prompt.cached.toLocaleString()}`, ` - Completion:`, ` - Total: ${result.usage.completion.total.toLocaleString()}`, ` - Accepted Prediction: ${result.usage.completion.accepted_prediction.toLocaleString()}`, ` - Audio: ${result.usage.completion.audio.toLocaleString()}`, ` - Reasoning: ${result.usage.completion.reasoning.toLocaleString()}`, ` - Rejected Prediction: ${result.usage.completion.rejected_prediction.toLocaleString()}`, "", "## Experiments", " Name | Select | Call | Time/Avg ", ":-----|:-------|:-----|----------:", ...result.experiments.map((exp => [ `[${exp.scenario.name}](./${exp.scenario.name}/README.md)`, drawStatus(exp.events, (e => e.type !== "error" && e.select === true)), drawStatus(exp.events, (e => e.type !== "error" && e.call === true)), `${MathUtil.round(exp.events.map((e => e.completed_at.getTime() - e.started_at.getTime())).reduce(((a, b) => a + b), 0) / exp.events.length).toLocaleString()} ms` ].join(" | "))) ].join("\n");
203
+ };
204
+ const writeExperimentIndex = exp => [ `# ${exp.scenario.name}`, "## Summary", ` - Scenarios: #${exp.events.length.toLocaleString()}`, ` - Success: ${exp.events.filter((e => e.type === "success")).length}`, ` - Failure: ${exp.events.filter((e => e.type === "failure")).length}`, ` - Average Time: ${MathUtil.round(exp.events.map((e => e.completed_at.getTime() - e.started_at.getTime())).reduce(((a, b) => a + b), 0) / exp.events.length).toLocaleString()} ms`, "", "## Events", " Name | Type | Time", ":-----|:-----|----:", ...exp.events.map(((e, i) => [ `[${i + 1}.](./${i + 1}.${e.type}.md)`, e.type, `${MathUtil.round(e.completed_at.getTime() - e.started_at.getTime())} ms` ].join(" | "))), "", "## Scenario", "### User Prompt", exp.scenario.text, "", "### Expected", "```json", JSON.stringify(AgenticaBenchmarkUtil.expectedToJson(exp.scenario.expected), null, 2), "```" ].join("\n");
205
+ const writeExperimentEvent = (event, index) => [ `# ${index}. ${event.type}`, "## Summary", ` - Name: ${event.scenario.name}`, ` - Type: ${event.type}`, ` - Time: ${MathUtil.round(event.completed_at.getTime() - event.started_at.getTime()).toLocaleString()} ms`, ...event.type !== "error" ? [ ` - Select: ${event.select ? "✅" : "❌"}`, ` - Call: ${event.call ? "✅" : "❌"}` ] : [], ` - Token Usage: ${event.usage.toLocaleString()}`, "", "## Scenario", "### User Prompt", event.scenario.text, "", "### Expected", "```json", JSON.stringify(AgenticaBenchmarkUtil.expectedToJson(event.scenario.expected), null, 2), "```", "", "## Prompt Histories", ...event.prompts.map(AgenticaPromptReporter.markdown), "", ...event.type === "error" ? [ "## Error", "```json", JSON.stringify(AgenticaBenchmarkUtil.errorToJson(event.error), null, 2), "```" ] : [] ].join("\n");
206
+ const drawStatus = (events, success) => {
207
+ const count = events.filter(success).length;
208
+ return new Array(count).fill("■").join("") + new Array(10 - count).fill("□").join("");
209
+ };
210
+ })(AgenticaCallBenchmarkReporter || (AgenticaCallBenchmarkReporter = {}));
211
+
212
+ var TokenUsageComputer;
213
+
214
+ (function(TokenUsageComputer) {
215
+ TokenUsageComputer.zero = () => ({
216
+ total: 0,
217
+ prompt: {
218
+ total: 0,
219
+ audio: 0,
220
+ cached: 0
221
+ },
222
+ completion: {
223
+ total: 0,
224
+ accepted_prediction: 0,
225
+ audio: 0,
226
+ reasoning: 0,
227
+ rejected_prediction: 0
228
+ }
229
+ });
230
+ TokenUsageComputer.plus = (a, b) => ({
231
+ total: a.total + b.total,
232
+ prompt: {
233
+ total: a.prompt.total + b.prompt.total,
234
+ audio: a.prompt.audio + b.prompt.audio,
235
+ cached: a.prompt.cached + b.prompt.cached
236
+ },
237
+ completion: {
238
+ total: a.completion.total + b.completion.total,
239
+ accepted_prediction: a.completion.accepted_prediction + b.completion.accepted_prediction,
240
+ audio: a.completion.audio + b.completion.audio,
241
+ reasoning: a.completion.reasoning + b.completion.reasoning,
242
+ rejected_prediction: a.completion.rejected_prediction + b.completion.rejected_prediction
243
+ }
244
+ });
245
+ })(TokenUsageComputer || (TokenUsageComputer = {}));
246
+
247
+ class AgenticaCallBenchmark {
248
+ constructor(props) {
249
+ this.agent_ = props.agent;
250
+ this.scenarios_ = props.scenarios.slice();
251
+ this.config_ = {
252
+ repeat: props.config?.repeat ?? 10,
253
+ simultaneous: props.config?.simultaneous ?? 10,
254
+ consent: props.config?.consent ?? 3
255
+ };
256
+ this.result_ = null;
257
+ }
258
+ async execute(listener) {
259
+ const started_at = new Date;
260
+ const semaphore = new Semaphore(this.config_.simultaneous);
261
+ const experiments = await Promise.all(this.scenarios_.map((async scenario => {
262
+ const events = await Promise.all(new Array(this.config_.repeat).fill(0).map((async () => {
263
+ await semaphore.acquire();
264
+ const e = await this.step(scenario);
265
+ await semaphore.release();
266
+ if (listener !== undefined) listener(e);
267
+ return e;
268
+ })));
269
+ return {
270
+ scenario,
271
+ events,
272
+ usage: events.filter((e => e.type !== "error")).map((e => e.usage)).reduce(TokenUsageComputer.plus, TokenUsageComputer.zero())
273
+ };
274
+ })));
275
+ return this.result_ = {
276
+ experiments,
277
+ started_at,
278
+ completed_at: new Date,
279
+ usage: experiments.map((p => p.usage)).reduce(TokenUsageComputer.plus, TokenUsageComputer.zero())
280
+ };
281
+ }
282
+ report() {
283
+ if (this.result_ === null) throw new Error("Benchmark is not executed yet.");
284
+ return AgenticaCallBenchmarkReporter.markdown(this.result_);
285
+ }
286
+ async step(scenario) {
287
+ const agent = this.agent_.clone();
288
+ const started_at = new Date;
289
+ const success = () => AgenticaBenchmarkPredicator.success({
290
+ expected: scenario.expected,
291
+ operations: agent.getPromptHistories().filter((p => p.type === "execute")),
292
+ strict: false
293
+ });
294
+ const out = () => {
295
+ const select = AgenticaBenchmarkPredicator.success({
296
+ expected: scenario.expected,
297
+ operations: agent.getPromptHistories().filter((p => p.type === "select")).map((p => p.operations)).flat(),
298
+ strict: false
299
+ });
300
+ const call = success();
301
+ return {
302
+ type: call ? "success" : "failure",
303
+ scenario,
304
+ select,
305
+ call,
306
+ prompts: agent.getPromptHistories(),
307
+ usage: agent.getTokenUsage(),
308
+ started_at,
309
+ completed_at: new Date
310
+ };
311
+ };
312
+ try {
313
+ await agent.conversate(scenario.text);
314
+ if (success()) return out();
315
+ for (let i = 0; i < this.config_.consent; ++i) {
316
+ const next = await AgenticaBenchmarkPredicator.isNext(agent);
317
+ if (next === null) break;
318
+ await agent.conversate(next);
319
+ if (success()) return out();
320
+ }
321
+ return out();
322
+ } catch (error) {
323
+ return {
324
+ type: "error",
325
+ scenario,
326
+ prompts: agent.getPromptHistories(),
327
+ usage: agent.getTokenUsage(),
328
+ error,
329
+ started_at,
330
+ completed_at: new Date
331
+ };
332
+ }
333
+ }
334
+ }
335
+
336
+ var AgenticaSelectBenchmarkReporter;
337
+
338
+ (function(AgenticaSelectBenchmarkReporter) {
339
+ AgenticaSelectBenchmarkReporter.markdown = result => Object.fromEntries([ [ "./README.md", writeIndex(result) ], ...result.experiments.map((exp => [ [ `./${exp.scenario.name}/README.md`, writeExperimentIndex(exp) ], ...exp.events.map(((event, i) => [ `./${exp.scenario.name}/${i + 1}.${event.type}.md`, writeExperimentEvent(event, i) ])) ])).flat() ]);
340
+ const writeIndex = result => {
341
+ const events = result.experiments.map((r => r.events)).flat();
342
+ const average = events.map((e => e.completed_at.getTime() - e.started_at.getTime())).reduce(((a, b) => a + b), 0) / events.length;
343
+ return [ "# LLM Function Selection Benchmark", "## Summary", ` - Aggregation:`, ` - Scenarios: #${result.experiments.length.toLocaleString()}`, ` - Trial: ${events.length}`, ` - Success: ${events.filter((e => e.type === "success")).length}`, ` - Failure: ${events.filter((e => e.type === "failure")).length}`, ` - Average Time: ${MathUtil.round(average).toLocaleString()} ms`, ` - Token Usage`, ` - Total: ${result.usage.total.toLocaleString()}`, ` - Prompt`, ` - Total: ${result.usage.prompt.total.toLocaleString()}`, ` - Audio: ${result.usage.prompt.audio.toLocaleString()}`, ` - Cached: ${result.usage.prompt.cached.toLocaleString()}`, ` - Completion:`, ` - Total: ${result.usage.completion.total.toLocaleString()}`, ` - Accepted Prediction: ${result.usage.completion.accepted_prediction.toLocaleString()}`, ` - Audio: ${result.usage.completion.audio.toLocaleString()}`, ` - Reasoning: ${result.usage.completion.reasoning.toLocaleString()}`, ` - Rejected Prediction: ${result.usage.completion.rejected_prediction.toLocaleString()}`, "", "## Experiments", " Name | Status | Time/Avg ", ":-----|:-------|----------:", ...result.experiments.map((exp => [ `[${exp.scenario.name}](./${exp.scenario.name}/README.md)`, (() => {
344
+ const success = Math.floor(exp.events.filter((e => e.type === "success")).length / exp.events.length * 10);
345
+ return new Array(success).fill("■").join("") + new Array(10 - success).fill("□").join("");
346
+ })(), MathUtil.round(exp.events.map((event => event.completed_at.getTime() - event.started_at.getTime())).reduce(((a, b) => a + b), 0) / exp.events.length).toLocaleString() + " ms" ].join(" | "))) ].join("\n");
347
+ };
348
+ const writeExperimentIndex = exp => [ `# ${exp.scenario.name}`, "## Summary", " - Aggregation:", ` - Trial: ${exp.events.length}`, ` - Success: ${exp.events.filter((e => e.type === "success")).length}`, ` - Failure: ${exp.events.filter((e => e.type === "failure")).length}`, ` - Average Time: ${MathUtil.round(exp.events.map((event => event.completed_at.getTime() - event.started_at.getTime())).reduce(((a, b) => a + b), 0) / exp.events.length).toLocaleString()} ms`, " - Token Usage", ` - Total: ${exp.usage.total.toLocaleString()}`, ` - Prompt`, ` - Total: ${exp.usage.prompt.total.toLocaleString()}`, ` - Audio: ${exp.usage.prompt.audio.toLocaleString()}`, ` - Cached: ${exp.usage.prompt.cached.toLocaleString()}`, ` - Completion:`, ` - Total: ${exp.usage.completion.total.toLocaleString()}`, ` - Accepted Prediction: ${exp.usage.completion.accepted_prediction.toLocaleString()}`, ` - Audio: ${exp.usage.completion.audio.toLocaleString()}`, ` - Reasoning: ${exp.usage.completion.reasoning.toLocaleString()}`, ` - Rejected Prediction: ${exp.usage.completion.rejected_prediction.toLocaleString()}`, "", "## Events", " No | Type | Time", "---:|:-----|----:", ...exp.events.map(((e, i) => [ `[${i + 1}.](./${i + 1}.${e.type}.md)`, e.type, MathUtil.round(e.completed_at.getTime() - e.started_at.getTime()) + " ms" ].join(" | "))), "", "## Scenario", "### User Prompt", exp.scenario.text, "", "### Expected", "```json", JSON.stringify(AgenticaBenchmarkUtil.expectedToJson(exp.scenario.expected), null, 2), "```" ].join("\n");
349
+ const writeExperimentEvent = (event, index) => [ `# ${index}. ${event.type}`, `## Summary`, ` - Name: ${event.scenario.name}`, ` - Type: ${event.type}`, ` - Time: ${(event.completed_at.getTime() - event.started_at.getTime()).toLocaleString()} ms`, ...event.type !== "error" ? [ " - Token Usage", ` - Total: ${event.usage.total.toLocaleString()}`, ` - Prompt`, ` - Total: ${event.usage.prompt.total.toLocaleString()}`, ` - Audio: ${event.usage.prompt.audio.toLocaleString()}`, ` - Cached: ${event.usage.prompt.cached.toLocaleString()}`, ` - Completion:`, ` - Total: ${event.usage.completion.total.toLocaleString()}`, ` - Accepted Prediction: ${event.usage.completion.accepted_prediction.toLocaleString()}`, ` - Audio: ${event.usage.completion.audio.toLocaleString()}`, ` - Reasoning: ${event.usage.completion.reasoning.toLocaleString()}`, ` - Rejected Prediction: ${event.usage.completion.rejected_prediction.toLocaleString()}` ] : [], "", "## Scenario", "### User Prompt", event.scenario.text, "", "### Expected", "```json", JSON.stringify(AgenticaBenchmarkUtil.expectedToJson(event.scenario.expected), null, 2), "```", "", ...event.type === "success" || event.type === "failure" ? [ "## Result", ...event.selected.map((s => [ `### ${s.name}`, ` - Controller: \`${s.controller.name}\``, ` - Function: \`${s.function.name}\``, ` - Reason: ${s.reason}`, "", ...s.function.description ? [ s.function.description, "" ] : [] ].join("\n"))) ] : [], ...event.type === "error" ? [ "## Error", "```json", AgenticaBenchmarkUtil.errorToJson(JSON.stringify(event.error, null, 2)), "```", "" ] : [] ].join("\n");
350
+ })(AgenticaSelectBenchmarkReporter || (AgenticaSelectBenchmarkReporter = {}));
351
+
352
+ class AgenticaSelectBenchmark {
353
+ constructor(props) {
354
+ this.agent_ = props.agent;
355
+ this.scenarios_ = props.scenarios.slice();
356
+ this.config_ = {
357
+ repeat: props.config?.repeat ?? 10,
358
+ simultaneous: props.config?.simultaneous ?? 10
359
+ };
360
+ this.histories_ = props.agent.getPromptHistories().slice();
361
+ this.result_ = null;
362
+ }
363
+ async execute(listener) {
364
+ const started_at = new Date;
365
+ const semaphore = new Semaphore(this.config_.simultaneous);
366
+ const experiments = await Promise.all(this.scenarios_.map((async scenario => {
367
+ const events = await Promise.all(new Array(this.config_.repeat).fill(0).map((async () => {
368
+ await semaphore.acquire();
369
+ const e = await this.step(scenario);
370
+ await semaphore.release();
371
+ if (listener !== undefined) listener(e);
372
+ return e;
373
+ })));
374
+ return {
375
+ scenario,
376
+ events,
377
+ usage: events.filter((e => e.type !== "error")).map((e => e.usage)).reduce(TokenUsageComputer.plus, TokenUsageComputer.zero())
378
+ };
379
+ })));
380
+ return this.result_ = {
381
+ experiments,
382
+ started_at,
383
+ completed_at: new Date,
384
+ usage: experiments.map((p => p.usage)).reduce(TokenUsageComputer.plus, TokenUsageComputer.zero())
385
+ };
386
+ }
387
+ report() {
388
+ if (this.result_ === null) throw new Error("Benchmark is not executed yet.");
389
+ return AgenticaSelectBenchmarkReporter.markdown(this.result_);
390
+ }
391
+ async step(scenario) {
392
+ const started_at = new Date;
393
+ try {
394
+ const usage = {
395
+ total: 0,
396
+ prompt: {
397
+ total: 0,
398
+ audio: 0,
399
+ cached: 0
400
+ },
401
+ completion: {
402
+ total: 0,
403
+ accepted_prediction: 0,
404
+ audio: 0,
405
+ reasoning: 0,
406
+ rejected_prediction: 0
407
+ }
408
+ };
409
+ const prompts = await ChatGptSelectFunctionAgent.execute({
410
+ ...this.agent_.getContext({
411
+ prompt: {
412
+ type: "text",
413
+ role: "user",
414
+ text: scenario.text
415
+ },
416
+ usage
417
+ }),
418
+ histories: this.histories_.slice(),
419
+ stack: [],
420
+ ready: () => true,
421
+ dispatch: async () => {}
422
+ });
423
+ const selected = prompts.filter((p => p.type === "select")).map((p => p.operations)).flat();
424
+ return {
425
+ type: AgenticaBenchmarkPredicator.success({
426
+ expected: scenario.expected,
427
+ operations: selected
428
+ }) ? "success" : "failure",
429
+ scenario,
430
+ selected,
431
+ usage,
432
+ assistantPrompts: prompts.filter((p => p.type === "text")).filter((p => p.role === "assistant")),
433
+ started_at,
434
+ completed_at: new Date
435
+ };
436
+ } catch (error) {
437
+ return {
438
+ type: "error",
439
+ scenario,
440
+ error,
441
+ started_at,
442
+ completed_at: new Date
443
+ };
444
+ }
445
+ }
446
+ }
447
+
448
+ export { AgenticaCallBenchmark, AgenticaSelectBenchmark };
449
+ //# sourceMappingURL=index.mjs.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.mjs","sources":["../src/internal/AgenticaBenchmarkPredicator.ts","../src/utils/MathUtil.ts","../src/internal/AgenticaBenchmarkUtil.ts","../src/internal/AgenticaPromptReporter.ts","../src/internal/AgenticaCallBenchmarkReporter.ts","../src/utils/TokenUsageComputer.ts","../src/AgenticaCallBenchmark.ts","../src/internal/AgenticaSelectBenchmarkReporter.ts","../src/AgenticaSelectBenchmark.ts"],"sourcesContent":[null,null,null,null,null,null,null,null,null],"names":["AgenticaBenchmarkPredicator","isNext","async","agent","last","getPromptHistories","at","type","role","consent","functions","result","provider","api","chat","completions","create","model","messages","content","join","text","tools","function","name","description","parameters","tool_choice","parallel_tool_calls","options","toolCall","choices","message","tool_calls","filter","tc","undefined","input","JSON","parse","arguments","_io0","reply","success","props","successInner","call","expected","overrideOperations","operations","strict","take","targetIterator","items","Symbol","iterator","targeted","next","done","length","value","slice","target","operation","some","op","anyOf","callResult","allOf","map","every","r","reduce","acc","Math","max","MathUtil","round","floor","AgenticaBenchmarkUtil","errorToJson","error","Error","stack","expectedToJson","AgenticaPromptReporter","markdown","p","controller","reason","flat","executions","e","split","s","stringify","AgenticaCallBenchmarkReporter","Object","fromEntries","writeIndex","experiments","exp","scenario","writeExperimentIndex","events","event","i","writeExperimentEvent","average","completed_at","getTime","started_at","a","b","toLocaleString","usage","total","prompt","audio","cached","completion","accepted_prediction","reasoning","rejected_prediction","drawStatus","select","index","prompts","count","Array","fill","TokenUsageComputer","zero","plus","AgenticaCallBenchmark","constructor","this","agent_","scenarios_","scenarios","config_","repeat"
,"config","simultaneous","result_","execute","listener","Date","semaphore","Semaphore","Promise","all","acquire","step","release","report","clone","out","getTokenUsage","conversate","AgenticaSelectBenchmarkReporter","selected","AgenticaSelectBenchmark","histories_","ChatGptSelectFunctionAgent","getContext","histories","ready","dispatch","assistantPrompts"],"mappings":";;;;;;AAOM,IAAWA;;CAAjB,SAAiBA;IACFA,4BAAAC,SAASC,MAAOC;QAC3B,MAAMC,OAAoCD,MAAME,qBAAqBC,IAAG;QACxE,IAAIF,MAAMG,SAAS,UAAUH,KAAKI,SAAS,aAAa,OAAO;QAE/D,MAAMC,UAAmC;;;;;;;;;;;;;;;;;;;;;;;UAGrCC,UAAU;QACd,MAAMC,eAAsCR,MAC1C,SACAS,SAASC,IAAIC,KAAKC,YAAYC,OAC9B;YACEC,OAAOd,MAAM,SAASS,SAASK;YAC/BC,UAAU,EACR;gBACEV,MAAM;gBACNW,SAAS,EACP,iCACA,IACA,uDACA,gEACA,qDACAC,KAAK;eAET;gBACEZ,MAAM;gBACNW,SAASf,KAAKiB;;YAGlBC,OAAO,EACL;gBACEf,MAAM;gBACNgB,UAAU;oBACRC,MAAMf,QAAQe;oBACdC,aAAahB,QAAQgB;oBACrBC,YAAYjB,QAAQiB;;;YAI1BC,aAAa;YACbC,qBAAqB;WAEvBzB,MAAM,SAASS,SAASiB;QAE1B,MAAMC,YACJnB,OAAOoB,QAAQ,IAAIC,QAAQC,cAAc,IACzCC,QACCC,MAAOA,GAAG5B,SAAS,cAAc4B,GAAGZ,SAASC,SAASf,QAAQe,SAC7D;QACJ,IAAIM,aAAaM,WAAW,OAAO;QACnC,MAAMC,QAAuBC,KAAKC,MAAMT,SAASP,SAASiB;QAC1D,OAAO;YAAS,MAAAC,OAAAJ,SAAA,oBAAAA,MAAAlB,WAAA,oBAAAkB,MAAAK;YAAA,OAAAL,SAAA,oBAAAA,SAAA,SAAAA,SAAAI,KAAAJ;AAAA,UAAT,GAASA,SAASA,MAAMK,QAAQ;AAAI;IAYhC1C,4BAAA2C,UAAWC,SAmBTC,aAAaD,OAAOjC;IAEnC,MAAMkC,eACJD;QASA,MAAME,OAAO,CACXC,UACAC,uBAEAH,aAAa;YACXE;YACAE,YAAYD,sBAAsBJ,MAAMK;YACxCC,QAAQN,MAAMM;;QAGlB,QAAQN,MAAMG,SAASxC;UACrB,KAAK;YAAS;gBACZ,IAAI4C,OAAO;gBACX,MAAMC,iBAAiBR,MAAMG,SAASM,MAAMC,OAAOC;gBACnD,IAAIC,WAAWJ,eAAeK;gBAE9B,OAAO,MAAM;oBACX,IAAID,SAASE,MAAM;wBACjB,OAAO;4BACL/C,QAAQ;4BACRwC;;;oBAGJ,IAAIA,QAAQP,MAAMK,WAAWU,QAAQ;wBACnC,OAAO;4BAAEhD,QAAQ;;;oBAGnB,MAAMA,SAASmC,KAAKU,SAASI,OAAOhB,MAAMK,WAAWY,MAAMV;oBAC3D,KAAKxC,OAAOA,QAAQ;wBAClB,KAAKiC,MAAMM,QAAQ;4BACjBC,QAAQ;4BACR;;wBAEF,OAAO;4BAAExC,QAAQ;;;oBAGnBwC,QAAQxC,OAAOwC;oBACfK,WAAWJ,eAAeK;;;;UAG9B,KAAK;YAAc;gBACjB,MAAMK,SAASlB,MAAMG,SAASgB;gBAC9B,MAAMpD,SAASiC,MAAMK,WAAWe,MAAMC,MAAOA,GAAGzC,SAASsC,OAAOtC;
gBAChE,IAAIb,QAAQ;oBACV,OAAO;wBAAEA;wBAAQwC,MAAM;;;gBAEzB,OAAO;oBACLxC;;;;UAGJ,KAAK;YACH,KAAK,MAAMoC,YAAYH,MAAMG,SAASmB,OAAO;gBAC3C,MAAMC,aAAarB,KAAKC;gBACxB,IAAIoB,WAAWxD,QAAQ;oBACrB,OAAOwD;;;YAIX,OAAO;gBAAExD,QAAQ;;;UACnB,KAAK;YAAS;gBAQZ,MAAMA,SAASiC,MAAMG,SAASqB,MAAMC,KAAKtB,YAAaD,KAAKC;gBAC3D,IAAIpC,OAAO2D,OAAOC,KAAMA,EAAE5D,UAAS;oBACjC,OAAO;wBACLA,QAAQ;wBACRwC,MAAMxC,OAAO6D,QAAO,CAACC,KAAKF,MAAMG,KAAKC,IAAIF,KAAKF,EAAEpB,QAAO;;;gBAI3D,OAAO;oBACLxC,QAAQ;;;;;AAKjB,EAhLD,CAAiBX,gCAAAA,8BAgLhB,CAAA;;ACvLK,IAAW4E;;CAAjB,SAAiBA;IACFA,SAAAC,QAASjB,SAA0Bc,KAAKI,MAAMlB,QAAQ,OAAO;AAC3E,EAFD,CAAiBgB,aAAAA,WAEhB,CAAA;;ACAK,IAAWG;;CAAjB,SAAiBA;IACFA,sBAAAC,cAAeC;QAC1B,IAAIA,iBAAiBC,OACnB,OAAO;eACFD;YACHzD,MAAMyD,MAAMzD;YACZQ,SAASiD,MAAMjD;YACfmD,OAAOF,MAAME;;QAEjB,OAAOF;AAAK;IAGDF,sBAAAK,iBAAkBrC;QAC7B,IAAIA,SAASxC,SAAS,cACpB,OAAO;YACLA,MAAMwC,SAASxC;YACfwD,WAAW;gBACTvC,MAAMuB,SAASgB,UAAUvC;gBACzBC,aAAasB,SAASgB,UAAUxC,SAASE;;gBAG1C,IAAIsB,SAASxC,SAAS,SACzB,OAAO;YACLA,MAAMwC,SAASxC;YACf8C,OAAON,SAASM,MAAMgB,IAAIU,sBAAAK;gBAEzB,IAAIrC,SAASxC,SAAS,SACzB,OAAO;YACLA,MAAMwC,SAASxC;YACf6D,OAAOrB,SAASqB,MAAMC,IAAIU,sBAAAK;gBAG5B,OAAO;YACL7E,MAAMwC,SAASxC;YACf2D,OAAOnB,SAASmB,MAAMG,IAAIU,sBAAAK;;AAC3B;AAEN,EArCD,CAAiBL,0BAAAA,wBAqChB,CAAA;;ACrCK,IAAWM;;CAAjB,SAAiBA;IACFA,uBAAAC,WAAYC;QACvB,IAAIA,EAAEhF,SAAS,QACb,OAAO,EAAC,aAAagF,EAAE/E,SAAS+E,EAAElE,MAAM,KAAID,KAAK,YAC9C,IAAImE,EAAEhF,SAAS,YAAYgF,EAAEhF,SAAS,UACzC,OAAO,EACL,OAAOgF,EAAEhF,SAAS,WAAW,WAAW,eACrCgF,EAAEtC,WACFoB,KAAKJ,MAAO,EACX,QAAQA,GAAGzC,QACX,mBAAmByC,GAAGuB,WAAWhE,QACjC,iBAAiByC,GAAG1C,SAASC,QAC7B,eAAeyC,GAAGwB,UAClB,SACMxB,GAAG1C,SAASE,aAAakC,SAC3B,EAACM,GAAG1C,SAASE,aAAa,OAC1B,OAELiE,SACHtE,KAAK,YACJ,IAAImE,EAAEhF,SAAS,YAClB,OAAO,EACL,mBACGgF,EAAEI,WAAWtB,KAAKuB,KAAM,OAAOA,EAAEpE,UACpC,OACG+D,EAAElE,KAAKwE,MAAM,MAAMxB,KAAKyB,KAAM,KAAKA,OACtC,KACA1E,KAAK;QACT,OAAO,EACL,eACA,aAAamE,EAAE/D,QACf,mBAAmB+D,EAAEC,WAAWhE,QAChC,iBAAiB+D,EAAEhE,SAASC,QAC5B,IACA,WACAc,KAAKyD,UAAUR,EAAE/C,WAAW,MAAM,IAClC,OACA,KACApB,KAAK;AAAK
;AAEf,EAxCD,CAAiBiE,2BAAAA,yBAwChB,CAAA;;ACpCK,IAAWW;;CAAjB,SAAiBA;IACFA,8BAAQV,WACnB3E,UAEAsF,OAAOC,YAAY,EACjB,EAAC,eAAeC,WAAWxF,cACxBA,OAAOyF,YACP/B,KAAKgC,OAAQ,EACZ,EAAC,KAAKA,IAAIC,SAAS9E,kBAAkB+E,qBAAqBF,WACvDA,IAAIG,OAAOnC,KAAI,CAACoC,OAAOC,MAAM,EAC9B,KAAKL,IAAIC,SAAS9E,QAAQkF,IAAI,KAAKD,MAAMlG,WACzCoG,qBAAqBF,OAAOC,WAG/BhB;IAGP,MAAMS,aAAcxF;QAClB,MAAM6F,SAAwC7F,OAAOyF,YAClD/B,KAAKE,KAAMA,EAAEiC,SACbd;QACH,MAAMkB,UACJJ,OACGnC,KAAKuB,KAAMA,EAAEiB,aAAaC,YAAYlB,EAAEmB,WAAWD,YACnDtC,QAAO,CAACwC,GAAGC,MAAMD,IAAIC,IAAG,KAAKT,OAAO7C;QACzC,OAAO,EACL,iCACA,cACA,oBACA,qBAAqBhD,OAAOyF,YAAYzC,OAAOuD,oBAC/C,gBAAgBV,OAAO7C,UACvB,kBAAkB6C,OAAOtE,QAAQ0D,KAAMA,EAAErF,SAAS,YAAWoD,UAC7D,kBAAkB6C,OAAOtE,QAAQ0D,KAAMA,EAAErF,SAAS,YAAWoD,UAC7D,uBAAuBiB,SAASC,MAAM+B,SAASM,uBAC/C,mBACA,gBAAgBvG,OAAOwG,MAAMC,MAAMF,oBACnC,gBACA,kBAAkBvG,OAAOwG,MAAME,OAAOD,MAAMF,oBAC5C,kBAAkBvG,OAAOwG,MAAME,OAAOC,MAAMJ,oBAC5C,mBAAmBvG,OAAOwG,MAAME,OAAOE,OAAOL,oBAC9C,qBACA,kBAAkBvG,OAAOwG,MAAMK,WAAWJ,MAAMF,oBAChD,gCAAgCvG,OAAOwG,MAAMK,WAAWC,oBAAoBP,oBAC5E,kBAAkBvG,OAAOwG,MAAMK,WAAWF,MAAMJ,oBAChD,sBAAsBvG,OAAOwG,MAAMK,WAAWE,UAAUR,oBACxD,gCAAgCvG,OAAOwG,MAAMK,WAAWG,oBAAoBT,oBAC5E,IACA,kBACA,qCACA,yCACGvG,OAAOyF,YAAY/B,KAAKgC,OACzB,EACE,IAAIA,IAAIC,SAAS9E,WAAW6E,IAAIC,SAAS9E,mBACzCoG,WACEvB,IAAIG,SACHZ,KAAMA,EAAErF,SAAS,WAAWqF,EAAEiC,WAAW,QAE5CD,WAAWvB,IAAIG,SAASZ,KAAMA,EAAErF,SAAS,WAAWqF,EAAE9C,SAAS,QAC/D,GAAG8B,SAASC,MACVwB,IAAIG,OACDnC,KAAKuB,KAAMA,EAAEiB,aAAaC,YAAYlB,EAAEmB,WAAWD,YACnDtC,QAAO,CAACwC,GAAGC,MAAMD,IAAIC,IAAG,KAAKZ,IAAIG,OAAO7C,QAC3CuD,wBACF9F,KAAK,WAETA,KAAK;AAAK;IAGd,MAAMmF,uBACJF,OAEO,EACL,KAAKA,IAAIC,SAAS9E,QAClB,cACA,mBAAmB6E,IAAIG,OAAO7C,OAAOuD,oBACrC,gBAAgBb,IAAIG,OAAOtE,QAAQ0D,KAAMA,EAAErF,SAAS,YAAWoD,UAC/D,gBAAgB0C,IAAIG,OAAOtE,QAAQ0D,KAAMA,EAAErF,SAAS,YAAWoD,UAC/D,qBAAqBiB,SAASC,MAC5BwB,IAAIG,OACDnC,KAAKuB,KAAMA,EAAEiB,aAAaC,YAAYlB,EAAEmB,WAAWD,YACnDtC,QAAO,CAACwC,GAAGC,MAAMD,IAAIC,IAAG,KAAKZ,IAAIG,OAAO7C,QAC3CuD,uBACF,IACA,aACA,uBACA,0BACGb,IAAIG,OAAOnC,KAAI,CAACuB,GAAGc,MACpB,EAC
E,IAAIA,IAAI,SAASA,IAAI,KAAKd,EAAErF,YAC5BqF,EAAErF,MACF,GAAGqE,SAASC,MAAMe,EAAEiB,aAAaC,YAAYlB,EAAEmB,WAAWD,kBAC1D1F,KAAK,UAET,IACA,eACA,mBACAiF,IAAIC,SAASjF,MACb,IACA,gBACA,WACAiB,KAAKyD,UACHhB,sBAAsBK,eAAeiB,IAAIC,SAASvD,WAClD,MACA,IAEF,QACA3B,KAAK;IAGT,MAAMuF,uBAAuB,CAC3BF,OACAqB,UAEO,EACL,KAAKA,UAAUrB,MAAMlG,QACrB,cACA,aAAakG,MAAMH,SAAS9E,QAC5B,aAAaiF,MAAMlG,QACnB,aAAaqE,SAASC,MACpB4B,MAAMI,aAAaC,YAAYL,MAAMM,WAAWD,WAChDI,0BACET,MAAMlG,SAAS,UACf,EACE,eAAekG,MAAMoB,SAAS,MAAM,OACpC,aAAapB,MAAM3D,OAAO,MAAM,UAElC,IACJ,oBAAoB2D,MAAMU,MAAMD,oBAChC,IACA,eACA,mBACAT,MAAMH,SAASjF,MACf,IACA,gBACA,WACAiB,KAAKyD,UACHhB,sBAAsBK,eAAeqB,MAAMH,SAASvD,WACpD,MACA,IAEF,OACA,IACA,0BACG0D,MAAMsB,QAAQ1D,IAAIgB,uBAAuBC,WAC5C,OACImB,MAAMlG,SAAS,UACf,EACE,YACA,WACA+B,KAAKyD,UACHhB,sBAAsBC,YAAYyB,MAAMxB,QACxC,MACA,IAEF,UAEF,KACJ7D,KAAK;IAGT,MAAMwG,aAAa,CACjBpB,QACA7D;QAEA,MAAMqF,QAAgBxB,OAAOtE,OAAOS,SAASgB;QAC7C,OACE,IAAIsE,MAAMD,OAAOE,KAAK,KAAK9G,KAAK,MAChC,IAAI6G,MAAM,KAAKD,OAAOE,KAAK,KAAK9G,KAAK;AAAG;AAG7C,EA1KD,CAAiB4E,kCAAAA,gCA0KhB,CAAA;;AC9KK,IAAWmC;;CAAjB,SAAiBA;IACFA,mBAAAC,OAAO,OAA4B;QAC9ChB,OAAO;QACPC,QAAQ;YACND,OAAO;YACPE,OAAO;YACPC,QAAQ;;QAEVC,YAAY;YACVJ,OAAO;YACPK,qBAAqB;YACrBH,OAAO;YACPI,WAAW;YACXC,qBAAqB;;;IAIZQ,mBAAIE,OAAG,CAClBrB,GACAC,OACyB;QACzBG,OAAOJ,EAAEI,QAAQH,EAAEG;QACnBC,QAAQ;YACND,OAAOJ,EAAEK,OAAOD,QAAQH,EAAEI,OAAOD;YACjCE,OAAON,EAAEK,OAAOC,QAAQL,EAAEI,OAAOC;YACjCC,QAAQP,EAAEK,OAAOE,SAASN,EAAEI,OAAOE;;QAErCC,YAAY;YACVJ,OAAOJ,EAAEQ,WAAWJ,QAAQH,EAAEO,WAAWJ;YACzCK,qBACET,EAAEQ,WAAWC,sBAAsBR,EAAEO,WAAWC;YAClDH,OAAON,EAAEQ,WAAWF,QAAQL,EAAEO,WAAWF;YACzCI,WAAWV,EAAEQ,WAAWE,YAAYT,EAAEO,WAAWE;YACjDC,qBACEX,EAAEQ,WAAWG,sBAAsBV,EAAEO,WAAWG;;;AAGvD,EArCD,CAAiBQ,uBAAAA,qBAqChB,CAAA;;MCRYG;IAWX,WAAAC,CAAmB3F;QACjB4F,KAAKC,SAAS7F,MAAMzC;QACpBqI,KAAKE,aAAa9F,MAAM+F,UAAU9E;QAClC2E,KAAKI,UAAU;YACbC,QAAQjG,MAAMkG,QAAQD,UAAU;YAChCE,cAAcnG,MAAMkG,QAAQC,gBAAgB;YAC5CtI,SAASmC,MAAMkG,QAAQrI,WAAW;;QAEpC+H,KAAKQ,UAAU;;IAmBV,aAAMC,CACXC;QAEA,MAAMnC,aAAmB,IAAIoC;QAC7B,MAAMC,YAAuB,IAAIC,UA
AUb,KAAKI,QAAQG;QACxD,MAAM3C,oBACEkD,QAAQC,IACZf,KAAKE,WAAWrE,KAAInE,MAAOoG;YACzB,MAAME,eAA8C8C,QAAQC,IAC1D,IAAItB,MAAMO,KAAKI,QAAQC,QAAQX,KAAK,GAAG7D,KAAInE;sBACnCkJ,UAAUI;gBAChB,MAAM5D,UAAuC4C,KAAKiB,KAAKnD;sBACjD8C,UAAUM;gBAChB,IAAIR,aAAa9G,WAAW8G,SAAStD;gBACrC,OAAOA;AAAC;YAGZ,OAAO;gBACLU;gBACAE;gBACAW,OAAOX,OACJtE,QAAQ0D,KAAMA,EAAErF,SAAS,UACzB8D,KAAKuB,KAAMA,EAAEuB,QACb3C,OAAO2D,mBAAmBE,MAAMF,mBAAmBC;;AACvD;QAGP,OAAQI,KAAKQ,UAAU;YACrB5C;YACAW;YACAF,cAAc,IAAIsC;YAClBhC,OAAOf,YACJ/B,KAAKkB,KAAMA,EAAE4B,QACb3C,OAAO2D,mBAAmBE,MAAMF,mBAAmBC;;;IAsBnD,MAAAuB;QACL,IAAInB,KAAKQ,YAAY,MACnB,MAAM,IAAI9D,MAAM;QAClB,OAAOc,8BAA8BV,SAASkD,KAAKQ;;IAG7C,UAAMS,CACZnD;QAEA,MAAMnG,QAAkBqI,KAAKC,OAAOmB;QACpC,MAAM7C,aAAmB,IAAIoC;QAC7B,MAAMxG,UAAU,MACd3C,4BAA4B2C,QAAQ;YAClCI,UAAUuD,SAASvD;YACnBE,YAAY9C,MACTE,qBACA6B,QAAQqD,KAAMA,EAAEhF,SAAS;YAC5B2C,QAAQ;;QAEZ,MAAM2G,MAAM;YACV,MAAMhC,SAAS7H,4BAA4B2C,QAAQ;gBACjDI,UAAUuD,SAASvD;gBACnBE,YAAY9C,MACTE,qBACA6B,QAAQqD,KAAMA,EAAEhF,SAAS,WACzB8D,KAAKkB,KAAMA,EAAEtC,aACbyC;gBACHxC,QAAQ;;YAEV,MAAMJ,OAAOH;YACb,OAAO;gBACLpC,MAAOuC,OAAO,YAAY;gBAC1BwD;gBACAuB;gBACA/E;gBACAiF,SAAS5H,MAAME;gBACf8G,OAAOhH,MAAM2J;gBACb/C;gBACAF,cAAc,IAAIsC;;AAC4B;QAGlD;kBACQhJ,MAAM4J,WAAWzD,SAASjF;YAChC,IAAIsB,WAAW,OAAOkH;YACtB,KAAK,IAAInD,IAAY,GAAGA,IAAI8B,KAAKI,QAAQnI,WAAWiG,GAAG;gBACrD,MAAMjD,aACEzD,4BAA4BC,OAAOE;gBAC3C,IAAIsD,SAAS,MAAM;sBAEbtD,MAAM4J,WAAWtG;gBACvB,IAAId,WAAW,OAAOkH;;YAExB,OAAOA;UACP,OAAO5E;YACP,OAAO;gBACL1E,MAAM;gBACN+F;gBACAyB,SAAS5H,MAAME;gBACf8G,OAAOhH,MAAM2J;gBACb7E;gBACA8B;gBACAF,cAAc,IAAIsC;;;;;;AClLpB,IAAWa;;CAAjB,SAAiBA;IACFA,gCAAQ1E,WACnB3E,UAEAsF,OAAOC,YAAY,EACjB,EAAC,eAAeC,WAAWxF,cACxBA,OAAOyF,YACP/B,KAAKgC,OAAQ,EACZ,EAAC,KAAKA,IAAIC,SAAS9E,kBAAkB+E,qBAAqBF,WACvDA,IAAIG,OAAOnC,KAAI,CAACoC,OAAOC,MAAM,EAC9B,KAAKL,IAAIC,SAAS9E,QAAQkF,IAAI,KAAKD,MAAMlG,WACzCoG,qBAAqBF,OAAOC,WAG/BhB;IAGP,MAAMS,aAAcxF;QAClB,MAAM6F,SAA0C7F,OAAOyF,YACpD/B,KAAKE,KAAMA,EAAEiC,SACbd;QACH,MAAMkB,UACJJ,OACGnC,KAAKuB,KAAMA,EAAEiB,aAAaC,YAAYlB,EAAEmB,WAAWD,YACnDtC,QAAO,C
AACwC,GAAGC,MAAMD,IAAIC,IAAG,KAAKT,OAAO7C;QACzC,OAAO,EACL,sCACA,cACA,oBACA,qBAAqBhD,OAAOyF,YAAYzC,OAAOuD,oBAC/C,gBAAgBV,OAAO7C,UACvB,kBAAkB6C,OAAOtE,QAAQ0D,KAAMA,EAAErF,SAAS,YAAWoD,UAC7D,kBAAkB6C,OAAOtE,QAAQ0D,KAAMA,EAAErF,SAAS,YAAWoD,UAC7D,uBAAuBiB,SAASC,MAAM+B,SAASM,uBAC/C,mBACA,gBAAgBvG,OAAOwG,MAAMC,MAAMF,oBACnC,gBACA,kBAAkBvG,OAAOwG,MAAME,OAAOD,MAAMF,oBAC5C,kBAAkBvG,OAAOwG,MAAME,OAAOC,MAAMJ,oBAC5C,mBAAmBvG,OAAOwG,MAAME,OAAOE,OAAOL,oBAC9C,qBACA,kBAAkBvG,OAAOwG,MAAMK,WAAWJ,MAAMF,oBAChD,gCAAgCvG,OAAOwG,MAAMK,WAAWC,oBAAoBP,oBAC5E,kBAAkBvG,OAAOwG,MAAMK,WAAWF,MAAMJ,oBAChD,sBAAsBvG,OAAOwG,MAAMK,WAAWE,UAAUR,oBACxD,gCAAgCvG,OAAOwG,MAAMK,WAAWG,oBAAoBT,oBAC5E,IACA,kBACA,+BACA,kCACGvG,OAAOyF,YAAY/B,KAAKgC,OACzB,EACE,IAAIA,IAAIC,SAAS9E,WAAW6E,IAAIC,SAAS9E,mBACzC;YACE,MAAMmB,UAAkB+B,KAAKI,MAC1BuB,IAAIG,OAAOtE,QAAQ0D,KAAMA,EAAErF,SAAS,YAAWoD,SAC9C0C,IAAIG,OAAO7C,SACX;YAEJ,OACE,IAAIsE,MAAMtF,SAASuF,KAAK,KAAK9G,KAAK,MAClC,IAAI6G,MAAM,KAAKtF,SAASuF,KAAK,KAAK9G,KAAK;AAE1C,UAVD,IAWAwD,SAASC,MACPwB,IAAIG,OACDnC,KACEoC,SACCA,MAAMI,aAAaC,YAAYL,MAAMM,WAAWD,YAEnDtC,QAAO,CAACwC,GAAGC,MAAMD,IAAIC,IAAG,KAAKZ,IAAIG,OAAO7C,QAC3CuD,mBAAmB,QACrB9F,KAAK,WAETA,KAAK;AAAK;IAGd,MAAMmF,uBACJF,OAEO,EACL,KAAKA,IAAIC,SAAS9E,QAClB,cACA,oBACA,gBAAgB6E,IAAIG,OAAO7C,UAC3B,kBAAkB0C,IAAIG,OAAOtE,QAAQ0D,KAAMA,EAAErF,SAAS,YAAWoD,UACjE,kBAAkB0C,IAAIG,OAAOtE,QAAQ0D,KAAMA,EAAErF,SAAS,YAAWoD,UACjE,uBAAuBiB,SAASC,MAC9BwB,IAAIG,OACDnC,KACEoC,SACCA,MAAMI,aAAaC,YAAYL,MAAMM,WAAWD,YAEnDtC,QAAO,CAACwC,GAAGC,MAAMD,IAAIC,IAAG,KAAKZ,IAAIG,OAAO7C,QAC3CuD,uBACF,mBACA,gBAAgBb,IAAIc,MAAMC,MAAMF,oBAChC,gBACA,kBAAkBb,IAAIc,MAAME,OAAOD,MAAMF,oBACzC,kBAAkBb,IAAIc,MAAME,OAAOC,MAAMJ,oBACzC,mBAAmBb,IAAIc,MAAME,OAAOE,OAAOL,oBAC3C,qBACA,kBAAkBb,IAAIc,MAAMK,WAAWJ,MAAMF,oBAC7C,gCAAgCb,IAAIc,MAAMK,WAAWC,oBAAoBP,oBACzE,kBAAkBb,IAAIc,MAAMK,WAAWF,MAAMJ,oBAC7C,sBAAsBb,IAAIc,MAAMK,WAAWE,UAAUR,oBACrD,gCAAgCb,IAAIc,MAAMK,WAAWG,oBAAoBT,oBACzE,IACA,aACA,qBACA,wBACGb,IAAIG,OAAOnC,KAAI,CAACuB,GAAGc,MACpB,EACE,IAAIA,IAAI,SAASA,IAAI,KAAKd,EAAErF,YAC5Bq
F,EAAErF,MACFqE,SAASC,MAAMe,EAAEiB,aAAaC,YAAYlB,EAAEmB,WAAWD,aACrD,QACF1F,KAAK,UAET,IACA,eACA,mBACAiF,IAAIC,SAASjF,MACb,IACA,gBACA,WACAiB,KAAKyD,UACHhB,sBAAsBK,eAAeiB,IAAIC,SAASvD,WAClD,MACA,IAEF,QACA3B,KAAK;IAGT,MAAMuF,uBAAuB,CAC3BF,OACAqB,UAEO,EACL,KAAKA,UAAUrB,MAAMlG,QACrB,cACA,aAAakG,MAAMH,SAAS9E,QAC5B,aAAaiF,MAAMlG,QACnB,cAAckG,MAAMI,aAAaC,YAAYL,MAAMM,WAAWD,WAAWI,0BACrET,MAAMlG,SAAS,UACf,EACE,mBACA,gBAAgBkG,MAAMU,MAAMC,MAAMF,oBAClC,gBACA,kBAAkBT,MAAMU,MAAME,OAAOD,MAAMF,oBAC3C,kBAAkBT,MAAMU,MAAME,OAAOC,MAAMJ,oBAC3C,mBAAmBT,MAAMU,MAAME,OAAOE,OAAOL,oBAC7C,qBACA,kBAAkBT,MAAMU,MAAMK,WAAWJ,MAAMF,oBAC/C,gCAAgCT,MAAMU,MAAMK,WAAWC,oBAAoBP,oBAC3E,kBAAkBT,MAAMU,MAAMK,WAAWF,MAAMJ,oBAC/C,sBAAsBT,MAAMU,MAAMK,WAAWE,UAAUR,oBACvD,gCAAgCT,MAAMU,MAAMK,WAAWG,oBAAoBT,uBAE7E,IACJ,IACA,eACA,mBACAT,MAAMH,SAASjF,MACf,IACA,gBACA,WACAiB,KAAKyD,UACHhB,sBAAsBK,eAAeqB,MAAMH,SAASvD,WACpD,MACA,IAEF,OACA,OACI0D,MAAMlG,SAAS,aAAakG,MAAMlG,SAAS,YAC3C,EACE,gBACGkG,MAAMwD,SAAS5F,KAAKyB,KACrB,EACE,OAAOA,EAAEtE,QACT,qBAAqBsE,EAAEN,WAAWhE,UAClC,mBAAmBsE,EAAEvE,SAASC,UAC9B,eAAesE,EAAEL,UACjB,OACIK,EAAEvE,SAASE,cAAc,EAACqE,EAAEvE,SAASE,aAAa,OAAM,KAC5DL,KAAK,YAGX,OACAqF,MAAMlG,SAAS,UACf,EACE,YACA,WACAwE,sBAAsBC,YACpB1C,KAAKyD,UAAUU,MAAMxB,OAAO,MAAM,KAEpC,OACA,OAEF,KACJ7D,KAAK;AAEV,EA3MD,CAAiB4I,oCAAAA,kCA2MhB,CAAA;;MChLYE;IAYX,WAAA3B,CAAmB3F;QACjB4F,KAAKC,SAAS7F,MAAMzC;QACpBqI,KAAKE,aAAa9F,MAAM+F,UAAU9E;QAClC2E,KAAKI,UAAU;YACbC,QAAQjG,MAAMkG,QAAQD,UAAU;YAChCE,cAAcnG,MAAMkG,QAAQC,gBAAgB;;QAE9CP,KAAK2B,aAAavH,MAAMzC,MAAME,qBAAqBwD;QACnD2E,KAAKQ,UAAU;;IAmBV,aAAMC,CACXC;QAEA,MAAMnC,aAAmB,IAAIoC;QAC7B,MAAMC,YAAuB,IAAIC,UAAUb,KAAKI,QAAQG;QACxD,MAAM3C,oBACEkD,QAAQC,IACZf,KAAKE,WAAWrE,KAAInE,MAAOoG;YACzB,MAAME,eAAgD8C,QAAQC,IAC5D,IAAItB,MAAMO,KAAKI,QAAQC,QAAQX,KAAK,GAAG7D,KAAInE;sBACnCkJ,UAAUI;gBAChB,MAAM5D,UACE4C,KAAKiB,KAAKnD;sBACZ8C,UAAUM;gBAChB,IAAIR,aAAa9G,WAAW8G,SAAStD;gBACrC,OAAOA;AAAC;YAGZ,OAAO;gBACLU;gBACAE;gBACAW,OAAOX,OACJtE,QAAQ0D,KAAMA,EAAErF,SAAS,UACzB8D,KAAKuB,KAAMA,EAAEuB,QACb3C,OAAO2D,mBAAmBE,MA
AMF,mBAAmBC;;AACvD;QAGP,OAAQI,KAAKQ,UAAU;YACrB5C;YACAW;YACAF,cAAc,IAAIsC;YAClBhC,OAAOf,YACJ/B,KAAKkB,KAAMA,EAAE4B,QACb3C,OAAO2D,mBAAmBE,MAAMF,mBAAmBC;;;IAuBnD,MAAAuB;QACL,IAAInB,KAAKQ,YAAY,MACnB,MAAM,IAAI9D,MAAM;QAClB,OAAO8E,gCAAgC1E,SAASkD,KAAKQ;;IAG/C,UAAMS,CACZnD;QAEA,MAAMS,aAAmB,IAAIoC;QAC7B;YACE,MAAMhC,QAA6B;gBACjCC,OAAO;gBACPC,QAAQ;oBACND,OAAO;oBACPE,OAAO;oBACPC,QAAQ;;gBAEVC,YAAY;oBACVJ,OAAO;oBACPK,qBAAqB;oBACrBH,OAAO;oBACPI,WAAW;oBACXC,qBAAqB;;;YAGzB,MAAMI,gBACEqC,2BAA2BnB,QAAQ;mBACpCT,KAAKC,OAAO4B,WAAW;oBACxBhD,QAAQ;wBACN9G,MAAM;wBACNC,MAAM;wBACNa,MAAMiF,SAASjF;;oBAEjB8F;;gBAEFmD,WAAW9B,KAAK2B,WAAWtG;gBAC3BsB,OAAO;gBACPoF,OAAO,MAAM;gBACbC,UAAUtK;;YAEd,MAAM+J,WAA0ClC,QAC7C7F,QAAQqD,KAAMA,EAAEhF,SAAS,WACzB8D,KAAKkB,KAAMA,EAAEtC,aACbyC;YACH,OAAO;gBACLnF,MAAMP,4BAA4B2C,QAAQ;oBACxCI,UAAUuD,SAASvD;oBACnBE,YAAYgH;qBAEV,YACA;gBACJ3D;gBACA2D;gBACA9C;gBACAsD,kBAAkB1C,QACf7F,QAAQqD,KAAMA,EAAEhF,SAAS,SACzB2B,QACEqD,KACCA,EAAE/E,SAAS;gBAEjBuG;gBACAF,cAAc,IAAIsC;;UAIpB,OAAOlE;YACP,OAAO;gBACL1E,MAAM;gBACN+F;gBACArB;gBACA8B;gBACAF,cAAc,IAAIsC;;;;;;"}
@@ -0,0 +1,32 @@
1
+ import { Agentica, IAgenticaOperation, IAgenticaPrompt } from "@agentica/core";
2
+ import { IAgenticaBenchmarkExpected } from "../structures/IAgenticaBenchmarkExpected";
3
+ export declare namespace AgenticaBenchmarkPredicator {
4
+ const isNext: (agent: Agentica) => Promise<string | null>; // NOTE(review): undocumented here; per the signature it resolves to a string or `null` - confirm semantics against the implementation.
5
+ /**
6
+ * Check whether the called operations match the expected operations.
7
+ *
8
+ * @param props The expected operations, the operations to compare against
9
+ * them, and the optional strictness flag
10
+ *
11
+ * @returns `true` if the called operations match the expected operations,
12
+ * otherwise `false`.
13
+ */
14
+ const success: (props: {
15
+ /**
16
+ * Expected operations to be called.
17
+ *
18
+ * When an 'allOf' is nested inside an 'array', matching of the next expected element resumes from the operation right after the last one matched by that 'allOf'.
19
+ */
20
+ expected: IAgenticaBenchmarkExpected;
21
+ /**
22
+ * Operations (or function executions) to be compared with `expected`.
23
+ */
24
+ operations: Array<IAgenticaOperation | IAgenticaPrompt.IExecute>;
25
+ /**
26
+ * When `false`, an 'array' expectation is checked leniently: it may still pass even if something unexpected appears between the expected elements.
27
+ *
28
+ * @default `false`
29
+ */
30
+ strict?: boolean;
31
+ }) => boolean;
32
+ }