@agentica/benchmark 0.7.0-dev.20250224-2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72) hide show
  1. package/LICENSE +21 -0
  2. package/lib/AgenticaCallBenchmark.d.ts +137 -0
  3. package/lib/AgenticaCallBenchmark.js +187 -0
  4. package/lib/AgenticaCallBenchmark.js.map +1 -0
  5. package/lib/AgenticaSelectBenchmark.d.ts +123 -0
  6. package/lib/AgenticaSelectBenchmark.js +185 -0
  7. package/lib/AgenticaSelectBenchmark.js.map +1 -0
  8. package/lib/index.d.ts +2 -0
  9. package/lib/index.js +19 -0
  10. package/lib/index.js.map +1 -0
  11. package/lib/index.mjs +449 -0
  12. package/lib/index.mjs.map +1 -0
  13. package/lib/internal/AgenticaBenchmarkPredicator.d.ts +32 -0
  14. package/lib/internal/AgenticaBenchmarkPredicator.js +179 -0
  15. package/lib/internal/AgenticaBenchmarkPredicator.js.map +1 -0
  16. package/lib/internal/AgenticaBenchmarkUtil.d.ts +5 -0
  17. package/lib/internal/AgenticaBenchmarkUtil.js +37 -0
  18. package/lib/internal/AgenticaBenchmarkUtil.js.map +1 -0
  19. package/lib/internal/AgenticaCallBenchmarkReporter.d.ts +4 -0
  20. package/lib/internal/AgenticaCallBenchmarkReporter.js +136 -0
  21. package/lib/internal/AgenticaCallBenchmarkReporter.js.map +1 -0
  22. package/lib/internal/AgenticaPromptReporter.d.ts +4 -0
  23. package/lib/internal/AgenticaPromptReporter.js +49 -0
  24. package/lib/internal/AgenticaPromptReporter.js.map +1 -0
  25. package/lib/internal/AgenticaSelectBenchmarkReporter.d.ts +1 -0
  26. package/lib/internal/AgenticaSelectBenchmarkReporter.js +172 -0
  27. package/lib/internal/AgenticaSelectBenchmarkReporter.js.map +1 -0
  28. package/lib/structures/IAgenticaBenchmarkExpected.d.ts +44 -0
  29. package/lib/structures/IAgenticaBenchmarkExpected.js +3 -0
  30. package/lib/structures/IAgenticaBenchmarkExpected.js.map +1 -0
  31. package/lib/structures/IAgenticaCallBenchmarkEvent.d.ts +95 -0
  32. package/lib/structures/IAgenticaCallBenchmarkEvent.js +3 -0
  33. package/lib/structures/IAgenticaCallBenchmarkEvent.js.map +1 -0
  34. package/lib/structures/IAgenticaCallBenchmarkResult.d.ts +62 -0
  35. package/lib/structures/IAgenticaCallBenchmarkResult.js +3 -0
  36. package/lib/structures/IAgenticaCallBenchmarkResult.js.map +1 -0
  37. package/lib/structures/IAgenticaCallBenchmarkScenario.d.ts +36 -0
  38. package/lib/structures/IAgenticaCallBenchmarkScenario.js +3 -0
  39. package/lib/structures/IAgenticaCallBenchmarkScenario.js.map +1 -0
  40. package/lib/structures/IAgenticaSelectBenchmarkEvent.d.ts +92 -0
  41. package/lib/structures/IAgenticaSelectBenchmarkEvent.js +3 -0
  42. package/lib/structures/IAgenticaSelectBenchmarkEvent.js.map +1 -0
  43. package/lib/structures/IAgenticaSelectBenchmarkResult.d.ts +62 -0
  44. package/lib/structures/IAgenticaSelectBenchmarkResult.js +3 -0
  45. package/lib/structures/IAgenticaSelectBenchmarkResult.js.map +1 -0
  46. package/lib/structures/IAgenticaSelectBenchmarkScenario.d.ts +36 -0
  47. package/lib/structures/IAgenticaSelectBenchmarkScenario.js +3 -0
  48. package/lib/structures/IAgenticaSelectBenchmarkScenario.js.map +1 -0
  49. package/lib/utils/MathUtil.d.ts +3 -0
  50. package/lib/utils/MathUtil.js +8 -0
  51. package/lib/utils/MathUtil.js.map +1 -0
  52. package/lib/utils/TokenUsageComputer.d.ts +5 -0
  53. package/lib/utils/TokenUsageComputer.js +37 -0
  54. package/lib/utils/TokenUsageComputer.js.map +1 -0
  55. package/package.json +57 -0
  56. package/src/AgenticaCallBenchmark.ts +259 -0
  57. package/src/AgenticaSelectBenchmark.ts +262 -0
  58. package/src/index.ts +3 -0
  59. package/src/internal/AgenticaBenchmarkPredicator.ts +216 -0
  60. package/src/internal/AgenticaBenchmarkUtil.ts +40 -0
  61. package/src/internal/AgenticaCallBenchmarkReporter.ts +177 -0
  62. package/src/internal/AgenticaPromptReporter.ts +43 -0
  63. package/src/internal/AgenticaSelectBenchmarkReporter.ts +212 -0
  64. package/src/structures/IAgenticaBenchmarkExpected.ts +58 -0
  65. package/src/structures/IAgenticaCallBenchmarkEvent.ts +109 -0
  66. package/src/structures/IAgenticaCallBenchmarkResult.ts +69 -0
  67. package/src/structures/IAgenticaCallBenchmarkScenario.ts +39 -0
  68. package/src/structures/IAgenticaSelectBenchmarkEvent.ts +110 -0
  69. package/src/structures/IAgenticaSelectBenchmarkResult.ts +69 -0
  70. package/src/structures/IAgenticaSelectBenchmarkScenario.ts +39 -0
  71. package/src/utils/MathUtil.ts +3 -0
  72. package/src/utils/TokenUsageComputer.ts +40 -0
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Wrtn Technologies
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,137 @@
1
+ import { Agentica } from "@agentica/core";
2
+ import { tags } from "typia";
3
+ import { IAgenticaCallBenchmarkEvent } from "./structures/IAgenticaCallBenchmarkEvent";
4
+ import { IAgenticaCallBenchmarkResult } from "./structures/IAgenticaCallBenchmarkResult";
5
+ import { IAgenticaCallBenchmarkScenario } from "./structures/IAgenticaCallBenchmarkScenario";
6
+ /**
7
+ * LLM function calling benchmark.
8
+ *
9
+ * `AgenticaCallBenchmark` is a class for the benchmark of the
10
+ * LLM (Large Language Model) function calling part. It utilizes both
11
+ * `selector` and `caller` agents and tests whether the expected
12
+ * {@link IAgenticaOperation operations} are properly selected and
13
+ * called from the given
14
+ * {@link IAgenticaCallBenchmarkScenario scenarios}.
15
+ *
16
+ * Note that, this `AgenticaCallBenchmark` consumes a lot of time and
17
+ * LLM token costs because it needs the whole process of the
18
+ * {@link Agentica} class with a lot of repetitions. If you don't want
19
+ * such a heavy benchmark, consider using
20
+ * {@link AgenticaSelectBenchmark} instead. In my experience,
21
+ * {@link Agentica} does not fail at function calling, so the function
22
+ * selection benchmark is much more economical.
23
+ *
24
+ * @author Samchon
25
+ */
26
+ export declare class AgenticaCallBenchmark {
27
+ private agent_;
28
+ private scenarios_;
29
+ private config_;
30
+ private result_;
31
+ /**
32
+ * Initializer Constructor.
33
+ *
34
+ * @param props Properties of the call benchmark
35
+ */
36
+ constructor(props: AgenticaCallBenchmark.IProps);
37
+ /**
38
+ * Execute the benchmark.
39
+ *
40
+ * Execute the benchmark of the LLM function calling, and returns
41
+ * the result of the benchmark.
42
+ *
43
+ * If you wanna see progress of the benchmark, you can pass a callback
44
+ * function as the argument of the `listener`. The callback function
45
+ * would be called whenever a benchmark event occurs.
46
+ *
47
+ * Also, you can publish a markdown format report by calling
48
+ * the {@link report} function after the benchmark execution.
49
+ *
50
+ * @param listener Callback function listening to the benchmark events
51
+ * @returns Results of the function calling benchmark
52
+ */
53
+ execute(listener?: (event: IAgenticaCallBenchmarkEvent) => void): Promise<IAgenticaCallBenchmarkResult>;
54
+ /**
55
+ * Report the benchmark result as markdown files.
56
+ *
57
+ * Report the benchmark result {@link execute}d by
58
+ * `AgenticaCallBenchmark` as markdown files, and returns a dictionary
59
+ * object of the markdown reporting files. The key of the dictionary
60
+ * would be file name, and the value would be the markdown content.
61
+ *
62
+ * For reference, the markdown files are composed like below:
63
+ *
64
+ * - `./README.md`
65
+ * - `./scenario-1/README.md`
66
+ * - `./scenario-1/1.success.md`
67
+ * - `./scenario-1/2.failure.md`
68
+ * - `./scenario-1/3.error.md`
69
+ *
70
+ * @returns Dictionary of markdown files.
71
+ */
72
+ report(): Record<string, string>;
73
+ private step;
74
+ }
75
+ export declare namespace AgenticaCallBenchmark {
76
+ /**
77
+ * Properties of the {@link AgenticaCallBenchmark} constructor.
78
+ */
79
+ interface IProps {
80
+ /**
81
+ * AI agent instance.
82
+ */
83
+ agent: Agentica;
84
+ /**
85
+ * List of scenarios that you expect.
86
+ */
87
+ scenarios: IAgenticaCallBenchmarkScenario[];
88
+ /**
89
+ * Configuration for the benchmark.
90
+ */
91
+ config?: Partial<IConfig>;
92
+ }
93
+ /**
94
+ * Configuration for the benchmark.
95
+ *
96
+ * `AgenticaCallBenchmark.IConfig` is a data structure which
97
+ * represents a configuration for the benchmark, especially the
98
+ * capacity information of the benchmark execution.
99
+ */
100
+ interface IConfig {
101
+ /**
102
+ * Repeat count.
103
+ *
104
+ * The number of times to repeat the benchmark execution
105
+ * for each scenario.
106
+ *
107
+ * @default 10
108
+ */
109
+ repeat: number & tags.Type<"uint32"> & tags.Minimum<1>;
110
+ /**
111
+ * Simultaneous count.
112
+ *
113
+ * The maximum number of concurrent runs for the parallel benchmark
114
+ * execution.
115
+ *
116
+ * If you configure this property greater than `1`, the benchmark
117
+ * for each scenario would be executed in parallel in the given
118
+ * count.
119
+ *
120
+ * @default 10
121
+ */
122
+ simultaneous: number & tags.Type<"uint32"> & tags.Minimum<1>;
123
+ /**
124
+ * Number of consents.
125
+ *
126
+ * AI agent sometimes asks user to consent to the function
127
+ * calling, and perform it at the next step.
128
+ *
129
+ * This property represents the number of consents to allow.
130
+ * If the number of consents from the AI agent exceeds the
131
+ * configured value, the benchmark will fail.
132
+ *
133
+ * @default 3
134
+ */
135
+ consent: number;
136
+ }
137
+ }
@@ -0,0 +1,187 @@
1
+ "use strict";
2
+ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
3
+ function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
4
+ return new (P || (P = Promise))(function (resolve, reject) {
5
+ function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
6
+ function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
7
+ function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
8
+ step((generator = generator.apply(thisArg, _arguments || [])).next());
9
+ });
10
+ };
11
+ Object.defineProperty(exports, "__esModule", { value: true });
12
+ exports.AgenticaCallBenchmark = void 0;
13
+ const tstl_1 = require("tstl");
14
+ const AgenticaBenchmarkPredicator_1 = require("./internal/AgenticaBenchmarkPredicator");
15
+ const AgenticaCallBenchmarkReporter_1 = require("./internal/AgenticaCallBenchmarkReporter");
16
+ const TokenUsageComputer_1 = require("./utils/TokenUsageComputer");
17
+ /**
18
+ * LLM function calling selection benchmark.
19
+ *
20
+ * `AgenticaCallBenchmark` is a class for the benchmark of the
21
+ * LLM (Large Language Model) function calling part. It utilizes both
22
+ * `selector` and `caller` agents and tests whether the expected
23
+ * {@link IAgenticaOperation operations} are properly selected and
24
+ * called from the given
25
+ * {@link IAgenticaCallBenchmarkScenario scenarios}.
26
+ *
27
+ * Note that, this `AgenticaCallBenchmark` consumes a lot of time and
28
+ * LLM token costs because it needs the whole process of the
29
+ * {@link Agentica} class with a lot of repetitions. If you don't want
30
+ * such a heavy benchmark, consider using
31
+ * {@link AgenticaSelectBenchmark} instead. In my experience,
32
+ * {@link Agentica} does not fail to function calling, so the function
33
+ * selection benchmark is much more economical.
34
+ *
35
+ * @author Samchon
36
+ */
37
+ class AgenticaCallBenchmark {
38
+ /**
39
+ * Initializer Constructor.
40
+ *
41
+ * @param props Properties of the selection benchmark
42
+ */
43
+ constructor(props) {
44
+ var _a, _b, _c, _d, _e, _f;
45
+ this.agent_ = props.agent;
46
+ this.scenarios_ = props.scenarios.slice();
47
+ this.config_ = {
48
+ repeat: (_b = (_a = props.config) === null || _a === void 0 ? void 0 : _a.repeat) !== null && _b !== void 0 ? _b : 10,
49
+ simultaneous: (_d = (_c = props.config) === null || _c === void 0 ? void 0 : _c.simultaneous) !== null && _d !== void 0 ? _d : 10,
50
+ consent: (_f = (_e = props.config) === null || _e === void 0 ? void 0 : _e.consent) !== null && _f !== void 0 ? _f : 3,
51
+ };
52
+ this.result_ = null;
53
+ }
54
+ /**
55
+ * Execute the benchmark.
56
+ *
57
+ * Execute the benchmark of the LLM function calling, and returns
58
+ * the result of the benchmark.
59
+ *
60
+ * If you wanna see progress of the benchmark, you can pass a callback
61
+ * function as the argument of the `listener`. The callback function
62
+ * would be called whenever a benchmark event occurs.
63
+ *
64
+ * Also, you can publish a markdown format report by calling
65
+ * the {@link report} function after the benchmark execution.
66
+ *
67
+ * @param listener Callback function listening the benchmark events
68
+ * @returns Results of the function calling benchmark
69
+ */
70
+ execute(listener) {
71
+ return __awaiter(this, void 0, void 0, function* () {
72
+ const started_at = new Date();
73
+ const semaphore = new tstl_1.Semaphore(this.config_.simultaneous);
74
+ const experiments = yield Promise.all(this.scenarios_.map((scenario) => __awaiter(this, void 0, void 0, function* () {
75
+ const events = yield Promise.all(new Array(this.config_.repeat).fill(0).map(() => __awaiter(this, void 0, void 0, function* () {
76
+ yield semaphore.acquire();
77
+ const e = yield this.step(scenario);
78
+ yield semaphore.release();
79
+ if (listener !== undefined)
80
+ listener(e);
81
+ return e;
82
+ })));
83
+ return {
84
+ scenario,
85
+ events,
86
+ usage: events
87
+ .filter((e) => e.type !== "error")
88
+ .map((e) => e.usage)
89
+ .reduce(TokenUsageComputer_1.TokenUsageComputer.plus, TokenUsageComputer_1.TokenUsageComputer.zero()),
90
+ };
91
+ })));
92
+ return (this.result_ = {
93
+ experiments,
94
+ started_at,
95
+ completed_at: new Date(),
96
+ usage: experiments
97
+ .map((p) => p.usage)
98
+ .reduce(TokenUsageComputer_1.TokenUsageComputer.plus, TokenUsageComputer_1.TokenUsageComputer.zero()),
99
+ });
100
+ });
101
+ }
102
+ /**
103
+ * Report the benchmark result as markdown files.
104
+ *
105
+ * Report the benchmark result {@link execute}d by
106
+ * `AgenticaCallBenchmark` as markdown files, and returns a dictionary
107
+ * object of the markdown reporting files. The key of the dictionary
108
+ * would be file name, and the value would be the markdown content.
109
+ *
110
+ * For reference, the markdown files are composed like below:
111
+ *
112
+ * - `./README.md`
113
+ * - `./scenario-1/README.md`
114
+ * - `./scenario-1/1.success.md`
115
+ * - `./scenario-1/2.failure.md`
116
+ * - `./scenario-1/3.error.md`
117
+ *
118
+ * @returns Dictionary of markdown files.
119
+ */
120
+ report() {
121
+ if (this.result_ === null)
122
+ throw new Error("Benchmark is not executed yet.");
123
+ return AgenticaCallBenchmarkReporter_1.AgenticaCallBenchmarkReporter.markdown(this.result_);
124
+ }
125
+ step(scenario) {
126
+ return __awaiter(this, void 0, void 0, function* () {
127
+ const agent = this.agent_.clone();
128
+ const started_at = new Date();
129
+ const success = () => AgenticaBenchmarkPredicator_1.AgenticaBenchmarkPredicator.success({
130
+ expected: scenario.expected,
131
+ operations: agent
132
+ .getPromptHistories()
133
+ .filter((p) => p.type === "execute"),
134
+ strict: false,
135
+ });
136
+ const out = () => {
137
+ const select = AgenticaBenchmarkPredicator_1.AgenticaBenchmarkPredicator.success({
138
+ expected: scenario.expected,
139
+ operations: agent
140
+ .getPromptHistories()
141
+ .filter((p) => p.type === "select")
142
+ .map((p) => p.operations)
143
+ .flat(),
144
+ strict: false,
145
+ });
146
+ const call = success();
147
+ return {
148
+ type: (call ? "success" : "failure"),
149
+ scenario,
150
+ select,
151
+ call,
152
+ prompts: agent.getPromptHistories(),
153
+ usage: agent.getTokenUsage(),
154
+ started_at,
155
+ completed_at: new Date(),
156
+ };
157
+ };
158
+ try {
159
+ yield agent.conversate(scenario.text);
160
+ if (success())
161
+ return out();
162
+ for (let i = 0; i < this.config_.consent; ++i) {
163
+ const next = yield AgenticaBenchmarkPredicator_1.AgenticaBenchmarkPredicator.isNext(agent);
164
+ if (next === null)
165
+ break;
166
+ yield agent.conversate(next);
167
+ if (success())
168
+ return out();
169
+ }
170
+ return out();
171
+ }
172
+ catch (error) {
173
+ return {
174
+ type: "error",
175
+ scenario,
176
+ prompts: agent.getPromptHistories(),
177
+ usage: agent.getTokenUsage(),
178
+ error,
179
+ started_at,
180
+ completed_at: new Date(),
181
+ };
182
+ }
183
+ });
184
+ }
185
+ }
186
+ exports.AgenticaCallBenchmark = AgenticaCallBenchmark;
187
+ //# sourceMappingURL=AgenticaCallBenchmark.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"AgenticaCallBenchmark.js","sourceRoot":"","sources":["../src/AgenticaCallBenchmark.ts"],"names":[],"mappings":";;;;;;;;;;;;AACA,+BAAiC;AAGjC,wFAAqF;AACrF,4FAAyF;AAIzF,mEAAgE;AAEhE;;;;;;;;;;;;;;;;;;;GAmBG;AACH,MAAa,qBAAqB;IAMhC;;;;OAIG;IACH,YAAmB,KAAmC;;QACpD,IAAI,CAAC,MAAM,GAAG,KAAK,CAAC,KAAK,CAAC;QAC1B,IAAI,CAAC,UAAU,GAAG,KAAK,CAAC,SAAS,CAAC,KAAK,EAAE,CAAC;QAC1C,IAAI,CAAC,OAAO,GAAG;YACb,MAAM,EAAE,MAAA,MAAA,KAAK,CAAC,MAAM,0CAAE,MAAM,mCAAI,EAAE;YAClC,YAAY,EAAE,MAAA,MAAA,KAAK,CAAC,MAAM,0CAAE,YAAY,mCAAI,EAAE;YAC9C,OAAO,EAAE,MAAA,MAAA,KAAK,CAAC,MAAM,0CAAE,OAAO,mCAAI,CAAC;SACpC,CAAC;QACF,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC;IACtB,CAAC;IAED;;;;;;;;;;;;;;;OAeG;IACU,OAAO,CAClB,QAAuD;;YAEvD,MAAM,UAAU,GAAS,IAAI,IAAI,EAAE,CAAC;YACpC,MAAM,SAAS,GAAc,IAAI,gBAAS,CAAC,IAAI,CAAC,OAAO,CAAC,YAAY,CAAC,CAAC;YACtE,MAAM,WAAW,GACf,MAAM,OAAO,CAAC,GAAG,CACf,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,CAAO,QAAQ,EAAE,EAAE;gBACrC,MAAM,MAAM,GAAkC,MAAM,OAAO,CAAC,GAAG,CAC7D,IAAI,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,GAAS,EAAE;oBACpD,MAAM,SAAS,CAAC,OAAO,EAAE,CAAC;oBAC1B,MAAM,CAAC,GAAgC,MAAM,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;oBACjE,MAAM,SAAS,CAAC,OAAO,EAAE,CAAC;oBAC1B,IAAI,QAAQ,KAAK,SAAS;wBAAE,QAAQ,CAAC,CAAC,CAAC,CAAC;oBACxC,OAAO,CAAC,CAAC;gBACX,CAAC,CAAA,CAAC,CACH,CAAC;gBACF,OAAO;oBACL,QAAQ;oBACR,MAAM;oBACN,KAAK,EAAE,MAAM;yBACV,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,OAAO,CAAC;yBACjC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC;yBACnB,MAAM,CAAC,uCAAkB,CAAC,IAAI,EAAE,uCAAkB,CAAC,IAAI,EAAE,CAAC;iBAC9D,CAAC;YACJ,CAAC,CAAA,CAAC,CACH,CAAC;YACJ,OAAO,CAAC,IAAI,CAAC,OAAO,GAAG;gBACrB,WAAW;gBACX,UAAU;gBACV,YAAY,EAAE,IAAI,IAAI,EAAE;gBACxB,KAAK,EAAE,WAAW;qBACf,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC;qBACnB,MAAM,CAAC,uCAAkB,CAAC,IAAI,EAAE,uCAAkB,CAAC,IAAI,EAAE,CAAC;aAC9D,CAAC,CAAC;QACL,CAAC;KAAA;IAED;;;;;;;;;;;;;;;;;OAiBG;IACI,MAAM;QACX,IAAI,IAAI,CAAC,OAAO,KAAK,IAAI;YACvB,MAAM,IAAI,KAAK,CAAC,gCAAgC,CAAC,CAAC;QACpD,OAAO,6DAA6B,C
AAC,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IAC9D,CAAC;IAEa,IAAI,CAChB,QAAwC;;YAExC,MAAM,KAAK,GAAa,IAAI,CAAC,MAAM,CAAC,KAAK,EAAE,CAAC;YAC5C,MAAM,UAAU,GAAS,IAAI,IAAI,EAAE,CAAC;YACpC,MAAM,OAAO,GAAG,GAAG,EAAE,CACnB,yDAA2B,CAAC,OAAO,CAAC;gBAClC,QAAQ,EAAE,QAAQ,CAAC,QAAQ;gBAC3B,UAAU,EAAE,KAAK;qBACd,kBAAkB,EAAE;qBACpB,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,SAAS,CAAC;gBACtC,MAAM,EAAE,KAAK;aACd,CAAC,CAAC;YACL,MAAM,GAAG,GAAG,GAAgC,EAAE;gBAC5C,MAAM,MAAM,GAAG,yDAA2B,CAAC,OAAO,CAAC;oBACjD,QAAQ,EAAE,QAAQ,CAAC,QAAQ;oBAC3B,UAAU,EAAE,KAAK;yBACd,kBAAkB,EAAE;yBACpB,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,QAAQ,CAAC;yBAClC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,UAAU,CAAC;yBACxB,IAAI,EAAE;oBACT,MAAM,EAAE,KAAK;iBACd,CAAC,CAAC;gBACH,MAAM,IAAI,GAAG,OAAO,EAAE,CAAC;gBACvB,OAAO;oBACL,IAAI,EAAE,CAAC,IAAI,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,SAAS,CAAc;oBACjD,QAAQ;oBACR,MAAM;oBACN,IAAI;oBACJ,OAAO,EAAE,KAAK,CAAC,kBAAkB,EAAE;oBACnC,KAAK,EAAE,KAAK,CAAC,aAAa,EAAE;oBAC5B,UAAU;oBACV,YAAY,EAAE,IAAI,IAAI,EAAE;iBACsB,CAAC;YACnD,CAAC,CAAC;YAEF,IAAI,CAAC;gBACH,MAAM,KAAK,CAAC,UAAU,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC;gBACtC,IAAI,OAAO,EAAE;oBAAE,OAAO,GAAG,EAAE,CAAC;gBAC5B,KAAK,IAAI,CAAC,GAAW,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,OAAO,CAAC,OAAO,EAAE,EAAE,CAAC,EAAE,CAAC;oBACtD,MAAM,IAAI,GACR,MAAM,yDAA2B,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;oBAClD,IAAI,IAAI,KAAK,IAAI;wBAAE,MAAM;oBAEzB,MAAM,KAAK,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC;oBAC7B,IAAI,OAAO,EAAE;wBAAE,OAAO,GAAG,EAAE,CAAC;gBAC9B,CAAC;gBACD,OAAO,GAAG,EAAE,CAAC;YACf,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACf,OAAO;oBACL,IAAI,EAAE,OAAO;oBACb,QAAQ;oBACR,OAAO,EAAE,KAAK,CAAC,kBAAkB,EAAE;oBACnC,KAAK,EAAE,KAAK,CAAC,aAAa,EAAE;oBAC5B,KAAK;oBACL,UAAU;oBACV,YAAY,EAAE,IAAI,IAAI,EAAE;iBACzB,CAAC;YACJ,CAAC;QACH,CAAC;KAAA;CACF;AA/JD,sDA+JC"}
@@ -0,0 +1,123 @@
1
+ import { Agentica } from "@agentica/core";
2
+ import { tags } from "typia";
3
+ import { IAgenticaSelectBenchmarkEvent } from "./structures/IAgenticaSelectBenchmarkEvent";
4
+ import { IAgenticaSelectBenchmarkResult } from "./structures/IAgenticaSelectBenchmarkResult";
5
+ import { IAgenticaSelectBenchmarkScenario } from "./structures/IAgenticaSelectBenchmarkScenario";
6
+ /**
7
+ * LLM function calling selection benchmark.
8
+ *
9
+ * `AgenticaSelectBenchmark` is a class for the benchmark of the
10
+ * LLM (Large Language Model) function calling's selection part.
11
+ * It utilizes the `selector` agent and tests whether the expected
12
+ * {@link IAgenticaOperation operations} are properly selected from
13
+ * the given {@link IAgenticaSelectBenchmarkScenario scenarios}.
14
+ *
15
+ * Note that, this `AgenticaSelectBenchmark` class measures only the
16
+ * selection benchmark, testing whether the `selector` agent can select
17
+ * candidate functions to call as expected. Therefore, it does not test
18
+ * the actual function calling which is done by the `executor` agent.
19
+ * If you want that feature, use {@link AgenticaCallBenchmark} class instead.
20
+ *
21
+ * @author Samchon
22
+ */
23
+ export declare class AgenticaSelectBenchmark {
24
+ private agent_;
25
+ private scenarios_;
26
+ private config_;
27
+ private histories_;
28
+ private result_;
29
+ /**
30
+ * Initializer Constructor.
31
+ *
32
+ * @param props Properties of the selection benchmark
33
+ */
34
+ constructor(props: AgenticaSelectBenchmark.IProps);
35
+ /**
36
+ * Execute the benchmark.
37
+ *
38
+ * Execute the benchmark of the LLM function selection, and returns
39
+ * the result of the benchmark.
40
+ *
41
+ * If you wanna see progress of the benchmark, you can pass a callback
42
+ * function as the argument of the `listener`. The callback function
43
+ * would be called whenever a benchmark event occurs.
44
+ *
45
+ * Also, you can publish a markdown format report by calling
46
+ * the {@link report} function after the benchmark execution.
47
+ *
48
+ * @param listener Callback function listening the benchmark events
49
+ * @returns Results of the function selection benchmark
50
+ */
51
+ execute(listener?: (event: IAgenticaSelectBenchmarkEvent) => void): Promise<IAgenticaSelectBenchmarkResult>;
52
+ /**
53
+ * Report the benchmark result as markdown files.
54
+ *
55
+ * Report the benchmark result {@link execute}d by
56
+ * `AgenticaSelectBenchmark` as markdown files, and returns a
57
+ * dictionary object of the markdown reporting files. The key of
58
+ * the dictionary would be file name, and the value would be the
59
+ * markdown content.
60
+ *
61
+ * For reference, the markdown files are composed like below:
62
+ *
63
+ * - `./README.md`
64
+ * - `./scenario-1/README.md`
65
+ * - `./scenario-1/1.success.md`
66
+ * - `./scenario-1/2.failure.md`
67
+ * - `./scenario-1/3.error.md`
68
+ *
69
+ * @returns Dictionary of markdown files.
70
+ */
71
+ report(): Record<string, string>;
72
+ private step;
73
+ }
74
+ export declare namespace AgenticaSelectBenchmark {
75
+ /**
76
+ * Properties of the {@link AgenticaSelectBenchmark} constructor.
77
+ */
78
+ interface IProps {
79
+ /**
80
+ * AI agent instance.
81
+ */
82
+ agent: Agentica;
83
+ /**
84
+ * List of scenarios that you expect.
85
+ */
86
+ scenarios: IAgenticaSelectBenchmarkScenario[];
87
+ /**
88
+ * Configuration for the benchmark.
89
+ */
90
+ config?: Partial<IConfig>;
91
+ }
92
+ /**
93
+ * Configuration for the benchmark.
94
+ *
95
+ * `AgenticaSelectBenchmark.IConfig` is a data structure which
96
+ * represents a configuration for the benchmark, especially the
97
+ * capacity information of the benchmark execution.
98
+ */
99
+ interface IConfig {
100
+ /**
101
+ * Repeat count.
102
+ *
103
+ * The number of times to repeat the benchmark execution
104
+ * for each scenario.
105
+ *
106
+ * @default 10
107
+ */
108
+ repeat: number & tags.Type<"uint32"> & tags.Minimum<1>;
109
+ /**
110
+ * Simultaneous count.
111
+ *
112
+ * The maximum number of concurrent runs for the parallel benchmark
113
+ * execution.
114
+ *
115
+ * If you configure this property greater than `1`, the benchmark
116
+ * for each scenario would be executed in parallel in the given
117
+ * count.
118
+ *
119
+ * @default 10
120
+ */
121
+ simultaneous: number & tags.Type<"uint32"> & tags.Minimum<1>;
122
+ }
123
+ }
@@ -0,0 +1,185 @@
1
+ "use strict";
2
+ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
3
+ function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
4
+ return new (P || (P = Promise))(function (resolve, reject) {
5
+ function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
6
+ function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
7
+ function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
8
+ step((generator = generator.apply(thisArg, _arguments || [])).next());
9
+ });
10
+ };
11
+ Object.defineProperty(exports, "__esModule", { value: true });
12
+ exports.AgenticaSelectBenchmark = void 0;
13
+ const ChatGptSelectFunctionAgent_1 = require("@agentica/core/src/chatgpt/ChatGptSelectFunctionAgent");
14
+ const tstl_1 = require("tstl");
15
+ const AgenticaBenchmarkPredicator_1 = require("./internal/AgenticaBenchmarkPredicator");
16
+ const AgenticaSelectBenchmarkReporter_1 = require("./internal/AgenticaSelectBenchmarkReporter");
17
+ const TokenUsageComputer_1 = require("./utils/TokenUsageComputer");
18
+ /**
19
+ * LLM function calling selection benchmark.
20
+ *
21
+ * `AgenticaSelectBenchmark` is a class for the benchmark of the
22
+ * LLM (Large Language Model) function calling's selection part.
23
+ * It utilizes the `selector` agent and tests whether the expected
24
+ * {@link IAgenticaOperation operations} are properly selected from
25
+ * the given {@link IAgenticaSelectBenchmarkScenario scenarios}.
26
+ *
27
+ * Note that, this `AgenticaSelectBenchmark` class measures only the
28
+ * selection benchmark, testing whether the `selector` agent can select
29
+ * candidate functions to call as expected. Therefore, it does not test
30
+ * the actual function calling which is done by the `executor` agent.
31
+ * If you want that feature, use {@link AgenticaCallBenchmark} class instead.
32
+ *
33
+ * @author Samchon
34
+ */
35
+ class AgenticaSelectBenchmark {
36
+ /**
37
+ * Initializer Constructor.
38
+ *
39
+ * @param props Properties of the selection benchmark
40
+ */
41
+ constructor(props) {
42
+ var _a, _b, _c, _d;
43
+ this.agent_ = props.agent;
44
+ this.scenarios_ = props.scenarios.slice();
45
+ this.config_ = {
46
+ repeat: (_b = (_a = props.config) === null || _a === void 0 ? void 0 : _a.repeat) !== null && _b !== void 0 ? _b : 10,
47
+ simultaneous: (_d = (_c = props.config) === null || _c === void 0 ? void 0 : _c.simultaneous) !== null && _d !== void 0 ? _d : 10,
48
+ };
49
+ this.histories_ = props.agent.getPromptHistories().slice();
50
+ this.result_ = null;
51
+ }
52
+ /**
53
+ * Execute the benchmark.
54
+ *
55
+ * Execute the benchmark of the LLM function selection, and returns
56
+ * the result of the benchmark.
57
+ *
58
+ * If you wanna see progress of the benchmark, you can pass a callback
59
+ * function as the argument of the `listener`. The callback function
60
+ * would be called whenever a benchmark event occurs.
61
+ *
62
+ * Also, you can publish a markdown format report by calling
63
+ * the {@link report} function after the benchmark execution.
64
+ *
65
+ * @param listener Callback function listening the benchmark events
66
+ * @returns Results of the function selection benchmark
67
+ */
68
+ execute(listener) {
69
+ return __awaiter(this, void 0, void 0, function* () {
70
+ const started_at = new Date();
71
+ const semaphore = new tstl_1.Semaphore(this.config_.simultaneous);
72
+ const experiments = yield Promise.all(this.scenarios_.map((scenario) => __awaiter(this, void 0, void 0, function* () {
73
+ const events = yield Promise.all(new Array(this.config_.repeat).fill(0).map(() => __awaiter(this, void 0, void 0, function* () {
74
+ yield semaphore.acquire();
75
+ const e = yield this.step(scenario);
76
+ yield semaphore.release();
77
+ if (listener !== undefined)
78
+ listener(e);
79
+ return e;
80
+ })));
81
+ return {
82
+ scenario,
83
+ events,
84
+ usage: events
85
+ .filter((e) => e.type !== "error")
86
+ .map((e) => e.usage)
87
+ .reduce(TokenUsageComputer_1.TokenUsageComputer.plus, TokenUsageComputer_1.TokenUsageComputer.zero()),
88
+ };
89
+ })));
90
+ return (this.result_ = {
91
+ experiments,
92
+ started_at,
93
+ completed_at: new Date(),
94
+ usage: experiments
95
+ .map((p) => p.usage)
96
+ .reduce(TokenUsageComputer_1.TokenUsageComputer.plus, TokenUsageComputer_1.TokenUsageComputer.zero()),
97
+ });
98
+ });
99
+ }
100
+ /**
101
+ * Report the benchmark result as markdown files.
102
+ *
103
+ * Report the benchmark result {@link execute}d by
104
+ * `AgenticaSelectBenchmark` as markdown files, and returns a
105
+ * dictionary object of the markdown reporting files. The key of
106
+ * the dictionary would be file name, and the value would be the
107
+ * markdown content.
108
+ *
109
+ * For reference, the markdown files are composed like below:
110
+ *
111
+ * - `./README.md`
112
+ * - `./scenario-1/README.md`
113
+ * - `./scenario-1/1.success.md`
114
+ * - `./scenario-1/2.failure.md`
115
+ * - `./scenario-1/3.error.md`
116
+ *
117
+ * @returns Dictionary of markdown files.
118
+ */
119
+ report() {
120
+ if (this.result_ === null)
121
+ throw new Error("Benchmark is not executed yet.");
122
+ return AgenticaSelectBenchmarkReporter_1.AgenticaSelectBenchmarkReporter.markdown(this.result_);
123
+ }
124
+ step(scenario) {
125
+ return __awaiter(this, void 0, void 0, function* () {
126
+ const started_at = new Date();
127
+ try {
128
+ const usage = {
129
+ total: 0,
130
+ prompt: {
131
+ total: 0,
132
+ audio: 0,
133
+ cached: 0,
134
+ },
135
+ completion: {
136
+ total: 0,
137
+ accepted_prediction: 0,
138
+ audio: 0,
139
+ reasoning: 0,
140
+ rejected_prediction: 0,
141
+ },
142
+ };
143
+ const prompts = yield ChatGptSelectFunctionAgent_1.ChatGptSelectFunctionAgent.execute(Object.assign(Object.assign({}, this.agent_.getContext({
144
+ prompt: {
145
+ type: "text",
146
+ role: "user",
147
+ text: scenario.text,
148
+ },
149
+ usage,
150
+ })), { histories: this.histories_.slice(), stack: [], ready: () => true, dispatch: () => __awaiter(this, void 0, void 0, function* () { }) }));
151
+ const selected = prompts
152
+ .filter((p) => p.type === "select")
153
+ .map((p) => p.operations)
154
+ .flat();
155
+ return {
156
+ type: AgenticaBenchmarkPredicator_1.AgenticaBenchmarkPredicator.success({
157
+ expected: scenario.expected,
158
+ operations: selected,
159
+ })
160
+ ? "success"
161
+ : "failure",
162
+ scenario,
163
+ selected,
164
+ usage,
165
+ assistantPrompts: prompts
166
+ .filter((p) => p.type === "text")
167
+ .filter((p) => p.role === "assistant"),
168
+ started_at,
169
+ completed_at: new Date(),
170
+ };
171
+ }
172
+ catch (error) {
173
+ return {
174
+ type: "error",
175
+ scenario,
176
+ error,
177
+ started_at,
178
+ completed_at: new Date(),
179
+ };
180
+ }
181
+ });
182
+ }
183
+ }
184
+ exports.AgenticaSelectBenchmark = AgenticaSelectBenchmark;
185
+ //# sourceMappingURL=AgenticaSelectBenchmark.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"AgenticaSelectBenchmark.js","sourceRoot":"","sources":["../src/AgenticaSelectBenchmark.ts"],"names":[],"mappings":";;;;;;;;;;;;AAOA,sGAAmG;AACnG,+BAAiC;AAGjC,wFAAqF;AACrF,gGAA6F;AAI7F,mEAAgE;AAEhE;;;;;;;;;;;;;;;;GAgBG;AACH,MAAa,uBAAuB;IAOlC;;;;OAIG;IACH,YAAmB,KAAqC;;QACtD,IAAI,CAAC,MAAM,GAAG,KAAK,CAAC,KAAK,CAAC;QAC1B,IAAI,CAAC,UAAU,GAAG,KAAK,CAAC,SAAS,CAAC,KAAK,EAAE,CAAC;QAC1C,IAAI,CAAC,OAAO,GAAG;YACb,MAAM,EAAE,MAAA,MAAA,KAAK,CAAC,MAAM,0CAAE,MAAM,mCAAI,EAAE;YAClC,YAAY,EAAE,MAAA,MAAA,KAAK,CAAC,MAAM,0CAAE,YAAY,mCAAI,EAAE;SAC/C,CAAC;QACF,IAAI,CAAC,UAAU,GAAG,KAAK,CAAC,KAAK,CAAC,kBAAkB,EAAE,CAAC,KAAK,EAAE,CAAC;QAC3D,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC;IACtB,CAAC;IAED;;;;;;;;;;;;;;;OAeG;IACU,OAAO,CAClB,QAAyD;;YAEzD,MAAM,UAAU,GAAS,IAAI,IAAI,EAAE,CAAC;YACpC,MAAM,SAAS,GAAc,IAAI,gBAAS,CAAC,IAAI,CAAC,OAAO,CAAC,YAAY,CAAC,CAAC;YACtE,MAAM,WAAW,GACf,MAAM,OAAO,CAAC,GAAG,CACf,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,CAAO,QAAQ,EAAE,EAAE;gBACrC,MAAM,MAAM,GAAoC,MAAM,OAAO,CAAC,GAAG,CAC/D,IAAI,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,GAAS,EAAE;oBACpD,MAAM,SAAS,CAAC,OAAO,EAAE,CAAC;oBAC1B,MAAM,CAAC,GACL,MAAM,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;oBAC5B,MAAM,SAAS,CAAC,OAAO,EAAE,CAAC;oBAC1B,IAAI,QAAQ,KAAK,SAAS;wBAAE,QAAQ,CAAC,CAAC,CAAC,CAAC;oBACxC,OAAO,CAAC,CAAC;gBACX,CAAC,CAAA,CAAC,CACH,CAAC;gBACF,OAAO;oBACL,QAAQ;oBACR,MAAM;oBACN,KAAK,EAAE,MAAM;yBACV,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,OAAO,CAAC;yBACjC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC;yBACnB,MAAM,CAAC,uCAAkB,CAAC,IAAI,EAAE,uCAAkB,CAAC,IAAI,EAAE,CAAC;iBAC9D,CAAC;YACJ,CAAC,CAAA,CAAC,CACH,CAAC;YACJ,OAAO,CAAC,IAAI,CAAC,OAAO,GAAG;gBACrB,WAAW;gBACX,UAAU;gBACV,YAAY,EAAE,IAAI,IAAI,EAAE;gBACxB,KAAK,EAAE,WAAW;qBACf,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC;qBACnB,MAAM,CAAC,uCAAkB,CAAC,IAAI,EAAE,uCAAkB,CAAC,IAAI,EAAE,CAAC;aAC9D,CAAC,CAAC;QACL,CAAC;KAAA;IAED;;;;;;;;;;;;;;;;;;OAkBG;IACI,MAAM;QACX,IAAI,IAAI,CAAC,OAAO,KAAK,IAAI;YACvB,MAAM,IAAI,KAAK,CAAC,gCAAgC,
CAAC,CAAC;QACpD,OAAO,iEAA+B,CAAC,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IAChE,CAAC;IAEa,IAAI,CAChB,QAA0C;;YAE1C,MAAM,UAAU,GAAS,IAAI,IAAI,EAAE,CAAC;YACpC,IAAI,CAAC;gBACH,MAAM,KAAK,GAAwB;oBACjC,KAAK,EAAE,CAAC;oBACR,MAAM,EAAE;wBACN,KAAK,EAAE,CAAC;wBACR,KAAK,EAAE,CAAC;wBACR,MAAM,EAAE,CAAC;qBACV;oBACD,UAAU,EAAE;wBACV,KAAK,EAAE,CAAC;wBACR,mBAAmB,EAAE,CAAC;wBACtB,KAAK,EAAE,CAAC;wBACR,SAAS,EAAE,CAAC;wBACZ,mBAAmB,EAAE,CAAC;qBACvB;iBACF,CAAC;gBACF,MAAM,OAAO,GACX,MAAM,uDAA0B,CAAC,OAAO,CAAC,gCACpC,IAAI,CAAC,MAAM,CAAC,UAAU,CAAC;oBACxB,MAAM,EAAE;wBACN,IAAI,EAAE,MAAM;wBACZ,IAAI,EAAE,MAAM;wBACZ,IAAI,EAAE,QAAQ,CAAC,IAAI;qBACpB;oBACD,KAAK;iBACN,CAAC,KACF,SAAS,EAAE,IAAI,CAAC,UAAU,CAAC,KAAK,EAAE,EAClC,KAAK,EAAE,EAAE,EACT,KAAK,EAAE,GAAG,EAAE,CAAC,IAAI,EACjB,QAAQ,EAAE,GAAS,EAAE,gDAAE,CAAC,CAAA,GACE,CAAC,CAAC;gBAChC,MAAM,QAAQ,GAAkC,OAAO;qBACpD,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,QAAQ,CAAC;qBAClC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,UAAU,CAAC;qBACxB,IAAI,EAAE,CAAC;gBACV,OAAO;oBACL,IAAI,EAAE,yDAA2B,CAAC,OAAO,CAAC;wBACxC,QAAQ,EAAE,QAAQ,CAAC,QAAQ;wBAC3B,UAAU,EAAE,QAAQ;qBACrB,CAAC;wBACA,CAAC,CAAC,SAAS;wBACX,CAAC,CAAC,SAAS;oBACb,QAAQ;oBACR,QAAQ;oBACR,KAAK;oBACL,gBAAgB,EAAE,OAAO;yBACtB,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,MAAM,CAAC;yBAChC,MAAM,CACL,CAAC,CAAC,EAA2C,EAAE,CAC7C,CAAC,CAAC,IAAI,KAAK,WAAW,CACzB;oBACH,UAAU;oBACV,YAAY,EAAE,IAAI,IAAI,EAAE;iBAGgB,CAAC;YAC7C,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACf,OAAO;oBACL,IAAI,EAAE,OAAO;oBACb,QAAQ;oBACR,KAAK;oBACL,UAAU;oBACV,YAAY,EAAE,IAAI,IAAI,EAAE;iBACsB,CAAC;YACnD,CAAC;QACH,CAAC;KAAA;CACF;AA5KD,0DA4KC"}
package/lib/index.d.ts ADDED
@@ -0,0 +1,2 @@
1
+ export * from "./AgenticaCallBenchmark";
2
+ export * from "./AgenticaSelectBenchmark";