@agentica/benchmark 0.12.21 → 0.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50):
  1. package/README.md +39 -33
  2. package/lib/AgenticaCallBenchmark.d.ts +12 -6
  3. package/lib/AgenticaCallBenchmark.js +24 -18
  4. package/lib/AgenticaCallBenchmark.js.map +1 -1
  5. package/lib/AgenticaSelectBenchmark.d.ts +12 -6
  6. package/lib/AgenticaSelectBenchmark.js +14 -12
  7. package/lib/AgenticaSelectBenchmark.js.map +1 -1
  8. package/lib/index.mjs +315 -236
  9. package/lib/index.mjs.map +1 -1
  10. package/lib/internal/AgenticaBenchmarkPredicator.d.ts +38 -29
  11. package/lib/internal/AgenticaBenchmarkPredicator.js +100 -84
  12. package/lib/internal/AgenticaBenchmarkPredicator.js.map +1 -1
  13. package/lib/internal/AgenticaBenchmarkUtil.d.ts +21 -6
  14. package/lib/internal/AgenticaBenchmarkUtil.js +39 -33
  15. package/lib/internal/AgenticaBenchmarkUtil.js.map +1 -1
  16. package/lib/internal/AgenticaCallBenchmarkReporter.d.ts +6 -5
  17. package/lib/internal/AgenticaCallBenchmarkReporter.js +130 -126
  18. package/lib/internal/AgenticaCallBenchmarkReporter.js.map +1 -1
  19. package/lib/internal/AgenticaPromptReporter.d.ts +13 -5
  20. package/lib/internal/AgenticaPromptReporter.js +45 -41
  21. package/lib/internal/AgenticaPromptReporter.js.map +1 -1
  22. package/lib/internal/AgenticaSelectBenchmarkReporter.d.ts +3 -1
  23. package/lib/internal/AgenticaSelectBenchmarkReporter.js +153 -150
  24. package/lib/internal/AgenticaSelectBenchmarkReporter.js.map +1 -1
  25. package/lib/structures/IAgenticaBenchmarkExpected.d.ts +8 -2
  26. package/lib/structures/IAgenticaCallBenchmarkEvent.d.ts +9 -3
  27. package/lib/structures/IAgenticaCallBenchmarkResult.d.ts +10 -4
  28. package/lib/structures/IAgenticaCallBenchmarkScenario.d.ts +8 -2
  29. package/lib/structures/IAgenticaSelectBenchmarkEvent.d.ts +9 -3
  30. package/lib/structures/IAgenticaSelectBenchmarkResult.d.ts +10 -4
  31. package/lib/structures/IAgenticaSelectBenchmarkScenario.d.ts +8 -2
  32. package/lib/utils/MathUtil.d.ts +15 -3
  33. package/lib/utils/MathUtil.js +15 -4
  34. package/lib/utils/MathUtil.js.map +1 -1
  35. package/package.json +12 -10
  36. package/src/AgenticaCallBenchmark.ts +64 -45
  37. package/src/AgenticaSelectBenchmark.ts +42 -30
  38. package/src/internal/AgenticaBenchmarkPredicator.ts +208 -186
  39. package/src/internal/AgenticaBenchmarkUtil.ts +58 -40
  40. package/src/internal/AgenticaCallBenchmarkReporter.ts +180 -182
  41. package/src/internal/AgenticaPromptReporter.ts +46 -33
  42. package/src/internal/AgenticaSelectBenchmarkReporter.ts +205 -203
  43. package/src/structures/IAgenticaBenchmarkExpected.ts +9 -2
  44. package/src/structures/IAgenticaCallBenchmarkEvent.ts +9 -3
  45. package/src/structures/IAgenticaCallBenchmarkResult.ts +10 -4
  46. package/src/structures/IAgenticaCallBenchmarkScenario.ts +8 -2
  47. package/src/structures/IAgenticaSelectBenchmarkEvent.ts +9 -3
  48. package/src/structures/IAgenticaSelectBenchmarkResult.ts +10 -4
  49. package/src/structures/IAgenticaSelectBenchmarkScenario.ts +8 -2
  50. package/src/utils/MathUtil.ts +16 -3
@@ -1,191 +1,21 @@
1
- import { Agentica, AgenticaOperation, AgenticaPrompt } from "@agentica/core";
2
- import { ILlmFunction, ILlmSchema } from "@samchon/openapi";
3
- import OpenAI from "openai";
4
- import typia from "typia";
5
-
6
- import { IAgenticaBenchmarkExpected } from "../structures/IAgenticaBenchmarkExpected";
7
-
8
- export namespace AgenticaBenchmarkPredicator {
9
- export const isNext = async <Model extends ILlmSchema.Model>(
10
- agent: Agentica<Model>,
11
- ): Promise<string | null> => {
12
- const last: AgenticaPrompt<Model> | undefined = agent
13
- .getPromptHistories()
14
- .at(-1);
15
- if (last?.type !== "text" || last.role !== "assistant") return null;
16
-
17
- const consent: ILlmFunction<"chatgpt"> = typia.llm.application<
18
- IPredicatorApplication,
19
- "chatgpt"
20
- >().functions[0]!;
21
- const result: OpenAI.ChatCompletion = await agent[
22
- "props"
23
- ].vendor.api.chat.completions.create(
24
- {
25
- model: agent["props"].vendor.model,
26
- messages: [
27
- {
28
- role: "system",
29
- content: [
30
- "You are an helpful assistant.",
31
- "",
32
- "If what the assistant said seems like to asking for",
33
- "user's consent about some function calling at the next step,",
34
- "use the tools appropriately to step to the next.",
35
- ].join("\n"),
36
- },
37
- {
38
- role: "assistant",
39
- content: last.text,
40
- },
41
- ],
42
- tools: [
43
- {
44
- type: "function",
45
- function: {
46
- name: consent.name,
47
- description: consent.description,
48
- parameters: consent.parameters as Record<string, any>,
49
- },
50
- },
51
- ],
52
- tool_choice: "required",
53
- parallel_tool_calls: false,
54
- },
55
- agent["props"].vendor.options,
56
- );
57
- const toolCall: OpenAI.ChatCompletionMessageToolCall | undefined = (
58
- result.choices[0]?.message.tool_calls ?? []
59
- ).filter(
60
- (tc) => tc.type === "function" && tc.function.name === consent.name,
61
- )?.[0];
62
- if (toolCall === undefined) return null;
63
- const input: IConsentProps = JSON.parse(toolCall.function.arguments);
64
- return typia.is(input) ? input.reply : null;
65
- };
66
-
67
- /**
68
- * Check if the called operations match the expected operations.
69
- *
70
- * @param props Properties for checking the match of the called operations
71
- * and the expected operations
72
- *
73
- * @returns `true` if the called operations match the expected operations,
74
- * otherwise `false`.
75
- */
76
- export const success = <Model extends ILlmSchema.Model>(props: {
77
- /**
78
- * Expected operations to be called.
79
- *
80
- * For 'allOf' within an 'array', the next expected element starts checking from the element that follows the last called element in 'allOf'.
81
- */
82
- expected: IAgenticaBenchmarkExpected<Model>;
83
-
84
- /**
85
- * Specified operations.
86
- */
87
- operations: Array<AgenticaOperation<Model>>;
88
-
89
- /**
90
- * If it's `false`, check the array and let it go even if there's something wrong between them.
91
- *
92
- * @default `false`
93
- */
94
- strict?: boolean;
95
- }): boolean => successInner(props).result;
96
-
97
- const successInner = <Model extends ILlmSchema.Model>(
98
- props: Parameters<typeof success<Model>>[0],
99
- ):
100
- | {
101
- result: true;
102
- take: number;
103
- }
104
- | {
105
- result: false;
106
- } => {
107
- const call = (
108
- expected: IAgenticaBenchmarkExpected<Model>,
109
- overrideOperations?: Array<AgenticaOperation<Model>>,
110
- ) =>
111
- successInner({
112
- expected,
113
- operations: overrideOperations ?? props.operations,
114
- strict: props.strict,
115
- });
116
-
117
- switch (props.expected.type) {
118
- case "array": {
119
- let take = 0;
120
- const targetIterator = props.expected.items[Symbol.iterator]();
121
- let targeted = targetIterator.next();
122
-
123
- while (true) {
124
- if (targeted.done) {
125
- return {
126
- result: true,
127
- take,
128
- };
129
- }
130
- if (take >= props.operations.length) {
131
- return { result: false };
132
- }
1
+ /**
2
+ * @module
3
+ * This file contains functions to work with AgenticaBenchmarkPredicator.
4
+ *
5
+ * @author Wrtn Technologies
6
+ */
133
7
 
134
- const result = call(targeted.value, props.operations.slice(take));
135
- if (!result.result) {
136
- if (!props.strict) {
137
- take += 1;
138
- continue;
139
- }
140
- return { result: false };
141
- }
8
+ import type { Agentica, AgenticaOperation, AgenticaPrompt } from "@agentica/core";
9
+ import type { ILlmFunction, ILlmSchema } from "@samchon/openapi";
10
+ import type OpenAI from "openai";
11
+ import type { IAgenticaBenchmarkExpected } from "../structures/IAgenticaBenchmarkExpected";
142
12
 
143
- take += result.take;
144
- targeted = targetIterator.next();
145
- }
146
- }
147
- case "standalone": {
148
- const target = props.expected.operation;
149
- const result = props.operations.some((op) => op.name === target.name);
150
- if (result) {
151
- return { result, take: 1 };
152
- }
153
- return {
154
- result,
155
- };
156
- }
157
- case "anyOf":
158
- for (const expected of props.expected.anyOf) {
159
- const callResult = call(expected);
160
- if (callResult.result) {
161
- return callResult;
162
- }
163
- }
164
-
165
- return { result: false };
166
- case "allOf": {
167
- /**
168
- * @example
169
- * expected = [4, 2];
170
- * called = [1, 2, 3, 4, 5];
171
- *
172
- * { result: true, take: 3 };
173
- */
174
- const result = props.expected.allOf.map((expected) => call(expected));
175
- if (result.every((r) => r.result)) {
176
- return {
177
- result: true,
178
- take: result.reduce((acc, r) => Math.max(acc, r.take), 0),
179
- };
180
- }
13
+ import typia from "typia";
181
14
 
182
- return {
183
- result: false,
184
- };
185
- }
186
- }
187
- };
188
- }
15
+ export const AgenticaBenchmarkPredicator = {
16
+ isNext,
17
+ success,
18
+ };
189
19
 
190
20
  interface IPredicatorApplication {
191
21
  /**
@@ -197,7 +27,7 @@ interface IPredicatorApplication {
197
27
  *
198
28
  * @param props Properties for asking the user's consent
199
29
  */
200
- consent(props: IConsentProps): void;
30
+ consent: (props: IConsentProps) => void;
201
31
  }
202
32
 
203
33
  /**
@@ -218,3 +48,195 @@ interface IConsentProps {
218
48
  */
219
49
  reply: string;
220
50
  }
51
+
52
+ async function isNext<Model extends ILlmSchema.Model>(agent: Agentica<Model>): Promise<string | null> {
53
+ const last: AgenticaPrompt<Model> | undefined = agent
54
+ .getPromptHistories()
55
+ .at(-1);
56
+
57
+ /**
58
+ * Agentica Props is private, we can't access it
59
+ * The provided code follows the original source prior to modification.
60
+ * However, due to compilation errors, a workaround was implemented.
61
+ * Please apply any available patches to resolve this issue.
62
+ */
63
+ const llmVendor = agent.getVendor();
64
+ const isTextPrompt = last?.type === "text" && last.role === "assistant";
65
+ if (!isTextPrompt) {
66
+ return null;
67
+ }
68
+
69
+ const consent: ILlmFunction<"chatgpt"> = typia.llm.application<
70
+ IPredicatorApplication,
71
+ "chatgpt"
72
+ >().functions[0]!;
73
+ const result: OpenAI.ChatCompletion = await llmVendor.api.chat.completions.create(
74
+ {
75
+ model: llmVendor.model,
76
+ messages: [
77
+ {
78
+ role: "system",
79
+ content: [
80
+ "You are an helpful assistant.",
81
+ "",
82
+ "If what the assistant said seems like to asking for",
83
+ "user's consent about some function calling at the next step,",
84
+ "use the tools appropriately to step to the next.",
85
+ ].join("\n"),
86
+ },
87
+ {
88
+ role: "assistant",
89
+ content: last.text,
90
+ },
91
+ ],
92
+ tools: [
93
+ {
94
+ type: "function",
95
+ function: {
96
+ name: consent.name,
97
+ description: consent.description,
98
+ parameters: consent.parameters as Record<string, any>,
99
+ },
100
+ },
101
+ ],
102
+ tool_choice: "required",
103
+ parallel_tool_calls: false,
104
+ },
105
+ llmVendor.options,
106
+ );
107
+
108
+ const toolCall: OpenAI.ChatCompletionMessageToolCall | undefined = (
109
+ result.choices[0]?.message.tool_calls ?? []
110
+ ).filter(
111
+ tc => tc.type === "function" && tc.function.name === consent.name,
112
+ )?.[0];
113
+
114
+ if (toolCall === undefined) {
115
+ return null;
116
+ }
117
+
118
+ const input = typia.json.isParse<IConsentProps>(toolCall.function.arguments);
119
+ return input !== null ? input.reply : null;
120
+ }
121
+
122
+ /**
123
+ * Check if the called operations match the expected operations.
124
+ *
125
+ * @param props Properties for checking the match of the called operations
126
+ * and the expected operations
127
+ *
128
+ * @returns `true` if the called operations match the expected operations,
129
+ * otherwise `false`.
130
+ */
131
+ export function success<Model extends ILlmSchema.Model>(props: {
132
+ /**
133
+ * Expected operations to be called.
134
+ *
135
+ * For 'allOf' within an 'array', the next expected element starts checking from the element that follows the last called element in 'allOf'.
136
+ */
137
+ expected: IAgenticaBenchmarkExpected<Model>;
138
+
139
+ /**
140
+ * Specified operations.
141
+ */
142
+ operations: Array<AgenticaOperation<Model>>;
143
+
144
+ /**
145
+ * If it's `false`, check the array and let it go even if there's something wrong between them.
146
+ *
147
+ * @default `false`
148
+ */
149
+ strict?: boolean;
150
+ }): boolean {
151
+ return successInner(props).result;
152
+ }
153
+
154
+ function successInner<Model extends ILlmSchema.Model>(props: Parameters<typeof success<Model>>[0]):
155
+ | {
156
+ result: true;
157
+ take: number;
158
+ }
159
+ | {
160
+ result: false;
161
+ } {
162
+ const call = (
163
+ expected: IAgenticaBenchmarkExpected<Model>,
164
+ overrideOperations?: Array<AgenticaOperation<Model>>,
165
+ ) =>
166
+ successInner({
167
+ expected,
168
+ operations: overrideOperations ?? props.operations,
169
+ strict: props.strict,
170
+ });
171
+
172
+ switch (props.expected.type) {
173
+ case "array": {
174
+ let take = 0;
175
+ const targetIterator = props.expected.items[Symbol.iterator]();
176
+ let targeted = targetIterator.next();
177
+
178
+ while (true) {
179
+ if (targeted.done === true) {
180
+ return {
181
+ result: true,
182
+ take,
183
+ };
184
+ }
185
+ if (take >= props.operations.length) {
186
+ return { result: false };
187
+ }
188
+
189
+ const result = call(targeted.value, props.operations.slice(take));
190
+ if (!result.result) {
191
+ if (props.strict === true) {
192
+ return { result: false };
193
+ }
194
+ take += 1;
195
+ continue;
196
+ }
197
+
198
+ take += result.take;
199
+ targeted = targetIterator.next();
200
+ }
201
+ }
202
+ case "standalone": {
203
+ const target = props.expected.operation;
204
+ const result = props.operations.some(op => op.name === target.name);
205
+ if (result) {
206
+ return { result, take: 1 };
207
+ }
208
+ return {
209
+ result,
210
+ };
211
+ }
212
+ case "anyOf":
213
+ for (const expected of props.expected.anyOf) {
214
+ const callResult = call(expected);
215
+ if (callResult.result) {
216
+ return callResult;
217
+ }
218
+ }
219
+
220
+ return { result: false };
221
+ case "allOf": {
222
+ /**
223
+ * @example
224
+ * expected = [4, 2];
225
+ * called = [1, 2, 3, 4, 5];
226
+ *
227
+ * { result: true, take: 3 };
228
+ */
229
+ const result = props.expected.allOf.map(expected => call(expected));
230
+ if (result.every(r => r.result)) {
231
+ return {
232
+ result: true,
233
+ take: result.reduce((acc, r) => Math.max(acc, r.take), 0),
234
+ };
235
+ }
236
+
237
+ return {
238
+ result: false,
239
+ };
240
+ }
241
+ }
242
+ }
@@ -1,44 +1,62 @@
1
- import { ILlmSchema } from "@samchon/openapi";
1
+ /**
2
+ * @module
3
+ * This file contains functions to work with AgenticaBenchmarkUtil.
4
+ *
5
+ * @author Wrtn Technologies
6
+ */
7
+ import type { ILlmSchema } from "@samchon/openapi";
2
8
 
3
- import { IAgenticaBenchmarkExpected } from "../structures/IAgenticaBenchmarkExpected";
9
+ import type { IAgenticaBenchmarkExpected } from "../structures/IAgenticaBenchmarkExpected";
4
10
 
5
- export namespace AgenticaBenchmarkUtil {
6
- export const errorToJson = (error: any): any => {
7
- if (error instanceof Error)
8
- return {
9
- ...error,
10
- name: error.name,
11
- message: error.message,
12
- stack: error.stack,
13
- };
14
- return error;
15
- };
11
+ export const AgenticaBenchmarkUtil = {
12
+ errorToJson,
13
+ expectedToJson,
14
+ };
16
15
 
17
- export const expectedToJson = <Model extends ILlmSchema.Model>(
18
- expected: IAgenticaBenchmarkExpected<Model>,
19
- ): any => {
20
- if (expected.type === "standalone")
21
- return {
22
- type: expected.type,
23
- operation: {
24
- name: expected.operation.name,
25
- description: expected.operation.function.description,
26
- },
27
- };
28
- else if (expected.type === "array")
29
- return {
30
- type: expected.type,
31
- items: expected.items.map(expectedToJson),
32
- };
33
- else if (expected.type === "allOf")
34
- return {
35
- type: expected.type,
36
- allOf: expected.allOf.map(expectedToJson),
37
- };
38
- else
39
- return {
40
- type: expected.type,
41
- anyOf: expected.anyOf.map(expectedToJson),
42
- };
43
- };
16
+ function errorToJson<T>(error: T): T | ({
17
+ [k in keyof T]: T[k]
18
+ } & {
19
+ name: string;
20
+ message: string;
21
+ stack: string;
22
+ }) {
23
+ if (error instanceof Error) {
24
+ return {
25
+ ...error,
26
+ name: error.name,
27
+ message: error.message,
28
+ stack: error.stack,
29
+ };
30
+ }
31
+ return error;
32
+ }
33
+
34
+ function expectedToJson<Model extends ILlmSchema.Model>(expected: IAgenticaBenchmarkExpected<Model>): any {
35
+ if (expected.type === "standalone") {
36
+ return {
37
+ type: expected.type,
38
+ operation: {
39
+ name: expected.operation.name,
40
+ description: expected.operation.function.description,
41
+ },
42
+ };
43
+ }
44
+ else if (expected.type === "array") {
45
+ return {
46
+ type: expected.type,
47
+ items: expected.items.map(expectedToJson),
48
+ };
49
+ }
50
+ else if (expected.type === "allOf") {
51
+ return {
52
+ type: expected.type,
53
+ allOf: expected.allOf.map(expectedToJson),
54
+ };
55
+ }
56
+ else {
57
+ return {
58
+ type: expected.type,
59
+ anyOf: expected.anyOf.map(expectedToJson),
60
+ };
61
+ }
44
62
  }