@agentica/benchmark 0.43.3 → 0.44.0-dev.20260313

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,241 +1,240 @@
1
- /**
2
- * @module
3
- * This file contains functions to work with AgenticaBenchmarkPredicator.
4
- *
5
- * @author Wrtn Technologies
6
- */
7
-
8
- import type { Agentica, AgenticaHistory, AgenticaOperation, MicroAgentica } from "@agentica/core";
9
- import type { ILlmFunction } from "@samchon/openapi";
10
- import type OpenAI from "openai";
11
-
12
- import typia from "typia";
13
-
14
- import type { IAgenticaBenchmarkExpected } from "../structures/IAgenticaBenchmarkExpected";
15
-
16
- export const AgenticaBenchmarkPredicator = {
17
- isNext,
18
- success,
19
- };
20
-
21
- interface IPredicatorApplication {
22
- /**
23
- * Ask user to consent for what the AI agent wants to do next.
24
- *
25
- * If AI agent wants to do some function calling at next,
26
- * but it needs the user's consent about the function calling to do,
27
- * then call this tool function.
28
- *
29
- * @param props Properties for asking the user's consent
30
- */
31
- consent: (props: IConsentProps) => void;
32
- }
33
-
34
- /**
35
- * Properties for asking the user's consent
36
- */
37
- interface IConsentProps {
38
- /**
39
- * Reason of the message implying what the AI agent wants
40
- * to do at the next step after the user's consent.
41
- */
42
- content: string;
43
-
44
- /**
45
- * Recommended reply message for the user.
46
- *
47
- * The message what AI agent wants the user to reply
48
- * accepting the AI agent's next job suggestion.
49
- */
50
- reply: string;
51
- }
52
-
53
- async function isNext(agent: Agentica | MicroAgentica): Promise<string | null> {
54
- const last: AgenticaHistory | undefined = agent
55
- .getHistories()
56
- .at(-1);
57
-
58
- /**
59
- * Agentica Props is private, we can't access it
60
- * The provided code follows the original source prior to modification.
61
- * However, due to compilation errors, a workaround was implemented.
62
- * Please apply any available patches to resolve this issue.
63
- */
64
- const llmVendor = agent.getVendor();
65
- const isAssistantHistory = last?.type === "assistantMessage";
66
- if (!isAssistantHistory) {
67
- return null;
68
- }
69
-
70
- const consent: ILlmFunction = typia.llm.application<
71
- IPredicatorApplication
72
- >().functions[0]!;
73
- const result: OpenAI.ChatCompletion = await llmVendor.api.chat.completions.create(
74
- {
75
- model: llmVendor.model,
76
- messages: [
77
- {
78
- role: "system",
79
- content: [
80
- "You are an helpful assistant.",
81
- "",
82
- "If what the assistant said seems like to asking for",
83
- "user's consent about some function calling at the next step,",
84
- "use the tools appropriately to step to the next.",
85
- ].join("\n"),
86
- },
87
- {
88
- role: "assistant",
89
- content: last.text,
90
- },
91
- ],
92
- tools: [
93
- {
94
- type: "function",
95
- function: {
96
- name: consent.name,
97
- description: consent.description,
98
- parameters: consent.parameters as Record<string, any>,
99
- },
100
- },
101
- ],
102
- tool_choice: "required",
103
- // parallel_tool_calls: false,
104
- },
105
- llmVendor.options,
106
- );
107
-
108
- const toolCall: OpenAI.ChatCompletionMessageToolCall | undefined = (
109
- result.choices[0]?.message.tool_calls ?? []
110
- ).filter(
111
- tc => tc.type === "function" && tc.function.name === consent.name,
112
- )?.[0];
113
-
114
- if (toolCall === undefined || toolCall.type !== "function") {
115
- return null;
116
- }
117
-
118
- const input = typia.json.isParse<IConsentProps>(toolCall.function.arguments);
119
- return input !== null ? input.reply : null;
120
- }
121
-
122
- /**
123
- * Check if the called operations match the expected operations.
124
- *
125
- * @param props Properties for checking the match of the called operations
126
- * and the expected operations
127
- * @returns `true` if the called operations match the expected operations,
128
- * otherwise `false`.
129
- */
130
- export function success(props: {
131
- /**
132
- * Expected operations to be called.
133
- *
134
- * For 'allOf' within an 'array', the next expected element starts checking from the element that follows the last called element in 'allOf'.
135
- */
136
- expected: IAgenticaBenchmarkExpected;
137
-
138
- /**
139
- * Specified operations.
140
- */
141
- operations: Array<AgenticaOperation>;
142
-
143
- /**
144
- * If it's `false`, check the array and let it go even if there's something wrong between them.
145
- *
146
- * @default `false`
147
- */
148
- strict?: boolean;
149
- }): boolean {
150
- return successInner(props).result;
151
- }
152
-
153
- function successInner(props: Parameters<typeof success>[0]):
154
- | {
155
- result: true;
156
- take: number;
157
- }
158
- | {
159
- result: false;
160
- } {
161
- const call = (
162
- expected: IAgenticaBenchmarkExpected,
163
- overrideOperations?: Array<AgenticaOperation>,
164
- ) =>
165
- successInner({
166
- expected,
167
- operations: overrideOperations ?? props.operations,
168
- strict: props.strict,
169
- });
170
-
171
- switch (props.expected.type) {
172
- case "array": {
173
- let take = 0;
174
- const targetIterator = props.expected.items[Symbol.iterator]();
175
- let targeted = targetIterator.next();
176
-
177
- while (true) {
178
- if (targeted.done === true) {
179
- return {
180
- result: true,
181
- take,
182
- };
183
- }
184
- if (take >= props.operations.length) {
185
- return { result: false };
186
- }
187
-
188
- const result = call(targeted.value, props.operations.slice(take));
189
- if (!result.result) {
190
- if (props.strict === true) {
191
- return { result: false };
192
- }
193
- take += 1;
194
- continue;
195
- }
196
-
197
- take += result.take;
198
- targeted = targetIterator.next();
199
- }
200
- }
201
- case "standalone": {
202
- const target = props.expected.operation;
203
- const result = props.operations.some(op => op.name === target.name);
204
- if (result) {
205
- return { result, take: 1 };
206
- }
207
- return {
208
- result,
209
- };
210
- }
211
- case "anyOf":
212
- for (const expected of props.expected.anyOf) {
213
- const callResult = call(expected);
214
- if (callResult.result) {
215
- return callResult;
216
- }
217
- }
218
-
219
- return { result: false };
220
- case "allOf": {
221
- /**
222
- * @example
223
- * expected = [4, 2];
224
- * called = [1, 2, 3, 4, 5];
225
- *
226
- * { result: true, take: 3 };
227
- */
228
- const result = props.expected.allOf.map(expected => call(expected));
229
- if (result.every(r => r.result)) {
230
- return {
231
- result: true,
232
- take: result.reduce((acc, r) => Math.max(acc, r.take), 0),
233
- };
234
- }
235
-
236
- return {
237
- result: false,
238
- };
239
- }
240
- }
241
- }
1
+ /**
2
+ * @module
3
+ * This file contains functions to work with AgenticaBenchmarkPredicator.
4
+ *
5
+ * @author Wrtn Technologies
6
+ */
7
+
8
+ import type { Agentica, AgenticaHistory, AgenticaOperation, MicroAgentica } from "@agentica/core";
9
+ import type OpenAI from "openai";
10
+ import type { ILlmFunction } from "typia";
11
+
12
+ import typia from "typia";
13
+
14
+ import type { IAgenticaBenchmarkExpected } from "../structures/IAgenticaBenchmarkExpected";
15
+
16
+ export const AgenticaBenchmarkPredicator = {
17
+ isNext,
18
+ success,
19
+ };
20
+
21
+ interface IPredicatorApplication {
22
+ /**
23
+ * Ask user to consent for what the AI agent wants to do next.
24
+ *
25
+ * If AI agent wants to do some function calling at next,
26
+ * but it needs the user's consent about the function calling to do,
27
+ * then call this tool function.
28
+ *
29
+ * @param props Properties for asking the user's consent
30
+ */
31
+ consent: (props: IConsentProps) => void;
32
+ }
33
+
34
+ /**
35
+ * Properties for asking the user's consent
36
+ */
37
+ interface IConsentProps {
38
+ /**
39
+ * Reason of the message implying what the AI agent wants
40
+ * to do at the next step after the user's consent.
41
+ */
42
+ content: string;
43
+
44
+ /**
45
+ * Recommended reply message for the user.
46
+ *
47
+ * The message what AI agent wants the user to reply
48
+ * accepting the AI agent's next job suggestion.
49
+ */
50
+ reply: string;
51
+ }
52
+
53
+ async function isNext(agent: Agentica | MicroAgentica): Promise<string | null> {
54
+ const histories: AgenticaHistory[] = agent.getHistories();
55
+ const last: AgenticaHistory | undefined = histories[histories.length - 1];
56
+
57
+ /**
58
+ * Agentica Props is private, we can't access it
59
+ * The provided code follows the original source prior to modification.
60
+ * However, due to compilation errors, a workaround was implemented.
61
+ * Please apply any available patches to resolve this issue.
62
+ */
63
+ const llmVendor = agent.getVendor();
64
+ const isAssistantHistory = last?.type === "assistantMessage";
65
+ if (!isAssistantHistory) {
66
+ return null;
67
+ }
68
+
69
+ const consent: ILlmFunction = typia.llm.application<
70
+ IPredicatorApplication
71
+ >().functions[0]!;
72
+ const result: OpenAI.ChatCompletion = await llmVendor.api.chat.completions.create(
73
+ {
74
+ model: llmVendor.model,
75
+ messages: [
76
+ {
77
+ role: "system",
78
+ content: [
79
+ "You are an helpful assistant.",
80
+ "",
81
+ "If what the assistant said seems like to asking for",
82
+ "user's consent about some function calling at the next step,",
83
+ "use the tools appropriately to step to the next.",
84
+ ].join("\n"),
85
+ },
86
+ {
87
+ role: "assistant",
88
+ content: last.text,
89
+ },
90
+ ],
91
+ tools: [
92
+ {
93
+ type: "function",
94
+ function: {
95
+ name: consent.name,
96
+ description: consent.description,
97
+ parameters: consent.parameters as Record<string, any>,
98
+ },
99
+ },
100
+ ],
101
+ tool_choice: "required",
102
+ // parallel_tool_calls: false,
103
+ },
104
+ llmVendor.options,
105
+ );
106
+
107
+ const toolCall: OpenAI.ChatCompletionMessageToolCall | undefined = (
108
+ result.choices[0]?.message.tool_calls ?? []
109
+ ).filter(
110
+ tc => tc.type === "function" && tc.function.name === consent.name,
111
+ )?.[0];
112
+
113
+ if (toolCall === undefined || toolCall.type !== "function") {
114
+ return null;
115
+ }
116
+
117
+ const input = typia.json.isParse<IConsentProps>(toolCall.function.arguments);
118
+ return input !== null ? input.reply : null;
119
+ }
120
+
121
+ /**
122
+ * Check if the called operations match the expected operations.
123
+ *
124
+ * @param props Properties for checking the match of the called operations
125
+ * and the expected operations
126
+ * @returns `true` if the called operations match the expected operations,
127
+ * otherwise `false`.
128
+ */
129
+ export function success(props: {
130
+ /**
131
+ * Expected operations to be called.
132
+ *
133
+ * For 'allOf' within an 'array', the next expected element starts checking from the element that follows the last called element in 'allOf'.
134
+ */
135
+ expected: IAgenticaBenchmarkExpected;
136
+
137
+ /**
138
+ * Specified operations.
139
+ */
140
+ operations: Array<AgenticaOperation>;
141
+
142
+ /**
143
+ * If it's `false`, check the array and let it go even if there's something wrong between them.
144
+ *
145
+ * @default `false`
146
+ */
147
+ strict?: boolean;
148
+ }): boolean {
149
+ return successInner(props).result;
150
+ }
151
+
152
+ function successInner(props: Parameters<typeof success>[0]):
153
+ | {
154
+ result: true;
155
+ take: number;
156
+ }
157
+ | {
158
+ result: false;
159
+ } {
160
+ const call = (
161
+ expected: IAgenticaBenchmarkExpected,
162
+ overrideOperations?: Array<AgenticaOperation>,
163
+ ) =>
164
+ successInner({
165
+ expected,
166
+ operations: overrideOperations ?? props.operations,
167
+ strict: props.strict,
168
+ });
169
+
170
+ switch (props.expected.type) {
171
+ case "array": {
172
+ let take = 0;
173
+ const targetIterator = props.expected.items[Symbol.iterator]();
174
+ let targeted = targetIterator.next();
175
+
176
+ while (true) {
177
+ if (targeted.done === true) {
178
+ return {
179
+ result: true,
180
+ take,
181
+ };
182
+ }
183
+ if (take >= props.operations.length) {
184
+ return { result: false };
185
+ }
186
+
187
+ const result = call(targeted.value, props.operations.slice(take));
188
+ if (!result.result) {
189
+ if (props.strict === true) {
190
+ return { result: false };
191
+ }
192
+ take += 1;
193
+ continue;
194
+ }
195
+
196
+ take += result.take;
197
+ targeted = targetIterator.next();
198
+ }
199
+ }
200
+ case "standalone": {
201
+ const target = props.expected.operation;
202
+ const result = props.operations.some(op => op.name === target.name);
203
+ if (result) {
204
+ return { result, take: 1 };
205
+ }
206
+ return {
207
+ result,
208
+ };
209
+ }
210
+ case "anyOf":
211
+ for (const expected of props.expected.anyOf) {
212
+ const callResult = call(expected);
213
+ if (callResult.result) {
214
+ return callResult;
215
+ }
216
+ }
217
+
218
+ return { result: false };
219
+ case "allOf": {
220
+ /**
221
+ * @example
222
+ * expected = [4, 2];
223
+ * called = [1, 2, 3, 4, 5];
224
+ *
225
+ * { result: true, take: 3 };
226
+ */
227
+ const result = props.expected.allOf.map(expected => call(expected));
228
+ if (result.every(r => r.result)) {
229
+ return {
230
+ result: true,
231
+ take: result.reduce((acc, r) => Math.max(acc, r.take), 0),
232
+ };
233
+ }
234
+
235
+ return {
236
+ result: false,
237
+ };
238
+ }
239
+ }
240
+ }
@@ -1,60 +1,60 @@
1
- /**
2
- * @module
3
- * This file contains functions to work with AgenticaBenchmarkUtil.
4
- *
5
- * @author Wrtn Technologies
6
- */
7
- import type { IAgenticaBenchmarkExpected } from "../structures/IAgenticaBenchmarkExpected";
8
-
9
- export const AgenticaBenchmarkUtil = {
10
- errorToJson,
11
- expectedToJson,
12
- };
13
-
14
- function errorToJson<T>(error: T): T | ({
15
- [k in keyof T]: T[k]
16
- } & {
17
- name: string;
18
- message: string;
19
- stack: string;
20
- }) {
21
- if (error instanceof Error) {
22
- return {
23
- ...error,
24
- name: error.name,
25
- message: error.message,
26
- stack: error.stack,
27
- };
28
- }
29
- return error;
30
- }
31
-
32
- function expectedToJson(expected: IAgenticaBenchmarkExpected): any {
33
- if (expected.type === "standalone") {
34
- return {
35
- type: expected.type,
36
- operation: {
37
- name: expected.operation.name,
38
- description: expected.operation.function.description,
39
- },
40
- };
41
- }
42
- else if (expected.type === "array") {
43
- return {
44
- type: expected.type,
45
- items: expected.items.map(expectedToJson),
46
- };
47
- }
48
- else if (expected.type === "allOf") {
49
- return {
50
- type: expected.type,
51
- allOf: expected.allOf.map(expectedToJson),
52
- };
53
- }
54
- else {
55
- return {
56
- type: expected.type,
57
- anyOf: expected.anyOf.map(expectedToJson),
58
- };
59
- }
60
- }
1
+ /**
2
+ * @module
3
+ * This file contains functions to work with AgenticaBenchmarkUtil.
4
+ *
5
+ * @author Wrtn Technologies
6
+ */
7
+ import type { IAgenticaBenchmarkExpected } from "../structures/IAgenticaBenchmarkExpected";
8
+
9
+ export const AgenticaBenchmarkUtil = {
10
+ errorToJson,
11
+ expectedToJson,
12
+ };
13
+
14
+ function errorToJson<T>(error: T): T | ({
15
+ [k in keyof T]: T[k]
16
+ } & {
17
+ name: string;
18
+ message: string;
19
+ stack: string;
20
+ }) {
21
+ if (error instanceof Error) {
22
+ return {
23
+ ...error,
24
+ name: error.name,
25
+ message: error.message,
26
+ stack: error.stack,
27
+ };
28
+ }
29
+ return error;
30
+ }
31
+
32
+ function expectedToJson(expected: IAgenticaBenchmarkExpected): any {
33
+ if (expected.type === "standalone") {
34
+ return {
35
+ type: expected.type,
36
+ operation: {
37
+ name: expected.operation.name,
38
+ description: expected.operation.function.description,
39
+ },
40
+ };
41
+ }
42
+ else if (expected.type === "array") {
43
+ return {
44
+ type: expected.type,
45
+ items: expected.items.map(expectedToJson),
46
+ };
47
+ }
48
+ else if (expected.type === "allOf") {
49
+ return {
50
+ type: expected.type,
51
+ allOf: expected.allOf.map(expectedToJson),
52
+ };
53
+ }
54
+ else {
55
+ return {
56
+ type: expected.type,
57
+ anyOf: expected.anyOf.map(expectedToJson),
58
+ };
59
+ }
60
+ }