@agentica/benchmark 0.21.0 → 0.23.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +10 -11
- package/lib/AgenticaSelectBenchmark.js +7 -6
- package/lib/AgenticaSelectBenchmark.js.map +1 -1
- package/lib/MicroAgenticaCallBenchmark.d.ts +138 -0
- package/lib/MicroAgenticaCallBenchmark.js +199 -0
- package/lib/MicroAgenticaCallBenchmark.js.map +1 -0
- package/lib/index.d.ts +1 -0
- package/lib/index.js +1 -0
- package/lib/index.js.map +1 -1
- package/lib/index.mjs +120 -8
- package/lib/index.mjs.map +1 -1
- package/lib/internal/AgenticaBenchmarkPredicator.d.ts +2 -2
- package/lib/internal/AgenticaBenchmarkPredicator.js.map +1 -1
- package/lib/internal/AgenticaPromptReporter.js +20 -14
- package/lib/internal/AgenticaPromptReporter.js.map +1 -1
- package/lib/structures/IAgenticaSelectBenchmarkEvent.d.ts +2 -2
- package/package.json +3 -3
- package/src/AgenticaSelectBenchmark.ts +7 -9
- package/src/MicroAgenticaCallBenchmark.ts +282 -0
- package/src/index.ts +1 -1
- package/src/internal/AgenticaBenchmarkPredicator.ts +2 -2
- package/src/internal/AgenticaPromptReporter.ts +24 -14
- package/src/structures/IAgenticaSelectBenchmarkEvent.ts +2 -2
package/README.md
CHANGED
|
@@ -26,7 +26,7 @@ Are you a TypeScript developer? Then you're already an AI developer. Familiar wi
|
|
|
26
26
|
<!-- eslint-skip -->
|
|
27
27
|
|
|
28
28
|
```typescript
|
|
29
|
-
import { Agentica,
|
|
29
|
+
import { Agentica, assertHttpController } from "@agentica/core";
|
|
30
30
|
import OpenAI from "openai";
|
|
31
31
|
import typia from "typia";
|
|
32
32
|
|
|
@@ -40,24 +40,23 @@ const agent = new Agentica({
|
|
|
40
40
|
controllers: [
|
|
41
41
|
// functions from TypeScript class
|
|
42
42
|
{
|
|
43
|
-
protocol: "
|
|
43
|
+
protocol: "class",
|
|
44
|
+
name: "filesystem",
|
|
44
45
|
application: typia.llm.application<MobileFileSystem, "chatgpt">(),
|
|
45
46
|
execute: new MobileFileSystem(),
|
|
46
47
|
},
|
|
47
48
|
// functions from Swagger/OpenAPI
|
|
48
|
-
{
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
).then(r => r.json()),
|
|
55
|
-
}),
|
|
49
|
+
assertHttpController({
|
|
50
|
+
name: "shopping",
|
|
51
|
+
model: "chatgpt",
|
|
52
|
+
document: await fetch(
|
|
53
|
+
"https://shopping-be.wrtn.ai/editor/swagger.json",
|
|
54
|
+
).then(r => r.json()),
|
|
56
55
|
connection: {
|
|
57
56
|
host: "https://shopping-be.wrtn.ai",
|
|
58
57
|
headers: { Authorization: "Bearer ********" },
|
|
59
58
|
},
|
|
60
|
-
},
|
|
59
|
+
}),
|
|
61
60
|
],
|
|
62
61
|
});
|
|
63
62
|
await agent.conversate("I wanna buy MacBook Pro");
|
|
@@ -135,10 +135,11 @@ class AgenticaSelectBenchmark {
|
|
|
135
135
|
try {
|
|
136
136
|
const usage = core_1.AgenticaTokenUsage.zero();
|
|
137
137
|
const context = this.agent_.getContext({
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
138
|
+
prompt: core_1.factory.createUserInputHistory({
|
|
139
|
+
contents: [{
|
|
140
|
+
type: "text",
|
|
141
|
+
text: scenario.text,
|
|
142
|
+
}],
|
|
142
143
|
}),
|
|
143
144
|
usage,
|
|
144
145
|
});
|
|
@@ -161,8 +162,8 @@ class AgenticaSelectBenchmark {
|
|
|
161
162
|
selected,
|
|
162
163
|
usage,
|
|
163
164
|
assistantPrompts: histories
|
|
164
|
-
|
|
165
|
-
.filter(
|
|
165
|
+
// Only the assistant is allowed to emit text events.
|
|
166
|
+
.filter(p => p.type === "text"),
|
|
166
167
|
started_at,
|
|
167
168
|
completed_at: new Date(),
|
|
168
169
|
};
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"AgenticaSelectBenchmark.js","sourceRoot":"","sources":["../src/AgenticaSelectBenchmark.ts"],"names":[],"mappings":";;;;;;;;;;;;
|
|
1
|
+
{"version":3,"file":"AgenticaSelectBenchmark.js","sourceRoot":"","sources":["../src/AgenticaSelectBenchmark.ts"],"names":[],"mappings":";;;;;;;;;;;;AASA;;;;;GAKG;AACH,yCAA0E;AAC1E,+BAAiC;AAMjC,wFAAqF;AACrF,gGAA6F;AAE7F;;;;;;;;;;;;;;;;GAgBG;AACH,MAAa,uBAAuB;IAOlC;;;;OAIG;IACH,YAAmB,KAA4C;;QAC7D,IAAI,CAAC,MAAM,GAAG,KAAK,CAAC,KAAK,CAAC;QAC1B,IAAI,CAAC,UAAU,GAAG,KAAK,CAAC,SAAS,CAAC,KAAK,EAAE,CAAC;QAC1C,IAAI,CAAC,OAAO,GAAG;YACb,MAAM,EAAE,MAAA,MAAA,KAAK,CAAC,MAAM,0CAAE,MAAM,mCAAI,EAAE;YAClC,YAAY,EAAE,MAAA,MAAA,KAAK,CAAC,MAAM,0CAAE,YAAY,mCAAI,EAAE;SAC/C,CAAC;QACF,IAAI,CAAC,UAAU,GAAG,KAAK,CAAC,KAAK,CAAC,YAAY,EAAE,CAAC,KAAK,EAAE,CAAC;QACrD,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC;IACtB,CAAC;IAED;;;;;;;;;;;;;;;OAeG;IACU,OAAO,CAClB,QAAgE;;YAEhE,MAAM,UAAU,GAAS,IAAI,IAAI,EAAE,CAAC;YACpC,MAAM,SAAS,GAAc,IAAI,gBAAS,CAAC,IAAI,CAAC,OAAO,CAAC,YAAY,CAAC,CAAC;YACtE,MAAM,WAAW,GACb,MAAM,OAAO,CAAC,GAAG,CACjB,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,CAAO,QAAQ,EAAE,EAAE;gBACrC,MAAM,MAAM,GACR,MAAM,OAAO,CAAC,GAAG,CACjB,KAAK,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE,CAAC,CAAC,GAAG,CAAC,GAAS,EAAE;oBACzD,MAAM,SAAS,CAAC,OAAO,EAAE,CAAC;oBAC1B,MAAM,CAAC,GACH,MAAM,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;oBAC9B,MAAM,SAAS,CAAC,OAAO,EAAE,CAAC;oBAC1B,IAAI,QAAQ,KAAK,SAAS,EAAE,CAAC;wBAC3B,QAAQ,CAAC,CAAC,CAAC,CAAC;oBACd,CAAC;oBACD,OAAO,CAAC,CAAC;gBACX,CAAC,CAAA,CAAC,CACH,CAAC;gBACJ,OAAO;oBACL,QAAQ;oBACR,MAAM;oBACN,KAAK,EAAE,MAAM;yBACV,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,OAAO,CAAC;yBAC/B,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC;yBACjB,MAAM,CAAC,CAAC,GAAG,EAAE,GAAG,EAAE,EAAE,CAAC,yBAAkB,CAAC,IAAI,CAAC,GAAG,EAAE,GAAG,CAAC,EAAE,yBAAkB,CAAC,IAAI,EAAE,CAAC;iBACtF,CAAC;YACJ,CAAC,CAAA,CAAC,CACH,CAAC;YACJ,OAAO,CAAC,IAAI,CAAC,OAAO,GAAG;gBACrB,WAAW;gBACX,UAAU;gBACV,YAAY,EAAE,IAAI,IAAI,EAAE;gBACxB,KAAK,EAAE,WAAW;qBACf,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC;qBACjB,MAAM,CAAC,CAAC,GAAG,EAAE,GAAG,EAAE,EAAE,CAAC,yBAAkB,CAAC,IAAI,CAAC,GAAG,EAAE,GAAG,CAAC,EAAE,yBAAkB,CAAC,IAAI,EAAE,CAAC;aACtF,CAAC,CAA
C;QACL,CAAC;KAAA;IAED;;;;;;;;;;;;;;;;;;OAkBG;IACI,MAAM;QACX,IAAI,IAAI,CAAC,OAAO,KAAK,IAAI,EAAE,CAAC;YAC1B,MAAM,IAAI,KAAK,CAAC,gCAAgC,CAAC,CAAC;QACpD,CAAC;QACD,OAAO,iEAA+B,CAAC,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IAChE,CAAC;IAEa,IAAI,CAChB,QAAiD;;;YAEjD,MAAM,UAAU,GAAS,IAAI,IAAI,EAAE,CAAC;YACpC,IAAI,CAAC;gBACH,MAAM,KAAK,GAAuB,yBAAkB,CAAC,IAAI,EAAE,CAAC;gBAC5D,MAAM,OAAO,GAAG,IAAI,CAAC,MAAM,CAAC,UAAU,CAAC;oBACrC,MAAM,EAAE,cAAO,CAAC,sBAAsB,CAAC;wBACrC,QAAQ,EAAE,CAAC;gCACT,IAAI,EAAE,MAAM;gCACZ,IAAI,EAAE,QAAQ,CAAC,IAAI;6BACpB,CAAC;qBACH,CAAC;oBACF,KAAK;iBACN,CAAC,CAAC;gBACH,IAAI,OAAO,CAAA,MAAA,OAAO,CAAC,MAAM,0CAAE,QAAQ,CAAA,KAAK,UAAU,EAAE,CAAC;oBACnD,MAAM,IAAI,SAAS,CAAC,8BAA8B,CAAC,CAAC;gBACtD,CAAC;gBAED,MAAM,SAAS,GACX,MAAM,CAAC,MAAA,MAAA,MAAA,OAAO,CAAC,MAAM,0CAAE,QAAQ,0CAAE,MAAM,mCAAI,kBAAW,CAAC,MAAM,CAAC,CAAC,gCAC5D,OAAO,KACV,SAAS,EAAE,IAAI,CAAC,UAAU,CAAC,KAAK,EAAE,EAClC,KAAK,EAAE,EAAE,EACT,KAAK,EAAE,GAAG,EAAE,CAAC,IAAI,EACjB,QAAQ,EAAE,GAAS,EAAE,gDAAE,CAAC,CAAA,GACQ,CAAC,CAAC;gBACtC,MAAM,QAAQ,GAAwC,SAAS;qBAC5D,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,QAAQ,CAAC;qBAChC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,UAAU,CAAC;qBACtB,IAAI,EAAE,CAAC;gBACV,OAAO;oBACL,IAAI,EAAE,yDAA2B,CAAC,OAAO,CAAC;wBACxC,QAAQ,EAAE,QAAQ,CAAC,QAAQ;wBAC3B,UAAU,EAAE,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,SAAS,CAAC;qBAC3C,CAAC;wBACA,CAAC,CAAC,SAAS;wBACX,CAAC,CAAC,SAAS;oBACb,QAAQ;oBACR,QAAQ;oBACR,KAAK;oBACL,gBAAgB,EAAE,SAAS;wBACzB,qDAAqD;yBACpD,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,MAAM,CAAC;oBACjC,UAAU;oBACV,YAAY,EAAE,IAAI,IAAI,EAAE;iBAGqB,CAAC;YAClD,CAAC;YACD,OAAO,KAAK,EAAE,CAAC;gBACb,OAAO;oBACL,IAAI,EAAE,OAAO;oBACb,QAAQ;oBACR,KAAK;oBACL,UAAU;oBACV,YAAY,EAAE,IAAI,IAAI,EAAE;iBAC6B,CAAC;YAC1D,CAAC;QACH,CAAC;KAAA;CACF;AAtKD,0DAsKC"}
|
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
import type { MicroAgentica } from "@agentica/core";
|
|
2
|
+
import type { ILlmSchema } from "@samchon/openapi";
|
|
3
|
+
import type { tags } from "typia";
|
|
4
|
+
import type { IAgenticaCallBenchmarkEvent } from "./structures/IAgenticaCallBenchmarkEvent";
|
|
5
|
+
import type { IAgenticaCallBenchmarkResult } from "./structures/IAgenticaCallBenchmarkResult";
|
|
6
|
+
import type { IAgenticaCallBenchmarkScenario } from "./structures/IAgenticaCallBenchmarkScenario";
|
|
7
|
+
/**
|
|
8
|
+
* LLM function calling selection benchmark.
|
|
9
|
+
*
|
|
10
|
+
* `MicroAgenticaCallBenchmark` is a class for the benchmark of the
|
|
11
|
+
* LLM (Large Language Model) function calling part. It utilizes both
|
|
12
|
+
* `selector` and `caller` agents and tests whether the expected
|
|
13
|
+
* {@link IAgenticaOperation operations} are properly selected and
|
|
14
|
+
* called from the given
|
|
15
|
+
* {@link IAgenticaCallBenchmarkScenario scenarios}.
|
|
16
|
+
*
|
|
17
|
+
* Note that, this `MicroAgenticaCallBenchmark` consumes a lot of time and
|
|
18
|
+
* LLM token costs because it needs the whole process of the
|
|
19
|
+
* {@link MicroAgentica} class with a lot of repetitions. If you don't want
|
|
20
|
+
* such a heavy benchmark, consider using
|
|
21
|
+
* {@link AgenticaSelectBenchmark} instead. In my experience,
|
|
22
|
+
* {@link MicroAgentica} does not fail at function calling, so the function
|
|
23
|
+
* selection benchmark is much more economical.
|
|
24
|
+
*
|
|
25
|
+
* @author Samchon
|
|
26
|
+
*/
|
|
27
|
+
export declare class MicroAgenticaCallBenchmark<Model extends ILlmSchema.Model> {
|
|
28
|
+
private agent_;
|
|
29
|
+
private scenarios_;
|
|
30
|
+
private config_;
|
|
31
|
+
private result_;
|
|
32
|
+
/**
|
|
33
|
+
* Initializer Constructor.
|
|
34
|
+
*
|
|
35
|
+
* @param props Properties of the selection benchmark
|
|
36
|
+
*/
|
|
37
|
+
constructor(props: MicroAgenticaCallBenchmark.IProps<Model>);
|
|
38
|
+
/**
|
|
39
|
+
* Execute the benchmark.
|
|
40
|
+
*
|
|
41
|
+
* Execute the benchmark of the LLM function calling, and returns
|
|
42
|
+
* the result of the benchmark.
|
|
43
|
+
*
|
|
44
|
+
* If you want to see progress of the benchmark, you can pass a callback
|
|
45
|
+
* function as the argument of the `listener`. The callback function
|
|
46
|
+
* would be called whenever a benchmark event occurs.
|
|
47
|
+
*
|
|
48
|
+
* Also, you can publish a markdown format report by calling
|
|
49
|
+
* the {@link report} function after the benchmark execution.
|
|
50
|
+
*
|
|
51
|
+
* @param listener Callback function listening the benchmark events
|
|
52
|
+
* @returns Results of the function calling benchmark
|
|
53
|
+
*/
|
|
54
|
+
execute(listener?: (event: IAgenticaCallBenchmarkEvent<Model>) => void): Promise<IAgenticaCallBenchmarkResult<Model>>;
|
|
55
|
+
/**
|
|
56
|
+
* Report the benchmark result as markdown files.
|
|
57
|
+
*
|
|
58
|
+
* Report the benchmark result {@link execute}d by
|
|
59
|
+
* `MicroAgenticaCallBenchmark` as markdown files, and returns a dictionary
|
|
60
|
+
* object of the markdown reporting files. The key of the dictionary
|
|
61
|
+
* would be file name, and the value would be the markdown content.
|
|
62
|
+
*
|
|
63
|
+
* For reference, the markdown files are composed like below:
|
|
64
|
+
*
|
|
65
|
+
* - `./README.md`
|
|
66
|
+
* - `./scenario-1/README.md`
|
|
67
|
+
* - `./scenario-1/1.success.md`
|
|
68
|
+
* - `./scenario-1/2.failure.md`
|
|
69
|
+
* - `./scenario-1/3.error.md`
|
|
70
|
+
*
|
|
71
|
+
* @returns Dictionary of markdown files.
|
|
72
|
+
*/
|
|
73
|
+
report(): Record<string, string>;
|
|
74
|
+
private step;
|
|
75
|
+
}
|
|
76
|
+
export declare namespace MicroAgenticaCallBenchmark {
|
|
77
|
+
/**
|
|
78
|
+
* Properties of the {@link MicroAgenticaCallBenchmark} constructor.
|
|
79
|
+
*/
|
|
80
|
+
interface IProps<Model extends ILlmSchema.Model> {
|
|
81
|
+
/**
|
|
82
|
+
* AI agent instance.
|
|
83
|
+
*/
|
|
84
|
+
agent: MicroAgentica<Model>;
|
|
85
|
+
/**
|
|
86
|
+
* List of scenarios that you expect.
|
|
87
|
+
*/
|
|
88
|
+
scenarios: IAgenticaCallBenchmarkScenario<Model>[];
|
|
89
|
+
/**
|
|
90
|
+
* Configuration for the benchmark.
|
|
91
|
+
*/
|
|
92
|
+
config?: Partial<IConfig>;
|
|
93
|
+
}
|
|
94
|
+
/**
|
|
95
|
+
* Configuration for the benchmark.
|
|
96
|
+
*
|
|
97
|
+
* `MicroAgenticaCallBenchmark.IConfig` is a data structure which
|
|
98
|
+
* represents a configuration for the benchmark, especially the
|
|
99
|
+
* capacity information of the benchmark execution.
|
|
100
|
+
*/
|
|
101
|
+
interface IConfig {
|
|
102
|
+
/**
|
|
103
|
+
* Repeat count.
|
|
104
|
+
*
|
|
105
|
+
* The number of repeating count for the benchmark execution
|
|
106
|
+
* for each scenario.
|
|
107
|
+
*
|
|
108
|
+
* @default 10
|
|
109
|
+
*/
|
|
110
|
+
repeat: number & tags.Type<"uint32"> & tags.Minimum<1>;
|
|
111
|
+
/**
|
|
112
|
+
* Simultaneous count.
|
|
113
|
+
*
|
|
114
|
+
* The number of simultaneous count for the parallel benchmark
|
|
115
|
+
* execution.
|
|
116
|
+
*
|
|
117
|
+
* If you configure this property greater than `1`, the benchmark
|
|
118
|
+
* for each scenario would be executed in parallel in the given
|
|
119
|
+
* count.
|
|
120
|
+
*
|
|
121
|
+
* @default 10
|
|
122
|
+
*/
|
|
123
|
+
simultaneous: number & tags.Type<"uint32"> & tags.Minimum<1>;
|
|
124
|
+
/**
|
|
125
|
+
* Number of consents.
|
|
126
|
+
*
|
|
127
|
+
* AI agent sometimes asks user to consent to the function
|
|
128
|
+
* calling, and perform it at the next step.
|
|
129
|
+
*
|
|
130
|
+
* This property represents the number of consents to allow.
|
|
131
|
+
* If the number of consents from the AI agent exceeds the
|
|
132
|
+
* configured value, the benchmark will be failed.
|
|
133
|
+
*
|
|
134
|
+
* @default 3
|
|
135
|
+
*/
|
|
136
|
+
consent: number;
|
|
137
|
+
}
|
|
138
|
+
}
|
|
@@ -0,0 +1,199 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
|
|
3
|
+
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
|
|
4
|
+
return new (P || (P = Promise))(function (resolve, reject) {
|
|
5
|
+
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
|
|
6
|
+
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
|
|
7
|
+
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
|
|
8
|
+
step((generator = generator.apply(thisArg, _arguments || [])).next());
|
|
9
|
+
});
|
|
10
|
+
};
|
|
11
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
12
|
+
exports.MicroAgenticaCallBenchmark = void 0;
|
|
13
|
+
/**
|
|
14
|
+
* @module
|
|
15
|
+
* This file contains the implementation of the MicroAgenticaCallBenchmark class.
|
|
16
|
+
*
|
|
17
|
+
* @author Wrtn Technologies
|
|
18
|
+
*/
|
|
19
|
+
const core_1 = require("@agentica/core");
|
|
20
|
+
const tstl_1 = require("tstl");
|
|
21
|
+
const AgenticaBenchmarkPredicator_1 = require("./internal/AgenticaBenchmarkPredicator");
|
|
22
|
+
const AgenticaCallBenchmarkReporter_1 = require("./internal/AgenticaCallBenchmarkReporter");
|
|
23
|
+
/**
|
|
24
|
+
* LLM function calling selection benchmark.
|
|
25
|
+
*
|
|
26
|
+
* `MicroAgenticaCallBenchmark` is a class for the benchmark of the
|
|
27
|
+
* LLM (Large Language Model) function calling part. It utilizes both
|
|
28
|
+
* `selector` and `caller` agents and tests whether the expected
|
|
29
|
+
* {@link IAgenticaOperation operations} are properly selected and
|
|
30
|
+
* called from the given
|
|
31
|
+
* {@link IAgenticaCallBenchmarkScenario scenarios}.
|
|
32
|
+
*
|
|
33
|
+
* Note that, this `MicroAgenticaCallBenchmark` consumes a lot of time and
|
|
34
|
+
* LLM token costs because it needs the whole process of the
|
|
35
|
+
* {@link MicroAgentica} class with a lot of repetitions. If you don't want
|
|
36
|
+
* such a heavy benchmark, consider using
|
|
37
|
+
* {@link AgenticaSelectBenchmark} instead. In my experience,
|
|
38
|
+
* {@link MicroAgentica} does not fail at function calling, so the function
|
|
39
|
+
* selection benchmark is much more economical.
|
|
40
|
+
*
|
|
41
|
+
* @author Samchon
|
|
42
|
+
*/
|
|
43
|
+
class MicroAgenticaCallBenchmark {
|
|
44
|
+
/**
|
|
45
|
+
* Initializer Constructor.
|
|
46
|
+
*
|
|
47
|
+
* @param props Properties of the selection benchmark
|
|
48
|
+
*/
|
|
49
|
+
constructor(props) {
|
|
50
|
+
var _a, _b, _c, _d, _e, _f;
|
|
51
|
+
this.agent_ = props.agent;
|
|
52
|
+
this.scenarios_ = props.scenarios.slice();
|
|
53
|
+
this.config_ = {
|
|
54
|
+
repeat: (_b = (_a = props.config) === null || _a === void 0 ? void 0 : _a.repeat) !== null && _b !== void 0 ? _b : 10,
|
|
55
|
+
simultaneous: (_d = (_c = props.config) === null || _c === void 0 ? void 0 : _c.simultaneous) !== null && _d !== void 0 ? _d : 10,
|
|
56
|
+
consent: (_f = (_e = props.config) === null || _e === void 0 ? void 0 : _e.consent) !== null && _f !== void 0 ? _f : 3,
|
|
57
|
+
};
|
|
58
|
+
this.result_ = null;
|
|
59
|
+
}
|
|
60
|
+
/**
|
|
61
|
+
* Execute the benchmark.
|
|
62
|
+
*
|
|
63
|
+
* Execute the benchmark of the LLM function calling, and returns
|
|
64
|
+
* the result of the benchmark.
|
|
65
|
+
*
|
|
66
|
+
* If you want to see progress of the benchmark, you can pass a callback
|
|
67
|
+
* function as the argument of the `listener`. The callback function
|
|
68
|
+
* would be called whenever a benchmark event occurs.
|
|
69
|
+
*
|
|
70
|
+
* Also, you can publish a markdown format report by calling
|
|
71
|
+
* the {@link report} function after the benchmark execution.
|
|
72
|
+
*
|
|
73
|
+
* @param listener Callback function listening the benchmark events
|
|
74
|
+
* @returns Results of the function calling benchmark
|
|
75
|
+
*/
|
|
76
|
+
execute(listener) {
|
|
77
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
78
|
+
const started_at = new Date();
|
|
79
|
+
const semaphore = new tstl_1.Semaphore(this.config_.simultaneous);
|
|
80
|
+
const task = this.scenarios_.map((scenario) => __awaiter(this, void 0, void 0, function* () {
|
|
81
|
+
const events = yield Promise.all(Array.from({ length: this.config_.repeat }).map(() => __awaiter(this, void 0, void 0, function* () {
|
|
82
|
+
yield semaphore.acquire();
|
|
83
|
+
const e = yield this.step(scenario);
|
|
84
|
+
yield semaphore.release();
|
|
85
|
+
if (listener !== undefined) {
|
|
86
|
+
listener(e);
|
|
87
|
+
}
|
|
88
|
+
return e;
|
|
89
|
+
})));
|
|
90
|
+
return {
|
|
91
|
+
scenario,
|
|
92
|
+
events,
|
|
93
|
+
usage: events
|
|
94
|
+
.filter(e => e.type !== "error")
|
|
95
|
+
.map(e => e.usage)
|
|
96
|
+
.reduce((acc, cur) => core_1.AgenticaTokenUsage.plus(acc, cur), core_1.AgenticaTokenUsage.zero()),
|
|
97
|
+
};
|
|
98
|
+
}));
|
|
99
|
+
const experiments = yield Promise.all(task);
|
|
100
|
+
return (this.result_ = {
|
|
101
|
+
experiments,
|
|
102
|
+
started_at,
|
|
103
|
+
completed_at: new Date(),
|
|
104
|
+
usage: experiments
|
|
105
|
+
.map(p => p.usage)
|
|
106
|
+
.reduce((acc, cur) => core_1.AgenticaTokenUsage.plus(acc, cur), core_1.AgenticaTokenUsage.zero()),
|
|
107
|
+
});
|
|
108
|
+
});
|
|
109
|
+
}
|
|
110
|
+
/**
|
|
111
|
+
* Report the benchmark result as markdown files.
|
|
112
|
+
*
|
|
113
|
+
* Report the benchmark result {@link execute}d by
|
|
114
|
+
* `MicroAgenticaCallBenchmark` as markdown files, and returns a dictionary
|
|
115
|
+
* object of the markdown reporting files. The key of the dictionary
|
|
116
|
+
* would be file name, and the value would be the markdown content.
|
|
117
|
+
*
|
|
118
|
+
* For reference, the markdown files are composed like below:
|
|
119
|
+
*
|
|
120
|
+
* - `./README.md`
|
|
121
|
+
* - `./scenario-1/README.md`
|
|
122
|
+
* - `./scenario-1/1.success.md`
|
|
123
|
+
* - `./scenario-1/2.failure.md`
|
|
124
|
+
* - `./scenario-1/3.error.md`
|
|
125
|
+
*
|
|
126
|
+
* @returns Dictionary of markdown files.
|
|
127
|
+
*/
|
|
128
|
+
report() {
|
|
129
|
+
if (this.result_ === null) {
|
|
130
|
+
throw new Error("Benchmark is not executed yet.");
|
|
131
|
+
}
|
|
132
|
+
return AgenticaCallBenchmarkReporter_1.AgenticaCallBenchmarkReporter.markdown(this.result_);
|
|
133
|
+
}
|
|
134
|
+
step(scenario) {
|
|
135
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
136
|
+
const agent = this.agent_.clone();
|
|
137
|
+
const started_at = new Date();
|
|
138
|
+
const success = () => AgenticaBenchmarkPredicator_1.AgenticaBenchmarkPredicator.success({
|
|
139
|
+
expected: scenario.expected,
|
|
140
|
+
operations: agent
|
|
141
|
+
.getHistories()
|
|
142
|
+
.filter(p => p.type === "execute")
|
|
143
|
+
.map(p => p.operation),
|
|
144
|
+
strict: false,
|
|
145
|
+
});
|
|
146
|
+
const out = () => {
|
|
147
|
+
const select = AgenticaBenchmarkPredicator_1.AgenticaBenchmarkPredicator.success({
|
|
148
|
+
expected: scenario.expected,
|
|
149
|
+
operations: agent
|
|
150
|
+
.getHistories()
|
|
151
|
+
.filter(p => p.type === "execute")
|
|
152
|
+
.map(p => p.operation),
|
|
153
|
+
strict: false,
|
|
154
|
+
});
|
|
155
|
+
const call = success();
|
|
156
|
+
return {
|
|
157
|
+
type: (call ? "success" : "failure"),
|
|
158
|
+
scenario,
|
|
159
|
+
select,
|
|
160
|
+
call,
|
|
161
|
+
prompts: agent.getHistories(),
|
|
162
|
+
usage: agent.getTokenUsage(),
|
|
163
|
+
started_at,
|
|
164
|
+
completed_at: new Date(),
|
|
165
|
+
};
|
|
166
|
+
};
|
|
167
|
+
try {
|
|
168
|
+
yield agent.conversate(scenario.text);
|
|
169
|
+
if (success()) {
|
|
170
|
+
return out();
|
|
171
|
+
}
|
|
172
|
+
for (let i = 0; i < this.config_.consent; ++i) {
|
|
173
|
+
const next = yield AgenticaBenchmarkPredicator_1.AgenticaBenchmarkPredicator.isNext(agent);
|
|
174
|
+
if (next === null) {
|
|
175
|
+
break;
|
|
176
|
+
}
|
|
177
|
+
yield agent.conversate(next);
|
|
178
|
+
if (success()) {
|
|
179
|
+
return out();
|
|
180
|
+
}
|
|
181
|
+
}
|
|
182
|
+
return out();
|
|
183
|
+
}
|
|
184
|
+
catch (error) {
|
|
185
|
+
return {
|
|
186
|
+
type: "error",
|
|
187
|
+
scenario,
|
|
188
|
+
prompts: agent.getHistories(),
|
|
189
|
+
usage: agent.getTokenUsage(),
|
|
190
|
+
error,
|
|
191
|
+
started_at,
|
|
192
|
+
completed_at: new Date(),
|
|
193
|
+
};
|
|
194
|
+
}
|
|
195
|
+
});
|
|
196
|
+
}
|
|
197
|
+
}
|
|
198
|
+
exports.MicroAgenticaCallBenchmark = MicroAgenticaCallBenchmark;
|
|
199
|
+
//# sourceMappingURL=MicroAgenticaCallBenchmark.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"MicroAgenticaCallBenchmark.js","sourceRoot":"","sources":["../src/MicroAgenticaCallBenchmark.ts"],"names":[],"mappings":";;;;;;;;;;;;AAIA;;;;;GAKG;AACH,yCAAoD;AACpD,+BAAiC;AAMjC,wFAAqF;AACrF,4FAAyF;AAEzF;;;;;;;;;;;;;;;;;;;GAmBG;AACH,MAAa,0BAA0B;IAMrC;;;;OAIG;IACH,YAAmB,KAA+C;;QAChE,IAAI,CAAC,MAAM,GAAG,KAAK,CAAC,KAAK,CAAC;QAC1B,IAAI,CAAC,UAAU,GAAG,KAAK,CAAC,SAAS,CAAC,KAAK,EAAE,CAAC;QAC1C,IAAI,CAAC,OAAO,GAAG;YACb,MAAM,EAAE,MAAA,MAAA,KAAK,CAAC,MAAM,0CAAE,MAAM,mCAAI,EAAE;YAClC,YAAY,EAAE,MAAA,MAAA,KAAK,CAAC,MAAM,0CAAE,YAAY,mCAAI,EAAE;YAC9C,OAAO,EAAE,MAAA,MAAA,KAAK,CAAC,MAAM,0CAAE,OAAO,mCAAI,CAAC;SACpC,CAAC;QACF,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC;IACtB,CAAC;IAED;;;;;;;;;;;;;;;OAeG;IACU,OAAO,CAClB,QAA8D;;YAE9D,MAAM,UAAU,GAAS,IAAI,IAAI,EAAE,CAAC;YACpC,MAAM,SAAS,GAAc,IAAI,gBAAS,CAAC,IAAI,CAAC,OAAO,CAAC,YAAY,CAAC,CAAC;YACtE,MAAM,IAAI,GAAG,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,CAAO,QAAQ,EAAE,EAAE;gBAClD,MAAM,MAAM,GACR,MAAM,OAAO,CAAC,GAAG,CACjB,KAAK,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE,CAAC,CAAC,GAAG,CAAC,GAAS,EAAE;oBACzD,MAAM,SAAS,CAAC,OAAO,EAAE,CAAC;oBAC1B,MAAM,CAAC,GACH,MAAM,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;oBAC9B,MAAM,SAAS,CAAC,OAAO,EAAE,CAAC;oBAE1B,IAAI,QAAQ,KAAK,SAAS,EAAE,CAAC;wBAC3B,QAAQ,CAAC,CAAC,CAAC,CAAC;oBACd,CAAC;oBAED,OAAO,CAAC,CAAC;gBACX,CAAC,CAAA,CAAC,CACH,CAAC;gBACJ,OAAO;oBACL,QAAQ;oBACR,MAAM;oBACN,KAAK,EAAE,MAAM;yBACV,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,OAAO,CAAC;yBAC/B,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC;yBACjB,MAAM,CAAC,CAAC,GAAG,EAAE,GAAG,EAAE,EAAE,CAAC,yBAAkB,CAAC,IAAI,CAAC,GAAG,EAAE,GAAG,CAAC,EAAE,yBAAkB,CAAC,IAAI,EAAE,CAAC;iBACtF,CAAC;YACJ,CAAC,CAAA,CAAC,CAAC;YACH,MAAM,WAAW,GACb,MAAM,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;YAC5B,OAAO,CAAC,IAAI,CAAC,OAAO,GAAG;gBACrB,WAAW;gBACX,UAAU;gBACV,YAAY,EAAE,IAAI,IAAI,EAAE;gBACxB,KAAK,EAAE,WAAW;qBACf,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC;qBACjB,MAAM,CAAC,CAAC,GAAG,EAAE,GAAG,EAAE,EAAE,CAAC,yBAAkB,CAAC,IAAI,CAAC,GAAG,EAAE,GAAG,CAAC,EAAE,yBAAkB,CAAC,IAA
I,EAAE,CAAC;aACtF,CAAC,CAAC;QACL,CAAC;KAAA;IAED;;;;;;;;;;;;;;;;;OAiBG;IACI,MAAM;QACX,IAAI,IAAI,CAAC,OAAO,KAAK,IAAI,EAAE,CAAC;YAC1B,MAAM,IAAI,KAAK,CAAC,gCAAgC,CAAC,CAAC;QACpD,CAAC;QACD,OAAO,6DAA6B,CAAC,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IAC9D,CAAC;IAEa,IAAI,CAChB,QAA+C;;YAE/C,MAAM,KAAK,GAAyB,IAAI,CAAC,MAAM,CAAC,KAAK,EAAE,CAAC;YACxD,MAAM,UAAU,GAAS,IAAI,IAAI,EAAE,CAAC;YACpC,MAAM,OAAO,GAAG,GAAG,EAAE,CACnB,yDAA2B,CAAC,OAAO,CAAC;gBAClC,QAAQ,EAAE,QAAQ,CAAC,QAAQ;gBAC3B,UAAU,EAAE,KAAK;qBACd,YAAY,EAAE;qBACd,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,SAAS,CAAC;qBACjC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,SAAS,CAAC;gBACxB,MAAM,EAAE,KAAK;aACd,CAAC,CAAC;YACL,MAAM,GAAG,GAAG,GAAuC,EAAE;gBACnD,MAAM,MAAM,GAAG,yDAA2B,CAAC,OAAO,CAAC;oBACjD,QAAQ,EAAE,QAAQ,CAAC,QAAQ;oBAC3B,UAAU,EAAE,KAAK;yBACd,YAAY,EAAE;yBACd,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,SAAS,CAAC;yBACjC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,SAAS,CAAC;oBACxB,MAAM,EAAE,KAAK;iBACd,CAAC,CAAC;gBACH,MAAM,IAAI,GAAG,OAAO,EAAE,CAAC;gBACvB,OAAO;oBACL,IAAI,EAAE,CAAC,IAAI,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,SAAS,CAAc;oBACjD,QAAQ;oBACR,MAAM;oBACN,IAAI;oBACJ,OAAO,EAAE,KAAK,CAAC,YAAY,EAAE;oBAC7B,KAAK,EAAE,KAAK,CAAC,aAAa,EAAE;oBAC5B,UAAU;oBACV,YAAY,EAAE,IAAI,IAAI,EAAE;iBAC6B,CAAC;YAC1D,CAAC,CAAC;YAEF,IAAI,CAAC;gBACH,MAAM,KAAK,CAAC,UAAU,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC;gBACtC,IAAI,OAAO,EAAE,EAAE,CAAC;oBACd,OAAO,GAAG,EAAE,CAAC;gBACf,CAAC;gBAED,KAAK,IAAI,CAAC,GAAW,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,OAAO,CAAC,OAAO,EAAE,EAAE,CAAC,EAAE,CAAC;oBACtD,MAAM,IAAI,GACN,MAAM,yDAA2B,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;oBACpD,IAAI,IAAI,KAAK,IAAI,EAAE,CAAC;wBAClB,MAAM;oBACR,CAAC;oBAED,MAAM,KAAK,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC;oBAC7B,IAAI,OAAO,EAAE,EAAE,CAAC;wBACd,OAAO,GAAG,EAAE,CAAC;oBACf,CAAC;gBACH,CAAC;gBACD,OAAO,GAAG,EAAE,CAAC;YACf,CAAC;YACD,OAAO,KAAK,EAAE,CAAC;gBACb,OAAO;oBACL,IAAI,EAAE,OAAO;oBACb,QAAQ;oBACR,OAAO,EAAE,KAAK,CAAC,YAAY,EAAE;oBAC7B,KAAK,EAAE,KAAK,CAAC,aAAa,EAAE;oBAC5B,KAAK;oBACL,UAAU;oBACV,YAAY,EAAE,IAAI,IAAI,EAAE;iBACzB,CAAC;YACJ,CAAC;Q
ACH,CAAC;KAAA;CACF;AA7KD,gEA6KC"}
|
package/lib/index.d.ts
CHANGED
package/lib/index.js
CHANGED
|
@@ -15,5 +15,6 @@ var __exportStar = (this && this.__exportStar) || function(m, exports) {
|
|
|
15
15
|
};
|
|
16
16
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
17
17
|
__exportStar(require("./AgenticaCallBenchmark"), exports);
|
|
18
|
+
__exportStar(require("./MicroAgenticaCallBenchmark"), exports);
|
|
18
19
|
__exportStar(require("./AgenticaSelectBenchmark"), exports);
|
|
19
20
|
//# sourceMappingURL=index.js.map
|
package/lib/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;AAAA,0DAAwC;
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;AAAA,0DAAwC;AACxC,+DAA6C;AAC7C,4DAA0C"}
|
package/lib/index.mjs
CHANGED
|
@@ -258,17 +258,25 @@ const AgenticaPromptReporter = {
|
|
|
258
258
|
};
|
|
259
259
|
|
|
260
260
|
function markdown$2(p) {
|
|
261
|
+
if (p.type === "user_input") {
|
|
262
|
+
return [ `### User Input`, p.contents, "" ].join("\n");
|
|
263
|
+
}
|
|
261
264
|
if (p.type === "text") {
|
|
262
265
|
return [ `### Text (${p.role})`, p.text, "" ].join("\n");
|
|
263
|
-
}
|
|
266
|
+
}
|
|
267
|
+
if (p.type === "select" || p.type === "cancel") {
|
|
264
268
|
return [ `### ${p.type === "select" ? "Select" : "Cancel"}`, ...p.selections.flatMap((s => {
|
|
265
269
|
const functionDescriptionCount = s.operation.function.description?.length ?? 0;
|
|
266
270
|
return [ `#### ${s.operation.name}`, ` - controller: ${s.operation.controller.name}`, ` - function: ${s.operation.function.name}`, ` - reason: ${s.reason}`, "", ...functionDescriptionCount > 0 ? [ s.operation.function.description, "" ] : [] ];
|
|
267
271
|
})) ].join("\n");
|
|
268
|
-
}
|
|
272
|
+
}
|
|
273
|
+
if (p.type === "describe") {
|
|
269
274
|
return [ "### Describe", ...p.executes.map((e => ` - ${e.operation.name}`)), "", ...p.text.split("\n").map((s => `> ${s}`)), "" ].join("\n");
|
|
270
275
|
}
|
|
271
|
-
|
|
276
|
+
if (p.type === "execute") {
|
|
277
|
+
return [ "### Execute", ` - name: ${p.operation.name}`, ` - controller: ${p.operation.controller.name}`, ` - function: ${p.operation.function.name}`, "", "```json", JSON.stringify(p.arguments, null, 2), "```", "" ].join("\n");
|
|
278
|
+
}
|
|
279
|
+
throw new Error("Invalid history type");
|
|
272
280
|
}
|
|
273
281
|
|
|
274
282
|
const AgenticaCallBenchmarkReporter = {
|
|
@@ -405,6 +413,108 @@ class AgenticaCallBenchmark {
|
|
|
405
413
|
}
|
|
406
414
|
}
|
|
407
415
|
|
|
416
|
+
class MicroAgenticaCallBenchmark {
|
|
417
|
+
constructor(props) {
|
|
418
|
+
this.agent_ = props.agent;
|
|
419
|
+
this.scenarios_ = props.scenarios.slice();
|
|
420
|
+
this.config_ = {
|
|
421
|
+
repeat: props.config?.repeat ?? 10,
|
|
422
|
+
simultaneous: props.config?.simultaneous ?? 10,
|
|
423
|
+
consent: props.config?.consent ?? 3
|
|
424
|
+
};
|
|
425
|
+
this.result_ = null;
|
|
426
|
+
}
|
|
427
|
+
async execute(listener) {
|
|
428
|
+
const started_at = new Date;
|
|
429
|
+
const semaphore = new Semaphore(this.config_.simultaneous);
|
|
430
|
+
const task = this.scenarios_.map((async scenario => {
|
|
431
|
+
const events = await Promise.all(Array.from({
|
|
432
|
+
length: this.config_.repeat
|
|
433
|
+
}).map((async () => {
|
|
434
|
+
await semaphore.acquire();
|
|
435
|
+
const e = await this.step(scenario);
|
|
436
|
+
await semaphore.release();
|
|
437
|
+
if (listener !== undefined) {
|
|
438
|
+
listener(e);
|
|
439
|
+
}
|
|
440
|
+
return e;
|
|
441
|
+
})));
|
|
442
|
+
return {
|
|
443
|
+
scenario,
|
|
444
|
+
events,
|
|
445
|
+
usage: events.filter((e => e.type !== "error")).map((e => e.usage)).reduce(((acc, cur) => AgenticaTokenUsage.plus(acc, cur)), AgenticaTokenUsage.zero())
|
|
446
|
+
};
|
|
447
|
+
}));
|
|
448
|
+
const experiments = await Promise.all(task);
|
|
449
|
+
return this.result_ = {
|
|
450
|
+
experiments,
|
|
451
|
+
started_at,
|
|
452
|
+
completed_at: new Date,
|
|
453
|
+
usage: experiments.map((p => p.usage)).reduce(((acc, cur) => AgenticaTokenUsage.plus(acc, cur)), AgenticaTokenUsage.zero())
|
|
454
|
+
};
|
|
455
|
+
}
|
|
456
|
+
report() {
|
|
457
|
+
if (this.result_ === null) {
|
|
458
|
+
throw new Error("Benchmark is not executed yet.");
|
|
459
|
+
}
|
|
460
|
+
return AgenticaCallBenchmarkReporter.markdown(this.result_);
|
|
461
|
+
}
|
|
462
|
+
async step(scenario) {
|
|
463
|
+
const agent = this.agent_.clone();
|
|
464
|
+
const started_at = new Date;
|
|
465
|
+
const success = () => AgenticaBenchmarkPredicator.success({
|
|
466
|
+
expected: scenario.expected,
|
|
467
|
+
operations: agent.getHistories().filter((p => p.type === "execute")).map((p => p.operation)),
|
|
468
|
+
strict: false
|
|
469
|
+
});
|
|
470
|
+
const out = () => {
|
|
471
|
+
const select = AgenticaBenchmarkPredicator.success({
|
|
472
|
+
expected: scenario.expected,
|
|
473
|
+
operations: agent.getHistories().filter((p => p.type === "execute")).map((p => p.operation)),
|
|
474
|
+
strict: false
|
|
475
|
+
});
|
|
476
|
+
const call = success();
|
|
477
|
+
return {
|
|
478
|
+
type: call ? "success" : "failure",
|
|
479
|
+
scenario,
|
|
480
|
+
select,
|
|
481
|
+
call,
|
|
482
|
+
prompts: agent.getHistories(),
|
|
483
|
+
usage: agent.getTokenUsage(),
|
|
484
|
+
started_at,
|
|
485
|
+
completed_at: new Date
|
|
486
|
+
};
|
|
487
|
+
};
|
|
488
|
+
try {
|
|
489
|
+
await agent.conversate(scenario.text);
|
|
490
|
+
if (success()) {
|
|
491
|
+
return out();
|
|
492
|
+
}
|
|
493
|
+
for (let i = 0; i < this.config_.consent; ++i) {
|
|
494
|
+
const next = await AgenticaBenchmarkPredicator.isNext(agent);
|
|
495
|
+
if (next === null) {
|
|
496
|
+
break;
|
|
497
|
+
}
|
|
498
|
+
await agent.conversate(next);
|
|
499
|
+
if (success()) {
|
|
500
|
+
return out();
|
|
501
|
+
}
|
|
502
|
+
}
|
|
503
|
+
return out();
|
|
504
|
+
} catch (error) {
|
|
505
|
+
return {
|
|
506
|
+
type: "error",
|
|
507
|
+
scenario,
|
|
508
|
+
prompts: agent.getHistories(),
|
|
509
|
+
usage: agent.getTokenUsage(),
|
|
510
|
+
error,
|
|
511
|
+
started_at,
|
|
512
|
+
completed_at: new Date
|
|
513
|
+
};
|
|
514
|
+
}
|
|
515
|
+
}
|
|
516
|
+
}
|
|
517
|
+
|
|
408
518
|
const AgenticaSelectBenchmarkReporter = {
|
|
409
519
|
markdown
|
|
410
520
|
};
|
|
@@ -487,9 +597,11 @@ class AgenticaSelectBenchmark {
|
|
|
487
597
|
try {
|
|
488
598
|
const usage = AgenticaTokenUsage.zero();
|
|
489
599
|
const context = this.agent_.getContext({
|
|
490
|
-
prompt: factory.
|
|
491
|
-
|
|
492
|
-
|
|
600
|
+
prompt: factory.createUserInputHistory({
|
|
601
|
+
contents: [ {
|
|
602
|
+
type: "text",
|
|
603
|
+
text: scenario.text
|
|
604
|
+
} ]
|
|
493
605
|
}),
|
|
494
606
|
usage
|
|
495
607
|
});
|
|
@@ -512,7 +624,7 @@ class AgenticaSelectBenchmark {
|
|
|
512
624
|
scenario,
|
|
513
625
|
selected,
|
|
514
626
|
usage,
|
|
515
|
-
assistantPrompts: histories.filter((p => p.type === "text"))
|
|
627
|
+
assistantPrompts: histories.filter((p => p.type === "text")),
|
|
516
628
|
started_at,
|
|
517
629
|
completed_at: new Date
|
|
518
630
|
};
|
|
@@ -528,5 +640,5 @@ class AgenticaSelectBenchmark {
|
|
|
528
640
|
}
|
|
529
641
|
}
|
|
530
642
|
|
|
531
|
-
export { AgenticaCallBenchmark, AgenticaSelectBenchmark };
|
|
643
|
+
export { AgenticaCallBenchmark, AgenticaSelectBenchmark, MicroAgenticaCallBenchmark };
|
|
532
644
|
//# sourceMappingURL=index.mjs.map
|
package/lib/index.mjs.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.mjs","sources":["../src/internal/AgenticaBenchmarkPredicator.ts","../src/utils/MathUtil.ts","../src/internal/AgenticaBenchmarkUtil.ts","../src/internal/AgenticaPromptReporter.ts","../src/internal/AgenticaCallBenchmarkReporter.ts","../src/AgenticaCallBenchmark.ts","../src/internal/AgenticaSelectBenchmarkReporter.ts","../src/AgenticaSelectBenchmark.ts"],"sourcesContent":[null,null,null,null,null,null,null,null],"names":["AgenticaBenchmarkPredicator","isNext","success","async","agent","last","getHistories","at","llmVendor","getVendor","isTextPrompt","type","role","consent","functions","result","api","chat","completions","create","model","messages","content","join","text","tools","function","name","description","parameters","tool_choice","parallel_tool_calls","options","toolCall","choices","message","tool_calls","filter","tc","undefined","input","arguments","reply","props","successInner","call","expected","overrideOperations","operations","strict","take","targetIterator","items","Symbol","iterator","targeted","next","done","length","value","slice","target","operation","some","op","anyOf","callResult","allOf","map","every","r","reduce","acc","Math","max","MathUtil","round","floor","AgenticaBenchmarkUtil","errorToJson","expectedToJson","error","Error","stack","AgenticaPromptReporter","markdown","p","selections","flatMap","s","functionDescriptionCount","controller","reason","executes","e","split","JSON","stringify","AgenticaCallBenchmarkReporter","Object","fromEntries","writeIndex","experiments","exp","scenario","writeExperimentIndex","events","event","i","writeExperimentEvent","flat","average","completed_at","getTime","started_at","a","b","aggregate","usage","toLocaleString","total","cached","output","reasoning","accepted_prediction","rejected_prediction","drawStatus","select","index","prompts","count","Array","from","fill","AgenticaCallBenchmark","constructor","this","agent_","scenarios_","scenarios","config_","repeat","config","simultaneous","
result_","execute","listener","Date","semaphore","Semaphore","task","Promise","all","acquire","step","release","cur","AgenticaTokenUsage","plus","zero","report","clone","out","getTokenUsage","conversate","AgenticaSelectBenchmarkReporter","selected","AgenticaSelectBenchmark","histories_","context","getContext","prompt","factory","createTextHistory","executor","TypeError","histories","orchestrate","ready","dispatch","assistantPrompts"],"mappings":";;;;;;;;AAeO,MAAMA,8BAA8B;IACzCC;IACAC;;;AAmCFC,eAAeF,OAAuCG;IACpD,MAAMC,OAA2CD,MAC9CE,eACAC,IAAG;IAQN,MAAMC,YAAYJ,MAAMK;IACxB,MAAMC,eAAeL,MAAMM,SAAS,UAAUN,KAAKO,SAAS;IAC5D,KAAKF,cAAc;QACjB,OAAO;;IAGT,MAAMG,UAAmC;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;MAGrCC,UAAU;IACd,MAAMC,eAAsCP,UAAUQ,IAAIC,KAAKC,YAAYC,OACzE;QACEC,OAAOZ,UAAUY;QACjBC,UAAU,EACR;YACET,MAAM;YACNU,SAAS,EACP,iCACA,IACA,uDACA,gEACA,qDACAC,KAAK;WAET;YACEX,MAAM;YACNU,SAASjB,KAAKmB;;QAGlBC,OAAO,EACL;YACEd,MAAM;YACNe,UAAU;gBACRC,MAAMd,QAAQc;gBACdC,aAAaf,QAAQe;gBACrBC,YAAYhB,QAAQgB;;;QAI1BC,aAAa;QACbC,qBAAqB;OAEvBvB,UAAUwB;IAGZ,MAAMC,YACJlB,OAAOmB,QAAQ,IAAIC,QAAQC,cAAc,IACzCC,QACAC,MAAMA,GAAG3B,SAAS,cAAc2B,GAAGZ,SAASC,SAASd,QAAQc,SAC3D;IAEJ,IAAIM,aAAaM,WAAW;QAC1B,OAAO;;IAGT,MAAMC;;;;;;;SAA0CP,SAASP,SAASe;IAClE,OAAOD,UAAU,OAAOA,MAAME,QAAQ;AACxC;;AAWM,SAAUxC,QAAwCyC;IAoBtD,OAAOC,aAAaD,OAAO5B;AAC7B;;AAEA,SAAS6B,aAA6CD;IAQpD,MAAME,OAAO,CACXC,UACAC,uBAEAH,aAAa;QACXE;QACAE,YAAYD,sBAAsBJ,MAAMK;QACxCC,QAAQN,MAAMM;;IAGlB,QAAQN,MAAMG,SAASnC;MACrB,KAAK;QAAS;YACZ,IAAIuC,OAAO;YACX,MAAMC,iBAAiBR,MAAMG,SAASM,MAAMC,OAAOC;YACnD,IAAIC,WAAWJ,eAAeK;YAE9B,OAAO,MAAM;gBACX,IAAID,SAASE,SAAS,MAAM;oBAC1B,OAAO;wBACL1C,QAAQ;wBACRmC;;;gBAGJ,IAAIA,QAAQP,MAAMK,WAAWU,QAAQ;oBACnC,OAAO;wBAAE3C,QAAQ;;;gBAGnB,MAAMA,SAAS8B,KAAKU,SAASI,OAAOhB,MAAMK,WAAWY,MAAMV;gBAC3D,KAAKnC,OAAOA,QAAQ;oBAClB,IAAI4B,MAAMM,WAAW,MAAM;wBACzB,OAAO;4BAAElC,QAAQ;;;oBAEnBmC,QAAQ;oBACR;;gBAGFA,QAAQnC,OAAOmC;gBACfK,WAAWJ,eAAeK;;;;MAG9B,KAAK;QAAc;YACjB,MAAMK,SAASlB,MAAMG,SAASgB;YAC9B,MAAM/C,SAAS4B,MAAMK,WAA
We,MAAKC,MAAMA,GAAGrC,SAASkC,OAAOlC;YAC9D,IAAIZ,QAAQ;gBACV,OAAO;oBAAEA;oBAAQmC,MAAM;;;YAEzB,OAAO;gBACLnC;;;;MAGJ,KAAK;QACH,KAAK,MAAM+B,YAAYH,MAAMG,SAASmB,OAAO;YAC3C,MAAMC,aAAarB,KAAKC;YACxB,IAAIoB,WAAWnD,QAAQ;gBACrB,OAAOmD;;;QAIX,OAAO;YAAEnD,QAAQ;;;MACnB,KAAK;QAAS;YAQZ,MAAMA,SAAS4B,MAAMG,SAASqB,MAAMC,KAAItB,YAAYD,KAAKC;YACzD,IAAI/B,OAAOsD,OAAMC,KAAKA,EAAEvD,UAAS;gBAC/B,OAAO;oBACLA,QAAQ;oBACRmC,MAAMnC,OAAOwD,QAAO,CAACC,KAAKF,MAAMG,KAAKC,IAAIF,KAAKF,EAAEpB,QAAO;;;YAI3D,OAAO;gBACLnC,QAAQ;;;;AAIhB;;AC3OO,MAAM4D,WAAW;IAOtBC,OAAQjB,SAA0Bc,KAAKI,MAAMlB,QAAQ,OAAO;;;ACJvD,MAAMmB,wBAAwB;IACnCC;IACAC;;;AAGF,SAASD,YAAeE;IAOtB,IAAIA,iBAAiBC,OAAO;QAC1B,OAAO;eACFD;YACHtD,MAAMsD,MAAMtD;YACZQ,SAAS8C,MAAM9C;YACfgD,OAAOF,MAAME;;;IAGjB,OAAOF;AACT;;AAEA,SAASD,eAA+ClC;IACtD,IAAIA,SAASnC,SAAS,cAAc;QAClC,OAAO;YACLA,MAAMmC,SAASnC;YACfmD,WAAW;gBACTnC,MAAMmB,SAASgB,UAAUnC;gBACzBC,aAAakB,SAASgB,UAAUpC,SAASE;;;WAI1C,IAAIkB,SAASnC,SAAS,SAAS;QAClC,OAAO;YACLA,MAAMmC,SAASnC;YACfyC,OAAON,SAASM,MAAMgB,IAAIY;;WAGzB,IAAIlC,SAASnC,SAAS,SAAS;QAClC,OAAO;YACLA,MAAMmC,SAASnC;YACfwD,OAAOrB,SAASqB,MAAMC,IAAIY;;WAGzB;QACH,OAAO;YACLrE,MAAMmC,SAASnC;YACfsD,OAAOnB,SAASmB,MAAMG,IAAIY;;;AAGhC;;ACpDO,MAAMI,yBAAyB;cACpCC;;;AAGF,SAASA,WAAyCC;IAEhD,IAAIA,EAAE3E,SAAS,QAAQ;QACrB,OAAO,EAAC,aAAa2E,EAAE1E,SAAS0E,EAAE9D,MAAM,KAAID,KAAK;WAE9C,IAAI+D,EAAE3E,SAAS,YAAY2E,EAAE3E,SAAS,UAAU;QACnD,OAAO,EACL,OAAO2E,EAAE3E,SAAS,WAAW,WAAW,eACrC2E,EAAEC,WACFC,SAASC;YACR,MAAMC,2BAA2BD,EAAE3B,UAAUpC,SAASE,aAAa8B,UAAU;YAE7E,OAAO,EACL,QAAQ+B,EAAE3B,UAAUnC,QACpB,mBAAmB8D,EAAE3B,UAAU6B,WAAWhE,QAC1C,iBAAiB8D,EAAE3B,UAAUpC,SAASC,QACtC,eAAe8D,EAAEG,UACjB,OACIF,2BAA2B,IAC3B,EAACD,EAAE3B,UAAUpC,SAASE,aAAa,OACnC;AACL,cAELL,KAAK;WAEJ,IAAI+D,EAAE3E,SAAS,YAAY;QAC9B,OAAO,EACL,mBACG2E,EAAEO,SAASzB,KAAI0B,KAAK,OAAOA,EAAEhC,UAAUnC,UAC1C,OACG2D,EAAE9D,KAAKuE,MAAM,MAAM3B,KAAIqB,KAAK,KAAKA,OACpC,KACAlE,KAAK;;IAET,OAAO,EACL,eACA,aAAa+D,EAAExB,UAAUnC,QACzB,mBAAmB2D,EAAExB,UAAU6B,WAAWhE,QAC1C,iBAAiB2D,EAAExB,UAAUpC,SAASC,QACtC,IACA,WACAqE,KAAKC,UAAUX,EAAE7C,WAA
W,MAAM,IAClC,OACA,KACAlB,KAAK;AACT;;ACzCO,MAAM2E,gCAAgC;cAC3Cb;;;AAGI,SAAUA,WAAyCtE;IACvD,OAAOoF,OAAOC,YAAY,EACxB,EAAC,eAAeC,aAAkBtF,cAC/BA,OAAOuF,YACPlC,KAAwBmC,OAAO,EAC9B,EAAC,KAAKA,IAAIC,SAAS7E,kBAAkB8E,uBAAqBF,WACvDA,IAAIG,OAAOtC,KAAsB,CAACuC,OAAOC,MAAM,EAChD,KAAKL,IAAIC,SAAS7E,QAAQiF,IAAI,KAAKD,MAAMhG,WACzCkG,uBAAqBF,OAAOC,WAG/BE;AAEP;;AAEA,SAAST,aAA2CtF;IAClD,MAAM2F,SAA+C3F,OAAOuF,YACzDlC,KAAIE,KAAKA,EAAEoC,SACXI;IACH,MAAMC,UACFL,OACCtC,KAAI0B,KAAKA,EAAEkB,aAAaC,YAAYnB,EAAEoB,WAAWD,YACjD1C,QAAO,CAAC4C,GAAGC,MAAMD,IAAIC,IAAG,KAAKV,OAAOhD;IACzC,MAAM2D,YAA2CtG,OAAOuG,MAAMD;IAC9D,OAAO,EACL,iCACA,cACA,oBACA,qBAAqBtG,OAAOuF,YAAY5C,OAAO6D,oBAC/C,gBAAgBb,OAAOhD,UACvB,kBAAkBgD,OAAOrE,QAAOyD,KAAKA,EAAEnF,SAAS,YAAW+C,UAC3D,kBAAkBgD,OAAOrE,QAAOyD,KAAKA,EAAEnF,SAAS,YAAW+C,UAC3D,uBAAuBiB,SAASC,MAAMmC,SAASQ,uBAC/C,mBACA,gBAAgBF,UAAUG,MAAMD,oBAChC,eACA,kBAAkBF,UAAU7E,MAAMgF,MAAMD,oBACxC,mBAAmBF,UAAU7E,MAAMiF,OAAOF,oBAC1C,iBACA,kBAAkBF,UAAUK,OAAOF,MAAMD,oBACzC,sBAAsBF,UAAUK,OAAOC,UAAUJ,oBACjD,gCAAgCF,UAAUK,OAAOE,oBAAoBL,oBACrE,gCAAgCF,UAAUK,OAAOG,oBAAoBN,oBACrE,IACA,kBACA,qCACA,yCACGxG,OAAOuF,YAAYlC,KAAImC,OACxB,EACE,IAAIA,IAAIC,SAAS7E,WAAW4E,IAAIC,SAAS7E,mBACzCmG,WACEvB,IAAIG,SACJZ,KAAKA,EAAEnF,SAAS,WAAWmF,EAAEiC,WAAW,QAE1CD,WAAWvB,IAAIG,SAAQZ,KAAKA,EAAEnF,SAAS,WAAWmF,EAAEjD,SAAS,QAC7D,GAAG8B,SAASC,MACV2B,IAAIG,OACDtC,KAAI0B,KAAKA,EAAEkB,aAAaC,YAAYnB,EAAEoB,WAAWD,YACjD1C,QAAO,CAAC4C,GAAGC,MAAMD,IAAIC,IAAG,KAAKb,IAAIG,OAAOhD,QAC3C6D,wBACFhG,KAAK,WAETA,KAAK;AACT;;AAEA,SAASkF,uBAAqDF;IAC5D,OAAO,EACL,KAAKA,IAAIC,SAAS7E,QAClB,cACA,mBAAmB4E,IAAIG,OAAOhD,OAAO6D,oBACrC,gBAAgBhB,IAAIG,OAAOrE,QAAOyD,KAAKA,EAAEnF,SAAS,YAAW+C,UAC7D,gBAAgB6C,IAAIG,OAAOrE,QAAOyD,KAAKA,EAAEnF,SAAS,YAAW+C,UAC7D,qBAAqBiB,SAASC,MAC5B2B,IAAIG,OACDtC,KAAI0B,KAAKA,EAAEkB,aAAaC,YAAYnB,EAAEoB,WAAWD,YACjD1C,QAAO,CAAC4C,GAAGC,MAAMD,IAAIC,IAAG,KAAKb,IAAIG,OAAOhD,QAC3C6D,uBACF,IACA,aACA,uBACA,0BACGhB,IAAIG,OAAOtC,KAAI,CAAC0B,GAAGc,MACpB,EACE,IAAIA,IAAI,SAASA,IAAI,KAAKd,EAAEnF,YAC5BmF,EAAEnF,MACF,GAAGgE,SAASC,MAAMkB,EAA
EkB,aAAaC,YAAYnB,EAAEoB,WAAWD,kBAC1D1F,KAAK,UAET,IACA,eACA,mBACAgF,IAAIC,SAAShF,MACb,IACA,gBACA,WACAwE,KAAKC,UACHnB,sBAAsBE,eAAeuB,IAAIC,SAAS1D,WAClD,MACA,IAEF,QACAvB,KAAK;AACT;;AAEA,SAASsF,uBAAqDF,OAA2CqB;IACvG,OAAO,EACL,KAAKA,QAAQ,MAAMrB,MAAMhG,QACzB,cACA,aAAagG,MAAMH,SAAS7E,QAC5B,aAAagF,MAAMhG,QACnB,aAAagE,SAASC,MACpB+B,MAAMK,aAAaC,YAAYN,MAAMO,WAAWD,WAChDM,0BACEZ,MAAMhG,SAAS,UACf,EACE,eAAegG,MAAMoB,SAAS,MAAM,OACpC,aAAapB,MAAM9D,OAAO,MAAM,UAElC,IACJ,oBACA,gBAAgBmD,KAAKC,UAAUU,MAAMW,MAAMD,UAAUG,UACrD,eACA,kBAAkBb,MAAMW,MAAMD,UAAU7E,MAAMgF,SAC9C,mBAAmBb,MAAMW,MAAMD,UAAU7E,MAAMiF,UAC/C,iBACA,kBAAkBd,MAAMW,MAAMD,UAAUK,OAAOF,SAC/C,gCAAgCb,MAAMW,MAAMD,UAAUK,OAAOE,uBAC7D,sBAAsBjB,MAAMW,MAAMD,UAAUK,OAAOC,aACnD,gCAAgChB,MAAMW,MAAMD,UAAUK,OAAOG,uBAE7D,IACA,eACA,mBACAlB,MAAMH,SAAShF,MACf,IACA,gBACA,WACAwE,KAAKC,UACHnB,sBAAsBE,eAAe2B,MAAMH,SAAS1D,WACpD,MACA,IAEF,OACA,IACA,0BACG6D,MAAMsB,QAAQ7D,IAAIgB,uBAAuBC,WAC5C,OACIsB,MAAMhG,SAAS,UACf,EACE,YACA,WACAqF,KAAKC,UACHnB,sBAAsBC,YAAY4B,MAAM1B,QACxC,MACA,IAEF,UAEF,KACJ1D,KAAK;AACT;;AAEA,SAASuG,WAA2CpB,QAA8CxG;IAChG,MAAMgI,QAAgBzD,KAAKI,MACxB6B,OAAOrE,OAAOnC,SAASwD,SAASgD,OAAOhD,SAAU;IAGpD,OACEyE,MAAMC,KAAK;QAAE1E,QAAQwE;OAASG,KAAK,KAAK9G,KAAK,MAC3C4G,MAAMC,KAAK;QAAE1E,QAAQ,KAAKwE;OAASG,KAAK,KAAK9G,KAAK;AAExD;;MCxJa+G;IAWX,WAAAC,CAAmB5F;QACjB6F,KAAKC,SAAS9F,MAAMvC;QACpBoI,KAAKE,aAAa/F,MAAMgG,UAAU/E;QAClC4E,KAAKI,UAAU;YACbC,QAAQlG,MAAMmG,QAAQD,UAAU;YAChCE,cAAcpG,MAAMmG,QAAQC,gBAAgB;YAC5ClI,SAAS8B,MAAMmG,QAAQjI,WAAW;;QAEpC2H,KAAKQ,UAAU;;IAmBV,aAAMC,CACXC;QAEA,MAAMhC,aAAmB,IAAIiC;QAC7B,MAAMC,YAAuB,IAAIC,UAAUb,KAAKI,QAAQG;QACxD,MAAMO,OAAOd,KAAKE,WAAWtE,KAAIjE,MAAOqG;YACtC,MAAME,eACI6C,QAAQC,IACdrB,MAAMC,KAAK;gBAAE1E,QAAQ8E,KAAKI,QAAQC;eAAUzE,KAAIjE;sBACxCiJ,UAAUK;gBAChB,MAAM3D,UACI0C,KAAKkB,KAAKlD;sBACd4C,UAAUO;gBAEhB,IAAIT,aAAa3G,WAAW;oBAC1B2G,SAASpD;;gBAGX,OAAOA;AAAC;YAGd,OAAO;gBACLU;gBACAE;gBACAY,OAAOZ,OACJrE,QAAOyD,KAAKA,EAAEnF,SAAS,UACvByD,KAAI0B,KAAKA,EAAEwB,QACX/C,QAAO,CAACC,KAAKoF,QAAQC,mBAAmBC,KAAKtF,KAAKoF,OAAMC,mBAAmBE;;AAC/
E;QAEH,MAAMzD,oBACIiD,QAAQC,IAAIF;QACtB,OAAQd,KAAKQ,UAAU;YACrB1C;YACAY;YACAF,cAAc,IAAImC;YAClB7B,OAAOhB,YACJlC,KAAIkB,KAAKA,EAAEgC,QACX/C,QAAO,CAACC,KAAKoF,QAAQC,mBAAmBC,KAAKtF,KAAKoF,OAAMC,mBAAmBE;;;IAsB3E,MAAAC;QACL,IAAIxB,KAAKQ,YAAY,MAAM;YACzB,MAAM,IAAI9D,MAAM;;QAElB,OAAOgB,8BAA8Bb,SAASmD,KAAKQ;;IAG7C,UAAMU,CACZlD;QAEA,MAAMpG,QAAyBoI,KAAKC,OAAOwB;QAC3C,MAAM/C,aAAmB,IAAIiC;QAC7B,MAAMjJ,UAAU,MACdF,4BAA4BE,QAAQ;YAClC4C,UAAU0D,SAAS1D;YACnBE,YAAY5C,MACTE,eACA+B,QAAOiD,KAAKA,EAAE3E,SAAS,YACvByD,KAAIkB,KAAKA,EAAExB;YACdb,QAAQ;;QAEZ,MAAMiH,MAAM;YACV,MAAMnC,SAAS/H,4BAA4BE,QAAQ;gBACjD4C,UAAU0D,SAAS1D;gBACnBE,YAAY5C,MACTE,eACA+B,QAAOiD,KAAKA,EAAE3E,SAAS,WACvByD,KAAIkB,KAAKA,EAAEC,aACXuB,OACA1C,KAAIkB,KAAKA,EAAExB;gBACdb,QAAQ;;YAEV,MAAMJ,OAAO3C;YACb,OAAO;gBACLS,MAAOkC,OAAO,YAAY;gBAC1B2D;gBACAuB;gBACAlF;gBACAoF,SAAS7H,MAAME;gBACfgH,OAAOlH,MAAM+J;gBACbjD;gBACAF,cAAc,IAAImC;;AACmC;QAGzD;kBACQ/I,MAAMgK,WAAW5D,SAAShF;YAChC,IAAItB,WAAW;gBACb,OAAOgK;;YAGT,KAAK,IAAItD,IAAY,GAAGA,IAAI4B,KAAKI,QAAQ/H,WAAW+F,GAAG;gBACrD,MAAMpD,aACIxD,4BAA4BC,OAAOG;gBAC7C,IAAIoD,SAAS,MAAM;oBACjB;;sBAGIpD,MAAMgK,WAAW5G;gBACvB,IAAItD,WAAW;oBACb,OAAOgK;;;YAGX,OAAOA;UAET,OAAOjF;YACL,OAAO;gBACLtE,MAAM;gBACN6F;gBACAyB,SAAS7H,MAAME;gBACfgH,OAAOlH,MAAM+J;gBACblF;gBACAiC;gBACAF,cAAc,IAAImC;;;;;;AChMnB,MAAMkB,kCAAkC;IAC7ChF;;;AAGI,SAAUA,SAAyCtE;IACvD,MAAMuC,WAAW,EACf,EAAC,eAAe+C,WAAWtF,cACxBA,OAAOuF,YACPlC,KAAwBmC,OAAO,EAC9B,EAAC,KAAKA,IAAIC,SAAS7E,kBAAkB8E,qBAAqBF,WACvDA,IAAIG,OAAOtC,KAAsB,CAACuC,OAAOC,MAAM,EAChD,KAAKL,IAAIC,SAAS7E,QAAQiF,IAAI,KAAKD,MAAMhG,WACzCkG,qBAAqBF,OAAOC,WAG/BE;IAGL,OAAOX,OAAOC,YAAY9C;AAC5B;;AAEA,SAAS+C,WAA2CtF;IAClD,MAAM2F,SAAiD3F,OAAOuF,YAC3DlC,KAAIE,KAAKA,EAAEoC,SACXI;IACH,MAAMC,UACAL,OACCtC,KAAI0B,KAAKA,EAAEkB,aAAaC,YAAYnB,EAAEoB,WAAWD,YACjD1C,QAAO,CAAC4C,GAAGC,MAAMD,IAAIC,IAAG,KAAKV,OAAOhD;IAC3C,MAAM2D,YAA2CtG,OAAOuG,MAAMD;IAC9D,OAAO,EACL,sCACA,cACA,oBACA,qBAAqBtG,OAAOuF,YAAY5C,OAAO6D,oBAC/C,gBAAgBb,OAAOhD,UACvB,kBAAkBgD,OAAOrE,QAAOyD,KAAKA,EAAEnF,SAAS,YAAW+C,UAC3D,kBAAkBgD,OAAOrE,QA
AOyD,KAAKA,EAAEnF,SAAS,YAAW+C,UAC3D,gBAAgBgD,OAAOrE,QAAOyD,KAAKA,EAAEnF,SAAS,UAAS+C,UACvD,uBAAuBiB,SAASC,MAAMmC,SAASQ,uBAC/C,mBACA,gBAAgBF,UAAUG,MAAMD,oBAChC,eACA,kBAAkBF,UAAU7E,MAAMgF,MAAMD,oBACxC,mBAAmBF,UAAU7E,MAAMiF,OAAOF,oBAC1C,iBACA,kBAAkBF,UAAUK,OAAOF,MAAMD,oBACzC,gCAAgCF,UAAUK,OAAOE,oBAAoBL,oBACrE,sBAAsBF,UAAUK,OAAOC,UAAUJ,oBACjD,gCAAgCF,UAAUK,OAAOG,oBAAoBN,oBACrE,IACA,kBACA,+BACA,kCACGxG,OAAOuF,YAAYlC,KAAImC,OACxB,EACE,IAAIA,IAAIC,SAAS7E,WAAW4E,IAAIC,SAAS7E,mBACzC;QACE,MAAMzB,UAAkBuE,KAAKI,MAC1B0B,IAAIG,OAAOrE,QAAOyD,KAAKA,EAAEnF,SAAS,YAAW+C,SAC1C6C,IAAIG,OAAOhD,SACb;QAEJ,OACEyE,MAAMC,KAAK;YAAE1E,QAAQxD;WAAWmI,KAAK,KAAK9G,KAAK,MAC7C4G,MAAMC,KAAK;YAAE1E,QAAQ,KAAKxD;WAAWmI,KAAK,KAAK9G,KAAK;AAEzD,MAVD,IAWA,GAAGoD,SAASC,MACV2B,IAAIG,OACDtC,KACCuC,SACEA,MAAMK,aAAaC,YAAYN,MAAMO,WAAWD,YAEnD1C,QAAO,CAAC4C,GAAGC,MAAMD,IAAIC,IAAG,KAAKb,IAAIG,OAAOhD,QAC3C6D,wBACFhG,KAAK,WAETA,KAAK;AACT;;AAEA,SAASkF,qBAAqDF;IAC5D,MAAMc,YAA2Cd,IAAIe,MAAMD;IAC3D,OAAO,EACL,KAAKd,IAAIC,SAAS7E,QAClB,cACA,oBACA,gBAAgB4E,IAAIG,OAAOhD,UAC3B,kBAAkB6C,IAAIG,OAAOrE,QAAOyD,KAAKA,EAAEnF,SAAS,YAAW+C,UAC/D,kBAAkB6C,IAAIG,OAAOrE,QAAOyD,KAAKA,EAAEnF,SAAS,YAAW+C,UAC/D,gBAAgB6C,IAAIG,OAAOrE,QAAOyD,KAAKA,EAAEnF,SAAS,UAAS+C,UAC3D,uBAAuBiB,SAASC,MAC9B2B,IAAIG,OACDtC,KACCuC,SACEA,MAAMK,aAAaC,YAAYN,MAAMO,WAAWD,YAEnD1C,QAAO,CAAC4C,GAAGC,MAAMD,IAAIC,IAAG,KAAKb,IAAIG,OAAOhD,QAC3C6D,uBACF,mBACA,gBAAgBF,UAAUG,MAAMD,oBAChC,eACA,kBAAkBF,UAAU7E,MAAMgF,MAAMD,oBACxC,mBAAmBF,UAAU7E,MAAMiF,OAAOF,oBAC1C,iBACA,kBAAkBF,UAAUK,OAAOF,MAAMD,oBACzC,gCAAgCF,UAAUK,OAAOE,oBAAoBL,oBACrE,sBAAsBF,UAAUK,OAAOC,UAAUJ,oBACjD,gCAAgCF,UAAUK,OAAOG,oBAAoBN,oBACrE,IACA,aACA,qBACA,wBACGhB,IAAIG,OAAOtC,KAAI,CAAC0B,GAAGc,MACpB,EACE,IAAIA,IAAI,SAASA,IAAI,KAAKd,EAAEnF,YAC5BmF,EAAEnF,MACF,GAAGgE,SAASC,MAAMkB,EAAEkB,aAAaC,YAAYnB,EAAEoB,WAAWD,kBAE1D1F,KAAK,UAET,IACA,eACA,mBACAgF,IAAIC,SAAShF,MACb,IACA,gBACA,WACAwE,KAAKC,UACHnB,sBAAsBE,eAAeuB,IAAIC,SAAS1D,WAClD,MACA,IAEF,QACAvB,KAAK;AACT;;AAEA,SAASsF,qBAAqDF,OAA6CqB;IACzG,OAAO,EACL,KAAKA,QAAQ,MAAMrB,MA
AMhG,QACzB,cACA,aAAagG,MAAMH,SAAS7E,QAC5B,aAAagF,MAAMhG,QACnB,cAAcgG,MAAMK,aAAaC,YAAYN,MAAMO,WAAWD,WAAWM,0BACrEZ,MAAMhG,SAAS,UACf,EACE,mBACA,gBAAgBgG,MAAMW,MAAMD,UAAUG,MAAMD,oBAC5C,gBACA,kBAAkBZ,MAAMW,MAAMD,UAAU7E,MAAMgF,MAAMD,oBACpD,mBAAmBZ,MAAMW,MAAMD,UAAU7E,MAAMiF,OAAOF,oBACtD,qBACA,kBAAkBZ,MAAMW,MAAMD,UAAUK,OAAOF,MAAMD,oBACrD,sBAAsBZ,MAAMW,MAAMD,UAAUK,OAAOC,UAAUJ,oBAC7D,gCAAgCZ,MAAMW,MAAMD,UAAUK,OAAOE,oBAAoBL,oBACjF,gCAAgCZ,MAAMW,MAAMD,UAAUK,OAAOG,oBAAoBN,uBAEnF,IACJ,IACA,eACA,mBACAZ,MAAMH,SAAShF,MACf,IACA,gBACA,WACAwE,KAAKC,UACHnB,sBAAsBE,eAAe2B,MAAMH,SAAS1D,WACpD,MACA,IAEF,OACA,OACI6D,MAAMhG,SAAS,aAAagG,MAAMhG,SAAS,YAC3C,EACE,gBACGgG,MAAM2D,SAASlG,KAAIqB,KACpB,EACE,OAAOA,EAAE3B,UAAUnC,QACnB,qBAAqB8D,EAAE3B,UAAU6B,WAAWhE,UAC5C,mBAAmB8D,EAAE3B,UAAUpC,SAASC,UACxC,eAAe8D,EAAEG,UACjB,OACIH,EAAE3B,UAAUpC,SAASE,gBAAgBW,aAAakD,EAAE3B,UAAUpC,SAASE,gBAAgB,KACvF,EAAC6D,EAAE3B,UAAUpC,SAASE,aAAa,OACnC,KACJL,KAAK,YAGX,OACAoF,MAAMhG,SAAS,UACf,EACE,YACA,WACAmE,sBAAsBC,YACpBiB,KAAKC,UAAUU,MAAM1B,OAAO,MAAM,KAEpC,OACA,OAEF,KACJ1D,KAAK;AACT;;MCjLagJ;IAYX,WAAAhC,CAAmB5F;QACjB6F,KAAKC,SAAS9F,MAAMvC;QACpBoI,KAAKE,aAAa/F,MAAMgG,UAAU/E;QAClC4E,KAAKI,UAAU;YACbC,QAAQlG,MAAMmG,QAAQD,UAAU;YAChCE,cAAcpG,MAAMmG,QAAQC,gBAAgB;;QAE9CP,KAAKgC,aAAa7H,MAAMvC,MAAME,eAAesD;QAC7C4E,KAAKQ,UAAU;;IAmBV,aAAMC,CACXC;QAEA,MAAMhC,aAAmB,IAAIiC;QAC7B,MAAMC,YAAuB,IAAIC,UAAUb,KAAKI,QAAQG;QACxD,MAAMzC,oBACIiD,QAAQC,IACdhB,KAAKE,WAAWtE,KAAIjE,MAAOqG;YACzB,MAAME,eACI6C,QAAQC,IACdrB,MAAMC,KAAK;gBAAE1E,QAAQ8E,KAAKI,QAAQC;eAAUzE,KAAIjE;sBACxCiJ,UAAUK;gBAChB,MAAM3D,UACI0C,KAAKkB,KAAKlD;sBACd4C,UAAUO;gBAChB,IAAIT,aAAa3G,WAAW;oBAC1B2G,SAASpD;;gBAEX,OAAOA;AAAC;YAGd,OAAO;gBACLU;gBACAE;gBACAY,OAAOZ,OACJrE,QAAOyD,KAAKA,EAAEnF,SAAS,UACvByD,KAAI0B,KAAKA,EAAEwB,QACX/C,QAAO,CAACC,KAAKoF,QAAQC,mBAAmBC,KAAKtF,KAAKoF,OAAMC,mBAAmBE;;AAC/E;QAGP,OAAQvB,KAAKQ,UAAU;YACrB1C;YACAY;YACAF,cAAc,IAAImC;YAClB7B,OAAOhB,YACJlC,KAAIkB,KAAKA,EAAEgC,QACX/C,QAAO,CAACC,KAAKoF,QAAQC,mBAAmBC,KAAKtF,KAAKoF,OAAMC,mBAAmBE;;;IAuB3E,MAAAC;QACL,IAAIxB,KAAK
Q,YAAY,MAAM;YACzB,MAAM,IAAI9D,MAAM;;QAElB,OAAOmF,gCAAgChF,SAASmD,KAAKQ;;IAG/C,UAAMU,CACZlD;QAEA,MAAMU,aAAmB,IAAIiC;QAC7B;YACE,MAAM7B,QAA4BuC,mBAAmBE;YACrD,MAAMU,UAAUjC,KAAKC,OAAOiC,WAAW;gBAErCC,QAAQC,QAAQC,kBAAkB;oBAChCjK,MAAM;oBACNY,MAAMgF,SAAShF;;gBAEjB8F;;YAEF,WAAWmD,QAAQ3B,QAAQgC,aAAa,YAAY;gBAClD,MAAM,IAAIC,UAAU;;YAGtB,MAAMC,mBACKP,QAAQ3B,QAAQgC,UAAU/C,UAAUkD,YAAYlD,QAAQ;mBAC5D0C;gBACHO,WAAWxC,KAAKgC,WAAW5G;gBAC3BuB,OAAO;gBACP+F,OAAO,MAAM;gBACbC,UAAUhL;;YAEd,MAAMmK,WAAgDU,UACnD3I,QAAOiD,KAAKA,EAAE3E,SAAS,WACvByD,KAAIkB,KAAKA,EAAEC,aACXuB;YACH,OAAO;gBACLnG,MAAMX,4BAA4BE,QAAQ;oBACxC4C,UAAU0D,SAAS1D;oBACnBE,YAAYsH,SAASlG,KAAIqB,KAAKA,EAAE3B;qBAE9B,YACA;gBACJ0C;gBACA8D;gBACAhD;gBACA8D,kBAAkBJ,UACf3I,QAAOiD,KAAKA,EAAE3E,SAAS,SACvB0B,QACEiD,KAA6CA,EAAE1E,SAAS;gBAE7DsG;gBACAF,cAAc,IAAImC;;UAKtB,OAAOlE;YACL,OAAO;gBACLtE,MAAM;gBACN6F;gBACAvB;gBACAiC;gBACAF,cAAc,IAAImC;;;;;;"}
|
|
1
|
+
{"version":3,"file":"index.mjs","sources":["../src/internal/AgenticaBenchmarkPredicator.ts","../src/utils/MathUtil.ts","../src/internal/AgenticaBenchmarkUtil.ts","../src/internal/AgenticaPromptReporter.ts","../src/internal/AgenticaCallBenchmarkReporter.ts","../src/AgenticaCallBenchmark.ts","../src/MicroAgenticaCallBenchmark.ts","../src/internal/AgenticaSelectBenchmarkReporter.ts","../src/AgenticaSelectBenchmark.ts"],"sourcesContent":[null,null,null,null,null,null,null,null,null],"names":["AgenticaBenchmarkPredicator","isNext","success","async","agent","last","getHistories","at","llmVendor","getVendor","isTextPrompt","type","role","consent","functions","result","api","chat","completions","create","model","messages","content","join","text","tools","function","name","description","parameters","tool_choice","parallel_tool_calls","options","toolCall","choices","message","tool_calls","filter","tc","undefined","input","arguments","reply","props","successInner","call","expected","overrideOperations","operations","strict","take","targetIterator","items","Symbol","iterator","targeted","next","done","length","value","slice","target","operation","some","op","anyOf","callResult","allOf","map","every","r","reduce","acc","Math","max","MathUtil","round","floor","AgenticaBenchmarkUtil","errorToJson","expectedToJson","error","Error","stack","AgenticaPromptReporter","markdown","p","contents","selections","flatMap","s","functionDescriptionCount","controller","reason","executes","e","split","JSON","stringify","AgenticaCallBenchmarkReporter","Object","fromEntries","writeIndex","experiments","exp","scenario","writeExperimentIndex","events","event","i","writeExperimentEvent","flat","average","completed_at","getTime","started_at","a","b","aggregate","usage","toLocaleString","total","cached","output","reasoning","accepted_prediction","rejected_prediction","drawStatus","select","index","prompts","count","Array","from","fill","AgenticaCallBenchmark","constructor","this","agent_","scenarios_","
scenarios","config_","repeat","config","simultaneous","result_","execute","listener","Date","semaphore","Semaphore","task","Promise","all","acquire","step","release","cur","AgenticaTokenUsage","plus","zero","report","clone","out","getTokenUsage","conversate","MicroAgenticaCallBenchmark","AgenticaSelectBenchmarkReporter","selected","AgenticaSelectBenchmark","histories_","context","getContext","prompt","factory","createUserInputHistory","executor","TypeError","histories","orchestrate","ready","dispatch","assistantPrompts"],"mappings":";;;;;;;;AAeO,MAAMA,8BAA8B;IACzCC;IACAC;;;AAmCFC,eAAeF,OAAuCG;IACpD,MAAMC,OAA2CD,MAC9CE,eACAC,IAAG;IAQN,MAAMC,YAAYJ,MAAMK;IACxB,MAAMC,eAAeL,MAAMM,SAAS,UAAUN,KAAKO,SAAS;IAC5D,KAAKF,cAAc;QACjB,OAAO;;IAGT,MAAMG,UAAmC;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;MAGrCC,UAAU;IACd,MAAMC,eAAsCP,UAAUQ,IAAIC,KAAKC,YAAYC,OACzE;QACEC,OAAOZ,UAAUY;QACjBC,UAAU,EACR;YACET,MAAM;YACNU,SAAS,EACP,iCACA,IACA,uDACA,gEACA,qDACAC,KAAK;WAET;YACEX,MAAM;YACNU,SAASjB,KAAKmB;;QAGlBC,OAAO,EACL;YACEd,MAAM;YACNe,UAAU;gBACRC,MAAMd,QAAQc;gBACdC,aAAaf,QAAQe;gBACrBC,YAAYhB,QAAQgB;;;QAI1BC,aAAa;QACbC,qBAAqB;OAEvBvB,UAAUwB;IAGZ,MAAMC,YACJlB,OAAOmB,QAAQ,IAAIC,QAAQC,cAAc,IACzCC,QACAC,MAAMA,GAAG3B,SAAS,cAAc2B,GAAGZ,SAASC,SAASd,QAAQc,SAC3D;IAEJ,IAAIM,aAAaM,WAAW;QAC1B,OAAO;;IAGT,MAAMC;;;;;;;SAA0CP,SAASP,SAASe;IAClE,OAAOD,UAAU,OAAOA,MAAME,QAAQ;AACxC;;AAWM,SAAUxC,QAAwCyC;IAoBtD,OAAOC,aAAaD,OAAO5B;AAC7B;;AAEA,SAAS6B,aAA6CD;IAQpD,MAAME,OAAO,CACXC,UACAC,uBAEAH,aAAa;QACXE;QACAE,YAAYD,sBAAsBJ,MAAMK;QACxCC,QAAQN,MAAMM;;IAGlB,QAAQN,MAAMG,SAASnC;MACrB,KAAK;QAAS;YACZ,IAAIuC,OAAO;YACX,MAAMC,iBAAiBR,MAAMG,SAASM,MAAMC,OAAOC;YACnD,IAAIC,WAAWJ,eAAeK;YAE9B,OAAO,MAAM;gBACX,IAAID,SAASE,SAAS,MAAM;oBAC1B,OAAO;wBACL1C,QAAQ;wBACRmC;;;gBAGJ,IAAIA,QAAQP,MAAMK,WAAWU,QAAQ;oBACnC,OAAO;wBAAE3C,QAAQ;;;gBAGnB,MAAMA,SAAS8B,KAAKU,SAASI,OAAOhB,MAAMK,WAAWY,MAAMV;gBAC3D,KAAKnC,OAAOA,QAAQ;oBAClB,IAAI4B,MAAMM,WAAW,MAAM;wBACzB,OAAO;4BAAElC,QAAQ;;;oBAEnBmC,QAAQ;oBACR;;gBAGFA,QAAQnC,OAAOmC;gBACfK,WAA
WJ,eAAeK;;;;MAG9B,KAAK;QAAc;YACjB,MAAMK,SAASlB,MAAMG,SAASgB;YAC9B,MAAM/C,SAAS4B,MAAMK,WAAWe,MAAKC,MAAMA,GAAGrC,SAASkC,OAAOlC;YAC9D,IAAIZ,QAAQ;gBACV,OAAO;oBAAEA;oBAAQmC,MAAM;;;YAEzB,OAAO;gBACLnC;;;;MAGJ,KAAK;QACH,KAAK,MAAM+B,YAAYH,MAAMG,SAASmB,OAAO;YAC3C,MAAMC,aAAarB,KAAKC;YACxB,IAAIoB,WAAWnD,QAAQ;gBACrB,OAAOmD;;;QAIX,OAAO;YAAEnD,QAAQ;;;MACnB,KAAK;QAAS;YAQZ,MAAMA,SAAS4B,MAAMG,SAASqB,MAAMC,KAAItB,YAAYD,KAAKC;YACzD,IAAI/B,OAAOsD,OAAMC,KAAKA,EAAEvD,UAAS;gBAC/B,OAAO;oBACLA,QAAQ;oBACRmC,MAAMnC,OAAOwD,QAAO,CAACC,KAAKF,MAAMG,KAAKC,IAAIF,KAAKF,EAAEpB,QAAO;;;YAI3D,OAAO;gBACLnC,QAAQ;;;;AAIhB;;AC3OO,MAAM4D,WAAW;IAOtBC,OAAQjB,SAA0Bc,KAAKI,MAAMlB,QAAQ,OAAO;;;ACJvD,MAAMmB,wBAAwB;IACnCC;IACAC;;;AAGF,SAASD,YAAeE;IAOtB,IAAIA,iBAAiBC,OAAO;QAC1B,OAAO;eACFD;YACHtD,MAAMsD,MAAMtD;YACZQ,SAAS8C,MAAM9C;YACfgD,OAAOF,MAAME;;;IAGjB,OAAOF;AACT;;AAEA,SAASD,eAA+ClC;IACtD,IAAIA,SAASnC,SAAS,cAAc;QAClC,OAAO;YACLA,MAAMmC,SAASnC;YACfmD,WAAW;gBACTnC,MAAMmB,SAASgB,UAAUnC;gBACzBC,aAAakB,SAASgB,UAAUpC,SAASE;;;WAI1C,IAAIkB,SAASnC,SAAS,SAAS;QAClC,OAAO;YACLA,MAAMmC,SAASnC;YACfyC,OAAON,SAASM,MAAMgB,IAAIY;;WAGzB,IAAIlC,SAASnC,SAAS,SAAS;QAClC,OAAO;YACLA,MAAMmC,SAASnC;YACfwD,OAAOrB,SAASqB,MAAMC,IAAIY;;WAGzB;QACH,OAAO;YACLrE,MAAMmC,SAASnC;YACfsD,OAAOnB,SAASmB,MAAMG,IAAIY;;;AAGhC;;ACpDO,MAAMI,yBAAyB;cACpCC;;;AAGF,SAASA,WAAyCC;IAChD,IAAIA,EAAE3E,SAAS,cAAc;QAC3B,OAAO,EAAC,kBAAkB2E,EAAEC,UAAU,KAAIhE,KAAK;;IAEjD,IAAI+D,EAAE3E,SAAS,QAAQ;QACrB,OAAO,EAAC,aAAa2E,EAAE1E,SAAS0E,EAAE9D,MAAM,KAAID,KAAK;;IAGnD,IAAI+D,EAAE3E,SAAS,YAAY2E,EAAE3E,SAAS,UAAU;QAC9C,OAAO,EACL,OAAO2E,EAAE3E,SAAS,WAAW,WAAW,eACrC2E,EAAEE,WACFC,SAASC;YACR,MAAMC,2BAA2BD,EAAE5B,UAAUpC,SAASE,aAAa8B,UAAU;YAE7E,OAAO,EACL,QAAQgC,EAAE5B,UAAUnC,QACpB,mBAAmB+D,EAAE5B,UAAU8B,WAAWjE,QAC1C,iBAAiB+D,EAAE5B,UAAUpC,SAASC,QACtC,eAAe+D,EAAEG,UACjB,OACIF,2BAA2B,IAC3B,EAACD,EAAE5B,UAAUpC,SAASE,aAAa,OACnC;AACL,cAELL,KAAK;;IAGT,IAAI+D,EAAE3E,SAAS,YAAY;QACzB,OAAO,EACL,mBACG2E,EAAEQ,SAAS1B,KAAI2B,KAAK,OAAOA,EAAEjC,UAAUnC,UAC1C,OACG2D,EAAE9D,KAAKwE,MAAM,MAAM5B,KAAIsB,KAAK,KAAKA,O
ACpC,KACAnE,KAAK;;IAGT,IAAI+D,EAAE3E,SAAS,WAAW;QACxB,OAAO,EACL,eACA,aAAa2E,EAAExB,UAAUnC,QACzB,mBAAmB2D,EAAExB,UAAU8B,WAAWjE,QAC1C,iBAAiB2D,EAAExB,UAAUpC,SAASC,QACtC,IACA,WACAsE,KAAKC,UAAUZ,EAAE7C,WAAW,MAAM,IAClC,OACA,KACAlB,KAAK;;IAIT,MAAM,IAAI2D,MAAM;AAClB;;ACnDO,MAAMiB,gCAAgC;cAC3Cd;;;AAGI,SAAUA,WAAyCtE;IACvD,OAAOqF,OAAOC,YAAY,EACxB,EAAC,eAAeC,aAAkBvF,cAC/BA,OAAOwF,YACPnC,KAAwBoC,OAAO,EAC9B,EAAC,KAAKA,IAAIC,SAAS9E,kBAAkB+E,uBAAqBF,WACvDA,IAAIG,OAAOvC,KAAsB,CAACwC,OAAOC,MAAM,EAChD,KAAKL,IAAIC,SAAS9E,QAAQkF,IAAI,KAAKD,MAAMjG,WACzCmG,uBAAqBF,OAAOC,WAG/BE;AAEP;;AAEA,SAAST,aAA2CvF;IAClD,MAAM4F,SAA+C5F,OAAOwF,YACzDnC,KAAIE,KAAKA,EAAEqC,SACXI;IACH,MAAMC,UACFL,OACCvC,KAAI2B,KAAKA,EAAEkB,aAAaC,YAAYnB,EAAEoB,WAAWD,YACjD3C,QAAO,CAAC6C,GAAGC,MAAMD,IAAIC,IAAG,KAAKV,OAAOjD;IACzC,MAAM4D,YAA2CvG,OAAOwG,MAAMD;IAC9D,OAAO,EACL,iCACA,cACA,oBACA,qBAAqBvG,OAAOwF,YAAY7C,OAAO8D,oBAC/C,gBAAgBb,OAAOjD,UACvB,kBAAkBiD,OAAOtE,QAAO0D,KAAKA,EAAEpF,SAAS,YAAW+C,UAC3D,kBAAkBiD,OAAOtE,QAAO0D,KAAKA,EAAEpF,SAAS,YAAW+C,UAC3D,uBAAuBiB,SAASC,MAAMoC,SAASQ,uBAC/C,mBACA,gBAAgBF,UAAUG,MAAMD,oBAChC,eACA,kBAAkBF,UAAU9E,MAAMiF,MAAMD,oBACxC,mBAAmBF,UAAU9E,MAAMkF,OAAOF,oBAC1C,iBACA,kBAAkBF,UAAUK,OAAOF,MAAMD,oBACzC,sBAAsBF,UAAUK,OAAOC,UAAUJ,oBACjD,gCAAgCF,UAAUK,OAAOE,oBAAoBL,oBACrE,gCAAgCF,UAAUK,OAAOG,oBAAoBN,oBACrE,IACA,kBACA,qCACA,yCACGzG,OAAOwF,YAAYnC,KAAIoC,OACxB,EACE,IAAIA,IAAIC,SAAS9E,WAAW6E,IAAIC,SAAS9E,mBACzCoG,WACEvB,IAAIG,SACJZ,KAAKA,EAAEpF,SAAS,WAAWoF,EAAEiC,WAAW,QAE1CD,WAAWvB,IAAIG,SAAQZ,KAAKA,EAAEpF,SAAS,WAAWoF,EAAElD,SAAS,QAC7D,GAAG8B,SAASC,MACV4B,IAAIG,OACDvC,KAAI2B,KAAKA,EAAEkB,aAAaC,YAAYnB,EAAEoB,WAAWD,YACjD3C,QAAO,CAAC6C,GAAGC,MAAMD,IAAIC,IAAG,KAAKb,IAAIG,OAAOjD,QAC3C8D,wBACFjG,KAAK,WAETA,KAAK;AACT;;AAEA,SAASmF,uBAAqDF;IAC5D,OAAO,EACL,KAAKA,IAAIC,SAAS9E,QAClB,cACA,mBAAmB6E,IAAIG,OAAOjD,OAAO8D,oBACrC,gBAAgBhB,IAAIG,OAAOtE,QAAO0D,KAAKA,EAAEpF,SAAS,YAAW+C,UAC7D,gBAAgB8C,IAAIG,OAAOtE,QAAO0D,KAAKA,EAAEpF,SAAS,YAAW+C,UAC7D,qBAAqBiB,SAASC,MAC5B4B,IAAIG,OACDvC,KAAI2B,KAAKA,EAAEkB,aAAaC,YAAYnB,EAAEoB,WA
AWD,YACjD3C,QAAO,CAAC6C,GAAGC,MAAMD,IAAIC,IAAG,KAAKb,IAAIG,OAAOjD,QAC3C8D,uBACF,IACA,aACA,uBACA,0BACGhB,IAAIG,OAAOvC,KAAI,CAAC2B,GAAGc,MACpB,EACE,IAAIA,IAAI,SAASA,IAAI,KAAKd,EAAEpF,YAC5BoF,EAAEpF,MACF,GAAGgE,SAASC,MAAMmB,EAAEkB,aAAaC,YAAYnB,EAAEoB,WAAWD,kBAC1D3F,KAAK,UAET,IACA,eACA,mBACAiF,IAAIC,SAASjF,MACb,IACA,gBACA,WACAyE,KAAKC,UACHpB,sBAAsBE,eAAewB,IAAIC,SAAS3D,WAClD,MACA,IAEF,QACAvB,KAAK;AACT;;AAEA,SAASuF,uBAAqDF,OAA2CqB;IACvG,OAAO,EACL,KAAKA,QAAQ,MAAMrB,MAAMjG,QACzB,cACA,aAAaiG,MAAMH,SAAS9E,QAC5B,aAAaiF,MAAMjG,QACnB,aAAagE,SAASC,MACpBgC,MAAMK,aAAaC,YAAYN,MAAMO,WAAWD,WAChDM,0BACEZ,MAAMjG,SAAS,UACf,EACE,eAAeiG,MAAMoB,SAAS,MAAM,OACpC,aAAapB,MAAM/D,OAAO,MAAM,UAElC,IACJ,oBACA,gBAAgBoD,KAAKC,UAAUU,MAAMW,MAAMD,UAAUG,UACrD,eACA,kBAAkBb,MAAMW,MAAMD,UAAU9E,MAAMiF,SAC9C,mBAAmBb,MAAMW,MAAMD,UAAU9E,MAAMkF,UAC/C,iBACA,kBAAkBd,MAAMW,MAAMD,UAAUK,OAAOF,SAC/C,gCAAgCb,MAAMW,MAAMD,UAAUK,OAAOE,uBAC7D,sBAAsBjB,MAAMW,MAAMD,UAAUK,OAAOC,aACnD,gCAAgChB,MAAMW,MAAMD,UAAUK,OAAOG,uBAE7D,IACA,eACA,mBACAlB,MAAMH,SAASjF,MACf,IACA,gBACA,WACAyE,KAAKC,UACHpB,sBAAsBE,eAAe4B,MAAMH,SAAS3D,WACpD,MACA,IAEF,OACA,IACA,0BACG8D,MAAMsB,QAAQ9D,IAAIgB,uBAAuBC,WAC5C,OACIuB,MAAMjG,SAAS,UACf,EACE,YACA,WACAsF,KAAKC,UACHpB,sBAAsBC,YAAY6B,MAAM3B,QACxC,MACA,IAEF,UAEF,KACJ1D,KAAK;AACT;;AAEA,SAASwG,WAA2CpB,QAA8CzG;IAChG,MAAMiI,QAAgB1D,KAAKI,MACxB8B,OAAOtE,OAAOnC,SAASwD,SAASiD,OAAOjD,SAAU;IAGpD,OACE0E,MAAMC,KAAK;QAAE3E,QAAQyE;OAASG,KAAK,KAAK/G,KAAK,MAC3C6G,MAAMC,KAAK;QAAE3E,QAAQ,KAAKyE;OAASG,KAAK,KAAK/G,KAAK;AAExD;;MCxJagH;IAWX,WAAAC,CAAmB7F;QACjB8F,KAAKC,SAAS/F,MAAMvC;QACpBqI,KAAKE,aAAahG,MAAMiG,UAAUhF;QAClC6E,KAAKI,UAAU;YACbC,QAAQnG,MAAMoG,QAAQD,UAAU;YAChCE,cAAcrG,MAAMoG,QAAQC,gBAAgB;YAC5CnI,SAAS8B,MAAMoG,QAAQlI,WAAW;;QAEpC4H,KAAKQ,UAAU;;IAmBV,aAAMC,CACXC;QAEA,MAAMhC,aAAmB,IAAIiC;QAC7B,MAAMC,YAAuB,IAAIC,UAAUb,KAAKI,QAAQG;QACxD,MAAMO,OAAOd,KAAKE,WAAWvE,KAAIjE,MAAOsG;YACtC,MAAME,eACI6C,QAAQC,IACdrB,MAAMC,KAAK;gBAAE3E,QAAQ+E,KAAKI,QAAQC;eAAU1E,KAAIjE;sBACxCkJ,UAAUK;gBAChB,MAAM3D,UACI0C,KAAKkB,KAAKlD;sBACd4C,UAAUO;gBAE
hB,IAAIT,aAAa5G,WAAW;oBAC1B4G,SAASpD;;gBAGX,OAAOA;AAAC;YAGd,OAAO;gBACLU;gBACAE;gBACAY,OAAOZ,OACJtE,QAAO0D,KAAKA,EAAEpF,SAAS,UACvByD,KAAI2B,KAAKA,EAAEwB,QACXhD,QAAO,CAACC,KAAKqF,QAAQC,mBAAmBC,KAAKvF,KAAKqF,OAAMC,mBAAmBE;;AAC/E;QAEH,MAAMzD,oBACIiD,QAAQC,IAAIF;QACtB,OAAQd,KAAKQ,UAAU;YACrB1C;YACAY;YACAF,cAAc,IAAImC;YAClB7B,OAAOhB,YACJnC,KAAIkB,KAAKA,EAAEiC,QACXhD,QAAO,CAACC,KAAKqF,QAAQC,mBAAmBC,KAAKvF,KAAKqF,OAAMC,mBAAmBE;;;IAsB3E,MAAAC;QACL,IAAIxB,KAAKQ,YAAY,MAAM;YACzB,MAAM,IAAI/D,MAAM;;QAElB,OAAOiB,8BAA8Bd,SAASoD,KAAKQ;;IAG7C,UAAMU,CACZlD;QAEA,MAAMrG,QAAyBqI,KAAKC,OAAOwB;QAC3C,MAAM/C,aAAmB,IAAIiC;QAC7B,MAAMlJ,UAAU,MACdF,4BAA4BE,QAAQ;YAClC4C,UAAU2D,SAAS3D;YACnBE,YAAY5C,MACTE,eACA+B,QAAOiD,KAAKA,EAAE3E,SAAS,YACvByD,KAAIkB,KAAKA,EAAExB;YACdb,QAAQ;;QAEZ,MAAMkH,MAAM;YACV,MAAMnC,SAAShI,4BAA4BE,QAAQ;gBACjD4C,UAAU2D,SAAS3D;gBACnBE,YAAY5C,MACTE,eACA+B,QAAOiD,KAAKA,EAAE3E,SAAS,WACvByD,KAAIkB,KAAKA,EAAEE,aACXuB,OACA3C,KAAIkB,KAAKA,EAAExB;gBACdb,QAAQ;;YAEV,MAAMJ,OAAO3C;YACb,OAAO;gBACLS,MAAOkC,OAAO,YAAY;gBAC1B4D;gBACAuB;gBACAnF;gBACAqF,SAAS9H,MAAME;gBACfiH,OAAOnH,MAAMgK;gBACbjD;gBACAF,cAAc,IAAImC;;AACmC;QAGzD;kBACQhJ,MAAMiK,WAAW5D,SAASjF;YAChC,IAAItB,WAAW;gBACb,OAAOiK;;YAGT,KAAK,IAAItD,IAAY,GAAGA,IAAI4B,KAAKI,QAAQhI,WAAWgG,GAAG;gBACrD,MAAMrD,aACIxD,4BAA4BC,OAAOG;gBAC7C,IAAIoD,SAAS,MAAM;oBACjB;;sBAGIpD,MAAMiK,WAAW7G;gBACvB,IAAItD,WAAW;oBACb,OAAOiK;;;YAGX,OAAOA;UAET,OAAOlF;YACL,OAAO;gBACLtE,MAAM;gBACN8F;gBACAyB,SAAS9H,MAAME;gBACfiH,OAAOnH,MAAMgK;gBACbnF;gBACAkC;gBACAF,cAAc,IAAImC;;;;;;MC3KbkB;IAWX,WAAA9B,CAAmB7F;QACjB8F,KAAKC,SAAS/F,MAAMvC;QACpBqI,KAAKE,aAAahG,MAAMiG,UAAUhF;QAClC6E,KAAKI,UAAU;YACbC,QAAQnG,MAAMoG,QAAQD,UAAU;YAChCE,cAAcrG,MAAMoG,QAAQC,gBAAgB;YAC5CnI,SAAS8B,MAAMoG,QAAQlI,WAAW;;QAEpC4H,KAAKQ,UAAU;;IAmBV,aAAMC,CACXC;QAEA,MAAMhC,aAAmB,IAAIiC;QAC7B,MAAMC,YAAuB,IAAIC,UAAUb,KAAKI,QAAQG;QACxD,MAAMO,OAAOd,KAAKE,WAAWvE,KAAIjE,MAAOsG;YACtC,MAAME,eACI6C,QAAQC,IACdrB,MAAMC,KAAK;gBAAE3E,QAAQ+E,KAAKI,QAAQC;eAAU1E,KAAIjE;sBACxCkJ,UAAUK;gBAChB,MAAM3D,UACI0C,KAAKkB,KAAKlD;sBACd4C,UAAUO
;gBAEhB,IAAIT,aAAa5G,WAAW;oBAC1B4G,SAASpD;;gBAGX,OAAOA;AAAC;YAGd,OAAO;gBACLU;gBACAE;gBACAY,OAAOZ,OACJtE,QAAO0D,KAAKA,EAAEpF,SAAS,UACvByD,KAAI2B,KAAKA,EAAEwB,QACXhD,QAAO,CAACC,KAAKqF,QAAQC,mBAAmBC,KAAKvF,KAAKqF,OAAMC,mBAAmBE;;AAC/E;QAEH,MAAMzD,oBACIiD,QAAQC,IAAIF;QACtB,OAAQd,KAAKQ,UAAU;YACrB1C;YACAY;YACAF,cAAc,IAAImC;YAClB7B,OAAOhB,YACJnC,KAAIkB,KAAKA,EAAEiC,QACXhD,QAAO,CAACC,KAAKqF,QAAQC,mBAAmBC,KAAKvF,KAAKqF,OAAMC,mBAAmBE;;;IAsB3E,MAAAC;QACL,IAAIxB,KAAKQ,YAAY,MAAM;YACzB,MAAM,IAAI/D,MAAM;;QAElB,OAAOiB,8BAA8Bd,SAASoD,KAAKQ;;IAG7C,UAAMU,CACZlD;QAEA,MAAMrG,QAA8BqI,KAAKC,OAAOwB;QAChD,MAAM/C,aAAmB,IAAIiC;QAC7B,MAAMlJ,UAAU,MACdF,4BAA4BE,QAAQ;YAClC4C,UAAU2D,SAAS3D;YACnBE,YAAY5C,MACTE,eACA+B,QAAOiD,KAAKA,EAAE3E,SAAS,YACvByD,KAAIkB,KAAKA,EAAExB;YACdb,QAAQ;;QAEZ,MAAMkH,MAAM;YACV,MAAMnC,SAAShI,4BAA4BE,QAAQ;gBACjD4C,UAAU2D,SAAS3D;gBACnBE,YAAY5C,MACTE,eACA+B,QAAOiD,KAAKA,EAAE3E,SAAS,YACvByD,KAAIkB,KAAKA,EAAExB;gBACdb,QAAQ;;YAEV,MAAMJ,OAAO3C;YACb,OAAO;gBACLS,MAAOkC,OAAO,YAAY;gBAC1B4D;gBACAuB;gBACAnF;gBACAqF,SAAS9H,MAAME;gBACfiH,OAAOnH,MAAMgK;gBACbjD;gBACAF,cAAc,IAAImC;;AACmC;QAGzD;kBACQhJ,MAAMiK,WAAW5D,SAASjF;YAChC,IAAItB,WAAW;gBACb,OAAOiK;;YAGT,KAAK,IAAItD,IAAY,GAAGA,IAAI4B,KAAKI,QAAQhI,WAAWgG,GAAG;gBACrD,MAAMrD,aACIxD,4BAA4BC,OAAOG;gBAC7C,IAAIoD,SAAS,MAAM;oBACjB;;sBAGIpD,MAAMiK,WAAW7G;gBACvB,IAAItD,WAAW;oBACb,OAAOiK;;;YAGX,OAAOA;UAET,OAAOlF;YACL,OAAO;gBACLtE,MAAM;gBACN8F;gBACAyB,SAAS9H,MAAME;gBACfiH,OAAOnH,MAAMgK;gBACbnF;gBACAkC;gBACAF,cAAc,IAAImC;;;;;;AC9LnB,MAAMmB,kCAAkC;IAC7ClF;;;AAGI,SAAUA,SAAyCtE;IACvD,MAAMuC,WAAW,EACf,EAAC,eAAegD,WAAWvF,cACxBA,OAAOwF,YACPnC,KAAwBoC,OAAO,EAC9B,EAAC,KAAKA,IAAIC,SAAS9E,kBAAkB+E,qBAAqBF,WACvDA,IAAIG,OAAOvC,KAAsB,CAACwC,OAAOC,MAAM,EAChD,KAAKL,IAAIC,SAAS9E,QAAQkF,IAAI,KAAKD,MAAMjG,WACzCmG,qBAAqBF,OAAOC,WAG/BE;IAGL,OAAOX,OAAOC,YAAY/C;AAC5B;;AAEA,SAASgD,WAA2CvF;IAClD,MAAM4F,SAAiD5F,OAAOwF,YAC3DnC,KAAIE,KAAKA,EAAEqC,SACXI;IACH,MAAMC,UACAL,OACCvC,KAAI2B,KAAKA,EAAEkB,aAAaC,YAAYnB,EAAEoB,WAAWD,YACjD3C,QAAO,CAAC6C,GAAGC,MAAMD,IAAIC,IAAG,KAAKV,OAAOjD;I
AC3C,MAAM4D,YAA2CvG,OAAOwG,MAAMD;IAC9D,OAAO,EACL,sCACA,cACA,oBACA,qBAAqBvG,OAAOwF,YAAY7C,OAAO8D,oBAC/C,gBAAgBb,OAAOjD,UACvB,kBAAkBiD,OAAOtE,QAAO0D,KAAKA,EAAEpF,SAAS,YAAW+C,UAC3D,kBAAkBiD,OAAOtE,QAAO0D,KAAKA,EAAEpF,SAAS,YAAW+C,UAC3D,gBAAgBiD,OAAOtE,QAAO0D,KAAKA,EAAEpF,SAAS,UAAS+C,UACvD,uBAAuBiB,SAASC,MAAMoC,SAASQ,uBAC/C,mBACA,gBAAgBF,UAAUG,MAAMD,oBAChC,eACA,kBAAkBF,UAAU9E,MAAMiF,MAAMD,oBACxC,mBAAmBF,UAAU9E,MAAMkF,OAAOF,oBAC1C,iBACA,kBAAkBF,UAAUK,OAAOF,MAAMD,oBACzC,gCAAgCF,UAAUK,OAAOE,oBAAoBL,oBACrE,sBAAsBF,UAAUK,OAAOC,UAAUJ,oBACjD,gCAAgCF,UAAUK,OAAOG,oBAAoBN,oBACrE,IACA,kBACA,+BACA,kCACGzG,OAAOwF,YAAYnC,KAAIoC,OACxB,EACE,IAAIA,IAAIC,SAAS9E,WAAW6E,IAAIC,SAAS9E,mBACzC;QACE,MAAMzB,UAAkBuE,KAAKI,MAC1B2B,IAAIG,OAAOtE,QAAO0D,KAAKA,EAAEpF,SAAS,YAAW+C,SAC1C8C,IAAIG,OAAOjD,SACb;QAEJ,OACE0E,MAAMC,KAAK;YAAE3E,QAAQxD;WAAWoI,KAAK,KAAK/G,KAAK,MAC7C6G,MAAMC,KAAK;YAAE3E,QAAQ,KAAKxD;WAAWoI,KAAK,KAAK/G,KAAK;AAEzD,MAVD,IAWA,GAAGoD,SAASC,MACV4B,IAAIG,OACDvC,KACCwC,SACEA,MAAMK,aAAaC,YAAYN,MAAMO,WAAWD,YAEnD3C,QAAO,CAAC6C,GAAGC,MAAMD,IAAIC,IAAG,KAAKb,IAAIG,OAAOjD,QAC3C8D,wBACFjG,KAAK,WAETA,KAAK;AACT;;AAEA,SAASmF,qBAAqDF;IAC5D,MAAMc,YAA2Cd,IAAIe,MAAMD;IAC3D,OAAO,EACL,KAAKd,IAAIC,SAAS9E,QAClB,cACA,oBACA,gBAAgB6E,IAAIG,OAAOjD,UAC3B,kBAAkB8C,IAAIG,OAAOtE,QAAO0D,KAAKA,EAAEpF,SAAS,YAAW+C,UAC/D,kBAAkB8C,IAAIG,OAAOtE,QAAO0D,KAAKA,EAAEpF,SAAS,YAAW+C,UAC/D,gBAAgB8C,IAAIG,OAAOtE,QAAO0D,KAAKA,EAAEpF,SAAS,UAAS+C,UAC3D,uBAAuBiB,SAASC,MAC9B4B,IAAIG,OACDvC,KACCwC,SACEA,MAAMK,aAAaC,YAAYN,MAAMO,WAAWD,YAEnD3C,QAAO,CAAC6C,GAAGC,MAAMD,IAAIC,IAAG,KAAKb,IAAIG,OAAOjD,QAC3C8D,uBACF,mBACA,gBAAgBF,UAAUG,MAAMD,oBAChC,eACA,kBAAkBF,UAAU9E,MAAMiF,MAAMD,oBACxC,mBAAmBF,UAAU9E,MAAMkF,OAAOF,oBAC1C,iBACA,kBAAkBF,UAAUK,OAAOF,MAAMD,oBACzC,gCAAgCF,UAAUK,OAAOE,oBAAoBL,oBACrE,sBAAsBF,UAAUK,OAAOC,UAAUJ,oBACjD,gCAAgCF,UAAUK,OAAOG,oBAAoBN,oBACrE,IACA,aACA,qBACA,wBACGhB,IAAIG,OAAOvC,KAAI,CAAC2B,GAAGc,MACpB,EACE,IAAIA,IAAI,SAASA,IAAI,KAAKd,EAAEpF,YAC5BoF,EAAEpF,MACF,GAAGgE,SAASC,MAAMmB,EAAEkB,aAAaC,YAAYnB,EAAEoB,WAAWD,kBAE1D3F,KAAK,UA
ET,IACA,eACA,mBACAiF,IAAIC,SAASjF,MACb,IACA,gBACA,WACAyE,KAAKC,UACHpB,sBAAsBE,eAAewB,IAAIC,SAAS3D,WAClD,MACA,IAEF,QACAvB,KAAK;AACT;;AAEA,SAASuF,qBAAqDF,OAA6CqB;IACzG,OAAO,EACL,KAAKA,QAAQ,MAAMrB,MAAMjG,QACzB,cACA,aAAaiG,MAAMH,SAAS9E,QAC5B,aAAaiF,MAAMjG,QACnB,cAAciG,MAAMK,aAAaC,YAAYN,MAAMO,WAAWD,WAAWM,0BACrEZ,MAAMjG,SAAS,UACf,EACE,mBACA,gBAAgBiG,MAAMW,MAAMD,UAAUG,MAAMD,oBAC5C,gBACA,kBAAkBZ,MAAMW,MAAMD,UAAU9E,MAAMiF,MAAMD,oBACpD,mBAAmBZ,MAAMW,MAAMD,UAAU9E,MAAMkF,OAAOF,oBACtD,qBACA,kBAAkBZ,MAAMW,MAAMD,UAAUK,OAAOF,MAAMD,oBACrD,sBAAsBZ,MAAMW,MAAMD,UAAUK,OAAOC,UAAUJ,oBAC7D,gCAAgCZ,MAAMW,MAAMD,UAAUK,OAAOE,oBAAoBL,oBACjF,gCAAgCZ,MAAMW,MAAMD,UAAUK,OAAOG,oBAAoBN,uBAEnF,IACJ,IACA,eACA,mBACAZ,MAAMH,SAASjF,MACf,IACA,gBACA,WACAyE,KAAKC,UACHpB,sBAAsBE,eAAe4B,MAAMH,SAAS3D,WACpD,MACA,IAEF,OACA,OACI8D,MAAMjG,SAAS,aAAaiG,MAAMjG,SAAS,YAC3C,EACE,gBACGiG,MAAM4D,SAASpG,KAAIsB,KACpB,EACE,OAAOA,EAAE5B,UAAUnC,QACnB,qBAAqB+D,EAAE5B,UAAU8B,WAAWjE,UAC5C,mBAAmB+D,EAAE5B,UAAUpC,SAASC,UACxC,eAAe+D,EAAEG,UACjB,OACIH,EAAE5B,UAAUpC,SAASE,gBAAgBW,aAAamD,EAAE5B,UAAUpC,SAASE,gBAAgB,KACvF,EAAC8D,EAAE5B,UAAUpC,SAASE,aAAa,OACnC,KACJL,KAAK,YAGX,OACAqF,MAAMjG,SAAS,UACf,EACE,YACA,WACAmE,sBAAsBC,YACpBkB,KAAKC,UAAUU,MAAM3B,OAAO,MAAM,KAEpC,OACA,OAEF,KACJ1D,KAAK;AACT;;MClLakJ;IAYX,WAAAjC,CAAmB7F;QACjB8F,KAAKC,SAAS/F,MAAMvC;QACpBqI,KAAKE,aAAahG,MAAMiG,UAAUhF;QAClC6E,KAAKI,UAAU;YACbC,QAAQnG,MAAMoG,QAAQD,UAAU;YAChCE,cAAcrG,MAAMoG,QAAQC,gBAAgB;;QAE9CP,KAAKiC,aAAa/H,MAAMvC,MAAME,eAAesD;QAC7C6E,KAAKQ,UAAU;;IAmBV,aAAMC,CACXC;QAEA,MAAMhC,aAAmB,IAAIiC;QAC7B,MAAMC,YAAuB,IAAIC,UAAUb,KAAKI,QAAQG;QACxD,MAAMzC,oBACIiD,QAAQC,IACdhB,KAAKE,WAAWvE,KAAIjE,MAAOsG;YACzB,MAAME,eACI6C,QAAQC,IACdrB,MAAMC,KAAK;gBAAE3E,QAAQ+E,KAAKI,QAAQC;eAAU1E,KAAIjE;sBACxCkJ,UAAUK;gBAChB,MAAM3D,UACI0C,KAAKkB,KAAKlD;sBACd4C,UAAUO;gBAChB,IAAIT,aAAa5G,WAAW;oBAC1B4G,SAASpD;;gBAEX,OAAOA;AAAC;YAGd,OAAO;gBACLU;gBACAE;gBACAY,OAAOZ,OACJtE,QAAO0D,KAAKA,EAAEpF,SAAS,UACvByD,KAAI2B,KAAKA,EAAEwB,QACXhD,QAAO,CAACC,KAAKqF,QAAQC,mBAAmBC,KAAKvF,KAAKqF,OAAMC,mBAAmBE;;AAC/E
;QAGP,OAAQvB,KAAKQ,UAAU;YACrB1C;YACAY;YACAF,cAAc,IAAImC;YAClB7B,OAAOhB,YACJnC,KAAIkB,KAAKA,EAAEiC,QACXhD,QAAO,CAACC,KAAKqF,QAAQC,mBAAmBC,KAAKvF,KAAKqF,OAAMC,mBAAmBE;;;IAuB3E,MAAAC;QACL,IAAIxB,KAAKQ,YAAY,MAAM;YACzB,MAAM,IAAI/D,MAAM;;QAElB,OAAOqF,gCAAgClF,SAASoD,KAAKQ;;IAG/C,UAAMU,CACZlD;QAEA,MAAMU,aAAmB,IAAIiC;QAC7B;YACE,MAAM7B,QAA4BuC,mBAAmBE;YACrD,MAAMW,UAAUlC,KAAKC,OAAOkC,WAAW;gBACrCC,QAAQC,QAAQC,uBAAuB;oBACrCxF,UAAU,EAAC;wBACT5E,MAAM;wBACNa,MAAMiF,SAASjF;;;gBAGnB+F;;YAEF,WAAWoD,QAAQ5B,QAAQiC,aAAa,YAAY;gBAClD,MAAM,IAAIC,UAAU;;YAGtB,MAAMC,mBACKP,QAAQ5B,QAAQiC,UAAUhD,UAAUmD,YAAYnD,QAAQ;mBAC5D2C;gBACHO,WAAWzC,KAAKiC,WAAW9G;gBAC3BuB,OAAO;gBACPiG,OAAO,MAAM;gBACbC,UAAUlL;;YAEd,MAAMqK,WAAgDU,UACnD7I,QAAOiD,KAAKA,EAAE3E,SAAS,WACvByD,KAAIkB,KAAKA,EAAEE,aACXuB;YACH,OAAO;gBACLpG,MAAMX,4BAA4BE,QAAQ;oBACxC4C,UAAU2D,SAAS3D;oBACnBE,YAAYwH,SAASpG,KAAIsB,KAAKA,EAAE5B;qBAE9B,YACA;gBACJ2C;gBACA+D;gBACAjD;gBACA+D,kBAAkBJ,UAEf7I,QAAOiD,KAAKA,EAAE3E,SAAS;gBAC1BwG;gBACAF,cAAc,IAAImC;;UAKtB,OAAOnE;YACL,OAAO;gBACLtE,MAAM;gBACN8F;gBACAxB;gBACAkC;gBACAF,cAAc,IAAImC;;;;;;"}
|
|
@@ -4,14 +4,14 @@
|
|
|
4
4
|
*
|
|
5
5
|
* @author Wrtn Technologies
|
|
6
6
|
*/
|
|
7
|
-
import type { Agentica, AgenticaOperation } from "@agentica/core";
|
|
7
|
+
import type { Agentica, AgenticaOperation, MicroAgentica } from "@agentica/core";
|
|
8
8
|
import type { ILlmSchema } from "@samchon/openapi";
|
|
9
9
|
import type { IAgenticaBenchmarkExpected } from "../structures/IAgenticaBenchmarkExpected";
|
|
10
10
|
export declare const AgenticaBenchmarkPredicator: {
|
|
11
11
|
isNext: typeof isNext;
|
|
12
12
|
success: typeof success;
|
|
13
13
|
};
|
|
14
|
-
declare function isNext<Model extends ILlmSchema.Model>(agent: Agentica<Model>): Promise<string | null>;
|
|
14
|
+
declare function isNext<Model extends ILlmSchema.Model>(agent: Agentica<Model> | MicroAgentica<Model>): Promise<string | null>;
|
|
15
15
|
/**
|
|
16
16
|
* Check if the called operations match the expected operations.
|
|
17
17
|
*
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"AgenticaBenchmarkPredicator.js","sourceRoot":"","sources":["../../src/internal/AgenticaBenchmarkPredicator.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAmIA,0BAqBC;;AA7ID,kDAA0B;AAIb,QAAA,2BAA2B,GAAG;IACzC,MAAM;IACN,OAAO;CACR,CAAC;AAkCF,SAAe,MAAM,CAAiC,
|
|
1
|
+
{"version":3,"file":"AgenticaBenchmarkPredicator.js","sourceRoot":"","sources":["../../src/internal/AgenticaBenchmarkPredicator.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAmIA,0BAqBC;;AA7ID,kDAA0B;AAIb,QAAA,2BAA2B,GAAG;IACzC,MAAM;IACN,OAAO;CACR,CAAC;AAkCF,SAAe,MAAM,CAAiC,KAA6C;;;QACjG,MAAM,IAAI,GAAuC,KAAK;aACnD,YAAY,EAAE;aACd,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;QAEV;;;;;WAKG;QACH,MAAM,SAAS,GAAG,KAAK,CAAC,SAAS,EAAE,CAAC;QACpC,MAAM,YAAY,GAAG,CAAA,IAAI,aAAJ,IAAI,uBAAJ,IAAI,CAAE,IAAI,MAAK,MAAM,IAAI,IAAI,CAAC,IAAI,KAAK,WAAW,CAAC;QACxE,IAAI,CAAC,YAAY,EAAE,CAAC;YAClB,OAAO,IAAI,CAAC;QACd,CAAC;QAED,MAAM,OAAO,GAA4B;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;UAGrC,SAAS,CAAC,CAAC,CAAE,CAAC;QAClB,MAAM,MAAM,GAA0B,MAAM,SAAS,CAAC,GAAG,CAAC,IAAI,CAAC,WAAW,CAAC,MAAM,CAC/E;YACE,KAAK,EAAE,SAAS,CAAC,KAAK;YACtB,QAAQ,EAAE;gBACR;oBACE,IAAI,EAAE,QAAQ;oBACd,OAAO,EAAE;wBACP,+BAA+B;wBAC/B,EAAE;wBACF,qDAAqD;wBACrD,8DAA8D;wBAC9D,kDAAkD;qBACnD,CAAC,IAAI,CAAC,IAAI,CAAC;iBACb;gBACD;oBACE,IAAI,EAAE,WAAW;oBACjB,OAAO,EAAE,IAAI,CAAC,IAAI;iBACnB;aACF;YACD,KAAK,EAAE;gBACL;oBACE,IAAI,EAAE,UAAU;oBAChB,QAAQ,EAAE;wBACR,IAAI,EAAE,OAAO,CAAC,IAAI;wBAClB,WAAW,EAAE,OAAO,CAAC,WAAW;wBAChC,UAAU,EAAE,OAAO,CAAC,UAAiC;qBACtD;iBACF;aACF;YACD,WAAW,EAAE,UAAU;YACvB,mBAAmB,EAAE,KAAK;SAC3B,EACD,SAAS,CAAC,OAAO,CAClB,CAAC;QAEF,MAAM,QAAQ,GAAqD,MAAA,CACjE,MAAA,MAAA,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,0CAAE,OAAO,CAAC,UAAU,mCAAI,EAAE,CAC5C,CAAC,MAAM,CACN,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,IAAI,KAAK,UAAU,IAAI,EAAE,CAAC,QAAQ,CAAC,IAAI,KAAK,OAAO,CAAC,IAAI,CAClE,0CAAG,CAAC,CAAC,CAAC;QAEP,IAAI,QAAQ,KAAK,SAAS,EAAE,CAAC;YAC3B,OAAO,IAAI,CAAC;QACd,CAAC;QAED,MAAM,KAAK,kRAAqC,QAAQ,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAC;QAC7E,OAAO,KAAK,KAAK,IAAI,CAAC,CAAC,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC;IAC7C,CAAC;CAAA;AAED;;;;;;;;GAQG;AACH,SAAgB,OAAO,CAAiC,KAmBvD;IACC,OAAO,YAAY,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC;AACpC,CAAC;AAED,SAAS,YAAY,CAAiC,KAA2C;IAQ/F,MAAM,IAAI,GAAG,CACX,QAA2C,EAC3C,kBAAoD,EACpD,EAAE,CACF,YAAY,CA
AC;QACX,QAAQ;QACR,UAAU,EAAE,kBAAkB,aAAlB,kBAAkB,cAAlB,kBAAkB,GAAI,KAAK,CAAC,UAAU;QAClD,MAAM,EAAE,KAAK,CAAC,MAAM;KACrB,CAAC,CAAC;IAEL,QAAQ,KAAK,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC;QAC5B,KAAK,OAAO,CAAC,CAAC,CAAC;YACb,IAAI,IAAI,GAAG,CAAC,CAAC;YACb,MAAM,cAAc,GAAG,KAAK,CAAC,QAAQ,CAAC,KAAK,CAAC,MAAM,CAAC,QAAQ,CAAC,EAAE,CAAC;YAC/D,IAAI,QAAQ,GAAG,cAAc,CAAC,IAAI,EAAE,CAAC;YAErC,OAAO,IAAI,EAAE,CAAC;gBACZ,IAAI,QAAQ,CAAC,IAAI,KAAK,IAAI,EAAE,CAAC;oBAC3B,OAAO;wBACL,MAAM,EAAE,IAAI;wBACZ,IAAI;qBACL,CAAC;gBACJ,CAAC;gBACD,IAAI,IAAI,IAAI,KAAK,CAAC,UAAU,CAAC,MAAM,EAAE,CAAC;oBACpC,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,CAAC;gBAC3B,CAAC;gBAED,MAAM,MAAM,GAAG,IAAI,CAAC,QAAQ,CAAC,KAAK,EAAE,KAAK,CAAC,UAAU,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC;gBAClE,IAAI,CAAC,MAAM,CAAC,MAAM,EAAE,CAAC;oBACnB,IAAI,KAAK,CAAC,MAAM,KAAK,IAAI,EAAE,CAAC;wBAC1B,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,CAAC;oBAC3B,CAAC;oBACD,IAAI,IAAI,CAAC,CAAC;oBACV,SAAS;gBACX,CAAC;gBAED,IAAI,IAAI,MAAM,CAAC,IAAI,CAAC;gBACpB,QAAQ,GAAG,cAAc,CAAC,IAAI,EAAE,CAAC;YACnC,CAAC;QACH,CAAC;QACD,KAAK,YAAY,CAAC,CAAC,CAAC;YAClB,MAAM,MAAM,GAAG,KAAK,CAAC,QAAQ,CAAC,SAAS,CAAC;YACxC,MAAM,MAAM,GAAG,KAAK,CAAC,UAAU,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,IAAI,KAAK,MAAM,CAAC,IAAI,CAAC,CAAC;YACpE,IAAI,MAAM,EAAE,CAAC;gBACX,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,CAAC,EAAE,CAAC;YAC7B,CAAC;YACD,OAAO;gBACL,MAAM;aACP,CAAC;QACJ,CAAC;QACD,KAAK,OAAO;YACV,KAAK,MAAM,QAAQ,IAAI,KAAK,CAAC,QAAQ,CAAC,KAAK,EAAE,CAAC;gBAC5C,MAAM,UAAU,GAAG,IAAI,CAAC,QAAQ,CAAC,CAAC;gBAClC,IAAI,UAAU,CAAC,MAAM,EAAE,CAAC;oBACtB,OAAO,UAAU,CAAC;gBACpB,CAAC;YACH,CAAC;YAED,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,CAAC;QAC3B,KAAK,OAAO,CAAC,CAAC,CAAC;YACb;;;;;;eAMG;YACH,MAAM,MAAM,GAAG,KAAK,CAAC,QAAQ,CAAC,KAAK,CAAC,GAAG,CAAC,QAAQ,CAAC,EAAE,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC;YACpE,IAAI,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,EAAE,CAAC;gBAChC,OAAO;oBACL,MAAM,EAAE,IAAI;oBACZ,IAAI,EAAE,MAAM,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;iBAC1D,CAAC;YACJ,CAAC;YAED
,OAAO;gBACL,MAAM,EAAE,KAAK;aACd,CAAC;QACJ,CAAC;IACH,CAAC;AACH,CAAC"}
|
|
@@ -5,11 +5,13 @@ exports.AgenticaPromptReporter = {
|
|
|
5
5
|
markdown,
|
|
6
6
|
};
|
|
7
7
|
function markdown(p) {
|
|
8
|
-
|
|
8
|
+
if (p.type === "user_input") {
|
|
9
|
+
return [`### User Input`, p.contents, ""].join("\n");
|
|
10
|
+
}
|
|
9
11
|
if (p.type === "text") {
|
|
10
12
|
return [`### Text (${p.role})`, p.text, ""].join("\n");
|
|
11
13
|
}
|
|
12
|
-
|
|
14
|
+
if (p.type === "select" || p.type === "cancel") {
|
|
13
15
|
return [
|
|
14
16
|
`### ${p.type === "select" ? "Select" : "Cancel"}`,
|
|
15
17
|
...p.selections
|
|
@@ -29,7 +31,7 @@ function markdown(p) {
|
|
|
29
31
|
}),
|
|
30
32
|
].join("\n");
|
|
31
33
|
}
|
|
32
|
-
|
|
34
|
+
if (p.type === "describe") {
|
|
33
35
|
return [
|
|
34
36
|
"### Describe",
|
|
35
37
|
...p.executes.map(e => ` - ${e.operation.name}`),
|
|
@@ -38,16 +40,20 @@ function markdown(p) {
|
|
|
38
40
|
"",
|
|
39
41
|
].join("\n");
|
|
40
42
|
}
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
43
|
+
if (p.type === "execute") {
|
|
44
|
+
return [
|
|
45
|
+
"### Execute",
|
|
46
|
+
` - name: ${p.operation.name}`,
|
|
47
|
+
` - controller: ${p.operation.controller.name}`,
|
|
48
|
+
` - function: ${p.operation.function.name}`,
|
|
49
|
+
"",
|
|
50
|
+
"```json",
|
|
51
|
+
JSON.stringify(p.arguments, null, 2),
|
|
52
|
+
"```",
|
|
53
|
+
"",
|
|
54
|
+
].join("\n");
|
|
55
|
+
}
|
|
56
|
+
p;
|
|
57
|
+
throw new Error("Invalid history type");
|
|
52
58
|
}
|
|
53
59
|
//# sourceMappingURL=AgenticaPromptReporter.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"AgenticaPromptReporter.js","sourceRoot":"","sources":["../../src/internal/AgenticaPromptReporter.ts"],"names":[],"mappings":";;;AASa,QAAA,sBAAsB,GAAG;IACpC,QAAQ;CACT,CAAC;AAEF,SAAS,QAAQ,CAAiC,CAAyB;IACzE,
|
|
1
|
+
{"version":3,"file":"AgenticaPromptReporter.js","sourceRoot":"","sources":["../../src/internal/AgenticaPromptReporter.ts"],"names":[],"mappings":";;;AASa,QAAA,sBAAsB,GAAG;IACpC,QAAQ;CACT,CAAC;AAEF,SAAS,QAAQ,CAAiC,CAAyB;IACzE,IAAI,CAAC,CAAC,IAAI,KAAK,YAAY,EAAE,CAAC;QAC5B,OAAO,CAAC,gBAAgB,EAAE,CAAC,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACvD,CAAC;IACD,IAAI,CAAC,CAAC,IAAI,KAAK,MAAM,EAAE,CAAC;QACtB,OAAO,CAAC,aAAa,CAAC,CAAC,IAAI,GAAG,EAAE,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACzD,CAAC;IAED,IAAI,CAAC,CAAC,IAAI,KAAK,QAAQ,IAAI,CAAC,CAAC,IAAI,KAAK,QAAQ,EAAE,CAAC;QAC/C,OAAO;YACL,OAAO,CAAC,CAAC,IAAI,KAAK,QAAQ,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,QAAQ,EAAE;YAClD,GAAG,CAAC,CAAC,UAAU;iBACZ,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE;;gBACb,MAAM,wBAAwB,GAAG,MAAA,MAAA,CAAC,CAAC,SAAS,CAAC,QAAQ,CAAC,WAAW,0CAAE,MAAM,mCAAI,CAAC,CAAC;gBAE/E,OAAO;oBACL,QAAQ,CAAC,CAAC,SAAS,CAAC,IAAI,EAAE;oBAC1B,mBAAmB,CAAC,CAAC,SAAS,CAAC,UAAU,CAAC,IAAI,EAAE;oBAChD,iBAAiB,CAAC,CAAC,SAAS,CAAC,QAAQ,CAAC,IAAI,EAAE;oBAC5C,eAAe,CAAC,CAAC,MAAM,EAAE;oBACzB,EAAE;oBACF,GAAG,CAAC,wBAAwB,GAAG,CAAC;wBAC9B,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,QAAQ,CAAC,WAAW,EAAE,EAAE,CAAC;wBACxC,CAAC,CAAC,EAAE,CAAC;iBACR,CAAC;YACJ,CAAC,CAAC;SACL,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACf,CAAC;IAED,IAAI,CAAC,CAAC,IAAI,KAAK,UAAU,EAAE,CAAC;QAC1B,OAAO;YACL,cAAc;YACd,GAAG,CAAC,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,OAAO,CAAC,CAAC,SAAS,CAAC,IAAI,EAAE,CAAC;YACjD,EAAE;YACF,GAAG,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,KAAK,CAAC,EAAE,CAAC;YACxC,EAAE;SACH,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACf,CAAC;IAED,IAAI,CAAC,CAAC,IAAI,KAAK,SAAS,EAAE,CAAC;QACzB,OAAO;YACL,aAAa;YACb,aAAa,CAAC,CAAC,SAAS,CAAC,IAAI,EAAE;YAC/B,mBAAmB,CAAC,CAAC,SAAS,CAAC,UAAU,CAAC,IAAI,EAAE;YAChD,iBAAiB,CAAC,CAAC,SAAS,CAAC,QAAQ,CAAC,IAAI,EAAE;YAC5C,EAAE;YACF,SAAS;YACT,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;YACpC,KAAK;YACL,EAAE;SACH,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACf,CAAC;IAED,CAAiB,CAAC;IAClB,MAAM,IAAI,KAAK,CAA
C,sBAAsB,CAAC,CAAC;AAC1C,CAAC"}
|
|
@@ -49,7 +49,7 @@ export declare namespace IAgenticaSelectBenchmarkEvent {
|
|
|
49
49
|
/**
|
|
50
50
|
* Prompt messages from the assistant.
|
|
51
51
|
*/
|
|
52
|
-
assistantPrompts: AgenticaTextHistory
|
|
52
|
+
assistantPrompts: AgenticaTextHistory[];
|
|
53
53
|
}
|
|
54
54
|
/**
|
|
55
55
|
* Failure event type.
|
|
@@ -69,7 +69,7 @@ export declare namespace IAgenticaSelectBenchmarkEvent {
|
|
|
69
69
|
/**
|
|
70
70
|
* Prompt messages from the assistant.
|
|
71
71
|
*/
|
|
72
|
-
assistantPrompts: AgenticaTextHistory
|
|
72
|
+
assistantPrompts: AgenticaTextHistory[];
|
|
73
73
|
}
|
|
74
74
|
/**
|
|
75
75
|
* Error event type.
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@agentica/benchmark",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.23.0",
|
|
4
4
|
"description": "Agentic AI Library specialized in LLM Function Calling",
|
|
5
5
|
"author": "Wrtn Technologies",
|
|
6
6
|
"license": "MIT",
|
|
@@ -35,14 +35,14 @@
|
|
|
35
35
|
"src"
|
|
36
36
|
],
|
|
37
37
|
"peerDependencies": {
|
|
38
|
-
"@agentica/core": "^0.
|
|
38
|
+
"@agentica/core": "^0.23.0"
|
|
39
39
|
},
|
|
40
40
|
"dependencies": {
|
|
41
41
|
"@samchon/openapi": "^4.2.0",
|
|
42
42
|
"openai": "^4.80.0",
|
|
43
43
|
"tstl": "^3.0.0",
|
|
44
44
|
"typia": "^9.0.1",
|
|
45
|
-
"@agentica/core": "^0.
|
|
45
|
+
"@agentica/core": "^0.23.0"
|
|
46
46
|
},
|
|
47
47
|
"devDependencies": {
|
|
48
48
|
"@rollup/plugin-terser": "^0.4.4",
|
|
@@ -3,7 +3,6 @@ import type {
|
|
|
3
3
|
AgenticaContext,
|
|
4
4
|
AgenticaHistory,
|
|
5
5
|
AgenticaOperationSelection,
|
|
6
|
-
AgenticaTextHistory,
|
|
7
6
|
} from "@agentica/core";
|
|
8
7
|
import type { ILlmSchema } from "@samchon/openapi";
|
|
9
8
|
import type { tags } from "typia";
|
|
@@ -154,10 +153,11 @@ export class AgenticaSelectBenchmark<Model extends ILlmSchema.Model> {
|
|
|
154
153
|
try {
|
|
155
154
|
const usage: AgenticaTokenUsage = AgenticaTokenUsage.zero();
|
|
156
155
|
const context = this.agent_.getContext({
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
156
|
+
prompt: factory.createUserInputHistory({
|
|
157
|
+
contents: [{
|
|
158
|
+
type: "text",
|
|
159
|
+
text: scenario.text,
|
|
160
|
+
}],
|
|
161
161
|
}),
|
|
162
162
|
usage,
|
|
163
163
|
});
|
|
@@ -188,10 +188,8 @@ export class AgenticaSelectBenchmark<Model extends ILlmSchema.Model> {
|
|
|
188
188
|
selected,
|
|
189
189
|
usage,
|
|
190
190
|
assistantPrompts: histories
|
|
191
|
-
|
|
192
|
-
.filter(
|
|
193
|
-
(p): p is AgenticaTextHistory<"assistant"> => p.role === "assistant",
|
|
194
|
-
),
|
|
191
|
+
// Only the assistant is allowed to emit text events.
|
|
192
|
+
.filter(p => p.type === "text"),
|
|
195
193
|
started_at,
|
|
196
194
|
completed_at: new Date(),
|
|
197
195
|
} satisfies
|
|
@@ -0,0 +1,282 @@
|
|
|
1
|
+
import type { MicroAgentica } from "@agentica/core";
|
|
2
|
+
import type { ILlmSchema } from "@samchon/openapi";
|
|
3
|
+
import type { tags } from "typia";
|
|
4
|
+
|
|
5
|
+
/**
|
|
6
|
+
* @module
|
|
7
|
+
* This file contains the implementation of the MicroAgenticaCallBenchmark class.
|
|
8
|
+
*
|
|
9
|
+
* @author Wrtn Technologies
|
|
10
|
+
*/
|
|
11
|
+
import { AgenticaTokenUsage } from "@agentica/core";
|
|
12
|
+
import { Semaphore } from "tstl";
|
|
13
|
+
|
|
14
|
+
import type { IAgenticaCallBenchmarkEvent } from "./structures/IAgenticaCallBenchmarkEvent";
|
|
15
|
+
import type { IAgenticaCallBenchmarkResult } from "./structures/IAgenticaCallBenchmarkResult";
|
|
16
|
+
import type { IAgenticaCallBenchmarkScenario } from "./structures/IAgenticaCallBenchmarkScenario";
|
|
17
|
+
|
|
18
|
+
import { AgenticaBenchmarkPredicator } from "./internal/AgenticaBenchmarkPredicator";
|
|
19
|
+
import { AgenticaCallBenchmarkReporter } from "./internal/AgenticaCallBenchmarkReporter";
|
|
20
|
+
|
|
21
|
+
/**
|
|
22
|
+
* LLM function calling selection benchmark.
|
|
23
|
+
*
|
|
24
|
+
* `MicroAgenticaCallBenchmark` is a class for the benchmark of the
|
|
25
|
+
* LLM (Large Language Model) function calling part. It utilizes both
|
|
26
|
+
* `selector` and `caller` agents and tests whether the expected
|
|
27
|
+
* {@link IAgenticaOperation operations} are properly selected and
|
|
28
|
+
* called from the given
|
|
29
|
+
* {@link IAgenticaCallBenchmarkScenario scenarios}.
|
|
30
|
+
*
|
|
31
|
+
* Note that, this `MicroAgenticaCallBenchmark` consumes a lot of time and
|
|
32
|
+
* LLM token costs because it needs the whole process of the
|
|
33
|
+
* {@link MicroAgentica} class with a lot of repetitions. If you don't want
|
|
34
|
+
* such a heavy benchmark, consider using
|
|
35
|
+
* {@link AgenticaSelectBenchmark} instead. In my experience,
|
|
36
|
+
* {@link MicroAgentica} does not fail at function calling, so the function
|
|
37
|
+
* selection benchmark is much more economical.
|
|
38
|
+
*
|
|
39
|
+
* @author Samchon
|
|
40
|
+
*/
|
|
41
|
+
export class MicroAgenticaCallBenchmark<Model extends ILlmSchema.Model> {
|
|
42
|
+
private agent_: MicroAgentica<Model>;
|
|
43
|
+
private scenarios_: IAgenticaCallBenchmarkScenario<Model>[];
|
|
44
|
+
private config_: MicroAgenticaCallBenchmark.IConfig;
|
|
45
|
+
private result_: IAgenticaCallBenchmarkResult<Model> | null;
|
|
46
|
+
|
|
47
|
+
/**
|
|
48
|
+
* Initializer Constructor.
|
|
49
|
+
*
|
|
50
|
+
* @param props Properties of the call benchmark
|
|
51
|
+
*/
|
|
52
|
+
public constructor(props: MicroAgenticaCallBenchmark.IProps<Model>) {
|
|
53
|
+
this.agent_ = props.agent;
|
|
54
|
+
this.scenarios_ = props.scenarios.slice();
|
|
55
|
+
this.config_ = {
|
|
56
|
+
repeat: props.config?.repeat ?? 10,
|
|
57
|
+
simultaneous: props.config?.simultaneous ?? 10,
|
|
58
|
+
consent: props.config?.consent ?? 3,
|
|
59
|
+
};
|
|
60
|
+
this.result_ = null;
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
/**
|
|
64
|
+
* Execute the benchmark.
|
|
65
|
+
*
|
|
66
|
+
* Execute the benchmark of the LLM function calling, and returns
|
|
67
|
+
* the result of the benchmark.
|
|
68
|
+
*
|
|
69
|
+
* If you want to see the progress of the benchmark, you can pass a callback
|
|
70
|
+
* function as the argument of the `listener`. The callback function
|
|
71
|
+
* would be called whenever a benchmark event occurs.
|
|
72
|
+
*
|
|
73
|
+
* Also, you can publish a markdown format report by calling
|
|
74
|
+
* the {@link report} function after the benchmark execution.
|
|
75
|
+
*
|
|
76
|
+
* @param listener Callback function listening to the benchmark events
|
|
77
|
+
* @returns Results of the function calling benchmark
|
|
78
|
+
*/
|
|
79
|
+
public async execute(
|
|
80
|
+
listener?: (event: IAgenticaCallBenchmarkEvent<Model>) => void,
|
|
81
|
+
): Promise<IAgenticaCallBenchmarkResult<Model>> {
|
|
82
|
+
const started_at: Date = new Date();
|
|
83
|
+
const semaphore: Semaphore = new Semaphore(this.config_.simultaneous);
|
|
84
|
+
const task = this.scenarios_.map(async (scenario) => {
|
|
85
|
+
const events: IAgenticaCallBenchmarkEvent<Model>[]
|
|
86
|
+
= await Promise.all(
|
|
87
|
+
Array.from({ length: this.config_.repeat }).map(async () => {
|
|
88
|
+
await semaphore.acquire();
|
|
89
|
+
const e: IAgenticaCallBenchmarkEvent<Model>
|
|
90
|
+
= await this.step(scenario);
|
|
91
|
+
await semaphore.release();
|
|
92
|
+
|
|
93
|
+
if (listener !== undefined) {
|
|
94
|
+
listener(e);
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
return e;
|
|
98
|
+
}),
|
|
99
|
+
);
|
|
100
|
+
return {
|
|
101
|
+
scenario,
|
|
102
|
+
events,
|
|
103
|
+
usage: events
|
|
104
|
+
.filter(e => e.type !== "error")
|
|
105
|
+
.map(e => e.usage)
|
|
106
|
+
.reduce((acc, cur) => AgenticaTokenUsage.plus(acc, cur), AgenticaTokenUsage.zero()),
|
|
107
|
+
};
|
|
108
|
+
});
|
|
109
|
+
const experiments: IAgenticaCallBenchmarkResult.IExperiment<Model>[]
|
|
110
|
+
= await Promise.all(task);
|
|
111
|
+
return (this.result_ = {
|
|
112
|
+
experiments,
|
|
113
|
+
started_at,
|
|
114
|
+
completed_at: new Date(),
|
|
115
|
+
usage: experiments
|
|
116
|
+
.map(p => p.usage)
|
|
117
|
+
.reduce((acc, cur) => AgenticaTokenUsage.plus(acc, cur), AgenticaTokenUsage.zero()),
|
|
118
|
+
});
|
|
119
|
+
}
|
|
120
|
+
|
|
121
|
+
/**
|
|
122
|
+
* Report the benchmark result as markdown files.
|
|
123
|
+
*
|
|
124
|
+
* Report the benchmark result {@link execute}d by
|
|
125
|
+
* `MicroAgenticaCallBenchmark` as markdown files, and returns a dictionary
|
|
126
|
+
* object of the markdown reporting files. The key of the dictionary
|
|
127
|
+
* would be file name, and the value would be the markdown content.
|
|
128
|
+
*
|
|
129
|
+
* For reference, the markdown files are composed like below:
|
|
130
|
+
*
|
|
131
|
+
* - `./README.md`
|
|
132
|
+
* - `./scenario-1/README.md`
|
|
133
|
+
* - `./scenario-1/1.success.md`
|
|
134
|
+
* - `./scenario-1/2.failure.md`
|
|
135
|
+
* - `./scenario-1/3.error.md`
|
|
136
|
+
*
|
|
137
|
+
* @returns Dictionary of markdown files.
|
|
138
|
+
*/
|
|
139
|
+
public report(): Record<string, string> {
|
|
140
|
+
if (this.result_ === null) {
|
|
141
|
+
throw new Error("Benchmark is not executed yet.");
|
|
142
|
+
}
|
|
143
|
+
return AgenticaCallBenchmarkReporter.markdown(this.result_);
|
|
144
|
+
}
|
|
145
|
+
|
|
146
|
+
private async step(
|
|
147
|
+
scenario: IAgenticaCallBenchmarkScenario<Model>,
|
|
148
|
+
): Promise<IAgenticaCallBenchmarkEvent<Model>> {
|
|
149
|
+
const agent: MicroAgentica<Model> = this.agent_.clone();
|
|
150
|
+
const started_at: Date = new Date();
|
|
151
|
+
const success = () =>
|
|
152
|
+
AgenticaBenchmarkPredicator.success({
|
|
153
|
+
expected: scenario.expected,
|
|
154
|
+
operations: agent
|
|
155
|
+
.getHistories()
|
|
156
|
+
.filter(p => p.type === "execute")
|
|
157
|
+
.map(p => p.operation),
|
|
158
|
+
strict: false,
|
|
159
|
+
});
|
|
160
|
+
const out = (): IAgenticaCallBenchmarkEvent<Model> => {
|
|
161
|
+
const select = AgenticaBenchmarkPredicator.success({
|
|
162
|
+
expected: scenario.expected,
|
|
163
|
+
operations: agent
|
|
164
|
+
.getHistories()
|
|
165
|
+
.filter(p => p.type === "execute")
|
|
166
|
+
.map(p => p.operation),
|
|
167
|
+
strict: false,
|
|
168
|
+
});
|
|
169
|
+
const call = success();
|
|
170
|
+
return {
|
|
171
|
+
type: (call ? "success" : "failure") as "failure",
|
|
172
|
+
scenario,
|
|
173
|
+
select,
|
|
174
|
+
call,
|
|
175
|
+
prompts: agent.getHistories(),
|
|
176
|
+
usage: agent.getTokenUsage(),
|
|
177
|
+
started_at,
|
|
178
|
+
completed_at: new Date(),
|
|
179
|
+
} satisfies IAgenticaCallBenchmarkEvent.IFailure<Model>;
|
|
180
|
+
};
|
|
181
|
+
|
|
182
|
+
try {
|
|
183
|
+
await agent.conversate(scenario.text);
|
|
184
|
+
if (success()) {
|
|
185
|
+
return out();
|
|
186
|
+
}
|
|
187
|
+
|
|
188
|
+
for (let i: number = 0; i < this.config_.consent; ++i) {
|
|
189
|
+
const next: string | null
|
|
190
|
+
= await AgenticaBenchmarkPredicator.isNext(agent);
|
|
191
|
+
if (next === null) {
|
|
192
|
+
break;
|
|
193
|
+
}
|
|
194
|
+
|
|
195
|
+
await agent.conversate(next);
|
|
196
|
+
if (success()) {
|
|
197
|
+
return out();
|
|
198
|
+
}
|
|
199
|
+
}
|
|
200
|
+
return out();
|
|
201
|
+
}
|
|
202
|
+
catch (error) {
|
|
203
|
+
return {
|
|
204
|
+
type: "error",
|
|
205
|
+
scenario,
|
|
206
|
+
prompts: agent.getHistories(),
|
|
207
|
+
usage: agent.getTokenUsage(),
|
|
208
|
+
error,
|
|
209
|
+
started_at,
|
|
210
|
+
completed_at: new Date(),
|
|
211
|
+
};
|
|
212
|
+
}
|
|
213
|
+
}
|
|
214
|
+
}
|
|
215
|
+
export namespace MicroAgenticaCallBenchmark {
|
|
216
|
+
/**
|
|
217
|
+
* Properties of the {@link MicroAgenticaCallBenchmark} constructor.
|
|
218
|
+
*/
|
|
219
|
+
export interface IProps<Model extends ILlmSchema.Model> {
|
|
220
|
+
/**
|
|
221
|
+
* AI agent instance.
|
|
222
|
+
*/
|
|
223
|
+
agent: MicroAgentica<Model>;
|
|
224
|
+
|
|
225
|
+
/**
|
|
226
|
+
* List of scenarios that you expect.
|
|
227
|
+
*/
|
|
228
|
+
scenarios: IAgenticaCallBenchmarkScenario<Model>[];
|
|
229
|
+
|
|
230
|
+
/**
|
|
231
|
+
* Configuration for the benchmark.
|
|
232
|
+
*/
|
|
233
|
+
config?: Partial<IConfig>;
|
|
234
|
+
}
|
|
235
|
+
|
|
236
|
+
/**
|
|
237
|
+
* Configuration for the benchmark.
|
|
238
|
+
*
|
|
239
|
+
* `MicroAgenticaCallBenchmark.IConfig` is a data structure which
|
|
240
|
+
* represents a configuration for the benchmark, especially the
|
|
241
|
+
* capacity information of the benchmark execution.
|
|
242
|
+
*/
|
|
243
|
+
export interface IConfig {
|
|
244
|
+
/**
|
|
245
|
+
* Repeat count.
|
|
246
|
+
*
|
|
247
|
+
* The number of repeating count for the benchmark execution
|
|
248
|
+
* for each scenario.
|
|
249
|
+
*
|
|
250
|
+
* @default 10
|
|
251
|
+
*/
|
|
252
|
+
repeat: number & tags.Type<"uint32"> & tags.Minimum<1>;
|
|
253
|
+
|
|
254
|
+
/**
|
|
255
|
+
* Simultaneous count.
|
|
256
|
+
*
|
|
257
|
+
* The number of simultaneous count for the parallel benchmark
|
|
258
|
+
* execution.
|
|
259
|
+
*
|
|
260
|
+
* If you configure this property greater than `1`, the benchmark
|
|
261
|
+
* for each scenario would be executed in parallel in the given
|
|
262
|
+
* count.
|
|
263
|
+
*
|
|
264
|
+
* @default 10
|
|
265
|
+
*/
|
|
266
|
+
simultaneous: number & tags.Type<"uint32"> & tags.Minimum<1>;
|
|
267
|
+
|
|
268
|
+
/**
|
|
269
|
+
* Number of consents.
|
|
270
|
+
*
|
|
271
|
+
* AI agent sometimes asks user to consent to the function
|
|
272
|
+
* calling, and perform it at the next step.
|
|
273
|
+
*
|
|
274
|
+
* This property represents the number of consents to allow.
|
|
275
|
+
* If the number of consents from the AI agent exceeds the
|
|
276
|
+
* configured value, the benchmark will fail.
|
|
277
|
+
*
|
|
278
|
+
* @default 3
|
|
279
|
+
*/
|
|
280
|
+
consent: number;
|
|
281
|
+
}
|
|
282
|
+
}
|
package/src/index.ts
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
* @author Wrtn Technologies
|
|
6
6
|
*/
|
|
7
7
|
|
|
8
|
-
import type { Agentica, AgenticaHistory, AgenticaOperation } from "@agentica/core";
|
|
8
|
+
import type { Agentica, AgenticaHistory, AgenticaOperation, MicroAgentica } from "@agentica/core";
|
|
9
9
|
import type { ILlmFunction, ILlmSchema } from "@samchon/openapi";
|
|
10
10
|
import type OpenAI from "openai";
|
|
11
11
|
|
|
@@ -50,7 +50,7 @@ interface IConsentProps {
|
|
|
50
50
|
reply: string;
|
|
51
51
|
}
|
|
52
52
|
|
|
53
|
-
async function isNext<Model extends ILlmSchema.Model>(agent: Agentica<Model>): Promise<string | null> {
|
|
53
|
+
async function isNext<Model extends ILlmSchema.Model>(agent: Agentica<Model> | MicroAgentica<Model>): Promise<string | null> {
|
|
54
54
|
const last: AgenticaHistory<Model> | undefined = agent
|
|
55
55
|
.getHistories()
|
|
56
56
|
.at(-1);
|
|
@@ -12,11 +12,14 @@ export const AgenticaPromptReporter = {
|
|
|
12
12
|
};
|
|
13
13
|
|
|
14
14
|
function markdown<Model extends ILlmSchema.Model>(p: AgenticaHistory<Model>): string {
|
|
15
|
-
|
|
15
|
+
if (p.type === "user_input") {
|
|
16
|
+
return [`### User Input`, p.contents, ""].join("\n");
|
|
17
|
+
}
|
|
16
18
|
if (p.type === "text") {
|
|
17
19
|
return [`### Text (${p.role})`, p.text, ""].join("\n");
|
|
18
20
|
}
|
|
19
|
-
|
|
21
|
+
|
|
22
|
+
if (p.type === "select" || p.type === "cancel") {
|
|
20
23
|
return [
|
|
21
24
|
`### ${p.type === "select" ? "Select" : "Cancel"}`,
|
|
22
25
|
...p.selections
|
|
@@ -36,7 +39,8 @@ function markdown<Model extends ILlmSchema.Model>(p: AgenticaHistory<Model>): st
|
|
|
36
39
|
}),
|
|
37
40
|
].join("\n");
|
|
38
41
|
}
|
|
39
|
-
|
|
42
|
+
|
|
43
|
+
if (p.type === "describe") {
|
|
40
44
|
return [
|
|
41
45
|
"### Describe",
|
|
42
46
|
...p.executes.map(e => ` - ${e.operation.name}`),
|
|
@@ -45,15 +49,21 @@ function markdown<Model extends ILlmSchema.Model>(p: AgenticaHistory<Model>): st
|
|
|
45
49
|
"",
|
|
46
50
|
].join("\n");
|
|
47
51
|
}
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
52
|
+
|
|
53
|
+
if (p.type === "execute") {
|
|
54
|
+
return [
|
|
55
|
+
"### Execute",
|
|
56
|
+
` - name: ${p.operation.name}`,
|
|
57
|
+
` - controller: ${p.operation.controller.name}`,
|
|
58
|
+
` - function: ${p.operation.function.name}`,
|
|
59
|
+
"",
|
|
60
|
+
"```json",
|
|
61
|
+
JSON.stringify(p.arguments, null, 2),
|
|
62
|
+
"```",
|
|
63
|
+
"",
|
|
64
|
+
].join("\n");
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
p satisfies never;
|
|
68
|
+
throw new Error("Invalid history type");
|
|
59
69
|
}
|
|
@@ -61,7 +61,7 @@ export namespace IAgenticaSelectBenchmarkEvent {
|
|
|
61
61
|
/**
|
|
62
62
|
* Prompt messages from the assistant.
|
|
63
63
|
*/
|
|
64
|
-
assistantPrompts: AgenticaTextHistory
|
|
64
|
+
assistantPrompts: AgenticaTextHistory[];
|
|
65
65
|
}
|
|
66
66
|
|
|
67
67
|
/**
|
|
@@ -85,7 +85,7 @@ export namespace IAgenticaSelectBenchmarkEvent {
|
|
|
85
85
|
/**
|
|
86
86
|
* Prompt messages from the assistant.
|
|
87
87
|
*/
|
|
88
|
-
assistantPrompts: AgenticaTextHistory
|
|
88
|
+
assistantPrompts: AgenticaTextHistory[];
|
|
89
89
|
}
|
|
90
90
|
|
|
91
91
|
/**
|