@agentica/benchmark 0.12.21 → 0.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +39 -33
- package/lib/AgenticaCallBenchmark.d.ts +12 -6
- package/lib/AgenticaCallBenchmark.js +24 -18
- package/lib/AgenticaCallBenchmark.js.map +1 -1
- package/lib/AgenticaSelectBenchmark.d.ts +12 -6
- package/lib/AgenticaSelectBenchmark.js +14 -12
- package/lib/AgenticaSelectBenchmark.js.map +1 -1
- package/lib/index.mjs +315 -236
- package/lib/index.mjs.map +1 -1
- package/lib/internal/AgenticaBenchmarkPredicator.d.ts +38 -29
- package/lib/internal/AgenticaBenchmarkPredicator.js +100 -84
- package/lib/internal/AgenticaBenchmarkPredicator.js.map +1 -1
- package/lib/internal/AgenticaBenchmarkUtil.d.ts +21 -6
- package/lib/internal/AgenticaBenchmarkUtil.js +39 -33
- package/lib/internal/AgenticaBenchmarkUtil.js.map +1 -1
- package/lib/internal/AgenticaCallBenchmarkReporter.d.ts +6 -5
- package/lib/internal/AgenticaCallBenchmarkReporter.js +130 -126
- package/lib/internal/AgenticaCallBenchmarkReporter.js.map +1 -1
- package/lib/internal/AgenticaPromptReporter.d.ts +13 -5
- package/lib/internal/AgenticaPromptReporter.js +45 -41
- package/lib/internal/AgenticaPromptReporter.js.map +1 -1
- package/lib/internal/AgenticaSelectBenchmarkReporter.d.ts +3 -1
- package/lib/internal/AgenticaSelectBenchmarkReporter.js +153 -150
- package/lib/internal/AgenticaSelectBenchmarkReporter.js.map +1 -1
- package/lib/structures/IAgenticaBenchmarkExpected.d.ts +8 -2
- package/lib/structures/IAgenticaCallBenchmarkEvent.d.ts +9 -3
- package/lib/structures/IAgenticaCallBenchmarkResult.d.ts +10 -4
- package/lib/structures/IAgenticaCallBenchmarkScenario.d.ts +8 -2
- package/lib/structures/IAgenticaSelectBenchmarkEvent.d.ts +9 -3
- package/lib/structures/IAgenticaSelectBenchmarkResult.d.ts +10 -4
- package/lib/structures/IAgenticaSelectBenchmarkScenario.d.ts +8 -2
- package/lib/utils/MathUtil.d.ts +15 -3
- package/lib/utils/MathUtil.js +15 -4
- package/lib/utils/MathUtil.js.map +1 -1
- package/package.json +12 -10
- package/src/AgenticaCallBenchmark.ts +64 -45
- package/src/AgenticaSelectBenchmark.ts +42 -30
- package/src/internal/AgenticaBenchmarkPredicator.ts +208 -186
- package/src/internal/AgenticaBenchmarkUtil.ts +58 -40
- package/src/internal/AgenticaCallBenchmarkReporter.ts +180 -182
- package/src/internal/AgenticaPromptReporter.ts +46 -33
- package/src/internal/AgenticaSelectBenchmarkReporter.ts +205 -203
- package/src/structures/IAgenticaBenchmarkExpected.ts +9 -2
- package/src/structures/IAgenticaCallBenchmarkEvent.ts +9 -3
- package/src/structures/IAgenticaCallBenchmarkResult.ts +10 -4
- package/src/structures/IAgenticaCallBenchmarkScenario.ts +8 -2
- package/src/structures/IAgenticaSelectBenchmarkEvent.ts +9 -3
- package/src/structures/IAgenticaSelectBenchmarkResult.ts +10 -4
- package/src/structures/IAgenticaSelectBenchmarkScenario.ts +8 -2
- package/src/utils/MathUtil.ts +16 -3
|
@@ -1,17 +1,20 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
3
|
exports.AgenticaSelectBenchmarkReporter = void 0;
|
|
4
|
+
exports.markdown = markdown;
|
|
4
5
|
const MathUtil_1 = require("../utils/MathUtil");
|
|
5
6
|
const AgenticaBenchmarkUtil_1 = require("./AgenticaBenchmarkUtil");
|
|
6
7
|
/**
|
|
7
8
|
* @internal
|
|
8
9
|
*/
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
10
|
+
exports.AgenticaSelectBenchmarkReporter = {
|
|
11
|
+
markdown,
|
|
12
|
+
};
|
|
13
|
+
function markdown(result) {
|
|
14
|
+
const iterator = [
|
|
12
15
|
["./README.md", writeIndex(result)],
|
|
13
16
|
...result.experiments
|
|
14
|
-
.map(
|
|
17
|
+
.map(exp => [
|
|
15
18
|
[`./${exp.scenario.name}/README.md`, writeExperimentIndex(exp)],
|
|
16
19
|
...exp.events.map((event, i) => [
|
|
17
20
|
`./${exp.scenario.name}/${i + 1}.${event.type}.md`,
|
|
@@ -19,152 +22,152 @@ var AgenticaSelectBenchmarkReporter;
|
|
|
19
22
|
]),
|
|
20
23
|
])
|
|
21
24
|
.flat(),
|
|
22
|
-
]
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
})()
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
].join("\n");
|
|
68
|
-
};
|
|
69
|
-
const writeExperimentIndex = (exp) => {
|
|
70
|
-
const aggregate = exp.usage.aggregate;
|
|
71
|
-
return [
|
|
72
|
-
`# ${exp.scenario.name}`,
|
|
73
|
-
"## Summary",
|
|
74
|
-
" - Aggregation:",
|
|
75
|
-
` - Trial: ${exp.events.length}`,
|
|
76
|
-
` - Success: ${exp.events.filter((e) => e.type === "success").length}`,
|
|
77
|
-
` - Failure: ${exp.events.filter((e) => e.type === "failure").length}`,
|
|
78
|
-
` - Average Time: ${MathUtil_1.MathUtil.round(exp.events
|
|
79
|
-
.map((event) => event.completed_at.getTime() - event.started_at.getTime())
|
|
25
|
+
];
|
|
26
|
+
return Object.fromEntries(iterator);
|
|
27
|
+
}
|
|
28
|
+
/**
 * Render the top-level `README.md` of a function selection benchmark.
 *
 * The document contains an aggregate summary (scenario count, trial count,
 * success/failure counts, average elapsed time and token usage) followed by
 * a table with one row per experiment.
 *
 * @param result - Benchmark result. Reads `result.experiments` (each with
 *   `scenario.name` and `events`) and `result.usage.aggregate`.
 * @returns The Markdown text, lines joined with `"\n"`.
 */
function writeIndex(result) {
    // Elapsed milliseconds of a single event.
    const elapsed = event => event.completed_at.getTime() - event.started_at.getTime();
    // Mean elapsed time. An empty list yields 0 instead of NaN (0 / 0), so the
    // report never prints "NaN ms".
    const averageTime = list => list.length === 0
        ? 0
        : list.map(elapsed).reduce((a, b) => a + b, 0) / list.length;
    const countOf = (list, type) => list.filter(e => e.type === type).length;
    const events = result.experiments
        .map(r => r.events)
        .flat();
    const aggregate = result.usage.aggregate;
    // NOTE(review): every list marker below carries the same single-space
    // indent, as seen in this view of the file — confirm the intended nesting
    // whitespace against the original source.
    return [
        "# LLM Function Selection Benchmark",
        "## Summary",
        ` - Aggregation:`,
        ` - Scenarios: #${result.experiments.length.toLocaleString()}`,
        ` - Trial: ${events.length}`,
        ` - Success: ${countOf(events, "success")}`,
        ` - Failure: ${countOf(events, "failure")}`,
        ` - Average Time: ${MathUtil_1.MathUtil.round(averageTime(events)).toLocaleString()} ms`,
        ` - Token Usage`,
        ` - Total: ${aggregate.total.toLocaleString()}`,
        ` - Input`,
        ` - Total: ${aggregate.input.total.toLocaleString()}`,
        ` - Cached: ${aggregate.input.cached.toLocaleString()}`,
        ` - Output:`,
        ` - Total: ${aggregate.output.total.toLocaleString()}`,
        ` - Accepted Prediction: ${aggregate.output.accepted_prediction.toLocaleString()}`,
        ` - Reasoning: ${aggregate.output.reasoning.toLocaleString()}`,
        ` - Rejected Prediction: ${aggregate.output.rejected_prediction.toLocaleString()}`,
        "",
        "## Experiments",
        " Name | Status | Time/Avg ",
        ":-----|:-------|----------:",
        ...result.experiments.map((exp) => {
            // Ten-cell status bar: one filled cell per full 10% of successful
            // trials. An experiment without events shows an all-empty bar.
            const filled = exp.events.length === 0
                ? 0
                : Math.floor((countOf(exp.events, "success") / exp.events.length) * 10);
            return [
                `[${exp.scenario.name}](./${exp.scenario.name}/README.md)`,
                "■".repeat(filled) + "□".repeat(10 - filled),
                `${MathUtil_1.MathUtil.round(averageTime(exp.events)).toLocaleString()} ms`,
            ].join(" | ");
        }),
    ].join("\n");
}
|
|
74
|
+
/**
 * Render the `README.md` of a single experiment (one scenario, many trials).
 *
 * The document contains the experiment summary (trial, success and failure
 * counts, average elapsed time, token usage), a table linking to each event
 * page, and the scenario's user prompt and expected operations.
 *
 * @param exp - Experiment to report. Reads `exp.scenario` (`name`, `text`,
 *   `expected`), `exp.events` and `exp.usage.aggregate`.
 * @returns The Markdown text, lines joined with `"\n"`.
 */
function writeExperimentIndex(exp) {
    const aggregate = exp.usage.aggregate;
    // Elapsed milliseconds of a single event.
    const elapsed = event => event.completed_at.getTime() - event.started_at.getTime();
    // Mean elapsed time. Zero events yields 0 instead of NaN (0 / 0), so the
    // summary never prints "NaN ms".
    const average = exp.events.length === 0
        ? 0
        : exp.events.map(elapsed).reduce((a, b) => a + b, 0) / exp.events.length;
    // NOTE(review): every list marker below carries the same single-space
    // indent, as seen in this view of the file — confirm the intended nesting
    // whitespace against the original source.
    return [
        `# ${exp.scenario.name}`,
        "## Summary",
        " - Aggregation:",
        ` - Trial: ${exp.events.length}`,
        ` - Success: ${exp.events.filter(e => e.type === "success").length}`,
        ` - Failure: ${exp.events.filter(e => e.type === "failure").length}`,
        ` - Average Time: ${MathUtil_1.MathUtil.round(average).toLocaleString()} ms`,
        ` - Token Usage`,
        ` - Total: ${aggregate.total.toLocaleString()}`,
        ` - Input`,
        ` - Total: ${aggregate.input.total.toLocaleString()}`,
        ` - Cached: ${aggregate.input.cached.toLocaleString()}`,
        ` - Output:`,
        ` - Total: ${aggregate.output.total.toLocaleString()}`,
        ` - Accepted Prediction: ${aggregate.output.accepted_prediction.toLocaleString()}`,
        ` - Reasoning: ${aggregate.output.reasoning.toLocaleString()}`,
        ` - Rejected Prediction: ${aggregate.output.rejected_prediction.toLocaleString()}`,
        "",
        "## Events",
        " No | Type | Time",
        "---:|:-----|----:",
        // One row per trial; the link target matches the per-event file name
        // `<number>.<type>.md`.
        ...exp.events.map((e, i) => [
            `[${i + 1}.](./${i + 1}.${e.type}.md)`,
            e.type,
            `${MathUtil_1.MathUtil.round(elapsed(e))} ms`,
        ].join(" | ")),
        "",
        "## Scenario",
        "### User Prompt",
        exp.scenario.text,
        "",
        "### Expected",
        "```json",
        JSON.stringify(AgenticaBenchmarkUtil_1.AgenticaBenchmarkUtil.expectedToJson(exp.scenario.expected), null, 2),
        "```",
    ].join("\n");
}
|
|
116
|
+
/**
 * Render the Markdown page of a single benchmark event (one trial).
 *
 * Sections, in order: summary (name, type, elapsed time and, unless the event
 * is an error, token usage), the scenario prompt and expected operations, the
 * selected operations for `success`/`failure` events, and the error payload
 * for `error` events.
 *
 * @param event - Event to report. Reads `type`, `scenario`, `started_at` and
 *   `completed_at`, plus `usage`/`selected` or `error` depending on the type.
 * @param index - Zero-based position of the event; printed one-based.
 * @returns The Markdown text, lines joined with `"\n"`.
 */
function writeExperimentEvent(event, index) {
    const isError = event.type === "error";
    // Token usage is only read for non-error events, so this is a lazy helper
    // rather than an eager lookup of `event.usage`.
    const usageLines = () => {
        const aggregate = event.usage.aggregate;
        return [
            " - Token Usage",
            // Print the numeric total. Calling toLocaleString() on the
            // aggregate object itself would emit "[object Object]".
            ` - Total: ${aggregate.total.toLocaleString()}`,
            ` - Prompt`,
            ` - Total: ${aggregate.input.total.toLocaleString()}`,
            ` - Cached: ${aggregate.input.cached.toLocaleString()}`,
            ` - Completion:`,
            ` - Total: ${aggregate.output.total.toLocaleString()}`,
            ` - Reasoning: ${aggregate.output.reasoning.toLocaleString()}`,
            ` - Accepted Prediction: ${aggregate.output.accepted_prediction.toLocaleString()}`,
            ` - Rejected Prediction: ${aggregate.output.rejected_prediction.toLocaleString()}`,
        ];
    };
    // One sub-section per selected operation; the description paragraph is
    // appended only when it is present and non-empty.
    const selectionSection = (s) => {
        const description = s.operation.function.description;
        return [
            `### ${s.operation.name}`,
            ` - Controller: \`${s.operation.controller.name}\``,
            ` - Function: \`${s.operation.function.name}\``,
            ` - Reason: ${s.reason}`,
            "",
            ...(description !== undefined && description !== ""
                ? [description, ""]
                : []),
        ].join("\n");
    };
    return [
        `# ${index + 1}. ${event.type}`,
        `## Summary`,
        ` - Name: ${event.scenario.name}`,
        ` - Type: ${event.type}`,
        ` - Time: ${(event.completed_at.getTime() - event.started_at.getTime()).toLocaleString()} ms`,
        ...(!isError ? usageLines() : []),
        "",
        "## Scenario",
        "### User Prompt",
        event.scenario.text,
        "",
        "### Expected",
        "```json",
        JSON.stringify(AgenticaBenchmarkUtil_1.AgenticaBenchmarkUtil.expectedToJson(event.scenario.expected), null, 2),
        "```",
        "",
        ...(event.type === "success" || event.type === "failure"
            ? [
                "## Result",
                ...event.selected.map(selectionSection),
            ]
            : []),
        ...(isError
            ? [
                "## Error",
                "```json",
                // NOTE(review): the error is stringified BEFORE being passed to
                // errorToJson. If errorToJson is meant to turn an Error object
                // into a plain record, the two calls may be in the wrong
                // order — confirm against AgenticaBenchmarkUtil.
                AgenticaBenchmarkUtil_1.AgenticaBenchmarkUtil.errorToJson(JSON.stringify(event.error, null, 2)),
                "```",
                "",
            ]
            : []),
    ].join("\n");
}
|
|
170
173
|
//# sourceMappingURL=AgenticaSelectBenchmarkReporter.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"AgenticaSelectBenchmarkReporter.js","sourceRoot":"","sources":["../../src/internal/AgenticaSelectBenchmarkReporter.ts"],"names":[],"mappings":";;;
|
|
1
|
+
{"version":3,"file":"AgenticaSelectBenchmarkReporter.js","sourceRoot":"","sources":["../../src/internal/AgenticaSelectBenchmarkReporter.ts"],"names":[],"mappings":";;;AAqBA,4BAeC;AAzBD,gDAA6C;AAC7C,mEAAgE;AAEhE;;GAEG;AACU,QAAA,+BAA+B,GAAG;IAC7C,QAAQ;CACT,CAAC;AAEF,SAAgB,QAAQ,CAAiC,MAA6C;IACpG,MAAM,QAAQ,GAAG;QACf,CAAC,aAAa,EAAE,UAAU,CAAC,MAAM,CAAC,CAAC;QACnC,GAAG,MAAM,CAAC,WAAW;aAClB,GAAG,CAAqB,GAAG,CAAC,EAAE,CAAC;YAC9B,CAAC,KAAK,GAAG,CAAC,QAAQ,CAAC,IAAI,YAAY,EAAE,oBAAoB,CAAC,GAAG,CAAC,CAAC;YAC/D,GAAG,GAAG,CAAC,MAAM,CAAC,GAAG,CAAmB,CAAC,KAAK,EAAE,CAAC,EAAE,EAAE,CAAC;gBAChD,KAAK,GAAG,CAAC,QAAQ,CAAC,IAAI,IAAI,CAAC,GAAG,CAAC,IAAI,KAAK,CAAC,IAAI,KAAK;gBAClD,oBAAoB,CAAC,KAAK,EAAE,CAAC,CAAC;aAC/B,CAAC;SACH,CAAC;aACD,IAAI,EAAE;KACmB,CAAC;IAE/B,OAAO,MAAM,CAAC,WAAW,CAAC,QAAQ,CAAC,CAAC;AACtC,CAAC;AAED,SAAS,UAAU,CAAiC,MAA6C;IAC/F,MAAM,MAAM,GAA2C,MAAM,CAAC,WAAW;SACtE,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC;SAClB,IAAI,EAAE,CAAC;IACV,MAAM,OAAO,GACP,MAAM;SACL,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,YAAY,CAAC,OAAO,EAAE,GAAG,CAAC,CAAC,UAAU,CAAC,OAAO,EAAE,CAAC;SAC3D,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,GAAG,MAAM,CAAC,MAAM,CAAC;IAClD,MAAM,SAAS,GAAkC,MAAM,CAAC,KAAK,CAAC,SAAS,CAAC;IACxE,OAAO;QACL,oCAAoC;QACpC,YAAY;QACZ,kBAAkB;QAClB,qBAAqB,MAAM,CAAC,WAAW,CAAC,MAAM,CAAC,cAAc,EAAE,EAAE;QACjE,gBAAgB,MAAM,CAAC,MAAM,EAAE;QAC/B,kBAAkB,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,SAAS,CAAC,CAAC,MAAM,EAAE;QACnE,kBAAkB,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,SAAS,CAAC,CAAC,MAAM,EAAE;QACnE,uBAAuB,mBAAQ,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,cAAc,EAAE,KAAK;QACpE,iBAAiB;QACjB,gBAAgB,SAAS,CAAC,KAAK,CAAC,cAAc,EAAE,EAAE;QAClD,aAAa;QACb,kBAAkB,SAAS,CAAC,KAAK,CAAC,KAAK,CAAC,cAAc,EAAE,EAAE;QAC1D,mBAAmB,SAAS,CAAC,KAAK,CAAC,MAAM,CAAC,cAAc,EAAE,EAAE;QAC5D,eAAe;QACf,kBAAkB,SAAS,CAAC,MAAM,CAAC,KAAK,CAAC,cAAc,EAAE,EAAE;QAC3D,gCAAgC,SAAS,CAAC,MAAM,CAAC,mBAAmB,CAAC,cAAc,EAAE,EAAE;QACvF,sBAAsB,SAAS,CAAC,MAAM,CAAC,SAAS,CAAC,cAAc,EAAE,EAAE;QACn
E,gCAAgC,SAAS,CAAC,MAAM,CAAC,mBAAmB,CAAC,cAAc,EAAE,EAAE;QACvF,EAAE;QACF,gBAAgB;QAChB,6BAA6B;QAC7B,6BAA6B;QAC7B,GAAG,MAAM,CAAC,WAAW,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAC9B;YACE,IAAI,GAAG,CAAC,QAAQ,CAAC,IAAI,OAAO,GAAG,CAAC,QAAQ,CAAC,IAAI,aAAa;YAC1D,CAAC,GAAG,EAAE;gBACJ,MAAM,OAAO,GAAW,IAAI,CAAC,KAAK,CAChC,CAAC,GAAG,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,SAAS,CAAC,CAAC,MAAM;sBAChD,GAAG,CAAC,MAAM,CAAC,MAAM,CAAC;sBACpB,EAAE,CACL,CAAC;gBACF,OAAO,CACL,KAAK,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,OAAO,EAAE,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC;sBAChD,KAAK,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,EAAE,GAAG,OAAO,EAAE,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAC1D,CAAC;YACJ,CAAC,CAAC,EAAE;YACJ,GAAG,mBAAQ,CAAC,KAAK,CACf,GAAG,CAAC,MAAM;iBACP,GAAG,CACF,KAAK,CAAC,EAAE,CACN,KAAK,CAAC,YAAY,CAAC,OAAO,EAAE,GAAG,KAAK,CAAC,UAAU,CAAC,OAAO,EAAE,CAC5D;iBACA,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,GAAG,GAAG,CAAC,MAAM,CAAC,MAAM,CAClD,CAAC,cAAc,EAAE,KAAK;SACxB,CAAC,IAAI,CAAC,KAAK,CAAC,CACd;KACF,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AACf,CAAC;AAED,SAAS,oBAAoB,CAAiC,GAAsD;IAClH,MAAM,SAAS,GAAkC,GAAG,CAAC,KAAK,CAAC,SAAS,CAAC;IACrE,OAAO;QACL,KAAK,GAAG,CAAC,QAAQ,CAAC,IAAI,EAAE;QACxB,YAAY;QACZ,kBAAkB;QAClB,gBAAgB,GAAG,CAAC,MAAM,CAAC,MAAM,EAAE;QACnC,kBAAkB,GAAG,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,SAAS,CAAC,CAAC,MAAM,EAAE;QACvE,kBAAkB,GAAG,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,SAAS,CAAC,CAAC,MAAM,EAAE;QACvE,uBAAuB,mBAAQ,CAAC,KAAK,CACnC,GAAG,CAAC,MAAM;aACP,GAAG,CACF,KAAK,CAAC,EAAE,CACN,KAAK,CAAC,YAAY,CAAC,OAAO,EAAE,GAAG,KAAK,CAAC,UAAU,CAAC,OAAO,EAAE,CAC5D;aACA,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,GAAG,GAAG,CAAC,MAAM,CAAC,MAAM,CAClD,CAAC,cAAc,EAAE,KAAK;QACvB,iBAAiB;QACjB,gBAAgB,SAAS,CAAC,KAAK,CAAC,cAAc,EAAE,EAAE;QAClD,aAAa;QACb,kBAAkB,SAAS,CAAC,KAAK,CAAC,KAAK,CAAC,cAAc,EAAE,EAAE;QAC1D,mBAAmB,SAAS,CAAC,KAAK,CAAC,MAAM,CAAC,cAAc,EAAE,EAAE;QAC5D,eAAe;QACf,kB
AAkB,SAAS,CAAC,MAAM,CAAC,KAAK,CAAC,cAAc,EAAE,EAAE;QAC3D,gCAAgC,SAAS,CAAC,MAAM,CAAC,mBAAmB,CAAC,cAAc,EAAE,EAAE;QACvF,sBAAsB,SAAS,CAAC,MAAM,CAAC,SAAS,CAAC,cAAc,EAAE,EAAE;QACnE,gCAAgC,SAAS,CAAC,MAAM,CAAC,mBAAmB,CAAC,cAAc,EAAE,EAAE;QACvF,EAAE;QACF,WAAW;QACX,mBAAmB;QACnB,mBAAmB;QACnB,GAAG,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CACzB;YACE,IAAI,CAAC,GAAG,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,IAAI,MAAM;YACtC,CAAC,CAAC,IAAI;YACN,GAAG,mBAAQ,CAAC,KAAK,CAAC,CAAC,CAAC,YAAY,CAAC,OAAO,EAAE,GAAG,CAAC,CAAC,UAAU,CAAC,OAAO,EAAE,CACnE,KAAK;SACN,CAAC,IAAI,CAAC,KAAK,CAAC,CACd;QACD,EAAE;QACF,aAAa;QACb,iBAAiB;QACjB,GAAG,CAAC,QAAQ,CAAC,IAAI;QACjB,EAAE;QACF,cAAc;QACd,SAAS;QACT,IAAI,CAAC,SAAS,CACZ,6CAAqB,CAAC,cAAc,CAAC,GAAG,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAC3D,IAAI,EACJ,CAAC,CACF;QACD,KAAK;KACN,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AACf,CAAC;AAED,SAAS,oBAAoB,CAAiC,KAA2C,EAAE,KAAa;IACtH,OAAO;QACL,KAAK,KAAK,GAAG,CAAC,KAAK,KAAK,CAAC,IAAI,EAAE;QAC/B,YAAY;QACZ,aAAa,KAAK,CAAC,QAAQ,CAAC,IAAI,EAAE;QAClC,aAAa,KAAK,CAAC,IAAI,EAAE;QACzB,aAAa,CAAC,KAAK,CAAC,YAAY,CAAC,OAAO,EAAE,GAAG,KAAK,CAAC,UAAU,CAAC,OAAO,EAAE,CAAC,CAAC,cAAc,EAAE,KAAK;QAC9F,GAAG,CAAC,KAAK,CAAC,IAAI,KAAK,OAAO;YACxB,CAAC,CAAC;gBACE,iBAAiB;gBACjB,gBAAgB,KAAK,CAAC,KAAK,CAAC,SAAS,CAAC,cAAc,EAAE,EAAE;gBACxD,cAAc;gBACd,kBAAkB,KAAK,CAAC,KAAK,CAAC,SAAS,CAAC,KAAK,CAAC,KAAK,CAAC,cAAc,EAAE,EAAE;gBACtE,mBAAmB,KAAK,CAAC,KAAK,CAAC,SAAS,CAAC,KAAK,CAAC,MAAM,CAAC,cAAc,EAAE,EAAE;gBACxE,mBAAmB;gBACnB,kBAAkB,KAAK,CAAC,KAAK,CAAC,SAAS,CAAC,MAAM,CAAC,KAAK,CAAC,cAAc,EAAE,EAAE;gBACvE,sBAAsB,KAAK,CAAC,KAAK,CAAC,SAAS,CAAC,MAAM,CAAC,SAAS,CAAC,cAAc,EAAE,EAAE;gBAC/E,gCAAgC,KAAK,CAAC,KAAK,CAAC,SAAS,CAAC,MAAM,CAAC,mBAAmB,CAAC,cAAc,EAAE,EAAE;gBACnG,gCAAgC,KAAK,CAAC,KAAK,CAAC,SAAS,CAAC,MAAM,CAAC,mBAAmB,CAAC,cAAc,EAAE,EAAE;aACpG;YACH,CAAC,CAAC,EAAE,CAAC;QACP,EAAE;QACF,aAAa;QACb,iBAAiB;QACjB,KAAK,CAAC,QAAQ,CAAC,IAAI;QACnB,EAAE;QACF,cAAc;QACd,SAAS;QACT,IAAI,CAAC,SAAS,CACZ,6CAAqB,CAAC,cAAc,CAAC,KAAK,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAC7D,IAAI,EACJ,CAAC,CACF;QACD,KAAK;QACL,E
AAE;QACF,GAAG,CAAC,KAAK,CAAC,IAAI,KAAK,SAAS,IAAI,KAAK,CAAC,IAAI,KAAK,SAAS;YACtD,CAAC,CAAC;gBACE,WAAW;gBACX,GAAG,KAAK,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CACxB;oBACE,OAAO,CAAC,CAAC,SAAS,CAAC,IAAI,EAAE;oBACzB,qBAAqB,CAAC,CAAC,SAAS,CAAC,UAAU,CAAC,IAAI,IAAI;oBACpD,mBAAmB,CAAC,CAAC,SAAS,CAAC,QAAQ,CAAC,IAAI,IAAI;oBAChD,eAAe,CAAC,CAAC,MAAM,EAAE;oBACzB,EAAE;oBACF,GAAG,CAAC,CAAC,CAAC,SAAS,CAAC,QAAQ,CAAC,WAAW,KAAK,SAAS,IAAI,CAAC,CAAC,SAAS,CAAC,QAAQ,CAAC,WAAW,KAAK,EAAE;wBAC3F,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,QAAQ,CAAC,WAAW,EAAE,EAAE,CAAC;wBACxC,CAAC,CAAC,EAAE,CAAC;iBACR,CAAC,IAAI,CAAC,IAAI,CAAC,CACb;aACF;YACH,CAAC,CAAC,EAAE,CAAC;QACP,GAAG,CAAC,KAAK,CAAC,IAAI,KAAK,OAAO;YACxB,CAAC,CAAC;gBACE,UAAU;gBACV,SAAS;gBACT,6CAAqB,CAAC,WAAW,CAC/B,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC,CACrC;gBACD,KAAK;gBACL,EAAE;aACH;YACH,CAAC,CAAC,EAAE,CAAC;KACR,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AACf,CAAC"}
|
|
@@ -1,5 +1,11 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
1
|
+
/**
|
|
2
|
+
* @module
|
|
3
|
+
* This file contains the implementation of the IAgenticaBenchmarkExpected class.
|
|
4
|
+
*
|
|
5
|
+
* @author Wrtn Technologies
|
|
6
|
+
*/
|
|
7
|
+
import type { AgenticaOperation } from "@agentica/core";
|
|
8
|
+
import type { ILlmSchema } from "@samchon/openapi";
|
|
3
9
|
/**
|
|
4
10
|
* Expected operation determinant.
|
|
5
11
|
*
|
|
@@ -1,6 +1,12 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
1
|
+
/**
|
|
2
|
+
* @module
|
|
3
|
+
* This file contains the implementation of the IAgenticaCallBenchmarkEvent class.
|
|
4
|
+
*
|
|
5
|
+
* @author Wrtn Technologies
|
|
6
|
+
*/
|
|
7
|
+
import type { AgenticaPrompt, AgenticaTokenUsage } from "@agentica/core";
|
|
8
|
+
import type { ILlmSchema } from "@samchon/openapi";
|
|
9
|
+
import type { IAgenticaCallBenchmarkScenario } from "./IAgenticaCallBenchmarkScenario";
|
|
4
10
|
/**
|
|
5
11
|
* Event of LLM function selection benchmark.
|
|
6
12
|
*
|
|
@@ -1,7 +1,13 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
1
|
+
/**
|
|
2
|
+
* @module
|
|
3
|
+
* This file contains the implementation of the IAgenticaCallBenchmarkResult class.
|
|
4
|
+
*
|
|
5
|
+
* @author Wrtn Technologies
|
|
6
|
+
*/
|
|
7
|
+
import type { AgenticaTokenUsage } from "@agentica/core";
|
|
8
|
+
import type { ILlmSchema } from "@samchon/openapi";
|
|
9
|
+
import type { IAgenticaCallBenchmarkEvent } from "./IAgenticaCallBenchmarkEvent";
|
|
10
|
+
import type { IAgenticaCallBenchmarkScenario } from "./IAgenticaCallBenchmarkScenario";
|
|
5
11
|
/**
|
|
6
12
|
* Result of the LLM function calling benchmark.
|
|
7
13
|
*
|
|
@@ -1,5 +1,11 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
1
|
+
/**
|
|
2
|
+
* @module
|
|
3
|
+
* This file contains the implementation of the IAgenticaCallBenchmarkScenario class.
|
|
4
|
+
*
|
|
5
|
+
* @author Wrtn Technologies
|
|
6
|
+
*/
|
|
7
|
+
import type { ILlmSchema } from "@samchon/openapi";
|
|
8
|
+
import type { IAgenticaBenchmarkExpected } from "./IAgenticaBenchmarkExpected";
|
|
3
9
|
/**
|
|
4
10
|
* Scenario of function calling.
|
|
5
11
|
*
|
|
@@ -1,6 +1,12 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
1
|
+
/**
|
|
2
|
+
* @module
|
|
3
|
+
* This file contains the implementation of the IAgenticaSelectBenchmarkEvent class.
|
|
4
|
+
*
|
|
5
|
+
* @author Wrtn Technologies
|
|
6
|
+
*/
|
|
7
|
+
import type { AgenticaOperationSelection, AgenticaTextPrompt, AgenticaTokenUsage } from "@agentica/core";
|
|
8
|
+
import type { ILlmSchema } from "@samchon/openapi";
|
|
9
|
+
import type { IAgenticaSelectBenchmarkScenario } from "./IAgenticaSelectBenchmarkScenario";
|
|
4
10
|
/**
|
|
5
11
|
* Event of LLM function selection benchmark.
|
|
6
12
|
*
|
|
@@ -1,7 +1,13 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
1
|
+
/**
|
|
2
|
+
* @module
|
|
3
|
+
* This file contains the implementation of the IAgenticaSelectBenchmarkResult class.
|
|
4
|
+
*
|
|
5
|
+
* @author Wrtn Technologies
|
|
6
|
+
*/
|
|
7
|
+
import type { AgenticaTokenUsage } from "@agentica/core";
|
|
8
|
+
import type { ILlmSchema } from "@samchon/openapi";
|
|
9
|
+
import type { IAgenticaSelectBenchmarkEvent } from "./IAgenticaSelectBenchmarkEvent";
|
|
10
|
+
import type { IAgenticaSelectBenchmarkScenario } from "./IAgenticaSelectBenchmarkScenario";
|
|
5
11
|
/**
|
|
6
12
|
* Result of the LLM function selection benchmark.
|
|
7
13
|
*
|
|
@@ -1,5 +1,11 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
1
|
+
/**
|
|
2
|
+
* @module
|
|
3
|
+
* This file contains the implementation of the IAgenticaSelectBenchmarkScenario class.
|
|
4
|
+
*
|
|
5
|
+
* @author Wrtn Technologies
|
|
6
|
+
*/
|
|
7
|
+
import type { ILlmSchema } from "@samchon/openapi";
|
|
8
|
+
import type { IAgenticaBenchmarkExpected } from "./IAgenticaBenchmarkExpected";
|
|
3
9
|
/**
|
|
4
10
|
* Scenario of function selection.
|
|
5
11
|
*
|
package/lib/utils/MathUtil.d.ts
CHANGED
|
@@ -1,3 +1,15 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
1
|
+
/**
|
|
2
|
+
* @module
|
|
3
|
+
* This file contains functions to work with MathUtil.
|
|
4
|
+
*
|
|
5
|
+
* @author Wrtn Technologies
|
|
6
|
+
*/
|
|
7
|
+
export declare const MathUtil: {
|
|
8
|
+
/**
|
|
9
|
+
* Round a number to 2 decimal places.
|
|
10
|
+
*
|
|
11
|
+
* @param value - The number to round.
|
|
12
|
+
* @returns The rounded number.
|
|
13
|
+
*/
|
|
14
|
+
round: (value: number) => number;
|
|
15
|
+
};
|
package/lib/utils/MathUtil.js
CHANGED
|
@@ -1,8 +1,19 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
3
|
exports.MathUtil = void 0;
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
4
|
+
/**
|
|
5
|
+
* @module
|
|
6
|
+
* This file contains functions to work with MathUtil.
|
|
7
|
+
*
|
|
8
|
+
* @author Wrtn Technologies
|
|
9
|
+
*/
|
|
10
|
+
exports.MathUtil = {
|
|
11
|
+
/**
|
|
12
|
+
* Round a number to 2 decimal places.
|
|
13
|
+
*
|
|
14
|
+
* @param value - The number to round.
|
|
15
|
+
* @returns The rounded number.
|
|
16
|
+
*/
|
|
17
|
+
round: (value) => Math.floor(value * 100) / 100,
|
|
18
|
+
};
|
|
8
19
|
//# sourceMappingURL=MathUtil.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"MathUtil.js","sourceRoot":"","sources":["../../src/utils/MathUtil.ts"],"names":[],"mappings":";;;AAAA,
|
|
1
|
+
{"version":3,"file":"MathUtil.js","sourceRoot":"","sources":["../../src/utils/MathUtil.ts"],"names":[],"mappings":";;;AAAA;;;;;GAKG;AAEU,QAAA,QAAQ,GAAG;IACtB;;;;;OAKG;IACH,KAAK,EAAE,CAAC,KAAa,EAAU,EAAE,CAAC,IAAI,CAAC,KAAK,CAAC,KAAK,GAAG,GAAG,CAAC,GAAG,GAAG;CAChE,CAAC"}
|
package/package.json
CHANGED
|
@@ -1,14 +1,10 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@agentica/benchmark",
|
|
3
|
-
"version": "0.
|
|
4
|
-
"main": "lib/index.js",
|
|
5
|
-
"publishConfig": {
|
|
6
|
-
"access": "public"
|
|
7
|
-
},
|
|
3
|
+
"version": "0.13.0",
|
|
8
4
|
"description": "Agentic AI Library specialized in LLM Function Calling",
|
|
9
5
|
"author": "Wrtn Technologies",
|
|
10
|
-
"homepage": "https://wrtnlabs.io/agentica",
|
|
11
6
|
"license": "MIT",
|
|
7
|
+
"homepage": "https://wrtnlabs.io/agentica",
|
|
12
8
|
"repository": {
|
|
13
9
|
"type": "git",
|
|
14
10
|
"url": "https://github.com/wrtnlabs/agentica"
|
|
@@ -27,11 +23,15 @@
|
|
|
27
23
|
"swagger",
|
|
28
24
|
"openapi"
|
|
29
25
|
],
|
|
26
|
+
"main": "lib/index.js",
|
|
27
|
+
"publishConfig": {
|
|
28
|
+
"access": "public"
|
|
29
|
+
},
|
|
30
30
|
"files": [
|
|
31
|
-
"README.md",
|
|
32
31
|
"LICENSE",
|
|
33
|
-
"
|
|
32
|
+
"README.md",
|
|
34
33
|
"lib",
|
|
34
|
+
"package.json",
|
|
35
35
|
"src"
|
|
36
36
|
],
|
|
37
37
|
"dependencies": {
|
|
@@ -39,7 +39,7 @@
|
|
|
39
39
|
"openai": "^4.80.0",
|
|
40
40
|
"tstl": "^3.0.0",
|
|
41
41
|
"typia": "^8.0.0",
|
|
42
|
-
"@agentica/core": "^0.
|
|
42
|
+
"@agentica/core": "^0.13.0"
|
|
43
43
|
},
|
|
44
44
|
"devDependencies": {
|
|
45
45
|
"@rollup/plugin-terser": "^0.4.4",
|
|
@@ -53,7 +53,9 @@
|
|
|
53
53
|
},
|
|
54
54
|
"scripts": {
|
|
55
55
|
"build": "rimraf lib && tsc && rollup -c",
|
|
56
|
-
"dev": "rimraf lib && tsc --watch"
|
|
56
|
+
"dev": "rimraf lib && tsc --watch",
|
|
57
|
+
"lint": "eslint .",
|
|
58
|
+
"lint:fix": "eslint --fix ."
|
|
57
59
|
},
|
|
58
60
|
"module": "lib/index.mjs",
|
|
59
61
|
"typings": "lib/index.d.ts"
|