@autobe/benchmark 0.27.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1 @@
1
+ {"version":3,"file":"AutoBeReplayStorage.js","sourceRoot":"","sources":["../../src/replay/AutoBeReplayStorage.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;AAOA,kDAA0B;AAE1B,0EAAuE;AAEvE,IAAiB,mBAAmB,CAkEnC;AAlED,WAAiB,mBAAmB;IACrB,0BAAM,GAAG,CACpB,MAAc,EACd,aAA0D,EACtB,EAAE;QACtC,MAAM,QAAQ,GAA2B,eAAK,CAAC,IAAI;aAChD,QAAQ,EAAwB;aAChC,MAAM,CAAC,aAAa,aAAb,aAAa,cAAb,aAAa,GAAI,CAAC,GAAG,EAAE,CAAC,IAAI,CAAC,CAAC,CAAC;QACzC,MAAM,OAAO,GAA0C,MAAM,OAAO,CAAC,GAAG,CACtE,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CACjB,mBAAmB,CAAC,GAAG,CAAC;YACtB,MAAM;YACN,OAAO,EAAE,CAAC;SACX,CAAC,CACH,CACF,CAAC;QACF,OAAO,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,KAAK,IAAI,CAAC,CAAC;IAC3C,CAAC,CAAA,CAAC;IAEW,uBAAG,GAAG,CAAO,KAGzB,EAA2C,EAAE;QAC5C,MAAM,SAAS,GAA2B,MAAM,YAAY,CAAC,KAAK,CAAC,CAAC;QACpE,IAAI,SAAS,KAAK,IAAI;YAAE,OAAO,IAAI,CAAC;QAEpC,MAAM,SAAS,GAAG,CAChB,KAAkB,EACqB,EAAE;YACzC,IAAI,CAAC;gBACH,OAAO,MAAM,2CAAoB,CAAC,YAAY,CAAC;oBAC7C,MAAM,EAAE,KAAK,CAAC,MAAM;oBACpB,OAAO,EAAE,KAAK,CAAC,OAAO;oBACtB,KAAK;iBACN,CAAC,CAAC;YACL,CAAC;YAAC,WAAM,CAAC;gBACP,OAAO,IAAI,CAAC;YACd,CAAC;QACH,CAAC,CAAA,CAAC;QACF,OAAO;YACL,MAAM,EAAE,KAAK,CAAC,MAAM;YACpB,OAAO,EAAE,KAAK,CAAC,OAAO;YACtB,SAAS;YACT,OAAO,EAAE,MAAM,SAAS,CAAC,SAAS,CAAC;YACnC,MAAM,EAAE,MAAM,SAAS,CAAC,QAAQ,CAAC;YACjC,SAAS,EAAE,MAAM,SAAS,CAAC,WAAW,CAAC;YACvC,IAAI,EAAE,MAAM,SAAS,CAAC,MAAM,CAAC;YAC7B,OAAO,EAAE,MAAM,SAAS,CAAC,SAAS,CAAC;SACpC,CAAC;IACJ,CAAC,CAAA,CAAC;IAEF,MAAM,YAAY,GAAG,CAAO,KAG3B,EAAmC,EAAE;QACpC,KAAK,MAAM,KAAK,IAAI,QAAQ,EAAE,CAAC;YAC7B,IAAI,CAAC;gBACH,OAAO,MAAM,2CAAoB,CAAC,YAAY,CAAC;oBAC7C,MAAM,EAAE,KAAK,CAAC,MAAM;oBACpB,OAAO,EAAE,KAAK,CAAC,OAAO;oBACtB,KAAK;iBACN,CAAC,CAAC;YACL,CAAC;YAAC,WAAM,CAAC,CAAA,CAAC;QACZ,CAAC;QACD,OAAO,IAAI,CAAC;IACd,CAAC,CAAA,CAAC;AACJ,CAAC,EAlEgB,mBAAmB,mCAAnB,mBAAmB,QAkEnC;AAED,MAAM,QAAQ,GAAG,CAAC,SAAS,EAAE,MAAM,EAAE,WAAW,EAAE,QAAQ,EAAE,SAAS,CAAU,CAAC"}
@@ -0,0 +1,3 @@
1
+ export * from "./AutoBeReplayComputer";
2
+ export * from "./AutoBeReplayDocumentation";
3
+ export * from "./AutoBeReplayStorage";
@@ -0,0 +1,20 @@
1
// Compiled CommonJS barrel for the replay module: re-exports every named
// export of the three sibling modules required at the bottom of this file.
+ "use strict";
2
// __createBinding(o, m, k, k2): exposes m[k] on o under the name k2 (k2 defaults to k).
// An already-installed helper on `this` is reused when present.
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
3
+ if (k2 === undefined) k2 = k;
4
+ var desc = Object.getOwnPropertyDescriptor(m, k);
5
// Fall back to a forwarding getter when m has no own descriptor for k, when the
// descriptor is an accessor on a module not flagged __esModule, or when it is a
// writable/configurable data property.
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
6
+ desc = { enumerable: true, get: function() { return m[k]; } };
7
+ }
8
+ Object.defineProperty(o, k2, desc);
9
// Branch used when Object.create is unavailable: plain value copy.
+ }) : (function(o, m, k, k2) {
10
+ if (k2 === undefined) k2 = k;
11
+ o[k2] = m[k];
12
+ }));
13
// __exportStar(m, exports): binds every enumerable key of m onto exports, except
// "default" and keys that exports already owns (earlier bindings win).
+ var __exportStar = (this && this.__exportStar) || function(m, exports) {
14
+ for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
15
+ };
16
+ Object.defineProperty(exports, "__esModule", { value: true });
17
+ __exportStar(require("./AutoBeReplayComputer"), exports);
18
+ __exportStar(require("./AutoBeReplayDocumentation"), exports);
19
+ __exportStar(require("./AutoBeReplayStorage"), exports);
20
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/replay/index.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;AAAA,yDAAuC;AACvC,8DAA4C;AAC5C,wDAAsC"}
package/package.json ADDED
@@ -0,0 +1,47 @@
1
+ {
2
+ "name": "@autobe/benchmark",
3
+ "version": "0.27.0",
4
+ "description": "AI backend server code generator",
5
+ "main": "lib/index.js",
6
+ "keywords": [],
7
+ "author": "Wrtn Technologies",
8
+ "license": "AGPL-3.0",
9
+ "repository": {
10
+ "type": "git",
11
+ "url": "https://github.com/wrtnlabs/autobe"
12
+ },
13
+ "bugs": {
14
+ "url": "https://github.com/wrtnlabs/autobe/issues"
15
+ },
16
+ "files": [
17
+ "lib",
18
+ "src",
19
+ "package.json",
20
+ "LICENSE",
21
+ "README.md"
22
+ ],
23
+ "publishConfig": {
24
+ "access": "public"
25
+ },
26
+ "dependencies": {
27
+ "tstl": "^3.0.0",
28
+ "typia": "^9.7.2",
29
+ "uuid": "^11.1.0",
30
+ "@autobe/agent": "^0.27.0",
31
+ "@autobe/filesystem": "^0.27.0",
32
+ "@autobe/utils": "^0.27.0",
33
+ "@autobe/interface": "^0.27.0"
34
+ },
35
+ "devDependencies": {
36
+ "@types/uuid": "^10.0.0",
37
+ "rimraf": "^6.0.1",
38
+ "ts-patch": "^3.3.0",
39
+ "typescript": "~5.9.2"
40
+ },
41
+ "scripts": {
42
+ "build": "rimraf lib && tsc",
43
+ "postbuild": "node ../../internals/config/assertBuild.js",
44
+ "dev": "rimraf lib && tsc --watch"
45
+ },
46
+ "typings": "lib/index.d.ts"
47
+ }
@@ -0,0 +1,213 @@
1
+ import { CompressUtil } from "@autobe/filesystem";
2
+ import {
3
+ AutoBeEventSnapshot,
4
+ AutoBeExampleProject,
5
+ AutoBeHistory,
6
+ AutoBePhase,
7
+ AutoBeUserMessageHistory,
8
+ IAutoBeTokenUsageJson,
9
+ } from "@autobe/interface";
10
+ import cp from "child_process";
11
+ import fs from "fs";
12
+ import path from "path";
13
+ import { Singleton, VariadicSingleton } from "tstl";
14
+ import { v7 } from "uuid";
15
+
16
/**
 * Accessors for the locally cloned `autobe-examples` repository.
 *
 * Archived artifacts are addressed as
 * `<repository>/raw/<vendor>/<project>/<phase>.<kind>.json.gz`, where the
 * vendor has its colons replaced by hyphens (see {@link slugModel}).
 */
export namespace AutoBeExampleStorage {
  /** Root path of the examples checkout, as provided by the `examples` singleton. */
  export const repository = (): string => examples.get();

  /**
   * Directory that holds the gzipped artifacts of one vendor/project pair.
   *
   * Slashes in the vendor are kept, so `"a/b"` resolves to nested directories.
   */
  export const getDirectory = (props: {
    vendor: string;
    project: string;
  }): string =>
    `${examples.get()}/raw/${slugModel(props.vendor, false)}/${props.project}`;

  /**
   * Writes every entry of `files` gzipped (with a `.gz` suffix) below the
   * vendor/project directory.
   */
  export const save = async (props: {
    vendor: string;
    project: AutoBeExampleProject;
    files: Record<string, string>;
  }): Promise<void> => {
    await saveWithGzip({
      root: `${getDirectory(props)}`,
      files: props.files,
      overwrite: true,
    });
  };

  /**
   * Builds the user message that starts the given phase of a project.
   *
   * Text resolution order:
   *
   * 1. `scripts/<project>/<phase>.md` when that file exists,
   * 2. `scripts/<project>.md` for the `analyze` phase,
   * 3. The built-in one-line prompt of the phase otherwise.
   */
  export const getUserMessage = async (props: {
    project: AutoBeExampleProject;
    phase: AutoBePhase;
  }): Promise<AutoBeUserMessageHistory> => {
    const scripted: string = `${TEST_ROOT}/scripts/${props.project}/${props.phase}.md`;
    const text: string =
      fs.existsSync(scripted) === true
        ? await fs.promises.readFile(scripted, "utf8")
        : props.phase === "analyze"
          ? await fs.promises.readFile(
              `${TEST_ROOT}/scripts/${props.project}.md`,
              "utf8",
            )
          : PROMPT_TEMPLATE[props.phase];
    return {
      type: "userMessage",
      id: v7(),
      created_at: new Date().toISOString(),
      contents: [
        {
          type: "text",
          text,
        },
      ],
    };
  };

  /**
   * Lists the archived `<vendor>/<model>` pairs in sorted order.
   *
   * The scan starts at `<repository>/raw`, the same root that
   * {@link getDirectory} resolves against, so every returned value can be fed
   * back as `vendor` to the other accessors. Entries that are not directories
   * are skipped on both levels; calling `readdir` on a plain file would
   * otherwise reject with `ENOTDIR`.
   */
  export const getVendorModels = async (): Promise<string[]> => {
    const root: string = `${repository()}/raw`;
    const result: string[] = [];
    for (const vendor of await fs.promises.readdir(root, {
      withFileTypes: true,
    })) {
      if (vendor.isDirectory() === false) continue;
      for (const model of await fs.promises.readdir(`${root}/${vendor.name}`, {
        withFileTypes: true,
      }))
        if (model.isDirectory() === true)
          result.push(`${vendor.name}/${model.name}`);
    }
    return result.sort();
  };

  /** Reads and parses `<phase>.histories.json.gz`; rejects when the file is missing. */
  export const getHistories = async (props: {
    vendor: string;
    project: AutoBeExampleProject;
    phase: AutoBePhase;
  }): Promise<AutoBeHistory[]> =>
    readGzipJson<AutoBeHistory[]>(
      `${getDirectory(props)}/${props.phase}.histories.json.gz`,
    );

  /** Reads and parses `<phase>.snapshots.json.gz`; rejects when the file is missing. */
  export const getSnapshots = async (props: {
    vendor: string;
    project: AutoBeExampleProject;
    phase: AutoBePhase;
  }): Promise<AutoBeEventSnapshot[]> =>
    readGzipJson<AutoBeEventSnapshot[]>(
      `${getDirectory(props)}/${props.phase}.snapshots.json.gz`,
    );

  /**
   * Token usage recorded on the last snapshot of the phase, or an all-zero
   * usage when there is no snapshot (or it carries no usage).
   */
  export const getTokenUsage = async (props: {
    vendor: string;
    project: AutoBeExampleProject;
    phase: AutoBePhase;
  }): Promise<IAutoBeTokenUsageJson> => {
    const snapshots: AutoBeEventSnapshot[] = await getSnapshots(props);
    return snapshots.at(-1)?.tokenUsage ?? emptyTokenUsage();
  };

  /** Tells whether the histories archive of the phase exists on disk. */
  export const has = async (props: {
    vendor: string;
    project: AutoBeExampleProject;
    phase: AutoBePhase;
  }): Promise<boolean> => {
    return fs.existsSync(
      `${getDirectory(props)}/${props.phase}.histories.json.gz`,
    );
  };

  /**
   * Makes a model name usable as a path: every `:` becomes `-`, and every `/`
   * too when `replaceSlash` is set.
   */
  export const slugModel = (model: string, replaceSlash: boolean): string => {
    model = model.replaceAll(":", "-");
    if (replaceSlash) model = model.replaceAll("/", "-");
    return model;
  };

  /** Loads a gzipped JSON file; the parsed value is not validated against `T`. */
  const readGzipJson = async <T>(location: string): Promise<T> => {
    const content: string = await CompressUtil.gunzip(
      await fs.promises.readFile(location),
    );
    return JSON.parse(content);
  };

  /** Usage record with every counter of every component set to zero. */
  const emptyTokenUsage = (): IAutoBeTokenUsageJson => {
    const component = (): IAutoBeTokenUsageJson.IComponent => ({
      total: 0,
      input: {
        total: 0,
        cached: 0,
      },
      output: {
        total: 0,
        reasoning: 0,
        accepted_prediction: 0,
        rejected_prediction: 0,
      },
    });
    return {
      aggregate: component(),
      facade: component(),
      analyze: component(),
      prisma: component(),
      interface: component(),
      test: component(),
      realize: component(),
    };
  };
}
163
+
164
/**
 * Fallback one-line user prompts, keyed by phase.
 *
 * There is deliberately no `analyze` entry: that phase reads its prompt from
 * a markdown script instead.
 */
const PROMPT_TEMPLATE = {
  "prisma": "Design the database schema.",
  "interface": "Create the API interface specification.",
  "test": "Make the e2e test functions.",
  "realize": "Implement API functions.",
};
170
/** Location of the `test` directory, four levels above this file's directory. */
const TEST_ROOT: string = `${__dirname}/../../../../test`;

/**
 * Singleton producing the path of the `autobe-examples` checkout.
 *
 * The factory clones the repository when the checkout is absent, then always
 * runs `git pull` and makes sure the `raw` sub-directory exists.
 */
const examples = new Singleton(() => {
  const parent: string = `${TEST_ROOT}/repositories`;
  const checkout: string = `${parent}/autobe-examples`;

  if (!fs.existsSync(checkout)) {
    // best effort: the parent directory may already be there
    try {
      fs.mkdirSync(parent);
    } catch {}
    cp.execSync(`git clone https://github.com/wrtnlabs/autobe-examples`, {
      cwd: parent,
      stdio: "inherit",
    });
  }

  // refresh the checkout silently
  cp.execSync("git pull", {
    cwd: checkout,
    stdio: "ignore",
  });

  const raw: string = `${checkout}/raw`;
  if (!fs.existsSync(raw)) fs.mkdirSync(raw);
  return checkout;
});
191
+
192
/**
 * Gzips every entry of `props.files` and writes it to
 * `<root>/<key>.gz`, creating parent directories as needed.
 *
 * When `overwrite` is anything but `true` and `root` already exists, the
 * whole `root` tree is removed first; with `overwrite: true` the files are
 * written on top of whatever is already there.
 *
 * NOTE(review): the flag name reads inverted relative to that behaviour —
 * confirm the intent before changing it, the visible caller passes `true`.
 */
const saveWithGzip = async (props: {
  root: string;
  files: Record<string, string>;
  overwrite?: boolean;
}): Promise<void> => {
  const wipe: boolean = props.overwrite !== true && fs.existsSync(props.root);
  if (wipe)
    await fs.promises.rm(props.root, {
      recursive: true,
    });

  // one mkdir attempt per distinct directory; failures are ignored here and
  // surface later through writeFile
  const ensureDirectory = new VariadicSingleton(async (location: string) => {
    try {
      await fs.promises.mkdir(location, {
        recursive: true,
      });
    } catch {}
  });

  for (const key of Object.keys(props.files)) {
    const content: string = props.files[key] ?? "";
    const target: string = path.resolve(`${props.root}/${key}.gz`);
    await ensureDirectory.get(path.dirname(target));
    await fs.promises.writeFile(target, await CompressUtil.gzip(content));
  }
};
@@ -0,0 +1 @@
1
+ export * from "./AutoBeExampleStorage";
package/src/index.ts ADDED
@@ -0,0 +1,2 @@
1
+ export * from "./example";
2
+ export * from "./replay";
@@ -0,0 +1,189 @@
1
+ import { AutoBeProcessAggregateFactory } from "@autobe/agent/src/factory/AutoBeProcessAggregateFactory";
2
+ import {
3
+ AutoBeExampleProject,
4
+ AutoBeHistory,
5
+ AutoBePhase,
6
+ IAutoBePlaygroundBenchmarkScore,
7
+ IAutoBePlaygroundReplay,
8
+ } from "@autobe/interface";
9
+
10
/** Pure computations that turn replayed histories into summaries and scores. */
export namespace AutoBeReplayComputer {
  /** Projects that count toward the benchmark score. */
  export const SIGNIFICANT_PROJECTS: AutoBeExampleProject[] = [
    "todo",
    "bbs",
    "reddit",
    "shopping",
  ];

  /**
   * Status badge of one vendor.
   *
   * - 🟢 when at least three summaries have a successful realize phase,
   * - 🟡 when fewer do but at least one summary reached the test phase,
   * - ❌ otherwise.
   */
  export const emoji = (
    summaries: IAutoBePlaygroundReplay.ISummary[],
  ): string => {
    const success: number = summaries.filter(
      (s) => s.realize !== null && s.realize.success === true,
    ).length;
    if (success >= 3) return "🟢";

    const tested: boolean = summaries.some((s) => s.test !== null);
    return tested ? "🟡" : "❌";
  };

  /**
   * Benchmark score of one vendor.
   *
   * Only {@link SIGNIFICANT_PROJECTS} are scored. Each phase is worth a fixed
   * number of points (analyze 10, prisma 20, interface 30, test 20, realize
   * 20): full points when the phase succeeded, half when it ran but failed,
   * none when it never ran. The aggregate is the sum over the significant
   * summaries divided by the number of significant projects, so a missing
   * project counts as zero.
   */
  export const score = (
    summaries: IAutoBePlaygroundReplay.ISummary[],
  ): IAutoBePlaygroundBenchmarkScore => {
    // list up significant projects
    const significant: IAutoBePlaygroundReplay.ISummary[] = summaries.filter(
      (s) => SIGNIFICANT_PROJECTS.includes(s.project),
    );

    // points of a single phase
    const add = (
      phase: IAutoBePlaygroundReplay.IPhaseState | null,
      success: number,
      failure?: number,
    ): number => {
      if (phase === null) return 0;
      return phase.success === true ? success : (failure ?? success / 2);
    };

    // the formula to compute the benchmark score
    const compute = (summary: IAutoBePlaygroundReplay.ISummary): number =>
      add(summary.analyze, 10) +
      add(summary.prisma, 20) +
      add(summary.interface, 30) +
      add(summary.test, 20) +
      add(summary.realize, 20);

    const individual = (project: AutoBeExampleProject): number => {
      const found = significant.find((s) => s.project === project);
      return found === undefined ? 0 : compute(found);
    };
    return {
      aggregate:
        significant.map(compute).reduce((a, b) => a + b, 0) /
        SIGNIFICANT_PROJECTS.length,
      todo: individual("todo"),
      bbs: individual("bbs"),
      reddit: individual("reddit"),
      shopping: individual("shopping"),
    };
  };

  /**
   * Condenses a replay into per-phase states plus overall totals.
   *
   * Only histories that belong to the step of the most recent `analyze`
   * history are considered for the per-phase states; for each phase the most
   * recent matching history wins. `phase` is the furthest phase that has a
   * state, or `null` when none has.
   */
  export const summarize = (
    replay: IAutoBePlaygroundReplay,
  ): IAutoBePlaygroundReplay.ISummary => {
    // newest-first view and the step of the latest analyze, shared by all phases
    const reversed: AutoBeHistory[] = replay.histories.slice().reverse();
    const step: number | undefined = reversed.find(
      (h) => h.type === "analyze",
    )?.step;

    const predicate = <Type extends AutoBePhase>(
      type: Type,
      success: (history: AutoBeHistory.Mapper[Type]) => boolean,
      commodity: (
        history: AutoBeHistory.Mapper[Type],
      ) => Record<string, number>,
    ): IAutoBePlaygroundReplay.IPhaseState | null => {
      if (step === undefined) return null;

      const history: AutoBeHistory.Mapper[Type] | undefined = reversed.find(
        (h) => h.type === type && h.step === step,
      ) as AutoBeHistory.Mapper[Type] | undefined;
      if (history === undefined) return null;
      return {
        success: success(history),
        commodity: commodity(history),
        elapsed:
          new Date(history.completed_at).getTime() -
          new Date(history.created_at).getTime(),
        aggregates: history.aggregates,
      };
    };
    const phaseStates: Record<
      AutoBePhase,
      IAutoBePlaygroundReplay.IPhaseState | null
    > = {
      analyze: predicate(
        "analyze",
        () => true,
        (h) => ({
          actors: h.actors.length,
          documents: h.files.length,
        }),
      ),
      prisma: predicate(
        "prisma",
        (h) => h.compiled.type === "success",
        (h) => ({
          namespaces: h.result.data.files.length,
          models: h.result.data.files.map((f) => f.models).flat().length,
        }),
      ),
      interface: predicate(
        "interface",
        (h) => h.missed.length === 0,
        (h) => ({
          operations: h.document.operations.length,
          schemas: Object.keys(h.document.components.schemas).length,
        }),
      ),
      test: predicate(
        "test",
        (h) => h.compiled.type === "success",
        (h) => ({
          functions: h.files.length,
          // on failure, count the distinct files that carry diagnostics
          ...(h.compiled.type === "failure"
            ? {
                errors: new Set(h.compiled.diagnostics.map((d) => d.file ?? ""))
                  .size,
              }
            : {}),
        }),
      ),
      realize: predicate(
        "realize",
        (h) => h.compiled.type === "success",
        (h) => ({
          functions: h.functions.length,
          // on failure, count the distinct files that carry diagnostics
          ...(h.compiled.type === "failure"
            ? {
                errors: new Set(h.compiled.diagnostics.map((d) => d.file ?? ""))
                  .size,
              }
            : {}),
        }),
      ),
    };
    const phase: AutoBePhase | null =
      (["realize", "test", "interface", "prisma", "analyze"] as const).find(
        (key) => phaseStates[key] !== null,
      ) ?? null;
    return {
      vendor: replay.vendor,
      project: replay.project,
      aggregates: AutoBeProcessAggregateFactory.reduce(
        replay.histories
          .filter(
            (h) =>
              h.type === "analyze" ||
              h.type === "prisma" ||
              h.type === "interface" ||
              h.type === "test" ||
              h.type === "realize",
          )
          .map((h) => h.aggregates),
      ),
      // total wall time of everything except the chat messages
      elapsed: replay.histories
        .filter(
          (h) => h.type !== "userMessage" && h.type !== "assistantMessage",
        )
        .map(
          (h) =>
            new Date(h.completed_at).getTime() -
            new Date(h.created_at).getTime(),
        )
        .reduce((a, b) => a + b, 0),
      ...phaseStates,
      phase,
    };
  };
}
@@ -0,0 +1,173 @@
1
+ import {
2
+ AutoBeExampleProject,
3
+ AutoBePhase,
4
+ IAutoBePlaygroundBenchmark,
5
+ IAutoBePlaygroundReplay,
6
+ } from "@autobe/interface";
7
+ import { StringUtil } from "@autobe/utils";
8
+
9
+ import { AutoBeExampleStorage } from "../example/AutoBeExampleStorage";
10
+
11
+ export namespace AutoBeReplayDocumentation {
12
+ export const readme = (experiments: IAutoBePlaygroundBenchmark[]): string => {
13
+ return StringUtil.trim`
14
+ # AutoBe Generated Examples
15
+
16
+ ## Benchmark
17
+
18
+ AI Model | Score | FCSR | Status
19
+ :--------|------:|-----:|:------:
20
+ ${experiments
21
+ .map((e) =>
22
+ [
23
+ `[\`${AutoBeExampleStorage.slugModel(
24
+ e.vendor,
25
+ false,
26
+ )}\`](#${AutoBeExampleStorage.slugModel(e.vendor, false)
27
+ .replaceAll("/", "")
28
+ .replaceAll(".", "")})`,
29
+ e.score.aggregate,
30
+ (() => {
31
+ const [x, y] = e.replays
32
+ .map((r) => r.aggregates.total.metric)
33
+ .map((m) => [m.success, m.attempt])
34
+ .reduce((a, b) => [a[0] + b[0], a[1] + b[1]], [0, 0]);
35
+ return y === 0 ? "0%" : Math.floor((x / y) * 100) + "%";
36
+ })(),
37
+ e.emoji,
38
+ ].join(" | "),
39
+ )
40
+ .join("\n")}
41
+
42
+ - FCSR: Function Calling Success Rate
43
+ - Status:
44
+ - 🟢: All projects completed successfully
45
+ - 🟡: Some projects failed
46
+ - ❌: All projects failed or not executed
47
+
48
+ ${experiments.map(vendor).join("\n\n\n")}
49
+ `;
50
+ };
51
+
52
+ const vendor = (exp: IAutoBePlaygroundBenchmark): string => {
53
+ const row = (project: AutoBeExampleProject): string => {
54
+ const found = exp.replays.find((r) => r.project === project);
55
+ if (found === undefined)
56
+ return `\`${project}\` | 0 | ❌ | ❌ | ❌ | ❌ | ❌`;
57
+ const phase = (
58
+ state: IAutoBePlaygroundReplay.IPhaseState | null,
59
+ ): string => {
60
+ if (state === null) return "❌";
61
+ else if (state.success === false) return "🟡";
62
+ else return "🟢";
63
+ };
64
+ return [
65
+ `[\`${found.project}\`](./${exp.vendor}/${found.project}/)`,
66
+ (exp.score as any)[project],
67
+ phase(found.analyze),
68
+ phase(found.prisma),
69
+ phase(found.interface),
70
+ phase(found.test),
71
+ phase(found.realize),
72
+ ].join(" | ");
73
+ };
74
+ return StringUtil.trim`
75
+ ## \`${exp.vendor}\`
76
+
77
+ Project | Score | Analyze | Prisma | Interface | Test | Realize
78
+ :-------|------:|:-------:|:------:|:----------|:----:|:-------:
79
+ ${row("todo")}
80
+ ${row("bbs")}
81
+ ${row("reddit")}
82
+ ${row("shopping")}
83
+
84
+ ${exp.replays
85
+ .map((r) =>
86
+ project({
87
+ replay: r,
88
+ score: (exp.score as any)[r.project],
89
+ }),
90
+ )
91
+ .join("\n\n\n")}
92
+ `;
93
+ };
94
+
95
+ const project = (props: {
96
+ replay: IAutoBePlaygroundReplay.ISummary;
97
+ score: number;
98
+ }): string => {
99
+ const phase = (key: AutoBePhase): string => {
100
+ const title: string = key.charAt(0).toUpperCase() + key.slice(1);
101
+ const state: IAutoBePlaygroundReplay.IPhaseState | null =
102
+ props.replay[key];
103
+ if (state === null) return [`⚪ ${title}`, "", "", "", ""].join(" | ");
104
+ return [
105
+ `${state.success === true ? "🟢" : "🔴"} ${title}`,
106
+ Object.entries(state.commodity)
107
+ .map(([key, value]) => `\`${key}\`: ${value}`)
108
+ .join(", "),
109
+ formatTokens(state.aggregates.total.tokenUsage.total),
110
+ formatElapsedTime(state.elapsed),
111
+ Math.floor(
112
+ (state.aggregates.total.metric.success /
113
+ state.aggregates.total.metric.attempt) *
114
+ 100,
115
+ ) + "%",
116
+ ].join(" | ");
117
+ };
118
+ return StringUtil.trim`
119
+ ### \`${props.replay.vendor}\` - \`${props.replay.project}\`
120
+
121
+ - Source Code: ${`[\`${AutoBeExampleStorage.slugModel(
122
+ props.replay.vendor,
123
+ false,
124
+ )}/${props.replay.project}\`](./${AutoBeExampleStorage.slugModel(
125
+ props.replay.vendor,
126
+ false,
127
+ )}/${props.replay.project}/)`}
128
+ - Score: ${props.score}
129
+ - Elapsed Time: ${formatElapsedTime(props.replay.elapsed)}
130
+ - Token Usage: ${formatTokens(
131
+ props.replay.aggregates.total.tokenUsage.total,
132
+ )}
133
+ - Function Calling Success Rate: ${(
134
+ (props.replay.aggregates.total.metric.success /
135
+ props.replay.aggregates.total.metric.attempt) *
136
+ 100
137
+ ).toFixed(2)}%
138
+
139
+ Phase | Generated | Token Usage | Elapsed Time | FCSR
140
+ :-----|:----------|------------:|-------------:|------:
141
+ ${(["analyze", "prisma", "interface", "test", "realize"] as const)
142
+ .map((key) => phase(key))
143
+ .join("\n")}
144
+ `;
145
+ };
146
+ }
147
+
148
/**
 * Formats a duration in milliseconds as `Hh Mm Ss`, dropping the leading
 * units that are not positive (`90_000` → `1m 30s`, `500` → `0s`).
 *
 * @param ms Duration in milliseconds
 * @returns Human readable duration, truncated to whole seconds
 */
function formatElapsedTime(ms: number): string {
  const totalSeconds = Math.floor(ms / 1000);
  const totalMinutes = Math.floor(totalSeconds / 60);

  const h = Math.floor(totalMinutes / 60);
  const m = totalMinutes % 60;
  const s = totalSeconds % 60;

  // seconds are always shown; minutes join when they or the hours are
  // positive; hours only when positive
  const parts: string[] = [`${s}s`];
  if (h > 0 || m > 0) parts.unshift(`${m}m`);
  if (h > 0) parts.unshift(`${h}h`);
  return parts.join(" ");
}
165
+
166
/**
 * Formats a token count compactly: millions with two decimals (`1.23M`),
 * thousands with one decimal (`1.5K`), anything smaller verbatim.
 *
 * @param num Token count
 * @returns Compact textual representation
 */
function formatTokens(num: number): string {
  if (num >= 1000000) {
    return `${(num / 1000000).toFixed(2)}M`;
  } else if (num >= 1000) {
    const thousands: string = (num / 1000).toFixed(1);
    // toFixed rounds, so counts just below one million would otherwise be
    // printed as "1000.0K"
    return thousands === "1000.0" ? "1.00M" : `${thousands}K`;
  }
  return num.toString();
}