@autobe/benchmark 0.27.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +661 -0
- package/lib/example/AutoBeExampleStorage.d.ts +39 -0
- package/lib/example/AutoBeExampleStorage.js +169 -0
- package/lib/example/AutoBeExampleStorage.js.map +1 -0
- package/lib/example/index.d.ts +1 -0
- package/lib/example/index.js +18 -0
- package/lib/example/index.js.map +1 -0
- package/lib/index.d.ts +2 -0
- package/lib/index.js +19 -0
- package/lib/index.js.map +1 -0
- package/lib/replay/AutoBeReplayComputer.d.ts +7 -0
- package/lib/replay/AutoBeReplayComputer.js +109 -0
- package/lib/replay/AutoBeReplayComputer.js.map +1 -0
- package/lib/replay/AutoBeReplayDocumentation.d.ts +4 -0
- package/lib/replay/AutoBeReplayDocumentation.js +146 -0
- package/lib/replay/AutoBeReplayDocumentation.js.map +1 -0
- package/lib/replay/AutoBeReplayStorage.d.ts +8 -0
- package/lib/replay/AutoBeReplayStorage.js +72 -0
- package/lib/replay/AutoBeReplayStorage.js.map +1 -0
- package/lib/replay/index.d.ts +3 -0
- package/lib/replay/index.js +20 -0
- package/lib/replay/index.js.map +1 -0
- package/package.json +47 -0
- package/src/example/AutoBeExampleStorage.ts +213 -0
- package/src/example/index.ts +1 -0
- package/src/index.ts +2 -0
- package/src/replay/AutoBeReplayComputer.ts +189 -0
- package/src/replay/AutoBeReplayDocumentation.ts +173 -0
- package/src/replay/AutoBeReplayStorage.ts +80 -0
- package/src/replay/index.ts +3 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"AutoBeReplayStorage.js","sourceRoot":"","sources":["../../src/replay/AutoBeReplayStorage.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;AAOA,kDAA0B;AAE1B,0EAAuE;AAEvE,IAAiB,mBAAmB,CAkEnC;AAlED,WAAiB,mBAAmB;IACrB,0BAAM,GAAG,CACpB,MAAc,EACd,aAA0D,EACtB,EAAE;QACtC,MAAM,QAAQ,GAA2B,eAAK,CAAC,IAAI;aAChD,QAAQ,EAAwB;aAChC,MAAM,CAAC,aAAa,aAAb,aAAa,cAAb,aAAa,GAAI,CAAC,GAAG,EAAE,CAAC,IAAI,CAAC,CAAC,CAAC;QACzC,MAAM,OAAO,GAA0C,MAAM,OAAO,CAAC,GAAG,CACtE,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CACjB,mBAAmB,CAAC,GAAG,CAAC;YACtB,MAAM;YACN,OAAO,EAAE,CAAC;SACX,CAAC,CACH,CACF,CAAC;QACF,OAAO,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,KAAK,IAAI,CAAC,CAAC;IAC3C,CAAC,CAAA,CAAC;IAEW,uBAAG,GAAG,CAAO,KAGzB,EAA2C,EAAE;QAC5C,MAAM,SAAS,GAA2B,MAAM,YAAY,CAAC,KAAK,CAAC,CAAC;QACpE,IAAI,SAAS,KAAK,IAAI;YAAE,OAAO,IAAI,CAAC;QAEpC,MAAM,SAAS,GAAG,CAChB,KAAkB,EACqB,EAAE;YACzC,IAAI,CAAC;gBACH,OAAO,MAAM,2CAAoB,CAAC,YAAY,CAAC;oBAC7C,MAAM,EAAE,KAAK,CAAC,MAAM;oBACpB,OAAO,EAAE,KAAK,CAAC,OAAO;oBACtB,KAAK;iBACN,CAAC,CAAC;YACL,CAAC;YAAC,WAAM,CAAC;gBACP,OAAO,IAAI,CAAC;YACd,CAAC;QACH,CAAC,CAAA,CAAC;QACF,OAAO;YACL,MAAM,EAAE,KAAK,CAAC,MAAM;YACpB,OAAO,EAAE,KAAK,CAAC,OAAO;YACtB,SAAS;YACT,OAAO,EAAE,MAAM,SAAS,CAAC,SAAS,CAAC;YACnC,MAAM,EAAE,MAAM,SAAS,CAAC,QAAQ,CAAC;YACjC,SAAS,EAAE,MAAM,SAAS,CAAC,WAAW,CAAC;YACvC,IAAI,EAAE,MAAM,SAAS,CAAC,MAAM,CAAC;YAC7B,OAAO,EAAE,MAAM,SAAS,CAAC,SAAS,CAAC;SACpC,CAAC;IACJ,CAAC,CAAA,CAAC;IAEF,MAAM,YAAY,GAAG,CAAO,KAG3B,EAAmC,EAAE;QACpC,KAAK,MAAM,KAAK,IAAI,QAAQ,EAAE,CAAC;YAC7B,IAAI,CAAC;gBACH,OAAO,MAAM,2CAAoB,CAAC,YAAY,CAAC;oBAC7C,MAAM,EAAE,KAAK,CAAC,MAAM;oBACpB,OAAO,EAAE,KAAK,CAAC,OAAO;oBACtB,KAAK;iBACN,CAAC,CAAC;YACL,CAAC;YAAC,WAAM,CAAC,CAAA,CAAC;QACZ,CAAC;QACD,OAAO,IAAI,CAAC;IACd,CAAC,CAAA,CAAC;AACJ,CAAC,EAlEgB,mBAAmB,mCAAnB,mBAAmB,QAkEnC;AAED,MAAM,QAAQ,GAAG,CAAC,SAAS,EAAE,MAAM,EAAE,WAAW,EAAE,QAAQ,EAAE,SAAS,CAAU,CAAC"}
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
 * Copies the binding `source[key]` onto `target` under the name `alias`
 * (`alias` falls back to `key`). A helper already present on `this` is reused.
 *
 * When `Object.create` exists the property is installed through
 * `Object.defineProperty`; a fresh enumerable getter is built whenever the
 * source descriptor is missing, is an accessor on a non-ES module, or is a
 * writable/configurable data property. Otherwise the value is plainly assigned.
 */
var __createBinding = (this && this.__createBinding) || (Object.create
    ? function (target, source, key, alias) {
        if (alias === undefined) alias = key;
        var descriptor = Object.getOwnPropertyDescriptor(source, key);
        var needsGetter = !descriptor || ("get" in descriptor
            ? !source.__esModule
            : descriptor.writable || descriptor.configurable);
        if (needsGetter) {
            descriptor = {
                enumerable: true,
                get: function () { return source[key]; }
            };
        }
        Object.defineProperty(target, alias, descriptor);
    }
    : function (target, source, key, alias) {
        if (alias === undefined) alias = key;
        target[alias] = source[key];
    });

/**
 * Re-exports every enumerable property of `m` onto `exports`, skipping
 * `default` and any name `exports` already owns.
 */
var __exportStar = (this && this.__exportStar) || function (m, exports) {
    for (var name in m) {
        if (name === "default") continue;
        if (Object.prototype.hasOwnProperty.call(exports, name)) continue;
        __createBinding(exports, m, name);
    }
};

Object.defineProperty(exports, "__esModule", { value: true });

// Barrel: expose the three replay modules through this entry point.
__exportStar(require("./AutoBeReplayComputer"), exports);
__exportStar(require("./AutoBeReplayDocumentation"), exports);
__exportStar(require("./AutoBeReplayStorage"), exports);
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/replay/index.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;AAAA,yDAAuC;AACvC,8DAA4C;AAC5C,wDAAsC"}
|
package/package.json
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@autobe/benchmark",
|
|
3
|
+
"version": "0.27.0",
|
|
4
|
+
"description": "AI backend server code generator",
|
|
5
|
+
"main": "lib/index.js",
|
|
6
|
+
"keywords": [],
|
|
7
|
+
"author": "Wrtn Technologies",
|
|
8
|
+
"license": "AGPL-3.0",
|
|
9
|
+
"repository": {
|
|
10
|
+
"type": "git",
|
|
11
|
+
"url": "https://github.com/wrtnlabs/autobe"
|
|
12
|
+
},
|
|
13
|
+
"bugs": {
|
|
14
|
+
"url": "https://github.com/wrtnlabs/autobe/issues"
|
|
15
|
+
},
|
|
16
|
+
"files": [
|
|
17
|
+
"lib",
|
|
18
|
+
"src",
|
|
19
|
+
"package.json",
|
|
20
|
+
"LICENSE",
|
|
21
|
+
"README.md"
|
|
22
|
+
],
|
|
23
|
+
"publishConfig": {
|
|
24
|
+
"access": "public"
|
|
25
|
+
},
|
|
26
|
+
"dependencies": {
|
|
27
|
+
"tstl": "^3.0.0",
|
|
28
|
+
"typia": "^9.7.2",
|
|
29
|
+
"uuid": "^11.1.0",
|
|
30
|
+
"@autobe/agent": "^0.27.0",
|
|
31
|
+
"@autobe/filesystem": "^0.27.0",
|
|
32
|
+
"@autobe/utils": "^0.27.0",
|
|
33
|
+
"@autobe/interface": "^0.27.0"
|
|
34
|
+
},
|
|
35
|
+
"devDependencies": {
|
|
36
|
+
"@types/uuid": "^10.0.0",
|
|
37
|
+
"rimraf": "^6.0.1",
|
|
38
|
+
"ts-patch": "^3.3.0",
|
|
39
|
+
"typescript": "~5.9.2"
|
|
40
|
+
},
|
|
41
|
+
"scripts": {
|
|
42
|
+
"build": "rimraf lib && tsc",
|
|
43
|
+
"postbuild": "node ../../internals/config/assertBuild.js",
|
|
44
|
+
"dev": "rimraf lib && tsc --watch"
|
|
45
|
+
},
|
|
46
|
+
"typings": "lib/index.d.ts"
|
|
47
|
+
}
|
|
@@ -0,0 +1,213 @@
|
|
|
1
|
+
import { CompressUtil } from "@autobe/filesystem";
|
|
2
|
+
import {
|
|
3
|
+
AutoBeEventSnapshot,
|
|
4
|
+
AutoBeExampleProject,
|
|
5
|
+
AutoBeHistory,
|
|
6
|
+
AutoBePhase,
|
|
7
|
+
AutoBeUserMessageHistory,
|
|
8
|
+
IAutoBeTokenUsageJson,
|
|
9
|
+
} from "@autobe/interface";
|
|
10
|
+
import cp from "child_process";
|
|
11
|
+
import fs from "fs";
|
|
12
|
+
import path from "path";
|
|
13
|
+
import { Singleton, VariadicSingleton } from "tstl";
|
|
14
|
+
import { v7 } from "uuid";
|
|
15
|
+
|
|
16
|
+
/**
 * Accessors for the locally cloned `autobe-examples` repository.
 *
 * Archived artifacts are addressed as
 * `<repository>/raw/<vendor slug>/<project>/<file>.gz`; every path built in
 * this namespace follows that layout.
 */
export namespace AutoBeExampleStorage {
  /**
   * Normalizes a model identifier so it can be used as a path segment.
   *
   * @param model Model identifier such as `vendor/name:tag`.
   * @param replaceSlash When `true`, `/` is replaced by `-` as well as `:`.
   * @returns The identifier with every `:` (and optionally `/`) turned into `-`.
   */
  export const slugModel = (model: string, replaceSlash: boolean): string => {
    const withoutColons: string = model.replace(/:/g, "-");
    return replaceSlash ? withoutColons.replace(/\//g, "-") : withoutColons;
  };

  /** Location of the examples repository checkout. */
  export const repository = (): string => examples.get();

  /**
   * Directory holding the archived artifacts of one vendor/project pair.
   *
   * Slashes in the vendor are preserved, so `vendor/model` becomes two nested
   * directories.
   */
  export const getDirectory = (props: {
    vendor: string;
    project: string;
  }): string =>
    `${examples.get()}/raw/${slugModel(props.vendor, false)}/${props.project}`;

  /**
   * Stores `files` (gzip-compressed) under the vendor/project directory.
   *
   * Delegates to `saveWithGzip` with `overwrite: true`.
   */
  export const save = async (props: {
    vendor: string;
    project: AutoBeExampleProject;
    files: Record<string, string>;
  }): Promise<void> => {
    await saveWithGzip({
      root: getDirectory(props),
      files: props.files,
      overwrite: true,
    });
  };

  /**
   * Builds the user message that starts a phase of an example project.
   *
   * Text source, in priority order:
   *
   * 1. `<TEST_ROOT>/scripts/<project>/<phase>.md` when that file exists,
   * 2. `<TEST_ROOT>/scripts/<project>.md` for the `analyze` phase,
   * 3. the canned sentence from `PROMPT_TEMPLATE` for any other phase.
   *
   * @throws When the selected script file cannot be read.
   */
  export const getUserMessage = async (props: {
    project: AutoBeExampleProject;
    phase: AutoBePhase;
  }): Promise<AutoBeUserMessageHistory> => {
    const scripts: string = `${TEST_ROOT}/scripts`;
    const dedicated: string = `${scripts}/${props.project}/${props.phase}.md`;
    const text: string = fs.existsSync(dedicated)
      ? await fs.promises.readFile(dedicated, "utf8")
      : props.phase === "analyze"
        ? await fs.promises.readFile(`${scripts}/${props.project}.md`, "utf8")
        : PROMPT_TEMPLATE[props.phase];
    return {
      type: "userMessage",
      id: v7(),
      created_at: new Date().toISOString(),
      contents: [
        {
          type: "text",
          text,
        },
      ],
    };
  };

  /** Names of the sub-directories (not files, not symlinks) of `location`. */
  const listDirectories = async (location: string): Promise<string[]> => {
    const entries = await fs.promises.readdir(location, {
      withFileTypes: true,
    });
    return entries
      .filter((entry) => entry.isDirectory())
      .map((entry) => entry.name);
  };

  /**
   * Lists every archived `vendor/model` pair, sorted.
   *
   * Only directories below `<repository>/raw` are considered, which is the
   * same root {@link getDirectory} writes to. Plain files at either level are
   * skipped instead of being passed to `readdir`.
   */
  export const getVendorModels = async (): Promise<string[]> => {
    const root: string = `${repository()}/raw`;
    const result: string[] = [];
    for (const vendor of await listDirectories(root))
      for (const model of await listDirectories(`${root}/${vendor}`))
        result.push(`${vendor}/${model}`);
    return result.sort();
  };

  /** Reads a gzip-compressed JSON file and returns the parsed value. */
  const readGzipJson = async <T>(location: string): Promise<T> => {
    const content: string = await CompressUtil.gunzip(
      await fs.promises.readFile(location),
    );
    // The parsed payload is trusted as-is; no schema validation happens here.
    return JSON.parse(content);
  };

  /**
   * Loads `<phase>.histories.json.gz` of the given vendor/project.
   *
   * @throws When the archive is missing or is not valid gzip/JSON.
   */
  export const getHistories = (props: {
    vendor: string;
    project: AutoBeExampleProject;
    phase: AutoBePhase;
  }): Promise<AutoBeHistory[]> =>
    readGzipJson<AutoBeHistory[]>(
      `${getDirectory(props)}/${props.phase}.histories.json.gz`,
    );

  /**
   * Loads `<phase>.snapshots.json.gz` of the given vendor/project.
   *
   * @throws When the archive is missing or is not valid gzip/JSON.
   */
  export const getSnapshots = (props: {
    vendor: string;
    project: AutoBeExampleProject;
    phase: AutoBePhase;
  }): Promise<AutoBeEventSnapshot[]> =>
    readGzipJson<AutoBeEventSnapshot[]>(
      `${getDirectory(props)}/${props.phase}.snapshots.json.gz`,
    );

  /** A token usage record whose counters are all zero. */
  const emptyTokenUsage = (): IAutoBeTokenUsageJson => {
    const component = (): IAutoBeTokenUsageJson.IComponent => ({
      total: 0,
      input: {
        total: 0,
        cached: 0,
      },
      output: {
        total: 0,
        reasoning: 0,
        accepted_prediction: 0,
        rejected_prediction: 0,
      },
    });
    return {
      aggregate: component(),
      facade: component(),
      analyze: component(),
      prisma: component(),
      interface: component(),
      test: component(),
      realize: component(),
    };
  };

  /**
   * Token usage recorded by the last snapshot of the phase.
   *
   * Falls back to an all-zero record when there is no snapshot or the last
   * snapshot carries no `tokenUsage`.
   */
  export const getTokenUsage = async (props: {
    vendor: string;
    project: AutoBeExampleProject;
    phase: AutoBePhase;
  }): Promise<IAutoBeTokenUsageJson> => {
    const snapshots: AutoBeEventSnapshot[] = await getSnapshots(props);
    return snapshots.at(-1)?.tokenUsage ?? emptyTokenUsage();
  };

  /** Whether the histories archive of the phase exists on disk. */
  export const has = async (props: {
    vendor: string;
    project: AutoBeExampleProject;
    phase: AutoBePhase;
  }): Promise<boolean> =>
    fs.existsSync(`${getDirectory(props)}/${props.phase}.histories.json.gz`);
}
|
|
163
|
+
|
|
164
|
+
/**
 * Canned user prompts, keyed by phase.
 *
 * Used for every phase except `analyze` when no dedicated script file exists
 * for the project.
 */
const PROMPT_TEMPLATE = {
  // database design
  prisma: "Design the database schema.",
  // API specification
  interface: "Create the API interface specification.",
  // e2e test generation
  test: "Make the e2e test functions.",
  // API implementation
  realize: "Implement API functions.",
};
|
|
170
|
+
/** `test` directory, resolved four levels above this module's directory. */
const TEST_ROOT: string = __dirname + "/../../../../test";

/**
 * Lazily prepared checkout of the `autobe-examples` repository.
 *
 * On first access: clones the repository into `<TEST_ROOT>/repositories` when
 * the checkout is absent, runs `git pull` in it, makes sure its `raw`
 * sub-directory exists, and yields the checkout location.
 */
const examples = new Singleton((): string => {
  const parent: string = `${TEST_ROOT}/repositories`;
  const checkout: string = `${parent}/autobe-examples`;

  if (!fs.existsSync(checkout)) {
    // Best effort: a failure here (e.g. the directory already exists) is
    // deliberately ignored.
    try {
      fs.mkdirSync(parent);
    } catch {}
    cp.execSync(`git clone https://github.com/wrtnlabs/autobe-examples`, {
      cwd: parent,
      stdio: "inherit",
    });
  }

  // Refresh the checkout silently.
  cp.execSync("git pull", {
    cwd: checkout,
    stdio: "ignore",
  });

  const raw: string = `${checkout}/raw`;
  if (!fs.existsSync(raw)) fs.mkdirSync(raw);
  return checkout;
});
|
|
191
|
+
|
|
192
|
+
/**
 * Writes every entry of `props.files` as `<root>/<key>.gz`, gzip-compressed.
 *
 * Unless `props.overwrite` is exactly `true`, an existing `props.root` is
 * removed recursively first; with `overwrite: true` the directory is kept and
 * files are written over whatever is already there.
 *
 * @param props.root Destination directory.
 * @param props.files Relative file name to text content.
 * @param props.overwrite Keep the existing directory instead of wiping it.
 */
const saveWithGzip = async (props: {
  root: string;
  files: Record<string, string>;
  overwrite?: boolean;
}): Promise<void> => {
  const wipeFirst: boolean = props.overwrite !== true;
  if (wipeFirst && fs.existsSync(props.root))
    await fs.promises.rm(props.root, { recursive: true });

  // One mkdir per distinct directory; creation errors are ignored here and
  // surface later from writeFile if the directory is really unusable.
  const ensureDirectory = new VariadicSingleton(async (target: string) => {
    try {
      await fs.promises.mkdir(target, { recursive: true });
    } catch {}
  });

  for (const name of Object.keys(props.files)) {
    const destination: string = path.resolve(`${props.root}/${name}.gz`);
    await ensureDirectory.get(path.dirname(destination));
    const compressed = await CompressUtil.gzip(props.files[name] ?? "");
    await fs.promises.writeFile(destination, compressed);
  }
};
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export * from "./AutoBeExampleStorage";
|
package/src/index.ts
ADDED
|
@@ -0,0 +1,189 @@
|
|
|
1
|
+
import { AutoBeProcessAggregateFactory } from "@autobe/agent/src/factory/AutoBeProcessAggregateFactory";
|
|
2
|
+
import {
|
|
3
|
+
AutoBeExampleProject,
|
|
4
|
+
AutoBeHistory,
|
|
5
|
+
AutoBePhase,
|
|
6
|
+
IAutoBePlaygroundBenchmarkScore,
|
|
7
|
+
IAutoBePlaygroundReplay,
|
|
8
|
+
} from "@autobe/interface";
|
|
9
|
+
|
|
10
|
+
/** Scoring and summarizing helpers for replayed example projects. */
export namespace AutoBeReplayComputer {
  /** Projects that take part in the benchmark score. */
  export const SIGNIFICANT_PROJECTS: AutoBeExampleProject[] = [
    "todo",
    "bbs",
    "reddit",
    "shopping",
  ];

  /**
   * Status emoji of a vendor.
   *
   * - 🟢 when at least three summaries have a successful `realize` phase,
   * - 🟡 otherwise, when any summary reached the `test` phase,
   * - ❌ otherwise.
   */
  export const emoji = (
    summaries: IAutoBePlaygroundReplay.ISummary[],
  ): string => {
    const success: number = summaries.filter(
      (s) => s.realize !== null && s.realize.success === true,
    ).length;
    if (success >= 3) return "🟢";
    return summaries.some((s) => s.test !== null) ? "🟡" : "❌";
  };

  /**
   * Points earned by one phase: the full `weight` on success, half of it when
   * the phase ran but failed, nothing when the phase never ran.
   */
  const credit = (
    phase: IAutoBePlaygroundReplay.IPhaseState | null,
    weight: number,
  ): number => {
    if (phase === null) return 0;
    return phase.success === true ? weight : weight / 2;
  };

  /** Score of one project; the phase weights add up to 100. */
  const compute = (summary: IAutoBePlaygroundReplay.ISummary): number =>
    credit(summary.analyze, 10) +
    credit(summary.prisma, 20) +
    credit(summary.interface, 30) +
    credit(summary.test, 20) +
    credit(summary.realize, 20);

  /**
   * Benchmark score of a vendor.
   *
   * Each significant project scores through `compute` (0 when the project has
   * no summary). `aggregate` is the mean over {@link SIGNIFICANT_PROJECTS}, so
   * a project contributes once even if `summaries` lists it several times, and
   * summaries of other projects are ignored.
   */
  export const score = (
    summaries: IAutoBePlaygroundReplay.ISummary[],
  ): IAutoBePlaygroundBenchmarkScore => {
    const individual = (project: AutoBeExampleProject): number => {
      const found = summaries.find((s) => s.project === project);
      return found === undefined ? 0 : compute(found);
    };
    const total: number = SIGNIFICANT_PROJECTS.map(individual).reduce(
      (a, b) => a + b,
      0,
    );
    return {
      aggregate: total / SIGNIFICANT_PROJECTS.length,
      todo: individual("todo"),
      bbs: individual("bbs"),
      reddit: individual("reddit"),
      shopping: individual("shopping"),
    };
  };

  /**
   * Condenses a replay into per-phase states plus overall totals.
   *
   * Only histories sharing the `step` of the most recent `analyze` history
   * are used for the phase states; a phase without such a history is `null`.
   * `phase` is the furthest phase that has a state.
   */
  export const summarize = (
    replay: IAutoBePlaygroundReplay,
  ): IAutoBePlaygroundReplay.ISummary => {
    // Newest first, so `find` returns the latest matching history.
    const reversed: AutoBeHistory[] = replay.histories.slice().reverse();
    const step: number | undefined = reversed.find(
      (h) => h.type === "analyze",
    )?.step;

    const predicate = <Type extends AutoBePhase>(
      type: Type,
      success: (history: AutoBeHistory.Mapper[Type]) => boolean,
      commodity: (
        history: AutoBeHistory.Mapper[Type],
      ) => Record<string, number>,
    ): IAutoBePlaygroundReplay.IPhaseState | null => {
      if (step === undefined) return null;
      const history: AutoBeHistory.Mapper[Type] | undefined = reversed.find(
        (h) => h.type === type && h.step === step,
      ) as AutoBeHistory.Mapper[Type] | undefined;
      if (history === undefined) return null;
      return {
        success: success(history),
        commodity: commodity(history),
        elapsed:
          new Date(history.completed_at).getTime() -
          new Date(history.created_at).getTime(),
        aggregates: history.aggregates,
      };
    };

    const phaseStates: Record<
      AutoBePhase,
      IAutoBePlaygroundReplay.IPhaseState | null
    > = {
      analyze: predicate(
        "analyze",
        () => true,
        (h) => ({
          actors: h.actors.length,
          documents: h.files.length,
        }),
      ),
      prisma: predicate(
        "prisma",
        (h) => h.compiled.type === "success",
        (h) => ({
          namespaces: h.result.data.files.length,
          models: h.result.data.files.map((f) => f.models).flat().length,
        }),
      ),
      interface: predicate(
        "interface",
        (h) => h.missed.length === 0,
        (h) => ({
          operations: h.document.operations.length,
          schemas: Object.keys(h.document.components.schemas).length,
        }),
      ),
      test: predicate(
        "test",
        (h) => h.compiled.type === "success",
        (h) => ({
          functions: h.files.length,
          // `errors` counts distinct files that have diagnostics.
          ...(h.compiled.type === "failure"
            ? {
                errors: new Set(h.compiled.diagnostics.map((d) => d.file ?? ""))
                  .size,
              }
            : {}),
        }),
      ),
      realize: predicate(
        "realize",
        (h) => h.compiled.type === "success",
        (h) => ({
          functions: h.functions.length,
          // `errors` counts distinct files that have diagnostics.
          ...(h.compiled.type === "failure"
            ? {
                errors: new Set(h.compiled.diagnostics.map((d) => d.file ?? ""))
                  .size,
              }
            : {}),
        }),
      ),
    };

    // Latest phase first: the first one with a state is where the replay stopped.
    const phase: AutoBePhase | null =
      (["realize", "test", "interface", "prisma", "analyze"] as const).find(
        (key) => phaseStates[key] !== null,
      ) ?? null;

    return {
      vendor: replay.vendor,
      project: replay.project,
      aggregates: AutoBeProcessAggregateFactory.reduce(
        replay.histories
          .filter(
            (h) =>
              h.type === "analyze" ||
              h.type === "prisma" ||
              h.type === "interface" ||
              h.type === "test" ||
              h.type === "realize",
          )
          .map((h) => h.aggregates),
      ),
      // Sum of the durations of every non-message history.
      elapsed: replay.histories
        .filter(
          (h) => h.type !== "userMessage" && h.type !== "assistantMessage",
        )
        .map(
          (h) =>
            new Date(h.completed_at).getTime() -
            new Date(h.created_at).getTime(),
        )
        .reduce((a, b) => a + b, 0),
      ...phaseStates,
      phase,
    };
  };
}
|
|
@@ -0,0 +1,173 @@
|
|
|
1
|
+
import {
|
|
2
|
+
AutoBeExampleProject,
|
|
3
|
+
AutoBePhase,
|
|
4
|
+
IAutoBePlaygroundBenchmark,
|
|
5
|
+
IAutoBePlaygroundReplay,
|
|
6
|
+
} from "@autobe/interface";
|
|
7
|
+
import { StringUtil } from "@autobe/utils";
|
|
8
|
+
|
|
9
|
+
import { AutoBeExampleStorage } from "../example/AutoBeExampleStorage";
|
|
10
|
+
|
|
11
|
+
/** Markdown report generation for benchmark results. */
export namespace AutoBeReplayDocumentation {
  /**
   * Success ratio as a percentage in `[0, 100]`.
   *
   * Returns `0` when nothing was attempted, so callers never print `NaN`.
   */
  const successRate = (metric: { success: number; attempt: number }): number =>
    metric.attempt === 0 ? 0 : (metric.success / metric.attempt) * 100;

  /**
   * Fragment identifier of the `## \`<vendor>\`` heading written by `vendor`.
   *
   * NOTE(review): mirrors how GitHub derives heading anchors (lower-cased,
   * punctuation dropped) for the characters vendor names are expected to
   * contain: `/`, `.` and `:`. Confirm if other characters show up.
   */
  const anchor = (vendorName: string): string =>
    vendorName.toLowerCase().replace(/[\/.:]/g, "");

  /**
   * Renders the whole README: a ranking table with one row per experiment,
   * the status legend, then one detailed section per experiment.
   *
   * @param experiments Benchmark results, rendered in the given order.
   * @returns Markdown text.
   */
  export const readme = (experiments: IAutoBePlaygroundBenchmark[]): string => {
    const ranking: string = experiments
      .map((e) => {
        // Function calling success rate over all replays of the vendor.
        const totals = e.replays.reduce(
          (acc, r) => ({
            success: acc.success + r.aggregates.total.metric.success,
            attempt: acc.attempt + r.aggregates.total.metric.attempt,
          }),
          { success: 0, attempt: 0 },
        );
        return [
          `[\`${AutoBeExampleStorage.slugModel(e.vendor, false)}\`](#${anchor(
            e.vendor,
          )})`,
          e.score.aggregate,
          Math.floor(successRate(totals)) + "%",
          e.emoji,
        ].join(" | ");
      })
      .join("\n");
    return StringUtil.trim`
      # AutoBe Generated Examples

      ## Benchmark

      AI Model | Score | FCSR | Status
      :--------|------:|-----:|:------:
      ${ranking}

      - FCSR: Function Calling Success Rate
      - Status:
        - 🟢: All projects completed successfully
        - 🟡: Some projects failed
        - ❌: All projects failed or not executed

      ${experiments.map(vendor).join("\n\n\n")}
    `;
  };

  /**
   * Section of one vendor: a table of the four benchmark projects followed by
   * the details of every replay.
   */
  const vendor = (exp: IAutoBePlaygroundBenchmark): string => {
    // Same directory naming as the "Source Code" link of each project.
    const directory: string = AutoBeExampleStorage.slugModel(exp.vendor, false);
    const mark = (
      state: IAutoBePlaygroundReplay.IPhaseState | null,
    ): string => {
      if (state === null) return "❌";
      return state.success === false ? "🟡" : "🟢";
    };
    const row = (project: AutoBeExampleProject): string => {
      const found = exp.replays.find((r) => r.project === project);
      if (found === undefined)
        return `\`${project}\` | 0 | ❌ | ❌ | ❌ | ❌ | ❌`;
      return [
        `[\`${found.project}\`](./${directory}/${found.project}/)`,
        (exp.score as any)[project],
        mark(found.analyze),
        mark(found.prisma),
        mark(found.interface),
        mark(found.test),
        mark(found.realize),
      ].join(" | ");
    };
    return StringUtil.trim`
      ## \`${exp.vendor}\`

      Project | Score | Analyze | Prisma | Interface | Test | Realize
      :-------|------:|:-------:|:------:|:----------|:----:|:-------:
      ${row("todo")}
      ${row("bbs")}
      ${row("reddit")}
      ${row("shopping")}

      ${exp.replays
        .map((r) =>
          project({
            replay: r,
            score: (exp.score as any)[r.project],
          }),
        )
        .join("\n\n\n")}
    `;
  };

  /**
   * Details of one replay: headline figures and a table with one row per
   * phase (phases that never ran get an empty row marked ⚪).
   */
  const project = (props: {
    replay: IAutoBePlaygroundReplay.ISummary;
    score: number;
  }): string => {
    const phase = (key: AutoBePhase): string => {
      const title: string = key.charAt(0).toUpperCase() + key.slice(1);
      const state: IAutoBePlaygroundReplay.IPhaseState | null =
        props.replay[key];
      if (state === null) return [`⚪ ${title}`, "", "", "", ""].join(" | ");
      return [
        `${state.success === true ? "🟢" : "🔴"} ${title}`,
        Object.entries(state.commodity)
          .map(([name, value]) => `\`${name}\`: ${value}`)
          .join(", "),
        formatTokens(state.aggregates.total.tokenUsage.total),
        formatElapsedTime(state.elapsed),
        Math.floor(successRate(state.aggregates.total.metric)) + "%",
      ].join(" | ");
    };
    const location: string = `${AutoBeExampleStorage.slugModel(
      props.replay.vendor,
      false,
    )}/${props.replay.project}`;
    return StringUtil.trim`
      ### \`${props.replay.vendor}\` - \`${props.replay.project}\`

      - Source Code: ${`[\`${location}\`](./${location}/)`}
      - Score: ${props.score}
      - Elapsed Time: ${formatElapsedTime(props.replay.elapsed)}
      - Token Usage: ${formatTokens(
        props.replay.aggregates.total.tokenUsage.total,
      )}
      - Function Calling Success Rate: ${successRate(
        props.replay.aggregates.total.metric,
      ).toFixed(2)}%

      Phase | Generated | Token Usage | Elapsed Time | FCSR
      :-----|:----------|------------:|-------------:|------:
      ${(["analyze", "prisma", "interface", "test", "realize"] as const)
        .map((key) => phase(key))
        .join("\n")}
    `;
  };
}
|
|
147
|
+
|
|
148
|
+
/**
 * Formats a duration in milliseconds as `"<h>h <m>m <s>s"`.
 *
 * Leading zero units are dropped: the hour part only appears when it is
 * positive, and the minute part only when hours or minutes are positive.
 * Fractions of a second are truncated.
 */
function formatElapsedTime(ms: number): string {
  const totalSeconds = Math.floor(ms / 1000);
  const totalMinutes = Math.floor(totalSeconds / 60);
  const hours = Math.floor(totalMinutes / 60);
  const minutes = totalMinutes % 60;

  // Build from the smallest unit and prepend the larger ones as needed.
  const parts: string[] = [`${totalSeconds % 60}s`];
  if (hours > 0 || minutes > 0) parts.unshift(`${minutes}m`);
  if (hours > 0) parts.unshift(`${hours}h`);
  return parts.join(" ");
}
|
|
165
|
+
|
|
166
|
+
/**
 * Formats a token count compactly: `"1.5K"` from one thousand, `"2.50M"` from
 * one million, the plain number below one thousand.
 *
 * Counts just under one million that would round up to `"1000.0K"` are
 * printed in millions instead (`999999` becomes `"1.00M"`).
 */
function formatTokens(num: number): string {
  if (num < 1000) return num.toString();
  if (num < 1000000) {
    const thousands: string = (num / 1000).toFixed(1);
    // Rounding may push the value to the next unit; fall through in that case.
    if (thousands !== "1000.0") return `${thousands}K`;
  }
  return `${(num / 1000000).toFixed(2)}M`;
}
|