@autobe/benchmark 0.30.4-dev.20260324 → 0.30.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -6,13 +6,13 @@ const AutoBeExampleStorage_1 = require("../example/AutoBeExampleStorage");
6
6
  var AutoBeReplayDocumentation;
7
7
  (function (AutoBeReplayDocumentation) {
8
8
  AutoBeReplayDocumentation.readme = (experiments) => {
9
- return utils_1.StringUtil.trim `
10
- # AutoBe Generated Examples
11
-
12
- ## Benchmark
13
-
14
- AI Model | Success | Score | FCSR | Status
15
- :--------|---------|------:|-----:|:------:
9
+ return utils_1.StringUtil.trim `
10
+ # AutoBe Generated Examples
11
+
12
+ ## Benchmark
13
+
14
+ AI Model | Success | Score | FCSR | Status
15
+ :--------|---------|------:|-----:|:------:
16
16
  ${experiments
17
17
  .map((e) => [
18
18
  `[\`${AutoBeExampleStorage_1.AutoBeExampleStorage.slugModel(e.vendor, false)}\`](#${AutoBeExampleStorage_1.AutoBeExampleStorage.slugModel(e.vendor, false)
@@ -29,15 +29,15 @@ var AutoBeReplayDocumentation;
29
29
  })(),
30
30
  e.emoji,
31
31
  ].join(" | "))
32
- .join("\n")}
33
-
34
- - FCSR: Function Calling Success Rate
35
- - Status:
36
- - 🟢: All projects completed successfully
37
- - 🟡: Some projects failed
38
- - ❌: All projects failed or not executed
39
-
40
- ${experiments.map(vendor).join("\n\n\n")}
32
+ .join("\n")}
33
+
34
+ - FCSR: Function Calling Success Rate
35
+ - Status:
36
+ - 🟢: All projects completed successfully
37
+ - 🟡: Some projects failed
38
+ - ❌: All projects failed or not executed
39
+
40
+ ${experiments.map(vendor).join("\n\n\n")}
41
41
  `;
42
42
  };
43
43
  const vendor = (exp) => {
@@ -64,23 +64,23 @@ var AutoBeReplayDocumentation;
64
64
  phase(found.realize),
65
65
  ].join(" | ");
66
66
  };
67
- return utils_1.StringUtil.trim `
68
- ## \`${exp.vendor}\`
69
-
70
- Project | Score | Analyze | Prisma | Interface | Test | Realize
71
- :-------|------:|:-------:|:------:|:----------|:----:|:-------:
72
- ${row("todo")}
73
- ${row("reddit")}
74
- ${row("shopping")}
75
- ${row("erp")}
76
-
67
+ return utils_1.StringUtil.trim `
68
+ ## \`${exp.vendor}\`
69
+
70
+ Project | Score | Analyze | Prisma | Interface | Test | Realize
71
+ :-------|------:|:-------:|:------:|:----------|:----:|:-------:
72
+ ${row("todo")}
73
+ ${row("reddit")}
74
+ ${row("shopping")}
75
+ ${row("erp")}
76
+
77
77
  ${exp.replays
78
78
  .map((r) => project({
79
79
  replay: r,
80
80
  // biome-ignore lint: intended
81
81
  score: exp.score[r.project],
82
82
  }))
83
- .join("\n\n\n")}
83
+ .join("\n\n\n")}
84
84
  `;
85
85
  };
86
86
  const project = (props) => {
@@ -101,22 +101,22 @@ var AutoBeReplayDocumentation;
101
101
  100) + "%",
102
102
  ].join(" | ");
103
103
  };
104
- return utils_1.StringUtil.trim `
105
- ### \`${props.replay.vendor}\` - \`${props.replay.project}\`
106
-
107
- - Source Code: ${`[\`${AutoBeExampleStorage_1.AutoBeExampleStorage.slugModel(props.replay.vendor, false)}/${props.replay.project}\`](./${AutoBeExampleStorage_1.AutoBeExampleStorage.slugModel(props.replay.vendor, false)}/${props.replay.project}/)`}
108
- - Score: ${props.score}
109
- - Elapsed Time: ${formatElapsedTime(props.replay.elapsed)}
110
- - Token Usage: ${formatTokens(props.replay.aggregates.total.tokenUsage.total)}
104
+ return utils_1.StringUtil.trim `
105
+ ### \`${props.replay.vendor}\` - \`${props.replay.project}\`
106
+
107
+ - Source Code: ${`[\`${AutoBeExampleStorage_1.AutoBeExampleStorage.slugModel(props.replay.vendor, false)}/${props.replay.project}\`](./${AutoBeExampleStorage_1.AutoBeExampleStorage.slugModel(props.replay.vendor, false)}/${props.replay.project}/)`}
108
+ - Score: ${props.score}
109
+ - Elapsed Time: ${formatElapsedTime(props.replay.elapsed)}
110
+ - Token Usage: ${formatTokens(props.replay.aggregates.total.tokenUsage.total)}
111
111
  - Function Calling Success Rate: ${((props.replay.aggregates.total.metric.success /
112
112
  props.replay.aggregates.total.metric.attempt) *
113
- 100).toFixed(2)}%
114
-
115
- Phase | Generated | Token Usage | Elapsed Time | FCSR
116
- :-----|:----------|------------:|-------------:|------:
113
+ 100).toFixed(2)}%
114
+
115
+ Phase | Generated | Token Usage | Elapsed Time | FCSR
116
+ :-----|:----------|------------:|-------------:|------:
117
117
  ${["analyze", "database", "interface", "test", "realize"]
118
118
  .map((key) => phase(key))
119
- .join("\n")}
119
+ .join("\n")}
120
120
  `;
121
121
  };
122
122
  })(AutoBeReplayDocumentation || (exports.AutoBeReplayDocumentation = AutoBeReplayDocumentation = {}));
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@autobe/benchmark",
3
- "version": "0.30.4-dev.20260324",
3
+ "version": "0.30.4",
4
4
  "description": "AI backend server code generator",
5
5
  "main": "lib/index.js",
6
6
  "keywords": [],
@@ -27,9 +27,9 @@
27
27
  "tstl": "^3.0.0",
28
28
  "typia": "^12.0.1",
29
29
  "uuid": "^11.1.0",
30
- "@autobe/utils": "^0.30.4-dev.20260324",
31
- "@autobe/filesystem": "^0.30.4-dev.20260324",
32
- "@autobe/interface": "^0.30.4-dev.20260324"
30
+ "@autobe/interface": "^0.30.4",
31
+ "@autobe/filesystem": "^0.30.4",
32
+ "@autobe/utils": "^0.30.4"
33
33
  },
34
34
  "devDependencies": {
35
35
  "@types/uuid": "^10.0.0",
@@ -1,183 +1,183 @@
1
- import {
2
- AutoBeExampleProject,
3
- AutoBeHistory,
4
- AutoBePhase,
5
- IAutoBePlaygroundBenchmarkScore,
6
- IAutoBePlaygroundReplay,
7
- } from "@autobe/interface";
8
- import { AutoBeProcessAggregateFactory } from "@autobe/utils";
9
- import typia from "typia";
10
-
11
- export namespace AutoBeReplayComputer {
12
- export const SIGNIFICANT_PROJECTS: AutoBeExampleProject[] = [
13
- "todo",
14
- "reddit",
15
- "shopping",
16
- "erp",
17
- ];
18
-
19
- export const emoji = (
20
- summaries: IAutoBePlaygroundReplay.ISummary[],
21
- ): string => {
22
- const success: number = summaries.filter(
23
- (s) => s.realize !== null && s.realize.success === true,
24
- ).length;
25
- if (success >= 3) return "🟢";
26
-
27
- const tested: boolean = !!summaries.find((s) => s.test !== null);
28
- return tested ? "🟡" : "❌";
29
- };
30
-
31
- export const score = (
32
- summaries: IAutoBePlaygroundReplay.ISummary[],
33
- ): IAutoBePlaygroundBenchmarkScore => {
34
- // list up significant projects
35
- summaries = summaries.filter((s) =>
36
- ["todo", "reddit", "shopping", "erp"].includes(s.project),
37
- );
38
-
39
- const individual = (project: AutoBeExampleProject): number => {
40
- const found = summaries.find((s) => s.project === project);
41
- if (found === undefined) return 0;
42
- return compute(found);
43
- };
44
- return {
45
- aggregate: round(summaries.map(compute).reduce((a, b) => a + b, 0) / 4),
46
- todo: individual("todo"),
47
- reddit: individual("reddit"),
48
- shopping: individual("shopping"),
49
- erp: individual("erp"),
50
- };
51
- };
52
-
53
- export const summarize = (
54
- replay: IAutoBePlaygroundReplay,
55
- ): IAutoBePlaygroundReplay.ISummary => {
56
- const predicate = <Type extends AutoBePhase>(
57
- type: Type,
58
- success: (history: AutoBeHistory.Mapper[Type]) => boolean,
59
- commodity: (
60
- history: AutoBeHistory.Mapper[Type],
61
- ) => Record<string, number>,
62
- ): IAutoBePlaygroundReplay.IPhaseState | null => {
63
- const reversed: AutoBeHistory[] = replay.histories.slice().reverse();
64
- const step: number | undefined = reversed.find(
65
- (h) => h.type === "analyze",
66
- )?.step;
67
- if (step === undefined) return null;
68
-
69
- const history: AutoBeHistory.Mapper[Type] | undefined = reversed.find(
70
- (h) => h.type === type && h.step === step,
71
- ) as AutoBeHistory.Mapper[Type] | undefined;
72
- if (history === undefined) return null;
73
- return {
74
- success: success(history),
75
- commodity: commodity(history),
76
- elapsed:
77
- new Date(history.completed_at).getTime() -
78
- new Date(history.created_at).getTime(),
79
- aggregates: history.aggregates,
80
- };
81
- };
82
- const phaseStates: Record<
83
- AutoBePhase,
84
- IAutoBePlaygroundReplay.IPhaseState | null
85
- > = {
86
- analyze: predicate(
87
- "analyze",
88
- () => true,
89
- (h) => ({
90
- actors: h.actors.length,
91
- documents: h.files.length,
92
- }),
93
- ),
94
- database: predicate(
95
- "database",
96
- (h) => h.compiled.type === "success",
97
- (h) => ({
98
- namespaces: h.result.data.files.length,
99
- models: h.result.data.files.map((f) => f.models).flat().length,
100
- }),
101
- ),
102
- interface: predicate(
103
- "interface",
104
- (h) => h.missed.length === 0,
105
- (h) => ({
106
- operations: h.document.operations.length,
107
- schemas: Object.keys(h.document.components.schemas).length,
108
- }),
109
- ),
110
- test: predicate(
111
- "test",
112
- (h) => h.compiled.type === "success",
113
- (h) => ({
114
- functions: h.functions.length,
115
- ...(h.compiled.type === "failure"
116
- ? {
117
- errors: new Set(h.compiled.diagnostics.map((d) => d.file ?? ""))
118
- .size,
119
- }
120
- : {}),
121
- }),
122
- ),
123
- realize: predicate(
124
- "realize",
125
- (h) => h.compiled.type === "success",
126
- (h) => ({
127
- functions: h.functions.length,
128
- ...(h.compiled.type === "failure"
129
- ? {
130
- errors: new Set(h.compiled.diagnostics.map((d) => d.file ?? ""))
131
- .size,
132
- }
133
- : {}),
134
- }),
135
- ),
136
- };
137
- const phase: AutoBePhase | null =
138
- (["realize", "test", "interface", "database", "analyze"] as const).find(
139
- (key) => phaseStates[key] !== null,
140
- ) ?? null;
141
- return {
142
- vendor: replay.vendor,
143
- project: replay.project,
144
- ...phaseStates,
145
- aggregates: AutoBeProcessAggregateFactory.reduce(
146
- Object.values(phaseStates)
147
- .filter((p) => p !== null)
148
- .map((p) => p.aggregates),
149
- ),
150
- phase,
151
- elapsed: Object.values(phaseStates)
152
- .map((p) => p?.elapsed ?? 0)
153
- .reduce((a, b) => a + (b ?? 0), 0),
154
- };
155
- };
156
- }
157
-
158
- const compute = (summary: IAutoBePlaygroundReplay.ISummary): number => {
159
- const getScore = (phase: AutoBePhase): number => {
160
- const state = summary[phase];
161
- if (state === null) return 0;
162
-
163
- const [success, failure] = FORMULA[phase];
164
- return state.success === true
165
- ? success
166
- : success * failure(state.commodity);
167
- };
168
- return round(sum(typia.misc.literals<AutoBePhase>().map(getScore)));
169
- };
170
- const round = (value: number) => Math.round(value * 100) / 100;
171
- const sum = (targets: number[]): number => targets.reduce((a, b) => a + b, 0);
172
-
173
- // for type safety
174
- const FORMULA: Record<
175
- AutoBePhase,
176
- [number, (commodity: Record<string, number>) => number]
177
- > = {
178
- analyze: [10, () => 0],
179
- database: [20, () => 0.5],
180
- interface: [30, () => 0.5],
181
- test: [20, (c) => Math.max(0.5, 1 - (c.errors * 3) / c.functions)],
182
- realize: [20, (c) => Math.max(0.5, 1 - (c.errors * 3) / c.functions)],
183
- };
1
+ import {
2
+ AutoBeExampleProject,
3
+ AutoBeHistory,
4
+ AutoBePhase,
5
+ IAutoBePlaygroundBenchmarkScore,
6
+ IAutoBePlaygroundReplay,
7
+ } from "@autobe/interface";
8
+ import { AutoBeProcessAggregateFactory } from "@autobe/utils";
9
+ import typia from "typia";
10
+
11
+ export namespace AutoBeReplayComputer {
12
+ export const SIGNIFICANT_PROJECTS: AutoBeExampleProject[] = [
13
+ "todo",
14
+ "reddit",
15
+ "shopping",
16
+ "erp",
17
+ ];
18
+
19
+ export const emoji = (
20
+ summaries: IAutoBePlaygroundReplay.ISummary[],
21
+ ): string => {
22
+ const success: number = summaries.filter(
23
+ (s) => s.realize !== null && s.realize.success === true,
24
+ ).length;
25
+ if (success >= 3) return "🟢";
26
+
27
+ const tested: boolean = !!summaries.find((s) => s.test !== null);
28
+ return tested ? "🟡" : "❌";
29
+ };
30
+
31
+ export const score = (
32
+ summaries: IAutoBePlaygroundReplay.ISummary[],
33
+ ): IAutoBePlaygroundBenchmarkScore => {
34
+ // list up significant projects
35
+ summaries = summaries.filter((s) =>
36
+ ["todo", "reddit", "shopping", "erp"].includes(s.project),
37
+ );
38
+
39
+ const individual = (project: AutoBeExampleProject): number => {
40
+ const found = summaries.find((s) => s.project === project);
41
+ if (found === undefined) return 0;
42
+ return compute(found);
43
+ };
44
+ return {
45
+ aggregate: round(summaries.map(compute).reduce((a, b) => a + b, 0) / 4),
46
+ todo: individual("todo"),
47
+ reddit: individual("reddit"),
48
+ shopping: individual("shopping"),
49
+ erp: individual("erp"),
50
+ };
51
+ };
52
+
53
+ export const summarize = (
54
+ replay: IAutoBePlaygroundReplay,
55
+ ): IAutoBePlaygroundReplay.ISummary => {
56
+ const predicate = <Type extends AutoBePhase>(
57
+ type: Type,
58
+ success: (history: AutoBeHistory.Mapper[Type]) => boolean,
59
+ commodity: (
60
+ history: AutoBeHistory.Mapper[Type],
61
+ ) => Record<string, number>,
62
+ ): IAutoBePlaygroundReplay.IPhaseState | null => {
63
+ const reversed: AutoBeHistory[] = replay.histories.slice().reverse();
64
+ const step: number | undefined = reversed.find(
65
+ (h) => h.type === "analyze",
66
+ )?.step;
67
+ if (step === undefined) return null;
68
+
69
+ const history: AutoBeHistory.Mapper[Type] | undefined = reversed.find(
70
+ (h) => h.type === type && h.step === step,
71
+ ) as AutoBeHistory.Mapper[Type] | undefined;
72
+ if (history === undefined) return null;
73
+ return {
74
+ success: success(history),
75
+ commodity: commodity(history),
76
+ elapsed:
77
+ new Date(history.completed_at).getTime() -
78
+ new Date(history.created_at).getTime(),
79
+ aggregates: history.aggregates,
80
+ };
81
+ };
82
+ const phaseStates: Record<
83
+ AutoBePhase,
84
+ IAutoBePlaygroundReplay.IPhaseState | null
85
+ > = {
86
+ analyze: predicate(
87
+ "analyze",
88
+ () => true,
89
+ (h) => ({
90
+ actors: h.actors.length,
91
+ documents: h.files.length,
92
+ }),
93
+ ),
94
+ database: predicate(
95
+ "database",
96
+ (h) => h.compiled.type === "success",
97
+ (h) => ({
98
+ namespaces: h.result.data.files.length,
99
+ models: h.result.data.files.map((f) => f.models).flat().length,
100
+ }),
101
+ ),
102
+ interface: predicate(
103
+ "interface",
104
+ (h) => h.missed.length === 0,
105
+ (h) => ({
106
+ operations: h.document.operations.length,
107
+ schemas: Object.keys(h.document.components.schemas).length,
108
+ }),
109
+ ),
110
+ test: predicate(
111
+ "test",
112
+ (h) => h.compiled.type === "success",
113
+ (h) => ({
114
+ functions: h.functions.length,
115
+ ...(h.compiled.type === "failure"
116
+ ? {
117
+ errors: new Set(h.compiled.diagnostics.map((d) => d.file ?? ""))
118
+ .size,
119
+ }
120
+ : {}),
121
+ }),
122
+ ),
123
+ realize: predicate(
124
+ "realize",
125
+ (h) => h.compiled.type === "success",
126
+ (h) => ({
127
+ functions: h.functions.length,
128
+ ...(h.compiled.type === "failure"
129
+ ? {
130
+ errors: new Set(h.compiled.diagnostics.map((d) => d.file ?? ""))
131
+ .size,
132
+ }
133
+ : {}),
134
+ }),
135
+ ),
136
+ };
137
+ const phase: AutoBePhase | null =
138
+ (["realize", "test", "interface", "database", "analyze"] as const).find(
139
+ (key) => phaseStates[key] !== null,
140
+ ) ?? null;
141
+ return {
142
+ vendor: replay.vendor,
143
+ project: replay.project,
144
+ ...phaseStates,
145
+ aggregates: AutoBeProcessAggregateFactory.reduce(
146
+ Object.values(phaseStates)
147
+ .filter((p) => p !== null)
148
+ .map((p) => p.aggregates),
149
+ ),
150
+ phase,
151
+ elapsed: Object.values(phaseStates)
152
+ .map((p) => p?.elapsed ?? 0)
153
+ .reduce((a, b) => a + (b ?? 0), 0),
154
+ };
155
+ };
156
+ }
157
+
158
+ const compute = (summary: IAutoBePlaygroundReplay.ISummary): number => {
159
+ const getScore = (phase: AutoBePhase): number => {
160
+ const state = summary[phase];
161
+ if (state === null) return 0;
162
+
163
+ const [success, failure] = FORMULA[phase];
164
+ return state.success === true
165
+ ? success
166
+ : success * failure(state.commodity);
167
+ };
168
+ return round(sum(typia.misc.literals<AutoBePhase>().map(getScore)));
169
+ };
170
+ const round = (value: number) => Math.round(value * 100) / 100;
171
+ const sum = (targets: number[]): number => targets.reduce((a, b) => a + b, 0);
172
+
173
+ // for type safety
174
+ const FORMULA: Record<
175
+ AutoBePhase,
176
+ [number, (commodity: Record<string, number>) => number]
177
+ > = {
178
+ analyze: [10, () => 0],
179
+ database: [20, () => 0.5],
180
+ interface: [30, () => 0.5],
181
+ test: [20, (c) => Math.max(0.5, 1 - (c.errors * 3) / c.functions)],
182
+ realize: [20, (c) => Math.max(0.5, 1 - (c.errors * 3) / c.functions)],
183
+ };