@agjs/tsforge 0.1.14 → 0.1.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +2 -1
- package/scripts/analyze-malformed.ts +264 -0
- package/scripts/analyze-runs.ts +279 -0
- package/scripts/benchmark-catalog.ts +387 -0
- package/scripts/browser-check.ts +87 -0
- package/scripts/build-rule-docs.ts +122 -0
- package/scripts/build-rules-md.ts +129 -0
- package/scripts/cli-metrics.ts +203 -0
- package/scripts/coverage-check.ts +33 -0
- package/scripts/edit-benchmark.ts +314 -0
- package/scripts/eval-create.ts +48 -0
- package/scripts/eval-spec.ts +47 -0
- package/scripts/eval-sum.ts +79 -0
- package/scripts/gen-tests.ts +140 -0
- package/scripts/headless-build.ts +292 -0
- package/scripts/interactive-eval.ts +172 -0
- package/scripts/rejudge.ts +135 -0
- package/scripts/run-eval-todo.ts +59 -0
- package/scripts/smoke.ts +18 -0
- package/scripts/stub-check.ts +44 -0
- package/scripts/sweep-report.ts +76 -0
- package/scripts/sweep.ts +389 -0
|
@@ -0,0 +1,387 @@
|
|
|
1
|
+
// The TSForge benchmark catalog — a FIXED pool of production-grade app domains
|
|
2
|
+
// (per ChatGPT's guidance, 2026-06-08) used to drive the headless self-improvement
|
|
3
|
+
// loop. A fixed catalog (not generator-invented prompts) is deliberate: it kills
|
|
4
|
+
// the variety-noise that made the overnight apps converge to one CRUD shape, and
|
|
5
|
+
// every domain is chosen to force complex type relationships, async state, forms,
|
|
6
|
+
// permissions, nested entities and edge cases — where a 27b starts to break down.
|
|
7
|
+
//
|
|
8
|
+
// The generation spec is ChatGPT's stress-test spec RECONCILED to our boringstack
|
|
9
|
+
// per-feature layout (user decision 2026-06-08): the layout differs from ChatGPT's
|
|
10
|
+
// by-layer folders, but the SEPARATION it demands (UI / logic / validation / types)
|
|
11
|
+
// is satisfied per-domain instead of in global folders.
|
|
12
|
+
|
|
13
|
+
/**
 * One fixed benchmark domain: the app a build run is asked to produce.
 * Every member is readonly — catalog entries are static data.
 */
export interface IBenchmarkApp {
  /** Stable identifier for the domain (e.g. "saas-crm"); usable as a lookup key. */
  readonly slug: string;
  /** Human-readable title of the app. */
  readonly name: string;
  /** One line on what the app IS, so each domain stays genuinely distinct. */
  readonly summary: string;
  /** The domain's data spine: the (≥8) entity types the model must model. */
  readonly entities: readonly string[];
  /** The concrete user flows that exercise forms / async / edge cases. */
  readonly flows: readonly string[];
}
|
|
23
|
+
|
|
24
|
+
export const BENCHMARK_CATALOG: readonly IBenchmarkApp[] = [
|
|
25
|
+
{
|
|
26
|
+
slug: "saas-crm",
|
|
27
|
+
name: "Multi-tenant SaaS CRM",
|
|
28
|
+
summary:
|
|
29
|
+
"A sales CRM scoped to an organization (tenant), with accounts, contacts, deals moving through a pipeline, and per-user roles.",
|
|
30
|
+
entities: [
|
|
31
|
+
"Organization (tenant)",
|
|
32
|
+
"User (with Role: owner | admin | rep)",
|
|
33
|
+
"Account",
|
|
34
|
+
"Contact",
|
|
35
|
+
"Deal (with a Stage discriminated union)",
|
|
36
|
+
"Activity (call | email | meeting — discriminated)",
|
|
37
|
+
"Note",
|
|
38
|
+
"Tag",
|
|
39
|
+
],
|
|
40
|
+
flows: [
|
|
41
|
+
"Dashboard: pipeline value by stage, win-rate, recent activity",
|
|
42
|
+
"Accounts list (search/filter/sort/paginate) → account detail with contacts + deals",
|
|
43
|
+
"Create/edit a deal (nested contact picker, conditional 'lost reason' when stage=lost)",
|
|
44
|
+
"Log an activity against a contact (discriminated form: fields change by type)",
|
|
45
|
+
"Optimistic stage change on the deal board with rollback on failure",
|
|
46
|
+
],
|
|
47
|
+
},
|
|
48
|
+
{
|
|
49
|
+
slug: "udemy",
|
|
50
|
+
name: "Online Course Marketplace",
|
|
51
|
+
summary:
|
|
52
|
+
"A Udemy-like learning marketplace: instructors publish courses made of sections and lessons; students enrol, track progress, and leave reviews.",
|
|
53
|
+
entities: [
|
|
54
|
+
"User (with Role: student | instructor | admin)",
|
|
55
|
+
"Course",
|
|
56
|
+
"Section",
|
|
57
|
+
"Lesson (with Content: video | article | quiz — discriminated)",
|
|
58
|
+
"Enrollment (with progress)",
|
|
59
|
+
"Review (rating 1-5)",
|
|
60
|
+
"Category",
|
|
61
|
+
"Coupon (with Discount: percentage | fixed — discriminated)",
|
|
62
|
+
],
|
|
63
|
+
flows: [
|
|
64
|
+
"Catalog: browse/search courses, filter by category + rating + price, paginate",
|
|
65
|
+
"Course detail: curriculum (sections → lessons), instructor bio, reviews, enrol button",
|
|
66
|
+
"Instructor studio: create/edit a course, add sections, add lessons (discriminated form — fields change by content type)",
|
|
67
|
+
"My learning: enrolled courses with progress bars, mark a lesson complete (optimistic) with rollback",
|
|
68
|
+
"Apply a coupon at enrol (discriminated discount math); leave a review after enrolling",
|
|
69
|
+
],
|
|
70
|
+
},
|
|
71
|
+
{
|
|
72
|
+
slug: "pm-platform",
|
|
73
|
+
name: "Project Management Platform",
|
|
74
|
+
summary:
|
|
75
|
+
"A Linear/Jira-like issue tracker: projects contain issues with status, priority, assignees, sub-tasks and comments.",
|
|
76
|
+
entities: [
|
|
77
|
+
"Workspace",
|
|
78
|
+
"Project",
|
|
79
|
+
"Issue (Status + Priority discriminated)",
|
|
80
|
+
"SubTask",
|
|
81
|
+
"User",
|
|
82
|
+
"Label",
|
|
83
|
+
"Comment",
|
|
84
|
+
"Milestone",
|
|
85
|
+
],
|
|
86
|
+
flows: [
|
|
87
|
+
"Dashboard: issues by status, overdue count, per-assignee load",
|
|
88
|
+
"Issue list with multi-facet filters (status, priority, assignee, label) + sort + pagination",
|
|
89
|
+
"Issue detail: edit inline, add sub-tasks, comment thread",
|
|
90
|
+
"Create issue (conditional fields: estimate only when type=story; due-date validation)",
|
|
91
|
+
"Optimistic status drag with rollback; cached project data reused across views",
|
|
92
|
+
],
|
|
93
|
+
},
|
|
94
|
+
{
|
|
95
|
+
slug: "hospital-scheduling",
|
|
96
|
+
name: "Hospital Scheduling System",
|
|
97
|
+
summary:
|
|
98
|
+
"Schedule patient appointments against clinicians, rooms and shifts, with conflict detection and waitlists.",
|
|
99
|
+
entities: [
|
|
100
|
+
"Clinician (with Specialty)",
|
|
101
|
+
"Patient",
|
|
102
|
+
"Appointment (Status discriminated)",
|
|
103
|
+
"Room",
|
|
104
|
+
"Shift",
|
|
105
|
+
"Department",
|
|
106
|
+
"WaitlistEntry",
|
|
107
|
+
"InsurancePlan",
|
|
108
|
+
],
|
|
109
|
+
flows: [
|
|
110
|
+
"Dashboard: today's schedule, utilization per room, no-show rate",
|
|
111
|
+
"Calendar/list of appointments, filterable by department/clinician/status",
|
|
112
|
+
"Book appointment (validate against clinician shift + room conflict; conditional referral field)",
|
|
113
|
+
"Reschedule with optimistic update + conflict rollback",
|
|
114
|
+
"Waitlist promotion when a slot frees up",
|
|
115
|
+
],
|
|
116
|
+
},
|
|
117
|
+
{
|
|
118
|
+
slug: "warehouse-inventory",
|
|
119
|
+
name: "Warehouse Inventory Management",
|
|
120
|
+
summary:
|
|
121
|
+
"Track SKUs across warehouses and bins, with stock movements, purchase orders and low-stock reordering.",
|
|
122
|
+
entities: [
|
|
123
|
+
"Product (SKU)",
|
|
124
|
+
"Warehouse",
|
|
125
|
+
"Bin",
|
|
126
|
+
"StockLevel",
|
|
127
|
+
"StockMovement (receipt | transfer | adjustment — discriminated)",
|
|
128
|
+
"PurchaseOrder (Status discriminated)",
|
|
129
|
+
"Supplier",
|
|
130
|
+
"Category",
|
|
131
|
+
],
|
|
132
|
+
flows: [
|
|
133
|
+
"Dashboard: total stock value, low-stock alerts, movements over time",
|
|
134
|
+
"Product list with search/filter by category & stock status, sort, paginate",
|
|
135
|
+
"Product detail: stock by warehouse/bin, movement history",
|
|
136
|
+
"Create a stock movement (discriminated form; transfer requires from+to bins)",
|
|
137
|
+
"Raise a purchase order (nested line items, async submit, success/error states)",
|
|
138
|
+
],
|
|
139
|
+
},
|
|
140
|
+
{
|
|
141
|
+
slug: "airline-ops",
|
|
142
|
+
name: "Airline Operations Dashboard",
|
|
143
|
+
summary:
|
|
144
|
+
"Monitor flights, aircraft, crew assignments and gates, with delay tracking and disruption handling.",
|
|
145
|
+
entities: [
|
|
146
|
+
"Flight (Status discriminated: scheduled | boarding | departed | delayed | cancelled)",
|
|
147
|
+
"Aircraft",
|
|
148
|
+
"Airport",
|
|
149
|
+
"CrewMember (with Role)",
|
|
150
|
+
"CrewAssignment",
|
|
151
|
+
"Gate",
|
|
152
|
+
"Route",
|
|
153
|
+
"Disruption",
|
|
154
|
+
],
|
|
155
|
+
flows: [
|
|
156
|
+
"Ops dashboard: on-time %, delays by cause, aircraft utilization",
|
|
157
|
+
"Flight board with filters (status, route, aircraft) + sort by departure + paginate",
|
|
158
|
+
"Flight detail: crew roster, gate, timeline; reassign a gate optimistically",
|
|
159
|
+
"Log a disruption (discriminated by cause; conditional weather/technical fields)",
|
|
160
|
+
"Crew assignment form with validation (rest-hours rule) + async submit",
|
|
161
|
+
],
|
|
162
|
+
},
|
|
163
|
+
{
|
|
164
|
+
slug: "portfolio-manager",
|
|
165
|
+
name: "Investment Portfolio Manager",
|
|
166
|
+
summary:
|
|
167
|
+
"Manage portfolios of holdings across asset classes, with transactions, allocations and performance.",
|
|
168
|
+
entities: [
|
|
169
|
+
"Portfolio",
|
|
170
|
+
"Holding",
|
|
171
|
+
"Asset (AssetClass discriminated: equity | bond | cash | fund)",
|
|
172
|
+
"Transaction (buy | sell | dividend — discriminated)",
|
|
173
|
+
"Account",
|
|
174
|
+
"Watchlist",
|
|
175
|
+
"PriceQuote",
|
|
176
|
+
"AllocationTarget",
|
|
177
|
+
],
|
|
178
|
+
flows: [
|
|
179
|
+
"Dashboard: total value, allocation pie, gain/loss, top movers",
|
|
180
|
+
"Holdings table (search/filter by asset class, sort by value/return, paginate)",
|
|
181
|
+
"Holding detail: transaction history, allocation vs target",
|
|
182
|
+
"Record a transaction (discriminated form; sell validates against quantity held)",
|
|
183
|
+
"Rebalance workflow: optimistic allocation edits with cached quotes",
|
|
184
|
+
],
|
|
185
|
+
},
|
|
186
|
+
{
|
|
187
|
+
slug: "procurement",
|
|
188
|
+
name: "Procurement & Vendor Platform",
|
|
189
|
+
summary:
|
|
190
|
+
"Manage vendors, requisitions, purchase orders and approvals through an approval chain.",
|
|
191
|
+
entities: [
|
|
192
|
+
"Vendor",
|
|
193
|
+
"Requisition (Status discriminated)",
|
|
194
|
+
"PurchaseOrder",
|
|
195
|
+
"LineItem",
|
|
196
|
+
"ApprovalStep",
|
|
197
|
+
"Contract",
|
|
198
|
+
"Budget",
|
|
199
|
+
"User (with ApprovalRole)",
|
|
200
|
+
],
|
|
201
|
+
flows: [
|
|
202
|
+
"Dashboard: spend by category, pending approvals, budget burn",
|
|
203
|
+
"Requisition list with filters (status, requester, budget) + sort + paginate",
|
|
204
|
+
"Requisition detail: line items, approval chain progress",
|
|
205
|
+
"Create requisition (nested line items, conditional justification when over budget)",
|
|
206
|
+
"Approve/reject step with optimistic update + rollback on async failure",
|
|
207
|
+
],
|
|
208
|
+
},
|
|
209
|
+
{
|
|
210
|
+
slug: "billing-console",
|
|
211
|
+
name: "Subscription Billing Console",
|
|
212
|
+
summary:
|
|
213
|
+
"Manage customers, subscription plans, invoices and payments, with proration and dunning.",
|
|
214
|
+
entities: [
|
|
215
|
+
"Customer",
|
|
216
|
+
"Plan (BillingInterval discriminated)",
|
|
217
|
+
"Subscription (Status discriminated: trialing | active | past_due | canceled)",
|
|
218
|
+
"Invoice (Status discriminated)",
|
|
219
|
+
"InvoiceLineItem",
|
|
220
|
+
"Payment (method discriminated: card | bank | credit)",
|
|
221
|
+
"Coupon",
|
|
222
|
+
"UsageRecord",
|
|
223
|
+
],
|
|
224
|
+
flows: [
|
|
225
|
+
"Dashboard: MRR, churn, overdue invoices, revenue trend",
|
|
226
|
+
"Invoice list (search/filter by status & customer, sort, paginate)",
|
|
227
|
+
"Customer detail: subscription, invoices, payment methods",
|
|
228
|
+
"Change plan (proration preview; conditional coupon field; async submit)",
|
|
229
|
+
"Record a payment (discriminated by method) with optimistic invoice status update",
|
|
230
|
+
],
|
|
231
|
+
},
|
|
232
|
+
{
|
|
233
|
+
slug: "ecommerce-admin",
|
|
234
|
+
name: "E-commerce Admin Suite",
|
|
235
|
+
summary:
|
|
236
|
+
"Back-office for an online store: products with variants, orders, fulfilment, customers and discounts.",
|
|
237
|
+
entities: [
|
|
238
|
+
"Product",
|
|
239
|
+
"Variant",
|
|
240
|
+
"Order (Status discriminated: pending | paid | fulfilled | refunded)",
|
|
241
|
+
"OrderItem",
|
|
242
|
+
"Customer",
|
|
243
|
+
"Discount (type discriminated: percent | fixed | bogo)",
|
|
244
|
+
"Fulfilment",
|
|
245
|
+
"Category",
|
|
246
|
+
],
|
|
247
|
+
flows: [
|
|
248
|
+
"Dashboard: sales today, top products, orders by status, low stock",
|
|
249
|
+
"Order list with filters (status, customer, date) + sort + paginate",
|
|
250
|
+
"Order detail: items, fulfilment timeline, refund workflow",
|
|
251
|
+
"Create/edit a product (nested variants array; conditional inventory per variant)",
|
|
252
|
+
"Create a discount (discriminated form; optimistic apply with rollback)",
|
|
253
|
+
],
|
|
254
|
+
},
|
|
255
|
+
{
|
|
256
|
+
slug: "incident-management",
|
|
257
|
+
name: "Incident Management Platform",
|
|
258
|
+
summary:
|
|
259
|
+
"Track operational incidents, severity, services affected, on-call responders, timelines and postmortems.",
|
|
260
|
+
entities: [
|
|
261
|
+
"Incident (Severity + Status discriminated)",
|
|
262
|
+
"Service",
|
|
263
|
+
"Responder",
|
|
264
|
+
"OnCallSchedule",
|
|
265
|
+
"TimelineEvent (type discriminated)",
|
|
266
|
+
"Postmortem",
|
|
267
|
+
"AlertRule",
|
|
268
|
+
"Team",
|
|
269
|
+
],
|
|
270
|
+
flows: [
|
|
271
|
+
"Dashboard: open incidents by severity, MTTR, services at risk",
|
|
272
|
+
"Incident list with filters (severity, status, service, team) + sort + paginate",
|
|
273
|
+
"Incident detail: timeline, responders, status updates",
|
|
274
|
+
"Declare incident (conditional fields by severity; async submit; validation)",
|
|
275
|
+
"Post a timeline update (discriminated event form) with optimistic append + rollback",
|
|
276
|
+
],
|
|
277
|
+
},
|
|
278
|
+
];
|
|
279
|
+
|
|
280
|
+
/**
|
|
281
|
+
* The generation spec — ChatGPT's production-grade stress-test, reconciled to our
|
|
282
|
+
* boringstack per-feature layout. Shared verbatim across every benchmark domain so
|
|
283
|
+
* results are comparable; only the DOMAIN section (built per app) changes.
|
|
284
|
+
*/
|
|
285
|
+
export const GENERATION_SPEC = `You are building a PRODUCTION-GRADE React + Vite + TypeScript application — not a demo.
|
|
286
|
+
The goal is a real, complete app a company would actually ship. Build the WHOLE thing; do not simplify, stub, or leave anything incomplete.
|
|
287
|
+
|
|
288
|
+
# Hard rules (the gate enforces these — code that breaks them does not pass)
|
|
289
|
+
FORBIDDEN: \`any\`, \`as\` casts (except \`as const\`), \`@ts-ignore\`/\`@ts-nocheck\`, non-null \`!\`, placeholder/TODO/dead code, fake or partial implementations.
|
|
290
|
+
REQUIRED: strict typing everywhere; interfaces are \`I\`-prefixed (\`IInvoice\`); ONE React component per .tsx file; functional components only.
|
|
291
|
+
|
|
292
|
+
# Type safety
|
|
293
|
+
Every entity has explicit types. Use discriminated unions, branded IDs where useful, \`readonly\` where appropriate, and EXHAUSTIVE \`switch\` statements to narrow them. Use a \`Result<T, E>\` style ONLY for genuinely fallible RUNTIME ops (e.g. a mock-async service call that can fail) — never for data you already typed.
|
|
294
|
+
There is NO backend, network, or uploaded data in this app: EVERY value originates from your own typed code + seed, so TypeScript has already proven its shape. The TYPE SYSTEM is the validation. NEVER write runtime parsers, entity validators, type-guard functions, or a \`*.validators.ts\` to "check" data the compiler already guarantees — type it correctly at the source and use it directly (\`x satisfies IType\` for a literal). Never use \`any\`, \`Record<string, any>\`, or \`as\` casts.
|
|
295
|
+
|
|
296
|
+
# UI surface (all of these must exist and work)
|
|
297
|
+
Dashboard · list view · detail view · creation workflow · editing workflow · search · filtering · sorting · pagination · modal workflow · form validation · toast notifications · loading states · error states · empty states.
|
|
298
|
+
|
|
299
|
+
# State
|
|
300
|
+
local state · derived state · async state · optimistic updates (with rollback on failure) · cached data · filtering/sorting/selection state.
|
|
301
|
+
|
|
302
|
+
# Forms (≥3)
|
|
303
|
+
Each form: validation · nested fields · conditional fields · async submission · error handling · success handling.
|
|
304
|
+
|
|
305
|
+
# Async / errors
|
|
306
|
+
Every async workflow handles loading, success, failure AND retry. No silent failures.
|
|
307
|
+
|
|
308
|
+
# Accessibility
|
|
309
|
+
Keyboard navigation, proper labels, aria attributes, focus management, accessible dialogs.
|
|
310
|
+
|
|
311
|
+
# Project structure — BORINGSTACK, per-feature (NOT by-layer)
|
|
312
|
+
Co-locate by domain under \`src/features/<domain>/\`:
|
|
313
|
+
<domain>.types.ts — entity types, discriminated unions, branded IDs
|
|
314
|
+
<domain>.constants.ts — \`as const\` registries / label maps (typed Record<Union, V>)
|
|
315
|
+
<domain>.service.ts — async data access (seeded/mock async with latency + failure paths)
|
|
316
|
+
<domain>.hooks.ts — ONLY genuine derived/computed state (the data hook is the SDK's useCollection; do NOT write a fetch/query wrapper)
|
|
317
|
+
<PascalCase>.tsx — ONE component per file
|
|
318
|
+
index.ts — barrel re-exporting the public surface
|
|
319
|
+
Shared shadcn primitives live in \`src/components/ui/\` (already scaffolded). Routes/pages are TanStack files under \`src/routes/\`.
|
|
320
|
+
This separates UI / business logic / data access / type definitions — colocated per domain, not in global folders.
|
|
321
|
+
|
|
322
|
+
# Domain complexity (minimum bar)
|
|
323
|
+
≥8 entity types · 20+ interfaces/types · multiple relationships · nested structures · enums-as-const · discriminated unions.
|
|
324
|
+
|
|
325
|
+
# Deliverables
|
|
326
|
+
Generate ALL files. Imports must resolve. TypeScript must compile (strict). React must render (no blank screen). Every listed user flow must be implemented and reachable in the UI. Do NOT simplify the requirements.`;
|
|
327
|
+
|
|
328
|
+
/**
 * Build the complete prompt for a single benchmark domain: the shared
 * GENERATION_SPEC first, then the app-specific section (title, summary,
 * entity list, flow list) and a closing instruction.
 *
 * @param app - Catalog entry whose name, summary, entities and flows are rendered.
 * @returns The full prompt text, with sections separated by blank lines.
 */
export function buildBenchmarkPrompt(app: IBenchmarkApp): string {
  // Render a list as one " - item" bullet per line.
  const toBullets = (items: readonly string[]): string =>
    items.map((item) => ` - ${item}`).join("\n");

  // One array element per output line; "" yields a blank separator line.
  const lines = [
    GENERATION_SPEC,
    "",
    `# THE APP TO BUILD: ${app.name}`,
    app.summary,
    "",
    "## Entities to model (at least these)",
    toBullets(app.entities),
    "",
    "## User flows to implement",
    toBullets(app.flows),
    "",
    "Build this specific application, in full, following every rule above.",
  ];

  return lines.join("\n");
}
|
|
346
|
+
|
|
347
|
+
/**
 * Look up a benchmark app by slug or by 1-based catalog index.
 *
 * A slug match always wins. Otherwise the selector is treated as an index only
 * when it is a plain run of decimal digits — `Number()` on its own would also
 * accept forms such as "1e0", "0x2", "2.0" or " 3 ", which are not catalog
 * positions.
 *
 * @param selector - An app slug (e.g. "saas-crm") or a position such as "3".
 * @returns The matching app, or `undefined` when nothing matches or the index
 *   is outside `1..BENCHMARK_CATALOG.length`.
 */
export function findBenchmarkApp(selector: string): IBenchmarkApp | undefined {
  const bySlug = BENCHMARK_CATALOG.find((app) => app.slug === selector);

  if (bySlug !== undefined) {
    return bySlug;
  }

  // Digits only: rejects empty, signed, padded, hex, exponent and decimal forms.
  if (!/^\d+$/.test(selector)) {
    return undefined;
  }

  const position = Number(selector);

  if (position < 1 || position > BENCHMARK_CATALOG.length) {
    return undefined;
  }

  return BENCHMARK_CATALOG[position - 1];
}
|
|
367
|
+
|
|
368
|
+
/**
|
|
369
|
+
* The JUDGE rubric (ChatGPT's grading prompt) — for the OFFLINE flagship review of
|
|
370
|
+
* a built app (never a runtime dependency). Pair with rejudge.ts / a flagship judge
|
|
371
|
+
* via TSFORGE_JUDGE_*; it grades the diff against the same bar the spec demands.
|
|
372
|
+
*/
|
|
373
|
+
export const JUDGE_RUBRIC = `You are grading a React + Vite + TypeScript application built by a coding system, against a production-grade bar. Be a harsh, specific senior reviewer. Score 1-5 overall and per-dimension, and list concrete defects (file + what's wrong).
|
|
374
|
+
|
|
375
|
+
Dimensions:
|
|
376
|
+
1. Type safety — discriminated unions, branded IDs, readonly, exhaustive switches, Result<T,E> for genuinely-fallible runtime ops; NO any/as/!/Record<string,any>. PENALIZE runtime parsers/entity-validators/type-guards/*.validators.ts that "check" already-typed seed data — there is no untrusted input, so that is dead ceremony, not type safety.
|
|
377
|
+
2. State management — local/derived/async/optimistic(+rollback)/cached/filter/sort/selection actually present and correct.
|
|
378
|
+
3. Component architecture — one component per file, clean props, no god-components, sensible composition.
|
|
379
|
+
4. Forms — ≥3 with validation, nested + conditional fields, async submit, error AND success handling.
|
|
380
|
+
5. Data modeling — ≥8 entities, 20+ types, real relationships, nested structures.
|
|
381
|
+
6. Error/async handling — every async path handles loading/success/failure/retry; no silent failures.
|
|
382
|
+
7. UI completeness — dashboard, list, detail, create, edit, search, filter, sort, paginate, modal, toasts, loading/error/empty states ALL present and reachable.
|
|
383
|
+
8. Accessibility — keyboard nav, labels, aria, focus management, accessible dialogs.
|
|
384
|
+
9. Runtime robustness — renders (no blank screen), survives interaction, no console/uncaught errors.
|
|
385
|
+
10. Realism — feels like a real company app, not a toy.
|
|
386
|
+
|
|
387
|
+
For each dimension: score + the single most important defect. Then an overall score and the top 3 things to fix. Penalize HARD for: missing flows, stubbed/placeholder code, type holes, blank-screen or crash-on-interaction.`;
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
// Gate-runnable browser check: render an HTML file in headless chromium (served
|
|
2
|
+
// over http) and exit non-zero (printing failures) if it errors or fails its
|
|
3
|
+
// checks. Used as part of a gate for web builds — proves the page actually runs
|
|
4
|
+
// AND behaves.
|
|
5
|
+
//
|
|
6
|
+
// bun browser-check.ts <htmlFile> # render-only (no errors)
|
|
7
|
+
// bun browser-check.ts <htmlFile> --smoke # render + generic behaviour smoke
|
|
8
|
+
// bun browser-check.ts <htmlFile> <checks.json> # render + interaction checks
|
|
9
|
+
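// bun browser-check.ts <htmlFile> <selector> [text] # render + expect the selector (and its text)
// bun browser-check.ts <htmlFile> --crawl # also render-check the app's static routes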
|
|
10
|
+
import { readdir } from "node:fs/promises";
|
|
11
|
+
import { dirname, join } from "node:path";
|
|
12
|
+
import { renderCheck, parseChecks, type IRenderOptions } from "../src/browser";
|
|
13
|
+
import { crawlableRoutePaths } from "../src/web-routes";
|
|
14
|
+
|
|
15
|
+
// Everything after the runtime and the script path.
const rawArgs = process.argv.slice(2);

// Mode switches may appear anywhere on the command line.
const MODE_FLAGS = ["--smoke", "--crawl"];
const smoke = rawArgs.includes("--smoke");
const crawl = rawArgs.includes("--crawl");

// What is left once the switches are removed is positional:
// <htmlFile>, then either <checks.json> or <selector> [text].
const positional = rawArgs.filter((arg) => !MODE_FLAGS.includes(arg));
const [file, arg2, arg3] = positional;

// No HTML file given: print usage on stderr and exit with status 2.
if (file === undefined) {
  process.stderr.write(
    "usage: browser-check.ts <htmlFile> [--smoke] [--crawl] [checks.json | selector [text]]\n"
  );
  process.exit(2);
}
|
|
28
|
+
|
|
29
|
+
/**
 * Collect the extra routes to render-check when --crawl is set, so every page —
 * not just the home — is covered.
 *
 * The routes folder is resolved two directory levels above the HTML file, at
 * `<buildDir>/src/routes/` (the build dir being the parent of dist/). Only
 * `.tsx` entries are handed to `crawlableRoutePaths`; dynamic ($param) routes
 * are skipped (presumably inside that helper — it is not visible here).
 *
 * @returns The crawlable route paths, or an empty list when --crawl is off or
 *   anything in the lookup throws (e.g. the routes directory cannot be read).
 */
async function routesFor(): Promise<string[]> {
  if (!crawl) {
    return [];
  }

  const buildDir = dirname(dirname(file ?? "."));
  const routesDir = join(buildDir, "src", "routes");

  try {
    const entries = await readdir(routesDir);
    const routeFiles = entries.filter((entry) => entry.endsWith(".tsx"));

    return crawlableRoutePaths(routeFiles);
  } catch {
    // Best-effort: on any failure there is simply nothing extra to crawl.
    return [];
  }
}
|
|
47
|
+
|
|
48
|
+
/**
 * Translate the optional positional arguments into render options.
 *
 * - no second argument → `{}` (render-only)
 * - a `*.json` argument → options parsed from that checks file, or `{}` when
 *   the file does not exist
 * - anything else → an `expect` check: `arg2` is the selector and `arg3`, when
 *   given, the text
 */
async function checksFor(): Promise<Partial<IRenderOptions>> {
  if (arg2 === undefined) {
    return {};
  }

  if (!arg2.endsWith(".json")) {
    const expect =
      arg3 === undefined ? { selector: arg2 } : { selector: arg2, text: arg3 };

    return { expect };
  }

  // Named checksFile so it does not shadow the module-level `file` (the HTML path).
  const checksFile = Bun.file(arg2);

  // Tolerate a missing checks file → render-only (the model may not write one).
  if (!(await checksFile.exists())) {
    return {};
  }

  return parseChecks(JSON.parse(await checksFile.text()));
}
|
|
68
|
+
|
|
69
|
+
// Run the render check with the HTML file, the smoke switch, any crawled routes
// and whatever extra options the positional arguments produced.
const extraRoutes = await routesFor();
const extraChecks = await checksFor();
const result = await renderCheck({
  file,
  smoke,
  routes: extraRoutes,
  ...extraChecks,
});

if (!result.ok) {
  // Failure: report on stdout, one bullet per error, then exit 1.
  let report = `browser-check FAILED for ${file}:\n`;

  for (const failure of result.errors) {
    report += ` - ${failure}\n`;
  }

  process.stdout.write(report);
  process.exit(1);
}

// Success: one confirmation line, exit 0.
process.stdout.write(`browser-check: ${file} renders + behaves correctly\n`);
process.exit(0);
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
// Offline cache builder: fetch typescript-eslint rule docs from their own
|
|
2
|
+
// source `.mdx`, parse the ❌/✅ examples deterministically, and also scrape
|
|
3
|
+
// tsforge pack rule descriptions, then write the committed cache the repair
|
|
4
|
+
// loop reads. Run when rules change:
|
|
5
|
+
// bun run packages/core/scripts/build-rule-docs.ts
|
|
6
|
+
import { join } from "node:path";
|
|
7
|
+
import { parseRuleMdx, type IRuleDoc } from "../src/loop/feedback/rule-docs";
|
|
8
|
+
import { RULE_PACKS } from "../src/rule-packs";
|
|
9
|
+
|
|
10
|
+
const BASE =
|
|
11
|
+
"https://raw.githubusercontent.com/typescript-eslint/typescript-eslint/main/packages/eslint-plugin/docs/rules";
|
|
12
|
+
|
|
13
|
+
// The strict-mode rules that actually fire on TypeScript — the ones a repair
|
|
14
|
+
// loop hits. Curated entries in rule-docs.ts override these where they exist.
|
|
15
|
+
const RULES = [
|
|
16
|
+
"no-explicit-any",
|
|
17
|
+
"no-unsafe-argument",
|
|
18
|
+
"no-unsafe-assignment",
|
|
19
|
+
"no-unsafe-call",
|
|
20
|
+
"no-unsafe-member-access",
|
|
21
|
+
"no-unsafe-return",
|
|
22
|
+
"no-non-null-assertion",
|
|
23
|
+
"restrict-plus-operands",
|
|
24
|
+
"restrict-template-expressions",
|
|
25
|
+
"strict-boolean-expressions",
|
|
26
|
+
"no-floating-promises",
|
|
27
|
+
"no-misused-promises",
|
|
28
|
+
"await-thenable",
|
|
29
|
+
"no-for-in-array",
|
|
30
|
+
"prefer-nullish-coalescing",
|
|
31
|
+
"prefer-optional-chain",
|
|
32
|
+
"no-unnecessary-condition",
|
|
33
|
+
"no-unnecessary-type-assertion",
|
|
34
|
+
"switch-exhaustiveness-check",
|
|
35
|
+
"consistent-type-assertions",
|
|
36
|
+
"no-base-to-string",
|
|
37
|
+
"require-await",
|
|
38
|
+
"no-confusing-void-expression",
|
|
39
|
+
"no-redundant-type-constituents",
|
|
40
|
+
"prefer-reduce-type-parameter",
|
|
41
|
+
];
|
|
42
|
+
|
|
43
|
+
/**
 * Read `meta.docs.description` from a rule module of unknown shape.
 *
 * Each level (`obj`, `meta`, `docs`) is checked to be a non-null object before
 * it is dereferenced, so any value can be passed in safely.
 *
 * @param obj - A rule module (or any other value).
 * @returns The description when it is a non-blank string; otherwise `undefined`,
 *   so callers can apply a `??` fallback. Blank descriptions count as missing:
 *   `""` is not nullish and would otherwise defeat such a fallback.
 */
function getRuleDescription(obj: unknown): string | undefined {
  const isObject = (val: unknown): val is Record<string, unknown> =>
    val !== null && typeof val === "object";

  // Walk obj → meta → docs → description, bailing out at the first non-object.
  let current: unknown = obj;

  for (const key of ["meta", "docs", "description"]) {
    if (!isObject(current)) {
      return undefined;
    }

    current = current[key];
  }

  if (typeof current !== "string" || current.trim() === "") {
    return undefined;
  }

  return current;
}
|
|
67
|
+
|
|
68
|
+
// Every rule doc gathered by this script, keyed by fully-qualified rule id.
const out: Record<string, IRuleDoc> = {};
// Tally of typescript-eslint rules cached vs. skipped, for the final summary.
let ok = 0;
let missed = 0;

// Log a skipped rule together with the reason, and count it.
const recordMiss = (rule: string, reason: string): void => {
  process.stdout.write(` miss ${rule} (${reason})\n`);
  missed += 1;
};

// Rules are fetched one at a time, in list order.
for (const rule of RULES) {
  const res = await fetch(`${BASE}/${rule}.mdx`);

  if (!res.ok) {
    recordMiss(rule, `HTTP ${res.status}`);
    continue;
  }

  const doc = parseRuleMdx(await res.text());

  // parseRuleMdx signals an unusable document with null.
  if (doc === null) {
    recordMiss(rule, "unparseable");
    continue;
  }

  out[`@typescript-eslint/${rule}`] = doc;
  ok += 1;
}
|
|
92
|
+
|
|
93
|
+
// Second source: tsforge's own rule packs. Each rule gets an entry under
// `tsforge/<name>` whose text is its meta description (falling back to the rule
// name); the bad/good snippets are fixed placeholder comments.
let packRulesAdded = 0;

for (const { rules } of Object.values(RULE_PACKS)) {
  for (const [name, ruleImpl] of Object.entries(rules)) {
    out[`tsforge/${name}`] = {
      what: getRuleDescription(ruleImpl) ?? name,
      bad: `// Example that violates the rule`,
      good: `// Corrected version`,
    };

    packRulesAdded += 1;
  }
}

// Target: <script dir>/../src/loop/rule-docs.generated.json.
// NOTE(review): parseRuleMdx is imported from src/loop/feedback/ — confirm the
// reader of this cache expects it directly under src/loop/.
const path = join(import.meta.dir, "..", "src", "loop", "rule-docs.generated.json");
const serialized = `${JSON.stringify(out, null, 2)}\n`;

await Bun.write(path, serialized);
process.stdout.write(
  `\nwrote ${ok} eslint rules (${missed} missed), ${packRulesAdded} pack rules → ${path}\n`
);
|