ai-spec-dev 0.56.0 → 0.58.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -12,7 +12,7 @@
12
12
  <p align="center">
13
13
  <a href="https://github.com/hzhongzhong/ai-spec"><img src="https://img.shields.io/badge/GitHub-ai--spec-181717?logo=github" alt="GitHub" /></a>
14
14
  <a href="https://www.npmjs.com/package/ai-spec-dev"><img src="https://img.shields.io/npm/v/ai-spec-dev?color=cb3837&logo=npm" alt="npm" /></a>
15
- <img src="https://img.shields.io/badge/version-0.55.0-blue" alt="version" />
15
+ <img src="https://img.shields.io/badge/version-0.58.0-blue" alt="version" />
16
16
  <img src="https://img.shields.io/badge/tests-913%20passed-brightgreen" alt="tests" />
17
17
  <img src="https://img.shields.io/badge/providers-9-orange" alt="providers" />
18
18
  <img src="https://img.shields.io/badge/license-MIT-green" alt="license" />
@@ -22,6 +22,10 @@
22
22
  <a href="#english">English</a> | <a href="#中文文档">中文</a>
23
23
  </p>
24
24
 
25
+ <p align="center">
26
+ <img src="https://raw.githubusercontent.com/hzhongzhong/ai-spec/main/docs-assets/demo.gif" alt="ai-spec demo" width="860" />
27
+ </p>
28
+
25
29
  ---
26
30
 
27
31
  <h2 id="english">English</h2>
package/demo/demo.gif ADDED
Binary file
package/demo/demo.sh ADDED
@@ -0,0 +1,433 @@
1
+ #!/usr/bin/env bash
2
+ # ai-spec demo simulation script
3
+ # Usage: bash demo.sh [scene]
4
+ # scene: help | create | multirepo | artifacts | observability | all
5
+
6
+ set -euo pipefail
7
+
8
+ # ── Colors ────────────────────────────────────────────────────────────────────
9
#######################################
# Define the color/style variables used by every scene.
# The values are kept as literal backslash escapes; the printf '%b' based
# print helpers expand them at output time.
# Setting NO_COLOR to any non-empty value blanks every variable so the
# script emits plain text.
# Globals:   RESET BOLD DIM GREEN BGREEN CYAN BCYAN YELLOW BYELLOW BLUE
#            BBLUE MAGENTA BMAGENTA RED WHITE GRAY (written), NO_COLOR (read)
# Arguments: none
#######################################
init_colors() {
  if [[ -n "${NO_COLOR:-}" ]]; then
    RESET='' BOLD='' DIM=''
    GREEN='' BGREEN=''
    CYAN='' BCYAN=''
    YELLOW='' BYELLOW=''
    BLUE='' BBLUE=''
    MAGENTA='' BMAGENTA=''
    RED='' WHITE='' GRAY=''
    return 0
  fi

  RESET='\033[0m'
  BOLD='\033[1m'
  DIM='\033[2m'
  GREEN='\033[0;32m'
  BGREEN='\033[1;32m'
  CYAN='\033[0;36m'
  BCYAN='\033[1;36m'
  YELLOW='\033[0;33m'
  BYELLOW='\033[1;33m'
  BLUE='\033[0;34m'
  BBLUE='\033[1;34m'
  MAGENTA='\033[0;35m'
  BMAGENTA='\033[1;35m'
  RED='\033[0;31m'
  WHITE='\033[1;37m'
  GRAY='\033[0;90m'
}

init_colors
25
+
26
# Print all arguments (joined by single spaces) followed by a newline,
# expanding backslash escapes such as the \033 color codes.
p() { printf '%b\n' "$*"; }

# Same as p but without the trailing newline, so a line can be built in pieces.
pp() { printf '%b' "$*"; }

# Emit one empty line.
nl() { printf '\n'; }

# Sleep for $1 seconds (default 0.6) to pace the demo output.
# Setting DEMO_NO_PAUSE to any non-empty value skips the delay, which makes
# dry runs and automated checks of the scenes fast.
pause() {
  if [[ -n "${DEMO_NO_PAUSE:-}" ]]; then
    return 0
  fi
  sleep "${1:-0.6}"
}

# Like pause, with a longer default of 1.2 seconds.
slow_pause() {
  if [[ -n "${DEMO_NO_PAUSE:-}" ]]; then
    return 0
  fi
  sleep "${1:-1.2}"
}
31
+
32
# Fixed-width (20 cell) progress bars, written without a trailing newline.

# All 20 cells in the bright-green "done" color.
bar_full() {
  printf '%b' "${BGREEN}████████████████████${RESET}"
}

# 12 bright-green cells followed by 8 gray cells.
bar_part() {
  printf '%b' "${BGREEN}████████████${RESET}${GRAY}████████${RESET}"
}

# 4 bright-green cells followed by 16 gray cells.
bar_start() {
  printf '%b' "${BGREEN}████${RESET}${GRAY}████████████████${RESET}"
}
35
+
36
#######################################
# Draw a 20-cell score meter: two filled cells per score point, the rest
# as light-shade cells.
# Globals:   BGREEN, GRAY, RESET (read)
# Arguments: $1 - integer score. Values above 10 are clamped to 10; a
#                 missing, negative or non-numeric value is treated as 0,
#                 so the bar is always exactly 20 cells wide.
# Outputs:   the colored bar on stdout, without a trailing newline
#######################################
score_bar() {
  local -r max=10
  local score="${1:-0}"
  local pad filled empty

  # Anything that is not a plain non-negative integer counts as 0.
  if [[ ! "$score" =~ ^[0-9]+$ ]]; then
    score=0
  fi
  # Force base 10 so an input such as "08" is not parsed as invalid octal.
  score=$(( 10#$score ))
  if (( score > max )); then
    score=$max
  fi

  # Build each run in one step (pad with spaces, then substitute the glyph)
  # instead of spawning one printf per cell; no loop index leaks out.
  printf -v pad '%*s' "$(( score * 2 ))" ''
  filled=${pad// /█}
  printf -v pad '%*s' "$(( (max - score) * 2 ))" ''
  empty=${pad// /░}

  pp "${BGREEN}${filled}${GRAY}${empty}${RESET}"
}
46
+
47
+ # ── Scene 1: help ─────────────────────────────────────────────────────────────
48
#######################################
# Scene 1: show the real CLI help output.
# The CLI entry point is resolved in this order:
#   1. $AI_SPEC_CLI, if set (path to the built index.js)
#   2. ../dist/cli/index.js relative to this script
#      (assumes the script sits in <package>/demo next to dist/ — TODO confirm)
#   3. an `ai-spec` command found on PATH
# Globals:   AI_SPEC_CLI (read, optional)
# Arguments: none
# Outputs:   the CLI help text on stdout; a diagnostic on stderr on failure
# Returns:   non-zero when no CLI can be located or the CLI itself fails
#######################################
scene_help() {
  local script_dir cli
  script_dir=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd) || return
  cli="${AI_SPEC_CLI:-${script_dir}/../dist/cli/index.js}"

  if [[ -f "$cli" ]]; then
    node "$cli" --help
  elif command -v ai-spec >/dev/null 2>&1; then
    ai-spec --help
  else
    printf '%s\n' \
      "demo.sh: ai-spec CLI not found (looked for ${cli}); set AI_SPEC_CLI or install ai-spec" >&2
    return 1
  fi
}
51
+
52
+ # ── Scene 2: single-repo create pipeline ──────────────────────────────────────
53
#######################################
# Scene 2: play back a scripted single-repo pipeline run.
# Everything printed here is canned demo text paced with pause; the function
# only calls the print/pause/bar helpers of this script.
# Globals:   color variables (read)
# Arguments: none
# Outputs:   the simulated pipeline log on stdout
#######################################
scene_create() {
  # Declared up front so the codegen loop variable stays function-local.
  local task

  nl
  p "${BCYAN}┌─────────────────────────────────────────────────┐${RESET}"
  p "${BCYAN}│ ai-spec · Single-Repo Pipeline │${RESET}"
  p "${BCYAN}└─────────────────────────────────────────────────┘${RESET}"
  nl
  pause 0.5

  # Repo selection
  p "${BOLD}[Repo]${RESET} Select repo(s) for this feature:"
  pause 0.4
  p " ${BGREEN}●${RESET} rushbuy-web-admin ${GRAY}(vue / frontend)${RESET}"
  p " ${GRAY}○ rushbuy-node-service (node-express / backend)${RESET}"
  pause 0.5
  p " ${BGREEN}✔${RESET} ${BOLD}1 repo selected${RESET}"
  nl
  pause 0.6

  # Step 1: Context
  p "${BOLD}[1/10]${RESET} Loading project context..."
  pause 0.5
  p " Constitution : ${BGREEN}✔ found${RESET} ${GRAY}(.ai-spec-constitution.md §1–§9)${RESET}"
  p " Tech stack : ${CYAN}vue · vite · pinia · axios${RESET}"
  p " Routes found : ${CYAN}24${RESET}"
  p " Stores found : ${CYAN}8${RESET}"
  p " HTTP client : ${CYAN}import http from '@/utils/http'${RESET}"
  nl
  pause 0.7

  # Step 1.5: Design Options Dialogue
  p "${BOLD}[1.5/10]${RESET} ${MAGENTA}Design Options Dialogue${RESET}"
  pause 0.4
  p " AI proposes ${BOLD}3 architecture options${RESET}:"
  pause 0.3
  p " ${BOLD}A)${RESET} Kanban board view ${GRAY}— drag-and-drop, column per status${RESET}"
  p " ${BOLD}B)${RESET} Table + filters ${GRAY}— sortable, bulk actions, pagination${RESET}"
  p " ${BOLD}C)${RESET} Split-pane layout ${GRAY}— list left, detail right${RESET}"
  pause 0.5
  p " ${BGREEN}✔${RESET} Selected: ${BOLD}B — Table + filters${RESET}"
  nl
  pause 0.6

  # Step 2: Spec generation
  p "${BOLD}[2/10]${RESET} Generating spec with ${CYAN}glm/glm-4.5-air${RESET}..."
  pause 2.0
  pp " ${GRAY}▸ writing spec"; pause 0.3; pp "."; pause 0.3; pp "."; pause 0.3; pp ".${RESET}"; nl
  pause 0.8
  p " ${BGREEN}✔${RESET} Spec generated ${GRAY}(feature-task-management-v1.md)${RESET}"
  pause 0.3
  p " ${BGREEN}✔${RESET} ${BOLD}8 tasks${RESET} decomposed ${GRAY}(data → service → api → view → route → test)${RESET}"
  nl
  pause 0.7

  # Step 3: Refinement
  p "${BOLD}[3/10]${RESET} Interactive spec refinement..."
  pause 1.0
  p " ${CYAN}AI Changes${RESET} ── ${BGREEN}+18${RESET} ${RED}-4${RESET} lines"
  p " ${GRAY} + Added: bulk delete behavior, export CSV endpoint${RESET}"
  p " ${GRAY} + Added: permission check (admin only for delete)${RESET}"
  p " ${GRAY} - Removed: redundant status filter duplicate${RESET}"
  pause 0.5
  p " ${BGREEN}✔${RESET} Spec refined and approved"
  nl
  pause 0.7

  # Step 3.4: Quality assessment
  p "${BOLD}[3.4/10]${RESET} Spec quality assessment..."
  pause 0.8
  pp " Coverage ["; score_bar 9; p "] ${BOLD}9${RESET}/10"
  pause 0.2
  pp " Clarity ["; score_bar 8; p "] ${BOLD}8${RESET}/10"
  pause 0.2
  pp " Constitution ["; score_bar 9; p "] ${BOLD}9${RESET}/10"
  pause 0.4
  p " ${BGREEN}✔${RESET} Quality gate passed ${GRAY}(minSpecScore: 7)${RESET}"
  nl
  pause 0.7

  # Approval Gate
  p "${BOLD}[Gate]${RESET} ${BYELLOW}Approval Gate${RESET} — review spec + DSL summary"
  pause 0.5
  p " ${GRAY}Spec:${RESET} Add task management table view with filters, bulk actions, CSV export"
  p " ${GRAY}DSL preview:${RESET} Models: 3 · Endpoints: 7 · Behaviors: 3"
  pause 0.5
  p " ${BGREEN}✔${RESET} Proceeding..."
  nl
  pause 0.6

  # DSL extraction
  p "${BOLD}[DSL]${RESET} Extracting structured contract..."
  pause 1.2
  p " ${BGREEN}✔${RESET} DSL valid — Models: ${BOLD}3${RESET} Endpoints: ${BOLD}7${RESET} Behaviors: ${BOLD}3${RESET}"
  p " ${GRAY} → feature-task-management-v1.dsl.json${RESET}"
  nl
  pause 0.7

  # Git isolation
  p "${BOLD}[Git]${RESET} Creating worktree branch..."
  pause 0.6
  p " ${BGREEN}✔${RESET} Branch: ${CYAN}feat/task-management${RESET} ${GRAY}(isolated from main)${RESET}"
  nl
  pause 0.6

  # Step 6: Codegen
  p "${BOLD}[6/10]${RESET} Code generation ${GRAY}(task-by-task, 8 files)${RESET}"
  nl

  local tasks=(
    "data · Task.type.ts ${GRAY}types & interfaces${RESET}"
    "service · src/api/task.ts ${GRAY}HTTP client layer${RESET}"
    "api · src/stores/taskStore.ts ${GRAY}Pinia store + actions${RESET}"
    "view · src/views/TaskList.vue ${GRAY}table + filters + bulk select${RESET}"
    "view · src/views/TaskDetail.vue ${GRAY}detail panel component${RESET}"
    "route · src/router/task.route.ts ${GRAY}route module${RESET}"
    "test · tests/taskStore.test.ts ${GRAY}unit tests${RESET}"
    "test · tests/TaskList.test.ts ${GRAY}component tests${RESET}"
  )

  for task in "${tasks[@]}"; do
    pause 0.55
    p " ${BGREEN}✔${RESET} ${task}"
  done

  nl
  pause 0.4

  # Progress bar
  pp " "; bar_full; p " ${BOLD}100%${RESET} → ${BGREEN}8/8 files written${RESET}"
  nl
  pause 0.8

  # Step 7: Test skeleton
  p "${BOLD}[7/10]${RESET} Test skeleton generated"
  pause 0.5
  p " ${BGREEN}✔${RESET} 2 test files · ${BOLD}14 test cases${RESET} scaffolded"
  nl
  pause 0.6

  # Step 8: Error feedback
  p "${BOLD}[8/10]${RESET} Error feedback loop..."
  pause 0.8
  p " ${YELLOW}⚠${RESET} Cycle 1 — ${BOLD}3 errors${RESET} detected"
  p " ${GRAY} src/stores/taskStore.ts:12 — import 'fetchTasks' not found in api/task.ts${RESET}"
  p " ${GRAY} src/stores/taskStore.ts:31 — Property 'total' missing on TaskResponse${RESET}"
  p " ${GRAY} src/views/TaskList.vue:87 — Type mismatch: string vs TaskStatus enum${RESET}"
  pause 1.0
  pp " ${GRAY}▸ AI auto-fixing"; pause 0.3; pp "."; pause 0.3; pp "."; pause 0.3; pp ".${RESET}"; nl
  pause 1.0
  p " ${BGREEN}✔${RESET} All errors resolved in ${BOLD}1 cycle${RESET}"
  nl
  pause 0.7

  # Step 9: 3-pass review
  p "${BOLD}[9/10]${RESET} 3-pass code review"
  pause 0.6
  p " ${BOLD}Pass 0${RESET} Spec compliance → ${BGREEN}✔ aligned${RESET}"
  pause 0.4
  p " ${BOLD}Pass 1${RESET} Architecture audit → ${BGREEN}✔ layer separation correct${RESET}"
  pause 0.4
  p " ${BOLD}Pass 2${RESET} Implementation → ${YELLOW}⚠ 1 issue: missing pagination guard${RESET}"
  pause 0.4
  p " ${BOLD}Pass 3${RESET} Impact & complexity → ${BGREEN}Low impact · Low complexity${RESET}"
  pause 0.5
  pp " Score ["; score_bar 8; p "] ${BOLD}8.2${RESET}/10"
  nl
  pause 0.7

  # Step 10: Harness Self-Eval
  p "${BOLD}[10/10]${RESET} Harness Self-Eval"
  pause 0.6
  p " Compliance ${GRAY}(30%)${RESET} → ${BGREEN}28/30${RESET}"
  pause 0.2
  p " DSL Coverage ${GRAY}(25%)${RESET} → ${BGREEN}23/25${RESET}"
  pause 0.2
  p " Compile ${GRAY}(20%)${RESET} → ${BGREEN}20/20${RESET}"
  pause 0.2
  p " Review ${GRAY}(25%)${RESET} → ${BGREEN}21/25${RESET}"
  pause 0.4
  pp " Total ["; score_bar 9; p "] ${BOLD}92 / 100${RESET}"
  nl
  pause 0.5
  p " ${BGREEN}✔${RESET} ${BOLD}2 lesson(s)${RESET} → constitution §9"
  p " ${BGREEN}✔${RESET} RunId: ${CYAN}20260408-143022-a7f2${RESET} ${GRAY}· 8 files written · 94.3s${RESET}"
  nl
  pause 0.5
}
239
+
240
+ # ── Scene 3: multi-repo workspace ─────────────────────────────────────────────
241
#######################################
# Scene 3: play back a scripted two-repo "workspace mode" run
# (backend pipeline, DSL contract handoff, frontend pipeline, verification).
# All output is canned demo text; consecutive lines are emitted with a single
# printf call that applies '%b\n' to each argument in turn.
# Globals:   color variables (read)
# Arguments: none
# Outputs:   the simulated workspace log on stdout
#######################################
scene_multirepo() {
  # Title banner
  nl
  printf '%b\n' \
    "${BCYAN}┌─────────────────────────────────────────────────┐${RESET}" \
    "${BCYAN}│ ai-spec · Multi-Repo Workspace Mode │${RESET}" \
    "${BCYAN}└─────────────────────────────────────────────────┘${RESET}"
  nl
  pause 0.5

  # Both repos get picked, which switches on workspace mode
  printf '%b\n' "${BOLD}[Repo]${RESET} Select repo(s) for this feature:"
  pause 0.4
  printf '%b\n' \
    " ${BGREEN}●${RESET} rushbuy-node-service ${GRAY}(node-express / backend)${RESET}" \
    " ${BGREEN}●${RESET} rushbuy-web-admin ${GRAY}(vue / frontend)${RESET}"
  pause 0.5
  printf '%b\n' " ${BGREEN}✔${RESET} ${BOLD}2 repos selected${RESET} ${GRAY}→ workspace mode activated${RESET}"
  nl
  pause 0.6

  # W1: responsibility split between backend and frontend
  printf '%b\n' "${BOLD}[W1]${RESET} AI splitting responsibilities..."
  pause 1.0
  printf '%b\n' \
    " ${GRAY}Backend →${RESET} user profile CRUD endpoints, avatar upload, preferences schema" \
    " ${GRAY}Frontend →${RESET} profile settings page, avatar cropper, real-time form validation" \
    " ${GRAY}UX decision:${RESET} modal-based edit (not full-page redirect)"
  nl
  pause 0.7

  # W2: backend pipeline summary
  printf '%b\n' "${BOLD}[W2]${RESET} ${CYAN}Backend pipeline${RESET} ${GRAY}(rushbuy-node-service)${RESET}"
  pause 0.4
  printf '%b\n' \
    " ${BGREEN}✔${RESET} Spec generated ${GRAY}·${RESET} DSL extracted" \
    " ${BGREEN}✔${RESET} Models: ${BOLD}2${RESET} · Endpoints: ${BOLD}5${RESET} · Behaviors: ${BOLD}2${RESET}"
  pause 0.3
  printf '%b\n' " ${BGREEN}✔${RESET} Code generated ${GRAY}(6 files · 0 errors)${RESET}"
  pp " Score ["
  score_bar 9
  p "] ${BOLD}90 / 100${RESET}"
  nl
  pause 0.7

  # W3: the backend DSL endpoints handed over to the frontend pipeline
  printf '%b\n' "${BOLD}[W3]${RESET} ${BYELLOW}DSL contract handoff${RESET} ${GRAY}→ injecting into frontend pipeline${RESET}"
  pause 0.6
  printf '%b\n' \
    " ${GRAY}Backend DSL endpoints passed to frontend:${RESET}" \
    " ${BBLUE} GET${RESET} /api/users/:id/profile" \
    " ${BGREEN} PUT${RESET} /api/users/:id/profile" \
    " ${BMAGENTA} POST${RESET} /api/users/:id/avatar" \
    " ${RED} DEL${RESET} /api/users/:id/avatar" \
    " ${BBLUE} GET${RESET} /api/users/:id/preferences"
  nl
  pause 0.7

  # W4: frontend pipeline summary
  printf '%b\n' "${BOLD}[W4]${RESET} ${CYAN}Frontend pipeline${RESET} ${GRAY}(rushbuy-web-admin)${RESET}"
  pause 0.4
  printf '%b\n' \
    " ${BGREEN}✔${RESET} Spec generated ${GRAY}(injected with backend DSL contract)${RESET}" \
    " ${BGREEN}✔${RESET} Code generated ${GRAY}(8 files · 1 error → auto-fixed)${RESET}"
  pp " Score ["
  score_bar 8
  p "] ${BOLD}87 / 100${RESET}"
  nl
  pause 0.7

  # W5: cross-stack contract verification
  printf '%b\n' "${BOLD}[W5]${RESET} ${BMAGENTA}Cross-stack contract verification${RESET}"
  pause 0.8
  printf '%b\n' " Scanning frontend API calls vs backend DSL..."
  pause 0.8
  printf '%b\n' \
    " ${BGREEN}✔${RESET} Matched : ${BOLD}5 / 5${RESET} endpoints" \
    " ${BGREEN}✔${RESET} Phantoms : ${BOLD}0${RESET} ${GRAY}(no hallucinated routes)${RESET}" \
    " ${BGREEN}✔${RESET} Mismatches: ${BOLD}0${RESET} ${GRAY}(HTTP methods all correct)${RESET}"
  pause 0.4
  printf '%b\n' " ${BGREEN}✔${RESET} Cross-stack contract ${BOLD}CLEAN${RESET}"
  nl
  pause 0.5
}
313
+
314
+ # ── Scene 4: DSL artifacts ─────────────────────────────────────────────────────
315
#######################################
# Scene 4: play back scripted output for the DSL-derived artifact commands
# shown in the demo (`ai-spec export`, `ai-spec types`, `ai-spec mock`).
# All output is canned demo text.
#
# Braces in the sample snippets are written unescaped: inside double quotes
# a backslash before a brace is preserved, and printf '%b' does not treat it
# as an escape either, so an escaped brace would show up on screen with a
# stray backslash.
# Globals:   color variables (read)
# Arguments: none
# Outputs:   the simulated command output on stdout
#######################################
scene_artifacts() {
  nl
  p "${BCYAN}┌─────────────────────────────────────────────────┐${RESET}"
  p "${BCYAN}│ ai-spec · DSL-Derived Artifacts │${RESET}"
  p "${BCYAN}└─────────────────────────────────────────────────┘${RESET}"
  nl
  pause 0.5

  # OpenAPI export
  p "${BOLD}$ ai-spec export${RESET}"
  pause 0.8
  p " ${BGREEN}✔${RESET} Loaded DSL ${GRAY}— Models: 3 Endpoints: 7 Behaviors: 3${RESET}"
  pause 0.4
  p " ${BGREEN}✔${RESET} Generated: ${CYAN}openapi.yaml${RESET} ${GRAY}(OpenAPI 3.1.0)${RESET}"
  pause 0.3
  p " ${GRAY} openapi: 3.1.0${RESET}"
  p " ${GRAY} info: { title: rushbuy-api, version: 1.0.0 }${RESET}"
  p " ${GRAY} paths: { /api/tasks, /api/tasks/:id, /api/tasks/bulk, ... }${RESET}"
  p " ${GRAY} → plug into Postman · Swagger UI · SDK generators${RESET}"
  nl
  pause 0.8

  # Types generation
  p "${BOLD}$ ai-spec types${RESET}"
  pause 0.8
  p " ${BGREEN}✔${RESET} Generated: ${CYAN}src/types/api-contracts.ts${RESET}"
  pause 0.2
  p " ${GRAY} export interface Task { id, title, status, assignee, dueDate }${RESET}"
  p " ${GRAY} export type TaskStatus = 'todo' | 'in_progress' | 'done' | 'cancelled'${RESET}"
  p " ${GRAY} export const API_ENDPOINTS = { TASK_LIST: '/api/tasks', ... }${RESET}"
  nl
  pause 0.8

  # Mock server
  p "${BOLD}$ ai-spec mock --serve --port 3001${RESET}"
  pause 0.8
  p " ${BGREEN}✔${RESET} Generated: ${CYAN}mock/server.js${RESET} ${GRAY}(Express mock server)${RESET}"
  p " ${BGREEN}✔${RESET} Generated: ${CYAN}mock/handlers.ts${RESET} ${GRAY}(MSW handlers)${RESET}"
  p " ${BGREEN}✔${RESET} Patched: ${CYAN}vite.config.ts${RESET} ${GRAY}(proxy /api → :3001)${RESET}"
  pause 0.5
  p " ${BGREEN}▶${RESET} Mock server running on ${BCYAN}http://localhost:3001${RESET}"
  p " ${GRAY} GET /api/tasks → 200 [seed: 10 tasks]${RESET}"
  p " ${GRAY} POST /api/tasks → 201 { id, title, status }${RESET}"
  p " ${GRAY} PUT /api/tasks/:id → 200 updated task${RESET}"
  p " ${GRAY} DEL /api/tasks/:id → 204 no content${RESET}"
  nl
  pause 0.5
}
363
+
364
+ # ── Scene 5: observability ─────────────────────────────────────────────────────
365
#######################################
# Scene 5: play back scripted output for the observability commands shown in
# the demo (`ai-spec logs`, `ai-spec trend`) plus a restore tip.
# All output is canned demo text; consecutive lines are emitted with a single
# printf call that applies '%b\n' to each argument in turn.
# Globals:   color variables (read)
# Arguments: none
# Outputs:   the simulated command output on stdout
#######################################
scene_observability() {
  # Title banner
  nl
  printf '%b\n' \
    "${BCYAN}┌─────────────────────────────────────────────────┐${RESET}" \
    "${BCYAN}│ ai-spec · Observability Layer │${RESET}" \
    "${BCYAN}└─────────────────────────────────────────────────┘${RESET}"
  nl
  pause 0.5

  # Run history table
  printf '%b\n' "${BOLD}$ ai-spec logs${RESET}"
  pause 0.8
  printf '%b\n' \
    "" \
    " ${BOLD}RunId ${GRAY}Date ${RESET}${BOLD}Files Score Duration${RESET}" \
    " ${CYAN}20260408-143022-a7f2${RESET} ${GRAY}2026-04-08${RESET} 8 ${BGREEN}92${RESET} 94s" \
    " ${CYAN}20260407-101455-b3c1${RESET} ${GRAY}2026-04-07${RESET} 6 ${BGREEN}88${RESET} 81s" \
    " ${CYAN}20260406-174230-d9e5${RESET} ${GRAY}2026-04-06${RESET} 11 ${YELLOW}74${RESET} 128s" \
    " ${CYAN}20260405-093010-f1a8${RESET} ${GRAY}2026-04-05${RESET} 5 ${BGREEN}85${RESET} 67s"
  nl
  pause 0.8

  # Score trend chart
  printf '%b\n' "${BOLD}$ ai-spec trend${RESET}"
  pause 0.8
  printf '%b\n' \
    "" \
    " Harness Score Trend ${GRAY}(last 5 runs)${RESET}" \
    "" \
    " 100 ┤" \
    " 90 ┤ ${BGREEN}●${RESET}──────────────────${BGREEN}●${RESET}" \
    " 80 ┤ ${BGREEN}●${RESET}──${BGREEN}●${RESET}" \
    " 70 ┤──${YELLOW}●${RESET}" \
    " 60 ┤" \
    " └────────────────────────────── runs →" \
    " Apr 5 Apr 6 Apr 7 Apr 8"
  nl
  printf '%b\n' " ${BGREEN}↑ +18 points${RESET} over last 4 runs ${GRAY}(constitution learning in effect)${RESET}"
  nl
  pause 0.5

  # Restore hint
  printf '%b\n' " ${GRAY}Tip: ai-spec restore 20260406-174230-d9e5 → rollback that run instantly${RESET}"
  nl
  pause 0.5
}
408
+
409
+ # ── Main ───────────────────────────────────────────────────────────────────────
410
# Scene requested on the command line; every scene is played when omitted.
SCENE="${1:-all}"

#######################################
# Entry point: play the requested scene.
# Arguments: $1 - help | create | multirepo | artifacts | observability | all
# Outputs:   the scene output on stdout; the usage line on stderr when the
#            scene name is not recognised
# Returns:   exits with status 1 on an unknown scene name
#######################################
main() {
  case "$1" in
    help) scene_help ;;
    create) scene_create ;;
    multirepo) scene_multirepo ;;
    artifacts) scene_artifacts ;;
    observability) scene_observability ;;
    all)
      # One-second gap between scenes.
      scene_help
      sleep 1
      scene_create
      sleep 1
      scene_multirepo
      sleep 1
      scene_artifacts
      sleep 1
      scene_observability
      ;;
    *)
      # A usage error is a diagnostic, so it goes to stderr.
      printf 'Usage: %s [help|create|multirepo|artifacts|observability|all]\n' "$0" >&2
      exit 1
      ;;
  esac
}

main "$SCENE"
package/demo/demo.tape ADDED
@@ -0,0 +1,52 @@
1
+ # ai-spec demo
2
+ # Generate: vhs demo.tape
3
+
4
+ Output demo.gif
5
+ Set Theme "Dracula"
6
+ Set FontSize 13
7
+ Set Width 1100
8
+ Set Height 720
9
+ Set Padding 24
10
+ Set FontFamily "JetBrains Mono"
11
+ Set PlaybackSpeed 1.0
12
+
13
+ # ── Opening ────────────────────────────────────────────────────────────────────
14
+ Sleep 500ms
15
+ Type "ai-spec --help"
16
+ Sleep 300ms
17
+ Enter
18
+ Sleep 3s
19
+
20
+ # ── Scene 2: single-repo create pipeline ──────────────────────────────────────
21
+ Sleep 800ms
22
+ Type "ai-spec create 'Add task management feature to Vue admin'"
23
+ Sleep 300ms
24
+ Enter
25
+ Sleep 300ms
26
+ Type "bash demo.sh create"
27
+ Enter
28
+ Sleep 40s
29
+
30
+ # ── Scene 3: multi-repo workspace ─────────────────────────────────────────────
31
+ Sleep 800ms
32
+ Type "ai-spec create 'Add user profile sync' --workspace"
33
+ Sleep 300ms
34
+ Enter
35
+ Sleep 300ms
36
+ Type "bash demo.sh multirepo"
37
+ Enter
38
+ Sleep 14s
39
+
40
+ # ── Scene 4: DSL artifacts ─────────────────────────────────────────────────────
41
+ Sleep 800ms
42
+ Type "bash demo.sh artifacts"
43
+ Enter
44
+ Sleep 10s
45
+
46
+ # ── Scene 5: observability ─────────────────────────────────────────────────────
47
+ Sleep 800ms
48
+ Type "bash demo.sh observability"
49
+ Enter
50
+ Sleep 8s
51
+
52
+ Sleep 2s
Binary file
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ai-spec-dev",
3
- "version": "0.56.0",
3
+ "version": "0.58.0",
4
4
  "description": "AI-driven Development Orchestrator SDK & CLI",
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/index.d.ts",
@@ -0,0 +1,548 @@
1
+ # Vision Frontend Workflow — Design Spec
2
+
3
+ **Date**: 2026-04-08
4
+ **Status**: Draft, awaiting user review
5
+ **Target version**: ai-spec v0.57.0+
6
+ **Author**: brainstorming session between hongzhong & Claude
7
+
8
+ ---
9
+
10
+ ## 0. Background & Motivation
11
+
12
+ ai-spec-dev 当前的 pipeline 在两类场景下表现良好:
13
+
14
+ 1. **纯后端需求** —— constitution → spec → DSL → tasks → codegen → cross-stack-verifier 的工程化闭环
15
+ 2. **前后端需求,但前端是简单后台管理** —— 复用现成组件库,无需视觉还原
16
+
17
+ 但当遇到 **带 UI 设计稿的产品功能页面**(visual customization 高、有数据流、有后端契约、需要持续迭代)时,工具链彻底缺位:
18
+
19
+ - Spec / DSL 无法承载视觉、布局、交互细节
20
+ - Codegen 看不到设计稿,产物和稿子相去甚远
21
+ - 没有"渲染产物 vs 目标稿"的校验机制
22
+ - 设计稿来源(Figma / 蓝湖 / 即时设计 / MasterGo)API 大多企业版才开放,普通团队拿不到
23
+
24
+ 本设计为这类场景新增一条 `--withui` workflow,作为现有 workflow 的**扩展**而非替代。
25
+
26
+ ---
27
+
28
+ ## 1. Scope & Non-Goals
29
+
30
+ ### 1.1 In Scope(场景 B:产品功能页面)
31
+
32
+ - 视觉定制度高、需要长期迭代的产品页面
33
+ - 有数据流、有状态管理、与后端 API 有契约关系
34
+ - 目标前端仓库已经有一些基础原子件(Button / Input / Modal 等),但可能存在重复造轮子
35
+ - 截图作为视觉真值来源(不依赖任何具体设计工具的 API)
36
+
37
+ ### 1.2 Out of Scope
38
+
39
+ - **一次性营销页 / 落地页** —— 这类场景更适合 v0 / Lovable / bolt.new 等工具,ai-spec-dev 不与之竞争
40
+ - **像素级 1:1 还原** —— vision 模型本身做不到精确测量,本设计选择"对比+迭代收敛"路径而非"一次性精确提取"
41
+ - **Figma / 蓝湖 API 集成** —— 第一版只支持本地截图文件输入,不集成任何设计工具 API
42
+ - **自动 refactor 重复原语** —— 重复原语扫描产出"建议报告",不自动改代码
43
+ - **`--resume` 从失败点续跑** —— 第一版每次从头跑,简化状态管理
44
+ - **完整状态矩阵覆盖** —— 设计师交付的截图通常只有主屏 + 1~2 关键状态,其余状态走"语义实现 + 自动测试"路径,不进 visual loop
45
+
46
+ ### 1.3 Core Insight
47
+
48
+ **像素精度问题应该靠"对比+迭代"解决,不是靠"提取"解决。**
49
+
50
+ 人类设计师还原稿子也是一边写一边比对的,没有人能看一眼就准确说出 "padding 17px"。Vision 模型有完全相同的局限。所以正确的工程做法是给 AI 一个**反馈回路**让它能像人一样调整——这恰好和 ai-spec-dev "verifier + feedback loop" 的核心哲学完全一致。
51
+
52
+ ---
53
+
54
+ ## 2. High-Level Architecture
55
+
56
+ ### 2.1 CLI 入口分流
57
+
58
+ ```bash
59
+ ai-spec create my-feature --withui # 走视觉 workflow
60
+ ai-spec create my-feature # 走现有 workflow(默认无 UI,行为不变)
61
+ ```
62
+
63
+ `--withui` 在 CLI 入口把 mode 注入 run context,下游模块通过 mode 选择 pipeline 变体。**不在已有 prompt 里塞 if/else**——而是注册新的 prompt 变体和新模块,保持两条 workflow 物理隔离。
64
+
65
+ 第一版**不支持中途切换 mode**(即创建后不能 `--resume --withui`),必须从头重建。简化第一版状态管理。
66
+
67
+ ### 2.2 两条 Workflow 关系图
68
+
69
+ ```
70
+ ┌─────────────────┐
71
+ │ ai-spec create │
72
+ │ --withui flag │
73
+ └────────┬────────┘
74
+
75
+ ┌────────────┴────────────┐
76
+ ↓ ↓
77
+ ┌───────────────┐ ┌───────────────┐
78
+ │ 现有 workflow │ │ 视觉 workflow │
79
+ │ (no UI) │ │ (with UI) │
80
+ └───────┬───────┘ └───────┬───────┘
81
+ │ │
82
+ │ ↓
83
+ │ ┌────────────────┐
84
+ │ │ 视觉前置 │
85
+ │ │ - 截图采集 │
86
+ │ │ - vision 理解 │
87
+ │ │ - canonical │
88
+ │ │ primitives │
89
+ │ └────────┬───────┘
90
+ │ ↓
91
+ │ ┌────────────────┐
92
+ │ │ Checkpoint 1 │
93
+ │ │ 用户确认 AI │
94
+ │ │ 对截图的理解 │
95
+ │ └────────┬───────┘
96
+ │ ↓
97
+ ↓ ┌────────────────┐
98
+ constitution → spec → DSL │ Spec/DSL 扩展 │
99
+ │ │ + visualRefs │
100
+ │ │ + states[] │
101
+ │ │ + componentTree│
102
+ │ └────────┬───────┘
103
+ ↓ ↓
104
+ ┌───────────────┐ ┌────────────────┐
105
+ │ codegen │ │ visual-codegen │
106
+ │ (现有) │ │ (multimodal) │
107
+ └───────┬───────┘ └────────┬───────┘
108
+ │ ↓
109
+ │ ┌────────────────┐
110
+ │ │ visual-diff │
111
+ │ │ verifier loop │
112
+ │ │ (Playwright + │
113
+ │ │ vision diff) │
114
+ │ │ 默认最多 5 轮 │
115
+ │ └────────┬───────┘
116
+ │ ↓
117
+ │ ┌────────────────┐
118
+ │ │ Checkpoint 2 │
119
+ │ │ 用户审视产物 │
120
+ │ │ + 剩余 diff │
121
+ │ │ + 可加 hint │
122
+ │ └────────┬───────┘
123
+ ↓ ↓
124
+ ┌───────────────┐ ┌────────────────┐
125
+ │ cross-stack │ ←———————│ cross-stack │
126
+ │ verifier │ │ verifier │
127
+ │ (现有,复用) │ │ (现有,复用) │
128
+ └───────────────┘ └────────────────┘
129
+ ```
130
+
131
+ ### 2.3 关键设计决定
132
+
133
+ | # | 决定 | 理由 |
134
+ |---|---|---|
135
+ | D1 | 视觉 workflow 是**扩展**而非替代 | 老 workflow 完全不动,降低风险 |
136
+ | D2 | 像素精度交给 visual diff loop 收敛 | 避免 DSL 承担它扛不动的责任 |
137
+ | D3 | DSL 永远只描述"语义结构 + 数据流 + 状态列表 + 截图引用" | 保持 DSL 作为"verifier 事实源"的角色 |
138
+ | D4 | 引入 canonical primitive scanner | 利用 ai-spec-dev "深度集成目标仓库" 的差异化优势 |
139
+ | D5 | 双 Checkpoint:理解前置 + 产物后置 | 在信息密度最高、最便宜的拐点上让人介入 |
140
+ | D6 | 两个 Checkpoint 都用同一个本地网页 UI | 简化实现,单一 UI 模块,遵循"大道至简" |
141
+ | D7 | canonical scanner 仅 `--withui` 独占 | 后管 workflow 已经在用规范组件库,扫描价值有限 |
142
+
143
+ ---
144
+
145
+ ## 3. Module Inventory
146
+
147
+ 所有视觉相关代码物理隔离到 `core/visual/` 子目录,方便独立演进、独立测试,未来可单独抽包。
148
+
149
+ ```
150
+ core/
151
+ ├── visual/ ← 新目录
152
+ │ ├── screenshot-loader.ts 截图目录加载 + 文件名约定解析
153
+ │ ├── vision-understander.ts vision 模型理解截图,产出 understanding.md
154
+ │ ├── primitive-canonicalization-scanner.ts 扫描重复原语,产出 canonical 清单 + 报告
155
+ │ ├── visual-dsl-builder.ts 合成视觉版 DSL
156
+ │ ├── visual-codegen.ts 包住现有 codegen,加多模态 prompt
157
+ │ ├── playwright-renderer.ts 起 dev server + Playwright 截图(可降级)
158
+ │ ├── visual-diff-verifier.ts vision 模型对比 target vs actual
159
+ │ ├── visual-loop-runner.ts 编排 codegen → render → diff → feedback 循环
160
+ │ └── visual-review-gate.ts 本地网页 UI,承载两个 Checkpoint
161
+
162
+ cli/commands/
163
+ └── create.ts 改动:解析 --withui flag,注入 mode
164
+
165
+ cli/pipeline/
166
+ ├── multi-repo.ts 改动:mode=withui 时插入视觉子流程
167
+ └── visual-pipeline.ts (可选)单独抽视觉 pipeline 编排器
168
+
169
+ prompts/
170
+ ├── vision-understand.prompt.ts vision-understander 的多模态 prompt
171
+ ├── visual-dsl.prompt.ts visual-dsl-builder 的合成 prompt
172
+ ├── visual-codegen.prompt.ts visual-codegen 的多模态生成 prompt
173
+ └── visual-diff.prompt.ts visual-diff-verifier 的对比 prompt
174
+ ```
175
+
176
+ ### 3.1 模块职责详表
177
+
178
+ | 模块 | 输入 | 输出 | 复用 |
179
+ |---|---|---|---|
180
+ | `screenshot-loader` | 用户的 `screenshots/` 目录 | `{ pages: [{ slug, main: path, states: {...} }] }` | fs |
181
+ | `vision-understander` | 截图 + 需求文档片段 | `understanding.md`(结构化 markdown) | provider-utils, retry |
182
+ | `primitive-canonicalization-scanner` | 目标前端仓库路径 | `canonical-primitives.json` + `duplication-report.md` | project-index, frontend-context-loader |
183
+ | `visual-dsl-builder` | understanding.md (approved) + 需求文档 + canonical-primitives.json | `visual-dsl.json` | dsl-types, dsl-validator |
184
+ | `visual-codegen` | visual-dsl.json + 截图 + canonical primitives | 代码文件 | code-generator, codegen prompts |
185
+ | `playwright-renderer` | 项目路径 + 路由 | `actual.png` 或降级到跳过 | child_process, Playwright |
186
+ | `visual-diff-verifier` | target.png + actual.png | 结构化 diff JSON + 收敛判定 | provider-utils |
187
+ | `visual-loop-runner` | DSL + 初始 codegen 产物 | 收敛后的产物 + 最终 diff 报告 | error-feedback, fix-history, vcr, token-budget |
188
+ | `visual-review-gate` | Checkpoint 数据 | 用户决策(approve / edit / hint) | http, fs |
189
+
190
+ ---
191
+
192
+ ## 4. Data Flow & Artifacts
193
+
194
+ ### 4.1 用户输入约定
195
+
196
+ ```
197
+ my-feature/ ← 用户的需求目录
198
+ ├── requirement.md 需求文档(必须)
199
+ └── screenshots/ 截图目录(必须)
200
+ ├── home.png 主屏(必须,命名 = 页面 slug)
201
+ ├── home-empty.png 可选状态变体,命名约定:<slug>-<state>.png
202
+ ├── home-loading.png 可选
203
+ ├── home-error.png 可选
204
+ ├── detail.png 另一个页面的主屏
205
+ └── detail-empty.png
206
+ ```
207
+
208
+ **约定原则**:
209
+ - **页面级**:`<slug>.png` 是页面 happy-path 主屏
210
+ - **状态级**:`<slug>-<state>.png` 是该页面的状态变体
211
+ - **零配置启动**:文件名约定即配置,无需 manifest
212
+ - **可选精细化**:用户可提供 `screenshots/manifest.yaml` 显式声明每张图的语义、对应路由、关联 API endpoint,复杂场景适用
213
+
214
+ ### 4.2 DSL 增量字段
215
+
216
+ 现有 DSL 不动,**仅在 `--withui` 模式下追加**视觉相关字段,camelCase 风格与现有保持一致。
217
+
218
+ ```typescript
219
+ // dsl-types.ts 增量定义(伪代码)
220
+
221
+ interface VisualDSL extends ExistingDSL {
222
+ visualMode: true; // 标记位
223
+
224
+ pages: VisualPage[];
225
+ }
226
+
227
+ interface VisualPage {
228
+ slug: string;
229
+ route: string;
230
+
231
+ componentTree: ComponentNode; // 语义组件树(不含像素)
232
+
233
+ visualReferences: {
234
+ main: string; // screenshots/home.png
235
+ states: Record<string, string>; // { empty: "home-empty.png", ... }
236
+ };
237
+
238
+ states: PageState[]; // 从需求文档抽 + 从截图变体补
239
+
240
+ dataBindings: DataBinding[]; // 复用现有 DSL 的数据绑定语义
241
+
242
+ hints?: VisualHint[]; // Checkpoint 2 用户加的可选 hint
243
+ }
244
+
245
+ interface ComponentNode {
246
+ role: string; // "Hero" | "ProductGrid" | "SearchBar"
247
+ primitive?: string; // 映射到 canonical primitive 时记录
248
+ children?: ComponentNode[];
249
+ // 注意:没有 width / height / padding / color 等像素字段
250
+ }
251
+ ```
252
+
253
+ ### 4.3 中间产物目录
254
+
255
+ 每次 run 在 `.ai-spec/runs/<run-id>/visual/` 下产出(具体路径自动选择,复用现有 run-snapshot 约定):
256
+
257
+ ```
258
+ .ai-spec/runs/2026-04-08-a1b2/visual/
259
+ ├── understanding.md Checkpoint 1 给用户看的"AI 理解"
260
+ ├── understanding.approved.md Checkpoint 1 用户确认/编辑后的版本
261
+ ├── canonical-primitives.json 扫描出的规范原语清单
262
+ ├── duplication-report.md 仓库重复度报告(副产物)
263
+ ├── visual-dsl.json 合成出来的视觉 DSL
264
+ ├── codegen/
265
+ │ ├── v1/ 第一轮生成的代码快照
266
+ │ ├── v2/
267
+ │ └── ...
268
+ ├── renders/
269
+ │ ├── home.v1.actual.png Playwright 截的实际渲染
270
+ │ ├── home.v2.actual.png
271
+ │ └── ...
272
+ ├── diffs/
273
+ │ ├── home.v1.diff.json vision 模型的结构化 diff
274
+ │ ├── home.v2.diff.json
275
+ │ └── home.final.md 最终给 Checkpoint 2 用的 diff 报告
276
+ └── checkpoint-decisions.json 两个 checkpoint 的用户决策记录
277
+ ```
278
+
279
+ 为什么这样存:
280
+ - 每一轮都留快照——失败后可以回看是哪一轮跑偏
281
+ - 复用现有 `run-snapshot.ts` / `run-logger.ts`
282
+ - `checkpoint-decisions.json` 让 fix-history 可以学到"用户在 Checkpoint 1 通常会修正哪类理解错误",未来用于 prompt 优化
283
+
284
+ ---
285
+
286
+ ## 5. Visual Diff Loop(核心机制)
287
+
288
+ ```
289
+ ┌─────────────────────────────────────────────────────┐
290
+ │ Visual Diff Verifier Loop │
291
+ │ │
292
+ │ [v1 codegen 产物] │
293
+ │ ↓ │
294
+ │ [Playwright 起本地渲染] → 截一张 actual.png │
295
+ │ ↓ │
296
+ │ [vision 模型对比 target.png vs actual.png] │
297
+ │ ↓ │
298
+ │ 产出结构化 diff: │
299
+ │ - 标题字号偏小约 ~4px │
300
+ │ - 主按钮颜色偏冷 │
301
+ │ - 右侧 padding 不足 │
302
+ │ - 卡片圆角太小 │
303
+ │ ↓ │
304
+ │ diff 收敛了? │
305
+ │ ├─ 是 → 退出 loop │
306
+ │ └─ 否 → diff 当作 error feedback 喂回 codegen │
307
+ │ ↓ │
308
+ │ [v2 codegen] │
309
+ │ ↓ │
310
+ │ (回到 Playwright 渲染那一步) │
311
+ │ │
312
+ │ 达到最大轮数(默认 5 轮,可配)? │
313
+ │ └─ 是 → 强制退出,未解决项写入最终 diff 报告 │
314
+ └─────────────────────────────────────────────────────┘
315
+ ```
316
+
317
+ **核心点**:
318
+ - 这是**全自动 AI ↔ AI 对齐 loop**,循环内部不需要人类介入
319
+ - 复用现有 `error-feedback.ts` 通道,diff 报告就是新类型的 error feedback
320
+ - 收敛条件:vision 模型 diff 报告里没有"显著"差异(默认阈值,可配置)
321
+ - 失败是软失败:5 轮没收敛就把剩下的差异作为"已知问题"列出来交付,不阻塞流程
322
+ - 只对"有截图的状态"跑 visual loop,其余状态走"语义实现 + 自动测试"路径
323
+ - 每轮调用走现有 `token-budget.ts` 预算管控
324
+
325
+ ---
326
+
327
+ ## 6. Human Checkpoints
328
+
329
+ ### 6.1 Checkpoint 1: Understanding Review(codegen 之前)
330
+
331
+ **时机**:vision-understander 产出 `understanding.md` 之后、visual-dsl-builder 之前
332
+
333
+ **用户看到**:
334
+ - 原始截图(每页主屏 + 关键状态)
335
+ - AI 对截图的语义理解(组件树、角色识别、配色方向、布局描述)
336
+ - 基于截图的状态推断
337
+ - 与 canonical primitives 的初步映射
338
+
339
+ **用户可做**:
340
+ - ✅ Approve(继续)
341
+ - ✏️ Edit(直接编辑 understanding.md)
342
+ - 🔁 Reject + 反馈(让 AI 重新理解,最多 3 次,超过提示用户人工撰写)
343
+
344
+ **为什么这一步最有价值**:在花 token 跑 codegen 之前先纠正 AI 的理解偏差。这是整个 workflow 中**最便宜也最关键**的人类介入点——理解错了,后面再多迭代也是白费。
345
+
346
+ ### 6.2 Checkpoint 2: Output Review(visual loop 之后)
347
+
348
+ **时机**:visual diff loop 退出之后、cross-stack-verifier 之前
349
+
350
+ **用户看到**:
351
+ - 目标截图 vs 实际渲染截图(并排对比)
352
+ - 最终的结构化 diff 报告(已收敛项 + 未解决项)
353
+ - visual loop 跑了几轮、token 消耗多少
354
+ - 生成的代码文件树
355
+
356
+ **用户可做**:
357
+ - ✅ Accept(进入 cross-stack-verifier 收尾)
358
+ - 💡 Add Hint + 再跑一轮(在网页上可视化框选某个区域,添加自然语言 hint,例如"这块卡片的圆角应该更大")
359
+ - ✋ 手动改完再继续(用户在 IDE 里改完产物后,回到网页点 continue)
360
+
361
+ ### 6.3 UI 形态
362
+
363
+ `visual-review-gate.ts` 起一个**本地 Express server + 单页应用(SPA)**,承载两个 Checkpoint。两者共享同一套基础设施(路由、状态持久化、与 pipeline 的通信通道)。
364
+
365
+ 简化原则:**只起一个网页 UI,不在 CLI 和网页之间切换**。
366
+
367
+ ---
368
+
369
+ ## 7. Error Handling & Degradation
370
+
371
+ 视觉 workflow 故障面比无 UI workflow 大得多——多了浏览器、vision 模型调用、人类交互。所有可预见失败模式必须有明确降级路径。
372
+
373
+ ### 7.1 失败模式 → 降级矩阵
374
+
375
+ | # | 失败场景 | 检测时机 | 降级策略 | 警告码 |
376
+ |---|---|---|---|---|
377
+ | F1 | 缺少 `screenshots/` 目录或为空 | CLI 入口校验 | 立即报错退出 | 硬错误 |
378
+ | F2 | vision understanding 调用失败/超时 | vision-understander | 重试 2 次 → 失败则降级到纯文本 DSL 路径,跳过 visual loop | W6 |
379
+ | F3 | Checkpoint 1 用户反复打回理解 | visual-review-gate | 给 3 次修正机会,超过提示人工撰写 | 软警告 |
380
+ | F4 | canonical 扫描部分文件解析错误 | scanner | 跳过出错文件,记入"未能扫描"区,整体不阻塞 | W7 |
381
+ | F5 | 目标项目 dev server 起不来 | playwright-renderer | 直接跳过 visual loop,仅做静态产物 | W8 |
382
+ | F6 | dev server 起来但页面渲染挂 | playwright-renderer | 抓控制台 error 喂回 codegen → 仍挂则退出 loop | W9 |
383
+ | F7 | visual diff 的 vision 模型输出不可靠(乱讲或格式不合法) | visual-diff-verifier | JSON schema 校验 + 重试 1 次 → 仍不通过则标为 inconclusive,loop 继续 | 静默 |
384
+ | F8 | visual loop N 轮后未收敛 | visual-loop-runner | 不报错,最后产物作为交付,未收敛项写入 Checkpoint 2 报告 | 软警告 |
385
+ | F9 | Checkpoint 2 用户加的 hint 与 DSL 冲突 | visual-codegen | 优先采用 hint,冲突记入 fix-history | 静默 |
386
+ | F10 | 视觉 workflow 某一步硬挂 | top-level pipeline | 部分交付,已成功步骤产物保留,失败步骤明确标注 | 硬错误 |
387
+
388
+ ### 7.2 降级原则
389
+
390
+ 1. **没有视觉能力 ≠ 没有交付**——cross-stack-verifier 还能跑就还能交付一个"功能能跑、视觉粗糙"的版本
391
+ 2. **降级要可见、不要静默**——所有降级在最终 summary 里明确列出(矩阵中标为"静默"的 F7 / F9 仅表示不产生警告码,仍需记入 summary)
392
+ 3. **"vision 模型不可靠"与"基础设施失败"要分开处理**——前者(如 F2、F7)靠重试 + schema 校验兜底,后者(如 F5、F6)以跳过或退出 visual loop 为主
393
+ 4. **Token / 时间预算上限硬卡**——visual loop 默认 5 轮(可配),通过现有 `token-budget.ts` 管控
394
+
395
+ ### 7.3 警告码扩展
396
+
397
+ 延续 v0.56 的警告码命名体系(已编号到 W5),新增:
398
+
399
+ - **W6**: vision understanding 失败,已降级到纯文本 DSL 路径
400
+ - **W7**: canonical primitives 扫描部分失败
401
+ - **W8**: dev server 启动失败,已跳过 visual loop
402
+ - **W9**: 渲染产物有运行时错误,已退出 visual loop
403
+
404
+ 放在统一警告体系下,可在 dashboard / report 集中展示。
405
+
406
+ ---
407
+
408
+ ## 8. Testing Strategy
409
+
410
+ ### 8.1 测试金字塔
411
+
412
+ ```
413
+ ┌────────────────────────────┐
414
+ │ E2E (1~2 个 sample 项目) │ 慢、贵、最少
415
+ ├────────────────────────────┤
416
+ │ Pipeline 集成测试 (VCR) │ 中等
417
+ ├────────────────────────────┤
418
+ │ 模块单测 (mock 一切外部) │ 快、多、最多
419
+ └────────────────────────────┘
420
+ ```
421
+
422
+ 目标覆盖率:和现有 P0 模块对齐(85%+)。
423
+
424
+ ### 8.2 单测层
425
+
426
+ 每个新模块独立单测,所有外部依赖(vision API、Playwright、文件系统)都 mock。
427
+
428
+ | 模块 | 测试重点 |
429
+ |---|---|
430
+ | `screenshot-loader` | 文件名约定解析、目录扫描、命名冲突检测 |
431
+ | `vision-understander` | prompt 构造、输出 schema 校验、重试逻辑 |
432
+ | `primitive-canonicalization-scanner` | AST 等价性判定、canonical 选择启发式、重复报告生成 |
433
+ | `visual-dsl-builder` | understanding + 需求文档 + canonical primitives 合成逻辑 |
434
+ | `visual-codegen` | 多模态 prompt 构造、canonical primitives 偏好注入 |
435
+ | `playwright-renderer` | 起服务/降级/截图状态机、错误捕获 |
436
+ | `visual-diff-verifier` | diff 报告 schema 校验、收敛阈值判断 |
437
+ | `visual-loop-runner` | 循环编排、轮数预算、退出条件、与 error-feedback 对接 |
438
+ | `visual-review-gate` | HTTP 路由、用户决策持久化、Checkpoint 状态机 |
439
+
440
+ ### 8.3 集成测试层(VCR 录制)
441
+
442
+ 复用现有 `core/vcr.ts`。第一次跑测试时真实调用 vision 模型,把请求/响应录下来;之后 CI 从 VCR 回放,**测试是确定性的**。
443
+
444
+ ```
445
+ tests/visual/
446
+ ├── fixtures/
447
+ │ ├── simple-list-page/
448
+ │ │ ├── requirement.md
449
+ │ │ └── screenshots/
450
+ │ │ ├── list.png
451
+ │ │ └── list-empty.png
452
+ │ └── card-grid-page/
453
+ ├── vcr-cassettes/
454
+ │ ├── vision-understand-list.json
455
+ │ ├── vision-diff-list-v1.json
456
+ │ └── ...
457
+ └── visual-pipeline.test.ts          # 用 fixture + VCR 回放跑通整条 pipeline
458
+ ```
459
+
460
+ 关键测试用例:
461
+ - 完整 happy path(understanding → DSL → codegen → render → diff → 收敛 → 交付)
462
+ - F5 降级(mock dev server 起不来)→ 跳过 visual loop,仅交付静态产物(W8)
463
+ - F8 不收敛(mock diff 永远不收敛)→ 达到最大轮数(默认 5 轮)后正常退出,未收敛项写入 Checkpoint 2 报告
464
+ - F2 vision 失败 → 降级到纯文本 DSL 路径
465
+ - canonical scanner 三种仓库 fixture(干净 / 充满重复 / 空)
466
+
467
+ ### 8.4 E2E 测试层
468
+
469
+ **Sample project 来源**:使用一个已有的真实仓库,配套用户提供的 Figma 截图,逐步调试准确率。E2E **不在 CI 每次跑**,手动触发或 nightly。
470
+
471
+ E2E **不断言"代码完美还原截图"**——那是不可测的。只断言:
472
+ - 流程跑通了
473
+ - 产物结构合法
474
+ - 关键 invariants 没破
475
+ - visual loop 至少跑了 1 轮
476
+ - cross-stack-verifier 通过
477
+
478
+ ### 8.5 vision 模型非确定性的处理
479
+
480
+ 1. **断言"属性"而不是"完全相等"**——如"diff 报告包含至少一项 color 相关差异"
481
+ 2. **VCR 隔离非确定性**——CI 永远从录制回放
482
+ 3. **手动重新录制**——录制脚本独立成 npm script,开发者改 prompt 后手动跑一次重录
483
+ 4. **VCR hash 校验**——复用 v0.40 的 VCR hash 机制
484
+
485
+ ---
486
+
487
+ ## 9. Open Questions & Risks
488
+
489
+ ### 9.1 Open Questions(待实施 plan 阶段进一步细化)
490
+
491
+ 1. **vision 模型选型**:用 Claude Opus 4.6 多模态、还是分阶段用不同模型(理解阶段用 Haiku 省钱,diff 阶段用 Opus 求准)?
492
+ 2. **收敛阈值的初始值**:默认怎么定义"显著差异"?需要在第一个 sample project 上调试出基线。
493
+ 3. **canonical primitive 等价性的判定算法**:第一版用启发式(同名 + 相似 props 形状)还是用 AST 结构 hash?
494
+ 4. **本地网页 UI 的技术栈**:纯静态 HTML + vanilla JS、还是引入一个轻量框架(preact / lit)?倾向前者保持零依赖。
495
+ 5. **Checkpoint 2 的 hint 数据格式**:自然语言 + 区域坐标?还是结构化模板?
496
+
497
+ ### 9.2 Risks
498
+
499
+ 1. **R1:vision 模型对中文 UI 的理解能力**——需要在 sample project 阶段验证,可能需要 prompt 调优
500
+ 2. **R2:Playwright 在用户机器上的可移植性**——不同 OS、不同 Node 版本、headless 启动可能踩坑
501
+ 3. **R3:visual loop 的 token 成本**——5 轮 × 多模态调用 × 多页面 × 多状态,成本可能让用户却步,需要 token-budget 强约束 + 透明成本展示
502
+ 4. **R4:canonical scanner 的误判**——把不该合并的"看起来像"的组件判为重复,给用户造成误导。第一版宁可漏报不要误报,启发式偏保守
503
+ 5. **R5:第一次实施的工作量大**——9 个新模块 + 4 个 prompt + 一套本地网页 UI,需要分阶段交付(先核心 loop 跑通,再加 Checkpoint UI,再加 canonical scanner)
504
+
505
+ ---
506
+
507
+ ## 10. Implementation Phases(建议交付节奏)
508
+
509
+ 第一版不必一次到位,建议按"最小可用切片"分阶段:
510
+
511
+ **Phase 1: Skeleton + Loop Core**(最关键,验证核心假设)
512
+ - CLI `--withui` flag + 入口分流
513
+ - screenshot-loader + 最简 DSL 扩展
514
+ - vision-understander(**Phase 1 暂不接入 Checkpoint,understanding 直接进入下一步**——目的是先验证核心假设"vision 理解 → 多模态 codegen → diff loop 能收敛",避免被 UI 工程量阻塞。Phase 2 才补上 Checkpoint)
515
+ - visual-codegen(多模态 prompt)
516
+ - playwright-renderer(无降级,起不来直接报错)
517
+ - visual-diff-verifier + visual-loop-runner(loop 跑通)
518
+ - 在 sample project 上验证:能跑通、能收敛、产物合理
519
+
520
+ > Phase 1 是**开发者验证里程碑**,不是用户面向的 release。延后 Checkpoint 不削弱 §6 的"Checkpoint 1 最有价值"论点——它只是把"产品形态"和"技术验证"分开节奏。
521
+
522
+ **Phase 2: Human Checkpoints**
523
+ - visual-review-gate(本地网页 UI)
524
+ - Checkpoint 1: Understanding Review
525
+ - Checkpoint 2: Output Review with hint
526
+ - checkpoint-decisions 持久化
527
+
528
+ **Phase 3: Canonical Primitives**
529
+ - primitive-canonicalization-scanner
530
+ - canonical primitives 注入 codegen prompt
531
+ - duplication-report 产出
532
+
533
+ **Phase 4: Robustness**
534
+ - 完整降级矩阵(F1~F10)
535
+ - 警告码 W6~W9
536
+ - token budget 强约束
537
+ - 完整测试覆盖(单测 + VCR 集成 + E2E)
538
+
539
+ 除 Phase 1(仅作为开发者验证里程碑,不对用户 release)外,Phase 2~4 每个交付后都可以独立 release,互不阻塞。
540
+
541
+ ---
542
+
543
+ ## 11. References
544
+
545
+ - v0.56 cross-stack-verifier(核心 verifier 范式来源)
546
+ - v0.40 VCR hash 机制(测试确定性基础)
547
+ - 现有 `error-feedback.ts` / `fix-history.ts` / `project-index.ts`(复用基础设施)
548
+ - 现有 `frontend-context-loader.ts` / `frontend-spec.prompt.ts`(前端 workflow 起点)