@yuhan1124/draw-prompt 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,3141 @@
1
+ #!/usr/bin/env python3
2
+ # /// script
3
+ # requires-python = ">=3.11"
4
+ # dependencies = ["pyyaml>=6.0", "pillow>=10.0"]
5
+ # ///
6
+ """draw-prompt CLI — 生图需求转化 + 偏好持久化 + Codex 交付块生成。
7
+
8
+ 设计边界(重要):这个 skill 只做两件事——
9
+ 1. 把自然语言画图需求转化成高质量、可执行的 gpt-image-2 prompt / handoff;
10
+ 2. Harness 辅助转化:记录你的口味、采纳/弃用样本、评分,越用越贴合。
11
+
12
+ 它【不主动调用 Codex 出图】。出图交给下游(/codex-image 插件或你手动跑
13
+ codex exec)。本 CLI 的 `handoff` 子命令负责把 prompt 包装成一段可以直接
14
+ 粘贴/转交给 Codex 的现成指令,但不会自己去执行它。
15
+
16
+ CLI 只干确定性的脏活:
17
+ convert 自然语言画图需求 -> Prompt / handoff
18
+ compose 长输入/文档 -> 多张配套图的视觉计划 + Prompt
19
+ series 多张同风格系列图 -> 稳定一致的 Prompt 组
20
+ edit 参考图/改图需求 -> 保留项、修改项、编辑 Prompt
21
+ brand 品牌风格档案 -> 可复用品牌 Prompt 块
22
+ character 角色设定 -> 角色一致性 bible + 场景 Prompt
23
+ data-viz 数据/报表需求 -> 信息图/图表 Prompt
24
+ rewrite 低质量 prompt -> 结构化高质量 Prompt
25
+ adapt 同一需求 -> 多尺寸/多画幅适配 Prompt
26
+ overlay 按 text_overlay_spec 对成品图做精确文字后处理
27
+ visual-check / edit-check / visual-regress 成品图质量门与回归验证
28
+ lint 检查 Prompt 是否满足生图转化硬约束
29
+ benchmark 批量跑 golden cases,检查转化稳定性
30
+ revise 按失败分类生成修订版 Prompt
31
+ profile 读写风格偏好档案 style_profile.md
32
+ samples 记录 / 检索出图样本(few-shot 来源)
33
+ feedback 对样本回填采纳 / 弃用
34
+ judge 存储 agent 给出的评分(CLI 不评分、不看图、不调 Codex)
35
+ handoff 生成交给 Codex 的现成指令块(仅打印,不执行)
36
+ status 数据与下游通道健康检查
37
+
38
+ 所有运行时数据写到数据目录(默认 ~/.local/share/draw-prompt/,可用
39
+ DRAW_PROMPT_HOME 覆盖),不进 git repo。
40
+ """
41
+
42
+ from __future__ import annotations
43
+
44
+ import argparse
45
+ import csv
46
+ import hashlib
47
+ import json
48
+ import os
49
+ import re
50
+ import sys
51
+ from datetime import datetime, timezone
52
+ from pathlib import Path
53
+ from shutil import which
54
+ from tempfile import NamedTemporaryFile
55
+
56
+ # ---- yaml:有 pyyaml 用它,没有就退化到极简解析,保证裸 python 也能跑 ----
57
+ try:
58
+ import yaml # type: ignore
59
+
60
+ def _yaml_load(text: str) -> dict:
61
+ return yaml.safe_load(text) or {}
62
+
63
+ def _yaml_dump(data: dict) -> str:
64
+ return yaml.safe_dump(data, allow_unicode=True, sort_keys=False).strip()
65
+
66
+ except Exception: # pragma: no cover - 仅在无 pyyaml 时走到
67
+
68
+ def _yaml_load(text: str) -> dict:
69
+ out: dict = {}
70
+ for line in text.splitlines():
71
+ line = line.rstrip()
72
+ if not line or line.lstrip().startswith("#") or ":" not in line:
73
+ continue
74
+ key, _, val = line.partition(":")
75
+ key, val = key.strip(), val.strip()
76
+ if val.startswith("[") and val.endswith("]"):
77
+ inner = val[1:-1].strip()
78
+ parts = re.findall(r'"[^"]*"|\'[^\']*\'|[^,]+', inner)
79
+ out[key] = [p.strip().strip("\"'") for p in parts if p.strip()]
80
+ elif val:
81
+ out[key] = val.strip("\"'")
82
+ else:
83
+ out[key] = ""
84
+ return out
85
+
86
+ def _yaml_dump(data: dict) -> str:
87
+ lines = []
88
+ for key, val in data.items():
89
+ if isinstance(val, list):
90
+ inner = ", ".join(f'"{v}"' for v in val)
91
+ lines.append(f"{key}: [{inner}]")
92
+ else:
93
+ lines.append(f"{key}: {val}")
94
+ return "\n".join(lines)
95
+
96
+
97
+ # --------------------------------------------------------------------------- #
98
+ # 路径与时间
99
+ # --------------------------------------------------------------------------- #
100
def data_home() -> Path:
    """Return the directory used for runtime data.

    ``DRAW_PROMPT_HOME`` wins when set; otherwise ``draw-prompt`` under
    ``XDG_DATA_HOME`` (when set) or under ``~/.local/share``.
    """
    explicit = os.environ.get("DRAW_PROMPT_HOME")
    if explicit:
        return Path(explicit).expanduser()
    xdg_root = os.environ.get("XDG_DATA_HOME")
    if xdg_root:
        return Path(xdg_root).expanduser() / "draw-prompt"
    return Path.home() / ".local" / "share" / "draw-prompt"
107
+
108
+
109
def profile_path() -> Path:
    """Return the path of ``style_profile.md`` inside the data directory."""
    return data_home().joinpath("style_profile.md")
111
+
112
+
113
def samples_path() -> Path:
    """Return the path of ``samples.jsonl`` inside the data directory."""
    return data_home().joinpath("samples.jsonl")
115
+
116
+
117
def judgements_path() -> Path:
    """Return the path of ``judgements.jsonl`` inside the data directory."""
    return data_home().joinpath("judgements.jsonl")
119
+
120
+
121
def now_iso() -> str:
    """Return the current UTC time formatted as ``YYYY-MM-DDTHH:MM:SSZ``."""
    current = datetime.now(timezone.utc)
    return current.strftime("%Y-%m-%dT%H:%M:%SZ")
123
+
124
+
125
def now_stamp() -> str:
    """Return the current UTC time as a compact ``YYYYMMDD-HHMMSS`` stamp."""
    current = datetime.now(timezone.utc)
    return current.strftime("%Y%m%d-%H%M%S")
127
+
128
+
129
def ensure_home() -> None:
    """Create the data directory (and missing parents) if it does not exist yet."""
    home = data_home()
    home.mkdir(parents=True, exist_ok=True)
131
+
132
+
133
+ SCHEMA_VERSION = 1
134
+ COMPILER_VERSION = "0.4.0"
135
+
136
+
137
+ # --------------------------------------------------------------------------- #
138
+ # profile:style_profile.md = yaml frontmatter + markdown body
139
+ # --------------------------------------------------------------------------- #
140
+ PROFILE_TEMPLATE_FM = {
141
+ "default_aspect": "未设置",
142
+ "default_quality": "high",
143
+ "favored_styles": [],
144
+ "avoided_elements": [],
145
+ "text_language": "zh",
146
+ "updated_at": "",
147
+ }
148
+
149
+ PROFILE_TEMPLATE_BODY = """# 风格偏好笔记
150
+
151
+ (agent 在这里用自然语言累积对你口味的观察:你反复要求的构图、留白、
152
+ 配色、镜头语言、排斥的"AI 味"等。每条尽量带一句依据,并标注日期。)
153
+ """
154
+
155
+
156
def read_profile() -> tuple[dict, str]:
    """Load the style profile as ``(frontmatter, body)``.

    A missing file yields a copy of the template frontmatter plus the template
    body. A file without a complete ``---`` frontmatter section yields the
    template frontmatter and the whole file text as the body.
    """
    path = profile_path()
    if not path.exists():
        return dict(PROFILE_TEMPLATE_FM), PROFILE_TEMPLATE_BODY
    text = path.read_text(encoding="utf-8")
    # At most three pieces: text before the first fence, the frontmatter, the rest.
    pieces = text.split("---", 2) if text.startswith("---") else []
    if len(pieces) < 3:
        return dict(PROFILE_TEMPLATE_FM), text
    front, rest = pieces[1], pieces[2]
    return _yaml_load(front), rest.lstrip("\n")
169
+
170
+
171
def write_profile(fm: dict, body: str) -> None:
    """Write the profile file as YAML frontmatter followed by the markdown body.

    The caller's *fm* is not mutated; ``updated_at`` is refreshed on a copy.
    """
    ensure_home()
    stamped = {**fm, "updated_at": now_iso()}
    document = "---\n" + _yaml_dump(stamped) + "\n---\n\n" + body.rstrip() + "\n"
    profile_path().write_text(document, encoding="utf-8")
177
+
178
+
179
+ def cmd_profile(args: argparse.Namespace) -> int:
180
+ action = args.action
181
+ if action == "path":
182
+ print(profile_path())
183
+ return 0
184
+ if action == "init":
185
+ if profile_path().exists() and not args.force:
186
+ print(f"已存在:{profile_path()}(加 --force 覆盖)", file=sys.stderr)
187
+ return 0
188
+ write_profile(dict(PROFILE_TEMPLATE_FM), PROFILE_TEMPLATE_BODY)
189
+ print(f"已初始化:{profile_path()}")
190
+ return 0
191
+ if action == "show":
192
+ fm, body = read_profile()
193
+ if not profile_path().exists():
194
+ print("(档案尚未创建,下面是模板默认值。运行 `profile init` 创建。)\n", file=sys.stderr)
195
+ print(f"---\n{_yaml_dump(fm)}\n---\n\n{body.rstrip()}")
196
+ return 0
197
+ if action == "set":
198
+ if not args.key:
199
+ print("用法:profile set KEY VALUE (列表字段用逗号分隔)", file=sys.stderr)
200
+ return 2
201
+ fm, body = read_profile()
202
+ val = args.value or ""
203
+ # 只对显式列表字段做逗号拆分,避免标量值(如 "high, low")被误拆成列表
204
+ if args.key in ("favored_styles", "avoided_elements"):
205
+ fm[args.key] = [v.strip() for v in val.split(",") if v.strip()]
206
+ else:
207
+ fm[args.key] = val
208
+ write_profile(fm, body)
209
+ print(f"已更新 {args.key} = {fm[args.key]}")
210
+ return 0
211
+ if action == "note":
212
+ text = args.value or args.key # 单参数会落在 key 槽
213
+ if not text:
214
+ print("用法:profile note \"一条偏好观察\"", file=sys.stderr)
215
+ return 2
216
+ fm, body = read_profile()
217
+ stamp = datetime.now(timezone.utc).strftime("%Y-%m-%d")
218
+ body = body.rstrip() + f"\n\n- ({stamp}) {text}"
219
+ write_profile(fm, body)
220
+ print("已追加一条偏好笔记。")
221
+ return 0
222
+ print(f"未知 profile 动作:{action}", file=sys.stderr)
223
+ return 2
224
+
225
+
226
+ # --------------------------------------------------------------------------- #
227
+ # samples:jsonl,每行一条出图记录
228
+ # --------------------------------------------------------------------------- #
229
def append_jsonl(path: Path, record: dict) -> None:
    """Append *record* to *path* as one JSON line (non-ASCII text kept as-is)."""
    ensure_home()
    with path.open("a", encoding="utf-8") as sink:
        sink.write(f"{json.dumps(record, ensure_ascii=False)}\n")
233
+
234
+
235
def read_jsonl(path: Path) -> list[dict]:
    """Read a JSON-lines file and return the decoded records.

    A missing file yields ``[]``. Blank lines and lines that fail to decode as
    JSON are skipped silently.
    """
    if not path.exists():
        return []
    records = []
    for raw in path.read_text(encoding="utf-8").splitlines():
        candidate = raw.strip()
        if not candidate:
            continue
        try:
            parsed = json.loads(candidate)
        except json.JSONDecodeError:
            continue
        records.append(parsed)
    return records
247
+
248
+
249
def rewrite_jsonl(path: Path, records: list[dict]) -> None:
    """Replace the contents of *path* with *records*, one JSON object per line.

    Before rewriting, an existing file is copied byte-for-byte to
    ``<name>.bak`` so that lines skipped as corrupt on read are not lost.
    The new content is written to a temporary file in the same directory as
    *path* and then moved into place with ``os.replace``.

    Args:
        path: Target JSON-lines file. Its parent directory is created if needed
            (for files inside the data directory this is the data directory).
        records: Records to serialize; non-ASCII text is written unescaped.

    Raises:
        TypeError / ValueError: if a record is not JSON-serializable.
        OSError: on filesystem errors.
        In both cases the temporary file is removed and *path* is left as it was.
    """
    target_dir = path.parent
    target_dir.mkdir(parents=True, exist_ok=True)
    if path.exists():
        # Byte copy: no decode/encode round-trip and no newline translation.
        backup = path.with_suffix(path.suffix + ".bak")
        backup.write_bytes(path.read_bytes())
    tmp_name = None
    try:
        # The temp file lives next to the target so os.replace never has to
        # cross a filesystem boundary.
        with NamedTemporaryFile("w", encoding="utf-8", delete=False, dir=str(target_dir)) as fh:
            tmp_name = fh.name
            for rec in records:
                fh.write(json.dumps(rec, ensure_ascii=False) + "\n")
        os.replace(tmp_name, path)
    except BaseException:
        # Do not leave a stray temp file behind when writing or replacing fails.
        if tmp_name is not None:
            try:
                os.unlink(tmp_name)
            except OSError:
                pass
        raise
260
+
261
+
262
def new_id() -> str:
    """Return ``<now_stamp()>-<4 hex chars>``.

    The random suffix avoids id collisions when several records are written
    within the same second.
    """
    stamp = now_stamp()
    suffix = os.urandom(2).hex()
    return f"{stamp}-{suffix}"
265
+
266
+
267
+ # --------------------------------------------------------------------------- #
268
+ # convert / lint:自然语言生图需求 -> Prompt -> 可验证 handoff
269
+ # --------------------------------------------------------------------------- #
270
+ ASSET_ROUTES = {
271
+ "poster": {
272
+ "label": "poster",
273
+ "title": "海报 / 视觉主 KV",
274
+ "keywords": ["海报", "poster", "banner", "主视觉", "kv", "促销", "活动图", "封面"],
275
+ "aspect": "3:4",
276
+ "size": "portrait",
277
+ "quality": "high",
278
+ "tags": ["poster"],
279
+ },
280
+ "ui": {
281
+ "label": "ui",
282
+ "title": "UI / 产品界面",
283
+ "keywords": ["ui", "界面", "app", "dashboard", "仪表盘", "网页", "web", "小程序", "mockup"],
284
+ "aspect": "9:16",
285
+ "size": "portrait",
286
+ "quality": "high",
287
+ "tags": ["ui"],
288
+ },
289
+ "infographic": {
290
+ "label": "infographic",
291
+ "title": "信息图 / 科普图解",
292
+ "keywords": ["信息图", "infographic", "图解", "科普", "说明图", "流程图", "时间线"],
293
+ "aspect": "3:4",
294
+ "size": "portrait",
295
+ "quality": "high",
296
+ "tags": ["infographic"],
297
+ },
298
+ "diagram": {
299
+ "label": "diagram",
300
+ "title": "学术 / 系统架构图",
301
+ "keywords": ["架构图", "系统图", "论文图", "diagram", "architecture", "模型图", "流程架构"],
302
+ "aspect": "16:9",
303
+ "size": "landscape",
304
+ "quality": "high",
305
+ "tags": ["diagram"],
306
+ },
307
+ "product": {
308
+ "label": "product",
309
+ "title": "产品 / 食品商业渲染",
310
+ "keywords": ["产品", "商品", "渲染", "电商", "食品", "饮料", "包装", "瓶", "杯", "茶饮"],
311
+ "aspect": "3:4",
312
+ "size": "portrait",
313
+ "quality": "high",
314
+ "tags": ["product"],
315
+ },
316
+ "character": {
317
+ "label": "character",
318
+ "title": "角色设计",
319
+ "keywords": ["角色", "人物", "立绘", "character", "人设", "表情", "三视图"],
320
+ "aspect": "16:9",
321
+ "size": "landscape",
322
+ "quality": "high",
323
+ "tags": ["character"],
324
+ },
325
+ "photography": {
326
+ "label": "photography",
327
+ "title": "写实摄影",
328
+ "keywords": ["摄影", "写实", "照片", "photo", "photography", "街拍", "纪实"],
329
+ "aspect": "16:9",
330
+ "size": "landscape",
331
+ "quality": "high",
332
+ "tags": ["photography"],
333
+ },
334
+ "illustration": {
335
+ "label": "illustration",
336
+ "title": "插画 / 风格化图像",
337
+ "keywords": ["插画", "illustration", "绘本", "漫画", "二次元", "扁平", "风格化"],
338
+ "aspect": "1:1",
339
+ "size": "square",
340
+ "quality": "medium",
341
+ "tags": ["illustration"],
342
+ },
343
+ "logo": {
344
+ "label": "logo",
345
+ "title": "Logo / 品牌系统板",
346
+ "keywords": ["logo", "标识", "品牌", "字标", "brand", "vi"],
347
+ "aspect": "1:1",
348
+ "size": "square",
349
+ "quality": "high",
350
+ "tags": ["logo"],
351
+ },
352
+ }
353
+
354
+ STYLE_HINTS = [
355
+ (["国风", "新中式", "东方", "中式"], "New Chinese visual style, restrained and elegant"),
356
+ (["极简", "minimal", "简洁"], "minimal editorial design, clean grid, generous negative space"),
357
+ (["高级", "轻奢", "质感"], "premium commercial visual style, refined and not flashy"),
358
+ (["赛博", "cyber", "科技感"], "futuristic cyber-tech aesthetic with controlled neon accents"),
359
+ (["复古", "retro"], "tasteful retro print design with contemporary restraint"),
360
+ (["瑞士", "swiss"], "Swiss International Typographic style, strict grid discipline"),
361
+ (["手绘", "水彩"], "hand-drawn illustration with tactile paper texture"),
362
+ (["写实", "真实"], "realistic, natural, unprocessed visual language"),
363
+ ]
364
+
365
+ ASPECT_SIZE = {
366
+ "3:4": "portrait",
367
+ "4:3": "landscape",
368
+ "16:9": "landscape",
369
+ "9:16": "portrait",
370
+ "1:1": "square",
371
+ }
372
+
373
+ BUZZWORDS = ["stunning", "beautiful", "professional", "high quality", "nice", "modern", "高级感"]
374
+
375
+ TEMPLATE_DEFS = {
376
+ "poster_zh_promo": {
377
+ "asset_type": "poster",
378
+ "label": "中文促销海报",
379
+ "layout": "large headline zone, hero product zone, price/offer block, quiet footer",
380
+ "keywords": ["促销", "新品", "价格", "优惠", "茶饮", "冷泡", "海报"],
381
+ },
382
+ "poster_brand_kv": {
383
+ "asset_type": "poster",
384
+ "label": "品牌主视觉 KV",
385
+ "layout": "single hero visual, brand message zone, generous negative space",
386
+ "keywords": ["品牌", "主视觉", "kv", "发布", "形象"],
387
+ },
388
+ "poster_event": {
389
+ "asset_type": "poster",
390
+ "label": "活动海报",
391
+ "layout": "title, date/venue, speaker or theme block, organizer footer",
392
+ "keywords": ["活动", "会议", "展览", "event", "workshop", "讲座"],
393
+ },
394
+ "poster_info_dense": {
395
+ "asset_type": "poster",
396
+ "label": "信息密集海报",
397
+ "layout": "modular grid with clear title, sections, callouts, and footer rules",
398
+ "keywords": ["日程", "规则", "清单", "信息", "流程", "说明"],
399
+ },
400
+ "ui_mobile_home": {
401
+ "asset_type": "ui",
402
+ "label": "移动 App 首页",
403
+ "layout": "phone status bar, app header, content cards, primary action, bottom navigation",
404
+ "keywords": ["app", "首页", "手机", "移动", "小程序"],
405
+ },
406
+ "ui_dashboard": {
407
+ "asset_type": "ui",
408
+ "label": "Web / SaaS Dashboard",
409
+ "layout": "sidebar, top bar, KPI cards, chart panel, data table",
410
+ "keywords": ["dashboard", "仪表盘", "后台", "saas", "web"],
411
+ },
412
+ "diagram_rag": {
413
+ "asset_type": "diagram",
414
+ "label": "RAG 架构图",
415
+ "layout": "left-to-right pipeline: User -> Retriever -> Vector DB -> LLM -> Answer",
416
+ "keywords": ["rag", "retriever", "vector db", "llm", "answer", "检索"],
417
+ },
418
+ "diagram_system": {
419
+ "asset_type": "diagram",
420
+ "label": "系统架构图",
421
+ "layout": "layered system boxes with directional arrows and a small legend",
422
+ "keywords": ["架构", "系统", "模块", "服务", "流程"],
423
+ },
424
+ "product_hero": {
425
+ "asset_type": "product",
426
+ "label": "产品英雄图",
427
+ "layout": "single product hero, controlled props, clear material close-up, editorial finish",
428
+ "keywords": ["产品", "商品", "渲染", "电商", "新品"],
429
+ },
430
+ "illustration_scene": {
431
+ "asset_type": "illustration",
432
+ "label": "场景插画",
433
+ "layout": "main subject, environment details, foreground/midground/background depth",
434
+ "keywords": ["插画", "场景", "绘本", "风格化"],
435
+ },
436
+ }
437
+
438
+ FAILURE_PLAYBOOK = {
439
+ "text_error": {
440
+ "label": "文字错误 / 乱码",
441
+ "add_negative": ["avoid garbled text", "avoid tiny unreadable type"],
442
+ "revision": "Switch to strict text mode: generate the visual background and layout first, reserve clean text areas, then apply exact text through overlay_spec.",
443
+ },
444
+ "layout_error": {
445
+ "label": "布局错误",
446
+ "add_negative": ["avoid ambiguous layout", "avoid overlapping sections"],
447
+ "revision": "Make the layout explicit with named regions, fixed reading order, and stable spacing.",
448
+ },
449
+ "missing_subject": {
450
+ "label": "主体缺失",
451
+ "add_negative": ["avoid missing the main subject"],
452
+ "revision": "Restate the main subject as the first visible object and require it to dominate the composition.",
453
+ },
454
+ "wrong_style": {
455
+ "label": "风格不匹配",
456
+ "add_negative": ["avoid style drift"],
457
+ "revision": "Replace vague style words with a concrete visual production context and palette.",
458
+ },
459
+ "too_cluttered": {
460
+ "label": "画面过满",
461
+ "add_negative": ["avoid clutter", "avoid excessive decorative elements"],
462
+ "revision": "Increase negative space, reduce supporting objects, and prioritize one visual hierarchy.",
463
+ },
464
+ "bad_composition": {
465
+ "label": "构图差",
466
+ "add_negative": ["avoid weak composition"],
467
+ "revision": "Add composition anchors such as hero position, foreground/midground/background, and reading path.",
468
+ },
469
+ "brand_risk": {
470
+ "label": "品牌 / 版权风险",
471
+ "add_negative": ["avoid real brand logos", "avoid stock clip-art", "avoid existing IP resemblance"],
472
+ "revision": "Use invented branding only and explicitly require original marks.",
473
+ },
474
+ "low_readability": {
475
+ "label": "可读性差",
476
+ "add_negative": ["avoid low contrast", "avoid unreadable microtext"],
477
+ "revision": "Increase contrast, enlarge labels, simplify background behind text, and reserve clear reading zones.",
478
+ },
479
+ }
480
+
481
+ REFERENCE_REWRITE = {
482
+ "宫崎骏": "warm hand-painted fantasy animation mood with original characters",
483
+ "吉卜力": "warm hand-painted fantasy animation mood with original characters",
484
+ "迪士尼": "family-friendly polished animation mood with original characters",
485
+ "Disney": "family-friendly polished animation mood with original characters",
486
+ "皮克斯": "polished 3D animation mood with original characters",
487
+ "Pixar": "polished 3D animation mood with original characters",
488
+ "漫威": "cinematic superhero-comic energy with original characters",
489
+ "Marvel": "cinematic superhero-comic energy with original characters",
490
+ "Nike": "invented athletic brand language",
491
+ "耐克": "invented athletic brand language",
492
+ "Apple": "invented premium consumer electronics brand language",
493
+ "苹果": "invented premium consumer electronics brand language",
494
+ "可口可乐": "invented beverage brand language",
495
+ "Coca-Cola": "invented beverage brand language",
496
+ }
497
+
498
+
499
+ def split_csv(value: str | None) -> list[str]:
500
+ if not value:
501
+ return []
502
+ return [v.strip() for v in re.split(r"[,,]", value) if v.strip()]
503
+
504
+
505
def normalize_ws(text: str) -> str:
    """Collapse every whitespace run to one space and trim both ends."""
    words = text.split()
    return " ".join(words)
507
+
508
+
509
def has_cjk(text: str) -> bool:
    """Return True if *text* has a character in the range U+4E00..U+9FFF."""
    return any("\u4e00" <= ch <= "\u9fff" for ch in text)
511
+
512
+
513
def sanitize_reference_risks(text: str) -> tuple[str, list[str]]:
    """Rewrite references to real IP / brands into original visual descriptions.

    Each key of ``REFERENCE_REWRITE`` found in *text* is replaced by its
    mapped phrase so the prompt never asks for a direct imitation.

    Returns:
        ``(safe_text, notes)`` where *notes* has one entry per key rewritten.
    """
    safe = text
    notes: list[str] = []
    for key, replacement in REFERENCE_REWRITE.items():
        # "苹果" is also the fruit; only treat it as the brand when the text
        # carries brand/tech context.
        if key == "苹果" and not any(ctx in safe for ctx in ["品牌", "logo", "手机", "电脑", "发布会", "科技"]):
            continue
        if key.isascii():
            # Not \b: in a str pattern CJK characters count as word characters,
            # so "画一个Nike风格" has no boundary around "Nike". Use boundaries
            # defined over ASCII word characters only.
            pattern = rf"(?<![A-Za-z0-9_]){re.escape(key)}(?![A-Za-z0-9_])"
            # "Apple" stays case-sensitive so the lowercase fruit is untouched.
            flags = 0 if key == "Apple" else re.IGNORECASE
        else:
            pattern = re.escape(key)
            flags = 0
        # A callable replacement inserts the phrase literally (no backslash or
        # group-reference processing).
        safe, count = re.subn(pattern, lambda _m, rep=replacement: rep, safe, flags=flags)
        if count:
            notes.append(f"Rewrote risky reference '{key}' to original visual language.")
    return safe, notes
530
+
531
+
532
def safety_avoid_list(notes: list[str]) -> list[str]:
    """Return the fixed IP/brand guardrail phrases when any rewrite note exists.

    With no notes the result is an empty list.
    """
    guardrails = [
        "avoid copying a living artist or studio house style",
        "avoid real brand logos and trademarks",
        "avoid resemblance to existing IP characters",
        "use invented names, marks, and original character shapes",
    ]
    return guardrails if notes else []
541
+
542
+
543
+ def route_asset_type(request: str, override: str | None = None) -> str:
544
+ if override:
545
+ return override
546
+ lower = request.lower()
547
+ best = ("poster", 0)
548
+ for asset_type, meta in ASSET_ROUTES.items():
549
+ score = 0
550
+ for kw in meta["keywords"]:
551
+ if kw.lower() in lower:
552
+ score += 1
553
+ if score > best[1]:
554
+ best = (asset_type, score)
555
+ return best[0]
556
+
557
+
558
+ def infer_aspect(request: str, asset_type: str, override: str | None, profile: dict) -> str:
559
+ if override:
560
+ return override
561
+ match = re.search(r"(\d{1,2}\s*[:x×]\s*\d{1,2})", request)
562
+ if match:
563
+ return match.group(1).replace(" ", "").replace("x", ":").replace("×", ":")
564
+ lower = request.lower()
565
+ if any(k in lower for k in ["横版", "横图", "landscape", "宽屏"]):
566
+ return "16:9"
567
+ if any(k in lower for k in ["竖版", "竖图", "portrait"]):
568
+ return "3:4"
569
+ if any(k in lower for k in ["方图", "方形", "square"]):
570
+ return "1:1"
571
+ prof = str(profile.get("default_aspect") or "").strip()
572
+ if prof and prof != "未设置":
573
+ return prof
574
+ return str(ASSET_ROUTES[asset_type]["aspect"])
575
+
576
+
577
+ def infer_size(aspect: str, override: str | None, asset_type: str) -> str:
578
+ if override:
579
+ return override
580
+ return ASPECT_SIZE.get(aspect, str(ASSET_ROUTES[asset_type]["size"]))
581
+
582
+
583
+ def infer_quality(request: str, asset_type: str, texts: list[str], override: str | None, profile: dict) -> str:
584
+ if override:
585
+ return override
586
+ lower = request.lower()
587
+ if any(k in lower for k in ["草稿", "draft", "探索"]):
588
+ return "medium"
589
+ if texts or asset_type in {"poster", "ui", "infographic", "diagram", "logo"}:
590
+ return "high"
591
+ prof = str(profile.get("default_quality") or "").strip()
592
+ if prof:
593
+ return prof
594
+ return str(ASSET_ROUTES[asset_type]["quality"])
595
+
596
+
597
def extract_required_texts(request: str, explicit_texts: list[str]) -> list[str]:
    """Collect the exact strings that must appear in the image.

    When *explicit_texts* is non-empty only those (trimmed, de-duplicated)
    entries are returned. Otherwise candidates are mined from *request*:
    quoted spans, text following a cue word (写上 / 显示 / 标题 ...), and price
    expressions ending in 元.

    Candidates are trimmed of surrounding whitespace and punctuation, limited
    to 1..40 characters, and de-duplicated ignoring internal whitespace.
    Order of first appearance is kept.
    """
    seen: set[str] = set()
    out: list[str] = []
    # ASCII and full-width comma/semicolon/colon plus the ideographic full stop.
    # Full-width forms are written as escapes so they cannot be collapsed into
    # duplicate ASCII characters.
    trim_chars = " \t\n\r,\uff0c。;\uff1b:\uff1a"

    def add(text: str) -> None:
        text = text.strip(trim_chars)
        key = re.sub(r"\s+", "", text)
        if 0 < len(text) <= 40 and key not in seen:
            seen.add(key)
            out.append(text)

    for item in explicit_texts:
        add(item)
    if explicit_texts:
        return out
    patterns = [
        r'"([^"\n]{1,40})"',
        r"'([^'\n]{1,40})'",
        r"“([^”\n]{1,40})”",
        r"「([^」\n]{1,40})」",
        r"『([^』\n]{1,40})』",
    ]
    for pat in patterns:
        for match in re.findall(pat, request):
            add(match)
    text_hint = r"(?:写上|写|显示|包含|文案|标题|文字)"
    # After a cue word, skip an optional colon (ASCII or full-width) and capture
    # up to the next clause-ending punctuation mark in either width.
    cue_pattern = (
        rf"{text_hint}[:\uff1a\s]*(?:写上|写|显示|包含)?[:\uff1a\s]*"
        r"([^,\uff0c。;\uff1b.]{2,24})"
    )
    for match in re.findall(cue_pattern, request):
        add(match)
    for match in re.findall(r"\d+(?:\s*/\s*\d+)?\s*元", request):
        add(match)
    return out
628
+
629
+
630
def infer_style_anchors(request: str, override: str | None, profile: dict) -> list[str]:
    """Build up to four style anchor phrases for the prompt.

    Sources, in order: comma-separated *override*; ``STYLE_HINTS`` entries whose
    keywords occur in *request*; the profile's ``favored_styles`` (list or
    comma-separated string) not already present. A generic anchor is used when
    nothing else applies.
    """
    anchors: list[str] = list(split_csv(override))
    lowered = request.lower()
    anchors += [
        anchor
        for keys, anchor in STYLE_HINTS
        if any(key.lower() in lowered for key in keys)
    ]
    favored = profile.get("favored_styles") or []
    if isinstance(favored, str):
        favored = split_csv(favored)
    for style in favored:
        if style and style not in anchors:
            anchors.append(str(style))
    if not anchors:
        anchors.append("clear, reproducible visual direction with concrete materials and composition")
    return anchors[:4]
647
+
648
+
649
def infer_negative(asset_type: str, texts: list[str], profile: dict) -> list[str]:
    """Assemble the "avoid" list for a prompt, capped at eight entries.

    Starts from two generic entries, adds text- and asset-type-specific ones,
    then appends the profile's ``avoided_elements`` (list or comma-separated
    string) that are not already present.
    """
    avoid = ["avoid vague generic AI gloss", "avoid clutter"]
    if texts:
        avoid += ["avoid garbled or wrong text"]
    if asset_type in {"poster", "ui", "infographic", "diagram", "logo"}:
        avoid += ["avoid fake logos and unreadable microtext"]
    if asset_type == "photography":
        avoid += ["avoid HDR over-processing", "avoid plastic skin"]
    from_profile = profile.get("avoided_elements") or []
    if isinstance(from_profile, str):
        from_profile = split_csv(from_profile)
    for entry in from_profile:
        if entry and entry not in avoid:
            avoid.append(str(entry))
    return avoid[:8]
664
+
665
+
666
def infer_tags(asset_type: str, request: str, extra: str | None = None) -> list[str]:
    """Derive sample tags for a request.

    Starts from the tags of *asset_type* in ``ASSET_ROUTES``, adds ``"zh"``
    when *request* contains CJK text, adds topic tags whose keywords occur in
    *request*, then appends comma-separated *extra* tags. Duplicates of topic
    and extra tags are skipped.
    """
    tags = list(ASSET_ROUTES[asset_type]["tags"])
    if has_cjk(request):
        tags.append("zh")
    topic_keywords = {
        "tea": ["茶", "茶饮", "冷泡"],
        "brand": ["品牌", "logo", "标识"],
        "promo": ["促销", "价格", "优惠", "新品"],
        "academic": ["论文", "学术", "系统", "模型"],
    }
    lowered = request.lower()
    for tag, words in topic_keywords.items():
        if tag in tags:
            continue
        if any(word.lower() in lowered for word in words):
            tags.append(tag)
    tags.extend(item for item in dict.fromkeys(split_csv(extra)) if item not in tags)
    return tags
682
+
683
+
684
+ def infer_template_id(request: str, asset_type: str, override: str | None = None) -> str:
685
+ if override:
686
+ return override
687
+ lower = request.lower()
688
+ if asset_type == "diagram" and "rag" in lower:
689
+ return "diagram_rag"
690
+ candidates = {tid: meta for tid, meta in TEMPLATE_DEFS.items() if meta["asset_type"] == asset_type}
691
+ best_id = ""
692
+ best_score = -1
693
+ for tid, meta in candidates.items():
694
+ score = sum(1 for kw in meta["keywords"] if kw.lower() in lower)
695
+ if score > best_score:
696
+ best_id = tid
697
+ best_score = score
698
+ if best_score > 0 and best_id:
699
+ return best_id
700
+ defaults = {
701
+ "poster": "poster_zh_promo",
702
+ "ui": "ui_mobile_home",
703
+ "diagram": "diagram_system",
704
+ "product": "product_hero",
705
+ "illustration": "illustration_scene",
706
+ }
707
+ return defaults.get(asset_type, asset_type)
708
+
709
+
710
def infer_layout(template_id: str, asset_type: str) -> str:
    """Return the layout description for a template.

    Uses the ``layout`` of *template_id* in ``TEMPLATE_DEFS`` when the id is
    known; otherwise a per-asset-type fallback, and finally a generic layout.
    """
    template = TEMPLATE_DEFS.get(template_id)
    if template is not None:
        return str(template["layout"])
    by_asset_type = {
        "photography": "single realistic capture with foreground, subject, and environmental context",
        "character": "reference sheet grid with turnaround, expressions, details, and palette",
        "logo": "brand board grid with mark, wordmark, palette, type sample, and applications",
        "infographic": "title band, main diagram, summary modules, and legend",
    }
    generic = "clear composition with named regions and stable visual hierarchy"
    return by_asset_type.get(asset_type, generic)
720
+
721
+
722
def infer_text_hierarchy(asset_type: str, texts: list[str], request: str) -> list[dict]:
    """Assign a role, target area and priority to each required text string.

    Returns one dict per entry of *texts* (keys ``text``, ``role``, ``area``,
    ``priority``), in the same order. *request* is accepted but not used.
    """
    if not texts:
        return []

    def classify(position: int, text: str) -> tuple[str, str, str]:
        """Return ``(role, area, priority)`` for one text entry."""
        is_first = position == 0
        if asset_type == "poster":
            if is_first:
                return "headline", "largest title zone", "high"
            # Any digit marks the string as a price/offer line.
            if re.search(r"\d", text):
                return "price_or_offer", "prominent price block", "high"
            return "supporting_copy", "secondary copy zone", "medium"
        if asset_type == "ui":
            return ("app_name" if is_first else "ui_label"), "top header or relevant component", "high"
        if asset_type == "diagram":
            return "component_label", "inside its corresponding node box", "high"
        if asset_type == "logo":
            return ("wordmark" if is_first else "brand_label"), "brand board", "high"
        return "label", "content area", "medium"

    hierarchy = []
    for position, text in enumerate(texts):
        role, area, priority = classify(position, text)
        hierarchy.append({"text": text, "role": role, "area": area, "priority": priority})
    return hierarchy
745
+
746
+
747
def infer_must_include(asset_type: str, template_id: str, texts: list[str]) -> list[str]:
    """List the elements the generated image must contain.

    The list comes from *asset_type* (with a generic fallback), is replaced by
    the fixed pipeline nodes for the ``diagram_rag`` template, and gains a
    reserved-text-zone entry when *texts* is non-empty.
    """
    per_asset_type = {
        "poster": ["main visual subject", "readable title/offer hierarchy", "clear negative space"],
        "ui": ["device screen frame", "navigation", "primary action", "realistic content cards"],
        "infographic": ["title band", "main diagram", "callout labels", "legend"],
        "diagram": ["labeled components", "directional arrows", "legend or flow semantics"],
        "product": ["single hero product", "visible material texture", "controlled studio lighting"],
        "photography": ["realistic subject", "specific scene details", "natural imperfections"],
        "character": ["consistent character identity", "turnaround views", "expression close-ups"],
        "illustration": ["main subject", "environment details", "controlled palette"],
        "logo": ["original mark", "wordmark", "palette swatches", "application mockups"],
    }
    required = per_asset_type.get(asset_type, ["main subject", "clear visual hierarchy"])
    if template_id == "diagram_rag":
        required = ["User node", "Retriever node", "Vector DB node", "LLM node", "Answer node", "left-to-right arrows"]
    if texts:
        required.append("reserved readable text zones")
    return required
764
+
765
+
766
def infer_acceptance_criteria(spec: dict) -> list[str]:
    """Build the acceptance checklist for a compiled spec.

    Reads ``aspect`` and ``asset_type`` (required) plus the optional
    ``required_text`` and ``strict_text`` entries of *spec*.
    """
    asset_type = spec["asset_type"]
    conditional = [
        (bool(spec.get("required_text")),
         "Every required text string appears exactly once, unchanged, and readable."),
        (asset_type in {"diagram", "ui", "infographic"},
         "Labels are large enough to read and aligned to their components."),
        (asset_type == "product",
         "Product material and silhouette are clear, with no CGI-plastic tell."),
        (bool(spec.get("strict_text")),
         "Generated image reserves clean text areas; final text is applied from text_overlay_spec."),
    ]
    criteria = [
        f"Image uses {spec['aspect']} composition and matches {asset_type} intent.",
        "Main subject is visible and matches the request.",
        "Composition follows the named layout without incoherent overlap.",
        "No fake logos, garbled filler text, or unrelated decorative clutter.",
    ]
    criteria.extend(sentence for applies, sentence in conditional if applies)
    return criteria
782
+
783
+
784
def overlay_align_hint(item: dict) -> str:
    """Return the horizontal alignment for an overlay item.

    Every role currently maps to ``"center"``; the per-role table only keeps
    the role groups explicit.
    """
    role = str(item.get("role") or "")
    per_role = {
        "price_or_offer": "center",
        "supporting_copy": "center",
        "component_label": "center",
        "ui_label": "center",
    }
    return per_role.get(role, "center")
791
+
792
+
793
def overlay_box_hint(spec: dict, item: dict, idx: int, total: int) -> list[float]:
    """Return a normalized ``[x, y, w, h]`` box for one text overlay.

    The box depends on ``spec["asset_type"]``, on the item's ``role`` / ``area``
    for posters, and on the item's index *idx* among *total* items for stacked
    or evenly spread layouts.
    """
    kind = str(spec.get("asset_type") or "")
    role = str(item.get("role") or "")
    area = str(item.get("area") or "")

    if kind == "poster":
        if role == "headline" or "largest title" in area:
            return [0.08, 0.08, 0.84, 0.16]
        if role == "price_or_offer" or "price" in area:
            return [0.58, 0.72, 0.34, 0.12]
        return [0.08, 0.72, 0.50, 0.10]

    if kind == "diagram":
        # Spread the labels along one horizontal row.
        count = max(1, total)
        width = min(0.24, 0.82 / count)
        if count > 1:
            gap = (0.84 - width * count) / max(1, count - 1)
        else:
            gap = 0
        return [0.08 + idx * (width + gap), 0.42, width, 0.11]

    if kind == "infographic":
        # The first item is the title band; the rest stack below it.
        if idx == 0:
            return [0.08, 0.06, 0.84, 0.12]
        return [0.10, 0.22 + (idx - 1) * 0.14, 0.32, 0.09]

    if kind == "ui":
        return [0.12, 0.08 + idx * 0.10, 0.76, 0.08]
    if kind == "logo":
        return [0.12, 0.72, 0.76, 0.12]
    if kind == "character":
        return [0.06, 0.05, 0.28, 0.08]
    if kind == "product":
        return [0.10, 0.08, 0.80, 0.12]
    return [0.10, 0.08 + idx * 0.10, 0.80, 0.10]
823
+
824
+
825
def build_text_overlay_spec(spec: dict) -> dict:
    """Turn ``spec["text_hierarchy"]`` into a post-processing text overlay spec.

    Each hierarchy item becomes one overlay entry carrying its text, role,
    target area, priority, box, alignment, font weight and a style note. The
    result also carries the overlay mode, a rationale and fixed rendering rules.
    """
    items = spec.get("text_hierarchy", [])
    total = len(items)

    def to_overlay(position: int, item: dict) -> dict:
        return {
            "text": item["text"],
            "role": item["role"],
            "target_area": item["area"],
            "priority": item["priority"],
            "box": overlay_box_hint(spec, item, position, total),
            "align": overlay_align_hint(item),
            "font_weight": "semibold" if item["priority"] == "high" else "regular",
            "style": "crisp typography, high contrast, no distortion",
        }

    return {
        "mode": "postprocess_overlay",
        "reason": "Strict text mode keeps exact copy out of the generative image pass to improve text reliability.",
        "overlays": [to_overlay(position, item) for position, item in enumerate(items)],
        "rules": [
            "Render text after image generation with exact string matching.",
            "Do not translate, rewrite, or stylize text into illegibility.",
            "Keep text inside reserved clear areas with sufficient contrast.",
        ],
    }
851
+
852
+
853
def build_spec(args: argparse.Namespace) -> dict:
    """Compile parsed CLI arguments into the structured prompt spec.

    ``args.request_text`` is required; every other option is read with
    ``getattr(..., None)`` so namespaces that lack an attribute still work.
    When ``strict_text`` is set the spec also carries a ``text_overlay_spec``.
    """

    def opt(name: str):
        # Optional CLI attribute; absent and unset are treated alike.
        return getattr(args, name, None)

    request = args.request_text
    safe_request, safety_notes = sanitize_reference_risks(request)
    profile, _ = read_profile()
    asset_type = route_asset_type(safe_request, opt("asset_type"))
    texts = extract_required_texts(request, opt("text") or [])
    aspect = infer_aspect(request, asset_type, opt("aspect"), profile)
    size = infer_size(aspect, opt("size"), asset_type)
    quality = infer_quality(request, asset_type, texts, opt("quality"), profile)
    subject = opt("subject") or safe_request
    template_id = infer_template_id(safe_request, asset_type, opt("template"))
    # dict.fromkeys de-duplicates while keeping first-seen order.
    negative = list(dict.fromkeys(infer_negative(asset_type, texts, profile) + safety_avoid_list(safety_notes)))
    asset_title = ASSET_ROUTES[asset_type]["title"]

    spec = {
        "schema_version": SCHEMA_VERSION,
        "compiler_version": COMPILER_VERSION,
        "request": request,
        "safe_request": safe_request,
        "safety_rewrite": safety_notes,
        "asset_type": asset_type,
        "asset_title": asset_title,
        "template_id": template_id,
        "template_label": TEMPLATE_DEFS.get(template_id, {}).get("label", ASSET_ROUTES[asset_type]["title"]),
        "aspect": aspect,
        "size": size,
        "quality": quality,
        "subject": subject,
        "required_text": texts,
        "strict_text": bool(opt("strict_text") or False),
        "layout": opt("layout") or infer_layout(template_id, asset_type),
        "text_hierarchy": infer_text_hierarchy(asset_type, texts, request),
        "style_anchors": infer_style_anchors(safe_request, opt("style"), profile),
        "materials": split_csv(opt("materials")) or ["tactile, specific visible materials chosen for the subject"],
        "lighting": opt("lighting") or "controlled, readable light with clear subject hierarchy",
        "palette": split_csv(opt("palette")) or ["restrained palette matched to the asset type"],
        "negative": negative,
        "must_include": infer_must_include(asset_type, template_id, texts),
        "must_avoid": negative,
        "tags": infer_tags(asset_type, request, opt("tags")),
    }
    spec["acceptance_criteria"] = infer_acceptance_criteria(spec)
    if spec["strict_text"]:
        spec["text_overlay_spec"] = build_text_overlay_spec(spec)
    return spec
896
+
897
+
898
def exact_text_block(texts: list[str]) -> str:
    """Return the prompt sentence that pins the exact in-image strings.

    With no required texts, the sentence instead tells the model to avoid
    decorative fake text.
    """
    if texts:
        quoted = " / ".join('"{}"'.format(text) for text in texts)
        prefix = "The image must accurately display these exact strings, unchanged and large enough to read: "
        suffix = ". Use crisp, legible typography; keep Chinese text exactly as written."
        return prefix + quoted + suffix
    return "No required in-image text unless explicitly useful; avoid decorative fake text."
906
+
907
+
908
def reserved_text_block(spec: dict) -> str:
    """Return the strict-text sentence that reserves areas for later overlay.

    The reserved areas are listed as ``<role> in <area>`` for each
    ``text_hierarchy`` item. Without required text, the generic
    "no fake text" sentence is returned instead.
    """
    if spec.get("required_text"):
        placements = [f"{entry['role']} in {entry['area']}" for entry in spec.get("text_hierarchy", [])]
        reserved = ", ".join(placements)
        sentences = (
            "Strict text mode: do not render the exact copy in the generated image. ",
            f"Reserve clean, high-contrast text areas for later overlay ({reserved}). ",
            "Use subtle placeholder-free layout guides only; no fake characters.",
        )
        return "".join(sentences)
    return "No required in-image text unless explicitly useful; avoid decorative fake text."
917
+
918
+
919
def render_visual_prompt(spec: dict) -> str:
    """Render the prompt for strict-text mode.

    The regular prompt is rendered from a copy of ``spec`` whose
    ``required_text`` is emptied, then the reserved-area sentence built from
    the original ``spec`` is appended on a new line.
    """
    # Emptying required_text makes render_prompt take its normal path
    # instead of routing back into this function.
    textless = {**spec, "required_text": []}
    base_prompt = render_prompt(textless)
    return base_prompt + "\n" + reserved_text_block(spec)
924
+
925
+
926
def render_prompt(spec: dict) -> str:
    """Render the final image prompt text for ``spec``.

    Strict-text specs that carry required text are delegated to
    ``render_visual_prompt``. Otherwise the prompt is a newline-joined list
    of sentences chosen by ``spec["asset_type"]``; unrecognised asset types
    fall back to the stylized-illustration wording.
    """
    if spec.get("strict_text") and spec.get("required_text"):
        return render_visual_prompt(spec)

    kind = spec["asset_type"]
    style = "; ".join(spec["style_anchors"])
    materials = ", ".join(spec["materials"])
    palette = ", ".join(spec["palette"])
    negative = "; ".join(spec["negative"])
    must_include = ", ".join(spec.get("must_include", []))
    text_block = exact_text_block(spec["required_text"])
    aspect = spec["aspect"]
    subject = spec["subject"]
    layout = spec.get("layout", "clear composition with named regions")

    # Sentences shared verbatim by most asset types.
    include_line = f"Must include: {must_include}."
    avoid_line = f"Avoid: {negative}."

    if kind == "poster":
        lighting = spec["lighting"]
        lines = [
            f"Design a {aspect} vertical poster for: {subject}.",
            f"Visual direction: {style}. Use a strong layout grid, clear hierarchy, and enough negative space for a readable commercial poster.",
            f"Template: {spec.get('template_label', 'poster')}. Layout: {layout}.",
            "Main subject and scene density: make the core subject specific and visible, with 5-8 relevant supporting details from the brief.",
            include_line,
            f"Materials: {materials}. Lighting: {lighting}. Palette: {palette}.",
            text_block,
            "Promotional information hierarchy must pass the three-glance test: silhouette first, key message second, texture/details third.",
            avoid_line,
        ]
    elif kind == "ui":
        lighting = spec["lighting"]
        lines = [
            f"Design a production-quality {aspect} UI mockup for: {subject}.",
            f"Visual system: {style}. Use a coherent component system, precise spacing, realistic invented data, and crisp typography.",
            f"Template: {spec.get('template_label', 'UI')}. Layout: {layout}.",
            "Include clear navigation, primary content cards, relevant charts/lists/actions, and believable interaction states.",
            include_line,
            f"Materials: {materials}. Lighting/rendering: {lighting}. Palette: {palette}.",
            text_block,
            avoid_line,
        ]
    elif kind == "infographic":
        lighting = spec["lighting"]
        lines = [
            f"Create a {aspect} educational infographic about: {subject}.",
            f"Editorial direction: {style}. Fixed regions: title band, primary diagram, 3 concise summary modules, and a bottom legend.",
            f"Template: {spec.get('template_label', 'infographic')}. Layout: {layout}.",
            "Use leader lines, numbered callouts, labeled parts, and calm classroom-wall clarity.",
            include_line,
            f"Materials/linework: {materials}. Lighting: {lighting}. Palette: {palette}.",
            text_block,
            avoid_line,
        ]
    elif kind == "diagram":
        # Diagrams carry no lighting sentence, so spec["lighting"] is not read.
        lines = [
            f"Landscape {aspect} academic system-architecture figure for: {subject}.",
            f"Style: {style}. White background, large readable labels, clean boxes, and precise alignment.",
            f"Template: {spec.get('template_label', 'diagram')}. Layout: {layout}.",
            "Show layers/components, directional arrows, data/control semantics, and a small legend if useful.",
            include_line,
            f"Rendering: {materials}. Palette: {palette}.",
            text_block,
            avoid_line,
        ]
    elif kind == "product":
        lighting = spec["lighting"]
        # Pseudo-config layout; values are interpolated as-is, unescaped.
        lines = [
            f"/* PRODUCT_RENDER_CONFIG VERSION: 1.0 ASPECT: {aspect} */",
            "{",
            f' "SUBJECT": "{subject}",',
            f' "AESTHETIC": "{style}",',
            f' "MATERIALS": "{materials}",',
            f' "LIGHTING": "{lighting}",',
            f' "PALETTE": "{palette}",',
            f' "LAYOUT": "{layout}",',
            f' "MUST_INCLUDE": "{must_include}",',
            ' "COMPOSITION": "single hero product shot, low three-quarter angle, sharp foreground, editorial finish"',
            "}",
            text_block,
            avoid_line,
        ]
    elif kind == "photography":
        lighting = spec["lighting"]
        lines = [
            f"A candid documentary-style {aspect} photograph of: {subject}.",
            f"Capture language: {style}. Natural full-frame look, unprocessed realism, ordinary imperfect details.",
            f"Layout: {layout}.",
            "Scene density: include 8-12 concrete visible nouns from the brief; no staged studio posing.",
            include_line,
            f"Light: {lighting}. Palette: {palette}.",
            text_block,
            avoid_line,
        ]
    elif kind == "character":
        lighting = spec["lighting"]
        lines = [
            f"Create a {aspect} original character design reference sheet for: {subject}.",
            f"Art direction: {style}. Clean model-sheet clarity on a neutral background.",
            f"Layout: {layout}.",
            "Panels: front/side/back turnaround, 4 expression close-ups, one prop/detail breakout, and a small color swatch strip.",
            include_line,
            f"Materials/linework: {materials}. Lighting: {lighting}. Palette: {palette}.",
            text_block,
            f"Avoid: {negative}; avoid resemblance to existing IP.",
        ]
    elif kind == "logo":
        # Logo boards carry no lighting sentence either.
        lines = [
            f"Create a {aspect} brand identity presentation board for: {subject}.",
            f"Brand direction: {style}. Show an original geometric mark, wordmark, color palette, typography sample, and two small application mockups.",
            f"Layout: {layout}. Must include: {must_include}.",
            f"Materials: {materials}. Palette: {palette}.",
            text_block,
            f"Avoid: {negative}; no stock clip-art, no resemblance to real-world brands.",
        ]
    else:
        lighting = spec["lighting"]
        lines = [
            f"Create a {aspect} stylized illustration for: {subject}.",
            f"Art direction: {style}. Use concrete subject details, controlled composition, and a clear visual hierarchy.",
            f"Template: {spec.get('template_label', 'illustration')}. Layout: {layout}. Must include: {must_include}.",
            f"Materials/texture: {materials}. Lighting: {lighting}. Palette: {palette}.",
            text_block,
            avoid_line,
        ]
    return "\n".join(lines)
1058
+
1059
+
1060
def lint_prompt(prompt: str, asset_type: str | None, quality: str | None, required_texts: list[str]) -> list[dict]:
    """Check a rendered prompt against the conversion hard constraints.

    Args:
        prompt: The prompt text to inspect (leading/trailing space ignored).
        asset_type: Routed asset type, or None when unknown.
        quality: Requested quality level, or None when unknown.
        required_texts: Strings that must appear quoted in the prompt.

    Returns:
        A list of findings, each a dict with ``severity`` ("error" or
        "warning"), ``rule`` and ``message``. An empty list means the prompt
        passed.
    """
    findings: list[dict] = []

    def add(severity: str, rule: str, message: str) -> None:
        findings.append({"severity": severity, "rule": rule, "message": message})

    compact = prompt.strip()
    lower = compact.lower()
    if len(compact) < 80:
        add("error", "prompt.too_short", "Prompt 太短,缺少可执行的视觉约束。")
    # re.ASCII: in the default Unicode mode CJK characters count as word
    # characters, so "\b3:4\b" would not match inside text such as
    # "一张3:4海报". ASCII-only boundaries keep "13:4" rejected while
    # accepting tokens that touch CJK text.
    if not re.search(r"\b(3:4|4:3|16:9|9:16|1:1|portrait|landscape|square)\b", lower, re.ASCII):
        add("error", "prompt.missing_aspect", "Prompt 缺少画幅/宽高比/制品类型开场。")
    # The Chinese negatives sit outside \b: inside running Chinese text they
    # are surrounded by other word characters and a boundary never occurs.
    if not re.search(r"\b(?:avoid|without)\b|\bno |不要|避免", lower, re.ASCII):
        add("warning", "prompt.missing_negative", "Prompt 缺少针对常见失败模式的否定项。")
    quote_pairs = (('"', '"'), ("“", "”"), ("「", "」"), ("『", "』"))
    for text in required_texts:
        # A literal substring test; no regex is needed for an escaped literal.
        if not any(f"{left}{text}{right}" in compact for left, right in quote_pairs):
            add("error", "text.not_quoted", f"必显文字未用引号包住:{text}")
    text_heavy = bool(required_texts) or asset_type in {"poster", "ui", "infographic", "diagram", "logo"}
    if text_heavy and quality and quality != "high":
        add("error", "quality.not_high", "文字/海报/UI/图表类转化应使用 high quality。")
    if has_cjk(compact) and not re.search(r"(garbled|legible|readable|乱码|可读|清晰)", lower):
        add("warning", "text.no_legibility_guard", "含中文文字时建议加入 legible / no garbled characters 约束。")
    buzz_hits = [word for word in BUZZWORDS if word in lower]
    if len(buzz_hits) >= 3:
        add("warning", "prompt.too_many_buzzwords", "空泛形容词过多,建议换成具体材料、光照、构图。")
    if "logo" in lower and "no real" not in lower and "fake logo" not in lower and "真实品牌" not in compact:
        add("warning", "brand.logo_guard", "涉及 logo 时建议明确 no real brand logos / original mark。")
    return findings
1093
+
1094
+
1095
def print_lint(findings: list[dict]) -> None:
    """Print one line per finding, or ``lint: pass`` when there are none."""
    if findings:
        for finding in findings:
            print("{}: {} - {}".format(finding["severity"], finding["rule"], finding["message"]))
        return
    print("lint: pass")
1101
+
1102
+
1103
def sample_record(
    *,
    request: str,
    prompt: str,
    verdict: str,
    reason: str = "",
    image: str = "",
    size: str = "",
    quality: str = "",
    tags: list[str] | None = None,
    spec: dict | None = None,
    source: str = "convert",
    confidence: str = "medium",
    scope: str = "",
) -> dict:
    """Build one sample record dict, stamped with a fresh id and timestamp.

    All arguments are keyword-only. Falsy ``tags`` / ``spec`` are stored as
    an empty list / empty dict.
    """
    record: dict = {
        "schema_version": SCHEMA_VERSION,
        "compiler_version": COMPILER_VERSION,
    }
    record["id"] = new_id()
    record["ts"] = now_iso()
    record.update(
        request=request,
        prompt=prompt,
        verdict=verdict,
        reason=reason,
        image=image,
        size=size,
        quality=quality,
    )
    record["tags"] = tags if tags else []
    record["spec"] = spec if spec else {}
    record.update(source=source, confidence=confidence, scope=scope)
    return record
1136
+
1137
+
1138
def cmd_samples(args: argparse.Namespace) -> int:
    """Handle ``samples add | search | list``.

    * ``add`` appends a new sample record and prints its id.
    * ``search`` scores stored samples against the query terms (exact tag
      hits weigh most, accepted samples get a bonus) and prints the best
      ``args.limit`` matches, as JSON when ``args.json`` is set.
    * ``list`` prints the most recent ``args.limit`` records as JSON.

    Returns 0 on success and 2 for an unknown action.
    """
    action = args.action
    if action == "add":
        rec = sample_record(
            request=args.request or "",
            prompt=args.prompt or "",
            verdict=args.verdict or "pending",
            reason=args.reason or "",
            image=args.image or "",
            size=args.size or "",
            quality=args.quality or "",
            tags=split_csv(args.tags),
            source=args.source or "manual",
            confidence=args.confidence or "medium",
            scope=args.scope or "",
        )
        append_jsonl(samples_path(), rec)
        print(rec["id"])
        return 0
    if action == "search":
        records = read_jsonl(samples_path())
        query = (args.query or "").lower()
        terms = [t for t in re.split(r"[\s,]+", query) if t]

        def term_score(term: str, tags: list[str], text: str) -> int:
            # An exact tag hit carries the highest weight.
            score = 10 if term in tags else 0
            if term.isascii() and term.isalpha() and len(term) <= 4:
                # Short English terms use word boundaries so "ui" does not
                # hit "build"/"gui". re.ASCII keeps the boundary usable when
                # the term touches CJK text ("一个ui界面"), which Unicode
                # \b would treat as one continuous word.
                score += 2 * len(re.findall(rf"\b{re.escape(term)}\b", text, re.ASCII))
            else:
                score += text.count(term)
            return score

        scored = []
        for rec in records:
            if args.verdict and rec.get("verdict") != args.verdict:
                continue
            if terms:
                # Lower-case the record once instead of once per term.
                tags = [str(t).lower() for t in (rec.get("tags") or [])]
                text = f"{rec.get('request') or ''} {rec.get('prompt') or ''}".lower()
                score = sum(term_score(term, tags, text) for term in terms)
            else:
                score = 1
            if score > 0:
                # Accepted samples first, then newest first.
                bonus = 5 if rec.get("verdict") == "accept" else 0
                scored.append((score + bonus, rec.get("ts", ""), rec))
        scored.sort(key=lambda x: (x[0], x[1]), reverse=True)
        hits = [r for _, _, r in scored][: args.limit]
        if args.json:
            print(json.dumps(hits, ensure_ascii=False, indent=2))
        else:
            if not hits:
                print("(无匹配样本)")
            for rec in hits:
                print(f"\n● [{rec.get('verdict')}] {rec.get('id')} tags={rec.get('tags')}")
                print(f" 需求: {rec.get('request')}")
                # A record may lack a prompt; slicing None would crash.
                print(f" prompt: {(rec.get('prompt') or '')[:400]}")
                if rec.get("reason"):
                    print(f" 反馈: {rec.get('reason')}")
        return 0
    if action == "list":
        records = read_jsonl(samples_path())
        # records[-0:] is the whole list, so a limit of 0 needs its own case.
        recent = records[-args.limit :] if args.limit > 0 else []
        print(json.dumps(recent, ensure_ascii=False, indent=2))
        return 0
    print(f"未知 samples 动作:{action}", file=sys.stderr)
    return 2
1204
+
1205
+
1206
def cmd_feedback(args: argparse.Namespace) -> int:
    """Record an accept/reject verdict on a stored sample.

    ``args.id`` selects the sample; ``None``, ``""`` or ``"last"`` means the
    most recent record. The whole samples file is rewritten with the updated
    record. Returns 1 when there are no samples or the id is unknown.
    """
    records = read_jsonl(samples_path())
    if not records:
        print("没有任何样本记录。", file=sys.stderr)
        return 1

    target = args.id
    if target in (None, "", "last"):
        idx = len(records) - 1
    else:
        idx = None
        for position, candidate in enumerate(records):
            if candidate.get("id") == target:
                idx = position
                break
        if idx is None:
            print(f"找不到样本 id:{target}", file=sys.stderr)
            return 1

    chosen = records[idx]
    chosen["verdict"] = args.verdict
    # Optional fields only overwrite the stored value when supplied.
    optional_updates = (
        ("reason", args.reason),
        ("failure_category", args.category),
        ("image", args.image),
    )
    for key, value in optional_updates:
        if value:
            chosen[key] = value
    chosen["feedback_ts"] = now_iso()
    rewrite_jsonl(samples_path(), records)
    print(f"已记录反馈:{chosen['id']} -> {args.verdict}")
    print("提示:若这条揭示了稳定口味,记得 `profile note` 或 `profile set` 沉淀到偏好档案。")
    return 0
1231
+
1232
+
1233
+ # --------------------------------------------------------------------------- #
1234
+ # judge:CLI 只存储 agent 给出的评分,自己不看图、不评分、不调 Codex
1235
+ # --------------------------------------------------------------------------- #
1236
# Scoring guide printed by `judge rubric`: one description per score field.
JUDGE_RUBRIC = dict(
    text_accuracy="图中文字是否与 prompt 要求逐字一致、无乱码(无文字给 10)",
    composition="构图/布局/层级是否清晰",
    style_match="是否贴合 prompt 指定的风格锚点",
    artifacts="反向分,10=无瑕疵,0=明显畸变/多指/糊",
    verdict="任一轴 <=5 则 revise,否则 pass",
    failure_category="verdict=revise 时建议填 text_error/layout_error/missing_subject/wrong_style/too_cluttered/bad_composition/brand_risk/low_readability",
)
1244
+
1245
+
1246
def cmd_judge(args: argparse.Namespace) -> int:
    """Handle ``judge rubric | record | list``.

    The CLI only stores scores supplied by the caller.

    * ``rubric`` prints the scoring guide and a usage example.
    * ``record`` validates ``--score`` as a JSON object and appends it,
      together with image, prompt and optional sample id, to the judgement
      log.
    * ``list`` prints the most recent ``args.limit`` judgements as JSON.

    Returns 0 on success and 2 on a usage error or unknown action.
    """
    action = args.action
    if action == "rubric":
        print(json.dumps(JUDGE_RUBRIC, ensure_ascii=False, indent=2))
        print(
            '\n评分由 agent(具备视觉)对照 prompt 给出后,用:\n'
            ' judge record --image <path> --prompt "<prompt>" '
            "--score '{\"text_accuracy\":9,\"composition\":8,\"style_match\":9,"
            '"artifacts":9,"verdict":"pass","notes":"..."}\''
        )
        return 0
    if action == "record":
        if not args.image or not args.prompt or not args.score:
            print("用法:judge record --image path --prompt \"...\" --score '<json>'", file=sys.stderr)
            return 2
        try:
            score = json.loads(args.score)
        except json.JSONDecodeError as exc:
            print(f"--score 不是合法 JSON:{exc}", file=sys.stderr)
            return 2
        if not isinstance(score, dict):
            print("--score 必须是 JSON 对象(含 verdict 等字段)", file=sys.stderr)
            return 2
        rec = {
            "ts": now_iso(),
            "image": str(Path(args.image).expanduser()),
            "prompt": args.prompt,
            "score": score,
            "sample_id": args.sample_id or "",
        }
        append_jsonl(judgements_path(), rec)
        print(f"已记录评分 verdict={score.get('verdict', '?')}")
        return 0
    if action == "list":
        stored = read_jsonl(judgements_path())
        # stored[-0:] is the whole list, so a limit of 0 needs its own case.
        recent = stored[-args.limit :] if args.limit > 0 else []
        print(json.dumps(recent, ensure_ascii=False, indent=2))
        return 0
    print(f"未知 judge 动作:{args.action}", file=sys.stderr)
    return 2
1284
+
1285
+
1286
+ # --------------------------------------------------------------------------- #
1287
+ # handoff:把 prompt 包装成可交给 Codex 的现成指令块(只打印,不执行)
1288
+ # --------------------------------------------------------------------------- #
1289
# Fixed preamble placed at the top of every codex-exec handoff instruction.
HANDOFF_HEADER = " ".join(
    (
        "Use the imagegen skill with the built-in image_gen tool",
        "(no CLI fallback, no OPENAI_API_KEY).",
        "Use the gpt-image-2 model specifically if a model choice is exposed.",
    )
)
1294
+
1295
+
1296
+ def handoff_text(prompt: str, out: str | None, size: str | None, quality: str | None, target: str) -> str:
1297
+ save_clause = f"Save to {out}." if out else "Save under ./codex-images/<UTC-timestamp>-<n>.png."
1298
+ spec = []
1299
+ if size:
1300
+ spec.append(f"at size {size}")
1301
+ if quality:
1302
+ spec.append(f"{quality} quality")
1303
+ spec_clause = (" " + " ".join(spec) + ".") if spec else ""
1304
+
1305
+ if target == "raw":
1306
+ return prompt
1307
+
1308
+ if target == "codex-image":
1309
+ # codex-image 插件把整段 $ARGUMENTS 原样透传给 imagegen,所以这里【不加外层
1310
+ # 引号】——prompt 内部用于精确文字的双引号(如 "山川茶事")才不会引号冲突损坏。
1311
+ line = " ".join(prompt.split())
1312
+ return f"/codex-image:generate {line} {save_clause}{spec_clause} Use the gpt-image-2 model."
1313
+
1314
+ if target == "codex-exec":
1315
+ instruction = (
1316
+ f"{HANDOFF_HEADER}\n{save_clause}{spec_clause}\n"
1317
+ "For each saved image print exactly one line: SAVED: <absolute path>\n\n"
1318
+ f"Image prompt:\n{prompt}"
1319
+ )
1320
+ # 用单引号包裹,转义内部单引号
1321
+ safe = instruction.replace("'", "'\\''")
1322
+ return f"codex exec --full-auto --skip-git-repo-check -- '{safe}'"
1323
+
1324
+ raise ValueError(f"未知 target:{target}")
1325
+
1326
+
1327
def cmd_handoff(args: argparse.Namespace) -> int:
    """Print a ready-made handoff instruction for an existing prompt.

    With ``--record-pending`` a pending sample is appended first, which
    requires ``--request``, and its id is printed as a ``# sample_id:`` line.
    Returns 2 on a usage error and 0 otherwise.
    """
    if not args.prompt:
        print("用法:handoff --request \"<原始需求>\" --prompt \"<prompt>\" [--out path] [--target codex-image|codex-exec|raw]", file=sys.stderr)
        return 2
    wants_record = args.record_pending
    if wants_record and not args.request:
        print("--record-pending 需要同时提供 --request,避免样本缺少原始需求。", file=sys.stderr)
        return 2

    if wants_record:
        spec = {}
        if args.spec:
            try:
                spec = json.loads(args.spec)
            except json.JSONDecodeError as exc:
                print(f"--spec 不是合法 JSON:{exc}", file=sys.stderr)
                return 2
        # --spec may decode to a non-object; only a dict can supply a scope.
        scope = spec.get("asset_type", "") if isinstance(spec, dict) else ""
        rec = sample_record(
            request=args.request or "",
            prompt=args.prompt,
            verdict="pending",
            image=args.out or "",
            size=args.size or "",
            quality=args.quality or "",
            tags=split_csv(args.tags),
            spec=spec,
            source="handoff",
            confidence="medium",
            scope=scope,
        )
        append_jsonl(samples_path(), rec)
        print(f"# sample_id: {rec['id']}")

    # Hint line telling the user where to run the printed command.
    banners = {
        "codex-image": "# 复制到 Claude Code 里执行(已装 codex-image 插件时):",
        "codex-exec": "# 终端直接执行:",
    }
    banner = banners.get(args.target)
    if banner is not None:
        print(banner)
    print(handoff_text(args.prompt, args.out, args.size, args.quality, args.target))
    return 0
1365
+
1366
+
1367
def cmd_lint(args: argparse.Namespace) -> int:
    """Lint ``args.prompt`` and print the findings.

    Output is JSON when ``args.json`` is set, plain lines otherwise.
    Returns 1 when any finding has severity ``error``, else 0.
    """
    findings = lint_prompt(args.prompt or "", args.asset_type, args.quality, args.text or [])
    if args.json:
        print(json.dumps({"findings": findings}, ensure_ascii=False, indent=2))
    else:
        print_lint(findings)
    has_error = any(finding["severity"] == "error" for finding in findings)
    return 1 if has_error else 0
1375
+
1376
+
1377
def cmd_convert(args: argparse.Namespace) -> int:
    """Convert a natural-language request into a prompt, lint it, and print it.

    A list-valued ``args.request_text`` is joined with spaces first. With
    ``--record-pending`` a pending sample is appended and its id is added to
    the spec. Output is a JSON document when ``args.json`` is set, otherwise
    sectioned text.

    Returns 2 for an empty request, 1 when lint reports an error, else 0.
    """
    if isinstance(args.request_text, list):
        args.request_text = " ".join(args.request_text)
    if not args.request_text:
        print("用法:convert \"<自然语言画图需求>\"", file=sys.stderr)
        return 2

    spec = build_spec(args)
    prompt = render_prompt(spec)
    # In strict-text mode the exact copy is deliberately absent from the
    # prompt, so the quoted-text lint rule must not be applied.
    texts_to_lint = spec["required_text"] if not spec.get("strict_text") else []
    findings = lint_prompt(prompt, spec["asset_type"], spec["quality"], texts_to_lint)
    exit_code = 1 if any(finding["severity"] == "error" for finding in findings) else 0

    if args.record_pending:
        rec = sample_record(
            request=spec["request"],
            prompt=prompt,
            verdict="pending",
            image=args.out or "",
            size=spec["size"],
            quality=spec["quality"],
            tags=spec["tags"],
            spec=spec,
            source="convert",
            confidence="medium",
            scope=spec["asset_type"],
        )
        append_jsonl(samples_path(), rec)
        spec["sample_id"] = rec["id"]

    if args.no_handoff:
        handoff = None
    else:
        handoff = handoff_text(prompt, args.out, spec["size"], spec["quality"], args.target)

    if args.json:
        payload = {
            "spec": spec,
            "prompt": prompt,
            "text_overlay_spec": spec.get("text_overlay_spec"),
            "acceptance_criteria": spec.get("acceptance_criteria", []),
            "lint": findings,
            "handoff": handoff,
        }
        print(json.dumps(payload, ensure_ascii=False, indent=2))
        return exit_code

    print("## Prompt")
    print(prompt)
    print()
    print(f"推荐:size={spec['size']} quality={spec['quality']} asset_type={spec['asset_type']}")
    print()
    print("## Lint")
    print_lint(findings)
    if handoff:
        print()
        print("## Handoff")
        print(handoff)
    overlay_spec = spec.get("text_overlay_spec")
    if overlay_spec:
        print()
        print("## Text Overlay Spec")
        print(json.dumps(overlay_spec, ensure_ascii=False, indent=2))
    print()
    print("## Acceptance Criteria")
    for criterion in spec.get("acceptance_criteria", []):
        print(f"- {criterion}")
    if "sample_id" in spec:
        print()
        print(f"sample_id: {spec['sample_id']}")
    return exit_code
1447
+
1448
+
1449
def prompt_digest(prompt: str) -> str:
    """Return the first 12 hex characters of the prompt's SHA-256 (UTF-8)."""
    hasher = hashlib.sha256()
    hasher.update(prompt.encode("utf-8"))
    return hasher.hexdigest()[:12]
1451
+
1452
+
1453
def normalize_text_list(value: object) -> list[str]:
    """Coerce ``value`` into a list of strings.

    ``None`` becomes an empty list, a list has each element stringified, and
    any other value becomes a single-element list.
    """
    if isinstance(value, list):
        return list(map(str, value))
    return [] if value is None else [str(value)]
1459
+
1460
+
1461
def namespace_from_case(case: dict) -> argparse.Namespace:
    """Translate one benchmark case dict into a convert-style namespace.

    The request is taken from the first truthy value among ``request``,
    ``brief`` and ``prompt_request``. Recording and handoff are switched
    off and JSON output is switched on.

    Raises:
        ValueError: when the case carries no request text.
    """
    request = None
    for key in ("request", "brief", "prompt_request"):
        request = case.get(key)
        if request:
            break
    if not request:
        raise ValueError("case 缺少 request 字段")

    field = case.get
    options = {
        "request_text": str(request),
        "asset_type": field("asset_type"),
        "aspect": field("aspect"),
        "text": normalize_text_list(field("text") or field("required_text")),
        "subject": field("subject"),
        "style": field("style"),
        "materials": field("materials"),
        "lighting": field("lighting"),
        "palette": field("palette"),
        "size": field("size"),
        "quality": field("quality"),
        "out": field("out"),
        "tags": field("tags"),
        "target": field("target") or "codex-image",
        "template": field("template") or field("template_id"),
        "layout": field("layout"),
        "strict_text": bool(field("strict_text", False)),
        # Benchmark runs must not write samples or emit handoff text.
        "record_pending": False,
        "no_handoff": True,
        "json": True,
    }
    return argparse.Namespace(**options)
1487
+
1488
+
1489
def load_benchmark_cases(path: Path) -> list[dict]:
    """Load benchmark cases from a JSON or JSONL file.

    Accepted layouts: a JSON array of objects, a JSON object with a
    ``cases`` array, a single JSON object (one case), or JSONL with one
    object per non-blank line. An empty file yields no cases.

    Raises:
        OSError: when the file cannot be read.
        ValueError: when the content is not valid JSON/JSONL
            (``json.JSONDecodeError`` is a ``ValueError``) or when any case
            is not a JSON object.
    """

    def as_cases(items: list) -> list[dict]:
        # Reject non-object entries here with ValueError: dict(5) would raise
        # TypeError instead, and a bare string or number would only fail
        # later when the case is used.
        cases = []
        for position, item in enumerate(items, start=1):
            if not isinstance(item, dict):
                raise ValueError(f"benchmark case #{position} 不是 JSON 对象")
            cases.append(dict(item))
        return cases

    def parse_jsonl(raw: str) -> list[dict]:
        return as_cases([json.loads(line) for line in raw.splitlines() if line.strip()])

    text = path.read_text(encoding="utf-8").strip()
    if not text:
        return []
    if text.startswith("["):
        data = json.loads(text)
        if not isinstance(data, list):
            raise ValueError("JSON benchmark 文件必须是数组或 JSONL")
        return as_cases(data)
    if text.startswith("{"):
        try:
            data = json.loads(text)
        except json.JSONDecodeError:
            # Not one JSON document: treat it as one object per line.
            return parse_jsonl(text)
        cases = data.get("cases") if isinstance(data, dict) else None
        if isinstance(cases, list):
            return as_cases(cases)
        return [dict(data)]
    return parse_jsonl(text)
1508
+
1509
+
1510
def convert_for_benchmark(case: dict) -> dict:
    """Run one benchmark case through spec building, rendering and lint.

    Returns a dict with the case id, spec, prompt, prompt digest, lint
    findings and acceptance criteria.
    """
    spec = build_spec(namespace_from_case(case))
    prompt = render_prompt(spec)
    # In strict-text mode the exact copy is kept out of the prompt, so the
    # quoted-text rule is not applied.
    texts_to_lint = spec["required_text"] if not spec.get("strict_text") else []
    findings = lint_prompt(prompt, spec["asset_type"], spec["quality"], texts_to_lint)
    case_id = case.get("id") or case.get("name") or ""
    return {
        "case_id": case_id,
        "spec": spec,
        "prompt": prompt,
        "prompt_digest": prompt_digest(prompt),
        "lint": findings,
        "acceptance_criteria": spec.get("acceptance_criteria", []),
    }
1524
+
1525
+
1526
def cmd_benchmark(args: argparse.Namespace) -> int:
    """Run every benchmark case ``args.runs`` times and report stability.

    A case passes when all runs produce the same prompt digest and lint
    reports no error. Output is JSON when ``args.json`` is set.

    Returns 0 when everything passes, 1 when any case fails, and 2 when the
    arguments or the cases file are unusable.
    """
    # With zero runs there is no result to report and runs[0] would raise.
    if args.runs < 1:
        print("--runs 必须是 >= 1 的整数。", file=sys.stderr)
        return 2
    try:
        cases = load_benchmark_cases(Path(args.cases).expanduser())
    except (OSError, ValueError, TypeError, json.JSONDecodeError) as exc:
        print(f"读取 benchmark cases 失败:{exc}", file=sys.stderr)
        return 2
    results = []
    total_errors = 0
    unstable = 0
    for idx, case in enumerate(cases, start=1):
        if not isinstance(case, dict):
            # A malformed entry is reported as a failed case, not a crash.
            results.append({"case_index": idx, "case_id": "", "error": "case 不是 JSON 对象", "runs": []})
            total_errors += 1
            continue
        runs = []
        try:
            for _ in range(args.runs):
                runs.append(convert_for_benchmark(case))
        except ValueError as exc:
            result = {"case_index": idx, "case_id": case.get("id", ""), "error": str(exc), "runs": []}
            results.append(result)
            total_errors += 1
            continue
        digests = {run["prompt_digest"] for run in runs}
        lint_errors = [item for run in runs for item in run["lint"] if item["severity"] == "error"]
        total_errors += len(lint_errors)
        if len(digests) > 1:
            unstable += 1
        first = runs[0]
        results.append(
            {
                "case_index": idx,
                "case_id": first["case_id"] or f"case-{idx}",
                "asset_type": first["spec"]["asset_type"],
                "template_id": first["spec"]["template_id"],
                "strict_text": first["spec"].get("strict_text", False),
                "runs": args.runs,
                "stable": len(digests) == 1,
                "prompt_digest": first["prompt_digest"],
                "lint_errors": lint_errors,
                "lint_warnings": [item for run in runs for item in run["lint"] if item["severity"] == "warning"],
                "acceptance_criteria": first["acceptance_criteria"],
            }
        )
    summary = {
        "cases": len(cases),
        "runs_per_case": args.runs,
        "lint_error_count": total_errors,
        "unstable_case_count": unstable,
        "pass": total_errors == 0 and unstable == 0,
    }
    output = {"summary": summary, "results": results}
    if args.json:
        print(json.dumps(output, ensure_ascii=False, indent=2))
    else:
        print(f"benchmark: cases={summary['cases']} runs={args.runs} pass={summary['pass']}")
        for result in results:
            if result.get("error"):
                print(f"- {result['case_id'] or result['case_index']}: error {result['error']}")
                continue
            status = "PASS" if result["stable"] and not result["lint_errors"] else "FAIL"
            print(
                f"- {result['case_id']}: {status} asset={result['asset_type']} "
                f"template={result['template_id']} digest={result['prompt_digest']}"
            )
            for item in result["lint_errors"]:
                print(f" error: {item['rule']} - {item['message']}")
    return 0 if summary["pass"] else 1
1590
+
1591
+
1592
def find_sample(sample_id: str) -> tuple[list[dict], int] | tuple[None, None]:
    """Locate a stored sample by id.

    Returns ``(records, index)`` for the matching record, where ``""`` or
    ``"last"`` selects the most recent one. Returns ``(None, None)`` when
    there are no records or no record carries that id.
    """
    records = read_jsonl(samples_path())
    if not records:
        return None, None
    if sample_id in ("", "last"):
        return records, len(records) - 1
    for position, candidate in enumerate(records):
        if candidate.get("id") == sample_id:
            return records, position
    return None, None
1602
+
1603
+
1604
def revise_from_spec(spec: dict, category: str) -> tuple[dict, str, list[str]]:
    """Produce a revised spec and prompt for one failure category.

    The input ``spec`` is left untouched: the revised spec is a copy whose
    list fields are rebuilt rather than extended in place.

    Args:
        spec: The spec of the prompt that failed.
        category: A key of ``FAILURE_PLAYBOOK``.

    Returns:
        ``(revised_spec, revised_prompt, changes)`` where ``changes`` holds
        the playbook's revision note.

    Raises:
        KeyError: when ``category`` is not in ``FAILURE_PLAYBOOK``.
    """
    playbook = FAILURE_PLAYBOOK[category]
    extra_negative = list(playbook["add_negative"])

    revised = dict(spec)
    revised["compiler_version"] = COMPILER_VERSION
    revised["revision_reason"] = category
    revised["revision_label"] = playbook["label"]

    # dict(spec) is a shallow copy, so appending to revised["negative"]
    # would also grow the caller's list. Build a fresh list instead, and
    # tolerate a stored null.
    negative = list(spec.get("negative") or [])
    for item in extra_negative:
        if item not in negative:
            negative.append(item)
    revised["negative"] = negative
    revised["must_avoid"] = list(dict.fromkeys(list(revised.get("must_avoid") or []) + extra_negative))

    criteria = list(revised.get("acceptance_criteria") or [])
    criteria.append(f"Revision addresses {category}: {playbook['label']}.")
    revised["acceptance_criteria"] = criteria

    if category == "text_error" and revised.get("required_text"):
        # Text failures switch to strict mode: copy is overlaid afterwards.
        revised["strict_text"] = True
        revised["text_overlay_spec"] = build_text_overlay_spec(revised)
    if category in {"layout_error", "bad_composition", "too_cluttered"}:
        revised["layout"] = (
            str(revised.get("layout") or "clear layout")
            + "; stronger region boundaries, fixed reading order, no overlapping elements"
        )
    if category == "missing_subject":
        # A stored null subject must not crash the revision.
        subject = str(revised.get("subject") or "").strip()
        revised["subject"] = f"{subject} — main subject must dominate the frame"
    prompt = render_prompt(revised)
    return revised, prompt, [playbook["revision"]]
1630
+
1631
+
1632
def revise_plain_prompt(prompt: str, category: str) -> tuple[str, list[str]]:
    """Append the playbook's revision note and avoid-list to a bare prompt.

    Returns ``(revised_prompt, changes)`` where ``changes`` holds the
    playbook's revision note.
    """
    playbook = FAILURE_PLAYBOOK[category]
    focus_line = f"Revision focus: {playbook['revision']}"
    avoid_line = "Additional avoid constraints: " + "; ".join(playbook["add_negative"]) + "."
    # Trailing whitespace is trimmed so exactly one newline separates the
    # original prompt from the appended lines.
    revised = f"{prompt.rstrip()}\n{focus_line}\n{avoid_line}"
    return revised, [playbook["revision"]]
1640
+
1641
+
1642
def cmd_revise(args: argparse.Namespace) -> int:
    """Handle the ``revise`` sub-command.

    Builds a revised prompt for a given failure category, starting from a
    stored sample (``--sample-id``), an inline spec (``--spec``) or a plain
    prompt (``--prompt``), lints it, optionally records it as a pending
    sample, and prints the result.

    Returns:
        0 on success, 1 when the sample cannot be found or lint reports an
        error, 2 on usage errors (unknown category, invalid ``--spec``,
        nothing to revise).
    """
    if args.list_reasons:
        print(json.dumps(FAILURE_PLAYBOOK, ensure_ascii=False, indent=2))
        return 0
    category = args.reason
    if category not in FAILURE_PLAYBOOK:
        print(f"未知失败分类:{category}", file=sys.stderr)
        return 2

    sample = None
    if args.sample_id:
        records, idx = find_sample(args.sample_id)
        # find_sample resolves "last" to len(records) - 1, which is -1 for an
        # empty store; treat that like "not found" instead of indexing [].
        if not records or idx is None or idx < 0:
            print(f"找不到样本:{args.sample_id}", file=sys.stderr)
            return 1
        sample = records[idx]

    if sample:
        spec = sample.get("spec") or {}
        base_prompt = sample.get("prompt", "")
        request = sample.get("request", "")
    else:
        spec = {}
        if args.spec:
            try:
                spec = json.loads(args.spec)
            except json.JSONDecodeError as exc:
                print(f"--spec 不是合法的 JSON:{exc}", file=sys.stderr)
                return 2
            if spec and not isinstance(spec, dict):
                print("--spec 必须是 JSON 对象。", file=sys.stderr)
                return 2
            spec = spec or {}
        base_prompt = args.prompt or ""
        request = args.request or ""

    if spec:
        revised_spec, revised_prompt, changes = revise_from_spec(spec, category)
    else:
        if not base_prompt:
            print("没有可修订的 spec 或 prompt。请提供 --sample-id、--spec 或 --prompt。", file=sys.stderr)
            return 2
        revised_spec = {}
        revised_prompt, changes = revise_plain_prompt(base_prompt, category)

    # With strict_text the required text is not passed to lint (an empty list
    # is sent instead).
    findings = lint_prompt(
        revised_prompt,
        revised_spec.get("asset_type"),
        revised_spec.get("quality"),
        [] if revised_spec.get("strict_text") else revised_spec.get("required_text", []),
    )

    new_sample_id = ""
    if args.record_pending:
        rec = sample_record(
            request=request,
            prompt=revised_prompt,
            verdict="pending",
            reason=f"revised from {category}",
            image=args.out or "",
            size=revised_spec.get("size", ""),
            quality=revised_spec.get("quality", ""),
            tags=revised_spec.get("tags", []),
            spec=revised_spec,
            source="revise",
            confidence="medium",
            scope=revised_spec.get("asset_type", ""),
        )
        append_jsonl(samples_path(), rec)
        new_sample_id = rec["id"]

    result = {
        "reason": category,
        "reason_label": FAILURE_PLAYBOOK[category]["label"],
        "changes": changes,
        "spec": revised_spec,
        "prompt": revised_prompt,
        "text_overlay_spec": revised_spec.get("text_overlay_spec") if revised_spec else None,
        "lint": findings,
        "sample_id": new_sample_id,
    }
    if args.json:
        print(json.dumps(result, ensure_ascii=False, indent=2))
    else:
        print(f"reason: {category} ({FAILURE_PLAYBOOK[category]['label']})")
        print("## Revised Prompt")
        print(revised_prompt)
        if result["text_overlay_spec"]:
            print("\n## Text Overlay Spec")
            print(json.dumps(result["text_overlay_spec"], ensure_ascii=False, indent=2))
        print("\n## Lint")
        print_lint(findings)
        if new_sample_id:
            print(f"\nsample_id: {new_sample_id}")
    return 1 if any(item["severity"] == "error" for item in findings) else 0
1721
+
1722
+
1723
+ # --------------------------------------------------------------------------- #
1724
+ # overlay / visual gates:把“高质量”变成可执行后处理与回归门
1725
+ # --------------------------------------------------------------------------- #
1726
def load_json_value(value: str | None) -> dict:
    """Load a JSON object from an inline string, ``@file`` reference or path.

    Resolution order: ``@path`` always reads the file; text starting with
    ``{`` is parsed as-is; anything else is read from disk when such a path
    exists and otherwise parsed as-is.

    Raises:
        ValueError: If the parsed JSON is not an object (``json`` decoding
            errors are also ``ValueError`` subclasses).
    """
    if not value:
        return {}
    text = value.strip()
    if text.startswith("@"):
        text = Path(text[1:]).expanduser().read_text(encoding="utf-8")
    elif not text.startswith("{"):
        candidate = Path(text).expanduser()
        if candidate.exists():
            text = candidate.read_text(encoding="utf-8")
    parsed = json.loads(text)
    if isinstance(parsed, dict):
        return parsed
    raise ValueError("JSON 必须是对象")
1742
+
1743
+
1744
def extract_spec_payload(data: dict) -> dict:
    """Return the spec dict embedded in ``data``, or ``data`` itself.

    Looks for a dict under ``spec`` first, then under ``compiled.spec`` and
    ``reference_sheet.spec``; when none is found the input is returned
    unchanged.
    """
    direct = data.get("spec")
    if isinstance(direct, dict):
        return direct
    for wrapper_key in ("compiled", "reference_sheet"):
        wrapper = data.get(wrapper_key)
        if not isinstance(wrapper, dict):
            continue
        nested = wrapper.get("spec")
        if isinstance(nested, dict):
            return nested
    return data
1756
+
1757
+
1758
def default_overlay_out(image_path: Path) -> Path:
    """Derive the default output path: ``<stem>.final<suffix>`` next to the input.

    A missing suffix falls back to ``.png``.
    """
    suffix = image_path.suffix or ".png"
    return image_path.with_name(image_path.stem + ".final" + suffix)
1760
+
1761
+
1762
def font_candidates() -> list[str]:
    """Return the font files probed for overlay text, in priority order."""
    system_fonts = "/System/Library/Fonts"
    supplemental = f"{system_fonts}/Supplemental"
    return [
        f"{system_fonts}/PingFang.ttc",
        f"{system_fonts}/Hiragino Sans GB.ttc",
        f"{supplemental}/Songti.ttc",
        f"{supplemental}/Arial Unicode.ttf",
        "/Library/Fonts/Arial Unicode.ttf",
        f"{system_fonts}/Helvetica.ttc",
    ]
1771
+
1772
+
1773
def choose_font_path(explicit: str | None = None) -> str | None:
    """Pick the font file to use for overlay text.

    An explicit path wins: it is returned user-expanded when it exists and
    verbatim otherwise. Without one, the first existing entry of
    ``font_candidates()`` is returned, or ``None`` when none exists.
    """
    if explicit:
        expanded = Path(explicit).expanduser()
        if expanded.exists():
            return str(expanded)
        return explicit
    return next((candidate for candidate in font_candidates() if Path(candidate).exists()), None)
1781
+
1782
+
1783
def load_pil_font(size: int, font_path: str | None):
    """Load a Pillow font at ``size``.

    Tries ``font_path`` (when given), then ``Arial.ttf``; if both raise
    ``OSError`` the Pillow default font is returned.
    """
    from PIL import ImageFont

    attempts = ([font_path] if font_path else []) + ["Arial.ttf"]
    for candidate in attempts:
        try:
            return ImageFont.truetype(candidate, size=size)
        except OSError:
            continue
    return ImageFont.load_default()
1795
+
1796
+
1797
def text_bbox(draw, text: str, font) -> tuple[int, int]:
    """Measure ``text`` as ``(width, height)``; each dimension is at least 1.

    Line spacing is 14% of the font's ``size`` attribute (12 is assumed when
    the font has none), with a floor of 2.
    """
    line_gap = max(2, int(getattr(font, "size", 12) * 0.14))
    left, top, right, bottom = draw.multiline_textbbox((0, 0), text, font=font, spacing=line_gap)
    return max(1, right - left), max(1, bottom - top)
1800
+
1801
+
1802
def wrap_text(draw, text: str, font, max_width: int) -> str:
    """Greedily wrap ``text`` so that each line measures at most ``max_width``.

    Text that already fits is returned unchanged. Otherwise text detected by
    ``has_cjk`` is broken per character and other text at whitespace runs. A
    single token wider than ``max_width`` is kept on its own line.
    """
    if not text:
        return ""

    def width_of(candidate: str) -> int:
        return text_bbox(draw, candidate, font)[0]

    if width_of(text) <= max_width:
        return text

    pieces = list(text) if has_cjk(text) else re.split(r"(\s+)", text)
    wrapped: list[str] = []
    line = ""
    for piece in pieces:
        extended = line + piece
        if line and width_of(extended) > max_width:
            # Close the current line and start the next one with this piece.
            wrapped.append(line.rstrip())
            line = piece.lstrip()
        else:
            line = extended
    if line.strip():
        wrapped.append(line.strip())
    return "\n".join(wrapped)
1820
+
1821
+
1822
def fit_overlay_text(draw, text: str, box: tuple[int, int, int, int], font_path: str | None):
    """Find the largest font size at which wrapped ``text`` fits inside ``box``.

    Binary-searches sizes from 8 up to 72% of the box height (at least 12).

    Args:
        draw: Object passed through to ``wrap_text`` / ``text_bbox`` for measuring.
        text: The text to fit.
        box: ``(left, top, right, bottom)`` in pixels.
        font_path: Font file handed to ``load_pil_font``; may be ``None``.

    Returns:
        ``(font, wrapped_text)``. When no size fits, the smallest font is
        returned together with the text wrapped at that size.
    """
    max_width = max(8, box[2] - box[0])
    max_height = max(8, box[3] - box[1])
    lo, hi = 8, max(12, int(max_height * 0.72))
    best_font = load_pil_font(lo, font_path)
    # Fallback when nothing fits: still wrap at the minimum size so the text
    # does not overflow horizontally as one long unbroken line.
    best_text = wrap_text(draw, text, best_font, max_width)
    while lo <= hi:
        mid = (lo + hi) // 2
        font = load_pil_font(mid, font_path)
        wrapped = wrap_text(draw, text, font, max_width)
        w, h = text_bbox(draw, wrapped, font)
        if w <= max_width and h <= max_height:
            best_font, best_text = font, wrapped
            lo = mid + 1
        else:
            hi = mid - 1
    return best_font, best_text
1839
+
1840
+
1841
def normalized_box_to_pixels(box: list[float], width: int, height: int) -> tuple[int, int, int, int]:
    """Convert a normalized ``[x, y, w, h]`` box to pixel ``(x0, y0, x1, y1)``.

    The top-left corner is clamped to ``[0, 1]`` and the bottom-right corner
    to ``[0.01, 1]`` before scaling by the image size.
    """
    x, y, w, h = box

    def clamp(value: float, floor: float) -> float:
        return max(floor, min(1, value))

    left = int(clamp(x, 0) * width)
    top = int(clamp(y, 0) * height)
    right = int(clamp(x + w, 0.01) * width)
    bottom = int(clamp(y + h, 0.01) * height)
    return (left, top, right, bottom)
1849
+
1850
+
1851
def overlay_spec_from_texts(texts: list[str], spec: dict | None = None) -> dict:
    """Build a text overlay spec from bare strings.

    The first string becomes the ``headline`` and the rest
    ``supporting_copy``. A missing/empty ``spec`` defaults to a 3:4 poster.
    The hierarchy is attached to a copy of the spec and handed to
    ``build_text_overlay_spec``.
    """
    base = spec or {"asset_type": "poster", "aspect": "3:4"}
    hierarchy = [
        {
            "text": text,
            "role": "headline" if position == 0 else "supporting_copy",
            "area": "largest title zone",
            "priority": "high",
        }
        for position, text in enumerate(texts)
    ]
    return build_text_overlay_spec({**base, "text_hierarchy": hierarchy})
1860
+
1861
+
1862
def chart_overlay_to_overlays(spec: dict) -> list[dict]:
    """Turn a chart overlay spec into a list of overlay items.

    Only the chart title is rendered: it is taken from ``spec["title"]`` or,
    failing that, ``spec["chart_data_spec"]["title"]``.

    Returns:
        A one-element list with the headline overlay, or ``[]`` when no title
        is available.
    """
    # ``chart_data_spec`` may be present but null (or otherwise not a dict) in
    # hand-written JSON; treat that the same as "no nested title".
    chart_data = spec.get("chart_data_spec")
    nested_title = chart_data.get("title") if isinstance(chart_data, dict) else ""
    title = spec.get("title") or nested_title or ""
    if not title:
        return []
    return [
        {
            "text": str(title),
            "role": "headline",
            "target_area": "chart title",
            "box": [0.08, 0.04, 0.84, 0.10],
            "align": "center",
            "style": "crisp chart title",
        }
    ]
1877
+
1878
+
1879
def extract_overlay_items(spec: dict, explicit_texts: list[str] | None = None) -> list[dict]:
    """Resolve the overlay items to draw for ``spec``.

    Precedence: explicit texts, then ``spec["text_overlay_spec"]`` (chart
    mode or its ``overlays`` list), then the same two shapes on ``spec``
    itself, then ``spec["required_text"]``. Non-dict entries of an
    ``overlays`` list are dropped.
    """
    if explicit_texts:
        return overlay_spec_from_texts(explicit_texts, spec).get("overlays", [])

    def only_dicts(entries: list) -> list[dict]:
        return [entry for entry in entries if isinstance(entry, dict)]

    embedded = spec.get("text_overlay_spec") if isinstance(spec, dict) else None
    if isinstance(embedded, dict):
        if embedded.get("mode") == "chart_text_overlay":
            return chart_overlay_to_overlays(embedded)
        embedded_items = embedded.get("overlays")
        if isinstance(embedded_items, list):
            return only_dicts(embedded_items)

    if spec.get("mode") == "chart_text_overlay":
        return chart_overlay_to_overlays(spec)
    if isinstance(spec.get("overlays"), list):
        return only_dicts(spec["overlays"])

    required = normalize_text_list(spec.get("required_text"))
    if not required:
        return []
    return overlay_spec_from_texts(required, spec).get("overlays", [])
1897
+
1898
+
1899
def draw_text_overlays(image_path: Path, out_path: Path, spec: dict, texts: list[str] | None, font_path: str | None) -> dict:
    """Draw the overlay texts on ``image_path`` and save the result to ``out_path``.

    Each overlay gets a rounded, semi-transparent panel with the text fitted
    and centred vertically inside it. Items without text are skipped; items
    without a 4-element list ``box`` get one from ``overlay_box_hint``.

    Returns:
        A report dict with the input/output paths, the number of overlays
        drawn and per-overlay details (text, box, font size, role).
    """
    from PIL import Image, ImageDraw

    base_image = Image.open(image_path).convert("RGBA")
    width, height = base_image.size
    text_layer = Image.new("RGBA", base_image.size, (0, 0, 0, 0))
    canvas = ImageDraw.Draw(text_layer)
    items = extract_overlay_items(spec, texts)
    resolved_font = choose_font_path(font_path)
    # Padding and border width depend only on the image size.
    pad = max(8, int(min(width, height) * 0.012))
    border = max(1, int(pad * 0.14))
    rendered = []

    for position, entry in enumerate(items):
        label = str(entry.get("text") or "").strip()
        if not label:
            continue
        role = str(entry.get("role") or "")
        box = entry.get("box")
        if not (isinstance(box, list) and len(box) == 4):
            box = overlay_box_hint(spec, entry, position, len(items))
        px = normalized_box_to_pixels([float(v) for v in box], width, height)
        inner = (px[0] + pad, px[1] + pad, px[2] - pad, px[3] - pad)
        font, wrapped = fit_overlay_text(canvas, label, inner, resolved_font)
        text_w, text_h = text_bbox(canvas, wrapped, font)

        align = str(entry.get("align") or "center")
        if align == "left":
            text_x = inner[0]
        elif align == "right":
            text_x = inner[2] - text_w
        else:
            text_x = inner[0] + max(0, (inner[2] - inner[0] - text_w) // 2)
        text_y = inner[1] + max(0, (inner[3] - inner[1] - text_h) // 2)

        # Price/offer badges use a dark panel with light text; everything else
        # uses a light panel with dark text.
        if role == "price_or_offer":
            panel_fill, panel_outline, ink = (31, 84, 55, 232), (255, 255, 255, 90), (255, 255, 245, 255)
        else:
            panel_fill, panel_outline, ink = (255, 255, 255, 218), (31, 84, 55, 65), (19, 35, 27, 255)

        radius = max(6, int(min(px[2] - px[0], px[3] - px[1]) * 0.12))
        canvas.rounded_rectangle(px, radius=radius, fill=panel_fill, outline=panel_outline, width=border)
        line_gap = max(2, int(getattr(font, "size", 12) * 0.14))
        canvas.multiline_text((text_x, text_y), wrapped, font=font, fill=ink, align=align, spacing=line_gap)
        rendered.append({"text": label, "box": box, "font_size": getattr(font, "size", 0), "role": role})

    composed = Image.alpha_composite(base_image, text_layer).convert("RGB")
    out_path.parent.mkdir(parents=True, exist_ok=True)
    composed.save(out_path)
    return {
        "image": str(image_path),
        "out": str(out_path),
        "overlay_count": len(rendered),
        "rendered": rendered,
    }
1949
+
1950
+
1951
def cmd_overlay(args: argparse.Namespace) -> int:
    """Handle the ``overlay`` sub-command.

    Returns 0 when at least one overlay was drawn, 1 when none was, and 2
    when loading the spec or rendering failed.
    """
    try:
        payload = load_json_value(args.spec) if args.spec else {}
        spec = extract_spec_payload(payload) if payload else {}
        source = Path(args.image).expanduser()
        if args.out:
            destination = Path(args.out).expanduser()
        else:
            destination = default_overlay_out(source)
        report = draw_text_overlays(source, destination, spec, args.text, args.font)
    except Exception as exc:
        print(f"overlay 失败:{exc}", file=sys.stderr)
        return 2

    if args.json:
        print(json.dumps(report, ensure_ascii=False, indent=2))
    else:
        print(f"overlay: rendered={report['overlay_count']} out={report['out']}")
        for entry in report["rendered"]:
            print(f"- {entry['text']} font={entry['font_size']} box={entry['box']}")
    if report["overlay_count"] > 0:
        return 0
    return 1
1968
+
1969
+
1970
def parse_aspect_ratio(value: str | None) -> float | None:
    """Parse an aspect ratio such as ``"3:4"`` or ``"1920x1080"`` into width/height.

    Accepted separators are ``:``, the full-width colon, ``x``/``X``, ``×``
    and ``/``; surrounding whitespace is ignored. Non-string input is coerced
    with ``str()`` before matching.

    Returns:
        ``width / height`` as a float, or ``None`` when the value is empty,
        does not match, or has a zero height.
    """
    if not value:
        return None
    match = re.match(r"^\s*(\d+(?:\.\d+)?)\s*[::xX×/]\s*(\d+(?:\.\d+)?)\s*$", str(value))
    if not match:
        return None
    w, h = float(match.group(1)), float(match.group(2))
    return w / h if h else None
1978
+
1979
+
1980
def image_quality_metrics(image_path: Path, expected_aspect: str | None = None) -> dict:
    """Compute basic quality metrics for an image file.

    Brightness, contrast and edge density are derived from the grayscale
    image and normalized to 0..1; ``aspect_error`` is the relative deviation
    from ``expected_aspect`` (0.0 when no parsable expectation is given).
    All float values are rounded to four decimals.
    """
    from PIL import Image, ImageFilter, ImageStat

    rgb = Image.open(image_path).convert("RGB")
    width, height = rgb.size
    luma = rgb.convert("L")
    luma_stat = ImageStat.Stat(luma)
    edge_map = luma.filter(ImageFilter.FIND_EDGES)

    aspect = width / height if height else 0
    target = parse_aspect_ratio(expected_aspect)
    if target:
        aspect_error = abs(aspect - target) / target
    else:
        aspect_error = 0.0

    return {
        "width": width,
        "height": height,
        "aspect": round(aspect, 4),
        "expected_aspect": expected_aspect or "",
        "aspect_error": round(aspect_error, 4),
        "brightness": round(luma_stat.mean[0] / 255.0, 4),
        "contrast": round(luma_stat.stddev[0] / 255.0, 4),
        "edge_density": round(ImageStat.Stat(edge_map).mean[0] / 255.0, 4),
    }
2004
+
2005
+
2006
def image_quality_findings(metrics: dict, strict: bool = False) -> list[dict]:
    """Turn image metrics into a list of findings (severity, rule, message).

    ``strict`` upgrades the brightness finding from a warning to an error.
    """
    brightness_out_of_range = metrics["brightness"] < 0.04 or metrics["brightness"] > 0.96
    aspect_off = bool(metrics.get("expected_aspect")) and metrics["aspect_error"] > 0.18
    # (triggered, severity, rule, message) in reporting order.
    checks = [
        (
            metrics["width"] < 512 or metrics["height"] < 512,
            "error",
            "image.too_small",
            "成品图尺寸低于 512px,不适合作为高质量输出。",
        ),
        (
            metrics["contrast"] < 0.045,
            "error",
            "image.low_contrast",
            "图像对比度过低,疑似空白、过曝或不可读。",
        ),
        (
            brightness_out_of_range,
            "error" if strict else "warning",
            "image.bad_brightness",
            "图像整体亮度异常。",
        ),
        (
            metrics["edge_density"] < 0.008,
            "warning",
            "image.low_detail",
            "图像边缘细节偏少,可能过空或糊。",
        ),
        (
            aspect_off,
            "error",
            "image.aspect_mismatch",
            "成品图画幅与 spec 期望偏差过大。",
        ),
    ]
    return [
        {"severity": severity, "rule": rule, "message": message}
        for triggered, severity, rule, message in checks
        if triggered
    ]
2023
+
2024
+
2025
def cmd_visual_check(args: argparse.Namespace) -> int:
    """Handle the ``visual-check`` sub-command.

    Returns 0 when no error-level finding exists, 1 otherwise, and 2 when
    the spec or image could not be loaded.
    """
    try:
        spec = extract_spec_payload(load_json_value(args.spec)) if args.spec else {}
        expected_aspect = args.aspect or spec.get("aspect")
        metrics = image_quality_metrics(Path(args.image).expanduser(), expected_aspect)
    except Exception as exc:
        print(f"visual-check 失败:{exc}", file=sys.stderr)
        return 2

    findings = image_quality_findings(metrics, args.strict)
    passed = not any(finding["severity"] == "error" for finding in findings)
    result = {"image": args.image, "metrics": metrics, "findings": findings, "pass": passed}
    # The JSON and non-JSON output modes emit the same document.
    print(json.dumps(result, ensure_ascii=False, indent=2))
    if result["pass"]:
        return 0
    return 1
2040
+
2041
+
2042
def center_crop(image, ratio: float = 0.36):
    """Crop the central region covering ``ratio`` of each image dimension."""
    full_w, full_h = image.size
    crop_w = int(full_w * ratio)
    crop_h = int(full_h * ratio)
    left = (full_w - crop_w) // 2
    top = (full_h - crop_h) // 2
    right, bottom = left + crop_w, top + crop_h
    return image.crop((left, top, right, bottom))
2048
+
2049
+
2050
def histogram_similarity(a, b) -> float:
    """Return the histogram intersection of two images, normalized by ``a``.

    Both images are converted to RGB first; the result is the summed
    bin-wise minimum divided by the total of ``a``'s histogram (at least 1).
    """
    first = a.convert("RGB").histogram()
    second = b.convert("RGB").histogram()
    overlap = 0
    for left_bin, right_bin in zip(first, second):
        overlap += min(left_bin, right_bin)
    return overlap / max(1, sum(first))
2055
+
2056
+
2057
def mean_image_difference(a, b) -> float:
    """Return the mean absolute RGB difference of two images, scaled to 0..1.

    ``b`` is resized to ``a``'s size when the sizes differ.
    """
    from PIL import ImageChops, ImageStat

    other = b if a.size == b.size else b.resize(a.size)
    delta = ImageChops.difference(a.convert("RGB"), other.convert("RGB"))
    channel_means = ImageStat.Stat(delta).mean
    return sum(channel_means) / (3 * 255.0)
2065
+
2066
+
2067
def edit_validation_report(reference: Path, output: Path, threshold: float, min_change: float) -> dict:
    """Compare an edited image against its reference.

    Two checks are made: the centre crops must be at least ``threshold``
    similar (histogram intersection), and the whole image must differ by at
    least ``min_change`` (mean pixel difference). The output is resized to
    the reference size before comparing.
    """
    from PIL import Image

    ref_image = Image.open(reference).convert("RGB")
    out_image = Image.open(output).convert("RGB")
    if out_image.size != ref_image.size:
        out_image = out_image.resize(ref_image.size)

    center_similarity = histogram_similarity(center_crop(ref_image), center_crop(out_image))
    full_change = mean_image_difference(ref_image, out_image)

    # (failed, rule, message) in reporting order.
    checks = [
        (
            center_similarity < threshold,
            "edit.subject_drift",
            "中心主体相似度低,参考图主体可能没有被稳定保留。",
        ),
        (
            full_change < min_change,
            "edit.no_effective_change",
            "输出图与参考图差异过小,改图目标可能没有生效。",
        ),
    ]
    findings = [
        {"severity": "error", "rule": rule, "message": message}
        for failed, rule, message in checks
        if failed
    ]
    return {
        "reference": str(reference),
        "output": str(output),
        "center_subject_similarity": round(center_similarity, 4),
        "full_image_change": round(full_change, 4),
        "threshold": threshold,
        "min_change": min_change,
        "findings": findings,
        "pass": not any(item["severity"] == "error" for item in findings),
    }
2103
+
2104
+
2105
def cmd_edit_check(args: argparse.Namespace) -> int:
    """Handle the ``edit-check`` sub-command.

    Returns 0 when the report passes, 1 when it does not, and 2 when the
    comparison itself failed (for example an unreadable image).
    """
    reference = Path(args.reference).expanduser()
    produced = Path(args.output).expanduser()
    try:
        report = edit_validation_report(reference, produced, args.threshold, args.min_change)
    except Exception as exc:
        print(f"edit-check 失败:{exc}", file=sys.stderr)
        return 2
    # The JSON and non-JSON output modes emit the same document.
    print(json.dumps(report, ensure_ascii=False, indent=2))
    if report["pass"]:
        return 0
    return 1
2121
+
2122
+
2123
def visual_case_compile(case: dict) -> dict:
    """Compile a visual-regression case and tag the result with its tool name.

    The tool is read from ``case["tool"]`` or ``case["cmd"]`` and defaults to
    ``convert``.

    Raises:
        ValueError: If the tool is not one of the supported compile tools.
    """
    supported = {"convert", "rewrite", "adapt", "series", "compose", "brand", "character", "data-viz"}
    tool = str(case.get("tool") or case.get("cmd") or "convert")
    if tool not in supported:
        raise ValueError(f"visual-regress 暂不支持 tool={tool}")
    compiled = compile_visual_case(case, target=case.get("target") or "codex-image")
    return {**compiled, "tool": tool}
2129
+
2130
+
2131
def cmd_visual_regress(args: argparse.Namespace) -> int:
    """Handle the ``visual-regress`` sub-command.

    Each case is either a pure edit check (reference + output, no
    request/brief) or is compiled, linted and, when an image is supplied,
    quality-checked. Any exception inside a case is recorded as one visual
    error for that case.

    Returns 0 when there are no lint errors, visual errors or missing
    images, 1 otherwise, and 2 when the cases file cannot be read.
    """
    try:
        cases = load_benchmark_cases(Path(args.cases).expanduser())
    except (OSError, ValueError, json.JSONDecodeError) as exc:
        print(f"读取 visual cases 失败:{exc}", file=sys.stderr)
        return 2

    def error_count(entries: list[dict]) -> int:
        return sum(1 for entry in entries if entry.get("severity") == "error")

    def run_edit_check(case: dict) -> dict:
        return edit_validation_report(
            Path(str(case["reference"])).expanduser(),
            Path(str(case["output"])).expanduser(),
            args.edit_threshold,
            args.edit_min_change,
        )

    results = []
    lint_errors = 0
    visual_errors = 0
    missing_images = 0
    for idx, case in enumerate(cases, start=1):
        case_id = case.get("id") or f"visual-{idx}"
        try:
            has_edit_pair = bool(case.get("reference") and case.get("output"))
            if has_edit_pair and not (case.get("request") or case.get("brief")):
                # Nothing to compile: this case only validates an edit.
                edit_report = run_edit_check(case)
                visual_errors += error_count(edit_report["findings"])
                results.append(
                    {
                        "id": case_id,
                        "scenario": case.get("scenario") or "edit-check",
                        "status": "edit_checked",
                        "edit_check": edit_report,
                    }
                )
                continue

            compiled = visual_case_compile(case)
            lint = compiled["lint"]
            lint_errors += error_count(lint)
            item = {
                "id": case_id,
                "scenario": case.get("scenario") or case.get("tool") or "convert",
                "prompt_digest": compiled["prompt_digest"],
                "asset_type": compiled["spec"]["asset_type"],
                "aspect": compiled["spec"]["aspect"],
                "lint": lint,
                "status": "compiled",
            }

            image = case.get("image") or case.get("output")
            if image:
                metrics = image_quality_metrics(Path(str(image)).expanduser(), compiled["spec"].get("aspect"))
                findings = image_quality_findings(metrics, args.strict)
                visual_errors += error_count(findings)
                item.update({"status": "checked", "metrics": metrics, "visual_findings": findings})
            elif args.require_images:
                missing_images += 1
                item.update(
                    {
                        "status": "missing_image",
                        "visual_findings": [
                            {
                                "severity": "error",
                                "rule": "visual.image_missing",
                                "message": "case 缺少 image/output,无法做真实视觉回归。",
                            }
                        ],
                    }
                )

            if has_edit_pair:
                edit_report = run_edit_check(case)
                visual_errors += error_count(edit_report["findings"])
                item["edit_check"] = edit_report
            results.append(item)
        except Exception as exc:
            visual_errors += 1
            results.append({"id": case_id, "status": "error", "error": str(exc)})

    summary = {
        "cases": len(cases),
        "lint_error_count": lint_errors,
        "visual_error_count": visual_errors,
        "missing_image_count": missing_images,
        "pass": lint_errors == 0 and visual_errors == 0 and missing_images == 0,
    }
    output = {"summary": summary, "results": results}
    if args.json:
        print(json.dumps(output, ensure_ascii=False, indent=2))
    else:
        print(f"visual-regress: cases={summary['cases']} pass={summary['pass']} lint_errors={lint_errors} visual_errors={visual_errors} missing_images={missing_images}")
        for entry in results:
            print(f"- {entry['id']}: {entry['status']} asset={entry.get('asset_type', '?')} digest={entry.get('prompt_digest', '?')}")
    return 0 if summary["pass"] else 1
2210
+
2211
+
2212
+ # --------------------------------------------------------------------------- #
2213
+ # 真实场景命令:长输入、多图、改图、品牌/角色一致性、数据图、改写、适配
2214
+ # --------------------------------------------------------------------------- #
2215
def read_text_argument(parts: list[str] | None, file_path: str | None) -> str:
    """Combine file content and positional words into one input text.

    File content (if any) comes first, followed by the space-joined parts on
    a new line; the result is stripped.
    """
    from_file = [Path(file_path).expanduser().read_text(encoding="utf-8")] if file_path else []
    from_args = [" ".join(parts)] if parts else []
    return "\n".join(from_file + from_args).strip()
2222
+
2223
+
2224
def compact_title(text: str, limit: int = 28) -> str:
    """Strip leading list/heading markers and shorten ``text`` to ``limit`` chars.

    Text longer than ``limit`` is cut and suffixed with ``...``.
    """
    without_marker = re.sub(r"^[#\-\*\d\.\)、\s]+", "", text)
    clean = normalize_ws(without_marker)
    if len(clean) > limit:
        return clean[:limit].rstrip() + "..."
    return clean
2229
+
2230
+
2231
def normalize_case_values(case: dict) -> dict:
    """Return a copy of ``case`` with list-valued style fields joined by commas.

    Only ``style``, ``materials``, ``palette`` and ``tags`` are touched, and
    only when they hold a list; entries that are blank after ``str()`` are
    dropped.
    """
    normalized = dict(case)
    for field in ("style", "materials", "palette", "tags"):
        current = normalized.get(field)
        if not isinstance(current, list):
            continue
        kept = [str(entry) for entry in current if str(entry).strip()]
        normalized[field] = ",".join(kept)
    return normalized
2238
+
2239
+
2240
def compile_visual_case(
    case: dict,
    *,
    target: str = "codex-image",
    out: str | None = None,
    include_handoff: bool = True,
) -> dict:
    """Compile one case dict into spec, prompt, lint findings and handoff text.

    ``case["target"]`` and ``case["out"]`` take precedence over the keyword
    arguments. With ``strict_text`` set on the spec, the required text is not
    passed to lint (an empty list is sent instead).
    """
    args = namespace_from_case(normalize_case_values(case))
    args.target = case.get("target") or target
    args.out = case.get("out") or out

    spec = build_spec(args)
    prompt = render_prompt(spec)
    if spec.get("strict_text"):
        lint_texts = []
    else:
        lint_texts = spec["required_text"]
    findings = lint_prompt(prompt, spec["asset_type"], spec["quality"], lint_texts)

    compiled = {
        "spec": spec,
        "prompt": prompt,
        "prompt_digest": prompt_digest(prompt),
        "text_overlay_spec": spec.get("text_overlay_spec"),
        "acceptance_criteria": spec.get("acceptance_criteria", []),
        "lint": findings,
    }
    if include_handoff:
        compiled["handoff"] = handoff_text(prompt, args.out, spec["size"], spec["quality"], args.target)
    else:
        compiled["handoff"] = None
    return compiled
2263
+
2264
+
2265
def has_lint_error(items: list[dict]) -> bool:
    """Return True when any finding has severity ``error``."""
    for finding in items:
        if finding.get("severity") == "error":
            return True
    return False
2267
+
2268
+
2269
def split_document_sections(text: str, max_images: int) -> list[str]:
    """Split a long input into at most ``max_images`` chunks (at least one).

    Lines starting with a heading/list/numbering marker open a new chunk.
    When that yields one chunk or none, the text is instead packed sentence
    by sentence into buckets of roughly 220 characters. Chunks shorter than
    8 characters are dropped; if nothing is left the whole text is used.
    """
    heading = re.compile(r"^(#{1,6}\s+|[-*]\s+|\d+[.)、]\s+|第[一二三四五六七八九十0-9]+[章节部分步][::\s]*)")
    sections: list[str] = []
    pending: list[str] = []
    for raw_line in text.splitlines():
        line = raw_line.strip()
        if not line:
            continue
        body = heading.sub("", line).strip()
        if pending and heading.match(line):
            sections.append(" ".join(pending).strip())
            pending = [body]
        else:
            pending.append(body)
    if pending:
        sections.append(" ".join(pending).strip())

    if len(sections) <= 1:
        # No usable structure: fall back to sentence packing.
        sections = []
        bucket = ""
        for part in re.split(r"(?<=[。!?!?;;])\s*", text):
            sentence = part.strip()
            if not sentence:
                continue
            if bucket and len(bucket) + len(sentence) > 220:
                sections.append(bucket.strip())
                bucket = sentence
            else:
                bucket = f"{bucket} {sentence}".strip()
        if bucket:
            sections.append(bucket.strip())

    sections = [section for section in sections if len(section) >= 8]
    if not sections:
        sections = [text.strip()]
    return sections[: max(1, max_images)]
2302
+
2303
+
2304
def choose_compose_asset(chunk: str, index: int) -> str:
    """Pick an asset type for a chunk by keyword, first matching rule wins.

    The poster rule only applies to the first chunk (``index == 0``); chunks
    matching no rule become ``illustration``.
    """
    lowered = chunk.lower()

    def mentions(keywords: list[str]) -> bool:
        return any(keyword in lowered for keyword in keywords)

    # Order matters: earlier rules shadow later ones.
    rules = [
        (["架构", "系统", "模块", "链路", "rag", "llm", "retriever", "pipeline"], "diagram"),
        (["数据", "指标", "报表", "趋势", "占比", "转化率", "漏斗", "图表"], "infographic"),
        (["步骤", "教程", "操作", "sop", "流程", "指南", "怎么"], "infographic"),
        (["ui", "界面", "app", "dashboard", "页面", "产品后台", "控制台"], "ui"),
        (["产品", "商品", "包装", "电商", "渲染", "饮料", "食品"], "product"),
        (["角色", "人物", "人设", "表情", "立绘"], "character"),
    ]
    for keywords, asset_type in rules:
        if mentions(keywords):
            return asset_type
    if index == 0 and mentions(["标题", "主题", "发布", "活动", "封面", "总结"]):
        return "poster"
    return "illustration"
2321
+
2322
+
2323
def infer_compose_style(text: str) -> str:
    """Infer a shared style description from keywords in the input text.

    The first matching keyword group wins; a generic editorial style is the
    fallback.
    """
    lowered = text.lower()
    style_rules = [
        (
            ["架构", "系统", "论文", "rag", "llm", "技术"],
            "clean technical editorial style, white background, strict alignment, large readable labels",
        ),
        (
            ["品牌", "营销", "活动", "海报", "新品"],
            "consistent premium brand campaign style, controlled palette, clear commercial hierarchy",
        ),
        (
            ["教程", "步骤", "指南", "科普"],
            "calm instructional editorial style, numbered modules, high readability",
        ),
    ]
    for keywords, style in style_rules:
        if any(keyword in lowered for keyword in keywords):
            return style
    return "coherent editorial visual system, concrete subject details, restrained palette, stable hierarchy"
2332
+
2333
+
2334
def extract_visual_labels(chunk: str, asset_type: str, limit: int = 5) -> list[str]:
    """Collect up to ``limit`` short label candidates from ``chunk``.

    Sources, in order: quoted phrases, capitalized ASCII tokens, and text
    following one of a fixed set of Chinese keywords. Labels are
    de-duplicated and must be 2..28 characters. For diagram/infographic/ui
    assets with no label found, a compact title of the chunk is used.
    """
    found: list[str] = []

    def remember(candidate: str) -> None:
        cleaned = candidate.strip(" \t\n\r,,。;;::")
        if 1 < len(cleaned) <= 28 and cleaned not in found:
            found.append(cleaned)

    quoted = re.findall(r'"([^"\n]{1,28})"|“([^”\n]{1,28})”|「([^」\n]{1,28})」', chunk)
    for groups in quoted:
        # Exactly one alternative matched; pick its non-empty group.
        remember(next((group for group in groups if group), ""))
    for token in re.findall(r"\b[A-Z][A-Za-z0-9_-]{1,20}\b", chunk):
        remember(token)
    for phrase in re.findall(r"(?:标题|主题|模块|步骤|节点|页面)[::\s]*([^,。;;\n]{2,24})", chunk):
        remember(phrase)

    needs_fallback = not found and asset_type in {"diagram", "infographic", "ui"}
    if needs_fallback:
        remember(compact_title(chunk, 18))
    return found[:limit]
2351
+
2352
+
2353
def compose_purpose(asset_type: str, index: int) -> str:
    """Return the numbered purpose label for an image of ``asset_type``."""
    labels = {
        "poster": "封面/主视觉",
        "diagram": "结构关系图",
        "infographic": "信息整理图",
        "ui": "界面概念图",
        "product": "产品视觉图",
        "character": "角色设定图",
        "illustration": "场景说明图",
    }
    label = labels.get(asset_type, "配图")
    return f"{index}. {label}"
2364
+
2365
+
2366
def cmd_compose(args: argparse.Namespace) -> int:
    """Handle the ``compose`` sub-command: long input -> multi-image visual plan.

    Returns 0 on success, 1 when any image's lint has an error, and 2 when
    no input text was given.
    """
    text = read_text_argument(args.input_text, args.file)
    if not text:
        print("用法:compose \"<长输入>\" 或 compose --file doc.md", file=sys.stderr)
        return 2

    shared_style = args.shared_style or infer_compose_style(text)
    label_assets = {"diagram", "infographic", "ui"}
    visual_plan = []
    for number, chunk in enumerate(split_document_sections(text, args.max_images), start=1):
        asset_type = choose_compose_asset(chunk, number - 1)
        labels = extract_visual_labels(chunk, asset_type)
        purpose = compose_purpose(asset_type, number)
        out = None
        if args.out_dir:
            out = str(Path(args.out_dir).expanduser() / f"compose-{number:02d}-{asset_type}.png")
        # Labels are only requested as in-image text for label-heavy assets or
        # when strict text handling is on.
        wants_labels = args.strict_text or asset_type in label_assets
        case = {
            "id": f"compose-{number:02d}",
            "request": f"{purpose}。根据这段内容生成对应画面:{chunk}",
            "asset_type": asset_type,
            "style": shared_style,
            "palette": args.palette,
            "text": labels if wants_labels else [],
            "strict_text": args.strict_text,
            "target": args.target,
            "out": out,
            "tags": "compose,long-input",
        }
        compiled = compile_visual_case(case, target=args.target, out=out)
        compiled_spec = compiled["spec"]
        visual_plan.append(
            {
                "id": case["id"],
                "purpose": purpose,
                "brief": chunk,
                "asset_type": compiled_spec["asset_type"],
                "template_id": compiled_spec["template_id"],
                "prompt": compiled["prompt"],
                "handoff": compiled["handoff"],
                "lint": compiled["lint"],
                "text_overlay_spec": compiled["text_overlay_spec"],
                "acceptance_criteria": compiled["acceptance_criteria"],
                "spec": compiled_spec,
            }
        )

    result = {
        "summary": compact_title(text, 80),
        "shared_style": shared_style,
        "visual_plan": visual_plan,
        "bundle_acceptance": [
            "每张图对应长输入中的一个明确信息单元,不混淆主题。",
            "整组图使用同一风格、配色和信息层级规则。",
            "图表/架构/UI 类标签清晰可读;strict_text 时文字走 overlay_spec。",
            "任一单图 lint 出现 error 时必须先修 prompt 再交给下游出图。",
        ],
    }
    if args.json:
        print(json.dumps(result, ensure_ascii=False, indent=2))
    else:
        print(f"compose: {len(visual_plan)} 张图 shared_style={shared_style}")
        for entry in visual_plan:
            status = "FAIL" if has_lint_error(entry["lint"]) else "PASS"
            print(f"\n## {entry['id']} {entry['purpose']} [{status}]")
            print(entry["prompt"])
            if entry["handoff"]:
                print("\nHandoff:")
                print(entry["handoff"])
    if any(has_lint_error(entry["lint"]) for entry in visual_plan):
        return 1
    return 0
2432
+
2433
+
2434
def load_series_cases(file_path: str | None) -> list[dict | str]:
    """Load series briefs from a JSON array, a JSON object, or a JSONL/text file.

    Supported layouts:
      * a JSON array of briefs;
      * a single JSON object, optionally wrapping the briefs in ``briefs``;
      * line-oriented input where each non-blank line is either a JSON object
        or a plain-text brief.

    Returns:
        The list of briefs; ``[]`` for a missing path or an empty file.

    Raises:
        ValueError: If an array-looking file does not decode to a list, or a
            JSON document/line is malformed (``json.JSONDecodeError``).
        OSError: If the file cannot be read.
    """
    if not file_path:
        return []
    text = Path(file_path).expanduser().read_text(encoding="utf-8").strip()
    if not text:
        return []
    if text.startswith("["):
        data = json.loads(text)
        if not isinstance(data, list):
            raise ValueError("series JSON 文件必须是数组")
        return data
    if text.startswith("{"):
        try:
            data = json.loads(text)
        except json.JSONDecodeError:
            # A JSONL file made of object lines also starts with "{" but is
            # not one JSON document ("Extra data"). Fall through to the
            # line-by-line parser; single-line input is genuinely malformed.
            if "\n" not in text:
                raise
        else:
            briefs = data.get("briefs") if isinstance(data, dict) else None
            if isinstance(briefs, list):
                return briefs
            return [data]
    items: list[dict | str] = []
    for line in text.splitlines():
        line = line.strip()
        if not line:
            continue
        if line.startswith("{"):
            items.append(json.loads(line))
        else:
            items.append(line)
    return items
2461
+
2462
+
2463
def cmd_series(args: argparse.Namespace) -> int:
    """Compile a set of briefs into one prompt per image with a shared style.

    Briefs come from ``args.file`` (via ``load_series_cases``) followed by
    any ``--brief`` values. Each brief is compiled with
    ``compile_visual_case`` after the shared style, palette, strict-text
    flag, target and tags have been applied to it.

    Returns:
        2 on unreadable input, no briefs, or a brief without request text;
        1 when any compiled prompt has a lint error; otherwise 0.
    """
    try:
        briefs = load_series_cases(args.file)
    except (OSError, ValueError, json.JSONDecodeError) as err:
        print(f"读取 series 文件失败:{err}", file=sys.stderr)
        return 2
    briefs.extend(args.brief or [])
    if not briefs:
        print('用法:series --brief "图1" --brief "图2" 或 series --file briefs.jsonl', file=sys.stderr)
        return 2

    shared_style = args.style or "single coherent series style, same camera language, same palette discipline, same visual density"
    variants = []
    for position, entry in enumerate(briefs, start=1):
        # Dict entries are copied so the loaded data is never mutated.
        if isinstance(entry, dict):
            case = dict(entry)
        else:
            case = {"request": str(entry)}
        brief = str(case.get("request") or case.get("brief") or "")
        if not brief:
            print(f"series 第 {position} 项缺少 request/brief", file=sys.stderr)
            return 2
        case["request"] = brief
        if args.asset_type:
            case["asset_type"] = args.asset_type
        # The shared style always leads; a per-brief style is appended after it.
        own_style = str(case.get("style") or "").strip()
        case["style"] = "; ".join([shared_style, own_style]).strip("; ")
        if args.palette:
            case["palette"] = args.palette
        if args.strict_text:
            case["strict_text"] = True
        label_asset_type = str(case.get("asset_type") or route_asset_type(brief))
        case.setdefault("text", extract_visual_labels(brief, label_asset_type))
        case["target"] = args.target
        case["tags"] = "series,consistent-set"
        compiled = compile_visual_case(case, target=args.target)
        variant = {
            "id": f"series-{position:02d}",
            "brief": brief,
            "asset_type": compiled["spec"]["asset_type"],
            "prompt": compiled["prompt"],
            "handoff": compiled["handoff"],
            "lint": compiled["lint"],
            "spec": compiled["spec"],
        }
        variants.append(variant)

    result = {"shared_style": shared_style, "count": len(variants), "variants": variants}
    if args.json:
        print(json.dumps(result, ensure_ascii=False, indent=2))
    else:
        print(f"series: {len(variants)} 张图")
        for variant in variants:
            print(f"\n## {variant['id']} {variant['asset_type']}")
            print(variant["prompt"])
    any_failed = any(has_lint_error(variant["lint"]) for variant in variants)
    return 1 if any_failed else 0
2513
+
2514
+
2515
def parse_reference(value: str) -> dict:
    """Parse a ``--reference`` value into ``{"role": ..., "ref": ...}``.

    ``role:path`` yields the given role. Anything else gets the default
    role ``"reference"``: bare paths, absolute paths, URLs such as
    ``https://host/img.png``, and Windows drive paths such as
    ``C:\\img.png``. URLs and drive paths contain a colon but are not
    role prefixes, so they are kept whole rather than split.

    Note: a one-letter role followed by a path that starts with a slash
    (``a:/x.png``) is indistinguishable from a drive path and is treated
    as a path.
    """
    if ":" in value and not value.startswith("/"):
        role, _, ref = value.partition(":")
        role, ref = role.strip(), ref.strip()
        # "scheme://..." is a URL, not "role:ref".
        looks_like_url = ref.startswith("//")
        # "C:\..." or "C:/..." is a Windows drive path, not role "C".
        looks_like_drive = len(role) == 1 and role.isalpha() and ref[:1] in ("\\", "/")
        if role and ref and not looks_like_url and not looks_like_drive:
            return {"role": role, "ref": ref}
    return {"role": "reference", "ref": value.strip()}
2521
+
2522
+
2523
def cmd_edit(args: argparse.Namespace) -> int:
    """Build an image-edit prompt from a goal, references, and keep/change lists.

    The prompt is linted with ``lint_prompt`` and wrapped into a handoff
    block with ``handoff_text``. Output is JSON when ``args.json`` is set,
    otherwise sectioned text.

    Returns:
        2 when no goal is given, 1 when lint reports an error, otherwise 0.
    """
    if not args.goal:
        print('用法:edit --goal "<改图目标>" --reference role:path --preserve ... --change ...', file=sys.stderr)
        return 2

    references = [parse_reference(raw) for raw in (args.reference or [])]
    preserve = args.preserve or ["main subject identity, silhouette, material cues, and composition anchors"]
    # With no explicit --change, the goal itself is the only change.
    changes = args.change or [args.goal]
    required_text = args.text or []
    aspect = args.aspect or "3:4"
    quality = args.quality or "high"

    reference_block = "; ".join(f"{ref['role']}={ref['ref']}" for ref in references) or "provided reference image(s)"
    visual_target = args.style or "production-quality realistic edit, consistent lighting, no visible seams"
    prompt_lines = [
        f"Edit the provided reference image(s) into a {aspect} {args.asset_type} result for: {args.goal}.",
        f"References: {reference_block}.",
        "Preserve exactly: " + "; ".join(preserve) + ".",
        "Change only: " + "; ".join(changes) + ".",
        f"Visual target: {visual_target}; quality={quality}.",
        exact_text_block(required_text),
        "Avoid: identity drift; unwanted background changes; mismatched perspective; fake logos; garbled text; low-resolution artifacts.",
    ]
    prompt = "\n".join(prompt_lines)

    findings = lint_prompt(prompt, args.asset_type, quality, required_text)
    edit_spec = {
        "goal": args.goal,
        "references": references,
        "preserve": preserve,
        "change": changes,
        "required_text": required_text,
        "aspect": aspect,
        "asset_type": args.asset_type,
        "quality": quality,
    }
    size = infer_size(aspect, None, args.asset_type)
    handoff = handoff_text(prompt, args.out, size, quality, args.target)
    result = {"edit_spec": edit_spec, "prompt": prompt, "lint": findings, "handoff": handoff}

    if args.json:
        print(json.dumps(result, ensure_ascii=False, indent=2))
        return 1 if has_lint_error(findings) else 0

    print("## Edit Prompt")
    print(prompt)
    print("\n## Lint")
    print_lint(findings)
    print("\n## Handoff")
    print(handoff)
    return 1 if has_lint_error(findings) else 0
2571
+
2572
+
2573
def cmd_brand(args: argparse.Namespace) -> int:
    """Build a reusable brand prompt block and optionally compile one image with it.

    The brand profile is assembled from the CLI arguments. When
    ``args.request`` is given, the request plus the brand block is compiled
    through ``compile_visual_case`` and stored under ``"compiled"``.

    Returns:
        2 when no brand name is given, 1 when the optional compiled prompt
        has a lint error, otherwise 0.
    """
    if not args.name:
        print("用法:brand --name <品牌名> [--request <要生成的图>]", file=sys.stderr)
        return 2

    palette = split_csv(args.palette)
    avoid = split_csv(args.avoid)
    values = split_csv(args.values)
    industry = args.industry or "unspecified"
    style = args.style or "original, consistent, premium but restrained brand system"
    # Baseline exclusions are always appended after the user's own avoid list.
    avoid_all = avoid + ["real brand logos", "stock clip-art", "existing trademark resemblance"]
    brand_profile = {
        "name": args.name,
        "industry": industry,
        "values": values,
        "palette": palette,
        "style": style,
        "avoid": avoid_all,
    }

    values_text = ", ".join(values) if values else "clear, trustworthy, distinctive"
    palette_text = ", ".join(palette) if palette else "2-3 controlled brand colors plus neutral support"
    avoid_text = ", ".join(avoid_all)
    block_lines = [
        f'Brand system for invented brand "{args.name}":',
        f"- Industry: {industry}.",
        f"- Values: {values_text}.",
        f"- Palette: {palette_text}.",
        f"- Style: {style}.",
        "- Rules: keep mark shapes original; use the same spacing, palette, and typography logic across assets; no real brand logos.",
        f"- Avoid: {avoid_text}.",
    ]
    brand_block = "\n".join(block_lines)

    result: dict = {"brand_profile": brand_profile, "brand_prompt_block": brand_block}
    if args.request:
        case = {
            "request": f"{args.request}\n{brand_block}",
            "asset_type": args.asset_type,
            "style": style,
            "palette": args.palette,
            "text": args.text or [],
            "strict_text": args.strict_text,
            "target": args.target,
            "tags": "brand,consistency",
        }
        result["compiled"] = compile_visual_case(case, target=args.target)

    if args.json:
        print(json.dumps(result, ensure_ascii=False, indent=2))
    else:
        print(brand_block)
        if "compiled" in result:
            print("\n## Prompt")
            print(result["compiled"]["prompt"])

    compiled = result.get("compiled")
    if not compiled:
        return 0
    return 1 if has_lint_error(compiled["lint"]) else 0
2621
+
2622
+
2623
def cmd_character(args: argparse.Namespace) -> int:
    """Build a character bible, a reference-sheet prompt, and per-scene prompts.

    The same identity sentence is placed in the reference-sheet request and
    at the start of every scene request. All prompts are compiled through
    ``compile_visual_case``.

    Returns:
        2 when name or description is missing, 1 when the reference sheet
        or any scene has a lint error, otherwise 0.
    """
    if not (args.name and args.description):
        print("用法:character --name <角色名> --description <外观设定> [--scene ...]", file=sys.stderr)
        return 2

    palette = split_csv(args.palette)
    style = args.style or "original character model-sheet clarity, no existing IP resemblance"
    outfit = args.outfit or "stable signature outfit and silhouette"
    bible = {
        "name": args.name,
        "description": args.description,
        "style": style,
        "outfit": outfit,
        "palette": palette,
        "consistency_rules": [
            "same face shape, hair silhouette, outfit anchors, and palette in every image",
            "do not copy or resemble existing IP characters",
            "describe identity anchors before scene action in every prompt",
        ],
    }

    palette_text = ", ".join(palette) if palette else "stable limited palette"
    identity = "".join(
        [
            f'Original character "{args.name}": {args.description}. ',
            f"Outfit anchors: {outfit}. Palette: {palette_text}. ",
            "Keep the same identity anchors in every image; no existing IP resemblance.",
        ]
    )

    reference_case = {
        "request": f"角色设定三视图和表情板:{identity}",
        "asset_type": "character",
        "style": style,
        "palette": args.palette,
        "text": [args.name],
        "target": args.target,
        "tags": "character,consistency",
    }
    reference = compile_visual_case(reference_case, target=args.target)

    scenes = []
    for number, scene in enumerate(args.scene or [], start=1):
        scene_case = {
            "request": f"{identity} 场景图:{scene}",
            "asset_type": "illustration",
            "style": style,
            "palette": args.palette,
            "target": args.target,
            "tags": "character,scene",
        }
        compiled = compile_visual_case(scene_case, target=args.target)
        scenes.append({"id": f"scene-{number:02d}", "scene": scene, **compiled})

    result = {"character_bible": bible, "reference_sheet": reference, "scene_prompts": scenes}
    if args.json:
        print(json.dumps(result, ensure_ascii=False, indent=2))
    else:
        print(json.dumps(bible, ensure_ascii=False, indent=2))
        print("\n## Reference Sheet Prompt")
        print(reference["prompt"])
        for entry in scenes:
            print(f"\n## {entry['id']}")
            print(entry["prompt"])

    failed = has_lint_error(reference["lint"]) or any(has_lint_error(entry["lint"]) for entry in scenes)
    return 1 if failed else 0
2678
+
2679
+
2680
+ def read_data_preview(file_path: str | None) -> dict:
2681
+ if not file_path:
2682
+ return {"columns": [], "rows": [], "row_count": 0, "source": ""}
2683
+ path = Path(file_path).expanduser()
2684
+ text = path.read_text(encoding="utf-8")
2685
+ rows: list[dict] = []
2686
+ columns: list[str] = []
2687
+ if path.suffix.lower() == ".json":
2688
+ data = json.loads(text)
2689
+ if isinstance(data, dict):
2690
+ data = data.get("rows") or data.get("data") or data.get("items") or []
2691
+ if isinstance(data, list):
2692
+ rows = [dict(item) for item in data if isinstance(item, dict)]
2693
+ columns = list(rows[0].keys()) if rows else []
2694
+ else:
2695
+ delimiter = "\t" if path.suffix.lower() == ".tsv" else ","
2696
+ reader = csv.DictReader(text.splitlines(), delimiter=delimiter)
2697
+ rows = [dict(row) for row in reader]
2698
+ columns = list(reader.fieldnames or [])
2699
+ return {"columns": columns, "rows": rows[:5], "row_count": len(rows), "source": str(path)}
2700
+
2701
+
2702
+ def infer_chart_type(request: str, columns: list[str], override: str | None) -> str:
2703
+ if override:
2704
+ return override
2705
+ lower = request.lower()
2706
+ if any(k in lower for k in ["dashboard", "看板", "仪表盘"]):
2707
+ return "dashboard"
2708
+ if any(k in lower for k in ["趋势", "时间", "date", "day", "month", "week"]):
2709
+ return "line chart"
2710
+ if any(k in lower for k in ["占比", "比例", "份额", "share", "ratio"]):
2711
+ return "donut chart"
2712
+ if len(columns) >= 3:
2713
+ return "bar chart with supporting table"
2714
+ return "bar chart"
2715
+
2716
+
2717
def cmd_data_viz(args: argparse.Namespace) -> int:
    """Turn a data file and request into a data-visualization prompt.

    The prompt embeds the data schema and a rows preview from
    ``read_data_preview``. In strict-text mode the prompt only reserves
    text zones and a ``text_overlay_spec`` carries the exact title and
    data; otherwise the title is required verbatim in the prompt and
    linted as required text.

    Returns:
        2 when the data file cannot be read or parsed, 1 when lint reports
        an error, otherwise 0.
    """
    request = args.request or "根据数据生成清晰的信息图"
    try:
        data_preview = read_data_preview(args.file)
    except (OSError, ValueError, csv.Error) as exc:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError
        # (non-UTF-8 files) as well as malformed JSON row shapes; csv.Error
        # covers malformed delimited text. All get the friendly message
        # and exit code 2 instead of a traceback.
        print(f"读取数据失败:{exc}", file=sys.stderr)
        return 2
    title = args.title or compact_title(request, 24)
    chart_type = infer_chart_type(request, data_preview["columns"], args.chart_type)
    facts = {
        "title": title,
        "request": request,
        "chart_type": chart_type,
        "columns": data_preview["columns"],
        "row_count": data_preview["row_count"],
        "rows_preview": data_preview["rows"],
        "source": data_preview["source"],
    }
    if args.strict_text:
        text_rule = (
            "Strict chart text mode: reserve clean title, axis, legend, and callout zones; "
            "exact labels and numbers will be applied as deterministic vector/text overlay."
        )
        # Nothing has to be rendered verbatim by the image model here.
        required_text: list[str] = []
    else:
        text_rule = f'The title must read exactly "{title}". Keep labels large and readable.'
        required_text = [title]
    prompt = "\n".join(
        [
            f"Create a 16:9 high-quality data visualization infographic for: {request}.",
            f"Chart type: {chart_type}.",
            f"Data schema: columns={', '.join(data_preview['columns']) or 'not provided'}, rows={data_preview['row_count']}.",
            f"Rows preview for visual truthfulness: {json.dumps(data_preview['rows'], ensure_ascii=False)}.",
            "Layout: title band, main chart area, short insight callouts, compact legend, and optional source note.",
            "Use honest scales, aligned axes, restrained colors, and no decorative fake data.",
            text_rule,
            "Avoid: misleading charts; unreadable microtext; random numbers not present in the data; fake logos; visual clutter.",
        ]
    )
    overlay_spec = None
    if args.strict_text:
        overlay_spec = {
            "mode": "chart_text_overlay",
            "title": title,
            "columns": data_preview["columns"],
            "rows_preview": data_preview["rows"],
            "rules": ["Render all labels and numbers as deterministic text/vector overlays.", "Do not invent values."],
        }
    findings = lint_prompt(prompt, "infographic", "high", required_text)
    result = {"chart_data_spec": facts, "prompt": prompt, "text_overlay_spec": overlay_spec, "lint": findings}
    if args.json:
        print(json.dumps(result, ensure_ascii=False, indent=2))
    else:
        print("## Data Viz Prompt")
        print(prompt)
        print("\n## Lint")
        print_lint(findings)
    return 1 if has_lint_error(findings) else 0
2775
+
2776
+
2777
def cmd_rewrite(args: argparse.Namespace) -> int:
    """Rewrite a low-quality prompt or request into a structured prompt.

    The source text comes from the positional words or ``--file`` (via
    ``read_text_argument``) and is compiled through ``compile_visual_case``.

    Returns:
        2 when no source text is available, 1 when the rewritten prompt has
        a lint error, otherwise 0.
    """
    source = read_text_argument(args.prompt_text, args.file)
    if not source:
        print('用法:rewrite "<低质量 prompt 或画图需求>"', file=sys.stderr)
        return 2

    case = {
        "request": source,
        "asset_type": args.asset_type,
        "style": args.style,
        "strict_text": args.strict_text,
        "text": args.text or [],
        "target": args.target,
        "tags": "rewrite,prompt-cleanup",
    }
    compiled = compile_visual_case(case, target=args.target)
    prompt = compiled["prompt"]
    lint = compiled["lint"]

    notes = [
        "Converted vague adjectives into concrete asset type, layout, material, lighting, palette, and negative constraints.",
        "Applied safety rewrite for risky brand/IP/style references when detected.",
        "Ran lint on the rewritten prompt.",
    ]
    result = {
        "original": source,
        "rewrite_notes": notes,
        "spec": compiled["spec"],
        "prompt": prompt,
        "handoff": compiled["handoff"],
        "lint": lint,
    }

    if args.json:
        print(json.dumps(result, ensure_ascii=False, indent=2))
    else:
        print("## Rewritten Prompt")
        print(prompt)
        print("\n## Lint")
        print_lint(lint)
    return 1 if has_lint_error(lint) else 0
2812
+
2813
+
2814
def adapt_layout_for_aspect(aspect: str, asset_type: str) -> str:
    """Return the layout description for an aspect ratio.

    The five known ratios have dedicated layout text. Any other value
    falls back to ``infer_layout`` with the template inferred for
    ``asset_type``.
    """
    dedicated_layouts = {
        "16:9": "wide layout with horizontal reading path, hero subject left or center, supporting details to the side",
        "9:16": "vertical mobile layout with stacked hierarchy, safe top/bottom margins, large central subject",
        "1:1": "square layout with centered subject, balanced margins, compact supporting details",
        "3:4": "portrait layout with clear top title zone, central visual, and bottom detail zone",
        "4:3": "landscape editorial layout with stable side margins and readable detail panels",
    }
    layout = dedicated_layouts.get(aspect)
    if layout is not None:
        return layout
    # Unknown ratio: defer to the generic per-asset layout.
    return infer_layout(infer_template_id("", asset_type), asset_type)
2826
+
2827
+
2828
def cmd_adapt(args: argparse.Namespace) -> int:
    """Compile one request into a prompt per requested aspect ratio.

    Each aspect gets its own layout from ``adapt_layout_for_aspect`` and is
    compiled through ``compile_visual_case``; the request, style, text and
    asset type are shared by all variants.

    Returns:
        2 when the request or the aspect list is empty, 1 when any variant
        has a lint error, otherwise 0.
    """
    request = read_text_argument(args.request_text, None)
    if not request:
        print("用法:adapt \"<画图需求>\" --aspects 1:1,3:4,16:9", file=sys.stderr)
        return 2
    aspects = split_csv(args.aspects)
    if not aspects:
        # An empty list used to produce zero variants and exit 0, which
        # looks like success to callers although nothing was generated.
        print("adapt: --aspects 不能为空,例如 --aspects 1:1,3:4,16:9", file=sys.stderr)
        return 2
    # The asset type does not depend on the aspect, so resolve it once.
    asset_type = args.asset_type or route_asset_type(request)
    variants = []
    for aspect in aspects:
        case = {
            "request": request,
            "asset_type": asset_type,
            "aspect": aspect,
            "layout": adapt_layout_for_aspect(aspect, asset_type),
            "style": args.style,
            "text": args.text or [],
            "strict_text": args.strict_text,
            "target": args.target,
            "tags": "adapt,multi-size",
        }
        compiled = compile_visual_case(case, target=args.target)
        variants.append({"aspect": aspect, "size": compiled["spec"]["size"], **compiled})
    result = {
        "request": request,
        "variants": variants,
        "adaptation_rules": [
            "同一主体、同一风格、同一关键文案在所有画幅保持一致。",
            "每个画幅使用专属布局,不做简单裁切。",
            "移动竖图保留上下安全区,横图保留左右信息区,方图保持中心识别度。",
        ],
    }
    if args.json:
        print(json.dumps(result, ensure_ascii=False, indent=2))
    else:
        for item in variants:
            print(f"\n## {item['aspect']} / {item['size']}")
            print(item["prompt"])
    return 1 if any(has_lint_error(item["lint"]) for item in variants) else 0
2865
+
2866
+
2867
+ # --------------------------------------------------------------------------- #
2868
+ # status
2869
+ # --------------------------------------------------------------------------- #
2870
def cmd_status(args: argparse.Namespace) -> int:
    """Print a health report for local data and downstream channels.

    Reports the data directory, profile file, sample and judgement counts,
    the bundled and optional gallery files, and whether the ``codex`` CLI
    and the codex-image plugin directory are present. Always returns 0.
    """
    print("draw-prompt 环境检查")

    home_state = "存在" if data_home().exists() else "未创建"
    print(f" 数据目录 : {data_home()} ({home_state})")
    profile_state = "已有" if profile_path().exists() else "未创建"
    print(f" 偏好档案 : {profile_path()} ({profile_state})")

    samples = read_jsonl(samples_path())
    accepted = len([row for row in samples if row.get("verdict") == "accept"])
    print(f" 样本数 : {len(samples)}(采纳 {accepted})")
    judgement_count = len(read_jsonl(judgements_path()))
    print(f" 评分记录 : {judgement_count}")

    # Bundled gallery sits in a "references" directory two levels above this file.
    bundled_gallery = Path(__file__).resolve().parent.parent / "references" / "gallery.md"
    bundled_state = "可用" if bundled_gallery.exists() else "缺失!"
    print(f" 自带范例库 : {bundled_gallery} ({bundled_state})")
    optional_gallery = Path.home() / ".claude" / "skills" / "gpt-image" / "references" / "gallery.md"
    optional_state = "可用" if optional_gallery.exists() else "未装(不影响)"
    print(f" 可选扩展库 : gpt-image ({optional_state})")

    print(" ── 下游出图通道(本 skill 不主动调用,仅提示可用性)──")
    codex_location = which("codex") or "未找到"
    print(f" codex CLI : {codex_location}")
    plugin_dir = Path.home() / ".claude" / "plugins" / "cache" / "codex-image-in-cc"
    plugin_state = "已安装" if plugin_dir.exists() else "未安装(可 /codex-image:generate 出图)"
    print(f" codex-image: {plugin_state}")

    print(" 核心转化命令: convert / compose / series / edit / brand / character / data-viz / rewrite / adapt")
    print(" 稳定性命令 : overlay / visual-check / edit-check / visual-regress / lint / benchmark / revise")
    return 0
2889
+
2890
+
2891
+ # --------------------------------------------------------------------------- #
2892
+ # argparse
2893
+ # --------------------------------------------------------------------------- #
2894
def build_parser() -> argparse.ArgumentParser:
    """Build the top-level argument parser with one subparser per subcommand.

    A subcommand is required (``required=True``). Every subparser binds its
    handler with ``set_defaults(func=...)``, so the caller dispatches through
    ``args.func(args)``. Several ``choices`` lists are derived from the
    module-level names ``ASSET_ROUTES``, ``TEMPLATE_DEFS`` and
    ``FAILURE_PLAYBOOK``.

    Returns:
        The configured ``argparse.ArgumentParser``.
    """
    p = argparse.ArgumentParser(prog="prompt_cli.py", description="draw-prompt:自然语言画图需求 -> 生图 Prompt / handoff(不主动出图)")
    sub = p.add_subparsers(dest="cmd", required=True)

    # convert: single request -> prompt / handoff.
    pc = sub.add_parser("convert", help="自然语言画图需求 -> 高质量生图 Prompt / handoff")
    pc.add_argument("request_text", nargs="+", help="自然语言画图需求")
    pc.add_argument("--asset-type", choices=sorted(ASSET_ROUTES.keys()), help="覆盖自动识别的资产类型")
    pc.add_argument("--template", choices=sorted(TEMPLATE_DEFS.keys()), help="覆盖细分模板")
    pc.add_argument("--aspect", help="覆盖画幅,如 3:4 / 16:9 / 1:1")
    pc.add_argument("--text", action="append", help="必须逐字显示的文字,可重复")
    pc.add_argument("--subject", help="覆盖主体描述")
    pc.add_argument("--layout", help="覆盖布局规格")
    pc.add_argument("--style", help="风格锚点,逗号分隔")
    pc.add_argument("--materials", help="材料/质感,逗号分隔")
    pc.add_argument("--lighting", help="光照描述")
    pc.add_argument("--palette", help="配色,逗号分隔")
    pc.add_argument("--size", help="size 预设或像素,如 portrait / 1024x1536")
    pc.add_argument("--quality", choices=["low", "medium", "high"], help="质量档")
    pc.add_argument("--out", help="期望输出路径")
    pc.add_argument("--tags", help="额外样本标签,逗号分隔")
    pc.add_argument("--target", choices=["codex-image", "codex-exec", "raw"], default="codex-image")
    pc.add_argument("--strict-text", action="store_true", help="输出 visual prompt + text_overlay_spec,提升文字稳定性")
    pc.add_argument("--record-pending", action="store_true", help="把本次转化记录为 pending 样本")
    pc.add_argument("--no-handoff", action="store_true", help="只输出 Prompt,不输出下游指令")
    pc.add_argument("--json", action="store_true")
    pc.set_defaults(func=cmd_convert)

    # compose: long input / document -> multi-image visual plan.
    pco = sub.add_parser("compose", help="长输入/文档 -> 多张配套图视觉计划 + Prompt")
    pco.add_argument("input_text", nargs="*", help="长输入内容;也可用 --file")
    pco.add_argument("--file", help="从文件读取长输入")
    pco.add_argument("--max-images", type=int, default=6, help="最多拆成多少张图")
    pco.add_argument("--shared-style", help="整组图共享风格锚点")
    pco.add_argument("--palette", help="整组图共享配色,逗号分隔")
    pco.add_argument("--strict-text", action="store_true", help="文字/标签走 overlay_spec")
    pco.add_argument("--out-dir", help="为每张 handoff 生成建议输出路径")
    pco.add_argument("--target", choices=["codex-image", "codex-exec", "raw"], default="codex-image")
    pco.add_argument("--json", action="store_true")
    pco.set_defaults(func=cmd_compose)

    # series: several briefs -> prompts sharing one style.
    psr = sub.add_parser("series", help="多张同风格系列图 -> Prompt 组")
    psr.add_argument("--brief", action="append", help="单张图 brief,可重复")
    psr.add_argument("--file", help="JSON/JSONL/纯文本 briefs")
    psr.add_argument("--asset-type", choices=sorted(ASSET_ROUTES.keys()), help="覆盖所有 brief 的资产类型")
    psr.add_argument("--style", help="整组图共享风格")
    psr.add_argument("--palette", help="整组图共享配色")
    psr.add_argument("--strict-text", action="store_true")
    psr.add_argument("--target", choices=["codex-image", "codex-exec", "raw"], default="codex-image")
    psr.add_argument("--json", action="store_true")
    psr.set_defaults(func=cmd_series)

    # edit: reference image(s) + goal -> edit prompt.
    pe = sub.add_parser("edit", help="参考图/改图需求 -> 编辑 Prompt")
    pe.add_argument("--goal", required=True, help="改图目标")
    pe.add_argument("--reference", action="append", help="参考图,格式 role:path 或 path,可重复")
    pe.add_argument("--preserve", action="append", help="必须保留的元素,可重复")
    pe.add_argument("--change", action="append", help="必须修改的元素,可重复")
    pe.add_argument("--text", action="append", help="必须逐字显示的文字,可重复")
    pe.add_argument("--asset-type", choices=sorted(ASSET_ROUTES.keys()), default="product")
    pe.add_argument("--aspect", default="3:4")
    pe.add_argument("--style")
    pe.add_argument("--quality", choices=["low", "medium", "high"], default="high")
    pe.add_argument("--out")
    pe.add_argument("--target", choices=["codex-image", "codex-exec", "raw"], default="codex-image")
    pe.add_argument("--json", action="store_true")
    pe.set_defaults(func=cmd_edit)

    # brand: brand profile -> reusable prompt block, optionally one compiled image.
    pbr = sub.add_parser("brand", help="品牌一致性档案 -> 品牌 Prompt 块/可选编译")
    pbr.add_argument("--name", required=True)
    pbr.add_argument("--industry")
    pbr.add_argument("--values", help="品牌价值,逗号分隔")
    pbr.add_argument("--palette", help="品牌配色,逗号分隔")
    pbr.add_argument("--style", help="品牌视觉风格")
    pbr.add_argument("--avoid", help="禁用项,逗号分隔")
    pbr.add_argument("--request", help="可选:直接用品牌档案编译一张图")
    pbr.add_argument("--asset-type", choices=sorted(ASSET_ROUTES.keys()), default="poster")
    pbr.add_argument("--text", action="append", help="编译图时必须显示的文字")
    pbr.add_argument("--strict-text", action="store_true")
    pbr.add_argument("--target", choices=["codex-image", "codex-exec", "raw"], default="codex-image")
    pbr.add_argument("--json", action="store_true")
    pbr.set_defaults(func=cmd_brand)

    # character: character bible + reference sheet / scene prompts.
    pch = sub.add_parser("character", help="角色一致性 bible + 参考表/场景 Prompt")
    pch.add_argument("--name", required=True)
    pch.add_argument("--description", required=True)
    pch.add_argument("--style")
    pch.add_argument("--outfit")
    pch.add_argument("--palette")
    pch.add_argument("--scene", action="append", help="用同一角色生成的场景,可重复")
    pch.add_argument("--target", choices=["codex-image", "codex-exec", "raw"], default="codex-image")
    pch.add_argument("--json", action="store_true")
    pch.set_defaults(func=cmd_character)

    # data-viz: data file / request -> chart prompt (no --target option here).
    pdv = sub.add_parser("data-viz", help="数据/报表需求 -> 信息图/图表 Prompt")
    pdv.add_argument("--file", help="CSV/TSV/JSON 数据文件")
    pdv.add_argument("--request", help="图表需求")
    pdv.add_argument("--chart-type", help="覆盖图表类型")
    pdv.add_argument("--title", help="图表标题")
    pdv.add_argument("--strict-text", action="store_true", help="标题/轴/数字走 overlay_spec")
    pdv.add_argument("--json", action="store_true")
    pdv.set_defaults(func=cmd_data_viz)

    # rewrite: low-quality prompt -> structured prompt.
    prw = sub.add_parser("rewrite", help="低质量 prompt/画图需求 -> 结构化高质量 Prompt")
    prw.add_argument("prompt_text", nargs="*", help="原始 prompt;也可用 --file")
    prw.add_argument("--file", help="从文件读取原始 prompt")
    prw.add_argument("--asset-type", choices=sorted(ASSET_ROUTES.keys()))
    prw.add_argument("--style")
    prw.add_argument("--text", action="append", help="必须逐字显示的文字,可重复")
    prw.add_argument("--strict-text", action="store_true")
    prw.add_argument("--target", choices=["codex-image", "codex-exec", "raw"], default="codex-image")
    prw.add_argument("--json", action="store_true")
    prw.set_defaults(func=cmd_rewrite)

    # adapt: one request -> one prompt per aspect ratio.
    pad = sub.add_parser("adapt", help="同一需求 -> 多尺寸/多画幅适配 Prompt")
    pad.add_argument("request_text", nargs="+", help="画图需求")
    pad.add_argument("--aspects", default="1:1,3:4,16:9,9:16", help="逗号分隔画幅列表")
    pad.add_argument("--asset-type", choices=sorted(ASSET_ROUTES.keys()))
    pad.add_argument("--style")
    pad.add_argument("--text", action="append", help="必须逐字显示的文字,可重复")
    pad.add_argument("--strict-text", action="store_true")
    pad.add_argument("--target", choices=["codex-image", "codex-exec", "raw"], default="codex-image")
    pad.add_argument("--json", action="store_true")
    pad.set_defaults(func=cmd_adapt)

    # overlay: apply exact text onto a finished image.
    pov = sub.add_parser("overlay", help="按 text_overlay_spec 给成品图叠加精确文字")
    pov.add_argument("--image", required=True, help="下游生成的原始图片路径")
    pov.add_argument("--spec", help="spec JSON 字符串、JSON 文件路径,或 @path")
    pov.add_argument("--text", action="append", help="没有 spec 时直接叠加的文字,可重复")
    pov.add_argument("--out", help="输出路径,默认 <image>.final.png")
    pov.add_argument("--font", help="字体文件路径;默认优先用系统中文字体")
    pov.add_argument("--json", action="store_true")
    pov.set_defaults(func=cmd_overlay)

    # visual-check: quality gate for a single finished image.
    pvc = sub.add_parser("visual-check", help="对单张成品图做尺寸/画幅/亮度/对比度质量门")
    pvc.add_argument("--image", required=True)
    pvc.add_argument("--spec", help="spec JSON 字符串、JSON 文件路径,或 @path")
    pvc.add_argument("--aspect", help="覆盖期望画幅,如 3:4 / 16:9")
    pvc.add_argument("--strict", action="store_true", help="更严格地处理亮度异常")
    pvc.add_argument("--json", action="store_true")
    pvc.set_defaults(func=cmd_visual_check)

    # edit-check: compare an edited output against its reference image.
    pec = sub.add_parser("edit-check", help="验证参考图编辑是否保留中心主体并产生有效变化")
    pec.add_argument("--reference", required=True)
    pec.add_argument("--output", required=True)
    pec.add_argument("--threshold", type=float, default=0.58, help="中心主体直方图相似度阈值")
    pec.add_argument("--min-change", type=float, default=0.03, help="整图最小变化量")
    pec.add_argument("--json", action="store_true")
    pec.set_defaults(func=cmd_edit_check)

    # visual-regress: batch regression over visual cases; its edit defaults
    # match the edit-check defaults above (0.58 / 0.03).
    pvr = sub.add_parser("visual-regress", help="批量跑场景 prompt + 可选成品图质量回归")
    pvr.add_argument("cases", help="JSONL/JSON visual cases 文件")
    pvr.add_argument("--require-images", action="store_true", help="case 必须带 image/output 才算通过")
    pvr.add_argument("--strict", action="store_true")
    pvr.add_argument("--edit-threshold", type=float, default=0.58)
    pvr.add_argument("--edit-min-change", type=float, default=0.03)
    pvr.add_argument("--json", action="store_true")
    pvr.set_defaults(func=cmd_visual_regress)

    # lint: check a prompt against the hard constraints.
    pl = sub.add_parser("lint", help="检查 Prompt 是否满足生图转化硬约束")
    pl.add_argument("--prompt", required=True)
    pl.add_argument("--asset-type", choices=sorted(ASSET_ROUTES.keys()))
    pl.add_argument("--quality", choices=["low", "medium", "high"])
    pl.add_argument("--text", action="append", help="必须逐字显示的文字,可重复")
    pl.add_argument("--json", action="store_true")
    pl.set_defaults(func=cmd_lint)

    # benchmark: repeat golden cases to check determinism and lint.
    pb = sub.add_parser("benchmark", help="批量跑 golden cases,检查转化稳定性和 lint")
    pb.add_argument("cases", help="JSONL 或 JSON cases 文件")
    pb.add_argument("--runs", type=int, default=3, help="每个 case 重复转化次数,用于检查确定性")
    pb.add_argument("--json", action="store_true")
    pb.set_defaults(func=cmd_benchmark)

    # revise: produce a revised prompt for a named failure category.
    pr = sub.add_parser("revise", help="按失败分类生成修订版 Prompt")
    pr.add_argument("--sample-id", help="样本 id 或 last")
    pr.add_argument("--prompt", help="没有样本时直接修订这段 prompt")
    pr.add_argument("--request", help="没有样本时提供原始需求")
    pr.add_argument("--spec", help="没有样本时提供 Prompt Spec JSON")
    pr.add_argument("--reason", choices=sorted(FAILURE_PLAYBOOK.keys()), default="text_error")
    pr.add_argument("--out", help="记录 pending 时补记输出路径")
    pr.add_argument("--record-pending", action="store_true")
    pr.add_argument("--list-reasons", action="store_true", help="列出支持的失败分类")
    pr.add_argument("--json", action="store_true")
    pr.set_defaults(func=cmd_revise)

    # profile: read/write the style preference profile.
    pp = sub.add_parser("profile", help="读写风格偏好档案")
    pp.add_argument("action", choices=["show", "init", "set", "note", "path"])
    pp.add_argument("key", nargs="?", help="set 的字段名")
    pp.add_argument("value", nargs="?", help="set 的值 / note 的文本")
    pp.add_argument("--force", action="store_true", help="init 时覆盖已有档案")
    pp.set_defaults(func=cmd_profile)

    # samples: record / search generation samples.
    ps = sub.add_parser("samples", help="记录/检索出图样本")
    ps.add_argument("action", choices=["add", "search", "list"])
    ps.add_argument("query", nargs="?", help="search 的关键词")
    ps.add_argument("--request")
    ps.add_argument("--prompt")
    ps.add_argument("--verdict", choices=["accept", "reject", "pending"])
    ps.add_argument("--reason")
    ps.add_argument("--image")
    ps.add_argument("--size")
    ps.add_argument("--quality")
    ps.add_argument("--tags")
    ps.add_argument("--source")
    ps.add_argument("--confidence", choices=["low", "medium", "high"])
    ps.add_argument("--scope")
    ps.add_argument("--limit", type=int, default=5)
    ps.add_argument("--json", action="store_true")
    ps.set_defaults(func=cmd_samples)

    # feedback: accept / reject a sample; the id defaults to "last".
    pf = sub.add_parser("feedback", help="对上一次/指定样本记录采纳或弃用")
    pf.add_argument("id", nargs="?", default="last", help="样本 id 或 'last'")
    pf.add_argument("--verdict", required=True, choices=["accept", "reject"])
    pf.add_argument("--reason")
    pf.add_argument("--category", choices=sorted(FAILURE_PLAYBOOK.keys()), help="reject 时记录结构化失败分类")
    pf.add_argument("--image", help="补记成品图路径")
    pf.set_defaults(func=cmd_feedback)

    # judge: store scores computed by the agent.
    pj = sub.add_parser("judge", help="存储 agent 给出的评分(CLI 不评分、不调 Codex)")
    pj.add_argument("action", choices=["rubric", "record", "list"])
    pj.add_argument("--image")
    pj.add_argument("--prompt")
    pj.add_argument("--score", help="agent 算好的评分 JSON 字符串")
    pj.add_argument("--sample-id", dest="sample_id")
    pj.add_argument("--limit", type=int, default=10)
    pj.set_defaults(func=cmd_judge)

    # handoff: print a ready-made instruction block for the downstream tool.
    ph = sub.add_parser("handoff", help="生成交给 Codex 的现成指令块(仅打印,不执行)")
    ph.add_argument("--prompt", required=True)
    ph.add_argument("--request", help="原始画图需求;--record-pending 时必填")
    ph.add_argument("--out", help="期望输出路径")
    ph.add_argument("--size", help="size 预设或像素,如 portrait / 1024x1536")
    ph.add_argument("--quality", help="质量档:low/medium/high")
    ph.add_argument("--tags", help="样本标签,逗号分隔")
    ph.add_argument("--spec", help="关联的 Prompt Spec JSON;--record-pending 时写入样本")
    ph.add_argument("--record-pending", action="store_true", help="把 handoff 记录为 pending 样本")
    ph.add_argument("--target", choices=["codex-image", "codex-exec", "raw"], default="codex-image")
    ph.set_defaults(func=cmd_handoff)

    # status: takes no arguments of its own.
    sub.add_parser("status", help="数据与下游通道健康检查").set_defaults(func=cmd_status)
    return p
3132
+
3133
+
3134
def main(argv: list[str] | None = None) -> int:
    """Parse ``argv`` and run the selected subcommand.

    Args:
        argv: Argument list to parse; ``None`` lets argparse read
            ``sys.argv``.

    Returns:
        The handler's result as an int; a falsy result (such as ``None``)
        becomes 0.
    """
    namespace = build_parser().parse_args(argv)
    outcome = namespace.func(namespace)
    return int(outcome or 0)
3138
+
3139
+
3140
# Script entry point: exit the process with the status code returned by main().
if __name__ == "__main__":
    sys.exit(main())