npm - ai-spec-dev - Versions diffs - 0.30.1 → 0.33.0 - Mend

ai-spec-dev 0.30.1 → 0.33.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (38)

package/.claude/settings.local.json +5 -1
package/README.md +29 -1
package/RELEASE_LOG.md +188 -0
package/cli/commands/config.ts +93 -0
package/cli/commands/export.ts +66 -0
package/cli/commands/init.ts +153 -0
package/cli/commands/learn.ts +30 -0
package/cli/commands/logs.ts +106 -0
package/cli/commands/model.ts +156 -0
package/cli/commands/restore.ts +22 -0
package/cli/commands/review.ts +63 -0
package/cli/commands/trend.ts +36 -0
package/cli/commands/update.ts +178 -0
package/cli/commands/workspace.ts +219 -0
package/cli/index.ts +301 -1
package/cli/utils.ts +83 -0
package/core/dsl-feedback.ts +255 -0
package/core/prompt-hasher.ts +42 -0
package/core/run-logger.ts +21 -0
package/core/run-trend.ts +241 -0
package/core/self-evaluator.ts +276 -0
package/dist/cli/index.js +1089 -445
package/dist/cli/index.js.map +1 -1
package/dist/cli/index.mjs +1089 -445
package/dist/cli/index.mjs.map +1 -1
package/dist/index.js.map +1 -1
package/dist/index.mjs.map +1 -1
package/package.json +6 -3
package/purpose.md +189 -2
package/tests/dsl-extractor.test.ts +264 -0
package/tests/dsl-feedback.test.ts +266 -0
package/tests/dsl-validator.test.ts +283 -0
package/tests/error-feedback.test.ts +292 -0
package/tests/provider-utils.test.ts +173 -0
package/tests/run-trend.test.ts +186 -0
package/tests/self-evaluator.test.ts +339 -0
package/tests/spec-assessor.test.ts +142 -0
package/tests/task-generator.test.ts +230 -0

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "ai-spec-dev",
-  "version": "0.30.1",
+  "version": "0.33.0",
   "description": "AI-driven Development Orchestrator SDK & CLI",
   "main": "dist/index.js",
   "types": "dist/index.d.ts",
@@ -9,7 +9,9 @@
   },
   "scripts": {
     "build": "tsup",
-    "dev": "tsup --watch"
+    "dev": "tsup --watch",
+    "test": "vitest run",
+    "test:watch": "vitest"
   },
   "keywords": [
     "ai",
@@ -41,6 +43,7 @@
     "glob": "^13.0.6",
     "ts-node": "^10.9.2",
     "tsup": "^8.4.0",
-    "typescript": "^5.7.3"
+    "typescript": "^5.7.3",
+    "vitest": "^2.1.0"
   }
 }

package/purpose.md CHANGED Viewed

@@ -2,7 +2,7 @@
 > 痛点 · 架构创新 · 边界处理 · DSL 的意义 · 当前局限 · 未来方向
 >
-> 当前版本：v0.30.0 · 最后更新：2026-03-29
+> 当前版本：v0.31.0 · 最后更新：2026-03-29
 ***
@@ -19,7 +19,7 @@
 7. [当前局限](#7-当前局限)
 8. [未来优化方向](#8-未来优化方向)
-> **版本记录速览**：v0.17.0 宪法截断修复 · v0.18.0 `learn` + `minSpecScore` + 行为契约提取 · v0.19.0 错误解析重写 + Auto Gate 修复 · v0.20.0 `mock --serve` 一键联调 · v0.21.0 store 公开 API 提取修复 · v0.22.0 service/api 层分离 · v0.23.0 view/route 双层 + 文件名幻觉修复 · v0.24.0 四项质量修复（export default、impliesRegistration、依赖拓扑排序、lesson 计数）· v0.25.0 HTTP import 正则、分页提取、isToolCrash 三项修复 · v0.26.0 多仓库 review 目录、batch 容错、tasks JSON 健壮性 · **v0.27.0 可靠性三件套**（Provider retry/timeout/分类、文件快照 + `restore`、RunId 结构化日志）· **v0.28.0 3-pass review**（Pass 3 影响面评估 + 代码复杂度评估）· **v0.29.0 全量审查修复**（RunLogger 完整插桩、update 快照/日志/knowledge、Score Trend 显示影响/复杂度等级、死代码清理）· **v0.30.0 错误修复依赖图排序 + 前端 Import 多行感知解析**
+> **版本记录速览**：v0.17.0 宪法截断修复 · v0.18.0 `learn` + `minSpecScore` + 行为契约提取 · v0.19.0 错误解析重写 + Auto Gate 修复 · v0.20.0 `mock --serve` 一键联调 · v0.21.0 store 公开 API 提取修复 · v0.22.0 service/api 层分离 · v0.23.0 view/route 双层 + 文件名幻觉修复 · v0.24.0 四项质量修复（export default、impliesRegistration、依赖拓扑排序、lesson 计数）· v0.25.0 HTTP import 正则、分页提取、isToolCrash 三项修复 · v0.26.0 多仓库 review 目录、batch 容错、tasks JSON 健壮性 · **v0.27.0 可靠性三件套**（Provider retry/timeout/分类、文件快照 + `restore`、RunId 结构化日志）· **v0.28.0 3-pass review**（Pass 3 影响面评估 + 代码复杂度评估）· **v0.29.0 全量审查修复**（RunLogger 完整插桩、update 快照/日志/knowledge、Score Trend 显示影响/复杂度等级、死代码清理）· **v0.30.0 错误修复依赖图排序 + 前端 Import 多行感知解析** · **v0.31.0 Harness Engineer：Prompt Hash + Create 内联 Self-Eval**
 ***
@@ -55,10 +55,107 @@ ai-spec 对每个痛点都有对应的架构设计，不是功能堆砌，而是
 **核心定位**：ai-spec 不是代码补全工具，而是一个「AI 辅助工程流程编排器」。它的目标是让工程师用最少的时间获得一个符合项目规范、通过基本质检、可直接进入 Review 的代码分支。
+### 1.3 整体架构鸟瞰
+> 在进入各模块细节之前，先建立一个全局视图。
+```mermaid
+flowchart TD
+    subgraph INPUT["输入层"]
+        IDEA["💬 需求描述\n自然语言"]
+        CONST["📜 项目宪法\n§1-§8 规则 + §9 教训"]
+        CTX["🗂️ 项目上下文\n代码结构 / 依赖 / 路由"]
+    end
+    subgraph CONTRACT["双层契约"]
+        SPEC["📄 Spec\nMarkdown 人类可读"]
+        DSL["📊 DSL\nJSON 机器可读"]
+    end
+    subgraph GATE["质量门控"]
+        SCORE_GATE["🎯 Spec 质量评分\nminSpecScore 阈值"]
+        APPROVAL["🧑‍💻 Approval Gate\n人工确认后才开始生成"]
+    end
+    subgraph GENERATE["生成层"]
+        CODEGEN["⚙️ Task 分层代码生成\ndata→service→api→view→route\n层内拓扑排序 + batch 并行"]
+        CACHE[("🗄️ File Cache\n行为契约 / 函数签名")]
+    end
+    subgraph VERIFY["验证层"]
+        ERRLOOP["🔄 错误反馈闭环\n≤2 cycle · 依赖图排序修复"]
+        REVIEW["🔬 3-pass 代码审查\n架构 + 实现 + 影响面"]
+    end
+    subgraph LEARN["学习层（闭环）"]
+        KNOW["📚 §9 知识积累\n审查 issue 自动写入"]
+        EVAL["📈 Harness Self-Eval\nharnessScore + promptHash"]
+    end
+    IDEA --> SPEC
+    CONST -->|"全文注入所有 prompt"| SPEC
+    CTX --> SPEC
+    SPEC --> SCORE_GATE
+    SCORE_GATE -->|"通过"| APPROVAL
+    SCORE_GATE -->|"不足"| STOP1(["🚫 中止"])
+    APPROVAL -->|"Proceed"| DSL
+    APPROVAL -->|"Abort"| STOP2(["🚫 退出，无残留"])
+    DSL --> CODEGEN
+    CODEGEN <-->|"读写"| CACHE
+    CODEGEN --> ERRLOOP
+    ERRLOOP --> REVIEW
+    REVIEW --> KNOW
+    KNOW -->|"更新宪法 §9"| CONST
+    REVIEW --> EVAL
+```
 ***
 ## 2. 核心架构设计
+### 2.0 `ai-spec create` 完整流水线
+> 以下流程图展示了运行一次 `ai-spec create` 时所有步骤的完整执行路径，包括每个决策门和反馈循环。后续 §2.1—§2.13 各节是对图中各模块的深度解析。
+```mermaid
+flowchart TD
+    START(["▶ ai-spec create &lt;idea&gt;"])
+    START --> S1["Step 1 · 加载项目上下文\nContextLoader 扫描代码结构 / 依赖 / 路由 / schema"]
+    S1 --> S2["Step 2 · Spec + Tasks 生成\n宪法全文注入 prompt 最高优先级"]
+    S2 --> S3["Step 3 · 交互式润色\nDiff 预览，可多轮修改"]
+    S3 --> S34["Step 3.4 · Spec 质量评估\n覆盖度 / 清晰度 / 宪法符合度打分"]
+    S34 --> G1{Score ≥ minSpecScore?}
+    G1 -->|"否"| ABORT1(["🚫 exit(1)\n--force 可强制继续"])
+    G1 -->|"是"| G2{"Approval Gate\n展示 Spec + DSL 摘要\n等待人工决策"}
+    G2 -->|"Abort"| ABORT2(["🚫 退出\nSpec 不写入磁盘"])
+    G2 -->|"Proceed"| DSL["DSL 提取 + 9 条规则校验\n失败最多重试 2 次"]
+    DSL --> TRACK["RunId 生成 + 文件快照初始化\nPrompt Hash 写入 RunLog"]
+    TRACK --> CG["Step 5–6 · Task 分层代码生成\ndata → infra → service → api → view → route → test\n层内拓扑排序 → batch 并行 → 缓存更新"]
+    CG --> TG["Step 7 · 测试骨架生成"]
+    TG --> EF["Step 8 · 错误反馈闭环"]
+    EF --> EF_RUN["运行 test / lint / tsc"]
+    EF_RUN --> EF_CHECK{全部通过?}
+    EF_CHECK -->|"通过"| RV
+    EF_CHECK -->|"有错误 · cycle ≤ 2"| EF_FIX["依赖图排序\nAI 逐文件修复\n携带 DSL 上下文"]
+    EF_FIX --> EF_RUN
+    EF_CHECK -->|"cycle 2 仍失败"| EF_WARN["⚠️ 黄色警告，继续"]
+    EF_WARN --> RV
+    RV["Step 9 · 3-pass 代码审查\nPass1 架构 · Pass2 实现 · Pass3 影响面/复杂度"]
+    RV --> KNOW["§9 知识积累\n审查 issue 自动追加宪法"]
+    KNOW --> SE["Step 10 · Harness Self-Eval\nDSL 覆盖 + Compile + Review → harnessScore\nPromptHash 关联，零 AI 调用"]
+    SE --> DONE(["✔ Done\nSpec / DSL / 代码 / RunLog 全部落盘"])
+```
+***
 ### 2.1 项目宪法：可进化的项目记忆
 绝大多数 AI 工具的「上下文注入」是静态的——你手动写一段 prompt，每次带进去。ai-spec 的宪法系统不同，它是一个会随项目迭代自动更新的活文档。
@@ -148,6 +245,43 @@ data → infra → service → api → view → route → test
 | route | — | 路由模块文件（`src/router/routes/`） |
 | test | 单测、集成测试 | 同左 |
+**执行模型图解：**
+```mermaid
+flowchart TB
+    subgraph LAYERS["七层顺序（跨层串行）"]
+        direction LR
+        LA["data"] --> LB["infra"] --> LC["service"] --> LD["api"] --> LE["view"] --> LF["route"] --> LG["test"]
+    end
+    subgraph WITHIN["单层内部执行（以 api 层为例）"]
+        direction TB
+        TOPO["拓扑排序\n按 dependencies 字段分 batch"]
+        subgraph B1["Batch 1 · 无依赖 → 并行"]
+            T1["userController.ts"]
+            T2["authController.ts"]
+        end
+        subgraph B2["Batch 2 · 依赖 Batch 1 → 并行"]
+            T3["adminController.ts\n需要 import userController"]
+        end
+        TOPO --> B1
+        B1 -->|"batch 完成\n更新 FileCache"| B2
+    end
+    CACHE[("generatedFileCache\n函数签名 / 文件路径\nbehavioral contracts")]
+    B1 <-->|"读写"| CACHE
+    B2 <-->|"读写"| CACHE
+    LAYERS -->|"进入当层"| WITHIN
+    WITHIN -->|"层完成\n更新共享 config 文件\n(routes/index.ts 等)"| NEXT(["下一层"])
+```
+> **为什么 route 必须在 view 之后？** view 完成后，`TaskManagement.vue` 的真实路径进入 FileCache；route task 生成时 prompt 里已经有 `// exists: src/views/task-management/TaskManagement.vue`，AI 不会再猜测 `index.vue`（v0.23.0 文件名幻觉修复）。
 **前端四层链路设计（v0.22.0+）**：`service`（api 调用函数）→ `api`（store，导入 service 函数）→ `view`（页面，使用 store action）→ `route`（路由模块，导入 view 组件）。`route` 必须在 `view` 之后生成，因为路由文件需要知道 view 组件的确切文件名（如 `TaskManagement.vue` 而非猜测的 `index.vue`）——这是 v0.23.0 修复文件名幻觉的核心机制。
 每个 task 完成后立即写入 `status: done` 到 `tasks.json`，`--resume` 标志让流水线跳过已完成 task，中断恢复精确到 task 粒度。`tasks.json` 文件损坏时（意外截断等）能检测并优雅降级，提示重新生成（v0.26.0）。
@@ -415,6 +549,59 @@ Pass 3 不重复 Pass 1 / Pass 2 的发现，而是站在更高的系统视角
 ***
+### 2.13 Harness Engineer：Prompt Hash + Self-Eval（v0.31.0+）
+#### 背景：缺失的自我评估能力
+v0.31.0 之前，ai-spec 是一个能自动生成代码的 Harness，但它是一个**没有自我量化能力的 Harness**。你可以感知到某次生成质量好坏，但无法回答：
+- 修改了 codegen prompt 之后，整体质量是提升还是下降了？
+- 哪个 provider / model 在这个项目上生成质量最稳定？
+- 加严宪法 §9 之后，compile 通过率有没有提高？
+这正是 **Harness Engineer** 理念的核心缺口：工程师不只是构建 AI 生成系统，还必须能**量化证明这个系统在变好**。
+#### Prompt Hash（`core/prompt-hasher.ts`）
+每次 `ai-spec create` 启动时，对 6 个核心 prompt 字符串（codegen、DSL extractor、spec generator、review 三 pass）计算 SHA-256 并取前 8 位（如 `a3f2c1d8`），写入 RunLog 根级字段 `promptHash`。
+```json
+{
+  "runId": "20260329-143022-a7f2",
+  "promptHash": "a3f2c1d8",
+  "harnessScore": 7.8,
+  ...
+}
+```
+任何 prompt 文件的改动都会产生不同的 hash。跨多个 RunLog 对比 `harnessScore` 时，先按 `promptHash` 分组，就能把「prompt 版本差异」从「模型随机性」中解耦，知道分数变化是因为 prompt 改了还是 LLM 本身的波动。
+#### Create 内联 Self-Eval（`core/self-evaluator.ts`）
+`ai-spec create` 在 Step 9（code review）之后新增 **Step 10: Harness Self-Eval**，零 AI 调用，纯确定性评分：
+| 维度 | 评分逻辑 | 权重 |
+|------|---------|------|
+| **DSL Coverage** (0-10) | 检查生成文件是否覆盖了 DSL 声明的 endpoint 层和 model 层 | 40% |
+| **Compile Score** (0-10) | error feedback 全部通过 → 10；未通过 / 跳过 → 5 | 30% |
+| **Review Score** (0-10) | 从 3-pass review 文本提取 `Score: X/10` | 30% |
+当 review 被跳过（`--skip-review`）时，权重自动调整为 DSL 55% + Compile 45%。
+**输出示例：**
+```
+─── Harness Self-Eval ───────────────────────────
+  Score  : [████████░░] 7.8/10
+  DSL    : 8/10  Compile: pass  Review: 7.2/10
+  Prompt : a3f2c1d8
+─────────────────────────────────────────────────
+```
+`harnessScore` 和 `promptHash` 同时写入 RunLog，为日后实现跨运行趋势分析奠定数据基础。
+***
 ## 3. DSL 层的意义
 DSL 是整个系统中设计投入最大的模块，也是最容易被误解为「多此一举」的部分。

package/tests/dsl-extractor.test.ts ADDED Viewed

@@ -0,0 +1,264 @@
+import { describe, it, expect, vi } from "vitest";
+import {
+  dslFilePath,
+  buildDslContextSection,
+  DslExtractor,
+} from "../core/dsl-extractor";
+import type { SpecDSL } from "../core/dsl-types";
+import type { AIProvider } from "../core/spec-generator";
+// ─── Fixtures ─────────────────────────────────────────────────────────────────
+const VALID_DSL: SpecDSL = { // Shared fixture: one model, one endpoint, one behavior; tests below spread it to build variants.
+  version: "1.0",
+  feature: {
+    id: "user-login", // asserted by the extract() success and retry tests
+    title: "User Login", // asserted by the JSON-fence test
+    description: "Authenticate users with email and password",
+  },
+  models: [
+    {
+      name: "User",
+      fields: [
+        { name: "id", type: "String", required: true },
+        { name: "email", type: "String", required: true, unique: true }, // the only field flagged unique
+      ],
+      relations: ["has many Session"],
+    },
+  ],
+  endpoints: [
+    {
+      id: "EP-001", // also referenced by the component fixture's apiCalls
+      method: "POST",
+      path: "/api/auth/login",
+      description: "Authenticate and return JWT",
+      auth: false,
+      successStatus: 200,
+      successDescription: "JWT token",
+      request: { body: { email: "string", password: "string" } },
+      errors: [
+        { status: 401, code: "INVALID_CREDENTIALS", description: "Bad password" },
+      ],
+    },
+  ],
+  behaviors: [
+    {
+      id: "BHV-001",
+      description: "Rate-limit login to 5 attempts per minute",
+      trigger: "POST /api/auth/login",
+      constraints: ["block after 5 failures"],
+    },
+  ],
+};
+function makeProvider(response: string): AIProvider { // Test double: an AIProvider whose generate() is a vitest mock.
+  return { generate: vi.fn().mockResolvedValue(response) }; // every call resolves with the same canned `response`
+}
+// ─── dslFilePath ──────────────────────────────────────────────────────────────
+describe("dslFilePath", () => { // Maps a spec `.md` path to its `.dsl.json` counterpart.
+  it("replaces .md extension with .dsl.json", () => {
+    expect(dslFilePath("/specs/feature-login-v1.md")).toBe("/specs/feature-login-v1.dsl.json");
+  });
+  it("works with relative paths", () => {
+    expect(dslFilePath("specs/my-feature-v2.md")).toBe("specs/my-feature-v2.dsl.json");
+  });
+  it("handles files in the current directory", () => {
+    expect(dslFilePath("feature.md")).toBe("feature.dsl.json"); // bare filename, no directory part
+  });
+  it("preserves directory structure", () => {
+    const result = dslFilePath("/a/b/c/feature-v3.md");
+    expect(result).toContain("/a/b/c/"); // loose check: only the directory prefix and the suffix are pinned, not the full path
+    expect(result.endsWith(".dsl.json")).toBe(true);
+  });
+});
+// ─── buildDslContextSection ───────────────────────────────────────────────────
+describe("buildDslContextSection", () => { // Substring checks on the text rendered from a SpecDSL; exact layout is not pinned.
+  it("includes the section header and footer", () => {
+    const result = buildDslContextSection(VALID_DSL);
+    expect(result).toContain("=== Feature DSL"); // header matched by prefix only
+    expect(result).toContain("=== End of DSL ===");
+  });
+  it("lists model names and fields", () => {
+    const result = buildDslContextSection(VALID_DSL);
+    expect(result).toContain("User:");
+    expect(result).toContain("email: String");
+  });
+  it("marks required fields", () => {
+    const result = buildDslContextSection(VALID_DSL);
+    expect(result).toContain("required"); // NOTE(review): passes if the word appears anywhere in the output, not per field
+  });
+  it("marks unique fields", () => {
+    const result = buildDslContextSection(VALID_DSL);
+    expect(result).toContain("unique"); // same caveat: a whole-output substring match
+  });
+  it("includes model relations", () => {
+    const result = buildDslContextSection(VALID_DSL);
+    expect(result).toContain("has many Session");
+  });
+  it("includes endpoint method, path, and auth", () => {
+    const result = buildDslContextSection(VALID_DSL);
+    expect(result).toContain("POST");
+    expect(result).toContain("/api/auth/login");
+    expect(result).toContain("auth: false");
+  });
+  it("includes endpoint error codes", () => {
+    const result = buildDslContextSection(VALID_DSL);
+    expect(result).toContain("INVALID_CREDENTIALS");
+  });
+  it("includes request body fields", () => {
+    const result = buildDslContextSection(VALID_DSL);
+    expect(result).toContain("email"); // NOTE(review): "email" is also a model field name, so this alone does not prove the request body was rendered
+    expect(result).toContain("password"); // "password" occurs only in the request body of the fixture
+  });
+  it("includes behaviors with trigger and constraints", () => {
+    const result = buildDslContextSection(VALID_DSL);
+    expect(result).toContain("Rate-limit login");
+    expect(result).toContain("POST /api/auth/login");
+    expect(result).toContain("block after 5 failures");
+  });
+  it("handles empty models array gracefully", () => {
+    const dsl: SpecDSL = { ...VALID_DSL, models: [] };
+    const result = buildDslContextSection(dsl);
+    expect(result).not.toContain("-- Data Models --"); // section heading must be absent when there is nothing to list
+  });
+  it("handles empty endpoints array gracefully", () => {
+    const dsl: SpecDSL = { ...VALID_DSL, endpoints: [] };
+    const result = buildDslContextSection(dsl);
+    expect(result).not.toContain("-- API Endpoints --");
+  });
+  it("handles empty behaviors array gracefully", () => {
+    const dsl: SpecDSL = { ...VALID_DSL, behaviors: [] };
+    const result = buildDslContextSection(dsl);
+    expect(result).not.toContain("-- Business Behaviors --");
+  });
+  it("includes UI components section when components are present", () => {
+    const dsl: SpecDSL = { // VALID_DSL has no `components`; add one component for this case
+      ...VALID_DSL,
+      components: [
+        {
+          id: "CMP-001",
+          name: "LoginForm",
+          description: "Login form component",
+          props: [{ name: "onSuccess", type: "() => void", required: true }],
+          events: [{ name: "onSubmit", payload: "FormData" }],
+          state: { isLoading: "boolean" },
+          apiCalls: ["EP-001"],
+        },
+      ],
+    };
+    const result = buildDslContextSection(dsl);
+    expect(result).toContain("-- UI Components --");
+    expect(result).toContain("LoginForm");
+    expect(result).toContain("onSuccess");
+    expect(result).toContain("onSubmit");
+    expect(result).toContain("isLoading:boolean"); // state entries are expected as key:type with no space after the colon
+    expect(result).toContain("EP-001"); // NOTE(review): the endpoint id could also appear via the endpoints section — confirm this pins apiCalls
+  });
+});
+// ─── DslExtractor.extract — success path ─────────────────────────────────────
+describe("DslExtractor.extract — success", () => { // Cases where the stubbed provider's output yields a non-null DSL.
+  it("returns a valid SpecDSL when AI output is bare JSON", async () => {
+    const provider = makeProvider(JSON.stringify(VALID_DSL));
+    const extractor = new DslExtractor(provider);
+    const consoleSpy = vi.spyOn(console, "log").mockImplementation(() => {}); // silence console.log while extract() runs
+    const result = await extractor.extract("spec content", { auto: true }); // NOTE(review): `auto` semantics are defined in DslExtractor, not visible here
+    consoleSpy.mockRestore(); // put the real console.log back
+    expect(result).not.toBeNull();
+    expect(result?.feature.id).toBe("user-login");
+  });
+  it("returns a valid SpecDSL when AI wraps output in a JSON fence", async () => {
+    const fenced = "```json\n" + JSON.stringify(VALID_DSL) + "\n```"; // same payload wrapped in a Markdown ```json fence
+    const provider = makeProvider(fenced);
+    const extractor = new DslExtractor(provider);
+    const consoleSpy = vi.spyOn(console, "log").mockImplementation(() => {});
+    const result = await extractor.extract("spec content", { auto: true });
+    consoleSpy.mockRestore();
+    expect(result?.feature.title).toBe("User Login"); // a null result would make this undefined and fail the assertion
+  });
+  it("sanitizes empty error entries before validation", async () => {
+    const dslWithEmptyErrors = { // VALID_DSL with an extra error entry whose code and description are empty strings
+      ...VALID_DSL,
+      endpoints: [
+        {
+          ...VALID_DSL.endpoints[0],
+          errors: [
+            { status: 400, code: "", description: "" }, // invalid — should be stripped
+            { status: 401, code: "INVALID_CREDENTIALS", description: "Bad password" },
+          ],
+        },
+      ],
+    };
+    const provider = makeProvider(JSON.stringify(dslWithEmptyErrors));
+    const extractor = new DslExtractor(provider);
+    const consoleSpy = vi.spyOn(console, "log").mockImplementation(() => {});
+    const result = await extractor.extract("spec content", { auto: true });
+    consoleSpy.mockRestore();
+    expect(result).not.toBeNull();
+    // The empty error entry should have been stripped
+    expect(result?.endpoints[0].errors).toHaveLength(1);
+    expect(result?.endpoints[0].errors?.[0].code).toBe("INVALID_CREDENTIALS"); // the surviving entry is the well-formed one
+  });
+});
+// ─── DslExtractor.extract — failure paths ────────────────────────────────────
+describe("DslExtractor.extract — failure / auto mode", () => { // With { auto: true }, failures are expected to surface as a null result rather than a thrown error.
+  it("returns null in auto mode when AI returns invalid JSON", async () => {
+    const provider = makeProvider("Not JSON at all"); // every call returns the same unparseable text
+    const extractor = new DslExtractor(provider);
+    const consoleSpy = vi.spyOn(console, "log").mockImplementation(() => {}); // silence console.log while extract() runs
+    const result = await extractor.extract("spec", { auto: true });
+    consoleSpy.mockRestore();
+    expect(result).toBeNull();
+  });
+  it("returns null in auto mode when provider throws", async () => {
+    const provider: AIProvider = { generate: vi.fn().mockRejectedValue(new Error("network")) }; // generate() always rejects
+    const extractor = new DslExtractor(provider);
+    const consoleSpy = vi.spyOn(console, "log").mockImplementation(() => {});
+    const result = await extractor.extract("spec", { auto: true });
+    consoleSpy.mockRestore();
+    expect(result).toBeNull(); // the rejection must not propagate out of extract()
+  });
+  it("retries when first attempt produces invalid DSL (missing required field)", async () => {
+    // First response: invalid DSL (feature.description is present but an empty string)
+    const badDsl = { ...VALID_DSL, feature: { id: "x", title: "X", description: "" } };
+    // Second response: valid DSL
+    const provider: AIProvider = {
+      generate: vi.fn()
+        .mockResolvedValueOnce(JSON.stringify(badDsl))
+        .mockResolvedValueOnce(JSON.stringify(VALID_DSL)), // no third value is queued; further calls would resolve undefined
+    };
+    const extractor = new DslExtractor(provider);
+    const consoleSpy = vi.spyOn(console, "log").mockImplementation(() => {});
+    const result = await extractor.extract("spec", { auto: true });
+    consoleSpy.mockRestore();
+    // Should have retried — provider called at least twice
+    expect((provider.generate as ReturnType<typeof vi.fn>).mock.calls.length).toBeGreaterThanOrEqual(2);
+    expect(result?.feature.id).toBe("user-login"); // the result comes from the second (valid) response, not badDsl's id "x"
+  });
+});