npm - @sun-asterisk/sungen - Versions diffs - 3.2.1-beta.1 → 3.2.2-beta.2 - Mend

@sun-asterisk/sungen 3.2.1-beta.1 → 3.2.2-beta.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (63) hide show

package/src/exporters/xlsx-exporter.ts CHANGED Viewed

@@ -13,10 +13,15 @@ import * as fs from 'fs';
 import * as path from 'path';
 import ExcelJS from 'exceljs';
 import JSZip from 'jszip';
-import { ScreenSummary, TestCaseRow } from './types';
+import { ApiCatalogEntry, ApiDetailRow, ScreenSummary, TestCaseRow } from './types';
 import { getPackageVersion } from './package-info';
 import { SUN_LOGO_PNG_BASE64 } from './sun-logo';
 import { deliverableBasename } from './csv-exporter';
+import {
+  extractApiCallOrder,
+  extractCasesDataset,
+  extractConcurrencyInvariant,
+} from './feature-parser';
 const COL_COUNT = 16;
 const HEADER_FILL = 'FFD9D2E9'; // lavender — matches the summary-header band on row 6
@@ -37,15 +42,31 @@ function applyBorder(cell: AnyCell): void {
   };
 }
+/**
+ * Optional context for the supplementary "API detail" worksheet.
+ * Passed only when the unit is kind:api. When omitted, only the standard
+ * Testcases sheet is emitted (non-api delivery stays byte-identical).
+ *
+ * Note: renderXlsx currently forwards only `rows` to the sheet builder;
+ * `catalog` is carried along but not read there.
+ */
+export interface ApiDetailContext {
+  /** Parsed apis.yaml catalog keyed by endpoint name */
+  catalog: Record<string, ApiCatalogEntry>;
+  /**
+   * Pre-built detail rows (one per catalog endpoint); each entry becomes one
+   * data row of the "API detail" worksheet, in array order.
+   */
+  rows: ApiDetailRow[];
+}
+/**
+ * Build the delivery workbook in memory.
+ *
+ * Always adds the 'Testcases' sheet (via addTestcaseSheet). When `apiDetail` is
+ * supplied, an "API detail" sheet is appended afterwards from `apiDetail.rows`;
+ * when it is omitted the workbook contains the Testcases sheet only.
+ *
+ * @param summary   Screen summary passed through to the Testcases sheet
+ * @param rows      Test-case rows passed through to the Testcases sheet
+ * @param specLink  Spec link passed through to the Testcases sheet
+ * @param apiDetail Optional API detail context (api-kind units only)
+ * @returns The populated workbook (not yet written to disk)
+ */
 export function renderXlsx(
   summary: ScreenSummary,
   rows: TestCaseRow[],
-  specLink: string
+  specLink: string,
+  apiDetail?: ApiDetailContext,
 ): ExcelJS.Workbook {
   const wb = new ExcelJS.Workbook();
   wb.creator = 'sungen delivery';
   wb.created = new Date();
   addTestcaseSheet(wb, 'Testcases', summary, rows, specLink);
+  if (apiDetail) {
+    addApiDetailSheet(wb, apiDetail.rows);
+  }
   return wb;
 }
@@ -410,6 +431,159 @@ function addTestcaseSheet(
   };
 }
+// ---------------------------------------------------------------------------
+// API detail sheet (api-kind units only)
+// Second worksheet appended after Testcases — never alters the Testcases sheet.
+// ---------------------------------------------------------------------------
+const API_DETAIL_HEADER_FILL = 'FF2E5984'; // dark blue header for differentiation
+const API_DETAIL_HEADER_FONT = 'FFFFFFFF'; // white text on dark header
+/**
+ * Build ApiDetailRow[] from the apis.yaml catalog + feature-level annotations.
+ * Called once per feature file for api-kind units in the delivery pipeline.
+ *
+ * One row is produced per catalog entry, in catalog iteration order. For each
+ * endpoint, only scenarios whose `@api` call chain (as returned by
+ * extractApiCallOrder) contains the endpoint name contribute to its row.
+ * Aggregated cells are de-duplicated and joined with '; '. Cells with nothing
+ * to show render as '—', except Auth / Datasource which is left empty.
+ *
+ * @param catalog   Parsed apis.yaml keyed by endpoint name
+ * @param scenarios Scenario-level tag arrays from parseFeatureMetadata().scenarios
+ * @returns One detail row per catalog endpoint
+ */
+export function buildApiDetailRows(
+  catalog: Record<string, ApiCatalogEntry>,
+  scenarios: Array<{ tags: string[] }>,
+): ApiDetailRow[] {
+  // Parse each scenario's call chain once; it is reused for every endpoint and
+  // every column instead of being re-extracted per endpoint per column.
+  const parsedScenarios = scenarios.map((sc) => ({
+    tags: sc.tags,
+    calls: extractApiCallOrder(sc.tags),
+  }));
+  const rows: ApiDetailRow[] = [];
+  for (const [endpointName, entry] of Object.entries(catalog)) {
+    const method = (entry.method ?? '').toUpperCase();
+    const endpoint = entry.path ?? endpointName;
+    const datasource = entry.datasource ?? '';
+    // Scenarios that call this endpoint, in feature order.
+    const callers = parsedScenarios.filter((sc) => sc.calls.includes(endpointName));
+    // Auth: unique @auth:<role> values from any scenario that calls this endpoint.
+    const authRoles = new Set(
+      callers.flatMap((sc) =>
+        sc.tags.filter((t) => t.startsWith('@auth:')).map((t) => t.slice('@auth:'.length)),
+      ),
+    );
+    const authDatasource = [datasource, ...authRoles].filter(Boolean).join('; ');
+    // Request shape: compose from the catalog body + params.
+    const bodyStr = entry.body
+      ? `body: ${typeof entry.body === 'string' ? entry.body : JSON.stringify(entry.body)}`
+      : '';
+    const paramsArr = Array.isArray(entry.params) ? entry.params as string[] : [];
+    const paramsStr = paramsArr.length > 0 ? `params: [${paramsArr.join(', ')}]` : '';
+    const requestShape = [bodyStr, paramsStr].filter(Boolean).join('; ') || '—';
+    // Single pass over the calling scenarios to collect the three tag-derived columns.
+    const caseLabels = new Set<string>();
+    const flowChains = new Set<string>();
+    const invariants = new Set<string>();
+    for (const sc of callers) {
+      // @cases dataset name as label — actual per-row statuses live in test-data.yaml
+      const dataset = extractCasesDataset(sc.tags);
+      if (dataset) caseLabels.add(`@cases:${dataset}`);
+      // Only multi-call scenarios describe a chain worth showing as flow steps.
+      if (sc.calls.length > 1) flowChains.add(sc.calls.join(' → '));
+      // De-duplicated like the other aggregated columns, so two scenarios sharing
+      // one invariant do not repeat it in the cell.
+      const inv = extractConcurrencyInvariant(sc.tags);
+      if (inv) invariants.add(inv);
+    }
+    // Expected-status matrix: the catalog baseline status first, then a pointer to any
+    // @cases matrix dataset (resolving per-row statuses into this cell is a later enrichment).
+    const catalogStatus = entry.expect?.status != null ? String(entry.expect.status) : '';
+    const expectedStatusMatrix =
+      [catalogStatus, ...caseLabels].filter(Boolean).join('; ') || '—';
+    const flowSteps = [...flowChains].join('; ') || '—';
+    const concurrencyInvariant = [...invariants].join('; ') || '—';
+    rows.push({
+      endpoint,
+      method,
+      authDatasource,
+      requestShape,
+      expectedStatusMatrix,
+      flowSteps,
+      concurrencyInvariant,
+    });
+  }
+  return rows;
+}
+/**
+ * Add the "API detail" worksheet to `wb` and fill it from `detailRows`.
+ * Only this new sheet is touched; sheets already in the workbook are left alone.
+ *
+ * Layout: a styled, frozen header row followed by one bordered, top-aligned,
+ * wrapped row per detail entry, with an auto-filter spanning the whole table.
+ *
+ * Columns: Endpoint · Method · Auth/Datasource · Request shape ·
+ *          Expected-status matrix · Flow steps · Concurrency invariant
+ */
+export function addApiDetailSheet(wb: ExcelJS.Workbook, detailRows: ApiDetailRow[]): void {
+  const fontName = 'Arial';
+  const columnDefs = [
+    { header: 'Endpoint', width: 35 },
+    { header: 'Method', width: 10 },
+    { header: 'Auth / Datasource', width: 22 },
+    { header: 'Request shape', width: 40 },
+    { header: 'Expected-status matrix', width: 30 },
+    { header: 'Flow steps', width: 40 },
+    { header: 'Concurrency invariant', width: 35 },
+  ];
+  const sheet = wb.addWorksheet('API detail');
+  sheet.columns = columnDefs;
+  // Assigning `columns` with headers fills row 1; restyle that row as the table header.
+  const header = sheet.getRow(1);
+  header.height = 30;
+  header.eachCell((cell) => {
+    cell.font = { bold: true, color: { argb: API_DETAIL_HEADER_FONT }, name: fontName };
+    cell.fill = { type: 'pattern', pattern: 'solid', fgColor: { argb: API_DETAIL_HEADER_FILL } };
+    cell.alignment = { horizontal: 'center', vertical: 'middle', wrapText: true };
+    applyBorder(cell);
+  });
+  // Keep the header visible while scrolling.
+  sheet.views = [{ state: 'frozen', ySplit: 1 }];
+  detailRows.forEach((detail) => {
+    const added = sheet.addRow([
+      detail.endpoint,
+      detail.method,
+      detail.authDatasource,
+      detail.requestShape,
+      detail.expectedStatusMatrix,
+      detail.flowSteps,
+      detail.concurrencyInvariant,
+    ]);
+    added.alignment = { vertical: 'top', wrapText: true };
+    // includeEmpty so blank cells still get a border and the font.
+    added.eachCell({ includeEmpty: true }, (cell) => {
+      applyBorder(cell);
+      cell.font = { name: fontName };
+    });
+  });
+  // Filter covers the header through the last populated row, across every column.
+  sheet.autoFilter = {
+    from: { row: 1, column: 1 },
+    to: { row: sheet.rowCount, column: columnDefs.length },
+  };
+}
 /**
  * Write the workbook to `qa/deliverables/<screen>-testcases[.env].xlsx`.
  * When `SUNGEN_ENV` is set, the env name is appended so locale exports don't

package/src/harness/depth-lint.ts ADDED Viewed

@@ -0,0 +1,122 @@
+/**
+ * Depth lint (issue #384) — a deterministic, generation-time depth self-check.
+ *
+ * The audit's `assertionDepth` sensor decides WHICH business-critical scenarios are shallow
+ * (the authoritative set). This lint adds the missing half: for each shallow business-critical
+ * scenario it classifies the *fix* using the catalog's per-theme `depth` metadata —
+ *   • cross_screen:false  → DEEPEN in place (emit the theme's `depth.template` value assertion)
+ *   • cross_screen:true   → DEFER (flow-owned, or @manual:Mx with a reason), which removes it from the depth denominator
+ * This lets a generator (or the create-test repair step) act mechanically BEFORE the first audit,
+ * instead of churning the 3-round repair budget on scenarios that can't be deepened on-screen.
+ *
+ * Reuses the audit plumbing verbatim (parse + catalog + assertionDepth) → same verdict as `sungen audit`.
+ */
+import * as fs from 'fs';
+import * as path from 'path';
+import { loadScenarios, parseViewpointOverview, ScenarioInfo, ViewpointEntry } from './parse';
+import { loadCatalog, viewpointGate, assertionDepth, dataThemesFor, CatalogTheme } from './sensors';
+/** The two fix buckets a shallow business-critical scenario can be routed to. */
+export type DepthAction = 'deepen' | 'defer';
+/** One shallow business-critical scenario together with its classified fix. */
+export interface DepthLintItem {
+  /** Scenario name as reported by the depth sensor's shallow list */
+  scenario: string;
+  /** Name of the matched data theme, or null when no theme matched the scenario */
+  theme: string | null;
+  /** Bucket this item was routed to */
+  action: DepthAction;
+  /** the exact deep step to emit (deepen) or the routing hint (defer) */
+  fix: string;
+}
+/** Result of one depth-lint run for a single screen, as returned by runDepthLint. */
+export interface DepthLintReport {
+  /** Screen name exactly as passed to runDepthLint */
+  screen: string;
+  /** Page type reported by the viewpoint gate; rendered as 'unknown' when null */
+  pageType: string | null;
+  /** Focus passed to the depth sensor (runDepthLint defaults it to 'functional') */
+  focus: string;
+  /** Depth threshold from the sensor — rendered as a percentage, so presumably a 0–1 fraction */
+  threshold: number;
+  /** Business-critical depth ratio from the sensor — rendered as a percentage like `threshold` */
+  bcDepthRatio: number;
+  /** Verdict copied from the depth sensor */
+  verdict: 'pass' | 'warn' | 'fail';
+  /** Business-critical scenario count copied from the depth sensor */
+  businessCriticalTotal: number;
+  /** Shallow business-critical scenario count copied from the depth sensor */
+  shallowTotal: number;
+  /** shallow business-critical scenarios that CAN be deepened on-screen (actionable now) */
+  deepen: DepthLintItem[];
+  /** shallow business-critical scenarios that are cross-screen → route to a flow / @manual */
+  defer: DepthLintItem[];
+}
+/**
+ * Return the first data theme with a keyword that occurs in the scenario's haystack.
+ * A theme's precise `depth.keywords` are used when it has any; otherwise its general
+ * `keywords` are used. Keywords are lower-cased before the substring check.
+ */
+function matchTheme(s: ScenarioInfo, dataThemes: CatalogTheme[]): CatalogTheme | undefined {
+  for (const candidate of dataThemes) {
+    const depthKeywords = candidate.depth?.keywords;
+    const keywords = depthKeywords?.length ? depthKeywords : candidate.keywords;
+    for (const keyword of keywords) {
+      if (s.haystack.includes(keyword.toLowerCase())) return candidate;
+    }
+  }
+  return undefined;
+}
+/**
+ * Run the depth lint for one screen and return the classified report.
+ *
+ * Loads `<screenDir>/features/<last path segment of screenName>.feature` and
+ * `<screenDir>/requirements/test-viewpoint.md`, runs the viewpoint gate and the
+ * assertion-depth sensor, then routes every shallow business-critical scenario to
+ * `defer` (its matched theme is marked cross_screen) or `deepen` (everything else,
+ * including scenarios with no matched theme).
+ *
+ * @param screenDir  Directory of the screen unit
+ * @param screenName Screen name; only the segment after the last '/' names the feature file
+ * @param focus      Focus forwarded to the depth sensor
+ */
+export function runDepthLint(screenDir: string, screenName: string, focus = 'functional'): DepthLintReport {
+  const leaf = screenName.split('/').pop() || screenName;
+  const scenarios: ScenarioInfo[] = loadScenarios(path.join(screenDir, 'features', `${leaf}.feature`));
+  const viewpoints: ViewpointEntry[] = parseViewpointOverview(
+    path.join(screenDir, 'requirements', 'test-viewpoint.md'),
+  );
+  const catalog = loadCatalog();
+  const gate = viewpointGate(scenarios, viewpoints, catalog);
+  const dataThemes = dataThemesFor(catalog, gate.pageType);
+  const depth = assertionDepth(scenarios, dataThemes, focus);
+  // Name → scenario lookup; a later scenario with the same name replaces an earlier one.
+  const scenarioByName = new Map<string, ScenarioInfo>();
+  for (const info of scenarios) scenarioByName.set(info.name, info);
+  const deepen: DepthLintItem[] = [];
+  const defer: DepthLintItem[] = [];
+  for (const { name } of depth.shallowBusinessCritical) {
+    const info = scenarioByName.get(name);
+    const matched = info ? matchTheme(info, dataThemes) : undefined;
+    const themeName = matched?.theme ?? null;
+    if (matched?.depth?.cross_screen ?? false) {
+      defer.push({
+        scenario: name,
+        theme: themeName,
+        action: 'defer',
+        fix: `cross-screen — own it in a flow (sungen add-flow) or tag @manual:Mx with a reason; do not fake an on-screen data assertion`,
+      });
+      continue;
+    }
+    // Not cross-screen (or no theme matched): fixable in place, using the theme template when there is one.
+    deepen.push({
+      scenario: name,
+      theme: themeName,
+      action: 'deepen',
+      fix: matched?.depth?.template ?? `add a data assertion (\`... with {{value}}\` or \`see all ... contain {{v}}\`)`,
+    });
+  }
+  return {
+    screen: screenName,
+    pageType: gate.pageType,
+    focus,
+    threshold: depth.threshold,
+    bcDepthRatio: depth.bcDepthRatio,
+    verdict: depth.verdict,
+    businessCriticalTotal: depth.businessCriticalTotal,
+    shallowTotal: depth.businessCriticalShallow,
+    deepen,
+    defer,
+  };
+}
+/**
+ * Print a human-readable depth-lint report to stdout via console.log.
+ *
+ * Shows the depth ratio against its threshold with the verdict, the bucket counts,
+ * then one line per item in the deepen and defer buckets. Items with no matched
+ * theme are labelled 'no theme' rather than printing a literal `null`.
+ *
+ * @param r Report produced by runDepthLint
+ */
+export function renderDepthLint(r: DepthLintReport): void {
+  const pct = (n: number) => `${Math.round(n * 100)}%`;
+  // `theme` is null when no catalog theme matched the scenario; show that explicitly.
+  const themeLabel = (theme: string | null): string => theme ?? 'no theme';
+  console.log('');
+  console.log(`━━━ Depth lint: ${r.screen} (page-type ${r.pageType ?? 'unknown'}) ━━━`);
+  console.log('');
+  console.log(`  businessDepth ${pct(r.bcDepthRatio)} (threshold ${pct(r.threshold)} · focus ${r.focus}) → ${r.verdict.toUpperCase()}`);
+  console.log(`  ${r.businessCriticalTotal} business-critical · ${r.shallowTotal} shallow → ${r.deepen.length} deepen-in-place · ${r.defer.length} cross-screen`);
+  if (r.deepen.length) {
+    console.log('');
+    console.log('  ── DEEPEN IN PLACE (fix before audit) ──');
+    for (const d of r.deepen) console.log(`   • ${d.scenario}\n       [${themeLabel(d.theme)}] → ${d.fix}`);
+  }
+  if (r.defer.length) {
+    console.log('');
+    console.log('  ── CROSS-SCREEN (route to flow / @manual:Mx) ──');
+    for (const d of r.defer) console.log(`   • ${d.scenario}  [${themeLabel(d.theme)}]`);
+  }
+  if (!r.deepen.length && !r.defer.length) console.log('  ✓ no shallow business-critical scenarios');
+  console.log('');
+}

package/src/harness/parse.ts CHANGED Viewed

@@ -106,7 +106,10 @@ function classifyScenario(sc: ParsedScenario): ScenarioInfo {
   const deferredToFlow = tags.includes('@deferred:flow');
   const ownedByFlow = (tags.find((t: string) => /^@owned-by:/i.test(t)) || '').slice('@owned-by:'.length) || undefined;
   // @deferred:flow is owned by a flow → not automated on this screen, so it accounts like @manual (H6).
-  const manual = tags.includes('@manual') || deferredToFlow;
+  // Recognize both bare `@manual` and the reason-coded `@manual:Mx` convention (what the generator emits);
+  // must match capability-plan.ts's detection, or `@manual:Mx` scenarios stay in the businessDepth
+  // denominator and silently suppress the ratio (#386).
+  const manual = tags.some((t) => /^@manual\b/i.test(t)) || deferredToFlow;
   const casesTag = tags.find((t) => t.startsWith('@cases:'));
   const casesDataset = casesTag ? casesTag.slice('@cases:'.length).trim() : undefined;
   // Named-query references: @query:<name>[(overrides)] tags + inline `query [name]` step refs.

package/src/orchestrator/ai-rules-updater.ts CHANGED Viewed

@@ -74,6 +74,7 @@ export const AI_RULES_FILE_MAPPING: [string, string][] = [
   ['claude-agent-reviewer.md', '.claude/agents/sungen-reviewer.md'],
   ['claude-agent-discovery.md', '.claude/agents/sungen-discovery.md'],
   ['claude-agent-challenge.md', '.claude/agents/sungen-challenge.md'],
+  ['claude-agent-generator.md', '.claude/agents/sungen-generator.md'],
   // Skills — GitHub Copilot
   ['github-skill-sungen-gherkin-syntax.md', '.github/skills/sungen-gherkin-syntax/SKILL.md'],

package/src/orchestrator/templates/ai-instructions/claude-agent-generator.md ADDED Viewed

@@ -0,0 +1,44 @@
+---
+name: sungen-generator
+description: Generates Gherkin scenarios for ONE shard (a viewpoint theme or a spec section) in an isolated context and writes a self-contained fragment — so create-test can fan out many generators in parallel and the orchestrator stays lean. Each shard owns a disjoint VP-prefix namespace, so fragments merge without renumbering. Invoked by create-test/design during parallel generation.
+tools: Read, Grep, Glob, Bash, Write, Edit, Skill
+---
+You are a **single-shard test-case generator**. You run in an **isolated context** and produce the scenarios for **exactly one shard** — never the whole screen. The orchestrator runs several of you in parallel, then merges the fragments. Keeping each fragment small is also what keeps every generator under the output-token cap.
+## What a shard is
+A shard is one **coverage unit**, sized for real parallelism (not the 5 coarse viewpoint-router groups — a screen loads only 1–2 of those). It is **one of**:
+- a **viewpoint theme** — a `VP-` prefix from the viewpoint overview (e.g. `VP-SEC`, `VP-ERROR-EMPTY-STATE`, `VP-CAROUSEL`), or
+- a **spec section** — one `spec.md` section per the `sungen-tc-generation` Mapping Contract (Table 1).
+Your shard owns its `VP-` prefix, so your ids never collide with sibling shards.
+## Inputs (passed by the orchestrator)
+- **Your shard**: the theme/section name + its viewpoint items (the slice).
+- **The `sungen-discovery` report** (Step 3): condensed facts — use it instead of re-reading every source.
+- **Relevant context**: only the `spec.md` section(s) your shard maps to, and **which** `sungen-viewpoint` group file holds your shard's patterns (load only that one).
+- **Unit context**: screen vs flow, the unit name, the chosen tier (1 / 2 / 3 / full), and your fragment paths.
+## Generate (your shard ONLY)
+1. Load **only** the skills you need: `sungen-tc-generation` (output format + mapping), `sungen-gherkin-syntax` (step patterns), and the **one** `sungen-viewpoint` group file your shard belongs to. Do not load the others.
+2. Produce the scenarios for your shard's viewpoint items at the requested tier, following the skill's mapping contract. Keep every `VP-` id under **your shard's prefix** so it stays in a disjoint namespace.
+3. **Flows**: use `[Screen:Element]` namespace refs, namespace test-data by phase, add the `@flow` tag per the skill.
+4. Tag `@manual:Mx` (with a reason) only for true judgment / missing-capability items, per the skill.
+## Write your fragment (do NOT write the final feature)
+Write two self-contained fragment files (the orchestrator merges them):
+- `.sungen/fragments/<unit>/<shard>.feature` — a **headerless** block: just your `@tag`-decorated `Scenario:` / `Scenario Outline:` blocks, no `Feature:` line (the orchestrator owns the single Feature header).
+- `.sungen/fragments/<unit>/<shard>.test-data.yaml` — only the `{{variables}}` your scenarios introduce.
+Distinct paths per shard ⇒ no write conflict with sibling generators.
+## Return (compact — your only message back)
+```
+SHARD: <theme-or-section>
+SCENARIOS: <n>  (VP ids: <VP-...-001..NNN>)
+TEST-DATA KEYS: <keys you added>
+SPEC SECTIONS COVERED: <list>
+ASSUMPTIONS / DEFERRED: <items you marked @manual or could not source>
+FRAGMENT: .sungen/fragments/<unit>/<shard>.feature
+```
+Keep it tight. Do not audit, do not merge, do not touch other shards' fragments or the final `.feature`.

package/src/orchestrator/templates/ai-instructions/claude-cmd-create-test.md CHANGED Viewed

@@ -71,12 +71,34 @@ If the unit is **api-first** (`qa/api/<name>/` or `qa/api/flows/<name>/`), the d
    Summarize what you found in requirements and present to the user.
 4. Follow the `sungen-tc-generation` skill for section identification, viewpoint generation, and output format. **Viewpoint loading discipline:** `sungen-viewpoint` is a **router** — from the page-type (form / list / detail / auth / dashboard …) read **only the matching group file(s)** (e.g. a login screen → group-e-identity; a product list → group-c-data-explore), never all five groups. This keeps the generation context lean. **For flows**, use the "Flow Test Generation" section in the skill. When requirements exist, use the "Requirements-Driven Generation" strategy. **For Tier 1**, apply the **Lightweight Guard** — verify required fields, validation rules, business rules, security checks, and key state transitions all have TCs after generation. **For Tier 2+**, **MUST** apply the full **Mapping Contract** — walk every `spec.md` section top-to-bottom and produce the indicated TCs per Table 1; handle `test-viewpoint.md` per Table 2. Do not silently skip sections.
-5. Generate or update `.feature` + `test-data.yaml` following `sungen-gherkin-syntax` and `sungen-tc-generation` skills. **For flows**: use `[Screen:Element]` namespace format, namespace test-data by phase, add `@flow` tag.
+5. Generate `.feature` + `test-data.yaml` following `sungen-gherkin-syntax` and `sungen-tc-generation`. **Partition the work into shards and generate them in parallel** when there are ≥2.
+   **5a. Decide the shards.** A shard is one **coverage unit** sized for parallelism — NOT the 5 coarse viewpoint-router groups (a screen loads only 1–2 of those). Use **either**:
+   - one **viewpoint theme** per shard — a `VP-` prefix from the viewpoint overview (`VP-SEC`, `VP-ERROR-EMPTY-STATE`, `VP-CAROUSEL`, …) — preferred when the viewpoint overview is rich (test-2/home had 47 items across many themes); **or**
+   - one **`spec.md` section** per shard (the Mapping Contract walk, Table 1) — preferred when generating from spec.
+   Each shard owns a disjoint `VP-` prefix ⇒ ids never collide. One shard → skip to 5c (no fan-out gain).
+   **5b. Parallel fan-out (Claude Code).** Spawn one **`sungen-generator`** sub-agent **per shard** (Task tool, `subagent_type: sungen-generator`) — issue all the Task calls **in a single message** so they run concurrently. Pass each: its shard (theme/section) + viewpoint slice, the **`sungen-discovery` report** (Step 3), only the `spec.md` section(s) it maps to, which one `sungen-viewpoint` group file holds its patterns, the unit (screen/flow) + name + tier, and its fragment paths `.sungen/fragments/<name>/<shard>.{feature,test-data.yaml}`. Each writes a **headerless** fragment + a test-data fragment and returns a compact summary. Small fragments also keep every generator under the output-token cap (the reason the single-pass path writes incrementally).
+   **5c. Merge (orchestrator — barrier; only after all generators return).**
+   - Write the final `qa/<screens|flows>/<name>/features/<name>.feature`: one `Feature:` header (+ `@flow` for flows), then concatenate the fragments in **stable order** — spec-section order top-to-bottom (or theme order from the viewpoint overview) — so output is coherent and reproducible across runs.
+   - **Dedup** cross-shard scenarios with near-identical titles (a generic "navigation works" from two shards): keep the earlier shard's, drop the duplicate, note it. No id renumber needed — prefixes are disjoint by construction.
+   - **Union** the test-data fragments into `test-data.yaml`; dedup keys, and **flag** any key two shards define with different values.
+   - Delete `.sungen/fragments/<name>/` once merged.
+   - Guarantees a **coherent** suite (no dup, valid ids, passes `audit`), not a byte-identical one — generation is AI-authored; the determinism asset lives downstream in the Gherkin→`.spec.ts` compiler.
+   **5d. Sequential fallback.** Use the single-context incremental path (Step 2: tier-by-tier `Write`/`Edit` batches) when: only **one** shard applies, **Copilot / no sub-agents**, or a constrained setup. Same output, just no speedup. **For flows**: `[Screen:Element]` namespace refs, test-data namespaced by phase, `@flow` tag.
+5.4. **Depth self-check (deterministic — run BEFORE the audit).** Run `sungen depth-lint --screen <name>` (Bash). It reuses the audit's businessDepth classifier and splits every shallow business-critical scenario into two actionable buckets — act on them now so the audit/repair loop doesn't burn rounds on depth:
+   - **DEEPEN IN PLACE** — add a real value assertion to each (`User see all [X] contain {{v}}`, `User remember [X] as {{v}}` + `… with {{v}}`). The printed `template` is a **hint** keyed off the theme — apply judgment to the scenario's actual claim; do NOT paste a value assertion that doesn't fit (e.g. a carousel-visibility scenario should assert the product SET, not a price). If a flagged scenario is genuinely visibility/behavior (not data-correctness), that's an over-count — leave it and note it, never fake an assertion.
+   - **CROSS-SCREEN** — route to a flow (`/sungen:add-flow`) or tag `@manual:Mx` + reason; do NOT fake an on-screen data assertion. This removes it from the screen's depth denominator honestly.
+   Re-run `sungen depth-lint` until `deepen` is empty (or only honest over-counts remain), THEN proceed to the gate. This lifts first-pass `businessDepth` mechanically instead of via 2–3 organic repair rounds.
 5.5. **Quality gate & repair (harness — always run, do NOT skip).** Follow the `sungen-harness-audit` skill:
    - Run `sungen audit --screen <name>` (Bash) and read `gateStatus` + `findings` (deterministic, structural).
    - **Independent semantic review.** **Claude Code:** spawn the **`sungen-reviewer`** sub-agent (Task tool, `subagent_type: sungen-reviewer`) — it judges what the gate can't (does each scenario's steps PROVE its title/viewpoint, observable Thens, business-critical assertion depth) and returns `VERDICT` + `ISSUES` with concrete fixes. **Merge its NEEDS-REPAIR issues with the audit findings.** (Copilot / no sub-agents: run the same review inline using the `sungen-reviewer` criteria.)
    - Repair **both** the audit findings and the reviewer issues (budget 3 rounds), then re-audit:
+   - **Repair runs single-agent by default** (it edits the one `.feature` — concurrent edits to the same file conflict, and BALANCE/dedup needs whole-suite context). **Exception:** a finding that is purely **additive new coverage** (GATE missing-theme → generate scenarios for an uncovered theme) is just more shards — fan it out as `sungen-generator` sub-agent(s) (new disjoint `VP-` prefix) and merge, exactly like Step 5b. Findings that **edit existing** scenarios (DEPTH/BALANCE/TRACE) stay serial.
    - If the gate FAILs or there are findings, **repair** (budget 3 rounds), then re-audit:
      - **GATE** missing critical theme → generate scenarios for it. If it is **cross-screen** (cart-correctness, product-detail-consistency, filter-result-correctness): **automate it in the flow** (`/sungen:add-flow` if none exists) with observable data assertions (`... with {{value}}`, `see all ... contain {{v}}`) — a single home→target journey runs as one Playwright test. Do **not** write a full `@manual` duplicate of it on the screen (that is a non-running dead copy — `sungen audit` flags it `MANUAL-AUTOMATABLE`), and do **not** fake a shallow single-screen pass. Reserve `@manual` for true judgment / missing-capability, tagged `@manual:Mx`.
      - **DEPTH** → replace `see [X] page/section` on business-critical scenarios with data assertions.

package/src/orchestrator/templates/ai-instructions/claude-skill-delivery.md CHANGED Viewed

@@ -88,6 +88,33 @@ Multi-locale (no `SUNGEN_ENV`): one **`<LOCALE> Auto`** sheet per locale + a sin
 ---
+## API delivery — extra worksheet
+For **api-kind units** (`qa/api/<area>/`), the `.xlsx` gains an additional worksheet **`API detail`**, appended after the existing test-case sheet(s). The main BM-2-901-13 Testcases layout is unchanged. The CSV is unchanged (16-column, no extra sheet).
+### Required sources (API detail sheet only)
+| Source | Path | Created by |
+|--------|------|------------|
+| Endpoint catalog | `qa/api/<area>/api/apis.yaml` | `sungen add --api` or `sungen api import` |
+| Scenario annotations | `qa/api/<area>/features/<feature>.feature` | `create-test` |
+### API detail column mapping
+| Column | Source |
+|--------|--------|
+| Endpoint | `path` from `apis.yaml` catalog entry (falls back to the catalog entry's name when `path` is absent) |
+| Method | `method` from catalog entry (uppercased) |
+| Auth / Datasource | catalog `datasource` + any `@auth:<role>` tag from scenarios calling this endpoint |
+| Request shape | catalog `body` + `params` fields composed as `body: {…}; params: [a, b]` |
+| Expected-status matrix | catalog `expect.status` as the baseline, followed by the `@cases:<dataset>` label of each data-driven scenario calling the endpoint (per-row statuses stay in `test-data.yaml`) |
+| Flow steps | Ordered `@api:<name>` call chain from multi-call scenarios (e.g. `register → count_users`) |
+| Concurrency invariant | `@concurrent:<N>` + `@query:<oracle>` from concurrent scenarios (e.g. `ok_count=2; @query user_count`) |
+**Sources are catalog + annotations only** — Field Metadata (FM) is not required for this sheet.
+---
 ## Excluded from CSV
 - `@steps:<name>` **base** scenarios — these are setup-only, inlined into `@extend:...` scenarios at compile time

package/src/orchestrator/templates/ai-instructions/claude-skill-gherkin-syntax.md CHANGED Viewed

@@ -214,6 +214,8 @@ Options: `nth` `exact` `scope` `match` `variant` `frame` `contenteditable` `colu
 | `@cases:dataset` | Data-driven: run the scenario once per row of the `dataset` LIST in test-data → one `test()` per row |
 | `@query:name` | Database: run the named query from `database/queries.yaml` (precondition) and bind its rows to `{{name}}`; assert with `expect {{name.count}} …` + path access. Override params `@query:name(p={{v}})`. Repeatable. (Optional Data Driver — see Database verification above) |
 | `@api:name` | API: run the named request from `api/apis.yaml` (precondition) and bind the response to `{{name}}`; assert with `expect {{name.status}} …` + path access (`{{name.body.<path>}}`). Override params `@api:name(p={{v}})`. Repeatable. (Optional API Driver) |
+| `@concurrent:N` | API idempotency: fire the bound `@api` request N times in parallel, then bind aggregates on the `@api` name — `{{name.ok_count}}` (2xx count) and `{{name.status_counts}}` (status→count map). Assert the exactly-once invariant (`expect {{name.ok_count}} is 1`); pair with `@query` as a DB oracle. Tag order = run order: `@api` (mutate) before `@query` (verify). (Optional API Driver) |
+| `@hybrid` | One unit, two capabilities: a signed-in browser session (UI) authorizes the `@api` call — the API request reuses the UI `storageState`. (Optional API + UI Drivers) |
 ### Data-driven scenarios (`@cases`)

package/src/orchestrator/templates/ai-instructions/claude-skill-tc-generation.md CHANGED Viewed

@@ -9,6 +9,8 @@ user-invocable: false
 - **Write incrementally — never emit the whole suite in one response.** Build the `.feature` in batches via successive `Write`/`Edit` (≈10–15 scenarios per call). For **Full coverage**, write tier-by-tier: `Write` Tier 1 → `Edit` append Tier 2 → `Edit` append Tier 3.
   → One huge `Write` can exceed the model's output-token cap → `API Error: Claude's response exceeded the N output token maximum`. Single-pass full coverage only fits when `CLAUDE_CODE_MAX_OUTPUT_TOKENS ≥ 64000`; otherwise batch. Batching also lets the audit/reviewer run per batch — higher quality.
+- **Sharded (parallel) generation — keep each shard self-contained.** When `create-test` fans out one `sungen-generator` sub-agent per shard (a viewpoint theme like `VP-SEC`, or a `spec.md` section — see create-test Steps 5a–5c), you are generating **only your shard**: emit your scenarios under **your own `VP-` prefix** (disjoint namespace, so ids never collide), as a **headerless fragment** (no `Feature:` line — the orchestrator owns the single header). Do not reference or renumber other shards. The orchestrator concatenates fragments in stable order (spec-section / theme order), dedups by title, and unions test-data. Small fragments also stay under the output-token cap by construction.
 - `spec_figma.md` exists → read file only, **NEVER** call `mcp__figma__*`
   → PAT auth flow already done by `sungen-capture` (mode figma-pat); re-calling fails or duplicates work.
@@ -273,6 +275,7 @@ Security:         [S1 – admin only]
 **Depth is a GATE dimension (harness-roadmap P1) — self-raise, never silently go shallow:**
 - For every data-correctness theme the catalog marks `depth.requires: data-assertion`, emit its `depth.template` shape by **default** — don't wait for the repair loop. `sungen audit` measures `businessDepth` (ratio of these scenarios that assert data) against an intent threshold (functional ≥ 0.70); below it the **gate FAILs**.
+- **Verify depth deterministically before the gate:** run `sungen depth-lint --screen <name>`. It classifies every shallow business-critical scenario into **deepen-in-place** (add the theme's value assertion — the printed `template` is a hint, fit it to the actual claim) vs **cross-screen** (route to a flow / `@manual:Mx`). Clear the `deepen` list first — this is the mechanical way to hit `businessDepth` on the first pass instead of churning repair rounds. Never fake a value assertion onto a visibility/behavior scenario the lint over-counts; leave it and note the over-count.
 - `depth.cross_screen: true` (cart / detail / filter / brand correctness) → write the deep capture/compare shape as an **automated flow scenario** (in the flow — do NOT leave a full-step `@manual` duplicate on the screen). `@manual` is **only** for genuine judgment (M6 visual/UX · M8 not-worth · M9 human) or a missing capability (M1–M5/M7), and it **must** carry a reason code (`@manual:Mx`, or a reason comment the planner can infer). A `@manual` scenario that still has full automatable steps (a data assertion, no visual/mock/a11y judgment) is now flagged by `sungen audit` as `MANUAL-AUTOMATABLE`, and business-critical scenarios you defer to `@manual` are reported as `DEPTH-DEFERRED` (they do NOT silently inflate `businessDepth`). Deferring automatable work to `@manual` lowers quality — automate it in the flow instead.
 - **Pick the right `@manual:Mx` code — it decides which driver can later automate the case** (`sungen audit` flags a code↔reason mismatch). Tag the code that matches the **oracle the reason describes**:

package/src/orchestrator/templates/ai-instructions/copilot-cmd-create-test.md CHANGED Viewed

@@ -64,7 +64,9 @@ If the unit is **api-first** (`qa/api/<name>/` or `qa/api/flows/<name>/`), the d
    Summarize what you found in requirements and present to the user.
 4. Follow the `sungen-tc-generation` skill for section identification, viewpoint generation, and output format. **For flows**, use the "Flow Test Generation" section in the skill. When requirements exist, use the "Requirements-Driven Generation" strategy. **For Tier 1**, apply the **Lightweight Guard** — verify required fields, validation rules, business rules, security checks, and key state transitions all have TCs after generation. **For Tier 2+**, **MUST** apply the full **Mapping Contract** — walk every `spec.md` section top-to-bottom and produce the indicated TCs per Table 1; handle `test-viewpoint.md` per Table 2. Do not silently skip sections. Present sections as a numbered list and let user pick.
-5. Generate or update `.feature` + `test-data.yaml` following `sungen-gherkin-syntax` and `sungen-tc-generation` skills. **For flows**: use `[Screen:Element]` namespace format, namespace test-data by phase, add `@flow` tag.
+5. Generate or update `.feature` + `test-data.yaml` following `sungen-gherkin-syntax` and `sungen-tc-generation` skills. Generate **group-by-group** (one viewpoint group at a time, tier-by-tier `Write`/`Edit` batches) to stay under the output-token cap. **For flows**: use `[Screen:Element]` namespace format, namespace test-data by phase, add `@flow` tag.
+   > **No parallel fan-out here.** Copilot has no sub-agents, so generation is sequential (the Claude Code variant fans out one `sungen-generator` per viewpoint group and merges). Same output, no speedup.
+5.4. **Depth self-check (deterministic — BEFORE the audit).** Run `sungen depth-lint --screen ${input:name}`. It splits every shallow business-critical scenario into **DEEPEN IN PLACE** (add a real value assertion — the printed `template` is a theme-keyed hint, apply judgment to the actual claim; never fake one onto a visibility/behavior scenario) and **CROSS-SCREEN** (route to a flow / tag `@manual:Mx` + reason — removes it from the depth denominator honestly). Act on both, re-run until `deepen` is empty (or only honest over-counts remain), THEN gate. Lifts first-pass `businessDepth` mechanically instead of via 2–3 repair rounds.
 5.5. **Quality gate & repair (harness — always run).** Per `sungen-harness-audit`: run `sungen audit --screen ${input:name}` (structural), THEN do an **independent semantic review inline** using the `sungen-reviewer` criteria (does each scenario's steps PROVE its title/viewpoint? observable Thens? business-critical assertion depth?). Merge both sets of issues; if gate FAILs / findings exist, repair (budget 3) and re-audit — GATE missing theme → generate it (cross-screen → **automate it in the flow** via `/sungen:add-flow`, NOT a full `@manual` screen duplicate — `sungen audit` flags an automatable `@manual` as `MANUAL-AUTOMATABLE`; reserve `@manual:Mx` for true judgment/missing-capability); DEPTH → add data assertions; BALANCE → add business-core first; TRACE → align VP ids. Never fake a pass.
 5.6. **Record.** `sungen manifest --screen ${input:name}`. Ledger **each phase** (not just repair) — pick one `runId` at the start and pass it so `trace`/`ledger report` show THIS run, not a mix: `sungen ledger record --screen ${input:name} --run <runId> --step <discovery|viewpoint|gherkin|audit|repair:N> --ms <elapsed>`. On re-run, start with `sungen manifest --screen ${input:name} --diff` and only regenerate changed sections.
 6. **Converge — show the trace.** Run `sungen trace --screen ${input:name}` and present: process map (phases + repair rounds), bottlenecks, **HUMAN-LOOP FOCUS** (@manual to verify), audit score + gate + residual gaps. Then offer next steps based on which tier was just generated:

package/src/orchestrator/templates/ai-instructions/github-skill-sungen-delivery.md CHANGED Viewed

@@ -88,6 +88,33 @@ Multi-locale (no `SUNGEN_ENV`): one **`<LOCALE> Auto`** sheet per locale + a sin
 ---
+## API delivery — extra worksheet
+For **api-kind units** (`qa/api/<area>/`), the `.xlsx` gains a third worksheet **`API detail`** (appended after Auto/Manual). The main BM-2-901-13 Testcases layout is unchanged. The CSV is unchanged (16-column, no extra sheet).
+### Required sources (API detail sheet only)
+| Source | Path | Created by |
+|--------|------|------------|
+| Endpoint catalog | `qa/api/<area>/api/apis.yaml` | `sungen add --api` or `sungen api import` |
+| Scenario annotations | `qa/api/<area>/features/<feature>.feature` | `create-test` |
+### API detail column mapping
+| Column | Source |
+|--------|--------|
+| Endpoint | `path` from `apis.yaml` catalog entry |
+| Method | `method` from catalog entry (uppercased) |
+| Auth / Datasource | catalog `datasource` + any `@auth:<role>` tag from scenarios calling this endpoint |
+| Request shape | catalog `body` + `params` fields composed as `body: {…}; params: [a, b]` |
+| Expected-status matrix | `@cases:<dataset>` label for data-driven scenarios; catalog `expect.status` as fallback |
+| Flow steps | Ordered `@api:<name>` call chain from multi-call scenarios (e.g. `register → count_users`) |
+| Concurrency invariant | `@concurrent:<N>` + `@query:<oracle>` from concurrent scenarios (e.g. `ok_count=2; @query user_count`) |
+**Sources are catalog + annotations only** — Field Metadata (FM) is not required for this sheet.
+---
 ## Excluded from CSV
 - `@steps:<name>` **base** scenarios — these are setup-only, inlined into `@extend:...` scenarios at compile time

package/src/orchestrator/templates/ai-instructions/github-skill-sungen-gherkin-syntax.md CHANGED Viewed

@@ -214,6 +214,8 @@ Options: `nth` `exact` `scope` `match` `variant` `frame` `contenteditable` `colu
 | `@cases:dataset` | Data-driven: run the scenario once per row of the `dataset` LIST in test-data → one `test()` per row |
 | `@query:name` | Database: run the named query from `database/queries.yaml` (precondition) and bind its rows to `{{name}}`; assert with `expect {{name.count}} …` + path access. Override params `@query:name(p={{v}})`. Repeatable. (Optional Data Driver — see Database verification above) |
 | `@api:name` | API: run the named request from `api/apis.yaml` (precondition) and bind the response to `{{name}}`; assert with `expect {{name.status}} …` + path access (`{{name.body.<path>}}`). Override params `@api:name(p={{v}})`. Repeatable. (Optional API Driver) |
+| `@concurrent:N` | API idempotency: fire the bound `@api` request N times in parallel, then bind aggregates on the `@api` name — `{{name.ok_count}}` (2xx count) and `{{name.status_counts}}` (status→count map). Assert the exactly-once invariant (`expect {{name.ok_count}} is 1`); pair with `@query` as a DB oracle. Tag order = run order: `@api` (mutate) before `@query` (verify). (Optional API Driver) |
+| `@hybrid` | One unit, two capabilities: a signed-in browser session (UI) authorizes the `@api` call — the API request reuses the UI `storageState`. (Optional API + UI Drivers) |
 ### Data-driven scenarios (`@cases`)

package/src/orchestrator/templates/ai-instructions/github-skill-sungen-tc-generation.md CHANGED Viewed

@@ -9,6 +9,8 @@ user-invocable: false
 - **Write incrementally — never emit the whole suite in one response.** Build the `.feature` in batches via successive `Write`/`Edit` (≈10–15 scenarios per call). For **Full coverage**, write tier-by-tier: `Write` Tier 1 → `Edit` append Tier 2 → `Edit` append Tier 3.
   → One huge `Write` can exceed the model's output-token cap → `API Error: Claude's response exceeded the N output token maximum`. Single-pass full coverage only fits when `CLAUDE_CODE_MAX_OUTPUT_TOKENS ≥ 64000`; otherwise batch. Batching also lets the audit/reviewer run per batch — higher quality.
+- **Generate group-by-group (sequential here).** Copilot has no sub-agents, so generate one viewpoint group/theme at a time, tier-by-tier, keeping each `VP-` theme in its own id prefix. (The Claude Code variant fans these out as parallel `sungen-generator` shards and merges them; sequential generation here produces the same output shape, just without the speedup. Keep each theme self-contained so it would merge cleanly either way.)
 - `spec_figma.md` exists → read file only, **NEVER** call `mcp__figma__*`
   → PAT auth flow already done by `sungen-capture` (mode figma-pat); re-calling fails or duplicates work.
@@ -273,6 +275,7 @@ Security:         [S1 – admin only]
 **Depth is a GATE dimension (harness-roadmap P1) — self-raise, never silently go shallow:**
 - For every data-correctness theme the catalog marks `depth.requires: data-assertion`, emit its `depth.template` shape by **default** — don't wait for the repair loop. `sungen audit` measures `businessDepth` (ratio of these scenarios that assert data) against an intent threshold (functional ≥ 0.70); below it the **gate FAILs**.
+- **Verify depth deterministically before the gate:** run `sungen depth-lint --screen <name>`. It classifies every shallow business-critical scenario into **deepen-in-place** (add the theme's value assertion — the printed `template` is a hint, fit it to the actual claim) vs **cross-screen** (route to a flow / `@manual:Mx`). Clear the `deepen` list first — this is the mechanical way to hit `businessDepth` on the first pass instead of churning repair rounds. Never fake a value assertion onto a visibility/behavior scenario the lint over-counts; leave it and note the over-count.
 - `depth.cross_screen: true` (cart / detail / filter / brand correctness) → write the deep capture/compare shape as an **automated flow scenario** (in the flow — do NOT leave a full-step `@manual` duplicate on the screen). `@manual` is **only** for genuine judgment (M6 visual/UX · M8 not-worth · M9 human) or a missing capability (M1–M5/M7), and it **must** carry a reason code (`@manual:Mx`, or a reason comment the planner can infer). A `@manual` scenario that still has full automatable steps (a data assertion, no visual/mock/a11y judgment) is now flagged by `sungen audit` as `MANUAL-AUTOMATABLE`, and business-critical scenarios you defer to `@manual` are reported as `DEPTH-DEFERRED` (they do NOT silently inflate `businessDepth`). Deferring automatable work to `@manual` lowers quality — automate it in the flow instead.
 - **Pick the right `@manual:Mx` code — it decides which driver can later automate the case** (`sungen audit` flags a code↔reason mismatch). Tag the code that matches the **oracle the reason describes**:

package/src/orchestrator/templates/specs-api.ts CHANGED Viewed

@@ -49,6 +49,17 @@ function substitute(text: string, params: Record<string, any>): string {
   return text.replace(/:([A-Za-z_][A-Za-z0-9_]*)/g, (_m, p) => encodeURIComponent(String(params[p] ?? '')));
 }
+/**
+ * Join a datasource base URL with a catalog path. Concatenate rather than rely on Playwright's
+ * baseURL resolution: an absolute path (`/user/1`) resolves against the base ORIGIN and would drop
+ * a base path component (`/api/v3`). Most APIs are mounted under such a prefix, so the full URL must
+ * be built explicitly.
+ *
+ * @param base - Datasource base URL; one trailing `/` (if present) is removed before joining.
+ * @param urlPath - Catalog path, with or without a leading `/`.
+ * @returns `base` without its trailing slash followed by `urlPath`; a `/` separator is inserted
+ *   when `urlPath` does not already start with one.
+ *
+ * NOTE(review): `urlPath` is always appended to `base` — an already-absolute URL would be
+ * concatenated as well. Presumably callers only pass paths; confirm.
+ */
+export function joinApiUrl(base: string, urlPath: string): string {
+  const b = base.replace(/\/$/, ''); // removes at most one trailing slash
+  return urlPath.startsWith('/') ? b + urlPath : `${b}/${urlPath}`;
+}
 class ApiClient {
   private configs: Record<string, ApiDataSource> | null = null;
@@ -103,13 +114,13 @@ class ApiClient {
     // `storageState` (the @auth role's saved session) so the request shares the browser's
     // authenticated cookies. Disposed per call so no request context lingers and hangs the process.
     const ctx: APIRequestContext = await request.newContext({
-      baseURL: base,
       extraHTTPHeaders: headers,
       timeout: conf.timeout_ms ?? 15000,
       ...(opts.storageState ? { storageState: opts.storageState } : {}),
     });
     try {
-      const res = await ctx.fetch(urlPath, { method: req.method, ...bodyOpt });
+      // Full URL (not a baseURL-relative path) so a base path component like /api/v3 is preserved.
+      const res = await ctx.fetch(joinApiUrl(base, urlPath), { method: req.method, ...bodyOpt });
       const text = await res.text();
       let parsed: any = text;
       try { parsed = text ? JSON.parse(text) : null; } catch { /* non-JSON → keep text */ }