npm - @guilz-dev/sdlc-gh - Versions diffs - 0.1.0 - Mend

@guilz-dev/sdlc-gh 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (176) hide show

package/.github/CODEOWNERS +5 -0
package/.github/ISSUE_TEMPLATE/bug_report.yml +68 -0
package/.github/ISSUE_TEMPLATE/config.yml +1 -0
package/.github/ISSUE_TEMPLATE/feature_request.yml +39 -0
package/.github/ISSUE_TEMPLATE/support.yml +56 -0
package/.github/ISSUE_TEMPLATE/task.yml +89 -0
package/.github/agents/implementer.agent.md +17 -0
package/.github/agents/reviewer.agent.md +18 -0
package/.github/agents/triager.agent.md +13 -0
package/.github/aw/actions-lock.json +9 -0
package/.github/copilot-instructions.md +35 -0
package/.github/hooks/hooks.json +12 -0
package/.github/instructions/core.instructions.md +11 -0
package/.github/instructions/profiles/go.instructions.md +10 -0
package/.github/instructions/profiles/php.instructions.md +11 -0
package/.github/instructions/profiles/python.instructions.md +11 -0
package/.github/instructions/profiles/ruby.instructions.md +11 -0
package/.github/instructions/profiles/typescript.instructions.md +11 -0
package/.github/labels.yml +55 -0
package/.github/pull_request_template.md +33 -0
package/.github/ruleset.example.json +33 -0
package/.github/ruleset.harness-eval.example.json +29 -0
package/.github/skills/quality-loop/SKILL.md +23 -0
package/.github/workflows/agent-retry-orchestrator.yml +161 -0
package/.github/workflows/copilot-setup-steps.yml +64 -0
package/.github/workflows/eval-ci.yml +169 -0
package/.github/workflows/eval-drift.yml +75 -0
package/.github/workflows/gh-aw-dogfood-ci.yml +73 -0
package/.github/workflows/harness-ci.yml +244 -0
package/.github/workflows/harness-sync.yml +28 -0
package/.github/workflows/l1-readiness-check.yml +45 -0
package/.github/workflows/labels-sync.yml +24 -0
package/.github/workflows/nightly-harness-review.lock.yml +1643 -0
package/.github/workflows/nightly-harness-review.md +87 -0
package/.github/workflows/nightly-harness-review.yml +63 -0
package/.github/workflows/npm-publish.yml +49 -0
package/.github/workflows/pr-context-comment.yml +138 -0
package/.github/workflows/product-ci-go.yml +33 -0
package/.github/workflows/product-ci-php.yml +39 -0
package/.github/workflows/product-ci-python.yml +34 -0
package/.github/workflows/product-ci-ruby.yml +35 -0
package/.github/workflows/product-ci-ts.yml +37 -0
package/.github/workflows/task-issue-label-sync.yml +50 -0
package/.github/workflows/weekly-redteam.lock.yml +1571 -0
package/.github/workflows/weekly-redteam.md +76 -0
package/.github/zizmor.yml +11 -0
package/AGENTS.md +54 -0
package/LICENSE +21 -0
package/README.md +366 -0
package/config/stacks.json +55 -0
package/docs/adoption.md +126 -0
package/docs/arch.md +535 -0
package/docs/auth-boundaries.md +16 -0
package/docs/coding-agent-l1.md +152 -0
package/docs/exceptions/README.md +25 -0
package/docs/exceptions/TEMPLATE.md +8 -0
package/docs/failure-taxonomy.md +23 -0
package/docs/gh-aw-dogfood.md +109 -0
package/docs/kpi-baseline.md +9 -0
package/docs/nightly-harness-review.md +94 -0
package/docs/operations.md +108 -0
package/docs/publishing.md +79 -0
package/docs/revert-playbook.md +44 -0
package/docs/shared-config.md +30 -0
package/docs/telemetry-artifacts.md +78 -0
package/docs/telemetry-schema.md +60 -0
package/evals/.score-baseline.json +6 -0
package/evals/e2e-bench/README.md +28 -0
package/evals/e2e-bench/manifest.json +16 -0
package/evals/e2e-bench/tasks/e2e-001.yml +10 -0
package/evals/e2e-bench/tasks/e2e-002.yml +11 -0
package/evals/e2e-bench/tasks/e2e-003.yml +10 -0
package/evals/e2e-bench/tasks/e2e-004.yml +14 -0
package/evals/e2e-bench/tasks/e2e-005.yml +11 -0
package/evals/e2e-bench/tasks/e2e-006.yml +10 -0
package/evals/e2e-bench/tasks/e2e-007.yml +10 -0
package/evals/e2e-bench/tasks/e2e-008.yml +10 -0
package/evals/e2e-bench/tasks/e2e-009.yml +10 -0
package/evals/trajectories/rubric.md +12 -0
package/evals/trajectories/test_harness_conventions.py +271 -0
package/infra/README.md +49 -0
package/infra/langfuse/docker-compose.yml +25 -0
package/infra/otel/collector-config.yml +24 -0
package/infra/samples/gh-aw-dogfood-report.json +44 -0
package/infra/samples/harness-review-routing-plan.json +19 -0
package/infra/samples/harness-review-summary.json +61 -0
package/infra/samples/telemetry-artifact.json +29 -0
package/infra/samples/telemetry-payload.json +19 -0
package/package.json +85 -0
package/prompts/triager-classify.prompt.yml +10 -0
package/sample/go/add.go +5 -0
package/sample/go/add_test.go +9 -0
package/sample/go/go.mod +3 -0
package/sample/php/composer.json +26 -0
package/sample/php/composer.lock +1881 -0
package/sample/php/phpunit.xml +8 -0
package/sample/php/src/Add.php +13 -0
package/sample/php/tests/AddTest.php +16 -0
package/sample/python/requirements-dev.txt +2 -0
package/sample/python/src/__init__.py +0 -0
package/sample/python/src/greet.py +3 -0
package/sample/python/tests/conftest.py +4 -0
package/sample/python/tests/test_greet.py +5 -0
package/sample/ruby/.rubocop.yml +10 -0
package/sample/ruby/Gemfile +6 -0
package/sample/ruby/Gemfile.lock +58 -0
package/sample/ruby/lib/add.rb +9 -0
package/sample/ruby/spec/add_spec.rb +11 -0
package/sample/ts/biome.json +6 -0
package/sample/ts/package-lock.json +1763 -0
package/sample/ts/package.json +15 -0
package/sample/ts/src/add.ts +3 -0
package/sample/ts/tests/add.test.ts +8 -0
package/sample/ts/tsconfig.json +12 -0
package/scripts/aggregate-harness-review.mjs +48 -0
package/scripts/bootstrap-harness.sh +411 -0
package/scripts/check-diff-size.mjs +46 -0
package/scripts/check-e2e-manifest.mjs +35 -0
package/scripts/check-eval-score-drift.mjs +31 -0
package/scripts/check-gh-aw-dogfood-scope.mjs +51 -0
package/scripts/check-issue-spec.mjs +215 -0
package/scripts/check-l1-readiness.mjs +82 -0
package/scripts/check-open-pr-limit.mjs +34 -0
package/scripts/doctor.mjs +177 -0
package/scripts/emit-gh-aw-dogfood-report.mjs +112 -0
package/scripts/emit-telemetry-artifact.mjs +99 -0
package/scripts/fetch-telemetry-artifacts.mjs +176 -0
package/scripts/harness-drift-report.mjs +99 -0
package/scripts/lib/bootstrap-copy.mjs +123 -0
package/scripts/lib/ccsd-contract.mjs +212 -0
package/scripts/lib/diff-size.mjs +103 -0
package/scripts/lib/doctor-local.mjs +179 -0
package/scripts/lib/e2e-manifest.mjs +76 -0
package/scripts/lib/gh-aw-dogfood.mjs +293 -0
package/scripts/lib/github-config.mjs +94 -0
package/scripts/lib/harness-ci-fragments.mjs +98 -0
package/scripts/lib/harness-review-routing.mjs +244 -0
package/scripts/lib/harness-review.mjs +388 -0
package/scripts/lib/issue-form-label-sync.mjs +56 -0
package/scripts/lib/l1-readiness.mjs +258 -0
package/scripts/lib/merge-harness-package.mjs +36 -0
package/scripts/lib/npm-package.mjs +129 -0
package/scripts/lib/setup-wizard.mjs +224 -0
package/scripts/lib/stacks.mjs +138 -0
package/scripts/lib/telemetry-artifact.mjs +253 -0
package/scripts/lib/template-root.mjs +39 -0
package/scripts/merge-harness-package.mjs +14 -0
package/scripts/route-harness-review.mjs +168 -0
package/scripts/run-e2e-bench.mjs +216 -0
package/scripts/sdlc-gh-cli.mjs +91 -0
package/scripts/select-eval-jobs.mjs +41 -0
package/scripts/setup-github.mjs +242 -0
package/scripts/setup-github.sh +4 -0
package/scripts/setup-wizard.mjs +426 -0
package/scripts/test-bootstrap-guidance-scenarios.mjs +94 -0
package/scripts/test-diff-size-scenarios.mjs +88 -0
package/scripts/test-doctor-scenarios.mjs +70 -0
package/scripts/test-e2e-manifest-scenarios.mjs +65 -0
package/scripts/test-gh-aw-dogfood-scenarios.mjs +74 -0
package/scripts/test-harness-review-routing-scenarios.mjs +130 -0
package/scripts/test-harness-review-scenarios.mjs +92 -0
package/scripts/test-hooks-scenarios.mjs +44 -0
package/scripts/test-issue-form-label-sync-scenarios.mjs +48 -0
package/scripts/test-issue-spec-scenarios.mjs +258 -0
package/scripts/test-l1-readiness-scenarios.mjs +204 -0
package/scripts/test-merge-harness-package-scenarios.mjs +53 -0
package/scripts/test-npm-package-scenarios.mjs +31 -0
package/scripts/test-sdlc-gh-cli-scenarios.mjs +54 -0
package/scripts/test-setup-github-scenarios.mjs +103 -0
package/scripts/test-setup-wizard-scenarios.mjs +114 -0
package/scripts/test-telemetry-artifact-scenarios.mjs +69 -0
package/scripts/trim-harness-ci.mjs +18 -0
package/scripts/validate-gh-aw-compile.mjs +64 -0
package/scripts/validate-harness.mjs +199 -0
package/scripts/validate-telemetry.mjs +21 -0
package/scripts/verify-bootstrap-stacks.sh +192 -0

package/scripts/lib/harness-review-routing.mjs ADDED Viewed

@@ -0,0 +1,244 @@
/**
 * Route nightly harness review classifications into GitHub issues.
 * See docs/nightly-harness-review.md and docs/failure-taxonomy.md (#4).
 */

/** Value written to `schema_version` on every routing plan. */
export const ROUTING_SCHEMA_VERSION = "1";

/** Text that precedes the dedupe key inside the hidden issue-body marker. */
export const ROUTING_MARKER_PREFIX = "harness-routing-key:";

/** Kinds of issue the router can open. */
export const ISSUE_KIND = {
  HARNESS_REVISION: "harness-revision",
  WALL_ADDITION: "wall-addition",
};

// Both routed issue kinds carry the same autonomy label.
const ROUTED_ISSUE_AUTONOMY_LABEL = "autonomy:L0";

/**
 * Labels attached to a routed issue, keyed by issue kind.
 * @type {Record<string, string[]>}
 */
export const ISSUE_LABELS = {
  [ISSUE_KIND.HARNESS_REVISION]: [
    "outer-loop:harness-revision",
    ROUTED_ISSUE_AUTONOMY_LABEL,
  ],
  [ISSUE_KIND.WALL_ADDITION]: [
    "outer-loop:wall-addition",
    ROUTED_ISSUE_AUTONOMY_LABEL,
  ],
};
/**
 * Compose the key that identifies one routed finding: the four parts joined
 * with ":" in the order repo, kind, signature, scope.
 *
 * @param {string} repo
 * @param {string} kind
 * @param {string} signature
 * @param {string} scope
 * @returns {string}
 */
export function routingDedupeKey(repo, kind, signature, scope) {
  const parts = [repo, kind, signature, scope];
  // Template-literal coercion keeps `undefined` visible as "undefined"
  // (a bare Array#join would turn it into an empty segment).
  return parts.map((part) => `${part}`).join(":");
}
/**
 * Wrap a dedupe key in the HTML comment that is embedded in routed issue
 * bodies: `<!-- harness-routing-key:<dedupeKey> -->`.
 *
 * @param {string} dedupeKey
 * @returns {string}
 */
export function routingMarker(dedupeKey) {
  const payload = `${ROUTING_MARKER_PREFIX}${dedupeKey}`;
  return ["<!--", payload, "-->"].join(" ");
}
/**
 * Tell whether an issue body contains the routing marker for `dedupeKey`.
 * A falsy body is treated as the empty string.
 *
 * @param {string} body
 * @param {string} dedupeKey
 * @returns {boolean}
 */
export function bodyHasRoutingMarker(body, dedupeKey) {
  const text = body ? String(body) : "";
  return text.indexOf(routingMarker(dedupeKey)) !== -1;
}
/**
 * Decide whether the summary shows a repeated "FF不足" pattern: either at
 * least two classification rows with that class, or a repeated failure
 * signature for the `lint` wall type covering two or more records.
 *
 * @param {Record<string, unknown>} summary
 * @returns {boolean}
 */
export function hasRepeatedFfFindings(summary) {
  let ffRows = 0;
  for (const row of summary.classifications ?? []) {
    if (row.classification === "FF不足") ffRows += 1;
  }
  if (ffRows >= 2) return true;

  for (const sig of summary.rollup?.repeated_failure_signatures ?? []) {
    const isLint = sig.wall_failure_type === "lint";
    if (isLint && Number(sig.record_count) >= 2) return true;
  }
  return false;
}
/**
 * Decide whether the summary shows a repeated "壁不足" pattern: two or more
 * rows with that class, or at least one such row together with a
 * `rollup.review_rejection_proxy_count` of 1 or more.
 *
 * @param {Record<string, unknown>} summary
 * @returns {boolean}
 */
export function hasRepeatedWallFindings(summary) {
  const wallRows = (summary.classifications ?? []).reduce(
    (total, row) => (row.classification === "壁不足" ? total + 1 : total),
    0,
  );
  if (wallRows >= 2) return true;

  const proxyCount = Number(summary.rollup?.review_rejection_proxy_count ?? 0);
  return wallRows >= 1 && proxyCount >= 1;
}
/**
 * Derive a scope string from the distinct task classes and wall failure
 * types found on the given classification rows. Task classes take priority;
 * wall types are only used alone when no task class is present.
 *
 * @param {Record<string, unknown>[]} items
 * @returns {string}
 */
export function inferRoutingScope(items) {
  const distinctSorted = (values) => Array.from(new Set(values)).sort();

  const taskClasses = distinctSorted(
    items.map((item) => String(item.task_class || "")).filter(Boolean),
  );
  const wallTypes = distinctSorted(
    items
      .flatMap((item) => item.wall_failure_types ?? [])
      .map(String)
      .filter(Boolean),
  );

  if (taskClasses.length === 1) {
    const [taskClass] = taskClasses;
    return wallTypes.length === 1
      ? `task:${taskClass}|wall:${wallTypes[0]}`
      : `task:${taskClass}`;
  }
  if (taskClasses.length > 1) {
    return `tasks:${taskClasses.join("+")}`;
  }

  switch (wallTypes.length) {
    case 0:
      return "unknown-scope";
    case 1:
      return `wall:${wallTypes[0]}`;
    default:
      return `walls:${wallTypes.join("+")}`;
  }
}
/**
 * Build the `open_or_update_issue` action for one routed finding kind.
 *
 * The issue body starts with the routing marker for the dedupe key
 * (repo + kind + signature + inferred scope), followed by a summary, an
 * evidence table with one row per item, suggested next steps and a
 * rollback note.
 *
 * @param {Record<string, unknown>} summary Harness review summary; `repo`,
 *   `window_hours` and `generated_at` are read from it.
 * @param {string} kind One of the `ISSUE_KIND` values.
 * @param {Record<string, unknown>[]} items Classification rows used as evidence.
 * @param {string} signature Failure signature that triggered the routing.
 * @returns {Record<string, unknown>}
 */
export function buildIssueAction(summary, kind, items, signature) {
  const repo = String(summary.repo ?? "unknown/unknown");
  const scope = inferRoutingScope(items);
  const dedupeKey = routingDedupeKey(repo, kind, signature, scope);
  const windowHours = summary.window_hours ?? 24;
  const isHarnessRevision = kind === ISSUE_KIND.HARNESS_REVISION;

  // Evidence values originate from telemetry, so keep them from breaking the
  // Markdown table: a raw "|" would start a new column and a line break would
  // end the row. Missing values render as an em dash instead of "undefined".
  const cell = (value) =>
    String(value ?? "—")
      .replace(/\r\n|\r|\n/g, " ")
      .replace(/\|/g, "\\|");

  const title = isHarnessRevision
    ? `[outer-loop] Harness revision needed (${signature} / ${scope})`
    : `[outer-loop] Wall addition needed (${signature} / ${scope})`;

  const lines = [
    routingMarker(dedupeKey),
    "",
    "## Summary",
    "",
    `Nightly harness review (${windowHours}h window) routed **${kind}** work.`,
    "",
    `Generated: ${summary.generated_at}`,
    `Repository: ${repo}`,
    `Scope: ${scope}`,
    "",
    "## Evidence",
    "",
    "| task_id | pr | rationale | wall_failure_types |",
    "|---------|----|-----------|--------------------|",
  ];
  for (const item of items) {
    const walls = (item.wall_failure_types ?? []).join(", ") || "—";
    lines.push(
      `| ${cell(item.task_id)} | ${cell(item.pr_number)} | ${cell(item.rationale)} | ${cell(walls)} |`,
    );
  }
  lines.push(
    "",
    "## Suggested next steps",
    "",
    isHarnessRevision
      ? "- Update instructions / skills / agents for repeated convention gaps\n- Link eval or telemetry evidence in follow-up PRs"
      : "- Add tests, lint rules, or contracts so CI catches review findings\n- Keep proposal PRs at `autonomy:L0` until walls are updated",
    "",
    "## Rollback",
    "",
    "Close this issue if the signature does not recur in the next nightly window.",
    "",
    "Automated by `scripts/route-harness-review.mjs` (issue #4).",
  );

  return {
    action: "open_or_update_issue",
    kind,
    dedupe_key: dedupeKey,
    signature,
    scope,
    labels: ISSUE_LABELS[kind] ?? [],
    title,
    body: `${lines.join("\n")}\n`,
    evidence_count: items.length,
  };
}
/**
 * Turn a harness review summary into a routing plan: issue actions for
 * repeated "FF不足" / "壁不足" findings plus a `skipped` list explaining
 * why a kind was not routed.
 *
 * @param {Record<string, unknown>} summary
 * @returns {Record<string, unknown>} Plan with `schema_version`,
 *   `generated_at` (time of this call), `source_summary_at`, `repo`,
 *   `actions` and `skipped`.
 */
export function buildRoutingPlan(summary) {
  const actions = [];
  const skipped = [];
  const classifications = summary.classifications ?? [];
  const ffItems = classifications.filter((c) => c.classification === "FF不足");
  const wallItems = classifications.filter((c) => c.classification === "壁不足");
  const signatures = summary.rollup?.repeated_failure_signatures ?? [];
  const proxyCount = Number(summary.rollup?.review_rejection_proxy_count ?? 0);

  const ffRepeated = hasRepeatedFfFindings(summary);
  if (ffRepeated && ffItems.length > 0) {
    const signature = signatures.some((s) => s.wall_failure_type === "lint")
      ? "lint"
      : "ff-aggregate";
    actions.push(buildIssueAction(summary, ISSUE_KIND.HARNESS_REVISION, ffItems, signature));
  } else if (ffItems.length > 0) {
    skipped.push({ kind: ISSUE_KIND.HARNESS_REVISION, reason: "FF不足 present but not repeated" });
  } else if (ffRepeated) {
    skipped.push({
      kind: ISSUE_KIND.HARNESS_REVISION,
      reason: "lint signature repeated without FF不足 classification rows",
    });
  }

  if (hasRepeatedWallFindings(summary) && wallItems.length > 0) {
    const signature = proxyCount >= 1 ? "ci-pass-review-reject" : "wall-aggregate";
    actions.push(buildIssueAction(summary, ISSUE_KIND.WALL_ADDITION, wallItems, signature));
  } else if (wallItems.length > 0) {
    skipped.push({ kind: ISSUE_KIND.WALL_ADDITION, reason: "壁不足 present but not repeated" });
  } else if (proxyCount >= 1) {
    // hasRepeatedWallFindings() is never true without a 壁不足 row, so test
    // the proxy count directly; otherwise this skip reason cannot be reported.
    skipped.push({
      kind: ISSUE_KIND.WALL_ADDITION,
      reason: "review-rejection proxy without 壁不足 classification rows",
    });
  }

  return {
    schema_version: ROUTING_SCHEMA_VERSION,
    generated_at: new Date().toISOString(),
    source_summary_at: summary.generated_at ?? null,
    repo: summary.repo ?? "unknown/unknown",
    actions,
    skipped,
  };
}
/**
 * Resolve, without touching GitHub, what each planned action would do:
 * update the first existing issue whose body carries the action's routing
 * marker, or create a new issue when none matches.
 *
 * @param {Record<string, unknown>} plan
 * @param {{ existingIssues?: { number: number, body: string }[] }} [ctx]
 * @returns {Record<string, unknown>} The plan plus a `results` array.
 */
export function applyRoutingPlanDryRun(plan, ctx = {}) {
  const knownIssues = ctx.existingIssues ?? [];
  const results = (plan.actions ?? []).map((action) => {
    const { dedupe_key: dedupeKey, kind, title } = action;
    const existing = knownIssues.find((issue) => bodyHasRoutingMarker(issue.body, dedupeKey));
    return {
      dedupe_key: dedupeKey,
      kind,
      operation: existing ? "update_issue" : "create_issue",
      issue_number: existing?.number ?? null,
      title,
    };
  });
  return { ...plan, results };
}

package/scripts/lib/harness-review.mjs ADDED Viewed

@@ -0,0 +1,388 @@
/**
 * Nightly harness review — aggregate telemetry artifacts and classify failures.
 * See docs/failure-taxonomy.md and docs/nightly-harness-review.md.
 */

/** Value written to `schema_version` on every review summary. */
export const REVIEW_SCHEMA_VERSION = "1";

/** Directory name used for review output. */
export const REVIEW_OUT_DIR = "harness-review";

/** A task group whose highest `retry_count` reaches this value counts as retry-exhausted. */
export const MAX_RETRIES = 3;

/**
 * Classification labels emitted by the review.
 * @type {readonly string[]}
 */
export const FAILURE_CLASSES = [
  "FF不足",
  "壁不足",
  "モデル限界",
  "unclassified",
];

/** Wall types whose repetition is classified as "FF不足". */
const FF_WALL_TYPES = new Set(["lint"]);

/** Wall types that usually indicate model / execution limits when repeated */
const MODEL_LIMIT_WALL_TYPES = new Set([
  "test",
  "type",
  "security",
  "safe-output",
  "diff-size",
]);
/**
 * Key used to detect duplicate telemetry records: workflow run id, record
 * source and the payload's PR number, joined with ":".
 *
 * @param {Record<string, unknown>} record
 * @returns {string}
 */
export function telemetryDedupeKey(record) {
  const { pr_number: prNumber } = record.payload ?? {};
  const parts = [record.workflow_run_id, record.source, prNumber];
  // Template coercion keeps missing parts visible as "undefined".
  return parts.map((part) => `${part}`).join(":");
}
/**
 * Collapse records that share a `telemetryDedupeKey`, keeping the one whose
 * `emitted_at` compares greatest as a string (the first seen wins ties).
 * Output order follows the first appearance of each key.
 *
 * @param {Record<string, unknown>[]} records
 * @returns {Record<string, unknown>[]}
 */
export function dedupeTelemetryRecords(records) {
  const latestByKey = new Map();
  for (const candidate of records) {
    const key = telemetryDedupeKey(candidate);
    const kept = latestByKey.get(key);
    // Skip the candidate unless it is strictly newer than what we hold.
    if (kept && String(kept.emitted_at) >= String(candidate.emitted_at)) continue;
    latestByKey.set(key, candidate);
  }
  return Array.from(latestByKey.values());
}
/**
 * Key that groups telemetry records belonging to the same task: the
 * payload's repo, task id and PR number, joined with "|".
 *
 * @param {Record<string, unknown>} record
 * @returns {string}
 */
export function taskGroupKey(record) {
  const { repo, task_id: taskId, pr_number: prNumber } = record.payload ?? {};
  return [repo, taskId, prNumber].map((part) => `${part}`).join("|");
}
/**
 * Bucket records by `taskGroupKey`, preserving input order inside each
 * bucket and first-seen order across buckets.
 *
 * @param {Record<string, unknown>[]} records
 * @returns {Map<string, Record<string, unknown>[]>}
 */
export function groupRecordsByTask(records) {
  const buckets = new Map();
  for (const record of records) {
    const key = taskGroupKey(record);
    if (buckets.has(key)) {
      buckets.get(key).push(record);
    } else {
      buckets.set(key, [record]);
    }
  }
  return buckets;
}
/**
 * Count how often each value occurs. Falsy values (empty string,
 * `undefined`, `null`, 0) are skipped.
 *
 * Counting happens in a Map so that values which collide with inherited
 * object properties ("constructor", "toString", "__proto__", ...) are
 * counted like any other value instead of picking up the inherited
 * property as their starting count.
 *
 * @param {string[]} values
 * @returns {Record<string, number>} Plain object mapping value to count.
 */
export function countValues(values) {
  const tally = new Map();
  for (const value of values) {
    if (!value) continue;
    const key = String(value);
    tally.set(key, (tally.get(key) ?? 0) + 1);
  }
  // Object.fromEntries defines own properties, so even "__proto__" survives.
  return Object.fromEntries(tally);
}
/**
 * Tell whether any record in the group carries a failure signal: a wall
 * failure type, an "escalated" final outcome, a "changes_requested" review
 * outcome, or a retry count above zero.
 *
 * @param {Record<string, unknown>[]} records
 * @returns {boolean}
 */
export function groupHasFailureSignal(records) {
  return records.some((record) => {
    const {
      wall_failure_type: wallFailureType,
      final_outcome: finalOutcome,
      review_outcome: reviewOutcome,
      retry_count: retryCount,
    } = record.payload ?? {};
    return (
      Boolean(wallFailureType) ||
      finalOutcome === "escalated" ||
      reviewOutcome === "changes_requested" ||
      Number(retryCount) > 0
    );
  });
}
/**
 * Classify the telemetry records of one task group.
 *
 * Rules are evaluated in order and the first match wins:
 *  1. harness-ci record without wall failure + review "changes_requested" -> "壁不足"
 *  2. escalated, or highest retry_count >= MAX_RETRIES                    -> "モデル限界"
 *  3. any "security" wall failure                                         -> "モデル限界"
 *  4. two or more wall failures of an FF wall type (lint)                 -> "FF不足"
 *  5. a wall type seen twice plus >= 2 agent-retry-orchestrator records   -> "モデル限界"
 *  6. a wall type seen twice: "モデル限界" if it is a model-limit type,
 *     otherwise "unclassified"
 *  7. any single wall failure or retry activity                           -> "unclassified"
 *  8. review "changes_requested" with none of the above                   -> "壁不足"
 *
 * @param {Record<string, unknown>[]} records
 * @returns {{ classification: string, rationale: string } | null} `null`
 *   when the group has no failure signal.
 */
export function classifyTaskGroup(records) {
  if (!groupHasFailureSignal(records)) return null;

  // Spread the payload first so a payload field named `source` cannot shadow
  // the record-level source; the retry-event filter below and
  // buildHarnessReviewSummary both read `record.source`.
  const payloads = records.map((record) => ({
    ...(record.payload ?? {}),
    source: record.source,
  }));
  const maxRetry = Math.max(0, ...payloads.map((p) => Number(p.retry_count) || 0));
  const wallTypes = payloads.map((p) => String(p.wall_failure_type || "")).filter(Boolean);
  const wallCounts = countValues(wallTypes);
  const escalated = payloads.some((p) => p.final_outcome === "escalated");
  const reviewRejected = payloads.some((p) => p.review_outcome === "changes_requested");
  const harnessGreen = payloads.some((p) => p.source === "harness-ci" && !p.wall_failure_type);
  const retryEvents = records.filter((r) => r.source === "agent-retry-orchestrator");

  if (harnessGreen && reviewRejected) {
    return {
      classification: "壁不足",
      rationale: "Harness CI passed while review_outcome is changes_requested",
    };
  }
  if (escalated || maxRetry >= MAX_RETRIES) {
    return {
      classification: "モデル限界",
      rationale: `Retry budget exhausted or escalated (max_retry_count=${maxRetry})`,
    };
  }
  if (wallTypes.includes("security")) {
    return {
      classification: "モデル限界",
      rationale: "Security wall failures are not auto-retried",
    };
  }

  const lintFailures = wallTypes.filter((w) => FF_WALL_TYPES.has(w)).length;
  if (lintFailures >= 2) {
    return {
      classification: "FF不足",
      rationale: "Repeated lint or issue-spec convention failures",
    };
  }

  const repeatedWall = Object.entries(wallCounts).find(([, count]) => count >= 2);
  if (repeatedWall && retryEvents.length >= 2) {
    return {
      classification: "モデル限界",
      rationale: `Same wall_failure_type (${repeatedWall[0]}) across multiple retry events`,
    };
  }
  if (repeatedWall) {
    // A repeated FF wall type has already returned above (lintFailures >= 2),
    // so only the model-limit and unmapped cases remain here.
    const [wallType] = repeatedWall;
    if (MODEL_LIMIT_WALL_TYPES.has(wallType)) {
      return {
        classification: "モデル限界",
        rationale: `Repeated wall_failure_type ${wallType} after retries`,
      };
    }
    return {
      classification: "unclassified",
      rationale: `Repeated wall_failure_type ${wallType} without taxonomy mapping`,
    };
  }

  if (wallTypes.length > 0 || maxRetry > 0) {
    return {
      classification: "unclassified",
      rationale:
        wallTypes.length > 0
          ? `Single wall failure (${wallTypes[0]}) without repeat pattern`
          : `Retry activity (count=${maxRetry}) without wall_failure_type`,
    };
  }
  if (reviewRejected) {
    return {
      classification: "壁不足",
      rationale: "Review rejection without CI failure signal in telemetry",
    };
  }
  return null;
}
/**
 * Aggregate wall failures across task groups and report the wall types that
 * repeat: those seen on two or more records, or in two or more tasks.
 *
 * Each group contributes the `task_id` of its first record. Stats are kept
 * in a Map so a wall type that matches an inherited object property (for
 * example "constructor") is aggregated normally instead of resolving to
 * that inherited value.
 *
 * @param {Map<string, Record<string, unknown>[]>} groups Records per task group.
 * @returns {Record<string, unknown>[]} Entries with `wall_failure_type`,
 *   `record_count`, `task_count` and `task_ids`, sorted by `record_count`
 *   descending.
 */
export function buildRepeatedFailureSignatures(groups) {
  /** @type {Map<string, { record_count: number, task_ids: Set<string> }>} */
  const byWall = new Map();
  for (const groupRecords of groups.values()) {
    const taskId = String((groupRecords[0]?.payload ?? {}).task_id ?? "");
    for (const record of groupRecords) {
      const wallType = String((record.payload ?? {}).wall_failure_type || "");
      if (!wallType) continue;
      let stats = byWall.get(wallType);
      if (!stats) {
        stats = { record_count: 0, task_ids: new Set() };
        byWall.set(wallType, stats);
      }
      stats.record_count += 1;
      if (taskId) stats.task_ids.add(taskId);
    }
  }
  return [...byWall]
    .filter(([, stats]) => stats.record_count >= 2 || stats.task_ids.size >= 2)
    .map(([wall_failure_type, stats]) => ({
      wall_failure_type,
      record_count: stats.record_count,
      task_count: stats.task_ids.size,
      task_ids: [...stats.task_ids],
    }))
    .sort((a, b) => b.record_count - a.record_count);
}
/**
 * Build the nightly review summary from raw telemetry records.
 *
 * Records are de-duplicated, grouped per task, and each group is classified.
 * Groups without a classification still contribute to the rollup counters.
 *
 * @param {Record<string, unknown>[]} records
 * @param {{ repo?: string, windowHours?: number, generatedAt?: string }} [options]
 *   `repo` falls back to the first record's payload repo, `windowHours` to 24
 *   and `generatedAt` to the current time.
 * @returns {Record<string, unknown>}
 */
export function buildHarnessReviewSummary(records, options = {}) {
  const deduped = dedupeTelemetryRecords(records);
  const groups = groupRecordsByTask(deduped);
  const classifications = [];
  // Counted in a Map so a wall type that matches an inherited object property
  // (e.g. "constructor") starts from zero like any other type.
  const wallFailureRollup = new Map();
  let retryExhaustionCount = 0;
  let reviewRejectionProxyCount = 0;

  for (const groupRecords of groups.values()) {
    const payloads = groupRecords.map((r) => r.payload ?? {});
    // Distinct wall types of this group; each counts once per group.
    const wallTypes = [
      ...new Set(payloads.map((p) => String(p.wall_failure_type || "")).filter(Boolean)),
    ];
    for (const wallType of wallTypes) {
      wallFailureRollup.set(wallType, (wallFailureRollup.get(wallType) ?? 0) + 1);
    }

    const maxRetry = Math.max(0, ...payloads.map((p) => Number(p.retry_count) || 0));
    const escalated = payloads.some((p) => p.final_outcome === "escalated");
    if (escalated || maxRetry >= MAX_RETRIES) retryExhaustionCount += 1;

    const harnessGreen = groupRecords.some(
      (r) => r.source === "harness-ci" && !(r.payload ?? {}).wall_failure_type,
    );
    const reviewRejected = payloads.some((p) => p.review_outcome === "changes_requested");
    if (harnessGreen && reviewRejected) reviewRejectionProxyCount += 1;

    const result = classifyTaskGroup(groupRecords);
    if (!result) continue;

    // Identity fields are taken from the group's first record.
    const sample = payloads[0] ?? {};
    classifications.push({
      repo: sample.repo,
      task_id: sample.task_id,
      pr_number: sample.pr_number,
      task_class: sample.task_class,
      autonomy_level: sample.autonomy_level,
      classification: result.classification,
      rationale: result.rationale,
      wall_failure_types: wallTypes,
      max_retry_count: maxRetry,
      final_outcome: payloads.map((p) => p.final_outcome).find(Boolean) ?? "in_progress",
      review_outcome:
        payloads.map((p) => p.review_outcome).find((v) => v && v !== "pending") ?? "pending",
      sources: [...new Set(groupRecords.map((r) => r.source))],
      workflow_run_ids: [...new Set(groupRecords.map((r) => r.workflow_run_id).filter(Boolean))],
    });
  }

  const byClassification = countValues(classifications.map((item) => item.classification));
  const repeatedFailureSignatures = buildRepeatedFailureSignatures(groups);
  return {
    schema_version: REVIEW_SCHEMA_VERSION,
    generated_at: options.generatedAt ?? new Date().toISOString(),
    repo: options.repo ?? deduped[0]?.payload?.repo ?? "unknown/unknown",
    window_hours: Number(options.windowHours ?? 24),
    rollup: {
      telemetry_records: deduped.length,
      task_groups: groups.size,
      failure_groups: classifications.length,
      by_wall_failure_type: Object.fromEntries(wallFailureRollup),
      repeated_failure_signatures: repeatedFailureSignatures,
      retry_exhaustion_count: retryExhaustionCount,
      review_rejection_proxy_count: reviewRejectionProxyCount,
      by_classification: byClassification,
    },
    classifications,
  };
}
/**
 * Render a harness review summary as a Markdown report: header, rollup
 * metrics, wall-failure / repeated-signature / classification tables, and a
 * per-task classification table.
 *
 * Values placed in table cells are escaped so that a "|" or a line break in
 * telemetry-derived text cannot split a cell or end a row; missing values
 * render as an em dash.
 *
 * @param {Record<string, unknown>} summary
 * @returns {string} Markdown text ending with a newline.
 */
export function formatHarnessReviewMarkdown(summary) {
  const cell = (value) =>
    String(value ?? "—")
      .replace(/\r\n|\r|\n/g, " ")
      .replace(/\|/g, "\\|");
  const joinOrDash = (values) => (values ?? []).join(", ") || "—";
  const byCountDesc = (a, b) => b[1] - a[1];

  const rollup = summary.rollup ?? {};
  const lines = [
    "# Nightly harness review",
    "",
    `Generated: ${summary.generated_at}`,
    `Repository: ${summary.repo}`,
    `Window: last ${summary.window_hours}h`,
    "",
    "## Rollup",
    "",
    `| Metric | Value |`,
    `|--------|-------|`,
    `| Telemetry records | ${rollup.telemetry_records ?? 0} |`,
    `| Task groups | ${rollup.task_groups ?? 0} |`,
    `| Classified failure groups | ${rollup.failure_groups ?? 0} |`,
    `| Retry exhaustion | ${rollup.retry_exhaustion_count ?? 0} |`,
    `| CI pass + review rejection proxy | ${rollup.review_rejection_proxy_count ?? 0} |`,
    "",
    "### By wall_failure_type",
    "",
  ];

  const wallEntries = Object.entries(rollup.by_wall_failure_type ?? {});
  if (wallEntries.length === 0) {
    lines.push("_No wall failures in window._", "");
  } else {
    lines.push("| wall_failure_type | count |", "|-------------------|-------|");
    for (const [type, count] of wallEntries.sort(byCountDesc)) {
      lines.push(`| ${cell(type)} | ${cell(count)} |`);
    }
    lines.push("");
  }

  const signatures = rollup.repeated_failure_signatures ?? [];
  lines.push("### Repeated failure signatures", "");
  if (signatures.length === 0) {
    lines.push("_No repeated failure signatures in window._", "");
  } else {
    lines.push("| wall_failure_type | record_count | task_count | task_ids |", "|---|---:|---:|---|");
    for (const sig of signatures) {
      lines.push(
        `| ${cell(sig.wall_failure_type)} | ${cell(sig.record_count)} | ${cell(sig.task_count)} | ${cell(joinOrDash(sig.task_ids))} |`,
      );
    }
    lines.push("");
  }

  lines.push("### By classification", "");
  const classEntries = Object.entries(rollup.by_classification ?? {});
  if (classEntries.length === 0) {
    lines.push("_No classified failures in window._", "");
  } else {
    lines.push("| classification | count |", "|----------------|-------|");
    for (const [cls, count] of classEntries.sort(byCountDesc)) {
      lines.push(`| ${cell(cls)} | ${cell(count)} |`);
    }
    lines.push("");
  }

  lines.push("## Per-task classifications", "");
  const items = summary.classifications ?? [];
  if (items.length === 0) {
    lines.push("_No per-task classification records._");
    return `${lines.join("\n")}\n`;
  }
  lines.push(
    "| task_id | pr | class | wall_failure_types | max_retry | rationale |",
    "|---------|----|-------|--------------------|-----------|-----------|",
  );
  for (const item of items) {
    lines.push(
      `| ${cell(item.task_id)} | ${cell(item.pr_number)} | ${cell(item.classification)} | ${cell(joinOrDash(item.wall_failure_types))} | ${cell(item.max_retry_count)} | ${cell(item.rationale)} |`,
    );
  }
  return `${lines.join("\n")}\n`;
}

package/scripts/lib/issue-form-label-sync.mjs ADDED Viewed

@@ -0,0 +1,56 @@
/** Issue-form "Task class" selections mapped to their `task:*` label. */
const TASK_CLASS_LABELS = new Map([
  ["docs", "task:docs"],
  ["test-fix", "task:test-fix"],
  ["refactor", "task:refactor"],
  ["feature-small", "task:feature-small"],
  ["dependency-bump", "task:dependency-bump"],
  ["infra", "task:infra"],
  ["security-sensitive", "task:security-sensitive"],
]);

/** Issue-form "Max autonomy level" selections mapped to their `autonomy:*` label. */
const AUTONOMY_LABELS = new Map([
  ["L0", "autonomy:L0"],
  ["L1", "autonomy:L1"],
  ["L2", "autonomy:L2"],
  ["L3", "autonomy:L3"],
]);

/**
 * Return the trimmed text of the `### <heading>` section of an issue body:
 * everything after the heading line up to the next line starting with
 * "### " (or the end of the body). Returns "" when the heading is absent
 * or its section is empty.
 *
 * The body is scanned line by line rather than with a RegExp built from
 * `heading`, so the heading is matched literally and an empty section
 * cannot pick up the following heading line as its value.
 *
 * @param {string} body
 * @param {string} heading
 * @returns {string}
 */
function extractHeadingValue(body, heading) {
  const lines = String(body ?? "").split(/\r?\n/);
  const headingLine = `### ${heading}`;
  const start = lines.findIndex((line) => line.trimEnd() === headingLine);
  if (start === -1) return "";

  const section = [];
  for (const line of lines.slice(start + 1)) {
    if (line.startsWith("### ")) break;
    section.push(line);
  }
  return section.join("\n").trim();
}

/**
 * Read the "Task class" and "Max autonomy level" selections from a task
 * issue body and resolve them to labels.
 *
 * @param {string | null} [body] Issue body; `null`/`undefined` is treated as
 *   empty (an issue without a description has a null body).
 * @returns {{ taskClass: string, autonomy: string, taskLabel: string,
 *   autonomyLabel: string, isTaskIssue: boolean }} Labels are "" when the
 *   selection is not a known value; `isTaskIssue` is true when the body
 *   contains the Goal, Task class and Max autonomy level headings.
 */
export function parseTaskIssueSelections(body = "") {
  const text = String(body ?? "");
  // Only the first line of each section is the selected value.
  const firstLine = (heading) => extractHeadingValue(text, heading).split("\n")[0].trim();
  const taskClass = firstLine("Task class");
  const autonomy = firstLine("Max autonomy level");
  return {
    taskClass,
    autonomy,
    taskLabel: TASK_CLASS_LABELS.get(taskClass) ?? "",
    autonomyLabel: AUTONOMY_LABELS.get(autonomy) ?? "",
    isTaskIssue:
      text.includes("### Goal") &&
      text.includes("### Task class") &&
      text.includes("### Max autonomy level"),
  };
}
/**
 * Plan the label set for a task issue: keep every label that is not a
 * `task:*` or `autonomy:*` label and append the two labels resolved from the
 * issue form.
 *
 * `changed` compares the label sets, not their order, so labels that already
 * match but arrive in a different order do not count as a change.
 *
 * @param {string[]} [existingLabels] Labels currently on the issue.
 * @param {{ taskLabel: string, autonomyLabel: string }} parsed Result of
 *   `parseTaskIssueSelections`.
 * @returns {{ labels: string[], changed: boolean, reason: string }} When a
 *   selection could not be resolved the existing labels are returned untouched.
 */
export function planIssueLabels(existingLabels = [], parsed) {
  if (!parsed.taskLabel || !parsed.autonomyLabel) {
    return {
      labels: existingLabels,
      changed: false,
      reason: "task issue selections could not be resolved",
    };
  }

  const keep = existingLabels.filter(
    (label) => !label.startsWith("task:") && !label.startsWith("autonomy:"),
  );
  const labels = [...keep, parsed.taskLabel, parsed.autonomyLabel];

  const before = new Set(existingLabels);
  const after = new Set(labels);
  const changed = before.size !== after.size || labels.some((label) => !before.has(label));

  return {
    labels,
    changed,
    reason: changed ? "updated labels from Issue form selections" : "labels already matched Issue form selections",
  };
}