@guilz-dev/sdlc-gh 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (176) hide show
  1. package/.github/CODEOWNERS +5 -0
  2. package/.github/ISSUE_TEMPLATE/bug_report.yml +68 -0
  3. package/.github/ISSUE_TEMPLATE/config.yml +1 -0
  4. package/.github/ISSUE_TEMPLATE/feature_request.yml +39 -0
  5. package/.github/ISSUE_TEMPLATE/support.yml +56 -0
  6. package/.github/ISSUE_TEMPLATE/task.yml +89 -0
  7. package/.github/agents/implementer.agent.md +17 -0
  8. package/.github/agents/reviewer.agent.md +18 -0
  9. package/.github/agents/triager.agent.md +13 -0
  10. package/.github/aw/actions-lock.json +9 -0
  11. package/.github/copilot-instructions.md +35 -0
  12. package/.github/hooks/hooks.json +12 -0
  13. package/.github/instructions/core.instructions.md +11 -0
  14. package/.github/instructions/profiles/go.instructions.md +10 -0
  15. package/.github/instructions/profiles/php.instructions.md +11 -0
  16. package/.github/instructions/profiles/python.instructions.md +11 -0
  17. package/.github/instructions/profiles/ruby.instructions.md +11 -0
  18. package/.github/instructions/profiles/typescript.instructions.md +11 -0
  19. package/.github/labels.yml +55 -0
  20. package/.github/pull_request_template.md +33 -0
  21. package/.github/ruleset.example.json +33 -0
  22. package/.github/ruleset.harness-eval.example.json +29 -0
  23. package/.github/skills/quality-loop/SKILL.md +23 -0
  24. package/.github/workflows/agent-retry-orchestrator.yml +161 -0
  25. package/.github/workflows/copilot-setup-steps.yml +64 -0
  26. package/.github/workflows/eval-ci.yml +169 -0
  27. package/.github/workflows/eval-drift.yml +75 -0
  28. package/.github/workflows/gh-aw-dogfood-ci.yml +73 -0
  29. package/.github/workflows/harness-ci.yml +244 -0
  30. package/.github/workflows/harness-sync.yml +28 -0
  31. package/.github/workflows/l1-readiness-check.yml +45 -0
  32. package/.github/workflows/labels-sync.yml +24 -0
  33. package/.github/workflows/nightly-harness-review.lock.yml +1643 -0
  34. package/.github/workflows/nightly-harness-review.md +87 -0
  35. package/.github/workflows/nightly-harness-review.yml +63 -0
  36. package/.github/workflows/npm-publish.yml +49 -0
  37. package/.github/workflows/pr-context-comment.yml +138 -0
  38. package/.github/workflows/product-ci-go.yml +33 -0
  39. package/.github/workflows/product-ci-php.yml +39 -0
  40. package/.github/workflows/product-ci-python.yml +34 -0
  41. package/.github/workflows/product-ci-ruby.yml +35 -0
  42. package/.github/workflows/product-ci-ts.yml +37 -0
  43. package/.github/workflows/task-issue-label-sync.yml +50 -0
  44. package/.github/workflows/weekly-redteam.lock.yml +1571 -0
  45. package/.github/workflows/weekly-redteam.md +76 -0
  46. package/.github/zizmor.yml +11 -0
  47. package/AGENTS.md +54 -0
  48. package/LICENSE +21 -0
  49. package/README.md +366 -0
  50. package/config/stacks.json +55 -0
  51. package/docs/adoption.md +126 -0
  52. package/docs/arch.md +535 -0
  53. package/docs/auth-boundaries.md +16 -0
  54. package/docs/coding-agent-l1.md +152 -0
  55. package/docs/exceptions/README.md +25 -0
  56. package/docs/exceptions/TEMPLATE.md +8 -0
  57. package/docs/failure-taxonomy.md +23 -0
  58. package/docs/gh-aw-dogfood.md +109 -0
  59. package/docs/kpi-baseline.md +9 -0
  60. package/docs/nightly-harness-review.md +94 -0
  61. package/docs/operations.md +108 -0
  62. package/docs/publishing.md +79 -0
  63. package/docs/revert-playbook.md +44 -0
  64. package/docs/shared-config.md +30 -0
  65. package/docs/telemetry-artifacts.md +78 -0
  66. package/docs/telemetry-schema.md +60 -0
  67. package/evals/.score-baseline.json +6 -0
  68. package/evals/e2e-bench/README.md +28 -0
  69. package/evals/e2e-bench/manifest.json +16 -0
  70. package/evals/e2e-bench/tasks/e2e-001.yml +10 -0
  71. package/evals/e2e-bench/tasks/e2e-002.yml +11 -0
  72. package/evals/e2e-bench/tasks/e2e-003.yml +10 -0
  73. package/evals/e2e-bench/tasks/e2e-004.yml +14 -0
  74. package/evals/e2e-bench/tasks/e2e-005.yml +11 -0
  75. package/evals/e2e-bench/tasks/e2e-006.yml +10 -0
  76. package/evals/e2e-bench/tasks/e2e-007.yml +10 -0
  77. package/evals/e2e-bench/tasks/e2e-008.yml +10 -0
  78. package/evals/e2e-bench/tasks/e2e-009.yml +10 -0
  79. package/evals/trajectories/rubric.md +12 -0
  80. package/evals/trajectories/test_harness_conventions.py +271 -0
  81. package/infra/README.md +49 -0
  82. package/infra/langfuse/docker-compose.yml +25 -0
  83. package/infra/otel/collector-config.yml +24 -0
  84. package/infra/samples/gh-aw-dogfood-report.json +44 -0
  85. package/infra/samples/harness-review-routing-plan.json +19 -0
  86. package/infra/samples/harness-review-summary.json +61 -0
  87. package/infra/samples/telemetry-artifact.json +29 -0
  88. package/infra/samples/telemetry-payload.json +19 -0
  89. package/package.json +85 -0
  90. package/prompts/triager-classify.prompt.yml +10 -0
  91. package/sample/go/add.go +5 -0
  92. package/sample/go/add_test.go +9 -0
  93. package/sample/go/go.mod +3 -0
  94. package/sample/php/composer.json +26 -0
  95. package/sample/php/composer.lock +1881 -0
  96. package/sample/php/phpunit.xml +8 -0
  97. package/sample/php/src/Add.php +13 -0
  98. package/sample/php/tests/AddTest.php +16 -0
  99. package/sample/python/requirements-dev.txt +2 -0
  100. package/sample/python/src/__init__.py +0 -0
  101. package/sample/python/src/greet.py +3 -0
  102. package/sample/python/tests/conftest.py +4 -0
  103. package/sample/python/tests/test_greet.py +5 -0
  104. package/sample/ruby/.rubocop.yml +10 -0
  105. package/sample/ruby/Gemfile +6 -0
  106. package/sample/ruby/Gemfile.lock +58 -0
  107. package/sample/ruby/lib/add.rb +9 -0
  108. package/sample/ruby/spec/add_spec.rb +11 -0
  109. package/sample/ts/biome.json +6 -0
  110. package/sample/ts/package-lock.json +1763 -0
  111. package/sample/ts/package.json +15 -0
  112. package/sample/ts/src/add.ts +3 -0
  113. package/sample/ts/tests/add.test.ts +8 -0
  114. package/sample/ts/tsconfig.json +12 -0
  115. package/scripts/aggregate-harness-review.mjs +48 -0
  116. package/scripts/bootstrap-harness.sh +411 -0
  117. package/scripts/check-diff-size.mjs +46 -0
  118. package/scripts/check-e2e-manifest.mjs +35 -0
  119. package/scripts/check-eval-score-drift.mjs +31 -0
  120. package/scripts/check-gh-aw-dogfood-scope.mjs +51 -0
  121. package/scripts/check-issue-spec.mjs +215 -0
  122. package/scripts/check-l1-readiness.mjs +82 -0
  123. package/scripts/check-open-pr-limit.mjs +34 -0
  124. package/scripts/doctor.mjs +177 -0
  125. package/scripts/emit-gh-aw-dogfood-report.mjs +112 -0
  126. package/scripts/emit-telemetry-artifact.mjs +99 -0
  127. package/scripts/fetch-telemetry-artifacts.mjs +176 -0
  128. package/scripts/harness-drift-report.mjs +99 -0
  129. package/scripts/lib/bootstrap-copy.mjs +123 -0
  130. package/scripts/lib/ccsd-contract.mjs +212 -0
  131. package/scripts/lib/diff-size.mjs +103 -0
  132. package/scripts/lib/doctor-local.mjs +179 -0
  133. package/scripts/lib/e2e-manifest.mjs +76 -0
  134. package/scripts/lib/gh-aw-dogfood.mjs +293 -0
  135. package/scripts/lib/github-config.mjs +94 -0
  136. package/scripts/lib/harness-ci-fragments.mjs +98 -0
  137. package/scripts/lib/harness-review-routing.mjs +244 -0
  138. package/scripts/lib/harness-review.mjs +388 -0
  139. package/scripts/lib/issue-form-label-sync.mjs +56 -0
  140. package/scripts/lib/l1-readiness.mjs +258 -0
  141. package/scripts/lib/merge-harness-package.mjs +36 -0
  142. package/scripts/lib/npm-package.mjs +129 -0
  143. package/scripts/lib/setup-wizard.mjs +224 -0
  144. package/scripts/lib/stacks.mjs +138 -0
  145. package/scripts/lib/telemetry-artifact.mjs +253 -0
  146. package/scripts/lib/template-root.mjs +39 -0
  147. package/scripts/merge-harness-package.mjs +14 -0
  148. package/scripts/route-harness-review.mjs +168 -0
  149. package/scripts/run-e2e-bench.mjs +216 -0
  150. package/scripts/sdlc-gh-cli.mjs +91 -0
  151. package/scripts/select-eval-jobs.mjs +41 -0
  152. package/scripts/setup-github.mjs +242 -0
  153. package/scripts/setup-github.sh +4 -0
  154. package/scripts/setup-wizard.mjs +426 -0
  155. package/scripts/test-bootstrap-guidance-scenarios.mjs +94 -0
  156. package/scripts/test-diff-size-scenarios.mjs +88 -0
  157. package/scripts/test-doctor-scenarios.mjs +70 -0
  158. package/scripts/test-e2e-manifest-scenarios.mjs +65 -0
  159. package/scripts/test-gh-aw-dogfood-scenarios.mjs +74 -0
  160. package/scripts/test-harness-review-routing-scenarios.mjs +130 -0
  161. package/scripts/test-harness-review-scenarios.mjs +92 -0
  162. package/scripts/test-hooks-scenarios.mjs +44 -0
  163. package/scripts/test-issue-form-label-sync-scenarios.mjs +48 -0
  164. package/scripts/test-issue-spec-scenarios.mjs +258 -0
  165. package/scripts/test-l1-readiness-scenarios.mjs +204 -0
  166. package/scripts/test-merge-harness-package-scenarios.mjs +53 -0
  167. package/scripts/test-npm-package-scenarios.mjs +31 -0
  168. package/scripts/test-sdlc-gh-cli-scenarios.mjs +54 -0
  169. package/scripts/test-setup-github-scenarios.mjs +103 -0
  170. package/scripts/test-setup-wizard-scenarios.mjs +114 -0
  171. package/scripts/test-telemetry-artifact-scenarios.mjs +69 -0
  172. package/scripts/trim-harness-ci.mjs +18 -0
  173. package/scripts/validate-gh-aw-compile.mjs +64 -0
  174. package/scripts/validate-harness.mjs +199 -0
  175. package/scripts/validate-telemetry.mjs +21 -0
  176. package/scripts/verify-bootstrap-stacks.sh +192 -0
@@ -0,0 +1,244 @@
1
+ /**
2
+ * Route nightly harness review classifications into GitHub issues.
3
+ * See docs/nightly-harness-review.md and docs/failure-taxonomy.md (#4).
4
+ */
5
+
6
/** Schema version written to the `schema_version` field of routing plans. */
export const ROUTING_SCHEMA_VERSION = "1";

/** Prefix of the HTML-comment marker that ties an issue body to a dedupe key. */
export const ROUTING_MARKER_PREFIX = "harness-routing-key:";

/** Kinds of follow-up issue the router can produce. */
export const ISSUE_KIND = {
  HARNESS_REVISION: "harness-revision",
  WALL_ADDITION: "wall-addition",
};

/**
 * Labels attached to issues of each kind: `outer-loop:<kind>` plus `autonomy:L0`.
 * @type {Record<string, string[]>}
 */
export const ISSUE_LABELS = Object.fromEntries(
  Object.values(ISSUE_KIND).map((kind) => [kind, [`outer-loop:${kind}`, "autonomy:L0"]]),
);
19
+
20
/**
 * Compose the colon-separated key that identifies one routed issue.
 *
 * @param {string} repo Repository the finding belongs to.
 * @param {string} kind Issue kind (one of the `ISSUE_KIND` values).
 * @param {string} signature Failure signature name.
 * @param {string} scope Scope string, e.g. as produced by `inferRoutingScope`.
 * @returns {string} `repo:kind:signature:scope`
 */
export function routingDedupeKey(repo, kind, signature, scope) {
  const parts = [repo, kind, signature, scope];
  // Stringify each part via a template so nullish values render as text, not "".
  return parts.map((part) => `${part}`).join(":");
}
30
+
31
/**
 * Wrap a dedupe key in the HTML comment embedded at the top of routed issue bodies.
 *
 * @param {string} dedupeKey Key produced by `routingDedupeKey`.
 * @returns {string} `<!-- harness-routing-key:<dedupeKey> -->`
 */
export function routingMarker(dedupeKey) {
  const payload = `${ROUTING_MARKER_PREFIX}${dedupeKey}`;
  return `<!-- ${payload} -->`;
}
38
+
39
/**
 * Report whether an issue body contains the routing marker for a dedupe key.
 *
 * @param {string} body Issue body; falsy values are treated as an empty string.
 * @param {string} dedupeKey Key whose marker is searched for.
 * @returns {boolean} True when the exact marker comment occurs in the body.
 */
export function bodyHasRoutingMarker(body, dedupeKey) {
  const text = String(body || "");
  const marker = routingMarker(dedupeKey);
  return text.includes(marker);
}
47
+
48
/**
 * Decide whether the summary shows a repeated "FF不足" pattern.
 *
 * True when at least two classification rows are "FF不足", or when the rollup
 * lists a `lint` failure signature with a record count of two or more.
 *
 * @param {Record<string, unknown>} summary Harness review summary.
 * @returns {boolean}
 */
export function hasRepeatedFfFindings(summary) {
  const rows = summary.classifications ?? [];
  const ffRowCount = rows.filter((row) => row.classification === "FF不足").length;
  if (ffRowCount >= 2) {
    return true;
  }

  const signatures = summary.rollup?.repeated_failure_signatures ?? [];
  const isRepeatedLint = (sig) =>
    sig.wall_failure_type === "lint" && Number(sig.record_count) >= 2;
  return signatures.some(isRepeatedLint);
}
60
+
61
/**
 * Decide whether the summary shows a repeated "壁不足" pattern.
 *
 * True when at least two classification rows are "壁不足", or when there is at
 * least one such row and `rollup.review_rejection_proxy_count` is one or more.
 *
 * @param {Record<string, unknown>} summary Harness review summary.
 * @returns {boolean}
 */
export function hasRepeatedWallFindings(summary) {
  const rows = summary.classifications ?? [];
  const wallRowCount = rows.filter((row) => row.classification === "壁不足").length;
  if (wallRowCount >= 2) {
    return true;
  }

  const proxyCount = Number(summary.rollup?.review_rejection_proxy_count ?? 0);
  return wallRowCount >= 1 && proxyCount >= 1;
}
71
+
72
/**
 * Derive a scope string from the task classes and wall failure types of a set
 * of classification rows.
 *
 * Task classes take precedence: a single class yields `task:<c>` (with
 * `|wall:<w>` appended when there is also exactly one wall type), several
 * classes yield `tasks:<a>+<b>`. Only when no task class is present do wall
 * types decide (`wall:<w>` / `walls:<a>+<b>`). With neither, the result is
 * "unknown-scope". Values are de-duplicated and sorted.
 *
 * @param {Record<string, unknown>[]} items Classification rows.
 * @returns {string}
 */
export function inferRoutingScope(items) {
  const rawTaskClasses = items.map((item) => String(item.task_class || "")).filter(Boolean);
  const taskClasses = [...new Set(rawTaskClasses)].sort();

  const rawWallTypes = items
    .flatMap((item) => item.wall_failure_types ?? [])
    .map(String)
    .filter(Boolean);
  const wallTypes = [...new Set(rawWallTypes)].sort();

  if (taskClasses.length === 1) {
    const [taskClass] = taskClasses;
    return wallTypes.length === 1
      ? `task:${taskClass}|wall:${wallTypes[0]}`
      : `task:${taskClass}`;
  }
  if (taskClasses.length > 1) {
    return `tasks:${taskClasses.join("+")}`;
  }

  switch (wallTypes.length) {
    case 0:
      return "unknown-scope";
    case 1:
      return `wall:${wallTypes[0]}`;
    default:
      return `walls:${wallTypes.join("+")}`;
  }
}
97
+
98
/**
 * Build the `open_or_update_issue` action for one issue kind.
 *
 * The issue body starts with the routing marker for the dedupe key, followed
 * by a summary section, an evidence table (one row per item), suggested next
 * steps and a rollback note.
 *
 * @param {Record<string, unknown>} summary Harness review summary (`repo`,
 *   `window_hours` and `generated_at` are read).
 * @param {string} kind One of the `ISSUE_KIND` values.
 * @param {Record<string, unknown>[]} items Classification rows used as evidence.
 * @param {string} signature Failure signature name used in the key and title.
 * @returns {Record<string, unknown>} Action with `dedupe_key`, `labels`,
 *   `title`, `body` and `evidence_count`.
 */
export function buildIssueAction(summary, kind, items, signature) {
  // Values come from telemetry; a raw "|" or newline would split or end the
  // Markdown table row, so escape pipes and flatten line breaks in each cell.
  const cell = (value) => String(value).replace(/\|/g, "\\|").replace(/\r?\n|\r/g, " ");

  const repo = String(summary.repo ?? "unknown/unknown");
  const scope = inferRoutingScope(items);
  const dedupeKey = routingDedupeKey(repo, kind, signature, scope);
  const windowHours = summary.window_hours ?? 24;
  const marker = routingMarker(dedupeKey);

  const title =
    kind === ISSUE_KIND.HARNESS_REVISION
      ? `[outer-loop] Harness revision needed (${signature} / ${scope})`
      : `[outer-loop] Wall addition needed (${signature} / ${scope})`;

  const lines = [
    marker,
    "",
    "## Summary",
    "",
    `Nightly harness review (${windowHours}h window) routed **${kind}** work.`,
    "",
    `Generated: ${summary.generated_at}`,
    `Repository: ${repo}`,
    `Scope: ${scope}`,
    "",
    "## Evidence",
    "",
    "| task_id | pr | rationale | wall_failure_types |",
    "|---------|----|-----------|--------------------|",
  ];

  for (const item of items) {
    const walls = (item.wall_failure_types ?? []).join(", ") || "—";
    lines.push(
      `| ${cell(item.task_id)} | ${cell(item.pr_number)} | ${cell(item.rationale)} | ${cell(walls)} |`,
    );
  }

  lines.push(
    "",
    "## Suggested next steps",
    "",
    kind === ISSUE_KIND.HARNESS_REVISION
      ? "- Update instructions / skills / agents for repeated convention gaps\n- Link eval or telemetry evidence in follow-up PRs"
      : "- Add tests, lint rules, or contracts so CI catches review findings\n- Keep proposal PRs at `autonomy:L0` until walls are updated",
    "",
    "## Rollback",
    "",
    "Close this issue if the signature does not recur in the next nightly window.",
    "",
    "Automated by `scripts/route-harness-review.mjs` (issue #4).",
  );

  return {
    action: "open_or_update_issue",
    kind,
    dedupe_key: dedupeKey,
    signature,
    scope,
    labels: ISSUE_LABELS[kind] ?? [],
    title,
    body: `${lines.join("\n")}\n`,
    evidence_count: items.length,
  };
}
168
+
169
/**
 * Turn a harness review summary into a routing plan: issue actions to take and
 * the kinds that were considered but skipped (with a reason).
 *
 * @param {Record<string, unknown>} summary Harness review summary.
 * @returns {Record<string, unknown>} Plan with `schema_version`,
 *   `generated_at` (current time), `source_summary_at`, `repo`, `actions` and
 *   `skipped`.
 */
export function buildRoutingPlan(summary) {
  const actions = [];
  const skipped = [];

  const rows = summary.classifications ?? [];
  const ffItems = rows.filter((c) => c.classification === "FF不足");
  const wallItems = rows.filter((c) => c.classification === "壁不足");
  const proxyCount = Number(summary.rollup?.review_rejection_proxy_count ?? 0);

  if (hasRepeatedFfFindings(summary) && ffItems.length > 0) {
    const signature = (summary.rollup?.repeated_failure_signatures ?? []).some(
      (s) => s.wall_failure_type === "lint",
    )
      ? "lint"
      : "ff-aggregate";
    actions.push(buildIssueAction(summary, ISSUE_KIND.HARNESS_REVISION, ffItems, signature));
  } else if (ffItems.length) {
    skipped.push({ kind: ISSUE_KIND.HARNESS_REVISION, reason: "FF不足 present but not repeated" });
  } else if (hasRepeatedFfFindings(summary)) {
    skipped.push({
      kind: ISSUE_KIND.HARNESS_REVISION,
      reason: "lint signature repeated without FF不足 classification rows",
    });
  }

  if (hasRepeatedWallFindings(summary) && wallItems.length > 0) {
    const signature = proxyCount >= 1 ? "ci-pass-review-reject" : "wall-aggregate";
    actions.push(buildIssueAction(summary, ISSUE_KIND.WALL_ADDITION, wallItems, signature));
  } else if (wallItems.length) {
    skipped.push({ kind: ISSUE_KIND.WALL_ADDITION, reason: "壁不足 present but not repeated" });
  } else if (proxyCount >= 1) {
    // hasRepeatedWallFindings() can never be true without 壁不足 rows, so the
    // proxy count is tested directly; otherwise this skip is never reported.
    skipped.push({
      kind: ISSUE_KIND.WALL_ADDITION,
      reason: "review-rejection proxy without 壁不足 classification rows",
    });
  }

  return {
    schema_version: ROUTING_SCHEMA_VERSION,
    generated_at: new Date().toISOString(),
    source_summary_at: summary.generated_at ?? null,
    repo: summary.repo ?? "unknown/unknown",
    actions,
    skipped,
  };
}
220
+
221
/**
 * Resolve each planned action against known issues without touching GitHub.
 *
 * An action becomes `update_issue` when some existing issue body carries its
 * routing marker, otherwise `create_issue`.
 *
 * @param {Record<string, unknown>} plan Routing plan from `buildRoutingPlan`.
 * @param {{ existingIssues?: { number: number, body: string }[] }} [ctx]
 * @returns {Record<string, unknown>} Copy of the plan with a `results` array added.
 */
export function applyRoutingPlanDryRun(plan, ctx = {}) {
  const knownIssues = ctx.existingIssues ?? [];

  const results = (plan.actions ?? []).map((action) => {
    const { dedupe_key: dedupeKey, kind, title } = action;
    const existing = knownIssues.find((issue) => bodyHasRoutingMarker(issue.body, dedupeKey));
    return {
      dedupe_key: dedupeKey,
      kind,
      operation: existing ? "update_issue" : "create_issue",
      issue_number: existing?.number ?? null,
      title,
    };
  });

  return { ...plan, results };
}
@@ -0,0 +1,388 @@
1
+ /**
2
+ * Nightly harness review — aggregate telemetry artifacts and classify failures.
3
+ * See docs/failure-taxonomy.md and docs/nightly-harness-review.md.
4
+ */
5
+
6
/** Schema version written to the `schema_version` field of review summaries. */
export const REVIEW_SCHEMA_VERSION = "1";

/** Name of the review output directory (not referenced in this module; presumably used by callers). */
export const REVIEW_OUT_DIR = "harness-review";

/** Retry count at which a task group is treated as having exhausted its retries. */
export const MAX_RETRIES = 3;

/**
 * Failure classes a task group can be assigned: 壁不足 ("insufficient walls"),
 * モデル限界 ("model limit"), FF不足 ("insufficient FF"; see
 * docs/failure-taxonomy.md for the definition) and "unclassified".
 * @type {readonly string[]}
 */
export const FAILURE_CLASSES = [
  "FF不足",
  "壁不足",
  "モデル限界",
  "unclassified",
];

/** Wall types whose repetition is classified as FF不足. */
const FF_WALL_TYPES = new Set(["lint"]);

/** Wall types that usually indicate model / execution limits when repeated. */
const MODEL_LIMIT_WALL_TYPES = new Set([
  "test",
  "type",
  "security",
  "safe-output",
  "diff-size",
]);
17
+
18
/**
 * Key identifying one telemetry emission: workflow run, source and PR number.
 *
 * @param {Record<string, unknown>} record Telemetry record; `payload` may be absent.
 * @returns {string} `workflow_run_id:source:pr_number`
 */
export function telemetryDedupeKey(record) {
  const { workflow_run_id: runId, source, payload } = record;
  const prNumber = (payload ?? {}).pr_number;
  return `${runId}:${source}:${prNumber}`;
}
26
+
27
/**
 * Collapse records that share a telemetry dedupe key, keeping the one whose
 * `emitted_at` compares greatest as a string (the first seen wins ties).
 *
 * @param {Record<string, unknown>[]} records Telemetry records.
 * @returns {Record<string, unknown>[]} One record per key, in first-seen key order.
 */
export function dedupeTelemetryRecords(records) {
  const latestByKey = new Map();

  for (const candidate of records) {
    const key = telemetryDedupeKey(candidate);
    const current = latestByKey.get(key);
    // Keep the stored record unless the candidate is strictly newer.
    if (current && String(candidate.emitted_at) <= String(current.emitted_at)) {
      continue;
    }
    latestByKey.set(key, candidate);
  }

  return Array.from(latestByKey.values());
}
42
+
43
/**
 * Key used to group telemetry records belonging to the same task attempt.
 *
 * @param {Record<string, unknown>} record Telemetry record; `payload` may be absent.
 * @returns {string} `repo|task_id|pr_number` taken from the payload.
 */
export function taskGroupKey(record) {
  const { repo, task_id: taskId, pr_number: prNumber } = record.payload ?? {};
  return `${repo}|${taskId}|${prNumber}`;
}
51
+
52
/**
 * Bucket telemetry records by their task group key, preserving input order
 * within each bucket.
 *
 * @param {Record<string, unknown>[]} records Telemetry records.
 * @returns {Map<string, Record<string, unknown>[]>} Records keyed by `taskGroupKey`.
 */
export function groupRecordsByTask(records) {
  const buckets = new Map();

  for (const record of records) {
    const key = taskGroupKey(record);
    if (buckets.has(key)) {
      buckets.get(key).push(record);
    } else {
      buckets.set(key, [record]);
    }
  }

  return buckets;
}
66
+
67
/**
 * Count occurrences of each value, skipping falsy entries.
 *
 * Counting happens in a Map and is materialised with `Object.fromEntries`, so
 * values that collide with `Object.prototype` members ("constructor",
 * "toString", "__proto__") are counted as ordinary own keys instead of reading
 * the inherited member.
 *
 * @param {string[]} values Values to tally.
 * @returns {Record<string, number>} Plain object mapping each value to its count.
 */
export function countValues(values) {
  const tally = new Map();
  for (const value of values) {
    if (!value) continue;
    const key = String(value);
    tally.set(key, (tally.get(key) ?? 0) + 1);
  }
  return Object.fromEntries(tally);
}
79
+
80
/**
 * Report whether any record in the group carries a failure signal: a wall
 * failure type, an "escalated" final outcome, a "changes_requested" review
 * outcome, or a retry count above zero.
 *
 * @param {Record<string, unknown>[]} records Telemetry records of one task group.
 * @returns {boolean}
 */
export function groupHasFailureSignal(records) {
  return records.some((record) => {
    const {
      wall_failure_type: wallType,
      final_outcome: finalOutcome,
      review_outcome: reviewOutcome,
      retry_count: retryCount,
    } = record.payload ?? {};

    return (
      Boolean(wallType) ||
      finalOutcome === "escalated" ||
      reviewOutcome === "changes_requested" ||
      Number(retryCount) > 0
    );
  });
}
94
+
95
/**
 * Classify one task group's telemetry into a failure class.
 *
 * Rules are evaluated in order and the first match wins:
 *  1. harness-ci record without a wall failure + review changes requested -> 壁不足
 *  2. escalated, or retry count >= MAX_RETRIES                           -> モデル限界
 *  3. any "security" wall failure                                        -> モデル限界
 *  4. two or more FF wall failures (lint)                                -> FF不足
 *  5. same wall type repeated with >= 2 retry-orchestrator records       -> モデル限界
 *  6. same wall type repeated otherwise -> モデル限界 if it is a model-limit
 *     wall type, else "unclassified"
 *  7. a single wall failure, or retries without a wall type              -> "unclassified"
 *  8. review changes requested with no other signal                      -> 壁不足
 *
 * @param {Record<string, unknown>[]} records Telemetry records of one task group.
 * @returns {{ classification: string, rationale: string } | null} Null when
 *   the group carries no failure signal.
 */
export function classifyTaskGroup(records) {
  if (!groupHasFailureSignal(records)) return null;

  const payloads = records.map((record) => record.payload ?? {});

  const maxRetry = Math.max(0, ...payloads.map((p) => Number(p.retry_count) || 0));
  const wallTypes = payloads.map((p) => String(p.wall_failure_type || "")).filter(Boolean);
  const wallCounts = countValues(wallTypes);
  const escalated = payloads.some((p) => p.final_outcome === "escalated");
  const reviewRejected = payloads.some((p) => p.review_outcome === "changes_requested");
  // Read the source from the record itself (as retryEvents below and
  // buildHarnessReviewSummary do) so a `source` field inside the payload
  // cannot override it.
  const harnessGreen = records.some(
    (r) => r.source === "harness-ci" && !(r.payload ?? {}).wall_failure_type,
  );
  const retryEvents = records.filter((r) => r.source === "agent-retry-orchestrator");

  if (harnessGreen && reviewRejected) {
    return {
      classification: "壁不足",
      rationale: "Harness CI passed while review_outcome is changes_requested",
    };
  }

  if (escalated || maxRetry >= MAX_RETRIES) {
    return {
      classification: "モデル限界",
      rationale: `Retry budget exhausted or escalated (max_retry_count=${maxRetry})`,
    };
  }

  if (wallTypes.includes("security")) {
    return {
      classification: "モデル限界",
      rationale: "Security wall failures are not auto-retried",
    };
  }

  const lintFailures = wallTypes.filter((w) => FF_WALL_TYPES.has(w)).length;
  if (lintFailures >= 2) {
    return {
      classification: "FF不足",
      rationale: "Repeated lint or issue-spec convention failures",
    };
  }

  const repeatedWall = Object.entries(wallCounts).find(([, count]) => count >= 2);
  if (repeatedWall && retryEvents.length >= 2) {
    return {
      classification: "モデル限界",
      rationale: `Same wall_failure_type (${repeatedWall[0]}) across multiple retry events`,
    };
  }

  if (repeatedWall) {
    // A repeated FF wall type cannot reach this point: it would already have
    // returned FF不足 at the lintFailures check above.
    const [wallType] = repeatedWall;
    if (MODEL_LIMIT_WALL_TYPES.has(wallType)) {
      return {
        classification: "モデル限界",
        rationale: `Repeated wall_failure_type ${wallType} after retries`,
      };
    }
    return {
      classification: "unclassified",
      rationale: `Repeated wall_failure_type ${wallType} without taxonomy mapping`,
    };
  }

  if (wallTypes.length > 0 || maxRetry > 0) {
    return {
      classification: "unclassified",
      rationale:
        wallTypes.length > 0
          ? `Single wall failure (${wallTypes[0]}) without repeat pattern`
          : `Retry activity (count=${maxRetry}) without wall_failure_type`,
    };
  }

  if (reviewRejected) {
    return {
      classification: "壁不足",
      rationale: "Review rejection without CI failure signal in telemetry",
    };
  }

  return null;
}
191
+
192
/**
 * Aggregate wall failure types across task groups and report those that
 * repeat: seen in at least two records, or in at least two distinct tasks.
 *
 * Statistics are kept in a Map so that a wall type colliding with an
 * `Object.prototype` member (e.g. "constructor") is treated as ordinary data.
 *
 * @param {Map<string, Record<string, unknown>[]>} groups Records grouped by task.
 * @returns {Record<string, unknown>[]} Entries with `wall_failure_type`,
 *   `record_count`, `task_count` and `task_ids`, sorted by `record_count`
 *   descending.
 */
export function buildRepeatedFailureSignatures(groups) {
  /** @type {Map<string, { record_count: number, task_ids: Set<string> }>} */
  const byWall = new Map();

  for (const groupRecords of groups.values()) {
    // The group's task id is taken from its first record.
    const taskId = String((groupRecords[0]?.payload ?? {}).task_id ?? "");

    for (const record of groupRecords) {
      const wallType = String((record.payload ?? {}).wall_failure_type || "");
      if (!wallType) continue;

      let stats = byWall.get(wallType);
      if (!stats) {
        stats = { record_count: 0, task_ids: new Set() };
        byWall.set(wallType, stats);
      }
      stats.record_count += 1;
      if (taskId) stats.task_ids.add(taskId);
    }
  }

  return [...byWall]
    .filter(([, stats]) => stats.record_count >= 2 || stats.task_ids.size >= 2)
    .map(([wall_failure_type, stats]) => ({
      wall_failure_type,
      record_count: stats.record_count,
      task_count: stats.task_ids.size,
      task_ids: [...stats.task_ids],
    }))
    .sort((a, b) => b.record_count - a.record_count);
}
225
+
226
/**
 * Build the nightly harness review summary from raw telemetry records.
 *
 * Records are de-duplicated, grouped per task, and each group is classified.
 * The rollup counts task groups per wall failure type, groups that exhausted
 * retries or escalated, and groups where harness CI passed but review
 * requested changes.
 *
 * @param {Record<string, unknown>[]} records Raw telemetry records.
 * @param {{ repo?: string, windowHours?: number, generatedAt?: string }} [options]
 *   Overrides; `generatedAt` defaults to the current time, `windowHours` to 24
 *   and `repo` to the first record's payload repo.
 * @returns {Record<string, unknown>} Summary with `rollup` and `classifications`.
 */
export function buildHarnessReviewSummary(records, options = {}) {
  const deduped = dedupeTelemetryRecords(records);
  const groups = groupRecordsByTask(deduped);
  const classifications = [];

  // Counted in a Map so wall types such as "constructor" or "__proto__" cannot
  // pick up inherited Object.prototype members while being tallied.
  const wallFailureRollup = new Map();
  let retryExhaustionCount = 0;
  let reviewRejectionProxyCount = 0;

  for (const groupRecords of groups.values()) {
    const payloads = groupRecords.map((r) => r.payload ?? {});
    const wallTypes = [
      ...new Set(payloads.map((p) => String(p.wall_failure_type || "")).filter(Boolean)),
    ];
    // Each wall type counts once per task group, not once per record.
    for (const wallType of wallTypes) {
      wallFailureRollup.set(wallType, (wallFailureRollup.get(wallType) ?? 0) + 1);
    }

    const maxRetry = Math.max(0, ...payloads.map((p) => Number(p.retry_count) || 0));
    const escalated = payloads.some((p) => p.final_outcome === "escalated");
    if (escalated || maxRetry >= MAX_RETRIES) retryExhaustionCount += 1;

    const harnessGreen = groupRecords.some(
      (r) => r.source === "harness-ci" && !(r.payload ?? {}).wall_failure_type,
    );
    const reviewRejected = payloads.some((p) => p.review_outcome === "changes_requested");
    if (harnessGreen && reviewRejected) reviewRejectionProxyCount += 1;

    const result = classifyTaskGroup(groupRecords);
    if (!result) continue;

    // Identifying fields are taken from the group's first payload.
    const sample = payloads[0] ?? {};
    classifications.push({
      repo: sample.repo,
      task_id: sample.task_id,
      pr_number: sample.pr_number,
      task_class: sample.task_class,
      autonomy_level: sample.autonomy_level,
      classification: result.classification,
      rationale: result.rationale,
      wall_failure_types: [...wallTypes],
      max_retry_count: maxRetry,
      final_outcome: payloads.map((p) => p.final_outcome).find(Boolean) ?? "in_progress",
      review_outcome: payloads.map((p) => p.review_outcome).find((v) => v && v !== "pending") ?? "pending",
      sources: [...new Set(groupRecords.map((r) => r.source))],
      workflow_run_ids: [...new Set(groupRecords.map((r) => r.workflow_run_id).filter(Boolean))],
    });
  }

  const byClassification = countValues(classifications.map((item) => item.classification));
  const repeatedFailureSignatures = buildRepeatedFailureSignatures(groups);

  return {
    schema_version: REVIEW_SCHEMA_VERSION,
    generated_at: options.generatedAt ?? new Date().toISOString(),
    repo: options.repo ?? deduped[0]?.payload?.repo ?? "unknown/unknown",
    window_hours: Number(options.windowHours ?? 24),
    rollup: {
      telemetry_records: deduped.length,
      task_groups: groups.size,
      failure_groups: classifications.length,
      by_wall_failure_type: Object.fromEntries(wallFailureRollup),
      repeated_failure_signatures: repeatedFailureSignatures,
      retry_exhaustion_count: retryExhaustionCount,
      review_rejection_proxy_count: reviewRejectionProxyCount,
      by_classification: byClassification,
    },
    classifications,
  };
}
301
+
302
/**
 * Render a harness review summary as a Markdown report.
 *
 * Sections: rollup metrics, counts by wall failure type, repeated failure
 * signatures, counts by classification, and a per-task table. Count tables are
 * sorted by count descending. Free-text table cells have `|` escaped and line
 * breaks flattened so a value cannot break its table row.
 *
 * @param {Record<string, unknown>} summary Summary as built by
 *   `buildHarnessReviewSummary`; missing sections render as empty.
 * @returns {string} Markdown text ending with a newline.
 */
export function formatHarnessReviewMarkdown(summary) {
  // Escape characters that would split or terminate a Markdown table row.
  const cell = (value) => String(value).replace(/\|/g, "\\|").replace(/\r?\n|\r/g, " ");
  const byCountDesc = (a, b) => b[1] - a[1];

  const rollup = summary.rollup ?? {};
  const lines = [
    "# Nightly harness review",
    "",
    `Generated: ${summary.generated_at}`,
    `Repository: ${summary.repo}`,
    `Window: last ${summary.window_hours}h`,
    "",
    "## Rollup",
    "",
    `| Metric | Value |`,
    `|--------|-------|`,
    `| Telemetry records | ${rollup.telemetry_records ?? 0} |`,
    `| Task groups | ${rollup.task_groups ?? 0} |`,
    `| Classified failure groups | ${rollup.failure_groups ?? 0} |`,
    `| Retry exhaustion | ${rollup.retry_exhaustion_count ?? 0} |`,
    `| CI pass + review rejection proxy | ${rollup.review_rejection_proxy_count ?? 0} |`,
    "",
    "### By wall_failure_type",
    "",
  ];

  const wallEntries = Object.entries(rollup.by_wall_failure_type ?? {});
  if (wallEntries.length === 0) {
    lines.push("_No wall failures in window._", "");
  } else {
    lines.push("| wall_failure_type | count |", "|-------------------|-------|");
    for (const [type, count] of wallEntries.sort(byCountDesc)) {
      lines.push(`| ${cell(type)} | ${count} |`);
    }
    lines.push("");
  }

  const signatures = rollup.repeated_failure_signatures ?? [];
  lines.push("### Repeated failure signatures", "");
  if (signatures.length === 0) {
    lines.push("_No repeated failure signatures in window._", "");
  } else {
    lines.push("| wall_failure_type | record_count | task_count | task_ids |", "|---|---:|---:|---|");
    for (const sig of signatures) {
      const taskIds = (sig.task_ids ?? []).join(", ") || "—";
      lines.push(
        `| ${cell(sig.wall_failure_type)} | ${sig.record_count} | ${sig.task_count} | ${cell(taskIds)} |`,
      );
    }
    lines.push("");
  }

  lines.push("### By classification", "");
  const classEntries = Object.entries(rollup.by_classification ?? {});
  if (classEntries.length === 0) {
    lines.push("_No classified failures in window._", "");
  } else {
    lines.push("| classification | count |", "|----------------|-------|");
    for (const [cls, count] of classEntries.sort(byCountDesc)) {
      lines.push(`| ${cell(cls)} | ${count} |`);
    }
    lines.push("");
  }

  lines.push("## Per-task classifications", "");
  const items = summary.classifications ?? [];
  if (items.length === 0) {
    lines.push("_No per-task classification records._");
    return `${lines.join("\n")}\n`;
  }

  lines.push(
    "| task_id | pr | class | wall_failure_types | max_retry | rationale |",
    "|---------|----|-------|--------------------|-----------|-----------|",
  );
  for (const item of items) {
    const walls = (item.wall_failure_types ?? []).join(", ") || "—";
    lines.push(
      `| ${cell(item.task_id)} | ${cell(item.pr_number)} | ${cell(item.classification)} | ${cell(walls)} | ${item.max_retry_count} | ${cell(item.rationale)} |`,
    );
  }

  return `${lines.join("\n")}\n`;
}
@@ -0,0 +1,56 @@
1
/** Maps an Issue-form "Task class" selection to its `task:<class>` label. */
const TASK_CLASS_LABELS = new Map(
  [
    "docs",
    "test-fix",
    "refactor",
    "feature-small",
    "dependency-bump",
    "infra",
    "security-sensitive",
  ].map((taskClass) => [taskClass, `task:${taskClass}`]),
);

/** Maps an Issue-form "Max autonomy level" selection to its `autonomy:<level>` label. */
const AUTONOMY_LABELS = new Map(
  ["L0", "L1", "L2", "L3"].map((level) => [level, `autonomy:${level}`]),
);
17
+
18
/**
 * Return the text found under a `### <heading>` line of an issue body.
 *
 * Because the pattern uses the multiline flag, `$` matches at every line end,
 * so the lazy capture stops at the end of the first non-blank line after the
 * heading.
 *
 * @param {string | null | undefined} body Issue body; nullish is treated as empty.
 * @param {string} heading Heading text, matched literally.
 * @returns {string} Trimmed value, or "" when the heading is not found.
 */
function extractHeadingValue(body, heading) {
  // Escape regex metacharacters so headings such as "Cost (USD)" match literally.
  const literalHeading = String(heading).replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  const pattern = new RegExp(`^### ${literalHeading}\\s*\\n+([\\s\\S]*?)(?=\\n^### |$)`, "m");
  const match = String(body ?? "").match(pattern);
  return match?.[1]?.trim() ?? "";
}
23
+
24
/**
 * Read the "Task class" and "Max autonomy level" selections from a task
 * Issue-form body and resolve them to labels.
 *
 * @param {string | null} [body] Issue body. `null` (an issue with no
 *   description) is treated as an empty body; the default parameter alone only
 *   covers `undefined`.
 * @returns {{ taskClass: string, autonomy: string, taskLabel: string,
 *   autonomyLabel: string, isTaskIssue: boolean }} Labels are "" when the
 *   selection is not recognised; `isTaskIssue` is true when the Goal, Task
 *   class and Max autonomy level headings are all present.
 */
export function parseTaskIssueSelections(body = "") {
  const text = String(body ?? "");
  // Only the first line under each heading is the selection.
  const taskClass = extractHeadingValue(text, "Task class").split("\n")[0].trim();
  const autonomy = extractHeadingValue(text, "Max autonomy level").split("\n")[0].trim();
  const requiredHeadings = ["### Goal", "### Task class", "### Max autonomy level"];

  return {
    taskClass,
    autonomy,
    taskLabel: TASK_CLASS_LABELS.get(taskClass) ?? "",
    autonomyLabel: AUTONOMY_LABELS.get(autonomy) ?? "",
    isTaskIssue: requiredHeadings.every((heading) => text.includes(heading)),
  };
}
36
+
37
/**
 * Plan the label set for a task issue from its parsed Issue-form selections.
 *
 * Existing `task:*` and `autonomy:*` labels are replaced by the parsed pair;
 * all other labels are kept. When either selection is unresolved the existing
 * labels are returned untouched.
 *
 * `changed` compares the label sets, not their order, so an issue that already
 * carries the right labels in a different order is not reported as changed.
 *
 * @param {string[]} [existingLabels] Label names currently on the issue.
 * @param {{ taskLabel: string, autonomyLabel: string }} parsed Result of
 *   `parseTaskIssueSelections`.
 * @returns {{ labels: string[], changed: boolean, reason: string }}
 */
export function planIssueLabels(existingLabels = [], parsed) {
  if (!parsed.taskLabel || !parsed.autonomyLabel) {
    return {
      labels: existingLabels,
      changed: false,
      reason: "task issue selections could not be resolved",
    };
  }

  const keep = existingLabels.filter(
    (label) => !label.startsWith("task:") && !label.startsWith("autonomy:"),
  );
  const labels = [...keep, parsed.taskLabel, parsed.autonomyLabel];

  // Equal sizes plus every planned label already present means equal sets.
  const before = new Set(existingLabels);
  const after = new Set(labels);
  const changed = before.size !== after.size || labels.some((label) => !before.has(label));

  return {
    labels,
    changed,
    reason: changed ? "updated labels from Issue form selections" : "labels already matched Issue form selections",
  };
}