@guilz-dev/sdlc-gh 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (176) hide show
  1. package/.github/CODEOWNERS +5 -0
  2. package/.github/ISSUE_TEMPLATE/bug_report.yml +68 -0
  3. package/.github/ISSUE_TEMPLATE/config.yml +1 -0
  4. package/.github/ISSUE_TEMPLATE/feature_request.yml +39 -0
  5. package/.github/ISSUE_TEMPLATE/support.yml +56 -0
  6. package/.github/ISSUE_TEMPLATE/task.yml +89 -0
  7. package/.github/agents/implementer.agent.md +17 -0
  8. package/.github/agents/reviewer.agent.md +18 -0
  9. package/.github/agents/triager.agent.md +13 -0
  10. package/.github/aw/actions-lock.json +9 -0
  11. package/.github/copilot-instructions.md +35 -0
  12. package/.github/hooks/hooks.json +12 -0
  13. package/.github/instructions/core.instructions.md +11 -0
  14. package/.github/instructions/profiles/go.instructions.md +10 -0
  15. package/.github/instructions/profiles/php.instructions.md +11 -0
  16. package/.github/instructions/profiles/python.instructions.md +11 -0
  17. package/.github/instructions/profiles/ruby.instructions.md +11 -0
  18. package/.github/instructions/profiles/typescript.instructions.md +11 -0
  19. package/.github/labels.yml +55 -0
  20. package/.github/pull_request_template.md +33 -0
  21. package/.github/ruleset.example.json +33 -0
  22. package/.github/ruleset.harness-eval.example.json +29 -0
  23. package/.github/skills/quality-loop/SKILL.md +23 -0
  24. package/.github/workflows/agent-retry-orchestrator.yml +161 -0
  25. package/.github/workflows/copilot-setup-steps.yml +64 -0
  26. package/.github/workflows/eval-ci.yml +169 -0
  27. package/.github/workflows/eval-drift.yml +75 -0
  28. package/.github/workflows/gh-aw-dogfood-ci.yml +73 -0
  29. package/.github/workflows/harness-ci.yml +244 -0
  30. package/.github/workflows/harness-sync.yml +28 -0
  31. package/.github/workflows/l1-readiness-check.yml +45 -0
  32. package/.github/workflows/labels-sync.yml +24 -0
  33. package/.github/workflows/nightly-harness-review.lock.yml +1643 -0
  34. package/.github/workflows/nightly-harness-review.md +87 -0
  35. package/.github/workflows/nightly-harness-review.yml +63 -0
  36. package/.github/workflows/npm-publish.yml +49 -0
  37. package/.github/workflows/pr-context-comment.yml +138 -0
  38. package/.github/workflows/product-ci-go.yml +33 -0
  39. package/.github/workflows/product-ci-php.yml +39 -0
  40. package/.github/workflows/product-ci-python.yml +34 -0
  41. package/.github/workflows/product-ci-ruby.yml +35 -0
  42. package/.github/workflows/product-ci-ts.yml +37 -0
  43. package/.github/workflows/task-issue-label-sync.yml +50 -0
  44. package/.github/workflows/weekly-redteam.lock.yml +1571 -0
  45. package/.github/workflows/weekly-redteam.md +76 -0
  46. package/.github/zizmor.yml +11 -0
  47. package/AGENTS.md +54 -0
  48. package/LICENSE +21 -0
  49. package/README.md +366 -0
  50. package/config/stacks.json +55 -0
  51. package/docs/adoption.md +126 -0
  52. package/docs/arch.md +535 -0
  53. package/docs/auth-boundaries.md +16 -0
  54. package/docs/coding-agent-l1.md +152 -0
  55. package/docs/exceptions/README.md +25 -0
  56. package/docs/exceptions/TEMPLATE.md +8 -0
  57. package/docs/failure-taxonomy.md +23 -0
  58. package/docs/gh-aw-dogfood.md +109 -0
  59. package/docs/kpi-baseline.md +9 -0
  60. package/docs/nightly-harness-review.md +94 -0
  61. package/docs/operations.md +108 -0
  62. package/docs/publishing.md +79 -0
  63. package/docs/revert-playbook.md +44 -0
  64. package/docs/shared-config.md +30 -0
  65. package/docs/telemetry-artifacts.md +78 -0
  66. package/docs/telemetry-schema.md +60 -0
  67. package/evals/.score-baseline.json +6 -0
  68. package/evals/e2e-bench/README.md +28 -0
  69. package/evals/e2e-bench/manifest.json +16 -0
  70. package/evals/e2e-bench/tasks/e2e-001.yml +10 -0
  71. package/evals/e2e-bench/tasks/e2e-002.yml +11 -0
  72. package/evals/e2e-bench/tasks/e2e-003.yml +10 -0
  73. package/evals/e2e-bench/tasks/e2e-004.yml +14 -0
  74. package/evals/e2e-bench/tasks/e2e-005.yml +11 -0
  75. package/evals/e2e-bench/tasks/e2e-006.yml +10 -0
  76. package/evals/e2e-bench/tasks/e2e-007.yml +10 -0
  77. package/evals/e2e-bench/tasks/e2e-008.yml +10 -0
  78. package/evals/e2e-bench/tasks/e2e-009.yml +10 -0
  79. package/evals/trajectories/rubric.md +12 -0
  80. package/evals/trajectories/test_harness_conventions.py +271 -0
  81. package/infra/README.md +49 -0
  82. package/infra/langfuse/docker-compose.yml +25 -0
  83. package/infra/otel/collector-config.yml +24 -0
  84. package/infra/samples/gh-aw-dogfood-report.json +44 -0
  85. package/infra/samples/harness-review-routing-plan.json +19 -0
  86. package/infra/samples/harness-review-summary.json +61 -0
  87. package/infra/samples/telemetry-artifact.json +29 -0
  88. package/infra/samples/telemetry-payload.json +19 -0
  89. package/package.json +85 -0
  90. package/prompts/triager-classify.prompt.yml +10 -0
  91. package/sample/go/add.go +5 -0
  92. package/sample/go/add_test.go +9 -0
  93. package/sample/go/go.mod +3 -0
  94. package/sample/php/composer.json +26 -0
  95. package/sample/php/composer.lock +1881 -0
  96. package/sample/php/phpunit.xml +8 -0
  97. package/sample/php/src/Add.php +13 -0
  98. package/sample/php/tests/AddTest.php +16 -0
  99. package/sample/python/requirements-dev.txt +2 -0
  100. package/sample/python/src/__init__.py +0 -0
  101. package/sample/python/src/greet.py +3 -0
  102. package/sample/python/tests/conftest.py +4 -0
  103. package/sample/python/tests/test_greet.py +5 -0
  104. package/sample/ruby/.rubocop.yml +10 -0
  105. package/sample/ruby/Gemfile +6 -0
  106. package/sample/ruby/Gemfile.lock +58 -0
  107. package/sample/ruby/lib/add.rb +9 -0
  108. package/sample/ruby/spec/add_spec.rb +11 -0
  109. package/sample/ts/biome.json +6 -0
  110. package/sample/ts/package-lock.json +1763 -0
  111. package/sample/ts/package.json +15 -0
  112. package/sample/ts/src/add.ts +3 -0
  113. package/sample/ts/tests/add.test.ts +8 -0
  114. package/sample/ts/tsconfig.json +12 -0
  115. package/scripts/aggregate-harness-review.mjs +48 -0
  116. package/scripts/bootstrap-harness.sh +411 -0
  117. package/scripts/check-diff-size.mjs +46 -0
  118. package/scripts/check-e2e-manifest.mjs +35 -0
  119. package/scripts/check-eval-score-drift.mjs +31 -0
  120. package/scripts/check-gh-aw-dogfood-scope.mjs +51 -0
  121. package/scripts/check-issue-spec.mjs +215 -0
  122. package/scripts/check-l1-readiness.mjs +82 -0
  123. package/scripts/check-open-pr-limit.mjs +34 -0
  124. package/scripts/doctor.mjs +177 -0
  125. package/scripts/emit-gh-aw-dogfood-report.mjs +112 -0
  126. package/scripts/emit-telemetry-artifact.mjs +99 -0
  127. package/scripts/fetch-telemetry-artifacts.mjs +176 -0
  128. package/scripts/harness-drift-report.mjs +99 -0
  129. package/scripts/lib/bootstrap-copy.mjs +123 -0
  130. package/scripts/lib/ccsd-contract.mjs +212 -0
  131. package/scripts/lib/diff-size.mjs +103 -0
  132. package/scripts/lib/doctor-local.mjs +179 -0
  133. package/scripts/lib/e2e-manifest.mjs +76 -0
  134. package/scripts/lib/gh-aw-dogfood.mjs +293 -0
  135. package/scripts/lib/github-config.mjs +94 -0
  136. package/scripts/lib/harness-ci-fragments.mjs +98 -0
  137. package/scripts/lib/harness-review-routing.mjs +244 -0
  138. package/scripts/lib/harness-review.mjs +388 -0
  139. package/scripts/lib/issue-form-label-sync.mjs +56 -0
  140. package/scripts/lib/l1-readiness.mjs +258 -0
  141. package/scripts/lib/merge-harness-package.mjs +36 -0
  142. package/scripts/lib/npm-package.mjs +129 -0
  143. package/scripts/lib/setup-wizard.mjs +224 -0
  144. package/scripts/lib/stacks.mjs +138 -0
  145. package/scripts/lib/telemetry-artifact.mjs +253 -0
  146. package/scripts/lib/template-root.mjs +39 -0
  147. package/scripts/merge-harness-package.mjs +14 -0
  148. package/scripts/route-harness-review.mjs +168 -0
  149. package/scripts/run-e2e-bench.mjs +216 -0
  150. package/scripts/sdlc-gh-cli.mjs +91 -0
  151. package/scripts/select-eval-jobs.mjs +41 -0
  152. package/scripts/setup-github.mjs +242 -0
  153. package/scripts/setup-github.sh +4 -0
  154. package/scripts/setup-wizard.mjs +426 -0
  155. package/scripts/test-bootstrap-guidance-scenarios.mjs +94 -0
  156. package/scripts/test-diff-size-scenarios.mjs +88 -0
  157. package/scripts/test-doctor-scenarios.mjs +70 -0
  158. package/scripts/test-e2e-manifest-scenarios.mjs +65 -0
  159. package/scripts/test-gh-aw-dogfood-scenarios.mjs +74 -0
  160. package/scripts/test-harness-review-routing-scenarios.mjs +130 -0
  161. package/scripts/test-harness-review-scenarios.mjs +92 -0
  162. package/scripts/test-hooks-scenarios.mjs +44 -0
  163. package/scripts/test-issue-form-label-sync-scenarios.mjs +48 -0
  164. package/scripts/test-issue-spec-scenarios.mjs +258 -0
  165. package/scripts/test-l1-readiness-scenarios.mjs +204 -0
  166. package/scripts/test-merge-harness-package-scenarios.mjs +53 -0
  167. package/scripts/test-npm-package-scenarios.mjs +31 -0
  168. package/scripts/test-sdlc-gh-cli-scenarios.mjs +54 -0
  169. package/scripts/test-setup-github-scenarios.mjs +103 -0
  170. package/scripts/test-setup-wizard-scenarios.mjs +114 -0
  171. package/scripts/test-telemetry-artifact-scenarios.mjs +69 -0
  172. package/scripts/trim-harness-ci.mjs +18 -0
  173. package/scripts/validate-gh-aw-compile.mjs +64 -0
  174. package/scripts/validate-harness.mjs +199 -0
  175. package/scripts/validate-telemetry.mjs +21 -0
  176. package/scripts/verify-bootstrap-stacks.sh +192 -0
@@ -0,0 +1,76 @@
1
+ ---
2
+ description: Weekly red team probe suite (garak).
3
+ name: Weekly red team
4
+ on:
5
+ schedule:
6
+ - cron: "0 3 * * 0"
7
+ permissions:
8
+ contents: read
9
+ issues: read
10
+ safe-outputs:
11
+ create-issue:
12
+ max: 2
13
+ ---
14
+
15
+ # Weekly red team (gh-aw source)
16
+
17
+ > **Operational baseline:** There is no standard GHA replacement yet — probes remain a **manual / scheduled stub** until the garak runtime prerequisites exist. Dogfood validates compile + safe-outputs only.
18
+
19
+ ## Required inputs
20
+
21
+ | Input | Source | Required |
22
+ |-------|--------|----------|
23
+ | Probe definitions | garak / harness red-team config (future) | best-effort |
24
+ | Target scope | Repository harness surfaces (agents, hooks, workflows) | yes |
25
+ | Prior weekly summary | Previous `create-issue` or morning queue entry | optional |
26
+
27
+ ## Forbidden operations
28
+
29
+ - Do **not** open pull requests (no `create-pull-request` safe-output)
30
+ - Do **not** exfiltrate secrets or modify production credentials
31
+ - Do **not** run unbounded network probes outside the AWF allowlist
32
+ - Do **not** block the GHA nightly harness review path
33
+
34
+ ## Expected outputs
35
+
36
+ | Output | Format | Limit |
37
+ |--------|--------|-------|
38
+ | Red-team findings | GitHub issue with severity + repro steps | `create-issue.max: 2` |
39
+ | Morning queue note | Markdown summary (issue body section) | human-readable |
40
+
41
+ ## Probe contract
42
+
43
+ When garak (or equivalent) is available:
44
+
45
+ 1. Run the configured probe suite against agent prompts and harness docs
46
+ 2. Record pass/fail per probe with `wall_failure_type: security` when applicable
47
+ 3. Open at most **two** issues for high-severity findings
48
+
49
+ Until the runtime exists, emit a single issue stating `status: stub — probes not executed` whenever the schedule fires.
50
+
51
+ ## Escalation
52
+
53
+ - **Critical** injection or secret-leak signal → open issue immediately; do not retry autonomously
54
+ - Repeated probe failures on an unchanged harness → route to [failure-taxonomy.md](../../docs/failure-taxonomy.md) **モデル限界** (model limitation) / human review
55
+
56
+ ## Fallback when gh-aw or garak regresses
57
+
58
+ 1. Skip probe execution; open a single tracking issue if the schedule fired
59
+ 2. Keep [nightly-harness-review.yml](./nightly-harness-review.yml) GHA path operational
60
+ 3. Revert `.md` / `.lock.yml` via dogfood rollback ([docs/gh-aw-dogfood.md](../../docs/gh-aw-dogfood.md))
61
+
62
+ ## Promotion criteria (gh-aw vs manual)
63
+
64
+ Enable gh-aw weekly execution when:
65
+
66
+ - garak (or substitute) runs in CI or AWF sandbox with pinned version
67
+ - Dogfood safe-output checks pass (`create-issue.max <= 2`, no auto-merge)
68
+ - At least one dry-run weekly report matches manual probe results
69
+
70
+ ## Agent instructions
71
+
72
+ Run the garak probe suite when tooling is present and report results to the morning queue.
73
+
74
+ When tooling is **missing**, create a stub issue documenting `garak: not available` and reference the threat-detection placeholder in [infra/README.md](../../infra/README.md).
75
+
76
+ Do not auto-merge. Do not modify product code.
@@ -0,0 +1,11 @@
1
+ # Tag-pinned actions are acceptable for this harness template; hash-pin in product repos if required.
2
+ rules:
3
+ unpinned-uses:
4
+ config:
5
+ policies:
6
+ actions/*: ref-pin
7
+ github/*: ref-pin
8
+ dependabot/*: ref-pin
9
+ EndBug/*: ref-pin
10
+ ruby/*: ref-pin
11
+ shivammathur/*: ref-pin
package/AGENTS.md ADDED
@@ -0,0 +1,54 @@
1
+ # Agent Harness — Project Instructions
2
+
3
+ ## Purpose
4
+
5
+ This repository (or a product repo using this harness) follows the agent harness architecture in `docs/arch.md`. Human judgment converges on **PR review only**.
6
+
7
+ ## CC-SD contract (L1 docs / test-fix)
8
+
9
+ For `task:docs` and `task:test-fix` delegated at `autonomy:L1`, the Issue embeds a lightweight CC-SD contract with these canonical fields:
10
+
11
+ | Field | Required |
12
+ |-------|----------|
13
+ | `Goal` | yes |
14
+ | `Non-goals` | yes |
15
+ | `Constraints` | yes |
16
+ | `Acceptance criteria` | yes |
17
+ | `Rollback hints` | yes |
18
+ | `Additional context` | optional |
19
+
20
+ CI enforces completeness via `issue-spec-check`. Task Issues created from `.github/ISSUE_TEMPLATE/task.yml` sync `task:*` / `autonomy:*` labels automatically via `.github/workflows/task-issue-label-sync.yml`. v1 does not cover `feature-small`, `infra`, or `security-sensitive`.
21
+
22
+ Before starting spec-driven L1 delegation, run readiness checks:
23
+
24
+ - `npm run check-l1-readiness`
25
+ - strict mode: `npm run check-l1-readiness -- --strict`
26
+ - no local Node/gh: run `Actions -> L1 readiness check -> Run workflow` (`.github/workflows/l1-readiness-check.yml`)
27
+
28
+ ## Task classification
29
+
30
+ Limits match `docs/operations.md` (CI enforces via `check-diff-size.mjs`).
31
+
32
+ | Class | Max autonomy | Max LOC | Max files |
33
+ |-------|-------------|---------|-----------|
34
+ | `docs` | L3 | 60 | 2 |
35
+ | `test-fix` | L2 | 120 | 4 |
36
+ | `refactor` | L1 | 300 | 8 |
37
+ | `feature-small` | L1 | 300 | 8 |
38
+ | `dependency-bump` | L1 | 300 | 8 |
39
+ | `infra` | L0 | — | human gate |
40
+ | `security-sensitive` | L0 | — | proposal only |
41
+
42
+ ## Agent roles
43
+
44
+ - **triager**: Classify issues, verify CC-SD contract before L1 on docs/test-fix, assign `task:*` and `autonomy:*` labels (read-only)
45
+ - **implementer**: Execute against Issue CC-SD contract with read/edit/test tools (L1 default)
46
+ - **reviewer**: Review PRs for requirement fit and non-goal preservation, no edit permission
47
+
48
+ ## Out of scope (always human)
49
+
50
+ Production DB operations, production secrets, billing/legal/PII changes.
51
+
52
+ ## Skills
53
+
54
+ Load `quality-loop` skill when verifying changes against acceptance criteria.
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 sdlc-gh contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,366 @@
1
+ # SDLC-GH
2
+
3
+ **An agent harness template for GitHub Copilot — deterministic guardrails for AI coding agents.**
4
+
5
+ sdlc-gh is a template repository that keeps AI coding agents on track with **CI walls, hooks, evals, and operational policy** instead of prompt discipline alone. It is organization-agnostic and stack-agnostic (TypeScript / Python / Go / Ruby / PHP): copy it into any product repository and adapt it.
6
+
7
+ > The CI and documentation parts work standalone, but coding agent and gh-aw integration require GitHub Copilot (Business / Enterprise).
8
+
9
+ ## Why
10
+
11
+ Teams adopting Copilot coding agent quickly run into the same problems:
12
+
13
+ - Agents open oversized PRs
14
+ - Destructive operations aren't reliably blocked
15
+ - Changing instructions has untracked effects on quality
16
+ - Approval points multiply until review becomes rubber-stamping
17
+
18
+ sdlc-gh addresses these with three design rules:
19
+
20
+ 1. **Walls are deterministic** — tests, lint, diff-size limits, and hooks stop bad changes mechanically
21
+ 2. **One human gate: PR review** — decision inputs (scores, cost, traces) are collected on the PR
22
+ 3. **No harness change without an eval** — changes to instructions / agents / skills are verified in CI
23
+
24
+ The full architecture and rationale live in [docs/arch.md](docs/arch.md).
25
+
26
+ ## Quick start
27
+
28
+ Requirements: a GitHub repository with Actions enabled; Node.js 22+; `gh` CLI authenticated for GitHub setup.
29
+
30
+ **Recommended — wizard in your product repo (no clone)**
31
+
32
+ ```bash
33
+ cd /path/to/your-product
34
+ npx @guilz-dev/sdlc-gh
35
+ ```
36
+
37
+ The wizard bootstraps harness assets (if missing), syncs labels/rulesets, and runs `doctor --strict`. Non-interactive example:
38
+
39
+ ```bash
40
+ npx @guilz-dev/sdlc-gh init --yes --stack ts --codeowners @your-org/harness-engineers --mode existing
41
+ ```
42
+
43
+ New empty directory with sample stack copied to root:
44
+
45
+ ```bash
46
+ mkdir my-product && cd my-product && git init
47
+ npx @guilz-dev/sdlc-gh init --yes --stack ts --codeowners @your-org/harness-engineers --mode new --skip-github
48
+ ```
49
+
50
+ Before the first npm release, use `npx github:guilz-dev/sdlc-gh` instead. For local development, run `node scripts/sdlc-gh-cli.mjs`.
51
+
52
+ **Option A — new repository from template (easiest)**
53
+
54
+ Click **Use this template** on GitHub, delete the `sample/` stacks you don't need, and add your code.
55
+
56
+ **Option B — add the harness to an existing repository (manual bootstrap)**
57
+
58
+ ```bash
59
+ git clone https://github.com/YOUR_ORG/sdlc-gh.git /tmp/sdlc-gh
60
+
61
+ /tmp/sdlc-gh/scripts/bootstrap-harness.sh \
62
+ --repo /path/to/your-product \
63
+ --codeowners-team @your-org/harness-engineers
64
+
65
+ cd /path/to/your-product
66
+ npx @guilz-dev/sdlc-gh --yes --stack ts --codeowners @your-org/harness-engineers
67
+ ```
68
+
69
+ **Option C — start a brand-new product (reuses the `/tmp/sdlc-gh` clone from Option B)**
70
+
71
+ ```bash
72
+ /tmp/sdlc-gh/scripts/bootstrap-harness.sh \
73
+ --repo /path/to/new-product \
74
+ --stack ts \
75
+ --mode new \
76
+ --codeowners-team @your-org/harness-engineers
77
+
78
+ cd /path/to/new-product
79
+ npx @guilz-dev/sdlc-gh --yes --stack ts --codeowners @your-org/harness-engineers --mode new
80
+ ```
81
+
82
+ `--mode new` expands the minimal `sample/{stack}/` project into the repository root.
83
+
84
+ ### After installing (required)
85
+
86
+ The harness is active only after GitHub setup and a clean doctor run:
87
+
88
+ 1. **Setup wizard (recommended)** — run `./scripts/setup-wizard.mjs` to configure `.harness-stack`, `CODEOWNERS`, GitHub labels/rulesets, and verify with `doctor --strict`. Use `--template` when dogfooding the multi-stack template repository.
89
+ 2. **Bootstrap** — run `./scripts/bootstrap-harness.sh` and confirm the detected stack/mode summary (alternative to the wizard for copy-only installs).
90
+ 3. **Configure GitHub** — the wizard runs `setup-github.sh` automatically; or run it manually to sync labels and create/update the `main-protection` ruleset with your stack's `product-ci-*` check. Optionally add `--with-eval-ruleset` after eval CI is stable.
91
+ 4. **Verify** — run `./scripts/doctor.mjs --strict` until every required item passes (`--template` for the template repo).
92
+
93
+ Manual fallback remains available for restricted environments:
94
+
95
+ - Apply labels from [.github/labels.yml](.github/labels.yml)
96
+ - Import [.github/ruleset.example.json](.github/ruleset.example.json) under *Settings → Rules*
97
+ - Ensure required checks include `harness-static`, `diff-size`, `issue-spec-check`, and your stack's `product-ci-*`
98
+
99
+ Detailed steps and rollback guidance: [docs/adoption.md](docs/adoption.md).
100
+
101
+ ## Configuration
102
+
103
+ | Setting | Location | Purpose |
104
+ |---------|----------|---------|
105
+ | Primary stack | `.harness-stack` (gitignored locally) | Selects `product-ci-{stack}` for rulesets and doctor |
106
+ | Harness review owners | `.github/CODEOWNERS` | Required reviewers for `.github/`, `evals/`, policy docs. **Product repos:** replace placeholder and commit. **This template repo:** keep `@your-org/harness-engineers` in git; use `--template` wizard mode locally. |
107
+ | Task / autonomy labels | `.github/labels.yml` → GitHub | Issue/PR classification (`task:*`, `autonomy:*`) |
108
+ | Branch protection | `main-protection` ruleset | Required CI checks + code owner review |
109
+ | Optional eval gate | `harness-pr-eval-required` ruleset | Requires eval-ci jobs on PRs to `main` |
110
+ | Change size / retry policy | [docs/operations.md](docs/operations.md) | Canonical thresholds (optional `DIFF_SIZE_L1_HARD_FAIL`) |
111
+ | Optional telemetry | GitHub Secrets `LANGFUSE_*` | Trace links and KPI export ([infra/README.md](infra/README.md)) |
112
+
113
+ Run `./scripts/setup-wizard.mjs` to apply the required install settings interactively. Non-interactive example:
114
+
115
+ ```bash
116
+ ./scripts/setup-wizard.mjs --yes --stack ts --codeowners @your-org/harness-engineers
117
+ ./scripts/setup-wizard.mjs --template --yes --stack ts
118
+ ```
119
+
120
+ **Template repo note:** When dogfooding this repository itself, run the wizard with `--template`. It writes gitignored `.harness-stack` and syncs GitHub rulesets, but **does not** replace the committed CODEOWNERS placeholder unless you pass `--patch-codeowners`. Use a separate product repository (or fork) when you need real code-owner enforcement with committed owners.
121
+
122
+ ## Start Spec-Driven L1 Flow
123
+
124
+ To run autonomous implementation from a spec (CC-SD contract) without guesswork:
125
+
126
+ 1. Prepare repository settings with `./scripts/setup-wizard.mjs`
127
+ 2. Verify L1 readiness:
128
+
129
+ ```bash
130
+ npm run check-l1-readiness
131
+ npm run check-l1-readiness -- --strict
132
+ ```
133
+
134
+ 3. Create an Issue from `.github/ISSUE_TEMPLATE/task.yml`
135
+ 4. Fill `Goal`, `Non-goals`, `Constraints`, `Acceptance criteria`, `Rollback hints`
136
+ 5. Confirm the synced labels (`task:docs` or `task:test-fix`) + `autonomy:L1`
137
+ 6. Assign `triager`, then `implementer`
138
+
139
+ Readiness checker notes:
140
+
141
+ - validates local harness assets and doctor checks
142
+ - validates GitHub labels/rulesets and latest `copilot-setup-steps` run when `gh` is authenticated
143
+ - reports items that still require manual confirmation (Copilot coding agent entitlement)
144
+ - supports machine-readable output via `npm run check-l1-readiness -- --json`
145
+ - without local Node/gh, run **Actions → L1 readiness check → Run workflow** via [.github/workflows/l1-readiness-check.yml](.github/workflows/l1-readiness-check.yml) (writes a job summary)
146
+
147
+ Bootstrap merges harness npm scripts into an existing root `package.json` instead of overwriting application metadata. Non-Node stacks get a minimal harness-only `package.json` when none exists.
148
+
149
+ Fresh clones without gitignored `.harness-stack` infer stack from the committed `product-ci-*.yml` workflow.
150
+
151
+ Detailed trial guide: [docs/coding-agent-l1.md](docs/coding-agent-l1.md).
152
+
153
+ ## Repository layout
154
+
155
+ ```text
156
+ sdlc-gh/
157
+ ├── AGENTS.md # project instructions for agents (task classes, roles)
158
+ ├── config/
159
+ │ └── stacks.json # stack catalog (profile, marker, workflow mapping)
160
+ ├── .github/
161
+ │ ├── copilot-instructions.md # global agent policy
162
+ │ ├── instructions/ # per-path and per-stack conventions
163
+ │ ├── agents/ # triager / implementer / reviewer (least privilege)
164
+ │ ├── skills/ # verification procedure skill (quality-loop)
165
+ │ ├── hooks/ # destructive-command blocklist
166
+ │ ├── workflows/ # walls (harness-ci, product-ci-*), evals, retry, sync
167
+ │ ├── labels.yml # task:* / autonomy:* label definitions
168
+ │ └── ruleset.example.json # branch protection example
169
+ ├── docs/ # architecture and operations docs (see below)
170
+ ├── evals/ # convention tests, rubric, e2e bench definitions
171
+ ├── prompts/ # prompts for gh models eval
172
+ ├── scripts/ # CI gate implementations, bootstrap, drift report
173
+ ├── sample/ # minimal ts / python / go / ruby / php samples (product CI targets)
174
+ └── infra/ # optional Langfuse / OTel scaffolding
175
+ ```
176
+
177
+ Inside this template, sample code lives under `sample/{stack}/`, and each product CI workflow runs when its corresponding marker file exists. In a bootstrapped product repository, only your selected stack's product CI workflow is copied, and it targets the repository root.
178
+
179
+ ## How a task flows
180
+
181
+ ```mermaid
182
+ sequenceDiagram
183
+ participant Dev as Developer
184
+ participant Issue as Issue
185
+ participant Tri as triager
186
+ participant Imp as implementer
187
+ participant Wall as Walls (CI)
188
+ participant Eval as eval-ci
189
+ participant Rev as Reviewer
190
+
191
+ Dev->>Issue: CC-SD contract (L1 docs / test-fix)
192
+ Issue->>Tri: Classify task:* / autonomy:*
193
+ Tri->>Imp: Delegate (complete contract required)
194
+ Imp->>Wall: Draft PR
195
+ alt CI failure
196
+ Wall-->>Imp: Retry orchestrator (max 3)
197
+ else CI pass
198
+ Wall->>Eval: Harness asset changes only
199
+ Wall->>Rev: PR context comment
200
+ Rev->>Issue: Approve or request changes
201
+ end
202
+ ```
203
+
204
+ For `task:docs` and `task:test-fix` at `autonomy:L1`, the Issue embeds a lightweight CC-SD contract (`Goal`, `Non-goals`, `Constraints`, `Acceptance criteria`, `Rollback hints`). v1 does not cover `feature-small` or higher-risk classes. Details: [docs/coding-agent-l1.md](docs/coding-agent-l1.md).
205
+
206
+ On CI failure, `agent-retry-orchestrator` applies retry labels (max 3 attempts; stops after the same failure signature twice; security failures escalate immediately). Canonical thresholds live in [docs/operations.md](docs/operations.md).
207
+
208
+ ## Local checks
209
+
210
+ Run from the repository root:
211
+
212
+ ```bash
213
+ npm run validate # harness asset consistency
214
+ npm run test-hooks # hook block/allow scenarios
215
+ npm run test-issue-spec # CC-SD issue-spec validator scenarios
216
+ npm run test-diff-size # diff-size / autonomy gate scenarios
217
+ npm run test-e2e-manifest # e2e manifest structural checks
218
+ npm run test-setup-github # ruleset payload builder scenarios
219
+ npm run test-doctor # doctor local check scenarios
220
+ npm run check-e2e # e2e bench manifest checks
221
+ npm run run-e2e # e2e bench executable acceptance checks
222
+ npm run verify-bootstrap # bootstrap integration test (all stacks)
223
+ npm run check # full local gate (validate + scenarios + e2e)
224
+ ```
225
+
226
+ On Node.js versions older than 22, `run-e2e-bench.mjs` may skip verifiers that require the same runtime as CI and report them as skipped rather than failed.
227
+
228
+ Convention tests in Python:
229
+
230
+ ```bash
231
+ pip install pytest
232
+ pytest evals/trajectories -q
233
+ ```
234
+
235
+ ## Phased rollout
236
+
237
+ Don't enable everything at once. Canonical phase definitions (including Phase 0 baseline) are in [docs/arch.md](docs/arch.md) §7. This table is the quick path; details in [docs/adoption.md](docs/adoption.md).
238
+
239
+ | Phase | Enable | Risk |
240
+ |-------|--------|------|
241
+ | 0 | CI walls, rulesets (`setup-github.sh`), optional Langfuse scaffold | Low |
242
+ | 1 | instructions, agents, hooks, templates | Low |
243
+ | 2 | `harness-ci` + your stack's `product-ci` | Medium |
244
+ | 3 | `eval-ci` + optional eval ruleset | Medium |
245
+ | 4 | coding agent L1 (`task:docs` / `task:test-fix` only) | Low–Medium |
246
+
247
+ Getting started with L1 delegation: [docs/coding-agent-l1.md](docs/coding-agent-l1.md).
248
+
249
+ ## Project status
250
+
251
+ Functional today: bootstrap, harness/product CI, diff-size and autonomy gates, hooks scenarios, retry orchestrator, PR context comments, executable acceptance-style E2E checks (9 tasks), and eval scaffolding.
252
+
253
+ Implementation status, including known placeholders (aligned with [docs/arch.md](docs/arch.md) implementation status):
254
+
255
+ | Area | Status |
256
+ |------|--------|
257
+ | Bootstrap, stack catalog, harness/product CI | **Implemented** |
258
+ | Hooks, diff-size gate, CC-SD issue-spec check | **Implemented** |
259
+ | Custom agents (triager / implementer / reviewer) | **Implemented** |
260
+ | Eval CI with change-type job selection | **Implemented** |
261
+ | Retry orchestrator, PR context comments | **Implemented** |
262
+ | E2E bench (executable acceptance checks) | **Partial** — 9 tasks; not yet break-and-fix agent runner |
263
+ | `gh models eval` in CI | **Scaffolded** — runs when prompts exist; org must enable Models |
264
+ | gh-aw outer loop (`nightly-harness-review`, `weekly-redteam`) | **Partial** — GHA outer loop + gh-aw dogfood CI (#7); `.md`/`.lock.yml` stubs remain |
265
+ | Langfuse / OTel export | **Scaffolded** — `infra/` + schema; wiring optional |
266
+
267
+ ### Observability placeholders (spec only)
268
+
269
+ Until Langfuse / OTel is wired, PR context comments use fixed placeholders (workflow logic unchanged):
270
+
271
+ | Field | When unset / n/a |
272
+ |-------|------------------|
273
+ | Trace link | `_configure LANGFUSE_HOST; then search by repo=…, pr_number=…_` |
274
+ | AI credits | Informational — `_set max-ai-credits in org settings_` |
275
+ | Threat detection | `n/a` — gh-aw outer loop remains stub |
276
+
277
+ Validate sample payloads: `node scripts/validate-telemetry.mjs "$(cat infra/samples/telemetry-payload.json)"` and `node scripts/validate-telemetry.mjs "$(cat infra/samples/telemetry-artifact.json)"`. Inner-loop workflows emit artifacts per [docs/telemetry-artifacts.md](docs/telemetry-artifacts.md); field definitions in [docs/telemetry-schema.md](docs/telemetry-schema.md).
278
+
279
+ ## Architecture
280
+
281
+ The harness is a **dual-loop control system**: a fast inner loop (agent + deterministic walls) and a slower outer loop (eval + harness revision).
282
+
283
+ ```mermaid
284
+ flowchart LR
285
+ subgraph OUTER["Outer loop (daily–weekly)"]
286
+ EVAL[Eval / traces]
287
+ REVISE[Revise instructions / walls]
288
+ end
289
+ subgraph INNER["Inner loop (per task)"]
290
+ FF[Feed-forward<br/>instructions / agents / skills]
291
+ AGENT[Agent<br/>plan → act → test]
292
+ WALL[Walls<br/>CI / hooks / diff-size]
293
+ end
294
+ Issue[Issue + CC-SD] --> FF
295
+ FF --> AGENT --> WALL
296
+ WALL -- fail --> AGENT
297
+ WALL -- pass --> PR[Draft PR]
298
+ AGENT -.-> EVAL
299
+ PR -.-> EVAL
300
+ EVAL --> REVISE
301
+ REVISE --> FF
302
+ ```
303
+
304
+ Layers as implemented in this repo (details in [docs/arch.md](docs/arch.md)):
305
+
306
+ ```mermaid
307
+ flowchart TB
308
+ L0[L0 Governance<br/>rulesets · CODEOWNERS · labels]
309
+ L1[L1 Feed-forward<br/>instructions · agents · skills]
310
+ L2[L2 Execution<br/>coding agent · CLI · gh-aw stubs]
311
+ L3[L3 Walls<br/>harness-ci · product-ci · hooks]
312
+ L4[L4–L6 Observability · Eval · Outer loop<br/>Langfuse · eval-ci · nightly review stubs]
313
+ L0 --> L1 --> L2 --> L3 --> L4
314
+ L4 -. revise .-> L1
315
+ ```
316
+
317
+ ## Documentation
318
+
319
+ If you are adopting the harness in a product repo, start with [docs/adoption.md](docs/adoption.md) and then keep [docs/revert-playbook.md](docs/revert-playbook.md) nearby.
320
+
321
+ If you are operating an installed harness day to day, read [docs/operations.md](docs/operations.md) first, then [docs/failure-taxonomy.md](docs/failure-taxonomy.md) and [docs/telemetry-artifacts.md](docs/telemetry-artifacts.md).
322
+
323
+ If you are writing or triaging Task Issues for L1 delegation, start with [docs/coding-agent-l1.md](docs/coding-agent-l1.md) and use the Actions fallback in [.github/workflows/l1-readiness-check.yml](.github/workflows/l1-readiness-check.yml) when local `gh`/Node is unavailable.
324
+
325
+ If you are contributing to the harness itself, read [CONTRIBUTING.md](CONTRIBUTING.md), then [docs/arch.md](docs/arch.md), and use [docs/shared-config.md](docs/shared-config.md) for distribution/update strategy.
326
+
327
+ | Document | Contents |
328
+ |----------|----------|
329
+ | [docs/arch.md](docs/arch.md) | Full architecture and design principles |
330
+ | [docs/adoption.md](docs/adoption.md) | Installation and rollback |
331
+ | [docs/operations.md](docs/operations.md) | Thresholds, retry policy, forbidden ops (**canonical** policy) |
332
+ | [docs/revert-playbook.md](docs/revert-playbook.md) | Revert procedure (harness vs product) |
333
+ | [docs/coding-agent-l1.md](docs/coding-agent-l1.md) | Running the first L1 delegations |
334
+ | [docs/failure-taxonomy.md](docs/failure-taxonomy.md) | Classifying failures for outer-loop routing |
335
+ | [docs/kpi-baseline.md](docs/kpi-baseline.md) | Weekly KPI tracking template |
336
+ | [docs/telemetry-schema.md](docs/telemetry-schema.md) | Required observability fields |
337
+ | [docs/telemetry-artifacts.md](docs/telemetry-artifacts.md) | Inner-loop JSON artifact format and storage |
338
+ | [docs/gh-aw-dogfood.md](docs/gh-aw-dogfood.md) | Bounded gh-aw validation on sdlc-gh |
339
+ | [docs/auth-boundaries.md](docs/auth-boundaries.md) | Credential boundaries per execution mode |
340
+ | [docs/publishing.md](docs/publishing.md) | npm package release (`@guilz-dev/sdlc-gh`) |
341
+ | [docs/shared-config.md](docs/shared-config.md) | Distributing shared assets across repositories |
342
+ | [docs/exceptions/README.md](docs/exceptions/README.md) | Recording policy exceptions |
343
+ | [infra/README.md](infra/README.md) | Self-hosting Langfuse / OTel |
344
+ | [CONTRIBUTING.md](CONTRIBUTING.md) | Contribution workflow and review expectations |
345
+ | [SECURITY.md](SECURITY.md) | Vulnerability reporting policy |
346
+ | [CODE_OF_CONDUCT.md](CODE_OF_CONDUCT.md) | Community behavior expectations |
347
+ | [SUPPORT.md](SUPPORT.md) | Support routes and troubleshooting intake |
348
+
349
+ ## FAQ
350
+
351
+ **Q. How do I pull template updates into a repository that already uses the harness?**
352
+ A. Re-run `bootstrap-harness.sh` to overwrite harness assets, then review the diff with `npm run drift-report`. The `harness-sync` workflow produces a weekly drift report.
353
+
354
+ **Q. Does the harness itself need a test framework (Jest etc.)?**
355
+ A. No. The harness is guarded by the `scripts/*.mjs` checks and `eval-ci`. Your application keeps its own test runner (vitest / pytest / go test / rspec / phpunit).
356
+
357
+ ## Project policies
358
+
359
+ - Contribution guide: [CONTRIBUTING.md](CONTRIBUTING.md)
360
+ - Security reporting: [SECURITY.md](SECURITY.md)
361
+ - Code of conduct: [CODE_OF_CONDUCT.md](CODE_OF_CONDUCT.md)
362
+ - Support: [SUPPORT.md](SUPPORT.md)
363
+
364
+ ## License
365
+
366
+ [MIT](LICENSE)
@@ -0,0 +1,55 @@
1
+ {
2
+ "version": 1,
3
+ "stacks": [
4
+ {
5
+ "id": "ts",
6
+ "label": "TypeScript",
7
+ "profile": "typescript.instructions.md",
8
+ "sampleDir": "ts",
9
+ "marker": "package.json",
10
+ "sampleMarker": "sample/ts/package.json",
11
+ "workflow": "product-ci-ts.yml",
12
+ "bootstrapCheck": "package.json"
13
+ },
14
+ {
15
+ "id": "python",
16
+ "label": "Python",
17
+ "profile": "python.instructions.md",
18
+ "sampleDir": "python",
19
+ "marker": "requirements-dev.txt",
20
+ "sampleMarker": "sample/python/requirements-dev.txt",
21
+ "workflow": "product-ci-python.yml",
22
+ "bootstrapCheck": "requirements-dev.txt"
23
+ },
24
+ {
25
+ "id": "go",
26
+ "label": "Go",
27
+ "profile": "go.instructions.md",
28
+ "sampleDir": "go",
29
+ "marker": "go.mod",
30
+ "sampleMarker": "sample/go/go.mod",
31
+ "workflow": "product-ci-go.yml",
32
+ "bootstrapCheck": "go.mod"
33
+ },
34
+ {
35
+ "id": "ruby",
36
+ "label": "Ruby",
37
+ "profile": "ruby.instructions.md",
38
+ "sampleDir": "ruby",
39
+ "marker": "Gemfile",
40
+ "sampleMarker": "sample/ruby/Gemfile",
41
+ "workflow": "product-ci-ruby.yml",
42
+ "bootstrapCheck": "Gemfile"
43
+ },
44
+ {
45
+ "id": "php",
46
+ "label": "PHP",
47
+ "profile": "php.instructions.md",
48
+ "sampleDir": "php",
49
+ "marker": "composer.json",
50
+ "sampleMarker": "sample/php/composer.json",
51
+ "workflow": "product-ci-php.yml",
52
+ "bootstrapCheck": "composer.json"
53
+ }
54
+ ]
55
+ }