npm - @guilz-dev/sdlc-gh - Versions diffs - 0.1.0 - Mend

@guilz-dev/sdlc-gh 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (176) hide show

package/.github/CODEOWNERS +5 -0
package/.github/ISSUE_TEMPLATE/bug_report.yml +68 -0
package/.github/ISSUE_TEMPLATE/config.yml +1 -0
package/.github/ISSUE_TEMPLATE/feature_request.yml +39 -0
package/.github/ISSUE_TEMPLATE/support.yml +56 -0
package/.github/ISSUE_TEMPLATE/task.yml +89 -0
package/.github/agents/implementer.agent.md +17 -0
package/.github/agents/reviewer.agent.md +18 -0
package/.github/agents/triager.agent.md +13 -0
package/.github/aw/actions-lock.json +9 -0
package/.github/copilot-instructions.md +35 -0
package/.github/hooks/hooks.json +12 -0
package/.github/instructions/core.instructions.md +11 -0
package/.github/instructions/profiles/go.instructions.md +10 -0
package/.github/instructions/profiles/php.instructions.md +11 -0
package/.github/instructions/profiles/python.instructions.md +11 -0
package/.github/instructions/profiles/ruby.instructions.md +11 -0
package/.github/instructions/profiles/typescript.instructions.md +11 -0
package/.github/labels.yml +55 -0
package/.github/pull_request_template.md +33 -0
package/.github/ruleset.example.json +33 -0
package/.github/ruleset.harness-eval.example.json +29 -0
package/.github/skills/quality-loop/SKILL.md +23 -0
package/.github/workflows/agent-retry-orchestrator.yml +161 -0
package/.github/workflows/copilot-setup-steps.yml +64 -0
package/.github/workflows/eval-ci.yml +169 -0
package/.github/workflows/eval-drift.yml +75 -0
package/.github/workflows/gh-aw-dogfood-ci.yml +73 -0
package/.github/workflows/harness-ci.yml +244 -0
package/.github/workflows/harness-sync.yml +28 -0
package/.github/workflows/l1-readiness-check.yml +45 -0
package/.github/workflows/labels-sync.yml +24 -0
package/.github/workflows/nightly-harness-review.lock.yml +1643 -0
package/.github/workflows/nightly-harness-review.md +87 -0
package/.github/workflows/nightly-harness-review.yml +63 -0
package/.github/workflows/npm-publish.yml +49 -0
package/.github/workflows/pr-context-comment.yml +138 -0
package/.github/workflows/product-ci-go.yml +33 -0
package/.github/workflows/product-ci-php.yml +39 -0
package/.github/workflows/product-ci-python.yml +34 -0
package/.github/workflows/product-ci-ruby.yml +35 -0
package/.github/workflows/product-ci-ts.yml +37 -0
package/.github/workflows/task-issue-label-sync.yml +50 -0
package/.github/workflows/weekly-redteam.lock.yml +1571 -0
package/.github/workflows/weekly-redteam.md +76 -0
package/.github/zizmor.yml +11 -0
package/AGENTS.md +54 -0
package/LICENSE +21 -0
package/README.md +366 -0
package/config/stacks.json +55 -0
package/docs/adoption.md +126 -0
package/docs/arch.md +535 -0
package/docs/auth-boundaries.md +16 -0
package/docs/coding-agent-l1.md +152 -0
package/docs/exceptions/README.md +25 -0
package/docs/exceptions/TEMPLATE.md +8 -0
package/docs/failure-taxonomy.md +23 -0
package/docs/gh-aw-dogfood.md +109 -0
package/docs/kpi-baseline.md +9 -0
package/docs/nightly-harness-review.md +94 -0
package/docs/operations.md +108 -0
package/docs/publishing.md +79 -0
package/docs/revert-playbook.md +44 -0
package/docs/shared-config.md +30 -0
package/docs/telemetry-artifacts.md +78 -0
package/docs/telemetry-schema.md +60 -0
package/evals/.score-baseline.json +6 -0
package/evals/e2e-bench/README.md +28 -0
package/evals/e2e-bench/manifest.json +16 -0
package/evals/e2e-bench/tasks/e2e-001.yml +10 -0
package/evals/e2e-bench/tasks/e2e-002.yml +11 -0
package/evals/e2e-bench/tasks/e2e-003.yml +10 -0
package/evals/e2e-bench/tasks/e2e-004.yml +14 -0
package/evals/e2e-bench/tasks/e2e-005.yml +11 -0
package/evals/e2e-bench/tasks/e2e-006.yml +10 -0
package/evals/e2e-bench/tasks/e2e-007.yml +10 -0
package/evals/e2e-bench/tasks/e2e-008.yml +10 -0
package/evals/e2e-bench/tasks/e2e-009.yml +10 -0
package/evals/trajectories/rubric.md +12 -0
package/evals/trajectories/test_harness_conventions.py +271 -0
package/infra/README.md +49 -0
package/infra/langfuse/docker-compose.yml +25 -0
package/infra/otel/collector-config.yml +24 -0
package/infra/samples/gh-aw-dogfood-report.json +44 -0
package/infra/samples/harness-review-routing-plan.json +19 -0
package/infra/samples/harness-review-summary.json +61 -0
package/infra/samples/telemetry-artifact.json +29 -0
package/infra/samples/telemetry-payload.json +19 -0
package/package.json +85 -0
package/prompts/triager-classify.prompt.yml +10 -0
package/sample/go/add.go +5 -0
package/sample/go/add_test.go +9 -0
package/sample/go/go.mod +3 -0
package/sample/php/composer.json +26 -0
package/sample/php/composer.lock +1881 -0
package/sample/php/phpunit.xml +8 -0
package/sample/php/src/Add.php +13 -0
package/sample/php/tests/AddTest.php +16 -0
package/sample/python/requirements-dev.txt +2 -0
package/sample/python/src/__init__.py +0 -0
package/sample/python/src/greet.py +3 -0
package/sample/python/tests/conftest.py +4 -0
package/sample/python/tests/test_greet.py +5 -0
package/sample/ruby/.rubocop.yml +10 -0
package/sample/ruby/Gemfile +6 -0
package/sample/ruby/Gemfile.lock +58 -0
package/sample/ruby/lib/add.rb +9 -0
package/sample/ruby/spec/add_spec.rb +11 -0
package/sample/ts/biome.json +6 -0
package/sample/ts/package-lock.json +1763 -0
package/sample/ts/package.json +15 -0
package/sample/ts/src/add.ts +3 -0
package/sample/ts/tests/add.test.ts +8 -0
package/sample/ts/tsconfig.json +12 -0
package/scripts/aggregate-harness-review.mjs +48 -0
package/scripts/bootstrap-harness.sh +411 -0
package/scripts/check-diff-size.mjs +46 -0
package/scripts/check-e2e-manifest.mjs +35 -0
package/scripts/check-eval-score-drift.mjs +31 -0
package/scripts/check-gh-aw-dogfood-scope.mjs +51 -0
package/scripts/check-issue-spec.mjs +215 -0
package/scripts/check-l1-readiness.mjs +82 -0
package/scripts/check-open-pr-limit.mjs +34 -0
package/scripts/doctor.mjs +177 -0
package/scripts/emit-gh-aw-dogfood-report.mjs +112 -0
package/scripts/emit-telemetry-artifact.mjs +99 -0
package/scripts/fetch-telemetry-artifacts.mjs +176 -0
package/scripts/harness-drift-report.mjs +99 -0
package/scripts/lib/bootstrap-copy.mjs +123 -0
package/scripts/lib/ccsd-contract.mjs +212 -0
package/scripts/lib/diff-size.mjs +103 -0
package/scripts/lib/doctor-local.mjs +179 -0
package/scripts/lib/e2e-manifest.mjs +76 -0
package/scripts/lib/gh-aw-dogfood.mjs +293 -0
package/scripts/lib/github-config.mjs +94 -0
package/scripts/lib/harness-ci-fragments.mjs +98 -0
package/scripts/lib/harness-review-routing.mjs +244 -0
package/scripts/lib/harness-review.mjs +388 -0
package/scripts/lib/issue-form-label-sync.mjs +56 -0
package/scripts/lib/l1-readiness.mjs +258 -0
package/scripts/lib/merge-harness-package.mjs +36 -0
package/scripts/lib/npm-package.mjs +129 -0
package/scripts/lib/setup-wizard.mjs +224 -0
package/scripts/lib/stacks.mjs +138 -0
package/scripts/lib/telemetry-artifact.mjs +253 -0
package/scripts/lib/template-root.mjs +39 -0
package/scripts/merge-harness-package.mjs +14 -0
package/scripts/route-harness-review.mjs +168 -0
package/scripts/run-e2e-bench.mjs +216 -0
package/scripts/sdlc-gh-cli.mjs +91 -0
package/scripts/select-eval-jobs.mjs +41 -0
package/scripts/setup-github.mjs +242 -0
package/scripts/setup-github.sh +4 -0
package/scripts/setup-wizard.mjs +426 -0
package/scripts/test-bootstrap-guidance-scenarios.mjs +94 -0
package/scripts/test-diff-size-scenarios.mjs +88 -0
package/scripts/test-doctor-scenarios.mjs +70 -0
package/scripts/test-e2e-manifest-scenarios.mjs +65 -0
package/scripts/test-gh-aw-dogfood-scenarios.mjs +74 -0
package/scripts/test-harness-review-routing-scenarios.mjs +130 -0
package/scripts/test-harness-review-scenarios.mjs +92 -0
package/scripts/test-hooks-scenarios.mjs +44 -0
package/scripts/test-issue-form-label-sync-scenarios.mjs +48 -0
package/scripts/test-issue-spec-scenarios.mjs +258 -0
package/scripts/test-l1-readiness-scenarios.mjs +204 -0
package/scripts/test-merge-harness-package-scenarios.mjs +53 -0
package/scripts/test-npm-package-scenarios.mjs +31 -0
package/scripts/test-sdlc-gh-cli-scenarios.mjs +54 -0
package/scripts/test-setup-github-scenarios.mjs +103 -0
package/scripts/test-setup-wizard-scenarios.mjs +114 -0
package/scripts/test-telemetry-artifact-scenarios.mjs +69 -0
package/scripts/trim-harness-ci.mjs +18 -0
package/scripts/validate-gh-aw-compile.mjs +64 -0
package/scripts/validate-harness.mjs +199 -0
package/scripts/validate-telemetry.mjs +21 -0
package/scripts/verify-bootstrap-stacks.sh +192 -0

package/.github/workflows/weekly-redteam.md ADDED Viewed

@@ -0,0 +1,76 @@
+---
+description: Weekly red team probe suite (garak).
+name: Weekly red team
+on:
+  schedule:
+    - cron: "0 3 * * 0"
+permissions:
+  contents: read
+  issues: read
+safe-outputs:
+  create-issue:
+    max: 2
+---
+# Weekly red team (gh-aw source)
+> **Operational baseline:** There is no standard GHA replacement yet — probes remain a **manual / scheduled stub** until the garak runtime prerequisites exist. Dogfood validates compilation and safe-outputs only.
+## Required inputs
+| Input | Source | Required |
+|-------|--------|----------|
+| Probe definitions | garak / harness red-team config (future) | best-effort |
+| Target scope | Repository harness surfaces (agents, hooks, workflows) | yes |
+| Prior weekly summary | Previous `create-issue` or morning queue entry | optional |
+## Forbidden operations
+- Do **not** open pull requests (no `create-pull-request` safe-output)
+- Do **not** exfiltrate secrets or modify production credentials
+- Do **not** run unbounded network probes outside AWF allowlist
+- Do **not** block the GHA nightly harness review path
+## Expected outputs
+| Output | Format | Limit |
+|--------|--------|-------|
+| Red-team findings | GitHub issue with severity + repro steps | `create-issue.max: 2` |
+| Morning queue note | Markdown summary (issue body section) | human-readable |
+## Probe contract
+When garak (or equivalent) is available:
+1. Run the configured probe suite against agent prompts and harness docs
+2. Record pass/fail per probe with `wall_failure_type: security` when applicable
+3. Open at most **two** issues for high-severity findings
+Until the garak runtime exists, emit a single issue stating `status: stub — probes not executed` whenever the schedule fires.
+## Escalation
+- **Critical** injection or secret-leak signal → open issue immediately; do not retry autonomously
+- Repeated probe failures on an unchanged harness → route to [failure-taxonomy.md](../../docs/failure-taxonomy.md) **モデル限界** (model limitation) / human review
+## Fallback when gh-aw or garak regresses
+1. Skip probe execution; open a single tracking issue if the schedule fired
+2. Keep [nightly-harness-review.yml](./nightly-harness-review.yml) GHA path operational
+3. Revert `.md` / `.lock.yml` via dogfood rollback ([docs/gh-aw-dogfood.md](../../docs/gh-aw-dogfood.md))
+## Promotion criteria (gh-aw vs manual)
+Enable gh-aw weekly execution when:
+- garak (or substitute) runs in CI or AWF sandbox with pinned version
+- Dogfood safe-output checks pass (`create-issue.max <= 2`, no auto-merge)
+- At least one dry-run weekly report matches manual probe results
+## Agent instructions
+Run the garak probe suite when tooling is present and report results to the morning queue.
+When tooling is **missing**, create a stub issue documenting `garak: not available` and reference [infra/README.md](../../infra/README.md) threat-detection placeholder.
+Do not auto-merge. Do not modify product code.

package/.github/zizmor.yml ADDED Viewed

@@ -0,0 +1,11 @@
+# Tag-pinned actions are acceptable for this harness template; hash-pin in product repos if required.
+rules:
+  unpinned-uses:
+    config:
+      policies:
+        actions/*: ref-pin
+        github/*: ref-pin
+        dependabot/*: ref-pin
+        EndBug/*: ref-pin
+        ruby/*: ref-pin
+        shivammathur/*: ref-pin

package/AGENTS.md ADDED Viewed

@@ -0,0 +1,54 @@
+# Agent Harness — Project Instructions
+## Purpose
+This repository (or a product repo using this harness) follows the agent harness architecture in `docs/arch.md`. Human judgment converges on **PR review only**.
+## CC-SD contract (L1 docs / test-fix)
+For `task:docs` and `task:test-fix` delegated at `autonomy:L1`, the Issue embeds a lightweight CC-SD contract with these canonical fields:
+| Field | Required |
+|-------|----------|
+| `Goal` | yes |
+| `Non-goals` | yes |
+| `Constraints` | yes |
+| `Acceptance criteria` | yes |
+| `Rollback hints` | yes |
+| `Additional context` | optional |
+CI enforces completeness via `issue-spec-check`. Task Issues created from `.github/ISSUE_TEMPLATE/task.yml` sync `task:*` / `autonomy:*` labels automatically via `.github/workflows/task-issue-label-sync.yml`. v1 does not cover `feature-small`, `infra`, or `security-sensitive`.
+Before starting spec-driven L1 delegation, run readiness checks:
+- `npm run check-l1-readiness`
+- strict mode: `npm run check-l1-readiness -- --strict`
+- no local Node/gh: run `Actions -> L1 readiness check -> Run workflow` (`.github/workflows/l1-readiness-check.yml`)
+## Task classification
+Limits match `docs/operations.md` (CI enforces via `check-diff-size.mjs`).
+| Class | Max autonomy | Max LOC | Max files |
+|-------|-------------|---------|-----------|
+| `docs` | L3 | 60 | 2 |
+| `test-fix` | L2 | 120 | 4 |
+| `refactor` | L1 | 300 | 8 |
+| `feature-small` | L1 | 300 | 8 |
+| `dependency-bump` | L1 | 300 | 8 |
+| `infra` | L0 | — | human gate |
+| `security-sensitive` | L0 | — | proposal only |
+## Agent roles
+- **triager**: Classify issues, verify the CC-SD contract before L1 delegation on docs/test-fix, and assign `task:*` and `autonomy:*` labels (read-only)
+- **implementer**: Execute against Issue CC-SD contract with read/edit/test tools (L1 default)
+- **reviewer**: Review PRs for requirement fit and non-goal preservation, no edit permission
+## Out of scope (always human)
+Production DB operations, production secrets, billing/legal/PII changes.
+## Skills
+Load `quality-loop` skill when verifying changes against acceptance criteria.

package/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2026 sdlc-gh contributors
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

package/README.md ADDED Viewed

@@ -0,0 +1,366 @@
+# SDLC-GH
+**An agent harness template for GitHub Copilot — deterministic guardrails for AI coding agents.**
+sdlc-gh is a template repository that keeps AI coding agents on track with **CI walls, hooks, evals, and operational policy** instead of prompt discipline alone. It is organization-agnostic and stack-agnostic (TypeScript / Python / Go / Ruby / PHP): copy it into any product repository and adapt it.
+> The CI and documentation parts work standalone, but coding agent and gh-aw integration require GitHub Copilot (Business / Enterprise).
+## Why
+Teams adopting Copilot coding agent quickly run into the same problems:
+- Agents open oversized PRs
+- Destructive operations aren't reliably blocked
+- Changing instructions has untracked effects on quality
+- Approval points multiply until review becomes rubber-stamping
+sdlc-gh addresses these with three design rules:
+1. **Walls are deterministic** — tests, lint, diff-size limits, and hooks stop bad changes mechanically
+2. **One human gate: PR review** — decision inputs (scores, cost, traces) are collected on the PR
+3. **No harness change without an eval** — changes to instructions / agents / skills are verified in CI
+The full architecture and rationale live in [docs/arch.md](docs/arch.md).
+## Quick start
+Requirements: a GitHub repository with Actions enabled; Node.js 22+; `gh` CLI authenticated for GitHub setup.
+**Recommended — wizard in your product repo (no clone)**
+```bash
+cd /path/to/your-product
+npx @guilz-dev/sdlc-gh
+```
+The wizard bootstraps harness assets (if missing), syncs labels/rulesets, and runs `doctor --strict`. Non-interactive example:
+```bash
+npx @guilz-dev/sdlc-gh init --yes --stack ts --codeowners @your-org/harness-engineers --mode existing
+```
+New empty directory with sample stack copied to root:
+```bash
+mkdir my-product && cd my-product && git init
+npx @guilz-dev/sdlc-gh init --yes --stack ts --codeowners @your-org/harness-engineers --mode new --skip-github
+```
+Before the first npm release: `npx github:guilz-dev/sdlc-gh`. Local dev: `node scripts/sdlc-gh-cli.mjs`.
+**Option A — new repository from template (easiest)**
+Click **Use this template** on GitHub, delete the `sample/` stacks you don't need, and add your code.
+**Option B — add the harness to an existing repository (manual bootstrap)**
+```bash
+git clone https://github.com/YOUR_ORG/sdlc-gh.git /tmp/sdlc-gh
+/tmp/sdlc-gh/scripts/bootstrap-harness.sh \
+  --repo /path/to/your-product \
+  --codeowners-team @your-org/harness-engineers
+cd /path/to/your-product
+npx @guilz-dev/sdlc-gh --yes --stack ts --codeowners @your-org/harness-engineers
+```
+**Option C — start a brand-new product** (reuses the `/tmp/sdlc-gh` clone from Option B)
+```bash
+/tmp/sdlc-gh/scripts/bootstrap-harness.sh \
+  --repo /path/to/new-product \
+  --stack ts \
+  --mode new \
+  --codeowners-team @your-org/harness-engineers
+cd /path/to/new-product
+npx @guilz-dev/sdlc-gh --yes --stack ts --codeowners @your-org/harness-engineers --mode new
+```
+`--mode new` expands the minimal `sample/{stack}/` project into the repository root.
+### After installing (required)
+The harness is active only after GitHub setup and a clean doctor run:
+1. **Setup wizard (recommended)** — run `./scripts/setup-wizard.mjs` to configure `.harness-stack`, `CODEOWNERS`, GitHub labels/rulesets, and verify with `doctor --strict`. Use `--template` when dogfooding the multi-stack template repository.
+2. **Bootstrap** — run `./scripts/bootstrap-harness.sh` and confirm the detected stack/mode summary (alternative to the wizard for copy-only installs).
+3. **Configure GitHub** — the wizard runs `setup-github.sh` automatically; or run it manually to sync labels and create/update the `main-protection` ruleset with your stack's `product-ci-*` check. Optionally add `--with-eval-ruleset` after eval CI is stable.
+4. **Verify** — run `./scripts/doctor.mjs --strict` until every required item passes (`--template` for the template repo).
+Manual fallback remains available for restricted environments:
+- Apply labels from [.github/labels.yml](.github/labels.yml)
+- Import [.github/ruleset.example.json](.github/ruleset.example.json) under *Settings → Rules*
+- Ensure required checks include `harness-static`, `diff-size`, `issue-spec-check`, and your stack's `product-ci-*`
+Detailed steps and rollback guidance: [docs/adoption.md](docs/adoption.md).
+## Configuration
+| Setting | Location | Purpose |
+|---------|----------|---------|
+| Primary stack | `.harness-stack` (gitignored locally) | Selects `product-ci-{stack}` for rulesets and doctor |
+| Harness review owners | `.github/CODEOWNERS` | Required reviewers for `.github/`, `evals/`, policy docs. **Product repos:** replace placeholder and commit. **This template repo:** keep `@your-org/harness-engineers` in git; use `--template` wizard mode locally. |
+| Task / autonomy labels | `.github/labels.yml` → GitHub | Issue/PR classification (`task:*`, `autonomy:*`) |
+| Branch protection | `main-protection` ruleset | Required CI checks + code owner review |
+| Optional eval gate | `harness-pr-eval-required` ruleset | Requires eval-ci jobs on PRs to `main` |
+| Change size / retry policy | [docs/operations.md](docs/operations.md) | Canonical thresholds (optional `DIFF_SIZE_L1_HARD_FAIL`) |
+| Optional telemetry | GitHub Secrets `LANGFUSE_*` | Trace links and KPI export ([infra/README.md](infra/README.md)) |
+Run `./scripts/setup-wizard.mjs` to apply the required install settings interactively. Non-interactive example:
+```bash
+./scripts/setup-wizard.mjs --yes --stack ts --codeowners @your-org/harness-engineers
+./scripts/setup-wizard.mjs --template --yes --stack ts
+```
+**Template repo note:** When dogfooding this repository itself, run the wizard with `--template`. It writes gitignored `.harness-stack` and syncs GitHub rulesets, but **does not** replace the committed CODEOWNERS placeholder unless you pass `--patch-codeowners`. Use a separate product repository (or fork) when you need real code-owner enforcement with committed owners.
+## Start Spec-Driven L1 Flow
+To run autonomous implementation from a spec (CC-SD contract) without guesswork:
+1. Prepare repository settings with `./scripts/setup-wizard.mjs`
+2. Verify L1 readiness:
+```bash
+npm run check-l1-readiness
+npm run check-l1-readiness -- --strict
+```
+3. Create an Issue from `.github/ISSUE_TEMPLATE/task.yml`
+4. Fill `Goal`, `Non-goals`, `Constraints`, `Acceptance criteria`, `Rollback hints`
+5. Confirm the synced labels (`task:docs` or `task:test-fix`) + `autonomy:L1`
+6. Assign `triager`, then `implementer`
+Readiness checker notes:
+- validates local harness assets and doctor checks
+- validates GitHub labels/rulesets and latest `copilot-setup-steps` run when `gh` is authenticated
+- reports items that still require manual confirmation (Copilot coding agent entitlement)
+- supports machine-readable output via `npm run check-l1-readiness -- --json`
+- without local Node/gh, run **Actions → L1 readiness check → Run workflow** via [.github/workflows/l1-readiness-check.yml](.github/workflows/l1-readiness-check.yml) (writes a job summary)
+Bootstrap merges harness npm scripts into an existing root `package.json` instead of overwriting application metadata. Non-Node stacks get a minimal harness-only `package.json` when none exists.
+Fresh clones, which lack the gitignored `.harness-stack` file, infer the stack from the committed `product-ci-*.yml` workflow.
+Detailed trial guide: [docs/coding-agent-l1.md](docs/coding-agent-l1.md).
+## Repository layout
+```text
+sdlc-gh/
+├── AGENTS.md                # project instructions for agents (task classes, roles)
+├── config/
+│   └── stacks.json          # stack catalog (profile, marker, workflow mapping)
+├── .github/
+│   ├── copilot-instructions.md  # global agent policy
+│   ├── instructions/        # per-path and per-stack conventions
+│   ├── agents/              # triager / implementer / reviewer (least privilege)
+│   ├── skills/              # verification procedure skill (quality-loop)
+│   ├── hooks/               # destructive-command blocklist
+│   ├── workflows/           # walls (harness-ci, product-ci-*), evals, retry, sync
+│   ├── labels.yml           # task:* / autonomy:* label definitions
+│   └── ruleset.example.json # branch protection example
+├── docs/                    # architecture and operations docs (see below)
+├── evals/                   # convention tests, rubric, e2e bench definitions
+├── prompts/                 # prompts for gh models eval
+├── scripts/                 # CI gate implementations, bootstrap, drift report
+├── sample/                  # minimal ts / python / go / ruby / php samples (product CI targets)
+└── infra/                   # optional Langfuse / OTel scaffolding
+```
+Inside this template, sample code lives under `sample/{stack}/` and all product CI workflows run when the corresponding marker file exists. In a bootstrapped product repository, only your selected stack's product CI workflow is copied and it targets the repository root.
+## How a task flows
+```mermaid
+sequenceDiagram
+    participant Dev as Developer
+    participant Issue as Issue
+    participant Tri as triager
+    participant Imp as implementer
+    participant Wall as Walls (CI)
+    participant Eval as eval-ci
+    participant Rev as Reviewer
+    Dev->>Issue: CC-SD contract (L1 docs / test-fix)
+    Issue->>Tri: Classify task:* / autonomy:*
+    Tri->>Imp: Delegate (complete contract required)
+    Imp->>Wall: Draft PR
+    alt CI failure
+        Wall-->>Imp: Retry orchestrator (max 3)
+    else CI pass
+        Wall->>Eval: Harness asset changes only
+        Wall->>Rev: PR context comment
+        Rev->>Issue: Approve or request changes
+    end
+```
+For `task:docs` and `task:test-fix` at `autonomy:L1`, the Issue embeds a lightweight CC-SD contract (`Goal`, `Non-goals`, `Constraints`, `Acceptance criteria`, `Rollback hints`). v1 does not cover `feature-small` or higher-risk classes. Details: [docs/coding-agent-l1.md](docs/coding-agent-l1.md).
+On CI failure, `agent-retry-orchestrator` applies retry labels (max 3 attempts; stops after the same failure signature twice; security failures escalate immediately). Canonical thresholds live in [docs/operations.md](docs/operations.md).
+## Local checks
+Run from the repository root:
+```bash
+npm run validate          # harness asset consistency
+npm run test-hooks        # hook block/allow scenarios
+npm run test-issue-spec   # CC-SD issue-spec validator scenarios
+npm run test-diff-size    # diff-size / autonomy gate scenarios
+npm run test-e2e-manifest # e2e manifest structural checks
+npm run test-setup-github # ruleset payload builder scenarios
+npm run test-doctor       # doctor local check scenarios
+npm run check-e2e         # e2e bench manifest checks
+npm run run-e2e           # e2e bench executable acceptance checks
+npm run verify-bootstrap  # bootstrap integration test (all stacks)
+npm run check             # full local gate (validate + scenarios + e2e)
+```
+On Node.js versions older than 22, `run-e2e-bench.mjs` may skip verifiers that require the same runtime as CI and report them as skipped rather than failed.
+Convention tests in Python:
+```bash
+pip install pytest
+pytest evals/trajectories -q
+```
+## Phased rollout
+Don't enable everything at once. Canonical phase definitions (including Phase 0 baseline) are in [docs/arch.md](docs/arch.md) §7. This table is the quick path; details in [docs/adoption.md](docs/adoption.md).
+| Phase | Enable | Risk |
+|-------|--------|------|
+| 0 | CI walls, rulesets (`setup-github.sh`), optional Langfuse scaffold | Low |
+| 1 | instructions, agents, hooks, templates | Low |
+| 2 | `harness-ci` + your stack's `product-ci` | Medium |
+| 3 | `eval-ci` + optional eval ruleset | Medium |
+| 4 | coding agent L1 (`task:docs` / `task:test-fix` only) | Low–Medium |
+Getting started with L1 delegation: [docs/coding-agent-l1.md](docs/coding-agent-l1.md).
+## Project status
+Functional today: bootstrap, harness/product CI, diff-size and autonomy gates, hooks scenarios, retry orchestrator, PR context comments, executable acceptance-style E2E checks (9 tasks), and eval scaffolding.
+Implementation status, including known placeholders (aligned with the implementation status in [docs/arch.md](docs/arch.md)):
+| Area | Status |
+|------|--------|
+| Bootstrap, stack catalog, harness/product CI | **Implemented** |
+| Hooks, diff-size gate, CC-SD issue-spec check | **Implemented** |
+| Custom agents (triager / implementer / reviewer) | **Implemented** |
+| Eval CI with change-type job selection | **Implemented** |
+| Retry orchestrator, PR context comments | **Implemented** |
+| E2E bench (executable acceptance checks) | **Partial** — 9 tasks; not yet break-and-fix agent runner |
+| `gh models eval` in CI | **Scaffolded** — runs when prompts exist; org must enable Models |
+| gh-aw outer loop (`nightly-harness-review`, `weekly-redteam`) | **Partial** — GHA outer loop + gh-aw dogfood CI (#7); `.md`/`.lock.yml` stubs remain |
+| Langfuse / OTel export | **Scaffolded** — `infra/` + schema; wiring optional |
+### Observability placeholders (spec only)
+Until Langfuse / OTel is wired, PR context comments use fixed placeholders (workflow logic unchanged):
+| Field | When unset / n/a |
+|-------|------------------|
+| Trace link | `_configure LANGFUSE_HOST; then search by repo=…, pr_number=…_` |
+| AI credits | Informational — `_set max-ai-credits in org settings_` |
+| Threat detection | `n/a` — gh-aw outer loop remains stub |
+Validate sample payloads: `node scripts/validate-telemetry.mjs "$(cat infra/samples/telemetry-payload.json)"` and `node scripts/validate-telemetry.mjs "$(cat infra/samples/telemetry-artifact.json)"`. Inner-loop workflows emit artifacts per [docs/telemetry-artifacts.md](docs/telemetry-artifacts.md); field definitions in [docs/telemetry-schema.md](docs/telemetry-schema.md).
+## Architecture
+The harness is a **dual-loop control system**: a fast inner loop (agent + deterministic walls) and a slower outer loop (eval + harness revision).
+```mermaid
+flowchart LR
+    subgraph OUTER["Outer loop (daily–weekly)"]
+        EVAL[Eval / traces]
+        REVISE[Revise instructions / walls]
+    end
+    subgraph INNER["Inner loop (per task)"]
+        FF[Feed-forward<br/>instructions / agents / skills]
+        AGENT[Agent<br/>plan → act → test]
+        WALL[Walls<br/>CI / hooks / diff-size]
+    end
+    Issue[Issue + CC-SD] --> FF
+    FF --> AGENT --> WALL
+    WALL -- fail --> AGENT
+    WALL -- pass --> PR[Draft PR]
+    AGENT -.-> EVAL
+    PR -.-> EVAL
+    EVAL --> REVISE
+    REVISE --> FF
+```
+Layers as implemented in this repo (details in [docs/arch.md](docs/arch.md)):
+```mermaid
+flowchart TB
+    L0[L0 Governance<br/>rulesets · CODEOWNERS · labels]
+    L1[L1 Feed-forward<br/>instructions · agents · skills]
+    L2[L2 Execution<br/>coding agent · CLI · gh-aw stubs]
+    L3[L3 Walls<br/>harness-ci · product-ci · hooks]
+    L4[L4–L6 Observability · Eval · Outer loop<br/>Langfuse · eval-ci · nightly review stubs]
+    L0 --> L1 --> L2 --> L3 --> L4
+    L4 -. revise .-> L1
+```
+## Documentation
+If you are adopting the harness in a product repo, start with [docs/adoption.md](docs/adoption.md) and then keep [docs/revert-playbook.md](docs/revert-playbook.md) nearby.
+If you are operating an installed harness day to day, read [docs/operations.md](docs/operations.md) first, then [docs/failure-taxonomy.md](docs/failure-taxonomy.md) and [docs/telemetry-artifacts.md](docs/telemetry-artifacts.md).
+If you are writing or triaging Task Issues for L1 delegation, start with [docs/coding-agent-l1.md](docs/coding-agent-l1.md) and use the Actions fallback in [.github/workflows/l1-readiness-check.yml](.github/workflows/l1-readiness-check.yml) when local `gh`/Node is unavailable.
+If you are contributing to the harness itself, read [CONTRIBUTING.md](CONTRIBUTING.md), then [docs/arch.md](docs/arch.md), and use [docs/shared-config.md](docs/shared-config.md) for distribution/update strategy.
+| Document | Contents |
+|----------|----------|
+| [docs/arch.md](docs/arch.md) | Full architecture and design principles |
+| [docs/adoption.md](docs/adoption.md) | Installation and rollback |
+| [docs/operations.md](docs/operations.md) | Thresholds, retry policy, forbidden ops (**canonical** policy) |
+| [docs/revert-playbook.md](docs/revert-playbook.md) | Revert procedure (harness vs product) |
+| [docs/coding-agent-l1.md](docs/coding-agent-l1.md) | Running the first L1 delegations |
+| [docs/failure-taxonomy.md](docs/failure-taxonomy.md) | Classifying failures for outer-loop routing |
+| [docs/kpi-baseline.md](docs/kpi-baseline.md) | Weekly KPI tracking template |
+| [docs/telemetry-schema.md](docs/telemetry-schema.md) | Required observability fields |
+| [docs/telemetry-artifacts.md](docs/telemetry-artifacts.md) | Inner-loop JSON artifact format and storage |
+| [docs/gh-aw-dogfood.md](docs/gh-aw-dogfood.md) | Bounded gh-aw validation on sdlc-gh |
+| [docs/auth-boundaries.md](docs/auth-boundaries.md) | Credential boundaries per execution mode |
+| [docs/publishing.md](docs/publishing.md) | npm package release (`@guilz-dev/sdlc-gh`) |
+| [docs/shared-config.md](docs/shared-config.md) | Distributing shared assets across repositories |
+| [docs/exceptions/README.md](docs/exceptions/README.md) | Recording policy exceptions |
+| [infra/README.md](infra/README.md) | Self-hosting Langfuse / OTel |
+| [CONTRIBUTING.md](CONTRIBUTING.md) | Contribution workflow and review expectations |
+| [SECURITY.md](SECURITY.md) | Vulnerability reporting policy |
+| [CODE_OF_CONDUCT.md](CODE_OF_CONDUCT.md) | Community behavior expectations |
+| [SUPPORT.md](SUPPORT.md) | Support routes and troubleshooting intake |
+## FAQ
+**Q. How do I pull template updates into a repository that already uses the harness?**
+A. Re-run `bootstrap-harness.sh` to overwrite harness assets, then review the diff with `npm run drift-report`. The `harness-sync` workflow produces a weekly drift report.
+**Q. Does the harness itself need a test framework (Jest etc.)?**
+A. No. The harness is guarded by the `scripts/*.mjs` checks and `eval-ci`. Your application keeps its own test runner (vitest / pytest / go test / rspec / phpunit).
+## Project policies
+- Contribution guide: [CONTRIBUTING.md](CONTRIBUTING.md)
+- Security reporting: [SECURITY.md](SECURITY.md)
+- Code of conduct: [CODE_OF_CONDUCT.md](CODE_OF_CONDUCT.md)
+- Support: [SUPPORT.md](SUPPORT.md)
+## License
+[MIT](LICENSE)

package/config/stacks.json ADDED Viewed

@@ -0,0 +1,55 @@
+{
+  "version": 1,
+  "stacks": [
+    {
+      "id": "ts",
+      "label": "TypeScript",
+      "profile": "typescript.instructions.md",
+      "sampleDir": "ts",
+      "marker": "package.json",
+      "sampleMarker": "sample/ts/package.json",
+      "workflow": "product-ci-ts.yml",
+      "bootstrapCheck": "package.json"
+    },
+    {
+      "id": "python",
+      "label": "Python",
+      "profile": "python.instructions.md",
+      "sampleDir": "python",
+      "marker": "requirements-dev.txt",
+      "sampleMarker": "sample/python/requirements-dev.txt",
+      "workflow": "product-ci-python.yml",
+      "bootstrapCheck": "requirements-dev.txt"
+    },
+    {
+      "id": "go",
+      "label": "Go",
+      "profile": "go.instructions.md",
+      "sampleDir": "go",
+      "marker": "go.mod",
+      "sampleMarker": "sample/go/go.mod",
+      "workflow": "product-ci-go.yml",
+      "bootstrapCheck": "go.mod"
+    },
+    {
+      "id": "ruby",
+      "label": "Ruby",
+      "profile": "ruby.instructions.md",
+      "sampleDir": "ruby",
+      "marker": "Gemfile",
+      "sampleMarker": "sample/ruby/Gemfile",
+      "workflow": "product-ci-ruby.yml",
+      "bootstrapCheck": "Gemfile"
+    },
+    {
+      "id": "php",
+      "label": "PHP",
+      "profile": "php.instructions.md",
+      "sampleDir": "php",
+      "marker": "composer.json",
+      "sampleMarker": "sample/php/composer.json",
+      "workflow": "product-ci-php.yml",
+      "bootstrapCheck": "composer.json"
+    }
+  ]
+}