@guilz-dev/sdlc-gh 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/CODEOWNERS +5 -0
- package/.github/ISSUE_TEMPLATE/bug_report.yml +68 -0
- package/.github/ISSUE_TEMPLATE/config.yml +1 -0
- package/.github/ISSUE_TEMPLATE/feature_request.yml +39 -0
- package/.github/ISSUE_TEMPLATE/support.yml +56 -0
- package/.github/ISSUE_TEMPLATE/task.yml +89 -0
- package/.github/agents/implementer.agent.md +17 -0
- package/.github/agents/reviewer.agent.md +18 -0
- package/.github/agents/triager.agent.md +13 -0
- package/.github/aw/actions-lock.json +9 -0
- package/.github/copilot-instructions.md +35 -0
- package/.github/hooks/hooks.json +12 -0
- package/.github/instructions/core.instructions.md +11 -0
- package/.github/instructions/profiles/go.instructions.md +10 -0
- package/.github/instructions/profiles/php.instructions.md +11 -0
- package/.github/instructions/profiles/python.instructions.md +11 -0
- package/.github/instructions/profiles/ruby.instructions.md +11 -0
- package/.github/instructions/profiles/typescript.instructions.md +11 -0
- package/.github/labels.yml +55 -0
- package/.github/pull_request_template.md +33 -0
- package/.github/ruleset.example.json +33 -0
- package/.github/ruleset.harness-eval.example.json +29 -0
- package/.github/skills/quality-loop/SKILL.md +23 -0
- package/.github/workflows/agent-retry-orchestrator.yml +161 -0
- package/.github/workflows/copilot-setup-steps.yml +64 -0
- package/.github/workflows/eval-ci.yml +169 -0
- package/.github/workflows/eval-drift.yml +75 -0
- package/.github/workflows/gh-aw-dogfood-ci.yml +73 -0
- package/.github/workflows/harness-ci.yml +244 -0
- package/.github/workflows/harness-sync.yml +28 -0
- package/.github/workflows/l1-readiness-check.yml +45 -0
- package/.github/workflows/labels-sync.yml +24 -0
- package/.github/workflows/nightly-harness-review.lock.yml +1643 -0
- package/.github/workflows/nightly-harness-review.md +87 -0
- package/.github/workflows/nightly-harness-review.yml +63 -0
- package/.github/workflows/npm-publish.yml +49 -0
- package/.github/workflows/pr-context-comment.yml +138 -0
- package/.github/workflows/product-ci-go.yml +33 -0
- package/.github/workflows/product-ci-php.yml +39 -0
- package/.github/workflows/product-ci-python.yml +34 -0
- package/.github/workflows/product-ci-ruby.yml +35 -0
- package/.github/workflows/product-ci-ts.yml +37 -0
- package/.github/workflows/task-issue-label-sync.yml +50 -0
- package/.github/workflows/weekly-redteam.lock.yml +1571 -0
- package/.github/workflows/weekly-redteam.md +76 -0
- package/.github/zizmor.yml +11 -0
- package/AGENTS.md +54 -0
- package/LICENSE +21 -0
- package/README.md +366 -0
- package/config/stacks.json +55 -0
- package/docs/adoption.md +126 -0
- package/docs/arch.md +535 -0
- package/docs/auth-boundaries.md +16 -0
- package/docs/coding-agent-l1.md +152 -0
- package/docs/exceptions/README.md +25 -0
- package/docs/exceptions/TEMPLATE.md +8 -0
- package/docs/failure-taxonomy.md +23 -0
- package/docs/gh-aw-dogfood.md +109 -0
- package/docs/kpi-baseline.md +9 -0
- package/docs/nightly-harness-review.md +94 -0
- package/docs/operations.md +108 -0
- package/docs/publishing.md +79 -0
- package/docs/revert-playbook.md +44 -0
- package/docs/shared-config.md +30 -0
- package/docs/telemetry-artifacts.md +78 -0
- package/docs/telemetry-schema.md +60 -0
- package/evals/.score-baseline.json +6 -0
- package/evals/e2e-bench/README.md +28 -0
- package/evals/e2e-bench/manifest.json +16 -0
- package/evals/e2e-bench/tasks/e2e-001.yml +10 -0
- package/evals/e2e-bench/tasks/e2e-002.yml +11 -0
- package/evals/e2e-bench/tasks/e2e-003.yml +10 -0
- package/evals/e2e-bench/tasks/e2e-004.yml +14 -0
- package/evals/e2e-bench/tasks/e2e-005.yml +11 -0
- package/evals/e2e-bench/tasks/e2e-006.yml +10 -0
- package/evals/e2e-bench/tasks/e2e-007.yml +10 -0
- package/evals/e2e-bench/tasks/e2e-008.yml +10 -0
- package/evals/e2e-bench/tasks/e2e-009.yml +10 -0
- package/evals/trajectories/rubric.md +12 -0
- package/evals/trajectories/test_harness_conventions.py +271 -0
- package/infra/README.md +49 -0
- package/infra/langfuse/docker-compose.yml +25 -0
- package/infra/otel/collector-config.yml +24 -0
- package/infra/samples/gh-aw-dogfood-report.json +44 -0
- package/infra/samples/harness-review-routing-plan.json +19 -0
- package/infra/samples/harness-review-summary.json +61 -0
- package/infra/samples/telemetry-artifact.json +29 -0
- package/infra/samples/telemetry-payload.json +19 -0
- package/package.json +85 -0
- package/prompts/triager-classify.prompt.yml +10 -0
- package/sample/go/add.go +5 -0
- package/sample/go/add_test.go +9 -0
- package/sample/go/go.mod +3 -0
- package/sample/php/composer.json +26 -0
- package/sample/php/composer.lock +1881 -0
- package/sample/php/phpunit.xml +8 -0
- package/sample/php/src/Add.php +13 -0
- package/sample/php/tests/AddTest.php +16 -0
- package/sample/python/requirements-dev.txt +2 -0
- package/sample/python/src/__init__.py +0 -0
- package/sample/python/src/greet.py +3 -0
- package/sample/python/tests/conftest.py +4 -0
- package/sample/python/tests/test_greet.py +5 -0
- package/sample/ruby/.rubocop.yml +10 -0
- package/sample/ruby/Gemfile +6 -0
- package/sample/ruby/Gemfile.lock +58 -0
- package/sample/ruby/lib/add.rb +9 -0
- package/sample/ruby/spec/add_spec.rb +11 -0
- package/sample/ts/biome.json +6 -0
- package/sample/ts/package-lock.json +1763 -0
- package/sample/ts/package.json +15 -0
- package/sample/ts/src/add.ts +3 -0
- package/sample/ts/tests/add.test.ts +8 -0
- package/sample/ts/tsconfig.json +12 -0
- package/scripts/aggregate-harness-review.mjs +48 -0
- package/scripts/bootstrap-harness.sh +411 -0
- package/scripts/check-diff-size.mjs +46 -0
- package/scripts/check-e2e-manifest.mjs +35 -0
- package/scripts/check-eval-score-drift.mjs +31 -0
- package/scripts/check-gh-aw-dogfood-scope.mjs +51 -0
- package/scripts/check-issue-spec.mjs +215 -0
- package/scripts/check-l1-readiness.mjs +82 -0
- package/scripts/check-open-pr-limit.mjs +34 -0
- package/scripts/doctor.mjs +177 -0
- package/scripts/emit-gh-aw-dogfood-report.mjs +112 -0
- package/scripts/emit-telemetry-artifact.mjs +99 -0
- package/scripts/fetch-telemetry-artifacts.mjs +176 -0
- package/scripts/harness-drift-report.mjs +99 -0
- package/scripts/lib/bootstrap-copy.mjs +123 -0
- package/scripts/lib/ccsd-contract.mjs +212 -0
- package/scripts/lib/diff-size.mjs +103 -0
- package/scripts/lib/doctor-local.mjs +179 -0
- package/scripts/lib/e2e-manifest.mjs +76 -0
- package/scripts/lib/gh-aw-dogfood.mjs +293 -0
- package/scripts/lib/github-config.mjs +94 -0
- package/scripts/lib/harness-ci-fragments.mjs +98 -0
- package/scripts/lib/harness-review-routing.mjs +244 -0
- package/scripts/lib/harness-review.mjs +388 -0
- package/scripts/lib/issue-form-label-sync.mjs +56 -0
- package/scripts/lib/l1-readiness.mjs +258 -0
- package/scripts/lib/merge-harness-package.mjs +36 -0
- package/scripts/lib/npm-package.mjs +129 -0
- package/scripts/lib/setup-wizard.mjs +224 -0
- package/scripts/lib/stacks.mjs +138 -0
- package/scripts/lib/telemetry-artifact.mjs +253 -0
- package/scripts/lib/template-root.mjs +39 -0
- package/scripts/merge-harness-package.mjs +14 -0
- package/scripts/route-harness-review.mjs +168 -0
- package/scripts/run-e2e-bench.mjs +216 -0
- package/scripts/sdlc-gh-cli.mjs +91 -0
- package/scripts/select-eval-jobs.mjs +41 -0
- package/scripts/setup-github.mjs +242 -0
- package/scripts/setup-github.sh +4 -0
- package/scripts/setup-wizard.mjs +426 -0
- package/scripts/test-bootstrap-guidance-scenarios.mjs +94 -0
- package/scripts/test-diff-size-scenarios.mjs +88 -0
- package/scripts/test-doctor-scenarios.mjs +70 -0
- package/scripts/test-e2e-manifest-scenarios.mjs +65 -0
- package/scripts/test-gh-aw-dogfood-scenarios.mjs +74 -0
- package/scripts/test-harness-review-routing-scenarios.mjs +130 -0
- package/scripts/test-harness-review-scenarios.mjs +92 -0
- package/scripts/test-hooks-scenarios.mjs +44 -0
- package/scripts/test-issue-form-label-sync-scenarios.mjs +48 -0
- package/scripts/test-issue-spec-scenarios.mjs +258 -0
- package/scripts/test-l1-readiness-scenarios.mjs +204 -0
- package/scripts/test-merge-harness-package-scenarios.mjs +53 -0
- package/scripts/test-npm-package-scenarios.mjs +31 -0
- package/scripts/test-sdlc-gh-cli-scenarios.mjs +54 -0
- package/scripts/test-setup-github-scenarios.mjs +103 -0
- package/scripts/test-setup-wizard-scenarios.mjs +114 -0
- package/scripts/test-telemetry-artifact-scenarios.mjs +69 -0
- package/scripts/trim-harness-ci.mjs +18 -0
- package/scripts/validate-gh-aw-compile.mjs +64 -0
- package/scripts/validate-harness.mjs +199 -0
- package/scripts/validate-telemetry.mjs +21 -0
- package/scripts/verify-bootstrap-stacks.sh +192 -0
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import assert from "node:assert/strict";
|
|
3
|
+
import { validateManifest } from "./lib/e2e-manifest.mjs";
|
|
4
|
+
|
|
5
|
+
// Scenario tests for validateManifest(manifest, taskFileIds).
// Every scenario derives from this known-good manifest with three tasks.
const baseManifest = {
  version: 1,
  min_tasks: 3,
  last_rotated: "2026-07-04T00:00:00Z",
  tasks: [
    { id: "e2e-001", class: "docs" },
    { id: "e2e-002", class: "test-fix" },
    { id: "e2e-003", class: "refactor" },
  ],
};

/**
 * Validate a manifest against a list of task-file ids and require that at
 * least one reported error message contains the given fragment.
 *
 * @param {object} manifest - Manifest object handed to validateManifest.
 * @param {string[]} taskFileIds - Task ids handed to validateManifest as the second argument.
 * @param {string} fragment - Substring expected in one of the error messages.
 */
function expectErrorContaining(manifest, taskFileIds, fragment) {
  const outcome = validateManifest(manifest, taskFileIds);
  assert.ok(outcome.errors.some((message) => message.includes(fragment)));
}

// Happy path: manifest and task files line up, so no errors are reported.
const cleanRun = validateManifest(baseManifest, ["e2e-001", "e2e-002", "e2e-003"]);
assert.equal(cleanRun.errors.length, 0);

// The same task id listed twice.
expectErrorContaining(
  {
    ...baseManifest,
    tasks: [
      { id: "e2e-001", class: "docs" },
      { id: "e2e-001", class: "docs" },
      { id: "e2e-002", class: "test-fix" },
    ],
  },
  ["e2e-001", "e2e-002"],
  "Duplicate",
);

// The manifest references e2e-003 but that id is absent from the file list.
expectErrorContaining(baseManifest, ["e2e-001", "e2e-002"], "Missing task file");

// The file list contains e2e-099, which the manifest does not mention.
expectErrorContaining(baseManifest, ["e2e-001", "e2e-002", "e2e-003", "e2e-099"], "Orphan");

// A task class outside the supported set.
expectErrorContaining(
  {
    ...baseManifest,
    tasks: [{ id: "e2e-001", class: "unknown-class" }],
  },
  ["e2e-001"],
  "Unsupported",
);

// Fewer tasks than min_tasks requires.
expectErrorContaining(
  { ...baseManifest, min_tasks: 5, tasks: [{ id: "e2e-001", class: "docs" }] },
  ["e2e-001"],
  "at least 5",
);

// last_rotated explicitly unset.
expectErrorContaining(
  { ...baseManifest, last_rotated: undefined },
  ["e2e-001", "e2e-002", "e2e-003"],
  "last_rotated",
);

// last_rotated present but not a parseable date.
expectErrorContaining(
  { ...baseManifest, last_rotated: "not-a-date" },
  ["e2e-001", "e2e-002", "e2e-003"],
  "invalid last_rotated",
);

console.log("E2E manifest scenario tests passed");
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import assert from "node:assert/strict";
|
|
3
|
+
import {
|
|
4
|
+
buildDogfoodReport,
|
|
5
|
+
DOGFOOD_TASK_LABEL,
|
|
6
|
+
evaluateDogfoodScope,
|
|
7
|
+
findOutOfScopePaths,
|
|
8
|
+
isDogfoodAllowedPath,
|
|
9
|
+
parseGhAwLockMetadata,
|
|
10
|
+
parseGhAwWorkflowMarkdown,
|
|
11
|
+
validateGhAwSourceSections,
|
|
12
|
+
validateSafeOutputs,
|
|
13
|
+
} from "./lib/gh-aw-dogfood.mjs";
|
|
14
|
+
import { readFileSync } from "node:fs";
|
|
15
|
+
|
|
16
|
+
// --- Label constant and path allow-list -------------------------------------
assert.equal(DOGFOOD_TASK_LABEL, "task:gh-aw-dogfood");

const pathExpectations = [
  [".github/workflows/nightly-harness-review.md", true],
  [".github/labels.yml", true],
  ["src/app.ts", false],
];
for (const [candidatePath, expectedAllowed] of pathExpectations) {
  assert.equal(isDogfoodAllowedPath(candidatePath), expectedAllowed);
}
assert.deepEqual(findOutOfScopePaths(["docs/gh-aw-dogfood.md", "src/x.ts"]), ["src/x.ts"]);

// --- Scope evaluation -------------------------------------------------------
// With no labels supplied, the result is ok and not enforced even though
// src/x.ts is in the changed paths.
const unlabeledScope = evaluateDogfoodScope([".github/labels.yml", "src/x.ts"], []);
assert.equal(unlabeledScope.ok, true);
assert.equal(unlabeledScope.enforced, false);

// With the dogfood label supplied, the same out-of-scope path is rejected.
const labeledScope = evaluateDogfoodScope(["src/x.ts"], [DOGFOOD_TASK_LABEL]);
assert.equal(labeledScope.ok, false);
assert.equal(labeledScope.enforced, true);

// --- Checked-in workflow sources (paths resolve against the working directory)
const nightlySource = readFileSync(".github/workflows/nightly-harness-review.md", "utf8");
const nightlyWorkflow = parseGhAwWorkflowMarkdown(nightlySource);
assert.ok(nightlyWorkflow?.fields?.["safe-outputs"]);
assert.equal(validateSafeOutputs(nightlyWorkflow.fields).ok, true);
assert.equal(validateGhAwSourceSections(nightlySource, "nightly-harness-review").ok, true);
assert.ok(nightlyWorkflow.fields["safe-outputs"]["create-issue"]);

const redteamSource = readFileSync(".github/workflows/weekly-redteam.md", "utf8");
const redteamWorkflow = parseGhAwWorkflowMarkdown(redteamSource);
assert.equal(validateSafeOutputs(redteamWorkflow.fields).ok, true);
assert.equal(validateGhAwSourceSections(redteamSource, "weekly-redteam").ok, true);
assert.ok(redteamWorkflow.fields["safe-outputs"]["create-issue"]);
assert.equal(redteamWorkflow.fields["safe-outputs"]["create-pull-request"], undefined);

// --- Lock-file metadata -----------------------------------------------------
const nightlyLockMeta = parseGhAwLockMetadata(
  readFileSync(".github/workflows/nightly-harness-review.lock.yml", "utf8"),
);
assert.equal(nightlyLockMeta?.compiler_version, "v0.81.6");

// --- safe-outputs validation rejects this create-pull-request config --------
const rejectedSafeOutputs = validateSafeOutputs({
  "safe-outputs": { "create-pull-request": { max: 5 } },
});
assert.equal(rejectedSafeOutputs.ok, false);

// --- Report assembly --------------------------------------------------------
// Fresh passing criterion per use so no object is shared between inputs.
const passing = () => ({ ok: true, issues: [] });

const cleanReport = buildDogfoodReport({
  scope: passing(),
  safeOutputs: { nightly: passing() },
  compile: { ok: true, skipped: true, issues: [] },
  lockDrift: passing(),
});
assert.equal(cleanReport.pass, true);
assert.ok(cleanReport.criteria.reviewability.pass);

// A compile step that was skipped (ok: false, skipped: true) still yields a passing report.
const skippedCompileReport = buildDogfoodReport({
  scope: passing(),
  safeOutputs: { nightly: passing() },
  compile: { ok: false, skipped: true, issues: ["gh aw CLI not available"] },
  lockDrift: passing(),
});
assert.equal(skippedCompileReport.pass, true);

console.log("gh-aw-dogfood scenarios ok");
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import assert from "node:assert/strict";
|
|
3
|
+
import { readFileSync } from "node:fs";
|
|
4
|
+
import {
|
|
5
|
+
applyRoutingPlanDryRun,
|
|
6
|
+
bodyHasRoutingMarker,
|
|
7
|
+
buildIssueAction,
|
|
8
|
+
buildRoutingPlan,
|
|
9
|
+
hasRepeatedFfFindings,
|
|
10
|
+
hasRepeatedWallFindings,
|
|
11
|
+
inferRoutingScope,
|
|
12
|
+
ISSUE_KIND,
|
|
13
|
+
routingDedupeKey,
|
|
14
|
+
routingMarker,
|
|
15
|
+
} from "./lib/harness-review-routing.mjs";
|
|
16
|
+
|
|
17
|
+
// Baseline fixture: the sample summary checked in under infra/samples
// (the path resolves against the current working directory).
const baseline = JSON.parse(
  readFileSync("infra/samples/harness-review-summary.json", "utf8"),
);

/**
 * Derive a summary from the baseline fixture with a replaced classification
 * list and selected rollup fields overridden.
 *
 * @param {object[]} classifications - Replacement classifications array.
 * @param {object} rollupOverrides - Fields merged over the baseline rollup.
 * @returns {object} A new summary object; the baseline itself is not modified.
 */
function summaryVariant(classifications, rollupOverrides) {
  return {
    ...baseline,
    classifications,
    rollup: { ...baseline.rollup, ...rollupOverrides },
  };
}

assert.equal(hasRepeatedFfFindings(baseline), true);
assert.equal(hasRepeatedWallFindings(baseline), false);

// The baseline yields exactly one harness-revision action scoped to test-fix/lint.
const baselinePlan = buildRoutingPlan(baseline);
assert.equal(baselinePlan.actions.length, 1);
const revisionAction = baselinePlan.actions[0];
assert.equal(revisionAction.kind, ISSUE_KIND.HARNESS_REVISION);
assert.ok(revisionAction.body.includes(routingMarker(revisionAction.dedupe_key)));
assert.equal(revisionAction.scope, "task:test-fix|wall:lint");

// Adding one more 壁不足 classification (rollup count 2) adds a wall-addition action.
const extraWallGap = {
  repo: "org/product",
  task_id: "9",
  pr_number: 103,
  classification: "壁不足",
  rationale: "Harness CI passed while review_outcome is changes_requested",
  wall_failure_types: [],
  max_retry_count: 0,
};
const wallPlan = buildRoutingPlan(
  summaryVariant([...baseline.classifications, extraWallGap], {
    by_classification: { ...baseline.rollup.by_classification, "壁不足": 2 },
    review_rejection_proxy_count: 1,
  }),
);
assert.equal(wallPlan.actions.length, 2);
assert.ok(wallPlan.actions.some(({ kind }) => kind === ISSUE_KIND.WALL_ADDITION));

// A single FF不足 classification with no repeated signatures routes nothing and is skipped.
const loneFfSummary = summaryVariant([baseline.classifications[1]], {
  repeated_failure_signatures: [],
  by_classification: { "FF不足": 1 },
});
assert.equal(buildRoutingPlan(loneFfSummary).actions.length, 0);
assert.ok(buildRoutingPlan(loneFfSummary).skipped.length >= 1);

assert.equal(inferRoutingScope(baseline.classifications), "tasks:docs+test-fix");

// The dedupe key computed by hand must match the marker embedded in the action body.
const expectedDedupeKey = routingDedupeKey(
  "org/product",
  ISSUE_KIND.HARNESS_REVISION,
  "lint",
  "task:test-fix|wall:lint",
);
assert.equal(bodyHasRoutingMarker(revisionAction.body, expectedDedupeKey), true);
assert.ok(revisionAction.title.includes("task:test-fix|wall:lint"));

// Dry run: an existing issue carrying the same body is updated rather than duplicated.
const dryRun = applyRoutingPlanDryRun(baselinePlan, {
  existingIssues: [{ number: 99, body: revisionAction.body }],
});
const [firstDryResult] = dryRun.results;
assert.equal(firstDryResult.operation, "update_issue");
assert.equal(firstDryResult.issue_number, 99);

// A repeated lint signature without any FF不足 classification produces no action.
const lintSignatureOnly = summaryVariant([baseline.classifications[0]], {
  by_classification: { "モデル限界": 1 },
  repeated_failure_signatures: [
    { wall_failure_type: "lint", record_count: 2, task_count: 1, task_ids: ["42"] },
  ],
});
assert.equal(hasRepeatedFfFindings(lintSignatureOnly), true);
const lintSignaturePlan = buildRoutingPlan(lintSignatureOnly);
assert.equal(lintSignaturePlan.actions.length, 0);
assert.ok(
  lintSignaturePlan.skipped.some((entry) => entry.reason.includes("without FF不足 classification")),
);

// The same wall failure under a different task class gets its own scope and dedupe key.
const docsClassifications = [
  ["55", 222],
  ["56", 223],
].map(([task_id, pr_number]) => ({
  ...baseline.classifications[1],
  task_id,
  pr_number,
  task_class: "docs",
}));
const docsScopePlan = buildRoutingPlan(
  summaryVariant(docsClassifications, {
    by_classification: { "FF不足": 2 },
    repeated_failure_signatures: [
      { wall_failure_type: "lint", record_count: 2, task_count: 2, task_ids: ["55", "56"] },
    ],
  }),
);
assert.equal(docsScopePlan.actions[0].scope, "task:docs|wall:lint");
assert.notEqual(docsScopePlan.actions[0].dedupe_key, revisionAction.dedupe_key);

console.log("harness-review-routing scenarios ok");
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import assert from "node:assert/strict";
|
|
3
|
+
import {
|
|
4
|
+
buildHarnessReviewSummary,
|
|
5
|
+
classifyTaskGroup,
|
|
6
|
+
dedupeTelemetryRecords,
|
|
7
|
+
formatHarnessReviewMarkdown,
|
|
8
|
+
} from "./lib/harness-review.mjs";
|
|
9
|
+
|
|
10
|
+
/**
 * Build a telemetry artifact fixture for the scenarios in this script.
 *
 * Envelope fields (`emitted_at`, `source`, `workflow_run_id`) fall back to
 * fixed defaults when the override is null or undefined. Payload fields are
 * shallow-merged over the default payload, so any key in `overrides.payload`
 * replaces the default of the same name.
 *
 * @param {object} [overrides] - Optional overrides; omitted means all defaults.
 * @param {string} [overrides.emitted_at] - Emission timestamp string.
 * @param {string} [overrides.source] - Emitting source name.
 * @param {number} [overrides.workflow_run_id] - Workflow run id.
 * @param {object} [overrides.payload] - Payload fields merged over the defaults.
 * @returns {object} A new artifact object with a freshly built payload.
 */
function artifact(overrides = {}) {
  const {
    payload: payloadOverrides,
    emitted_at: emittedAt,
    source,
    workflow_run_id: workflowRunId,
  } = overrides;

  const payload = {
    task_id: "42",
    pr_number: 101,
    repo: "org/product",
    task_class: "docs",
    autonomy_level: "L1",
    retry_count: 0,
    wall_failure_type: "",
    final_outcome: "in_progress",
    review_outcome: "pending",
    // Spreading undefined is a no-op, so a missing payload override is fine.
    ...payloadOverrides,
  };

  return {
    schema_version: "1",
    // `??` (not `||`) so a legitimate falsy value such as workflow_run_id 0 is kept.
    emitted_at: emittedAt ?? "2026-07-04T12:00:00.000Z",
    source: source ?? "harness-ci",
    workflow_run_id: workflowRunId ?? 1,
    payload,
  };
}
|
|
31
|
+
|
|
32
|
+
// --- Deduplication ----------------------------------------------------------
// Two of the three records share workflow_run_id 1 and source "harness-ci"
// (differing only in emitted_at); two records are expected to remain.
const dedupeInput = [
  artifact({ workflow_run_id: 1, source: "harness-ci" }),
  artifact({ workflow_run_id: 1, source: "harness-ci", emitted_at: "2026-07-04T13:00:00.000Z" }),
  artifact({ workflow_run_id: 2, source: "harness-ci", payload: { pr_number: 102 } }),
];
const dedupedRecords = dedupeTelemetryRecords(dedupeInput);
assert.equal(dedupedRecords.length, 2);

// --- Single-group classification --------------------------------------------
// Escalated after three retries on a test failure.
const escalatedGroup = [
  artifact({
    source: "agent-retry-orchestrator",
    payload: { retry_count: 3, wall_failure_type: "test", final_outcome: "escalated" },
  }),
];
const escalatedResult = classifyTaskGroup(escalatedGroup);
assert.equal(escalatedResult?.classification, "モデル限界");

// No wall failure recorded, but the review requested changes.
const reviewRejectedGroup = [
  artifact({ source: "harness-ci", payload: { wall_failure_type: "" } }),
  artifact({
    source: "pr-context",
    payload: { review_outcome: "changes_requested", wall_failure_type: "" },
  }),
];
const reviewRejectedResult = classifyTaskGroup(reviewRejectedGroup);
assert.equal(reviewRejectedResult?.classification, "壁不足");

// The same lint failure reported by harness CI and again by the retry orchestrator.
const repeatedLintGroup = [
  artifact({ workflow_run_id: 10, source: "harness-ci", payload: { wall_failure_type: "lint" } }),
  artifact({
    workflow_run_id: 11,
    source: "agent-retry-orchestrator",
    payload: { wall_failure_type: "lint", retry_count: 1 },
  }),
];
const repeatedLintResult = classifyTaskGroup(repeatedLintGroup);
assert.equal(repeatedLintResult?.classification, "FF不足");

// The same test failure across two harness CI runs.
const repeatedTestGroup = [20, 21].map((runId) =>
  artifact({ workflow_run_id: runId, source: "harness-ci", payload: { wall_failure_type: "test" } }),
);
const repeatedTestResult = classifyTaskGroup(repeatedTestGroup);
assert.equal(repeatedTestResult?.classification, "モデル限界");

// --- Full summary -----------------------------------------------------------
// One escalated record on the default task/PR plus two records for task 55 / PR 200.
const reviewSummary = buildHarnessReviewSummary([
  artifact({
    source: "agent-retry-orchestrator",
    payload: { retry_count: 3, wall_failure_type: "test", final_outcome: "escalated" },
  }),
  artifact({
    source: "harness-ci",
    payload: { pr_number: 200, task_id: "55", wall_failure_type: "" },
  }),
  artifact({
    source: "pr-context",
    payload: { pr_number: 200, task_id: "55", review_outcome: "changes_requested" },
  }),
]);
const { rollup: summaryRollup } = reviewSummary;
assert.equal(summaryRollup.failure_groups, 2);
assert.equal(summaryRollup.by_classification["モデル限界"], 1);
assert.equal(summaryRollup.by_classification["壁不足"], 1);
assert.match(formatHarnessReviewMarkdown(reviewSummary), /Nightly harness review/);
assert.ok(Array.isArray(summaryRollup.repeated_failure_signatures));

console.log("harness-review scenarios ok");
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* Reject/allow scenario tests for hooks.json patterns (Phase 2 static gate).
|
|
4
|
+
*/
|
|
5
|
+
import { readFileSync, existsSync } from "node:fs";
|
|
6
|
+
import { join } from "node:path";
|
|
7
|
+
|
|
8
|
+
// hooks.json is looked up relative to the directory the script is run from.
const hooksPath = join(process.cwd(), ".github/hooks/hooks.json");
if (!existsSync(hooksPath)) {
  console.error("hooks.json not found");
  process.exit(1);
}

const { hooks } = JSON.parse(readFileSync(hooksPath, "utf8"));
// Fail with a clear message instead of a TypeError on `hooks.filter` when the
// file parses but lacks the expected top-level array.
if (!Array.isArray(hooks)) {
  console.error('::error::hooks.json must contain a top-level "hooks" array');
  process.exit(1);
}

// Only entries with action "block" and a non-empty pattern take part; patterns
// are compiled case-insensitively. No `g` flag, so `.test` keeps no state.
const blockPatterns = hooks
  .filter((hook) => hook.action === "block" && hook.pattern)
  .map((hook) => new RegExp(hook.pattern, "i"));

/**
 * Report whether any block pattern matches the given command line.
 *
 * @param {string} command - Command text to check.
 * @returns {boolean} True when at least one block pattern matches.
 */
const isBlocked = (command) => blockPatterns.some((pattern) => pattern.test(command));

// Commands that must be rejected by at least one block pattern.
const mustBlock = [
  "git push --force",
  "git push -f origin main",
  "rm -rf /",
  "DROP TABLE users",
];

// Commands that must not match any block pattern.
const mustAllow = ["git status", "npm test", "pytest -q"];

// Collect every mismatch before exiting so a single run shows all broken scenarios.
const failures = [
  ...mustBlock.filter((cmd) => !isBlocked(cmd)).map((cmd) => `Expected block for: ${cmd}`),
  ...mustAllow.filter((cmd) => isBlocked(cmd)).map((cmd) => `Unexpected block for: ${cmd}`),
];

if (failures.length > 0) {
  for (const failure of failures) {
    console.error(`::error::${failure}`);
  }
  process.exit(1);
}

console.log("Hooks scenario tests passed");
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
import assert from "node:assert/strict";
|
|
2
|
+
import { parseTaskIssueSelections, planIssueLabels } from "./lib/issue-form-label-sync.mjs";
|
|
3
|
+
|
|
4
|
+
/**
 * Render an issue body with the three headed sections these scenarios use
 * (Goal, Task class, Max autonomy level), each followed by a blank line and
 * its value.
 *
 * @param {string} goal - Text placed under "### Goal".
 * @param {string} taskClass - Text placed under "### Task class".
 * @param {string} autonomy - Text placed under "### Max autonomy level".
 * @returns {string} The rendered body, ending with a trailing newline.
 */
function renderTaskIssueBody(goal, taskClass, autonomy) {
  return `### Goal

${goal}

### Task class

${taskClass}

### Max autonomy level

${autonomy}
`;
}

// A well-formed task issue: both selections parse and map to labels.
const taskIssueBody = renderTaskIssueBody("Tighten docs.", "docs", "L1");
const selections = parseTaskIssueSelections(taskIssueBody);
const expectedSelections = {
  isTaskIssue: true,
  taskClass: "docs",
  autonomy: "L1",
  taskLabel: "task:docs",
  autonomyLabel: "autonomy:L1",
};
for (const [field, expectedValue] of Object.entries(expectedSelections)) {
  assert.equal(selections[field], expectedValue);
}

// Stale task:/autonomy: labels are replaced; unrelated labels are kept.
const relabelPlan = planIssueLabels(["bug", "task:infra", "autonomy:L0"], selections);
assert.deepEqual(relabelPlan.labels, ["bug", "task:docs", "autonomy:L1"]);
assert.equal(relabelPlan.changed, true);

// Labels that already match the selections produce no change.
const noopPlan = planIssueLabels(["bug", "task:docs", "autonomy:L1"], selections);
assert.equal(noopPlan.changed, false);

// Unrecognized selections ("custom" / "L9") leave the existing labels untouched.
const unrecognizedSelections = parseTaskIssueSelections(renderTaskIssueBody("Test", "custom", "L9"));
const unrecognizedPlan = planIssueLabels(["bug"], unrecognizedSelections);
assert.equal(unrecognizedPlan.changed, false);
assert.deepEqual(unrecognizedPlan.labels, ["bug"]);

// A body without the form headings is not treated as a task issue.
const plainIssue = parseTaskIssueSelections("plain issue body");
assert.equal(plainIssue.isTaskIssue, false);
|