@nlaprell/shipit 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (160) hide show
  1. package/.cursor/commands/create_intent_from_issue.md +28 -0
  2. package/.cursor/commands/create_pr.md +28 -0
  3. package/.cursor/commands/dashboard.md +39 -0
  4. package/.cursor/commands/deploy.md +152 -0
  5. package/.cursor/commands/drift_check.md +36 -0
  6. package/.cursor/commands/fix.md +39 -0
  7. package/.cursor/commands/generate_release_plan.md +31 -0
  8. package/.cursor/commands/generate_roadmap.md +38 -0
  9. package/.cursor/commands/help.md +37 -0
  10. package/.cursor/commands/init_project.md +26 -0
  11. package/.cursor/commands/kill.md +72 -0
  12. package/.cursor/commands/new_intent.md +68 -0
  13. package/.cursor/commands/pr.md +77 -0
  14. package/.cursor/commands/revert-plan.md +58 -0
  15. package/.cursor/commands/risk.md +64 -0
  16. package/.cursor/commands/rollback.md +43 -0
  17. package/.cursor/commands/scope_project.md +53 -0
  18. package/.cursor/commands/ship.md +345 -0
  19. package/.cursor/commands/status.md +71 -0
  20. package/.cursor/commands/suggest.md +44 -0
  21. package/.cursor/commands/test_shipit.md +197 -0
  22. package/.cursor/commands/verify.md +50 -0
  23. package/.cursor/rules/architect.mdc +84 -0
  24. package/.cursor/rules/assumption-extractor.mdc +95 -0
  25. package/.cursor/rules/docs.mdc +66 -0
  26. package/.cursor/rules/implementer.mdc +112 -0
  27. package/.cursor/rules/pm.mdc +136 -0
  28. package/.cursor/rules/qa.mdc +97 -0
  29. package/.cursor/rules/security.mdc +90 -0
  30. package/.cursor/rules/steward.mdc +99 -0
  31. package/.cursor/rules/test-runner.mdc +196 -0
  32. package/AGENTS.md +121 -0
  33. package/README.md +264 -0
  34. package/_system/architecture/CANON.md +159 -0
  35. package/_system/architecture/invariants.yml +87 -0
  36. package/_system/architecture/project-schema.json +98 -0
  37. package/_system/architecture/workflow-state-layout.md +68 -0
  38. package/_system/artifacts/SYSTEM_STATE.md +43 -0
  39. package/_system/artifacts/confidence-calibration.json +16 -0
  40. package/_system/artifacts/dependencies.md +46 -0
  41. package/_system/artifacts/framework-files-manifest.json +179 -0
  42. package/_system/artifacts/usage.json +1 -0
  43. package/_system/behaviors/DO_RELEASE.md +371 -0
  44. package/_system/behaviors/DO_RELEASE_AI.md +329 -0
  45. package/_system/behaviors/PREPARE_RELEASE.md +373 -0
  46. package/_system/behaviors/PREPARE_RELEASE_AI.md +234 -0
  47. package/_system/behaviors/WORK_ROOT_PLATFORM_ISSUES.md +140 -0
  48. package/_system/behaviors/WORK_TEST_PLAN_ISSUES.md +380 -0
  49. package/_system/do-not-repeat/abandoned-designs.md +18 -0
  50. package/_system/do-not-repeat/bad-patterns.md +19 -0
  51. package/_system/do-not-repeat/failed-experiments.md +18 -0
  52. package/_system/do-not-repeat/rejected-libraries.md +19 -0
  53. package/_system/drift/baselines.md +49 -0
  54. package/_system/drift/metrics.md +33 -0
  55. package/_system/golden-data/.gitkeep +0 -0
  56. package/_system/golden-data/README.md +47 -0
  57. package/_system/reports/mutation/mutation.html +492 -0
  58. package/_system/security/audit-allowlist.json +4 -0
  59. package/bin/create-shipit-app +29 -0
  60. package/bin/shipit +183 -0
  61. package/cli/src/commands/check.js +82 -0
  62. package/cli/src/commands/create.js +195 -0
  63. package/cli/src/commands/init.js +267 -0
  64. package/cli/src/commands/upgrade.js +196 -0
  65. package/cli/src/utils/config.js +27 -0
  66. package/cli/src/utils/file-copy.js +144 -0
  67. package/cli/src/utils/gitignore-merge.js +44 -0
  68. package/cli/src/utils/manifest.js +105 -0
  69. package/cli/src/utils/package-json-merge.js +163 -0
  70. package/cli/src/utils/project-json-merge.js +57 -0
  71. package/cli/src/utils/prompts.js +30 -0
  72. package/cli/src/utils/stack-detection.js +56 -0
  73. package/cli/src/utils/stack-files.js +364 -0
  74. package/cli/src/utils/upgrade-backup.js +159 -0
  75. package/cli/src/utils/version.js +64 -0
  76. package/dashboard-app/README.md +73 -0
  77. package/dashboard-app/eslint.config.js +23 -0
  78. package/dashboard-app/index.html +13 -0
  79. package/dashboard-app/package.json +30 -0
  80. package/dashboard-app/pnpm-lock.yaml +2721 -0
  81. package/dashboard-app/public/dashboard.json +66 -0
  82. package/dashboard-app/public/vite.svg +1 -0
  83. package/dashboard-app/src/App.css +141 -0
  84. package/dashboard-app/src/App.tsx +155 -0
  85. package/dashboard-app/src/assets/react.svg +1 -0
  86. package/dashboard-app/src/index.css +68 -0
  87. package/dashboard-app/src/main.tsx +10 -0
  88. package/dashboard-app/tsconfig.app.json +28 -0
  89. package/dashboard-app/tsconfig.json +4 -0
  90. package/dashboard-app/tsconfig.node.json +26 -0
  91. package/dashboard-app/vite.config.ts +7 -0
  92. package/package.json +116 -0
  93. package/scripts/README.md +70 -0
  94. package/scripts/audit-check.sh +125 -0
  95. package/scripts/calibration-report.sh +198 -0
  96. package/scripts/check-readiness.sh +155 -0
  97. package/scripts/collect-metrics.sh +116 -0
  98. package/scripts/command-manifest.yml +131 -0
  99. package/scripts/create-test-plan-issue.sh +110 -0
  100. package/scripts/dashboard-start.sh +16 -0
  101. package/scripts/deploy.sh +170 -0
  102. package/scripts/drift-check.sh +93 -0
  103. package/scripts/execute-rollback.sh +177 -0
  104. package/scripts/export-dashboard-json.js +208 -0
  105. package/scripts/fix-intents.sh +239 -0
  106. package/scripts/generate-dashboard.sh +136 -0
  107. package/scripts/generate-docs.sh +279 -0
  108. package/scripts/generate-project-context.sh +142 -0
  109. package/scripts/generate-release-plan.sh +443 -0
  110. package/scripts/generate-roadmap.sh +189 -0
  111. package/scripts/generate-system-state.sh +95 -0
  112. package/scripts/gh/create-intent-from-issue.sh +82 -0
  113. package/scripts/gh/create-issue-from-intent.sh +59 -0
  114. package/scripts/gh/create-pr.sh +41 -0
  115. package/scripts/gh/link-issue.sh +44 -0
  116. package/scripts/gh/on-ship-update-issue.sh +42 -0
  117. package/scripts/headless/README.md +8 -0
  118. package/scripts/headless/call-llm.js +109 -0
  119. package/scripts/headless/run-phase.sh +99 -0
  120. package/scripts/help.sh +271 -0
  121. package/scripts/init-project.sh +976 -0
  122. package/scripts/kill-intent.sh +125 -0
  123. package/scripts/lib/common.sh +29 -0
  124. package/scripts/lib/intent.sh +61 -0
  125. package/scripts/lib/progress.sh +57 -0
  126. package/scripts/lib/suggest-next.sh +131 -0
  127. package/scripts/lib/validate-intents.sh +240 -0
  128. package/scripts/lib/verify-outputs.sh +55 -0
  129. package/scripts/lib/workflow_state.sh +201 -0
  130. package/scripts/new-intent.sh +271 -0
  131. package/scripts/publish-npm.sh +28 -0
  132. package/scripts/scope-project.sh +380 -0
  133. package/scripts/setup-worktrees.sh +125 -0
  134. package/scripts/status.sh +278 -0
  135. package/scripts/suggest.sh +173 -0
  136. package/scripts/test-headless.sh +47 -0
  137. package/scripts/test-shipit.sh +52 -0
  138. package/scripts/test-workflow-state.sh +49 -0
  139. package/scripts/usage-report.sh +47 -0
  140. package/scripts/usage.sh +58 -0
  141. package/scripts/validate-cursor.sh +151 -0
  142. package/scripts/validate-project.sh +71 -0
  143. package/scripts/validate-vscode.sh +146 -0
  144. package/scripts/verify.sh +153 -0
  145. package/scripts/workflow-orchestrator.sh +97 -0
  146. package/scripts/workflow-templates/01_analysis.md.tpl +25 -0
  147. package/scripts/workflow-templates/02_plan.md.tpl +30 -0
  148. package/scripts/workflow-templates/03_implementation.md.tpl +25 -0
  149. package/scripts/workflow-templates/04_verification.md.tpl +29 -0
  150. package/scripts/workflow-templates/05_release_notes.md.tpl +16 -0
  151. package/scripts/workflow-templates/05_verification_legacy.md.tpl +6 -0
  152. package/scripts/workflow-templates/active.md.tpl +18 -0
  153. package/scripts/workflow-templates/phases.yml +39 -0
  154. package/stryker.conf.json +8 -0
  155. package/work/intent/templates/api-endpoint.md +124 -0
  156. package/work/intent/templates/bugfix.md +116 -0
  157. package/work/intent/templates/frontend-feature.md +115 -0
  158. package/work/intent/templates/generic.md +122 -0
  159. package/work/intent/templates/infra-change.md +121 -0
  160. package/work/intent/templates/refactor.md +116 -0
@@ -0,0 +1,70 @@
1
+ # Scripts
2
+
3
+ Shell scripts for the ShipIt framework. Run via `pnpm <script-name>` (see `package.json`).
4
+
5
+ ## Categories
6
+
7
+ ### Intent Management
8
+
9
+ - `new-intent.sh` — Create a new intent file (feature, bug, tech-debt)
10
+ - `fix-intents.sh` — Auto-fix common intent issues (dependency ordering, whitespace)
11
+ - `kill-intent.sh` — Kill an intent with rationale
12
+
13
+ ### Workflow Orchestration
14
+
15
+ - `workflow-orchestrator.sh` — Generate workflow state files for `/ship` phases (spec-driven: reads `workflow-templates/phases.yml` and substitutes templates)
16
+ - `workflow-templates/` — Phase spec (`phases.yml`) and `.tpl` templates; add a phase by adding a spec entry and a template file
17
+ - `verify.sh` — Run verification phase (tests, mutation, audit)
18
+
19
+ Agent coordinator (task queue and agent assignment) is **experimental** and lives in `experimental/`; see `experimental/README.md`.
20
+
21
+ ### Generation
22
+
23
+ - `generate-release-plan.sh` — Build release plan from intents
24
+ - `generate-roadmap.sh` — Generate roadmap (now/next/later) and dependency graph
25
+ - `generate-docs.sh` — Update README, CHANGELOG, release notes
26
+ - `generate-dashboard.sh` — Generate project dashboard
27
+ - `generate-project-context.sh` — Generate project context for agents
28
+ - `generate-system-state.sh` — Generate SYSTEM_STATE.md for Steward
29
+
30
+ ### Validation
31
+
32
+ - `validate-project.sh` — Validate project.json against schema
33
+ - `validate-cursor.sh` — Validate Cursor integration (rules, commands)
34
+
35
+ ### Deployment
36
+
37
+ - `deploy.sh` — Deploy with readiness checks
38
+ - `check-readiness.sh` — Run readiness checks before deploy
39
+
40
+ ### Project Setup
41
+
42
+ - `init-project.sh` — Initialize a new ShipIt project
43
+ - `scope-project.sh` — AI-assisted feature breakdown
44
+
45
+ ### Drift & Metrics
46
+
47
+ - `drift-check.sh` — Calculate drift metrics (PR size, test ratio, deps)
48
+ - `collect-metrics.sh` — Collect metrics for reporting
49
+ - `audit-check.sh` — Run npm audit for vulnerabilities
50
+
51
+ ### Utilities
52
+
53
+ - `help.sh` — List all commands with descriptions (builds "Available commands" from `command-manifest.yml`)
54
+ - `command-manifest.yml` — Single source of truth for slash commands: id, slash, pnpm script, one-liner, category. Add a command by adding an entry; help.sh reads it.
55
+ - `status.sh` — Unified dashboard (intents, workflow, tests)
56
+ - `suggest.sh` — Suggest next intent to work on
57
+
58
+ ### Test & Issue Tooling
59
+
60
+ - `create-test-plan-issue.sh` — Create GitHub issues from test failures
61
+ - `setup-worktrees.sh` — Setup git worktrees for parallel work
62
+
63
+ ## Shared Libraries (`lib/`)
64
+
65
+ - `common.sh` — Plumbing: `error_exit`, color variables, optional `require_cmd`. Source this (or `intent.sh`) in new scripts to avoid duplicating error handling and colors.
66
+ - `intent.sh` — Intent domain: `resolve_intent_file`, `require_intent_file`, `INTENT_DIR`. Sources `common.sh`. Use in scripts that resolve intent IDs to paths (e.g. workflow-orchestrator, kill-intent).
67
+ - `progress.sh` — Progress indicator helpers
68
+ - `suggest-next.sh` — Next-step suggestion logic
69
+ - `validate-intents.sh` — Intent validation (dependencies, circular deps)
70
+ - `verify-outputs.sh` — Output verification and generator chaining
@@ -0,0 +1,125 @@
1
+ #!/bin/bash
2
+
3
+ # Audit guard with allowlist and expiry checks.
4
+
5
+ set -euo pipefail
6
+
7
+ AUDIT_LEVEL="${1:-moderate}"
8
+ ALLOWLIST_FILE="_system/security/audit-allowlist.json"
9
+
10
+ if ! command -v pnpm >/dev/null 2>&1; then
11
+ echo "ERROR: pnpm is required to run audit checks" >&2
12
+ exit 1
13
+ fi
14
+
15
+ if [ -f "$ALLOWLIST_FILE" ]; then
16
+ ALLOWLIST_JSON="$(cat "$ALLOWLIST_FILE")"
17
+ else
18
+ ALLOWLIST_JSON='{"advisories":[]}'
19
+ fi
20
+
21
+ AUDIT_JSON="$(pnpm audit --json || true)"
22
+ if [ -z "$AUDIT_JSON" ]; then
23
+ echo "ERROR: pnpm audit returned no output" >&2
24
+ exit 1
25
+ fi
26
+
27
+ printf "%s" "$AUDIT_JSON" | AUDIT_LEVEL="$AUDIT_LEVEL" AUDIT_ALLOWLIST_JSON="$ALLOWLIST_JSON" node <<'NODE'
28
+ const fs = require('fs');
29
+
30
+ const input = fs.readFileSync(0, 'utf8');
31
+ const audit = JSON.parse(input || '{}');
32
+ const allowlist = JSON.parse(process.env.AUDIT_ALLOWLIST_JSON || '{"advisories":[]}');
33
+ const level = process.env.AUDIT_LEVEL || 'moderate';
34
+
35
+ const levels = { info: 0, low: 1, moderate: 2, high: 3, critical: 4 };
36
+ if (!(level in levels)) {
37
+ console.error(`ERROR: Invalid audit level "${level}"`);
38
+ process.exit(2);
39
+ }
40
+
41
+ const threshold = levels[level];
42
+ const advisories = audit.advisories || {};
43
+ const findings = Object.values(advisories).map((a) => ({
44
+ id: String(a.id),
45
+ severity: a.severity || 'info',
46
+ title: a.title || '',
47
+ url: a.url || '',
48
+ recommendation: a.recommendation || '',
49
+ }));
50
+
51
+ const scoped = findings.filter((f) => (levels[f.severity] ?? 0) >= threshold);
52
+ const allow = new Map((allowlist.advisories || []).map((a) => [String(a.id), a]));
53
+ const today = new Date().toISOString().slice(0, 10);
54
+
55
+ const unlisted = [];
56
+ const expired = [];
57
+ const invalid = [];
58
+ const allowed = [];
59
+
60
+ for (const f of scoped) {
61
+ const entry = allow.get(String(f.id));
62
+ if (!entry) {
63
+ unlisted.push(f);
64
+ continue;
65
+ }
66
+
67
+ if (!entry.reason || !entry.expires) {
68
+ invalid.push({
69
+ ...f,
70
+ missingReason: !entry.reason,
71
+ missingExpires: !entry.expires,
72
+ });
73
+ continue;
74
+ }
75
+
76
+ if (entry.expires < today) {
77
+ expired.push({ ...f, expires: entry.expires, reason: entry.reason || '' });
78
+ continue;
79
+ }
80
+
81
+ allowed.push({ ...f, expires: entry.expires || '', reason: entry.reason || '' });
82
+ }
83
+
84
+ if (scoped.length === 0) {
85
+ console.log(`✓ pnpm audit: no ${level}+ vulnerabilities`);
86
+ process.exit(0);
87
+ }
88
+
89
+ if (unlisted.length || expired.length || invalid.length) {
90
+ console.error(
91
+ `✗ pnpm audit: ${unlisted.length} unlisted, ${expired.length} expired, ${invalid.length} invalid allowlist entries`
92
+ );
93
+
94
+ if (unlisted.length) {
95
+ console.error('Unlisted advisories:');
96
+ for (const f of unlisted) {
97
+ console.error(`- ${f.id} ${f.severity} ${f.title}`);
98
+ }
99
+ }
100
+
101
+ if (expired.length) {
102
+ console.error('Expired advisories:');
103
+ for (const f of expired) {
104
+ console.error(`- ${f.id} ${f.severity} ${f.title} (expired ${f.expires})`);
105
+ }
106
+ }
107
+
108
+ if (invalid.length) {
109
+ console.error('Invalid allowlist entries (missing reason/expires):');
110
+ for (const f of invalid) {
111
+ const missing = [];
112
+ if (f.missingReason) missing.push('reason');
113
+ if (f.missingExpires) missing.push('expires');
114
+ console.error(`- ${f.id} ${f.severity} ${f.title} (missing ${missing.join(', ')})`);
115
+ }
116
+ }
117
+
118
+ process.exit(1);
119
+ }
120
+
121
+ console.log(`✓ pnpm audit: ${scoped.length} ${level}+ advisories allowlisted`);
122
+ for (const f of allowed) {
123
+ console.log(`- ${f.id} ${f.severity} ${f.title}`);
124
+ }
125
+ NODE
@@ -0,0 +1,198 @@
1
+ #!/bin/bash
2
+
3
+ # Confidence calibration report: stated confidence vs actual outcomes.
4
+ # Reads _system/artifacts/confidence-calibration.json; prints metrics and optional alert.
5
+ # Usage: calibration-report.sh [--last N] [--json] [--fail-on-threshold X]
6
+ # --last N: show last N decisions in table
7
+ # --json: output metrics as JSON (for dashboard)
8
+ # --fail-on-threshold X: exit 1 if calibration error (MAE) > X (e.g. 0.2)
9
+
10
+ set -euo pipefail
11
+
12
+ SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
13
+ REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
14
+ cd "$REPO_ROOT"
15
+
16
+ # shellcheck source=scripts/lib/common.sh
17
+ . "$SCRIPT_DIR/lib/common.sh"
18
+
19
+ require_cmd jq
20
+
21
+ CALIBRATION_FILE="_system/artifacts/confidence-calibration.json"
22
+ LAST_N=""
23
+ OUTPUT_JSON=false
24
+ FAIL_THRESHOLD=""
25
+
26
+ while [ $# -gt 0 ]; do # Parse flags; ${2:?...} aborts with a clear message when a flag's value is missing (a bare `shift 2` would exit silently under set -e)
27
+ case "$1" in
28
+ --last)
29
+ LAST_N="${2:?--last requires a value N}"
30
+ shift 2
31
+ ;;
32
+ --json)
33
+ OUTPUT_JSON=true
34
+ shift
35
+ ;;
36
+ --fail-on-threshold)
37
+ FAIL_THRESHOLD="${2:?--fail-on-threshold requires a value X}"
38
+ shift 2
39
+ ;;
40
+ *)
41
+ echo "Usage: $0 [--last N] [--json] [--fail-on-threshold X]" >&2
42
+ exit 1
43
+ ;;
44
+ esac
45
+ done
46
+
47
+ if [ ! -f "$CALIBRATION_FILE" ]; then
48
+ echo "No calibration data (missing $CALIBRATION_FILE). Run /verify to record decisions." >&2
49
+ [ "$OUTPUT_JSON" = true ] && echo '{"decisions_count":0,"message":"No calibration file"}' || true
50
+ exit 0
51
+ fi
52
+
53
+ DECISIONS=$(jq -c '.decisions // []' "$CALIBRATION_FILE")
54
+ COUNT=$(echo "$DECISIONS" | jq 'length')
55
+ if [ "$COUNT" -eq 0 ]; then
56
+ echo "No decisions yet. Run /verify to record confidence vs outcomes." >&2
57
+ [ "$OUTPUT_JSON" = true ] && echo "{\"decisions_count\":0,\"message\":\"No decisions\"}" || true
58
+ exit 0
59
+ fi
60
+
61
+ # Decisions with stated_confidence for metrics (exclude null)
62
+ WITH_CONF=$(echo "$DECISIONS" | jq '[.[] | select(.stated_confidence != null)]')
63
+ N_WITH_CONF=$(echo "$WITH_CONF" | jq 'length')
64
+
65
+ if [ "$OUTPUT_JSON" = true ]; then
66
+ # JSON output: metrics for dashboard
67
+ MAE="null"
68
+ BRIER="null"
69
+ OVER_UNDER="null"
70
+ BINS_JSON="[]"
71
+ if [ "$N_WITH_CONF" -gt 0 ]; then
72
+ MAE=$(echo "$WITH_CONF" | jq '
73
+ [.[] | (.stated_confidence - (if .actual_outcome == "success" then 1 else 0 end)) | if . < 0 then -. else . end] | add / length
74
+ ')
75
+ BRIER=$(echo "$WITH_CONF" | jq '
76
+ [.[] | ((.stated_confidence - (if .actual_outcome == "success" then 1 else 0 end)) | . * .)] | add / length
77
+ ')
78
+ AVG_STATED=$(echo "$WITH_CONF" | jq '[.[].stated_confidence] | add / length')
79
+ SUCCESS_RATE=$(echo "$WITH_CONF" | jq '[.[] | if .actual_outcome == "success" then 1 else 0 end] | add / length')
80
+ if [ "$(echo "$AVG_STATED" | jq '. > '"$SUCCESS_RATE"'')" = "true" ]; then
81
+ OVER_UNDER="over-confident"
82
+ elif [ "$(echo "$AVG_STATED" | jq '. < '"$SUCCESS_RATE"'')" = "true" ]; then
83
+ OVER_UNDER="under-confident"
84
+ else
85
+ OVER_UNDER="well-calibrated"
86
+ fi
87
+ BINS_JSON=$(echo "$WITH_CONF" | jq '
88
+ def bin_label(c):
89
+ if c < 0.5 then "0.0-0.5"
90
+ elif c < 0.7 then "0.5-0.7"
91
+ elif c < 0.9 then "0.7-0.9"
92
+ else "0.9-1.0" end;
93
+ [.[] | {bin: bin_label(.stated_confidence), stated: .stated_confidence, success: (if .actual_outcome == "success" then 1 else 0 end)}]
94
+ | group_by(.bin)
95
+ | map({
96
+ bin: .[0].bin,
97
+ total: length,
98
+ successes: (map(.success) | add),
99
+ success_rate: ((map(.success) | add) / length)
100
+ })
101
+ ')
102
+ fi
103
+ jq -n \
104
+ --argjson decisions_count "$COUNT" \
105
+ --argjson with_confidence "$N_WITH_CONF" \
106
+ --argjson mae "$MAE" \
107
+ --argjson brier "$BRIER" \
108
+ --argjson bins "$BINS_JSON" \
109
+ --arg over_under "$OVER_UNDER" \
110
+ '{decisions_count: $decisions_count, with_confidence: $with_confidence, calibration_error_mae: $mae, brier_score: $brier, bins: $bins, over_under: (if $over_under == "null" then null else $over_under end)} # the "null" sentinel becomes JSON null, matching mae/brier when no decision has a stated confidence'
111
+ exit 0
112
+ fi
113
+
114
+ # Human-readable report
115
+ echo -e "${BLUE}════════════════════════════════════════${NC}"
116
+ echo -e "${BLUE}Confidence Calibration Report${NC}"
117
+ echo -e "${BLUE}════════════════════════════════════════${NC}"
118
+ echo ""
119
+
120
+ if [ "$N_WITH_CONF" -eq 0 ]; then
121
+ echo "No decisions with stated_confidence yet (run /verify after analysis phases that output confidence)."
122
+ echo "Total decisions: $COUNT (outcomes only)."
123
+ echo ""
124
+ echo "Last decisions:"
125
+ echo "$DECISIONS" | jq -r '.[-10:] | .[] | " \(.id) outcome=\(.actual_outcome) stated_confidence=\(.stated_confidence // "n/a")"' 2>/dev/null || true
126
+ exit 0
127
+ fi
128
+
129
+ # Calibration error (MAE)
130
+ MAE=$(echo "$WITH_CONF" | jq '[.[] | (.stated_confidence - (if .actual_outcome == "success" then 1 else 0 end)) | if . < 0 then -. else . end] | add / length')
131
+ BRIER=$(echo "$WITH_CONF" | jq '[.[] | ((.stated_confidence - (if .actual_outcome == "success" then 1 else 0 end)) | . * .)] | add / length')
132
+
133
+ echo -e "${CYAN}Metrics (decisions with stated_confidence: $N_WITH_CONF)${NC}"
134
+ echo " Calibration error (MAE): $MAE"
135
+ echo " Brier score: $BRIER"
136
+ echo ""
137
+
138
+ # Over/under confidence
139
+ AVG_STATED=$(echo "$WITH_CONF" | jq '[.[].stated_confidence] | add / length')
140
+ SUCCESS_RATE=$(echo "$WITH_CONF" | jq '[.[] | if .actual_outcome == "success" then 1 else 0 end] | add / length')
141
+ echo -e "${CYAN}Calibration summary${NC}"
142
+ echo " Avg stated confidence: $AVG_STATED | Actual success rate: $SUCCESS_RATE"
143
+ if [ "$(echo "$AVG_STATED" | jq '. > '"$SUCCESS_RATE"'')" = "true" ]; then
144
+ echo -e " ${YELLOW}→ Over-confident: stated confidence is higher than actual success rate. Consider lowering stated confidence when uncertain.${NC}"
145
+ elif [ "$(echo "$AVG_STATED" | jq '. < '"$SUCCESS_RATE"'')" = "true" ]; then
146
+ echo -e " ${GREEN}→ Under-confident: stated confidence is lower than actual success. You may be more confident when outcomes are good.${NC}"
147
+ else
148
+ echo -e " ${GREEN}→ Well-calibrated.${NC}"
149
+ fi
150
+ echo ""
151
+
152
+ # Bins table
153
+ echo -e "${CYAN}Success rate by confidence bin${NC}"
154
+ printf " %-12s %6s %8s %10s\n" "Bin" "Total" "Success" "Rate"
155
+ echo "$WITH_CONF" | jq -r '
156
+ def bin_label(c):
157
+ if c < 0.5 then "0.0-0.5"
158
+ elif c < 0.7 then "0.5-0.7"
159
+ elif c < 0.9 then "0.7-0.9"
160
+ else "0.9-1.0" end;
161
+ [.[] | {bin: bin_label(.stated_confidence), success: (if .actual_outcome == "success" then 1 else 0 end)}]
162
+ | group_by(.bin)
163
+ | map({bin: .[0].bin, total: length, successes: (map(.success) | add)})
164
+ | sort_by(.bin)
165
+ | .[]
166
+ | " \(.bin) \(.total) \(.successes) \((.successes / .total * 100) | floor / 100)"
167
+ ' 2>/dev/null | while read -r line; do echo "$line"; done
168
+ echo ""
169
+
170
+ # Optional alert: last K with high stated confidence but low success
171
+ ALERT_K=10
172
+ RECENT_HIGH=$(echo "$DECISIONS" | jq '[.[-'"$ALERT_K"':] | .[] | select(.stated_confidence != null and .stated_confidence > 0.8)]')
173
+ N_RECENT_HIGH=$(echo "$RECENT_HIGH" | jq 'length')
174
+ if [ "$N_RECENT_HIGH" -ge 3 ]; then
175
+ RECENT_SUCCESS_RATE=$(echo "$RECENT_HIGH" | jq '[.[] | if .actual_outcome == "success" then 1 else 0 end] | add / length')
176
+ if [ "$(echo "$RECENT_SUCCESS_RATE" | jq '. < 0.5')" = "true" ]; then
177
+ echo -e "${YELLOW}⚠ Possible over-confidence: last $ALERT_K decisions with stated_confidence > 0.8 have success rate < 50%. Review recent decisions.${NC}"
178
+ echo ""
179
+ fi
180
+ fi
181
+
182
+ # Last N decisions table
183
+ if [ -n "$LAST_N" ]; then
184
+ echo -e "${CYAN}Last $LAST_N decisions${NC}"
185
+ printf " %-8s %8s %-8s %s\n" "ID" "Stated" "Outcome" "Notes (truncated)"
186
+ echo "$DECISIONS" | jq -r --argjson n "$LAST_N" '.[-$n:] | .[] | " \(.id) \(.stated_confidence // "n/a") \(.actual_outcome) \(.notes | .[0:50])"' 2>/dev/null | while read -r line; do echo "$line"; done
187
+ echo ""
188
+ fi
189
+
190
+ # Exit 1 if --fail-on-threshold and MAE > threshold
191
+ if [ -n "$FAIL_THRESHOLD" ]; then
192
+ if [ "$(echo "$MAE" | jq '. > '"$FAIL_THRESHOLD"'')" = "true" ]; then
193
+ echo -e "${RED}Calibration error ($MAE) exceeds threshold ($FAIL_THRESHOLD).${NC}" >&2
194
+ exit 1
195
+ fi
196
+ fi
197
+
198
+ exit 0
@@ -0,0 +1,155 @@
1
+ #!/bin/bash
2
+
3
+ # Production Readiness Check Script
4
+ # Validates project is ready for deployment
5
+
6
+ set -euo pipefail
7
+
8
+ error_exit() {
9
+ echo "ERROR: $1" >&2
10
+ exit "${2:-1}"
11
+ }
12
+
13
+ warning() {
14
+ echo "WARNING: $1" >&2
15
+ }
16
+
17
+ # Colors
18
+ RED='\033[0;31m'
19
+ GREEN='\033[0;32m'
20
+ YELLOW='\033[1;33m'
21
+ BLUE='\033[0;34m'
22
+ NC='\033[0m'
23
+
24
+ ENVIRONMENT="${1:-}"
25
+ if [ -z "$ENVIRONMENT" ]; then
26
+ error_exit "Usage: ./scripts/check-readiness.sh <environment>" 1
27
+ fi
28
+
29
+ echo -e "${BLUE}Running readiness checks for: ${ENVIRONMENT}${NC}"
30
+ echo ""
31
+
32
+ FAILED=0
33
+
34
+ # Check 1: Tests pass
35
+ echo -e "${YELLOW}[1/7] Running tests...${NC}"
36
+ if command -v pnpm >/dev/null 2>&1; then
37
+ if pnpm test >/dev/null 2>&1; then
38
+ echo -e "${GREEN}✓ Tests pass${NC}"
39
+ else
40
+ echo -e "${RED}✗ Tests failed${NC}"
41
+ FAILED=1
42
+ fi
43
+ else
44
+ warning "pnpm not found, skipping test check"
45
+ fi
46
+ echo ""
47
+
48
+ # Check 2: Coverage threshold
49
+ echo -e "${YELLOW}[2/7] Checking test coverage...${NC}"
50
+ if [ -f "project.json" ]; then
51
+ COVERAGE_THRESHOLD=$(jq -r '.settings.testCoverageMinimum // 80' project.json 2>/dev/null || echo "80")
52
+ if command -v pnpm >/dev/null 2>&1; then
53
+ # Try to get coverage (simplified check)
54
+ if pnpm test:coverage >/dev/null 2>&1; then
55
+ echo -e "${GREEN}✓ Coverage check passed${NC}"
56
+ else
57
+ warning "Could not verify coverage threshold ($COVERAGE_THRESHOLD%)"
58
+ fi
59
+ fi
60
+ else
61
+ warning "project.json not found, using default threshold (80%)"
62
+ fi
63
+ echo ""
64
+
65
+ # Check 3: Lint and typecheck
66
+ echo -e "${YELLOW}[3/7] Running lint and typecheck...${NC}"
67
+ if command -v pnpm >/dev/null 2>&1; then
68
+ if pnpm lint >/dev/null 2>&1 && pnpm typecheck >/dev/null 2>&1; then
69
+ echo -e "${GREEN}✓ Lint and typecheck pass${NC}"
70
+ else
71
+ echo -e "${RED}✗ Lint or typecheck failed${NC}"
72
+ FAILED=1
73
+ fi
74
+ else
75
+ warning "pnpm not found, skipping lint/typecheck"
76
+ fi
77
+ echo ""
78
+
79
+ # Check 4: Security audit
80
+ echo -e "${YELLOW}[4/7] Running security audit...${NC}"
81
+ if command -v pnpm >/dev/null 2>&1; then
82
+ if [ -f "scripts/audit-check.sh" ]; then
83
+ if ./scripts/audit-check.sh moderate; then
84
+ echo -e "${GREEN}✓ No unlisted moderate+ vulnerabilities${NC}"
85
+ else
86
+ warning "Security audit found unlisted or expired advisories"
87
+ fi
88
+ else
89
+ warning "audit-check.sh not found, skipping allowlist enforcement"
90
+ if pnpm audit --audit-level=moderate >/dev/null 2>&1; then
91
+ echo -e "${GREEN}✓ No moderate/high/critical vulnerabilities${NC}"
92
+ else
93
+ warning "Security audit found issues (review manually)"
94
+ fi
95
+ fi
96
+ else
97
+ warning "pnpm not found, skipping security audit"
98
+ fi
99
+ echo ""
100
+
101
+ # Check 5: Documentation (a missing README.md fails the run like the other red ✗ checks; a missing CHANGELOG.md only warns)
102
+ echo -e "${YELLOW}[5/7] Checking documentation...${NC}"
103
+ DOCS_OK=1
104
+ if [ ! -f "README.md" ]; then
105
+ echo -e "${RED}✗ README.md missing${NC}"
106
+ DOCS_OK=0; FAILED=1
107
+ fi
108
+ if [ ! -f "CHANGELOG.md" ]; then
109
+ warning "CHANGELOG.md missing (recommended)"
110
+ fi
111
+ if [ $DOCS_OK -eq 1 ]; then
112
+ echo -e "${GREEN}✓ Documentation present${NC}"
113
+ fi
114
+ echo ""
115
+
116
+ # Check 6: Drift check
117
+ echo -e "${YELLOW}[6/7] Running drift check...${NC}"
118
+ if [ -f "scripts/drift-check.sh" ]; then
119
+ if ./scripts/drift-check.sh >/dev/null 2>&1; then
120
+ echo -e "${GREEN}✓ Drift check passed${NC}"
121
+ else
122
+ warning "Drift check found issues (review manually)"
123
+ fi
124
+ else
125
+ warning "drift-check.sh not found, skipping"
126
+ fi
127
+ echo ""
128
+
129
+ # Check 7: Invariants
130
+ echo -e "${YELLOW}[7/7] Checking invariants...${NC}"
131
+ if [ -f "_system/architecture/invariants.yml" ]; then
132
+ if [ -f "scripts/validate-project.sh" ]; then
133
+ if ./scripts/validate-project.sh >/dev/null 2>&1; then
134
+ echo -e "${GREEN}✓ Invariants valid${NC}"
135
+ else
136
+ warning "Invariant validation issues (review manually)"
137
+ fi
138
+ else
139
+ echo -e "${GREEN}✓ Invariants file exists${NC}"
140
+ fi
141
+ else
142
+ warning "invariants.yml not found"
143
+ fi
144
+ echo ""
145
+
146
+ # Summary
147
+ echo -e "${BLUE}════════════════════════════════════════${NC}"
148
+ if [ $FAILED -eq 0 ]; then
149
+ echo -e "${GREEN}✓ Production readiness checks PASSED${NC}"
150
+ exit 0
151
+ else
152
+ echo -e "${RED}✗ Production readiness checks FAILED${NC}"
153
+ echo -e "${YELLOW}Fix the issues above before deploying${NC}"
154
+ exit 1
155
+ fi
@@ -0,0 +1,116 @@
1
+ #!/bin/bash
2
+
3
+ # Metrics Collection Script
4
+ # Tracks workflow success rates and time-per-phase metrics
5
+
6
+ set -euo pipefail
7
+
8
+ error_exit() {
9
+ echo "ERROR: $1" >&2
10
+ exit "${2:-1}"
11
+ }
12
+
13
+ # Colors
14
+ GREEN='\033[0;32m'
15
+ YELLOW='\033[1;33m'
16
+ BLUE='\033[0;34m'
17
+ NC='\033[0m'
18
+
19
+ METRICS_FILE="metrics.json"
20
+
21
+ echo -e "${BLUE}Collecting workflow metrics...${NC}"
22
+
23
+ # Initialize metrics if needed
24
+ if [ ! -f "$METRICS_FILE" ]; then
25
+ cat > "$METRICS_FILE" << EOF
26
+ {
27
+ "version": "1.0",
28
+ "created": "$(date -u +"%Y-%m-%dT%H:%M:%SZ")",
29
+ "workflows": [],
30
+ "summary": {
31
+ "totalWorkflows": 0,
32
+ "successfulWorkflows": 0,
33
+ "failedWorkflows": 0,
34
+ "averagePhaseTime": {},
35
+ "bottlenecks": []
36
+ }
37
+ }
38
+ EOF
39
+ fi
40
+
41
+ # Collect intent metrics
42
+ intent_files=()
43
+ while IFS= read -r file; do
44
+ intent_files+=("$file")
45
+ done < <(find intent -type f -name "*.md" ! -name "_TEMPLATE.md" 2>/dev/null)
46
+
47
+ INTENT_TOTAL=${#intent_files[@]}
48
+ if [ "$INTENT_TOTAL" -gt 0 ]; then # grep -l exits 1 when no file matches; "|| true" stops pipefail + errexit from aborting the script on a zero count
49
+ INTENT_SHIPPED=$({ grep -l "Status.*shipped" "${intent_files[@]}" 2>/dev/null || true; } | wc -l | tr -d ' ')
50
+ INTENT_FAILED=$({ grep -l "Status.*killed" "${intent_files[@]}" 2>/dev/null || true; } | wc -l | tr -d ' ')
51
+ else
52
+ INTENT_SHIPPED=0
53
+ INTENT_FAILED=0
54
+ fi
55
+
56
+ # Calculate success rate
57
+ if [ "$INTENT_TOTAL" -gt 0 ]; then
58
+ SUCCESS_RATE=$((INTENT_SHIPPED * 100 / INTENT_TOTAL))
59
+ else
60
+ SUCCESS_RATE=0
61
+ fi
62
+
63
+ # Collect phase completion times (if workflow state files have timestamps)
64
+ collect_phase_times() {
65
+ local intent_id="$1"
66
+ local phases=("01_analysis" "02_plan" "03_implementation" "04_verification" "05_release_notes")
67
+
68
+ for phase in "${phases[@]}"; do
69
+ local file="work/workflow-state/${phase}.md"
70
+ if [ -f "$file" ]; then
71
+ # Extract timestamp if available
72
+ local created=$(grep -i "generated\|created" "$file" | head -1 | grep -o "[0-9]\{4\}-[0-9]\{2\}-[0-9]\{2\}T[0-9]\{2\}:[0-9]\{2\}:[0-9]\{2\}Z" | head -1 || echo "")
73
+ echo "$phase:$created"
74
+ fi
75
+ done
76
+ }
77
+
78
+ # Generate metrics summary
79
+ cat > "metrics-summary.md" << EOF || error_exit "Failed to generate metrics summary"
80
+ # Workflow Metrics Summary
81
+
82
+ **Generated:** $(date -u +"%Y-%m-%dT%H:%M:%SZ")
83
+
84
+ ## Success Metrics
85
+
86
+ | Metric | Value |
87
+ |--------|-------|
88
+ | **Total Intents** | $INTENT_TOTAL |
89
+ | **Shipped** | $INTENT_SHIPPED |
90
+ | **Failed/Killed** | $INTENT_FAILED |
91
+ | **Success Rate** | $SUCCESS_RATE% |
92
+
93
+ ## Phase Completion
94
+
95
+ [Phase completion times will be tracked here]
96
+
97
+ ## Bottlenecks
98
+
99
+ [Identified bottlenecks will be listed here]
100
+
101
+ ## Recommendations
102
+
103
+ [Recommendations based on metrics]
104
+
105
+ ---
106
+
107
+ *Run \`pnpm collect-metrics\` to update metrics.*
108
+ EOF
109
+
110
+ echo -e "${GREEN}✓ Metrics collected${NC}"
111
+ echo ""
112
+ echo -e "${YELLOW}Summary:${NC}"
113
+ echo " Total Intents: $INTENT_TOTAL"
114
+ echo " Success Rate: $SUCCESS_RATE%"
115
+ echo " Metrics saved to: metrics-summary.md"
116
+ echo ""