codex-genesis-harness 0.1.0 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (328) hide show
  1. package/.codebase/API_CONTRACTS.md +12 -0
  2. package/.codebase/ARCHITECTURE.md +6 -0
  3. package/.codebase/ARCHITECTURE_REVIEW_COMPLETE.md +216 -0
  4. package/.codebase/CURRENT_STATE.md +8 -0
  5. package/.codebase/DEPENDENCY_GRAPH.md +14 -0
  6. package/.codebase/DOMAIN_MODELS.md +10 -0
  7. package/.codebase/EVOLUTION_PLAN.md +35 -0
  8. package/.codebase/FILE_NAMING_CLARIFICATION.md +161 -0
  9. package/.codebase/HARNESS_COMPLETENESS_AUDIT.md +613 -0
  10. package/.codebase/IMPLEMENTATION_COMPLETE.md +429 -0
  11. package/.codebase/IMPLEMENTATION_HANDOFF.md +351 -0
  12. package/.codebase/IMPROVEMENTS_SUMMARY.md +419 -0
  13. package/.codebase/KNOWN_PROBLEMS.md +6 -0
  14. package/.codebase/MODULE_INDEX.md +13 -0
  15. package/.codebase/PHASE3_SKILLS_NAMING_COMPLETE.md +292 -0
  16. package/.codebase/PHASE_DEPENDENCY_MAP.md +486 -0
  17. package/.codebase/PIPELINE_FLOW.md +14 -0
  18. package/.codebase/QUICK_START_SPEC_IMPACT.md +456 -0
  19. package/.codebase/README.md +139 -0
  20. package/.codebase/RECOVERY_POINTS.md +438 -0
  21. package/.codebase/TEST_MATRIX.md +11 -0
  22. package/.codebase/UI_ROUTES.md +4 -0
  23. package/.codebase/context/backend-summary.md +4 -0
  24. package/.codebase/context/frontend-summary.md +4 -0
  25. package/.codebase/context/pipeline-summary.md +4 -0
  26. package/.codebase/context/providers-summary.md +4 -0
  27. package/.codebase/context/render-summary.md +4 -0
  28. package/.codebase/context/tests-summary.md +4 -0
  29. package/.codex/skills/genesis-ai-provider/SKILL.md +46 -0
  30. package/.codex/skills/genesis-ai-provider/agents/openai.yaml +7 -0
  31. package/.codex/skills/genesis-ai-provider/checklists/checklist.md +9 -0
  32. package/.codex/skills/genesis-ai-provider/examples/example.md +4 -0
  33. package/.codex/skills/genesis-ai-provider/templates/provider-contract-template.md +9 -0
  34. package/.codex/skills/genesis-api-contract/SKILL.md +46 -0
  35. package/.codex/skills/genesis-api-contract/agents/openai.yaml +7 -0
  36. package/.codex/skills/genesis-api-contract/checklists/checklist.md +8 -0
  37. package/.codex/skills/genesis-api-contract/examples/example.md +4 -0
  38. package/.codex/skills/genesis-api-contract/templates/api-contract-template.md +4 -0
  39. package/.codex/skills/genesis-api-sync/SKILL.md +354 -0
  40. package/.codex/skills/genesis-api-sync/agents/openai.yaml +7 -0
  41. package/.codex/skills/genesis-api-sync/checklists/api-sync-checklist.md +101 -0
  42. package/.codex/skills/genesis-api-sync/examples/example.md +68 -0
  43. package/.codex/skills/genesis-api-sync/templates/api-change-template.md +257 -0
  44. package/.codex/skills/genesis-architecture/SKILL.md +46 -0
  45. package/.codex/skills/genesis-architecture/agents/openai.yaml +7 -0
  46. package/.codex/skills/genesis-architecture/checklists/checklist.md +8 -0
  47. package/.codex/skills/genesis-architecture/examples/example.md +4 -0
  48. package/.codex/skills/genesis-architecture/templates/architecture-decision-template.md +14 -0
  49. package/.codex/skills/genesis-codebase-map/SKILL.md +46 -0
  50. package/.codex/skills/genesis-codebase-map/agents/openai.yaml +7 -0
  51. package/.codex/skills/genesis-codebase-map/checklists/checklist.md +7 -0
  52. package/.codex/skills/genesis-codebase-map/examples/example.md +4 -0
  53. package/.codex/skills/genesis-codebase-map/templates/map-update-template.md +8 -0
  54. package/.codex/skills/genesis-debug-guide/SKILL.md +479 -0
  55. package/.codex/skills/genesis-debug-guide/agents/openai.yaml +7 -0
  56. package/.codex/skills/genesis-debug-guide/checklists/flaky-test-investigation.md +339 -0
  57. package/.codex/skills/genesis-debug-guide/checklists/production-bug-debug.md +210 -0
  58. package/.codex/skills/genesis-debug-guide/checklists/test-failure-debug.md +158 -0
  59. package/.codex/skills/genesis-debug-guide/examples/example.md +48 -0
  60. package/.codex/skills/genesis-debug-guide/observability/debug-commands.md +365 -0
  61. package/.codex/skills/genesis-debug-guide/playbooks/unit-test-failures.md +289 -0
  62. package/.codex/skills/genesis-debug-guide/templates/debug-investigation-log.md +288 -0
  63. package/.codex/skills/genesis-design-spec/SKILL.md +46 -0
  64. package/.codex/skills/genesis-design-spec/agents/openai.yaml +7 -0
  65. package/.codex/skills/genesis-design-spec/checklists/checklist.md +7 -0
  66. package/.codex/skills/genesis-design-spec/examples/example.md +4 -0
  67. package/.codex/skills/genesis-design-spec/templates/design-spec-template.md +9 -0
  68. package/.codex/skills/genesis-docs/SKILL.md +46 -0
  69. package/.codex/skills/genesis-docs/agents/openai.yaml +7 -0
  70. package/.codex/skills/genesis-docs/checklists/checklist.md +7 -0
  71. package/.codex/skills/genesis-docs/examples/example.md +4 -0
  72. package/.codex/skills/genesis-docs/templates/docs-update-template.md +8 -0
  73. package/.codex/skills/genesis-docs-automation/SKILL.md +1003 -0
  74. package/.codex/skills/genesis-docs-automation/agents/openai.yaml +7 -0
  75. package/.codex/skills/genesis-docs-automation/checklists/docs-validation.md +359 -0
  76. package/.codex/skills/genesis-docs-automation/checklists/spec-alignment.md +312 -0
  77. package/.codex/skills/genesis-docs-automation/examples/example.md +59 -0
  78. package/.codex/skills/genesis-docs-automation/observability/docs-tracking.md +382 -0
  79. package/.codex/skills/genesis-docs-automation/playbooks/auto-update-flow.md +851 -0
  80. package/.codex/skills/genesis-docs-automation/playbooks/changelog-generation.md +491 -0
  81. package/.codex/skills/genesis-docs-automation/templates/changelog-entry-template.md +187 -0
  82. package/.codex/skills/genesis-docs-automation/templates/handoff-template.md +297 -0
  83. package/.codex/skills/genesis-harness/SKILL.md +1418 -0
  84. package/.codex/skills/genesis-harness/agents/openai.yaml +9 -0
  85. package/.codex/skills/genesis-harness/checklists/bug-fix-qa.md +169 -0
  86. package/.codex/skills/genesis-harness/checklists/checklist.md +10 -0
  87. package/.codex/skills/genesis-harness/checklists/new-feature-qa.md +157 -0
  88. package/.codex/skills/genesis-harness/checklists/refactor-qa.md +216 -0
  89. package/.codex/skills/genesis-harness/checklists/requirements-validation.md +211 -0
  90. package/.codex/skills/genesis-harness/examples/example.md +4 -0
  91. package/.codex/skills/genesis-harness/resources/change-impact-matrix-template.md +204 -0
  92. package/.codex/skills/genesis-harness/resources/foundation-phase-template.md +131 -0
  93. package/.codex/skills/genesis-harness/resources/phase-00-foundation-template.md +76 -0
  94. package/.codex/skills/genesis-harness/resources/post-implementation-guide.md +347 -0
  95. package/.codex/skills/{project-genesis-harness → genesis-harness}/scripts/check-architecture-boundaries.sh +23 -23
  96. package/.codex/skills/{project-genesis-harness → genesis-harness}/scripts/check-docs-sync.sh +24 -24
  97. package/.codex/skills/{project-genesis-harness → genesis-harness}/scripts/check-no-debug-logs.sh +21 -21
  98. package/.codex/skills/{project-genesis-harness → genesis-harness}/scripts/check-required-planning-files.sh +46 -46
  99. package/.codex/skills/{project-genesis-harness → genesis-harness}/scripts/check-spec-changelog.sh +24 -24
  100. package/.codex/skills/{project-genesis-harness → genesis-harness}/scripts/check-task-tracking.sh +25 -25
  101. package/.codex/skills/genesis-harness/scripts/compact-context.sh +54 -0
  102. package/.codex/skills/{project-genesis-harness → genesis-harness}/scripts/create-adr.sh +74 -74
  103. package/.codex/skills/{project-genesis-harness → genesis-harness}/scripts/create-bug.sh +160 -160
  104. package/.codex/skills/{project-genesis-harness → genesis-harness}/scripts/create-feature.sh +217 -217
  105. package/.codex/skills/{project-genesis-harness → genesis-harness}/scripts/detect-stack.sh +26 -26
  106. package/.codex/skills/{project-genesis-harness → genesis-harness}/scripts/init-planning.sh +750 -719
  107. package/.codex/skills/{project-genesis-harness → genesis-harness}/scripts/list-changed-files.sh +12 -12
  108. package/.codex/skills/genesis-harness/scripts/offload-log.sh +72 -0
  109. package/.codex/skills/{project-genesis-harness → genesis-harness}/scripts/run-verification.sh +47 -47
  110. package/.codex/skills/genesis-harness/scripts/run-verify-loop.sh +75 -0
  111. package/.codex/skills/{project-genesis-harness → genesis-harness}/scripts/update-state.sh +33 -33
  112. package/.codex/skills/genesis-harness/templates/harness-workflow-template.md +10 -0
  113. package/.codex/skills/genesis-harness-engineering/SKILL.md +159 -0
  114. package/.codex/skills/genesis-harness-engineering/agents/openai.yaml +7 -0
  115. package/.codex/skills/genesis-harness-engineering/checklists/checklist.md +48 -0
  116. package/.codex/skills/genesis-harness-engineering/examples/example.md +57 -0
  117. package/.codex/skills/genesis-harness-engineering/playbooks/harness-evolution.md +99 -0
  118. package/.codex/skills/genesis-harness-engineering/templates/harness-change-template.md +37 -0
  119. package/.codex/skills/genesis-new-design/SKILL.md +95 -0
  120. package/.codex/skills/genesis-new-design/agents/openai.yaml +4 -0
  121. package/.codex/skills/genesis-new-design/checklists/checklist.md +8 -0
  122. package/.codex/skills/genesis-new-design/examples/example.md +4 -0
  123. package/.codex/skills/genesis-new-design/templates/new-design-template.md +9 -0
  124. package/.codex/skills/genesis-observability-automation/SKILL.md +382 -0
  125. package/.codex/skills/genesis-observability-automation/agents/openai.yaml +7 -0
  126. package/.codex/skills/genesis-observability-automation/examples/example.md +86 -0
  127. package/.codex/skills/genesis-performance-profiling/SKILL.md +510 -0
  128. package/.codex/skills/genesis-performance-profiling/agents/openai.yaml +6 -0
  129. package/.codex/skills/genesis-performance-profiling/checklists/optimization-verification.md +199 -0
  130. package/.codex/skills/genesis-performance-profiling/checklists/performance-baseline.md +183 -0
  131. package/.codex/skills/genesis-performance-profiling/examples/example.md +234 -0
  132. package/.codex/skills/genesis-performance-profiling/observability/performance-tracking.md +202 -0
  133. package/.codex/skills/genesis-performance-profiling/playbooks/load-testing-orchestration.md +593 -0
  134. package/.codex/skills/genesis-performance-profiling/playbooks/profiling-playbook.md +601 -0
  135. package/.codex/skills/genesis-performance-profiling/templates/load-test-config-template.md +428 -0
  136. package/.codex/skills/genesis-performance-profiling/templates/performance-report-template.md +238 -0
  137. package/.codex/skills/genesis-pipeline-orchestration/SKILL.md +46 -0
  138. package/.codex/skills/genesis-pipeline-orchestration/agents/openai.yaml +7 -0
  139. package/.codex/skills/genesis-pipeline-orchestration/checklists/checklist.md +12 -0
  140. package/.codex/skills/genesis-pipeline-orchestration/examples/example.md +4 -0
  141. package/.codex/skills/genesis-pipeline-orchestration/templates/orchestration-template.md +12 -0
  142. package/.codex/skills/genesis-planning/SKILL.md +46 -0
  143. package/.codex/skills/genesis-planning/agents/openai.yaml +7 -0
  144. package/.codex/skills/genesis-planning/checklists/checklist.md +8 -0
  145. package/.codex/skills/genesis-planning/examples/example.md +4 -0
  146. package/.codex/skills/genesis-planning/templates/plan-template.md +11 -0
  147. package/.codex/skills/genesis-release/SKILL.md +46 -0
  148. package/.codex/skills/genesis-release/agents/openai.yaml +7 -0
  149. package/.codex/skills/genesis-release/checklists/checklist.md +8 -0
  150. package/.codex/skills/genesis-release/examples/example.md +4 -0
  151. package/.codex/skills/genesis-release/templates/release-checklist-template.md +9 -0
  152. package/.codex/skills/genesis-release-orchestration/SKILL.md +653 -0
  153. package/.codex/skills/genesis-release-orchestration/agents/openai.yaml +7 -0
  154. package/.codex/skills/genesis-release-orchestration/checklists/post-deployment-verification.md +274 -0
  155. package/.codex/skills/genesis-release-orchestration/checklists/pre-release-validation.md +220 -0
  156. package/.codex/skills/genesis-release-orchestration/examples/example.md +78 -0
  157. package/.codex/skills/genesis-release-orchestration/observability/release-tracking.md +253 -0
  158. package/.codex/skills/genesis-release-orchestration/playbooks/canary-deployment-orchestration.md +472 -0
  159. package/.codex/skills/genesis-release-orchestration/playbooks/semantic-versioning-automation.md +494 -0
  160. package/.codex/skills/genesis-release-orchestration/templates/deployment-strategy-template.md +303 -0
  161. package/.codex/skills/genesis-release-orchestration/templates/release-runbook-template.md +420 -0
  162. package/.codex/skills/genesis-research/SKILL.md +46 -0
  163. package/.codex/skills/genesis-research/agents/openai.yaml +7 -0
  164. package/.codex/skills/genesis-research/checklists/checklist.md +7 -0
  165. package/.codex/skills/genesis-research/examples/example.md +4 -0
  166. package/.codex/skills/genesis-research/templates/research-note-template.md +9 -0
  167. package/.codex/skills/genesis-research-first/SKILL.md +237 -0
  168. package/.codex/skills/genesis-research-first/agents/openai.yaml +7 -0
  169. package/.codex/skills/genesis-research-first/examples/example.md +85 -0
  170. package/.codex/skills/genesis-spec-propagation/SKILL.md +534 -0
  171. package/.codex/skills/genesis-spec-propagation/agents/openai.yaml +7 -0
  172. package/.codex/skills/genesis-spec-propagation/checklists/phase-update-verification.md +384 -0
  173. package/.codex/skills/genesis-spec-propagation/checklists/spec-change-detection.md +257 -0
  174. package/.codex/skills/genesis-spec-propagation/examples/example.md +63 -0
  175. package/.codex/skills/genesis-spec-propagation/observability/propagation-tracking.md +373 -0
  176. package/.codex/skills/genesis-spec-propagation/playbooks/breaking-change-propagation.md +692 -0
  177. package/.codex/skills/genesis-spec-propagation/playbooks/feature-change-propagation.md +434 -0
  178. package/.codex/skills/genesis-spec-propagation/templates/migration-guide-template.md +407 -0
  179. package/.codex/skills/genesis-upgrade-design/SKILL.md +97 -0
  180. package/.codex/skills/genesis-upgrade-design/agents/openai.yaml +4 -0
  181. package/.codex/skills/genesis-upgrade-design/checklists/checklist.md +8 -0
  182. package/.codex/skills/genesis-upgrade-design/examples/example.md +4 -0
  183. package/.codex/skills/genesis-upgrade-design/templates/upgrade-design-template.md +9 -0
  184. package/.codex/skills/spec-impact-engine/SKILL.md +504 -0
  185. package/.codex/skills/spec-impact-engine/agents/openai.yaml +7 -0
  186. package/.codex/skills/spec-impact-engine/detect-spec-changes.sh +262 -0
  187. package/.codex/skills/spec-impact-engine/examples/example.md +98 -0
  188. package/.codex/skills/spec-impact-engine/templates/impact-report.md +248 -0
  189. package/.codex/skills/spec-impact-engine/templates/migration-guide.md +223 -0
  190. package/.codex/skills/ui-ux-test-skill/SKILL.md +46 -0
  191. package/.codex/skills/ui-ux-test-skill/agents/openai.yaml +7 -0
  192. package/.codex/skills/ui-ux-test-skill/checklists/checklist.md +8 -0
  193. package/.codex/skills/ui-ux-test-skill/examples/example.md +4 -0
  194. package/.codex/skills/ui-ux-test-skill/templates/playwright-test-template.md +4 -0
  195. package/.codex-plugin/plugin.json +13 -11
  196. package/README.EN.md +719 -0
  197. package/README.VI.md +712 -0
  198. package/README.md +318 -181
  199. package/VERSION +1 -1
  200. package/bin/genesis-harness.js +133 -79
  201. package/contracts/agents/CharacterBibleAgent/error.json +1 -0
  202. package/contracts/agents/CharacterBibleAgent/example.json +1 -0
  203. package/contracts/agents/CharacterBibleAgent/request.json +1 -0
  204. package/contracts/agents/CharacterBibleAgent/response.json +1 -0
  205. package/contracts/agents/CharacterBibleAgent/schema.json +1 -0
  206. package/contracts/agents/ImageGenerationAgent/error.json +1 -0
  207. package/contracts/agents/ImageGenerationAgent/example.json +1 -0
  208. package/contracts/agents/ImageGenerationAgent/request.json +1 -0
  209. package/contracts/agents/ImageGenerationAgent/response.json +1 -0
  210. package/contracts/agents/ImageGenerationAgent/schema.json +1 -0
  211. package/contracts/agents/PipelineOrchestrator/error.json +1 -0
  212. package/contracts/agents/PipelineOrchestrator/example.json +1 -0
  213. package/contracts/agents/PipelineOrchestrator/request.json +1 -0
  214. package/contracts/agents/PipelineOrchestrator/response.json +1 -0
  215. package/contracts/agents/PipelineOrchestrator/schema.json +1 -0
  216. package/contracts/agents/PromptGenerationAgent/error.json +1 -0
  217. package/contracts/agents/PromptGenerationAgent/example.json +1 -0
  218. package/contracts/agents/PromptGenerationAgent/request.json +1 -0
  219. package/contracts/agents/PromptGenerationAgent/response.json +1 -0
  220. package/contracts/agents/PromptGenerationAgent/schema.json +1 -0
  221. package/contracts/agents/RenderAgent/error.json +1 -0
  222. package/contracts/agents/RenderAgent/example.json +1 -0
  223. package/contracts/agents/RenderAgent/request.json +1 -0
  224. package/contracts/agents/RenderAgent/response.json +1 -0
  225. package/contracts/agents/RenderAgent/schema.json +1 -0
  226. package/contracts/agents/ScenePlanningAgent/error.json +1 -0
  227. package/contracts/agents/ScenePlanningAgent/example.json +1 -0
  228. package/contracts/agents/ScenePlanningAgent/request.json +1 -0
  229. package/contracts/agents/ScenePlanningAgent/response.json +1 -0
  230. package/contracts/agents/ScenePlanningAgent/schema.json +1 -0
  231. package/contracts/agents/ScreenplayAgent/error.json +1 -0
  232. package/contracts/agents/ScreenplayAgent/example.json +1 -0
  233. package/contracts/agents/ScreenplayAgent/request.json +1 -0
  234. package/contracts/agents/ScreenplayAgent/response.json +1 -0
  235. package/contracts/agents/ScreenplayAgent/schema.json +1 -0
  236. package/contracts/agents/StoryAnalysisAgent/error.json +1 -0
  237. package/contracts/agents/StoryAnalysisAgent/example.json +1 -0
  238. package/contracts/agents/StoryAnalysisAgent/request.json +1 -0
  239. package/contracts/agents/StoryAnalysisAgent/response.json +1 -0
  240. package/contracts/agents/StoryAnalysisAgent/schema.json +1 -0
  241. package/contracts/agents/SubtitleGenerationAgent/error.json +1 -0
  242. package/contracts/agents/SubtitleGenerationAgent/example.json +1 -0
  243. package/contracts/agents/SubtitleGenerationAgent/request.json +1 -0
  244. package/contracts/agents/SubtitleGenerationAgent/response.json +1 -0
  245. package/contracts/agents/SubtitleGenerationAgent/schema.json +1 -0
  246. package/contracts/agents/VoiceGenerationAgent/error.json +1 -0
  247. package/contracts/agents/VoiceGenerationAgent/example.json +1 -0
  248. package/contracts/agents/VoiceGenerationAgent/request.json +1 -0
  249. package/contracts/agents/VoiceGenerationAgent/response.json +1 -0
  250. package/contracts/agents/VoiceGenerationAgent/schema.json +1 -0
  251. package/contracts/api/README.md +4 -0
  252. package/contracts/api/endpoint-template/error.json +7 -0
  253. package/contracts/api/endpoint-template/example.json +11 -0
  254. package/contracts/api/endpoint-template/request.json +7 -0
  255. package/contracts/api/endpoint-template/response.json +6 -0
  256. package/contracts/api/endpoint-template/schema.json +9 -0
  257. package/contracts/events/README.md +4 -0
  258. package/contracts/events/event-template/error.json +4 -0
  259. package/contracts/events/event-template/example.json +6 -0
  260. package/contracts/events/event-template/request.json +6 -0
  261. package/contracts/events/event-template/response.json +3 -0
  262. package/contracts/events/event-template/schema.json +8 -0
  263. package/contracts/ui/README.md +4 -0
  264. package/contracts/ui/screen-template/error.json +4 -0
  265. package/contracts/ui/screen-template/example.json +7 -0
  266. package/contracts/ui/screen-template/request.json +4 -0
  267. package/contracts/ui/screen-template/response.json +4 -0
  268. package/contracts/ui/screen-template/schema.json +8 -0
  269. package/fixtures/agents/agent-fixture-template.md +14 -0
  270. package/fixtures/api/api-fixture-template.md +14 -0
  271. package/fixtures/images/image-fixture-template.md +14 -0
  272. package/fixtures/pipeline/pipeline-fixture-template.md +14 -0
  273. package/fixtures/render/render-fixture-template.md +14 -0
  274. package/fixtures/tts/tts-fixture-template.md +14 -0
  275. package/fixtures/videos/video-fixture-template.md +14 -0
  276. package/observability/agent-runs/agent-run-template.md +22 -0
  277. package/observability/decision-logs/decision-log-template.md +18 -0
  278. package/observability/failures/failure-template.md +18 -0
  279. package/package.json +8 -2
  280. package/playwright/e2e/e2e-template.md +4 -0
  281. package/playwright/fixtures/ui-fixture-template.md +4 -0
  282. package/playwright/smoke/smoke-template.md +4 -0
  283. package/playwright/visual/visual-regression-template.md +4 -0
  284. package/scripts/README.md +342 -0
  285. package/scripts/compact-context.sh +54 -0
  286. package/scripts/detect-changes.sh +152 -0
  287. package/scripts/install.sh +103 -69
  288. package/scripts/offload-log.sh +72 -0
  289. package/scripts/run-evals.sh +118 -52
  290. package/scripts/run-verify-loop.sh +75 -0
  291. package/scripts/uninstall.sh +81 -52
  292. package/scripts/verify.sh +380 -109
  293. package/tests/contracts/contract-template.test.md +10 -0
  294. package/tests/fixtures/fixture-index.md +4 -0
  295. package/tests/integration/integration-template.test.md +4 -0
  296. package/tests/unit/unit-template.test.md +4 -0
  297. package/.codex/skills/project-genesis-harness/SKILL.md +0 -727
  298. package/.codex/skills/project-genesis-harness/agents/openai.yaml +0 -9
  299. /package/.codex/skills/{project-genesis-harness → genesis-harness}/references/planning-schema.md +0 -0
  300. /package/.codex/skills/{project-genesis-harness → genesis-harness}/references/quality-rubric.md +0 -0
  301. /package/.codex/skills/{project-genesis-harness → genesis-harness}/references/research-rubric.md +0 -0
  302. /package/.codex/skills/{project-genesis-harness → genesis-harness}/references/workflows.md +0 -0
  303. /package/.codex/skills/{project-genesis-harness → genesis-harness}/resources/agents-template.md +0 -0
  304. /package/.codex/skills/{project-genesis-harness → genesis-harness}/resources/api-docs-template.md +0 -0
  305. /package/.codex/skills/{project-genesis-harness → genesis-harness}/resources/architecture-template.md +0 -0
  306. /package/.codex/skills/{project-genesis-harness → genesis-harness}/resources/audit-template.md +0 -0
  307. /package/.codex/skills/{project-genesis-harness → genesis-harness}/resources/bug-template.md +0 -0
  308. /package/.codex/skills/{project-genesis-harness → genesis-harness}/resources/check-template.md +0 -0
  309. /package/.codex/skills/{project-genesis-harness → genesis-harness}/resources/conventions-template.md +0 -0
  310. /package/.codex/skills/{project-genesis-harness → genesis-harness}/resources/decision-template.md +0 -0
  311. /package/.codex/skills/{project-genesis-harness → genesis-harness}/resources/design-template.md +0 -0
  312. /package/.codex/skills/{project-genesis-harness → genesis-harness}/resources/escalation-template.md +0 -0
  313. /package/.codex/skills/{project-genesis-harness → genesis-harness}/resources/feature-template.md +0 -0
  314. /package/.codex/skills/{project-genesis-harness → genesis-harness}/resources/integrations-template.md +0 -0
  315. /package/.codex/skills/{project-genesis-harness → genesis-harness}/resources/journeys-template.md +0 -0
  316. /package/.codex/skills/{project-genesis-harness → genesis-harness}/resources/lessons-learned-template.md +0 -0
  317. /package/.codex/skills/{project-genesis-harness → genesis-harness}/resources/observability-template.md +0 -0
  318. /package/.codex/skills/{project-genesis-harness → genesis-harness}/resources/phase-template.md +0 -0
  319. /package/.codex/skills/{project-genesis-harness → genesis-harness}/resources/pitfalls-template.md +0 -0
  320. /package/.codex/skills/{project-genesis-harness → genesis-harness}/resources/planning-tree-template.md +0 -0
  321. /package/.codex/skills/{project-genesis-harness → genesis-harness}/resources/project-template.md +0 -0
  322. /package/.codex/skills/{project-genesis-harness → genesis-harness}/resources/quality-score-template.md +0 -0
  323. /package/.codex/skills/{project-genesis-harness → genesis-harness}/resources/requirements-template.md +0 -0
  324. /package/.codex/skills/{project-genesis-harness → genesis-harness}/resources/research-template.md +0 -0
  325. /package/.codex/skills/{project-genesis-harness → genesis-harness}/resources/review-template.md +0 -0
  326. /package/.codex/skills/{project-genesis-harness → genesis-harness}/resources/spec-changelog-template.md +0 -0
  327. /package/.codex/skills/{project-genesis-harness → genesis-harness}/resources/stack-template.md +0 -0
  328. /package/.codex/skills/{project-genesis-harness → genesis-harness}/resources/verification-template.md +0 -0
@@ -0,0 +1,99 @@
1
+ # Playbook: Harness Evolution & Verification Scaling
2
+
3
+ This playbook guides the developer or autonomous agent through the process of evolving the **Genesis Codex Harness** itself, ensuring that new skills, scripts, contracts, and lints are integrated without breaking cross-platform execution or causing regression drift.
4
+
5
+ ---
6
+
7
+ ## 🧭 Phase 1: Problem Isolation & Plan Initiation
8
+
9
+ ### 1. Confirm Requirements & Gaps
10
+ Identify the exact capability gap or bug in the harness operating environment.
11
+ - *Example*: Adding a new skill `genesis-performance-profiling` that needs to be validated in `verify.sh` and CLI scripts.
12
+ - *Checklist*: Verify what contracts, fixtures, memory, or scripts are impacted by this change.
13
+
14
+ ### 2. Scaffold On-Disk Task Tracking
15
+ Do not keep state in your context window. Write the target tasks to a localized planning folder or `task.md` file:
16
+ ```markdown
17
+ - [ ] Task 1: Create failing verification test
18
+ - [ ] Task 2: Implement minimal harness script change
19
+ - [ ] Task 3: Normalize line-endings and format
20
+ - [ ] Task 4: Execute full regression evals
21
+ - [ ] Task 5: Recalculate metrics and update codebase memory
22
+ ```
23
+
24
+ ---
25
+
26
+ ## 🛠 Phase 2: Red State - Writing the Failing Test First
27
+
28
+ ### 1. Identify the Verification Target
29
+ - If the change affects a **skill structure**, edit `scripts/verify.sh` to add the new skill folder to the `skill_names` array, and specify its verification protocol.
30
+ - If the change affects **CLI installation or paths**, edit `scripts/run-evals.sh` to add assertions targeting the new folders, links, or package outputs.
31
+
32
+ ### 2. Implement the Failing Assertion
33
+ Insert a strict, skeptical check that evaluates the *exact* expected invariant before you make the change.
34
+ *Example in `verify.sh`*:
35
+ ```bash
36
+ verify_one "$skill_root/genesis-performance-profiling" || fail "performance skill verification failed"
37
+ ```
38
+
39
+ ### 3. Run the Suite & Confirm RED
40
+ Run the script to guarantee it fails precisely where expected:
41
+ ```bash
42
+ bash scripts/verify.sh
43
+ ```
44
+ Verify that the output reports the exact failure you planned to resolve, proving the evaluator is skeptical and active.
45
+
46
+ ---
47
+
48
+ ## 💻 Phase 3: Green State - Minimal Atomic Implementation
49
+
50
+ ### 1. Write the Minimal Change
51
+ Implement the new skill, edit the CLI wrapper (`bin/genesis-harness.js`), or adjust the installer script.
52
+ - *Strict Rule*: Focus **only** on the targeted fix. Do not refactor unrelated files or add features outside the task scope.
53
+
54
+ ### 2. Handle Carriage Return Safety (CRLF)
55
+ Windows checkouts frequently introduce CRLF line endings (`\r\n`) into Bash scripts; the stray carriage return (`\r`) causes errors such as `syntax error near unexpected token` around keywords like `do` or `then`.
56
+ Always run the line-ending normalization pipeline after writing any shell script:
57
+ ```bash
58
+ python -c "import os; [open(os.path.join(root, f), 'wb').write(content) for root, dirs, files in os.walk('.') for f in files if f.endswith('.sh') for content in [open(os.path.join(root, f), 'rb').read().replace(b'\r\n', b'\n')]]"
59
+ ```
60
+
61
+ ---
62
+
63
+ ## 🔍 Phase 4: Verification & Refactoring Loop
64
+
65
+ ### 1. Execute All Verification Levels
66
+ Run the entire validation pipeline locally:
67
+ ```bash
68
+ # 1. Structural and metadata compliance
69
+ bash scripts/verify.sh
70
+
71
+ # 2. Package install/uninstall/verify regression checks
72
+ bash scripts/run-evals.sh
73
+
74
+ # 3. Dry-run npm pack check
75
+ npm run pack:check
76
+ ```
77
+
78
+ ### 2. Eliminate Technical Debt
79
+ Review your changes and clean up:
80
+ - Remove all temporary debug statements, console logs, or print dumps.
81
+ - Confirm all new scripts have execution permissions (`chmod +x`).
82
+ - Review git diffs to ensure no accidental whitespace or formatting changes were checked in.
83
+
84
+ ---
85
+
86
+ ## 💾 Phase 5: Memory Synchronization & Handoff
87
+
88
+ ### 1. Document Recovery Checkpoint
89
+ Append a durable restoration checkpoint entry to `.codebase/RECOVERY_POINTS.md` in the following format:
90
+ ```markdown
91
+ ### Checkpoint: <Description of evolution>
92
+ - **Date**: YYYY-MM-DD
93
+ - **Target Files**: `list of changed paths`
94
+ - **Verification Commands**: `bash scripts/verify.sh && bash scripts/run-evals.sh`
95
+ - **Rollback Steps**: `git checkout -- <files>`
96
+ ```
97
+
98
+ ### 2. Recalculate Architecture and Quality Scores
99
+ Review `.codebase/CURRENT_STATE.md` and `.codebase/MODULE_INDEX.md` (paths relative to the repository root). Update the total skill count and state descriptions, and confirm compliance before marking your work as 100% complete.
@@ -0,0 +1,37 @@
1
+ # Harness Evolution Proposal: [Evolution Name]
2
+
3
+ ---
4
+
5
+ ## 🔍 Gap Analysis & Justification
6
+ - **Problem/Gap**: Describe the failure scenario, missing structural check, or CLI issue.
7
+ - **Impact**: What is the impact on developer velocity, token usage, or agent reliability?
8
+
9
+ ---
10
+
11
+ ## 🛠 Proposed Structural Changes
12
+ Group files by category and detail exact changes:
13
+
14
+ ### [MODIFY] [verify.sh](file:///scripts/verify.sh)
15
+ - **Change**: Explain the new assertion or mapping.
16
+ - **TDD Hook**: How does it fail first?
17
+
18
+ ### [NEW] [newfile.md](file:///path/to/newfile)
19
+ - **Purpose**: Explain why this file is required and what invariant it enforces.
20
+
21
+ ---
22
+
23
+ ## 🧪 Verification Plan
24
+
25
+ ### Automated Validation
26
+ - [ ] Run `bash scripts/verify.sh`
27
+ - [ ] Run `bash scripts/run-evals.sh`
28
+ - [ ] Run `npm run pack:check`
29
+
30
+ ### Line Ending Normalization
31
+ - [ ] Run the recursive CRLF -> LF normalization command (Python one-liner) on all `.sh` files to ensure cross-platform safety.
32
+
33
+ ---
34
+
35
+ ## 💾 Resumability & Rollback Plan
36
+ - **Recovery Point**: Where will this be logged in `.codebase/RECOVERY_POINTS.md`?
37
+ - **Git Rollback command**: `git checkout -- <changed files>`
@@ -0,0 +1,95 @@
1
+ ---
2
+ name: genesis-new-design
3
+ description: Create premium frontend web designs and usable first-screen experiences for new websites, web apps, dashboards, tools, and landing pages. Use when Codex is asked to design or build a new frontend UI, create a fresh page or app experience, choose a visual direction for a new product surface, or turn a product brief into implementation-ready React, Next.js, Tailwind, CSS, or HTML.
4
+ ---
5
+
6
+ # Genesis New Design
7
+
8
+ Use this skill for greenfield frontend web design. Build the actual usable experience first, not a placeholder, explanation page, or generic marketing shell unless the user explicitly asks for one.
9
+
10
+ ## Purpose
11
+ Create new frontend web experiences with testable UI contracts, fixtures, visual states, and verification.
12
+
13
+ ## When to use
14
+ Use when building a new web page, app screen, dashboard, tool, landing page, or frontend flow.
15
+
16
+ ## When NOT to use
17
+ Do not use to redesign an existing UI, where current behavior must be preserved first; use `genesis-upgrade-design` instead.
18
+
19
+ ## Inputs required
20
+ Product intent, target users, primary workflow, stack details, route or entry point, state list, and visual constraints.
21
+
22
+ ## Outputs required
23
+ Implemented UI, UI contract, fixtures, responsive states, visual verification, and docs or memory updates.
24
+
25
+ ## Required tests
26
+ Create UI load, interaction, validation, API sync, and visual checks where practical before implementation.
27
+
28
+ ## Required fixtures
29
+ Create UI fixtures for default, loading, empty, error, and success states.
30
+
31
+ ## Required contract updates
32
+ Update `contracts/ui/` and API contracts for changed UI/API behavior.
33
+
34
+ ## Required codebase map updates
35
+ Update `.codebase/UI_ROUTES.md`, frontend summary, and test matrix.
36
+
37
+ ## Token saving rules
38
+ Read UI route maps and summaries first; inspect only relevant components, routes, and styles.
39
+
40
+ ## Acceptance criteria
41
+ The UI renders correctly on desktop/mobile, supports expected states, and passes available checks.
42
+
43
+ ## Common mistakes
44
+ Building a marketing page instead of the requested app, using placeholder content, and skipping error/empty states.
45
+
46
+ ## Recovery workflow
47
+ If visual output fails, capture screenshot evidence, update the fixture or contract, then apply the smallest design correction.
48
+
49
+ ## Workflow
50
+
51
+ 1. Inspect the project stack before choosing patterns:
52
+ - Read package/config files and existing app structure.
53
+ - Use installed UI, icon, styling, animation, and routing libraries. Do not add dependencies unless explicitly requested.
54
+ - Match the framework version and styling system already present.
55
+
56
+ 2. Define the design intent from the request:
57
+ - Identify audience, product category, primary task, density, tone, and constraints.
58
+ - Choose one clear visual direction and commit to it across typography, color, spacing, surfaces, iconography, and motion.
59
+ - For tools, dashboards, and operational apps, prioritize scanning, repeated use, compact controls, and predictable navigation over decorative hero layouts.
60
+
61
+ 3. Build complete UI states:
62
+ - Include default, loading, empty, error, hover, active, focus, disabled, and responsive states when the surface supports them.
63
+ - Use real draft copy and plausible data. Do not use lorem ipsum, "John Doe", "Acme", or vague AI copy.
64
+ - Use icons for common actions and tool controls. Avoid emojis in UI code, text, alt text, and labels.
65
+
66
+ 4. Use visual assets intentionally:
67
+ - Websites and branded/product pages need relevant images, screenshots, generated bitmap assets, or concrete product visuals when appropriate.
68
+ - Do not rely on decorative blobs, generic gradients, or meaningless illustrations.
69
+ - Keep images inspectable and useful for the user, not dark, blurred, or purely atmospheric.
70
+
71
+ 5. Verify the result:
72
+ - Run the project checks that prove the UI compiles.
73
+ - Start the local dev server when needed.
74
+ - Capture screenshots for visual work and inspect desktop/mobile layouts for overlap, clipping, unreadable text, blank canvases, and broken assets.
75
+
76
+ ## Design Rules
77
+
78
+ - Use strong hierarchy: clear title scale, restrained supporting copy, and compact labels inside dense UI.
79
+ - Use responsive structure with stable dimensions for fixed-format controls, grids, boards, counters, and toolbars.
80
+ - Prefer CSS Grid for multi-column layouts; avoid fragile percentage math.
81
+ - Use `min-height: 100dvh` instead of `100vh` for viewport-height sections.
82
+ - Animate only `transform` and `opacity`; avoid scroll listeners for visual effects.
83
+ - Keep cards purposeful. Do not put cards inside cards or turn every page section into a floating card.
84
+ - Avoid one-note palettes and the default purple/blue AI gradient look.
85
+ - Keep border radius moderate unless the existing design system requires otherwise.
86
+ - Ensure text never overlaps adjacent content or escapes buttons, cards, sidebars, tabs, or toolbars.
87
+
88
+ ## Anti-Patterns
89
+
90
+ - Do not create a landing page when the request is for an app, game, dashboard, editor, or tool.
91
+ - Do not explain the product's features in visible UI instead of building the feature.
92
+ - Do not use oversized hero typography inside compact app panels.
93
+ - Do not invent new design systems when the repo already has component conventions.
94
+ - Do not introduce new dependencies, fonts, animation libraries, or icon libraries without checking the repo and getting explicit approval.
95
+ - Do not finish without verification evidence for the code and the rendered layout.
@@ -0,0 +1,4 @@
1
+ interface:
2
+ display_name: "Genesis New Design"
3
+ short_description: "Create premium frontend web designs"
4
+ default_prompt: "Use $genesis-new-design to design and build a premium frontend web experience."
@@ -0,0 +1,8 @@
1
+ # New Design Checklist
2
+
3
+ - [ ] Stack inspected.
4
+ - [ ] UI contract written.
5
+ - [ ] Fixtures created.
6
+ - [ ] Responsive states verified.
7
+ - [ ] Visual check completed.
8
+
@@ -0,0 +1,4 @@
1
+ # Example
2
+
3
+ Use `$genesis-new-design` to build a new dashboard first screen with loading, empty, error, and success states.
4
+
@@ -0,0 +1,9 @@
1
+ # New Design Template
2
+
3
+ ## Product Intent
4
+ ## Primary Workflow
5
+ ## UI States
6
+ ## Contracts
7
+ ## Fixtures
8
+ ## Verification
9
+
@@ -0,0 +1,382 @@
1
+ ---
2
+ name: genesis-observability-automation
3
+ description: "Automate observability architecture, monitoring dashboard config, alerting policy generation, health check automation, and incident response runbook creation. Use to instrument services and prepare for production."
4
+ ---
5
+
6
+ # genesis-observability-automation
7
+
8
+ ## Purpose
9
+
10
+ The `genesis-observability-automation` skill automates the full lifecycle of observability for software services. It generates observability architecture diagrams (metrics/logs/traces topology), produces monitoring dashboard configurations for Grafana, Datadog, and CloudWatch, creates SLO-based alerting policies with escalation chains, automates health check configuration (readiness/liveness probes and SLA validation), and generates incident response runbooks for P0/P1/P2/P3 severity triage, resolution, and post-mortem.
11
+
12
+ This skill transforms observability from an afterthought into an engineering discipline. Every phase produces production-ready artifacts that integrate with standard monitoring stacks — no manual dashboard clicking, no ad-hoc alert configuration. Observability is code, version-controlled and reviewed like any other engineering artifact.
13
+
14
+ **Core philosophy**: You cannot operate what you cannot see. You cannot respond to incidents you cannot detect. Observability must be designed before production launch, not retrofitted after the first outage. Every service must expose the three pillars (metrics, logs, traces) before it ships.
15
+
16
+ ---
17
+
18
+ ## When to use
19
+
20
+ Use `genesis-observability-automation` when:
21
+
22
+ - A new service is approaching production and needs observability infrastructure before launch. Run this skill as part of the production readiness checklist.
23
+ - An existing service is suffering repeated incidents due to lack of visibility (team finds out about problems from users, not monitors).
24
+ - You are migrating monitoring stacks (e.g., from custom scripts to Prometheus + Grafana, or from on-prem to Datadog).
25
+ - A post-mortem action item is "we need better monitoring" or "we need runbooks" — this skill produces both.
26
+ - Sprint planning includes observability-related tickets (dashboard, alert, runbook) and you need to generate them efficiently.
27
+ - An SRE or on-call rotation is being established and needs standard runbooks and escalation chains.
28
+ - An audit or compliance review requires documented incident response procedures.
29
+ - You need to validate that an existing service's observability meets a defined maturity level before certifying it as production-ready.
30
+ - A new team member is joining on-call and needs structured runbooks to operate the service safely.
31
+
32
+ ---
33
+
34
+ ## When NOT to use
35
+
36
+ Do NOT use `genesis-observability-automation` when:
37
+
38
+ - The service is a prototype or demo that will never go to production. Observability infrastructure has maintenance cost — do not invest it in throwaway code.
39
+ - You only need a quick manual alert on a single metric. Use the monitoring tool's UI directly for one-off alerts.
40
+ - The service already has mature, well-maintained observability and you only need to add one metric. Add the metric directly rather than regenerating the full architecture.
41
+ - You need to diagnose a currently active incident. Use the existing runbooks and monitoring tools. This skill generates runbooks — it does not replace them during an emergency.
42
+ - The monitoring stack has not been decided yet. Run `genesis-planning` first to select the monitoring stack, then return to this skill.
43
+ - You need network-layer observability (packet capture, flow logs) — this skill covers application-layer observability. Use a dedicated network observability tool (e.g., Wireshark, VPC flow logs) for network-layer issues.
44
+
45
+ ---
46
+
47
+ ## Inputs required
48
+
49
+ Before invoking this skill, gather or confirm the following inputs:
50
+
51
+ ### Service inputs
52
+ - **Service name**: The canonical name of the service (used in all generated config names, e.g., `users-api`).
53
+ - **Service language/runtime**: Node.js, Python, Go, Java (determines which instrumentation libraries to include).
54
+ - **Service type**: REST API, gRPC service, background worker, streaming service, batch job (determines which RED metrics apply).
55
+ - **Service endpoints or operations**: Complete list of endpoints/operations to monitor (with HTTP methods if applicable).
56
+ - **Deployment platform**: Kubernetes, ECS, Lambda, Heroku, bare metal (determines probe types and service discovery config).
57
+
58
+ ### Monitoring stack inputs
59
+ - **Metrics stack**: Prometheus + Grafana | Datadog | CloudWatch | New Relic | None (select one).
60
+ - **Logging stack**: ELK (Elasticsearch + Logstash + Kibana) | Loki + Grafana | Datadog Logs | CloudWatch Logs | None.
61
+ - **Tracing stack**: Jaeger | Zipkin | AWS X-Ray | Datadog APM | OpenTelemetry | None (select one; if None, tracing topology and trace-based panels are skipped).
62
+ - **Alerting tool**: PagerDuty | OpsGenie | Slack | VictorOps | Email (escalation chain target).
63
+ - **On-call rotation**: List of team members in the on-call rotation, in order of escalation.
64
+
65
+ ### SLO/SLA inputs
66
+ - **Availability SLO** (e.g., 99.9%): What uptime percentage is required? Determines error budget.
67
+ - **Latency SLO** (e.g., p95 < 200 ms): What response time is acceptable for 95% of requests?
68
+ - **Error rate SLO** (e.g., < 0.1%): What error rate is acceptable?
69
+ - **Throughput minimum** (e.g., ≥ 100 RPS): Minimum throughput for the service to be considered operational.
70
+
71
+ ### Incident response inputs
72
+ - **Escalation chain**: Primary on-call → Secondary on-call → Engineering manager → VP Engineering (with contact info / PagerDuty IDs).
73
+ - **Communication channels**: Incident Slack channel, status page URL, customer communication channel.
74
+ - **Service dependencies**: What external services does this service depend on? (Used in runbook dependency checks.)
75
+ - **Rollback procedure**: How is the service rolled back? (kubectl rollout undo, feature flag, etc.)
76
+ - **Business impact**: What is the customer impact if this service is down? (Used for severity classification.)
77
+
78
+ ---
79
+
80
+ ## Outputs required
81
+
82
+ ### Phase 1 outputs
83
+ - `observability-architecture.md`: Complete observability topology diagram showing metrics, logs, and traces collection paths, storage, and visualization layers.
84
+ - `instrumentation-guide.md`: Service-specific instrumentation instructions (which libraries to add, what to instrument, structured logging format).
85
+
86
+ ### Phase 2 outputs
87
+ - `dashboards/service-overview.json`: Grafana dashboard JSON (or Datadog dashboard JSON) with RED metrics panels.
88
+ - `dashboards/service-details.json`: Detailed drill-down dashboard with per-endpoint latency histograms, error breakdowns, and resource utilization.
89
+ - `dashboards/slo-tracking.json`: SLO/error budget burn rate dashboard.
90
+
91
+ ### Phase 3 outputs
92
+ - `alerts/alert-rules.yml`: Prometheus alerting rules (or Datadog monitor configs) with SLO-based thresholds.
93
+ - `alerts/escalation-chain.yml`: PagerDuty/OpsGenie escalation policy config.
94
+ - `alerts/alert-silence-template.md`: Template for silencing alerts during planned maintenance.
95
+
96
+ ### Phase 4 outputs
97
+ - `health-checks/readiness-probe.yml`: Kubernetes readiness probe configuration.
98
+ - `health-checks/liveness-probe.yml`: Kubernetes liveness probe configuration.
99
+ - `health-checks/health-endpoint-spec.md`: Specification for the `/health`, `/readiness`, `/liveness`, and `/metrics` endpoints.
100
+
101
+ ### Phase 5 outputs
102
+ - `runbooks/p0-runbook.md`: Production down — all-hands incident runbook.
103
+ - `runbooks/p1-runbook.md`: Production degraded — on-call incident runbook.
104
+ - `runbooks/p2-runbook.md`: Partial degradation — business hours runbook.
105
+ - `runbooks/post-mortem-template.md`: Blameless post-mortem template with 5-whys.
106
+ - `INCIDENT_LOG.md`: Running log of all incidents (initialize with this skill, append after each incident).
107
+
108
+ ---
109
+
110
+ ## Required tests
111
+
112
+ ### Architecture tests
113
+ - [ ] `test/observability/instrumentation.test.js`: Verifies service exports metrics endpoint at `/metrics` with required metric names (RED metrics + process metrics).
114
+ - [ ] `test/observability/health-endpoint.test.js`: Verifies `/health`, `/readiness`, and `/liveness` endpoints return correct schemas and status codes under normal conditions.
115
+ - [ ] `test/observability/structured-logging.test.js`: Verifies that all log output is valid JSON with required fields (timestamp, level, service, trace_id).
116
+
117
+ ### Dashboard tests
118
+ - [ ] `test/observability/dashboard-schema.test.js`: Validates generated Grafana dashboard JSON against Grafana's schema (all panels have valid datasource refs, correct query syntax).
119
+ - [ ] `test/observability/dashboard-completeness.test.js`: Verifies dashboard has all required panels (rate, errors, duration, saturation).
120
+
121
+ ### Alert tests
122
+ - [ ] `test/observability/alert-rules-valid.test.js`: Validates Prometheus alert rules YAML with `promtool check rules alert-rules.yml`.
123
+ - [ ] `test/observability/alert-threshold-coverage.test.js`: Verifies alert rules cover all required SLO burn rate windows (1h, 6h, 24h, 3d).
124
+
125
+ ### Runbook tests
126
+ - [ ] `test/observability/runbook-completeness.test.js`: Verifies each runbook has all required sections (severity definition, triage steps, escalation, resolution, post-mortem).
127
+
128
+ All tests must pass against fixtures in `fixtures/observability/`.
129
+
130
+ ---
131
+
132
+ ## Required fixtures
133
+
134
+ - `fixtures/observability/monitoring-config-expected.json`: Prometheus scrape config + Grafana dashboard spec with correct structure.
135
+ - `fixtures/observability/alert-policy-expected.json`: SLO-based alert rules with correct threshold calculations.
136
+ - `fixtures/observability/incident-runbook-expected.json`: P1 incident runbook with correct structure and all required sections.
137
+
138
+ ---
139
+
140
+ ## Required contract updates
141
+
142
+ Update the following when this skill's outputs change:
143
+
144
+ - `contracts/observability/dashboard-schema.contract.json`: JSON Schema for generated dashboard configs. Update when new panel types are added.
145
+ - `contracts/observability/alert-rule-schema.contract.json`: JSON Schema for alert rule configs. Update when threshold calculation logic changes.
146
+ - `contracts/observability/health-endpoint.contract.json`: API contract for `/health`, `/readiness`, `/liveness` endpoints. Update when health check format changes.
147
+
148
+ Contract update procedure:
149
+ 1. Bump `version` field.
150
+ 2. Set `changed_at` to current ISO timestamp.
151
+ 3. Add `changelog` entry.
152
+ 4. Re-run fixture tests.
153
+ 5. Notify any consumers of the contract (teams using the generated configs).
154
+
155
+ ---
156
+
157
+ ## Required codebase map updates
158
+
159
+ After completing observability setup:
160
+
161
+ ### `.codebase/CURRENT_STATE.md`
162
+ - Add: `Observability: [service] instrumented [date]. Stack: [Prometheus|Datadog|CloudWatch].`
163
+ - Add: `Runbooks: P0/P1/P2 runbooks generated for [service].`
164
+
165
+ ### `.codebase/MODULE_INDEX.md`
166
+ - Add entries for dashboard JSON files, alert rule files, and runbook files.
167
+ - Add entries for any new health check endpoints added to the service.
168
+
169
+ ### `observability/INCIDENT_LOG.md`
170
+ - Initialize with the service name, observability architecture summary, and `No incidents yet` placeholder.
171
+
172
+ ---
173
+
174
+ ## Token saving rules
175
+
176
+ 1. **Reference existing dashboards**: If a dashboard for this service exists, diff it against requirements — do not regenerate the whole dashboard.
177
+ 2. **Generate only needed runbooks**: Generate runbooks for the severity levels that apply. A simple internal tool only needs P1/P2 runbooks, not a P0 all-hands procedure.
178
+ 3. **Reuse alert templates**: Base new alert rules on the existing template in `templates/alerting-policy-template.md`. Fill in thresholds, do not rewrite the structure.
179
+ 4. **Summarize topology, don't draw ASCII art in prompts**: Reference `observability-architecture.md` by name, not by embedding it in subsequent prompts.
180
+ 5. **Batch all dashboard panels**: Generate all Grafana panels in one pass. Do not loop back to add individual panels.
181
+ 6. **Skip tracing config if no tracing stack selected**: If the team has not adopted distributed tracing, skip Phase 1 tracing topology and Phase 2 trace-based panels.
182
+ 7. **Use compact JSON for dashboard fixtures**: Minify dashboard JSON in fixtures to reduce token consumption in test comparison.
183
+
184
+ ---
185
+
186
+ ## Acceptance criteria
187
+
188
+ Observability setup is COMPLETE and ACCEPTED when ALL of the following are true:
189
+
190
+ ### Instrumentation
191
+ - [ ] Service exports `/metrics` endpoint (Prometheus format) or sends metrics to the configured metrics backend.
192
+ - [ ] All RED metrics are present: `requests_total`, `request_duration_seconds`, `request_errors_total`.
193
+ - [ ] All logs are structured JSON with: `timestamp`, `level`, `service`, `trace_id`, `span_id`, `message`.
194
+ - [ ] Distributed traces are being collected (if tracing stack is configured).
195
+
196
+ ### Dashboards
197
+ - [ ] Service overview dashboard exists and is deployed to the monitoring stack.
198
+ - [ ] Dashboard shows: Rate (RPS), Errors (error rate %), Duration (p50/p95/p99), Saturation (CPU, memory, connection pool).
199
+ - [ ] SLO tracking panel shows current error budget remaining.
200
+ - [ ] Dashboard is linked from the service's README or internal wiki.
201
+
202
+ ### Alerts
203
+ - [ ] SLO burn rate alerts exist for fast burn (1h/6h windows) and slow burn (24h/3d windows).
204
+ - [ ] All alerts have: `severity` label, `runbook_url` annotation, `description` annotation.
205
+ - [ ] Escalation chain is configured in the alerting tool (PagerDuty/OpsGenie).
206
+ - [ ] At least one alert has been test-fired to verify the escalation chain works end-to-end.
207
+
208
+ ### Health checks
209
+ - [ ] `/health` endpoint exists and returns `{"status": "ok"}` with HTTP 200 when healthy.
210
+ - [ ] `/readiness` probe is configured in Kubernetes (or equivalent).
211
+ - [ ] `/liveness` probe is configured with appropriate failure thresholds.
212
+
213
+ ### Runbooks
214
+ - [ ] P1 runbook exists and covers: detection, triage, escalation, resolution, post-mortem.
215
+ - [ ] Runbook is linked in all alert `runbook_url` annotations.
216
+ - [ ] Runbook has been reviewed by the on-call team and is accessible without production access (stored in wiki or repo).
217
+ - [ ] Post-mortem template is ready for use.
218
+
219
+ ---
220
+
221
+ ## Common mistakes
222
+
223
+ ### Mistake 1: Alerting on symptoms without causes
224
+ **Problem**: Alert fires on "CPU > 80%" but CPU being high is a symptom, not a cause. On-call engineer doesn't know what to do.
225
+ **Fix**: Alert on user-facing symptoms (error rate, latency) and provide runbooks that help diagnose the underlying cause. Pair symptom alerts with diagnostic links.
226
+
227
+ ### Mistake 2: Alert fatigue from too many low-quality alerts
228
+ **Problem**: 50 alerts firing every week, most of which are noise. On-call engineers start ignoring alerts ("boy who cried wolf").
229
+ **Fix**: Start with only SLO-based alerts. Achieve < 5 actionable alerts per week. Every alert must have a runbook and a clear action. Regularly review false positive rates.
230
+
231
+ ### Mistake 3: Dashboards without context
232
+ **Problem**: Dashboard shows a graph going up but no reference line to know if that's good or bad.
233
+ **Fix**: Every metric panel must have a reference line or annotation showing the SLA target, the previous week's baseline, or an absolute threshold. "Is this normal?" should be answerable from the dashboard alone.
234
+
235
+ ### Mistake 4: Missing the "long tail" in alerting windows
236
+ **Problem**: Alert only fires when error rate > 5% for 5 minutes. A slow 0.5% burn for 48 hours exhausts the entire monthly error budget without triggering any alert.
237
+ **Fix**: Implement multi-window burn-rate alerting on error budget consumption: fast burn (≥ 2% of the budget consumed in 1 hour), medium burn (≥ 5% in 6 hours), slow burn (≥ 10% in 3 days). Cover both fast and slow failures.
238
+
239
+ ### Mistake 5: Runbooks that only the original author can follow
240
+ **Problem**: Runbook says "check the logs" without specifying where, how, or what to look for. New on-call engineer is lost.
241
+ **Fix**: Write runbooks for the most junior person on the rotation. Include exact commands to run, exact queries to execute, and exact thresholds that indicate each diagnosis. Link to the monitoring dashboard directly.
242
+
243
+ ### Mistake 6: Health checks that always return 200
244
+ **Problem**: `/health` endpoint returns HTTP 200 even when the database is unreachable. Kubernetes load balancer continues routing traffic to a broken pod.
245
+ **Fix**: Health check must verify actual service dependencies. `/readiness` should check DB connectivity, cache connectivity, and any critical downstream dependencies. Return 503 if any dependency is unhealthy.
246
+
247
+ ### Mistake 7: Observability only in production
248
+ **Problem**: Monitoring is only set up for production. Issues are invisible in staging and only discovered after production deployment.
249
+ **Fix**: Deploy the same observability stack in staging. Run integration tests against the `/metrics` endpoint. Validate alert rules in staging before production.
250
+
251
+ ### Mistake 8: Missing trace context in logs
252
+ **Problem**: Logs don't include `trace_id` or `span_id`. When investigating an incident, there's no way to correlate a specific user request across microservices.
253
+ **Fix**: Inject `trace_id` and `span_id` into all log lines using OpenTelemetry or manual propagation. This is the #1 enabler of fast incident resolution in distributed systems.
254
+
255
+ ---
256
+
257
+ ## Recovery workflow
258
+
259
+ ### Recovery 1: Metrics not appearing in dashboard
260
+ ```
261
+ Symptom: Dashboard shows "No data" for all panels.
262
+ Step 1: Verify service is running: kubectl get pods -n [namespace]
263
+ Step 2: Check /metrics endpoint directly: curl http://[service-host]:[metrics-port]/metrics | grep requests_total (use the service's own metrics port — 9090 is Prometheus's default, not the service's)
264
+ Step 3: Check Prometheus scrape config: kubectl describe servicemonitor [name] -n monitoring
265
+ Step 4: Check Prometheus targets: open Prometheus UI → Status → Targets → look for service in targets list
266
+ Step 5: If service is in targets but metrics missing: check instrumentation code (is metrics library initialized before first request?)
267
+ Step 6: If service is NOT in targets: check Prometheus scrape config selector labels match service labels
268
+ ```
269
+
270
+ ### Recovery 2: Alert not firing when it should
271
+ ```
272
+ Symptom: Error rate is clearly high but no alert fired.
273
+ Step 1: Check Prometheus alert rule with: promtool check rules alert-rules.yml
274
+ Step 2: Evaluate the alert expression manually in Prometheus: Prometheus UI → Graph → paste alert expression
275
+ Step 3: Check alert state: Prometheus UI → Alerts → find the alert rule → check if it's in "Inactive" state
276
+ Step 4: If alert is firing but not notifying: check Alertmanager config and routing rules
277
+ Step 5: Check Alertmanager status: kubectl exec -n monitoring alertmanager-pod -- amtool alert
278
+ Step 6: Test escalation manually: send a test alert through PagerDuty/OpsGenie UI
279
+ ```
280
+
281
+ ### Recovery 3: Health check causing false pod restarts
282
+ ```
283
+ Symptom: Pods are being killed by liveness probe even though service is working.
284
+ Step 1: Check liveness probe config: kubectl describe pod [pod-name] | grep -A 10 Liveness
285
+ Step 2: Check if probe timeout is too short (default is 1s — increase if health check queries DB)
286
+ Step 3: Check if failure threshold is too low (default is 3 consecutive failures — may be too aggressive)
287
+ Step 4: Check /health endpoint response time under load: does it exceed liveness probe timeout?
288
+ Step 5: Fix: increase timeoutSeconds, increase failureThreshold, or optimize the health check endpoint
289
+ Step 6: Recommended safe config: initialDelaySeconds: 30, timeoutSeconds: 5, failureThreshold: 5
290
+ ```
291
+
292
+ ### Recovery 4: Runbook is wrong or out of date
293
+ ```
294
+ Symptom: On-call engineer followed runbook but steps don't work or are inaccurate.
295
+ Step 1: Immediately annotate the incorrect step with [OUTDATED: <brief note>] so next person is warned.
296
+ Step 2: After incident is resolved, open a PR to correct the runbook.
297
+ Step 3: Re-run the corrected runbook steps in a test environment to verify they work.
298
+ Step 4: Add incident to post-mortem action items: "Update runbook [name] step [N]."
299
+ Step 5: Assign runbook review to the on-call engineer who caught the error.
300
+ ```
301
+
302
+ ---
303
+
304
+ ## Workflow Detail: Phase-by-Phase Execution
305
+
306
+ ### Phase 1: Observability Architecture Generation
307
+
308
+ **Goal**: Design and document the complete observability topology before writing any configuration.
309
+
310
+ **Architecture components to define:**
311
+
312
+ | Pillar | Component | Purpose |
313
+ |--------|-----------|---------|
314
+ | Metrics | Prometheus / Datadog Agent | Scrape and store numeric time-series |
315
+ | Metrics | Grafana / Datadog Dashboards | Visualize and alert on metrics |
316
+ | Logs | Structured logging library | Produce machine-readable log events |
317
+ | Logs | Log aggregator (Loki/ELK/CloudWatch) | Collect and index logs |
318
+ | Logs | Kibana/Grafana/Datadog | Search and visualize logs |
319
+ | Traces | OpenTelemetry SDK | Instrument service for tracing |
320
+ | Traces | Jaeger/Zipkin/Datadog APM | Collect and visualize traces |
321
+
322
+ **Service instrumentation requirements (by language):**
323
+ - Node.js: `prom-client` (metrics), `winston` or `pino` (structured logs), `@opentelemetry/sdk-node` (traces).
324
+ - Python: `prometheus_client` (metrics), `structlog` or `python-json-logger` (logs), `opentelemetry-sdk` (traces).
325
+ - Go: `prometheus/client_golang` (metrics), `zap` or `logrus` (logs), `go.opentelemetry.io/otel` (traces).
326
+
327
+ ### Phase 2: Dashboard Generation
328
+
329
+ **Required panels for every service dashboard:**
330
+
331
+ RED metrics (the minimum viable dashboard for any service):
332
+ - **Rate**: Requests per second (total and per endpoint).
333
+ - **Errors**: Error rate percentage (4xx and 5xx separately).
334
+ - **Duration**: Response time as a histogram with p50, p95, p99 lines.
335
+
336
+ SATURATION metrics (resource utilization):
337
+ - **CPU**: Process CPU utilization %.
338
+ - **Memory**: Heap and RSS memory.
339
+ - **Connection pool**: Active connections vs. pool limit.
340
+ - **Queue depth**: (For background workers) — job queue length.
341
+
342
+ See `templates/monitoring-dashboard-template.md` for complete Grafana JSON scaffold.
343
+
344
+ ### Phase 3: Alerting Policy Generation
345
+
346
+ **SLO-based alert threshold calculation:**
347
+
348
+ For a 99.9% availability SLO (monthly error budget = 43.8 minutes):
349
+
350
+ ```
351
+ Fast burn alert (1h window):
352
+ Threshold: error_rate > 2% for 1 hour
353
+ Reason: 2% error rate is a 20× burn rate against the 0.1% budget ≈ 2.7% of monthly budget per hour = exhausted in ~36 hours
354
+ Action: Page on-call immediately (P1)
355
+
356
+ Medium burn alert (6h window):
357
+ Threshold: error_rate > 0.5% for 6 hours
358
+ Reason: 0.5% error rate is a 5× burn rate; 5× for 6h ≈ 4.1% of monthly budget consumed
359
+ Action: Page on-call (P2 — business hours response acceptable)
360
+
361
+ Slow burn alert (3d window):
362
+ Threshold: error_rate > 0.1% for 72 hours
363
+ Reason: 0.1% error rate is a 1× burn rate; 1× for 72h ≈ 9.9% of monthly budget consumed silently
364
+ Action: Slack notification + ticket creation (investigate next sprint)
365
+ ```
366
+
367
+ See `templates/alerting-policy-template.md` for complete Prometheus alerting rules.
368
+
369
+ ### Phase 4: Health Check Automation
370
+
371
+ **Standard health endpoint specification:**
372
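+ - `GET /health` → 200 OK with `{"status": "ok"}` whenever the process is up and serving HTTP (shallow check used by load balancers for basic routing; dependency checks belong in `/readiness`).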
373
+ - `GET /readiness` → 200 if all dependencies healthy, 503 if any dependency unhealthy.
374
+ - `GET /liveness` → 200 if process is alive and event loop is not stuck, 503 if deadlocked.
375
+ - `GET /metrics` → Prometheus text format metrics.
376
+
377
+ ### Phase 5: Incident Response Runbook Generation
378
+
379
+ **Runbook structure requirements:**
380
+ Every runbook must have: Severity definition, Detection signals, Triage steps (ordered, with commands), Escalation triggers, Resolution steps, Rollback procedure, Communication templates, Post-mortem checklist.
381
+
382
+ See `playbooks/incident-triage-playbook.md` for complete P0/P1/P2/P3 runbooks.
@@ -0,0 +1,7 @@
1
+ interface:
2
+ display_name: "Observability Automation Skill"
3
+ short_description: "Automated observability setup, monitoring configuration, and incident response workflows"
4
+ default_prompt: "Use $genesis-observability-automation to set up monitoring, alerting, and observability pipelines for this service."
5
+
6
+ policy:
7
+ allow_implicit_invocation: true