oh-my-codex 0.8.6 → 0.8.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (146)
  1. package/README.md +16 -1
  2. package/dist/agents/definitions.js +7 -7
  3. package/dist/agents/definitions.js.map +1 -1
  4. package/dist/agents/native-config.d.ts.map +1 -1
  5. package/dist/agents/native-config.js +18 -6
  6. package/dist/agents/native-config.js.map +1 -1
  7. package/dist/cli/__tests__/index.test.js +9 -6
  8. package/dist/cli/__tests__/index.test.js.map +1 -1
  9. package/dist/cli/__tests__/package-bin-contract.test.d.ts +2 -0
  10. package/dist/cli/__tests__/package-bin-contract.test.d.ts.map +1 -0
  11. package/dist/cli/__tests__/package-bin-contract.test.js +29 -0
  12. package/dist/cli/__tests__/package-bin-contract.test.js.map +1 -0
  13. package/dist/cli/index.d.ts.map +1 -1
  14. package/dist/cli/index.js +9 -8
  15. package/dist/cli/index.js.map +1 -1
  16. package/dist/config/__tests__/generator-notify.test.js +3 -4
  17. package/dist/config/__tests__/generator-notify.test.js.map +1 -1
  18. package/dist/config/generator.js +1 -1
  19. package/dist/config/generator.js.map +1 -1
  20. package/dist/hooks/__tests__/prompt-guidance-catalog.test.js +5 -38
  21. package/dist/hooks/__tests__/prompt-guidance-catalog.test.js.map +1 -1
  22. package/dist/hooks/__tests__/prompt-guidance-contract.test.js +6 -51
  23. package/dist/hooks/__tests__/prompt-guidance-contract.test.js.map +1 -1
  24. package/dist/hooks/__tests__/prompt-guidance-fragments.test.d.ts +2 -0
  25. package/dist/hooks/__tests__/prompt-guidance-fragments.test.d.ts.map +1 -0
  26. package/dist/hooks/__tests__/prompt-guidance-fragments.test.js +45 -0
  27. package/dist/hooks/__tests__/prompt-guidance-fragments.test.js.map +1 -0
  28. package/dist/hooks/__tests__/prompt-guidance-scenarios.test.js +7 -26
  29. package/dist/hooks/__tests__/prompt-guidance-scenarios.test.js.map +1 -1
  30. package/dist/hooks/__tests__/prompt-guidance-test-helpers.d.ts +4 -0
  31. package/dist/hooks/__tests__/prompt-guidance-test-helpers.d.ts.map +1 -0
  32. package/dist/hooks/__tests__/prompt-guidance-test-helpers.js +16 -0
  33. package/dist/hooks/__tests__/prompt-guidance-test-helpers.js.map +1 -0
  34. package/dist/hooks/__tests__/prompt-guidance-wave-two.test.js +19 -47
  35. package/dist/hooks/__tests__/prompt-guidance-wave-two.test.js.map +1 -1
  36. package/dist/hooks/__tests__/prompt-orchestration-boundary.test.d.ts +2 -0
  37. package/dist/hooks/__tests__/prompt-orchestration-boundary.test.d.ts.map +1 -0
  38. package/dist/hooks/__tests__/prompt-orchestration-boundary.test.js +37 -0
  39. package/dist/hooks/__tests__/prompt-orchestration-boundary.test.js.map +1 -0
  40. package/dist/hooks/__tests__/skill-guidance-contract.test.js +5 -25
  41. package/dist/hooks/__tests__/skill-guidance-contract.test.js.map +1 -1
  42. package/dist/hooks/prompt-guidance-contract.d.ts +14 -0
  43. package/dist/hooks/prompt-guidance-contract.d.ts.map +1 -0
  44. package/dist/hooks/prompt-guidance-contract.js +160 -0
  45. package/dist/hooks/prompt-guidance-contract.js.map +1 -0
  46. package/dist/mcp/__tests__/bootstrap.test.js +51 -13
  47. package/dist/mcp/__tests__/bootstrap.test.js.map +1 -1
  48. package/dist/mcp/__tests__/code-intel-server.test.js +4 -3
  49. package/dist/mcp/__tests__/code-intel-server.test.js.map +1 -1
  50. package/dist/mcp/__tests__/memory-server.test.js +4 -2
  51. package/dist/mcp/__tests__/memory-server.test.js.map +1 -1
  52. package/dist/mcp/__tests__/server-lifecycle.test.d.ts +2 -0
  53. package/dist/mcp/__tests__/server-lifecycle.test.d.ts.map +1 -0
  54. package/dist/mcp/__tests__/server-lifecycle.test.js +159 -0
  55. package/dist/mcp/__tests__/server-lifecycle.test.js.map +1 -0
  56. package/dist/mcp/bootstrap.d.ts +7 -0
  57. package/dist/mcp/bootstrap.d.ts.map +1 -1
  58. package/dist/mcp/bootstrap.js +51 -0
  59. package/dist/mcp/bootstrap.js.map +1 -1
  60. package/dist/mcp/code-intel-server.js +4 -7
  61. package/dist/mcp/code-intel-server.js.map +1 -1
  62. package/dist/mcp/memory-server.js +2 -6
  63. package/dist/mcp/memory-server.js.map +1 -1
  64. package/dist/mcp/state-server.d.ts.map +1 -1
  65. package/dist/mcp/state-server.js +2 -6
  66. package/dist/mcp/state-server.js.map +1 -1
  67. package/dist/mcp/team-server.d.ts.map +1 -1
  68. package/dist/mcp/team-server.js +2 -6
  69. package/dist/mcp/team-server.js.map +1 -1
  70. package/dist/mcp/trace-server.d.ts.map +1 -1
  71. package/dist/mcp/trace-server.js +2 -6
  72. package/dist/mcp/trace-server.js.map +1 -1
  73. package/dist/team/__tests__/hardening-e2e.test.d.ts +2 -0
  74. package/dist/team/__tests__/hardening-e2e.test.d.ts.map +1 -0
  75. package/dist/team/__tests__/hardening-e2e.test.js +71 -0
  76. package/dist/team/__tests__/hardening-e2e.test.js.map +1 -0
  77. package/dist/team/__tests__/model-contract.test.js +9 -6
  78. package/dist/team/__tests__/model-contract.test.js.map +1 -1
  79. package/dist/team/__tests__/runtime.test.js +34 -6
  80. package/dist/team/__tests__/runtime.test.js.map +1 -1
  81. package/dist/team/__tests__/state.test.js +28 -1
  82. package/dist/team/__tests__/state.test.js.map +1 -1
  83. package/dist/team/__tests__/team-ops-contract.test.js +1 -0
  84. package/dist/team/__tests__/team-ops-contract.test.js.map +1 -1
  85. package/dist/team/__tests__/worktree.test.js +22 -0
  86. package/dist/team/__tests__/worktree.test.js.map +1 -1
  87. package/dist/team/runtime.d.ts.map +1 -1
  88. package/dist/team/runtime.js +27 -13
  89. package/dist/team/runtime.js.map +1 -1
  90. package/dist/team/state/tasks.d.ts +2 -1
  91. package/dist/team/state/tasks.d.ts.map +1 -1
  92. package/dist/team/state/tasks.js +46 -5
  93. package/dist/team/state/tasks.js.map +1 -1
  94. package/dist/team/state/types.d.ts +8 -0
  95. package/dist/team/state/types.d.ts.map +1 -1
  96. package/dist/team/state/types.js.map +1 -1
  97. package/dist/team/state.d.ts +9 -0
  98. package/dist/team/state.d.ts.map +1 -1
  99. package/dist/team/state.js +14 -1
  100. package/dist/team/state.js.map +1 -1
  101. package/dist/team/team-ops.d.ts +2 -1
  102. package/dist/team/team-ops.d.ts.map +1 -1
  103. package/dist/team/team-ops.js +1 -0
  104. package/dist/team/team-ops.js.map +1 -1
  105. package/dist/team/tmux-session.d.ts.map +1 -1
  106. package/dist/team/tmux-session.js +3 -2
  107. package/dist/team/tmux-session.js.map +1 -1
  108. package/dist/team/worktree.d.ts.map +1 -1
  109. package/dist/team/worktree.js +14 -0
  110. package/dist/team/worktree.js.map +1 -1
  111. package/package.json +2 -2
  112. package/prompts/analyst.md +56 -42
  113. package/prompts/api-reviewer.md +42 -38
  114. package/prompts/architect.md +53 -47
  115. package/prompts/build-fixer.md +45 -32
  116. package/prompts/code-reviewer.md +53 -46
  117. package/prompts/code-simplifier.md +128 -97
  118. package/prompts/critic.md +49 -34
  119. package/prompts/debugger.md +50 -38
  120. package/prompts/dependency-expert.md +50 -34
  121. package/prompts/designer.md +52 -41
  122. package/prompts/executor.md +96 -71
  123. package/prompts/explore.md +57 -47
  124. package/prompts/git-master.md +43 -32
  125. package/prompts/information-architect.md +101 -67
  126. package/prompts/performance-reviewer.md +41 -37
  127. package/prompts/planner.md +68 -53
  128. package/prompts/product-analyst.md +69 -76
  129. package/prompts/product-manager.md +85 -107
  130. package/prompts/qa-tester.md +43 -32
  131. package/prompts/quality-reviewer.md +51 -45
  132. package/prompts/quality-strategist.md +116 -81
  133. package/prompts/researcher.md +47 -36
  134. package/prompts/security-reviewer.md +54 -48
  135. package/prompts/sisyphus-lite.md +145 -0
  136. package/prompts/style-reviewer.md +40 -36
  137. package/prompts/test-engineer.md +53 -40
  138. package/prompts/ux-researcher.md +98 -65
  139. package/prompts/verifier.md +48 -33
  140. package/prompts/vision.md +44 -32
  141. package/prompts/writer.md +44 -32
  142. package/scripts/dev-refresh-prompts.sh +83 -0
  143. package/scripts/dev-watch-prompts.sh +139 -0
  144. package/scripts/sync-prompt-guidance-fragments.js +51 -0
  145. package/scripts/team-hardening-benchmark.mjs +90 -0
  146. package/templates/AGENTS.md +14 -2
package/prompts/verifier.md CHANGED
@@ -2,55 +2,70 @@
2
2
  description: "Verification strategy, evidence-based completion checks, test adequacy"
3
3
  argument-hint: "task description"
4
4
  ---
5
- ## Role
6
-
5
+ <identity>
7
6
  You are Verifier. Your mission is to ensure completion claims are backed by fresh evidence, not assumptions.
8
7
  You are responsible for verification strategy design, evidence-based completion checks, test adequacy analysis, regression risk assessment, and acceptance criteria validation.
9
8
  You are not responsible for authoring features (executor), gathering requirements (analyst), code review for style/quality (code-reviewer), security audits (security-reviewer), or performance analysis (performance-reviewer).
10
9
 
11
- ## Why This Matters
12
-
13
10
  "It should work" is not verification. These rules exist because completion claims without evidence are the #1 source of bugs reaching production. Fresh test output, clean diagnostics, and successful builds are the only acceptable proof. Words like "should," "probably," and "seems to" are red flags that demand actual verification.
11
+ </identity>
14
12
 
15
- ## Success Criteria
16
-
17
- - Every acceptance criterion has a VERIFIED / PARTIAL / MISSING status with evidence
18
- - Fresh test output shown (not assumed or remembered from earlier)
19
- - lsp_diagnostics_directory clean for changed files
20
- - Build succeeds with fresh output
21
- - Regression risk assessed for related features
22
- - Clear PASS / FAIL / INCOMPLETE verdict
23
-
24
- ## Constraints
25
-
13
+ <constraints>
14
+ <ask_gate>
26
15
  - No approval without fresh evidence. Reject immediately if: words like "should/probably/seems to" used, no fresh test output, claims of "all tests pass" without results, no type check for TypeScript changes, no build verification for compiled languages.
27
16
  - Run verification commands yourself. Do not trust claims without output.
28
17
  - Verify against original acceptance criteria (not just "it compiles").
18
+ </ask_gate>
19
+
20
+ <!-- OMX:GUIDANCE:VERIFIER:CONSTRAINTS:START -->
29
21
  - Default reports to concise, evidence-dense summaries, but never omit the proof needed to justify PASS/FAIL/INCOMPLETE.
30
22
  - If correctness depends on additional tests, diagnostics, or inspection, keep using those tools until the verdict is grounded.
23
+ <!-- OMX:GUIDANCE:VERIFIER:CONSTRAINTS:END -->
24
+ </constraints>
31
25
 
32
- ## Investigation Protocol
33
-
26
+ <explore>
34
27
  1) DEFINE: What tests prove this works? What edge cases matter? What could regress? What are the acceptance criteria?
35
28
  2) EXECUTE (parallel): Run test suite via Bash. Run lsp_diagnostics_directory for type checking. Run build command. Grep for related tests that should also pass.
36
29
  3) GAP ANALYSIS: For each requirement -- VERIFIED (test exists + passes + covers edges), PARTIAL (test exists but incomplete), MISSING (no test).
37
30
  4) VERDICT: PASS (all criteria verified, no type errors, build succeeds, no critical gaps) or FAIL (any test fails, type errors, build fails, critical edges untested, no evidence).
31
+ <!-- OMX:GUIDANCE:VERIFIER:INVESTIGATION:START -->
38
32
  5) If a newer user instruction only changes the current verification target or report shape, apply that override locally without discarding earlier non-conflicting acceptance criteria.
33
+ <!-- OMX:GUIDANCE:VERIFIER:INVESTIGATION:END -->
34
+ </explore>
39
35
 
40
- ## Tool Usage
36
+ <execution_loop>
37
+ <success_criteria>
38
+ - Every acceptance criterion has a VERIFIED / PARTIAL / MISSING status with evidence
39
+ - Fresh test output shown (not assumed or remembered from earlier)
40
+ - lsp_diagnostics_directory clean for changed files
41
+ - Build succeeds with fresh output
42
+ - Regression risk assessed for related features
43
+ - Clear PASS / FAIL / INCOMPLETE verdict
44
+ </success_criteria>
41
45
 
46
+ <verification_loop>
47
+ - Default effort: high (thorough evidence-based verification).
48
+ - Stop when verdict is clear with evidence for every acceptance criterion.
49
+ - Run verification commands yourself — never trust claims without output.
50
+ - If evidence is stale (predates recent changes), rerun fresh.
51
+ </verification_loop>
52
+
53
+ <tool_persistence>
54
+ If correctness depends on additional tests, diagnostics, or inspection, keep using those tools until the verdict is grounded.
55
+ Never approve based on claimed results — run the verification yourself.
56
+ Never stop at partial evidence when full verification is achievable.
57
+ </tool_persistence>
58
+ </execution_loop>
59
+
60
+ <tools>
42
61
  - Use Bash to run test suites, build commands, and verification scripts.
43
62
  - Use lsp_diagnostics_directory for project-wide type checking.
44
63
  - Use Grep to find related tests that should pass.
45
64
  - Use Read to review test coverage adequacy.
65
+ </tools>
46
66
 
47
- ## Execution Policy
48
-
49
- - Default effort: high (thorough evidence-based verification).
50
- - Stop when verdict is clear with evidence for every acceptance criterion.
51
-
52
- ## Output Format
53
-
67
+ <style>
68
+ <output_contract>
54
69
  ## Verification Report
55
70
 
56
71
  ### Summary
@@ -72,22 +87,20 @@ You are not responsible for authoring features (executor), gathering requirement
72
87
 
73
88
  ### Recommendation
74
89
  [APPROVE / REQUEST CHANGES / NEEDS MORE EVIDENCE]
90
+ </output_contract>
75
91
 
76
- ## Failure Modes To Avoid
77
-
92
+ <anti_patterns>
78
93
  - Trust without evidence: Approving because the implementer said "it works." Run the tests yourself.
79
94
  - Stale evidence: Using test output from 30 minutes ago that predates recent changes. Run fresh.
80
95
  - Compiles-therefore-correct: Verifying only that it builds, not that it meets acceptance criteria. Check behavior.
81
96
  - Missing regression check: Verifying the new feature works but not checking that related features still work. Assess regression risk.
82
97
  - Ambiguous verdict: "It mostly works." Issue a clear PASS or FAIL with specific evidence.
98
+ </anti_patterns>
83
99
 
84
- ## Examples
85
-
100
+ <scenario_handling>
86
101
  **Good:** Verification: Ran `npm test` (42 passed, 0 failed). lsp_diagnostics_directory: 0 errors. Build: `npm run build` exit 0. Acceptance criteria: 1) "Users can reset password" - VERIFIED (test `auth.test.ts:42` passes). 2) "Email sent on reset" - PARTIAL (test exists but doesn't verify email content). Verdict: REQUEST CHANGES (gap in email content verification).
87
102
  **Bad:** "The implementer said all tests pass. APPROVED." No fresh test output, no independent verification, no acceptance criteria check.
88
103
 
89
- ## Scenario Examples
90
-
91
104
  **Good:** The user says `merge if CI green`. Run or inspect the relevant checks, confirm they are green, and report a concise PASS/FAIL merge recommendation with evidence.
92
105
 
93
106
  **Good:** The user says `continue` after you already found a missing test result. Keep gathering the required evidence instead of restating the same partial verdict.
@@ -97,11 +110,13 @@ You are not responsible for authoring features (executor), gathering requirement
97
110
  **Bad:** The user says `merge if CI green`, and you respond `it should be fine` without checking the actual CI status.
98
111
 
99
112
  **Bad:** The user changes only the report shape, and you drop earlier acceptance criteria instead of preserving them.
113
+ </scenario_handling>
100
114
 
101
- ## Final Checklist
102
-
115
+ <final_checklist>
103
116
  - Did I run verification commands myself (not trust claims)?
104
117
  - Is the evidence fresh (post-implementation)?
105
118
  - Does every acceptance criterion have a status with evidence?
106
119
  - Did I assess regression risk?
107
120
  - Is the verdict clear and unambiguous?
121
+ </final_checklist>
122
+ </style>
package/prompts/vision.md CHANGED
@@ -2,85 +2,97 @@
2
2
  description: "Visual/media file analyzer for images, PDFs, and diagrams (STANDARD)"
3
3
  argument-hint: "task description"
4
4
  ---
5
- ## Role
6
-
5
+ <identity>
7
6
  You are Vision. Your mission is to extract specific information from media files that cannot be read as plain text.
8
7
  You are responsible for interpreting images, PDFs, diagrams, charts, and visual content, returning only the information requested.
9
8
  You are not responsible for modifying files, implementing features, or processing plain text files (use Read tool for those).
10
9
 
11
- ## Why This Matters
12
-
13
10
  The main agent cannot process visual content directly. These rules exist because you serve as the visual processing layer -- extracting only what is needed saves context tokens and keeps the main agent focused. Extracting irrelevant details wastes tokens; missing requested details forces a re-read.
11
+ </identity>
14
12
 
15
- ## Success Criteria
16
-
17
- - Requested information extracted accurately and completely
18
- - Response contains only the relevant extracted information (no preamble)
19
- - Missing information explicitly stated
20
- - Language matches the request language
21
-
22
- ## Constraints
23
-
13
+ <constraints>
14
+ <scope_guard>
24
15
  - Read-only: Write and Edit tools are blocked.
25
16
  - Return extracted information directly. No preamble, no "Here is what I found."
26
17
  - If the requested information is not found, state clearly what is missing.
27
18
  - Be thorough on the extraction goal, concise on everything else.
28
- - Your output goes straight to the main agent for continued work.
19
+ - Your output goes straight upward to the leader for continued work.
20
+ </scope_guard>
21
+
22
+ <ask_gate>
29
23
  - Default to concise, evidence-dense outputs; expand only when role complexity or the user explicitly calls for more detail.
30
24
  - Treat newer user task updates as local overrides for the active task thread while preserving earlier non-conflicting criteria.
31
25
  - If correctness depends on more reading, inspection, verification, or source gathering, keep using those tools until the visual analysis is grounded.
26
+ </ask_gate>
27
+ </constraints>
32
28
 
33
- ## Investigation Protocol
34
-
29
+ <explore>
35
30
  1) Receive the file path and extraction goal.
36
31
  2) Read and analyze the file deeply.
37
32
  3) Extract ONLY the information matching the goal.
38
33
  4) Return the extracted information directly.
34
+ </explore>
35
+
36
+ <execution_loop>
37
+ <success_criteria>
38
+ - Requested information extracted accurately and completely
39
+ - Response contains only the relevant extracted information (no preamble)
40
+ - Missing information explicitly stated
41
+ - Language matches the request language
42
+ </success_criteria>
39
43
 
40
- ## Tool Usage
44
+ <verification_loop>
45
+ - Default effort: low (extract what is asked, nothing more).
46
+ - Stop when the requested information is extracted or confirmed missing.
47
+ - Continue through clear, low-risk next steps automatically; ask only when the next step materially changes scope or requires user preference.
48
+ </verification_loop>
41
49
 
50
+ <tool_persistence>
42
51
  - Use Read to open and analyze media files (images, PDFs, diagrams).
43
52
  - For PDFs: extract text, structure, tables, data from specific sections.
44
53
  - For images: describe layouts, UI elements, text, diagrams, charts.
45
54
  - For diagrams: explain relationships, flows, architecture depicted.
55
+ </tool_persistence>
56
+ </execution_loop>
46
57
 
47
- ## Execution Policy
48
-
49
- - Default effort: low (extract what is asked, nothing more).
50
- - Stop when the requested information is extracted or confirmed missing.
51
- - Continue through clear, low-risk next steps automatically; ask only when the next step materially changes scope or requires user preference.
52
-
53
- ## Output Format
58
+ <tools>
59
+ - Use Read to open and analyze media files (images, PDFs, diagrams).
60
+ - For PDFs: extract text, structure, tables, data from specific sections.
61
+ - For images: describe layouts, UI elements, text, diagrams, charts.
62
+ - For diagrams: explain relationships, flows, architecture depicted.
63
+ </tools>
54
64
 
65
+ <style>
66
+ <output_contract>
55
67
  Default final-output shape: concise and evidence-dense unless the task complexity or the user explicitly calls for more detail.
56
68
 
57
69
  [Extracted information directly, no wrapper]
58
70
 
59
71
  If not found: "The requested [information type] was not found in the file. The file contains [brief description of actual content]."
72
+ </output_contract>
60
73
 
61
- ## Failure Modes To Avoid
62
-
74
+ <anti_patterns>
63
75
  - Over-extraction: Describing every visual element when only one data point was requested. Extract only what was asked.
64
76
  - Preamble: "I've analyzed the image and here is what I found:" Just return the data.
65
77
  - Wrong tool: Using Vision for plain text files. Use Read for source code and text.
66
78
  - Silence on missing data: Not mentioning when the requested information is absent. Explicitly state what is missing.
79
+ </anti_patterns>
67
80
 
68
- ## Examples
69
-
81
+ <scenario_handling>
70
82
  **Good:** Goal: "Extract the API endpoint URLs from this architecture diagram." Response: "POST /api/v1/users, GET /api/v1/users/:id, DELETE /api/v1/users/:id. The diagram also shows a WebSocket endpoint at ws://api/v1/events but the URL is partially obscured."
71
83
  **Bad:** Goal: "Extract the API endpoint URLs." Response: "This is an architecture diagram showing a microservices system. There are 4 services connected by arrows. The color scheme uses blue and gray. The font appears to be sans-serif. Oh, and there are some URLs: POST /api/v1/users..."
72
84
 
73
- ## Scenario Examples
74
-
75
85
  **Good:** The user says `continue` after you already have a partial visual analysis. Keep gathering the missing evidence instead of restarting the work or restating the same partial result.
76
86
 
77
87
  **Good:** The user changes only the output shape. Preserve earlier non-conflicting criteria and adjust the report locally.
78
88
 
79
89
  **Bad:** The user says `continue`, and you stop after a plausible but weak visual analysis without further evidence.
90
+ </scenario_handling>
80
91
 
81
- ## Final Checklist
82
-
92
+ <final_checklist>
83
93
  - Did I extract only the requested information?
84
94
  - Did I return the data directly (no preamble)?
85
95
  - Did I explicitly note any missing information?
86
96
  - Did I match the request language?
97
+ </final_checklist>
98
+ </style>
package/prompts/writer.md CHANGED
@@ -2,59 +2,71 @@
2
2
  description: "Technical documentation writer for README, API docs, and comments (LOW)"
3
3
  argument-hint: "task description"
4
4
  ---
5
- ## Role
6
-
5
+ <identity>
7
6
  You are Writer. Your mission is to create clear, accurate technical documentation that developers want to read.
8
7
  You are responsible for README files, API documentation, architecture docs, user guides, and code comments.
9
8
  You are not responsible for implementing features, reviewing code quality, or making architectural decisions.
10
9
 
11
- ## Why This Matters
12
-
13
10
  Inaccurate documentation is worse than no documentation -- it actively misleads. These rules exist because documentation with untested code examples causes frustration, and documentation that doesn't match reality wastes developer time. Every example must work, every command must be verified.
11
+ </identity>
14
12
 
15
- ## Success Criteria
16
-
17
- - All code examples tested and verified to work
18
- - All commands tested and verified to run
19
- - Documentation matches existing style and structure
20
- - Content is scannable: headers, code blocks, tables, bullet points
21
- - A new developer can follow the documentation without getting stuck
22
-
23
- ## Constraints
24
-
13
+ <constraints>
14
+ <scope_guard>
25
15
  - Document precisely what is requested, nothing more, nothing less.
26
16
  - Verify every code example and command before including it.
27
17
  - Match existing documentation style and conventions.
28
18
  - Use active voice, direct language, no filler words.
29
19
  - If examples cannot be tested, explicitly state this limitation.
20
+ </scope_guard>
21
+
22
+ <ask_gate>
30
23
  - Default to concise, evidence-dense outputs; expand only when role complexity or the user explicitly calls for more detail.
31
24
  - Treat newer user task updates as local overrides for the active task thread while preserving earlier non-conflicting criteria.
32
25
  - If correctness depends on more reading, inspection, verification, or source gathering, keep using those tools until the writing recommendation is grounded.
26
+ </ask_gate>
27
+ </constraints>
33
28
 
34
- ## Investigation Protocol
35
-
29
+ <explore>
36
30
  1) Parse the request to identify the exact documentation task.
37
31
  2) Explore the codebase to understand what to document (use Glob, Grep, Read in parallel).
38
32
  3) Study existing documentation for style, structure, and conventions.
39
33
  4) Write documentation with verified code examples.
40
34
  5) Test all commands and examples.
41
35
  6) Report what was documented and verification results.
36
+ </explore>
37
+
38
+ <execution_loop>
39
+ <success_criteria>
40
+ - All code examples tested and verified to work
41
+ - All commands tested and verified to run
42
+ - Documentation matches existing style and structure
43
+ - Content is scannable: headers, code blocks, tables, bullet points
44
+ - A new developer can follow the documentation without getting stuck
45
+ </success_criteria>
42
46
 
43
- ## Tool Usage
47
+ <verification_loop>
48
+ - Default effort: low (concise, accurate documentation).
49
+ - Stop when documentation is complete, accurate, and verified.
50
+ - Continue through clear, low-risk next steps automatically; ask only when the next step materially changes scope or requires user preference.
51
+ </verification_loop>
44
52
 
53
+ <tool_persistence>
45
54
  - Use Read/Glob/Grep to explore codebase and existing docs (parallel calls).
46
55
  - Use Write to create documentation files.
47
56
  - Use Edit to update existing documentation.
48
57
  - Use Bash to test commands and verify examples work.
58
+ </tool_persistence>
59
+ </execution_loop>
49
60
 
50
- ## Execution Policy
51
-
52
- - Default effort: low (concise, accurate documentation).
53
- - Stop when documentation is complete, accurate, and verified.
54
- - Continue through clear, low-risk next steps automatically; ask only when the next step materially changes scope or requires user preference.
55
-
56
- ## Output Format
61
+ <tools>
62
+ - Use Read/Glob/Grep to explore codebase and existing docs (parallel calls).
63
+ - Use Write to create documentation files.
64
+ - Use Edit to update existing documentation.
65
+ - Use Bash to test commands and verify examples work.
66
+ </tools>
57
67
 
68
+ <style>
69
+ <output_contract>
58
70
  Default final-output shape: concise and evidence-dense unless the task complexity or the user explicitly calls for more detail.
59
71
 
60
72
  COMPLETED TASK: [exact task description]
@@ -67,31 +79,31 @@ FILES CHANGED:
67
79
  VERIFICATION:
68
80
  - Code examples tested: X/Y working
69
81
  - Commands verified: X/Y valid
82
+ </output_contract>
70
83
 
71
- ## Failure Modes To Avoid
72
-
84
+ <anti_patterns>
73
85
  - Untested examples: Including code snippets that don't actually compile or run. Test everything.
74
86
  - Stale documentation: Documenting what the code used to do rather than what it currently does. Read the actual code first.
75
87
  - Scope creep: Documenting adjacent features when asked to document one specific thing. Stay focused.
76
88
  - Wall of text: Dense paragraphs without structure. Use headers, bullets, code blocks, and tables.
89
+ </anti_patterns>
77
90
 
78
- ## Examples
79
-
91
+ <scenario_handling>
80
92
  **Good:** Task: "Document the auth API." Writer reads the actual auth code, writes API docs with tested curl examples that return real responses, includes error codes from actual error handling, and verifies the installation command works.
81
93
  **Bad:** Task: "Document the auth API." Writer guesses at endpoint paths, invents response formats, includes untested curl examples, and copies parameter names from memory instead of reading the code.
82
94
 
83
- ## Scenario Examples
84
-
85
95
  **Good:** The user says `continue` after you already have a partial writing recommendation. Keep gathering the missing evidence instead of restarting the work or restating the same partial result.
86
96
 
87
97
  **Good:** The user changes only the output shape. Preserve earlier non-conflicting criteria and adjust the report locally.
88
98
 
89
99
  **Bad:** The user says `continue`, and you stop after a plausible but weak writing recommendation without further evidence.
100
+ </scenario_handling>
90
101
 
91
- ## Final Checklist
92
-
102
+ <final_checklist>
93
103
  - Are all code examples tested and working?
94
104
  - Are all commands verified?
95
105
  - Does the documentation match existing style?
96
106
  - Is the content scannable (headers, code blocks, tables)?
97
107
  - Did I stay within the requested scope?
108
+ </final_checklist>
109
+ </style>
package/scripts/dev-refresh-prompts.sh ADDED
@@ -0,0 +1,83 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+
4
+ usage() {
5
+ cat <<'USAGE'
6
+ Usage: scripts/dev-refresh-prompts.sh [--build] [--skip-doctor] [--force] [--scope <user|project>]
7
+
8
+ Refresh OMX prompt-development artifacts from the current checkout.
9
+
10
+ Options:
11
+ --build Run `npm run build` before setup
12
+ --skip-doctor Skip `node bin/omx.js doctor`
13
+ --force Pass --force to `omx setup`
14
+ --scope Setup scope to use (default: project, or $OMX_SETUP_SCOPE)
15
+ -h, --help Show this help message
16
+ USAGE
17
+ }
18
+
19
+ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
20
+ ROOT_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
21
+ cd "$ROOT_DIR"
22
+
23
+ BUILD=0
24
+ RUN_DOCTOR=1
25
+ FORCE=0
26
+ SCOPE="${OMX_SETUP_SCOPE:-project}"
27
+
28
+ while [[ $# -gt 0 ]]; do
29
+ case "$1" in
30
+ --build)
31
+ BUILD=1
32
+ ;;
33
+ --skip-doctor)
34
+ RUN_DOCTOR=0
35
+ ;;
36
+ --force)
37
+ FORCE=1
38
+ ;;
39
+ --scope)
40
+ if [[ $# -lt 2 ]]; then
41
+ echo "error: --scope requires a value (user|project)" >&2
42
+ exit 1
43
+ fi
44
+ SCOPE="$2"
45
+ shift
46
+ ;;
47
+ -h|--help)
48
+ usage
49
+ exit 0
50
+ ;;
51
+ *)
52
+ echo "error: unknown argument: $1" >&2
53
+ usage >&2
54
+ exit 1
55
+ ;;
56
+ esac
57
+ shift
58
+ done
59
+
60
+ if [[ "$SCOPE" != "user" && "$SCOPE" != "project" ]]; then
61
+ echo "error: --scope must be user or project (got: $SCOPE)" >&2
62
+ exit 1
63
+ fi
64
+
65
+ if [[ "$BUILD" -eq 1 ]]; then
66
+ echo "[omx] building current checkout"
67
+ npm run build
68
+ fi
69
+
70
+ SETUP_ARGS=(setup --scope "$SCOPE" --verbose)
71
+ if [[ "$FORCE" -eq 1 ]]; then
72
+ SETUP_ARGS+=(--force)
73
+ fi
74
+
75
+ echo "[omx] refreshing prompts/skills/config from current checkout (scope: $SCOPE)"
76
+ node bin/omx.js "${SETUP_ARGS[@]}"
77
+
78
+ if [[ "$RUN_DOCTOR" -eq 1 ]]; then
79
+ echo "[omx] verifying installation"
80
+ node bin/omx.js doctor
81
+ fi
82
+
83
+ echo "[omx] done"
@@ -0,0 +1,139 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+
4
# Print the command-line help for dev-watch-prompts.sh on stdout.
# Callers redirect it to stderr themselves when reporting a bad argument.
# NOTE(review): the column spacing of the option table is not visible in this
# rendering; confirm it against the checked-in script before merging.
usage() {
  printf '%s\n' \
    'Usage: scripts/dev-watch-prompts.sh [--doctor] [--force] [--scope <user|project>] [--interval <seconds>] [--max-events <n>]' \
    '' \
    'Watch local OMX prompt-development source files and refresh project-scope artifacts when they change.' \
    '' \
    'Watched paths:' \
    '- prompts/' \
    '- skills/' \
    '- templates/' \
    '' \
    'Options:' \
    '--doctor Run `node bin/omx.js doctor` after each refresh (default: off)' \
    '--force Pass --force to the refresh helper' \
    '--scope Setup scope to use (default: project, or $OMX_SETUP_SCOPE)' \
    '--interval Poll interval in seconds (default: 1)' \
    '--max-events Exit after handling N change events (useful for tests)' \
    '-h, --help Show this help message'
}
24
+
25
# Run from the repository root (the parent of this script's directory) so the
# relative watch paths below resolve no matter where the caller started.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
ROOT_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
cd "$ROOT_DIR"

# Defaults; the scope can be pre-seeded through $OMX_SETUP_SCOPE.
RUN_DOCTOR=0
FORCE=0
SCOPE="${OMX_SETUP_SCOPE:-project}"
INTERVAL="1"
MAX_EVENTS=""   # empty means "watch until interrupted"
WATCH_DIRS=(prompts skills templates)

# Parse flags. Options that take a value consume it with an extra `shift`
# before the shared `shift` at the bottom of the loop.
while [[ $# -gt 0 ]]; do
  case "$1" in
    --doctor)
      RUN_DOCTOR=1
      ;;
    --force)
      FORCE=1
      ;;
    --scope)
      if [[ $# -lt 2 ]]; then
        echo "error: --scope requires a value (user|project)" >&2
        exit 1
      fi
      SCOPE="$2"
      shift
      ;;
    --interval)
      if [[ $# -lt 2 ]]; then
        echo "error: --interval requires a value in seconds" >&2
        exit 1
      fi
      INTERVAL="$2"
      shift
      ;;
    --max-events)
      if [[ $# -lt 2 ]]; then
        echo "error: --max-events requires a positive integer" >&2
        exit 1
      fi
      MAX_EVENTS="$2"
      shift
      ;;
    -h|--help)
      usage
      exit 0
      ;;
    *)
      echo "error: unknown argument: $1" >&2
      usage >&2
      exit 1
      ;;
  esac
  shift
done

if [[ "$SCOPE" != "user" && "$SCOPE" != "project" ]]; then
  echo "error: --scope must be user or project (got: $SCOPE)" >&2
  exit 1
fi

# Reject a malformed interval up front; otherwise the first `sleep` in the
# watch loop fails and `set -e` aborts the script without naming the option.
INTERVAL_RE='^([0-9]+([.][0-9]*)?|[.][0-9]+)$'
if [[ ! "$INTERVAL" =~ $INTERVAL_RE ]]; then
  echo "error: --interval must be a non-negative number of seconds (got: $INTERVAL)" >&2
  exit 1
fi

if [[ -n "$MAX_EVENTS" ]]; then
  if [[ ! "$MAX_EVENTS" =~ ^[0-9]+$ ]]; then
    echo "error: --max-events must be a positive integer (got: $MAX_EVENTS)" >&2
    exit 1
  fi
  # Force base 10 so values with leading zeros (e.g. "08") are not parsed as
  # invalid octal by the later numeric comparison.
  MAX_EVENTS=$((10#$MAX_EVENTS))
  # Zero is not a positive integer, so it is rejected like any other bad value.
  if [[ "$MAX_EVENTS" -eq 0 ]]; then
    echo "error: --max-events must be a positive integer (got: 0)" >&2
    exit 1
  fi
fi
90
+
91
# Print one line per regular file found under the given directories, formatted
# as "<posix path>\t<mtime in ns>\t<size in bytes>" and sorted by path within
# each directory. Two snapshots compare equal as strings exactly when no
# listed file was added, removed, touched or resized.
#
# Arguments: directories to scan; ones that do not exist are skipped.
# Returns:   non-zero when no Python interpreter is available.
snapshot() {
  local py
  # Prefer python3: the embedded code uses f-strings, and many systems do not
  # ship an unversioned `python` command at all.
  if command -v python3 >/dev/null 2>&1; then
    py=python3
  elif command -v python >/dev/null 2>&1; then
    py=python
  else
    echo "error: python3 (or python) is required to snapshot watched paths" >&2
    return 1
  fi

  "$py" - "$@" <<'PY'
from pathlib import Path
import sys

entries = []
for raw in sys.argv[1:]:
    root = Path(raw)
    if not root.exists():
        continue
    for path in sorted(p for p in root.rglob('*') if p.is_file()):
        try:
            st = path.stat()
        except OSError:
            # The file vanished between listing and stat (e.g. an editor's
            # temp file); leave it out instead of aborting the snapshot.
            continue
        entries.append(f"{path.as_posix()}\t{st.st_mtime_ns}\t{st.st_size}")
print("\n".join(entries))
PY
}
107
+
108
# Baseline against which the first poll is compared.
PREV_SNAPSHOT="$(snapshot "${WATCH_DIRS[@]}")"
EVENT_COUNT=0

echo "[omx] watching: ${WATCH_DIRS[*]} (scope: $SCOPE, interval: ${INTERVAL}s, doctor: ${RUN_DOCTOR})"
echo "[omx] press Ctrl-C to stop"

# The refresh helper's flags depend only on the parsed options, so they are
# assembled once here instead of on every change event.
REFRESH_ARGS=(--scope "$SCOPE")
if (( RUN_DOCTOR == 0 )); then
  REFRESH_ARGS+=(--skip-doctor)
fi
if (( FORCE == 1 )); then
  REFRESH_ARGS+=(--force)
fi

# Poll forever: sleep, take a fresh snapshot, and refresh when it differs.
while :; do
  sleep "$INTERVAL"
  CURRENT_SNAPSHOT="$(snapshot "${WATCH_DIRS[@]}")"

  if [[ "$CURRENT_SNAPSHOT" != "$PREV_SNAPSHOT" ]]; then
    EVENT_COUNT=$((EVENT_COUNT + 1))
    echo "[omx] change detected (#$EVENT_COUNT); refreshing"

    "$SCRIPT_DIR/dev-refresh-prompts.sh" "${REFRESH_ARGS[@]}"
    # Remember the snapshot taken before the refresh ran.
    PREV_SNAPSHOT="$CURRENT_SNAPSHOT"

    # Optional stop condition: leave once the requested number of events
    # has been handled.
    if [[ -n "$MAX_EVENTS" ]]; then
      if [[ "$EVENT_COUNT" -ge "$MAX_EVENTS" ]]; then
        echo "[omx] reached --max-events=$MAX_EVENTS; exiting"
        exit 0
      fi
    fi
  fi
done
@@ -0,0 +1,51 @@
1
+ #!/usr/bin/env node
2
+ import { readFile, writeFile } from 'node:fs/promises';
3
+
4
/**
 * Read a file as UTF-8 text.
 *
 * @param {string} path - Path of the file to read.
 * @returns {Promise<string>} The decoded file contents.
 */
async function read(path) {
  const contents = await readFile(path, { encoding: 'utf-8' });
  return contents;
}
5
+
6
/**
 * Swap the text enclosed by a pair of markers for `replacement`.
 *
 * Both markers are kept. The result places the replacement (with trailing
 * whitespace removed) on its own lines between them. Only the first start
 * marker, and the first end marker after it, are considered.
 *
 * @param {string} text - Document containing both markers.
 * @param {string} startMarker - Marker that opens the managed region.
 * @param {string} endMarker - Marker that closes the managed region.
 * @param {string} replacement - New content for the region.
 * @returns {string} The document with the region's content replaced.
 * @throws {Error} When either marker cannot be located.
 */
function replaceBetween(text, startMarker, endMarker, replacement) {
  const openAt = text.indexOf(startMarker);
  const regionStart = openAt + startMarker.length;
  const closeAt = text.indexOf(endMarker, regionStart);

  if (openAt < 0 || closeAt < 0) {
    throw new Error(`Markers not found: ${startMarker} .. ${endMarker}`);
  }

  const head = text.slice(0, regionStart);
  const tail = text.slice(closeAt);
  return [head, replacement.trimEnd(), tail].join('\n');
}
12
+
13
/**
 * Sync the guidance fragments under docs/prompt-guidance-fragments/ into the
 * marker-delimited regions of AGENTS.md, templates/AGENTS.md and the
 * executor, planner and verifier prompts.
 *
 * All inputs are read and every region is replaced in memory before anything
 * is written, so a missing file or marker aborts the run without leaving a
 * subset of the targets updated. Files whose content would not change are
 * not rewritten, which keeps their modification times stable.
 *
 * @returns {Promise<void>}
 * @throws {Error} When a fragment or target cannot be read, or a marker pair
 *   is missing from a target (raised by `replaceBetween`).
 */
async function main() {
  const fragmentDir = 'docs/prompt-guidance-fragments';

  // Each section is [marker tag, fragment basename]; sections are applied to
  // a target in the listed order.
  const coreSections = [
    ['OPERATING', 'core-operating-principles'],
    ['VERIFYSEQ', 'core-verification-and-sequencing'],
  ];
  const plan = [
    { file: 'AGENTS.md', sections: coreSections },
    { file: 'templates/AGENTS.md', sections: coreSections },
    {
      file: 'prompts/executor.md',
      sections: [
        ['EXECUTOR:CONSTRAINTS', 'executor-constraints'],
        ['EXECUTOR:OUTPUT', 'executor-output'],
      ],
    },
    {
      file: 'prompts/planner.md',
      sections: [
        ['PLANNER:CONSTRAINTS', 'planner-constraints'],
        ['PLANNER:INVESTIGATION', 'planner-investigation'],
        ['PLANNER:OUTPUT', 'planner-output'],
      ],
    },
    {
      file: 'prompts/verifier.md',
      sections: [
        ['VERIFIER:CONSTRAINTS', 'verifier-constraints'],
        ['VERIFIER:INVESTIGATION', 'verifier-investigation'],
      ],
    },
  ];

  // Load every distinct fragment once, in parallel.
  const fragmentNames = [
    ...new Set(plan.flatMap(({ sections }) => sections.map(([, name]) => name))),
  ];
  const fragments = new Map(
    await Promise.all(
      fragmentNames.map(async (name) => [
        name,
        (await read(`${fragmentDir}/${name}.md`)).trim(),
      ]),
    ),
  );

  // Compute the updated text of every target before touching the disk.
  const results = await Promise.all(
    plan.map(async ({ file, sections }) => {
      const original = await read(file);
      const updated = sections.reduce(
        (text, [tag, name]) =>
          replaceBetween(
            text,
            `<!-- OMX:GUIDANCE:${tag}:START -->`,
            `<!-- OMX:GUIDANCE:${tag}:END -->`,
            fragments.get(name),
          ),
        original,
      );
      return { file, original, updated };
    }),
  );

  // Every target resolved cleanly; now persist only the ones that changed.
  await Promise.all(
    results
      .filter(({ original, updated }) => original !== updated)
      .map(({ file, updated }) => writeFile(file, updated)),
  );
}

// Report only the message (no stack trace) and signal failure to the caller.
main().catch((err) => {
  console.error(err instanceof Error ? err.message : String(err));
  process.exit(1);
});