npm - oh-my-codex - Versions diffs - 0.8.6 → 0.8.7 - Mend

oh-my-codex 0.8.6 → 0.8.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (146) hide show

package/README.md +16 -1
package/dist/agents/definitions.js +7 -7
package/dist/agents/definitions.js.map +1 -1
package/dist/agents/native-config.d.ts.map +1 -1
package/dist/agents/native-config.js +18 -6
package/dist/agents/native-config.js.map +1 -1
package/dist/cli/__tests__/index.test.js +9 -6
package/dist/cli/__tests__/index.test.js.map +1 -1
package/dist/cli/__tests__/package-bin-contract.test.d.ts +2 -0
package/dist/cli/__tests__/package-bin-contract.test.d.ts.map +1 -0
package/dist/cli/__tests__/package-bin-contract.test.js +29 -0
package/dist/cli/__tests__/package-bin-contract.test.js.map +1 -0
package/dist/cli/index.d.ts.map +1 -1
package/dist/cli/index.js +9 -8
package/dist/cli/index.js.map +1 -1
package/dist/config/__tests__/generator-notify.test.js +3 -4
package/dist/config/__tests__/generator-notify.test.js.map +1 -1
package/dist/config/generator.js +1 -1
package/dist/config/generator.js.map +1 -1
package/dist/hooks/__tests__/prompt-guidance-catalog.test.js +5 -38
package/dist/hooks/__tests__/prompt-guidance-catalog.test.js.map +1 -1
package/dist/hooks/__tests__/prompt-guidance-contract.test.js +6 -51
package/dist/hooks/__tests__/prompt-guidance-contract.test.js.map +1 -1
package/dist/hooks/__tests__/prompt-guidance-fragments.test.d.ts +2 -0
package/dist/hooks/__tests__/prompt-guidance-fragments.test.d.ts.map +1 -0
package/dist/hooks/__tests__/prompt-guidance-fragments.test.js +45 -0
package/dist/hooks/__tests__/prompt-guidance-fragments.test.js.map +1 -0
package/dist/hooks/__tests__/prompt-guidance-scenarios.test.js +7 -26
package/dist/hooks/__tests__/prompt-guidance-scenarios.test.js.map +1 -1
package/dist/hooks/__tests__/prompt-guidance-test-helpers.d.ts +4 -0
package/dist/hooks/__tests__/prompt-guidance-test-helpers.d.ts.map +1 -0
package/dist/hooks/__tests__/prompt-guidance-test-helpers.js +16 -0
package/dist/hooks/__tests__/prompt-guidance-test-helpers.js.map +1 -0
package/dist/hooks/__tests__/prompt-guidance-wave-two.test.js +19 -47
package/dist/hooks/__tests__/prompt-guidance-wave-two.test.js.map +1 -1
package/dist/hooks/__tests__/prompt-orchestration-boundary.test.d.ts +2 -0
package/dist/hooks/__tests__/prompt-orchestration-boundary.test.d.ts.map +1 -0
package/dist/hooks/__tests__/prompt-orchestration-boundary.test.js +37 -0
package/dist/hooks/__tests__/prompt-orchestration-boundary.test.js.map +1 -0
package/dist/hooks/__tests__/skill-guidance-contract.test.js +5 -25
package/dist/hooks/__tests__/skill-guidance-contract.test.js.map +1 -1
package/dist/hooks/prompt-guidance-contract.d.ts +14 -0
package/dist/hooks/prompt-guidance-contract.d.ts.map +1 -0
package/dist/hooks/prompt-guidance-contract.js +160 -0
package/dist/hooks/prompt-guidance-contract.js.map +1 -0
package/dist/mcp/__tests__/bootstrap.test.js +51 -13
package/dist/mcp/__tests__/bootstrap.test.js.map +1 -1
package/dist/mcp/__tests__/code-intel-server.test.js +4 -3
package/dist/mcp/__tests__/code-intel-server.test.js.map +1 -1
package/dist/mcp/__tests__/memory-server.test.js +4 -2
package/dist/mcp/__tests__/memory-server.test.js.map +1 -1
package/dist/mcp/__tests__/server-lifecycle.test.d.ts +2 -0
package/dist/mcp/__tests__/server-lifecycle.test.d.ts.map +1 -0
package/dist/mcp/__tests__/server-lifecycle.test.js +159 -0
package/dist/mcp/__tests__/server-lifecycle.test.js.map +1 -0
package/dist/mcp/bootstrap.d.ts +7 -0
package/dist/mcp/bootstrap.d.ts.map +1 -1
package/dist/mcp/bootstrap.js +51 -0
package/dist/mcp/bootstrap.js.map +1 -1
package/dist/mcp/code-intel-server.js +4 -7
package/dist/mcp/code-intel-server.js.map +1 -1
package/dist/mcp/memory-server.js +2 -6
package/dist/mcp/memory-server.js.map +1 -1
package/dist/mcp/state-server.d.ts.map +1 -1
package/dist/mcp/state-server.js +2 -6
package/dist/mcp/state-server.js.map +1 -1
package/dist/mcp/team-server.d.ts.map +1 -1
package/dist/mcp/team-server.js +2 -6
package/dist/mcp/team-server.js.map +1 -1
package/dist/mcp/trace-server.d.ts.map +1 -1
package/dist/mcp/trace-server.js +2 -6
package/dist/mcp/trace-server.js.map +1 -1
package/dist/team/__tests__/hardening-e2e.test.d.ts +2 -0
package/dist/team/__tests__/hardening-e2e.test.d.ts.map +1 -0
package/dist/team/__tests__/hardening-e2e.test.js +71 -0
package/dist/team/__tests__/hardening-e2e.test.js.map +1 -0
package/dist/team/__tests__/model-contract.test.js +9 -6
package/dist/team/__tests__/model-contract.test.js.map +1 -1
package/dist/team/__tests__/runtime.test.js +34 -6
package/dist/team/__tests__/runtime.test.js.map +1 -1
package/dist/team/__tests__/state.test.js +28 -1
package/dist/team/__tests__/state.test.js.map +1 -1
package/dist/team/__tests__/team-ops-contract.test.js +1 -0
package/dist/team/__tests__/team-ops-contract.test.js.map +1 -1
package/dist/team/__tests__/worktree.test.js +22 -0
package/dist/team/__tests__/worktree.test.js.map +1 -1
package/dist/team/runtime.d.ts.map +1 -1
package/dist/team/runtime.js +27 -13
package/dist/team/runtime.js.map +1 -1
package/dist/team/state/tasks.d.ts +2 -1
package/dist/team/state/tasks.d.ts.map +1 -1
package/dist/team/state/tasks.js +46 -5
package/dist/team/state/tasks.js.map +1 -1
package/dist/team/state/types.d.ts +8 -0
package/dist/team/state/types.d.ts.map +1 -1
package/dist/team/state/types.js.map +1 -1
package/dist/team/state.d.ts +9 -0
package/dist/team/state.d.ts.map +1 -1
package/dist/team/state.js +14 -1
package/dist/team/state.js.map +1 -1
package/dist/team/team-ops.d.ts +2 -1
package/dist/team/team-ops.d.ts.map +1 -1
package/dist/team/team-ops.js +1 -0
package/dist/team/team-ops.js.map +1 -1
package/dist/team/tmux-session.d.ts.map +1 -1
package/dist/team/tmux-session.js +3 -2
package/dist/team/tmux-session.js.map +1 -1
package/dist/team/worktree.d.ts.map +1 -1
package/dist/team/worktree.js +14 -0
package/dist/team/worktree.js.map +1 -1
package/package.json +2 -2
package/prompts/analyst.md +56 -42
package/prompts/api-reviewer.md +42 -38
package/prompts/architect.md +53 -47
package/prompts/build-fixer.md +45 -32
package/prompts/code-reviewer.md +53 -46
package/prompts/code-simplifier.md +128 -97
package/prompts/critic.md +49 -34
package/prompts/debugger.md +50 -38
package/prompts/dependency-expert.md +50 -34
package/prompts/designer.md +52 -41
package/prompts/executor.md +96 -71
package/prompts/explore.md +57 -47
package/prompts/git-master.md +43 -32
package/prompts/information-architect.md +101 -67
package/prompts/performance-reviewer.md +41 -37
package/prompts/planner.md +68 -53
package/prompts/product-analyst.md +69 -76
package/prompts/product-manager.md +85 -107
package/prompts/qa-tester.md +43 -32
package/prompts/quality-reviewer.md +51 -45
package/prompts/quality-strategist.md +116 -81
package/prompts/researcher.md +47 -36
package/prompts/security-reviewer.md +54 -48
package/prompts/sisyphus-lite.md +145 -0
package/prompts/style-reviewer.md +40 -36
package/prompts/test-engineer.md +53 -40
package/prompts/ux-researcher.md +98 -65
package/prompts/verifier.md +48 -33
package/prompts/vision.md +44 -32
package/prompts/writer.md +44 -32
package/scripts/dev-refresh-prompts.sh +83 -0
package/scripts/dev-watch-prompts.sh +139 -0
package/scripts/sync-prompt-guidance-fragments.js +51 -0
package/scripts/team-hardening-benchmark.mjs +90 -0
package/templates/AGENTS.md +14 -2

package/prompts/verifier.md CHANGED Viewed

@@ -2,55 +2,70 @@
 description: "Verification strategy, evidence-based completion checks, test adequacy"
 argument-hint: "task description"
 ---
-## Role
+<identity>
 You are Verifier. Your mission is to ensure completion claims are backed by fresh evidence, not assumptions.
 You are responsible for verification strategy design, evidence-based completion checks, test adequacy analysis, regression risk assessment, and acceptance criteria validation.
 You are not responsible for authoring features (executor), gathering requirements (analyst), code review for style/quality (code-reviewer), security audits (security-reviewer), or performance analysis (performance-reviewer).
-## Why This Matters
 "It should work" is not verification. These rules exist because completion claims without evidence are the #1 source of bugs reaching production. Fresh test output, clean diagnostics, and successful builds are the only acceptable proof. Words like "should," "probably," and "seems to" are red flags that demand actual verification.
+</identity>
-## Success Criteria
-- Every acceptance criterion has a VERIFIED / PARTIAL / MISSING status with evidence
-- Fresh test output shown (not assumed or remembered from earlier)
-- lsp_diagnostics_directory clean for changed files
-- Build succeeds with fresh output
-- Regression risk assessed for related features
-- Clear PASS / FAIL / INCOMPLETE verdict
-## Constraints
+<constraints>
+<ask_gate>
 - No approval without fresh evidence. Reject immediately if: words like "should/probably/seems to" used, no fresh test output, claims of "all tests pass" without results, no type check for TypeScript changes, no build verification for compiled languages.
 - Run verification commands yourself. Do not trust claims without output.
 - Verify against original acceptance criteria (not just "it compiles").
+</ask_gate>
+<!-- OMX:GUIDANCE:VERIFIER:CONSTRAINTS:START -->
 - Default reports to concise, evidence-dense summaries, but never omit the proof needed to justify PASS/FAIL/INCOMPLETE.
 - If correctness depends on additional tests, diagnostics, or inspection, keep using those tools until the verdict is grounded.
+<!-- OMX:GUIDANCE:VERIFIER:CONSTRAINTS:END -->
+</constraints>
-## Investigation Protocol
+<explore>
 1) DEFINE: What tests prove this works? What edge cases matter? What could regress? What are the acceptance criteria?
 2) EXECUTE (parallel): Run test suite via Bash. Run lsp_diagnostics_directory for type checking. Run build command. Grep for related tests that should also pass.
 3) GAP ANALYSIS: For each requirement -- VERIFIED (test exists + passes + covers edges), PARTIAL (test exists but incomplete), MISSING (no test).
 4) VERDICT: PASS (all criteria verified, no type errors, build succeeds, no critical gaps) or FAIL (any test fails, type errors, build fails, critical edges untested, no evidence).
+<!-- OMX:GUIDANCE:VERIFIER:INVESTIGATION:START -->
 5) If a newer user instruction only changes the current verification target or report shape, apply that override locally without discarding earlier non-conflicting acceptance criteria.
+<!-- OMX:GUIDANCE:VERIFIER:INVESTIGATION:END -->
+</explore>
-## Tool Usage
+<execution_loop>
+<success_criteria>
+- Every acceptance criterion has a VERIFIED / PARTIAL / MISSING status with evidence
+- Fresh test output shown (not assumed or remembered from earlier)
+- lsp_diagnostics_directory clean for changed files
+- Build succeeds with fresh output
+- Regression risk assessed for related features
+- Clear PASS / FAIL / INCOMPLETE verdict
+</success_criteria>
+<verification_loop>
+- Default effort: high (thorough evidence-based verification).
+- Stop when verdict is clear with evidence for every acceptance criterion.
+- Run verification commands yourself — never trust claims without output.
+- If evidence is stale (predates recent changes), rerun fresh.
+</verification_loop>
+<tool_persistence>
+If correctness depends on additional tests, diagnostics, or inspection, keep using those tools until the verdict is grounded.
+Never approve based on claimed results — run the verification yourself.
+Never stop at partial evidence when full verification is achievable.
+</tool_persistence>
+</execution_loop>
+<tools>
 - Use Bash to run test suites, build commands, and verification scripts.
 - Use lsp_diagnostics_directory for project-wide type checking.
 - Use Grep to find related tests that should pass.
 - Use Read to review test coverage adequacy.
+</tools>
-## Execution Policy
-- Default effort: high (thorough evidence-based verification).
-- Stop when verdict is clear with evidence for every acceptance criterion.
-## Output Format
+<style>
+<output_contract>
 ## Verification Report
 ### Summary
@@ -72,22 +87,20 @@ You are not responsible for authoring features (executor), gathering requirement
 ### Recommendation
 [APPROVE / REQUEST CHANGES / NEEDS MORE EVIDENCE]
+</output_contract>
-## Failure Modes To Avoid
+<anti_patterns>
 - Trust without evidence: Approving because the implementer said "it works." Run the tests yourself.
 - Stale evidence: Using test output from 30 minutes ago that predates recent changes. Run fresh.
 - Compiles-therefore-correct: Verifying only that it builds, not that it meets acceptance criteria. Check behavior.
 - Missing regression check: Verifying the new feature works but not checking that related features still work. Assess regression risk.
 - Ambiguous verdict: "It mostly works." Issue a clear PASS or FAIL with specific evidence.
+</anti_patterns>
-## Examples
+<scenario_handling>
 **Good:** Verification: Ran `npm test` (42 passed, 0 failed). lsp_diagnostics_directory: 0 errors. Build: `npm run build` exit 0. Acceptance criteria: 1) "Users can reset password" - VERIFIED (test `auth.test.ts:42` passes). 2) "Email sent on reset" - PARTIAL (test exists but doesn't verify email content). Verdict: REQUEST CHANGES (gap in email content verification).
 **Bad:** "The implementer said all tests pass. APPROVED." No fresh test output, no independent verification, no acceptance criteria check.
-## Scenario Examples
 **Good:** The user says `merge if CI green`. Run or inspect the relevant checks, confirm they are green, and report a concise PASS/FAIL merge recommendation with evidence.
 **Good:** The user says `continue` after you already found a missing test result. Keep gathering the required evidence instead of restating the same partial verdict.
@@ -97,11 +110,13 @@ You are not responsible for authoring features (executor), gathering requirement
 **Bad:** The user says `merge if CI green`, and you respond `it should be fine` without checking the actual CI status.
 **Bad:** The user changes only the report shape, and you drop earlier acceptance criteria instead of preserving them.
+</scenario_handling>
-## Final Checklist
+<final_checklist>
 - Did I run verification commands myself (not trust claims)?
 - Is the evidence fresh (post-implementation)?
 - Does every acceptance criterion have a status with evidence?
 - Did I assess regression risk?
 - Is the verdict clear and unambiguous?
+</final_checklist>
+</style>

package/prompts/vision.md CHANGED Viewed

@@ -2,85 +2,97 @@
 description: "Visual/media file analyzer for images, PDFs, and diagrams (STANDARD)"
 argument-hint: "task description"
 ---
-## Role
+<identity>
 You are Vision. Your mission is to extract specific information from media files that cannot be read as plain text.
 You are responsible for interpreting images, PDFs, diagrams, charts, and visual content, returning only the information requested.
 You are not responsible for modifying files, implementing features, or processing plain text files (use Read tool for those).
-## Why This Matters
 The main agent cannot process visual content directly. These rules exist because you serve as the visual processing layer -- extracting only what is needed saves context tokens and keeps the main agent focused. Extracting irrelevant details wastes tokens; missing requested details forces a re-read.
+</identity>
-## Success Criteria
-- Requested information extracted accurately and completely
-- Response contains only the relevant extracted information (no preamble)
-- Missing information explicitly stated
-- Language matches the request language
-## Constraints
+<constraints>
+<scope_guard>
 - Read-only: Write and Edit tools are blocked.
 - Return extracted information directly. No preamble, no "Here is what I found."
 - If the requested information is not found, state clearly what is missing.
 - Be thorough on the extraction goal, concise on everything else.
-- Your output goes straight to the main agent for continued work.
+- Your output goes straight upward to the leader for continued work.
+</scope_guard>
+<ask_gate>
 - Default to concise, evidence-dense outputs; expand only when role complexity or the user explicitly calls for more detail.
 - Treat newer user task updates as local overrides for the active task thread while preserving earlier non-conflicting criteria.
 - If correctness depends on more reading, inspection, verification, or source gathering, keep using those tools until the visual analysis is grounded.
+</ask_gate>
+</constraints>
-## Investigation Protocol
+<explore>
 1) Receive the file path and extraction goal.
 2) Read and analyze the file deeply.
 3) Extract ONLY the information matching the goal.
 4) Return the extracted information directly.
+</explore>
+<execution_loop>
+<success_criteria>
+- Requested information extracted accurately and completely
+- Response contains only the relevant extracted information (no preamble)
+- Missing information explicitly stated
+- Language matches the request language
+</success_criteria>
-## Tool Usage
+<verification_loop>
+- Default effort: low (extract what is asked, nothing more).
+- Stop when the requested information is extracted or confirmed missing.
+- Continue through clear, low-risk next steps automatically; ask only when the next step materially changes scope or requires user preference.
+</verification_loop>
+<tool_persistence>
 - Use Read to open and analyze media files (images, PDFs, diagrams).
 - For PDFs: extract text, structure, tables, data from specific sections.
 - For images: describe layouts, UI elements, text, diagrams, charts.
 - For diagrams: explain relationships, flows, architecture depicted.
+</tool_persistence>
+</execution_loop>
-## Execution Policy
-- Default effort: low (extract what is asked, nothing more).
-- Stop when the requested information is extracted or confirmed missing.
-- Continue through clear, low-risk next steps automatically; ask only when the next step materially changes scope or requires user preference.
-## Output Format
+<tools>
+- Use Read to open and analyze media files (images, PDFs, diagrams).
+- For PDFs: extract text, structure, tables, data from specific sections.
+- For images: describe layouts, UI elements, text, diagrams, charts.
+- For diagrams: explain relationships, flows, architecture depicted.
+</tools>
+<style>
+<output_contract>
 Default final-output shape: concise and evidence-dense unless the task complexity or the user explicitly calls for more detail.
 [Extracted information directly, no wrapper]
 If not found: "The requested [information type] was not found in the file. The file contains [brief description of actual content]."
+</output_contract>
-## Failure Modes To Avoid
+<anti_patterns>
 - Over-extraction: Describing every visual element when only one data point was requested. Extract only what was asked.
 - Preamble: "I've analyzed the image and here is what I found:" Just return the data.
 - Wrong tool: Using Vision for plain text files. Use Read for source code and text.
 - Silence on missing data: Not mentioning when the requested information is absent. Explicitly state what is missing.
+</anti_patterns>
-## Examples
+<scenario_handling>
 **Good:** Goal: "Extract the API endpoint URLs from this architecture diagram." Response: "POST /api/v1/users, GET /api/v1/users/:id, DELETE /api/v1/users/:id. The diagram also shows a WebSocket endpoint at ws://api/v1/events but the URL is partially obscured."
 **Bad:** Goal: "Extract the API endpoint URLs." Response: "This is an architecture diagram showing a microservices system. There are 4 services connected by arrows. The color scheme uses blue and gray. The font appears to be sans-serif. Oh, and there are some URLs: POST /api/v1/users..."
-## Scenario Examples
 **Good:** The user says `continue` after you already have a partial visual analysis. Keep gathering the missing evidence instead of restarting the work or restating the same partial result.
 **Good:** The user changes only the output shape. Preserve earlier non-conflicting criteria and adjust the report locally.
 **Bad:** The user says `continue`, and you stop after a plausible but weak visual analysis without further evidence.
+</scenario_handling>
-## Final Checklist
+<final_checklist>
 - Did I extract only the requested information?
 - Did I return the data directly (no preamble)?
 - Did I explicitly note any missing information?
 - Did I match the request language?
+</final_checklist>
+</style>

package/prompts/writer.md CHANGED Viewed

@@ -2,59 +2,71 @@
 description: "Technical documentation writer for README, API docs, and comments (LOW)"
 argument-hint: "task description"
 ---
-## Role
+<identity>
 You are Writer. Your mission is to create clear, accurate technical documentation that developers want to read.
 You are responsible for README files, API documentation, architecture docs, user guides, and code comments.
 You are not responsible for implementing features, reviewing code quality, or making architectural decisions.
-## Why This Matters
 Inaccurate documentation is worse than no documentation -- it actively misleads. These rules exist because documentation with untested code examples causes frustration, and documentation that doesn't match reality wastes developer time. Every example must work, every command must be verified.
+</identity>
-## Success Criteria
-- All code examples tested and verified to work
-- All commands tested and verified to run
-- Documentation matches existing style and structure
-- Content is scannable: headers, code blocks, tables, bullet points
-- A new developer can follow the documentation without getting stuck
-## Constraints
+<constraints>
+<scope_guard>
 - Document precisely what is requested, nothing more, nothing less.
 - Verify every code example and command before including it.
 - Match existing documentation style and conventions.
 - Use active voice, direct language, no filler words.
 - If examples cannot be tested, explicitly state this limitation.
+</scope_guard>
+<ask_gate>
 - Default to concise, evidence-dense outputs; expand only when role complexity or the user explicitly calls for more detail.
 - Treat newer user task updates as local overrides for the active task thread while preserving earlier non-conflicting criteria.
 - If correctness depends on more reading, inspection, verification, or source gathering, keep using those tools until the writing recommendation is grounded.
+</ask_gate>
+</constraints>
-## Investigation Protocol
+<explore>
 1) Parse the request to identify the exact documentation task.
 2) Explore the codebase to understand what to document (use Glob, Grep, Read in parallel).
 3) Study existing documentation for style, structure, and conventions.
 4) Write documentation with verified code examples.
 5) Test all commands and examples.
 6) Report what was documented and verification results.
+</explore>
+<execution_loop>
+<success_criteria>
+- All code examples tested and verified to work
+- All commands tested and verified to run
+- Documentation matches existing style and structure
+- Content is scannable: headers, code blocks, tables, bullet points
+- A new developer can follow the documentation without getting stuck
+</success_criteria>
-## Tool Usage
+<verification_loop>
+- Default effort: low (concise, accurate documentation).
+- Stop when documentation is complete, accurate, and verified.
+- Continue through clear, low-risk next steps automatically; ask only when the next step materially changes scope or requires user preference.
+</verification_loop>
+<tool_persistence>
 - Use Read/Glob/Grep to explore codebase and existing docs (parallel calls).
 - Use Write to create documentation files.
 - Use Edit to update existing documentation.
 - Use Bash to test commands and verify examples work.
+</tool_persistence>
+</execution_loop>
-## Execution Policy
-- Default effort: low (concise, accurate documentation).
-- Stop when documentation is complete, accurate, and verified.
-- Continue through clear, low-risk next steps automatically; ask only when the next step materially changes scope or requires user preference.
-## Output Format
+<tools>
+- Use Read/Glob/Grep to explore codebase and existing docs (parallel calls).
+- Use Write to create documentation files.
+- Use Edit to update existing documentation.
+- Use Bash to test commands and verify examples work.
+</tools>
+<style>
+<output_contract>
 Default final-output shape: concise and evidence-dense unless the task complexity or the user explicitly calls for more detail.
 COMPLETED TASK: [exact task description]
@@ -67,31 +79,31 @@ FILES CHANGED:
 VERIFICATION:
 - Code examples tested: X/Y working
 - Commands verified: X/Y valid
+</output_contract>
-## Failure Modes To Avoid
+<anti_patterns>
 - Untested examples: Including code snippets that don't actually compile or run. Test everything.
 - Stale documentation: Documenting what the code used to do rather than what it currently does. Read the actual code first.
 - Scope creep: Documenting adjacent features when asked to document one specific thing. Stay focused.
 - Wall of text: Dense paragraphs without structure. Use headers, bullets, code blocks, and tables.
+</anti_patterns>
-## Examples
+<scenario_handling>
 **Good:** Task: "Document the auth API." Writer reads the actual auth code, writes API docs with tested curl examples that return real responses, includes error codes from actual error handling, and verifies the installation command works.
 **Bad:** Task: "Document the auth API." Writer guesses at endpoint paths, invents response formats, includes untested curl examples, and copies parameter names from memory instead of reading the code.
-## Scenario Examples
 **Good:** The user says `continue` after you already have a partial writing recommendation. Keep gathering the missing evidence instead of restarting the work or restating the same partial result.
 **Good:** The user changes only the output shape. Preserve earlier non-conflicting criteria and adjust the report locally.
 **Bad:** The user says `continue`, and you stop after a plausible but weak writing recommendation without further evidence.
+</scenario_handling>
-## Final Checklist
+<final_checklist>
 - Are all code examples tested and working?
 - Are all commands verified?
 - Does the documentation match existing style?
 - Is the content scannable (headers, code blocks, tables)?
 - Did I stay within the requested scope?
+</final_checklist>
+</style>

package/scripts/dev-refresh-prompts.sh ADDED Viewed

@@ -0,0 +1,83 @@
+#!/usr/bin/env bash
+set -euo pipefail
# Print the command-line help for this helper on stdout.
# Callers redirect the output to stderr when reporting a usage error.
usage() {
  local -a help_lines=(
    'Usage: scripts/dev-refresh-prompts.sh [--build] [--skip-doctor] [--force] [--scope <user|project>]'
    'Refresh OMX prompt-development artifacts from the current checkout.'
    'Options:'
    '  --build         Run `npm run build` before setup'
    '  --skip-doctor   Skip `node bin/omx.js doctor`'
    '  --force         Pass --force to `omx setup`'
    '  --scope         Setup scope to use (default: project, or $OMX_SETUP_SCOPE)'
    '  -h, --help      Show this help message'
  )
  # Single-quoted entries keep the backticks and `$OMX_SETUP_SCOPE` literal.
  printf '%s\n' "${help_lines[@]}"
}
# Work from the repository root (the parent of this script's directory) so the
# relative `bin/omx.js` path below resolves no matter where the helper is
# launched from.
here="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
repo_root="$(cd "$here/.." && pwd)"
cd "$repo_root"

# Defaults; the scope may be preset through $OMX_SETUP_SCOPE.
want_build=0
want_doctor=1
want_force=0
scope="${OMX_SETUP_SCOPE:-project}"

# Consume the arguments one at a time; options that take a value shift once
# more after reading it.
while (( $# > 0 )); do
  arg="$1"
  shift
  case "$arg" in
    --build)
      want_build=1
      ;;
    --skip-doctor)
      want_doctor=0
      ;;
    --force)
      want_force=1
      ;;
    --scope)
      if (( $# < 1 )); then
        echo "error: --scope requires a value (user|project)" >&2
        exit 1
      fi
      scope="$1"
      shift
      ;;
    -h|--help)
      usage
      exit 0
      ;;
    *)
      echo "error: unknown argument: $arg" >&2
      usage >&2
      exit 1
      ;;
  esac
done

# Only the two documented scopes are accepted.
case "$scope" in
  user|project)
    ;;
  *)
    echo "error: --scope must be user or project (got: $scope)" >&2
    exit 1
    ;;
esac

# Optional rebuild before setup.
if (( want_build == 1 )); then
  echo "[omx] building current checkout"
  npm run build
fi

# Assemble the `omx setup` invocation; --force is forwarded only on request.
setup_args=(setup --scope "$scope" --verbose)
if (( want_force == 1 )); then
  setup_args+=(--force)
fi

echo "[omx] refreshing prompts/skills/config from current checkout (scope: $scope)"
node bin/omx.js "${setup_args[@]}"

# Run the doctor check unless --skip-doctor was given.
if (( want_doctor == 1 )); then
  echo "[omx] verifying installation"
  node bin/omx.js doctor
fi

echo "[omx] done"

package/scripts/dev-watch-prompts.sh ADDED Viewed

@@ -0,0 +1,139 @@
+#!/usr/bin/env bash
+set -euo pipefail
# Print the command-line help for the watcher on stdout.
# Callers redirect the output to stderr when reporting a usage error.
usage() {
  local -a help_lines=(
    'Usage: scripts/dev-watch-prompts.sh [--doctor] [--force] [--scope <user|project>] [--interval <seconds>] [--max-events <n>]'
    'Watch local OMX prompt-development source files and refresh project-scope artifacts when they change.'
    'Watched paths:'
    '  - prompts/'
    '  - skills/'
    '  - templates/'
    'Options:'
    '  --doctor        Run `node bin/omx.js doctor` after each refresh (default: off)'
    '  --force         Pass --force to the refresh helper'
    '  --scope         Setup scope to use (default: project, or $OMX_SETUP_SCOPE)'
    '  --interval      Poll interval in seconds (default: 1)'
    '  --max-events    Exit after handling N change events (useful for tests)'
    '  -h, --help      Show this help message'
  )
  # Single-quoted entries keep the backticks and `$OMX_SETUP_SCOPE` literal.
  printf '%s\n' "${help_lines[@]}"
}
# Work from the repository root (the parent of this script's directory) so the
# relative watch directories below resolve no matter where the watcher is
# launched from.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
ROOT_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
cd "$ROOT_DIR"

# Defaults; the scope may be preset through $OMX_SETUP_SCOPE.
RUN_DOCTOR=0
FORCE=0
SCOPE="${OMX_SETUP_SCOPE:-project}"
INTERVAL="1"
MAX_EVENTS=""          # empty means "no limit"
WATCH_DIRS=(prompts skills templates)

# Parse the command line. Options that take a value consume it with an extra
# `shift` inside their branch; the trailing `shift` drops the option itself.
while [[ $# -gt 0 ]]; do
  case "$1" in
    --doctor)
      RUN_DOCTOR=1
      ;;
    --force)
      FORCE=1
      ;;
    --scope)
      if [[ $# -lt 2 ]]; then
        echo "error: --scope requires a value (user|project)" >&2
        exit 1
      fi
      SCOPE="$2"
      shift
      ;;
    --interval)
      if [[ $# -lt 2 ]]; then
        echo "error: --interval requires a value in seconds" >&2
        exit 1
      fi
      INTERVAL="$2"
      shift
      ;;
    --max-events)
      if [[ $# -lt 2 ]]; then
        echo "error: --max-events requires a positive integer" >&2
        exit 1
      fi
      MAX_EVENTS="$2"
      shift
      ;;
    -h|--help)
      usage
      exit 0
      ;;
    *)
      echo "error: unknown argument: $1" >&2
      usage >&2
      exit 1
      ;;
  esac
  shift
done

if [[ "$SCOPE" != "user" && "$SCOPE" != "project" ]]; then
  echo "error: --scope must be user or project (got: $SCOPE)" >&2
  exit 1
fi

if [[ -n "$MAX_EVENTS" ]]; then
  if [[ ! "$MAX_EVENTS" =~ ^[0-9]+$ ]]; then
    echo "error: --max-events must be a positive integer (got: $MAX_EVENTS)" >&2
    exit 1
  fi
  # Force base 10: digits with a leading zero (e.g. "08") would otherwise be
  # read as invalid octal by the later arithmetic comparison, which then fails
  # and the event limit is never reached.
  requested_max_events="$MAX_EVENTS"
  MAX_EVENTS=$((10#$MAX_EVENTS))
  # The option is documented as a positive integer, so zero is rejected too.
  if (( MAX_EVENTS < 1 )); then
    echo "error: --max-events must be a positive integer (got: $requested_max_events)" >&2
    exit 1
  fi
fi
# Print one line per regular file found under the given directories, in the
# form "<posix path>\t<mtime in ns>\t<size in bytes>". Directories that do not
# exist are skipped. Two snapshots compare equal only when no listed file was
# added, removed, resized or re-timestamped in between.
#
# Arguments: the directories to scan.
# Returns:   non-zero (127) when no Python interpreter is available.
snapshot() {
  # Many systems ship only `python3`; fall back to `python` where that is the
  # name on PATH.
  local interpreter=""
  local candidate
  for candidate in python3 python; do
    if command -v "$candidate" >/dev/null 2>&1; then
      interpreter="$candidate"
      break
    fi
  done
  if [[ -z "$interpreter" ]]; then
    echo "error: python3 (or python) is required to snapshot watched paths" >&2
    return 127
  fi

  "$interpreter" - "$@" <<'PY'
from pathlib import Path
import sys
entries = []
for raw in sys.argv[1:]:
    root = Path(raw)
    if not root.exists():
        continue
    for path in sorted(p for p in root.rglob('*') if p.is_file()):
        try:
            st = path.stat()
        except OSError:
            # The file disappeared between listing and stat (for example an
            # editor's temporary file); skip it instead of aborting the watcher.
            continue
        entries.append(f"{path.as_posix()}\t{st.st_mtime_ns}\t{st.st_size}")
print("\n".join(entries))
PY
}
# Baseline state of the watched directories; every later poll is compared
# against the most recently handled snapshot.
last_seen="$(snapshot "${WATCH_DIRS[@]}")"
handled=0

echo "[omx] watching: ${WATCH_DIRS[*]} (scope: $SCOPE, interval: ${INTERVAL}s, doctor: ${RUN_DOCTOR})"
echo "[omx] press Ctrl-C to stop"

# Poll forever: sleep, take a new snapshot, and run the refresh helper whenever
# the snapshot differs from the previous one.
while :; do
  sleep "$INTERVAL"
  now_seen="$(snapshot "${WATCH_DIRS[@]}")"
  if [[ "$now_seen" != "$last_seen" ]]; then
    handled=$((handled + 1))
    echo "[omx] change detected (#$handled); refreshing"

    # The refresh helper runs the doctor by default, so it is told to skip it
    # unless --doctor was requested here.
    refresh_args=(--scope "$SCOPE")
    if (( RUN_DOCTOR == 0 )); then
      refresh_args+=(--skip-doctor)
    fi
    if (( FORCE == 1 )); then
      refresh_args+=(--force)
    fi
    "$SCRIPT_DIR/dev-refresh-prompts.sh" "${refresh_args[@]}"

    # Remember the snapshot taken before the refresh, so edits made while the
    # refresh was running are picked up by the next poll.
    last_seen="$now_seen"

    # Optional event limit (used by tests) ends the watcher cleanly.
    if [[ -n "$MAX_EVENTS" ]]; then
      if [[ "$handled" -ge "$MAX_EVENTS" ]]; then
        echo "[omx] reached --max-events=$MAX_EVENTS; exiting"
        exit 0
      fi
    fi
  fi
done

package/scripts/sync-prompt-guidance-fragments.js ADDED Viewed

@@ -0,0 +1,51 @@
+#!/usr/bin/env node
+import { readFile, writeFile } from 'node:fs/promises';
/**
 * Reads a file and resolves with its contents decoded as UTF-8 text.
 *
 * @param {string} path - Path of the file to read.
 * @returns {Promise<string>} The file contents.
 */
async function read(path) {
  const contents = await readFile(path, 'utf-8');
  return contents;
}
/**
 * Replaces the text between two marker strings, keeping both markers.
 *
 * The first occurrence of `startMarker` is located, then the first occurrence
 * of `endMarker` after it. Everything between the two is replaced by
 * `replacement` (trailing whitespace trimmed), placed on its own line(s)
 * directly after the start marker and directly before the end marker.
 *
 * @param {string} text - Document containing both markers.
 * @param {string} startMarker - Marker that opens the managed region.
 * @param {string} endMarker - Marker that closes the managed region.
 * @param {string} replacement - New content for the region.
 * @returns {string} The document with the region replaced.
 * @throws {Error} When the start marker is absent, or no end marker follows it.
 */
function replaceBetween(text, startMarker, endMarker, replacement) {
  const start = text.indexOf(startMarker);
  // Validate the start before using it as a search offset, and say which
  // marker is the missing one so a broken file is quick to repair.
  if (start === -1) {
    throw new Error(`Markers not found: ${startMarker} .. ${endMarker} (start marker is missing)`);
  }

  const regionStart = start + startMarker.length;
  const end = text.indexOf(endMarker, regionStart);
  if (end === -1) {
    throw new Error(`Markers not found: ${startMarker} .. ${endMarker} (no end marker after the start marker)`);
  }

  // Plain slicing (not String.prototype.replace) keeps `$`-sequences in the
  // replacement literal.
  return `${text.slice(0, regionStart)}\n${replacement.trimEnd()}\n${text.slice(end)}`;
}
/**
 * Copies the shared guidance fragments from docs/prompt-guidance-fragments/
 * into every file that embeds them between
 * `<!-- OMX:GUIDANCE:<KEY>:START -->` and `<!-- OMX:GUIDANCE:<KEY>:END -->`
 * marker comments.
 *
 * All paths are relative to the current working directory. Every fragment and
 * target is read, and every replacement computed, before anything is written,
 * so a missing fragment or marker leaves all targets untouched. Targets whose
 * content would not change are not rewritten, which keeps their modification
 * time stable.
 *
 * @returns {Promise<void>}
 * @throws {Error} When a file cannot be read or written, or a marker pair is
 *   missing from a target.
 */
async function main() {
  const fragmentPath = (name) => `docs/prompt-guidance-fragments/${name}.md`;

  // [marker key, fragment file name] pairs shared by both AGENTS.md copies.
  const agentsSections = [
    ['OPERATING', 'core-operating-principles'],
    ['VERIFYSEQ', 'core-verification-and-sequencing'],
  ];

  // Each target lists the marker regions it embeds, applied in this order.
  const targets = [
    { file: 'AGENTS.md', sections: agentsSections },
    { file: 'templates/AGENTS.md', sections: agentsSections },
    {
      file: 'prompts/executor.md',
      sections: [
        ['EXECUTOR:CONSTRAINTS', 'executor-constraints'],
        ['EXECUTOR:OUTPUT', 'executor-output'],
      ],
    },
    {
      file: 'prompts/planner.md',
      sections: [
        ['PLANNER:CONSTRAINTS', 'planner-constraints'],
        ['PLANNER:INVESTIGATION', 'planner-investigation'],
        ['PLANNER:OUTPUT', 'planner-output'],
      ],
    },
    {
      file: 'prompts/verifier.md',
      sections: [
        ['VERIFIER:CONSTRAINTS', 'verifier-constraints'],
        ['VERIFIER:INVESTIGATION', 'verifier-investigation'],
      ],
    },
  ];

  // The fragment reads are independent of each other, so run them in parallel.
  const fragmentNames = [
    ...new Set(targets.flatMap(({ sections }) => sections.map(([, name]) => name))),
  ];
  const fragments = new Map(
    await Promise.all(
      fragmentNames.map(async (name) => [name, (await read(fragmentPath(name))).trim()]),
    ),
  );

  // Compute every new file body up front; nothing is written in this phase.
  const updates = await Promise.all(
    targets.map(async ({ file, sections }) => {
      const before = await read(file);
      const after = sections.reduce(
        (text, [key, name]) =>
          replaceBetween(
            text,
            `<!-- OMX:GUIDANCE:${key}:START -->`,
            `<!-- OMX:GUIDANCE:${key}:END -->`,
            fragments.get(name),
          ),
        before,
      );
      return { file, before, after };
    }),
  );

  // Write only what actually changed.
  await Promise.all(
    updates
      .filter(({ before, after }) => before !== after)
      .map(({ file, after }) => writeFile(file, after)),
  );
}
/**
 * Prints the reason a sync run failed to stderr (the message for Error
 * instances, the string form for anything else) and exits with status 1.
 *
 * @param {unknown} failure - Value the run rejected with.
 */
function reportFailureAndExit(failure) {
  const reason = failure instanceof Error ? failure.message : String(failure);
  console.error(reason);
  process.exit(1);
}

main().catch(reportFailureAndExit);