@event4u/agent-config 1.12.0 → 1.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (260)
  1. package/.agent-src/commands/agent-handoff.md +3 -0
  2. package/.agent-src/commands/agent-status.md +3 -0
  3. package/.agent-src/commands/agents-audit.md +4 -0
  4. package/.agent-src/commands/agents-cleanup.md +6 -1
  5. package/.agent-src/commands/agents-prepare.md +3 -0
  6. package/.agent-src/commands/analyze-reference-repo.md +4 -0
  7. package/.agent-src/commands/bug-fix.md +5 -1
  8. package/.agent-src/commands/bug-investigate.md +4 -0
  9. package/.agent-src/commands/chat-history-checkpoint.md +126 -0
  10. package/.agent-src/commands/chat-history-clear.md +5 -0
  11. package/.agent-src/commands/chat-history-resume.md +5 -0
  12. package/.agent-src/commands/chat-history.md +5 -0
  13. package/.agent-src/commands/check-current-md.md +126 -0
  14. package/.agent-src/commands/commit-in-chunks.md +98 -0
  15. package/.agent-src/commands/commit.md +4 -0
  16. package/.agent-src/commands/compress.md +3 -0
  17. package/.agent-src/commands/context-create.md +4 -0
  18. package/.agent-src/commands/context-refactor.md +4 -0
  19. package/.agent-src/commands/copilot-agents-init.md +3 -0
  20. package/.agent-src/commands/copilot-agents-optimize.md +3 -0
  21. package/.agent-src/commands/create-pr-description.md +4 -0
  22. package/.agent-src/commands/create-pr.md +4 -0
  23. package/.agent-src/commands/do-and-judge.md +4 -1
  24. package/.agent-src/commands/do-in-steps.md +3 -0
  25. package/.agent-src/commands/e2e-heal.md +4 -0
  26. package/.agent-src/commands/e2e-plan.md +4 -0
  27. package/.agent-src/commands/estimate-ticket.md +4 -1
  28. package/.agent-src/commands/feature-dev.md +4 -0
  29. package/.agent-src/commands/feature-explore.md +4 -0
  30. package/.agent-src/commands/feature-plan.md +4 -0
  31. package/.agent-src/commands/feature-refactor.md +4 -0
  32. package/.agent-src/commands/feature-roadmap.md +6 -0
  33. package/.agent-src/commands/fix-ci.md +4 -0
  34. package/.agent-src/commands/fix-portability.md +3 -0
  35. package/.agent-src/commands/fix-pr-bot-comments.md +4 -0
  36. package/.agent-src/commands/fix-pr-comments.md +4 -0
  37. package/.agent-src/commands/fix-pr-developer-comments.md +4 -0
  38. package/.agent-src/commands/fix-references.md +3 -0
  39. package/.agent-src/commands/fix-seeder.md +4 -0
  40. package/.agent-src/commands/implement-ticket.md +39 -13
  41. package/.agent-src/commands/jira-ticket.md +4 -0
  42. package/.agent-src/commands/judge.md +3 -0
  43. package/.agent-src/commands/memory-add.md +5 -3
  44. package/.agent-src/commands/memory-full.md +5 -2
  45. package/.agent-src/commands/memory-promote.md +7 -6
  46. package/.agent-src/commands/mode.md +3 -0
  47. package/.agent-src/commands/module-create.md +4 -0
  48. package/.agent-src/commands/module-explore.md +4 -0
  49. package/.agent-src/commands/onboard.md +24 -0
  50. package/.agent-src/commands/optimize-agents.md +4 -0
  51. package/.agent-src/commands/optimize-augmentignore.md +3 -0
  52. package/.agent-src/commands/optimize-rtk-filters.md +3 -0
  53. package/.agent-src/commands/optimize-skills.md +4 -0
  54. package/.agent-src/commands/override-create.md +4 -0
  55. package/.agent-src/commands/override-manage.md +4 -0
  56. package/.agent-src/commands/package-reset.md +3 -0
  57. package/.agent-src/commands/package-test.md +3 -0
  58. package/.agent-src/commands/prepare-for-review.md +4 -0
  59. package/.agent-src/commands/project-analyze.md +4 -0
  60. package/.agent-src/commands/project-health.md +4 -0
  61. package/.agent-src/commands/propose-memory.md +6 -8
  62. package/.agent-src/commands/quality-fix.md +4 -0
  63. package/.agent-src/commands/refine-ticket.md +4 -1
  64. package/.agent-src/commands/review-changes.md +4 -0
  65. package/.agent-src/commands/review-routing.md +4 -0
  66. package/.agent-src/commands/roadmap-create.md +7 -0
  67. package/.agent-src/commands/roadmap-execute.md +12 -1
  68. package/.agent-src/commands/rule-compliance-audit.md +4 -0
  69. package/.agent-src/commands/set-cost-profile.md +3 -0
  70. package/.agent-src/commands/sync-agent-settings.md +3 -0
  71. package/.agent-src/commands/sync-gitignore.md +3 -0
  72. package/.agent-src/commands/tests-create.md +4 -0
  73. package/.agent-src/commands/tests-execute.md +4 -0
  74. package/.agent-src/commands/threat-model.md +4 -0
  75. package/.agent-src/commands/update-form-request-messages.md +4 -0
  76. package/.agent-src/commands/upstream-contribute.md +4 -0
  77. package/.agent-src/commands/work.md +161 -0
  78. package/.agent-src/guidelines/agent-infra/engineering-memory-data-format.md +2 -6
  79. package/.agent-src/guidelines/agent-infra/layered-settings.md +0 -1
  80. package/.agent-src/guidelines/agent-infra/memory-access.md +0 -7
  81. package/.agent-src/guidelines/agent-infra/role-contracts.md +2 -4
  82. package/.agent-src/guidelines/agent-infra/self-improvement-pipeline.md +0 -1
  83. package/.agent-src/guidelines/php/patterns/strategy.md +180 -2
  84. package/.agent-src/personas/README.md +0 -1
  85. package/.agent-src/rules/artifact-drafting-protocol.md +7 -2
  86. package/.agent-src/rules/artifact-engagement-recording.md +133 -0
  87. package/.agent-src/rules/ask-when-uncertain.md +18 -13
  88. package/.agent-src/rules/augment-portability.md +8 -0
  89. package/.agent-src/rules/autonomous-execution.md +158 -0
  90. package/.agent-src/rules/chat-history.md +147 -118
  91. package/.agent-src/rules/cli-output-handling.md +26 -3
  92. package/.agent-src/rules/command-suggestion.md +133 -0
  93. package/.agent-src/rules/commit-policy.md +99 -0
  94. package/.agent-src/rules/direct-answers.md +114 -0
  95. package/.agent-src/rules/docs-sync.md +36 -0
  96. package/.agent-src/rules/downstream-changes.md +10 -9
  97. package/.agent-src/rules/improve-before-implement.md +9 -6
  98. package/.agent-src/rules/language-and-tone.md +81 -6
  99. package/.agent-src/rules/non-destructive-by-default.md +117 -0
  100. package/.agent-src/rules/package-ci-checks.md +4 -0
  101. package/.agent-src/rules/preservation-guard.md +20 -0
  102. package/.agent-src/rules/roadmap-progress-sync.md +103 -30
  103. package/.agent-src/rules/scope-control.md +42 -1
  104. package/.agent-src/rules/size-enforcement.md +1 -3
  105. package/.agent-src/rules/skill-quality.md +3 -8
  106. package/.agent-src/rules/ui-audit-before-build.md +106 -0
  107. package/.agent-src/rules/user-interaction.md +81 -3
  108. package/.agent-src/scripts/update_roadmap_progress.py +48 -6
  109. package/.agent-src/skills/blade-ui/SKILL.md +30 -5
  110. package/.agent-src/skills/command-routing/SKILL.md +32 -0
  111. package/.agent-src/skills/command-writing/SKILL.md +41 -2
  112. package/.agent-src/skills/description-assist/SKILL.md +21 -0
  113. package/.agent-src/skills/estimate-ticket/SKILL.md +0 -1
  114. package/.agent-src/skills/existing-ui-audit/SKILL.md +187 -0
  115. package/.agent-src/skills/fe-design/SKILL.md +72 -60
  116. package/.agent-src/skills/finishing-a-development-branch/SKILL.md +4 -0
  117. package/.agent-src/skills/flux/SKILL.md +31 -4
  118. package/.agent-src/skills/guideline-writing/SKILL.md +24 -2
  119. package/.agent-src/skills/learning-to-rule-or-skill/SKILL.md +51 -9
  120. package/.agent-src/skills/livewire/SKILL.md +30 -4
  121. package/.agent-src/skills/md-language-check/SKILL.md +103 -0
  122. package/.agent-src/skills/php-coder/SKILL.md +24 -0
  123. package/.agent-src/skills/react-shadcn-ui/SKILL.md +121 -0
  124. package/.agent-src/skills/refine-prompt/SKILL.md +220 -0
  125. package/.agent-src/skills/refine-ticket/SKILL.md +2 -4
  126. package/.agent-src/skills/roadmap-management/SKILL.md +10 -3
  127. package/.agent-src/skills/rule-writing/SKILL.md +23 -1
  128. package/.agent-src/skills/skill-writing/SKILL.md +1 -3
  129. package/.agent-src/skills/upstream-contribute/SKILL.md +1 -1
  130. package/.agent-src/skills/using-git-worktrees/SKILL.md +3 -1
  131. package/.agent-src/templates/AGENTS.md +24 -6
  132. package/.agent-src/templates/agent-settings.md +149 -0
  133. package/.agent-src/templates/github-workflows/roadmap-progress-check.yml +63 -0
  134. package/.agent-src/templates/hooks/pre-commit-roadmap-progress +60 -0
  135. package/.agent-src/templates/roadmaps.md +8 -2
  136. package/.agent-src/templates/scripts/implement_ticket/__init__.py +63 -26
  137. package/.agent-src/templates/scripts/implement_ticket/__main__.py +8 -2
  138. package/.agent-src/templates/scripts/memory_lookup.py +382 -21
  139. package/.agent-src/templates/scripts/memory_status.py +110 -9
  140. package/.agent-src/templates/scripts/telemetry/__init__.py +42 -0
  141. package/.agent-src/templates/scripts/telemetry/aggregator.py +154 -0
  142. package/.agent-src/templates/scripts/telemetry/boundary.py +171 -0
  143. package/.agent-src/templates/scripts/telemetry/engagement.py +238 -0
  144. package/.agent-src/templates/scripts/telemetry/report_renderer.py +170 -0
  145. package/.agent-src/templates/scripts/telemetry/settings.py +112 -0
  146. package/.agent-src/templates/scripts/telemetry_record.py +166 -0
  147. package/.agent-src/templates/scripts/telemetry_report.py +161 -0
  148. package/.agent-src/templates/scripts/telemetry_status.py +142 -0
  149. package/.agent-src/templates/scripts/work_engine/__init__.py +58 -0
  150. package/.agent-src/templates/scripts/work_engine/__main__.py +9 -0
  151. package/.agent-src/templates/scripts/work_engine/cli.py +592 -0
  152. package/.agent-src/templates/scripts/{implement_ticket → work_engine}/delivery_state.py +7 -0
  153. package/.agent-src/templates/scripts/work_engine/directives/__init__.py +33 -0
  154. package/.agent-src/templates/scripts/work_engine/directives/backend/__init__.py +98 -0
  155. package/.agent-src/templates/scripts/{implement_ticket/steps → work_engine/directives/backend}/analyze.py +1 -1
  156. package/.agent-src/templates/scripts/{implement_ticket/steps → work_engine/directives/backend}/implement.py +2 -2
  157. package/.agent-src/templates/scripts/{implement_ticket/steps → work_engine/directives/backend}/memory.py +1 -1
  158. package/.agent-src/templates/scripts/{implement_ticket/steps → work_engine/directives/backend}/plan.py +1 -1
  159. package/.agent-src/templates/scripts/work_engine/directives/backend/refine.py +396 -0
  160. package/.agent-src/templates/scripts/{implement_ticket/steps → work_engine/directives/backend}/report.py +36 -4
  161. package/.agent-src/templates/scripts/{implement_ticket/steps → work_engine/directives/backend}/test.py +2 -2
  162. package/.agent-src/templates/scripts/{implement_ticket/steps → work_engine/directives/backend}/verify.py +2 -2
  163. package/.agent-src/templates/scripts/work_engine/directives/mixed/__init__.py +116 -0
  164. package/.agent-src/templates/scripts/work_engine/directives/mixed/contract.py +254 -0
  165. package/.agent-src/templates/scripts/work_engine/directives/mixed/stitch.py +229 -0
  166. package/.agent-src/templates/scripts/work_engine/directives/mixed/ui.py +231 -0
  167. package/.agent-src/templates/scripts/work_engine/directives/ui/__init__.py +113 -0
  168. package/.agent-src/templates/scripts/work_engine/directives/ui/_passthrough.py +44 -0
  169. package/.agent-src/templates/scripts/work_engine/directives/ui/apply.py +241 -0
  170. package/.agent-src/templates/scripts/work_engine/directives/ui/audit.py +414 -0
  171. package/.agent-src/templates/scripts/work_engine/directives/ui/design.py +335 -0
  172. package/.agent-src/templates/scripts/work_engine/directives/ui/polish.py +510 -0
  173. package/.agent-src/templates/scripts/work_engine/directives/ui/review.py +468 -0
  174. package/.agent-src/templates/scripts/work_engine/directives/ui_trivial/__init__.py +119 -0
  175. package/.agent-src/templates/scripts/work_engine/directives/ui_trivial/_skipped.py +37 -0
  176. package/.agent-src/templates/scripts/work_engine/directives/ui_trivial/apply.py +165 -0
  177. package/.agent-src/templates/scripts/work_engine/directives/ui_trivial/refine.py +66 -0
  178. package/.agent-src/templates/scripts/work_engine/directives/ui_trivial/report.py +62 -0
  179. package/.agent-src/templates/scripts/work_engine/directives/ui_trivial/test.py +115 -0
  180. package/.agent-src/templates/scripts/work_engine/dispatcher.py +331 -0
  181. package/.agent-src/templates/scripts/work_engine/hooks/__init__.py +54 -0
  182. package/.agent-src/templates/scripts/work_engine/hooks/builtin/__init__.py +32 -0
  183. package/.agent-src/templates/scripts/work_engine/hooks/builtin/_chat_history_base.py +103 -0
  184. package/.agent-src/templates/scripts/work_engine/hooks/builtin/chat_history_append.py +44 -0
  185. package/.agent-src/templates/scripts/work_engine/hooks/builtin/chat_history_halt_append.py +42 -0
  186. package/.agent-src/templates/scripts/work_engine/hooks/builtin/chat_history_heartbeat.py +50 -0
  187. package/.agent-src/templates/scripts/work_engine/hooks/builtin/chat_history_turn_check.py +49 -0
  188. package/.agent-src/templates/scripts/work_engine/hooks/builtin/directive_set_guard.py +53 -0
  189. package/.agent-src/templates/scripts/work_engine/hooks/builtin/halt_surface_audit.py +50 -0
  190. package/.agent-src/templates/scripts/work_engine/hooks/builtin/state_shape_validation.py +52 -0
  191. package/.agent-src/templates/scripts/work_engine/hooks/builtin/trace.py +84 -0
  192. package/.agent-src/templates/scripts/work_engine/hooks/context.py +66 -0
  193. package/.agent-src/templates/scripts/work_engine/hooks/events.py +44 -0
  194. package/.agent-src/templates/scripts/work_engine/hooks/exceptions.py +79 -0
  195. package/.agent-src/templates/scripts/work_engine/hooks/registry.py +60 -0
  196. package/.agent-src/templates/scripts/work_engine/hooks/runner.py +73 -0
  197. package/.agent-src/templates/scripts/work_engine/hooks/settings.py +141 -0
  198. package/.agent-src/templates/scripts/work_engine/intent/__init__.py +47 -0
  199. package/.agent-src/templates/scripts/work_engine/intent/classify.py +280 -0
  200. package/.agent-src/templates/scripts/work_engine/migration/__init__.py +8 -0
  201. package/.agent-src/templates/scripts/work_engine/migration/v0_to_v1.py +199 -0
  202. package/.agent-src/templates/scripts/work_engine/resolvers/__init__.py +22 -0
  203. package/.agent-src/templates/scripts/work_engine/resolvers/diff.py +106 -0
  204. package/.agent-src/templates/scripts/work_engine/resolvers/file.py +113 -0
  205. package/.agent-src/templates/scripts/work_engine/resolvers/prompt.py +90 -0
  206. package/.agent-src/templates/scripts/work_engine/scoring/__init__.py +14 -0
  207. package/.agent-src/templates/scripts/work_engine/scoring/confidence.py +300 -0
  208. package/.agent-src/templates/scripts/work_engine/stack/__init__.py +31 -0
  209. package/.agent-src/templates/scripts/work_engine/stack/detect.py +187 -0
  210. package/.agent-src/templates/scripts/work_engine/state.py +641 -0
  211. package/.claude-plugin/marketplace.json +105 -2
  212. package/AGENTS.md +36 -8
  213. package/CHANGELOG.md +558 -0
  214. package/README.md +146 -4
  215. package/composer.json +3 -0
  216. package/config/agent-settings.template.yml +45 -0
  217. package/config/gitignore-block.txt +4 -0
  218. package/docs/architecture.md +28 -1
  219. package/docs/development.md +1 -1
  220. package/docs/getting-started.md +3 -2
  221. package/docs/installation.md +86 -0
  222. package/docs/showcase.md +204 -0
  223. package/package.json +9 -1
  224. package/scripts/agent-config +274 -0
  225. package/scripts/audit_cloud_compatibility.py +288 -0
  226. package/scripts/build_cloud_bundle.py +458 -0
  227. package/scripts/build_linear_digest.py +263 -0
  228. package/scripts/chat_history.py +796 -7
  229. package/scripts/check_compression.py +139 -0
  230. package/scripts/check_iron_law_prominence.py +143 -0
  231. package/scripts/check_md_language.py +159 -0
  232. package/scripts/check_portability.py +36 -0
  233. package/scripts/check_reply_consistency.py +140 -0
  234. package/scripts/command_suggester/__init__.py +51 -0
  235. package/scripts/command_suggester/cooldown.py +132 -0
  236. package/scripts/command_suggester/loader.py +70 -0
  237. package/scripts/command_suggester/match.py +180 -0
  238. package/scripts/command_suggester/rank.py +120 -0
  239. package/scripts/command_suggester/render.py +86 -0
  240. package/scripts/command_suggester/sanitize.py +113 -0
  241. package/scripts/command_suggester/settings.py +125 -0
  242. package/scripts/command_suggester/types.py +78 -0
  243. package/scripts/hooks/augment-chat-history.sh +56 -0
  244. package/scripts/install-hooks.sh +67 -0
  245. package/scripts/install.py +150 -33
  246. package/scripts/lint_marketplace.py +27 -0
  247. package/scripts/memory_lookup.py +143 -7
  248. package/scripts/memory_status.py +76 -14
  249. package/scripts/migrate_command_suggestions.py +151 -0
  250. package/scripts/postinstall.sh +16 -0
  251. package/scripts/schemas/command.schema.json +41 -0
  252. package/scripts/skill_linter.py +67 -0
  253. package/scripts/sync_agent_settings.py +42 -12
  254. package/templates/consumer-settings/augment-cli-hooks.json +54 -0
  255. package/templates/consumer-settings/claude-settings.json +55 -1
  256. package/.agent-src/templates/scripts/implement_ticket/cli.py +0 -171
  257. package/.agent-src/templates/scripts/implement_ticket/dispatcher.py +0 -134
  258. package/.agent-src/templates/scripts/implement_ticket/steps/__init__.py +0 -49
  259. package/.agent-src/templates/scripts/implement_ticket/steps/refine.py +0 -140
  260. package/.agent-src/templates/scripts/{implement_ticket → work_engine}/persona_policy.py +0 -0
@@ -0,0 +1,98 @@
1
+ """Backend directive set — step handlers for the backend-coding flow.
2
+
3
+ Each module exposes a single ``run`` callable that matches the
4
+ ``Step`` protocol defined in ``...delivery_state``. The dispatcher
5
+ wires them into the ``STEP_ORDER`` mapping at call time; nothing in
6
+ this package imports handlers eagerly, so a partial wiring during
7
+ development is caught by the dispatcher's missing-step check rather
8
+ than by an import-time failure.
9
+
10
+ This is the first concrete entry in the
11
+ :mod:`work_engine.directives` package. R1 Phase 4 Step 3 moved the
12
+ handlers here from the original ``work_engine.steps`` location so
13
+ the upcoming generalized dispatcher (Step 2) can select between
14
+ multiple directive sets uniformly via :func:`get_steps`. The
15
+ external behavior — flow order, ambiguity surfaces, halt-points —
16
+ is unchanged.
17
+
18
+ The deterministic gates (``refine``, ``memory``, ``analyze``)
19
+ validate upstream state; the delegation gates (``plan``,
20
+ ``implement``, ``test``, ``verify``) halt with
21
+ ``@agent-directive:`` markers so the orchestrator can invoke the
22
+ matching skill and resume. ``report`` renders the delivery Markdown
23
+ once everything else has succeeded. See
24
+ ``agents/roadmaps/road-to-implement-ticket.md`` for the shipping
25
+ order and ``agents/contexts/implement-ticket-flow.md`` for the
26
+ slice contracts each handler writes to.
27
+ """
28
+ from __future__ import annotations
29
+
30
+ from collections.abc import Mapping
31
+
32
+ from ...delivery_state import Step
33
+ from . import analyze, implement, memory, plan, refine, report, test, verify
34
+
35
# Identifier for this directive set.
DIRECTIVE_SET_NAME = "backend"
"""External name carried in ``state.directive_set`` for this set."""

# Capability whitelist: the input kinds this set can drive end to end.
SUPPORTED_KINDS: tuple[str, ...] = ("ticket", "prompt")
"""Input kinds this directive set knows how to handle.

Read by :func:`work_engine.dispatcher.assert_kind_supported` before the
loop starts. The schema's :data:`work_engine.state.KNOWN_INPUT_KINDS` is
the *envelope* whitelist (what is accepted on disk); ``SUPPORTED_KINDS``
is the *capability* whitelist (what this set can actually drive end to
end).

R1 only carried ``ticket``. R2 Phase 3 Step 3 added ``prompt`` once the
``refine`` step learned to detect prompt envelopes (presence of ``raw``
key in ``state.ticket``), delegate to the ``refine-prompt`` skill on
the first pass, and route the resulting confidence band into
``SUCCESS`` / ``PARTIAL`` / ``BLOCKED`` per
``agents/roadmaps/archive/road-to-prompt-driven-execution.md`` Phase 3."""

# Step modules in canonical flow order. ``all_ambiguities`` and ``get_steps``
# both derive their keys and their ordering from this tuple, so adding or
# reordering a step only needs to happen here.
_STEPS = (refine, memory, analyze, plan, implement, test, verify, report)
55
+
56
+
57
def all_ambiguities() -> dict[str, tuple[dict[str, str], ...]]:
    """Collect every step's declared ``AMBIGUITIES``, keyed by step name.

    The key is the last dotted component of each step module's
    ``__name__``; entries appear in the order of ``_STEPS``. Documentation
    generators and the ``test_ambiguity_coverage`` suite use the result to
    prove that every step explicitly declares what can surface a
    ``BLOCKED`` outcome. Steps that always succeed (``memory``,
    ``report``) contribute an empty tuple — declared intent, not an
    omission.
    """
    catalogue: dict[str, tuple[dict[str, str], ...]] = {}
    for module in _STEPS:
        # ``pkg.sub.refine`` -> ``refine``; a name without dots is kept whole.
        _, _, short_name = module.__name__.rpartition(".")
        catalogue[short_name] = module.AMBIGUITIES
    return catalogue
67
+
68
+
69
def get_steps() -> Mapping[str, Step]:
    """Build the ``{step_name: handler}`` mapping the dispatcher walks.

    Every value is the module-level ``run`` callable of a step module,
    matching the :data:`work_engine.delivery_state.Step` protocol —
    ``Callable[[DeliveryState], StepResult]`` — exactly what
    :func:`work_engine.dispatcher.dispatch` calls. Keys are the last
    dotted component of each module's ``__name__`` and are inserted in
    the canonical backend flow order (refine → memory → analyze → plan →
    implement → test → verify → report). The dispatcher's own
    ``STEP_ORDER`` is the single source of truth for *which* steps exist;
    the directive set is the single source of truth for *how* each one
    runs.
    """
    handlers: dict[str, Step] = {}
    for module in _STEPS:
        # ``pkg.sub.refine`` -> ``refine``; a name without dots is kept whole.
        _, _, short_name = module.__name__.rpartition(".")
        handlers[short_name] = module.run
    return handlers
83
+
84
+
85
# Public surface of the package: the two identifying constants, the two
# accessor functions, and the eight step modules imported at the top of
# this file. Kept in alphabetical order.
__all__ = [
    "DIRECTIVE_SET_NAME",
    "SUPPORTED_KINDS",
    "all_ambiguities",
    "analyze",
    "get_steps",
    "implement",
    "memory",
    "plan",
    "refine",
    "report",
    "test",
    "verify",
]
@@ -20,7 +20,7 @@ rule. Otherwise it returns ``SUCCESS`` without mutating state.
20
20
  """
21
21
  from __future__ import annotations
22
22
 
23
- from ..delivery_state import DeliveryState, Outcome, StepResult
23
+ from ...delivery_state import DeliveryState, Outcome, StepResult
24
24
 
25
25
  AMBIGUITIES: tuple[dict[str, str], ...] = (
26
26
  {
@@ -25,13 +25,13 @@ from __future__ import annotations
25
25
 
26
26
  from typing import Any
27
27
 
28
- from ..delivery_state import (
28
+ from ...delivery_state import (
29
29
  DeliveryState,
30
30
  Outcome,
31
31
  StepResult,
32
32
  agent_directive,
33
33
  )
34
- from ..persona_policy import resolve_policy
34
+ from ...persona_policy import resolve_policy
35
35
 
36
36
  AMBIGUITIES: tuple[dict[str, str], ...] = (
37
37
  {
@@ -23,7 +23,7 @@ from __future__ import annotations
23
23
  import re
24
24
  from typing import Any, Iterable
25
25
 
26
- from ..delivery_state import DeliveryState, Outcome, StepResult
26
+ from ...delivery_state import DeliveryState, Outcome, StepResult
27
27
 
28
28
  MEMORY_TYPES: tuple[str, ...] = (
29
29
  "domain-invariants",
@@ -28,7 +28,7 @@ from __future__ import annotations
28
28
 
29
29
  from typing import Any
30
30
 
31
- from ..delivery_state import (
31
+ from ...delivery_state import (
32
32
  DeliveryState,
33
33
  Outcome,
34
34
  StepResult,
@@ -0,0 +1,396 @@
1
+ """``refine`` step — deterministic gate in front of the refinement skills.
2
+
3
+ The step never calls an LLM. It inspects ``state.ticket`` (which carries
4
+ ``input.data`` after the CLI projection) and routes on shape:
5
+
6
+ - **Ticket envelope** (``id``, ``title``, ``acceptance_criteria``) — the
7
+ R1 path. Validates the minimum viable shape and either returns
8
+ ``SUCCESS`` or ``BLOCKED`` with numbered options pointing at
9
+ ``/refine-ticket``.
10
+ - **Prompt envelope** (``raw`` key present, ``reconstructed_ac`` /
11
+ ``assumptions`` slots) — the R2 path. On the first pass the gate
12
+ delegates to the ``refine-prompt`` skill via an ``@agent-directive:``
13
+ halt; on the rebound it scores the reconstructed envelope and routes
14
+ the resulting confidence band:
15
+
16
+ - ``high`` → ``SUCCESS`` (silent proceed, breakdown logged for the report)
17
+ - ``medium`` → ``PARTIAL`` with an assumptions-report halt
18
+ - ``low`` → ``BLOCKED`` with one clarifying question targeted at the
19
+ weakest dimension
20
+
21
+ The checks live here (rather than inside the refinement skills) because
22
+ the dispatcher is synchronous Python: it cannot "delegate" to an agent
23
+ skill mid-loop. Making the gate deterministic keeps the contract "block
24
+ on ambiguity, never guess" enforceable from code, and ensures the band
25
+ the dispatcher routes on is always engine-computed — the skill produces
26
+ AC + assumptions, the engine decides.
27
+ """
28
+ from __future__ import annotations
29
+
30
+ from typing import Any
31
+
32
+ from ...delivery_state import (
33
+ DeliveryState,
34
+ Outcome,
35
+ StepResult,
36
+ agent_directive,
37
+ )
38
+ from ...scoring import confidence as _confidence
39
+
40
+ _MIN_TITLE_LEN = 3
41
+ _MIN_AC_LEN = 10
42
+
43
+ AMBIGUITIES: tuple[dict[str, str], ...] = (
44
+ {
45
+ "code": "missing_id",
46
+ "trigger": "ticket has no `id` field (or only whitespace)",
47
+ "resolution": "run `/refine-ticket` or paste the ticket id in chat",
48
+ },
49
+ {
50
+ "code": "trivial_title",
51
+ "trigger": f"title missing or shorter than {_MIN_TITLE_LEN} chars",
52
+ "resolution": "run `/refine-ticket` to rewrite the title",
53
+ },
54
+ {
55
+ "code": "missing_or_vague_ac",
56
+ "trigger": (
57
+ f"acceptance_criteria empty, non-list, or any item under "
58
+ f"{_MIN_AC_LEN} chars"
59
+ ),
60
+ "resolution": "run `/refine-ticket` to add concrete acceptance criteria",
61
+ },
62
+ {
63
+ "code": "prompt_unrefined",
64
+ "trigger": (
65
+ "prompt envelope present but `reconstructed_ac` is empty — "
66
+ "the deterministic gate has nothing to score yet"
67
+ ),
68
+ "resolution": "agent directive `refine-prompt` → run the skill, "
69
+ "write AC + assumptions back into `state.ticket`",
70
+ },
71
+ {
72
+ "code": "prompt_medium_confidence",
73
+ "trigger": (
74
+ "scored band is `medium` and the user has not confirmed the "
75
+ "assumptions report yet"
76
+ ),
77
+ "resolution": "user confirms the reconstructed AC + assumptions, "
78
+ "or refines them; agent flips `confidence_confirmed=True` to "
79
+ "release the gate",
80
+ },
81
+ {
82
+ "code": "prompt_low_confidence",
83
+ "trigger": (
84
+ "scored band is `low` — too little signal to plan against, "
85
+ "even after reconstruction"
86
+ ),
87
+ "resolution": "user answers one clarifying question; the agent "
88
+ "re-runs `refine-prompt` against the refreshed prompt",
89
+ },
90
+ {
91
+ "code": "prompt_ui_intent",
92
+ "trigger": (
93
+ "scorer flagged `ui_intent=True` — the prompt reads as UI "
94
+ "work and the backend track cannot ship it cleanly"
95
+ ),
96
+ "resolution": "user re-frames the prompt as backend-only, parks "
97
+ "it for Roadmap 3 (`road-to-product-ui-track.md`), or aborts",
98
+ },
99
+ )
100
+ """Declared ambiguity surfaces. Every BLOCKED / PARTIAL return maps to one code."""
101
+
102
+
103
def run(state: DeliveryState) -> StepResult:
    """Route on envelope shape: ticket path or prompt path.

    A prompt envelope (non-blank string under ``raw``) is handed to
    ``_run_prompt``. Anything else is treated as a ticket: ``_diagnose``
    lists what is missing, and the step returns ``SUCCESS`` when the list
    is empty or ``BLOCKED`` with numbered options otherwise.

    Args:
        state: Delivery state whose ``ticket`` attribute carries the input
            payload; a falsy ``ticket`` is treated as an empty ticket.

    Returns:
        The ``StepResult`` for this gate. On ``BLOCKED`` it carries the
        rendered questions and a one-line summary of the deficiencies.
    """
    data = state.ticket or {}
    if _is_prompt_envelope(data):
        return _run_prompt(state, data)

    # ``_is_prompt_envelope`` already tolerates a non-dict payload; do the
    # same here so a malformed ticket blocks with a readable diagnosis
    # instead of raising ``AttributeError`` on ``.get``.
    if not isinstance(data, dict):
        data = {}

    deficiencies = _diagnose(data)
    if not deficiencies:
        return StepResult(outcome=Outcome.SUCCESS)

    # Only a non-blank string counts as an id (the same rule ``_diagnose``
    # applies). Anything else is rendered as the placeholder, so the
    # question formatter always receives a string.
    raw_id = data.get("id")
    if isinstance(raw_id, str) and raw_id.strip():
        ticket_id = raw_id.strip()
    else:
        ticket_id = "(no id)"

    questions = _format_questions(ticket_id, deficiencies)
    return StepResult(
        outcome=Outcome.BLOCKED,
        questions=questions,
        message=(
            f"Ticket {ticket_id} is not refined enough to plan against: "
            + "; ".join(deficiencies)
        ),
    )
123
+
124
+
125
+ def _is_prompt_envelope(data: dict[str, Any]) -> bool:
126
+ """True when ``state.ticket`` carries a prompt envelope.
127
+
128
+ The presence of a string-valued ``raw`` key is unambiguous: ticket
129
+ payloads never carry ``raw``, and prompt envelopes always do (the
130
+ resolver writes it before any handler sees the state).
131
+ """
132
+ if not isinstance(data, dict):
133
+ return False
134
+ raw = data.get("raw")
135
+ return isinstance(raw, str) and bool(raw.strip())
136
+
137
+
138
def _diagnose(ticket: dict[str, Any]) -> list[str]:
    """List, in human-readable form, what the ticket is missing.

    Checks run in the order a reader needs them (identity → summary →
    acceptance criteria) so the surfaced questions read naturally. An
    empty list means the ticket passed every check.
    """
    problems: list[str] = []

    identifier = ticket.get("id")
    if not (isinstance(identifier, str) and identifier.strip()):
        problems.append("missing ticket id")

    summary = ticket.get("title")
    if not (isinstance(summary, str) and len(summary.strip()) >= _MIN_TITLE_LEN):
        problems.append("missing or trivial title")

    criteria = ticket.get("acceptance_criteria")
    if isinstance(criteria, list) and criteria:
        # Positions are reported 1-based because they are shown to the user.
        vague_positions = [
            str(position)
            for position, entry in enumerate(criteria, start=1)
            if not _is_concrete_ac(entry)
        ]
        if vague_positions:
            problems.append(
                "vague acceptance criteria at position(s) "
                + ", ".join(vague_positions),
            )
    else:
        problems.append("no acceptance criteria")

    return problems
170
+
171
+
172
def _is_concrete_ac(item: Any) -> bool:
    """Tell whether one acceptance criterion clears the length floor.

    Only a string whose stripped length reaches ``_MIN_AC_LEN`` qualifies;
    any other type is rejected. The floor is deliberately loose: refine is
    a gate, not a style judge. The heavy lifting (measurability,
    testability, tone) is owned by the ``refine-ticket`` skill on the
    rebound.
    """
    return isinstance(item, str) and len(item.strip()) >= _MIN_AC_LEN
182
+
183
+
184
+ def _format_questions(ticket_id: str, deficiencies: list[str]) -> list[str]:
185
+ """Render the numbered options shown to the user when BLOCKED.
186
+
187
+ Three options, ordered by likely next action: run the existing
188
+ refinement skill, paste the missing data in chat, or abandon the
189
+ ticket entirely. ``user-interaction`` requires numbered, prose-
190
+ free options; the deficiency list is rendered as a headnote.
191
+ """
192
+ headnote = (
193
+ "> Ticket "
194
+ + ticket_id
195
+ + " is missing: "
196
+ + "; ".join(deficiencies)
197
+ + "."
198
+ )
199
+ return [
200
+ headnote,
201
+ f"> 1. Run `/refine-ticket {ticket_id}` and re-invoke `/implement-ticket`",
202
+ "> 2. Provide the missing details in chat — I'll merge them into the ticket",
203
+ "> 3. Abandon this ticket — too vague to implement",
204
+ ]
205
+
206
+
207
def _run_prompt(state: DeliveryState, data: dict[str, Any]) -> StepResult:
    """Score the prompt envelope and route on the resulting band.

    First pass (no reconstructed AC yet) → delegate to ``refine-prompt``.
    Later passes score the envelope, record the breakdown under
    ``data['confidence']``, mirror the AC into
    ``data['acceptance_criteria']``, and then branch:

    - ``ui_intent`` flagged → UI-intent halt, regardless of band.
    - ``high`` → ``SUCCESS``.
    - ``medium`` → ``SUCCESS`` only when ``confidence_confirmed`` is
      exactly ``True``; otherwise the assumptions-report halt.
    - anything else (``low``) → the low-confidence halt. The confirmation
      flag is never consulted here, so it cannot release a ``low`` band.

    ``state`` is accepted for signature symmetry with the other paths; the
    body only reads and mutates ``data``.
    """
    raw = data.get("raw") or ""

    # Anything that is not a list is treated as "nothing supplied".
    candidate_ac = data.get("reconstructed_ac")
    ac = candidate_ac if isinstance(candidate_ac, list) else []
    candidate_assumptions = data.get("assumptions")
    assumptions = (
        candidate_assumptions if isinstance(candidate_assumptions, list) else []
    )

    if not ac:
        return _delegate_to_refine_prompt(raw)

    result = _confidence.score(raw=raw, ac=ac, assumptions=assumptions)

    # Stored on the envelope so the report renderer can include the
    # breakdown without re-scoring.
    breakdown = {
        "band": result.band,
        "score": result.score,
        "dimensions": dict(result.dimensions),
        "reasons": list(result.reasons),
        "ui_intent": result.ui_intent,
    }
    data["confidence"] = breakdown

    # Downstream gates (analyze, plan, implement) read the legacy
    # ``acceptance_criteria`` slot, while prompt envelopes carry AC under
    # ``reconstructed_ac``. Without this projection ``analyze`` blocks with
    # "ticket lost its acceptance criteria" the moment ``refine`` succeeds.
    data["acceptance_criteria"] = list(ac)

    if result.ui_intent:
        return _halt_ui_intent(raw, result)

    band = result.band
    if band == "high":
        return StepResult(outcome=Outcome.SUCCESS)
    if band != "medium":
        return _halt_low(raw, result)

    # Medium band: released only by an explicit ``True`` confirmation.
    if data.get("confidence_confirmed") is True:
        return StepResult(outcome=Outcome.SUCCESS)
    return _halt_medium(raw, ac, assumptions, result)
260
+
261
+
262
def _delegate_to_refine_prompt(raw: str) -> StepResult:
    """Return a BLOCKED result whose first line is the ``refine-prompt`` directive.

    The question list carries the agent directive, a trimmed preview of
    ``raw``, a note that no acceptance criteria exist yet, and two
    numbered options (continue / abort).
    """
    prompt_line = f"> Prompt received: {_preview(raw)}"
    lines = [
        agent_directive("refine-prompt"),
        prompt_line,
        "> No reconstructed acceptance criteria yet — running `refine-prompt` and resuming.",
        "> 1. Continue — let the skill reconstruct AC + assumptions",
        "> 2. Abort — the prompt is not what I meant",
    ]
    return StepResult(
        outcome=Outcome.BLOCKED,
        questions=lines,
        message="Prompt envelope present but unrefined; delegating to refine-prompt.",
    )
277
+
278
+
279
def _halt_medium(
    raw: str,
    ac: list[Any],
    assumptions: list[Any],
    result: "_confidence.ConfidenceScore",
) -> StepResult:
    """Return a PARTIAL result asking the user to confirm the assumptions.

    The question list shows a trimmed prompt preview, the medium score,
    the reconstructed AC numbered from 1, the assumptions as bullets
    (or a ``(none recorded)`` placeholder when the list is empty), and
    three numbered options: continue, refine, abort.
    """
    preview = _preview(raw)

    numbered_ac = []
    for position, criterion in enumerate(ac, start=1):
        numbered_ac.append(f"> {position}. {criterion}")

    bullets = [f"> - {entry}" for entry in assumptions]
    if not bullets:
        bullets = ["> - (none recorded)"]

    score_text = f"{result.score:.2f}"
    questions = [
        f"> Prompt: {preview}",
        f"> Confidence: **medium** (score {score_text}). "
        "Assumptions worth confirming before I plan.",
        "> Reconstructed AC:",
    ]
    questions.extend(numbered_ac)
    questions.append("> Assumptions:")
    questions.extend(bullets)
    questions.extend(
        [
            "> 1. Continue as-is — the AC + assumptions are good enough",
            "> 2. Refine — I'll send a corrected prompt and re-run `refine-prompt`",
            "> 3. Abort — pause this `/work` cycle",
        ]
    )

    return StepResult(
        outcome=Outcome.PARTIAL,
        questions=questions,
        message=f"Prompt scored medium ({score_text}); halting for assumptions confirmation.",
    )
312
+
313
+
314
def _halt_low(raw: str, result: "_confidence.ConfidenceScore") -> StepResult:
    """Return a BLOCKED result with one question aimed at the weakest dimension.

    The weakest dimension is chosen by ``_weakest_dimension``; its index
    is also used to look up a reason in ``result.reasons`` (empty string
    when the index is out of range — presumably reasons are ordered like
    the dimension names; confirm against the scorer). Unknown dimension
    names fall back to a generic "tighten the prompt" question.
    """
    preview = _preview(raw)
    weakest_idx, weakest_name = _weakest_dimension(result.dimensions)

    if weakest_idx < len(result.reasons):
        reason = result.reasons[weakest_idx]
    else:
        reason = ""

    # One clarifying question per known scoring dimension.
    question_by_dimension = {
        "goal_clarity": "What is the single observable outcome you want?",
        "scope_boundary": "Which file, class, or module should I touch?",
        "ac_evidence": "What concrete behaviour proves it works?",
        "stack_data": "Which table, column, or migration target is involved?",
        "reversibility": "Is this change destructive — should I work behind a flag?",
    }
    question = question_by_dimension.get(weakest_name, "Can you tighten the prompt?")

    score_text = f"{result.score:.2f}"
    lines = [
        f"> Prompt: {preview}",
        f"> Confidence: **low** (score {score_text}). Weakest dimension: `{weakest_name}` — {reason}",
        f"> {question}",
        "> 1. I'll answer — paste the answer in chat and re-invoke `/work`",
        "> 2. Abort — drop this prompt",
    ]
    return StepResult(
        outcome=Outcome.BLOCKED,
        questions=lines,
        message=f"Prompt scored low ({score_text}); blocking on `{weakest_name}` clarification.",
    )
342
+
343
+
344
def _halt_ui_intent(
    raw: str, result: "_confidence.ConfidenceScore",
) -> StepResult:
    """Return a BLOCKED result for prompts flagged as UI work.

    The backend ``directives/backend/`` set has no UI capability, so a UI
    prompt routed through it would ship a backend stub or a guessed
    component; a clean refusal pointing at the deferred R3 track is
    preferred. The halt does not depend on the band: the band and score
    are only reported in the message, never used to decide anything.
    """
    lines = [
        f"> Prompt: {_preview(raw)}",
        "> This prompt reads as **UI work** — the backend dispatch track can't ship it cleanly.",
        "> UI dispatch is deferred to Roadmap 3 (`road-to-product-ui-track.md`); "
        "until it lands, `/work` only handles backend-shaped prompts.",
        "> 1. Re-frame as a backend-only prompt — I'll re-score and proceed",
        "> 2. Park this prompt — wait for R3 and re-invoke `/work` then",
        "> 3. Abort — drop this prompt",
    ]
    summary = (
        f"Prompt flagged as UI-intent (band={result.band}, "
        f"score={result.score:.2f}); blocked pending R3 UI track."
    )
    return StepResult(outcome=Outcome.BLOCKED, questions=lines, message=summary)
375
+
376
+
377
def _weakest_dimension(dimensions: dict[str, int]) -> tuple[int, str]:
    """Return ``(index, name)`` of the lowest-scoring dimension.

    Candidates are the names in ``_confidence.DIMENSION_NAMES``; a name
    missing from ``dimensions`` counts as ``0``. Ties go to the name that
    appears first in that sequence, so the same input always selects the
    same dimension.
    """
    names = list(_confidence.DIMENSION_NAMES)
    # Sort key is (score, position): position breaks ties deterministically.
    position, name = min(
        enumerate(names),
        key=lambda pair: (dimensions.get(pair[1], 0), pair[0]),
    )
    return position, name
386
+
387
+
388
def _preview(raw: str, max_chars: int = 80) -> str:
    """Trim a raw prompt for inline display in halts.

    Whitespace runs (including newlines) are collapsed to single spaces
    and the ends are stripped. Text longer than ``max_chars`` is cut to
    ``max_chars - 1`` characters, right-stripped, and suffixed with an
    ellipsis so the result never exceeds ``max_chars``.

    Args:
        raw: Prompt text; a falsy value (``None``, ``""``) yields ``""``.
        max_chars: Maximum length of the returned string. A value of
            zero or less leaves no room for any text and yields ``""``.

    Returns:
        The collapsed, possibly truncated, single-line preview.
    """
    # Without this guard a non-positive limit would turn the slice end
    # negative and return *more* than ``max_chars`` characters.
    if max_chars <= 0:
        return ""
    text = " ".join((raw or "").split())
    if len(text) <= max_chars:
        return text
    return text[: max_chars - 1].rstrip() + "\u2026"
394
+
395
+
396
+ __all__ = ["AMBIGUITIES", "run"]
@@ -17,8 +17,8 @@ from __future__ import annotations
17
17
 
18
18
  from typing import Any, Iterable
19
19
 
20
- from ..delivery_state import DeliveryState, Outcome, StepResult
21
- from ..persona_policy import resolve_policy
20
+ from ...delivery_state import DeliveryState, Outcome, StepResult
21
+ from ...persona_policy import resolve_policy
22
22
 
23
23
  AMBIGUITIES: tuple[dict[str, str], ...] = ()
24
24
  """Report rendering is pure and always succeeds — no blocked paths."""
@@ -38,12 +38,13 @@ def _render(state: DeliveryState) -> str:
38
38
  _changes_section(state),
39
39
  _tests_section(state),
40
40
  _verify_section(state),
41
+ _visual_preview_section(state),
41
42
  _memory_section(state),
42
43
  _followups_section(state),
43
44
  _next_commands_section(state),
44
45
  ]
45
- # Drop sections that opted out (memory-that-mattered returns "" when no
46
- # hit influenced an outcome — per the report schema drop-rule).
46
+ # Drop sections that opted out (memory-that-mattered and visual-preview
47
+ # return "" when their slice is absent — per the report schema drop-rule).
47
48
  return "\n\n".join(section for section in sections if section)
48
49
 
49
50
 
@@ -117,6 +118,37 @@ def _verify_section(state: DeliveryState) -> str:
117
118
  return "## Verify\n\n" + _format_kv_block(state.verify, "_(no verify verdict)_")
118
119
 
119
120
 
121
def _visual_preview_section(state: DeliveryState) -> str:
    """Render the "Visual preview" report section, or ``""`` to drop it.

    Reads the ``preview`` dict under ``state.ui_review``. A section is
    produced only when ``render_ok`` is exactly ``True``, ``skipped`` is
    falsy, and at least one of ``screenshot_path`` / ``dom_dump_path`` is
    a non-empty string. In every other case (attribute missing, wrong
    types, failed or skipped render, no artifacts) the empty string is
    returned so the caller omits the heading as well.
    """
    ui_review = getattr(state, "ui_review", None)
    preview = ui_review.get("preview") if isinstance(ui_review, dict) else None
    if not isinstance(preview, dict):
        return ""
    if preview.get("render_ok") is not True or preview.get("skipped"):
        return ""

    artifacts = (
        ("Screenshot", preview.get("screenshot_path")),
        ("DOM dump", preview.get("dom_dump_path")),
    )
    bullets = [
        f"- {label}: `{path}`"
        for label, path in artifacts
        if isinstance(path, str) and path
    ]
    if not bullets:
        return ""
    return "## Visual preview\n\n" + "\n".join(bullets)
150
+
151
+
120
152
  def _memory_section(state: DeliveryState) -> str:
121
153
  """Render **only** hits that changed an outcome (per report schema)."""
122
154
  influential = [
@@ -22,13 +22,13 @@ from __future__ import annotations
22
22
 
23
23
  from typing import Any
24
24
 
25
- from ..delivery_state import (
25
+ from ...delivery_state import (
26
26
  DeliveryState,
27
27
  Outcome,
28
28
  StepResult,
29
29
  agent_directive,
30
30
  )
31
- from ..persona_policy import resolve_policy
31
+ from ...persona_policy import resolve_policy
32
32
 
33
33
  _ALLOWED_VERDICTS = ("success", "failed", "mixed")
34
34
 
@@ -22,13 +22,13 @@ from __future__ import annotations
22
22
 
23
23
  from typing import Any
24
24
 
25
- from ..delivery_state import (
25
+ from ...delivery_state import (
26
26
  DeliveryState,
27
27
  Outcome,
28
28
  StepResult,
29
29
  agent_directive,
30
30
  )
31
- from ..persona_policy import resolve_policy
31
+ from ...persona_policy import resolve_policy
32
32
 
33
33
  _ALLOWED_VERDICTS = ("success", "blocked", "partial")
34
34