@seanyao/roll 0.5.0 → 2.602.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (181) hide show
  1. package/CHANGELOG.md +736 -0
  2. package/LICENSE +21 -0
  3. package/README.md +65 -165
  4. package/bin/dream-test-quality-scan +110 -0
  5. package/bin/roll +15030 -814
  6. package/conventions/config.yaml +17 -1
  7. package/conventions/global/AGENTS.md +146 -100
  8. package/conventions/global/CLAUDE.md +1 -21
  9. package/conventions/global/GEMINI.md +8 -22
  10. package/conventions/global/project_rules.md +9 -0
  11. package/conventions/templates/backend-service/AGENTS.md +30 -81
  12. package/conventions/templates/backend-service/GEMINI.md +3 -3
  13. package/conventions/templates/backend-service/project_rules.md +16 -0
  14. package/conventions/templates/cli/AGENTS.md +31 -58
  15. package/conventions/templates/cli/CLAUDE.md +3 -5
  16. package/conventions/templates/cli/GEMINI.md +3 -3
  17. package/conventions/templates/cli/project_rules.md +16 -0
  18. package/conventions/templates/frontend-only/AGENTS.md +29 -64
  19. package/conventions/templates/frontend-only/GEMINI.md +3 -3
  20. package/conventions/templates/frontend-only/project_rules.md +14 -0
  21. package/conventions/templates/fullstack/AGENTS.md +31 -79
  22. package/conventions/templates/fullstack/CLAUDE.md +1 -1
  23. package/conventions/templates/fullstack/GEMINI.md +3 -3
  24. package/conventions/templates/fullstack/project_rules.md +15 -0
  25. package/lib/README.md +42 -0
  26. package/lib/__pycache__/github_sync.cpython-314.pyc +0 -0
  27. package/lib/__pycache__/loop-fmt.cpython-314.pyc +0 -0
  28. package/lib/__pycache__/loop_result_eval.cpython-314.pyc +0 -0
  29. package/lib/__pycache__/loop_unstick.cpython-314.pyc +0 -0
  30. package/lib/__pycache__/model_prices.cpython-314.pyc +0 -0
  31. package/lib/__pycache__/prices_fetcher.cpython-314.pyc +0 -0
  32. package/lib/__pycache__/roll-home.cpython-314.pyc +0 -0
  33. package/lib/__pycache__/roll-loop-status.cpython-314.pyc +0 -0
  34. package/lib/__pycache__/roll_git.cpython-314.pyc +0 -0
  35. package/lib/__pycache__/roll_render.cpython-314.pyc +0 -0
  36. package/lib/__pycache__/slides-render.cpython-314.pyc +0 -0
  37. package/lib/agent_usage/README.md +49 -0
  38. package/lib/agent_usage/__init__.py +108 -0
  39. package/lib/agent_usage/__pycache__/__init__.cpython-314.pyc +0 -0
  40. package/lib/agent_usage/__pycache__/gemini.cpython-314.pyc +0 -0
  41. package/lib/agent_usage/__pycache__/kimi.cpython-314.pyc +0 -0
  42. package/lib/agent_usage/__pycache__/openai.cpython-314.pyc +0 -0
  43. package/lib/agent_usage/__pycache__/pi.cpython-314.pyc +0 -0
  44. package/lib/agent_usage/__pycache__/pi_emit.cpython-314.pyc +0 -0
  45. package/lib/agent_usage/__pycache__/qwen.cpython-314.pyc +0 -0
  46. package/lib/agent_usage/gemini.py +127 -0
  47. package/lib/agent_usage/kimi.py +278 -0
  48. package/lib/agent_usage/kimi_emit.py +123 -0
  49. package/lib/agent_usage/openai.py +126 -0
  50. package/lib/agent_usage/pi.py +200 -0
  51. package/lib/agent_usage/pi_emit.py +135 -0
  52. package/lib/agent_usage/qwen.py +128 -0
  53. package/lib/backfill-pi-usage.py +243 -0
  54. package/lib/changelog_audit.py +155 -0
  55. package/lib/changelog_generate.py +263 -0
  56. package/lib/context_feed_budget.sh +194 -0
  57. package/lib/github_sync.py +876 -0
  58. package/lib/i18n/README.md +54 -0
  59. package/lib/i18n/agent.sh +75 -0
  60. package/lib/i18n/alert.sh +20 -0
  61. package/lib/i18n/backlog.sh +96 -0
  62. package/lib/i18n/brief.sh +5 -0
  63. package/lib/i18n/changelog.sh +5 -0
  64. package/lib/i18n/ci.sh +15 -0
  65. package/lib/i18n/debug.sh +0 -0
  66. package/lib/i18n/doctor.sh +44 -0
  67. package/lib/i18n/dream.sh +0 -0
  68. package/lib/i18n/init.sh +91 -0
  69. package/lib/i18n/lang.sh +10 -0
  70. package/lib/i18n/loop.sh +140 -0
  71. package/lib/i18n/migrate.sh +74 -0
  72. package/lib/i18n/offboard.sh +31 -0
  73. package/lib/i18n/onboard.sh +0 -0
  74. package/lib/i18n/peer.sh +41 -0
  75. package/lib/i18n/peer_help.sh +25 -0
  76. package/lib/i18n/peer_reset.sh +7 -0
  77. package/lib/i18n/peer_status.sh +5 -0
  78. package/lib/i18n/prices.sh +3 -0
  79. package/lib/i18n/prices_refresh.sh +17 -0
  80. package/lib/i18n/prices_show.sh +7 -0
  81. package/lib/i18n/propose.sh +0 -0
  82. package/lib/i18n/release.sh +0 -0
  83. package/lib/i18n/research.sh +0 -0
  84. package/lib/i18n/review_pr.sh +0 -0
  85. package/lib/i18n/sentinel.sh +0 -0
  86. package/lib/i18n/setup.sh +3 -0
  87. package/lib/i18n/shared.sh +157 -0
  88. package/lib/i18n/skills/roll-brief.sh +47 -0
  89. package/lib/i18n/skills/roll-build.sh +97 -0
  90. package/lib/i18n/skills/roll-design.sh +18 -0
  91. package/lib/i18n/skills/roll-fix.sh +53 -0
  92. package/lib/i18n/skills/roll-loop.sh +28 -0
  93. package/lib/i18n/skills/roll-onboard.sh +33 -0
  94. package/lib/i18n/skills_catalog.sh +30 -0
  95. package/lib/i18n/slides.sh +3 -0
  96. package/lib/i18n/slides_build.sh +38 -0
  97. package/lib/i18n/slides_delete.sh +19 -0
  98. package/lib/i18n/slides_list.sh +14 -0
  99. package/lib/i18n/slides_logs.sh +12 -0
  100. package/lib/i18n/slides_new.sh +15 -0
  101. package/lib/i18n/slides_preview.sh +14 -0
  102. package/lib/i18n/slides_templates.sh +7 -0
  103. package/lib/i18n/status.sh +21 -0
  104. package/lib/i18n/update.sh +24 -0
  105. package/lib/i18n.sh +211 -0
  106. package/lib/loop-exit-summary.py +393 -0
  107. package/lib/loop-fmt.py +589 -0
  108. package/lib/loop_pick_agent.py +316 -0
  109. package/lib/loop_result_eval.py +469 -0
  110. package/lib/loop_unstick.py +180 -0
  111. package/lib/model_prices.py +194 -0
  112. package/lib/prices/README.md +35 -0
  113. package/lib/prices/snapshot-2026-05-22.json +22 -0
  114. package/lib/prices/snapshot-2026-05-23-deepseek.json +15 -0
  115. package/lib/prices/snapshot-2026-05-23-kimi.json +15 -0
  116. package/lib/prices_fetcher.py +285 -0
  117. package/lib/roll-backlog.py +225 -0
  118. package/lib/roll-brief.py +286 -0
  119. package/lib/roll-help.py +158 -0
  120. package/lib/roll-home.py +556 -0
  121. package/lib/roll-init.py +156 -0
  122. package/lib/roll-loop-status.py +1683 -0
  123. package/lib/roll-loop-story.py +191 -0
  124. package/lib/roll-onboard-render.py +378 -0
  125. package/lib/roll-peer.py +252 -0
  126. package/lib/roll-plan-validate.py +386 -0
  127. package/lib/roll-setup.py +102 -0
  128. package/lib/roll-status.py +367 -0
  129. package/lib/roll_git.py +41 -0
  130. package/lib/roll_render.py +414 -0
  131. package/lib/slides/components/README.md +123 -0
  132. package/lib/slides/components/cards-2.html +9 -0
  133. package/lib/slides/components/cards-3.html +9 -0
  134. package/lib/slides/components/cards-4.html +9 -0
  135. package/lib/slides/components/compare.html +22 -0
  136. package/lib/slides/components/highlight.html +9 -0
  137. package/lib/slides/components/pipeline.html +12 -0
  138. package/lib/slides/components/plain.html +7 -0
  139. package/lib/slides/components/quote.html +4 -0
  140. package/lib/slides/components/timeline.html +9 -0
  141. package/lib/slides/templates/introduction-v3.html +571 -0
  142. package/lib/slides/templates/pitch.html +0 -0
  143. package/lib/slides-render.py +778 -0
  144. package/lib/slides-validate.py +357 -0
  145. package/lib/test_quality_gate.py +143 -0
  146. package/package.json +8 -7
  147. package/skills/roll-.changelog/SKILL.md +406 -33
  148. package/skills/roll-.clarify/SKILL.md +5 -2
  149. package/skills/roll-.dream/SKILL.md +374 -0
  150. package/skills/roll-.echo/SKILL.md +5 -2
  151. package/skills/roll-.qa/SKILL.md +57 -3
  152. package/skills/roll-.review/SKILL.md +42 -3
  153. package/skills/roll-brief/SKILL.md +209 -0
  154. package/skills/roll-build/SKILL.md +308 -63
  155. package/skills/roll-debug/SKILL.md +341 -162
  156. package/skills/roll-debug/injectable-bb.js +263 -0
  157. package/skills/roll-deck/SKILL.md +296 -0
  158. package/skills/roll-design/ENGINEERING_CHECKLIST.md +1 -1
  159. package/skills/roll-design/SKILL.md +733 -94
  160. package/skills/roll-doc/SKILL.md +595 -0
  161. package/skills/roll-doctor/SKILL.md +192 -0
  162. package/skills/roll-fix/SKILL.md +149 -32
  163. package/skills/{roll-jot → roll-idea}/SKILL.md +18 -10
  164. package/skills/roll-loop/SKILL.md +579 -0
  165. package/skills/roll-notes/SKILL.md +103 -0
  166. package/skills/roll-onboard/SKILL.md +234 -0
  167. package/skills/roll-peer/SKILL.md +336 -0
  168. package/skills/roll-propose/SKILL.md +157 -0
  169. package/skills/roll-review-pr/SKILL.md +58 -0
  170. package/skills/roll-sentinel/SKILL.md +11 -2
  171. package/skills/roll-spar/SKILL.md +8 -6
  172. package/template/.github/workflows/ci.yml +5 -2
  173. package/template/AGENTS.md +20 -74
  174. package/skills/roll-research/SKILL.md +0 -307
  175. package/skills/roll-research/references/schema.json +0 -162
  176. package/skills/roll-research/scripts/md_to_pdf.py +0 -289
  177. package/tools/roll-fetch/SKILL.md +0 -182
  178. package/tools/roll-fetch/package.json +0 -15
  179. package/tools/roll-fetch/smart-web-fetch.js +0 -558
  180. package/tools/roll-probe/SKILL.md +0 -84
  181. /package/template/{BACKLOG.md → .roll/backlog.md} +0 -0
@@ -0,0 +1,469 @@
1
+ #!/usr/bin/env python3
2
+ """Score one loop cycle's *result* against a multi-dimensional rubric (US-EVAL-001).
3
+
4
+ This is the pure-function ground floor of loop-result-eval. It defines the
5
+ rubric — which dimensions exist, how each maps a cycle's *facts* to a 0..1
6
+ score, and how the weighted dimensions roll up into a single 1..10 cycle
7
+ score — and nothing else. It does NOT collect facts, read runs.jsonl, or
8
+ talk to git/gh; that wiring lands in US-EVAL-002.
9
+
10
+ Distinct from skill self-scoring (US-SKILL-010..015): that is the agent's
11
+ *subjective* self-review of a single skill run, written by the agent into
12
+ ``.roll/notes/*.md``. This is an *objective* result eval, computed from cycle
13
+ facts with zero extra tokens, destined for the runs.jsonl ``result_eval`` block.
14
+
15
+ Dimensions (each scored on 0..1; see ``DIMENSIONS`` for weights):
16
+
17
+ outcome did the cycle actually merge into main?
18
+ 1.0 merged · 0.0 not merged · unknown if merge state absent
19
+ correctness is the produced PR's CI green?
20
+ 1.0 green · 0.0 red · unknown if no CI signal
21
+ scope_fidelity did the cycle complete the story it was routed to (vs
22
+ going idle, picking an already-Done story, or drifting)?
23
+ 1.0 completed · 0.0 idle / wrong / drifted
24
+ quality did the cycle add/adjust tests and avoid immediate rework?
25
+ 1.0 tcr_count>=1 and no follow-up rework FIX · 0.5 tests
26
+ but a rework FIX landed · 0.0 no test activity
27
+ efficiency duration vs the story's est_min budget.
28
+ 1.0 within budget · graded down past it · unknown if no
29
+ duration or no est_min to compare against
30
+ cleanliness no orphan worktrees/branches and no ALERTs raised.
31
+ 1.0 clean · 0.0 alerts or orphans present
32
+
33
+ Each dimension may evaluate to the sentinel ``UNKNOWN`` when its required
34
+ facts are absent (e.g. CI could not be fetched). Unknown dimensions are
35
+ *excluded* from the weighted sum and the weights of the remaining dimensions
36
+ are renormalised, so a missing fact never silently scores 0 (AC of US-EVAL-002).
37
+
38
+ The 1..10 cycle score is::
39
+
40
+ weighted = sum(score_i * weight_i for known dims) / sum(weight_i for known dims)
41
+ cycle_score = round(1 + weighted * 9) # 0.0 → 1, 1.0 → 10
42
+
43
+ ``result_eval`` schema (the block US-EVAL-002 writes into runs.jsonl)::
44
+
45
+ {
46
+ "version": 1,
47
+ "score": <int 1..10>,
48
+ "dims": { "<dim>": <float 0..1> | "unknown", ... }
49
+ }
50
+
51
+ Backward compatibility: older runs.jsonl records simply have no ``result_eval``
52
+ key; consumers must treat its absence as "not scored" rather than an error.
53
+
54
+ CLI (used by the bats unit test) — reads a JSON facts object from --facts or
55
+ stdin and prints the result_eval JSON::
56
+
57
+ loop_result_eval.py --facts '{"status":"merged","ci":"green",...}'
58
+ echo '{...}' | loop_result_eval.py
59
+
60
+ Exit codes:
61
+ 0 — scored
62
+ 1 — bad/unreadable facts JSON
63
+ """
64
+ from __future__ import annotations
65
+
66
+ import argparse
67
+ import json
68
+ import sys
69
+
70
# Marker stored in place of a score when the facts a dimension needs were not
# available this cycle. Deliberately not 0.0: zero means "measured, and bad".
UNKNOWN = "unknown"

# Value written to the "version" field of every result_eval block.
SCHEMA_VERSION = 1

# The rubric as ordered (dimension name, weight) pairs. This is the single
# place weights are tuned; it is intentionally not a user-facing knob. Only
# the ratio between weights matters, because the weights are renormalised over
# whichever dimensions are known for a given cycle.
DIMENSIONS = (
    ("outcome", 3.0),         # merged into main is what ultimately matters
    ("correctness", 2.0),     # green CI on the produced PR
    ("scope_fidelity", 2.0),  # did the right, intended work
    ("quality", 1.0),         # tests added, no immediate rework
    ("efficiency", 1.0),      # within the story's time budget
    ("cleanliness", 1.0),     # no orphans / alerts
)

# Name -> weight lookup derived from DIMENSIONS (same order).
DIM_WEIGHTS = {dim_name: dim_weight for dim_name, dim_weight in DIMENSIONS}
90
+
91
+
92
def _truthy_merged(facts) -> bool:
    """Report whether the cycle facts say the cycle merged.

    True when ``status`` (stringified, trimmed, lower-cased) equals
    ``"merged"``; otherwise the truthiness of the ``merged`` fact.
    """
    normalised_status = str(facts.get("status", "")).strip().lower()
    return normalised_status == "merged" or bool(facts.get("merged"))
97
+
98
+
99
def _score_outcome(facts):
    """Score the outcome dimension: 1.0 merged, 0.0 not merged.

    Returns ``UNKNOWN`` only when there is no merge signal at all, i.e. the
    ``merged`` key is absent and ``status`` is missing or falsy.
    """
    has_merge_signal = "merged" in facts or bool(facts.get("status"))
    if not has_merge_signal:
        return UNKNOWN
    if _truthy_merged(facts):
        return 1.0
    return 0.0
105
+
106
+
107
def _score_correctness(facts):
    """Score the correctness dimension from the ``ci`` fact.

    The value is stringified, trimmed and lower-cased. A passing verdict
    (green / pass / passing / success) scores 1.0, a failing one
    (red / fail / failing / failure) scores 0.0. A missing, blank or
    unrecognised value yields ``UNKNOWN``.
    """
    raw_ci = facts.get("ci")
    verdict = "" if raw_ci is None else str(raw_ci).strip().lower()
    if verdict in ("green", "pass", "passing", "success"):
        return 1.0
    if verdict in ("red", "fail", "failing", "failure"):
        return 0.0
    # Blank and unrecognised verdicts both land here.
    return UNKNOWN
118
+
119
+
120
def _score_scope_fidelity(facts):
    """Score whether the cycle completed the story it was routed to.

    0.0 when ``status`` is ``idle`` or no ``routed_story`` is set. Otherwise
    1.0 only when ``built`` is a list that contains the routed story; a routed
    story with nothing built for it (drifted / no-op) scores 0.0.
    """
    routed = facts.get("routed_story")
    went_idle = str(facts.get("status", "")).strip().lower() == "idle"
    if went_idle or not routed:
        return 0.0

    built = facts.get("built") or []
    delivered = isinstance(built, list) and routed in built
    return 1.0 if delivered else 0.0
136
+
137
+
138
def _score_quality(facts):
    """Score the quality dimension: tests added/adjusted and no rework.

    ``tcr_count`` missing or not convertible to an int -> ``UNKNOWN`` (no
    test signal). A count of 0 or less -> 0.0. A positive count scores 0.5
    when ``rework_fix`` is truthy and 1.0 otherwise.
    """
    tcr = facts.get("tcr_count")
    if tcr is None:
        return UNKNOWN
    try:
        tcr = int(tcr)
    except (TypeError, ValueError, OverflowError):
        # OverflowError covers an infinite float, which json.loads produces
        # for the non-standard ``Infinity`` token; treat it like any other
        # unusable count instead of crashing the scorer.
        return UNKNOWN
    if tcr <= 0:
        return 0.0
    if facts.get("rework_fix"):
        return 0.5
    return 1.0
156
+
157
+
158
def _score_efficiency(facts):
    """Score the efficiency dimension: ``duration_sec`` vs the ``est_min`` budget.

    Returns ``UNKNOWN`` when either fact is missing, cannot be converted to a
    finite number, or the budget is not positive. Within budget scores 1.0.
    Over budget grades down linearly (0.4 per extra budget multiple) to a 0.2
    floor, reached at 3x the budget.
    """
    import math  # function-scope import: only this scorer needs it

    duration_sec = facts.get("duration_sec")
    est_min = facts.get("est_min")
    if duration_sec is None or est_min is None:
        return UNKNOWN
    try:
        duration_min = float(duration_sec) / 60.0
        budget = float(est_min)
    except (TypeError, ValueError, OverflowError):
        # OverflowError: an int too large to be represented as a float.
        return UNKNOWN
    # NaN compares false against everything, so without this guard it would
    # slide through the comparisons below and be clamped to a perfect 1.0; an
    # infinite budget would also score 1.0. Neither is a real measurement.
    if not (math.isfinite(duration_min) and math.isfinite(budget)):
        return UNKNOWN
    if budget <= 0:
        return UNKNOWN
    if duration_min <= budget:
        return 1.0
    overrun = duration_min / budget  # > 1 at this point
    # 1x -> 1.0, 3x -> 0.2, clamped.
    graded = 1.0 - (overrun - 1.0) * 0.4
    return max(0.2, min(1.0, graded))
181
+
182
+
183
def _score_cleanliness(facts):
    """Score the cleanliness dimension.

    0.0 when either the ``alerts`` or the ``orphans`` fact is non-empty
    (truthy); 1.0 when both are missing or empty.
    """
    dirty = bool(facts.get("alerts")) or bool(facts.get("orphans"))
    return 0.0 if dirty else 1.0
190
+
191
+
192
# Dimension name -> scoring function. Every name listed in DIMENSIONS is
# looked up here by score_dimensions().
_SCORERS = dict(
    outcome=_score_outcome,
    correctness=_score_correctness,
    scope_fidelity=_score_scope_fidelity,
    quality=_score_quality,
    efficiency=_score_efficiency,
    cleanliness=_score_cleanliness,
)
200
+
201
+
202
def score_dimensions(facts: dict) -> dict:
    """Score every rubric dimension for one cycle.

    Returns ``{dimension: float 0..1 | UNKNOWN}`` with one entry per
    DIMENSIONS row, in DIMENSIONS order. A falsy ``facts`` is treated as an
    empty dict.
    """
    cycle_facts = facts if facts else {}
    scores = {}
    for dim_name, _weight in DIMENSIONS:
        scores[dim_name] = _SCORERS[dim_name](cycle_facts)
    return scores
206
+
207
+
208
def aggregate(dims: dict) -> int:
    """Roll per-dimension scores up into a single 1..10 cycle score.

    Dimensions that are missing from ``dims``, ``UNKNOWN``, ``None``,
    non-numeric or NaN are excluded, and the weights of the remaining
    dimensions are renormalised. When no dimension is measurable the neutral
    midpoint (5) is returned.

    The score is ``round(1 + weighted * 9)`` where ``weighted`` is the
    weight-averaged score of the known dimensions.
    """
    import math  # function-scope import: only needed for the NaN guard

    weighted_sum = 0.0
    weight_total = 0.0
    for name, weight in DIMENSIONS:
        value = dims.get(name, UNKNOWN)
        if value is None or value == UNKNOWN:
            continue
        # Same tolerance as detect_signals(): a value that is not a number
        # counts as "not measured" rather than crashing the roll-up.
        try:
            score = float(value)
        except (TypeError, ValueError):
            continue
        if math.isnan(score):
            continue  # NaN would poison the sum and make round() raise
        weighted_sum += score * weight
        weight_total += weight
    if weight_total == 0:
        return 5  # no measurable dimension -> neutral
    weighted = weighted_sum / weight_total  # 0..1 for in-range inputs
    return int(round(1 + weighted * 9))
226
+
227
+
228
def score_cycle(facts: dict) -> dict:
    """Build the complete ``result_eval`` block for one cycle's facts.

    The block holds the schema ``version``, the rolled-up 1..10 ``score`` and
    the per-dimension ``dims`` mapping the score was computed from.
    """
    per_dimension = score_dimensions(facts)
    result = {"version": SCHEMA_VERSION}
    result["score"] = aggregate(per_dimension)
    result["dims"] = per_dimension
    return result
236
+
237
+
238
+ # ─────────────────────────────────────────────────────────────────────────────
239
+ # US-EVAL-004: self-evolution signals — repeated low-score patterns.
240
+ #
241
+ # This is the pure *detection* half. Given an ordered (oldest→newest) list of
242
+ # runs.jsonl records, it finds dimensions that have been low (0.0) for N cycles
243
+ # in a row and turns each into a structured improvement *signal*. It does NOT
244
+ # write the brief, touch the backlog, or dedup against history — that side-
245
+ # effecting wiring lives in bin/roll, which dedups on each signal's stable
246
+ # ``key`` so the same standing pattern is surfaced once, not every cycle.
247
+ #
248
+ # A signal is advisory only: it is meant to be surfaced in the brief's
249
+ # improvement-signal section and to seed a *candidate* backlog draft marked
250
+ # "📋 待人确认" — never to auto-activate a story or auto-edit code.
251
+ # ─────────────────────────────────────────────────────────────────────────────
252
+
253
# Default number of consecutive low cycles a dimension must show before it is
# reported as a signal.
SIGNAL_STREAK = 3

# Per-dimension signal metadata: (candidate backlog item kind, human-facing
# description of what the streak means). A dimension that keeps measuring 0.0
# means the loop is reliably failing that axis, so most map to FIX; the
# process-shaped ones (idle / off-scope, over budget) are IDEA.
_SIGNAL_META = dict(
    outcome=("FIX", "cycles keep failing to merge into main"),
    correctness=("FIX", "produced PRs keep failing CI"),
    scope_fidelity=("IDEA", "cycles keep going idle or off-scope"),
    quality=("FIX", "cycles keep landing without test activity"),
    efficiency=("IDEA", "cycles keep blowing past their est_min budget"),
    cleanliness=("FIX", "cycles keep leaving orphans / raising ALERTs"),
)
268
+
269
+
270
def _result_eval_of(record):
    """Extract a usable result_eval block from ``record``, or return None.

    ``record`` may be a full runs.jsonl record (``{..., "result_eval": {...}}``)
    or a bare result_eval block (``{"score": .., "dims": {...}}``). A block is
    usable only when it is a dict whose ``dims`` value is itself a dict.
    """
    if isinstance(record, dict):
        # Fall back to the record itself when there is no "result_eval" key,
        # which is how a bare block is recognised.
        candidate = record.get("result_eval", record)
        if isinstance(candidate, dict) and isinstance(candidate.get("dims"), dict):
            return candidate
    return None
281
+
282
+
283
def detect_signals(records, streak: int = SIGNAL_STREAK):
    """Find dimensions that have scored low for several cycles in a row.

    ``records`` is ordered oldest -> newest. For each dimension the scored
    cycles are walked newest -> oldest, counting a leading run of values at or
    below 0.0. Cycles where the dimension is ``UNKNOWN``, ``None`` or not a
    number are skipped: they neither extend nor break the run, so a missing
    CI signal does not mask a real failing streak. The first value above 0.0
    ends the run. A run of at least ``streak`` yields a signal::

        {
          "key":     "lowdim:<dim>",   # stable id for dedup
          "dim":     "<dim>",
          "kind":    "FIX" | "IDEA",
          "streak":  <int>,            # how many low cycles in a row
          "summary": "<one-line human description>",
        }

    ``streak`` falls back to SIGNAL_STREAK when it is not int-convertible and
    is raised to 1 when smaller. Signals are returned in DIMENSIONS order.
    """
    try:
        required = int(streak)
    except (TypeError, ValueError):
        required = SIGNAL_STREAK
    required = max(required, 1)

    # Usable result_eval blocks only, flipped so index 0 is the newest cycle.
    newest_first = []
    for record in (records or []):
        block_eval = _result_eval_of(record)
        if block_eval:
            newest_first.append(block_eval)
    newest_first.reverse()

    found = []
    for dim, _weight in DIMENSIONS:
        low_run = 0
        for block_eval in newest_first:
            raw = (block_eval.get("dims") or {}).get(dim, UNKNOWN)
            if raw == UNKNOWN or raw is None:
                continue  # unknown neither extends nor breaks the streak
            try:
                value = float(raw)
            except (TypeError, ValueError):
                continue
            if not value <= 0.0:
                break  # a known-good cycle breaks the streak
            low_run += 1

        if low_run < required:
            continue
        kind, why = _SIGNAL_META.get(dim, ("IDEA", "repeated low score"))
        found.append({
            "key": "lowdim:" + dim,
            "dim": dim,
            "kind": kind,
            "streak": low_run,
            "summary": "%s for %d cycles in a row" % (why, low_run),
        })
    return found
336
+
337
+
338
+ # ─────────────────────────────────────────────────────────────────────────────
339
+ # US-AGENT-030: per-(agent × story_type) historical hit-rate aggregation.
340
+ #
341
+ # This is the pure read-model the adaptive in-tier nudge (lib/loop_pick_agent.py
342
+ # nudge_within_tier) consumes. Given runs.jsonl records, it computes — for every
343
+ # observed (agent, story_type) pair — the share of *scored* cycles that landed a
344
+ # "hit" (a high result_eval.score), plus the sample size that share rests on.
345
+ #
346
+ # Crucially distinct from the US-AGENT-022-retired soft preference: that ranked
347
+ # agents by an opaque, unbounded, implicitly-decaying history with no audit
348
+ # trail. This is a flat, deterministic, locale-independent count over the
349
+ # records handed in — same records in → same numbers out, every time. The nudge
350
+ # layer adds the sample floor and the on/off switch; this function only counts.
351
+ #
352
+ # A "hit" is a cycle whose result_eval.score is at or above HIT_SCORE_MIN. Using
353
+ # the rolled-up 1..10 cycle score (not a single dimension) keeps the signal the
354
+ # same one the dashboard already trends, so the audit story is one number.
355
+ # ─────────────────────────────────────────────────────────────────────────────
356
+
357
# Minimum rolled-up result_eval.score for a cycle to count as a "hit" for its
# (agent, story_type) pair; 8/10 = "clearly good cycle". Kept as a central
# constant rather than a user knob so the nudge's input stays deterministic
# and explainable.
HIT_SCORE_MIN = 8
362
+
363
+
364
def agent_story_hit_rates(records):
    """Aggregate per-(agent, story_type) hit rate and sample size.

    ``records`` is an iterable of runs.jsonl record dicts; order is
    irrelevant because the result is a flat count. A record is counted only
    when it has a non-empty ``agent`` and ``story_type`` and a usable
    ``result_eval.score``: a real, finite number. Booleans, NaN/infinity and
    ints too large for a float are not usable. Records without a usable score
    are skipped, never treated as a miss.

    Returns a dict keyed by ``"<agent>\\x1f<story_type>"`` (unit separator),
    sorted by key, mapping to
    ``{"agent": str, "story_type": str, "hit_rate": float 0..1, "sample_n": int}``.
    A hit is ``result_eval.score >= HIT_SCORE_MIN``.
    """
    import math  # function-scope import: only needed for the finiteness guard

    # pair-key -> [agent, story_type, hits, sample_n]. The names are stored
    # alongside the counts so they never have to be recovered by splitting
    # the key, which would mis-split an agent name containing "\x1f".
    tally = {}
    for record in (records or []):
        if not isinstance(record, dict):
            continue
        agent = record.get("agent")
        stype = record.get("story_type")
        if not agent or not stype:
            continue
        ev = record.get("result_eval")
        if not isinstance(ev, dict):
            continue
        score = ev.get("score")
        # bool is a subclass of int; a JSON true/false is not a score.
        if isinstance(score, bool) or not isinstance(score, (int, float)):
            continue
        try:
            value = float(score)
        except OverflowError:
            continue  # int too large to represent as a float
        if not math.isfinite(value):
            continue

        agent_name, story_type = str(agent), str(stype)
        key = "%s\x1f%s" % (agent_name, story_type)
        slot = tally.get(key)
        if slot is None:
            slot = [agent_name, story_type, 0, 0]
            tally[key] = slot
        slot[3] += 1
        if value >= HIT_SCORE_MIN:
            slot[2] += 1

    out = {}
    for key in sorted(tally):
        agent_name, story_type, hits, n = tally[key]
        out[key] = {
            "agent": agent_name,
            "story_type": story_type,
            "hit_rate": (hits / n) if n else 0.0,
            "sample_n": n,
        }
    return out
412
+
413
+
414
def main() -> int:
    """CLI entry point: score facts, or emit signals / hit rates.

    Input JSON comes from ``--facts`` when given, otherwise from stdin; blank
    input stands for an empty array/object. Modes, checked in this order:

    * ``--hit-rates``: input must be a JSON array of records; prints
      ``agent_story_hit_rates``.
    * ``--signals``: input must be a JSON array of records; prints
      ``detect_signals`` using ``--streak``.
    * default: input must be a JSON object of cycle facts; prints
      ``score_cycle``.

    Returns 0 after printing a result and 1 (with a message on stderr) when
    the input is not valid JSON or has the wrong top-level type.
    """
    parser = argparse.ArgumentParser(description="Score a loop cycle result.")
    parser.add_argument(
        "--facts",
        default=None,
        help="cycle facts as a JSON object; reads stdin if omitted",
    )
    parser.add_argument(
        "--signals",
        action="store_true",
        help="read a JSON array of runs records from --facts/stdin "
             "and emit detected self-evolution signals",
    )
    parser.add_argument(
        "--streak",
        type=int,
        default=SIGNAL_STREAK,
        help="consecutive low cycles required to fire a signal",
    )
    parser.add_argument(
        "--hit-rates",
        action="store_true",
        help="read a JSON array of runs records from --facts/stdin "
             "and emit per-(agent × story_type) hit-rate + sample_n "
             "(US-AGENT-030 adaptive-nudge read model)",
    )
    args = parser.parse_args()

    raw = sys.stdin.read() if args.facts is None else args.facts

    def load(bad_json_prefix, when_blank):
        """Decode ``raw``; return (ok, value), reporting bad JSON on stderr."""
        try:
            return True, (json.loads(raw) if raw.strip() else when_blank)
        except (ValueError, AttributeError) as exc:
            print(f"{bad_json_prefix}: {exc}", file=sys.stderr)
            return False, None

    def run_records_mode(not_array_message, compute):
        """Shared body of the two modes that take a JSON array of records."""
        ok, records = load("loop_result_eval: bad records JSON", [])
        if not ok:
            return 1
        if not isinstance(records, list):
            print(not_array_message, file=sys.stderr)
            return 1
        print(json.dumps(compute(records), sort_keys=True))
        return 0

    if args.hit_rates:
        return run_records_mode(
            "loop_result_eval: --hit-rates expects a JSON array",
            agent_story_hit_rates,
        )

    if args.signals:
        return run_records_mode(
            "loop_result_eval: --signals expects a JSON array",
            lambda records: detect_signals(records, args.streak),
        )

    ok, facts = load("loop_result_eval: bad facts JSON", {})
    if not ok:
        return 1
    if not isinstance(facts, dict):
        print("loop_result_eval: facts must be a JSON object", file=sys.stderr)
        return 1

    print(json.dumps(score_cycle(facts), sort_keys=True))
    return 0


if __name__ == "__main__":
    sys.exit(main())
@@ -0,0 +1,180 @@
1
+ #!/usr/bin/env python3
2
+ """FIX-112: revert 🔨 In Progress stories whose latest cycle definitively
3
+ failed and has been quiet for a while. Default safe gate is conservative:
4
+
5
+ - Story row is currently 🔨 In Progress in backlog
6
+ - Most recent `pick_todo <story_id>` event in events-<slug>.ndjson lives in
7
+ a cycle whose `cycle_end` outcome is one of: failed | aborted | blocked
8
+ - That cycle_end timestamp is at least N hours ago (default 4)
9
+
10
+ Stories that match are flipped back to 📋 Todo and an ALERT note is appended
11
+ to the per-project ALERT file. Stories still actively running, or claimed
12
+ by a human / agent for legitimate work (no failed cycle_end), are left alone.
13
+
14
+ Usage:
15
+ python3 lib/loop_unstick.py # apply (writes backlog + ALERT)
16
+ python3 lib/loop_unstick.py --dry-run # report what would change, write nothing
17
+ python3 lib/loop_unstick.py --ttl-hours 8
18
+
19
+ Returns 0 always (idempotent). Prints one line per reverted story.
20
+ """
21
+ from __future__ import annotations
22
+ import argparse, json, os, re, sys, time
23
+ from datetime import datetime, timezone, timedelta
24
+ from pathlib import Path
25
+
26
# Put this file's own directory on sys.path (once), ahead of other entries.
_LIB_DIR = os.path.dirname(os.path.realpath(__file__))
if _LIB_DIR not in sys.path:
    sys.path.insert(0, _LIB_DIR)

# Story id at the start of a backlog cell, optionally preceded by whitespace
# and a "[". FIX-108-compatible: one or more dash-joined segments, each
# starting with an upper-case letter and continuing with upper-case letters or
# digits (US-VIEW-011, US-I18N-001, K8S, D2), followed by "-<digits>".
ID_RE = re.compile(
    r"^\s*\[?([A-Z][A-Z0-9]*(?:-[A-Z][A-Z0-9]*)*-\d+)"
)

# A single backtick character, spelled via its code point.
TICK = chr(96)
34
+
35
def _shared_root() -> Path:
    """Return the shared roll directory.

    Uses the first non-empty value of the ``ROLL_SHARED_ROOT`` and
    ``_SHARED_ROOT`` environment variables, in that order (the source notes
    that bin/roll and lib/roll-home.py use different names), and falls back to
    ``~/.shared/roll`` with the home directory expanded.
    """
    for variable in ("ROLL_SHARED_ROOT", "_SHARED_ROOT"):
        configured = os.environ.get(variable)
        if configured:
            return Path(configured)
    return Path(os.path.expanduser("~/.shared/roll"))
40
+
41
def _project_slug() -> str:
    """Build the per-project slug ``<sanitised-basename>-<6 hex chars>``.

    The project path is the real path of the current directory, unless
    ``git rev-parse --git-common-dir`` succeeds and its output ends in
    ``/.git``, in which case the path is that output minus the ``/.git``
    suffix. The slug is the path's basename with every run of non-alphanumeric
    characters replaced by ``-`` (leading/trailing dashes stripped), followed
    by the first six hex digits of the MD5 of the full path.
    """
    import hashlib
    import subprocess

    here = os.path.realpath(os.getcwd())
    path = here
    try:
        common = subprocess.check_output(
            ["git", "-C", here, "rev-parse", "--git-common-dir"],
            stderr=subprocess.DEVNULL,
            text=True,
        ).strip()
        if common.endswith("/.git"):
            path = common[:-5]
    except Exception:
        # git missing, not a repository, etc.: fall back to the current dir.
        path = here

    basename = os.path.basename(path)
    base = re.sub(r"[^A-Za-z0-9]+", "-", basename).strip("-")
    digest = hashlib.md5(path.encode()).hexdigest()[:6]
    return f"{base}-{digest}"
57
+
58
def _read_events(slug: str) -> list:
    """Load the project's loop events from ``<shared root>/loop/events-<slug>.ndjson``.

    Returns the events in file order, one dict per parseable line, each with
    an added ``_ts`` key: the ``ts`` value parsed as a timezone-aware datetime
    (a trailing ``Z`` is read as ``+00:00``), or None when ``ts`` is empty or
    absent. Returns an empty list when the file does not exist.

    Blank lines, lines that are not valid JSON, lines that are not JSON
    objects, and lines whose ``ts`` cannot be parsed are skipped.
    """
    path = _shared_root() / "loop" / f"events-{slug}.ndjson"
    out = []
    if not path.exists():
        return out
    with path.open(errors="ignore") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            try:
                ev = json.loads(line)
                ts = ev.get("ts", "")
                parsed = datetime.fromisoformat(ts.replace("Z", "+00:00")) if ts else None
            except (ValueError, TypeError, AttributeError):
                # ValueError: bad JSON or bad timestamp; AttributeError /
                # TypeError: the line is not an object or ts is not a string.
                continue
            if parsed is not None and parsed.tzinfo is None:
                # A timestamp without an offset parses to a naive datetime,
                # which cannot be compared with the timezone-aware cutoff used
                # by the caller (TypeError).
                # NOTE(review): assumes offset-less timestamps are UTC, like
                # the "Z"-suffixed ones — confirm against the event writer.
                parsed = parsed.replace(tzinfo=timezone.utc)
            ev["_ts"] = parsed
            out.append(ev)
    return out
76
+
77
def _scan_in_progress(backlog: Path) -> list:
    """Return ``(line_index, story_id, raw_line)`` for rows that are 🔨 In Progress.

    A row qualifies when the line starts with ``|``, contains the exact cell
    ``| 🔨 In Progress |``, splits into at least four ``|``-separated parts,
    and its first cell matches ``ID_RE``. ``line_index`` is 0-based,
    ``raw_line`` has its trailing newline removed. Returns an empty list when
    the file does not exist.
    """
    if not backlog.exists():
        return []
    rows = []
    # Context manager so the handle is closed deterministically instead of
    # being left to the garbage collector.
    with backlog.open(errors="ignore") as handle:
        for i, line in enumerate(handle):
            if "| 🔨 In Progress |" not in line:
                continue
            if not line.startswith("|"):
                continue
            parts = [p.strip() for p in line.split("|")]
            if len(parts) < 4:
                continue
            # parts[0] is the empty text before the leading "|".
            m = ID_RE.match(parts[1])
            if not m:
                continue
            rows.append((i, m.group(1), line.rstrip("\n")))
    return rows
95
+
96
def _cycle_end_for_pick(events: list, story_id: str):
    """Find how the latest cycle that picked ``story_id`` ended.

    Locates the last event with ``stage == "pick_todo"`` and
    ``detail == story_id``, then scans forward from it for the first
    ``cycle_end`` event whose ``label`` ends with the pick's ``label``.

    Returns ``(cycle_end_ts, outcome)``, i.e. the event's ``_ts`` and
    ``outcome`` (``""`` when absent), or None when the story was never picked
    or no matching ``cycle_end`` follows the pick (cycle still running).

    NOTE(review): a pick with an empty or missing label matches the first
    ``cycle_end`` after it, whatever that event's label is — confirm this is
    intended.
    """
    # Track the position while walking backwards. Looking the event up again
    # with events.index() would return the first *equal* dict, which is the
    # wrong (older) pick when the same pick event appears more than once.
    pick_idx = None
    for idx in range(len(events) - 1, -1, -1):
        ev = events[idx]
        if ev.get("stage") == "pick_todo" and ev.get("detail") == story_id:
            pick_idx = idx
            break
    if pick_idx is None:
        return None

    # ``or ""`` / str(): tolerate a null or non-string label instead of
    # raising inside endswith().
    label = str(events[pick_idx].get("label") or "")
    for ev in events[pick_idx + 1:]:
        if ev.get("stage") != "cycle_end":
            continue
        if str(ev.get("label") or "").endswith(label):
            return ev.get("_ts"), ev.get("outcome", "")
    return None
114
+
115
def main():
    """Revert stale 🔨 In Progress backlog rows to 📋 Todo.

    A row is reverted when the latest cycle that picked its story ended with
    outcome ``failed``, ``aborted`` or ``blocked`` at least ``--ttl-hours``
    hours ago. With ``--dry-run`` the candidates are only printed. Otherwise
    the backlog is rewritten, one line per reverted story is appended to
    ``<shared root>/loop/ALERT-<slug>.md``, and one line per reverted story is
    printed.

    Always returns 0, including when the backlog file is missing (reported on
    stderr) or nothing needs reverting.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--dry-run", action="store_true")
    ap.add_argument("--ttl-hours", type=float, default=4.0,
                    help="Minimum hours since failed cycle_end before reverting (default 4)")
    ap.add_argument("--backlog", default=".roll/backlog.md")
    args = ap.parse_args()

    backlog = Path(args.backlog)
    if not backlog.exists():
        print(f"backlog not found: {backlog}", file=sys.stderr)
        return 0

    slug = _project_slug()
    events = _read_events(slug)
    in_progress = _scan_in_progress(backlog)
    if not in_progress:
        return 0

    now = datetime.now(timezone.utc)
    cutoff = now - timedelta(hours=args.ttl_hours)
    candidates_to_revert = []

    failed_outcomes = {"failed", "aborted", "blocked"}
    for line_idx, sid, raw in in_progress:
        result = _cycle_end_for_pick(events, sid)
        if not result:
            continue  # still running OR no failed cycle yet — leave alone
        end_ts, outcome = result
        if outcome not in failed_outcomes:
            continue
        if not end_ts:
            continue  # no usable timestamp: cannot prove the cycle is stale
        if end_ts.tzinfo is None:
            # A naive timestamp cannot be compared with the aware cutoff.
            # NOTE(review): assumes offset-less timestamps are UTC — confirm.
            end_ts = end_ts.replace(tzinfo=timezone.utc)
        if end_ts > cutoff:
            continue  # too recent
        age_hours = (now - end_ts).total_seconds() / 3600
        candidates_to_revert.append((line_idx, sid, raw, outcome, age_hours))

    if not candidates_to_revert:
        return 0

    if args.dry_run:
        for _idx, sid, _raw, outcome, age in candidates_to_revert:
            print(f"would-revert {sid} (cycle ended {outcome} {age:.1f}h ago)")
        return 0

    # Apply: read backlog, flip status, write back.
    # Split on "\n" only. The scan indexed lines by iterating the file, which
    # breaks on newlines alone, whereas str.splitlines() also breaks on form
    # feeds, U+2028 and similar, which would shift every later index.
    lines = backlog.read_text(errors="ignore").split("\n")
    reverted = []
    for candidate in candidates_to_revert:
        line_idx, raw = candidate[0], candidate[2]
        # Only touch the row if it is still exactly what the scan saw; skip it
        # if the file changed in between.
        if line_idx < len(lines) and lines[line_idx] == raw:
            lines[line_idx] = raw.replace("| 🔨 In Progress |", "| 📋 Todo |")
            reverted.append(candidate)

    if not reverted:
        return 0

    backlog.write_text("\n".join(lines))

    # Append ALERT (only for rows that were actually flipped).
    alert_file = _shared_root() / "loop" / f"ALERT-{slug}.md"
    alert_file.parent.mkdir(parents=True, exist_ok=True)
    ts = now.strftime("%Y-%m-%dT%H:%M:%SZ")
    with alert_file.open("a") as f:
        for _idx, sid, _raw, outcome, age in reverted:
            f.write(f"[{ts}] unstick: reverted {sid} (cycle ended {outcome} {age:.1f}h ago, > {args.ttl_hours}h TTL)\n")

    for _idx, sid, _raw, outcome, age in reverted:
        print(f"reverted {sid} (cycle ended {outcome} {age:.1f}h ago)")

    return 0


if __name__ == "__main__":
    sys.exit(main())