rubino-agent 0.3.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (196) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop_todo.yml +11 -2
  3. data/AGENTS.md +1 -1
  4. data/CHANGELOG.md +172 -5
  5. data/CONTRIBUTING.md +10 -1
  6. data/README.md +14 -5
  7. data/Rakefile +31 -0
  8. data/docs/agents.md +42 -23
  9. data/docs/architecture.md +2 -2
  10. data/docs/commands.md +35 -3
  11. data/docs/configuration.md +20 -23
  12. data/docs/getting-started.md +5 -3
  13. data/docs/security.md +16 -5
  14. data/docs/skills.md +31 -0
  15. data/docs/troubleshooting.md +1 -1
  16. data/exe/rubino +16 -2
  17. data/install.sh +721 -59
  18. data/lib/rubino/active_agent.rb +73 -0
  19. data/lib/rubino/agent/action_claim_guard.rb +881 -0
  20. data/lib/rubino/agent/agent_registry.rb +5 -2
  21. data/lib/rubino/agent/definition.rb +1 -9
  22. data/lib/rubino/agent/fallback_chain.rb +0 -6
  23. data/lib/rubino/agent/iteration_budget.rb +109 -3
  24. data/lib/rubino/agent/loop.rb +476 -20
  25. data/lib/rubino/agent/model_call_runner.rb +81 -3
  26. data/lib/rubino/agent/prompts/build.txt +22 -5
  27. data/lib/rubino/agent/response_validator.rb +8 -0
  28. data/lib/rubino/agent/runner.rb +133 -8
  29. data/lib/rubino/agent/tool_executor.rb +166 -14
  30. data/lib/rubino/agent/truncation_continuation.rb +4 -1
  31. data/lib/rubino/api/server.rb +19 -0
  32. data/lib/rubino/attachments/classify.rb +35 -17
  33. data/lib/rubino/boot/config_guard.rb +71 -0
  34. data/lib/rubino/cli/chat/completion_builder.rb +42 -6
  35. data/lib/rubino/cli/chat/idle_card_host.rb +7 -1
  36. data/lib/rubino/cli/chat/session_resolver.rb +87 -21
  37. data/lib/rubino/cli/chat_command.rb +1189 -50
  38. data/lib/rubino/cli/commands.rb +282 -2
  39. data/lib/rubino/cli/config_command.rb +68 -8
  40. data/lib/rubino/cli/doctor_command.rb +204 -12
  41. data/lib/rubino/cli/jobs_command.rb +12 -0
  42. data/lib/rubino/cli/memory_command.rb +53 -20
  43. data/lib/rubino/cli/onboarding_wizard.rb +79 -6
  44. data/lib/rubino/cli/session_command.rb +172 -18
  45. data/lib/rubino/cli/setup_command.rb +131 -8
  46. data/lib/rubino/cli/skills_command.rb +183 -9
  47. data/lib/rubino/cli/trust_gate.rb +16 -7
  48. data/lib/rubino/commands/built_ins.rb +2 -0
  49. data/lib/rubino/commands/command.rb +12 -2
  50. data/lib/rubino/commands/executor.rb +149 -12
  51. data/lib/rubino/commands/handlers/agent_switch.rb +100 -0
  52. data/lib/rubino/commands/handlers/agents.rb +156 -41
  53. data/lib/rubino/commands/handlers/config.rb +4 -1
  54. data/lib/rubino/commands/handlers/help.rb +113 -14
  55. data/lib/rubino/commands/handlers/memory.rb +15 -5
  56. data/lib/rubino/commands/handlers/sessions.rb +26 -3
  57. data/lib/rubino/commands/handlers/status.rb +9 -4
  58. data/lib/rubino/commands/loader.rb +12 -0
  59. data/lib/rubino/config/configuration.rb +86 -24
  60. data/lib/rubino/config/defaults.rb +140 -33
  61. data/lib/rubino/config/loader.rb +62 -12
  62. data/lib/rubino/config/validator.rb +341 -0
  63. data/lib/rubino/config/writer.rb +123 -31
  64. data/lib/rubino/context/compressor.rb +184 -22
  65. data/lib/rubino/context/environment_inspector.rb +2 -2
  66. data/lib/rubino/context/file_discovery.rb +2 -2
  67. data/lib/rubino/context/message_boundary.rb +27 -1
  68. data/lib/rubino/context/project_languages.rb +90 -0
  69. data/lib/rubino/context/prompt_assembler.rb +105 -22
  70. data/lib/rubino/context/summary_builder.rb +45 -4
  71. data/lib/rubino/context/token_budget.rb +36 -11
  72. data/lib/rubino/context/token_estimate.rb +45 -0
  73. data/lib/rubino/context/tool_result_pruner.rb +81 -0
  74. data/lib/rubino/database/connection.rb +154 -3
  75. data/lib/rubino/database/migrations/001_create_initial_schema.rb +314 -40
  76. data/lib/rubino/database/migrator.rb +98 -5
  77. data/lib/rubino/documents/cap_exceeded.rb +13 -0
  78. data/lib/rubino/documents/converters/csv.rb +4 -3
  79. data/lib/rubino/documents/converters/docx.rb +29 -5
  80. data/lib/rubino/documents/converters/html.rb +5 -1
  81. data/lib/rubino/documents/converters/json.rb +2 -1
  82. data/lib/rubino/documents/converters/pdf.rb +11 -2
  83. data/lib/rubino/documents/converters/plain.rb +2 -1
  84. data/lib/rubino/documents/converters/pptx.rb +11 -2
  85. data/lib/rubino/documents/converters/xlsx.rb +35 -4
  86. data/lib/rubino/documents/converters/xml.rb +2 -1
  87. data/lib/rubino/documents/limits.rb +210 -0
  88. data/lib/rubino/documents.rb +10 -3
  89. data/lib/rubino/errors.rb +36 -5
  90. data/lib/rubino/interaction/cancel_token.rb +19 -3
  91. data/lib/rubino/interaction/events.rb +13 -0
  92. data/lib/rubino/interaction/lifecycle.rb +99 -13
  93. data/lib/rubino/interaction/polishing.rb +176 -0
  94. data/lib/rubino/jobs/cron_job_repository.rb +5 -8
  95. data/lib/rubino/jobs/handlers/cleanup_sessions_job.rb +11 -0
  96. data/lib/rubino/jobs/handlers/distill_skill_job.rb +65 -9
  97. data/lib/rubino/jobs/queue.rb +63 -8
  98. data/lib/rubino/jobs/runner.rb +24 -6
  99. data/lib/rubino/jobs/worker.rb +0 -4
  100. data/lib/rubino/llm/adapter_response.rb +47 -4
  101. data/lib/rubino/llm/credential_check.rb +15 -16
  102. data/lib/rubino/llm/error_classifier.rb +89 -1
  103. data/lib/rubino/llm/inline_think_filter.rb +69 -12
  104. data/lib/rubino/llm/request.rb +30 -3
  105. data/lib/rubino/llm/ruby_llm_adapter.rb +394 -46
  106. data/lib/rubino/llm/tool_bridge.rb +113 -9
  107. data/lib/rubino/mcp/manager.rb +18 -1
  108. data/lib/rubino/mcp/mcp_tool_wrapper.rb +14 -3
  109. data/lib/rubino/memory/aux_retry.rb +107 -0
  110. data/lib/rubino/memory/backends/sqlite.rb +73 -44
  111. data/lib/rubino/memory/backends.rb +23 -7
  112. data/lib/rubino/memory/salience_gate.rb +103 -0
  113. data/lib/rubino/memory/sqlite_extraction.rb +70 -0
  114. data/lib/rubino/memory/sqlite_extraction_prompt.rb +11 -0
  115. data/lib/rubino/memory/store.rb +33 -5
  116. data/lib/rubino/memory/threat_scanner.rb +52 -0
  117. data/lib/rubino/output/cost.rb +52 -0
  118. data/lib/rubino/output/headless_block_latch.rb +53 -0
  119. data/lib/rubino/output/result_serializer.rb +222 -0
  120. data/lib/rubino/output/turn_recorder.rb +77 -0
  121. data/lib/rubino/security/approval_policy.rb +227 -32
  122. data/lib/rubino/security/command_allowlist.rb +79 -4
  123. data/lib/rubino/security/doom_loop_detector.rb +21 -2
  124. data/lib/rubino/security/hardline_guard.rb +189 -16
  125. data/lib/rubino/security/pattern_matcher.rb +28 -5
  126. data/lib/rubino/security/prefix_deriver.rb +25 -6
  127. data/lib/rubino/security/readonly_commands.rb +145 -5
  128. data/lib/rubino/security/secret_path.rb +134 -0
  129. data/lib/rubino/security/url_safety.rb +255 -0
  130. data/lib/rubino/session/repository.rb +212 -11
  131. data/lib/rubino/session/store.rb +139 -14
  132. data/lib/rubino/skills/installer.rb +230 -0
  133. data/lib/rubino/skills/prompt_index.rb +2 -2
  134. data/lib/rubino/skills/registry.rb +52 -1
  135. data/lib/rubino/skills/skill.rb +64 -3
  136. data/lib/rubino/skills/skill_tool.rb +16 -5
  137. data/lib/rubino/tools/background_tasks.rb +157 -13
  138. data/lib/rubino/tools/base.rb +204 -3
  139. data/lib/rubino/tools/edit_tool.rb +73 -18
  140. data/lib/rubino/tools/glob_tool.rb +48 -9
  141. data/lib/rubino/tools/grep_tool.rb +103 -9
  142. data/lib/rubino/tools/multi_edit_tool.rb +64 -9
  143. data/lib/rubino/tools/patch_tool.rb +5 -0
  144. data/lib/rubino/tools/read_attachment_tool.rb +3 -1
  145. data/lib/rubino/tools/read_tool.rb +33 -15
  146. data/lib/rubino/tools/read_tracker.rb +153 -35
  147. data/lib/rubino/tools/registry.rb +113 -12
  148. data/lib/rubino/tools/result.rb +9 -1
  149. data/lib/rubino/tools/ruby_tool.rb +0 -0
  150. data/lib/rubino/tools/shell_registry.rb +70 -0
  151. data/lib/rubino/tools/shell_tool.rb +40 -1
  152. data/lib/rubino/tools/summarize_file_tool.rb +6 -0
  153. data/lib/rubino/tools/task_stop_tool.rb +10 -16
  154. data/lib/rubino/tools/task_tool.rb +36 -8
  155. data/lib/rubino/tools/vision_tool.rb +5 -0
  156. data/lib/rubino/tools/webfetch_tool.rb +39 -7
  157. data/lib/rubino/tools/websearch_tool.rb +92 -30
  158. data/lib/rubino/tools/write_tool.rb +23 -4
  159. data/lib/rubino/ui/api.rb +10 -1
  160. data/lib/rubino/ui/base.rb +11 -0
  161. data/lib/rubino/ui/bottom_composer.rb +382 -74
  162. data/lib/rubino/ui/cli.rb +515 -83
  163. data/lib/rubino/ui/completion_menu.rb +11 -7
  164. data/lib/rubino/ui/headless_trace.rb +63 -0
  165. data/lib/rubino/ui/live_region.rb +70 -7
  166. data/lib/rubino/ui/markdown_renderer.rb +142 -7
  167. data/lib/rubino/ui/notifier.rb +0 -2
  168. data/lib/rubino/ui/null.rb +52 -5
  169. data/lib/rubino/ui/paste_store.rb +16 -2
  170. data/lib/rubino/ui/queued_indicators.rb +6 -1
  171. data/lib/rubino/ui/status_bar.rb +61 -7
  172. data/lib/rubino/ui/streaming_markdown.rb +59 -6
  173. data/lib/rubino/ui/subagent_view.rb +29 -4
  174. data/lib/rubino/ui/tool_label.rb +52 -0
  175. data/lib/rubino/update_check.rb +39 -4
  176. data/lib/rubino/util/atomic_file.rb +117 -0
  177. data/lib/rubino/util/ignore_rules.rb +120 -0
  178. data/lib/rubino/util/output.rb +229 -12
  179. data/lib/rubino/util/secrets_mask.rb +70 -7
  180. data/lib/rubino/util/spill_store.rb +153 -0
  181. data/lib/rubino/version.rb +1 -1
  182. data/lib/rubino/workspace.rb +9 -1
  183. data/lib/rubino.rb +191 -7
  184. data/rubino-agent.gemspec +1 -0
  185. data/skills/ruby-expert/SKILL.md +1 -0
  186. metadata +42 -12
  187. data/lib/rubino/agent/router.rb +0 -65
  188. data/lib/rubino/database/migrations/002_create_runs.rb +0 -45
  189. data/lib/rubino/database/migrations/003_create_skill_states.rb +0 -15
  190. data/lib/rubino/database/migrations/004_create_cron_jobs.rb +0 -36
  191. data/lib/rubino/database/migrations/005_create_oauth_connections.rb +0 -27
  192. data/lib/rubino/database/migrations/006_create_webhook_deliveries.rb +0 -34
  193. data/lib/rubino/database/migrations/007_create_messages_fts.rb +0 -59
  194. data/lib/rubino/database/migrations/008_create_memory_facts.rb +0 -75
  195. data/lib/rubino/database/migrations/009_create_memory_graph.rb +0 -55
  196. data/lib/rubino/database/migrations/010_add_owner_pid_to_sessions.rb +0 -20
@@ -0,0 +1,881 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Rubino
4
+ module Agent
5
+ # Closes the #1 trust-killer: the model ENDS a turn asserting it did or will
6
+ # do something ("Running the tests now", "Saved the file", "Changed
7
+ # directory") with ZERO tool calls, so a fabricated success reaches the user.
8
+ #
9
+ # The structured tool-call channel is the ONLY thing that advances state. A
10
+ # text-only turn whose prose claims an action against a tool rubino exposes
11
+ # is, by construction, a claim with nothing behind it. We do not trust it.
12
+ #
13
+ # Four intents, four outcomes:
14
+ #
15
+ # 1. cd / "change directory" — rubino has NO cd tool, so the claim can NEVER
16
+ # be true. We do not reflect (a reflection would just invite the model to
17
+ # claim it again); we REWRITE the final answer with an honest message
18
+ # explaining how to actually change the workspace (/add-dir or relaunch).
19
+ #
20
+ # 2. any other action verb (run/test/save/write/edit/create/delete/move…)
21
+ # mapped to a tool rubino DOES expose — we REFLECT one corrective turn
22
+ # ("you said you would <X> but issued no tool call; call the tool now or
23
+ # say you cannot and why"), capped at MAX_REFLECTIONS (aider's
24
+ # reflected_message pattern, capped). After the cap the guard becomes
25
+ # BINDING: it REPLACES the fabricated final answer with a deterministic
26
+ # honest message rather than letting the model's "done" reach the user
27
+ # (G1). The structured tool-call channel is the only thing that advances
28
+ # state, so a terminal turn that still asserts a mutation with zero tool
29
+ # calls has, by construction, changed nothing — and we say exactly that.
30
+ #
31
+ # 3. a tool was DENIED or BLOCKED this turn (user-denied, or headless
32
+ # fail-closed "needs approval but no interactive session"), and the model
33
+ # then narrates success OR hands back a fabricated unified diff/patch for
34
+ # files it never wrote (F1/F2). The action did NOT happen, so the diff is
35
+ # not a real artifact; we REPLACE the answer with the honest "that was
36
+ # blocked — nothing was applied; pass --yolo (or approve interactively)"
37
+ # so a plausible-looking but partly-invented diff can never stand as if
38
+ # real and get `git apply`-ed.
39
+ #
40
+ # 4. the INVERSE (#381, PESSIMISTIC fabrication): a turn that ACTUALLY ran
41
+ # tools (often a budget/rate-limit exhausted turn whose forced summary the
42
+ # model writes pessimistically) ends with a confident claim that NOTHING
43
+ # happened — "I have not read a single file, not run grep, not made any
44
+ # edits" — even though the ledger shows N tool calls and real edits on
45
+ # disk. Letting that stand makes a developer believe no work was done and
46
+ # miss correct, uncommitted changes. The harness — not the narration — is
47
+ # the authority on side-effects (the same principle as 1–3, mirrored): when
48
+ # the final answer asserts no/zero actions yet the tool-call ledger shows
49
+ # tools DID run, we RECONCILE it with a truthful harness note ("N tool
50
+ # calls ran this turn (M edits — review uncommitted changes)"). This path
51
+ # is the ONLY one that fires when tool_count > 0; it keys on the ledger, not
52
+ # on exact wording, so it stays model-agnostic.
53
+ #
54
+ # Deliberately conservative — it must never nag a legitimate text answer:
55
+ # * Outside cases 3–4, only fires when the WHOLE turn ran zero tools AND zero denied tools.
56
+ # A turn that ran (or had denied) any tool is the model acting/recovering,
57
+ # not fabricating; its closing prose is a real summary.
58
+ # * The verb must be asserted as the assistant's OWN action in the
59
+ # present-progressive / just-completed / immediate-future ("I'll run…",
60
+ # "Running…", "I ran…", "Saved…", "Done — created…"), not described
61
+ # ("you can run…", "to run the tests…", "the test command is…").
62
+ # * A turn that ASKS the user something (ends on a question) is a legitimate
63
+ # clarify, not a fabricated completion — left alone.
64
+ class ActionClaimGuard
65
+ # Absolute ceiling on corrective turns. After this many the guard becomes
66
+ # BINDING (G1): it stops re-prompting and surfaces an honest deterministic
67
+ # message rather than loop forever against a model that won't call the tool.
68
+ #
69
+ # Lowered from 3 → 2: three accumulated identical "you lied, nothing
70
+ # happened" challenges drove the model into a confession spiral — it stopped
71
+ # acting and started confabulating false histories / pre-emptively
72
+ # apologising, and a legitimate steering change got stranded for 3 turns
73
+ # (#353b). A fresh atomic instruction recovered the model, so the injected
74
+ # text was the aggravator. We now bind after 2, and the SECOND injection
75
+ # DECAYS to a short, non-accusatory atomic instruction (see
76
+ # #reflection_message) instead of repeating the heavy challenge — so the
77
+ # reflections cannot compound into an inescapable loop.
78
+ MAX_REFLECTIONS = 2
79
+
80
+ # After this many consecutive corrective injections the wording DECAYS to a
81
+ # short, atomic, non-accusatory instruction (a single concrete tool call to
82
+ # make) instead of re-injecting the full "nothing happened" challenge — the
83
+ # repeated heavy framing is what compounds into the confession spiral
84
+ # (#353b). The first challenge still names the fabrication in full.
85
+ DECAY_AFTER_REFLECTIONS = 1
86
+
87
+ # Verbs that imply a state-changing action the agent performs THROUGH a
88
+ # tool. Each maps to the tool name(s) that would actually carry it out, so
89
+ # we only reflect when rubino actually exposes a way to do the claimed
90
+ # thing (no point nagging "I searched the web" if web tools are disabled).
91
+ ACTION_TOOLS = {
92
+ "run" => %w[shell ruby test git github],
93
+ "ran" => %w[shell ruby test git github],
94
+ "execute" => %w[shell ruby],
95
+ "executed" => %w[shell ruby],
96
+ "test" => %w[test shell],
97
+ "tested" => %w[test shell],
98
+ "save" => %w[write edit multi_edit patch],
99
+ "saved" => %w[write edit multi_edit patch],
100
+ "write" => %w[write edit multi_edit patch],
101
+ "wrote" => %w[write edit multi_edit patch],
102
+ "edit" => %w[edit multi_edit write patch],
103
+ "edited" => %w[edit multi_edit write patch],
104
+ "create" => %w[write edit multi_edit],
105
+ "created" => %w[write edit multi_edit],
106
+ "delete" => %w[shell],
107
+ "deleted" => %w[shell],
108
+ "remove" => %w[edit multi_edit shell write],
109
+ "removed" => %w[edit multi_edit shell write],
110
+ "move" => %w[shell],
111
+ "moved" => %w[shell],
112
+ "rename" => %w[shell edit multi_edit],
113
+ "renamed" => %w[shell edit multi_edit],
114
+ "install" => %w[shell],
115
+ "installed" => %w[shell],
116
+ "commit" => %w[git shell],
117
+ "committed" => %w[git shell],
118
+ "push" => %w[git shell],
119
+ "pushed" => %w[git shell],
120
+ "fetch" => %w[web_fetch shell git],
121
+ "fetched" => %w[web_fetch shell git]
122
+ }.freeze
123
+
124
+ # File/state MUTATION verbs — the highest-cost class for a coding agent.
125
+ # A toolless turn that asserts ANY of these as the assistant's own past
126
+ # action ("Updated both methods", "Added the docstring", "I removed mode()")
127
+ # has, by construction, changed nothing on disk. Unlike the verbs above,
128
+ # these are matched ANYWHERE in the message (not just sentence-initial or
129
+ # inside a completion window) and PRIORITISED over a trailing future-intent
130
+ # verb, so a message that bundles a fabricated edit-claim with a "then I'll
131
+ # run the tests" is challenged on the EDIT, not on the trailing run. Each
132
+ # maps to the write-family tool(s) that would actually carry it out, so the
133
+ # claim is only challenged when rubino actually exposed a way to mutate.
134
+ MUTATION_TOOLS = {
135
+ "edited" => %w[edit multi_edit write patch],
136
+ "wrote" => %w[write edit multi_edit patch],
137
+ "written" => %w[write edit multi_edit patch],
138
+ "updated" => %w[edit multi_edit write patch],
139
+ "created" => %w[write edit multi_edit],
140
+ "added" => %w[edit multi_edit write patch],
141
+ "removed" => %w[edit multi_edit write patch shell],
142
+ "saved" => %w[write edit multi_edit patch],
143
+ "modified" => %w[edit multi_edit write patch],
144
+ "renamed" => %w[shell edit multi_edit],
145
+ "deleted" => %w[shell edit multi_edit write],
146
+ "applied" => %w[patch edit multi_edit write],
147
+ "changed" => %w[edit multi_edit write patch],
148
+ "replaced" => %w[write edit multi_edit patch],
149
+ "inserted" => %w[edit multi_edit write patch],
150
+ "appended" => %w[edit multi_edit write patch],
151
+ "fixed" => %w[edit multi_edit write patch]
152
+ }.freeze
153
+
154
+ # The assistant asserts a mutation as its OWN completed action — past-tense
155
+ # mutation verb in a first-person OR a bare/completion narration, ANYWHERE
156
+ # in the text. Built per-verb from MUTATION_TOOLS' keys (which are already
157
+ # the past/participle surface forms). Matches "I updated…", "I've added…",
158
+ # "Updated both methods", "Done. Added the docstring", "✓ wrote the file",
159
+ # "- removed mode()". Deliberately past-tense only: a bare future "I'll
160
+ # update…" with a real tool call is handled by tool_count > 0; a future
161
+ # intent with NO tool call is still a fabrication and is also caught here
162
+ # via the first-person future framing below.
163
+ # Three alternatives, no interspersed comments (a `#` mid-concatenation
164
+ # would break the `\` line-continuation into a bare `(?:`):
165
+ # 1. first-person past / completed — "I updated", "I've added",
166
+ # "I just wrote", "we removed", "now i updated".
167
+ # 2. first-person immediate-future with NO tool call — still a
168
+ # fabrication — "I'll update…", "let me add…", "I will write…".
169
+ # 3. bare sentence-initial / list-item / post-completion past form —
170
+ # "Updated both methods", "Added the docstring", "Done. Wrote the
171
+ # file", "✓ removed mode()", "- created config.rb".
172
+ MUTATION_SELF_SRC =
173
+ "(?:" \
174
+ '\b(?:i|we|i\s?\'?ve|we\s?\'?ve|i\s+have|we\s+have|i\s+just|now\s+i)\b' \
175
+ '\s+(?:just\s+|now\s+|already\s+|go\s+ahead\s+and\s+)?(VERB)\b' \
176
+ '|\b(?:i\s?\'?ll|i\s+will|let\s+me|i\s?\'?m\s+going\s+to|i\s+am\s+going\s+to|' \
177
+ 'going\s+to|about\s+to)\b\s+(?:just\s+|now\s+|go\s+ahead\s+and\s+)?(VERBBASE)\b' \
178
+ '|(?:\A|[.!?\n]\s*|^[-*]\s*|' \
179
+ '(?:\b(?:done|finished|complete|completed|ok|okay)\b|✓|✅|all\s+(?:set|done))[^.!?\n]{0,30}?)' \
180
+ '(VERB)\b' \
181
+ ")"
182
+
183
+ # base form of each mutation verb (for the immediate-future framing above):
184
+ # "I'll update", "let me add". Keyed by the past form stored in
185
+ # MUTATION_TOOLS so the alternation stays in lockstep with the tool map.
186
+ MUTATION_BASE = {
187
+ "edited" => "edit", "wrote" => "write", "written" => "write",
188
+ "updated" => "update", "created" => "create", "added" => "add",
189
+ "removed" => "remove", "saved" => "save", "modified" => "modify",
190
+ "renamed" => "rename", "deleted" => "delete", "applied" => "apply",
191
+ "changed" => "change", "replaced" => "replace", "inserted" => "insert",
192
+ "appended" => "append", "fixed" => "fix"
193
+ }.freeze
194
+
195
+ # State-RESULT phrasing — a fabricated mutation dressed as a fact about the
196
+ # file/state rather than as an action verb: "README.md now contains 'API
197
+ # v2'", "the file now has the import", "X is now set to 5", "the contents
198
+ # now read …", "it now reflects the change". No action verb at all, so the
199
+ # verb-based matchers above miss it entirely (this was the r5c NEW-1 hole).
200
+ # We require a "now" + a state predicate so we don't trip on a plain
201
+ # description ("the file contains a bug"). Backed by the write-family tools.
202
+ STATE_RESULT = Regexp.new(
203
+ '\bnow\s+(?:contains|has|holds|includes|reads|reflects|shows|' \
204
+ 'looks\s+like|points\s+to)\b' \
205
+ '|\b(?:is|are|reads)\s+now\s+(?:set\s+to|equal\s+to|)' \
206
+ '|\bnow\s+(?:set\s+to|equal\s+to)\b' \
207
+ '|\bthe\s+(?:file|contents?|method|function|class|line|import|' \
208
+ 'docstring|code|config(?:uration)?)\b[^.!?\n]{0,40}?\bnow\b' \
209
+ '|\b(?:contents?|file|value|content)\b[^.!?\n]{0,30}?\b(?:is|are)\s+now\b',
210
+ Regexp::IGNORECASE
211
+ )
212
+
213
+ # Git-MUTATION RESULT phrasing — a fabricated VCS mutation narrated as a
214
+ # fact rather than a first-person/sentence-initial action verb. This is the
215
+ # exact G1 shape: "Done. New branch feature/tax … committed as 0f60f1d." —
216
+ # a bare "committed as <sha>", "created (the) branch X", "new branch X",
217
+ # "pushed to origin/X", "the commit is <sha>", "on branch X now". The
218
+ # action-verb matcher misses these (the verb is mid-sentence, no "I", and
219
+ # the completion marker is >20 chars away from the verb), so a hallucinated
220
+ # SHA/branch sailed through to the user. Gated on a git/shell tool being
221
+ # exposed (handled at the call site). A bare SHA on its own is NOT enough
222
+ # (too noisy) — we require a commit/branch/push CONTEXT around it.
223
+ GIT_RESULT = Regexp.new(
224
+ '\bcommitted\s+(?:as|in|with(?:\s+(?:sha|hash|id))?|to)\b' \
225
+ '|\b(?:created|made|added|cut)\s+(?:a\s+|the\s+|new\s+)*branch\b' \
226
+ '|\bnew\s+branch\b[^.!?\n]{0,60}?\b(?:committed|created|with\s+the)\b' \
227
+ '|\bbranch\b[^.!?\n]{0,40}?\bcommitted\s+as\b' \
228
+ '|\b(?:pushed|push(?:ed)?)\s+(?:it\s+)?to\s+(?:origin|remote|the\s+remote)\b' \
229
+ '|\bthe\s+commit\s+(?:is|hash\s+is|sha\s+is)\b' \
230
+ '|\b(?:commit|sha|hash)\s+(?:is\s+)?\b[0-9a-f]{7,40}\b',
231
+ Regexp::IGNORECASE
232
+ )
233
+
234
+ # Base/infinitive surface of a tracked action verb so the claim phrase
235
+ # ("commit that", "run that") fits both the reflection ("you'd <claim>")
236
+ # and the binding replacement ("I did not <claim>") templates. ACTION_TOOLS
237
+ # keys mix base ("run") and past ("ran", "committed") forms; map the past
238
+ # ones back to base, leave the rest as-is.
239
+ ACTION_BASE = {
240
+ "ran" => "run", "executed" => "execute", "tested" => "test",
241
+ "saved" => "save", "wrote" => "write", "edited" => "edit",
242
+ "created" => "create", "deleted" => "delete", "removed" => "remove",
243
+ "moved" => "move", "renamed" => "rename", "installed" => "install",
244
+ "committed" => "commit", "pushed" => "push", "fetched" => "fetch"
245
+ }.freeze
246
+
247
+ # The write-family tools any mutation/state-result claim needs on offer for
248
+ # the guard to challenge it — no point challenging "the file now contains X"
249
+ # if rubino has no way to write at all this turn.
250
+ WRITE_FAMILY = %w[write edit multi_edit patch].freeze
251
+
252
+ # The VCS tools a fabricated git-mutation RESULT ("committed as <sha>")
253
+ # needs on offer for the guard to challenge it.
254
+ GIT_TOOLS = %w[git github shell].freeze
255
+
256
+ # The text honestly reports the block instead of fabricating success —
257
+ # "it was blocked", "nothing was applied", "not run/applied", "wasn't run",
258
+ # "needs approval", "no interactive session". Lets a denied/blocked turn
259
+ # that owns up be surfaced as-is; a fabricated diff dressed in honest words
260
+ # is still caught by FABRICATED_DIFF (checked first).
261
+ BLOCKED_HONEST = Regexp.new(
262
+ '\b(?:was|were|is|got)\s+blocked\b' \
263
+ '|\bnothing\s+(?:was|were|got)?\s*(?:applied|changed|written|run|saved)\b' \
264
+ '|\bnot\s+(?:been\s+)?(?:applied|run|executed|saved|written|committed)\b' \
265
+ '|\b(?:was|were)n\s?\'?t\s+(?:applied|run|executed|saved|written|committed)\b' \
266
+ '|\bneeds?\s+approval\b' \
267
+ '|\bno\s+interactive\s+session\b',
268
+ Regexp::IGNORECASE
269
+ )
270
+
271
+ # tracked verb (base or past key) => [progressive, past] surface forms. Stored explicitly rather
272
+ # than derived so English irregulars (run→running→ran, write→writing→wrote)
273
+ # are correct. Fuels the bare-lead / completion-lead / first-person matches.
274
+ SURFACE_FORMS = {
275
+ "run" => %w[running ran], "ran" => %w[running ran],
276
+ "write" => %w[writing wrote], "wrote" => %w[writing wrote],
277
+ "save" => %w[saving saved], "saved" => %w[saving saved],
278
+ "edit" => %w[editing edited], "edited" => %w[editing edited],
279
+ "create" => %w[creating created], "created" => %w[creating created],
280
+ "delete" => %w[deleting deleted], "deleted" => %w[deleting deleted],
281
+ "remove" => %w[removing removed], "removed" => %w[removing removed],
282
+ "move" => %w[moving moved], "moved" => %w[moving moved],
283
+ "rename" => %w[renaming renamed], "renamed" => %w[renaming renamed],
284
+ "commit" => %w[committing committed], "committed" => %w[committing committed],
285
+ "push" => %w[pushing pushed], "pushed" => %w[pushing pushed],
286
+ "fetch" => %w[fetching fetched], "fetched" => %w[fetching fetched],
287
+ "install" => %w[installing installed], "installed" => %w[installing installed],
288
+ "execute" => %w[executing executed], "executed" => %w[executing executed],
289
+ "test" => %w[testing tested], "tested" => %w[testing tested]
290
+ }.freeze
291
+
292
+ # First-person assertion that the action is happening / happened / is about
293
+ # to happen — NOT a description offered to the user. We require one of these
294
+ # framings immediately around a tracked verb so "you can run the tests"
295
+ # never trips, but "I'll run the tests", "running the tests now",
296
+ # "I ran the tests", "saved the file" do.
297
+ # subject framings: "i", "i'll", "i've", "i have", "let me", "i'm",
298
+ # "i am", "i will", "i just", "going to", "about to"
299
+ # bare-progressive / bare-past at sentence start: "running…", "saved…"
300
+ # "i'll RUN", "i RAN", "let me SAVE" — the VERB placeholder is filled per
301
+ # call. Built from a String (not a regex literal) so the path/comment
302
+ # slashes elsewhere in this file never collide with the regex delimiter.
303
+ FIRST_PERSON_VERB_SRC =
304
+ '(?:\b(?:i\s?\'?ll|i\s?\'?ve|i\s+have|i\s+will|i\s+just|i\s?\'?m|i\s+am|' \
305
+ 'let\s+me|going\s+to|about\s+to|now\s+i|i)\b\s+' \
306
+ '(?:just\s+|now\s+|go\s+ahead\s+and\s+)?)(VERB)\w*'
307
+
308
+ # Bare sentence-initial progressive/past at the start, after a sentence end,
309
+ # or as a list item: "Running the suite now.", "Saved to foo.py and ran
310
+ # it.", "Created the file." — a common MiniMax-M3 narration with no "I".
311
+ BARE_LEAD_VERB_SRC = '(?:\A|[.!?\n]\s*|^[-*]\s*)(VERBING)\b'
312
+
313
+ # A completion marker ("done", "✓", "all set", "all done", "finished")
314
+ # immediately before a past/progressive verb form — "Done — created the
315
+ # file.", "✓ saved.", "All set, removed mode()." — is the model declaring
316
+ # the work finished. The verb may sit up to ~20 non-period chars after.
317
+ COMPLETION_LEAD_SRC =
318
+ '(?:\b(?:done|finished|complete|completed)\b|✓|✅|all\s+(?:set|done))' \
319
+ '[^.!?\n]{0,20}?\b(VERBING)\b'
320
+
321
+ # cd / change-directory intent. rubino has no cd tool, so ANY first-person
322
+ # claim to change the working directory is unfulfillable — handled
323
+ # separately (honest rewrite, never a reflection). "cd /path", "cd ~",
324
+ # "changed the working directory", "switched to the folder", "moved into".
325
+ CD_INTENT = Regexp.new(
326
+ "(?:" \
327
+ '\bcd\s+[~/.]' \
328
+ '|\bchang(?:e|ed|ing)\b[^.\n]{0,40}\b(?:working\s+)?(?:dir(?:ectory)?|cwd|folder|workspace)\b' \
329
+ '|\bswitch(?:ed|ing)?\b[^.\n]{0,40}\b(?:to\s+the\s+)?(?:dir(?:ectory)?|cwd|folder)\b' \
330
+ '|\bmov(?:e|ed|ing)\s+(?:in)?to\s+(?:the\s+)?[~.][\w./-]*' \
331
+ ")",
332
+ Regexp::IGNORECASE
333
+ )
334
+
335
+ # The LATEST user message explicitly requested a NO-ACTION turn — a plan,
336
+ # a list, an explanation, a recall-from-memory answer, or it forbade tools
337
+ # outright ("do not implement yet", "without using any tools", "answer from
338
+ # memory"). On such a turn the model is SUPPOSED to produce prose and call
339
+ # no tool, so its "here's the plan; I'll add X next" is the requested
340
+ # deliverable, NOT a fabricated "done" — challenging it makes the model
341
+ # apologise for obeying. We detect a small set of no-action intents and skip
342
+ # the claim-challenge for that turn. Deliberately narrow: a plain task
343
+ # request ("add the docstring", "run the tests") matches NONE of these, so
344
+ # the anti-fabrication core still fires on real task turns.
345
+ #
346
+ # * explicit tool prohibition — "do not / don't run|use|call … tool(s)",
347
+ # "without (using) (any) tools", "no tools".
348
+ # * answer-from-memory / recall — "from memory", "from what you know/recall",
349
+ # "don't look it up", "without reading".
350
+ # * plan / don't-implement-yet — "don't implement (yet)", "just (the)
351
+ # plan", "outline/list the plan/steps",
352
+ # "plan only", "before you implement".
353
+ # * explain/describe-only ask — "just explain|describe|tell me|summarize",
354
+ # "explanation only", "no code".
355
+ # NOTE: no interspersed comments inside this concatenation — a `#` would
356
+ # break the `\` line-continuation (same gotcha noted at MUTATION_SELF_SRC above). The four
357
+ # intent groups are, in order: (1) explicit tool prohibition; (2)
358
+ # answer-from-memory / recall / don't-look-up; (3) plan / don't-implement-yet;
359
+ # (4) explain/describe-only ask.
360
# Case-insensitive; a single non-capturing group of alternatives, so the
# request counts as a no-action turn as soon as ANY one branch matches
# anywhere in the message.
# NOTE(review): the first branch pairs a negation with edit/write/change/
# modify within 30 characters, so a task request such as "don't change the
# API, just add the docstring" also matches — confirm that is intended.
NO_ACTION_REQUEST = Regexp.new(
  "(?:" \
  '\b(?:do\s+not|don\s?\'?t|dont|please\s+do\s+not|please\s+don\s?\'?t)\b' \
  '[^.!?\n]{0,30}?\b(?:run|use|call|invoke|execute|touch|edit|write|' \
  'implement|change|modify)\b' \
  '|\bwithout\s+(?:using\s+|running\s+|calling\s+|invoking\s+)?' \
  '(?:any\s+)?(?:tools?|tool\s+calls?|the\s+tools?)\b' \
  '|\bno\s+tools?\b|\bdon\s?\'?t\s+(?:use|call|run)\s+(?:any\s+)?tools?\b' \
  '|\b(?:from|out\s+of)\s+(?:your\s+)?memory\b' \
  '|\bfrom\s+(?:what\s+you\s+(?:know|recall|remember))\b' \
  '|\b(?:answer|recall|tell\s+me)\b[^.!?\n]{0,30}?\bfrom\s+memory\b' \
  '|\bwithout\s+(?:reading|looking\s+(?:it\s+)?up|searching|checking)\b' \
  '|\bdon\s?\'?t\s+(?:look\s+(?:it\s+)?up|read|search|check)\b' \
  '|\b(?:do\s+not|don\s?\'?t|dont)\b[^.!?\n]{0,20}?\bimplement\b' \
  '|\bimplement\b[^.!?\n]{0,10}?\b(?:nothing|yet)\b' \
  '|\bbefore\s+(?:you\s+)?implement(?:ing)?\b' \
  '|\b(?:just|only)\b[^.!?\n]{0,20}?\bthe\s+plan\b' \
  '|\bplan\s+only\b|\bonly\s+(?:the\s+)?plan\b' \
  '|\b(?:outline|list|describe|sketch|propose|give\s+me|show\s+me)\b' \
  '[^.!?\n]{0,30}?\b(?:plan|steps|approach|strategy)\b' \
  '|\b(?:just|only|simply)\b[^.!?\n]{0,15}?\b(?:explain|describe|tell\s+me|' \
  'summarize|summarise|outline)\b' \
  '|\b(?:explanation|description)\s+only\b|\bno\s+code\b' \
  ")",
  Regexp::IGNORECASE
)
386
+
387
+ # The text plainly admits the action did NOT / cannot happen — an honest
388
+ # non-completion, not a fabricated "done". A bare "can't"/"unable" anywhere
389
+ # in the answer is enough; this only EXEMPTS, never accuses, so a generous
390
+ # match is safe.
391
# Case-insensitive and unanchored: one inability phrase anywhere in the text
# is a match. The optional `\s?` / `\'?` pieces tolerate "cant", "can't" and
# "can 't" spellings of the contractions.
INABILITY = Regexp.new(
  '\b(?:can\s?\'?t|cannot|could\s?n\'?t|unable\s+to|won\s?\'?t\s+be\s+able|' \
  'don\s?\'?t\s+have|do\s+not\s+have|no\s+(?:such|test|way\s+to)|' \
  'not\s+able\s+to|wasn\s?\'?t\s+able|isn\s?\'?t\s+(?:a|any)|there\s+(?:is|are)\s+no)\b',
  Regexp::IGNORECASE
)
397
+
398
+ # PESSIMISTIC "I did NOTHING" claim (#381) — the inverse of every claim
399
+ # above. The model asserts it took no action at all: "I have not read a
400
+ # single file", "no tools were run/called", "I made no edits", "nothing was
401
+ # done/changed", "I didn't run/use any tools", "I have done nothing". A small
402
+ # phrase set is enough as the TRIGGER condition — we only act after VERIFYING
403
+ # it against the harness tool-call ledger (tool_count > 0), so a false
404
+ # positive here is harmless: the ledger gate, not the wording, decides.
405
+ # Kept model-agnostic (negations of read/run/edit/write/grep/search/tool +
406
+ # "nothing"/"no … was done" shapes), not a single provider's phrasing.
407
# Case-insensitive union of six shapes, in order:
#   1. negation ("have not", "didn't", bare "not", ...) followed within 40
#      same-sentence characters by an action verb;
#   2. action verb + "no" + object noun ("made no edits");
#   3. "no|zero" + object noun, optionally followed by a passive verb
#      ("no tools were run");
#   4. "nothing / no action / no work / not a single X" + passive verb;
#   5. "I|we" + verb + "nothing";
#   6. "not a single file|tool|edit|command|change".
# NOTE(review): shape 1 accepts a bare "not" and shape 3 accepts a bare
# "no files"/"no changes", so ordinary prose can match — the caller's ledger
# check is what keeps this from firing on toolless turns.
NO_ACTION_CLAIM = Regexp.new(
  '\b(?:have\s+not|haven\s?\'?t|did\s+not|didn\s?\'?t|have\s+no|having\s+not|' \
  'was\s+not\s+able\s+to|were\s+not\s+able\s+to|not)\b' \
  '[^.!?\n]{0,40}?' \
  '\b(?:read|run|ran|execute[d]?|use[d]?|call(?:ed)?|invoke[d]?|grep(?:ped)?|' \
  "search(?:ed)?|made|make|edit(?:ed)?|written|wrote|create[d]?|change[d]?|" \
  'modif(?:y|ied)|touch(?:ed)?|appl(?:y|ied)|do|done|perform(?:ed)?|take|taken|took)\b' \
  '|\b(?:made|make|did|do|ran|run|read|wrote|written|applied|performed|took|taken)\b' \
  '\s+(?:any\s+)?\bno\b\s+(?:tool[\s-]*calls?|tools?|files?|edits?|changes?|' \
  'actions?|commands?|modifications?)\b' \
  '|\b(?:no|zero)\s+(?:tool[\s-]*calls?|tools?|files?|edits?|changes?|actions?|' \
  'commands?|modifications?)\b\s*' \
  '(?:were\s+|was\s+|have\s+been\s+|been\s+|are\s+)?' \
  '(?:run|ran|made|called|executed|invoked|read|performed|taken|applied)?\b' \
  '|\b(?:nothing|no\s+action|no\s+work|not\s+a\s+single\s+\w+)\s+' \
  '(?:was|were|has\s+been|have\s+been|got)\s+' \
  '(?:done|run|made|changed|read|executed|performed|taken|applied|edited|written)\b' \
  '|\b(?:i|we)\s+(?:have\s+|had\s+)?(?:did|do|done|made|changed|read|run|' \
  'executed|performed|accomplished)\s+(?:absolutely\s+|literally\s+)?nothing\b' \
  '|\bnot\s+a\s+single\s+(?:file|tool|edit|command|change)\b',
  Regexp::IGNORECASE
)
429
+
430
+ # The tools whose execution actually MUTATES disk state — an "I made no
431
+ # edits" claim is most misleading when these ran. Used only to label the
432
+ # truthful harness note ("M edits"); the reconciliation itself fires on ANY
433
+ # tool having run, since "I read nothing" is equally false when a read ran.
434
+ MUTATING_TOOLS = %w[edit multi_edit write patch].freeze
435
+
436
+ # Build a guard for one turn. `exposed_tool_names` is the set of tool names
437
+ # the model actually had this turn (Loop's @turn_tools) — we only reflect a
438
+ # verb whose backing tool was on offer.
439
def initialize(exposed_tool_names:)
  # Accept nil, a single name or any collection; store a frozen,
  # de-duplicated list of String names so the `include?` checks made by the
  # matchers compare like with like.
  names = Array(exposed_tool_names).map { |name| name.to_s }
  @exposed = names.uniq.freeze
end
442
+
443
+ # The corrective user message injected when a tracked action verb appears in
444
+ # a toolless turn. Names the offending claim so the model self-corrects.
445
+ #
446
+ # `prior_reflections` is how many corrective injections this turn ALREADY
447
+ # had. On the FIRST challenge (0 prior) we name the fabrication in full. On
448
+ # a later one (>= DECAY_AFTER_REFLECTIONS) we DECAY to a short, atomic,
449
+ # NON-accusatory instruction — repeating the heavy "you lied, nothing
450
+ # happened" framing is what compounded into the confession spiral (#353b);
451
+ # a single concrete "make the tool call or say you can't, in one line" is
452
+ # what actually recovered a stuck model.
453
def reflection_message(claimed_verb, prior_reflections: 0)
  # Below the decay threshold: name the unbacked claim in full and offer the
  # two honest ways forward (do it for real, or admit it was not done).
  if prior_reflections < DECAY_AFTER_REFLECTIONS
    [
      "You said you'd #{claimed_verb} but issued NO tool call, so nothing ",
      "actually happened — that text is not a real result and the file is ",
      "unchanged on disk. Do ONE of two things now: (a) make the actual tool ",
      "call to carry it out, or (b) if you cannot (missing info, blocked, ",
      "denied, or no such capability), say plainly that you did NOT do it and ",
      "explain why. Do NOT restate that it is done."
    ].join
  else
    # At or past the threshold: a short, non-accusatory single instruction.
    [
      "Still no tool call. Don't apologise or re-explain — just make ",
      "ONE actual tool call now to #{claimed_verb}, or reply in a ",
      "single line that you cannot and why."
    ].join
  end
end
467
+
468
+ # A fabricated unified diff / patch / git-apply artifact in the prose —
469
+ # the F1 class: when its write tool is blocked, the model hands back a
470
+ # confident "ready to `git apply`" diff for files it never read, with
471
+ # invented hunks that would CORRUPT those files if applied. We detect the
472
+ # diff shape (a `--- a/…` + `+++ b/…` header, a `@@ … @@` hunk header, an
473
+ # explicit "git apply"/"apply this patch", or a ```diff/```patch fence) so
474
+ # that, on a denied/blocked turn, the diff is never surfaced as if it were
475
+ # a real, applicable artifact.
476
# Five alternatives, case-insensitive: a `---`/`+++` header pair on
# consecutive lines, a `@@ … @@` hunk header at the start of a line, the
# phrase "git apply", the phrase "apply (this) (the) patch", or a fenced
# block opened with ```diff / ```patch. Any one of them is a match.
FABRICATED_DIFF = Regexp.new(
  '^\s*---\s+a?/?\S.*\n\+\+\+\s+b?/?\S' \
  '|^\s*@@\s.*@@' \
  '|\bgit\s+apply\b' \
  '|\bapply\s+(?:this\s+)?(?:the\s+)?patch\b' \
  '|```(?:diff|patch)\b',
  Regexp::IGNORECASE
)
484
+
485
+ # The honest answer that REPLACES a fabricated "I did the mutation" final
486
+ # answer once the reflection budget is spent (G1, BINDING). The model ran
487
+ # zero tools, so nothing changed on disk; we say so deterministically and
488
+ # name the claim, instead of letting its fabricated "Done. committed as
489
+ # <sha>" stand. `claim` is the human-readable phrase the guard already
490
+ # built ("committed the change", "the file now …").
491
def replacement_for_fabrication(claim)
  # `claim` is spliced after "I did not", so it is expected to be a
  # base-form phrase ("run that", "update the file").
  [
    "No tool call was made, so nothing was changed on disk — I did not ",
    "#{claim}. (The previous lines claiming otherwise were not backed by ",
    "any action and are not a real result.) Tell me to proceed and I'll ",
    "actually run the tool to carry it out."
  ].join
end
497
+
498
+ # The honest answer that REPLACES a success-narration OR a fabricated diff
499
+ # emitted AFTER a tool was denied/blocked this turn (F1/F2). The action was
500
+ # blocked and nothing was applied; any diff in the text is not a real,
501
+ # applicable artifact. `noninteractive` tailors the escape hatch: headless
502
+ # fail-closed → `--yolo` (and notes approvals.mode: skip no longer
503
+ # auto-runs non-interactively, #281/F2); user-denied → re-ask/approve.
504
def replacement_for_blocked(noninteractive:)
  # Both variants open and close the same way; only the middle sentence (how
  # to get the action to run) depends on whether the block was headless.
  pieces = ["That action was blocked, so nothing was applied. "]
  pieces <<
    if noninteractive
      "To run it non-interactively pass `--yolo` " \
        "(note: `approvals.mode: skip` no longer auto-runs non-interactively " \
        "for safety — use `--yolo`), or run rubino interactively and approve " \
        "the action."
    else
      "Approve the action (or re-run and allow it) " \
        "if you want me to carry it out."
    end
  pieces << " Any diff or \"done\" above is not " \
            "a real, applied change — I did not read/write those files, so I'm not " \
            "presenting it as something to `git apply`."
  pieces.join
end
519
+
520
+ # The honest answer that REPLACES a fabricated "I changed the directory"
521
+ # final turn. rubino genuinely cannot cd, so we tell the truth and point at
522
+ # the real mechanisms instead of letting the model claim a no-op.
523
# Fixed, user-facing text; `evaluate` returns it as the payload of its
# `[:cd, msg]` verdict.
CD_HONEST_ANSWER =
  "I can't change my working directory — I have no `cd` tool, and each command " \
  "runs from the session's workspace root, so a `cd` would not persist anyway. " \
  "To work against another directory, either add it with `/add-dir <path>` " \
  "(grants access this session) or relaunch rubino from that directory. " \
  "If you want, tell me the path and I'll run commands against it explicitly " \
  "(e.g. by passing the full path to each command)."
530
+
531
+ # The verdict for a finished, TEXT-ONLY turn.
532
+ #
533
+ # tool_count — tools that actually ran this turn (Loop's @tool_count)
534
+ # denied_count — tools denied/blocked this turn (Loop's @denied_count):
535
+ # user-denied AND headless fail-closed both count here.
536
+ # content — the assistant's final text
537
+ # noninteractive — true when a denial this turn was a headless
538
+ # "no interactive session" block (#260), so the honest
539
+ # message can point at `--yolo` (F2) vs "approve it".
540
+ # terminal — true on the LAST chance (reflection budget exhausted):
541
+ # the guard must now be BINDING and REPLACE the answer
542
+ # rather than ask for one more corrective turn (G1).
543
+ # user_request — the LATEST genuine user message that drove this turn (the
544
+ # Loop passes the originating request, NOT a guard
545
+ # reflection). When it requested a NO-ACTION turn (plan /
546
+ # list / explain / "don't run" / "without tools" / "from
547
+ # memory"), the model is SUPPOSED to answer in prose with no
548
+ # tool call, so we skip the claim-challenge for this turn.
549
+ #
550
+ # Returns one of:
551
+ # nil — no fabrication detected; surface the text as-is.
552
+ # [:cd, msg] — replace the final answer with the honest cd message.
553
+ # [:blocked, msg] — replace the answer: a tool was denied/blocked yet the
554
+ # text narrates success or emits a fabricated diff.
555
+ # [:reflect, vb] — reflect a corrective turn; `vb` is the claimed verb.
556
+ # [:replace, msg] — BINDING terminal override: replace the fabricated
557
+ # "done" final text with the honest deterministic msg.
558
+ #
559
+ # The Loop decides what to do with each (rewrite vs re-enter the loop), and
560
+ # owns the MAX_REFLECTIONS cap (passing terminal: once it is reached).
561
def evaluate(content:, tool_count:, denied_count:, noninteractive: false,
             terminal: false, user_request: nil)
  # The checks below are an ordered chain of early returns: each exemption
  # must run before the accusation that follows it, so the order is part of
  # the contract. Every count is coerced with #to_i and content with #to_s,
  # so nil arguments behave like 0 / "".
  text = content.to_s
  # Nothing to judge in an empty answer.
  return nil if text.strip.empty?
  # This verdict only covers turns in which NO tool ran.
  return nil unless tool_count.to_i.zero?

  # A tool was DENIED/BLOCKED this turn but none RAN. If the text then
  # narrates success or hands back a fabricated diff/patch for files it
  # never wrote (F1/F2), the action did NOT happen — replace it with the
  # honest "blocked, nothing applied, use --yolo" message so the invented
  # diff can never read as an applicable artifact. An honest "it was
  # blocked / I couldn't" answer is left alone. Note that this branch always
  # returns, so none of the later exemptions apply to a denied turn.
  if denied_count.to_i.positive?
    return nil unless blocked_but_claims?(text)

    return [:blocked, replacement_for_blocked(noninteractive: noninteractive)]
  end

  # A turn that ends by asking the user is a legitimate clarify, not a
  # claimed completion.
  return nil if asks_user?(text)

  # The user EXPLICITLY requested a no-action turn (a plan, a list, an
  # explanation, an answer from memory, or "don't run/use any tools"). On
  # such a turn the model is supposed to produce prose and call no tool, so
  # its "here's the plan; I'll add X next" is the requested deliverable, not
  # a fabricated "done". Skip the claim-challenge so the guard doesn't make
  # the model apologise for obeying. (#353a) A plain task request matches
  # none of these, so the anti-fabrication core still fires on real tasks.
  return nil if no_action_requested?(user_request)

  # Checked before the inability exemption, so a cd claim is replaced even
  # when the text also contains an inability phrase.
  return [:cd, CD_HONEST_ANSWER] if cd_intent?(text)

  # The model already owned up that it could NOT do the thing ("I can't
  # run it because…", "unable to", "there is no test file"). An action
  # verb in that sentence is honest framing, not a fabricated success —
  # don't nag it.
  return nil if honest_inability?(text)

  # HIGHEST-COST class first: a fabricated file/state/git MUTATION
  # ("Updated both methods", "committed as 0f60f1d", "README now contains
  # 'API v2'") anywhere in the message. Prioritised over the trailing-intent
  # verb below so a message that bundles a fake edit-claim with a "then I'll
  # run the tests" is challenged on the EDIT, not the trailing run (r5c B1).
  # `||` short-circuits, so the first matcher to return a phrase wins.
  claim = fabricated_git_result(text) ||
          fabricated_mutation(text) ||
          fabricated_action_verb(text)
  return nil if claim.nil?

  # BINDING terminal override (G1): the reflection budget is spent and the
  # model is STILL asserting a mutation it never made. Don't surface the
  # fabrication — replace it with the honest deterministic message. Off the
  # terminal turn we ask for one corrective turn first.
  return [:replace, replacement_for_fabrication(claim)] if terminal

  [:reflect, claim]
end
618
+
619
+ # PESSIMISTIC reconciliation (#381) — the INVERSE of #evaluate, and the only
620
+ # guard path that fires when tools DID run. A turn that genuinely executed
621
+ # tool calls (typically the budget/rate-limit-exhausted FORCED summary) can
622
+ # end with a confident "I did nothing — not a single file read, no edits"
623
+ # even though the harness ledger shows N tool calls and M real edits. Letting
624
+ # that stand makes the user believe no work happened and miss correct,
625
+ # uncommitted changes. The harness is the authority on side-effects, so we
626
+ # reconcile: append a truthful note naming what the ledger actually recorded.
627
+ #
628
+ # content — the final assistant text (the summary).
629
+ # tool_count — tools that actually RAN this turn (Loop's @tool_count).
630
+ # edit_count — of those, how many were MUTATING (Loop's @edit_count).
631
+ #
632
+ # Returns the harness diagnostic note ALONE (or nil), without splicing it
633
+ # into the answer text. The Loop routes this to STDERR / an event instead
634
+ # of appending it to the returned answer, so the note never pollutes
635
+ # `--output-format text` stdout (#418). Returns nil to leave the summary
636
+ # alone (the only-safe default) unless ALL hold:
637
+ # * at least one tool ran (the ledger has something to contradict);
638
+ # * the text actually CLAIMS no/zero action was taken (the small phrase set
639
+ # above — the trigger), so a truthful "I ran X then Y" summary that names
640
+ # its tools is left completely alone;
641
+ # * the text does NOT already own up to the count (so we never double-note).
642
def pessimistic_summary_note(content:, tool_count:, edit_count: 0)
  summary = content.to_s
  executed = tool_count.to_i

  # All three must hold, checked cheapest-first: something actually ran, the
  # summary claims nothing did, and it does not already cite the real count.
  contradicts_ledger = executed.positive? &&
                       NO_ACTION_CLAIM.match?(summary) &&
                       !already_acknowledges_ledger?(summary, executed)
  return nil unless contradicts_ledger

  harness_ledger_note(executed, edit_count.to_i)
end
652
+
653
+ # The truthful, harness-authored line appended to (or standing in for) a
654
+ # pessimistic summary. Keyed entirely on the ledger counts, never on wording.
655
def harness_ledger_note(tool_count, edit_count)
  # Singular/plural nouns are picked from the counts.
  call_noun = tool_count == 1 ? "call" : "calls"

  # The parenthetical about edits is included only when at least one edit
  # was counted.
  edit_clause = ""
  if edit_count.positive?
    edit_noun = edit_count == 1 ? "edit" : "edits"
    edit_clause = " (#{edit_count} #{edit_noun} — review uncommitted changes)"
  end

  "[harness note] That summary is not accurate: #{tool_count} tool " \
    "#{call_noun} actually ran this turn#{edit_clause}. The " \
    "tool-call ledger — not the summary — is the record of what happened, so " \
    "review the working tree for real, possibly uncommitted, changes before " \
    "assuming nothing was done."
end
668
+
669
+ private
670
+
671
+ # The summary already states, truthfully, that the tools ran (it cites the
672
+ # real count or owns the work) — so the pessimistic "I did nothing" trigger
673
+ # was a false positive (e.g. "I ran 5 tools but made no DB changes") and we
674
+ # must not append a contradicting note. We only suppress when the EXACT ran
675
+ # count appears next to a tool/call/ran word, which a genuine "I did nothing"
676
+ # confabulation never contains.
677
def already_acknowledges_ledger?(text, ran)
  # The exact count as a standalone number, followed within 30 same-sentence
  # characters by a tool/call/ran/executed/edit word.
  cites_real_count = /\b#{ran}\b[^.!?\n]{0,30}?\b(?:tool|call|ran|executed|edit)/i
  return true if cites_real_count.match?(text)

  # A "harness note" is already present in the text.
  /\bharness\s+note\b/i.match?(text)
end
681
+
682
+ # The latest user request asked for a NO-ACTION turn (plan / list / explain
683
+ # / recall-from-memory / explicit "don't run|use tools"). nil/blank request
684
+ # → not a no-action turn (we can't tell, so we fall through to the normal
685
+ # fabrication checks — fail-safe toward catching fabrications). (#353a)
686
def no_action_requested?(user_request)
  request = user_request.to_s
  # A nil or blank request can never count as a no-action request.
  !request.strip.empty? && NO_ACTION_REQUEST.match?(request)
end
692
+
693
def honest_inability?(text)
  # True when the text contains any phrase matched by INABILITY.
  INABILITY.match?(text)
end
696
+
697
+ # After a tool was DENIED/BLOCKED this turn, does the text still try to pass
698
+ # off the action as done — by narrating success (a mutation/action claim or
699
+ # a state-result), OR by handing back a fabricated unified diff/patch for
700
+ # files it never wrote (F1)? A fabricated diff fires on its own (the most
701
+ # dangerous artifact: it reads as `git apply`-able). A plain honest "it was
702
+ # blocked / I couldn't" with no success-claim and no diff is left alone.
703
def blocked_but_claims?(text)
  if FABRICATED_DIFF.match?(text)
    # A diff-shaped artifact is flagged unconditionally — even when the
    # surrounding prose also admits the block.
    true
  elsif honest_inability?(text) || blocked_honest?(text)
    # An honest "blocked / can't / nothing was applied" answer with no diff
    # is a real deny-recovery summary — leave it alone.
    false
  else
    # Otherwise any success-shaped claim counts. The matchers return phrases
    # or nil, so the result is normalised to a strict boolean.
    success_claim = cd_intent?(text) ||
                    fabricated_git_result(text) ||
                    fabricated_mutation(text) ||
                    fabricated_action_verb(text) ||
                    STATE_RESULT.match?(text)
    success_claim ? true : false
  end
end
715
+
716
+ # A fabricated VCS-mutation RESULT narrated as fact ("committed as 0f60f1d",
717
+ # "created branch feature/tax", "pushed to origin/main") — the G1 shape the
718
+ # verb matchers miss. Gated on a git/shell tool being on offer, and on the
719
+ # claim NOT being 2nd-person advice. Returns the human-readable claim phrase
720
+ # ("make that git change (commit/branch/push)") or nil.
721
def fabricated_git_result(text)
  # Three gates, cheapest first: a git-capable tool was exposed this turn,
  # the message is not pure advice to the user, and it contains a git-result
  # phrase. Falls through to nil when any gate fails.
  git_on_offer = GIT_TOOLS.any? { |tool| @exposed.include?(tool) }
  if git_on_offer && !advice_only?(text) && GIT_RESULT.match?(text)
    "make that git change (commit/branch/push)"
  end
end
728
+
729
def blocked_honest?(text)
  # True when the text matches BLOCKED_HONEST (defined elsewhere in this
  # class; presumably the "it was blocked / nothing was applied" phrasings).
  BLOCKED_HONEST.match?(text)
end
732
+
733
+ # The text claims to have changed the working directory — and rubino can't.
734
def cd_intent?(text)
  # A cd phrase must be present AND be framed as the assistant's own doing:
  # either a first-person marker appears somewhere in the text, or the cd
  # phrase sits in the opening clause (a bare "Changed directory." lead-in).
  # Advice to the user ("you can cd into …") satisfies neither.
  CD_INTENT.match?(text) &&
    (first_person_anywhere?(text) || CD_INTENT.match?(leading_clause(text)))
end
741
+
742
+ # The first tracked action verb the text asserts as the assistant's own
743
+ # doing, whose backing tool rubino actually exposed this turn, as a
744
+ # base-form claim phrase ("run that", "commit that") that reads naturally
745
+ # in BOTH "You said you'd <claim>" (reflection) and "I did not <claim>"
746
+ # (binding replacement). nil when none.
747
def fabricated_action_verb(text)
  # First (verb, tools) entry, in table order, whose backing tool was
  # exposed this turn and whose verb the text asserts as its own action.
  hit = ACTION_TOOLS.find do |verb, tools|
    tools.any? { |tool| @exposed.include?(tool) } && asserts_verb?(text, verb)
  end

  # nil when nothing qualified; otherwise the base-form claim phrase.
  hit && "#{action_base(hit.first)} that"
end
756
+
757
def action_base(verb)
  # Look the verb up in ACTION_BASE; a verb with no entry is returned
  # unchanged.
  ACTION_BASE.fetch(verb, verb)
end
760
+
761
+ # The first fabricated file/state MUTATION the toolless turn asserts, as a
762
+ # human-readable, base-form claim phrase for the reflection ("<base verb>
762
+ # the file", or "make that change to the file"). Fires on:
764
+ # * a past-tense mutation verb asserted as the assistant's own action
765
+ # ANYWHERE in the message (not just sentence-initial), or
766
+ # * a state-RESULT phrasing ("X now contains …", "the file now has …").
767
+ # Gated on a write-family tool being exposed and on the claim NOT being
768
+ # 2nd-person advice ("you should add…", "you can write…", "to update…").
769
+ # nil when the turn makes no fabricated mutation claim.
770
def fabricated_mutation(text)
  # No write-family tool on offer, or the message is pure advice to the
  # user: nothing to challenge.
  return nil if WRITE_FAMILY.none? { |tool| @exposed.include?(tool) }
  return nil if advice_only?(text)

  # First past-tense mutation verb, in table order, that is backed by an
  # exposed tool and asserted as the assistant's own action.
  claimed = MUTATION_TOOLS.find do |past, tools|
    tools.any? { |tool| @exposed.include?(tool) } && asserts_mutation?(text, past)
  end

  if claimed
    # Base form so the phrase reads in BOTH "you'd <claim>" and "I did not
    # <claim>"; a verb with no base-form entry is used as-is.
    past = claimed.first
    "#{MUTATION_BASE.fetch(past, past)} the file"
  elsif STATE_RESULT.match?(text)
    # State-result phrasing with no action verb at all ("README now contains
    # 'API v2'") — a mutation dressed as fact.
    "make that change to the file"
  end
end
789
+
790
+ # True when `past` (an already-past/participle mutation form) is asserted as
791
+ # the assistant's OWN completed (or unfulfilled-future) mutation anywhere in
792
+ # the text. Built from MUTATION_SELF_SRC with the past form and its base
793
+ # form (for the "I'll <base>…" future framing) substituted in.
794
def asserts_mutation?(text, past)
  base_form = MUTATION_BASE.fetch(past, past)

  # Fill the template placeholders in this order: "VERBBASE" must go first,
  # because "VERB" is a prefix of it and would otherwise clobber it.
  substitutions = [["VERBBASE", base_form], ["VERB", past]]
  pattern_src = substitutions.reduce(MUTATION_SELF_SRC) do |src, (placeholder, word)|
    src.gsub(placeholder, Regexp.escape(word))
  end

  Regexp.new(pattern_src, Regexp::IGNORECASE).match?(text)
end
800
+
801
+ # The whole message is 2nd-person ADVICE / a how-to, not a 1st-person
802
+ # claim — "you should add the import", "you can write it with…", "to update
803
+ # the file, use the edit tool". A mutation verb in that framing is help
804
+ # text, not a fabricated completion, so the guard must leave it alone. We
805
+ # only treat it as advice when there is NO competing first-person claim, so
806
+ # "I updated it; you can run it" is still challenged.
807
def advice_only?(text)
  # A competing first-person claim disqualifies the "advice" reading, so
  # "I updated it; you can run it" is still challenged.
  mentions_self = /\b(?:i|we)\b\s*'?(?:ll|ve|m)?\b/i.match?(text)
  return false if mentions_self && first_person_anywhere?(text)

  # Two advice shapes: "you can/should/… <verb>" and the how-to
  # "to <verb> …, use the …".
  second_person = /\byou\s+(?:can|could|should|may|might|will|need\s+to|have\s+to|want\s+to)\b/i
  how_to = /\bto\s+\w+[^.!?\n]{0,40}?,?\s*use\s+the\b/i
  [second_person, how_to].any? { |pattern| pattern.match?(text) }
end
815
+
816
+ # True when `verb` appears in a first-person "I'll/I just/now" framing OR as
817
+ # a bare sentence-initial progressive/past — the narration shapes that mean
818
+ # "I am doing / did this", as opposed to describing it to the user.
819
def asserts_verb?(text, verb)
  # The verb plus all its tracked surface forms, as a regex alternation, so
  # "i've written" / "i ran" match the first-person framing too.
  alternation = verb_alternation(verb)

  # Three narration shapes, tried in order and built lazily so a later
  # pattern is only compiled when the earlier ones did not match:
  #   1. first-person framing ("I'll run…", "I just saved…");
  #   2. bare sentence-initial / list-item form ("Running…", "Saved…");
  #   3. completion lead ("Done — created the file.", "✓ saved.").
  pattern_sources = [
    -> { FIRST_PERSON_VERB_SRC.sub("VERB", alternation) },
    -> { BARE_LEAD_VERB_SRC.sub("VERBING", verbings_for(verb)) },
    -> { COMPLETION_LEAD_SRC.sub("VERBING", verbings_for(verb)) }
  ]
  pattern_sources.any? do |build|
    Regexp.new(build.call, Regexp::IGNORECASE).match?(text)
  end
end
840
+
841
+ # The regex-source alternation of a verb and every surface form we track for
842
+ # it (run|running|ran, write|writing|wrote|written, …). Used to match the
843
+ # verb in a first-person framing regardless of tense.
844
def verb_alternation(verb)
  # Explicit surface forms when the verb has an entry; otherwise fall back
  # to the regular "-ing" / "-ed" derivations.
  derived = SURFACE_FORMS.fetch(verb) { ["#{verb}ing", "#{verb}ed"] }
  forms = [verb, *derived]
  # write→written participle that the regular -ed/-ing forms miss.
  forms.push("written") if verb == "write" || verb == "wrote"
  Regexp.union(forms.uniq).source
end
850
+
851
+ # The surface forms that begin a bare narration sentence for a given base
852
+ # verb: the bare verb (sentence-initial imperative-as-claim "Run the tests
853
+ # now."), its progressive ("Running…"), and its past ("Ran…"/"Saved…").
854
+ # Stored explicitly rather than derived so English irregulars (run→running,
855
+ # write→writing→wrote) are correct.
856
def verbings_for(verb)
  # NOTE(review): this currently produces exactly the same alternation as
  # verb_alternation — consider having one delegate to the other.
  tracked = SURFACE_FORMS.fetch(verb, ["#{verb}ing", "#{verb}ed"])
  # "written" is added for write/wrote, which the regular forms miss.
  irregular = %w[write wrote].include?(verb) ? ["written"] : []
  Regexp.union(([verb] + tracked + irregular).uniq).source
end
861
+
862
def first_person_anywhere?(text)
  # True when the text contains a first-person action marker anywhere:
  # "I'll", "I've", "I have", "I will", "I just", "I'm", "I am", "let me",
  # "now I", or "I chang…/switch…/mov…" in any inflection.
  #
  # The last three are verb STEMS. The group is closed by a single `\b`, so
  # each stem needs `\w*` to consume the rest of the word; without it the
  # boundary can never follow "chang"/"mov", and "I changed" or "I moved"
  # silently fail to match.
  #
  # The /x flag makes the line break and indentation inside the literal
  # insignificant.
  /\b(?:i\s?'?ll|i\s?'?ve|i\s+have|i\s+will|i\s+just|i\s?'?m|i\s+am|let\s+me|
      i\s+chang\w*|i\s+switch\w*|i\s+mov\w*|now\s+i)\b/ix.match?(text)
end
866
+
867
+ # The first sentence/clause, where a bare "Changed directory." lead-in lives.
868
def leading_clause(text)
  # Everything from the start of the trimmed text up to the first sentence
  # terminator (. ! ?) or newline, capped at 120 characters. The pattern
  # allows zero characters, so it always matches; the fallback is defensive.
  opening = /\A[^.!?\n]{0,120}/.match(text.strip)
  opening ? opening[0] : ""
end
871
+
872
+ # The text ends by asking the user (a trailing question) — a legitimate
873
+ # clarify, not a fabricated completion. We look only at the tail so a
874
+ # rhetorical "?" earlier in a long answer doesn't exempt a fabrication.
875
def asks_user?(text)
  # True when the answer's final non-whitespace character is "?" — the turn
  # ends by asking the user something. A "?" earlier in the text does not
  # count.
  #
  # Only the final character matters, so checking the whole stripped string
  # is equivalent to slicing off a tail first. `to_s` makes nil input answer
  # false instead of raising.
  text.to_s.strip.end_with?("?")
end
879
+ end
880
+ end
881
+ end