robot_lab 0.0.8 → 0.0.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +71 -0
  3. data/README.md +106 -4
  4. data/Rakefile +2 -1
  5. data/docs/api/core/robot.md +336 -1
  6. data/docs/api/mcp/client.md +1 -0
  7. data/docs/api/mcp/server.md +27 -8
  8. data/docs/api/mcp/transports.md +21 -6
  9. data/docs/architecture/core-concepts.md +1 -1
  10. data/docs/architecture/robot-execution.md +20 -2
  11. data/docs/concepts.md +4 -0
  12. data/docs/guides/building-robots.md +18 -0
  13. data/docs/guides/creating-networks.md +39 -0
  14. data/docs/guides/index.md +10 -0
  15. data/docs/guides/knowledge.md +182 -0
  16. data/docs/guides/mcp-integration.md +180 -2
  17. data/docs/guides/memory.md +2 -0
  18. data/docs/guides/observability.md +486 -0
  19. data/docs/guides/ractor-parallelism.md +364 -0
  20. data/docs/superpowers/plans/2026-04-14-ractor-integration.md +1538 -0
  21. data/docs/superpowers/specs/2026-04-14-ractor-integration-design.md +258 -0
  22. data/examples/14_rusty_circuit/.gitignore +1 -0
  23. data/examples/14_rusty_circuit/open_mic.rb +1 -1
  24. data/examples/19_token_tracking.rb +128 -0
  25. data/examples/20_circuit_breaker.rb +153 -0
  26. data/examples/21_learning_loop.rb +164 -0
  27. data/examples/22_context_compression.rb +179 -0
  28. data/examples/23_convergence.rb +137 -0
  29. data/examples/24_structured_delegation.rb +150 -0
  30. data/examples/25_history_search/conversation.jsonl +30 -0
  31. data/examples/25_history_search.rb +136 -0
  32. data/examples/26_document_store/api_versioning_adr.md +52 -0
  33. data/examples/26_document_store/incident_postmortem.md +46 -0
  34. data/examples/26_document_store/postgres_runbook.md +49 -0
  35. data/examples/26_document_store/redis_caching_guide.md +48 -0
  36. data/examples/26_document_store/sidekiq_guide.md +51 -0
  37. data/examples/26_document_store.rb +147 -0
  38. data/examples/27_incident_response/incident_response.rb +244 -0
  39. data/examples/28_mcp_discovery.rb +112 -0
  40. data/examples/29_ractor_tools.rb +243 -0
  41. data/examples/30_ractor_network.rb +256 -0
  42. data/examples/README.md +136 -0
  43. data/examples/prompts/skill_with_mcp_test.md +9 -0
  44. data/examples/prompts/skill_with_robot_name_test.md +5 -0
  45. data/examples/prompts/skill_with_tools_test.md +6 -0
  46. data/lib/robot_lab/bus_poller.rb +149 -0
  47. data/lib/robot_lab/convergence.rb +69 -0
  48. data/lib/robot_lab/delegation_future.rb +93 -0
  49. data/lib/robot_lab/document_store.rb +155 -0
  50. data/lib/robot_lab/error.rb +25 -0
  51. data/lib/robot_lab/history_compressor.rb +205 -0
  52. data/lib/robot_lab/mcp/client.rb +23 -9
  53. data/lib/robot_lab/mcp/connection_poller.rb +187 -0
  54. data/lib/robot_lab/mcp/server.rb +26 -3
  55. data/lib/robot_lab/mcp/server_discovery.rb +110 -0
  56. data/lib/robot_lab/mcp/transports/base.rb +10 -2
  57. data/lib/robot_lab/mcp/transports/stdio.rb +58 -26
  58. data/lib/robot_lab/memory.rb +103 -6
  59. data/lib/robot_lab/network.rb +44 -9
  60. data/lib/robot_lab/ractor_boundary.rb +42 -0
  61. data/lib/robot_lab/ractor_job.rb +37 -0
  62. data/lib/robot_lab/ractor_memory_proxy.rb +85 -0
  63. data/lib/robot_lab/ractor_network_scheduler.rb +154 -0
  64. data/lib/robot_lab/ractor_worker_pool.rb +117 -0
  65. data/lib/robot_lab/robot/bus_messaging.rb +43 -65
  66. data/lib/robot_lab/robot/history_search.rb +69 -0
  67. data/lib/robot_lab/robot/mcp_management.rb +61 -4
  68. data/lib/robot_lab/robot.rb +351 -11
  69. data/lib/robot_lab/robot_result.rb +26 -5
  70. data/lib/robot_lab/run_config.rb +1 -1
  71. data/lib/robot_lab/text_analysis.rb +103 -0
  72. data/lib/robot_lab/tool.rb +42 -3
  73. data/lib/robot_lab/tool_config.rb +1 -1
  74. data/lib/robot_lab/version.rb +1 -1
  75. data/lib/robot_lab/waiter.rb +49 -29
  76. data/lib/robot_lab.rb +25 -0
  77. data/mkdocs.yml +1 -0
  78. metadata +71 -2
@@ -0,0 +1,258 @@
1
+ # Ractor Integration Design
2
+
3
+ **Date:** 2026-04-14
4
+ **Status:** Approved
5
+ **Gems:** `ractor_queue`, `ractor-wrapper`
6
+
7
+ ## Goals
8
+
9
+ 1. True CPU parallelism (bypassing the GVL) for CPU-bound tool execution
10
+ 2. True CPU parallelism for parallel robot execution in Networks
11
+ 3. Use `ractor_queue` as the queue backbone for both tracks
12
+ 4. Use `ractor-wrapper` to expose shared `Memory` to Ractor workers
13
+ 5. Deliver both tracks as independent, composable layers
14
+
15
+ ## Non-Goals
16
+
17
+ - Making `ruby_llm` or the `async` gem Ractor-safe
18
+ - Replacing the existing `:async` concurrency model (it remains the default)
19
+ - Ractor-isolating `Robot` instances that are long-lived across multiple tasks
20
+
21
+ ---
22
+
23
+ ## Architecture Overview
24
+
25
+ Two parallel tracks share a frozen-message convention and `ractor_queue` as the communication backbone.
26
+
27
+ ```
28
+ ┌─────────────────────────────────────────────────────────────────┐
29
+ │ Thread/Fiber World │
30
+ │ Robot (ruby_llm, async) ──▶ Tool.call() ──▶ RobotResult │
31
+ │ │ │ │
32
+ │ BusPoller ractor_safe? │
33
+ │ (ractor_queue) │ │ │
34
+ └────────────────────────────────│────────│────────────────────────┘
35
+ │ yes │ no
36
+ ┌───────────────────┘ └──► Thread executor
37
+
38
+ ┌─────────────────────────────────────────────────────────────────┐
39
+ │ Ractor World │
40
+ │ RactorWorkerPool ◀──ractor_queue── frozen RactorJob │
41
+ │ (N Ractor workers) │
42
+ │ │ │
43
+ │ RactorMemoryProxy (ractor-wrapper around Memory) │
44
+ │ ◀── get/set via Ractor messages ──▶ │
45
+ └─────────────────────────────────────────────────────────────────┘
46
+ ```
47
+
48
+ **Key constraint:** only frozen objects for which `Ractor.shareable?` returns true cross Ractor boundaries. A `RactorJob` is a `Data.define` value object (its instances are always frozen, and are shareable as long as every member is shareable) carrying a frozen payload and a per-job reply `ractor_queue`.
49
+
50
+ ---
51
+
52
+ ## Shared Infrastructure
53
+
54
+ ### `RactorJob`
55
+
56
+ ```ruby
57
+ RactorJob = Data.define(:id, :type, :payload, :reply_queue)
58
+ ```
59
+
60
+ Single cross-boundary carrier for both tracks. `payload` must be frozen by the caller before submission. `reply_queue` is a `ractor_queue` instance (Ractor-safe).
61
+
62
+ ### `RactorJobError`
63
+
64
+ ```ruby
65
+ RactorJobError = Data.define(:message, :backtrace)
66
+ ```
67
+
68
+ Frozen error representation for exceptions that occur inside a Ractor worker. Serialized at the Ractor boundary, re-raised on the thread side.
69
+
70
+ ### `RobotSpec`
71
+
72
+ ```ruby
73
+ RobotSpec = Data.define(:name, :template, :system_prompt, :config_hash)
74
+ ```
75
+
76
+ Carries everything needed to reconstruct a `Robot` inside a Ractor. All fields must be frozen strings/hashes.
77
+
78
+ ### `RactorBoundary`
79
+
80
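+ A utility module with a `freeze_deep(obj)` method that recursively freezes nested `Hash`/`Array` structures before they cross a Ractor boundary. Similar in spirit to the existing `deep_dup` in `Utils`. Raises `RobotLab::RactorBoundaryError` (a subclass of `RobotLab::Error`) if a value cannot be made shareable (e.g., a live IO or Proc). The sketch below only illustrates the recursive shape: it freezes `Hash` values (not keys) and `Array` elements, and it translates only `TypeError`, so the implementation should additionally verify the result with `Ractor.shareable?` to reject values such as a live IO or Proc.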
81
+
82
+ ```ruby
83
+ module RactorBoundary
84
+ def self.freeze_deep(obj)
85
+ case obj
86
+ when Hash then obj.transform_values { freeze_deep(_1) }.freeze
87
+ when Array then obj.map { freeze_deep(_1) }.freeze
88
+ else obj.frozen? ? obj : obj.dup.freeze
89
+ end
90
+ rescue TypeError => e
91
+ raise RobotLab::RactorBoundaryError, "Cannot make value Ractor-shareable: #{e.message}"
92
+ end
93
+ end
94
+ ```
95
+
96
+ ---
97
+
98
+ ## Track 1: RactorWorkerPool (Tool CPU Parallelism)
99
+
100
+ ### Tool opt-in
101
+
102
+ `RobotLab::Tool` gets a `ractor_safe` class macro (default `false`). Ractor-safe tools must be stateless — no captured mutable closures, no non-shareable constants.
103
+
104
+ ```ruby
105
+ class EmbeddingTool < RobotLab::Tool
106
+ ractor_safe true
107
+
108
+ def execute(text:)
109
+ # CPU-bound embedding work — runs inside a Ractor worker
110
+ end
111
+ end
112
+ ```
113
+
114
+ The framework raises `RobotLab::ConfigurationError` at class-definition time if a declared-safe tool captures unshareable state (detected via `Ractor.shareable?` check on the class object).
115
+
116
+ ### `RactorWorkerPool`
117
+
118
+ A pool of N Ractor workers (configurable via `RunConfig#ractor_pool_size`; the default `:auto` resolves to `Etc.nprocessors`). Each worker runs:
119
+
120
+ ```ruby
121
+ loop do
122
+ job = work_queue.pop # blocks on ractor_queue
123
+ result = dispatch(job) # instantiates tool class, calls execute
124
+ job.reply_queue.push(result) # frozen result back to caller
125
+ rescue => e
126
+ job.reply_queue.push(RactorJobError.new(message: e.message, backtrace: e.backtrace))
127
+ end
128
+ ```
129
+
130
+ The pool is lazily initialized on first use and shared across robots in a Network via the existing `RunConfig` hierarchy. It lives for the lifetime of the process (or of the `RunConfig` that owns it). `RactorWorkerPool#shutdown` drains in-flight jobs, then closes the work `ractor_queue` so all workers exit their loops cleanly. `RunConfig` calls `shutdown` from an `ObjectSpace` finalizer or when `RobotLab.shutdown` is called explicitly.
131
+
132
+ If a worker Ractor crashes (unhandled exception kills the Ractor), the pool detects the dead Ractor via `Ractor#take` and spawns a replacement. The failed job's reply queue receives a `RactorJobError`.
133
+
134
+ ### Submission path (inside `Robot#call_tool`)
135
+
136
+ 1. Look up `tool_class` from `ToolManifest`
137
+ 2. Check `tool_class.ractor_safe?`
138
+ 3. **If yes:** `RactorBoundary.freeze_deep(args)`, build `RactorJob`, push to pool's work `ractor_queue`, block on reply queue
139
+ 4. **If no:** run in current thread/fiber as today
140
+ 5. On reply: if result is `RactorJobError`, re-raise as `RobotLab::ToolError` in the calling thread
141
+
142
+ ### `RunConfig` additions
143
+
144
+ ```ruby
145
+ ractor_pool_size: :auto # :auto = Etc.nprocessors, or an Integer
146
+ ```
147
+
148
+ ---
149
+
150
+ ## Track 2: RactorMemoryProxy + RactorNetworkScheduler (Robot Parallelism)
151
+
152
+ ### `RactorMemoryProxy`
153
+
154
+ Wraps the existing `Memory` instance via `ractor-wrapper`. The wrapper Ractor acts as a method-dispatch server: it receives frozen messages and replies with frozen results.
155
+
156
+ Supported operations proxied across the Ractor boundary:
157
+
158
+ | Message | Reply |
159
+ |---------|-------|
160
+ | `[:get, key]` | frozen value or `nil` |
161
+ | `[:set, key, frozen_value]` | `:ok` |
162
+ | `[:keys]` | frozen array of keys |
163
+
164
+ Subscriptions (callbacks) are **not** proxied — closures are not Ractor-safe. Robots that need reactive subscriptions use the thread-side `Memory` directly. `RactorMemoryProxy` is for Ractor workers that need read/write access to shared state.
165
+
166
+ No changes to `Memory` itself.
167
+
168
+ ### `RactorNetworkScheduler`
169
+
170
+ Replaces `SimpleFlow::Pipeline#call_parallel` for Networks with `parallel_mode: :ractor`. Distributes frozen task descriptions to worker Ractors, collects frozen results.
171
+
172
+ `depends_on` ordering is preserved: the scheduler reads the pipeline's existing dependency graph (from `SimpleFlow::Pipeline`) and uses it to determine which tasks are ready to dispatch. A task is submitted to the `ractor_queue` only once all its dependencies have resolved. This mirrors how `call_parallel` works today — the scheduler wraps the same topological resolution logic.
173
+
174
+ ```
175
+ Scheduler ──► ractor_queue (frozen RobotSpec + task payload)
176
+
177
+
178
+ Worker Ractor
179
+ (constructs fresh Robot from RobotSpec,
180
+ runs task, freezes RobotResult,
181
+ pushes to reply ractor_queue)
182
+
183
+ Scheduler ◀── ractor_queue (frozen results)
184
+ ```
185
+
186
+ Each worker Ractor constructs its own `Robot` instance from a `RobotSpec`. The LLM call happens inside the Ractor. This is safe because `ruby_llm` HTTP calls use no shared mutable state between instances — the Ractor constraint is about *shared* non-shareable objects, not fresh instances created inside a Ractor.
187
+
188
+ Results are collected via a reply `ractor_queue` and assembled into the pipeline's `SimpleFlow::Result` context on the thread side.
189
+
190
+ ### `BusPoller` queue upgrade
191
+
192
+ `BusPoller#@robot_queues` changes from `Hash<String, Array>` to `Hash<String, ractor_queue>`. Delivery mechanics (mutex-guarded drain, `process_and_drain`) are unchanged — only the backing store is swapped. This makes `BusPoller` capable of receiving deliveries from Ractor workers.
193
+
194
+ ### Network opt-in
195
+
196
+ ```ruby
197
+ network = RobotLab.create_network(name: "analysis", parallel_mode: :ractor) do
198
+ task :sentiment, sentiment_robot, depends_on: :none
199
+ task :entities, entity_robot, depends_on: :none
200
+ task :summarize, summary_robot, depends_on: [:sentiment, :entities]
201
+ end
202
+ ```
203
+
204
+ `parallel_mode: :async` remains the default and is unchanged.
205
+
206
+ ---
207
+
208
+ ## Error Handling
209
+
210
+ | Scenario | Mechanism |
211
+ |----------|-----------|
212
+ | Tool raises inside Ractor worker | Serialized as `RactorJobError`, re-raised as `RobotLab::ToolError` in calling thread |
213
+ | Robot raises inside `RactorNetworkScheduler` | Serialized as `RactorJobError`, surfaced as failed step in `SimpleFlow::Result` |
214
+ | Worker Ractor crashes (unhandled exception) | Pool detects dead Ractor, spawns replacement, failed job gets `RactorJobError` on reply queue |
215
+ | Non-shareable value submitted to pool | `RobotLab::RactorBoundaryError` raised before the Ractor boundary |
216
+
217
+ ---
218
+
219
+ ## Testing
220
+
221
+ - `RactorWorkerPool` is testable standalone — no Robot or Network required
222
+ - `RactorMemoryProxy` is testable standalone — wrap a `Memory`, call proxy methods from a test Ractor
223
+ - Tools that declare `ractor_safe true` should pass `assert_ractor_safe(tool_class)` — a test helper that spins up a single-worker pool and round-trips a frozen payload
224
+ - `RactorNetworkScheduler` tests use a minimal two-robot network with `parallel_mode: :ractor`
225
+ - All existing tests are unaffected — `:async` remains the default; no existing class is modified in a breaking way
226
+
227
+ ---
228
+
229
+ ## New Files
230
+
231
+ | File | Purpose |
232
+ |------|---------|
233
+ | `lib/robot_lab/ractor_job.rb` | `RactorJob`, `RactorJobError`, `RobotSpec` data classes |
234
+ | `lib/robot_lab/ractor_boundary.rb` | `RactorBoundary.freeze_deep` utility |
235
+ | `lib/robot_lab/ractor_worker_pool.rb` | `RactorWorkerPool` — N Ractor workers fed by `ractor_queue` |
236
+ | `lib/robot_lab/ractor_memory_proxy.rb` | `RactorMemoryProxy` — `ractor-wrapper` around `Memory` |
237
+ | `lib/robot_lab/ractor_network_scheduler.rb` | `RactorNetworkScheduler` — distributes robot tasks to Ractor workers |
238
+
239
+ ## Modified Files
240
+
241
+ | File | Change |
242
+ |------|--------|
243
+ | `lib/robot_lab/tool.rb` | Add `ractor_safe` class macro |
244
+ | `lib/robot_lab/robot.rb` | Check `ractor_safe?` in `call_tool`, submit to pool if true |
245
+ | `lib/robot_lab/run_config.rb` | Add `ractor_pool_size:` field |
246
+ | `lib/robot_lab/bus_poller.rb` | Swap `Array` queues for `ractor_queue` instances |
247
+ | `lib/robot_lab/network.rb` | Add `parallel_mode:` option, delegate to `RactorNetworkScheduler` |
248
+ | `lib/robot_lab/error.rb` | Add `RobotLab::RactorBoundaryError` subclass |
249
+ | `lib/robot_lab.rb` | Require new files |
250
+
251
+ ---
252
+
253
+ ## Dependencies to Add
254
+
255
+ ```ruby
256
+ gem "ractor_queue"
257
+ gem "ractor-wrapper"
258
+ ```
@@ -0,0 +1 @@
1
+ /output/
@@ -59,7 +59,7 @@ end
59
59
 
60
60
  bus = TypedBus::MessageBus.new
61
61
  display = Display.new(
62
- scout_path: File.join(__dir__, "scout_notes.md"),
62
+ scout_path: File.join(__dir__, "output", "scout_notes.md"),
63
63
  log_path: log_path
64
64
  )
65
65
 
@@ -0,0 +1,128 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ # Example 19: Per-Robot Token / Cost Tracking
5
+ #
6
+ # Demonstrates per-run and cumulative token tracking:
7
+ # - result.input_tokens / result.output_tokens — tokens used in that run
8
+ # - robot.total_input_tokens / total_output_tokens — running totals on the robot
9
+ # - robot.reset_token_totals — reset the accounting counter (not the chat)
10
+ #
11
+ # Key distinction:
12
+ # reset_token_totals resets the robot's *accounting counter* only.
13
+ # The underlying chat history keeps growing, so input_tokens per run
14
+ # naturally increase as conversation context accumulates.
15
+ # Build a new robot with RobotLab.build when you need a genuinely fresh context.
16
+ #
17
+ # Anthropic and OpenAI both return token counts in every response.
18
+ # Token counts are zero for providers that don't report usage data.
19
+ #
20
+ # Usage:
21
+ # ANTHROPIC_API_KEY=your_key ruby examples/19_token_tracking.rb
22
+
23
+ ENV["ROBOT_LAB_TEMPLATE_PATH"] ||= File.join(__dir__, "prompts")
24
+
25
+ require_relative "../lib/robot_lab"
26
+
27
+ # Anthropic claude-haiku-4-5 pricing (as of early 2026)
28
+ HAIKU_INPUT_CPM = 0.80 # $ per 1M input tokens
29
+ HAIKU_OUTPUT_CPM = 4.00 # $ per 1M output tokens
30
+
31
+ def token_summary(input, output)
32
+ "in=#{input} out=#{output} total=#{input + output}"
33
+ end
34
+
35
+ def run_cost(input, output)
36
+ dollars = (input * HAIKU_INPUT_CPM + output * HAIKU_OUTPUT_CPM) / 1_000_000.0
37
+ "$#{"%.5f" % dollars}"
38
+ end
39
+
40
+ def first_line(text)
41
+ text&.strip&.lines&.first&.strip || ""
42
+ end
43
+
44
+ puts "=" * 60
45
+ puts "Example 19: Per-Robot Token & Cost Tracking"
46
+ puts "=" * 60
47
+ puts
48
+
49
+ robot = RobotLab.build(
50
+ name: "analyst",
51
+ system_prompt: "You are a concise technical analyst. Keep every reply under 40 words.",
52
+ model: "claude-haiku-4-5-20251001"
53
+ )
54
+
55
+ prompts = [
56
+ "What is the difference between a stack and a queue?",
57
+ "Name three Ruby gems that every Rails project should consider.",
58
+ "Why does database indexing improve query performance?"
59
+ ]
60
+
61
+ puts "Running #{prompts.size} prompts...\n\n"
62
+
63
+ prompts.each_with_index do |prompt, i|
64
+ result = robot.run(prompt)
65
+
66
+ puts "Run #{i + 1}: #{prompt}"
67
+ puts " Reply: #{first_line(result.reply)}"
68
+ puts " This run: #{token_summary(result.input_tokens, result.output_tokens)}"
69
+ puts " Cost: #{run_cost(result.input_tokens, result.output_tokens)}"
70
+ puts " Cumulative: #{token_summary(robot.total_input_tokens, robot.total_output_tokens)}"
71
+ puts
72
+ end
73
+
74
+ puts "-" * 60
75
+ puts "After #{prompts.size} runs:"
76
+ puts " Total tokens: #{token_summary(robot.total_input_tokens, robot.total_output_tokens)}"
77
+ puts " Total cost: #{run_cost(robot.total_input_tokens, robot.total_output_tokens)}"
78
+ puts
79
+
80
+ # ---------------------------------------------------------------
81
+ # reset_token_totals: resets the accounting counter only.
82
+ # Useful when you want to measure cost for a specific task batch
83
+ # while keeping the robot alive for the next batch.
84
+ #
85
+ # Note: input_tokens will still reflect the full chat history sent
86
+ # to the API — that's the real cost, not an error.
87
+ # ---------------------------------------------------------------
88
+
89
+ puts "Resetting token totals (batch 1 complete, starting batch 2)..."
90
+ robot.reset_token_totals
91
+ puts " Counter now: #{token_summary(robot.total_input_tokens, robot.total_output_tokens)}"
92
+ puts
93
+ puts " Note: the next run's input_tokens will be larger than run 1's"
94
+ puts " because the LLM receives the full accumulated chat context."
95
+ puts " The counter reset tracks *accounting* — it doesn't clear the chat."
96
+ puts
97
+
98
+ result = robot.run("What is memoization?")
99
+ puts "Batch 2 — Run 1: What is memoization?"
100
+ puts " Reply: #{first_line(result.reply)}"
101
+ puts " This run: #{token_summary(result.input_tokens, result.output_tokens)}"
102
+ puts " Batch total: #{token_summary(robot.total_input_tokens, robot.total_output_tokens)}"
103
+ puts
104
+
105
+ # ---------------------------------------------------------------
106
+ # To start truly fresh (new context, new counter), build a new robot.
107
+ # ---------------------------------------------------------------
108
+
109
+ puts "-" * 60
110
+ puts "Fresh robot — genuinely zero context:"
111
+ puts
112
+
113
+ fresh = RobotLab.build(
114
+ name: "analyst2",
115
+ system_prompt: "You are a concise technical analyst. Keep every reply under 40 words.",
116
+ model: "claude-haiku-4-5-20251001"
117
+ )
118
+
119
+ result = fresh.run("What is memoization?")
120
+ puts "Run 1 (fresh robot): What is memoization?"
121
+ puts " Reply: #{first_line(result.reply)}"
122
+ puts " This run: #{token_summary(result.input_tokens, result.output_tokens)}"
123
+ puts " Cumulative: #{token_summary(fresh.total_input_tokens, fresh.total_output_tokens)}"
124
+ puts
125
+
126
+ puts "=" * 60
127
+ puts "Token tracking demo complete."
128
+ puts "=" * 60
@@ -0,0 +1,153 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ # Example 20: Tool Loop Circuit Breaker
5
+ #
6
+ # Demonstrates max_tool_rounds to prevent runaway tool call loops.
7
+ #
8
+ # A "process runner" robot is given a step tool whose return value always
9
+ # instructs the LLM to call it again — it would loop forever without a guard.
10
+ # The circuit breaker fires after max_tool_rounds tool calls and raises
11
+ # RobotLab::ToolLoopError.
12
+ #
13
+ # Key behaviour when the breaker fires:
14
+ # The chat history contains a dangling tool_use with no tool_result.
15
+ # Anthropic (and most providers) reject any subsequent request with that
16
+ # broken history. You MUST call robot.clear_messages before reusing the
17
+ # same robot instance — or simply build a fresh robot.
18
+ #
19
+ # Demonstrates:
20
+ # - max_tool_rounds: N on RobotLab.build
21
+ # - ToolLoopError raised when the limit is exceeded
22
+ # - clear_messages to flush the corrupted chat and recover the robot
23
+ # - Contrast: robot without a circuit breaker on a task that terminates
24
+ #
25
+ # Usage:
26
+ # ANTHROPIC_API_KEY=your_key ruby examples/20_circuit_breaker.rb
27
+
28
+ ENV["ROBOT_LAB_TEMPLATE_PATH"] ||= File.join(__dir__, "prompts")
29
+
30
+ require_relative "../lib/robot_lab"
31
+
32
+ # -------------------------------------------------------------------------
33
+ # A tool that always says "more steps remain" — designed to induce looping
34
+ # -------------------------------------------------------------------------
35
+
36
+ class MultiStepProcessor < RubyLLM::Tool
37
+ description <<~DESC
38
+ Executes one step of a sequential batch process.
39
+ Returns the result of that step and whether more steps remain.
40
+ You MUST call this tool again for each remaining step until
41
+ the status is "complete".
42
+ DESC
43
+
44
+ param :step_number,
45
+ type: "integer",
46
+ desc: "Which step to execute. Start at 1, increment by 1 each call."
47
+
48
+ TOTAL_STEPS = 50 # far more than any sensible max_tool_rounds
49
+
50
+ def execute(step_number:)
51
+ remaining = TOTAL_STEPS - step_number
52
+ if remaining <= 0
53
+ { status: "complete", step: step_number, message: "All steps finished." }
54
+ else
55
+ {
56
+ status: "in_progress",
57
+ step_completed: step_number,
58
+ remaining_steps: remaining,
59
+ instruction: "Call this tool again with step_number: #{step_number + 1}"
60
+ }
61
+ end
62
+ end
63
+ end
64
+
65
+ puts "=" * 60
66
+ puts "Example 20: Tool Loop Circuit Breaker"
67
+ puts "=" * 60
68
+ puts
69
+
70
+ TASK = "Run the batch process from step 1 using the MultiStepProcessor tool. " \
71
+ "Execute every step sequentially until the process reports 'complete'."
72
+
73
+ # -------------------------------------------------------------------------
74
+ # Part 1: Circuit breaker fires — rescue ToolLoopError, then recover
75
+ #
76
+ # After ToolLoopError the chat contains a dangling tool_use block with no
77
+ # matching tool_result. Anthropic rejects any follow-up request with that
78
+ # history. Call clear_messages to flush the broken context before reuse.
79
+ # -------------------------------------------------------------------------
80
+
81
+ puts "--- Part 1: Circuit breaker fires (max_tool_rounds: 5) ---"
82
+ puts
83
+
84
+ robot = RobotLab.build(
85
+ name: "process_runner",
86
+ system_prompt: "You are a process runner. Execute tasks exactly as instructed.",
87
+ local_tools: [MultiStepProcessor],
88
+ max_tool_rounds: 5,
89
+ model: "claude-haiku-4-5-20251001"
90
+ )
91
+
92
+ begin
93
+ robot.run(TASK)
94
+ puts "Process completed (unexpected for this demo)."
95
+ rescue RobotLab::ToolLoopError => e
96
+ puts "Circuit breaker fired!"
97
+ puts " #{e.message}"
98
+ end
99
+
100
+ puts
101
+
102
+ # -------------------------------------------------------------------------
103
+ # Part 2: Recover by flushing the corrupted chat with clear_messages
104
+ #
105
+ # The robot retains its config (system prompt, tools, max_tool_rounds).
106
+ # Only the conversation history is cleared — the robot is then reusable.
107
+ # -------------------------------------------------------------------------
108
+
109
+ puts "--- Part 2: Recover with clear_messages ---"
110
+ puts
111
+
112
+ robot.clear_messages
113
+ puts "Chat flushed. Robot config (tools, max_tool_rounds) is unchanged."
114
+ puts " max_tool_rounds still: #{robot.config.max_tool_rounds}"
115
+ puts
116
+
117
+ result = robot.run("What is 3 + 4? Answer in one sentence.")
118
+ puts "Follow-up after recovery: #{result.reply&.strip}"
119
+ puts
120
+
121
+ # -------------------------------------------------------------------------
122
+ # Part 3: Robot without a circuit breaker on a task that terminates quickly
123
+ #
124
+ # Normal tool use works fine with no guard — the breaker is a safety net,
125
+ # not a tax on well-behaved tool interactions.
126
+ # -------------------------------------------------------------------------
127
+
128
+ puts "--- Part 3: No circuit breaker — task terminates naturally ---"
129
+ puts
130
+
131
+ class SingleStep < RubyLLM::Tool
132
+ description "Doubles a number and returns the result immediately."
133
+ param :value, type: "integer", desc: "The number to double"
134
+
135
+ def execute(value:)
136
+ { result: value * 2, status: "complete" }
137
+ end
138
+ end
139
+
140
+ unguarded = RobotLab.build(
141
+ name: "calculator",
142
+ system_prompt: "Use the provided tool to answer questions.",
143
+ local_tools: [SingleStep],
144
+ model: "claude-haiku-4-5-20251001"
145
+ )
146
+
147
+ result = unguarded.run("Double the number 21 using the tool.")
148
+ puts "Result: #{result.reply&.strip}"
149
+ puts
150
+
151
+ puts "=" * 60
152
+ puts "Circuit breaker demo complete."
153
+ puts "=" * 60