rspec-agents 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104) hide show
  1. checksums.yaml +7 -0
  2. data/bin/rspec-agents +24 -0
  3. data/lib/async_workers/channel_config.rb +34 -0
  4. data/lib/async_workers/doc/process_manager_design.md +512 -0
  5. data/lib/async_workers/errors.rb +21 -0
  6. data/lib/async_workers/managed_process.rb +284 -0
  7. data/lib/async_workers/output_stream.rb +86 -0
  8. data/lib/async_workers/rpc_channel.rb +159 -0
  9. data/lib/async_workers/transport/base.rb +57 -0
  10. data/lib/async_workers/transport/stdio_transport.rb +91 -0
  11. data/lib/async_workers/transport/unix_socket_transport.rb +112 -0
  12. data/lib/async_workers/worker_group.rb +175 -0
  13. data/lib/async_workers.rb +17 -0
  14. data/lib/rspec/agents/agent_response.rb +61 -0
  15. data/lib/rspec/agents/agents/base.rb +123 -0
  16. data/lib/rspec/agents/cli.rb +342 -0
  17. data/lib/rspec/agents/conversation.rb +308 -0
  18. data/lib/rspec/agents/criterion.rb +237 -0
  19. data/lib/rspec/agents/doc/2026_01_22_observer-system-design.md +757 -0
  20. data/lib/rspec/agents/doc/2026_01_23_parallel_spec_runner-design.md +1060 -0
  21. data/lib/rspec/agents/doc/2026_01_27_event_serialization-design.md +294 -0
  22. data/lib/rspec/agents/doc/2026_01_27_experiment_aggregation_design.md +831 -0
  23. data/lib/rspec/agents/doc/2026_01_29_rspec-agents-studio-design.md +1332 -0
  24. data/lib/rspec/agents/doc/2026_01_29_testing-framework-design.md +1037 -0
  25. data/lib/rspec/agents/doc/2026_02_04-parallel-runner-ui.md +537 -0
  26. data/lib/rspec/agents/doc/2026_02_05_html_renderer_extensions.md +708 -0
  27. data/lib/rspec/agents/doc/scenario_guide.md +289 -0
  28. data/lib/rspec/agents/dsl/agent_proxy.rb +141 -0
  29. data/lib/rspec/agents/dsl/criterion_definition.rb +78 -0
  30. data/lib/rspec/agents/dsl/graph_builder.rb +38 -0
  31. data/lib/rspec/agents/dsl/runner_factory.rb +52 -0
  32. data/lib/rspec/agents/dsl/scenario_set_dsl.rb +166 -0
  33. data/lib/rspec/agents/dsl/test_context.rb +223 -0
  34. data/lib/rspec/agents/dsl/user_proxy.rb +71 -0
  35. data/lib/rspec/agents/dsl.rb +398 -0
  36. data/lib/rspec/agents/evaluation_result.rb +44 -0
  37. data/lib/rspec/agents/event_bus.rb +78 -0
  38. data/lib/rspec/agents/events.rb +141 -0
  39. data/lib/rspec/agents/isolated_event_bus.rb +86 -0
  40. data/lib/rspec/agents/judge.rb +244 -0
  41. data/lib/rspec/agents/llm/anthropic.rb +143 -0
  42. data/lib/rspec/agents/llm/base.rb +64 -0
  43. data/lib/rspec/agents/llm/mock.rb +181 -0
  44. data/lib/rspec/agents/llm/response.rb +52 -0
  45. data/lib/rspec/agents/matchers.rb +554 -0
  46. data/lib/rspec/agents/message.rb +81 -0
  47. data/lib/rspec/agents/metadata.rb +120 -0
  48. data/lib/rspec/agents/observers/base.rb +70 -0
  49. data/lib/rspec/agents/observers/parallel_terminal_observer.rb +151 -0
  50. data/lib/rspec/agents/observers/rpc_notify_observer.rb +43 -0
  51. data/lib/rspec/agents/observers/terminal_observer.rb +103 -0
  52. data/lib/rspec/agents/parallel/controller.rb +284 -0
  53. data/lib/rspec/agents/parallel/example_discovery.rb +153 -0
  54. data/lib/rspec/agents/parallel/partitioner.rb +31 -0
  55. data/lib/rspec/agents/parallel/run_result.rb +22 -0
  56. data/lib/rspec/agents/parallel/ui/interactive_ui.rb +605 -0
  57. data/lib/rspec/agents/parallel/ui/interleaved_ui.rb +139 -0
  58. data/lib/rspec/agents/parallel/ui/output_adapter.rb +127 -0
  59. data/lib/rspec/agents/parallel/ui/quiet_ui.rb +100 -0
  60. data/lib/rspec/agents/parallel/ui/ui_factory.rb +53 -0
  61. data/lib/rspec/agents/parallel/ui/ui_mode.rb +101 -0
  62. data/lib/rspec/agents/prompt_builders/base.rb +113 -0
  63. data/lib/rspec/agents/prompt_builders/criterion_evaluation.rb +136 -0
  64. data/lib/rspec/agents/prompt_builders/goal_achievement_evaluation.rb +142 -0
  65. data/lib/rspec/agents/prompt_builders/grounding_evaluation.rb +172 -0
  66. data/lib/rspec/agents/prompt_builders/intent_evaluation.rb +111 -0
  67. data/lib/rspec/agents/prompt_builders/topic_classification.rb +105 -0
  68. data/lib/rspec/agents/prompt_builders/user_simulation.rb +131 -0
  69. data/lib/rspec/agents/runners/headless_runner.rb +272 -0
  70. data/lib/rspec/agents/runners/parallel_terminal_runner.rb +220 -0
  71. data/lib/rspec/agents/runners/terminal_runner.rb +186 -0
  72. data/lib/rspec/agents/runners/user_simulator.rb +261 -0
  73. data/lib/rspec/agents/scenario.rb +133 -0
  74. data/lib/rspec/agents/scenario_loader.rb +145 -0
  75. data/lib/rspec/agents/serialization/conversation_renderer.rb +161 -0
  76. data/lib/rspec/agents/serialization/extension.rb +199 -0
  77. data/lib/rspec/agents/serialization/extensions/core_extension.rb +66 -0
  78. data/lib/rspec/agents/serialization/presenters.rb +281 -0
  79. data/lib/rspec/agents/serialization/run_data_aggregator.rb +197 -0
  80. data/lib/rspec/agents/serialization/run_data_builder.rb +189 -0
  81. data/lib/rspec/agents/serialization/templates/_alpine.min.js +5 -0
  82. data/lib/rspec/agents/serialization/templates/_base_components.css +196 -0
  83. data/lib/rspec/agents/serialization/templates/_base_components.js +46 -0
  84. data/lib/rspec/agents/serialization/templates/_conversation_fragment.html.haml +34 -0
  85. data/lib/rspec/agents/serialization/templates/_metadata_default.html.haml +17 -0
  86. data/lib/rspec/agents/serialization/templates/_scripts.js +89 -0
  87. data/lib/rspec/agents/serialization/templates/_styles.css +1211 -0
  88. data/lib/rspec/agents/serialization/templates/conversation_document.html.haml +29 -0
  89. data/lib/rspec/agents/serialization/templates/test_suite.html.haml +238 -0
  90. data/lib/rspec/agents/serialization/test_suite_renderer.rb +207 -0
  91. data/lib/rspec/agents/serialization.rb +374 -0
  92. data/lib/rspec/agents/simulator_config.rb +336 -0
  93. data/lib/rspec/agents/spec_executor.rb +494 -0
  94. data/lib/rspec/agents/stable_example_id.rb +147 -0
  95. data/lib/rspec/agents/templates/user_simulation.erb +9 -0
  96. data/lib/rspec/agents/tool_call.rb +53 -0
  97. data/lib/rspec/agents/topic.rb +307 -0
  98. data/lib/rspec/agents/topic_graph.rb +236 -0
  99. data/lib/rspec/agents/triggers.rb +122 -0
  100. data/lib/rspec/agents/turn.rb +63 -0
  101. data/lib/rspec/agents/turn_executor.rb +91 -0
  102. data/lib/rspec/agents/version.rb +7 -0
  103. data/lib/rspec/agents.rb +145 -0
  104. metadata +242 -0
@@ -0,0 +1,342 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "optparse"
4
+
5
+ module RSpec
6
+ module Agents
7
+ # Unified CLI for rspec-agents
8
+ #
9
+ # Commands:
10
+ # run - Single-process execution (default)
11
+ # parallel - Parallel execution with worker processes
12
+ # worker - Internal: run as a worker subprocess
+ # render - Render an HTML report from a JSON run file
13
+ #
14
+ # @example Single process
15
+ # CLI.run(["spec/"])
16
+ # CLI.run(["run", "spec/"])
17
+ #
18
+ # @example Parallel
19
+ # CLI.run(["parallel", "-w", "4", "spec/"])
20
+ #
21
+ # @example Worker (internal)
22
+ # CLI.run(["worker"])
23
+ #
24
+ class CLI
25
+ COMMANDS = %w[run parallel worker render].freeze
26
+
27
# Class-level entry point: builds a CLI around +argv+ and runs it.
#
# @param argv [Array<String>] raw command-line arguments
# @return [Object] whatever the instance-level #run returns
def self.run(argv)
  cli = new(argv)
  cli.run
end
30
+
31
# Stores a shallow copy of the argument list so the caller's array is not
# aliased by this instance.
#
# @param argv [Array<String>] raw command-line arguments
def initialize(argv)
  argv_copy = argv.dup
  @argv = argv_copy
end
34
+
35
# Resolves the sub-command from the stored arguments and dispatches to the
# matching handler. When no command is recognised, the full original
# argument list is handed to single-process mode.
#
# @return [Object] the value returned by the selected handler
def run
  command, args = parse_command(@argv)

  handler = {
    "run" => :run_single,
    "parallel" => :run_parallel,
    "worker" => :run_worker,
    "render" => :run_render
  }[command]

  # Default to single-process run
  return run_single(@argv) unless handler

  send(handler, args)
end
52
+
53
+ private
54
+
55
# Splits the argument list into a command name and the arguments for it.
#
# A worker-count flag anywhere in +argv+ selects "parallel", regardless of
# which explicit command (if any) was typed first. A leading explicit
# command is always removed from the returned arguments.
#
# @param argv [Array<String>] command-line arguments
# @return [Array(String, Array<String>), Array(nil, Array<String>)]
#   command name (nil means "use the default") and remaining arguments
def parse_command(argv)
  return [nil, argv] if argv.empty?

  leading = argv.first
  explicit = COMMANDS.include?(leading)
  rest = explicit ? argv[1..] : argv

  # Auto-detect parallel mode if -w/--workers flag is present
  return ["parallel", rest] if has_parallel_flag?(argv)

  # Flags, paths and unknown words all fall through to the default command.
  [explicit ? leading : nil, rest]
end
81
+
82
# Reports whether the argument list carries a worker-count flag.
#
# Recognised spellings: "-w", the attached short form "-w4" (which
# OptionParser accepts for a short option that takes an argument), and
# anything beginning with "--workers" (covers "--workers 4" and
# "--workers=4").
#
# @param argv [Array<String>] command-line arguments
# @return [Boolean]
def has_parallel_flag?(argv)
  argv.any? do |arg|
    arg == "-w" || arg.match?(/\A-w\d+\z/) || arg.start_with?("--workers")
  end
end
85
+
86
+ # =========================================================================
87
+ # Single-process mode
88
+ # =========================================================================
89
+
90
# Runs the given specs in the current process through the terminal runner.
#
# @param args [Array<String>] options and paths for single-process mode
# @return [Object] whatever the terminal runner's #run returns
def run_single(args)
  opts = parse_single_options(args)

  runner_settings = {
    output: $stdout,
    color: opts[:color],
    json_path: opts[:json_path],
    html_path: opts[:html_path]
  }

  Runners::TerminalRunner.new(**runner_settings).run(opts[:paths])
end
101
+
102
# Parses the options understood by single-process mode.
#
# --ui is accepted so the same command line works in both modes, but its
# value is discarded here. -h/--help prints the usage text and exits 0.
# Positional arguments become the spec paths; with none, ["spec"] is used.
#
# @param args [Array<String>] arguments following the command word
# @return [Hash] keys :paths, :color, :json_path, :html_path
def parse_single_options(args)
  parsed = { paths: [], color: nil, json_path: nil, html_path: nil }

  cli = OptionParser.new
  cli.banner = "Usage: rspec-agents [run] [options] [paths...]"
  ["", "Run specs in a single process with terminal output.", "", "Options:"].each do |text|
    cli.separator(text)
  end

  cli.on("--[no-]color", "Force color on/off (default: auto)") { |flag| parsed[:color] = flag }

  # Accepted for CLI compatibility with parallel mode, but ignored
  cli.on("--ui MODE", [:interactive, :interleaved, :quiet],
         "Output mode (ignored in single-process mode)") { |_mode| }

  cli.on("--json PATH", "Save JSON run data to file") { |file| parsed[:json_path] = file }
  cli.on("--html PATH", "Render HTML report to path") { |file| parsed[:html_path] = file }

  cli.on("-h", "--help", "Show this help") do
    puts cli
    exit 0
  end

  positional = cli.parse(args)
  parsed[:paths] = positional.empty? ? ["spec"] : positional
  parsed
end
139
+
140
+ # =========================================================================
141
+ # Parallel mode
142
+ # =========================================================================
143
+
144
# Runs the given specs through the parallel terminal runner.
#
# @param args [Array<String>] options and paths for parallel mode
# @return [Object] whatever the parallel runner's #run returns
def run_parallel(args)
  opts = parse_parallel_options(args)

  runner_settings = {
    worker_count: opts[:workers],
    fail_fast: opts[:fail_fast],
    output: $stdout,
    color: opts[:color],
    json_path: opts[:json_path],
    html_path: opts[:html_path],
    ui_mode: opts[:ui_mode]
  }

  Runners::ParallelTerminalRunner.new(**runner_settings).run(opts[:paths])
end
158
+
159
# Parses the options understood by parallel mode.
#
# -h/--help prints the usage text and exits 0. Positional arguments become
# the spec paths; with none, ["spec"] is used.
#
# @param args [Array<String>] arguments following the command word
# @return [Hash] keys :workers (default 4), :fail_fast (default false),
#   :paths, :color, :json_path, :html_path, :ui_mode
def parse_parallel_options(args)
  parsed = {
    workers: 4,
    fail_fast: false,
    paths: [],
    color: nil,
    json_path: nil,
    html_path: nil,
    ui_mode: nil
  }

  cli = OptionParser.new
  cli.banner = "Usage: rspec-agents parallel [options] [paths...]"
  ["", "Run specs in parallel across multiple worker processes.", "", "Options:"].each do |text|
    cli.separator(text)
  end

  cli.on("-w", "--workers COUNT", Integer, "Number of workers (default: 4)") do |count|
    parsed[:workers] = count
  end
  cli.on("--fail-fast", "Stop on first failure") { parsed[:fail_fast] = true }
  cli.on("--[no-]color", "Force color on/off (default: auto)") { |flag| parsed[:color] = flag }
  cli.on("--ui MODE", [:interactive, :interleaved, :quiet],
         "Output mode: interactive, interleaved, quiet (default: auto)") do |mode|
    parsed[:ui_mode] = mode
  end
  cli.on("--json PATH", "Save JSON run data to file") { |file| parsed[:json_path] = file }
  cli.on("--html PATH", "Render HTML report to path") { |file| parsed[:html_path] = file }

  cli.on("-h", "--help", "Show this help") do
    puts cli
    exit 0
  end

  positional = cli.parse(args)
  parsed[:paths] = positional.empty? ? ["spec"] : positional
  parsed
end
212
+
213
+ # =========================================================================
214
+ # Worker mode (internal - used by parallel controller)
215
+ # =========================================================================
216
+
217
# Worker-subprocess main loop.
#
# Reads newline-delimited JSON requests from the descriptor named by the
# RPC_SOCKET_FD environment variable and hands each line to
# #handle_worker_message. The loop ends when the stream reaches EOF or when
# the handler returns a falsy value (the shutdown request). Errors raised
# while handling one message are logged to stderr and do not stop the loop.
#
# Exits the process with status 1 when RPC_SOCKET_FD is not set.
#
# @param _args [Array<String>] unused
# @return [Integer] 0 once the loop has finished
def run_worker(_args)
  require "json"

  # Get RPC socket from environment
  fd_str = ENV["RPC_SOCKET_FD"]
  unless fd_str
    $stderr.puts("ERROR: RPC_SOCKET_FD not set (worker mode requires controller)")
    exit 1
  end

  socket = IO.for_fd(fd_str.to_i, mode: "r+")
  socket.sync = true

  worker_index = ENV["WORKER_INDEX"] || "?"
  $stderr.puts("[worker-#{worker_index}] started")

  runner = nil
  running = true

  while running && (line = socket.gets)
    begin
      # Keep the handler's verdict: false means a shutdown was requested.
      # The assignment lives inside the begin block so that a failed message
      # (rescued below) leaves +running+ untouched and the loop carries on.
      running = handle_worker_message(socket, line, worker_index) do |example_ids|
        # The runner is created on first use and reused for later batches.
        runner ||= Runners::HeadlessRunner.new(rpc_output: socket)
        runner.run(example_ids)
      end
    rescue JSON::ParserError => e
      $stderr.puts("[worker-#{worker_index}] JSON parse error: #{e.message}")
    rescue => e
      $stderr.puts("[worker-#{worker_index}] Error: #{e.class}: #{e.message}")
      $stderr.puts(Array(e.backtrace).first(5).join("\n"))
    end
  end

  $stderr.puts("[worker-#{worker_index}] exiting")
  0
end
254
+
255
# Decodes one JSON request line and answers it on +socket+.
#
# Actions:
# - "__shutdown__": replies with a shutting_down status and returns false.
# - "run_specs": yields the example ids (an empty array when absent) and
#   sends the block's result back as the reply.
# - anything else: replies with an "unknown action" error.
#
# @param socket [IO] stream the reply is written to
# @param line [String] one raw request line
# @param worker_index [String] label used in stderr log lines
# @yieldparam example_ids [Array] ids to execute
# @yieldreturn [Hash] payload to send back
# @return [Boolean] false to stop the worker loop, true to keep going
# @raise [JSON::ParserError] when +line+ is not valid JSON
def handle_worker_message(socket, line, worker_index)
  msg = JSON.parse(line.chomp, symbolize_names: true)
  action = msg[:action]

  if action == "__shutdown__"
    send_worker_response(socket, msg[:id], { status: "shutting_down" })
    return false # Signal to stop
  end

  if action == "run_specs"
    ids = msg[:example_ids]
    $stderr.puts("[worker-#{worker_index}] running #{ids&.size || 0} examples")
    outcome = yield(ids || [])
    send_worker_response(socket, msg[:id], outcome)
  else
    send_worker_response(socket, msg[:id], { error: "unknown action: #{action}" })
  end

  true # Continue running
end
274
+
275
# Writes one JSON reply line to +socket+ and flushes it.
#
# The request id is added to the payload under :reply_to.
#
# @param socket [IO] stream to write to
# @param request_id [Object] id taken from the request
# @param payload [Hash] reply body
# @return [Object] the result of socket.flush
def send_worker_response(socket, request_id, payload)
  serialized = payload.merge(reply_to: request_id).to_json
  socket.puts(serialized)
  socket.flush
end
280
+
281
+ # =========================================================================
282
+ # Render mode - generate HTML report from JSON file
283
+ # =========================================================================
284
+
285
# Renders an HTML report from a previously saved JSON run file.
#
# Prints a message to stderr and returns 1 when the JSON path is missing,
# the file does not exist, or the renderer returns a falsy value.
#
# @param args [Array<String>] arguments following the "render" command
# @return [Integer] 0 on success, 1 on failure
def run_render(args)
  opts = parse_render_options(args)
  source = opts[:json_path]

  unless source
    $stderr.puts("Error: JSON file path is required")
    $stderr.puts("Usage: rspec-agents render <json_file> [--html PATH]")
    return 1
  end

  unless File.exist?(source)
    $stderr.puts("Error: JSON file not found: #{source}")
    return 1
  end

  written = Serialization::TestSuiteRenderer.from_json_file(source, output_path: opts[:html_path])

  unless written
    $stderr.puts("Error: Failed to render HTML report")
    return 1
  end

  puts "HTML report written to: #{written}"
  0
end
312
+
313
# Parses the options understood by the render command.
#
# The first positional argument is taken as the JSON input path; any
# further positionals are ignored. -h/--help prints usage and exits 0.
#
# @param args [Array<String>] arguments following the "render" command
# @return [Hash] keys :json_path and :html_path (either may be nil)
def parse_render_options(args)
  parsed = { json_path: nil, html_path: nil }

  cli = OptionParser.new
  cli.banner = "Usage: rspec-agents render <json_file> [options]"
  ["", "Render an HTML report from a JSON run file.", "", "Options:"].each do |text|
    cli.separator(text)
  end

  cli.on("--html PATH", "Output HTML path (default: tmp/rspec_agents_debug.html)") do |file|
    parsed[:html_path] = file
  end

  cli.on("-h", "--help", "Show this help") do
    puts cli
    exit 0
  end

  parsed[:json_path] = cli.parse(args).first
  parsed
end
337
+ end
338
+
339
+ # Zeitwerk expects cli.rb to define Cli, but we use CLI (conventional for command-line interfaces)
340
+ Cli = CLI
341
+ end
342
+ end
@@ -0,0 +1,308 @@
1
+ module RSpec
2
+ module Agents
3
+ # Tracks the state of a conversation during test execution
4
+ # Maintains messages, turns, and topic history
5
+ class Conversation
6
+ attr_reader :messages, :turns, :topic_history, :evaluation_results
7
+
8
+ # @param event_bus [EventBus, nil] Optional event bus for emitting events
9
# Builds an empty conversation.
#
# The example id is read from the thread-local :rspec_agents_example_id at
# construction time and attached to every event emitted later.
#
# @param event_bus [EventBus, nil] Optional event bus for emitting events
def initialize(event_bus: nil)
  @event_bus = event_bus
  @example_id = Thread.current[:rspec_agents_example_id]

  @current_topic = nil
  @messages, @turns = [], []
  @topic_history = []           # Array of { topic: Symbol, turns: Array<Turn> }
  @evaluation_results = []      # Array of EvaluationResult objects
  @turns_per_topic = Hash.new(0) # unseen topics count as zero turns
end
19
+
20
# Returns the receiver itself.
#
# @return [Conversation] self
def conversation
  itself
end
23
+
24
+ # Add a user message to the conversation
25
+ #
26
+ # @param text [String] Message text
27
+ # @param metadata [Hash] Optional metadata
28
+ # @param source [Symbol] Message source (:simulator, :script, :unknown)
29
+ # @return [Message] The added message
30
# Appends a user message and emits a UserMessage event for it.
#
# The event's turn number is the count of completed turns plus one.
# +source+ is carried on the event only; it is not stored on the message.
#
# @param text [String] Message text
# @param metadata [Hash] Optional metadata
# @param source [Symbol] Message source (:simulator, :script, :unknown)
# @return [Message] The added message
def add_user_message(text, metadata: {}, source: :unknown)
  user_message = Message.new(role: :user, content: text, metadata: metadata)
  @messages.push(user_message)

  event = Events::UserMessage.new(
    example_id: @example_id,
    turn_number: @turns.size.succ,
    text: text,
    source: source,
    time: Time.now
  )
  emit(event)

  user_message
end
48
+
49
+ # Add an agent response to the conversation
50
+ # Creates a turn with the last user message
51
+ #
52
+ # @param response [AgentResponse] The agent's response
53
+ # @return [Turn] The created turn
54
# Appends the agent's reply, pairs it with the most recent user message to
# form a turn, updates per-topic bookkeeping and emits an AgentResponse
# event.
#
# Topic counters and history are touched only while a current topic is set.
#
# @param response [AgentResponse] The agent's response
# @return [Turn] The created turn
def add_agent_response(response)
  @messages << Message.new(
    role: :agent,
    content: response.text,
    tool_calls: response.tool_calls,
    metadata: response.metadata.to_h
  )

  # Walk backwards to the latest user message; nil when there is none.
  preceding_user = @messages.reverse_each.find(&:user?)

  turn = Turn.new(preceding_user&.content, response, topic: @current_topic)
  @turns << turn

  if @current_topic
    @turns_per_topic[@current_topic] += 1
    add_turn_to_topic_history(turn)
  end

  # Convert tool calls and metadata to hashes where the objects allow it.
  plain_tool_calls = response.tool_calls.map { |tc| tc.respond_to?(:to_h) ? tc.to_h : tc }
  plain_metadata =
    if response.metadata.respond_to?(:to_h)
      response.metadata.to_h
    else
      response.metadata || {}
    end

  emit(Events::AgentResponse.new(
    example_id: @example_id,
    turn_number: @turns.size,
    text: response.text,
    tool_calls: plain_tool_calls,
    metadata: plain_metadata,
    time: Time.now
  ))

  turn
end
88
+
89
+ # Set the current topic
90
+ #
91
+ # @param topic_name [Symbol] The topic name
92
+ # @param trigger [Symbol, nil] What triggered the topic change
93
# Switches the conversation to +topic_name+.
#
# Does nothing when the topic is already current. Otherwise it opens a new
# (empty) topic-history entry and emits a TopicChanged event; a missing
# trigger is reported as :unknown.
#
# @param topic_name [Symbol] The topic name
# @param trigger [Symbol, nil] What triggered the topic change
def set_topic(topic_name, trigger: nil)
  previous = @current_topic
  return if previous == topic_name

  @current_topic = topic_name
  @topic_history.push({ topic: topic_name, turns: [] })

  change = Events::TopicChanged.new(
    example_id: @example_id,
    turn_number: @turns.size,
    from_topic: previous,
    to_topic: topic_name,
    trigger: trigger || :unknown,
    time: Time.now
  )
  emit(change)
end
111
+
112
+ # Get the current topic
113
+ #
114
+ # @return [Symbol, nil]
115
+ attr_reader :current_topic
116
+
117
+ # Get the number of turns that have occurred in a specific topic
118
+ #
119
+ # @param topic_name [Symbol] The topic name
120
+ # @return [Integer]
121
# Looks up the per-topic turn counter.
#
# @param topic_name [Symbol, String] The topic name (converted with to_sym)
# @return [Integer]
def turns_in_topic(topic_name)
  key = topic_name.to_sym
  @turns_per_topic[key]
end
124
+
125
+ # Get all turns that occurred in a specific topic
126
+ #
127
+ # @param topic_name [Symbol] The topic name
128
+ # @return [Array<Turn>]
129
# Collects every turn recorded under +topic_name+, in conversation order.
#
# A topic that is left and later re-entered has several history entries;
# the turns of all of them are concatenated. A String name also matches a
# Symbol topic, so both spellings work.
#
# @param topic_name [Symbol, String] The topic name
# @return [Array<Turn>] a new array; empty when the topic never occurred
def turns_for_topic(topic_name)
  as_symbol = topic_name.respond_to?(:to_sym) ? topic_name.to_sym : topic_name

  @topic_history
    .select { |entry| entry[:topic] == topic_name || entry[:topic] == as_symbol }
    .flat_map { |entry| entry[:turns] }
end
133
+
134
+ # Get all tool calls across all turns
135
+ #
136
+ # @return [Array<ToolCall>]
137
# Gathers the tool calls of every turn into one flat list, in turn order.
#
# @return [Array<ToolCall>]
def all_tool_calls
  @turns.flat_map { |turn| turn.tool_calls }
end
140
+
141
+ # Check if a specific tool was called
142
+ #
143
+ # @param name [Symbol, String] Tool name
144
+ # @return [Boolean]
145
# Tells whether any recorded tool call has the given name.
#
# @param name [Symbol, String] Tool name (compared after to_sym)
# @return [Boolean]
def has_tool_call?(name)
  all_tool_calls.each do |call|
    return true if call.name == name.to_sym
  end
  false
end
148
+
149
+ # Count how many times a tool was called
150
+ #
151
+ # @param name [Symbol, String] Tool name
152
+ # @return [Integer]
153
# Counts the recorded tool calls with the given name.
#
# NOTE: despite the predicate-style name this returns an Integer, and 0 is
# truthy in Ruby, so compare the result explicitly instead of using it as
# a bare condition.
#
# @param name [Symbol, String] Tool name (compared after to_sym)
# @return [Integer]
def called_tool?(name)
  matching = all_tool_calls.select { |call| call.name == name.to_sym }
  matching.size
end
156
+
157
+ # Find tool calls by name
158
+ #
159
+ # @param name [Symbol, String] Tool name
160
+ # @param params [Hash, nil] Optional parameter filter
161
+ # @return [Array<ToolCall>]
162
# Selects the tool calls with the given name, optionally narrowed to those
# whose parameters match.
#
# @param name [Symbol, String] Tool name (compared after to_sym)
# @param params [Hash, nil] Optional parameter filter, passed to each
#   call's matches_params?
# @return [Array<ToolCall>]
def find_tool_calls(name, params: nil)
  all_tool_calls.select do |call|
    next false unless call.name == name.to_sym

    # Without a filter every same-named call qualifies.
    params ? call.matches_params?(params) : true
  end
end
168
+
169
+ # Get the last agent response
170
+ #
171
+ # @return [AgentResponse, nil]
172
# Returns the agent response of the most recent turn.
#
# @return [AgentResponse, nil] nil when no turn exists yet
def last_agent_response
  final_turn = @turns.last
  final_turn.nil? ? nil : final_turn.agent_response
end
175
+
176
+ # Get the last user message
177
+ #
178
+ # @return [String, nil]
179
# Returns the content of the most recent user message.
#
# @return [String, nil] nil when no user message exists
def last_user_message
  latest = @messages.reverse_each.find { |msg| msg.user? }
  latest&.content
end
182
+
183
+ # Get the last turn
184
+ #
185
+ # @return [Turn, nil]
186
# Returns the most recent turn.
#
# @return [Turn, nil] nil when no turn exists yet
def last_turn
  @turns[-1]
end
189
+
190
+ # Get the number of turns
191
+ #
192
+ # @return [Integer]
193
# Returns how many turns have been recorded.
#
# @return [Integer]
def turn_count
  @turns.size
end
196
+
197
+ # Check if conversation is empty
198
+ #
199
+ # @return [Boolean]
200
# Tells whether the conversation holds no messages at all.
#
# @return [Boolean]
def empty?
  @messages.length.zero?
end
203
+
204
+ # Record an evaluation result (soft or hard)
205
+ #
206
+ # @param mode [Symbol] :soft or :hard
207
+ # @param type [Symbol] Type of assertion (:quality, :grounding, :tool_call, etc.)
208
+ # @param description [String] Human-readable description
209
+ # @param passed [Boolean] Whether evaluation passed
210
+ # @param failure_message [String, nil] Reason for failure
211
+ # @param metadata [Hash] Additional context
212
+ # @return [EvaluationResult] The recorded result
213
# Stores an evaluation result and emits an EvaluationRecorded event.
#
# The stored result's metadata is the caller's metadata plus the current
# turn number and topic; the event carries the caller's metadata unchanged
# and the turn number as a separate field.
#
# @param mode [Symbol] :soft or :hard
# @param type [Symbol] Type of assertion (:quality, :grounding, :tool_call, etc.)
# @param description [String] Human-readable description
# @param passed [Boolean] Whether evaluation passed
# @param failure_message [String, nil] Reason for failure
# @param metadata [Hash] Additional context
# @return [EvaluationResult] The recorded result
def record_evaluation(mode:, type:, description:, passed:, failure_message: nil, metadata: {})
  at_turn = @turns.size

  # Fields shared verbatim between the stored result and the event.
  verdict = {
    mode: mode,
    type: type,
    description: description,
    passed: passed,
    failure_message: failure_message
  }

  result = EvaluationResult.new(
    **verdict,
    metadata: metadata.merge(turn_number: at_turn, topic: @current_topic)
  )
  @evaluation_results << result

  emit(Events::EvaluationRecorded.new(
    example_id: @example_id,
    turn_number: at_turn,
    **verdict,
    metadata: metadata,
    time: Time.now
  ))

  result
end
244
+
245
+ # Get soft evaluation results only
246
+ #
247
+ # @return [Array<EvaluationResult>]
248
# Filters the recorded evaluations down to those answering true to soft?.
#
# @return [Array<EvaluationResult>]
def soft_evaluations
  @evaluation_results.select { |result| result.soft? }
end
251
+
252
+ # Get hard evaluation results only
253
+ #
254
+ # @return [Array<EvaluationResult>]
255
# Filters the recorded evaluations down to those answering true to hard?.
#
# @return [Array<EvaluationResult>]
def hard_evaluations
  @evaluation_results.select { |result| result.hard? }
end
258
+
259
+ # Format messages for sending to agent
260
+ # Returns messages in the format expected by agent adapters
261
+ # Note: role is converted to string for API compatibility
262
+ #
263
+ # @return [Array<Hash>]
264
# Builds the message list in the shape handed to agent adapters: one hash
# per message with the role as a String and the content unchanged.
#
# @return [Array<Hash>]
def messages_for_agent
  @messages.each_with_object([]) do |msg, payload|
    payload << { role: msg.role.to_s, content: msg.content }
  end
end
269
+
270
+ # Reset the conversation state
271
# Clears all recorded state in place: messages, turns, topic history,
# per-topic counters, evaluation results and the current topic. The event
# bus and example id are left untouched.
#
# @return [Array] the (now empty) evaluation results array
def reset!
  @current_topic = nil
  [@messages, @turns, @topic_history, @turns_per_topic].each(&:clear)
  @evaluation_results.clear
end
279
+
280
# Produces a Hash snapshot of the conversation.
#
# Messages, turns and evaluation results are converted with their own
# to_h; the topic history array and current topic are included as they are.
#
# @return [Hash] keys :messages, :turns, :topic_history, :current_topic,
#   :evaluation_results
def to_h
  snapshot = {}
  snapshot[:messages] = @messages.map { |msg| msg.to_h }
  snapshot[:turns] = @turns.map { |turn| turn.to_h }
  snapshot[:topic_history] = @topic_history
  snapshot[:current_topic] = @current_topic
  snapshot[:evaluation_results] = @evaluation_results.map { |result| result.to_h }
  snapshot
end
289
+
290
+ def inspect
291
+ "#<#{self.class.name} turns=#{@turns.count} messages=#{@messages.count}>"
292
+ end
293
+
294
+ private
295
+
296
# Publishes +event+ on the event bus, if one was supplied.
#
# @param event [Object] event to publish
# @return [Object, nil] the bus's publish result, or nil without a bus
def emit(event)
  @event_bus.publish(event) unless @event_bus.nil?
end
299
+
300
# Appends +turn+ to the newest topic-history entry, provided that entry
# belongs to the current topic. Does nothing when the history is empty or
# the newest entry is for a different topic.
#
# @param turn [Turn] turn to record
# @return [Array<Turn>, nil] the entry's turn list when appended, else nil
def add_turn_to_topic_history(turn)
  return if @topic_history.empty?

  latest = @topic_history[-1]
  return unless latest[:topic] == @current_topic

  latest[:turns] << turn
end
306
+ end
307
+ end
308
+ end