rspec-agents 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/bin/rspec-agents +24 -0
- data/lib/async_workers/channel_config.rb +34 -0
- data/lib/async_workers/doc/process_manager_design.md +512 -0
- data/lib/async_workers/errors.rb +21 -0
- data/lib/async_workers/managed_process.rb +284 -0
- data/lib/async_workers/output_stream.rb +86 -0
- data/lib/async_workers/rpc_channel.rb +159 -0
- data/lib/async_workers/transport/base.rb +57 -0
- data/lib/async_workers/transport/stdio_transport.rb +91 -0
- data/lib/async_workers/transport/unix_socket_transport.rb +112 -0
- data/lib/async_workers/worker_group.rb +175 -0
- data/lib/async_workers.rb +17 -0
- data/lib/rspec/agents/agent_response.rb +61 -0
- data/lib/rspec/agents/agents/base.rb +123 -0
- data/lib/rspec/agents/cli.rb +342 -0
- data/lib/rspec/agents/conversation.rb +308 -0
- data/lib/rspec/agents/criterion.rb +237 -0
- data/lib/rspec/agents/doc/2026_01_22_observer-system-design.md +757 -0
- data/lib/rspec/agents/doc/2026_01_23_parallel_spec_runner-design.md +1060 -0
- data/lib/rspec/agents/doc/2026_01_27_event_serialization-design.md +294 -0
- data/lib/rspec/agents/doc/2026_01_27_experiment_aggregation_design.md +831 -0
- data/lib/rspec/agents/doc/2026_01_29_rspec-agents-studio-design.md +1332 -0
- data/lib/rspec/agents/doc/2026_01_29_testing-framework-design.md +1037 -0
- data/lib/rspec/agents/doc/2026_02_04-parallel-runner-ui.md +537 -0
- data/lib/rspec/agents/doc/2026_02_05_html_renderer_extensions.md +708 -0
- data/lib/rspec/agents/doc/scenario_guide.md +289 -0
- data/lib/rspec/agents/dsl/agent_proxy.rb +141 -0
- data/lib/rspec/agents/dsl/criterion_definition.rb +78 -0
- data/lib/rspec/agents/dsl/graph_builder.rb +38 -0
- data/lib/rspec/agents/dsl/runner_factory.rb +52 -0
- data/lib/rspec/agents/dsl/scenario_set_dsl.rb +166 -0
- data/lib/rspec/agents/dsl/test_context.rb +223 -0
- data/lib/rspec/agents/dsl/user_proxy.rb +71 -0
- data/lib/rspec/agents/dsl.rb +398 -0
- data/lib/rspec/agents/evaluation_result.rb +44 -0
- data/lib/rspec/agents/event_bus.rb +78 -0
- data/lib/rspec/agents/events.rb +141 -0
- data/lib/rspec/agents/isolated_event_bus.rb +86 -0
- data/lib/rspec/agents/judge.rb +244 -0
- data/lib/rspec/agents/llm/anthropic.rb +143 -0
- data/lib/rspec/agents/llm/base.rb +64 -0
- data/lib/rspec/agents/llm/mock.rb +181 -0
- data/lib/rspec/agents/llm/response.rb +52 -0
- data/lib/rspec/agents/matchers.rb +554 -0
- data/lib/rspec/agents/message.rb +81 -0
- data/lib/rspec/agents/metadata.rb +120 -0
- data/lib/rspec/agents/observers/base.rb +70 -0
- data/lib/rspec/agents/observers/parallel_terminal_observer.rb +151 -0
- data/lib/rspec/agents/observers/rpc_notify_observer.rb +43 -0
- data/lib/rspec/agents/observers/terminal_observer.rb +103 -0
- data/lib/rspec/agents/parallel/controller.rb +284 -0
- data/lib/rspec/agents/parallel/example_discovery.rb +153 -0
- data/lib/rspec/agents/parallel/partitioner.rb +31 -0
- data/lib/rspec/agents/parallel/run_result.rb +22 -0
- data/lib/rspec/agents/parallel/ui/interactive_ui.rb +605 -0
- data/lib/rspec/agents/parallel/ui/interleaved_ui.rb +139 -0
- data/lib/rspec/agents/parallel/ui/output_adapter.rb +127 -0
- data/lib/rspec/agents/parallel/ui/quiet_ui.rb +100 -0
- data/lib/rspec/agents/parallel/ui/ui_factory.rb +53 -0
- data/lib/rspec/agents/parallel/ui/ui_mode.rb +101 -0
- data/lib/rspec/agents/prompt_builders/base.rb +113 -0
- data/lib/rspec/agents/prompt_builders/criterion_evaluation.rb +136 -0
- data/lib/rspec/agents/prompt_builders/goal_achievement_evaluation.rb +142 -0
- data/lib/rspec/agents/prompt_builders/grounding_evaluation.rb +172 -0
- data/lib/rspec/agents/prompt_builders/intent_evaluation.rb +111 -0
- data/lib/rspec/agents/prompt_builders/topic_classification.rb +105 -0
- data/lib/rspec/agents/prompt_builders/user_simulation.rb +131 -0
- data/lib/rspec/agents/runners/headless_runner.rb +272 -0
- data/lib/rspec/agents/runners/parallel_terminal_runner.rb +220 -0
- data/lib/rspec/agents/runners/terminal_runner.rb +186 -0
- data/lib/rspec/agents/runners/user_simulator.rb +261 -0
- data/lib/rspec/agents/scenario.rb +133 -0
- data/lib/rspec/agents/scenario_loader.rb +145 -0
- data/lib/rspec/agents/serialization/conversation_renderer.rb +161 -0
- data/lib/rspec/agents/serialization/extension.rb +199 -0
- data/lib/rspec/agents/serialization/extensions/core_extension.rb +66 -0
- data/lib/rspec/agents/serialization/presenters.rb +281 -0
- data/lib/rspec/agents/serialization/run_data_aggregator.rb +197 -0
- data/lib/rspec/agents/serialization/run_data_builder.rb +189 -0
- data/lib/rspec/agents/serialization/templates/_alpine.min.js +5 -0
- data/lib/rspec/agents/serialization/templates/_base_components.css +196 -0
- data/lib/rspec/agents/serialization/templates/_base_components.js +46 -0
- data/lib/rspec/agents/serialization/templates/_conversation_fragment.html.haml +34 -0
- data/lib/rspec/agents/serialization/templates/_metadata_default.html.haml +17 -0
- data/lib/rspec/agents/serialization/templates/_scripts.js +89 -0
- data/lib/rspec/agents/serialization/templates/_styles.css +1211 -0
- data/lib/rspec/agents/serialization/templates/conversation_document.html.haml +29 -0
- data/lib/rspec/agents/serialization/templates/test_suite.html.haml +238 -0
- data/lib/rspec/agents/serialization/test_suite_renderer.rb +207 -0
- data/lib/rspec/agents/serialization.rb +374 -0
- data/lib/rspec/agents/simulator_config.rb +336 -0
- data/lib/rspec/agents/spec_executor.rb +494 -0
- data/lib/rspec/agents/stable_example_id.rb +147 -0
- data/lib/rspec/agents/templates/user_simulation.erb +9 -0
- data/lib/rspec/agents/tool_call.rb +53 -0
- data/lib/rspec/agents/topic.rb +307 -0
- data/lib/rspec/agents/topic_graph.rb +236 -0
- data/lib/rspec/agents/triggers.rb +122 -0
- data/lib/rspec/agents/turn.rb +63 -0
- data/lib/rspec/agents/turn_executor.rb +91 -0
- data/lib/rspec/agents/version.rb +7 -0
- data/lib/rspec/agents.rb +145 -0
- metadata +242 -0
|
@@ -0,0 +1,342 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "optparse"
|
|
4
|
+
|
|
5
|
+
module RSpec
  module Agents
    # Unified command-line entry point for rspec-agents.
    #
    # Commands:
    #   run      - Single-process execution (default)
    #   parallel - Parallel execution with worker processes
    #   worker   - Internal: run as a worker subprocess
    #   render   - Render an HTML report from a JSON run file
    #
    # @example Single process
    #   CLI.run(["spec/"])
    #   CLI.run(["run", "spec/"])
    #
    # @example Parallel
    #   CLI.run(["parallel", "-w", "4", "spec/"])
    #
    # @example Worker (internal)
    #   CLI.run(["worker"])
    #
    class CLI
      # Every sub-command recognised as the first argument.
      COMMANDS = %w[run parallel worker render].freeze

      # Commands that a -w/--workers flag may promote to parallel mode.
      # `worker` and `render` are never promoted: they do not run specs.
      PARALLEL_COMPATIBLE_COMMANDS = %w[run parallel].freeze

      # Values accepted by the --ui option.
      UI_MODES = %i[interactive interleaved quiet].freeze

      # Convenience wrapper: build a CLI for +argv+ and execute it.
      #
      # @param argv [Array<String>] raw command-line arguments
      # @return [Object] whatever the selected mode returns
      def self.run(argv)
        new(argv).run
      end

      # @param argv [Array<String>] raw command-line arguments (copied, so the
      #   caller's array is not retained)
      def initialize(argv)
        @argv = argv.dup
      end

      # Dispatch to the mode selected by the arguments.
      # Anything that is not an explicit parallel/worker/render command runs
      # in single-process mode.
      #
      # @return [Object] the result of the selected mode
      def run
        command, args = parse_command(@argv)

        case command
        when "parallel" then run_parallel(args)
        when "worker"   then run_worker(args)
        when "render"   then run_render(args)
        else run_single(args)
        end
      end

      private

      # Split +argv+ into a command name and the arguments left for it.
      #
      # A worker-count flag anywhere in +argv+ selects parallel mode, unless
      # the first argument explicitly names a command that cannot be
      # parallelised (worker, render).
      #
      # @param argv [Array<String>]
      # @return [Array(String, Array<String>), Array(nil, Array<String>)]
      #   +[command, remaining_args]+; command is nil when none was given
      def parse_command(argv)
        return [nil, argv] if argv.empty?

        first = argv.first
        explicit = COMMANDS.include?(first)

        if has_parallel_flag?(argv) && (!explicit || PARALLEL_COMPATIBLE_COMMANDS.include?(first))
          # Consume the explicit command word, if any; keep everything else.
          return ["parallel", explicit ? argv[1..] : argv]
        end

        # Flags, paths and unknown words all fall back to the default mode.
        explicit ? [first, argv[1..]] : [nil, argv]
      end

      # Whether +argv+ carries a worker-count flag: "-w", the attached short
      # form "-w4", or anything starting with "--workers".
      #
      # @param argv [Array<String>]
      # @return [Boolean]
      def has_parallel_flag?(argv)
        argv.any? { |arg| arg.match?(/\A-w\d*\z/) || arg.start_with?("--workers") }
      end

      # =========================================================================
      # Single-process mode
      # =========================================================================

      # Run specs in the current process using the terminal runner.
      #
      # @param args [Array<String>] options and paths for single-process mode
      # @return [Object] the value returned by the runner
      def run_single(args)
        options = parse_single_options(args)

        runner = Runners::TerminalRunner.new(
          output: $stdout,
          color: options[:color],
          json_path: options[:json_path],
          html_path: options[:html_path]
        )
        runner.run(options[:paths])
      end

      # Parse single-process options.
      #
      # @param args [Array<String>]
      # @return [Hash] keys :paths, :color, :json_path, :html_path
      # @raise [OptionParser::ParseError] on unknown or malformed options
      def parse_single_options(args)
        options = { paths: [], color: nil, json_path: nil, html_path: nil }

        parser = OptionParser.new do |opts|
          opts.banner = "Usage: rspec-agents [run] [options] [paths...]"
          opts.separator ""
          opts.separator "Run specs in a single process with terminal output."
          opts.separator ""
          opts.separator "Options:"

          add_color_option(opts, options)

          opts.on("--ui MODE", UI_MODES,
                  "Output mode (ignored in single-process mode)") do |_mode|
            # Accepted for CLI compatibility with parallel mode, but ignored
          end

          add_report_options(opts, options)
          add_help_option(opts)
        end

        options[:paths] = paths_or_default(parser.parse(args))
        options
      end

      # =========================================================================
      # Parallel mode
      # =========================================================================

      # Run specs across several worker processes.
      #
      # @param args [Array<String>] options and paths for parallel mode
      # @return [Object] the value returned by the runner
      def run_parallel(args)
        options = parse_parallel_options(args)

        runner = Runners::ParallelTerminalRunner.new(
          worker_count: options[:workers],
          fail_fast: options[:fail_fast],
          output: $stdout,
          color: options[:color],
          json_path: options[:json_path],
          html_path: options[:html_path],
          ui_mode: options[:ui_mode]
        )
        runner.run(options[:paths])
      end

      # Parse parallel-mode options.
      #
      # @param args [Array<String>]
      # @return [Hash] keys :workers, :fail_fast, :paths, :color, :json_path,
      #   :html_path, :ui_mode
      # @raise [OptionParser::ParseError] on unknown or malformed options,
      #   including a worker count below 1
      def parse_parallel_options(args)
        options = {
          workers: 4,
          fail_fast: false,
          paths: [],
          color: nil,
          json_path: nil,
          html_path: nil,
          ui_mode: nil
        }

        parser = OptionParser.new do |opts|
          opts.banner = "Usage: rspec-agents parallel [options] [paths...]"
          opts.separator ""
          opts.separator "Run specs in parallel across multiple worker processes."
          opts.separator ""
          opts.separator "Options:"

          opts.on("-w", "--workers COUNT", Integer, "Number of workers (default: 4)") do |count|
            # A pool of zero or fewer workers can never run anything.
            raise OptionParser::InvalidArgument, count.to_s unless count.positive?

            options[:workers] = count
          end

          opts.on("--fail-fast", "Stop on first failure") do
            options[:fail_fast] = true
          end

          add_color_option(opts, options)

          opts.on("--ui MODE", UI_MODES,
                  "Output mode: interactive, interleaved, quiet (default: auto)") do |mode|
            options[:ui_mode] = mode
          end

          add_report_options(opts, options)
          add_help_option(opts)
        end

        options[:paths] = paths_or_default(parser.parse(args))
        options
      end

      # =========================================================================
      # Worker mode (internal - used by parallel controller)
      # =========================================================================

      # Serve newline-delimited JSON requests read from the socket whose file
      # descriptor is given in ENV["RPC_SOCKET_FD"], until a shutdown request
      # arrives or the socket reaches end-of-file.
      #
      # Errors raised while handling one message are logged to stderr and do
      # not end the loop.
      #
      # @param _args [Array<String>] unused
      # @return [Integer] 0 once the loop ends; the process exits with status 1
      #   when the descriptor is missing or not an integer
      def run_worker(_args)
        require "json" # loaded lazily: only worker mode needs it

        fd_str = ENV["RPC_SOCKET_FD"]
        unless fd_str
          $stderr.puts("ERROR: RPC_SOCKET_FD not set (worker mode requires controller)")
          exit 1
        end

        # Strict parse: String#to_i would silently turn garbage into fd 0.
        fd = Integer(fd_str, 10, exception: false)
        unless fd
          $stderr.puts("ERROR: RPC_SOCKET_FD is not a valid file descriptor: #{fd_str.inspect}")
          exit 1
        end

        socket = IO.for_fd(fd, mode: "r+")
        socket.sync = true

        worker_index = ENV["WORKER_INDEX"] || "?"
        $stderr.puts("[worker-#{worker_index}] started")

        runner = nil
        running = true

        while running && (line = socket.gets)
          begin
            # The handler returns false for a shutdown request. The assignment
            # sits inside the begin so a failing message leaves `running` as is.
            running = handle_worker_message(socket, line, worker_index) do |example_ids|
              runner ||= Runners::HeadlessRunner.new(rpc_output: socket)
              runner.run(example_ids)
            end
          rescue JSON::ParserError => e
            $stderr.puts("[worker-#{worker_index}] JSON parse error: #{e.message}")
          rescue => e
            $stderr.puts("[worker-#{worker_index}] Error: #{e.class}: #{e.message}")
            $stderr.puts(Array(e.backtrace).first(5).join("\n"))
          end
        end

        $stderr.puts("[worker-#{worker_index}] exiting")
        0
      end

      # Decode one request line and act on its :action.
      #
      # @param socket [IO] stream the response is written to
      # @param line [String] one JSON-encoded request
      # @param worker_index [String] label used in log output
      # @yieldparam example_ids [Array] ids taken from a "run_specs" request
      # @yieldreturn [Hash] payload sent back as the response
      # @return [Boolean] false after a "__shutdown__" request, true otherwise
      # @raise [JSON::ParserError] when +line+ is not valid JSON
      def handle_worker_message(socket, line, worker_index)
        msg = JSON.parse(line.chomp, symbolize_names: true)

        case msg[:action]
        when "__shutdown__"
          send_worker_response(socket, msg[:id], { status: "shutting_down" })
          return false # Signal to stop

        when "run_specs"
          $stderr.puts("[worker-#{worker_index}] running #{msg[:example_ids]&.size || 0} examples")
          result = yield(msg[:example_ids] || [])
          send_worker_response(socket, msg[:id], result)

        else
          send_worker_response(socket, msg[:id], { error: "unknown action: #{msg[:action]}" })
        end

        true # Continue running
      end

      # Write +payload+, tagged with the id of the request it answers, as one
      # JSON line.
      #
      # @param socket [IO]
      # @param request_id [Object] id of the request being answered
      # @param payload [Hash]
      # @return [void]
      def send_worker_response(socket, request_id, payload)
        response = payload.merge(reply_to: request_id)
        socket.puts(response.to_json)
        socket.flush
      end

      # =========================================================================
      # Render mode - generate HTML report from JSON file
      # =========================================================================

      # Render an HTML report from a previously saved JSON run file.
      #
      # @param args [Array<String>] JSON path plus render options
      # @return [Integer] 0 on success, 1 when the input is missing or
      #   rendering produced no output path
      def run_render(args)
        options = parse_render_options(args)
        json_path = options[:json_path]

        unless json_path
          $stderr.puts("Error: JSON file path is required")
          $stderr.puts("Usage: rspec-agents render <json_file> [--html PATH]")
          return 1
        end

        unless File.exist?(json_path)
          $stderr.puts("Error: JSON file not found: #{json_path}")
          return 1
        end

        output_path = Serialization::TestSuiteRenderer.from_json_file(
          json_path,
          output_path: options[:html_path]
        )

        unless output_path
          $stderr.puts("Error: Failed to render HTML report")
          return 1
        end

        puts "HTML report written to: #{output_path}"
        0
      end

      # Parse render-mode options. The first positional argument is taken as
      # the JSON file; further positional arguments are ignored.
      #
      # @param args [Array<String>]
      # @return [Hash] keys :json_path, :html_path
      # @raise [OptionParser::ParseError] on unknown or malformed options
      def parse_render_options(args)
        options = { json_path: nil, html_path: nil }

        parser = OptionParser.new do |opts|
          opts.banner = "Usage: rspec-agents render <json_file> [options]"
          opts.separator ""
          opts.separator "Render an HTML report from a JSON run file."
          opts.separator ""
          opts.separator "Options:"

          opts.on("--html PATH", "Output HTML path (default: tmp/rspec_agents_debug.html)") do |path|
            options[:html_path] = path
          end

          add_help_option(opts)
        end

        options[:json_path] = parser.parse(args).first
        options
      end

      # =========================================================================
      # Shared option helpers
      # =========================================================================

      # Register the --[no-]color switch; leaves options[:color] untouched
      # when the switch is not given.
      def add_color_option(opts, options)
        opts.on("--[no-]color", "Force color on/off (default: auto)") do |value|
          options[:color] = value
        end
      end

      # Register the --json and --html report destinations.
      def add_report_options(opts, options)
        opts.on("--json PATH", "Save JSON run data to file") do |path|
          options[:json_path] = path
        end

        opts.on("--html PATH", "Render HTML report to path") do |path|
          options[:html_path] = path
        end
      end

      # Register -h/--help, which prints the usage text and exits with 0.
      def add_help_option(opts)
        opts.on("-h", "--help", "Show this help") do
          puts opts
          exit 0
        end
      end

      # Positional arguments are spec paths; default to "spec" when none given.
      def paths_or_default(remaining)
        remaining.empty? ? ["spec"] : remaining
      end
    end

    # Zeitwerk expects cli.rb to define Cli, but we use CLI (conventional for command-line interfaces)
    Cli = CLI
  end
end
|
|
@@ -0,0 +1,308 @@
|
|
|
1
|
+
module RSpec
  module Agents
    # Tracks the state of a conversation during test execution.
    # Maintains messages, turns, topic history and recorded evaluations, and
    # publishes an event on the optional event bus for each change.
    class Conversation
      # @return [Array] messages, turns, topic-history entries
      #   ({ topic:, turns: }) and evaluation results, in the order added
      attr_reader :messages, :turns, :topic_history, :evaluation_results

      # @return [Symbol, nil] the topic currently in effect
      attr_reader :current_topic

      # @param event_bus [EventBus, nil] Optional event bus for emitting events
      def initialize(event_bus: nil)
        @event_bus = event_bus
        # Captured once at construction from the current thread.
        @example_id = Thread.current[:rspec_agents_example_id]
        @messages = []
        @turns = []
        @topic_history = [] # Array of { topic: Symbol, turns: Array<Turn> }
        @current_topic = nil
        @turns_per_topic = Hash.new(0)
        @evaluation_results = [] # Array of EvaluationResult objects
      end

      # @return [Conversation] self
      def conversation
        self
      end

      # Add a user message to the conversation
      #
      # @param text [String] Message text
      # @param metadata [Hash] Optional metadata
      # @param source [Symbol] Message source (:simulator, :script, :unknown)
      # @return [Message] The added message
      def add_user_message(text, metadata: {}, source: :unknown)
        message = Message.new(role: :user, content: text, metadata: metadata)
        @messages << message

        emit(Events::UserMessage.new(
          example_id: @example_id,
          # The turn this message opens does not exist yet, hence + 1.
          turn_number: @turns.size + 1,
          text: text,
          source: source,
          time: Time.now
        ))

        message
      end

      # Add an agent response to the conversation.
      # Creates a turn pairing it with the most recent user message.
      #
      # @param response [AgentResponse] The agent's response
      # @return [Turn] The created turn
      def add_agent_response(response)
        raw_metadata = response.metadata
        # One normalization shared by the stored message and the event.
        metadata = raw_metadata.respond_to?(:to_h) ? raw_metadata.to_h : (raw_metadata || {})

        @messages << Message.new(
          role: :agent,
          content: response.text,
          tool_calls: response.tool_calls,
          metadata: metadata
        )

        turn = Turn.new(last_user_message, response, topic: @current_topic)
        @turns << turn

        # Turns are only attributed to a topic once one has been set.
        if @current_topic
          @turns_per_topic[@current_topic] += 1
          add_turn_to_topic_history(turn)
        end

        emit(Events::AgentResponse.new(
          example_id: @example_id,
          turn_number: @turns.size,
          text: response.text,
          tool_calls: response.tool_calls.map { |tc| tc.respond_to?(:to_h) ? tc.to_h : tc },
          metadata: metadata,
          time: Time.now
        ))

        turn
      end

      # Set the current topic. Does nothing when the topic is unchanged;
      # otherwise opens a new topic-history entry and emits TopicChanged.
      #
      # @param topic_name [Symbol] The topic name
      # @param trigger [Symbol, nil] What triggered the topic change
      def set_topic(topic_name, trigger: nil)
        return if @current_topic == topic_name

        previous = @current_topic
        @current_topic = topic_name

        # Re-entering an earlier topic opens a new entry for that topic.
        @topic_history << { topic: topic_name, turns: [] }

        emit(Events::TopicChanged.new(
          example_id: @example_id,
          turn_number: @turns.size,
          from_topic: previous,
          to_topic: topic_name,
          trigger: trigger || :unknown,
          time: Time.now
        ))
      end

      # Get the number of turns that have occurred in a specific topic,
      # across every visit to that topic.
      #
      # @param topic_name [Symbol, String] The topic name
      # @return [Integer]
      def turns_in_topic(topic_name)
        @turns_per_topic.sum { |topic, count| same_topic?(topic, topic_name) ? count : 0 }
      end

      # Get all turns that occurred in a specific topic, in order, across
      # every visit to that topic.
      #
      # @param topic_name [Symbol, String] The topic name
      # @return [Array<Turn>] a new array; empty when the topic never occurred
      def turns_for_topic(topic_name)
        @topic_history
          .select { |entry| same_topic?(entry[:topic], topic_name) }
          .flat_map { |entry| entry[:turns] }
      end

      # Get all tool calls across all turns
      #
      # @return [Array<ToolCall>]
      def all_tool_calls
        @turns.flat_map(&:tool_calls)
      end

      # Check if a specific tool was called
      #
      # @param name [Symbol, String] Tool name
      # @return [Boolean]
      def has_tool_call?(name)
        tool_name = name.to_sym
        all_tool_calls.any? { |tc| tc.name == tool_name }
      end

      # Count how many times a tool was called
      #
      # @param name [Symbol, String] Tool name
      # @return [Integer]
      def tool_call_count(name)
        tool_name = name.to_sym
        all_tool_calls.count { |tc| tc.name == tool_name }
      end

      # Count how many times a tool was called.
      #
      # Despite the trailing "?", this returns an Integer, and 0 is truthy in
      # Ruby, so it must not be used as a condition. Use #has_tool_call? for
      # a boolean; #tool_call_count is the clearer name for the count.
      #
      # @param name [Symbol, String] Tool name
      # @return [Integer]
      def called_tool?(name)
        tool_call_count(name)
      end

      # Find tool calls by name
      #
      # @param name [Symbol, String] Tool name
      # @param params [Hash, nil] Optional parameter filter
      # @return [Array<ToolCall>]
      def find_tool_calls(name, params: nil)
        tool_name = name.to_sym
        all_tool_calls.select do |tc|
          tc.name == tool_name && (params.nil? || tc.matches_params?(params))
        end
      end

      # Get the last agent response
      #
      # @return [AgentResponse, nil]
      def last_agent_response
        @turns.last&.agent_response
      end

      # Get the content of the last user message
      #
      # @return [String, nil]
      def last_user_message
        @messages.reverse_each.find(&:user?)&.content
      end

      # Get the last turn
      #
      # @return [Turn, nil]
      def last_turn
        @turns.last
      end

      # Get the number of turns
      #
      # @return [Integer]
      def turn_count
        @turns.size
      end

      # Check if conversation is empty (no messages at all)
      #
      # @return [Boolean]
      def empty?
        @messages.empty?
      end

      # Record an evaluation result (soft or hard)
      #
      # @param mode [Symbol] :soft or :hard
      # @param type [Symbol] Type of assertion (:quality, :grounding, :tool_call, etc.)
      # @param description [String] Human-readable description
      # @param passed [Boolean] Whether evaluation passed
      # @param failure_message [String, nil] Reason for failure
      # @param metadata [Hash] Additional context
      # @return [EvaluationResult] The recorded result
      def record_evaluation(mode:, type:, description:, passed:, failure_message: nil, metadata: {})
        turn_number = @turns.size

        result = EvaluationResult.new(
          mode: mode,
          type: type,
          description: description,
          passed: passed,
          failure_message: failure_message,
          # The stored result also carries the turn number and topic.
          metadata: metadata.merge(turn_number: turn_number, topic: @current_topic)
        )
        @evaluation_results << result

        # NOTE(review): the event carries the caller's metadata as given, so
        # the topic is not part of it - confirm that this is intended.
        emit(Events::EvaluationRecorded.new(
          example_id: @example_id,
          turn_number: turn_number,
          mode: mode,
          type: type,
          description: description,
          passed: passed,
          failure_message: failure_message,
          metadata: metadata,
          time: Time.now
        ))

        result
      end

      # Get soft evaluation results only
      #
      # @return [Array<EvaluationResult>]
      def soft_evaluations
        @evaluation_results.select(&:soft?)
      end

      # Get hard evaluation results only
      #
      # @return [Array<EvaluationResult>]
      def hard_evaluations
        @evaluation_results.select(&:hard?)
      end

      # Format messages for sending to agent.
      # Note: role is converted to string for API compatibility
      #
      # @return [Array<Hash>] one { role:, content: } hash per message
      def messages_for_agent
        @messages.map { |message| { role: message.role.to_s, content: message.content } }
      end

      # Reset the conversation state. Collections are cleared in place.
      def reset!
        [@messages, @turns, @topic_history, @turns_per_topic, @evaluation_results].each(&:clear)
        @current_topic = nil
      end

      # @return [Hash] snapshot of the conversation state
      def to_h
        {
          messages: @messages.map(&:to_h),
          turns: @turns.map(&:to_h),
          # NOTE(review): returned as stored, so entries still hold Turn
          # objects rather than hashes - confirm that callers expect this.
          topic_history: @topic_history,
          current_topic: @current_topic,
          evaluation_results: @evaluation_results.map(&:to_h)
        }
      end

      def inspect
        "#<#{self.class.name} turns=#{@turns.count} messages=#{@messages.count}>"
      end

      private

      # Publish +event+ when an event bus was supplied; otherwise do nothing.
      def emit(event)
        @event_bus&.publish(event)
      end

      # Append +turn+ to the newest history entry when that entry belongs to
      # the current topic.
      def add_turn_to_topic_history(turn)
        current_entry = @topic_history.last
        return unless current_entry

        current_entry[:turns] << turn if current_entry[:topic] == @current_topic
      end

      # Whether a recorded topic and a queried one name the same topic,
      # treating a Symbol and a String with the same text as equal.
      def same_topic?(recorded, queried)
        return false if recorded.nil? || queried.nil?

        recorded == queried || recorded.to_s == queried.to_s
      end
    end
  end
end
|