rspec-agents 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/bin/rspec-agents +24 -0
- data/lib/async_workers/channel_config.rb +34 -0
- data/lib/async_workers/doc/process_manager_design.md +512 -0
- data/lib/async_workers/errors.rb +21 -0
- data/lib/async_workers/managed_process.rb +284 -0
- data/lib/async_workers/output_stream.rb +86 -0
- data/lib/async_workers/rpc_channel.rb +159 -0
- data/lib/async_workers/transport/base.rb +57 -0
- data/lib/async_workers/transport/stdio_transport.rb +91 -0
- data/lib/async_workers/transport/unix_socket_transport.rb +112 -0
- data/lib/async_workers/worker_group.rb +175 -0
- data/lib/async_workers.rb +17 -0
- data/lib/rspec/agents/agent_response.rb +61 -0
- data/lib/rspec/agents/agents/base.rb +123 -0
- data/lib/rspec/agents/cli.rb +342 -0
- data/lib/rspec/agents/conversation.rb +308 -0
- data/lib/rspec/agents/criterion.rb +237 -0
- data/lib/rspec/agents/doc/2026_01_22_observer-system-design.md +757 -0
- data/lib/rspec/agents/doc/2026_01_23_parallel_spec_runner-design.md +1060 -0
- data/lib/rspec/agents/doc/2026_01_27_event_serialization-design.md +294 -0
- data/lib/rspec/agents/doc/2026_01_27_experiment_aggregation_design.md +831 -0
- data/lib/rspec/agents/doc/2026_01_29_rspec-agents-studio-design.md +1332 -0
- data/lib/rspec/agents/doc/2026_01_29_testing-framework-design.md +1037 -0
- data/lib/rspec/agents/doc/2026_02_04-parallel-runner-ui.md +537 -0
- data/lib/rspec/agents/doc/2026_02_05_html_renderer_extensions.md +708 -0
- data/lib/rspec/agents/doc/scenario_guide.md +289 -0
- data/lib/rspec/agents/dsl/agent_proxy.rb +141 -0
- data/lib/rspec/agents/dsl/criterion_definition.rb +78 -0
- data/lib/rspec/agents/dsl/graph_builder.rb +38 -0
- data/lib/rspec/agents/dsl/runner_factory.rb +52 -0
- data/lib/rspec/agents/dsl/scenario_set_dsl.rb +166 -0
- data/lib/rspec/agents/dsl/test_context.rb +223 -0
- data/lib/rspec/agents/dsl/user_proxy.rb +71 -0
- data/lib/rspec/agents/dsl.rb +398 -0
- data/lib/rspec/agents/evaluation_result.rb +44 -0
- data/lib/rspec/agents/event_bus.rb +78 -0
- data/lib/rspec/agents/events.rb +141 -0
- data/lib/rspec/agents/isolated_event_bus.rb +86 -0
- data/lib/rspec/agents/judge.rb +244 -0
- data/lib/rspec/agents/llm/anthropic.rb +143 -0
- data/lib/rspec/agents/llm/base.rb +64 -0
- data/lib/rspec/agents/llm/mock.rb +181 -0
- data/lib/rspec/agents/llm/response.rb +52 -0
- data/lib/rspec/agents/matchers.rb +554 -0
- data/lib/rspec/agents/message.rb +81 -0
- data/lib/rspec/agents/metadata.rb +120 -0
- data/lib/rspec/agents/observers/base.rb +70 -0
- data/lib/rspec/agents/observers/parallel_terminal_observer.rb +151 -0
- data/lib/rspec/agents/observers/rpc_notify_observer.rb +43 -0
- data/lib/rspec/agents/observers/terminal_observer.rb +103 -0
- data/lib/rspec/agents/parallel/controller.rb +284 -0
- data/lib/rspec/agents/parallel/example_discovery.rb +153 -0
- data/lib/rspec/agents/parallel/partitioner.rb +31 -0
- data/lib/rspec/agents/parallel/run_result.rb +22 -0
- data/lib/rspec/agents/parallel/ui/interactive_ui.rb +605 -0
- data/lib/rspec/agents/parallel/ui/interleaved_ui.rb +139 -0
- data/lib/rspec/agents/parallel/ui/output_adapter.rb +127 -0
- data/lib/rspec/agents/parallel/ui/quiet_ui.rb +100 -0
- data/lib/rspec/agents/parallel/ui/ui_factory.rb +53 -0
- data/lib/rspec/agents/parallel/ui/ui_mode.rb +101 -0
- data/lib/rspec/agents/prompt_builders/base.rb +113 -0
- data/lib/rspec/agents/prompt_builders/criterion_evaluation.rb +136 -0
- data/lib/rspec/agents/prompt_builders/goal_achievement_evaluation.rb +142 -0
- data/lib/rspec/agents/prompt_builders/grounding_evaluation.rb +172 -0
- data/lib/rspec/agents/prompt_builders/intent_evaluation.rb +111 -0
- data/lib/rspec/agents/prompt_builders/topic_classification.rb +105 -0
- data/lib/rspec/agents/prompt_builders/user_simulation.rb +131 -0
- data/lib/rspec/agents/runners/headless_runner.rb +272 -0
- data/lib/rspec/agents/runners/parallel_terminal_runner.rb +220 -0
- data/lib/rspec/agents/runners/terminal_runner.rb +186 -0
- data/lib/rspec/agents/runners/user_simulator.rb +261 -0
- data/lib/rspec/agents/scenario.rb +133 -0
- data/lib/rspec/agents/scenario_loader.rb +145 -0
- data/lib/rspec/agents/serialization/conversation_renderer.rb +161 -0
- data/lib/rspec/agents/serialization/extension.rb +199 -0
- data/lib/rspec/agents/serialization/extensions/core_extension.rb +66 -0
- data/lib/rspec/agents/serialization/presenters.rb +281 -0
- data/lib/rspec/agents/serialization/run_data_aggregator.rb +197 -0
- data/lib/rspec/agents/serialization/run_data_builder.rb +189 -0
- data/lib/rspec/agents/serialization/templates/_alpine.min.js +5 -0
- data/lib/rspec/agents/serialization/templates/_base_components.css +196 -0
- data/lib/rspec/agents/serialization/templates/_base_components.js +46 -0
- data/lib/rspec/agents/serialization/templates/_conversation_fragment.html.haml +34 -0
- data/lib/rspec/agents/serialization/templates/_metadata_default.html.haml +17 -0
- data/lib/rspec/agents/serialization/templates/_scripts.js +89 -0
- data/lib/rspec/agents/serialization/templates/_styles.css +1211 -0
- data/lib/rspec/agents/serialization/templates/conversation_document.html.haml +29 -0
- data/lib/rspec/agents/serialization/templates/test_suite.html.haml +238 -0
- data/lib/rspec/agents/serialization/test_suite_renderer.rb +207 -0
- data/lib/rspec/agents/serialization.rb +374 -0
- data/lib/rspec/agents/simulator_config.rb +336 -0
- data/lib/rspec/agents/spec_executor.rb +494 -0
- data/lib/rspec/agents/stable_example_id.rb +147 -0
- data/lib/rspec/agents/templates/user_simulation.erb +9 -0
- data/lib/rspec/agents/tool_call.rb +53 -0
- data/lib/rspec/agents/topic.rb +307 -0
- data/lib/rspec/agents/topic_graph.rb +236 -0
- data/lib/rspec/agents/triggers.rb +122 -0
- data/lib/rspec/agents/turn.rb +63 -0
- data/lib/rspec/agents/turn_executor.rb +91 -0
- data/lib/rspec/agents/version.rb +7 -0
- data/lib/rspec/agents.rb +145 -0
- metadata +242 -0
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "socket"
|
|
4
|
+
require "open3"
|
|
5
|
+
require_relative "base"
|
|
6
|
+
require_relative "../errors"
|
|
7
|
+
|
|
8
|
+
module AsyncWorkers
  module Transport
    # Transport over a Unix domain socket created with Socket.pair.
    # RPC messages go over the socket, so both stdout and stderr of the
    # child process remain available for log capture.
    class UnixSocketTransport < Base
      # @return [Thread, nil] Wait thread returned by Open3.popen3 (nil before #spawn)
      attr_reader :wait_thread

      # @param command [Array<String>] Command to execute
      # @param env [Hash] Environment variables
      # @param chdir [String, nil] Working directory for the process
      def initialize(command:, env: {}, chdir: nil)
        @command = command
        @env = env
        @chdir = chdir
        @closed = false
        @parent_socket = nil
        @child_socket = nil
        @stdout = nil
        @stderr = nil
        @wait_thread = nil
      end

      # Spawn the process with a socket pair for RPC.
      # The child receives the number of its socket end in the
      # RPC_SOCKET_FD environment variable.
      #
      # If the process cannot be started, both socket ends are closed before
      # the error from Open3.popen3 is re-raised, so no descriptors leak.
      #
      # @return [Integer] PID
      def spawn
        # Create socket pair - parent and child ends
        @parent_socket, @child_socket = Socket.pair(:UNIX, :STREAM, 0)
        child_fd = @child_socket.fileno

        # Tell the child which descriptor carries RPC traffic
        spawn_env = @env.merge("RPC_SOCKET_FD" => child_fd.to_s)

        # The fd => fd redirect makes the child inherit the socket under the
        # same descriptor number that was advertised in the environment.
        spawn_opts = { child_fd => child_fd, close_others: false }
        spawn_opts[:chdir] = @chdir if @chdir

        begin
          stdin, @stdout, @stderr, @wait_thread = Open3.popen3(
            spawn_env,
            *@command,
            **spawn_opts
          )
        rescue StandardError
          close_quietly(@parent_socket)
          close_quietly(@child_socket)
          @parent_socket = nil
          @child_socket = nil
          raise
        end

        # stdin is unused because RPC goes over the socket
        stdin.close
        # The parent must not keep the child's end open
        @child_socket.close

        @parent_socket.sync = true
        @stdout.sync = true
        @stderr.sync = true

        @wait_thread.pid
      end

      # Write one line to the RPC socket and flush it.
      #
      # @param line [String] Line to send
      # @raise [ChannelClosedError] If the transport is closed or was never spawned
      def write_line(line)
        raise ChannelClosedError, "Transport closed" if @closed
        raise ChannelClosedError, "Transport not spawned" if @parent_socket.nil?

        @parent_socket.puts(line)
        @parent_socket.flush
      end

      # Read one line from the RPC socket.
      #
      # @return [String, nil] Line without its trailing newline, or nil when
      #   the transport is closed, not yet spawned, or the socket hit EOF
      def read_line
        return nil if @closed || @parent_socket.nil?

        @parent_socket.gets&.chomp
      end

      # @return [IO, nil] stdout stream (available for logs in socket mode)
      def stdout_reader
        @stdout
      end

      # @return [IO, nil] stderr stream
      def stderr_reader
        @stderr
      end

      # Close the parent socket and both output streams.
      # Safe to call more than once; errors raised while closing are ignored.
      def close
        return if @closed

        @closed = true
        [@parent_socket, @stdout, @stderr].each { |io| close_quietly(io) }
      end

      # @return [Boolean] Whether #close has been called
      def closed?
        @closed
      end

      # @return [Integer, nil] PID of the child, or nil before #spawn
      def pid
        @wait_thread&.pid
      end

      # Wait for the process to exit and return exit status.
      # Uses Open3's wait_thread which handles process reaping.
      # @return [Process::Status, nil] nil when the process was never spawned
      def wait_for_exit
        @wait_thread&.value
      end

      private

      # Best-effort close: teardown must not fail because one stream is
      # already broken.
      def close_quietly(io)
        io&.close
      rescue StandardError
        nil
      end
    end
  end
end
|
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "async"
|
|
4
|
+
require_relative "errors"
|
|
5
|
+
require_relative "channel_config"
|
|
6
|
+
require_relative "managed_process"
|
|
7
|
+
|
|
8
|
+
module AsyncWorkers
  # Coordinates multiple identical workers in a fan-out pattern.
  # Provides fail-fast semantics: if any worker exits unsuccessfully,
  # all other workers are killed immediately.
  #
  # @example Fan-out work to multiple workers
  #   Async do |task|
  #     group = WorkerGroup.new(
  #       size: 4,
  #       command: ['ruby', 'worker.rb'],
  #       rpc: ChannelConfig.stdio_rpc
  #     )
  #
  #     group.start(task: task)
  #
  #     # Set up handlers
  #     group.each_with_index do |worker, i|
  #       worker.stderr.on_data { |line| puts "[worker-#{i}] #{line}" }
  #     end
  #
  #     # Fan-out work
  #     results = group.map do |worker|
  #       worker.rpc.request({ action: 'process', data: '...' })
  #     end
  #
  #     group.stop
  #   end
  #
  class WorkerGroup
    include Enumerable

    # @return [Array<ManagedProcess>] All workers
    attr_reader :workers
    alias_method :to_a, :workers

    # @return [Integer] Number of workers
    attr_reader :size

    # @return [WorkerFailure, nil] First failure encountered
    attr_reader :failure

    # @param size [Integer] Number of workers to spawn
    # @param command [Array<String>] Command to execute for each worker
    # @param env [Hash] Base environment variables (WORKER_INDEX added automatically)
    # @param rpc [ChannelConfig] RPC configuration
    def initialize(size:, command:, env: {}, rpc: ChannelConfig.no_rpc)
      @size = size
      @command = command
      @base_env = env
      @rpc_config = rpc
      @stopping = false
      @failure = nil
      @failure_condition = nil
      @failure_mutex = Mutex.new

      @workers = size.times.map do |i|
        ManagedProcess.new(
          command: command,
          env: env.merge("WORKER_INDEX" => i.to_s),
          rpc: rpc
        )
      end
    end

    # Spawn all workers.
    # @param task [Async::Task] Parent async task
    def start(task:)
      # Reuse a condition that #wait_for_failure may already have created so
      # that earlier waiters are still signalled.
      @failure_condition ||= Async::Condition.new

      @workers.each_with_index do |worker, i|
        worker.on_exit do |status|
          handle_worker_exit(i, status) unless @stopping
        end
        worker.start(task: task)
      end
    end

    # Access worker by index.
    # @param index [Integer] Worker index
    # @return [ManagedProcess]
    def [](index)
      @workers[index]
    end

    # Iterate over workers.
    # @yield [ManagedProcess] Each worker
    def each(&block)
      @workers.each(&block)
    end

    # Graceful shutdown of all workers (parallel).
    # @param timeout [Numeric] Timeout per worker
    def stop(timeout: 5)
      @stopping = true

      # Stop all workers - if we're in an Async context, run in parallel
      if Async::Task.current?
        tasks = @workers.map do |worker|
          Async::Task.current.async { worker.stop(timeout: timeout) }
        end
        tasks.each(&:wait)
      else
        # Not in async context, stop sequentially
        @workers.each { |worker| worker.stop(timeout: timeout) }
      end
    end

    # Immediate kill of all workers.
    def kill
      @stopping = true
      @workers.each(&:kill)
    end

    # Block until all workers exit.
    # @param timeout [Numeric, nil] Optional timeout in seconds
    # @return [Array<Process::Status>] Exit statuses of all workers
    # @raise [Async::TimeoutError] If timeout exceeded
    def wait(timeout: nil)
      if timeout
        Async::Task.current.with_timeout(timeout) do
          @workers.map(&:wait)
        end
      else
        @workers.map(&:wait)
      end
    end

    # Check if all workers are running.
    # @return [Boolean]
    def alive?
      @workers.all?(&:alive?)
    end

    # Check if any worker has failed.
    # @return [Boolean]
    def failed?
      !@failure.nil?
    end

    # Block until a worker fails.
    #
    # Returns immediately when a failure has already been recorded: the
    # condition is only signalled once, so a caller that starts waiting after
    # the failure would otherwise never wake up. The condition is created on
    # demand so this can also be called before #start.
    #
    # @return [WorkerFailure] The failure exception
    def wait_for_failure
      return @failure if @failure

      (@failure_condition ||= Async::Condition.new).wait
    end

    private

    # Records the first unsuccessful worker exit, wakes up waiters and kills
    # the remaining workers. Successful exits and nil statuses are ignored.
    def handle_worker_exit(index, status)
      return if status.nil? || status.success?

      # Use mutex to ensure only first failure is recorded
      first_failure = @failure_mutex.synchronize do
        next nil if @failure # Already handling a failure

        @failure = WorkerFailure.new(worker_index: index, exit_status: status)
      end

      return unless first_failure

      @failure_condition.signal(@failure)

      # Kill all other workers; setting @stopping keeps their exit callbacks
      # from being treated as further failures.
      @stopping = true
      @workers.each_with_index do |worker, i|
        worker.kill if i != index && worker.alive?
      end
    end
  end
end
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "async"
|
|
4
|
+
|
|
5
|
+
require_relative "async_workers/errors"
|
|
6
|
+
require_relative "async_workers/channel_config"
|
|
7
|
+
require_relative "async_workers/output_stream"
|
|
8
|
+
require_relative "async_workers/transport/base"
|
|
9
|
+
require_relative "async_workers/transport/stdio_transport"
|
|
10
|
+
require_relative "async_workers/transport/unix_socket_transport"
|
|
11
|
+
require_relative "async_workers/rpc_channel"
|
|
12
|
+
require_relative "async_workers/managed_process"
|
|
13
|
+
require_relative "async_workers/worker_group"
|
|
14
|
+
|
|
15
|
+
# Namespace shared by the AsyncWorkers components required by this file
# (errors, channel config, output stream, transports, RPC channel,
# managed process and worker group).
module AsyncWorkers
  # Version string of the AsyncWorkers library.
  VERSION = "0.1.0"
end
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
module RSpec
  module Agents
    # Represents a response from the agent under test: the reply text, the
    # tool calls made while producing it, and optional metadata.
    class AgentResponse
      attr_reader :text, :tool_calls, :metadata

      # @param text [String, nil] The agent's response text
      # @param tool_calls [Array<ToolCall>, nil] Tool calls made during this
      #   response; nil is treated as "no tool calls"
      # @param metadata [Metadata] Optional provider-specific data
      def initialize(text:, tool_calls: [], metadata: Metadata.new)
        @text = text
        @tool_calls = tool_calls || []
        @metadata = metadata
      end

      # @param name [String, Symbol] Tool name
      # @param params [Object, nil] Passed to each tool call's #matches_params?
      # @return [Boolean] Whether any tool call matches name and params
      def has_tool_call?(name, params: nil)
        @tool_calls.any? { |tc| tool_call_matches?(tc, name, params) }
      end

      # @param name [String, Symbol] Tool name
      # @param params [Object, nil] Passed to each tool call's #matches_params?
      # @return [Array<ToolCall>] All tool calls matching name and params
      def find_tool_calls(name, params: nil)
        @tool_calls.select { |tc| tool_call_matches?(tc, name, params) }
      end

      # @param name [String, Symbol] Tool name
      # @return [ToolCall, nil] First tool call with the given name
      def tool_call(name)
        @tool_calls.find { |tc| tc.name == name.to_sym }
      end

      # @return [Boolean] true when the text is nil or empty
      def empty?
        @text.nil? || @text.empty?
      end

      # @return [Integer] Length of the text (0 when the text is nil)
      def length
        @text&.length || 0
      end

      # Test the response text against a pattern.
      #
      # A String pattern is matched the way String#match? does it, i.e. it is
      # converted to a Regexp and searched for in the text. Any other pattern
      # (typically a Regexp) receives #match? with the text as argument.
      #
      # @param pattern [Regexp, String, #match?]
      # @return [Boolean]
      def match?(pattern)
        subject = @text.to_s
        # Calling pattern.match?(subject) on a String would use the text as
        # the regex and the pattern as the haystack, so flip the receiver.
        return subject.match?(pattern) if pattern.is_a?(String)

        pattern.match?(subject)
      end

      # @param substring [String]
      # @return [Boolean] Whether the text contains the substring
      def include?(substring)
        @text.to_s.include?(substring)
      end

      # @return [Hash] text, tool calls (each converted with #to_h) and metadata
      def to_h
        {
          text: @text,
          tool_calls: @tool_calls.map(&:to_h),
          metadata: @metadata.to_h
        }
      end

      # @return [String] The response text ("" when the text is nil)
      def to_s
        @text.to_s
      end

      private

      # Shared predicate for #has_tool_call? and #find_tool_calls.
      def tool_call_matches?(tool_call, name, params)
        tool_call.name == name.to_sym && tool_call.matches_params?(params)
      end
    end
  end
end
|
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
module RSpec
  module Agents
    module Agents
      # Common superclass for agent adapters, the objects that talk to the
      # chatbot being tested.
      #
      # A fresh agent instance is handed to every test execution, which allows
      # per-test configuration and keeps state isolated between tests.
      #
      # @example Implementing a custom agent
      #   class MyHttpAgent < RSpec::Agents::Agents::Base
      #     def self.build(context = {})
      #       new(
      #         base_url: ENV["AGENT_URL"],
      #         api_key:  ENV["AGENT_API_KEY"],
      #         context:  context
      #       )
      #     end
      #
      #     def initialize(base_url:, api_key:, context: {})
      #       super(context: context)
      #       @base_url = base_url
      #       @api_key = api_key
      #     end
      #
      #     def chat(messages, on_tool_call: nil)
      #       response = HTTParty.post("#{@base_url}/chat", ...)
      #       tool_calls = parse_tool_calls(response["tool_calls"])
      #
      #       # Signal each tool call via callback if provided
      #       tool_calls.each { |tc| on_tool_call&.call(tc) }
      #
      #       AgentResponse.new(
      #         text: response["content"],
      #         tool_calls: tool_calls,
      #         metadata: { latency_ms: elapsed }
      #       )
      #     end
      #   end
      class Base
        class << self
          # Factory hook the framework calls once per test; subclasses
          # override it to customize how the agent is instantiated.
          #
          # @param context [Hash] Test execution context containing:
          #   - :test_name [String] Full RSpec example description
          #   - :test_file [String] Source file path
          #   - :test_line [Integer] Line number of the test
          #   - :tags [Hash] RSpec metadata tags (:focus, :slow, etc.)
          #   - :scenario [String] Scenario name if using external scenario files
          # @return [Base] Agent instance
          def build(context = {})
            new(context: context)
          end
        end

        # @param context [Hash] Test execution context
        def initialize(context: {})
          @context = context
        end

        # Sends the conversation to the agent and returns its reply.
        # Subclasses must implement this; the base version always raises.
        #
        # @param messages [Array<Hash, Message>] Conversation history
        #   Each message has :role ("user" or "agent") and :content
        # @param on_tool_call [Proc, nil] Optional callback invoked for each tool call
        #   Callback receives a ToolCall object as argument
        # @return [AgentResponse] The agent's response
        # @raise [NotImplementedError] Always, in this base class
        def chat(messages, on_tool_call: nil)
          explanation = "#{self.class} must implement #chat(messages, on_tool_call: nil)"
          raise NotImplementedError, explanation
        end

        # Resets conversation state for stateful agents.
        # Does nothing here; subclasses holding internal state override it.
        def reset!
          nil
        end

        # Wraps test execution for isolation (e.g., database transactions).
        # The base implementation simply calls the block.
        #
        # @yield The test block to execute
        # @return [Object] The result of the block
        #
        # @example Wrapping in a database transaction
        #   def around(&block)
        #     ActiveRecord::Base.transaction(requires_new: true) do
        #       block.call
        #       raise ActiveRecord::Rollback
        #     end
        #   end
        def around(&block)
          block.call
        end

        # Agent metadata for reporting; override to expose debugging details.
        #
        # @return [Metadata]
        def metadata
          Metadata.new
        end

        protected

        attr_reader :context

        # Converts messages to a uniform hash format.
        # Hashes may use symbol or string keys; any other object is read
        # through its #role and #content methods.
        #
        # @param messages [Array] Messages in various formats
        # @return [Array<Hash>] Normalized messages with :role and :content
        def normalize_messages(messages)
          messages.map do |entry|
            if entry.is_a?(Hash)
              role = entry[:role] || entry["role"]
              content = entry[:content] || entry["content"]
            else
              role = entry.role
              content = entry.content
            end

            { role: role, content: content }
          end
        end
      end
    end
  end
end
|