rspec-agents 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104):
  1. checksums.yaml +7 -0
  2. data/bin/rspec-agents +24 -0
  3. data/lib/async_workers/channel_config.rb +34 -0
  4. data/lib/async_workers/doc/process_manager_design.md +512 -0
  5. data/lib/async_workers/errors.rb +21 -0
  6. data/lib/async_workers/managed_process.rb +284 -0
  7. data/lib/async_workers/output_stream.rb +86 -0
  8. data/lib/async_workers/rpc_channel.rb +159 -0
  9. data/lib/async_workers/transport/base.rb +57 -0
  10. data/lib/async_workers/transport/stdio_transport.rb +91 -0
  11. data/lib/async_workers/transport/unix_socket_transport.rb +112 -0
  12. data/lib/async_workers/worker_group.rb +175 -0
  13. data/lib/async_workers.rb +17 -0
  14. data/lib/rspec/agents/agent_response.rb +61 -0
  15. data/lib/rspec/agents/agents/base.rb +123 -0
  16. data/lib/rspec/agents/cli.rb +342 -0
  17. data/lib/rspec/agents/conversation.rb +308 -0
  18. data/lib/rspec/agents/criterion.rb +237 -0
  19. data/lib/rspec/agents/doc/2026_01_22_observer-system-design.md +757 -0
  20. data/lib/rspec/agents/doc/2026_01_23_parallel_spec_runner-design.md +1060 -0
  21. data/lib/rspec/agents/doc/2026_01_27_event_serialization-design.md +294 -0
  22. data/lib/rspec/agents/doc/2026_01_27_experiment_aggregation_design.md +831 -0
  23. data/lib/rspec/agents/doc/2026_01_29_rspec-agents-studio-design.md +1332 -0
  24. data/lib/rspec/agents/doc/2026_01_29_testing-framework-design.md +1037 -0
  25. data/lib/rspec/agents/doc/2026_02_04-parallel-runner-ui.md +537 -0
  26. data/lib/rspec/agents/doc/2026_02_05_html_renderer_extensions.md +708 -0
  27. data/lib/rspec/agents/doc/scenario_guide.md +289 -0
  28. data/lib/rspec/agents/dsl/agent_proxy.rb +141 -0
  29. data/lib/rspec/agents/dsl/criterion_definition.rb +78 -0
  30. data/lib/rspec/agents/dsl/graph_builder.rb +38 -0
  31. data/lib/rspec/agents/dsl/runner_factory.rb +52 -0
  32. data/lib/rspec/agents/dsl/scenario_set_dsl.rb +166 -0
  33. data/lib/rspec/agents/dsl/test_context.rb +223 -0
  34. data/lib/rspec/agents/dsl/user_proxy.rb +71 -0
  35. data/lib/rspec/agents/dsl.rb +398 -0
  36. data/lib/rspec/agents/evaluation_result.rb +44 -0
  37. data/lib/rspec/agents/event_bus.rb +78 -0
  38. data/lib/rspec/agents/events.rb +141 -0
  39. data/lib/rspec/agents/isolated_event_bus.rb +86 -0
  40. data/lib/rspec/agents/judge.rb +244 -0
  41. data/lib/rspec/agents/llm/anthropic.rb +143 -0
  42. data/lib/rspec/agents/llm/base.rb +64 -0
  43. data/lib/rspec/agents/llm/mock.rb +181 -0
  44. data/lib/rspec/agents/llm/response.rb +52 -0
  45. data/lib/rspec/agents/matchers.rb +554 -0
  46. data/lib/rspec/agents/message.rb +81 -0
  47. data/lib/rspec/agents/metadata.rb +120 -0
  48. data/lib/rspec/agents/observers/base.rb +70 -0
  49. data/lib/rspec/agents/observers/parallel_terminal_observer.rb +151 -0
  50. data/lib/rspec/agents/observers/rpc_notify_observer.rb +43 -0
  51. data/lib/rspec/agents/observers/terminal_observer.rb +103 -0
  52. data/lib/rspec/agents/parallel/controller.rb +284 -0
  53. data/lib/rspec/agents/parallel/example_discovery.rb +153 -0
  54. data/lib/rspec/agents/parallel/partitioner.rb +31 -0
  55. data/lib/rspec/agents/parallel/run_result.rb +22 -0
  56. data/lib/rspec/agents/parallel/ui/interactive_ui.rb +605 -0
  57. data/lib/rspec/agents/parallel/ui/interleaved_ui.rb +139 -0
  58. data/lib/rspec/agents/parallel/ui/output_adapter.rb +127 -0
  59. data/lib/rspec/agents/parallel/ui/quiet_ui.rb +100 -0
  60. data/lib/rspec/agents/parallel/ui/ui_factory.rb +53 -0
  61. data/lib/rspec/agents/parallel/ui/ui_mode.rb +101 -0
  62. data/lib/rspec/agents/prompt_builders/base.rb +113 -0
  63. data/lib/rspec/agents/prompt_builders/criterion_evaluation.rb +136 -0
  64. data/lib/rspec/agents/prompt_builders/goal_achievement_evaluation.rb +142 -0
  65. data/lib/rspec/agents/prompt_builders/grounding_evaluation.rb +172 -0
  66. data/lib/rspec/agents/prompt_builders/intent_evaluation.rb +111 -0
  67. data/lib/rspec/agents/prompt_builders/topic_classification.rb +105 -0
  68. data/lib/rspec/agents/prompt_builders/user_simulation.rb +131 -0
  69. data/lib/rspec/agents/runners/headless_runner.rb +272 -0
  70. data/lib/rspec/agents/runners/parallel_terminal_runner.rb +220 -0
  71. data/lib/rspec/agents/runners/terminal_runner.rb +186 -0
  72. data/lib/rspec/agents/runners/user_simulator.rb +261 -0
  73. data/lib/rspec/agents/scenario.rb +133 -0
  74. data/lib/rspec/agents/scenario_loader.rb +145 -0
  75. data/lib/rspec/agents/serialization/conversation_renderer.rb +161 -0
  76. data/lib/rspec/agents/serialization/extension.rb +199 -0
  77. data/lib/rspec/agents/serialization/extensions/core_extension.rb +66 -0
  78. data/lib/rspec/agents/serialization/presenters.rb +281 -0
  79. data/lib/rspec/agents/serialization/run_data_aggregator.rb +197 -0
  80. data/lib/rspec/agents/serialization/run_data_builder.rb +189 -0
  81. data/lib/rspec/agents/serialization/templates/_alpine.min.js +5 -0
  82. data/lib/rspec/agents/serialization/templates/_base_components.css +196 -0
  83. data/lib/rspec/agents/serialization/templates/_base_components.js +46 -0
  84. data/lib/rspec/agents/serialization/templates/_conversation_fragment.html.haml +34 -0
  85. data/lib/rspec/agents/serialization/templates/_metadata_default.html.haml +17 -0
  86. data/lib/rspec/agents/serialization/templates/_scripts.js +89 -0
  87. data/lib/rspec/agents/serialization/templates/_styles.css +1211 -0
  88. data/lib/rspec/agents/serialization/templates/conversation_document.html.haml +29 -0
  89. data/lib/rspec/agents/serialization/templates/test_suite.html.haml +238 -0
  90. data/lib/rspec/agents/serialization/test_suite_renderer.rb +207 -0
  91. data/lib/rspec/agents/serialization.rb +374 -0
  92. data/lib/rspec/agents/simulator_config.rb +336 -0
  93. data/lib/rspec/agents/spec_executor.rb +494 -0
  94. data/lib/rspec/agents/stable_example_id.rb +147 -0
  95. data/lib/rspec/agents/templates/user_simulation.erb +9 -0
  96. data/lib/rspec/agents/tool_call.rb +53 -0
  97. data/lib/rspec/agents/topic.rb +307 -0
  98. data/lib/rspec/agents/topic_graph.rb +236 -0
  99. data/lib/rspec/agents/triggers.rb +122 -0
  100. data/lib/rspec/agents/turn.rb +63 -0
  101. data/lib/rspec/agents/turn_executor.rb +91 -0
  102. data/lib/rspec/agents/version.rb +7 -0
  103. data/lib/rspec/agents.rb +145 -0
  104. metadata +242 -0
@@ -0,0 +1,112 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "socket"
4
+ require "open3"
5
+ require_relative "base"
6
+ require_relative "../errors"
7
+
8
+ module AsyncWorkers
9
+ module Transport
10
# Transport over a Unix domain socket created with Socket.pair.
# RPC messages travel over the socket, which leaves the child's stdout and
# stderr free for log capture.
class UnixSocketTransport < Base
  # @return [Thread, nil] wait thread returned by Open3.popen3 (nil before #spawn)
  attr_reader :wait_thread

  # @param command [Array<String>] Command to execute
  # @param env [Hash] Environment variables
  # @param chdir [String, nil] Working directory for the process
  def initialize(command:, env: {}, chdir: nil)
    @command = command
    @env = env
    @chdir = chdir
    @closed = false
    @parent_socket = nil
    @child_socket = nil
    @stdout = nil
    @stderr = nil
    @wait_thread = nil
  end

  # Spawn the process with a socket pair for RPC.
  # The child receives the inherited descriptor number in the
  # RPC_SOCKET_FD environment variable.
  #
  # If the process cannot be started, both ends of the socket pair are
  # closed before the error is re-raised so no descriptors are leaked.
  #
  # @return [Integer] PID
  def spawn
    # Create socket pair - parent and child ends
    @parent_socket, @child_socket = Socket.pair(:UNIX, :STREAM, 0)

    child_fd = @child_socket.fileno

    # Tell the child which descriptor carries RPC traffic
    spawn_env = @env.merge("RPC_SOCKET_FD" => child_fd.to_s)

    # Map the fd onto itself and keep other fds open so the child inherits it
    spawn_opts = { child_fd => child_fd, close_others: false }
    spawn_opts[:chdir] = @chdir if @chdir

    begin
      stdin, @stdout, @stderr, @wait_thread = Open3.popen3(
        spawn_env,
        *@command,
        **spawn_opts
      )
    rescue StandardError
      # Spawn failed: release the socket pair instead of leaking it
      close_quietly(@parent_socket)
      close_quietly(@child_socket)
      @parent_socket = nil
      @child_socket = nil
      raise
    end

    # Close stdin since we use the socket for RPC
    stdin.close
    # Close child end of socket in parent
    @child_socket.close

    @parent_socket.sync = true
    @stdout.sync = true
    @stderr.sync = true

    @wait_thread.pid
  end

  # Write one line to the RPC socket and flush it.
  # @param line [String] Line to send
  # @raise [ChannelClosedError] If the transport has been closed
  def write_line(line)
    raise ChannelClosedError, "Transport closed" if @closed
    @parent_socket.puts(line)
    @parent_socket.flush
  end

  # Read one line from the RPC socket.
  # @return [String, nil] The line without its trailing newline, or nil
  #   when the transport is closed or the socket reached end of stream
  def read_line
    return nil if @closed
    line = @parent_socket.gets
    return nil if line.nil?
    line.chomp
  end

  # @return [IO] stdout stream (available for logs in socket mode)
  def stdout_reader
    @stdout
  end

  # @return [IO] stderr stream
  def stderr_reader
    @stderr
  end

  # Close the socket and both output streams. Safe to call repeatedly;
  # IO-level errors raised while closing are ignored.
  def close
    return if @closed
    @closed = true

    close_quietly(@parent_socket)
    close_quietly(@stdout)
    close_quietly(@stderr)
  end

  # @return [Boolean] Whether #close has been called
  def closed?
    @closed
  end

  # @return [Integer, nil] PID of the spawned process (nil before #spawn)
  def pid
    @wait_thread&.pid
  end

  # Wait for the process to exit and return exit status.
  # Uses Open3's wait_thread which handles process reaping.
  # @return [Process::Status]
  def wait_for_exit
    @wait_thread&.value
  end

  private

  # Best-effort close that ignores IO-level failures only.
  # @param io [IO, nil] Stream to close
  # @return [nil]
  def close_quietly(io)
    io&.close
    nil
  rescue IOError, SystemCallError
    nil
  end
end
111
+ end
112
+ end
@@ -0,0 +1,175 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "async"
4
+ require_relative "errors"
5
+ require_relative "channel_config"
6
+ require_relative "managed_process"
7
+
8
+ module AsyncWorkers
9
# Coordinates multiple identical workers in a fan-out pattern.
# Provides fail-fast semantics: if any worker exits with non-zero status,
# all other workers are killed immediately.
#
# @example Fan-out work to multiple workers
#   Async do |task|
#     group = WorkerGroup.new(
#       size: 4,
#       command: ['ruby', 'worker.rb'],
#       rpc: ChannelConfig.stdio_rpc
#     )
#
#     group.start(task: task)
#
#     # Set up handlers
#     group.each_with_index do |worker, i|
#       worker.stderr.on_data { |line| puts "[worker-#{i}] #{line}" }
#     end
#
#     # Fan-out work
#     results = group.map do |worker|
#       worker.rpc.request({ action: 'process', data: '...' })
#     end
#
#     group.stop
#   end
#
class WorkerGroup
  include Enumerable

  # @return [Array<ManagedProcess>] All workers
  attr_reader :workers
  alias_method :to_a, :workers

  # @return [Integer] Number of workers
  attr_reader :size

  # @return [WorkerFailure, nil] First failure encountered
  attr_reader :failure

  # @param size [Integer] Number of workers to spawn
  # @param command [Array<String>] Command to execute for each worker
  # @param env [Hash] Base environment variables (WORKER_INDEX added automatically)
  # @param rpc [ChannelConfig] RPC configuration
  def initialize(size:, command:, env: {}, rpc: ChannelConfig.no_rpc)
    @size = size
    @command = command
    @base_env = env
    @rpc_config = rpc
    @stopping = false
    @failure = nil
    @failure_condition = nil
    @failure_mutex = Mutex.new

    @workers = size.times.map do |i|
      ManagedProcess.new(
        command: command,
        env: env.merge("WORKER_INDEX" => i.to_s),
        rpc: rpc
      )
    end
  end

  # Spawn all workers and register the fail-fast exit handler on each.
  # @param task [Async::Task] Parent async task
  def start(task:)
    @failure_condition = Async::Condition.new

    @workers.each_with_index do |worker, i|
      worker.on_exit do |status|
        # Exits caused by our own stop/kill are not failures
        handle_worker_exit(i, status) unless @stopping
      end
      worker.start(task: task)
    end
  end

  # Access worker by index.
  # @param index [Integer] Worker index
  # @return [ManagedProcess]
  def [](index)
    @workers[index]
  end

  # Iterate over workers.
  # @yield [ManagedProcess] Each worker
  def each(&block)
    @workers.each(&block)
  end

  # Graceful shutdown of all workers (parallel).
  # @param timeout [Numeric] Timeout per worker
  def stop(timeout: 5)
    @stopping = true

    # Stop all workers - if we're in an Async context, run in parallel
    if Async::Task.current?
      tasks = @workers.map do |worker|
        Async::Task.current.async { worker.stop(timeout: timeout) }
      end
      tasks.each(&:wait)
    else
      # Not in async context, stop sequentially
      @workers.each { |worker| worker.stop(timeout: timeout) }
    end
  end

  # Immediate kill of all workers.
  def kill
    @stopping = true
    @workers.each(&:kill)
  end

  # Block until all workers exit.
  # @param timeout [Numeric, nil] Optional timeout in seconds
  # @return [Array<Process::Status>] Exit statuses of all workers
  # @raise [Async::TimeoutError] If timeout exceeded
  def wait(timeout: nil)
    if timeout
      Async::Task.current.with_timeout(timeout) do
        @workers.map(&:wait)
      end
    else
      @workers.map(&:wait)
    end
  end

  # Check if all workers are running.
  # @return [Boolean]
  def alive?
    @workers.all?(&:alive?)
  end

  # Check if any worker has failed.
  # @return [Boolean]
  def failed?
    !@failure.nil?
  end

  # Block until a worker fails.
  #
  # If a failure was already recorded it is returned immediately. The
  # condition is signalled only once, so waiting on it after the fact
  # would never wake up.
  #
  # @return [WorkerFailure] The failure exception
  def wait_for_failure
    return @failure if @failure

    @failure_condition.wait
  end

  private

  # Record the first non-zero worker exit, wake anything blocked in
  # #wait_for_failure and kill the remaining workers.
  #
  # @param index [Integer] Index of the worker that exited
  # @param status [Process::Status, nil] Its exit status
  def handle_worker_exit(index, status)
    return if status.nil? || status.success?

    # Use mutex to ensure only the first failure is recorded
    recorded = @failure_mutex.synchronize do
      if @failure
        false # Already handling a failure
      else
        @failure = WorkerFailure.new(worker_index: index, exit_status: status)
        true
      end
    end

    return unless recorded

    @failure_condition.signal(@failure)

    # Kill all other workers; flag first so their exits are not treated as failures
    @stopping = true
    @workers.each_with_index do |worker, i|
      worker.kill if i != index && worker.alive?
    end
  end
end
175
+ end
@@ -0,0 +1,17 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "async"
4
+
5
+ require_relative "async_workers/errors"
6
+ require_relative "async_workers/channel_config"
7
+ require_relative "async_workers/output_stream"
8
+ require_relative "async_workers/transport/base"
9
+ require_relative "async_workers/transport/stdio_transport"
10
+ require_relative "async_workers/transport/unix_socket_transport"
11
+ require_relative "async_workers/rpc_channel"
12
+ require_relative "async_workers/managed_process"
13
+ require_relative "async_workers/worker_group"
14
+
15
# Top-level namespace shared by the AsyncWorkers components.
module AsyncWorkers
  # Version string of the AsyncWorkers library.
  VERSION = "0.1.0"
end
@@ -0,0 +1,61 @@
1
+ module RSpec
2
+ module Agents
3
# Represents a response from the agent under test: the reply text, the
# tool calls made while producing it, and optional metadata.
class AgentResponse
  attr_reader :text, :tool_calls, :metadata

  # @param text [String] The agent's response text
  # @param tool_calls [Array<ToolCall>] Tool calls made during this response
  # @param metadata [Metadata] Optional provider-specific data
  def initialize(text:, tool_calls: [], metadata: Metadata.new)
    @text = text
    @tool_calls = tool_calls
    @metadata = metadata
  end

  # @param name [String, Symbol] Tool name (compared as a Symbol)
  # @param params [Object, nil] Passed to each tool call's #matches_params?
  # @return [Boolean] Whether any tool call has this name and matches params
  def has_tool_call?(name, params: nil)
    @tool_calls.any? { |tc| tool_call_matches?(tc, name, params) }
  end

  # @param name [String, Symbol] Tool name (compared as a Symbol)
  # @param params [Object, nil] Passed to each tool call's #matches_params?
  # @return [Array<ToolCall>] All tool calls with this name that match params
  def find_tool_calls(name, params: nil)
    @tool_calls.select { |tc| tool_call_matches?(tc, name, params) }
  end

  # @param name [String, Symbol] Tool name (compared as a Symbol)
  # @return [ToolCall, nil] First tool call with this name, if any
  def tool_call(name)
    @tool_calls.find { |tc| tc.name == name.to_sym }
  end

  # @return [Boolean] True when the text is nil or an empty string
  def empty?
    @text.nil? || @text.empty?
  end

  # @return [Integer] Length of the text (0 when the text is nil)
  def length
    @text&.length || 0
  end

  # Test the response text against a pattern.
  #
  # A String pattern is handed to String#match?, which treats it as regexp
  # source, so the text is always the subject of the match. Calling
  # `pattern.match?(text)` on a String would match in the wrong direction.
  # Any other pattern (Regexp or an object responding to #match?) is
  # asked directly.
  #
  # @param pattern [Regexp, String, #match?] Pattern to test
  # @return [Boolean]
  def match?(pattern)
    subject = @text.to_s
    return subject.match?(pattern) if pattern.is_a?(String)

    pattern.match?(subject)
  end

  # @param substring [String] Literal substring to look for
  # @return [Boolean] Whether the text contains the substring
  def include?(substring)
    @text.to_s.include?(substring)
  end

  # @return [Hash] text, tool calls (each via #to_h) and metadata (via #to_h)
  def to_h
    {
      text: @text,
      tool_calls: @tool_calls.map(&:to_h),
      metadata: @metadata.to_h
    }
  end

  # @return [String] The response text ("" when the text is nil)
  def to_s
    @text.to_s
  end

  private

  # Shared predicate for name + params lookups.
  def tool_call_matches?(tool_call, name, params)
    tool_call.name == name.to_sym && tool_call.matches_params?(params)
  end
end
60
+ end
61
+ end
@@ -0,0 +1,123 @@
1
+ module RSpec
2
+ module Agents
3
+ module Agents
4
# Abstract adapter between the test framework and the chatbot under test.
#
# The framework asks for a new agent per test execution (see .build), so
# subclasses can configure themselves per test and keep state isolated.
#
# @example Implementing a custom agent
#   class MyHttpAgent < RSpec::Agents::Agents::Base
#     def self.build(context = {})
#       new(
#         base_url: ENV["AGENT_URL"],
#         api_key: ENV["AGENT_API_KEY"],
#         context: context
#       )
#     end
#
#     def initialize(base_url:, api_key:, context: {})
#       super(context: context)
#       @base_url = base_url
#       @api_key = api_key
#     end
#
#     def chat(messages, on_tool_call: nil)
#       response = HTTParty.post("#{@base_url}/chat", ...)
#       tool_calls = parse_tool_calls(response["tool_calls"])
#
#       # Signal each tool call via callback if provided
#       tool_calls.each { |tc| on_tool_call&.call(tc) }
#
#       AgentResponse.new(
#         text: response["content"],
#         tool_calls: tool_calls,
#         metadata: { latency_ms: elapsed }
#       )
#     end
#   end
class Base
  class << self
    # Factory hook the framework calls for each test.
    # Subclasses override it to customise how the agent is constructed.
    #
    # @param context [Hash] Test execution context containing:
    #   - :test_name [String] Full RSpec example description
    #   - :test_file [String] Source file path
    #   - :test_line [Integer] Line number of the test
    #   - :tags [Hash] RSpec metadata tags (:focus, :slow, etc.)
    #   - :scenario [String] Scenario name if using external scenario files
    # @return [Base] Agent instance
    def build(context = {})
      new(context: context)
    end
  end

  # @param context [Hash] Test execution context
  def initialize(context: {})
    @context = context
  end

  # Send the conversation so far and obtain the agent's reply.
  # Subclasses must override this; the base implementation always raises.
  #
  # @param messages [Array<Hash, Message>] Conversation history
  #   Each message has :role ("user" or "agent") and :content
  # @param on_tool_call [Proc, nil] Optional callback invoked for each tool call
  #   Callback receives a ToolCall object as argument
  # @return [AgentResponse] The agent's response
  # @raise [NotImplementedError] Always, unless overridden
  def chat(messages, on_tool_call: nil)
    explanation = "#{self.class} must implement #chat(messages, on_tool_call: nil)"
    raise NotImplementedError, explanation
  end

  # Hook for stateful agents to clear conversation state.
  # Does nothing by default.
  def reset!
    nil
  end

  # Hook for running a test inside an isolating wrapper
  # (e.g., a database transaction). The default simply runs the block.
  #
  # @yield The test block to execute
  # @return [Object] The result of the block
  #
  # @example Wrapping in a database transaction
  #   def around(&block)
  #     ActiveRecord::Base.transaction(requires_new: true) do
  #       block.call
  #       raise ActiveRecord::Rollback
  #     end
  #   end
  def around(&test_body)
    test_body.call
  end

  # Reporting metadata for this agent; empty unless overridden.
  #
  # @return [Metadata]
  def metadata
    Metadata.new
  end

  protected

  attr_reader :context

  # Convert a mixed list of messages into plain role/content hashes.
  # Hashes may use Symbol or String keys; any other object is read
  # through its #role and #content methods.
  #
  # @param messages [Array] Messages in various formats
  # @return [Array<Hash>] Normalized messages
  def normalize_messages(messages)
    # Symbol key wins; the String key is only consulted when it is falsy
    lookup = ->(hash, key) { hash[key] || hash[key.to_s] }

    messages.map do |entry|
      if Hash === entry
        { role: lookup.call(entry, :role), content: lookup.call(entry, :content) }
      else
        { role: entry.role, content: entry.content }
      end
    end
  end
end
121
+ end
122
+ end
123
+ end