rspec-agents 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/bin/rspec-agents +24 -0
- data/lib/async_workers/channel_config.rb +34 -0
- data/lib/async_workers/doc/process_manager_design.md +512 -0
- data/lib/async_workers/errors.rb +21 -0
- data/lib/async_workers/managed_process.rb +284 -0
- data/lib/async_workers/output_stream.rb +86 -0
- data/lib/async_workers/rpc_channel.rb +159 -0
- data/lib/async_workers/transport/base.rb +57 -0
- data/lib/async_workers/transport/stdio_transport.rb +91 -0
- data/lib/async_workers/transport/unix_socket_transport.rb +112 -0
- data/lib/async_workers/worker_group.rb +175 -0
- data/lib/async_workers.rb +17 -0
- data/lib/rspec/agents/agent_response.rb +61 -0
- data/lib/rspec/agents/agents/base.rb +123 -0
- data/lib/rspec/agents/cli.rb +342 -0
- data/lib/rspec/agents/conversation.rb +308 -0
- data/lib/rspec/agents/criterion.rb +237 -0
- data/lib/rspec/agents/doc/2026_01_22_observer-system-design.md +757 -0
- data/lib/rspec/agents/doc/2026_01_23_parallel_spec_runner-design.md +1060 -0
- data/lib/rspec/agents/doc/2026_01_27_event_serialization-design.md +294 -0
- data/lib/rspec/agents/doc/2026_01_27_experiment_aggregation_design.md +831 -0
- data/lib/rspec/agents/doc/2026_01_29_rspec-agents-studio-design.md +1332 -0
- data/lib/rspec/agents/doc/2026_01_29_testing-framework-design.md +1037 -0
- data/lib/rspec/agents/doc/2026_02_04-parallel-runner-ui.md +537 -0
- data/lib/rspec/agents/doc/2026_02_05_html_renderer_extensions.md +708 -0
- data/lib/rspec/agents/doc/scenario_guide.md +289 -0
- data/lib/rspec/agents/dsl/agent_proxy.rb +141 -0
- data/lib/rspec/agents/dsl/criterion_definition.rb +78 -0
- data/lib/rspec/agents/dsl/graph_builder.rb +38 -0
- data/lib/rspec/agents/dsl/runner_factory.rb +52 -0
- data/lib/rspec/agents/dsl/scenario_set_dsl.rb +166 -0
- data/lib/rspec/agents/dsl/test_context.rb +223 -0
- data/lib/rspec/agents/dsl/user_proxy.rb +71 -0
- data/lib/rspec/agents/dsl.rb +398 -0
- data/lib/rspec/agents/evaluation_result.rb +44 -0
- data/lib/rspec/agents/event_bus.rb +78 -0
- data/lib/rspec/agents/events.rb +141 -0
- data/lib/rspec/agents/isolated_event_bus.rb +86 -0
- data/lib/rspec/agents/judge.rb +244 -0
- data/lib/rspec/agents/llm/anthropic.rb +143 -0
- data/lib/rspec/agents/llm/base.rb +64 -0
- data/lib/rspec/agents/llm/mock.rb +181 -0
- data/lib/rspec/agents/llm/response.rb +52 -0
- data/lib/rspec/agents/matchers.rb +554 -0
- data/lib/rspec/agents/message.rb +81 -0
- data/lib/rspec/agents/metadata.rb +120 -0
- data/lib/rspec/agents/observers/base.rb +70 -0
- data/lib/rspec/agents/observers/parallel_terminal_observer.rb +151 -0
- data/lib/rspec/agents/observers/rpc_notify_observer.rb +43 -0
- data/lib/rspec/agents/observers/terminal_observer.rb +103 -0
- data/lib/rspec/agents/parallel/controller.rb +284 -0
- data/lib/rspec/agents/parallel/example_discovery.rb +153 -0
- data/lib/rspec/agents/parallel/partitioner.rb +31 -0
- data/lib/rspec/agents/parallel/run_result.rb +22 -0
- data/lib/rspec/agents/parallel/ui/interactive_ui.rb +605 -0
- data/lib/rspec/agents/parallel/ui/interleaved_ui.rb +139 -0
- data/lib/rspec/agents/parallel/ui/output_adapter.rb +127 -0
- data/lib/rspec/agents/parallel/ui/quiet_ui.rb +100 -0
- data/lib/rspec/agents/parallel/ui/ui_factory.rb +53 -0
- data/lib/rspec/agents/parallel/ui/ui_mode.rb +101 -0
- data/lib/rspec/agents/prompt_builders/base.rb +113 -0
- data/lib/rspec/agents/prompt_builders/criterion_evaluation.rb +136 -0
- data/lib/rspec/agents/prompt_builders/goal_achievement_evaluation.rb +142 -0
- data/lib/rspec/agents/prompt_builders/grounding_evaluation.rb +172 -0
- data/lib/rspec/agents/prompt_builders/intent_evaluation.rb +111 -0
- data/lib/rspec/agents/prompt_builders/topic_classification.rb +105 -0
- data/lib/rspec/agents/prompt_builders/user_simulation.rb +131 -0
- data/lib/rspec/agents/runners/headless_runner.rb +272 -0
- data/lib/rspec/agents/runners/parallel_terminal_runner.rb +220 -0
- data/lib/rspec/agents/runners/terminal_runner.rb +186 -0
- data/lib/rspec/agents/runners/user_simulator.rb +261 -0
- data/lib/rspec/agents/scenario.rb +133 -0
- data/lib/rspec/agents/scenario_loader.rb +145 -0
- data/lib/rspec/agents/serialization/conversation_renderer.rb +161 -0
- data/lib/rspec/agents/serialization/extension.rb +199 -0
- data/lib/rspec/agents/serialization/extensions/core_extension.rb +66 -0
- data/lib/rspec/agents/serialization/presenters.rb +281 -0
- data/lib/rspec/agents/serialization/run_data_aggregator.rb +197 -0
- data/lib/rspec/agents/serialization/run_data_builder.rb +189 -0
- data/lib/rspec/agents/serialization/templates/_alpine.min.js +5 -0
- data/lib/rspec/agents/serialization/templates/_base_components.css +196 -0
- data/lib/rspec/agents/serialization/templates/_base_components.js +46 -0
- data/lib/rspec/agents/serialization/templates/_conversation_fragment.html.haml +34 -0
- data/lib/rspec/agents/serialization/templates/_metadata_default.html.haml +17 -0
- data/lib/rspec/agents/serialization/templates/_scripts.js +89 -0
- data/lib/rspec/agents/serialization/templates/_styles.css +1211 -0
- data/lib/rspec/agents/serialization/templates/conversation_document.html.haml +29 -0
- data/lib/rspec/agents/serialization/templates/test_suite.html.haml +238 -0
- data/lib/rspec/agents/serialization/test_suite_renderer.rb +207 -0
- data/lib/rspec/agents/serialization.rb +374 -0
- data/lib/rspec/agents/simulator_config.rb +336 -0
- data/lib/rspec/agents/spec_executor.rb +494 -0
- data/lib/rspec/agents/stable_example_id.rb +147 -0
- data/lib/rspec/agents/templates/user_simulation.erb +9 -0
- data/lib/rspec/agents/tool_call.rb +53 -0
- data/lib/rspec/agents/topic.rb +307 -0
- data/lib/rspec/agents/topic_graph.rb +236 -0
- data/lib/rspec/agents/triggers.rb +122 -0
- data/lib/rspec/agents/turn.rb +63 -0
- data/lib/rspec/agents/turn_executor.rb +91 -0
- data/lib/rspec/agents/version.rb +7 -0
- data/lib/rspec/agents.rb +145 -0
- metadata +242 -0
|
@@ -0,0 +1,181 @@
|
|
|
1
|
+
module RSpec
|
|
2
|
+
module Agents
|
|
3
|
+
module Llm
|
|
4
|
+
# Mock LLM adapter for deterministic testing
|
|
5
|
+
# Allows queuing responses and setting expected evaluation results
|
|
6
|
+
class Mock < Base
|
|
7
|
+
attr_reader :calls, :evaluation_results, :user_responses, :topic_classifications
|
|
8
|
+
|
|
9
|
+
# Builds an empty mock: nothing recorded, nothing scripted, and both queue
# cursors pointing at the first queued entry.
def initialize
  # Call log and FIFO queues
  @calls = []
  @user_responses = []
  @topic_classifications = []

  # Keyed lookups
  @evaluation_results = {}
  @default_responses = {}

  # Read positions into the two queues above
  @response_index = @topic_index = 0
end
|
|
18
|
+
|
|
19
|
+
# Records the request in +calls+ and answers with a canned completion.
#
# @param prompt [String] Prompt text; its content selects which canned response is produced
# @param response_format [Symbol] When :json, the generated text is additionally parsed
# @param max_tokens [Integer] Recorded in the call log only; not otherwise used here
# @return [Response] Response whose metadata carries model "mock" and latency_ms 0
def complete(prompt, response_format: :text, max_tokens: 1024)
  @calls.push({ prompt: prompt, response_format: response_format, max_tokens: max_tokens })

  body = generate_response(prompt, response_format)
  # Stays nil for every format other than :json
  structured = safe_parse(body) if response_format == :json

  Response.new(text: body, parsed: structured, metadata: { model: "mock", latency_ms: 0 })
end
|
|
31
|
+
|
|
32
|
+
# The mock never depends on anything external, so it always reports itself usable.
#
# @return [Boolean] always true
def available?
  true
end
|
|
35
|
+
|
|
36
|
+
# Human-readable label identifying this adapter.
#
# @return [String] the fixed label "Mock LLM"
def model_info
  "Mock LLM"
end
|
|
39
|
+
|
|
40
|
+
# ---- Configuration helpers ----
|
|
41
|
+
|
|
42
|
+
# Script the verdict returned for evaluation prompts that mention a criterion.
# The criterion is stored under its downcased string form.
#
# @param criterion [Symbol, String] Criterion name
# @param satisfied [Boolean] Verdict to report
# @param reasoning [String, nil] Explanation to report; a generic one is generated when omitted
# @return [Hash] the stored entry, with :satisfied and :reasoning keys
def set_evaluation(criterion, satisfied, reasoning = nil)
  explanation = reasoning || "Mock evaluation for #{criterion}"
  @evaluation_results[criterion.to_s.downcase] = { satisfied: satisfied, reasoning: explanation }
end
|
|
53
|
+
|
|
54
|
+
# Append a scripted user message; queued messages are handed out in insertion order.
#
# @param response [String] The user message to return
# @return [Array] the queue after the append
def queue_user_response(response)
  @user_responses.push(response)
end
|
|
59
|
+
|
|
60
|
+
# Append a scripted topic classification; the topic is stored as a Symbol.
#
# @param topic [Symbol, #to_sym] The topic to return
# @return [Array<Symbol>] the queue after the append
def queue_topic_classification(topic)
  @topic_classifications.push(topic.to_sym)
end
|
|
65
|
+
|
|
66
|
+
# Register a fallback response for prompts matching a pattern.
# Registering the same pattern again replaces its response.
#
# @param pattern [Regexp, String] Pattern to match in prompt
# @param response [String] Response to return
# @return [String] the response that was stored
def set_default_response(pattern, response)
  @default_responses.store(pattern, response)
end
|
|
72
|
+
|
|
73
|
+
# Return the mock to its freshly-constructed state. The existing containers
# are emptied in place, so references obtained through the readers stay valid.
#
# @return [Integer] 0
def reset!
  [@calls, @evaluation_results, @user_responses, @topic_classifications, @default_responses].each(&:clear)

  # Rewind both queue cursors
  @response_index = @topic_index = 0
end
|
|
83
|
+
|
|
84
|
+
# Prompt of the most recent recorded call.
#
# @return [Object, nil] the last prompt, or nil when no call has been recorded
def last_prompt
  latest = @calls.last
  latest && latest[:prompt]
end
|
|
88
|
+
|
|
89
|
+
# Prompts of every recorded call, oldest first.
#
# @return [Array] one entry per recorded call
def all_prompts
  @calls.collect { |entry| entry[:prompt] }
end
|
|
93
|
+
|
|
94
|
+
private
|
|
95
|
+
|
|
96
|
+
# Choose the canned text for a prompt.
#
# Keyword checks run in a fixed order (evaluation, user simulation, topic
# classification, grounding) and the first hit wins. Registered default
# patterns are only consulted when none of those keywords match, and a
# generic placeholder is the final fallback.
#
# @param prompt [String] Prompt text to inspect
# @param response_format [Symbol] Only affects the generic fallback (:json vs. anything else)
# @return [String] response text (or whatever object was registered as a default response)
def generate_response(prompt, response_format)
  return handle_evaluation_prompt(prompt) if prompt.include?("satisfied") || prompt.include?("criterion")
  return handle_user_simulation_prompt if prompt.include?("Generate") && prompt.include?("user")
  return handle_topic_classification_prompt if prompt.include?("topic") && prompt.include?("classify")
  return handle_grounding_prompt(prompt) if prompt.include?("grounded") || prompt.include?("grounding")

  # First registered pattern that matches, in insertion order
  hit = @default_responses.find do |pattern, _response|
    pattern.is_a?(Regexp) ? pattern.match?(prompt) : prompt.include?(pattern.to_s)
  end
  return hit.last if hit

  if response_format == :json
    '{"result": "mock response"}'
  else
    "Mock response"
  end
end
|
|
127
|
+
|
|
128
|
+
# Build the JSON verdict for an evaluation prompt.
#
# The first scripted criterion (in insertion order) whose key occurs in the
# downcased prompt supplies the verdict; when none matches, the criterion is
# reported as satisfied.
#
# @param prompt [String] Evaluation prompt text
# @return [String] JSON object with "satisfied" and "reasoning" keys
def handle_evaluation_prompt(prompt)
  # Downcase once rather than once per scripted criterion
  haystack = prompt.downcase
  match = @evaluation_results.find { |key, _result| haystack.include?(key) }

  unless match
    return {
      "satisfied" => true,
      "reasoning" => "Mock adapter: automatically satisfied"
    }.to_json
  end

  scripted = match.last
  {
    "satisfied" => scripted[:satisfied],
    "reasoning" => scripted[:reasoning]
  }.to_json
end
|
|
145
|
+
|
|
146
|
+
# Hand out the next queued user message, advancing the cursor.
#
# Once the queue is exhausted a placeholder is returned instead; the cursor
# is not advanced in that case, so the placeholder text stays the same on
# repeated calls.
#
# @return [Object] the queued message, or a "Mock user response N" placeholder
def handle_user_simulation_prompt
  return "Mock user response #{@response_index + 1}" unless @response_index < @user_responses.length

  queued = @user_responses[@response_index]
  @response_index += 1
  queued
end
|
|
155
|
+
|
|
156
|
+
# Report the next queued topic as JSON, advancing the cursor; reports
# "unknown" (without advancing) once the queue is exhausted.
#
# @return [String] JSON object with a single "topic" key
def handle_topic_classification_prompt
  label = "unknown"

  if @topic_index < @topic_classifications.length
    label = @topic_classifications[@topic_index].to_s
    @topic_index += 1
  end

  { "topic" => label }.to_json
end
|
|
165
|
+
|
|
166
|
+
# Canned grounding verdict: always grounded, with no violations.
#
# @param prompt [String] Accepted for symmetry with the other handlers; not inspected
# @return [String] JSON object with "grounded" and "violations" keys
def handle_grounding_prompt(prompt)
  verdict = { "grounded" => true, "violations" => [] }
  verdict.to_json
end
|
|
172
|
+
|
|
173
|
+
# Parse JSON text without ever raising for unusable input.
#
# @param text [String, nil] Candidate JSON document
# @return [Object, nil] the parsed value, or nil when +text+ is not valid
#   JSON or is not a String at all (e.g. nil or a non-String default response)
def safe_parse(text)
  JSON.parse(text)
rescue JSON::ParserError, TypeError
  # TypeError covers nil / non-String input, which JSON.parse rejects before parsing
  nil
end
|
|
178
|
+
end
|
|
179
|
+
end
|
|
180
|
+
end
|
|
181
|
+
end
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
module RSpec
|
|
2
|
+
module Agents
|
|
3
|
+
module Llm
|
|
4
|
+
# Represents a response from an LLM completion
|
|
5
|
+
class Response
|
|
6
|
+
attr_reader :text, :parsed, :metadata
|
|
7
|
+
|
|
8
|
+
# @param text [String] Raw response text
# @param parsed [Hash, nil] Parsed JSON if response_format was :json
# @param metadata [Hash, Metadata] Additional metadata (model, tokens, latency, etc.);
#   anything that is not already a Metadata is wrapped in one
def initialize(text:, parsed: nil, metadata: {})
  @text = text
  @parsed = parsed

  wrapped = metadata
  wrapped = Metadata.new(metadata) unless metadata.is_a?(Metadata)
  @metadata = wrapped
end
|
|
16
|
+
|
|
17
|
+
# Whether a parsed value is attached to this response.
# Only nil counts as "not parsed"; any other value, including false, does.
#
# @return [Boolean]
def parsed?
  return false if @parsed.nil?

  true
end
|
|
23
|
+
|
|
24
|
+
# Check if there was a parse error
#
# Implemented with core Ruby only: the check mirrors ActiveSupport's
# Object#present? (nil, false, blank strings and empty collections count as
# "no error") but does not require ActiveSupport to be loaded.
#
# @return [Boolean]
def parse_error?
  error = @metadata[:parse_error]

  return false unless error # nil or false
  return !error.match?(/\A[[:space:]]*\z/) if error.is_a?(String) # empty / whitespace-only
  return !error.empty? if error.respond_to?(:empty?)

  true
end
|
|
30
|
+
|
|
31
|
+
# The parse error recorded in the metadata under :parse_error.
#
# @return [String, nil] the recorded error, or nil when none was recorded
def parse_error
  @metadata[:parse_error]
end
|
|
37
|
+
|
|
38
|
+
# Plain-Hash snapshot of the response.
#
# @return [Hash] :text and :parsed as stored, :metadata converted with #to_h
def to_h
  metadata_hash = @metadata.to_h
  { text: @text, parsed: @parsed, metadata: metadata_hash }
end
|
|
45
|
+
|
|
46
|
+
# Compact debug representation: class name, text length and parsed flag.
# The text itself is deliberately left out; a nil text is shown as 0 chars.
#
# @return [String]
def inspect
  char_count = @text&.length
  char_count ||= 0
  "#<#{self.class.name} text=#{char_count} chars, parsed=#{parsed?}>"
end
|
|
49
|
+
end
|
|
50
|
+
end
|
|
51
|
+
end
|
|
52
|
+
end
|