rspec-agents 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/bin/rspec-agents +24 -0
- data/lib/async_workers/channel_config.rb +34 -0
- data/lib/async_workers/doc/process_manager_design.md +512 -0
- data/lib/async_workers/errors.rb +21 -0
- data/lib/async_workers/managed_process.rb +284 -0
- data/lib/async_workers/output_stream.rb +86 -0
- data/lib/async_workers/rpc_channel.rb +159 -0
- data/lib/async_workers/transport/base.rb +57 -0
- data/lib/async_workers/transport/stdio_transport.rb +91 -0
- data/lib/async_workers/transport/unix_socket_transport.rb +112 -0
- data/lib/async_workers/worker_group.rb +175 -0
- data/lib/async_workers.rb +17 -0
- data/lib/rspec/agents/agent_response.rb +61 -0
- data/lib/rspec/agents/agents/base.rb +123 -0
- data/lib/rspec/agents/cli.rb +342 -0
- data/lib/rspec/agents/conversation.rb +308 -0
- data/lib/rspec/agents/criterion.rb +237 -0
- data/lib/rspec/agents/doc/2026_01_22_observer-system-design.md +757 -0
- data/lib/rspec/agents/doc/2026_01_23_parallel_spec_runner-design.md +1060 -0
- data/lib/rspec/agents/doc/2026_01_27_event_serialization-design.md +294 -0
- data/lib/rspec/agents/doc/2026_01_27_experiment_aggregation_design.md +831 -0
- data/lib/rspec/agents/doc/2026_01_29_rspec-agents-studio-design.md +1332 -0
- data/lib/rspec/agents/doc/2026_01_29_testing-framework-design.md +1037 -0
- data/lib/rspec/agents/doc/2026_02_04-parallel-runner-ui.md +537 -0
- data/lib/rspec/agents/doc/2026_02_05_html_renderer_extensions.md +708 -0
- data/lib/rspec/agents/doc/scenario_guide.md +289 -0
- data/lib/rspec/agents/dsl/agent_proxy.rb +141 -0
- data/lib/rspec/agents/dsl/criterion_definition.rb +78 -0
- data/lib/rspec/agents/dsl/graph_builder.rb +38 -0
- data/lib/rspec/agents/dsl/runner_factory.rb +52 -0
- data/lib/rspec/agents/dsl/scenario_set_dsl.rb +166 -0
- data/lib/rspec/agents/dsl/test_context.rb +223 -0
- data/lib/rspec/agents/dsl/user_proxy.rb +71 -0
- data/lib/rspec/agents/dsl.rb +398 -0
- data/lib/rspec/agents/evaluation_result.rb +44 -0
- data/lib/rspec/agents/event_bus.rb +78 -0
- data/lib/rspec/agents/events.rb +141 -0
- data/lib/rspec/agents/isolated_event_bus.rb +86 -0
- data/lib/rspec/agents/judge.rb +244 -0
- data/lib/rspec/agents/llm/anthropic.rb +143 -0
- data/lib/rspec/agents/llm/base.rb +64 -0
- data/lib/rspec/agents/llm/mock.rb +181 -0
- data/lib/rspec/agents/llm/response.rb +52 -0
- data/lib/rspec/agents/matchers.rb +554 -0
- data/lib/rspec/agents/message.rb +81 -0
- data/lib/rspec/agents/metadata.rb +120 -0
- data/lib/rspec/agents/observers/base.rb +70 -0
- data/lib/rspec/agents/observers/parallel_terminal_observer.rb +151 -0
- data/lib/rspec/agents/observers/rpc_notify_observer.rb +43 -0
- data/lib/rspec/agents/observers/terminal_observer.rb +103 -0
- data/lib/rspec/agents/parallel/controller.rb +284 -0
- data/lib/rspec/agents/parallel/example_discovery.rb +153 -0
- data/lib/rspec/agents/parallel/partitioner.rb +31 -0
- data/lib/rspec/agents/parallel/run_result.rb +22 -0
- data/lib/rspec/agents/parallel/ui/interactive_ui.rb +605 -0
- data/lib/rspec/agents/parallel/ui/interleaved_ui.rb +139 -0
- data/lib/rspec/agents/parallel/ui/output_adapter.rb +127 -0
- data/lib/rspec/agents/parallel/ui/quiet_ui.rb +100 -0
- data/lib/rspec/agents/parallel/ui/ui_factory.rb +53 -0
- data/lib/rspec/agents/parallel/ui/ui_mode.rb +101 -0
- data/lib/rspec/agents/prompt_builders/base.rb +113 -0
- data/lib/rspec/agents/prompt_builders/criterion_evaluation.rb +136 -0
- data/lib/rspec/agents/prompt_builders/goal_achievement_evaluation.rb +142 -0
- data/lib/rspec/agents/prompt_builders/grounding_evaluation.rb +172 -0
- data/lib/rspec/agents/prompt_builders/intent_evaluation.rb +111 -0
- data/lib/rspec/agents/prompt_builders/topic_classification.rb +105 -0
- data/lib/rspec/agents/prompt_builders/user_simulation.rb +131 -0
- data/lib/rspec/agents/runners/headless_runner.rb +272 -0
- data/lib/rspec/agents/runners/parallel_terminal_runner.rb +220 -0
- data/lib/rspec/agents/runners/terminal_runner.rb +186 -0
- data/lib/rspec/agents/runners/user_simulator.rb +261 -0
- data/lib/rspec/agents/scenario.rb +133 -0
- data/lib/rspec/agents/scenario_loader.rb +145 -0
- data/lib/rspec/agents/serialization/conversation_renderer.rb +161 -0
- data/lib/rspec/agents/serialization/extension.rb +199 -0
- data/lib/rspec/agents/serialization/extensions/core_extension.rb +66 -0
- data/lib/rspec/agents/serialization/presenters.rb +281 -0
- data/lib/rspec/agents/serialization/run_data_aggregator.rb +197 -0
- data/lib/rspec/agents/serialization/run_data_builder.rb +189 -0
- data/lib/rspec/agents/serialization/templates/_alpine.min.js +5 -0
- data/lib/rspec/agents/serialization/templates/_base_components.css +196 -0
- data/lib/rspec/agents/serialization/templates/_base_components.js +46 -0
- data/lib/rspec/agents/serialization/templates/_conversation_fragment.html.haml +34 -0
- data/lib/rspec/agents/serialization/templates/_metadata_default.html.haml +17 -0
- data/lib/rspec/agents/serialization/templates/_scripts.js +89 -0
- data/lib/rspec/agents/serialization/templates/_styles.css +1211 -0
- data/lib/rspec/agents/serialization/templates/conversation_document.html.haml +29 -0
- data/lib/rspec/agents/serialization/templates/test_suite.html.haml +238 -0
- data/lib/rspec/agents/serialization/test_suite_renderer.rb +207 -0
- data/lib/rspec/agents/serialization.rb +374 -0
- data/lib/rspec/agents/simulator_config.rb +336 -0
- data/lib/rspec/agents/spec_executor.rb +494 -0
- data/lib/rspec/agents/stable_example_id.rb +147 -0
- data/lib/rspec/agents/templates/user_simulation.erb +9 -0
- data/lib/rspec/agents/tool_call.rb +53 -0
- data/lib/rspec/agents/topic.rb +307 -0
- data/lib/rspec/agents/topic_graph.rb +236 -0
- data/lib/rspec/agents/triggers.rb +122 -0
- data/lib/rspec/agents/turn.rb +63 -0
- data/lib/rspec/agents/turn_executor.rb +91 -0
- data/lib/rspec/agents/version.rb +7 -0
- data/lib/rspec/agents.rb +145 -0
- metadata +242 -0
|
@@ -0,0 +1,374 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "fileutils"
require "json"
require "securerandom"
require "time"
require_relative "metadata"
|
|
7
|
+
|
|
8
|
+
# Event Serialization System
|
|
9
|
+
#
|
|
10
|
+
# Provides data structures for capturing and persisting test run data.
|
|
11
|
+
# See 2026_01_27_event_serialization-design.md for architecture details.
|
|
12
|
+
|
|
13
|
+
module RSpec
|
|
14
|
+
module Agents
|
|
15
|
+
module Serialization
|
|
16
|
+
# Alpine.js version string; presumably tracks the bundled
# templates/_alpine.min.js asset -- confirm when upgrading.
ALPINE_VERSION = "3.14.8"
|
|
17
|
+
|
|
18
|
+
# Shared serialization helpers.
#
# Included by the data classes in this file (instance side) and, through
# DataClassFromH, made available to their class-level +from_h+ methods.
module SerializationHelpers
  # Converts a value into a JSON-friendly structure.
  #
  # - +nil+ stays +nil+
  # - Time becomes an ISO-8601 string with millisecond precision
  # - Arrays and Hash values are converted recursively (Hash keys are kept)
  # - any other object responding to +to_h+ is converted with +to_h+, and
  #   the resulting Hash is serialized recursively so nested Time values
  #   get the same ISO-8601 treatment as top-level ones
  # - everything else is returned unchanged
  #
  # @param value [Object]
  # @return [Object]
  def serialize_value(value)
    case value
    when nil then nil
    when Time then value.iso8601(3)
    when Array then value.map { |item| serialize_value(item) }
    when Hash then value.transform_values { |item| serialize_value(item) }
    else
      converted = value.respond_to?(:to_h) ? value.to_h : value
      # Only recurse into real Hashes; a +to_h+ that returns something else
      # is passed through untouched, which also rules out endless recursion.
      converted.is_a?(Hash) ? serialize_value(converted) : converted
    end
  end

  # Parses a serialized timestamp.
  #
  # @param value [Time, String, nil]
  # @return [Time, nil] +nil+ for a falsy input, the input itself when it
  #   already is a Time, otherwise the result of +Time.parse+
  def parse_time(value)
    return nil unless value
    return value if value.is_a?(Time)

    Time.parse(value)
  end

  # Reads +key+ from +hash+, falling back to the String form of the key
  # (hashes produced by JSON.parse have String keys). The presence check
  # uses +key?+, so an explicit +false+/+nil+ stored under +key+ wins over
  # the String fallback.
  #
  # @param hash [Hash]
  # @param key [Symbol]
  # @return [Object, nil]
  def get(hash, key)
    hash.key?(key) ? hash[key] : hash[key.to_s]
  end

  # Like #get, but substitutes an empty array for a missing/nil entry.
  #
  # @return [Array]
  def get_array(hash, key)
    get(hash, key) || []
  end

  # Ensures +value+ is a Metadata instance; +nil+ is treated as an empty hash.
  #
  # @param value [Metadata, Hash, nil]
  # @return [Metadata]
  def wrap_metadata(value)
    return value if value.is_a?(Metadata)

    Metadata.new(value || {})
  end
end
|
|
49
|
+
|
|
50
|
+
# Instance-side mixin for the Data.define value classes: supplies a +to_h+
# whose values have been run through +serialize_value+.
module DataClassMethods
  include SerializationHelpers

  # @return [Hash{Symbol => Object}] one entry per member, in member order,
  #   with each value serialized
  def to_h
    members.to_h { |member| [member, serialize_value(send(member))] }
  end
end
|
|
60
|
+
|
|
61
|
+
# Class-side mixin: a class that extends this module can call the
# SerializationHelpers methods (get, get_array, parse_time, ...) from its
# class-level +from_h+.
module DataClassFromH
  include SerializationHelpers
end
|
|
65
|
+
|
|
66
|
+
# =========================================================================
|
|
67
|
+
# Immutable Data Classes
|
|
68
|
+
# =========================================================================
|
|
69
|
+
|
|
70
|
+
# Immutable record of an exception: class name, message and backtrace.
ExceptionData = Data.define(:class_name, :message, :backtrace) do
  include DataClassMethods
  extend DataClassFromH

  # @param class_name [String]
  # @param message [String]
  # @param backtrace [Array<String>, String, nil] coerced with Array();
  #   only the first 10 entries are kept
  def initialize(class_name:, message:, backtrace: [])
    trimmed = Array(backtrace).first(10)
    super(class_name: class_name, message: message, backtrace: trimmed)
  end

  # Rebuilds an instance from a Symbol- or String-keyed hash.
  #
  # @param hash [Hash, nil]
  # @return [ExceptionData, nil] +nil+ when +hash+ is falsy
  def self.from_h(hash)
    return nil unless hash

    new(
      class_name: get(hash, :class_name),
      message: get(hash, :message),
      backtrace: get_array(hash, :backtrace)
    )
  end
end
|
|
83
|
+
|
|
84
|
+
# Immutable record of a single conversation message.
MessageData = Data.define(:role, :content, :timestamp, :source, :metadata) do
  include DataClassMethods
  extend DataClassFromH

  # +role+ is stored as a String, +source+ as a String or nil, and
  # +metadata+ is wrapped in a Metadata instance.
  def initialize(role:, content:, timestamp:, source: nil, metadata: {})
    super(
      role: role.to_s,
      content: content,
      timestamp: timestamp,
      source: source&.to_s,
      metadata: wrap_metadata(metadata)
    )
  end

  # Rebuilds an instance from a Symbol- or String-keyed hash; the
  # timestamp is parsed back into a Time.
  #
  # @param hash [Hash, nil]
  # @return [MessageData, nil] +nil+ when +hash+ is falsy
  def self.from_h(hash)
    return nil unless hash

    new(
      role: get(hash, :role),
      content: get(hash, :content),
      timestamp: parse_time(get(hash, :timestamp)),
      source: get(hash, :source),
      metadata: get(hash, :metadata) || {}
    )
  end
end
|
|
98
|
+
|
|
99
|
+
# Immutable record of one tool invocation and its outcome.
ToolCallData = Data.define(:name, :arguments, :result, :error, :timestamp, :metadata) do
  include DataClassMethods
  extend DataClassFromH

  # +name+ is stored as a String, a nil +arguments+ becomes an empty hash,
  # and +metadata+ is wrapped in a Metadata instance.
  def initialize(name:, arguments:, timestamp:, result: nil, error: nil, metadata: {})
    super(
      name: name.to_s,
      arguments: arguments || {},
      result: result,
      error: error,
      timestamp: timestamp,
      metadata: wrap_metadata(metadata)
    )
  end

  # Rebuilds an instance from a Symbol- or String-keyed hash; the
  # timestamp is parsed back into a Time.
  #
  # @param hash [Hash, nil]
  # @return [ToolCallData, nil] +nil+ when +hash+ is falsy
  def self.from_h(hash)
    return nil unless hash

    new(
      name: get(hash, :name),
      arguments: get(hash, :arguments) || {},
      result: get(hash, :result),
      error: get(hash, :error),
      timestamp: parse_time(get(hash, :timestamp)),
      metadata: get(hash, :metadata) || {}
    )
  end
end
|
|
114
|
+
|
|
115
|
+
# Immutable record of one evaluation (criterion check) and its verdict.
EvaluationData = Data.define(
  :name, :description, :passed, :reasoning, :timestamp,
  :mode, :type, :failure_message, :metadata
) do
  include DataClassMethods
  extend DataClassFromH

  # +mode+ and +type+ are stored as Symbols (or nil); +metadata+ is
  # wrapped in a Metadata instance.
  def initialize(name:, description:, passed:, timestamp:, reasoning: nil, mode: nil, type: nil,
                 failure_message: nil, metadata: {})
    super(
      name: name,
      description: description,
      passed: passed,
      reasoning: reasoning,
      timestamp: timestamp,
      mode: mode&.to_sym,
      type: type&.to_sym,
      failure_message: failure_message,
      metadata: wrap_metadata(metadata)
    )
  end

  # Whether this is a soft evaluation (quality metric, doesn't affect test pass/fail)
  def soft?
    mode == :soft
  end

  # Whether this is a hard expectation (affects test pass/fail)
  def hard?
    mode == :hard
  end

  # Rebuilds an instance from a Symbol- or String-keyed hash; the
  # timestamp is parsed back into a Time.
  #
  # @param hash [Hash, nil]
  # @return [EvaluationData, nil] +nil+ when +hash+ is falsy
  def self.from_h(hash)
    return nil unless hash

    new(
      name: get(hash, :name),
      description: get(hash, :description),
      passed: get(hash, :passed),
      reasoning: get(hash, :reasoning),
      timestamp: parse_time(get(hash, :timestamp)),
      mode: get(hash, :mode)&.to_sym,
      type: get(hash, :type)&.to_sym,
      failure_message: get(hash, :failure_message),
      metadata: get(hash, :metadata) || {}
    )
  end
end
|
|
143
|
+
|
|
144
|
+
# Scenario data for serialization.
# Captures the scenario definition used in a test.
ScenarioData = Data.define(:id, :name, :goal, :personality, :context, :verification, :data) do
  include DataClassMethods
  extend DataClassFromH

  # A nil +data+ is replaced by an empty hash.
  def initialize(id:, name:, goal:, personality: nil, context: nil, verification: nil, data: {})
    super(
      id: id,
      name: name,
      goal: goal,
      personality: personality,
      context: context,
      verification: verification,
      data: data || {}
    )
  end

  # Rebuilds an instance from a Symbol- or String-keyed hash.
  #
  # @param hash [Hash, nil]
  # @return [ScenarioData, nil] +nil+ when +hash+ is falsy
  def self.from_h(hash)
    return nil unless hash

    attributes = %i[id name goal personality context verification].to_h do |field|
      [field, get(hash, field)]
    end
    new(**attributes, data: get(hash, :data) || {})
  end

  # Builds an instance from a Scenario object. Only the calls made here are
  # relied upon: +identifier+, +[]+ with Symbol keys, and +to_h+.
  #
  # @param scenario [#identifier, #[], #to_h, nil]
  # @return [ScenarioData, nil] +nil+ when +scenario+ is falsy
  def self.from_scenario(scenario)
    return nil unless scenario

    display_name = scenario[:name] || scenario[:id]
    new(
      id: scenario.identifier,
      name: display_name,
      goal: scenario[:goal],
      personality: scenario[:personality],
      context: scenario[:context],
      verification: scenario[:verification],
      data: scenario.to_h
    )
  end
end
|
|
182
|
+
|
|
183
|
+
# Immutable aggregate counters for a run (see RunData#summary).
SummaryStats = Data.define(
  :example_count,
  :passed_count,
  :failed_count,
  :pending_count,
  :total_duration_ms
) do
  include DataClassMethods
end
|
|
186
|
+
|
|
187
|
+
# =========================================================================
|
|
188
|
+
# Mutable Data Classes (built incrementally during test execution)
|
|
189
|
+
# =========================================================================
|
|
190
|
+
|
|
191
|
+
# One conversation turn: the user message, the agent response, and the
# tool calls recorded for it. Built incrementally, hence mutable.
class TurnData
  include SerializationHelpers
  extend DataClassFromH

  attr_reader :number, :user_message, :metadata, :topic
  attr_accessor :agent_response, :tool_calls

  # A nil +tool_calls+ becomes an empty array, +topic+ is stored as a
  # String (or nil), and +metadata+ is wrapped in a Metadata instance.
  def initialize(number:, user_message:, agent_response: nil, tool_calls: [], topic: nil, metadata: {})
    @number = number
    @user_message = user_message
    @agent_response = agent_response
    @tool_calls = tool_calls || []
    self.topic = topic
    @metadata = wrap_metadata(metadata)
  end

  # Stores the topic as a String; nil stays nil.
  def topic=(value)
    @topic = value&.to_s
  end

  # @return [Hash] Symbol-keyed representation with nested objects
  #   converted through their own +to_h+
  def to_h
    {
      number: @number,
      user_message: @user_message&.to_h,
      agent_response: @agent_response&.to_h,
      tool_calls: @tool_calls.map(&:to_h),
      topic: @topic,
      metadata: @metadata.to_h
    }
  end

  # Rebuilds a turn from a Symbol- or String-keyed hash.
  #
  # @param hash [Hash, nil]
  # @return [TurnData, nil] +nil+ when +hash+ is falsy
  def self.from_h(hash)
    return nil unless hash

    calls = get_array(hash, :tool_calls).map { |call| ToolCallData.from_h(call) }
    new(
      number: get(hash, :number),
      user_message: MessageData.from_h(get(hash, :user_message)),
      agent_response: MessageData.from_h(get(hash, :agent_response)),
      tool_calls: calls,
      topic: get(hash, :topic),
      metadata: get(hash, :metadata) || {}
    )
  end
end
|
|
226
|
+
|
|
227
|
+
# A conversation: an ordered list of turns plus start/end timestamps.
# Built incrementally, hence mutable.
class ConversationData
  include SerializationHelpers
  extend DataClassFromH

  attr_reader :started_at, :turns, :metadata, :final_topic
  attr_accessor :ended_at

  # A nil +turns+ becomes an empty array, +final_topic+ is stored as a
  # String (or nil), and +metadata+ is wrapped in a Metadata instance.
  def initialize(started_at:, ended_at: nil, turns: [], final_topic: nil, metadata: {})
    @started_at = started_at
    @ended_at = ended_at
    @turns = turns || []
    self.final_topic = final_topic
    @metadata = wrap_metadata(metadata)
  end

  # Stores the final topic as a String (nil stays nil), so a value assigned
  # after construction is normalized the same way as one passed to the
  # constructor -- mirroring TurnData#topic=.
  def final_topic=(value)
    @final_topic = value&.to_s
  end

  # Appends a turn; returns the turns array.
  def add_turn(turn) = @turns << turn

  # @return [TurnData, nil] the most recently added turn
  def current_turn = @turns.last

  # @return [Hash] Symbol-keyed representation with serialized timestamps
  def to_h
    {
      started_at: serialize_value(@started_at),
      ended_at: serialize_value(@ended_at),
      turns: @turns.map(&:to_h),
      final_topic: @final_topic,
      metadata: @metadata.to_h
    }
  end

  # Rebuilds a conversation from a Symbol- or String-keyed hash.
  #
  # @param hash [Hash, nil]
  # @return [ConversationData, nil] +nil+ when +hash+ is falsy
  def self.from_h(hash)
    return nil unless hash

    new(
      started_at: parse_time(get(hash, :started_at)),
      ended_at: parse_time(get(hash, :ended_at)),
      turns: get_array(hash, :turns).map { |turn| TurnData.from_h(turn) },
      final_topic: get(hash, :final_topic),
      metadata: get(hash, :metadata) || {}
    )
  end
end
|
|
257
|
+
|
|
258
|
+
# Everything recorded for one example: identity, timing, status,
# exception, conversation and evaluations. Built incrementally.
class ExampleData
  include SerializationHelpers
  extend DataClassFromH

  attr_reader :id, :stable_id, :canonical_path, :file, :description, :location,
              :started_at, :evaluations, :metadata
  attr_accessor :status, :finished_at, :duration_ms, :exception, :conversation, :scenario_id

  # +status+ is converted with +to_sym+, a nil +evaluations+ becomes an
  # empty array, and +metadata+ is wrapped in a Metadata instance.
  def initialize(id:, file:, description:, location:, started_at:, status: :pending,
                 stable_id: nil, canonical_path: nil, scenario_id: nil,
                 finished_at: nil, duration_ms: nil, exception: nil, conversation: nil,
                 evaluations: [], metadata: {})
    @id = id
    @file = file
    @description = description
    @location = location
    @stable_id = stable_id
    @canonical_path = canonical_path
    @scenario_id = scenario_id
    @status = status.to_sym
    @started_at = started_at
    @finished_at = finished_at
    @duration_ms = duration_ms
    @exception = exception
    @conversation = conversation
    @evaluations = evaluations || []
    @metadata = wrap_metadata(metadata)
  end

  # Appends an evaluation; returns the evaluations array.
  def add_evaluation(evaluation) = @evaluations << evaluation

  # @return [Hash] Symbol-keyed representation; status is emitted as a
  #   String and timestamps are serialized
  def to_h
    {
      id: @id,
      stable_id: @stable_id,
      canonical_path: @canonical_path,
      scenario_id: @scenario_id,
      file: @file,
      description: @description,
      location: @location,
      status: @status.to_s,
      started_at: serialize_value(@started_at),
      finished_at: serialize_value(@finished_at),
      duration_ms: @duration_ms,
      exception: @exception&.to_h,
      conversation: @conversation&.to_h,
      evaluations: @evaluations.map(&:to_h),
      metadata: @metadata.to_h
    }
  end

  # Rebuilds an example from a Symbol- or String-keyed hash. A missing
  # status falls back to :pending.
  #
  # @param hash [Hash, nil]
  # @return [ExampleData, nil] +nil+ when +hash+ is falsy
  def self.from_h(hash)
    return nil unless hash

    new(
      id: get(hash, :id),
      stable_id: get(hash, :stable_id),
      canonical_path: get(hash, :canonical_path),
      scenario_id: get(hash, :scenario_id),
      file: get(hash, :file),
      description: get(hash, :description),
      location: get(hash, :location),
      status: get(hash, :status)&.to_sym || :pending,
      started_at: parse_time(get(hash, :started_at)),
      finished_at: parse_time(get(hash, :finished_at)),
      duration_ms: get(hash, :duration_ms),
      exception: ExceptionData.from_h(get(hash, :exception)),
      conversation: ConversationData.from_h(get(hash, :conversation)),
      evaluations: get_array(hash, :evaluations).map { |entry| EvaluationData.from_h(entry) },
      metadata: get(hash, :metadata) || {}
    )
  end
end
|
|
299
|
+
|
|
300
|
+
# Top-level container for one run: examples keyed by id and scenarios
# keyed by scenario id. Built incrementally, hence mutable.
class RunData
  include SerializationHelpers
  extend DataClassFromH

  attr_reader :run_id, :started_at, :seed, :examples, :scenarios
  attr_accessor :finished_at

  # Nil +examples+ / +scenarios+ are replaced by empty hashes.
  def initialize(run_id:, started_at:, finished_at: nil, seed: nil, examples: {}, scenarios: {})
    @run_id = run_id
    @started_at = started_at
    @finished_at = finished_at
    @seed = seed
    @examples = examples || {}
    @scenarios = scenarios || {}
  end

  # Stores the example under its id, replacing any previous entry.
  def add_example(example_data) = @examples[example_data.id] = example_data

  # @return [ExampleData, nil]
  def example(id) = @examples[id]

  # Register a scenario in the scenarios hash. The first registration for
  # an id wins; later ones with the same id are ignored.
  # @param scenario_data [ScenarioData] The scenario to register
  # @return [String, nil] The scenario ID, or nil when scenario_data is falsy
  def register_scenario(scenario_data)
    return nil unless scenario_data

    scenario_id = scenario_data.id
    @scenarios[scenario_id] = scenario_data unless @scenarios.key?(scenario_id)
    scenario_id
  end

  # Get a scenario by ID
  # @param id [String] The scenario ID
  # @return [ScenarioData, nil]
  def scenario(id)
    @scenarios[id]
  end

  # Aggregates per-status counts and the total duration.
  #
  # Statuses are compared by their String form, so a status assigned as a
  # String through ExampleData#status= is counted like its Symbol
  # equivalent. The duration total is accumulated separately from the
  # status counters, so no status value can interfere with it. Statuses
  # other than passed/failed/pending only contribute to +example_count+.
  #
  # @return [SummaryStats]
  def summary
    status_counts = @examples.each_value.map { |ex| ex.status.to_s }.tally
    SummaryStats.new(
      example_count: @examples.size,
      passed_count: status_counts.fetch("passed", 0),
      failed_count: status_counts.fetch("failed", 0),
      pending_count: status_counts.fetch("pending", 0),
      total_duration_ms: @examples.each_value.sum { |ex| ex.duration_ms.to_i }
    )
  end

  # @return [Hash] Symbol-keyed representation with serialized timestamps
  def to_h
    {
      run_id: @run_id,
      started_at: serialize_value(@started_at),
      finished_at: serialize_value(@finished_at),
      seed: @seed,
      scenarios: @scenarios.transform_values(&:to_h),
      examples: @examples.transform_values(&:to_h)
    }
  end

  # Rebuilds a run from a Symbol- or String-keyed hash. The keys of the
  # scenarios/examples hashes are kept exactly as found in +hash+.
  #
  # @param hash [Hash, nil]
  # @return [RunData, nil] +nil+ when +hash+ is falsy
  def self.from_h(hash)
    return nil unless hash

    scenarios = (get(hash, :scenarios) || {}).transform_values { |entry| ScenarioData.from_h(entry) }
    examples = (get(hash, :examples) || {}).transform_values { |entry| ExampleData.from_h(entry) }
    new(
      run_id: get(hash, :run_id),
      started_at: parse_time(get(hash, :started_at)),
      finished_at: parse_time(get(hash, :finished_at)),
      seed: get(hash, :seed),
      scenarios: scenarios,
      examples: examples
    )
  end
end
|
|
355
|
+
|
|
356
|
+
# =========================================================================
|
|
357
|
+
# JsonFile - Simple JSON file read/write utility
|
|
358
|
+
# =========================================================================
|
|
359
|
+
|
|
360
|
+
# Reads and writes run data as JSON files.
class JsonFile
  # Writes +run_data.to_h+ as pretty-printed JSON to +path+, creating the
  # parent directory first if needed.
  #
  # @param path [String]
  # @param run_data [#to_h]
  # @return [Integer] the value returned by File.write
  def self.write(path, run_data)
    parent_dir = File.dirname(path)
    FileUtils.mkdir_p(parent_dir)
    payload = JSON.pretty_generate(run_data.to_h)
    File.write(path, payload)
  end

  # Parses the JSON file at +path+ and rebuilds a RunData from it.
  #
  # @param path [String]
  # @return [RunData, nil]
  def self.read(path)
    parsed = JSON.parse(File.read(path))
    RunData.from_h(parsed)
  end
end
|
|
372
|
+
end
|
|
373
|
+
end
|
|
374
|
+
end
|
|
@@ -0,0 +1,336 @@
|
|
|
1
|
+
module RSpec
|
|
2
|
+
module Agents
|
|
3
|
+
# Configuration for the user simulator.
# Supports inheritance through RSpec nesting with specific merge behaviors.
#
# Inheritance rules (per design doc Section 3.4):
# - String replaces block, block replaces string
# - Block + block = notes merged
# - Rules are always accumulated
# - max_turns/stop_when replaced by child
# - goal/template are documented as test-level only; note that #merge still
#   falls back to the parent's value when the child has none
class SimulatorConfig
  attr_reader :role_value, :personality_value, :context_value
  attr_reader :rules, :topic_overrides

  # Plain writer, kept alongside the DSL-style #max_turns method.
  attr_writer :max_turns

  def initialize
    @role_value = nil
    @role_type = nil # :string or :block

    @personality_value = nil
    @personality_type = nil
    @personality_notes = []

    @context_value = nil
    @context_type = nil
    @context_notes = []

    @rules = []
    @max_turns = nil
    @stop_when = nil
    @goal = nil
    @template = nil
    @topic_overrides = {}

    @current_notes_target = nil # Tracks which block we're in
  end

  # DSL: Set the user's role.
  # With a block, the block is stored and called lazily by #effective_role.
  # @param text [String, nil] Role description (or nil to use block)
  # @yield Block returning the role description
  def role(text = nil, &block)
    if block
      @role_type = :block
      @role_value = block
    else
      @role_type = :string
      @role_value = text
    end
  end

  # DSL: Set the user's personality.
  # With a block, the block is instance_eval'd on this config and every
  # #note call inside it is collected as a personality note. With a string,
  # any previously collected personality notes are discarded.
  # @param text [String, nil] Personality description (or nil to use block)
  # @yield Block for personality with notes
  def personality(text = nil, &block)
    if block
      @personality_type = :block
      @personality_value = nil
      collect_notes(:personality, &block)
    else
      @personality_type = :string
      @personality_value = text
      @personality_notes = []
    end
  end

  # DSL: Set the context.
  # Same string/block semantics as #personality, applied to context notes.
  # @param text [String, nil] Context description (or nil to use block)
  # @yield Block for context with notes
  def context(text = nil, &block)
    if block
      @context_type = :block
      @context_value = nil
      collect_notes(:context, &block)
    else
      @context_type = :string
      @context_value = text
      @context_notes = []
    end
  end

  # DSL: Add a note (used within personality/context blocks).
  # Outside of any block the note is appended to the personality notes.
  # @param text [String] Note text
  def note(text)
    target = @current_notes_target == :context ? @context_notes : @personality_notes
    target << text
  end

  # DSL: Add a rule.
  # A call that matches none of the accepted forms (for example a Symbol
  # without text) adds nothing.
  # @param type_or_lambda [Symbol, Proc] :should, :should_not, or a lambda for dynamic rules
  # @param text [String, nil] Rule description (for :should/:should_not)
  # @yield Optional block for dynamic rules
  def rule(type_or_lambda = nil, text = nil, &block)
    if block
      @rules << { type: :dynamic, block: block }
    elsif type_or_lambda.is_a?(Proc)
      @rules << { type: :dynamic, block: type_or_lambda }
    elsif type_or_lambda.is_a?(Symbol) && text
      @rules << { type: type_or_lambda, text: text }
    end
  end

  # DSL: Set maximum turns; without an argument, returns the current value.
  # @param count [Integer, nil]
  def max_turns(count = nil)
    return @max_turns if count.nil?

    @max_turns = count
  end

  # DSL: Set stop condition; without a block, returns the current one.
  # @yield [turn, conversation] Block returning true to stop
  def stop_when(&block)
    return @stop_when unless block

    @stop_when = block
  end

  # DSL: Set the goal (test-level only); without an argument, returns it.
  # @param text [String, nil]
  def goal(text = nil)
    return @goal if text.nil?

    @goal = text
  end

  # DSL: Set the template path (test-level only); without an argument,
  # returns it. The path is stored as a String.
  # @param path [String, nil] Path to ERB template file
  def template(path = nil)
    return @template if path.nil?

    @template = path.to_s
  end

  # DSL: Override settings during a specific topic.
  # The block is evaluated on a fresh config, which replaces any override
  # previously registered for the same topic.
  # @param topic_name [Symbol] Topic name
  # @yield Block with overrides
  def during_topic(topic_name, &block)
    override = self.class.new
    override.instance_eval(&block)
    @topic_overrides[topic_name.to_sym] = override
  end

  # Get the effective role as an array.
  # @return [Array<String>] Role items (empty array if not set)
  def effective_role
    case @role_type
    when :string
      [@role_value].compact
    when :block
      produced = @role_value&.call
      produced ? [produced] : []
    else
      []
    end
  end

  # Get the effective personality as an array.
  # @return [Array<String>] Personality items (empty array if not set)
  def effective_personality
    effective_noted(@personality_type, @personality_value, @personality_notes)
  end

  # Get the effective context as an array.
  # @return [Array<String>] Context items (empty array if not set)
  def effective_context
    effective_noted(@context_type, @context_value, @context_notes)
  end

  # Merge with a child config using inheritance rules.
  # Neither receiver nor child is modified, and the result does not share
  # note or rule arrays with either of them.
  # @param child [SimulatorConfig] Child configuration
  # @return [SimulatorConfig] New merged configuration
  def merge(child)
    merged = self.class.new

    # Role: child replaces parent whenever the child set one
    merge_setting(merged, :role, child)

    # Personality / context: string replaces, block + block merges notes
    merge_noted_setting(merged, :personality, child)
    merge_noted_setting(merged, :context, child)

    # Rules: always accumulated, parent's first
    merged.instance_variable_set(:@rules, @rules + child.rules)

    # Scalars: child's value wins, parent's is the fallback
    merged.instance_variable_set(:@max_turns, child.max_turns || @max_turns)
    merged.instance_variable_set(:@stop_when, child.stop_when || @stop_when)
    merged.instance_variable_set(:@goal, child.goal || @goal)
    merged.instance_variable_set(:@template, child.template || @template)

    # Topic overrides: per topic, the child's override replaces the parent's
    merged.instance_variable_set(:@topic_overrides, @topic_overrides.merge(child.topic_overrides))

    merged
  end

  # Get config with topic-specific overrides applied.
  # @param topic_name [Symbol, String, nil] Current topic
  # @return [SimulatorConfig] self when there is no topic or no override
  #   for it, otherwise a new config merged with the override
  def for_topic(topic_name)
    return self if topic_name.nil?

    override = @topic_overrides[topic_name.to_sym]
    override ? merge(override) : self
  end

  def to_h
    {
      role: effective_role,
      personality: effective_personality,
      context: effective_context,
      rules: rules,
      max_turns: @max_turns,
      goal: @goal
    }
  end

  private

  # Evaluates +block+ on this config while #note calls are routed to
  # +target+. The previous target is restored afterwards, even when the
  # block raises, so nested blocks and failures cannot leave notes pointed
  # at the wrong list.
  def collect_notes(target, &block)
    previous_target = @current_notes_target
    @current_notes_target = target
    instance_eval(&block)
    nil
  ensure
    @current_notes_target = previous_target
  end

  # Shared body of #effective_personality / #effective_context.
  def effective_noted(type, value, notes)
    case type
    when :string then [value].compact
    when :block then notes.dup
    else []
    end
  end

  # Copies the +name+ type/value pair into +result+, taken from +child+
  # when the child set one and from the receiver otherwise.
  def merge_setting(result, name, child)
    type_ivar = :"@#{name}_type"
    value_ivar = :"@#{name}_value"
    source = child.instance_variable_get(type_ivar) ? child : self

    result.instance_variable_set(type_ivar, source.instance_variable_get(type_ivar))
    result.instance_variable_set(value_ivar, source.instance_variable_get(value_ivar))
  end

  # Merges a setting that carries notes (+:personality+ or +:context+):
  # - child string replaces anything and clears the notes
  # - child block on top of a parent block concatenates the notes
  # - child block otherwise replaces the parent with a copy of its notes
  # - no child setting inherits the parent's (notes copied)
  def merge_noted_setting(result, name, child)
    type_ivar = :"@#{name}_type"
    value_ivar = :"@#{name}_value"
    notes_ivar = :"@#{name}_notes"

    parent_type = instance_variable_get(type_ivar)
    parent_notes = instance_variable_get(notes_ivar)
    child_notes = child.instance_variable_get(notes_ivar)

    type, value, notes =
      case child.instance_variable_get(type_ivar)
      when :string
        [:string, child.instance_variable_get(value_ivar), []]
      when :block
        [:block, nil, parent_type == :block ? parent_notes + child_notes : child_notes.dup]
      else
        [parent_type, instance_variable_get(value_ivar), parent_notes.dup]
      end

    result.instance_variable_set(type_ivar, type)
    result.instance_variable_set(value_ivar, value)
    result.instance_variable_set(notes_ivar, notes)
  end
end
|
|
335
|
+
end
|
|
336
|
+
end
|