rspec-agents 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/bin/rspec-agents +24 -0
- data/lib/async_workers/channel_config.rb +34 -0
- data/lib/async_workers/doc/process_manager_design.md +512 -0
- data/lib/async_workers/errors.rb +21 -0
- data/lib/async_workers/managed_process.rb +284 -0
- data/lib/async_workers/output_stream.rb +86 -0
- data/lib/async_workers/rpc_channel.rb +159 -0
- data/lib/async_workers/transport/base.rb +57 -0
- data/lib/async_workers/transport/stdio_transport.rb +91 -0
- data/lib/async_workers/transport/unix_socket_transport.rb +112 -0
- data/lib/async_workers/worker_group.rb +175 -0
- data/lib/async_workers.rb +17 -0
- data/lib/rspec/agents/agent_response.rb +61 -0
- data/lib/rspec/agents/agents/base.rb +123 -0
- data/lib/rspec/agents/cli.rb +342 -0
- data/lib/rspec/agents/conversation.rb +308 -0
- data/lib/rspec/agents/criterion.rb +237 -0
- data/lib/rspec/agents/doc/2026_01_22_observer-system-design.md +757 -0
- data/lib/rspec/agents/doc/2026_01_23_parallel_spec_runner-design.md +1060 -0
- data/lib/rspec/agents/doc/2026_01_27_event_serialization-design.md +294 -0
- data/lib/rspec/agents/doc/2026_01_27_experiment_aggregation_design.md +831 -0
- data/lib/rspec/agents/doc/2026_01_29_rspec-agents-studio-design.md +1332 -0
- data/lib/rspec/agents/doc/2026_01_29_testing-framework-design.md +1037 -0
- data/lib/rspec/agents/doc/2026_02_04-parallel-runner-ui.md +537 -0
- data/lib/rspec/agents/doc/2026_02_05_html_renderer_extensions.md +708 -0
- data/lib/rspec/agents/doc/scenario_guide.md +289 -0
- data/lib/rspec/agents/dsl/agent_proxy.rb +141 -0
- data/lib/rspec/agents/dsl/criterion_definition.rb +78 -0
- data/lib/rspec/agents/dsl/graph_builder.rb +38 -0
- data/lib/rspec/agents/dsl/runner_factory.rb +52 -0
- data/lib/rspec/agents/dsl/scenario_set_dsl.rb +166 -0
- data/lib/rspec/agents/dsl/test_context.rb +223 -0
- data/lib/rspec/agents/dsl/user_proxy.rb +71 -0
- data/lib/rspec/agents/dsl.rb +398 -0
- data/lib/rspec/agents/evaluation_result.rb +44 -0
- data/lib/rspec/agents/event_bus.rb +78 -0
- data/lib/rspec/agents/events.rb +141 -0
- data/lib/rspec/agents/isolated_event_bus.rb +86 -0
- data/lib/rspec/agents/judge.rb +244 -0
- data/lib/rspec/agents/llm/anthropic.rb +143 -0
- data/lib/rspec/agents/llm/base.rb +64 -0
- data/lib/rspec/agents/llm/mock.rb +181 -0
- data/lib/rspec/agents/llm/response.rb +52 -0
- data/lib/rspec/agents/matchers.rb +554 -0
- data/lib/rspec/agents/message.rb +81 -0
- data/lib/rspec/agents/metadata.rb +120 -0
- data/lib/rspec/agents/observers/base.rb +70 -0
- data/lib/rspec/agents/observers/parallel_terminal_observer.rb +151 -0
- data/lib/rspec/agents/observers/rpc_notify_observer.rb +43 -0
- data/lib/rspec/agents/observers/terminal_observer.rb +103 -0
- data/lib/rspec/agents/parallel/controller.rb +284 -0
- data/lib/rspec/agents/parallel/example_discovery.rb +153 -0
- data/lib/rspec/agents/parallel/partitioner.rb +31 -0
- data/lib/rspec/agents/parallel/run_result.rb +22 -0
- data/lib/rspec/agents/parallel/ui/interactive_ui.rb +605 -0
- data/lib/rspec/agents/parallel/ui/interleaved_ui.rb +139 -0
- data/lib/rspec/agents/parallel/ui/output_adapter.rb +127 -0
- data/lib/rspec/agents/parallel/ui/quiet_ui.rb +100 -0
- data/lib/rspec/agents/parallel/ui/ui_factory.rb +53 -0
- data/lib/rspec/agents/parallel/ui/ui_mode.rb +101 -0
- data/lib/rspec/agents/prompt_builders/base.rb +113 -0
- data/lib/rspec/agents/prompt_builders/criterion_evaluation.rb +136 -0
- data/lib/rspec/agents/prompt_builders/goal_achievement_evaluation.rb +142 -0
- data/lib/rspec/agents/prompt_builders/grounding_evaluation.rb +172 -0
- data/lib/rspec/agents/prompt_builders/intent_evaluation.rb +111 -0
- data/lib/rspec/agents/prompt_builders/topic_classification.rb +105 -0
- data/lib/rspec/agents/prompt_builders/user_simulation.rb +131 -0
- data/lib/rspec/agents/runners/headless_runner.rb +272 -0
- data/lib/rspec/agents/runners/parallel_terminal_runner.rb +220 -0
- data/lib/rspec/agents/runners/terminal_runner.rb +186 -0
- data/lib/rspec/agents/runners/user_simulator.rb +261 -0
- data/lib/rspec/agents/scenario.rb +133 -0
- data/lib/rspec/agents/scenario_loader.rb +145 -0
- data/lib/rspec/agents/serialization/conversation_renderer.rb +161 -0
- data/lib/rspec/agents/serialization/extension.rb +199 -0
- data/lib/rspec/agents/serialization/extensions/core_extension.rb +66 -0
- data/lib/rspec/agents/serialization/presenters.rb +281 -0
- data/lib/rspec/agents/serialization/run_data_aggregator.rb +197 -0
- data/lib/rspec/agents/serialization/run_data_builder.rb +189 -0
- data/lib/rspec/agents/serialization/templates/_alpine.min.js +5 -0
- data/lib/rspec/agents/serialization/templates/_base_components.css +196 -0
- data/lib/rspec/agents/serialization/templates/_base_components.js +46 -0
- data/lib/rspec/agents/serialization/templates/_conversation_fragment.html.haml +34 -0
- data/lib/rspec/agents/serialization/templates/_metadata_default.html.haml +17 -0
- data/lib/rspec/agents/serialization/templates/_scripts.js +89 -0
- data/lib/rspec/agents/serialization/templates/_styles.css +1211 -0
- data/lib/rspec/agents/serialization/templates/conversation_document.html.haml +29 -0
- data/lib/rspec/agents/serialization/templates/test_suite.html.haml +238 -0
- data/lib/rspec/agents/serialization/test_suite_renderer.rb +207 -0
- data/lib/rspec/agents/serialization.rb +374 -0
- data/lib/rspec/agents/simulator_config.rb +336 -0
- data/lib/rspec/agents/spec_executor.rb +494 -0
- data/lib/rspec/agents/stable_example_id.rb +147 -0
- data/lib/rspec/agents/templates/user_simulation.erb +9 -0
- data/lib/rspec/agents/tool_call.rb +53 -0
- data/lib/rspec/agents/topic.rb +307 -0
- data/lib/rspec/agents/topic_graph.rb +236 -0
- data/lib/rspec/agents/triggers.rb +122 -0
- data/lib/rspec/agents/turn.rb +63 -0
- data/lib/rspec/agents/turn_executor.rb +91 -0
- data/lib/rspec/agents/version.rb +7 -0
- data/lib/rspec/agents.rb +145 -0
- metadata +242 -0
|
@@ -0,0 +1,307 @@
|
|
|
1
|
+
module RSpec
|
|
2
|
+
module Agents
|
|
3
|
+
# Wraps a single expectation with its type, mode, and data
|
|
4
|
+
class TopicExpectation
  # @return [Symbol] expectation kind (:quality, :match, :no_match, :grounding,
  #   :tool_call, :forbidden_tool, :forbidden_claims or :custom)
  attr_reader :type

  # @return [Symbol] :soft or :hard
  attr_reader :mode

  # @return [Hash] type-specific payload
  attr_reader :data

  # @param type [Symbol] expectation kind
  # @param mode [Symbol] :soft or :hard
  # @param data [Hash] type-specific payload
  def initialize(type, mode, data)
    @type, @mode, @data = type, mode, data
  end

  # @return [Boolean] true when the expectation was declared in :soft mode
  def soft?
    mode == :soft
  end

  # @return [Boolean] true when the expectation was declared in :hard mode
  def hard?
    mode == :hard
  end
end
|
|
21
|
+
|
|
22
|
+
# Represents a distinct phase or state in a conversation
|
|
23
|
+
# Each topic has characteristics, triggers, invariants, and an intent
|
|
24
|
+
class Topic
  attr_reader :name, :invariants, :characteristic_text, :agent_intent_text
  attr_accessor :successors # Set by TopicGraph

  # @param name [Symbol, String] Topic name (stored as a Symbol)
  # @yield Optional DSL block, evaluated with the new topic as +self+
  def initialize(name, &block)
    @name = name.to_sym
    @characteristic_text = nil
    @agent_intent_text = nil
    @triggers = Triggers.new
    @invariants = InvariantSet.new
    @successors = []

    instance_eval(&block) if block
  end

  # DSL: Set the topic's characteristic description
  # Used by LLM judge to identify when conversation is in this topic
  # @param text [String] Description; surrounding whitespace is stripped
  def characteristic(text)
    @characteristic_text = text.strip
  end

  # DSL: Set the agent's intended behavior during this topic
  # @param text [String] Description; surrounding whitespace is stripped
  def agent_intent(text)
    @agent_intent_text = text.strip
  end

  # DSL: Define triggers for deterministic topic classification
  # With a block, replaces the topic's triggers with a new Triggers built from
  # that block; without a block, returns the current triggers.
  # @return [Triggers]
  def triggers(&block)
    return @triggers unless block

    @triggers = Triggers.new(&block)
  end

  # DSL: Expect agent to satisfy criteria (hard)
  # @param to_satisfy [Array<Symbol>] Criterion names
  # @param to_match [Regexp] Pattern agent response must match
  # @param not_to_match [Regexp] Pattern agent response must NOT match
  def expect_agent(to_satisfy: nil, to_match: nil, not_to_match: nil)
    @invariants.add_expectation(:quality, :hard, { criteria: to_satisfy }) if to_satisfy
    @invariants.add_expectation(:match, :hard, { pattern: to_match }) if to_match
    @invariants.add_expectation(:no_match, :hard, { pattern: not_to_match }) if not_to_match
  end

  # DSL: Expect agent claims to be grounded in tool results (hard)
  # @param claim_types [Array<Symbol>] Types of claims (:venues, :pricing, etc.)
  # @param from_tools [Array<Symbol>] Tool names that should provide grounding
  def expect_grounding(*claim_types, from_tools: [])
    @invariants.add_expectation(:grounding, :hard, { claim_types: claim_types, from_tools: from_tools })
  end

  # DSL: Forbid agent from making ungrounded claims (hard)
  # @param claim_types [Array<Symbol>] Types of claims to forbid
  def forbid_claims(*claim_types)
    @invariants.add_expectation(:forbidden_claims, :hard, { claim_types: claim_types })
  end

  # DSL: Expect agent to call a specific tool (hard)
  # @param tool_name [Symbol] Tool name
  def expect_tool_call(tool_name)
    @invariants.add_expectation(:tool_call, :hard, { tool_name: tool_name })
  end

  # DSL: Forbid agent from calling a specific tool (hard)
  # @param tool_name [Symbol] Tool name
  def forbid_tool_call(tool_name)
    @invariants.add_expectation(:forbidden_tool, :hard, { tool_name: tool_name })
  end

  # DSL: Custom expectation with block (hard)
  # @param description [String] Description of expectation
  # @yield [turn, conversation] Block that returns true/false
  # @raise [ArgumentError] If no block is given
  def expect(description, &block)
    require_block!(:expect, block)
    @invariants.add_expectation(:custom, :hard, { description: description, block: block })
  end

  # DSL: Soft evaluation - quality criteria
  # @param to_satisfy [Array<Symbol>] Criterion names
  # @param to_match [Regexp] Pattern agent response must match
  # @param not_to_match [Regexp] Pattern agent response must NOT match
  def evaluate_agent(to_satisfy: nil, to_match: nil, not_to_match: nil)
    @invariants.add_expectation(:quality, :soft, { criteria: to_satisfy }) if to_satisfy
    @invariants.add_expectation(:match, :soft, { pattern: to_match }) if to_match
    @invariants.add_expectation(:no_match, :soft, { pattern: not_to_match }) if not_to_match
  end

  # DSL: Soft evaluation - grounding
  # @param claim_types [Array<Symbol>] Types of claims (:venues, :pricing, etc.)
  # @param from_tools [Array<Symbol>] Tool names that should provide grounding
  def evaluate_grounding(*claim_types, from_tools: [])
    @invariants.add_expectation(:grounding, :soft, { claim_types: claim_types, from_tools: from_tools })
  end

  # DSL: Soft evaluation - tool calls
  # @param tool_name [Symbol] Tool name
  def evaluate_tool_call(tool_name)
    @invariants.add_expectation(:tool_call, :soft, { tool_name: tool_name })
  end

  # DSL: Soft evaluation - custom
  # @param description [String] Description of expectation
  # @yield [turn, conversation] Block that returns true/false
  # @raise [ArgumentError] If no block is given
  def evaluate(description, &block)
    require_block!(:evaluate, block)
    @invariants.add_expectation(:custom, :soft, { description: description, block: block })
  end

  # @param turn [Object] Turn to test
  # @param conversation [Object] Full conversation context
  # @return [Boolean] Whether any of this topic's triggers matches
  def trigger_matches?(turn, conversation)
    @triggers.any_match?(turn, conversation)
  end

  # Evaluate all invariants for turns that occurred in this topic
  # @param turns [Array] Turns that occurred in this topic
  # @param conversation [Object] Full conversation context
  # @param judge [Object] LLM judge for evaluating quality/grounding
  # @return [InvariantResults]
  def evaluate_invariants(turns, conversation, judge)
    @invariants.evaluate(turns, conversation, judge, @name)
  end

  # @return [Hash] name, characteristic, agent_intent and successors of this topic
  def to_h
    {
      name: @name,
      characteristic: @characteristic_text,
      agent_intent: @agent_intent_text,
      successors: @successors
    }
  end

  private

  # Fail at declaration time: a custom expectation stored without a block
  # would otherwise only blow up later, when the block is called.
  def require_block!(dsl_method, block)
    raise ArgumentError, "#{dsl_method} requires a block" unless block
  end
end
|
|
157
|
+
|
|
158
|
+
# Collects and evaluates invariants for a topic
|
|
159
|
+
class InvariantSet
  def initialize
    @expectations = [] # Array of TopicExpectation objects
  end

  # Generic method to add any type of expectation
  # @param type [Symbol] Expectation kind
  # @param mode [Symbol] :soft or :hard
  # @param data [Hash] Type-specific data
  def add_expectation(type, mode, data)
    @expectations << TopicExpectation.new(type, mode, data)
  end

  # @return [Boolean] Whether no expectation has been added
  def empty?
    @expectations.empty?
  end

  # Evaluate all invariants against the given turns.
  # :match, :no_match, :tool_call, :forbidden_tool and :custom expectations are
  # decided here; :quality, :grounding and :forbidden_claims are only recorded
  # as pending on the returned results.
  # @param turns [Array] Turns to check (each responds to #agent_response)
  # @param conversation [Object] Passed through to custom expectation blocks
  # @param judge [Object] Not used by this method; kept for interface compatibility
  # @param topic_name [Symbol] Name handed to the InvariantResults
  # @return [InvariantResults]
  def evaluate(turns, conversation, judge, topic_name)
    results = InvariantResults.new(topic_name)
    @expectations.each { |expectation| record(expectation, turns, conversation, results) }
    results
  end

  private

  # Adds the outcome (or pending entry) for a single expectation to +results+.
  def record(expectation, turns, conversation, results)
    data = expectation.data
    mode = expectation.mode

    case expectation.type
    when :quality
      # LLM-based evaluation - mark as pending
      results.add_pending(:quality, data[:criteria], mode: mode)
    when :grounding
      # LLM-based evaluation - mark as pending
      results.add_pending(:grounding, data, mode: mode)
    when :forbidden_claims
      # LLM-based evaluation - mark as pending
      results.add_pending(:forbidden_claims, data[:claim_types], mode: mode)
    when :match
      pattern = data[:pattern]
      matched = any_response_matches?(turns, pattern)
      results.add(:match, pattern.inspect, matched,
                  matched ? nil : "No response matched #{pattern.inspect}", mode: mode)
    when :no_match
      pattern = data[:pattern]
      violated = any_response_matches?(turns, pattern)
      results.add(:no_match, pattern.inspect, !violated,
                  violated ? "Response matched forbidden pattern #{pattern.inspect}" : nil, mode: mode)
    when :tool_call
      tool_name = data[:tool_name]
      called = tool_called?(turns, tool_name)
      results.add(:tool_call, tool_name, called,
                  called ? nil : "Expected tool call to #{tool_name} but it was not called", mode: mode)
    when :forbidden_tool
      tool_name = data[:tool_name]
      called = tool_called?(turns, tool_name)
      results.add(:forbidden_tool, tool_name, !called,
                  called ? "Forbidden tool #{tool_name} was called" : nil, mode: mode)
    when :custom
      description = data[:description]
      # The block must hold for every turn; an empty turn list passes.
      passed = turns.all? { |turn| data[:block].call(turn, conversation) }
      results.add(:custom, description, passed,
                  passed ? nil : "Custom expectation failed: #{description}", mode: mode)
    end
  end

  # True when at least one turn's agent response text matches +pattern+.
  # Turns without an agent response are matched against the empty string.
  def any_response_matches?(turns, pattern)
    turns.any? { |turn| pattern.match?(turn.agent_response&.text.to_s) }
  end

  # True when any turn's agent response contains a call to +tool_name+.
  # Names are compared as strings so "search" and :search are the same tool.
  def tool_called?(turns, tool_name)
    wanted = tool_name.to_s
    turns.any? do |turn|
      (turn.agent_response&.tool_calls || []).any? { |call| call.name.to_s == wanted }
    end
  end
end
|
|
258
|
+
|
|
259
|
+
# Stores results of invariant evaluation
|
|
260
|
+
class InvariantResults
  attr_reader :topic_name, :results, :pending

  # @param topic_name [Symbol] Topic these results belong to
  def initialize(topic_name)
    @topic_name = topic_name
    @results = []
    @pending = [] # Requires judge evaluation
  end

  # Record the outcome of an expectation that has been decided.
  # @param type [Symbol] Expectation kind
  # @param description [Object] What was checked
  # @param passed [Boolean] Outcome
  # @param failure_message [String, nil] Message for a failed check
  # @param mode [Symbol] :hard (default) or :soft
  def add(type, description, passed, failure_message, mode: :hard)
    entry = {
      type: type,
      description: description,
      passed: passed,
      failure_message: failure_message,
      mode: mode
    }
    @results.push(entry)
  end

  # Record an expectation that still has to be evaluated.
  # @param type [Symbol] Expectation kind
  # @param data [Object] Data needed for the later evaluation
  # @param mode [Symbol] :hard (default) or :soft
  def add_pending(type, data, mode: :hard)
    @pending.push({ type: type, data: data, mode: mode })
  end

  # Only hard entries count: false while any hard entry is pending or failed.
  # @return [Boolean]
  def passed?
    return false if @pending.any? { |item| item[:mode] == :hard }

    @results.none? { |entry| entry[:mode] == :hard && !entry[:passed] }
  end

  # @return [Array<Hash>] Failed results, excluding soft ones
  def failures
    @results.select { |entry| !entry[:passed] && entry[:mode] != :soft }
  end

  # @return [Array<Hash>] Failed results recorded in :soft mode
  def soft_failures
    failed_with_mode(:soft)
  end

  # @return [Array<Hash>] Failed results recorded in :hard mode
  def hard_failures
    failed_with_mode(:hard)
  end

  # @return [Boolean] Whether any entry is awaiting evaluation
  def has_pending?
    @pending.size.positive?
  end

  private

  # Failed results whose mode equals +mode+.
  def failed_with_mode(mode)
    @results.select { |entry| !entry[:passed] && entry[:mode] == mode }
  end
end
|
|
306
|
+
end
|
|
307
|
+
end
|
|
@@ -0,0 +1,236 @@
|
|
|
1
|
+
module RSpec
|
|
2
|
+
module Agents
|
|
3
|
+
# Topic graph validation errors
|
|
4
|
+
# Defined at module level so they can be caught before RSpec wraps them
|
|
5
|
+
# Common ancestor of all topic graph validation errors.
class TopicGraphValidationError < StandardError
end

# A topic with the same name was added to the graph twice.
class DuplicateTopicError < TopicGraphValidationError
end

# A topic was referenced but never defined.
class UndefinedTopicError < TopicGraphValidationError
end

# A topic lists itself as one of its own successors.
class SelfLoopError < TopicGraphValidationError
end

# A topic cannot be reached from the graph's initial topic.
class UnreachableTopicError < TopicGraphValidationError
end
|
|
10
|
+
|
|
11
|
+
# Directed graph of topics representing conversation flow
|
|
12
|
+
# Validates structure and provides traversal methods
|
|
13
|
+
class TopicGraph
  attr_reader :initial_topic, :topics

  def initialize
    @topics = {}
    @edges = {} # topic_name => [successor_names]
    @initial_topic = nil
    @topic_order = [] # Tracks order for initial topic detection
  end

  # Add a topic with its successors.
  # The first topic added or referenced becomes the initial topic. A name that
  # was only referenced so far (placeholder created by #use_topic) is filled in
  # here instead of being rejected as a duplicate; it keeps the successors
  # declared by #use_topic unless +next_topics+ is given.
  # @param topic [Topic] Topic instance
  # @param next_topics [Symbol, String, Array<Symbol>, nil] Successor topic name(s)
  # @raise [DuplicateTopicError] If a topic with this name is already defined
  def add_topic(topic, next_topics: nil)
    name = topic.name

    raise DuplicateTopicError, "Topic :#{name} is already defined" if @topics[name]

    placeholder = @topics.key?(name)
    @topics[name] = topic
    @edges[name] = normalize_next(next_topics) unless placeholder && next_topics.nil?
    @topic_order << name unless placeholder
    @initial_topic ||= name
  end

  # Reference a shared topic and wire it into the graph.
  # A name that is neither defined nor found in +shared_topics+ is stored as a
  # placeholder and must be defined before #validate! is called.
  # NOTE(review): the shared Topic instance itself is stored (not a copy), and
  # #validate! assigns its successors - confirm that sharing one instance
  # between several graphs is intended.
  # @param name [Symbol] Topic name (must exist in shared topics or already added)
  # @param next_topics [Symbol, String, Array<Symbol>, nil] Successor topic name(s)
  # @param shared_topics [Hash] Hash of shared topic definitions
  def use_topic(name, next_topics: nil, shared_topics: {})
    name = name.to_sym
    successors = normalize_next(next_topics)
    known = @topics.key?(name)

    # Keep an existing definition; otherwise take the shared one, or leave a
    # nil placeholder (also fills a placeholder left by an earlier reference).
    @topics[name] = shared_topics[name] if @topics[name].nil?
    @edges[name] = successors
    return successors if known

    @topic_order << name
    @initial_topic ||= name
  end

  # Validate the graph structure, then assign each topic its successors.
  # @raise [TopicGraphValidationError] If validation fails (UndefinedTopicError,
  #   SelfLoopError or UnreachableTopicError)
  def validate!
    validate_referential_integrity!
    validate_no_self_loops!
    validate_connectivity!
    wire_successors!
  end

  # Get successor topic names for a topic
  # @param topic_name [Symbol] Topic name
  # @return [Array<Symbol>] Empty when the topic is unknown
  def successors_of(topic_name)
    @edges[topic_name.to_sym] || []
  end

  # Check if target is reachable from start (a topic is reachable from itself)
  # @param start [Symbol] Starting topic name
  # @param target [Symbol] Target topic name
  # @return [Boolean]
  def reachable_from?(start, target)
    goal = target.to_sym
    visited = Set.new
    queue = [start.to_sym]

    until queue.empty?
      current = queue.shift
      return true if current == goal
      # Set#add? returns nil when the element was already present
      next unless visited.add?(current)

      queue.concat(successors_of(current))
    end

    false
  end

  # Get a topic by name
  # @param name [Symbol] Topic name
  # @return [Topic, nil]
  def [](name)
    @topics[name.to_sym]
  end

  # @return [Array<Symbol>] Names of all topics, in insertion order
  def topic_names
    @topics.keys
  end

  def empty?
    @topics.empty?
  end

  def size
    @topics.size
  end

  # Get all terminal topics (no successors)
  # @return [Array<Symbol>]
  def terminal_topics
    @topics.keys.select { |name| successors_of(name).empty? }
  end

  # Classify which topic a turn belongs to, given the current topic
  # Uses conservative classification: current topic triggers checked first,
  # then successor triggers, then LLM fallback if judge provided.
  #
  # @param turn [Turn] The turn to classify
  # @param conversation [Conversation] Full conversation context
  # @param current_topic [Symbol] The current topic name
  # @param judge [Judge, nil] Optional judge for LLM-based classification
  # @return [Symbol] The classified topic name
  def classify(turn, conversation, current_topic, judge: nil)
    current_topic = current_topic.to_sym
    current_topic_obj = @topics[current_topic]

    # 1. Check current topic's triggers first (conservative stay)
    return current_topic if current_topic_obj&.trigger_matches?(turn, conversation)

    # 2. Check successor topics' triggers, in declaration order
    successors = successors_of(current_topic)
    triggered = successors.find { |name| @topics[name]&.trigger_matches?(turn, conversation) }
    return triggered if triggered

    # 3. No triggers matched - use LLM classification if judge available and successors exist
    if judge && successors.any?
      candidates = [current_topic_obj, *successors.map { |name| @topics[name] }].compact
      return judge.classify_topic(turn, conversation, candidates)
    end

    # 4. No successors and no triggers (or no judge) - stay in current topic
    current_topic
  end

  def to_h
    {
      initial_topic: @initial_topic,
      topics: @topics.transform_values { |topic| topic&.to_h },
      edges: @edges
    }
  end

  private

  # Turns the +next_topics+ argument into an Array of Symbols.
  # A single String is accepted like a Symbol, matching the Array form, whose
  # elements are converted with #to_sym.
  def normalize_next(next_topics)
    case next_topics
    when nil
      []
    when Symbol, String
      [next_topics.to_sym]
    when Array
      next_topics.map(&:to_sym)
    else
      raise ArgumentError, "next: must be a Symbol, Array of Symbols, or nil"
    end
  end

  # Every edge target must be a known name, and no placeholder may remain.
  def validate_referential_integrity!
    @edges.each do |source, targets|
      missing = targets.find { |target| !@topics.key?(target) }
      raise UndefinedTopicError, "Topic :#{source} references undefined topic :#{missing}" if missing
    end

    # Check for placeholder topics (used but never defined)
    @topics.each do |name, topic|
      raise UndefinedTopicError, "Topic :#{name} was referenced but never defined" if topic.nil?
    end
  end

  def validate_no_self_loops!
    @edges.each do |source, targets|
      next unless targets.include?(source)

      raise SelfLoopError, "Topic :#{source} has a self-loop (next: [:#{source}]). Self-loops are not allowed; use conservative topic classification instead."
    end
  end

  # Breadth-first walk from the initial topic; every topic must be visited.
  def validate_connectivity!
    return if @topics.empty?

    reachable = Set.new
    queue = [@initial_topic]

    until queue.empty?
      current = queue.shift
      next unless reachable.add?(current)

      queue.concat(successors_of(current))
    end

    unreachable = @topics.keys - reachable.to_a
    return if unreachable.empty?

    raise UnreachableTopicError, "Topics #{unreachable.map { |t| ":#{t}" }.join(', ')} are not reachable from initial topic :#{@initial_topic}"
  end

  # Copies the graph's edges onto each topic's +successors+ attribute.
  def wire_successors!
    @topics.each do |name, topic|
      topic.successors = successors_of(name)
    end
  end
end
|
|
235
|
+
end
|
|
236
|
+
end
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
module RSpec
|
|
2
|
+
module Agents
|
|
3
|
+
# Enables deterministic topic classification without LLM calls
|
|
4
|
+
# When a trigger matches, the topic is classified without invoking the judge
|
|
5
|
+
class Triggers
  # @yield Optional DSL block, evaluated with the new Triggers as +self+
  def initialize(&block)
    @triggers = []
    instance_eval(&block) if block
  end

  # Trigger when agent calls a specific tool
  # @param name [Symbol] Tool name
  # @param with_params [Hash] Optional parameter constraints (values can be Regexp or exact match)
  def on_tool_call(name, with_params: nil)
    @triggers << ToolCallTrigger.new(name, with_params)
  end

  # Trigger when agent response matches a pattern
  # @param pattern [Regexp] Pattern to match against agent response text
  def on_response_match(pattern)
    @triggers << ResponseMatchTrigger.new(pattern)
  end

  # Trigger when user message matches a pattern
  # @param pattern [Regexp] Pattern to match against user message text
  def on_user_match(pattern)
    @triggers << UserMatchTrigger.new(pattern)
  end

  # Trigger after N turns in a specific topic
  # @param topic [Symbol] Topic name
  # @param count [Integer] Number of turns
  def after_turns_in(topic, count:)
    @triggers << TurnsInTopicTrigger.new(topic, count)
  end

  # Trigger on custom condition, given either as a callable or as a block.
  # When both are given the positional callable is used.
  # @param condition [Proc, nil] Lambda receiving (turn, conversation)
  # @yield [turn, conversation] Alternative to +condition+
  # @raise [ArgumentError] If neither a callable nor a block is given
  def on_condition(condition = nil, &block)
    callable = condition || block
    raise ArgumentError, "on_condition requires a callable or a block" unless callable

    @triggers << ConditionTrigger.new(callable)
  end

  # Check if any trigger matches the current turn
  # @param turn [Object] Current turn (has agent_response, user_message)
  # @param conversation [Object] Full conversation context
  # @return [Boolean]
  def any_match?(turn, conversation)
    @triggers.any? { |trigger| trigger.match?(turn, conversation) }
  end

  # @return [Boolean] Whether no trigger has been registered
  def empty?
    @triggers.empty?
  end

  # @return [Integer] Number of registered triggers
  def count
    @triggers.count
  end

  # Individual trigger types

  # Matches when the turn's agent response reports a call to the named tool.
  class ToolCallTrigger
    # @param name [Symbol, String] Tool name (stored as a Symbol)
    # @param params [Hash, nil] Constraints forwarded to #has_tool_call?
    def initialize(name, params)
      @name = name.to_sym
      @params = params
    end

    def match?(turn, _conversation)
      return false unless turn.respond_to?(:agent_response) && turn.agent_response

      turn.agent_response.has_tool_call?(@name, params: @params)
    end
  end

  # Matches when the agent response text matches the pattern.
  class ResponseMatchTrigger
    def initialize(pattern)
      @pattern = pattern
    end

    def match?(turn, _conversation)
      return false unless turn.respond_to?(:agent_response) && turn.agent_response

      @pattern.match?(turn.agent_response.text.to_s)
    end
  end

  # Matches when the string form of the turn's user message matches the pattern.
  class UserMatchTrigger
    def initialize(pattern)
      @pattern = pattern
    end

    def match?(turn, _conversation)
      return false unless turn.respond_to?(:user_message)

      # NOTE(review): relies on user_message#to_s yielding the message text -
      # confirm against the message class.
      @pattern.match?(turn.user_message.to_s)
    end
  end

  # Matches once the conversation reports at least +count+ turns in +topic+.
  class TurnsInTopicTrigger
    def initialize(topic, count)
      @topic = topic.to_sym
      @count = count
    end

    def match?(_turn, conversation)
      return false unless conversation.respond_to?(:turns_in_topic)

      conversation.turns_in_topic(@topic) >= @count
    end
  end

  # Matches according to the result of a user-supplied callable.
  class ConditionTrigger
    # @param condition [#call] Receives (turn, conversation)
    def initialize(condition)
      @condition = condition
    end

    def match?(turn, conversation)
      @condition.call(turn, conversation)
    end
  end
end
|
|
121
|
+
end
|
|
122
|
+
end
|