rspec-agents 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104) hide show
  1. checksums.yaml +7 -0
  2. data/bin/rspec-agents +24 -0
  3. data/lib/async_workers/channel_config.rb +34 -0
  4. data/lib/async_workers/doc/process_manager_design.md +512 -0
  5. data/lib/async_workers/errors.rb +21 -0
  6. data/lib/async_workers/managed_process.rb +284 -0
  7. data/lib/async_workers/output_stream.rb +86 -0
  8. data/lib/async_workers/rpc_channel.rb +159 -0
  9. data/lib/async_workers/transport/base.rb +57 -0
  10. data/lib/async_workers/transport/stdio_transport.rb +91 -0
  11. data/lib/async_workers/transport/unix_socket_transport.rb +112 -0
  12. data/lib/async_workers/worker_group.rb +175 -0
  13. data/lib/async_workers.rb +17 -0
  14. data/lib/rspec/agents/agent_response.rb +61 -0
  15. data/lib/rspec/agents/agents/base.rb +123 -0
  16. data/lib/rspec/agents/cli.rb +342 -0
  17. data/lib/rspec/agents/conversation.rb +308 -0
  18. data/lib/rspec/agents/criterion.rb +237 -0
  19. data/lib/rspec/agents/doc/2026_01_22_observer-system-design.md +757 -0
  20. data/lib/rspec/agents/doc/2026_01_23_parallel_spec_runner-design.md +1060 -0
  21. data/lib/rspec/agents/doc/2026_01_27_event_serialization-design.md +294 -0
  22. data/lib/rspec/agents/doc/2026_01_27_experiment_aggregation_design.md +831 -0
  23. data/lib/rspec/agents/doc/2026_01_29_rspec-agents-studio-design.md +1332 -0
  24. data/lib/rspec/agents/doc/2026_01_29_testing-framework-design.md +1037 -0
  25. data/lib/rspec/agents/doc/2026_02_04-parallel-runner-ui.md +537 -0
  26. data/lib/rspec/agents/doc/2026_02_05_html_renderer_extensions.md +708 -0
  27. data/lib/rspec/agents/doc/scenario_guide.md +289 -0
  28. data/lib/rspec/agents/dsl/agent_proxy.rb +141 -0
  29. data/lib/rspec/agents/dsl/criterion_definition.rb +78 -0
  30. data/lib/rspec/agents/dsl/graph_builder.rb +38 -0
  31. data/lib/rspec/agents/dsl/runner_factory.rb +52 -0
  32. data/lib/rspec/agents/dsl/scenario_set_dsl.rb +166 -0
  33. data/lib/rspec/agents/dsl/test_context.rb +223 -0
  34. data/lib/rspec/agents/dsl/user_proxy.rb +71 -0
  35. data/lib/rspec/agents/dsl.rb +398 -0
  36. data/lib/rspec/agents/evaluation_result.rb +44 -0
  37. data/lib/rspec/agents/event_bus.rb +78 -0
  38. data/lib/rspec/agents/events.rb +141 -0
  39. data/lib/rspec/agents/isolated_event_bus.rb +86 -0
  40. data/lib/rspec/agents/judge.rb +244 -0
  41. data/lib/rspec/agents/llm/anthropic.rb +143 -0
  42. data/lib/rspec/agents/llm/base.rb +64 -0
  43. data/lib/rspec/agents/llm/mock.rb +181 -0
  44. data/lib/rspec/agents/llm/response.rb +52 -0
  45. data/lib/rspec/agents/matchers.rb +554 -0
  46. data/lib/rspec/agents/message.rb +81 -0
  47. data/lib/rspec/agents/metadata.rb +120 -0
  48. data/lib/rspec/agents/observers/base.rb +70 -0
  49. data/lib/rspec/agents/observers/parallel_terminal_observer.rb +151 -0
  50. data/lib/rspec/agents/observers/rpc_notify_observer.rb +43 -0
  51. data/lib/rspec/agents/observers/terminal_observer.rb +103 -0
  52. data/lib/rspec/agents/parallel/controller.rb +284 -0
  53. data/lib/rspec/agents/parallel/example_discovery.rb +153 -0
  54. data/lib/rspec/agents/parallel/partitioner.rb +31 -0
  55. data/lib/rspec/agents/parallel/run_result.rb +22 -0
  56. data/lib/rspec/agents/parallel/ui/interactive_ui.rb +605 -0
  57. data/lib/rspec/agents/parallel/ui/interleaved_ui.rb +139 -0
  58. data/lib/rspec/agents/parallel/ui/output_adapter.rb +127 -0
  59. data/lib/rspec/agents/parallel/ui/quiet_ui.rb +100 -0
  60. data/lib/rspec/agents/parallel/ui/ui_factory.rb +53 -0
  61. data/lib/rspec/agents/parallel/ui/ui_mode.rb +101 -0
  62. data/lib/rspec/agents/prompt_builders/base.rb +113 -0
  63. data/lib/rspec/agents/prompt_builders/criterion_evaluation.rb +136 -0
  64. data/lib/rspec/agents/prompt_builders/goal_achievement_evaluation.rb +142 -0
  65. data/lib/rspec/agents/prompt_builders/grounding_evaluation.rb +172 -0
  66. data/lib/rspec/agents/prompt_builders/intent_evaluation.rb +111 -0
  67. data/lib/rspec/agents/prompt_builders/topic_classification.rb +105 -0
  68. data/lib/rspec/agents/prompt_builders/user_simulation.rb +131 -0
  69. data/lib/rspec/agents/runners/headless_runner.rb +272 -0
  70. data/lib/rspec/agents/runners/parallel_terminal_runner.rb +220 -0
  71. data/lib/rspec/agents/runners/terminal_runner.rb +186 -0
  72. data/lib/rspec/agents/runners/user_simulator.rb +261 -0
  73. data/lib/rspec/agents/scenario.rb +133 -0
  74. data/lib/rspec/agents/scenario_loader.rb +145 -0
  75. data/lib/rspec/agents/serialization/conversation_renderer.rb +161 -0
  76. data/lib/rspec/agents/serialization/extension.rb +199 -0
  77. data/lib/rspec/agents/serialization/extensions/core_extension.rb +66 -0
  78. data/lib/rspec/agents/serialization/presenters.rb +281 -0
  79. data/lib/rspec/agents/serialization/run_data_aggregator.rb +197 -0
  80. data/lib/rspec/agents/serialization/run_data_builder.rb +189 -0
  81. data/lib/rspec/agents/serialization/templates/_alpine.min.js +5 -0
  82. data/lib/rspec/agents/serialization/templates/_base_components.css +196 -0
  83. data/lib/rspec/agents/serialization/templates/_base_components.js +46 -0
  84. data/lib/rspec/agents/serialization/templates/_conversation_fragment.html.haml +34 -0
  85. data/lib/rspec/agents/serialization/templates/_metadata_default.html.haml +17 -0
  86. data/lib/rspec/agents/serialization/templates/_scripts.js +89 -0
  87. data/lib/rspec/agents/serialization/templates/_styles.css +1211 -0
  88. data/lib/rspec/agents/serialization/templates/conversation_document.html.haml +29 -0
  89. data/lib/rspec/agents/serialization/templates/test_suite.html.haml +238 -0
  90. data/lib/rspec/agents/serialization/test_suite_renderer.rb +207 -0
  91. data/lib/rspec/agents/serialization.rb +374 -0
  92. data/lib/rspec/agents/simulator_config.rb +336 -0
  93. data/lib/rspec/agents/spec_executor.rb +494 -0
  94. data/lib/rspec/agents/stable_example_id.rb +147 -0
  95. data/lib/rspec/agents/templates/user_simulation.erb +9 -0
  96. data/lib/rspec/agents/tool_call.rb +53 -0
  97. data/lib/rspec/agents/topic.rb +307 -0
  98. data/lib/rspec/agents/topic_graph.rb +236 -0
  99. data/lib/rspec/agents/triggers.rb +122 -0
  100. data/lib/rspec/agents/turn.rb +63 -0
  101. data/lib/rspec/agents/turn_executor.rb +91 -0
  102. data/lib/rspec/agents/version.rb +7 -0
  103. data/lib/rspec/agents.rb +145 -0
  104. metadata +242 -0
@@ -0,0 +1,181 @@
1
# JSON is used below for both encoding canned replies (#to_json) and decoding
# them again (JSON.parse); requiring it here keeps this file independent of
# load order.
require "json"

module RSpec
  module Agents
    module Llm
      # Mock LLM adapter for deterministic testing.
      #
      # Responses are never generated by a model. Instead the prompt text is
      # inspected for a handful of (case-sensitive) keywords and routed to a
      # canned handler; see #generate_response for the routing order. Test code
      # configures the canned data through #set_evaluation,
      # #queue_user_response, #queue_topic_classification and
      # #set_default_response, and can inspect what was asked through #calls,
      # #last_prompt and #all_prompts.
      class Mock < Base
        attr_reader :calls, :evaluation_results, :user_responses, :topic_classifications

        def initialize
          @calls = []
          @evaluation_results = {}
          @user_responses = []
          @topic_classifications = []
          @default_responses = {}
          # Cursors into the two queues; queued items are read, not removed.
          @response_index = 0
          @topic_index = 0
        end

        # Record the call and return a canned Response.
        #
        # @param prompt [String] Prompt text; only used for keyword routing
        # @param response_format [Symbol] :json makes the reply text get parsed
        # @param max_tokens [Integer] Recorded in #calls but otherwise unused
        # @return [Response] text, parsed JSON (nil unless :json and valid),
        #   and fixed metadata (model "mock", latency 0)
        def complete(prompt, response_format: :text, max_tokens: 1024)
          @calls << { prompt: prompt, response_format: response_format, max_tokens: max_tokens }

          text = generate_response(prompt, response_format)
          parsed = response_format == :json ? safe_parse(text) : nil

          Response.new(
            text: text,
            parsed: parsed,
            metadata: { model: "mock", latency_ms: 0 }
          )
        end

        # @return [Boolean] always true for the mock
        def available?
          true
        end

        # @return [String] fixed description of this adapter
        def model_info
          "Mock LLM"
        end

        # ---- Configuration helpers ----

        # Set expected evaluation result for a criterion.
        #
        # The criterion name is stored lower-cased and later matched as a
        # substring of the lower-cased prompt.
        #
        # @param criterion [Symbol, String] Criterion name
        # @param satisfied [Boolean] Whether criterion is satisfied
        # @param reasoning [String, nil] Optional reasoning
        def set_evaluation(criterion, satisfied, reasoning = nil)
          key = criterion.to_s.downcase
          @evaluation_results[key] = {
            satisfied: satisfied,
            reasoning: reasoning || "Mock evaluation for #{criterion}"
          }
        end

        # Queue a user simulation response.
        #
        # @param response [String] The user message to return
        def queue_user_response(response)
          @user_responses << response
        end

        # Queue a topic classification result.
        #
        # @param topic [Symbol] The topic to return
        def queue_topic_classification(topic)
          @topic_classifications << topic.to_sym
        end

        # Set a default response for a prompt pattern.
        #
        # Defaults are consulted only after none of the keyword-based handlers
        # claimed the prompt, in the order they were registered.
        #
        # @param pattern [Regexp, String] Pattern to match in prompt
        # @param response [String] Response to return
        def set_default_response(pattern, response)
          @default_responses[pattern] = response
        end

        # Reset all state: recorded calls, configured results, queues and
        # queue cursors.
        def reset!
          @calls.clear
          @evaluation_results.clear
          @user_responses.clear
          @topic_classifications.clear
          @default_responses.clear
          @response_index = 0
          @topic_index = 0
        end

        # Get the last prompt that was sent.
        #
        # @return [String, nil] nil when no call has been recorded
        def last_prompt
          @calls.last&.dig(:prompt)
        end

        # Get all prompts that were sent, oldest first.
        #
        # @return [Array<String>]
        def all_prompts
          @calls.map { |call| call[:prompt] }
        end

        private

        # Pick a canned reply for the prompt. The checks run in a fixed order
        # and the first one that matches wins:
        #   1. criterion evaluation ("satisfied" or "criterion")
        #   2. user simulation ("Generate" and "user")
        #   3. topic classification ("topic" and "classify")
        #   4. grounding evaluation ("grounded" or "grounding")
        #   5. registered default responses
        #   6. a generic placeholder
        def generate_response(prompt, response_format)
          if prompt.include?("satisfied") || prompt.include?("criterion")
            return handle_evaluation_prompt(prompt)
          end

          if prompt.include?("Generate") && prompt.include?("user")
            return handle_user_simulation_prompt
          end

          if prompt.include?("topic") && prompt.include?("classify")
            return handle_topic_classification_prompt
          end

          if prompt.include?("grounded") || prompt.include?("grounding")
            return handle_grounding_prompt(prompt)
          end

          # Return the stored value as-is (even if nil/false) once a pattern hits.
          default = @default_responses.find { |pattern, _response| pattern_matches?(pattern, prompt) }
          return default.last if default

          response_format == :json ? '{"result": "mock response"}' : "Mock response"
        end

        # Regexp patterns are matched against the prompt; anything else is
        # treated as a literal substring.
        def pattern_matches?(pattern, prompt)
          pattern.is_a?(Regexp) ? pattern.match?(prompt) : prompt.include?(pattern.to_s)
        end

        # Build the JSON verdict for a criterion evaluation prompt. Uses the
        # first configured criterion whose name appears in the prompt and
        # falls back to "satisfied" when none does.
        def handle_evaluation_prompt(prompt)
          # Lower-case once instead of once per configured criterion.
          haystack = prompt.downcase
          configured = @evaluation_results.find { |key, _result| haystack.include?(key) }

          verdict =
            if configured
              configured.last
            else
              { satisfied: true, reasoning: "Mock adapter: automatically satisfied" }
            end

          {
            "satisfied" => verdict[:satisfied],
            "reasoning" => verdict[:reasoning]
          }.to_json
        end

        # Return the next queued user message. Once the queue is drained a
        # synthetic message is returned instead; the cursor is not advanced in
        # that case, so the synthetic text stays the same on repeated calls.
        def handle_user_simulation_prompt
          if @response_index < @user_responses.length
            response = @user_responses[@response_index]
            @response_index += 1
            response
          else
            "Mock user response #{@response_index + 1}"
          end
        end

        # Return the next queued topic as JSON, or "unknown" once the queue is
        # drained.
        def handle_topic_classification_prompt
          if @topic_index < @topic_classifications.length
            topic = @topic_classifications[@topic_index]
            @topic_index += 1
            { "topic" => topic.to_s }.to_json
          else
            { "topic" => "unknown" }.to_json
          end
        end

        # Grounding checks always pass in the mock; the prompt is ignored.
        def handle_grounding_prompt(_prompt)
          {
            "grounded" => true,
            "violations" => []
          }.to_json
        end

        # Parse JSON text, returning nil instead of raising on malformed input.
        def safe_parse(text)
          JSON.parse(text)
        rescue JSON::ParserError
          nil
        end
      end
    end
  end
end
@@ -0,0 +1,52 @@
1
module RSpec
  module Agents
    module Llm
      # Represents a response from an LLM completion: the raw text, the parsed
      # JSON payload (if any) and a metadata object.
      class Response
        attr_reader :text, :parsed, :metadata

        # @param text [String] Raw response text
        # @param parsed [Hash, nil] Parsed JSON if response_format was :json
        # @param metadata [Hash] Additional metadata (model, tokens, latency, etc.);
        #   an existing Metadata instance is kept as-is, anything else is
        #   wrapped in Metadata.new
        def initialize(text:, parsed: nil, metadata: {})
          @text = text
          @parsed = parsed
          @metadata = metadata.is_a?(Metadata) ? metadata : Metadata.new(metadata)
        end

        # Check if JSON parsing succeeded
        #
        # @return [Boolean] true when a parsed payload is present
        def parsed?
          !@parsed.nil?
        end

        # Check if there was a parse error
        #
        # Implemented with core Ruby only (no ActiveSupport `present?`), so it
        # works whether or not ActiveSupport is loaded. A nil/false entry, a
        # whitespace-only string or an empty collection counts as "no error".
        #
        # @return [Boolean]
        def parse_error?
          error = parse_error
          return false unless error
          return !error.match?(/\A[[:space:]]*\z/) if error.is_a?(String)
          return !error.empty? if error.respond_to?(:empty?)

          true
        end

        # Get the parse error message if any
        #
        # @return [String, nil] the :parse_error entry of the metadata
        def parse_error
          @metadata[:parse_error]
        end

        # @return [Hash] text, parsed payload and the metadata converted via #to_h
        def to_h
          {
            text: @text,
            parsed: @parsed,
            metadata: @metadata.to_h
          }
        end

        # Compact representation showing only the text length (0 when text is
        # nil) and whether a parsed payload exists.
        def inspect
          "#<#{self.class.name} text=#{@text&.length || 0} chars, parsed=#{parsed?}>"
        end
      end
    end
  end
end