decision_agent 0.1.4 → 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +84 -233
- data/lib/decision_agent/ab_testing/ab_testing_agent.rb +46 -10
- data/lib/decision_agent/agent.rb +5 -3
- data/lib/decision_agent/auth/access_audit_logger.rb +122 -0
- data/lib/decision_agent/auth/authenticator.rb +127 -0
- data/lib/decision_agent/auth/password_reset_manager.rb +57 -0
- data/lib/decision_agent/auth/password_reset_token.rb +33 -0
- data/lib/decision_agent/auth/permission.rb +29 -0
- data/lib/decision_agent/auth/permission_checker.rb +43 -0
- data/lib/decision_agent/auth/rbac_adapter.rb +278 -0
- data/lib/decision_agent/auth/rbac_config.rb +51 -0
- data/lib/decision_agent/auth/role.rb +56 -0
- data/lib/decision_agent/auth/session.rb +33 -0
- data/lib/decision_agent/auth/session_manager.rb +57 -0
- data/lib/decision_agent/auth/user.rb +70 -0
- data/lib/decision_agent/context.rb +24 -4
- data/lib/decision_agent/decision.rb +10 -3
- data/lib/decision_agent/dsl/condition_evaluator.rb +378 -1
- data/lib/decision_agent/dsl/schema_validator.rb +8 -1
- data/lib/decision_agent/errors.rb +38 -0
- data/lib/decision_agent/evaluation.rb +10 -3
- data/lib/decision_agent/evaluation_validator.rb +8 -13
- data/lib/decision_agent/monitoring/dashboard_server.rb +1 -0
- data/lib/decision_agent/monitoring/metrics_collector.rb +17 -5
- data/lib/decision_agent/testing/batch_test_importer.rb +373 -0
- data/lib/decision_agent/testing/batch_test_runner.rb +244 -0
- data/lib/decision_agent/testing/test_coverage_analyzer.rb +191 -0
- data/lib/decision_agent/testing/test_result_comparator.rb +235 -0
- data/lib/decision_agent/testing/test_scenario.rb +42 -0
- data/lib/decision_agent/version.rb +10 -1
- data/lib/decision_agent/versioning/activerecord_adapter.rb +1 -1
- data/lib/decision_agent/versioning/file_storage_adapter.rb +96 -28
- data/lib/decision_agent/web/middleware/auth_middleware.rb +45 -0
- data/lib/decision_agent/web/middleware/permission_middleware.rb +94 -0
- data/lib/decision_agent/web/public/app.js +184 -29
- data/lib/decision_agent/web/public/batch_testing.html +640 -0
- data/lib/decision_agent/web/public/index.html +37 -9
- data/lib/decision_agent/web/public/login.html +298 -0
- data/lib/decision_agent/web/public/users.html +679 -0
- data/lib/decision_agent/web/server.rb +873 -7
- data/lib/decision_agent.rb +52 -0
- data/lib/generators/decision_agent/install/templates/rule_version.rb +1 -1
- data/spec/ab_testing/ab_test_assignment_spec.rb +253 -0
- data/spec/ab_testing/ab_test_manager_spec.rb +282 -0
- data/spec/ab_testing/ab_testing_agent_spec.rb +481 -0
- data/spec/ab_testing/storage/adapter_spec.rb +64 -0
- data/spec/ab_testing/storage/memory_adapter_spec.rb +485 -0
- data/spec/advanced_operators_spec.rb +1003 -0
- data/spec/agent_spec.rb +40 -0
- data/spec/audit_adapters_spec.rb +18 -0
- data/spec/auth/access_audit_logger_spec.rb +394 -0
- data/spec/auth/authenticator_spec.rb +112 -0
- data/spec/auth/password_reset_spec.rb +294 -0
- data/spec/auth/permission_checker_spec.rb +207 -0
- data/spec/auth/permission_spec.rb +73 -0
- data/spec/auth/rbac_adapter_spec.rb +550 -0
- data/spec/auth/rbac_config_spec.rb +82 -0
- data/spec/auth/role_spec.rb +51 -0
- data/spec/auth/session_manager_spec.rb +172 -0
- data/spec/auth/session_spec.rb +112 -0
- data/spec/auth/user_spec.rb +130 -0
- data/spec/context_spec.rb +43 -0
- data/spec/decision_agent_spec.rb +96 -0
- data/spec/decision_spec.rb +423 -0
- data/spec/dsl/condition_evaluator_spec.rb +774 -0
- data/spec/evaluation_spec.rb +364 -0
- data/spec/evaluation_validator_spec.rb +165 -0
- data/spec/examples.txt +1542 -612
- data/spec/monitoring/metrics_collector_spec.rb +220 -2
- data/spec/monitoring/storage/activerecord_adapter_spec.rb +153 -1
- data/spec/monitoring/storage/base_adapter_spec.rb +61 -0
- data/spec/performance_optimizations_spec.rb +486 -0
- data/spec/spec_helper.rb +23 -0
- data/spec/testing/batch_test_importer_spec.rb +693 -0
- data/spec/testing/batch_test_runner_spec.rb +307 -0
- data/spec/testing/test_coverage_analyzer_spec.rb +292 -0
- data/spec/testing/test_result_comparator_spec.rb +392 -0
- data/spec/testing/test_scenario_spec.rb +113 -0
- data/spec/versioning/adapter_spec.rb +156 -0
- data/spec/versioning_spec.rb +253 -0
- data/spec/web/middleware/auth_middleware_spec.rb +133 -0
- data/spec/web/middleware/permission_middleware_spec.rb +247 -0
- data/spec/web_ui_rack_spec.rb +1705 -0
- metadata +99 -6
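
The largest additions in this release are a `DecisionAgent::Auth` layer (RBAC adapter, roles, permissions, sessions, password reset) and a `DecisionAgent::Testing` batch-testing toolkit. The two spec hunks reproduced below exercise the testing toolkit; pieced together from them, a rough usage sketch looks like the following. Class, method, and option names are taken from the specs below — the surrounding setup is an assumption for illustration, not documented API.

# Sketch only, assembled from batch_test_runner_spec.rb and
# test_coverage_analyzer_spec.rb below; treat as illustrative, not as the
# gem's documented interface.
require "decision_agent"

evaluator = DecisionAgent::Evaluators::StaticEvaluator.new(decision: "approve", weight: 1.0)
agent     = DecisionAgent::Agent.new(evaluators: [evaluator])
runner    = DecisionAgent::Testing::BatchTestRunner.new(agent)

scenarios = [
  DecisionAgent::Testing::TestScenario.new(id: "test_1", context: { user_id: 123 }),
  DecisionAgent::Testing::TestScenario.new(id: "test_2", context: { user_id: 456 })
]

# Run scenarios (optionally in parallel), checkpointing progress so an
# interrupted run can later be picked up with runner.resume(scenarios, path).
results = runner.run(scenarios, parallel: true, thread_count: 2,
                     checkpoint_file: "checkpoint.json")

runner.statistics  # => { total: 2, successful: 2, failed: 0, success_rate: 1.0, ... }

# Coverage analysis over the same results.
report = DecisionAgent::Testing::TestCoverageAnalyzer.new.analyze(results, agent)
report.coverage_percentage  # => value between 0.0 and 1.0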

data/spec/testing/batch_test_runner_spec.rb
@@ -0,0 +1,307 @@
+require "spec_helper"
+require "tempfile"
+
+RSpec.describe DecisionAgent::Testing::BatchTestRunner do
+  let(:evaluator) { DecisionAgent::Evaluators::StaticEvaluator.new(decision: "approve", weight: 1.0) }
+  let(:agent) { DecisionAgent::Agent.new(evaluators: [evaluator]) }
+  let(:runner) { DecisionAgent::Testing::BatchTestRunner.new(agent) }
+
+  describe "#initialize" do
+    it "creates a batch test runner with an agent" do
+      expect(runner.agent).to eq(agent)
+      expect(runner.results).to eq([])
+    end
+  end
+
+  describe "#run" do
+    let(:scenarios) do
+      [
+        DecisionAgent::Testing::TestScenario.new(
+          id: "test_1",
+          context: { user_id: 123 }
+        ),
+        DecisionAgent::Testing::TestScenario.new(
+          id: "test_2",
+          context: { user_id: 456 }
+        )
+      ]
+    end
+
+    it "executes test scenarios sequentially" do
+      results = runner.run(scenarios, parallel: false)
+
+      expect(results.size).to eq(2)
+      expect(results.all?(&:success?)).to be true
+      expect(results[0].decision).to eq("approve")
+      expect(results[1].decision).to eq("approve")
+    end
+
+    it "executes test scenarios in parallel when enabled" do
+      results = runner.run(scenarios, parallel: true, thread_count: 2)
+
+      expect(results.size).to eq(2)
+      expect(results.all?(&:success?)).to be true
+    end
+
+    it "executes single scenario sequentially even with parallel enabled" do
+      single_scenario = [scenarios[0]]
+      results = runner.run(single_scenario, parallel: true)
+
+      expect(results.size).to eq(1)
+      expect(results.all?(&:success?)).to be true
+    end
+
+    it "handles empty scenarios array" do
+      results = runner.run([])
+      expect(results).to eq([])
+    end
+
+    it "calls progress callback during execution" do
+      progress_updates = []
+
+      runner.run(scenarios, progress_callback: lambda { |progress|
+        progress_updates << progress
+      })
+
+      expect(progress_updates.size).to be >= 2
+      expect(progress_updates.last[:completed]).to eq(2)
+      expect(progress_updates.last[:total]).to eq(2)
+      expect(progress_updates.last[:percentage]).to eq(100.0)
+    end
+
+    it "tracks execution time for each scenario" do
+      results = runner.run(scenarios)
+
+      results.each do |result|
+        expect(result.execution_time_ms).to be >= 0
+        expect(result.execution_time_ms).to be_a(Numeric)
+      end
+    end
+
+    it "handles errors gracefully" do
+      # Create an agent that will raise an error
+      error_evaluator = Class.new do
+        def evaluate(_context, _feedback: {})
+          raise StandardError, "Test error"
+        end
+      end.new
+
+      error_agent = DecisionAgent::Agent.new(evaluators: [error_evaluator])
+      error_runner = DecisionAgent::Testing::BatchTestRunner.new(error_agent)
+
+      results = error_runner.run(scenarios)
+
+      expect(results.size).to eq(2)
+      expect(results.none?(&:success?)).to be true
+      expect(results[0].error).to be_a(StandardError)
+    end
+
+    it "passes feedback to agent" do
+      feedback = { source: "batch_test" }
+      results = runner.run(scenarios, feedback: feedback)
+
+      expect(results.size).to eq(2)
+      expect(results.all?(&:success?)).to be true
+    end
+  end
+
+  describe "#statistics" do
+    it "returns empty hash when no results" do
+      expect(runner.statistics).to eq({})
+    end
+
+    it "calculates statistics from results" do
+      scenarios = [
+        DecisionAgent::Testing::TestScenario.new(id: "test_1", context: { user_id: 123 }),
+        DecisionAgent::Testing::TestScenario.new(id: "test_2", context: { user_id: 456 })
+      ]
+
+      runner.run(scenarios)
+      stats = runner.statistics
+
+      expect(stats[:total]).to eq(2)
+      expect(stats[:successful]).to eq(2)
+      expect(stats[:failed]).to eq(0)
+      expect(stats[:success_rate]).to eq(1.0)
+      expect(stats[:avg_execution_time_ms]).to be >= 0
+      expect(stats[:min_execution_time_ms]).to be >= 0
+      expect(stats[:max_execution_time_ms]).to be >= 0
+    end
+
+    it "handles nil execution times in statistics" do
+      scenarios = [
+        DecisionAgent::Testing::TestScenario.new(id: "test_1", context: { user_id: 123 })
+      ]
+      runner.run(scenarios)
+
+      # Manually add a result with nil execution time
+      runner.instance_variable_get(:@results) << DecisionAgent::Testing::TestResult.new(
+        scenario_id: "test_2",
+        execution_time_ms: nil
+      )
+
+      stats = runner.statistics
+      expect(stats[:total]).to eq(2)
+    end
+  end
+
+  describe "#resume" do
+    let(:scenarios) do
+      [
+        DecisionAgent::Testing::TestScenario.new(id: "test_1", context: { user_id: 123 }),
+        DecisionAgent::Testing::TestScenario.new(id: "test_2", context: { user_id: 456 })
+      ]
+    end
+
+    it "resumes from checkpoint file" do
+      checkpoint_file = Tempfile.new(["checkpoint", ".json"])
+      checkpoint_file.write(JSON.pretty_generate({ completed_scenario_ids: ["test_1"], last_updated: Time.now.to_i }))
+      checkpoint_file.close
+
+      results = runner.resume(scenarios, checkpoint_file.path)
+
+      # Should only run test_2 since test_1 is already completed
+      expect(results.size).to eq(1)
+      expect(results[0].scenario_id).to eq("test_2")
+
+      checkpoint_file.unlink
+    end
+  end
+
+  describe "checkpoint functionality" do
+    let(:scenarios) do
+      [
+        DecisionAgent::Testing::TestScenario.new(id: "test_1", context: { user_id: 123 }),
+        DecisionAgent::Testing::TestScenario.new(id: "test_2", context: { user_id: 456 })
+      ]
+    end
+
+    it "saves checkpoints during execution" do
+      checkpoint_file = Tempfile.new(["checkpoint", ".json"])
+      checkpoint_file.close
+      File.delete(checkpoint_file.path) # Start with no file
+
+      runner.run(scenarios, checkpoint_file: checkpoint_file.path)
+
+      # Checkpoint file should exist and contain completed scenario IDs
+      expect(File.exist?(checkpoint_file.path)).to be false # Should be cleaned up after completion
+    end
+
+    it "handles checkpoint file errors gracefully" do
+      checkpoint_file = Tempfile.new(["checkpoint", ".json"])
+      checkpoint_file.close
+
+      # Make file read-only to cause write errors
+      File.chmod(0o444, checkpoint_file.path)
+
+      expect do
+        runner.run(scenarios, checkpoint_file: checkpoint_file.path)
+      end.not_to raise_error
+
+      # Clean up - file might have been deleted, so check first
+      if File.exist?(checkpoint_file.path)
+        File.chmod(0o644, checkpoint_file.path)
+        checkpoint_file.unlink
+      end
+    end
+
+    it "loads checkpoint data correctly" do
+      checkpoint_file = Tempfile.new(["checkpoint", ".json"])
+      checkpoint_data = {
+        completed_scenario_ids: ["test_1"],
+        last_updated: Time.now.to_i
+      }
+      checkpoint_file.write(JSON.pretty_generate(checkpoint_data))
+      checkpoint_file.close
+
+      results = runner.run(scenarios, checkpoint_file: checkpoint_file.path)
+
+      # Should only execute test_2
+      expect(results.size).to eq(1)
+      expect(results[0].scenario_id).to eq("test_2")
+
+      checkpoint_file.unlink
+    end
+
+    it "handles invalid JSON in checkpoint file" do
+      checkpoint_file = Tempfile.new(["checkpoint", ".json"])
+      checkpoint_file.write("invalid json")
+      checkpoint_file.close
+
+      # Should handle gracefully and start fresh
+      results = runner.run(scenarios, checkpoint_file: checkpoint_file.path)
+      expect(results.size).to eq(2)
+
+      checkpoint_file.unlink
+    end
+  end
+
+  describe "TestResult" do
+    let(:result) do
+      DecisionAgent::Testing::TestResult.new(
+        scenario_id: "test_1",
+        decision: "approve",
+        confidence: 0.95,
+        execution_time_ms: 10.5
+      )
+    end
+
+    it "creates a successful test result" do
+      expect(result.success?).to be true
+      expect(result.scenario_id).to eq("test_1")
+      expect(result.decision).to eq("approve")
+      expect(result.confidence).to eq(0.95)
+      expect(result.execution_time_ms).to eq(10.5)
+    end
+
+    it "creates a failed test result" do
+      error = StandardError.new("Test error")
+      failed_result = DecisionAgent::Testing::TestResult.new(
+        scenario_id: "test_1",
+        error: error
+      )
+
+      expect(failed_result.success?).to be false
+      expect(failed_result.error).to eq(error)
+    end
+
+    it "converts to hash" do
+      hash = result.to_h
+
+      expect(hash[:scenario_id]).to eq("test_1")
+      expect(hash[:decision]).to eq("approve")
+      expect(hash[:confidence]).to eq(0.95)
+      expect(hash[:execution_time_ms]).to eq(10.5)
+      expect(hash[:success]).to be true
+    end
+
+    it "includes evaluations in hash" do
+      evaluation = DecisionAgent::Evaluation.new(
+        decision: "approve",
+        weight: 1.0,
+        reason: "Test",
+        evaluator_name: "TestEvaluator"
+      )
+      result_with_eval = DecisionAgent::Testing::TestResult.new(
+        scenario_id: "test_1",
+        decision: "approve",
+        evaluations: [evaluation]
+      )
+
+      hash = result_with_eval.to_h
+      expect(hash[:evaluations]).to be_an(Array)
+      expect(hash[:evaluations].first).to respond_to(:to_h)
+    end
+
+    it "handles nil decision and confidence" do
+      result = DecisionAgent::Testing::TestResult.new(
+        scenario_id: "test_1",
+        decision: nil,
+        confidence: nil
+      )
+
+      expect(result.decision).to be_nil
+      expect(result.confidence).to be_nil
+    end
+  end
+end
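
Judging from the checkpoint tests above, the checkpoint file is plain JSON holding the IDs of already-completed scenarios plus a timestamp. A minimal sketch of the file that `#resume` consumes (field names taken from the spec; the file path and surrounding code are assumptions):

require "json"

# Hypothetical checkpoint written after test_1 completed; mirrors the structure
# the spec builds with JSON.pretty_generate.
checkpoint = { completed_scenario_ids: ["test_1"], last_updated: Time.now.to_i }
File.write("checkpoint.json", JSON.pretty_generate(checkpoint))

# runner.resume(scenarios, "checkpoint.json") should then skip test_1 and run
# only the remaining scenarios.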

data/spec/testing/test_coverage_analyzer_spec.rb
@@ -0,0 +1,292 @@
+require "spec_helper"
+
+RSpec.describe DecisionAgent::Testing::TestCoverageAnalyzer do
+  let(:analyzer) { DecisionAgent::Testing::TestCoverageAnalyzer.new }
+
+  describe "#analyze" do
+    let(:evaluator) do
+      evaluator = DecisionAgent::Evaluators::StaticEvaluator.new(decision: "approve", weight: 1.0)
+      # Add metadata to simulate rule_id
+      allow(evaluator).to receive(:evaluate).and_wrap_original do |method, context, **kwargs|
+        evaluation = method.call(context, **kwargs)
+        # Create a new evaluation with metadata
+        DecisionAgent::Evaluation.new(
+          decision: evaluation.decision,
+          weight: evaluation.weight,
+          reason: evaluation.reason,
+          evaluator_name: evaluation.evaluator_name,
+          metadata: { rule_id: "rule_1", condition_id: "condition_1" }
+        )
+      end
+      evaluator
+    end
+
+    let(:agent) { DecisionAgent::Agent.new(evaluators: [evaluator]) }
+
+    let(:results) do
+      [
+        DecisionAgent::Testing::TestResult.new(
+          scenario_id: "test_1",
+          decision: "approve",
+          confidence: 0.95,
+          evaluations: [
+            DecisionAgent::Evaluation.new(
+              decision: "approve",
+              weight: 1.0,
+              reason: "Test",
+              evaluator_name: "TestEvaluator",
+              metadata: { rule_id: "rule_1", condition_id: "condition_1" }
+            )
+          ]
+        ),
+        DecisionAgent::Testing::TestResult.new(
+          scenario_id: "test_2",
+          decision: "approve",
+          confidence: 0.90,
+          evaluations: [
+            DecisionAgent::Evaluation.new(
+              decision: "approve",
+              weight: 1.0,
+              reason: "Test",
+              evaluator_name: "TestEvaluator",
+              metadata: { rule_id: "rule_2", condition_id: "condition_2" }
+            )
+          ]
+        )
+      ]
+    end
+
+    it "analyzes coverage from test results" do
+      report = analyzer.analyze(results, agent)
+
+      expect(report).to be_a(DecisionAgent::Testing::CoverageReport)
+      expect(report.covered_rules).to be >= 0
+      # Coverage percentage should be capped at 1.0
+      expect(report.coverage_percentage).to be <= 1.0
+      expect(report.coverage_percentage).to be >= 0.0
+    end
+
+    it "tracks executed rules" do
+      report = analyzer.analyze(results, agent)
+
+      # Should have tracked at least some rules
+      expect(report.rule_coverage).to be_an(Array)
+    end
+
+    it "identifies untested rules when agent is provided" do
+      # Create agent with multiple evaluators
+      evaluator1 = DecisionAgent::Evaluators::StaticEvaluator.new(decision: "approve", weight: 1.0)
+      evaluator2 = DecisionAgent::Evaluators::StaticEvaluator.new(decision: "reject", weight: 1.0)
+
+      multi_agent = DecisionAgent::Agent.new(evaluators: [evaluator1, evaluator2])
+
+      # Results only exercise one rule
+      single_result = [
+        DecisionAgent::Testing::TestResult.new(
+          scenario_id: "test_1",
+          decision: "approve",
+          confidence: 0.95,
+          evaluations: [
+            DecisionAgent::Evaluation.new(
+              decision: "approve",
+              weight: 1.0,
+              reason: "Test",
+              evaluator_name: evaluator1.class.name,
+              metadata: { rule_id: "rule_1" }
+            )
+          ]
+        )
+      ]
+
+      report = analyzer.analyze(single_result, multi_agent)
+
+      expect(report.total_rules).to be >= 1
+      expect(report.coverage_percentage).to be <= 1.0
+    end
+
+    it "handles agent with evaluators that have rules method" do
+      evaluator_with_rules = double("EvaluatorWithRules",
+                                    evaluate: nil,
+                                    rules: %w[rule_1 rule_2])
+      agent_with_rules = DecisionAgent::Agent.new(evaluators: [evaluator_with_rules])
+
+      report = analyzer.analyze([], agent_with_rules)
+      expect(report.total_rules).to be >= 2
+    end
+
+    it "handles agent with evaluators that have rule_id method" do
+      evaluator_with_rule_id = double("EvaluatorWithRuleId",
+                                      evaluate: nil,
+                                      rule_id: "my_rule_1")
+      agent_with_rule_id = DecisionAgent::Agent.new(evaluators: [evaluator_with_rule_id])
+
+      report = analyzer.analyze([], agent_with_rule_id)
+      expect(report.total_rules).to be >= 1
+    end
+
+    it "handles agent with evaluators that have conditions method" do
+      evaluator_with_conditions = double("EvaluatorWithConditions",
+                                         evaluate: nil,
+                                         conditions: %w[condition_1 condition_2])
+      agent_with_conditions = DecisionAgent::Agent.new(evaluators: [evaluator_with_conditions])
+
+      report = analyzer.analyze([], agent_with_conditions)
+      expect(report.condition_coverage).to be_an(Array)
+    end
+
+    it "handles agent with evaluators that have condition_id method" do
+      evaluator_with_condition_id = double("EvaluatorWithConditionId",
+                                           evaluate: nil,
+                                           condition_id: "my_condition_1")
+      agent_with_condition_id = DecisionAgent::Agent.new(evaluators: [evaluator_with_condition_id])
+
+      report = analyzer.analyze([], agent_with_condition_id)
+      expect(report.condition_coverage).to be_an(Array)
+    end
+
+    it "caps coverage percentage at 1.0" do
+      # Create a scenario where coverage could exceed 1.0
+      evaluator1 = DecisionAgent::Evaluators::StaticEvaluator.new(decision: "approve", weight: 1.0)
+      single_agent = DecisionAgent::Agent.new(evaluators: [evaluator1])
+
+      results_with_metadata = [
+        DecisionAgent::Testing::TestResult.new(
+          scenario_id: "test_1",
+          decision: "approve",
+          evaluations: [
+            DecisionAgent::Evaluation.new(
+              decision: "approve",
+              weight: 1.0,
+              reason: "Test",
+              evaluator_name: evaluator1.class.name,
+              metadata: { rule_id: "rule_1" }
+            )
+          ]
+        )
+      ]
+
+      report = analyzer.analyze(results_with_metadata, single_agent)
+      expect(report.coverage_percentage).to be <= 1.0
+    end
+
+    it "handles results without agent" do
+      report = analyzer.analyze(results, nil)
+
+      expect(report).to be_a(DecisionAgent::Testing::CoverageReport)
+      expect(report.covered_rules).to be >= 0
+    end
+
+    it "handles empty results" do
+      report = analyzer.analyze([], agent)
+
+      expect(report.covered_rules).to eq(0)
+      expect(report.coverage_percentage).to eq(0.0)
+    end
+
+    it "handles failed test results" do
+      failed_results = [
+        DecisionAgent::Testing::TestResult.new(
+          scenario_id: "test_1",
+          error: StandardError.new("Test failed")
+        )
+      ]
+
+      report = analyzer.analyze(failed_results, agent)
+
+      expect(report.covered_rules).to eq(0)
+    end
+
+    it "handles evaluations without metadata" do
+      results_no_metadata = [
+        DecisionAgent::Testing::TestResult.new(
+          scenario_id: "test_1",
+          decision: "approve",
+          evaluations: [
+            DecisionAgent::Evaluation.new(
+              decision: "approve",
+              weight: 1.0,
+              reason: "Test",
+              evaluator_name: "TestEvaluator"
+              # No metadata
+            )
+          ]
+        )
+      ]
+
+      report = analyzer.analyze(results_no_metadata, agent)
+      expect(report).to be_a(DecisionAgent::Testing::CoverageReport)
+    end
+
+    it "handles evaluations with evaluator_name as rule identifier" do
+      results_eval_name = [
+        DecisionAgent::Testing::TestResult.new(
+          scenario_id: "test_1",
+          decision: "approve",
+          evaluations: [
+            DecisionAgent::Evaluation.new(
+              decision: "approve",
+              weight: 1.0,
+              reason: "Test",
+              evaluator_name: "MyEvaluator"
+              # No metadata, should use evaluator_name
+            )
+          ]
+        )
+      ]
+
+      report = analyzer.analyze(results_eval_name, agent)
+      expect(report.covered_rules).to be >= 0
+    end
+  end
+
+  describe "#coverage_percentage" do
+    it "returns 0.0 when no rules executed" do
+      expect(analyzer.coverage_percentage).to eq(0.0)
+    end
+
+    it "returns 0.0 when rule_evaluation_count is empty" do
+      analyzer.instance_variable_set(:@executed_rules, Set.new(["rule_1"]))
+      analyzer.instance_variable_set(:@rule_evaluation_count, {})
+
+      expect(analyzer.coverage_percentage).to eq(0.0)
+    end
+  end
+
+  describe "CoverageReport" do
+    let(:report) do
+      DecisionAgent::Testing::CoverageReport.new(
+        total_rules: 10,
+        covered_rules: 7,
+        untested_rules: %w[rule_8 rule_9 rule_10],
+        coverage_percentage: 0.7,
+        rule_coverage: [
+          { rule_id: "rule_1", covered: true, execution_count: 5 }
+        ],
+        condition_coverage: [
+          { condition_id: "condition_1", covered: true, execution_count: 3 }
+        ]
+      )
+    end
+
+    it "creates a coverage report" do
+      expect(report.total_rules).to eq(10)
+      expect(report.covered_rules).to eq(7)
+      expect(report.untested_rules).to eq(%w[rule_8 rule_9 rule_10])
+      expect(report.coverage_percentage).to eq(0.7)
+    end
+
+    it "converts to hash" do
+      hash = report.to_h
+
+      expect(hash[:total_rules]).to eq(10)
+      expect(hash[:covered_rules]).to eq(7)
+      expect(hash[:coverage_percentage]).to eq(0.7)
+      expect(hash[:rule_coverage]).to be_an(Array)
+      expect(hash[:condition_coverage]).to be_an(Array)
+    end
+
+    it "freezes the report" do
+      expect(report.frozen?).to be true
+    end
+  end
+end