decision_agent 0.2.0 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (126) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +313 -8
  3. data/bin/decision_agent +104 -0
  4. data/lib/decision_agent/agent.rb +72 -1
  5. data/lib/decision_agent/context.rb +1 -0
  6. data/lib/decision_agent/data_enrichment/cache/memory_adapter.rb +86 -0
  7. data/lib/decision_agent/data_enrichment/cache_adapter.rb +49 -0
  8. data/lib/decision_agent/data_enrichment/circuit_breaker.rb +135 -0
  9. data/lib/decision_agent/data_enrichment/client.rb +220 -0
  10. data/lib/decision_agent/data_enrichment/config.rb +78 -0
  11. data/lib/decision_agent/data_enrichment/errors.rb +36 -0
  12. data/lib/decision_agent/decision.rb +102 -2
  13. data/lib/decision_agent/dmn/adapter.rb +135 -0
  14. data/lib/decision_agent/dmn/cache.rb +306 -0
  15. data/lib/decision_agent/dmn/decision_graph.rb +327 -0
  16. data/lib/decision_agent/dmn/decision_tree.rb +192 -0
  17. data/lib/decision_agent/dmn/errors.rb +30 -0
  18. data/lib/decision_agent/dmn/exporter.rb +217 -0
  19. data/lib/decision_agent/dmn/feel/evaluator.rb +819 -0
  20. data/lib/decision_agent/dmn/feel/functions.rb +420 -0
  21. data/lib/decision_agent/dmn/feel/parser.rb +349 -0
  22. data/lib/decision_agent/dmn/feel/simple_parser.rb +276 -0
  23. data/lib/decision_agent/dmn/feel/transformer.rb +372 -0
  24. data/lib/decision_agent/dmn/feel/types.rb +276 -0
  25. data/lib/decision_agent/dmn/importer.rb +77 -0
  26. data/lib/decision_agent/dmn/model.rb +197 -0
  27. data/lib/decision_agent/dmn/parser.rb +191 -0
  28. data/lib/decision_agent/dmn/testing.rb +333 -0
  29. data/lib/decision_agent/dmn/validator.rb +315 -0
  30. data/lib/decision_agent/dmn/versioning.rb +229 -0
  31. data/lib/decision_agent/dmn/visualizer.rb +513 -0
  32. data/lib/decision_agent/dsl/condition_evaluator.rb +984 -838
  33. data/lib/decision_agent/dsl/schema_validator.rb +53 -14
  34. data/lib/decision_agent/evaluators/dmn_evaluator.rb +308 -0
  35. data/lib/decision_agent/evaluators/json_rule_evaluator.rb +69 -9
  36. data/lib/decision_agent/explainability/condition_trace.rb +83 -0
  37. data/lib/decision_agent/explainability/explainability_result.rb +52 -0
  38. data/lib/decision_agent/explainability/rule_trace.rb +39 -0
  39. data/lib/decision_agent/explainability/trace_collector.rb +24 -0
  40. data/lib/decision_agent/monitoring/alert_manager.rb +5 -1
  41. data/lib/decision_agent/simulation/errors.rb +18 -0
  42. data/lib/decision_agent/simulation/impact_analyzer.rb +498 -0
  43. data/lib/decision_agent/simulation/monte_carlo_simulator.rb +635 -0
  44. data/lib/decision_agent/simulation/replay_engine.rb +486 -0
  45. data/lib/decision_agent/simulation/scenario_engine.rb +318 -0
  46. data/lib/decision_agent/simulation/scenario_library.rb +163 -0
  47. data/lib/decision_agent/simulation/shadow_test_engine.rb +287 -0
  48. data/lib/decision_agent/simulation/what_if_analyzer.rb +1002 -0
  49. data/lib/decision_agent/simulation.rb +17 -0
  50. data/lib/decision_agent/version.rb +1 -1
  51. data/lib/decision_agent/versioning/activerecord_adapter.rb +23 -8
  52. data/lib/decision_agent/web/dmn_editor.rb +426 -0
  53. data/lib/decision_agent/web/public/app.js +119 -0
  54. data/lib/decision_agent/web/public/dmn-editor.css +596 -0
  55. data/lib/decision_agent/web/public/dmn-editor.html +250 -0
  56. data/lib/decision_agent/web/public/dmn-editor.js +553 -0
  57. data/lib/decision_agent/web/public/index.html +52 -0
  58. data/lib/decision_agent/web/public/simulation.html +130 -0
  59. data/lib/decision_agent/web/public/simulation_impact.html +478 -0
  60. data/lib/decision_agent/web/public/simulation_replay.html +551 -0
  61. data/lib/decision_agent/web/public/simulation_shadow.html +546 -0
  62. data/lib/decision_agent/web/public/simulation_whatif.html +532 -0
  63. data/lib/decision_agent/web/public/styles.css +86 -0
  64. data/lib/decision_agent/web/server.rb +1059 -23
  65. data/lib/decision_agent.rb +60 -2
  66. metadata +105 -61
  67. data/spec/ab_testing/ab_test_assignment_spec.rb +0 -253
  68. data/spec/ab_testing/ab_test_manager_spec.rb +0 -612
  69. data/spec/ab_testing/ab_test_spec.rb +0 -270
  70. data/spec/ab_testing/ab_testing_agent_spec.rb +0 -481
  71. data/spec/ab_testing/storage/adapter_spec.rb +0 -64
  72. data/spec/ab_testing/storage/memory_adapter_spec.rb +0 -485
  73. data/spec/activerecord_thread_safety_spec.rb +0 -553
  74. data/spec/advanced_operators_spec.rb +0 -3150
  75. data/spec/agent_spec.rb +0 -289
  76. data/spec/api_contract_spec.rb +0 -430
  77. data/spec/audit_adapters_spec.rb +0 -92
  78. data/spec/auth/access_audit_logger_spec.rb +0 -394
  79. data/spec/auth/authenticator_spec.rb +0 -112
  80. data/spec/auth/password_reset_spec.rb +0 -294
  81. data/spec/auth/permission_checker_spec.rb +0 -207
  82. data/spec/auth/permission_spec.rb +0 -73
  83. data/spec/auth/rbac_adapter_spec.rb +0 -550
  84. data/spec/auth/rbac_config_spec.rb +0 -82
  85. data/spec/auth/role_spec.rb +0 -51
  86. data/spec/auth/session_manager_spec.rb +0 -172
  87. data/spec/auth/session_spec.rb +0 -112
  88. data/spec/auth/user_spec.rb +0 -130
  89. data/spec/comprehensive_edge_cases_spec.rb +0 -1777
  90. data/spec/context_spec.rb +0 -127
  91. data/spec/decision_agent_spec.rb +0 -96
  92. data/spec/decision_spec.rb +0 -423
  93. data/spec/dsl/condition_evaluator_spec.rb +0 -774
  94. data/spec/dsl_validation_spec.rb +0 -648
  95. data/spec/edge_cases_spec.rb +0 -353
  96. data/spec/evaluation_spec.rb +0 -364
  97. data/spec/evaluation_validator_spec.rb +0 -165
  98. data/spec/examples/feedback_aware_evaluator_spec.rb +0 -460
  99. data/spec/examples.txt +0 -1633
  100. data/spec/issue_verification_spec.rb +0 -759
  101. data/spec/json_rule_evaluator_spec.rb +0 -587
  102. data/spec/monitoring/alert_manager_spec.rb +0 -378
  103. data/spec/monitoring/metrics_collector_spec.rb +0 -499
  104. data/spec/monitoring/monitored_agent_spec.rb +0 -222
  105. data/spec/monitoring/prometheus_exporter_spec.rb +0 -242
  106. data/spec/monitoring/storage/activerecord_adapter_spec.rb +0 -498
  107. data/spec/monitoring/storage/base_adapter_spec.rb +0 -61
  108. data/spec/monitoring/storage/memory_adapter_spec.rb +0 -247
  109. data/spec/performance_optimizations_spec.rb +0 -486
  110. data/spec/replay_edge_cases_spec.rb +0 -699
  111. data/spec/replay_spec.rb +0 -210
  112. data/spec/rfc8785_canonicalization_spec.rb +0 -215
  113. data/spec/scoring_spec.rb +0 -225
  114. data/spec/spec_helper.rb +0 -60
  115. data/spec/testing/batch_test_importer_spec.rb +0 -693
  116. data/spec/testing/batch_test_runner_spec.rb +0 -307
  117. data/spec/testing/test_coverage_analyzer_spec.rb +0 -292
  118. data/spec/testing/test_result_comparator_spec.rb +0 -392
  119. data/spec/testing/test_scenario_spec.rb +0 -113
  120. data/spec/thread_safety_spec.rb +0 -482
  121. data/spec/thread_safety_spec.rb.broken +0 -878
  122. data/spec/versioning/adapter_spec.rb +0 -156
  123. data/spec/versioning_spec.rb +0 -1030
  124. data/spec/web/middleware/auth_middleware_spec.rb +0 -133
  125. data/spec/web/middleware/permission_middleware_spec.rb +0 -247
  126. data/spec/web_ui_rack_spec.rb +0 -1840
@@ -1,699 +0,0 @@
1
- require "spec_helper"
2
-
3
- RSpec.describe "DecisionAgent::Replay Edge Cases" do
4
- describe "handling rule changes" do
5
- let(:original_rules) do
6
- {
7
- version: "1.0",
8
- ruleset: "approval",
9
- rules: [
10
- {
11
- id: "auto_approve",
12
- if: { field: "score", op: "gte", value: 80 },
13
- then: { decision: "approve", weight: 0.9, reason: "High score" }
14
- }
15
- ]
16
- }
17
- end
18
-
19
- let(:modified_rules) do
20
- {
21
- version: "2.0",
22
- ruleset: "approval",
23
- rules: [
24
- {
25
- id: "auto_approve",
26
- if: { field: "score", op: "gte", value: 90 }, # Changed threshold
27
- then: { decision: "approve", weight: 0.9, reason: "Very high score" }
28
- }
29
- ]
30
- }
31
- end
32
-
33
- it "successfully replays with strict mode when rules haven't changed" do
34
- evaluator = DecisionAgent::Evaluators::JsonRuleEvaluator.new(rules_json: original_rules)
35
- agent = DecisionAgent::Agent.new(evaluators: [evaluator])
36
-
37
- original_result = agent.decide(context: { score: 85 })
38
-
39
- expect do
40
- DecisionAgent::Replay.run(original_result.audit_payload, strict: true)
41
- end.not_to raise_error
42
- end
43
-
44
- it "detects differences in strict mode when rules have changed" do
45
- # Original decision with old rules
46
- evaluator_v1 = DecisionAgent::Evaluators::JsonRuleEvaluator.new(rules_json: original_rules)
47
- agent_v1 = DecisionAgent::Agent.new(evaluators: [evaluator_v1])
48
- original_result = agent_v1.decide(context: { score: 85 })
49
-
50
- # Now the rules have changed (threshold increased from 80 to 90)
51
- # Score of 85 no longer matches, so replay should detect a difference
52
-
53
- # Replay uses the stored evaluations (not re-evaluating rules)
54
- # So it should succeed because replay uses static evaluators from the audit payload
55
- expect do
56
- DecisionAgent::Replay.run(original_result.audit_payload, strict: true)
57
- end.not_to raise_error
58
-
59
- # The replayed result should match the original
60
- replayed_result = DecisionAgent::Replay.run(original_result.audit_payload, strict: true)
61
- expect(replayed_result.decision).to eq(original_result.decision)
62
- expect(replayed_result.confidence).to eq(original_result.confidence)
63
- end
64
-
65
- it "allows evolution in non-strict mode" do
66
- evaluator = DecisionAgent::Evaluators::JsonRuleEvaluator.new(rules_json: original_rules)
67
- agent = DecisionAgent::Agent.new(evaluators: [evaluator])
68
-
69
- original_result = agent.decide(context: { score: 85 })
70
-
71
- # In non-strict mode, differences are logged but don't raise errors
72
- expect do
73
- DecisionAgent::Replay.run(original_result.audit_payload, strict: false)
74
- end.not_to raise_error
75
- end
76
- end
77
-
78
- describe "metadata comparison" do
79
- it "preserves and replays metadata correctly" do
80
- rules = {
81
- version: "1.0",
82
- ruleset: "test",
83
- rules: [
84
- {
85
- id: "metadata_test_rule",
86
- if: { field: "user", op: "eq", value: "alice" },
87
- then: {
88
- decision: "approve",
89
- weight: 0.8,
90
- reason: "Trusted user"
91
- }
92
- }
93
- ]
94
- }
95
-
96
- evaluator = DecisionAgent::Evaluators::JsonRuleEvaluator.new(rules_json: rules)
97
- agent = DecisionAgent::Agent.new(evaluators: [evaluator])
98
-
99
- original_result = agent.decide(context: { user: "alice" })
100
-
101
- # Verify metadata is in the audit payload
102
- expect(original_result.audit_payload[:evaluations].first[:metadata]).to include(
103
- rule_id: "metadata_test_rule"
104
- )
105
-
106
- # Replay should preserve metadata
107
- replayed_result = DecisionAgent::Replay.run(original_result.audit_payload, strict: true)
108
-
109
- expect(replayed_result.evaluations.first.metadata).to eq(
110
- original_result.evaluations.first.metadata
111
- )
112
- end
113
-
114
- it "handles metadata from static evaluators" do
115
- evaluator = DecisionAgent::Evaluators::StaticEvaluator.new(
116
- decision: "approve",
117
- weight: 0.7,
118
- reason: "No custom metadata"
119
- )
120
-
121
- agent = DecisionAgent::Agent.new(evaluators: [evaluator])
122
- original_result = agent.decide(context: { user: "bob" })
123
-
124
- # StaticEvaluator adds type: "static" by default
125
- expect(original_result.evaluations.first.metadata).to eq({ type: "static" })
126
-
127
- expect do
128
- DecisionAgent::Replay.run(original_result.audit_payload, strict: true)
129
- end.not_to raise_error
130
-
131
- replayed_result = DecisionAgent::Replay.run(original_result.audit_payload, strict: true)
132
- expect(replayed_result.evaluations.first.metadata).to eq({ type: "static" })
133
- end
134
-
135
- it "handles complex nested metadata" do
136
- evaluation = DecisionAgent::Evaluation.new(
137
- decision: "escalate",
138
- weight: 0.85,
139
- reason: "Complex case",
140
- evaluator_name: "CustomEvaluator",
141
- metadata: {
142
- user: { id: 123, role: "admin" },
143
- tags: %w[urgent important],
144
- history: [
145
- { action: "created", timestamp: "2025-01-01" },
146
- { action: "updated", timestamp: "2025-01-02" }
147
- ]
148
- }
149
- )
150
-
151
- static_eval = DecisionAgent::Evaluators::StaticEvaluator.new(
152
- decision: evaluation.decision,
153
- weight: evaluation.weight,
154
- reason: evaluation.reason
155
- )
156
-
157
- agent = DecisionAgent::Agent.new(evaluators: [static_eval])
158
- original_result = agent.decide(context: { test: true })
159
-
160
- # Manually construct audit payload with complex metadata
161
- payload = original_result.audit_payload.dup
162
- payload[:evaluations] = [evaluation.to_h]
163
-
164
- replayed_result = DecisionAgent::Replay.run(payload, strict: false)
165
-
166
- expect(replayed_result.evaluations.first.metadata).to be_a(Hash)
167
- end
168
- end
169
-
170
- describe "handling missing evaluators in replay" do
171
- it "replays successfully even if original evaluator class doesn't exist" do
172
- # This simulates a scenario where we had a CustomEvaluator that no longer exists
173
- # but we can still replay the decision from the audit log
174
-
175
- # WeightedAverage normalizes confidence: with one eval of weight 0.9, confidence = 0.9/0.9 = 1.0
176
- # So we need to use the correct confidence value that WeightedAverage would produce
177
- audit_payload = {
178
- timestamp: "2025-01-15T10:00:00.123456Z",
179
- context: { user: "charlie", action: "login" },
180
- feedback: {},
181
- evaluations: [
182
- {
183
- decision: "allow",
184
- weight: 0.9,
185
- reason: "User authenticated successfully",
186
- evaluator_name: "DeletedCustomAuthEvaluator", # This evaluator no longer exists
187
- metadata: { auth_method: "oauth", provider: "google" }
188
- }
189
- ],
190
- decision: "allow",
191
- confidence: 1.0, # WeightedAverage normalizes single eval to 1.0
192
- scoring_strategy: "DecisionAgent::Scoring::WeightedAverage",
193
- agent_version: "0.1.0",
194
- deterministic_hash: "abc123"
195
- }
196
-
197
- # Replay should work because it uses StaticEvaluator, not the original evaluator
198
- expect do
199
- DecisionAgent::Replay.run(audit_payload, strict: true)
200
- end.not_to raise_error
201
-
202
- replayed_result = DecisionAgent::Replay.run(audit_payload, strict: true)
203
-
204
- expect(replayed_result.decision).to eq("allow")
205
- expect(replayed_result.confidence).to eq(1.0)
206
- expect(replayed_result.evaluations.first.evaluator_name).to eq("DeletedCustomAuthEvaluator")
207
- end
208
-
209
- it "handles multiple evaluators where some are missing" do
210
- # WeightedAverage with two evals agreeing: confidence = (0.8 + 0.7) / (0.8 + 0.7) = 1.0
211
- audit_payload = {
212
- timestamp: "2025-01-15T10:00:00.123456Z",
213
- context: { user: "dave" },
214
- feedback: {},
215
- evaluations: [
216
- {
217
- decision: "approve",
218
- weight: 0.8,
219
- reason: "Rule matched",
220
- evaluator_name: "RuleEngine",
221
- metadata: { rule_id: "rule_123" }
222
- },
223
- {
224
- decision: "approve",
225
- weight: 0.7,
226
- reason: "ML model prediction",
227
- evaluator_name: "NonExistentMLEvaluator", # Missing evaluator
228
- metadata: { model_version: "v2.1" }
229
- }
230
- ],
231
- decision: "approve",
232
- confidence: 1.0, # Both agree, so 100% confidence
233
- scoring_strategy: "DecisionAgent::Scoring::WeightedAverage",
234
- agent_version: "0.1.0",
235
- deterministic_hash: "def456"
236
- }
237
-
238
- replayed_result = DecisionAgent::Replay.run(audit_payload, strict: true)
239
-
240
- expect(replayed_result.decision).to eq("approve")
241
- expect(replayed_result.evaluations.size).to eq(2)
242
- expect(replayed_result.evaluations.map(&:evaluator_name)).to include("NonExistentMLEvaluator")
243
- end
244
- end
245
-
246
- describe "scoring strategy evolution" do
247
- it "handles unknown scoring strategies gracefully" do
248
- audit_payload = {
249
- timestamp: "2025-01-15T10:00:00.123456Z",
250
- context: { test: true },
251
- feedback: {},
252
- evaluations: [
253
- {
254
- decision: "approve",
255
- weight: 0.9,
256
- reason: "Test",
257
- evaluator_name: "TestEvaluator",
258
- metadata: {}
259
- }
260
- ],
261
- decision: "approve",
262
- confidence: 0.9,
263
- scoring_strategy: "DecisionAgent::Scoring::DeprecatedBayesianStrategy", # Doesn't exist
264
- agent_version: "0.1.0",
265
- deterministic_hash: "ghi789"
266
- }
267
-
268
- # Should fall back to WeightedAverage
269
- expect do
270
- DecisionAgent::Replay.run(audit_payload, strict: false)
271
- end.not_to raise_error
272
-
273
- replayed_result = DecisionAgent::Replay.run(audit_payload, strict: false)
274
- expect(replayed_result.decision).to eq("approve")
275
- end
276
-
277
- it "detects scoring strategy mismatch in strict mode" do
278
- evaluator = DecisionAgent::Evaluators::StaticEvaluator.new(
279
- decision: "approve",
280
- weight: 0.6,
281
- reason: "Test"
282
- )
283
-
284
- # Create decision with WeightedAverage
285
- agent_weighted = DecisionAgent::Agent.new(
286
- evaluators: [evaluator],
287
- scoring_strategy: DecisionAgent::Scoring::WeightedAverage.new
288
- )
289
-
290
- original_result = agent_weighted.decide(context: { test: true })
291
-
292
- # Replay uses the stored scoring strategy from the audit payload
293
- # So it should replay successfully
294
- expect do
295
- DecisionAgent::Replay.run(original_result.audit_payload, strict: true)
296
- end.not_to raise_error
297
- end
298
- end
299
-
300
- describe "audit payload validation" do
301
- it "requires context field" do
302
- incomplete_payload = {
303
- evaluations: [],
304
- decision: "test",
305
- confidence: 0.5
306
- }
307
-
308
- expect do
309
- DecisionAgent::Replay.run(incomplete_payload, strict: false)
310
- end.to raise_error(DecisionAgent::InvalidRuleDslError, /missing required key: context/)
311
- end
312
-
313
- it "requires evaluations field" do
314
- incomplete_payload = {
315
- context: { test: true },
316
- decision: "test",
317
- confidence: 0.5
318
- }
319
-
320
- expect do
321
- DecisionAgent::Replay.run(incomplete_payload, strict: false)
322
- end.to raise_error(DecisionAgent::InvalidRuleDslError, /missing required key: evaluations/)
323
- end
324
-
325
- it "requires decision field" do
326
- incomplete_payload = {
327
- context: { test: true },
328
- evaluations: [],
329
- confidence: 0.5
330
- }
331
-
332
- expect do
333
- DecisionAgent::Replay.run(incomplete_payload, strict: false)
334
- end.to raise_error(DecisionAgent::InvalidRuleDslError, /missing required key: decision/)
335
- end
336
-
337
- it "requires confidence field" do
338
- incomplete_payload = {
339
- context: { test: true },
340
- evaluations: [],
341
- decision: "test"
342
- }
343
-
344
- expect do
345
- DecisionAgent::Replay.run(incomplete_payload, strict: false)
346
- end.to raise_error(DecisionAgent::InvalidRuleDslError, /missing required key: confidence/)
347
- end
348
-
349
- it "accepts both symbol and string keys" do
350
- # Use MaxWeight strategy which preserves the original weight as confidence
351
- payload_with_strings = {
352
- "timestamp" => "2025-01-15T10:00:00.123456Z",
353
- "context" => { "test" => true },
354
- "feedback" => {},
355
- "evaluations" => [
356
- {
357
- "decision" => "approve",
358
- "weight" => 0.9,
359
- "reason" => "Test",
360
- "evaluator_name" => "TestEvaluator",
361
- "metadata" => {}
362
- }
363
- ],
364
- "decision" => "approve",
365
- "confidence" => 0.9,
366
- "scoring_strategy" => "DecisionAgent::Scoring::MaxWeight"
367
- }
368
-
369
- expect do
370
- DecisionAgent::Replay.run(payload_with_strings, strict: true)
371
- end.not_to raise_error
372
- end
373
- end
374
-
375
- describe "deterministic hash verification" do
376
- it "can verify replay produced the same deterministic hash" do
377
- evaluator = DecisionAgent::Evaluators::StaticEvaluator.new(
378
- decision: "approve",
379
- weight: 0.8,
380
- reason: "Test"
381
- )
382
-
383
- agent = DecisionAgent::Agent.new(evaluators: [evaluator])
384
- original_result = agent.decide(context: { user: "test" })
385
-
386
- original_hash = original_result.audit_payload[:deterministic_hash]
387
-
388
- replayed_result = DecisionAgent::Replay.run(original_result.audit_payload, strict: true)
389
- replayed_hash = replayed_result.audit_payload[:deterministic_hash]
390
-
391
- # Hashes should match because same context, evaluations, decision, confidence, and strategy
392
- expect(replayed_hash).to eq(original_hash)
393
- end
394
-
395
- it "hash changes when context changes" do
396
- evaluator = DecisionAgent::Evaluators::StaticEvaluator.new(
397
- decision: "approve",
398
- weight: 0.8,
399
- reason: "Test"
400
- )
401
-
402
- agent = DecisionAgent::Agent.new(evaluators: [evaluator])
403
-
404
- result1 = agent.decide(context: { user: "alice" })
405
- result2 = agent.decide(context: { user: "bob" })
406
-
407
- expect(result1.audit_payload[:deterministic_hash]).not_to eq(
408
- result2.audit_payload[:deterministic_hash]
409
- )
410
- end
411
- end
412
-
413
- describe "feedback preservation in replay" do
414
- it "preserves original feedback in replay" do
415
- evaluator = DecisionAgent::Evaluators::StaticEvaluator.new(
416
- decision: "approve",
417
- weight: 0.8,
418
- reason: "Test"
419
- )
420
-
421
- agent = DecisionAgent::Agent.new(evaluators: [evaluator])
422
-
423
- original_feedback = { user_id: "manager_123", source: "manual_review" }
424
- original_result = agent.decide(context: { test: true }, feedback: original_feedback)
425
-
426
- expect(original_result.audit_payload[:feedback]).to eq(original_feedback)
427
-
428
- replayed_result = DecisionAgent::Replay.run(original_result.audit_payload, strict: true)
429
-
430
- expect(replayed_result.audit_payload[:feedback]).to eq(original_feedback)
431
- end
432
-
433
- it "handles empty feedback" do
434
- evaluator = DecisionAgent::Evaluators::StaticEvaluator.new(
435
- decision: "approve",
436
- weight: 0.8,
437
- reason: "Test"
438
- )
439
-
440
- agent = DecisionAgent::Agent.new(evaluators: [evaluator])
441
- original_result = agent.decide(context: { test: true })
442
-
443
- expect(original_result.audit_payload[:feedback]).to eq({})
444
-
445
- replayed_result = DecisionAgent::Replay.run(original_result.audit_payload, strict: true)
446
- expect(replayed_result.audit_payload[:feedback]).to eq({})
447
- end
448
- end
449
-
450
- describe "version mismatch scenarios" do
451
- it "logs warning when agent_version differs in non-strict mode" do
452
- evaluator = DecisionAgent::Evaluators::StaticEvaluator.new(
453
- decision: "approve",
454
- weight: 0.8,
455
- reason: "Test"
456
- )
457
-
458
- agent = DecisionAgent::Agent.new(evaluators: [evaluator])
459
- original_result = agent.decide(context: { test: true })
460
-
461
- # Modify agent_version
462
- modified_payload = original_result.audit_payload.dup
463
- modified_payload[:agent_version] = "99.0.0" # Different version
464
-
465
- # Non-strict mode should log but not raise
466
- expect do
467
- DecisionAgent::Replay.run(modified_payload, strict: false)
468
- end.not_to raise_error
469
-
470
- # Should successfully replay despite version difference
471
- replayed_result = DecisionAgent::Replay.run(modified_payload, strict: false)
472
- expect(replayed_result.decision).to eq("approve")
473
- end
474
-
475
- it "accepts different agent_version in non-strict mode" do
476
- audit_payload = {
477
- timestamp: "2025-01-15T10:00:00.123456Z",
478
- context: { test: true },
479
- feedback: {},
480
- evaluations: [
481
- {
482
- decision: "approve",
483
- weight: 0.9,
484
- reason: "Test",
485
- evaluator_name: "TestEvaluator",
486
- metadata: {}
487
- }
488
- ],
489
- decision: "approve",
490
- confidence: 1.0,
491
- scoring_strategy: "DecisionAgent::Scoring::WeightedAverage",
492
- agent_version: "0.0.1", # Old version
493
- deterministic_hash: "old_hash"
494
- }
495
-
496
- # Should accept and replay successfully
497
- result = DecisionAgent::Replay.run(audit_payload, strict: false)
498
- expect(result.decision).to eq("approve")
499
- end
500
-
501
- it "replays successfully in strict mode regardless of version" do
502
- evaluator = DecisionAgent::Evaluators::StaticEvaluator.new(
503
- decision: "approve",
504
- weight: 0.8,
505
- reason: "Test"
506
- )
507
-
508
- agent = DecisionAgent::Agent.new(evaluators: [evaluator])
509
- original_result = agent.decide(context: { test: true })
510
-
511
- # Modify agent_version
512
- modified_payload = original_result.audit_payload.dup
513
- modified_payload[:agent_version] = "2.0.0"
514
-
515
- # Strict mode should still work because version is not part of deterministic comparison
516
- # (only decision and confidence are compared in strict mode)
517
- expect do
518
- DecisionAgent::Replay.run(modified_payload, strict: true)
519
- end.not_to raise_error
520
- end
521
- end
522
-
523
- describe "corrupted audit payload scenarios" do
524
- it "handles missing deterministic_hash gracefully" do
525
- audit_payload = {
526
- timestamp: "2025-01-15T10:00:00.123456Z",
527
- context: { test: true },
528
- feedback: {},
529
- evaluations: [
530
- {
531
- decision: "approve",
532
- weight: 0.9,
533
- reason: "Test",
534
- evaluator_name: "TestEvaluator",
535
- metadata: {}
536
- }
537
- ],
538
- decision: "approve",
539
- confidence: 1.0,
540
- scoring_strategy: "DecisionAgent::Scoring::WeightedAverage",
541
- agent_version: "0.1.0"
542
- # deterministic_hash is missing
543
- }
544
-
545
- # Should not raise error, just creates new hash during replay
546
- expect do
547
- DecisionAgent::Replay.run(audit_payload, strict: false)
548
- end.not_to raise_error
549
-
550
- result = DecisionAgent::Replay.run(audit_payload, strict: false)
551
- expect(result.decision).to eq("approve")
552
- expect(result.audit_payload[:deterministic_hash]).to be_a(String)
553
- end
554
-
555
- it "handles invalid deterministic_hash gracefully" do
556
- audit_payload = {
557
- timestamp: "2025-01-15T10:00:00.123456Z",
558
- context: { test: true },
559
- feedback: {},
560
- evaluations: [
561
- {
562
- decision: "approve",
563
- weight: 0.9,
564
- reason: "Test",
565
- evaluator_name: "TestEvaluator",
566
- metadata: {}
567
- }
568
- ],
569
- decision: "approve",
570
- confidence: 1.0,
571
- scoring_strategy: "DecisionAgent::Scoring::WeightedAverage",
572
- agent_version: "0.1.0",
573
- deterministic_hash: "corrupted_invalid_hash_12345"
574
- }
575
-
576
- # Should replay successfully, generating new hash
577
- result = DecisionAgent::Replay.run(audit_payload, strict: false)
578
- expect(result.decision).to eq("approve")
579
- # New hash should be different from corrupted one
580
- expect(result.audit_payload[:deterministic_hash]).not_to eq("corrupted_invalid_hash_12345")
581
- end
582
-
583
- it "validates required fields before replay" do
584
- # Missing context
585
- expect do
586
- DecisionAgent::Replay.run({ decision: "test", confidence: 0.5, evaluations: [] }, strict: true)
587
- end.to raise_error(DecisionAgent::InvalidRuleDslError, /context/)
588
-
589
- # Missing evaluations
590
- expect do
591
- DecisionAgent::Replay.run({ context: {}, decision: "test", confidence: 0.5 }, strict: true)
592
- end.to raise_error(DecisionAgent::InvalidRuleDslError, /evaluations/)
593
-
594
- # Missing decision
595
- expect do
596
- DecisionAgent::Replay.run({ context: {}, evaluations: [], confidence: 0.5 }, strict: true)
597
- end.to raise_error(DecisionAgent::InvalidRuleDslError, /decision/)
598
-
599
- # Missing confidence
600
- expect do
601
- DecisionAgent::Replay.run({ context: {}, evaluations: [], decision: "test" }, strict: true)
602
- end.to raise_error(DecisionAgent::InvalidRuleDslError, /confidence/)
603
- end
604
-
605
- it "handles evaluation with invalid weight" do
606
- audit_payload = {
607
- timestamp: "2025-01-15T10:00:00.123456Z",
608
- context: { test: true },
609
- feedback: {},
610
- evaluations: [
611
- {
612
- decision: "approve",
613
- weight: 2.5, # Weight > 1.0, invalid
614
- reason: "Test",
615
- evaluator_name: "TestEvaluator",
616
- metadata: {}
617
- }
618
- ],
619
- decision: "approve",
620
- confidence: 1.0,
621
- scoring_strategy: "DecisionAgent::Scoring::WeightedAverage"
622
- }
623
-
624
- # Invalid weight (> 1.0) should raise error when creating Evaluation
625
- expect do
626
- DecisionAgent::Replay.run(audit_payload, strict: false)
627
- end.to raise_error(DecisionAgent::InvalidWeightError)
628
- end
629
-
630
- it "handles completely empty audit payload" do
631
- expect do
632
- DecisionAgent::Replay.run({}, strict: false)
633
- end.to raise_error(DecisionAgent::InvalidRuleDslError)
634
- end
635
-
636
- it "handles nil audit payload" do
637
- expect do
638
- DecisionAgent::Replay.run(nil, strict: false)
639
- end.to raise_error
640
- end
641
- end
642
-
643
- describe "scoring strategy class rename scenarios" do
644
- it "handles renamed scoring strategy class in non-strict mode" do
645
- audit_payload = {
646
- timestamp: "2025-01-15T10:00:00.123456Z",
647
- context: { test: true },
648
- feedback: {},
649
- evaluations: [
650
- {
651
- decision: "approve",
652
- weight: 0.9,
653
- reason: "Test",
654
- evaluator_name: "TestEvaluator",
655
- metadata: {}
656
- }
657
- ],
658
- decision: "approve",
659
- confidence: 0.9,
660
- scoring_strategy: "DecisionAgent::Scoring::OldStrategyName", # Renamed or deleted
661
- agent_version: "0.1.0"
662
- }
663
-
664
- # Should fall back to default strategy (WeightedAverage)
665
- expect do
666
- DecisionAgent::Replay.run(audit_payload, strict: false)
667
- end.not_to raise_error
668
-
669
- result = DecisionAgent::Replay.run(audit_payload, strict: false)
670
- expect(result.decision).to eq("approve")
671
- end
672
-
673
- it "handles custom scoring strategy not in current codebase" do
674
- audit_payload = {
675
- timestamp: "2025-01-15T10:00:00.123456Z",
676
- context: { test: true },
677
- feedback: {},
678
- evaluations: [
679
- {
680
- decision: "approve",
681
- weight: 0.85,
682
- reason: "Test",
683
- evaluator_name: "TestEvaluator",
684
- metadata: {}
685
- }
686
- ],
687
- decision: "approve",
688
- confidence: 0.85,
689
- scoring_strategy: "MyCompany::CustomMLBasedScoringStrategy", # Custom strategy
690
- agent_version: "0.1.0"
691
- }
692
-
693
- # Should use fallback strategy
694
- result = DecisionAgent::Replay.run(audit_payload, strict: false)
695
- expect(result).not_to be_nil
696
- expect(result.decision).to eq("approve")
697
- end
698
- end
699
- end