decision_agent 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. checksums.yaml +7 -0
  2. data/LICENSE.txt +21 -0
  3. data/README.md +1060 -0
  4. data/bin/decision_agent +104 -0
  5. data/lib/decision_agent/agent.rb +147 -0
  6. data/lib/decision_agent/audit/adapter.rb +9 -0
  7. data/lib/decision_agent/audit/logger_adapter.rb +27 -0
  8. data/lib/decision_agent/audit/null_adapter.rb +8 -0
  9. data/lib/decision_agent/context.rb +42 -0
  10. data/lib/decision_agent/decision.rb +51 -0
  11. data/lib/decision_agent/dsl/condition_evaluator.rb +133 -0
  12. data/lib/decision_agent/dsl/rule_parser.rb +36 -0
  13. data/lib/decision_agent/dsl/schema_validator.rb +275 -0
  14. data/lib/decision_agent/errors.rb +62 -0
  15. data/lib/decision_agent/evaluation.rb +52 -0
  16. data/lib/decision_agent/evaluators/base.rb +15 -0
  17. data/lib/decision_agent/evaluators/json_rule_evaluator.rb +51 -0
  18. data/lib/decision_agent/evaluators/static_evaluator.rb +31 -0
  19. data/lib/decision_agent/replay/replay.rb +147 -0
  20. data/lib/decision_agent/scoring/base.rb +19 -0
  21. data/lib/decision_agent/scoring/consensus.rb +40 -0
  22. data/lib/decision_agent/scoring/max_weight.rb +16 -0
  23. data/lib/decision_agent/scoring/threshold.rb +40 -0
  24. data/lib/decision_agent/scoring/weighted_average.rb +26 -0
  25. data/lib/decision_agent/version.rb +3 -0
  26. data/lib/decision_agent/web/public/app.js +580 -0
  27. data/lib/decision_agent/web/public/index.html +190 -0
  28. data/lib/decision_agent/web/public/styles.css +558 -0
  29. data/lib/decision_agent/web/server.rb +255 -0
  30. data/lib/decision_agent.rb +29 -0
  31. data/spec/agent_spec.rb +249 -0
  32. data/spec/api_contract_spec.rb +430 -0
  33. data/spec/audit_adapters_spec.rb +74 -0
  34. data/spec/comprehensive_edge_cases_spec.rb +1777 -0
  35. data/spec/context_spec.rb +84 -0
  36. data/spec/dsl_validation_spec.rb +648 -0
  37. data/spec/edge_cases_spec.rb +353 -0
  38. data/spec/examples/feedback_aware_evaluator_spec.rb +460 -0
  39. data/spec/json_rule_evaluator_spec.rb +587 -0
  40. data/spec/replay_edge_cases_spec.rb +699 -0
  41. data/spec/replay_spec.rb +210 -0
  42. data/spec/scoring_spec.rb +225 -0
  43. data/spec/spec_helper.rb +28 -0
  44. metadata +133 -0
@@ -0,0 +1,460 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "spec_helper"
4
+ require_relative "../../examples/feedback_aware_evaluator"
5
+
6
+ RSpec.describe Examples::FeedbackAwareEvaluator do
7
+ describe "initialization" do
8
+ it "creates an evaluator with base decision and weight" do
9
+ evaluator = described_class.new(base_decision: "approve", base_weight: 0.8)
10
+ expect(evaluator).to be_a(DecisionAgent::Evaluators::Base)
11
+ end
12
+
13
+ it "uses default weight of 0.5 when not specified" do
14
+ evaluator = described_class.new(base_decision: "approve")
15
+ context = DecisionAgent::Context.new({})
16
+ result = evaluator.evaluate(context, feedback: {})
17
+ expect(result.weight).to eq(0.5)
18
+ end
19
+ end
20
+
21
+ describe "#evaluate with no feedback" do
22
+ let(:evaluator) { described_class.new(base_decision: "approve", base_weight: 0.8) }
23
+ let(:context) { DecisionAgent::Context.new({ user: "alice" }) }
24
+
25
+ it "returns base decision with base weight" do
26
+ result = evaluator.evaluate(context, feedback: {})
27
+
28
+ expect(result.decision).to eq("approve")
29
+ expect(result.weight).to eq(0.8)
30
+ expect(result.reason).to include("Base decision")
31
+ end
32
+
33
+ it "includes metadata about feedback presence" do
34
+ result = evaluator.evaluate(context, feedback: {})
35
+
36
+ expect(result.metadata[:base_weight]).to eq(0.8)
37
+ expect(result.metadata[:feedback_present]).to be false
38
+ expect(result.metadata[:feedback_keys]).to eq([])
39
+ end
40
+
41
+ it "tracks feedback keys when feedback is provided" do
42
+ result = evaluator.evaluate(context, feedback: { custom_key: "value" })
43
+
44
+ expect(result.metadata[:feedback_present]).to be true
45
+ expect(result.metadata[:feedback_keys]).to include(:custom_key)
46
+ end
47
+ end
48
+
49
+ describe "#evaluate with override feedback" do
50
+ let(:evaluator) { described_class.new(base_decision: "approve", base_weight: 0.8) }
51
+ let(:context) { DecisionAgent::Context.new({ user: "bob" }) }
52
+
53
+ it "overrides the base decision" do
54
+ result = evaluator.evaluate(
55
+ context,
56
+ feedback: { override: "reject" }
57
+ )
58
+
59
+ expect(result.decision).to eq("reject")
60
+ expect(result.decision).not_to eq("approve")
61
+ end
62
+
63
+ it "uses high confidence weight for manual override" do
64
+ result = evaluator.evaluate(
65
+ context,
66
+ feedback: { override: "reject" }
67
+ )
68
+
69
+ expect(result.weight).to eq(0.9)
70
+ end
71
+
72
+ it "uses provided reason from feedback" do
73
+ result = evaluator.evaluate(
74
+ context,
75
+ feedback: { override: "reject", reason: "Fraud detected" }
76
+ )
77
+
78
+ expect(result.reason).to eq("Fraud detected")
79
+ end
80
+
81
+ it "uses default reason when none provided" do
82
+ result = evaluator.evaluate(
83
+ context,
84
+ feedback: { override: "reject" }
85
+ )
86
+
87
+ expect(result.reason).to eq("Manual override from feedback")
88
+ end
89
+
90
+ it "includes override metadata" do
91
+ result = evaluator.evaluate(
92
+ context,
93
+ feedback: { override: "manual_review", reason: "Complex case" }
94
+ )
95
+
96
+ expect(result.metadata[:feedback_override]).to be true
97
+ expect(result.metadata[:original_decision]).to eq("approve")
98
+ expect(result.metadata[:override_timestamp]).to be_a(String)
99
+ end
100
+
101
+ it "takes precedence over other feedback types" do
102
+ result = evaluator.evaluate(
103
+ context,
104
+ feedback: {
105
+ override: "escalate",
106
+ past_accuracy: 0.95,
107
+ source: "expert_review"
108
+ }
109
+ )
110
+
111
+ # Override should take precedence
112
+ expect(result.decision).to eq("escalate")
113
+ expect(result.weight).to eq(0.9)
114
+ end
115
+ end
116
+
117
+ describe "#evaluate with past_accuracy feedback" do
118
+ let(:evaluator) { described_class.new(base_decision: "approve", base_weight: 0.8) }
119
+ let(:context) { DecisionAgent::Context.new({ user: "charlie" }) }
120
+
121
+ it "adjusts weight based on past accuracy" do
122
+ result = evaluator.evaluate(
123
+ context,
124
+ feedback: { past_accuracy: 0.5 }
125
+ )
126
+
127
+ # 0.8 * 0.5 = 0.4
128
+ expect(result.weight).to eq(0.4)
129
+ expect(result.decision).to eq("approve")
130
+ end
131
+
132
+ it "boosts weight with high accuracy" do
133
+ result = evaluator.evaluate(
134
+ context,
135
+ feedback: { past_accuracy: 1.0 }
136
+ )
137
+
138
+ # 0.8 * 1.0 = 0.8
139
+ expect(result.weight).to eq(0.8)
140
+ end
141
+
142
+ it "reduces weight with low accuracy" do
143
+ result = evaluator.evaluate(
144
+ context,
145
+ feedback: { past_accuracy: 0.25 }
146
+ )
147
+
148
+ # 0.8 * 0.25 = 0.2
149
+ expect(result.weight).to eq(0.2)
150
+ end
151
+
152
+ it "clamps weight to minimum 0.0" do
153
+ result = evaluator.evaluate(
154
+ context,
155
+ feedback: { past_accuracy: -1.0 } # Invalid, but should be handled
156
+ )
157
+
158
+ expect(result.weight).to be >= 0.0
159
+ end
160
+
161
+ it "clamps weight to maximum 1.0" do
162
+ result = evaluator.evaluate(
163
+ context,
164
+ feedback: { past_accuracy: 2.0 } # Would produce 1.6, should clamp to 1.0
165
+ )
166
+
167
+ expect(result.weight).to be <= 1.0
168
+ end
169
+
170
+ it "includes adjustment metadata" do
171
+ result = evaluator.evaluate(
172
+ context,
173
+ feedback: { past_accuracy: 0.95 }
174
+ )
175
+
176
+ expect(result.metadata[:base_weight]).to eq(0.8)
177
+ expect(result.metadata[:adjusted_weight]).to eq(0.76)
178
+ expect(result.metadata[:past_accuracy]).to eq(0.95)
179
+ expect(result.metadata[:adjustment_factor]).to eq(0.95)
180
+ end
181
+
182
+ it "provides descriptive reason with accuracy percentage" do
183
+ result = evaluator.evaluate(
184
+ context,
185
+ feedback: { past_accuracy: 0.95 }
186
+ )
187
+
188
+ expect(result.reason).to include("95%")
189
+ expect(result.reason).to include("past accuracy")
190
+ end
191
+ end
192
+
193
+ describe "#evaluate with source feedback" do
194
+ let(:evaluator) { described_class.new(base_decision: "approve", base_weight: 0.8) }
195
+ let(:context) { DecisionAgent::Context.new({ user: "dave" }) }
196
+
197
+ it "reduces weight for user_override source" do
198
+ result = evaluator.evaluate(
199
+ context,
200
+ feedback: { source: "user_override" }
201
+ )
202
+
203
+ # 0.8 * 0.5 = 0.4
204
+ expect(result.weight).to eq(0.4)
205
+ expect(result.metadata[:multiplier]).to eq(0.5)
206
+ end
207
+
208
+ it "maintains weight for automated_test source" do
209
+ result = evaluator.evaluate(
210
+ context,
211
+ feedback: { source: "automated_test" }
212
+ )
213
+
214
+ # 0.8 * 1.0 = 0.8
215
+ expect(result.weight).to eq(0.8)
216
+ expect(result.metadata[:multiplier]).to eq(1.0)
217
+ end
218
+
219
+ it "boosts weight for peer_review source" do
220
+ result = evaluator.evaluate(
221
+ context,
222
+ feedback: { source: "peer_review" }
223
+ )
224
+
225
+ # 0.8 * 1.1 = 0.88
226
+ expect(result.weight).to be_within(0.0001).of(0.88)
227
+ expect(result.metadata[:multiplier]).to eq(1.1)
228
+ end
229
+
230
+ it "boosts weight significantly for expert_review source" do
231
+ result = evaluator.evaluate(
232
+ context,
233
+ feedback: { source: "expert_review" }
234
+ )
235
+
236
+ # 0.8 * 1.2 = 0.96
237
+ expect(result.weight).to eq(0.96)
238
+ expect(result.metadata[:multiplier]).to eq(1.2)
239
+ end
240
+
241
+ it "uses default multiplier for unknown source" do
242
+ result = evaluator.evaluate(
243
+ context,
244
+ feedback: { source: "unknown_source" }
245
+ )
246
+
247
+ # 0.8 * 1.0 = 0.8 (default multiplier)
248
+ expect(result.weight).to eq(0.8)
249
+ expect(result.metadata[:multiplier]).to eq(1.0)
250
+ end
251
+
252
+ it "clamps boosted weight to 1.0" do
253
+ high_weight_evaluator = described_class.new(base_decision: "approve", base_weight: 0.9)
254
+
255
+ result = high_weight_evaluator.evaluate(
256
+ context,
257
+ feedback: { source: "expert_review" } # 0.9 * 1.2 = 1.08, should clamp to 1.0
258
+ )
259
+
260
+ expect(result.weight).to eq(1.0)
261
+ end
262
+
263
+ it "includes source metadata" do
264
+ result = evaluator.evaluate(
265
+ context,
266
+ feedback: { source: "peer_review" }
267
+ )
268
+
269
+ expect(result.metadata[:feedback_source]).to eq("peer_review")
270
+ expect(result.metadata[:base_weight]).to eq(0.8)
271
+ expect(result.metadata[:adjusted_weight]).to be_within(0.0001).of(0.88)
272
+ end
273
+
274
+ it "provides descriptive reason with source" do
275
+ result = evaluator.evaluate(
276
+ context,
277
+ feedback: { source: "expert_review" }
278
+ )
279
+
280
+ expect(result.reason).to include("expert_review")
281
+ expect(result.reason).to include("feedback source")
282
+ end
283
+ end
284
+
285
+ describe "feedback priority" do
286
+ let(:evaluator) { described_class.new(base_decision: "approve", base_weight: 0.8) }
287
+ let(:context) { DecisionAgent::Context.new({}) }
288
+
289
+ it "override takes precedence over past_accuracy" do
290
+ result = evaluator.evaluate(
291
+ context,
292
+ feedback: { override: "reject", past_accuracy: 0.95 }
293
+ )
294
+
295
+ expect(result.decision).to eq("reject")
296
+ expect(result.weight).to eq(0.9) # Override weight, not adjusted weight
297
+ end
298
+
299
+ it "override takes precedence over source" do
300
+ result = evaluator.evaluate(
301
+ context,
302
+ feedback: { override: "escalate", source: "expert_review" }
303
+ )
304
+
305
+ expect(result.decision).to eq("escalate")
306
+ expect(result.weight).to eq(0.9) # Override weight, not source-adjusted weight
307
+ end
308
+
309
+ it "past_accuracy takes precedence over source" do
310
+ result = evaluator.evaluate(
311
+ context,
312
+ feedback: { past_accuracy: 0.5, source: "expert_review" }
313
+ )
314
+
315
+ # Should use past_accuracy adjustment (0.8 * 0.5 = 0.4)
316
+ # Not source adjustment (0.8 * 1.2 = 0.96)
317
+ expect(result.weight).to eq(0.4)
318
+ expect(result.metadata[:past_accuracy]).to eq(0.5)
319
+ end
320
+ end
321
+
322
+ describe "integration with DecisionAgent::Agent" do
323
+ it "works as an evaluator in a full decision agent" do
324
+ evaluator = described_class.new(base_decision: "approve", base_weight: 0.8)
325
+ agent = DecisionAgent::Agent.new(evaluators: [evaluator])
326
+
327
+ result = agent.decide(
328
+ context: { user_id: 123 },
329
+ feedback: {}
330
+ )
331
+
332
+ expect(result.decision).to eq("approve")
333
+ expect(result.confidence).to eq(1.0) # Single evaluator normalized to 1.0
334
+ end
335
+
336
+ it "respects feedback in agent context" do
337
+ evaluator = described_class.new(base_decision: "approve", base_weight: 0.8)
338
+ agent = DecisionAgent::Agent.new(evaluators: [evaluator])
339
+
340
+ result = agent.decide(
341
+ context: { user_id: 456 },
342
+ feedback: { override: "manual_review", reason: "Sensitive action" }
343
+ )
344
+
345
+ expect(result.decision).to eq("manual_review")
346
+ expect(result.evaluations.first.reason).to eq("Sensitive action")
347
+ end
348
+
349
+ it "combines with other evaluators" do
350
+ feedback_eval = described_class.new(base_decision: "approve", base_weight: 0.8)
351
+ static_eval = DecisionAgent::Evaluators::StaticEvaluator.new(
352
+ decision: "approve",
353
+ weight: 0.6
354
+ )
355
+
356
+ agent = DecisionAgent::Agent.new(evaluators: [feedback_eval, static_eval])
357
+
358
+ result = agent.decide(
359
+ context: {},
360
+ feedback: { past_accuracy: 0.5 } # Reduces feedback_eval weight to 0.4
361
+ )
362
+
363
+ expect(result.decision).to eq("approve")
364
+ expect(result.evaluations.size).to eq(2)
365
+
366
+ # Confidence = (0.4 + 0.6) / (0.4 + 0.6) = 1.0 (both agree)
367
+ expect(result.confidence).to eq(1.0)
368
+ end
369
+
370
+ it "feedback affects only feedback-aware evaluators" do
371
+ feedback_eval = described_class.new(base_decision: "approve", base_weight: 0.8)
372
+ static_eval = DecisionAgent::Evaluators::StaticEvaluator.new(
373
+ decision: "reject",
374
+ weight: 0.9
375
+ )
376
+
377
+ agent = DecisionAgent::Agent.new(
378
+ evaluators: [feedback_eval, static_eval],
379
+ scoring_strategy: DecisionAgent::Scoring::MaxWeight.new
380
+ )
381
+
382
+ result_no_feedback = agent.decide(context: {}, feedback: {})
383
+ # MaxWeight picks static_eval (0.9 > 0.8)
384
+ expect(result_no_feedback.decision).to eq("reject")
385
+
386
+ result_with_feedback = agent.decide(
387
+ context: {},
388
+ feedback: { past_accuracy: 0.5 } # Reduces feedback_eval to 0.4
389
+ )
390
+ # MaxWeight still picks static_eval (0.9 > 0.4)
391
+ expect(result_with_feedback.decision).to eq("reject")
392
+ end
393
+ end
394
+
395
+ describe "edge cases" do
396
+ let(:evaluator) { described_class.new(base_decision: "approve", base_weight: 0.8) }
397
+ let(:context) { DecisionAgent::Context.new({}) }
398
+
399
+ it "handles feedback with string keys" do
400
+ result = evaluator.evaluate(
401
+ context,
402
+ feedback: { "override" => "reject" } # String key instead of symbol
403
+ )
404
+
405
+ # Should not match because code expects symbols
406
+ # Falls back to base decision
407
+ expect(result.decision).to eq("approve")
408
+ expect(result.weight).to eq(0.8)
409
+ end
410
+
411
+ it "handles nil feedback gracefully" do
412
+ # Feedback defaults to {} in the signature, but test explicit nil handling
413
+ result = evaluator.evaluate(context, feedback: {})
414
+ expect(result.decision).to eq("approve")
415
+ end
416
+
417
+ it "handles empty override value" do
418
+ result = evaluator.evaluate(
419
+ context,
420
+ feedback: { override: "" }
421
+ )
422
+
423
+ # Empty string is truthy in Ruby, so override applies
424
+ expect(result.decision).to eq("")
425
+ expect(result.metadata[:feedback_override]).to be true
426
+ end
427
+
428
+ it "handles zero past_accuracy" do
429
+ result = evaluator.evaluate(
430
+ context,
431
+ feedback: { past_accuracy: 0.0 }
432
+ )
433
+
434
+ # 0.8 * 0.0 = 0.0
435
+ expect(result.weight).to eq(0.0)
436
+ end
437
+
438
+ it "handles very high past_accuracy" do
439
+ result = evaluator.evaluate(
440
+ context,
441
+ feedback: { past_accuracy: 10.0 }
442
+ )
443
+
444
+ # 0.8 * 10.0 = 8.0, clamped to 1.0
445
+ expect(result.weight).to eq(1.0)
446
+ end
447
+ end
448
+
449
+ describe "evaluator_name" do
450
+ it "returns the correct evaluator name" do
451
+ evaluator = described_class.new(base_decision: "approve", base_weight: 0.8)
452
+ context = DecisionAgent::Context.new({})
453
+
454
+ result = evaluator.evaluate(context, feedback: {})
455
+
456
+ # Base class extracts just the class name without module
457
+ expect(result.evaluator_name).to eq("FeedbackAwareEvaluator")
458
+ end
459
+ end
460
+ end