decision_agent 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. checksums.yaml +7 -0
  2. data/LICENSE.txt +21 -0
  3. data/README.md +1060 -0
  4. data/bin/decision_agent +104 -0
  5. data/lib/decision_agent/agent.rb +147 -0
  6. data/lib/decision_agent/audit/adapter.rb +9 -0
  7. data/lib/decision_agent/audit/logger_adapter.rb +27 -0
  8. data/lib/decision_agent/audit/null_adapter.rb +8 -0
  9. data/lib/decision_agent/context.rb +42 -0
  10. data/lib/decision_agent/decision.rb +51 -0
  11. data/lib/decision_agent/dsl/condition_evaluator.rb +133 -0
  12. data/lib/decision_agent/dsl/rule_parser.rb +36 -0
  13. data/lib/decision_agent/dsl/schema_validator.rb +275 -0
  14. data/lib/decision_agent/errors.rb +62 -0
  15. data/lib/decision_agent/evaluation.rb +52 -0
  16. data/lib/decision_agent/evaluators/base.rb +15 -0
  17. data/lib/decision_agent/evaluators/json_rule_evaluator.rb +51 -0
  18. data/lib/decision_agent/evaluators/static_evaluator.rb +31 -0
  19. data/lib/decision_agent/replay/replay.rb +147 -0
  20. data/lib/decision_agent/scoring/base.rb +19 -0
  21. data/lib/decision_agent/scoring/consensus.rb +40 -0
  22. data/lib/decision_agent/scoring/max_weight.rb +16 -0
  23. data/lib/decision_agent/scoring/threshold.rb +40 -0
  24. data/lib/decision_agent/scoring/weighted_average.rb +26 -0
  25. data/lib/decision_agent/version.rb +3 -0
  26. data/lib/decision_agent/web/public/app.js +580 -0
  27. data/lib/decision_agent/web/public/index.html +190 -0
  28. data/lib/decision_agent/web/public/styles.css +558 -0
  29. data/lib/decision_agent/web/server.rb +255 -0
  30. data/lib/decision_agent.rb +29 -0
  31. data/spec/agent_spec.rb +249 -0
  32. data/spec/api_contract_spec.rb +430 -0
  33. data/spec/audit_adapters_spec.rb +74 -0
  34. data/spec/comprehensive_edge_cases_spec.rb +1777 -0
  35. data/spec/context_spec.rb +84 -0
  36. data/spec/dsl_validation_spec.rb +648 -0
  37. data/spec/edge_cases_spec.rb +353 -0
  38. data/spec/examples/feedback_aware_evaluator_spec.rb +460 -0
  39. data/spec/json_rule_evaluator_spec.rb +587 -0
  40. data/spec/replay_edge_cases_spec.rb +699 -0
  41. data/spec/replay_spec.rb +210 -0
  42. data/spec/scoring_spec.rb +225 -0
  43. data/spec/spec_helper.rb +28 -0
  44. metadata +133 -0
--- /dev/null
+++ data/spec/api_contract_spec.rb
@@ -0,0 +1,430 @@
1
+ require "spec_helper"
2
+
3
+ RSpec.describe "DecisionAgent API Contracts" do
4
+ describe "Evaluator Interface Contract" do
5
+ let(:context) { DecisionAgent::Context.new(user: "alice", priority: "high") }
6
+ let(:feedback) { { source: "test" } }
7
+
8
+ describe "Base evaluator interface" do
9
+ it "defines evaluate(context, feedback: {}) method signature" do
10
+ evaluator = DecisionAgent::Evaluators::Base.new
11
+ expect(evaluator).to respond_to(:evaluate)
12
+
13
+ # Should accept context and optional feedback
14
+ expect { evaluator.evaluate(context) }.to raise_error(NotImplementedError)
15
+ expect { evaluator.evaluate(context, feedback: feedback) }.to raise_error(NotImplementedError)
16
+ end
17
+ end
18
+
19
+ describe "evaluate method return contract" do
20
+ context "when returning an Evaluation" do
21
+ let(:evaluator) do
22
+ DecisionAgent::Evaluators::StaticEvaluator.new(
23
+ decision: "approve",
24
+ weight: 0.8,
25
+ reason: "Test reason"
26
+ )
27
+ end
28
+
29
+ it "returns DecisionAgent::Evaluation object" do
30
+ result = evaluator.evaluate(context, feedback: feedback)
31
+ expect(result).to be_a(DecisionAgent::Evaluation)
32
+ end
33
+
34
+ it "includes required evaluator_name field" do
35
+ result = evaluator.evaluate(context, feedback: feedback)
36
+ expect(result.evaluator_name).to be_a(String)
37
+ expect(result.evaluator_name).not_to be_empty
38
+ end
39
+
40
+ it "includes required decision field" do
41
+ result = evaluator.evaluate(context, feedback: feedback)
42
+ expect(result.decision).to be_a(String)
43
+ expect(result.decision).not_to be_empty
44
+ end
45
+
46
+ it "includes required weight field (0.0-1.0)" do
47
+ result = evaluator.evaluate(context, feedback: feedback)
48
+ expect(result.weight).to be_a(Float)
49
+ expect(result.weight).to be >= 0.0
50
+ expect(result.weight).to be <= 1.0
51
+ end
52
+
53
+ it "includes required reason field" do
54
+ result = evaluator.evaluate(context, feedback: feedback)
55
+ expect(result.reason).to be_a(String)
56
+ expect(result.reason).not_to be_empty
57
+ end
58
+
59
+ it "includes metadata field (defaults to {})" do
60
+ result = evaluator.evaluate(context, feedback: feedback)
61
+ expect(result.metadata).to be_a(Hash)
62
+ end
63
+
64
+ it "records rule_id in metadata for rule-based evaluators" do
65
+ rules = {
66
+ version: "1.0",
67
+ ruleset: "test",
68
+ rules: [
69
+ {
70
+ id: "test_rule_123",
71
+ if: { field: "priority", op: "eq", value: "high" },
72
+ then: { decision: "escalate", weight: 0.9, reason: "High priority" }
73
+ }
74
+ ]
75
+ }
76
+
77
+ rule_evaluator = DecisionAgent::Evaluators::JsonRuleEvaluator.new(rules_json: rules)
78
+ result = rule_evaluator.evaluate(context, feedback: feedback)
79
+
80
+ expect(result.metadata).to have_key(:rule_id)
81
+ expect(result.metadata[:rule_id]).to eq("test_rule_123")
82
+ end
83
+ end
84
+
85
+ context "when no decision can be made" do
86
+ it "returns nil" do
87
+ rules = {
88
+ version: "1.0",
89
+ ruleset: "test",
90
+ rules: [
91
+ {
92
+ id: "no_match",
93
+ if: { field: "priority", op: "eq", value: "impossible" },
94
+ then: { decision: "none", weight: 0.5, reason: "Won't match" }
95
+ }
96
+ ]
97
+ }
98
+
99
+ evaluator = DecisionAgent::Evaluators::JsonRuleEvaluator.new(rules_json: rules)
100
+ result = evaluator.evaluate(context, feedback: feedback)
101
+
102
+ expect(result).to be_nil
103
+ end
104
+ end
105
+ end
106
+
107
+ describe "weight bounds validation" do
108
+ it "rejects weight < 0.0" do
109
+ expect {
110
+ DecisionAgent::Evaluation.new(
111
+ decision: "test",
112
+ weight: -0.1,
113
+ reason: "test",
114
+ evaluator_name: "Test"
115
+ )
116
+ }.to raise_error(DecisionAgent::InvalidWeightError, /must be between 0.0 and 1.0/)
117
+ end
118
+
119
+ it "rejects weight > 1.0" do
120
+ expect {
121
+ DecisionAgent::Evaluation.new(
122
+ decision: "test",
123
+ weight: 1.1,
124
+ reason: "test",
125
+ evaluator_name: "Test"
126
+ )
127
+ }.to raise_error(DecisionAgent::InvalidWeightError, /must be between 0.0 and 1.0/)
128
+ end
129
+
130
+ it "accepts weight = 0.0" do
131
+ expect {
132
+ DecisionAgent::Evaluation.new(
133
+ decision: "test",
134
+ weight: 0.0,
135
+ reason: "test",
136
+ evaluator_name: "Test"
137
+ )
138
+ }.not_to raise_error
139
+ end
140
+
141
+ it "accepts weight = 1.0" do
142
+ expect {
143
+ DecisionAgent::Evaluation.new(
144
+ decision: "test",
145
+ weight: 1.0,
146
+ reason: "test",
147
+ evaluator_name: "Test"
148
+ )
149
+ }.not_to raise_error
150
+ end
151
+ end
152
+
153
+ describe "reason handling" do
154
+ it "converts nil reason to empty string" do
155
+ evaluation = DecisionAgent::Evaluation.new(
156
+ decision: "test",
157
+ weight: 0.5,
158
+ reason: nil,
159
+ evaluator_name: "Test"
160
+ )
161
+
162
+ expect(evaluation.reason).to eq("")
163
+ end
164
+
165
+ it "converts non-string reason to string" do
166
+ evaluation = DecisionAgent::Evaluation.new(
167
+ decision: "test",
168
+ weight: 0.5,
169
+ reason: 123,
170
+ evaluator_name: "Test"
171
+ )
172
+
173
+ expect(evaluation.reason).to eq("123")
174
+ end
175
+
176
+ it "requires reason parameter to be provided" do
177
+ expect {
178
+ DecisionAgent::Evaluation.new(
179
+ decision: "test",
180
+ weight: 0.5,
181
+ evaluator_name: "Test"
182
+ )
183
+ }.to raise_error(ArgumentError, /missing keyword.*reason/)
184
+ end
185
+ end
186
+ end
187
+
188
+ describe "Decision Object API Contract" do
189
+ let(:evaluator) do
190
+ DecisionAgent::Evaluators::StaticEvaluator.new(
191
+ decision: "approve",
192
+ weight: 0.85,
193
+ reason: "Test approval"
194
+ )
195
+ end
196
+
197
+ let(:agent) do
198
+ DecisionAgent::Agent.new(
199
+ evaluators: [evaluator],
200
+ scoring_strategy: DecisionAgent::Scoring::WeightedAverage.new,
201
+ audit_adapter: DecisionAgent::Audit::NullAdapter.new
202
+ )
203
+ end
204
+
205
+ let(:context) { { user: "bob", priority: "medium" } }
206
+ let(:result) { agent.decide(context: context) }
207
+
208
+ describe "standardized API" do
209
+ it "exposes decision as string" do
210
+ expect(result.decision).to be_a(String)
211
+ expect(result.decision).to eq("approve")
212
+ end
213
+
214
+ it "exposes confidence as float (0.0-1.0)" do
215
+ expect(result.confidence).to be_a(Float)
216
+ expect(result.confidence).to be >= 0.0
217
+ expect(result.confidence).to be <= 1.0
218
+ end
219
+
220
+ it "exposes evaluations as array of Evaluation objects" do
221
+ expect(result.evaluations).to be_an(Array)
222
+ expect(result.evaluations).to all(be_a(DecisionAgent::Evaluation))
223
+ expect(result.evaluations.size).to eq(1)
224
+ end
225
+
226
+ it "exposes explanations as array of strings" do
227
+ expect(result.explanations).to be_an(Array)
228
+ expect(result.explanations).to all(be_a(String))
229
+ expect(result.explanations).not_to be_empty
230
+ end
231
+
232
+ it "exposes audit_payload as fully reproducible Hash" do
233
+ expect(result.audit_payload).to be_a(Hash)
234
+ expect(result.audit_payload).to be_frozen
235
+ end
236
+ end
237
+
238
+ describe "audit_payload specification" do
239
+ it "includes timestamp field" do
240
+ expect(result.audit_payload).to have_key(:timestamp)
241
+ expect(result.audit_payload[:timestamp]).to match(/\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{6}Z/)
242
+ end
243
+
244
+ it "includes context field" do
245
+ expect(result.audit_payload).to have_key(:context)
246
+ expect(result.audit_payload[:context]).to eq(user: "bob", priority: "medium")
247
+ end
248
+
249
+ it "includes feedback field" do
250
+ expect(result.audit_payload).to have_key(:feedback)
251
+ end
252
+
253
+ it "includes evaluations array with full details" do
254
+ expect(result.audit_payload).to have_key(:evaluations)
255
+ expect(result.audit_payload[:evaluations]).to be_an(Array)
256
+
257
+ eval_hash = result.audit_payload[:evaluations].first
258
+ expect(eval_hash).to have_key(:decision)
259
+ expect(eval_hash).to have_key(:weight)
260
+ expect(eval_hash).to have_key(:reason)
261
+ expect(eval_hash).to have_key(:evaluator_name)
262
+ expect(eval_hash).to have_key(:metadata)
263
+ end
264
+
265
+ it "includes decision field" do
266
+ expect(result.audit_payload).to have_key(:decision)
267
+ expect(result.audit_payload[:decision]).to eq("approve")
268
+ end
269
+
270
+ it "includes confidence field" do
271
+ expect(result.audit_payload).to have_key(:confidence)
272
+ expect(result.audit_payload[:confidence]).to eq(result.confidence)
273
+ end
274
+
275
+ it "includes scoring_strategy field" do
276
+ expect(result.audit_payload).to have_key(:scoring_strategy)
277
+ expect(result.audit_payload[:scoring_strategy]).to eq("DecisionAgent::Scoring::WeightedAverage")
278
+ end
279
+
280
+ it "includes agent_version field" do
281
+ expect(result.audit_payload).to have_key(:agent_version)
282
+ expect(result.audit_payload[:agent_version]).to eq(DecisionAgent::VERSION)
283
+ end
284
+
285
+ it "includes deterministic_hash field" do
286
+ expect(result.audit_payload).to have_key(:deterministic_hash)
287
+ expect(result.audit_payload[:deterministic_hash]).to be_a(String)
288
+ expect(result.audit_payload[:deterministic_hash]).to match(/^[a-f0-9]{64}$/)
289
+ end
290
+ end
291
+
292
+ describe "deterministic hash generation" do
293
+ it "generates same hash for same input" do
294
+ result1 = agent.decide(context: context)
295
+ result2 = agent.decide(context: context)
296
+
297
+ expect(result1.audit_payload[:deterministic_hash]).to eq(result2.audit_payload[:deterministic_hash])
298
+ end
299
+
300
+ it "generates different hash for different context" do
301
+ result1 = agent.decide(context: { user: "alice" })
302
+ result2 = agent.decide(context: { user: "bob" })
303
+
304
+ expect(result1.audit_payload[:deterministic_hash]).not_to eq(result2.audit_payload[:deterministic_hash])
305
+ end
306
+
307
+ it "excludes timestamp from hash (for determinism)" do
308
+ # Two decisions with same context should have same hash despite different timestamps
309
+ result1 = agent.decide(context: context)
310
+ sleep 0.01
311
+ result2 = agent.decide(context: context)
312
+
313
+ expect(result1.audit_payload[:timestamp]).not_to eq(result2.audit_payload[:timestamp])
314
+ expect(result1.audit_payload[:deterministic_hash]).to eq(result2.audit_payload[:deterministic_hash])
315
+ end
316
+
317
+ it "excludes feedback from hash (for determinism)" do
318
+ result1 = agent.decide(context: context, feedback: { source: "test1" })
319
+ result2 = agent.decide(context: context, feedback: { source: "test2" })
320
+
321
+ expect(result1.audit_payload[:deterministic_hash]).to eq(result2.audit_payload[:deterministic_hash])
322
+ end
323
+ end
324
+
325
+ describe "confidence bounds validation" do
326
+ it "validates confidence is between 0.0 and 1.0" do
327
+ expect {
328
+ DecisionAgent::Decision.new(
329
+ decision: "test",
330
+ confidence: -0.1,
331
+ explanations: [],
332
+ evaluations: [],
333
+ audit_payload: {}
334
+ )
335
+ }.to raise_error(DecisionAgent::InvalidConfidenceError)
336
+
337
+ expect {
338
+ DecisionAgent::Decision.new(
339
+ decision: "test",
340
+ confidence: 1.1,
341
+ explanations: [],
342
+ evaluations: [],
343
+ audit_payload: {}
344
+ )
345
+ }.to raise_error(DecisionAgent::InvalidConfidenceError)
346
+ end
347
+ end
348
+ end
349
+
350
+ describe "Threshold Strategy Fallback Behavior" do
351
+ let(:evaluator) do
352
+ DecisionAgent::Evaluators::StaticEvaluator.new(
353
+ decision: "approve",
354
+ weight: 0.5, # Below threshold
355
+ reason: "Low confidence approval"
356
+ )
357
+ end
358
+
359
+ context "when no evaluation meets threshold" do
360
+ it "returns fallback_decision" do
361
+ agent = DecisionAgent::Agent.new(
362
+ evaluators: [evaluator],
363
+ scoring_strategy: DecisionAgent::Scoring::Threshold.new(
364
+ threshold: 0.8,
365
+ fallback_decision: "manual_review"
366
+ )
367
+ )
368
+
369
+ result = agent.decide(context: { user: "test" })
370
+
371
+ expect(result.decision).to eq("manual_review")
372
+ end
373
+
374
+ it "sets reduced confidence for fallback (original_weight * 0.5)" do
375
+ agent = DecisionAgent::Agent.new(
376
+ evaluators: [evaluator],
377
+ scoring_strategy: DecisionAgent::Scoring::Threshold.new(
378
+ threshold: 0.8,
379
+ fallback_decision: "manual_review"
380
+ )
381
+ )
382
+
383
+ result = agent.decide(context: { user: "test" })
384
+
385
+ # Threshold strategy reduces confidence by 50% when falling back
386
+ # Original weight was 0.5, so fallback confidence is 0.5 * 0.5 = 0.25
387
+ expect(result.confidence).to eq(0.25)
388
+ end
389
+
390
+ it "includes fallback explanation" do
391
+ agent = DecisionAgent::Agent.new(
392
+ evaluators: [evaluator],
393
+ scoring_strategy: DecisionAgent::Scoring::Threshold.new(
394
+ threshold: 0.8,
395
+ fallback_decision: "manual_review"
396
+ )
397
+ )
398
+
399
+ result = agent.decide(context: { user: "test" })
400
+
401
+ expect(result.explanations.join(" ")).to include("manual_review")
402
+ end
403
+ end
404
+
405
+ context "when evaluation meets threshold" do
406
+ let(:high_confidence_evaluator) do
407
+ DecisionAgent::Evaluators::StaticEvaluator.new(
408
+ decision: "approve",
409
+ weight: 0.9, # Above threshold
410
+ reason: "High confidence approval"
411
+ )
412
+ end
413
+
414
+ it "returns the decision with full confidence" do
415
+ agent = DecisionAgent::Agent.new(
416
+ evaluators: [high_confidence_evaluator],
417
+ scoring_strategy: DecisionAgent::Scoring::Threshold.new(
418
+ threshold: 0.8,
419
+ fallback_decision: "manual_review"
420
+ )
421
+ )
422
+
423
+ result = agent.decide(context: { user: "test" })
424
+
425
+ expect(result.decision).to eq("approve")
426
+ expect(result.confidence).to eq(0.9)
427
+ end
428
+ end
429
+ end
430
+ end
--- /dev/null
+++ data/spec/audit_adapters_spec.rb
@@ -0,0 +1,74 @@
1
+ require "spec_helper"
2
+
3
+ RSpec.describe "Audit Adapters" do
4
+ describe DecisionAgent::Audit::NullAdapter do
5
+ it "implements record method without side effects" do
6
+ adapter = DecisionAgent::Audit::NullAdapter.new
7
+
8
+ decision = DecisionAgent::Decision.new(
9
+ decision: "approve",
10
+ confidence: 0.8,
11
+ explanations: [],
12
+ evaluations: [],
13
+ audit_payload: {}
14
+ )
15
+
16
+ context = DecisionAgent::Context.new({ user: "alice" })
17
+
18
+ expect {
19
+ adapter.record(decision, context)
20
+ }.not_to raise_error
21
+ end
22
+ end
23
+
24
+ describe DecisionAgent::Audit::LoggerAdapter do
25
+ it "logs decision to provided logger" do
26
+ io = StringIO.new
27
+ logger = Logger.new(io)
28
+
29
+ adapter = DecisionAgent::Audit::LoggerAdapter.new(logger: logger)
30
+
31
+ evaluator = DecisionAgent::Evaluators::StaticEvaluator.new(decision: "approve")
32
+ agent = DecisionAgent::Agent.new(
33
+ evaluators: [evaluator],
34
+ audit_adapter: adapter
35
+ )
36
+
37
+ agent.decide(context: { user: "alice" })
38
+
39
+ log_output = io.string
40
+ expect(log_output).to include("approve")
41
+ expect(log_output).to include("alice")
42
+ end
43
+
44
+ it "uses default logger when none provided" do
45
+ adapter = DecisionAgent::Audit::LoggerAdapter.new
46
+
47
+ expect(adapter.logger).to be_a(Logger)
48
+ end
49
+
50
+ it "logs JSON format" do
51
+ io = StringIO.new
52
+ logger = Logger.new(io)
53
+ logger.formatter = proc { |severity, datetime, progname, msg| "#{msg}\n" }
54
+
55
+ adapter = DecisionAgent::Audit::LoggerAdapter.new(logger: logger)
56
+
57
+ evaluator = DecisionAgent::Evaluators::StaticEvaluator.new(decision: "test_decision", weight: 0.9)
58
+ agent = DecisionAgent::Agent.new(
59
+ evaluators: [evaluator],
60
+ audit_adapter: adapter
61
+ )
62
+
63
+ agent.decide(context: { key: "value" })
64
+
65
+ log_output = io.string
66
+ log_json = JSON.parse(log_output.strip)
67
+
68
+ expect(log_json["decision"]).to eq("test_decision")
69
+ expect(log_json["confidence"]).to be_a(Numeric)
70
+ expect(log_json["context"]).to eq({ "key" => "value" })
71
+ expect(log_json["audit_hash"]).to be_a(String)
72
+ end
73
+ end
74
+ end