decision_agent 0.1.3 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. checksums.yaml +4 -4
  2. data/lib/decision_agent/ab_testing/ab_test.rb +197 -0
  3. data/lib/decision_agent/ab_testing/ab_test_assignment.rb +76 -0
  4. data/lib/decision_agent/ab_testing/ab_test_manager.rb +317 -0
  5. data/lib/decision_agent/ab_testing/ab_testing_agent.rb +152 -0
  6. data/lib/decision_agent/ab_testing/storage/activerecord_adapter.rb +155 -0
  7. data/lib/decision_agent/ab_testing/storage/adapter.rb +67 -0
  8. data/lib/decision_agent/ab_testing/storage/memory_adapter.rb +116 -0
  9. data/lib/decision_agent/monitoring/metrics_collector.rb +148 -3
  10. data/lib/decision_agent/monitoring/storage/activerecord_adapter.rb +253 -0
  11. data/lib/decision_agent/monitoring/storage/base_adapter.rb +90 -0
  12. data/lib/decision_agent/monitoring/storage/memory_adapter.rb +222 -0
  13. data/lib/decision_agent/version.rb +1 -1
  14. data/lib/decision_agent.rb +7 -0
  15. data/lib/generators/decision_agent/install/install_generator.rb +37 -0
  16. data/lib/generators/decision_agent/install/templates/ab_test_assignment_model.rb +45 -0
  17. data/lib/generators/decision_agent/install/templates/ab_test_model.rb +54 -0
  18. data/lib/generators/decision_agent/install/templates/ab_testing_migration.rb +43 -0
  19. data/lib/generators/decision_agent/install/templates/ab_testing_tasks.rake +189 -0
  20. data/lib/generators/decision_agent/install/templates/decision_agent_tasks.rake +114 -0
  21. data/lib/generators/decision_agent/install/templates/decision_log.rb +57 -0
  22. data/lib/generators/decision_agent/install/templates/error_metric.rb +53 -0
  23. data/lib/generators/decision_agent/install/templates/evaluation_metric.rb +43 -0
  24. data/lib/generators/decision_agent/install/templates/monitoring_migration.rb +109 -0
  25. data/lib/generators/decision_agent/install/templates/performance_metric.rb +76 -0
  26. data/spec/ab_testing/ab_test_manager_spec.rb +330 -0
  27. data/spec/ab_testing/ab_test_spec.rb +270 -0
  28. data/spec/examples.txt +612 -548
  29. data/spec/issue_verification_spec.rb +95 -21
  30. data/spec/monitoring/metrics_collector_spec.rb +2 -2
  31. data/spec/monitoring/monitored_agent_spec.rb +1 -1
  32. data/spec/monitoring/prometheus_exporter_spec.rb +1 -1
  33. data/spec/monitoring/storage/activerecord_adapter_spec.rb +346 -0
  34. data/spec/monitoring/storage/memory_adapter_spec.rb +247 -0
  35. metadata +26 -2
@@ -254,6 +254,54 @@ RSpec.describe "Issue Verification Tests" do
254
254
  )
255
255
  end.to raise_error(ActiveRecord::RecordNotUnique)
256
256
  end
257
+
258
+ it "verifies application-level constraint for single active version (all databases)" do
259
+ # For databases that don't support partial unique indexes (like SQLite),
260
+ # the application should enforce only one active version per rule
261
+
262
+ ActiveRecord::Schema.define do
263
+ create_table :rule_versions, force: true do |t|
264
+ t.string :rule_id, null: false
265
+ t.integer :version_number, null: false
266
+ t.text :content, null: false
267
+ t.string :status, default: "active", null: false
268
+ t.timestamps
269
+ end
270
+ add_index :rule_versions, %i[rule_id version_number], unique: true
271
+ end
272
+
273
+ class TestRuleVersion6 < ActiveRecord::Base
274
+ self.table_name = "rule_versions"
275
+
276
+ # Application-level validation (works on all databases)
277
+ validate :only_one_active_per_rule, if: -> { status == "active" }
278
+
279
+ def only_one_active_per_rule
280
+ existing = self.class.where(rule_id: rule_id, status: "active")
281
+ existing = existing.where.not(id: id) if persisted?
282
+ return unless existing.exists?
283
+
284
+ errors.add(:base, "Only one active version allowed per rule")
285
+ end
286
+ end
287
+
288
+ TestRuleVersion6.create!(
289
+ rule_id: "test_rule",
290
+ version_number: 1,
291
+ content: { test: "v1" }.to_json,
292
+ status: "active"
293
+ )
294
+
295
+ # Try to create second active version - should fail with validation error
296
+ expect do
297
+ TestRuleVersion6.create!(
298
+ rule_id: "test_rule",
299
+ version_number: 2,
300
+ content: { test: "v2" }.to_json,
301
+ status: "active"
302
+ )
303
+ end.to raise_error(ActiveRecord::RecordInvalid, /Only one active version allowed/)
304
+ end
257
305
  end
258
306
  end
259
307
  end
@@ -466,7 +514,11 @@ RSpec.describe "Issue Verification Tests" do
466
514
  add_index :rule_versions, %i[rule_id version_number], unique: true
467
515
  end
468
516
 
469
- unless defined?(RuleVersion)
517
+ if defined?(RuleVersion)
518
+ # Clear existing validations if RuleVersion was defined by another spec
519
+ RuleVersion.clear_validators!
520
+ RuleVersion.reset_callbacks(:validate)
521
+ else
470
522
  class ::RuleVersion < ActiveRecord::Base
471
523
  end
472
524
  end
@@ -495,33 +547,55 @@ RSpec.describe "Issue Verification Tests" do
495
547
  end.to raise_error(DecisionAgent::ValidationError, /Invalid JSON/)
496
548
  end
497
549
 
498
- it "raises ValidationError when content is empty string" do
499
- # ActiveRecord validation prevents empty string content
500
- skip "ActiveRecord validation prevents empty string content"
550
+ it "handles empty string content in JSON parsing" do
551
+ # Even if the database allows empty strings (no NOT NULL + no validation),
552
+ # the adapter should handle it gracefully when parsing JSON
553
+ version = RuleVersion.create!(
554
+ rule_id: "test_rule",
555
+ version_number: 1,
556
+ content: "", # EMPTY STRING!
557
+ created_by: "test",
558
+ status: "active"
559
+ )
501
560
 
502
- # This test would only be relevant if the model allowed empty strings
503
- # The RuleVersion model has `validates :content, presence: true`
504
- # which rejects empty strings before record creation
561
+ # serialize_version should catch JSON parsing errors
562
+ expect do
563
+ adapter.send(:serialize_version, version)
564
+ end.to raise_error(DecisionAgent::ValidationError, /Invalid JSON/)
505
565
  end
506
566
 
507
- it "raises ValidationError when content is nil (if allowed by DB)" do
508
- # Skip this test because the schema has NOT NULL constraint on content
509
- # The database won't allow nil content to be saved in the first place
510
- skip "Schema has NOT NULL constraint on content column"
567
+ it "enforces NOT NULL constraint on content column" do
568
+ # The schema has NOT NULL constraint on content column
569
+ # The database should raise an error when trying to create with nil content
511
570
 
512
- # This test would only be relevant if the schema allowed NULL content
513
- # In that case, the serialize_version method already handles it with:
514
- # rescue TypeError, NoMethodError
515
- # raise DecisionAgent::ValidationError, "content is nil or not a string"
571
+ expect do
572
+ RuleVersion.create!(
573
+ rule_id: "test_rule",
574
+ version_number: 1,
575
+ content: nil, # NIL!
576
+ created_by: "test",
577
+ status: "active"
578
+ )
579
+ end.to raise_error(ActiveRecord::NotNullViolation)
516
580
  end
517
581
 
518
- it "raises ValidationError when content contains malformed UTF-8" do
519
- # ActiveRecord validation rejects malformed UTF-8 before record creation
520
- skip "ActiveRecord validation rejects malformed UTF-8 strings"
582
+ it "handles content with special UTF-8 characters correctly" do
583
+ # Instead of testing malformed UTF-8 (which ActiveRecord rejects),
584
+ # test that valid UTF-8 special characters are handled correctly
585
+ special_content = {
586
+ "unicode" => "Hello \u4E16\u754C",
587
+ "emoji" => "\u{1F44D}",
588
+ "special" => "\n\t\r"
589
+ }
521
590
 
522
- # This test would only be relevant if ActiveRecord allowed malformed UTF-8
523
- # In practice, ActiveRecord's blank? check fails on invalid UTF-8
524
- # which prevents the record from being created in the first place
591
+ version = adapter.create_version(
592
+ rule_id: "test_rule",
593
+ content: special_content,
594
+ metadata: { created_by: "test" }
595
+ )
596
+
597
+ loaded = adapter.get_version(version_id: version[:id])
598
+ expect(loaded[:content]).to eq(special_content)
525
599
  end
526
600
 
527
601
  it "raises ValidationError when content is truncated JSON" do
@@ -2,7 +2,7 @@ require "spec_helper"
2
2
  require "decision_agent/monitoring/metrics_collector"
3
3
 
4
4
  RSpec.describe DecisionAgent::Monitoring::MetricsCollector do
5
- let(:collector) { described_class.new(window_size: 60) }
5
+ let(:collector) { described_class.new(window_size: 60, storage: :memory) }
6
6
  let(:decision) do
7
7
  double(
8
8
  "Decision",
@@ -266,7 +266,7 @@ RSpec.describe DecisionAgent::Monitoring::MetricsCollector do
266
266
 
267
267
  describe "metric cleanup" do
268
268
  it "removes old metrics outside window" do
269
- collector = described_class.new(window_size: 1)
269
+ collector = described_class.new(window_size: 1, storage: :memory)
270
270
 
271
271
  collector.record_decision(decision, context)
272
272
  expect(collector.metrics_count[:decisions]).to eq(1)
@@ -3,7 +3,7 @@ require "decision_agent/monitoring/metrics_collector"
3
3
  require "decision_agent/monitoring/monitored_agent"
4
4
 
5
5
  RSpec.describe DecisionAgent::Monitoring::MonitoredAgent do
6
- let(:collector) { DecisionAgent::Monitoring::MetricsCollector.new }
6
+ let(:collector) { DecisionAgent::Monitoring::MetricsCollector.new(storage: :memory) }
7
7
  let(:evaluator) do
8
8
  double(
9
9
  "Evaluator",
@@ -3,7 +3,7 @@ require "decision_agent/monitoring/metrics_collector"
3
3
  require "decision_agent/monitoring/prometheus_exporter"
4
4
 
5
5
  RSpec.describe DecisionAgent::Monitoring::PrometheusExporter do
6
- let(:collector) { DecisionAgent::Monitoring::MetricsCollector.new }
6
+ let(:collector) { DecisionAgent::Monitoring::MetricsCollector.new(storage: :memory) }
7
7
  let(:exporter) { described_class.new(metrics_collector: collector, namespace: "test") }
8
8
 
9
9
  let(:decision) do
@@ -0,0 +1,346 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "spec_helper"
4
+ require "active_record"
5
+ require "decision_agent/monitoring/storage/activerecord_adapter"
6
+
7
+ RSpec.describe DecisionAgent::Monitoring::Storage::ActiveRecordAdapter do
8
+ # Setup in-memory SQLite database for testing
9
+ before(:all) do
10
+ ActiveRecord::Base.establish_connection(
11
+ adapter: "sqlite3",
12
+ database: ":memory:"
13
+ )
14
+
15
+ # Create tables
16
+ ActiveRecord::Schema.define do
17
+ create_table :decision_logs, force: true do |t|
18
+ t.string :decision, null: false
19
+ t.float :confidence
20
+ t.integer :evaluations_count, default: 0
21
+ t.float :duration_ms
22
+ t.string :status
23
+ t.text :context
24
+ t.text :metadata
25
+ t.timestamps
26
+ end
27
+
28
+ create_table :evaluation_metrics, force: true do |t|
29
+ t.references :decision_log, foreign_key: true
30
+ t.string :evaluator_name, null: false
31
+ t.float :score
32
+ t.boolean :success
33
+ t.float :duration_ms
34
+ t.text :details
35
+ t.timestamps
36
+ end
37
+
38
+ create_table :performance_metrics, force: true do |t|
39
+ t.string :operation, null: false
40
+ t.float :duration_ms
41
+ t.string :status
42
+ t.text :metadata
43
+ t.timestamps
44
+ end
45
+
46
+ create_table :error_metrics, force: true do |t|
47
+ t.string :error_type, null: false
48
+ t.text :message
49
+ t.text :stack_trace
50
+ t.string :severity
51
+ t.text :context
52
+ t.timestamps
53
+ end
54
+ end
55
+
56
+ # Define models
57
+ # rubocop:disable Lint/ConstantDefinitionInBlock
58
+ class DecisionLog < ActiveRecord::Base
59
+ has_many :evaluation_metrics, dependent: :destroy
60
+
61
+ scope :recent, ->(time_range) { where("created_at >= ?", Time.now - time_range) }
62
+
63
+ def self.success_rate(time_range: 3600)
64
+ total = recent(time_range).where.not(status: nil).count
65
+ return 0.0 if total.zero?
66
+
67
+ recent(time_range).where(status: "success").count.to_f / total
68
+ end
69
+
70
+ def parsed_context
71
+ JSON.parse(context, symbolize_names: true)
72
+ rescue StandardError
73
+ {}
74
+ end
75
+ end
76
+
77
+ class EvaluationMetric < ActiveRecord::Base
78
+ belongs_to :decision_log, optional: true
79
+
80
+ scope :recent, ->(time_range) { where("created_at >= ?", Time.now - time_range) }
81
+ scope :successful, -> { where(success: true) }
82
+
83
+ def parsed_details
84
+ JSON.parse(details, symbolize_names: true)
85
+ rescue StandardError
86
+ {}
87
+ end
88
+ end
89
+
90
+ class PerformanceMetric < ActiveRecord::Base
91
+ scope :recent, ->(time_range) { where("created_at >= ?", Time.now - time_range) }
92
+
93
+ def self.average_duration(time_range: 3600)
94
+ recent(time_range).average(:duration_ms).to_f
95
+ end
96
+
97
+ def self.p50(time_range: 3600)
98
+ percentile(0.50, time_range: time_range)
99
+ end
100
+
101
+ def self.p95(time_range: 3600)
102
+ percentile(0.95, time_range: time_range)
103
+ end
104
+
105
+ def self.p99(time_range: 3600)
106
+ percentile(0.99, time_range: time_range)
107
+ end
108
+
109
+ def self.percentile(pct, time_range: 3600)
110
+ durations = recent(time_range).where.not(duration_ms: nil).order(:duration_ms).pluck(:duration_ms)
111
+ return 0.0 if durations.empty?
112
+
113
+ durations[(durations.length * pct).ceil - 1].to_f
114
+ end
115
+
116
+ def self.success_rate(time_range: 3600)
117
+ total = recent(time_range).where.not(status: nil).count
118
+ return 0.0 if total.zero?
119
+
120
+ recent(time_range).where(status: "success").count.to_f / total
121
+ end
122
+ end
123
+
124
+ class ErrorMetric < ActiveRecord::Base
125
+ scope :recent, ->(time_range) { where("created_at >= ?", Time.now - time_range) }
126
+ scope :critical, -> { where(severity: "critical") }
127
+
128
+ def parsed_context
129
+ JSON.parse(context, symbolize_names: true)
130
+ rescue StandardError
131
+ {}
132
+ end
133
+ end
134
+ # rubocop:enable Lint/ConstantDefinitionInBlock
135
+ end
136
+
137
+ before do
138
+ DecisionLog.delete_all
139
+ EvaluationMetric.delete_all
140
+ PerformanceMetric.delete_all
141
+ ErrorMetric.delete_all
142
+ end
143
+
144
+ let(:adapter) { described_class.new }
145
+
146
+ describe ".available?" do
147
+ it "returns true when ActiveRecord and models are defined" do
148
+ expect(described_class.available?).to be_truthy
149
+ end
150
+ end
151
+
152
+ describe "#record_decision" do
153
+ it "creates a decision log record" do
154
+ expect do
155
+ adapter.record_decision(
156
+ "approve_payment",
157
+ { user_id: 123, amount: 500 },
158
+ confidence: 0.85,
159
+ evaluations_count: 3,
160
+ duration_ms: 45.5,
161
+ status: "success"
162
+ )
163
+ end.to change(DecisionLog, :count).by(1)
164
+
165
+ log = DecisionLog.last
166
+ expect(log.decision).to eq("approve_payment")
167
+ expect(log.confidence).to eq(0.85)
168
+ expect(log.evaluations_count).to eq(3)
169
+ expect(log.duration_ms).to eq(45.5)
170
+ expect(log.status).to eq("success")
171
+ expect(log.parsed_context).to eq(user_id: 123, amount: 500)
172
+ end
173
+ end
174
+
175
+ describe "#record_evaluation" do
176
+ it "creates an evaluation metric record" do
177
+ expect do
178
+ adapter.record_evaluation(
179
+ "FraudDetector",
180
+ score: 0.92,
181
+ success: true,
182
+ duration_ms: 12.3,
183
+ details: { risk_level: "low" }
184
+ )
185
+ end.to change(EvaluationMetric, :count).by(1)
186
+
187
+ metric = EvaluationMetric.last
188
+ expect(metric.evaluator_name).to eq("FraudDetector")
189
+ expect(metric.score).to eq(0.92)
190
+ expect(metric.success).to be true
191
+ expect(metric.duration_ms).to eq(12.3)
192
+ expect(metric.parsed_details).to eq(risk_level: "low")
193
+ end
194
+ end
195
+
196
+ describe "#record_performance" do
197
+ it "creates a performance metric record" do
198
+ expect do
199
+ adapter.record_performance(
200
+ "api_call",
201
+ duration_ms: 250.5,
202
+ status: "success",
203
+ metadata: { endpoint: "/api/v1/users" }
204
+ )
205
+ end.to change(PerformanceMetric, :count).by(1)
206
+
207
+ metric = PerformanceMetric.last
208
+ expect(metric.operation).to eq("api_call")
209
+ expect(metric.duration_ms).to eq(250.5)
210
+ expect(metric.status).to eq("success")
211
+ end
212
+ end
213
+
214
+ describe "#record_error" do
215
+ it "creates an error metric record" do
216
+ expect do
217
+ adapter.record_error(
218
+ "RuntimeError",
219
+ message: "Something went wrong",
220
+ stack_trace: ["line 1", "line 2"],
221
+ severity: "critical",
222
+ context: { user_id: 456 }
223
+ )
224
+ end.to change(ErrorMetric, :count).by(1)
225
+
226
+ error = ErrorMetric.last
227
+ expect(error.error_type).to eq("RuntimeError")
228
+ expect(error.message).to eq("Something went wrong")
229
+ expect(error.severity).to eq("critical")
230
+ expect(error.parsed_context).to eq(user_id: 456)
231
+ end
232
+ end
233
+
234
+ describe "#statistics" do
235
+ before do
236
+ # Create test data
237
+ 3.times do |i|
238
+ adapter.record_decision(
239
+ "decision_#{i}",
240
+ { index: i },
241
+ confidence: 0.5 + (i * 0.1),
242
+ evaluations_count: 2,
243
+ duration_ms: 100 + (i * 10),
244
+ status: "success"
245
+ )
246
+ end
247
+
248
+ 2.times do |i|
249
+ adapter.record_evaluation(
250
+ "Evaluator#{i}",
251
+ score: 0.8,
252
+ success: true,
253
+ duration_ms: 50
254
+ )
255
+ end
256
+
257
+ 4.times do |i|
258
+ adapter.record_performance(
259
+ "operation_#{i}",
260
+ duration_ms: 100 + (i * 50),
261
+ status: i.even? ? "success" : "failure"
262
+ )
263
+ end
264
+
265
+ adapter.record_error("TestError", severity: "critical")
266
+ end
267
+
268
+ it "returns comprehensive statistics" do
269
+ stats = adapter.statistics(time_range: 3600)
270
+
271
+ expect(stats[:decisions][:total]).to eq(3)
272
+ expect(stats[:decisions][:average_confidence]).to be_within(0.01).of(0.6)
273
+ expect(stats[:evaluations][:total]).to eq(2)
274
+ expect(stats[:performance][:total]).to eq(4)
275
+ expect(stats[:errors][:total]).to eq(1)
276
+ expect(stats[:errors][:critical_count]).to eq(1)
277
+ end
278
+ end
279
+
280
+ describe "#time_series" do
281
+ before do
282
+ # Create metrics at different times
283
+ [10, 70, 130].each do |seconds_ago|
284
+ travel_back = Time.now - seconds_ago
285
+ DecisionLog.create!(
286
+ decision: "test",
287
+ confidence: 0.8,
288
+ created_at: travel_back
289
+ )
290
+ end
291
+ end
292
+
293
+ it "returns time series data grouped by buckets" do
294
+ series = adapter.time_series(:decisions, bucket_size: 60, time_range: 200)
295
+
296
+ expect(series[:timestamps]).to be_an(Array)
297
+ expect(series[:data]).to be_an(Array)
298
+ expect(series[:data].sum).to eq(3)
299
+ end
300
+ end
301
+
302
+ describe "#metrics_count" do
303
+ before do
304
+ adapter.record_decision("test", {}, confidence: 0.8)
305
+ adapter.record_evaluation("TestEval", score: 0.9)
306
+ adapter.record_performance("test_op", duration_ms: 100)
307
+ adapter.record_error("TestError")
308
+ end
309
+
310
+ it "returns count of all metric types" do
311
+ counts = adapter.metrics_count
312
+
313
+ expect(counts[:decisions]).to eq(1)
314
+ expect(counts[:evaluations]).to eq(1)
315
+ expect(counts[:performance]).to eq(1)
316
+ expect(counts[:errors]).to eq(1)
317
+ end
318
+ end
319
+
320
+ describe "#cleanup" do
321
+ before do
322
+ # Create old metrics
323
+ old_time = Time.now - 8.days
324
+ DecisionLog.create!(decision: "old", confidence: 0.8, created_at: old_time)
325
+ EvaluationMetric.create!(evaluator_name: "old", created_at: old_time)
326
+ PerformanceMetric.create!(operation: "old", created_at: old_time)
327
+ ErrorMetric.create!(error_type: "old", created_at: old_time)
328
+
329
+ # Create recent metrics
330
+ adapter.record_decision("recent", {}, confidence: 0.8)
331
+ adapter.record_evaluation("recent", score: 0.9)
332
+ adapter.record_performance("recent", duration_ms: 100)
333
+ adapter.record_error("recent")
334
+ end
335
+
336
+ it "removes old metrics and keeps recent ones" do
337
+ count = adapter.cleanup(older_than: 7.days.to_i)
338
+
339
+ expect(count).to eq(4) # 4 old metrics removed
340
+ expect(DecisionLog.count).to eq(1)
341
+ expect(EvaluationMetric.count).to eq(1)
342
+ expect(PerformanceMetric.count).to eq(1)
343
+ expect(ErrorMetric.count).to eq(1)
344
+ end
345
+ end
346
+ end