decision_agent 0.1.2 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +212 -35
- data/bin/decision_agent +3 -8
- data/lib/decision_agent/ab_testing/ab_test.rb +197 -0
- data/lib/decision_agent/ab_testing/ab_test_assignment.rb +76 -0
- data/lib/decision_agent/ab_testing/ab_test_manager.rb +317 -0
- data/lib/decision_agent/ab_testing/ab_testing_agent.rb +152 -0
- data/lib/decision_agent/ab_testing/storage/activerecord_adapter.rb +155 -0
- data/lib/decision_agent/ab_testing/storage/adapter.rb +67 -0
- data/lib/decision_agent/ab_testing/storage/memory_adapter.rb +116 -0
- data/lib/decision_agent/agent.rb +19 -26
- data/lib/decision_agent/audit/null_adapter.rb +1 -2
- data/lib/decision_agent/decision.rb +3 -1
- data/lib/decision_agent/dsl/condition_evaluator.rb +4 -3
- data/lib/decision_agent/dsl/rule_parser.rb +4 -6
- data/lib/decision_agent/dsl/schema_validator.rb +27 -31
- data/lib/decision_agent/errors.rb +11 -8
- data/lib/decision_agent/evaluation.rb +3 -1
- data/lib/decision_agent/evaluation_validator.rb +78 -0
- data/lib/decision_agent/evaluators/json_rule_evaluator.rb +26 -0
- data/lib/decision_agent/evaluators/static_evaluator.rb +2 -6
- data/lib/decision_agent/monitoring/alert_manager.rb +282 -0
- data/lib/decision_agent/monitoring/dashboard/public/dashboard.css +381 -0
- data/lib/decision_agent/monitoring/dashboard/public/dashboard.js +471 -0
- data/lib/decision_agent/monitoring/dashboard/public/index.html +161 -0
- data/lib/decision_agent/monitoring/dashboard_server.rb +340 -0
- data/lib/decision_agent/monitoring/metrics_collector.rb +423 -0
- data/lib/decision_agent/monitoring/monitored_agent.rb +71 -0
- data/lib/decision_agent/monitoring/prometheus_exporter.rb +247 -0
- data/lib/decision_agent/monitoring/storage/activerecord_adapter.rb +253 -0
- data/lib/decision_agent/monitoring/storage/base_adapter.rb +90 -0
- data/lib/decision_agent/monitoring/storage/memory_adapter.rb +222 -0
- data/lib/decision_agent/replay/replay.rb +12 -22
- data/lib/decision_agent/scoring/base.rb +1 -1
- data/lib/decision_agent/scoring/consensus.rb +5 -5
- data/lib/decision_agent/scoring/weighted_average.rb +1 -1
- data/lib/decision_agent/version.rb +1 -1
- data/lib/decision_agent/versioning/activerecord_adapter.rb +69 -33
- data/lib/decision_agent/versioning/adapter.rb +1 -3
- data/lib/decision_agent/versioning/file_storage_adapter.rb +143 -35
- data/lib/decision_agent/versioning/version_manager.rb +4 -12
- data/lib/decision_agent/web/public/index.html +1 -1
- data/lib/decision_agent/web/server.rb +19 -24
- data/lib/decision_agent.rb +14 -0
- data/lib/generators/decision_agent/install/install_generator.rb +42 -5
- data/lib/generators/decision_agent/install/templates/ab_test_assignment_model.rb +45 -0
- data/lib/generators/decision_agent/install/templates/ab_test_model.rb +54 -0
- data/lib/generators/decision_agent/install/templates/ab_testing_migration.rb +43 -0
- data/lib/generators/decision_agent/install/templates/ab_testing_tasks.rake +189 -0
- data/lib/generators/decision_agent/install/templates/decision_agent_tasks.rake +114 -0
- data/lib/generators/decision_agent/install/templates/decision_log.rb +57 -0
- data/lib/generators/decision_agent/install/templates/error_metric.rb +53 -0
- data/lib/generators/decision_agent/install/templates/evaluation_metric.rb +43 -0
- data/lib/generators/decision_agent/install/templates/migration.rb +17 -6
- data/lib/generators/decision_agent/install/templates/monitoring_migration.rb +109 -0
- data/lib/generators/decision_agent/install/templates/performance_metric.rb +76 -0
- data/lib/generators/decision_agent/install/templates/rule.rb +3 -3
- data/lib/generators/decision_agent/install/templates/rule_version.rb +13 -7
- data/spec/ab_testing/ab_test_manager_spec.rb +330 -0
- data/spec/ab_testing/ab_test_spec.rb +270 -0
- data/spec/activerecord_thread_safety_spec.rb +553 -0
- data/spec/agent_spec.rb +13 -13
- data/spec/api_contract_spec.rb +16 -16
- data/spec/audit_adapters_spec.rb +3 -3
- data/spec/comprehensive_edge_cases_spec.rb +86 -86
- data/spec/dsl_validation_spec.rb +83 -83
- data/spec/edge_cases_spec.rb +23 -23
- data/spec/examples/feedback_aware_evaluator_spec.rb +7 -7
- data/spec/examples.txt +612 -0
- data/spec/issue_verification_spec.rb +759 -0
- data/spec/json_rule_evaluator_spec.rb +15 -15
- data/spec/monitoring/alert_manager_spec.rb +378 -0
- data/spec/monitoring/metrics_collector_spec.rb +281 -0
- data/spec/monitoring/monitored_agent_spec.rb +222 -0
- data/spec/monitoring/prometheus_exporter_spec.rb +242 -0
- data/spec/monitoring/storage/activerecord_adapter_spec.rb +346 -0
- data/spec/monitoring/storage/memory_adapter_spec.rb +247 -0
- data/spec/replay_edge_cases_spec.rb +58 -58
- data/spec/replay_spec.rb +11 -11
- data/spec/rfc8785_canonicalization_spec.rb +215 -0
- data/spec/scoring_spec.rb +1 -1
- data/spec/spec_helper.rb +9 -0
- data/spec/thread_safety_spec.rb +482 -0
- data/spec/thread_safety_spec.rb.broken +878 -0
- data/spec/versioning_spec.rb +141 -37
- data/spec/web_ui_rack_spec.rb +135 -0
- metadata +93 -6
|
@@ -0,0 +1,340 @@
|
|
|
1
|
+
require "sinatra/base"
|
|
2
|
+
require "json"
|
|
3
|
+
|
|
4
|
+
# faye-websocket is an optional dependency used for the real-time channel.
# WEBSOCKET_AVAILABLE records whether it could be loaded; when it cannot, two
# warnings are printed and the constant is false.
WEBSOCKET_AVAILABLE =
  begin
    require "faye/websocket"
    true
  rescue LoadError
    warn "Warning: faye-websocket gem not found. Real-time dashboard features will be disabled."
    warn "Install with: gem install faye-websocket"
    false
  end
|
|
13
|
+
|
|
14
|
+
module DecisionAgent
|
|
15
|
+
module Monitoring
|
|
16
|
+
# Real-time monitoring dashboard server
|
|
17
|
+
class DashboardServer < Sinatra::Base
|
|
18
|
+
# Sinatra settings: static assets and views are resolved relative to this
# file; defaults are all interfaces, port 4568, Puma.
set(
  public_folder: File.expand_path("dashboard/public", __dir__),
  views: File.expand_path("dashboard/views", __dir__),
  bind: "0.0.0.0",
  port: 4568,
  server: :puma
)
|
|
23
|
+
|
|
24
|
+
# Enable CORS: every response carries permissive Access-Control-* headers,
# and any OPTIONS request is answered with status 200.
before do
  headers(
    "Access-Control-Allow-Origin" => "*",
    "Access-Control-Allow-Methods" => "GET, POST, PUT, DELETE, OPTIONS",
    "Access-Control-Allow-Headers" => "Content-Type"
  )
end

options("*") { 200 }
|
|
34
|
+
|
|
35
|
+
# Class-level configuration and WebSocket client registry.
#
# Collaborators are kept in class instance variables; route handlers reach
# them through `self.class`.
class << self
  attr_accessor :metrics_collector, :prometheus_exporter, :alert_manager

  # Sockets currently registered for broadcasts.
  #
  # The list is created on first use, so it is an Array even when the
  # collaborators were assigned through the accessors and
  # configure_monitoring never ran.
  #
  # @return [Array]
  def websocket_clients
    @websocket_clients ||= []
  end

  # Stores the collaborators, starts with an empty client list and hooks up
  # the real-time callbacks.
  #
  # @param metrics_collector [#add_observer]
  # @param prometheus_exporter [Object]
  # @param alert_manager [#add_handler]
  def configure_monitoring(metrics_collector:, prometheus_exporter:, alert_manager:)
    @metrics_collector = metrics_collector
    @prometheus_exporter = prometheus_exporter
    @alert_manager = alert_manager
    @websocket_clients = []

    setup_real_time_updates
  end

  # Registers one observer block on the metrics collector and one handler
  # block on the alert manager; both forward what they receive to
  # broadcast_to_clients with a UTC ISO 8601 timestamp.
  def setup_real_time_updates
    @metrics_collector.add_observer do |event_type, metric|
      broadcast_to_clients({
        type: "metric_update",
        event: event_type,
        data: metric,
        timestamp: Time.now.utc.iso8601
      })
    end

    @alert_manager.add_handler do |alert|
      broadcast_to_clients({
        type: "alert",
        data: alert,
        timestamp: Time.now.utc.iso8601
      })
    end
  end

  # Serialises +message+ to JSON once and sends it to every registered
  # socket whose ready_state is OPEN. Does nothing when faye-websocket is
  # not loaded. A failure on one socket is reported with `warn` and does not
  # stop delivery to the others.
  #
  # @param message [#to_json]
  def broadcast_to_clients(message)
    return unless WEBSOCKET_AVAILABLE

    payload = message.to_json
    # Iterate over a snapshot: remove_websocket_client deletes from the same
    # array, and a deletion during `each` would make it skip a neighbour.
    websocket_clients.dup.each do |client|
      client.send(payload) if client.ready_state == Faye::WebSocket::API::OPEN
    rescue StandardError => e
      warn "WebSocket send failed: #{e.message}"
    end
  end

  # Adds +ws+ to the broadcast list.
  #
  # @return [Array] the updated client list
  def add_websocket_client(ws)
    websocket_clients << ws
  end

  # Removes +ws+ from the broadcast list; a socket that is not registered is
  # ignored.
  def remove_websocket_client(ws)
    websocket_clients.delete(ws)
  end
end
|
|
89
|
+
|
|
90
|
+
# Main dashboard page: serves index.html from the configured public folder.
get "/" do
  index_page = File.join(settings.public_folder, "index.html")
  send_file index_page
end
|
|
94
|
+
|
|
95
|
+
# WebSocket endpoint for real-time updates.
#
# Halts with 503 when faye-websocket is not loaded and answers 426 when the
# request is not a WebSocket upgrade. Otherwise the socket is registered for
# broadcasts on open, greeted with a "connected" message, and unregistered
# on close; incoming messages go to handle_websocket_message.
get "/ws" do
  unless WEBSOCKET_AVAILABLE
    halt 503, { error: "WebSocket support not available. Install faye-websocket gem." }.to_json
  end

  if Faye::WebSocket.websocket?(request.env)
    socket = Faye::WebSocket.new(request.env)

    socket.on(:open) do |_event|
      self.class.add_websocket_client(socket)

      # Send initial state
      greeting = {
        type: "connected",
        message: "Connected to DecisionAgent monitoring",
        timestamp: Time.now.utc.iso8601
      }
      socket.send(greeting.to_json)
    end

    socket.on(:message) { |event| handle_websocket_message(socket, event.data) }

    socket.on(:close) { |_event| self.class.remove_websocket_client(socket) }

    socket.rack_response
  else
    status 426
    { error: "WebSocket connection required" }.to_json
  end
end
|
|
128
|
+
|
|
129
|
+
# API: Get current statistics.
#
# The optional `time_range` query parameter is converted with to_i and passed
# to the collector; when absent, nil is passed.
get "/api/stats" do
  content_type :json

  window = params[:time_range]
  window = window.to_i unless window.nil?

  self.class.metrics_collector.statistics(time_range: window).to_json
end
|
|
138
|
+
|
|
139
|
+
# API: Get time series data for the metric type named in the path.
#
# Query parameters: `bucket_size` (default 60) and `time_range`
# (default 3600), both converted with to_i.
get "/api/timeseries/:metric_type" do
  content_type :json

  query = {
    metric_type: params[:metric_type].to_sym,
    bucket_size: (params[:bucket_size] || 60).to_i,
    time_range: (params[:time_range] || 3600).to_i
  }

  self.class.metrics_collector.time_series(**query).to_json
end
|
|
155
|
+
|
|
156
|
+
# API: Prometheus metrics endpoint; returns the exporter's output using the
# exporter's own content type.
get "/metrics" do
  content_type PrometheusExporter::CONTENT_TYPE

  exporter = self.class.prometheus_exporter
  exporter.export
end
|
|
161
|
+
|
|
162
|
+
# API: Get Prometheus metrics in JSON format (the exporter's metrics_hash).
get "/api/metrics" do
  content_type :json

  exporter = self.class.prometheus_exporter
  exporter.metrics_hash.to_json
end
|
|
167
|
+
|
|
168
|
+
# API: Register custom KPI.
#
# Expects a JSON body with `name`, `value`, optional `labels` (default {})
# and `help`. Any StandardError raised while parsing or registering is
# reported as a 400 with the error message.
post "/api/kpi" do
  content_type :json

  payload = JSON.parse(request.body.read, symbolize_names: true)
  kpi = {
    name: payload[:name],
    value: payload[:value],
    labels: payload[:labels] || {},
    help: payload[:help]
  }
  self.class.prometheus_exporter.register_kpi(**kpi)

  { success: true, message: "KPI registered" }.to_json
rescue StandardError => e
  status 400
  { error: e.message }.to_json
end
|
|
188
|
+
|
|
189
|
+
# API: Get active alerts as JSON.
get "/api/alerts" do
  content_type :json

  active = self.class.alert_manager.active_alerts
  active.to_json
end
|
|
194
|
+
|
|
195
|
+
# API: Get all alerts; `limit` query parameter defaults to 100 and is
# converted with to_i.
get "/api/alerts/all" do
  content_type :json

  max_alerts = (params[:limit] || 100).to_i
  alerts = self.class.alert_manager.all_alerts(limit: max_alerts)
  alerts.to_json
end
|
|
201
|
+
|
|
202
|
+
# API: Create alert rule.
#
# Expects a JSON body with `name`, `condition`, `condition_type`, and
# optional `severity` (default :warning), `threshold`, `message` and
# `cooldown` (default 300). Responds 201 with the rule as JSON, or 400 with
# the error message when anything raises a StandardError.
post "/api/alerts/rules" do
  content_type :json

  payload = JSON.parse(request.body.read, symbolize_names: true)

  # Parse the condition first so an invalid condition is what gets reported.
  parsed_condition = parse_alert_condition(payload[:condition], payload[:condition_type])

  created = self.class.alert_manager.add_rule(
    name: payload[:name],
    condition: parsed_condition,
    severity: (payload[:severity] || :warning).to_sym,
    threshold: payload[:threshold],
    message: payload[:message],
    cooldown: payload[:cooldown] || 300
  )

  status 201
  created.to_json
rescue StandardError => e
  status 400
  { error: e.message }.to_json
end
|
|
228
|
+
|
|
229
|
+
# API: Toggle alert rule.
#
# Reads `enabled` from the JSON body (a missing or null value counts as
# false) and passes it with the rule id to the alert manager. Any
# StandardError becomes a 400 carrying the error message.
put "/api/alerts/rules/:rule_id/toggle" do
  content_type :json

  payload = JSON.parse(request.body.read, symbolize_names: true)
  enabled = payload[:enabled] || false

  self.class.alert_manager.toggle_rule(params[:rule_id], enabled)

  state = enabled ? "enabled" : "disabled"
  { success: true, message: "Rule #{state}" }.to_json
rescue StandardError => e
  status 400
  { error: e.message }.to_json
end
|
|
245
|
+
|
|
246
|
+
# API: Acknowledge alert.
#
# The JSON body is optional. Its only field, `acknowledged_by`, defaults to
# "user", so an empty body is treated as {} instead of being handed to
# JSON.parse, which raises on empty input. Any StandardError becomes a 400
# carrying the error message.
post "/api/alerts/:alert_id/acknowledge" do
  content_type :json

  begin
    raw_body = request.body.read.to_s
    data = raw_body.strip.empty? ? {} : JSON.parse(raw_body, symbolize_names: true)
    acknowledged_by = data[:acknowledged_by] || "user"

    self.class.alert_manager.acknowledge_alert(params[:alert_id], acknowledged_by: acknowledged_by)

    { success: true, message: "Alert acknowledged" }.to_json
  rescue StandardError => e
    status 400
    { error: e.message }.to_json
  end
end
|
|
262
|
+
|
|
263
|
+
# API: Resolve alert.
#
# The JSON body is optional. Its only field, `resolved_by`, defaults to
# "user", so an empty body is treated as {} instead of being handed to
# JSON.parse, which raises on empty input. Any StandardError becomes a 400
# carrying the error message.
post "/api/alerts/:alert_id/resolve" do
  content_type :json

  begin
    raw_body = request.body.read.to_s
    data = raw_body.strip.empty? ? {} : JSON.parse(raw_body, symbolize_names: true)
    resolved_by = data[:resolved_by] || "user"

    self.class.alert_manager.resolve_alert(params[:alert_id], resolved_by: resolved_by)

    { success: true, message: "Alert resolved" }.to_json
  rescue StandardError => e
    status 400
    { error: e.message }.to_json
  end
end
|
|
279
|
+
|
|
280
|
+
# Health check: reports the gem version, the number of registered WebSocket
# clients and the collector's metrics count.
get "/health" do
  content_type :json
  {
    status: "ok",
    version: DecisionAgent::VERSION,
    # Array() keeps the count at 0 when the client list has not been
    # initialised yet, instead of calling #size on nil.
    websocket_clients: Array(self.class.websocket_clients).size,
    metrics_count: self.class.metrics_collector.metrics_count
  }.to_json
end
|
|
290
|
+
|
|
291
|
+
# Class method to start the server.
#
# Configures monitoring with the given collaborators, applies the port and
# bind address, then calls run!.
#
# @param metrics_collector [Object]
# @param prometheus_exporter [Object]
# @param alert_manager [Object]
# @param port [Integer] defaults to 4568
# @param host [String] defaults to "0.0.0.0"
def self.start!(metrics_collector:, prometheus_exporter:, alert_manager:, port: 4568, host: "0.0.0.0")
  collaborators = {
    metrics_collector: metrics_collector,
    prometheus_exporter: prometheus_exporter,
    alert_manager: alert_manager
  }
  configure_monitoring(**collaborators)

  set :port, port
  set :bind, host
  run!
end
|
|
303
|
+
|
|
304
|
+
private
|
|
305
|
+
|
|
306
|
+
# Handles one JSON message received from a WebSocket client.
#
# "subscribe" is answered with the collector's current statistics and
# "get_alerts" with the active alerts; any other action gets no reply. If
# parsing or handling raises a StandardError, an "error" message carrying
# the exception message is sent instead.
#
# @param ws [#send] socket to reply on
# @param data [String] raw JSON text from the client
def handle_websocket_message(ws, data)
  incoming = JSON.parse(data, symbolize_names: true)

  reply =
    case incoming[:action]
    when "subscribe"
      { type: "stats", data: self.class.metrics_collector.statistics }
    when "get_alerts"
      { type: "alerts", data: self.class.alert_manager.active_alerts }
    end

  ws.send(reply.to_json) if reply
rescue StandardError => e
  ws.send({ type: "error", message: e.message }.to_json)
end
|
|
321
|
+
|
|
322
|
+
# Builds the condition for an alert rule from request data.
#
# For the built-in types the matching AlertManager factory is called with
# the threshold taken from +condition_data+, or the default shown below when
# it is absent. A nil +condition_data+ is treated as {} for those types so
# the defaults apply instead of failing on nil. For "custom" the data is
# returned unchanged.
#
# @param condition_data [Hash, nil] options for the condition (symbol keys)
# @param condition_type [String, Symbol] "high_error_rate", "low_confidence",
#   "high_latency", "error_spike" or "custom"
# @return [Object] the factory's result, or +condition_data+ for "custom"
# @raise [ArgumentError] when the condition type is not recognised
def parse_alert_condition(condition_data, condition_type)
  options = condition_data || {}

  case condition_type.to_s
  when "high_error_rate"
    AlertManager.high_error_rate(threshold: options[:threshold] || 0.1)
  when "low_confidence"
    AlertManager.low_confidence(threshold: options[:threshold] || 0.5)
  when "high_latency"
    AlertManager.high_latency(threshold_ms: options[:threshold_ms] || 1000)
  when "error_spike"
    AlertManager.error_spike(threshold: options[:threshold] || 10)
  when "custom"
    condition_data
  else
    raise ArgumentError, "Unknown condition type: #{condition_type}"
  end
end
|
|
338
|
+
end
|
|
339
|
+
end
|
|
340
|
+
end
|