activematrix 0.0.7 → 0.0.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +96 -28
- data/app/models/active_matrix/agent.rb +36 -1
- data/app/models/active_matrix/agent_store.rb +29 -0
- data/app/models/active_matrix/application_record.rb +8 -0
- data/app/models/active_matrix/chat_session.rb +29 -0
- data/app/models/active_matrix/knowledge_base.rb +26 -0
- data/exe/activematrix +7 -0
- data/lib/active_matrix/agent_manager.rb +160 -121
- data/lib/active_matrix/agent_registry.rb +25 -21
- data/lib/active_matrix/api.rb +8 -2
- data/lib/active_matrix/async_query.rb +58 -0
- data/lib/active_matrix/bot/base.rb +3 -3
- data/lib/active_matrix/bot/builtin_commands.rb +188 -0
- data/lib/active_matrix/bot/command_parser.rb +175 -0
- data/lib/active_matrix/cli.rb +273 -0
- data/lib/active_matrix/client.rb +21 -6
- data/lib/active_matrix/client_pool.rb +38 -27
- data/lib/active_matrix/daemon/probe_server.rb +118 -0
- data/lib/active_matrix/daemon/signal_handler.rb +156 -0
- data/lib/active_matrix/daemon/worker.rb +109 -0
- data/lib/active_matrix/daemon.rb +236 -0
- data/lib/active_matrix/engine.rb +7 -3
- data/lib/active_matrix/errors.rb +1 -1
- data/lib/active_matrix/event_router.rb +61 -49
- data/lib/active_matrix/events.rb +1 -0
- data/lib/active_matrix/instrumentation.rb +148 -0
- data/lib/active_matrix/memory/agent_memory.rb +7 -21
- data/lib/active_matrix/memory/conversation_memory.rb +4 -20
- data/lib/active_matrix/memory/global_memory.rb +15 -30
- data/lib/active_matrix/message_dispatcher.rb +197 -0
- data/lib/active_matrix/metrics.rb +424 -0
- data/lib/active_matrix/presence_manager.rb +181 -0
- data/lib/active_matrix/telemetry.rb +134 -0
- data/lib/active_matrix/version.rb +1 -1
- data/lib/active_matrix.rb +12 -2
- data/lib/generators/active_matrix/install/install_generator.rb +3 -15
- data/lib/generators/active_matrix/install/templates/README +5 -2
- data/lib/generators/active_matrix/install/templates/active_matrix.yml +32 -0
- metadata +142 -45
- data/lib/active_matrix/protocols/cs/message_relationships.rb +0 -318
- data/lib/generators/active_matrix/install/templates/create_agent_memories.rb +0 -17
- data/lib/generators/active_matrix/install/templates/create_conversation_contexts.rb +0 -21
- data/lib/generators/active_matrix/install/templates/create_global_memories.rb +0 -20
- data/lib/generators/active_matrix/install/templates/create_matrix_agents.rb +0 -26
|
@@ -0,0 +1,424 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'concurrent'
|
|
4
|
+
require 'singleton'
|
|
5
|
+
|
|
6
|
+
module ActiveMatrix
|
|
7
|
+
# Metrics collection for Matrix agent operations
|
|
8
|
+
# Provides structured metrics that can be exported to monitoring systems
|
|
9
|
+
#
|
|
10
|
+
# @example Getting agent metrics
|
|
11
|
+
# metrics = ActiveMatrix::Metrics.instance.get_agent_metrics('agent_123')
|
|
12
|
+
# puts metrics[:overall_success_rate]
|
|
13
|
+
#
|
|
14
|
+
# @example Getting health summary
|
|
15
|
+
# summary = ActiveMatrix::Metrics.instance.get_health_summary
|
|
16
|
+
# puts "Healthy agents: #{summary[:healthy_agents]}"
|
|
17
|
+
#
|
|
18
|
+
class Metrics
|
|
19
|
+
include Singleton
|
|
20
|
+
|
|
21
|
+
# Creates the two metric stores and subscribes the collector to notifications.
def initialize
  # Component-level rollups, keyed by "agent_id:component".
  @component_metrics = Concurrent::Hash.new
  # Per-operation entries, keyed by "agent_id:component:operation".
  @metrics = Concurrent::Hash.new
  setup_notification_subscribers
end
|
|
26
|
+
|
|
27
|
+
# Record operation metrics
|
|
28
|
+
#
|
|
29
|
+
# @param operation [Symbol, String] Operation name
|
|
30
|
+
# @param component [String] Component name (e.g., 'MessageDispatcher')
|
|
31
|
+
# @param agent_id [String] Agent identifier
|
|
32
|
+
# @param status [String] 'success' or 'error'
|
|
33
|
+
# @param duration_ms [Float] Operation duration in milliseconds
|
|
34
|
+
# @param error_class [String, nil] Error class name if status is 'error'
|
|
35
|
+
# @param metadata [Hash] Additional metadata (user_id, room_id, etc.)
|
|
36
|
+
# rubocop:disable Metrics/ParameterLists
|
|
37
|
+
def record_operation(operation, component:, agent_id:, status:, duration_ms:, error_class: nil, **metadata)
  component_key = "#{agent_id}:#{component}"
  operation_key = "#{component_key}:#{operation}"

  # Lazily create the component rollup and the per-operation entry.
  @component_metrics[component_key] ||= initialize_component_metrics(component, agent_id)
  @metrics[operation_key] ||= initialize_operation_metrics(operation, component, agent_id)

  # Component-level rollup first, then the per-operation entry.
  update_component_metrics(@component_metrics[component_key], status, duration_ms)

  entry = @metrics[operation_key]
  entry[:total_count].increment
  entry[:last_operation_at] = Time.current

  # Durations are tracked for every call, whatever its status.
  update_duration_stats(entry[:duration_stats], duration_ms)

  case status
  when 'success'
    entry[:success_count].increment
  when 'error'
    entry[:error_count].increment
    entry[:last_error_at] = Time.current

    # Bucket the failure by error class, then error_type, then 'unknown'.
    breakdown = entry[:error_breakdown]
    error_type = error_class || metadata[:error_type] || 'unknown'
    breakdown[error_type] ||= Concurrent::AtomicFixnum.new(0)
    breakdown[error_type].increment
  end

  # Sliding window of the most recent operations; only a whitelisted
  # subset of the metadata is retained per entry.
  recent = entry[:recent_operations]
  kept_metadata = metadata.merge(error_class: error_class).slice(:error_type, :error_class, :user_id, :room_id)
  recent << {
    timestamp: Time.current,
    status: status,
    duration_ms: duration_ms,
    metadata: kept_metadata
  }

  # Drop the oldest entry once the window grows past 100.
  recent.shift if recent.size > 100
end
|
|
79
|
+
# rubocop:enable Metrics/ParameterLists
|
|
80
|
+
|
|
81
|
+
# Get metrics for a specific agent instance
|
|
82
|
+
#
|
|
83
|
+
# @param agent_id [String] Agent identifier
|
|
84
|
+
# @return [Hash] Agent metrics including components, success rates, and health status
|
|
85
|
+
def get_agent_metrics(agent_id)
  # Match on the agent_id stored inside each entry instead of parsing the
  # "agent:component:operation" key. Agent ids may themselves contain ':'
  # (e.g. '@bot:example.com'), which makes key splitting and prefix
  # matching return the wrong component/operation or the wrong agent.
  wanted_agent = agent_id.to_s
  agent_metrics = @metrics.values.select { |entry| entry[:agent_id].to_s == wanted_agent }

  return {} if agent_metrics.empty?

  components = {}
  total_operations = 0
  total_successes = 0
  total_errors = 0

  agent_metrics.each do |metrics|
    # to_s keeps the result keys as strings, as they were when taken from the key.
    component = metrics[:component].to_s
    operation = metrics[:operation].to_s

    bucket = (components[component] ||= {
      operations: {},
      total_count: 0,
      success_count: 0,
      error_count: 0
    })

    total_count = metrics[:total_count].value
    success_count = metrics[:success_count].value
    error_count = metrics[:error_count].value

    # Roll the operation up into its component ...
    bucket[:total_count] += total_count
    bucket[:success_count] += success_count
    bucket[:error_count] += error_count

    # ... and into the agent-wide totals.
    total_operations += total_count
    total_successes += success_count
    total_errors += error_count

    bucket[:operations][operation] = {
      total_count: total_count,
      success_count: success_count,
      error_count: error_count,
      success_rate: calculate_success_rate(metrics),
      avg_duration_ms: metrics[:duration_stats][:avg].value,
      p95_duration_ms: metrics[:duration_stats][:p95].value,
      last_operation_at: metrics[:last_operation_at],
      last_error_at: metrics[:last_error_at],
      error_breakdown: serialize_error_breakdown(metrics[:error_breakdown])
    }
  end

  {
    agent_id: agent_id,
    total_operations: total_operations,
    total_successes: total_successes,
    total_errors: total_errors,
    overall_success_rate: total_operations.positive? ? (total_successes.to_f / total_operations * 100).round(2) : 0,
    components: components,
    health_status: calculate_agent_health(total_operations, total_successes)
  }
end
|
|
142
|
+
|
|
143
|
+
# Get metrics for a specific component
|
|
144
|
+
#
|
|
145
|
+
# @param agent_id [String] Agent identifier
|
|
146
|
+
# @param component [String] Component name
|
|
147
|
+
# @return [Hash] Component metrics
|
|
148
|
+
def get_component_metrics(agent_id, component)
  component_metrics = @component_metrics["#{agent_id}:#{component}"]

  return default_component_metrics if component_metrics.nil?

  # Select and name operations from the fields stored in each entry.
  # Splitting the "agent:component:operation" key is unreliable because
  # agent ids may contain ':' themselves (e.g. '@bot:example.com').
  wanted_agent = agent_id.to_s
  wanted_component = component.to_s
  operations = @metrics.values.select do |entry|
    entry[:agent_id].to_s == wanted_agent && entry[:component].to_s == wanted_component
  end

  {
    component: component,
    agent_id: agent_id,
    total_operations: component_metrics[:total_count].value,
    success_count: component_metrics[:success_count].value,
    error_count: component_metrics[:error_count].value,
    success_rate: calculate_success_rate(component_metrics),
    avg_duration_ms: component_metrics[:duration_stats][:avg].value,
    p95_duration_ms: component_metrics[:duration_stats][:p95].value,
    # Operation names are exposed as strings, matching the key-derived form.
    operations: operations.each_with_object({}) do |entry, summary|
      summary[entry[:operation].to_s] = operation_summary(entry)
    end
  }
end
|
|
169
|
+
|
|
170
|
+
# Get top operations by volume
|
|
171
|
+
#
|
|
172
|
+
# @param agent_id [String] Agent identifier
|
|
173
|
+
# @param limit [Integer] Maximum number of operations to return
|
|
174
|
+
# @return [Array<Hash>] Top operations sorted by count
|
|
175
|
+
def top_operations_by_volume(agent_id, limit: 10)
  # Use the fields stored in each entry; the composite key cannot be split
  # reliably because agent ids may contain ':' (e.g. '@bot:example.com').
  wanted_agent = agent_id.to_s
  agent_metrics = @metrics.values.select { |entry| entry[:agent_id].to_s == wanted_agent }

  operations = agent_metrics.map do |metrics|
    {
      component: metrics[:component].to_s,
      operation: metrics[:operation].to_s,
      count: metrics[:total_count].value,
      success_rate: calculate_success_rate(metrics),
      avg_duration_ms: metrics[:duration_stats][:avg].value
    }
  end

  # Busiest operations first.
  operations.sort_by { |op| -op[:count] }.first(limit)
end
|
|
191
|
+
|
|
192
|
+
# Get recent errors
|
|
193
|
+
#
|
|
194
|
+
# @param agent_id [String] Agent identifier
|
|
195
|
+
# @param limit [Integer] Maximum number of errors to return
|
|
196
|
+
# @return [Array<Hash>] Recent errors sorted by timestamp (newest first)
|
|
197
|
+
def recent_errors(agent_id, limit: 20)
  # Use the fields stored in each entry; the composite key cannot be split
  # reliably because agent ids may contain ':' (e.g. '@bot:example.com').
  wanted_agent = agent_id.to_s
  agent_metrics = @metrics.values.select { |entry| entry[:agent_id].to_s == wanted_agent }

  errors = agent_metrics.flat_map do |metrics|
    # to_a snapshots the sliding window before it is filtered.
    failed = metrics[:recent_operations].to_a.select { |op| op[:status] == 'error' }

    failed.map do |error_op|
      {
        timestamp: error_op[:timestamp],
        component: metrics[:component].to_s,
        operation: metrics[:operation].to_s,
        duration_ms: error_op[:duration_ms],
        metadata: error_op[:metadata]
      }
    end
  end

  # Newest first.
  errors.sort_by { |e| -e[:timestamp].to_f }.first(limit)
end
|
|
219
|
+
|
|
220
|
+
# Get health summary for all agents
|
|
221
|
+
#
|
|
222
|
+
# @return [Hash] Summary of agent health across the system
|
|
223
|
+
def get_health_summary
  # Read agent ids from the stored entries. Taking the text before the first
  # ':' of the key truncates ids that contain ':' (e.g. '@bot:example.com').
  agent_ids = @metrics.values.map { |entry| entry[:agent_id].to_s }.uniq

  agents = agent_ids.map { |agent_id| get_agent_metrics(agent_id) }

  # Compact per-agent view for the summary payload.
  per_agent = agents.map do |agent|
    {
      agent_id: agent[:agent_id],
      health_status: agent[:health_status],
      success_rate: agent[:overall_success_rate],
      total_operations: agent[:total_operations]
    }
  end

  {
    total_agents: agents.length,
    healthy_agents: agents.count { |a| a[:health_status] == :healthy },
    degraded_agents: agents.count { |a| a[:health_status] == :degraded },
    unhealthy_agents: agents.count { |a| a[:health_status] == :unhealthy },
    total_operations: agents.sum { |a| a[:total_operations] },
    overall_success_rate: calculate_overall_success_rate(agents),
    agents: per_agent
  }
end
|
|
245
|
+
|
|
246
|
+
# Reset all metrics (useful for testing)
|
|
247
|
+
def reset!
  # Empty both stores in place; the component store is the return value.
  [@metrics, @component_metrics].each(&:clear).last
end
|
|
251
|
+
|
|
252
|
+
# Reset metrics for specific agent
|
|
253
|
+
#
|
|
254
|
+
# @param agent_id [String] Agent identifier
|
|
255
|
+
def reset_agent!(agent_id)
  # Compare against the agent_id stored in each entry. A "#{agent_id}:" key
  # prefix also matches other agents whose id extends this one with ':',
  # e.g. '@bot:example.com' vs '@bot:example.com:8448'.
  wanted_agent = agent_id.to_s
  [@metrics, @component_metrics].each do |store|
    store.delete_if { |_, entry| entry[:agent_id].to_s == wanted_agent }
  end
  ActiveMatrix.logger.info("Reset metrics for Matrix agent: #{agent_id}")
end
|
|
260
|
+
|
|
261
|
+
private
|
|
262
|
+
|
|
263
|
+
# Subscribes to every "activematrix.*" notification and records each event
# as an operation, timed from the notification's start and finish values.
def setup_notification_subscribers
  ActiveSupport::Notifications.subscribe(/^activematrix\./) do |name, start, finish, _id, payload|
    # The operation name is the event name without its namespace prefix.
    operation = name.sub('activematrix.', '')
    duration_ms = ((finish - start) * 1000).round(2)

    # Fall back to placeholders when the payload names no component or agent.
    component = payload[:component] || 'Unknown'
    agent_id = payload[:agent_id] || 'unknown'

    record_operation(
      operation,
      component: component,
      agent_id: agent_id,
      status: payload[:status],
      duration_ms: duration_ms,
      error_type: payload[:error_category],
      error_class: payload[:error_class],
      user_id: payload[:user_id],
      room_id: payload[:room_id]
    )
  end
end
|
|
282
|
+
|
|
283
|
+
# Builds the zeroed rollup record for one (agent, component) pair.
#
# @param component [String] Component name
# @param agent_id [String] Agent identifier
# @return [Hash] Counters, duration stats and creation timestamp
def initialize_component_metrics(component, agent_id)
  rollup = { component: component, agent_id: agent_id }
  %i[total_count success_count error_count].each do |counter|
    rollup[counter] = Concurrent::AtomicFixnum.new(0)
  end
  rollup[:duration_stats] = initialize_duration_stats
  rollup[:created_at] = Time.current
  rollup
end
|
|
294
|
+
|
|
295
|
+
# Builds the zeroed record for one (agent, component, operation) triple.
#
# @param operation [Symbol, String] Operation name
# @param component [String] Component name
# @param agent_id [String] Agent identifier
# @return [Hash] Counters, duration stats, error breakdown, recent-operation
#   window and timestamps
def initialize_operation_metrics(operation, component, agent_id)
  entry = { operation: operation, component: component, agent_id: agent_id }
  %i[total_count success_count error_count].each do |counter|
    entry[counter] = Concurrent::AtomicFixnum.new(0)
  end
  entry[:duration_stats] = initialize_duration_stats
  entry[:error_breakdown] = Concurrent::Hash.new
  entry[:recent_operations] = Concurrent::Array.new
  entry[:created_at] = Time.current
  # Stay nil until the first operation / first error is recorded.
  entry[:last_operation_at] = nil
  entry[:last_error_at] = nil
  entry
end
|
|
311
|
+
|
|
312
|
+
# Builds an empty duration-statistics container.
#
# @return [Concurrent::Hash] total/count counters, avg/min/max/p95 references
#   and the window of raw duration values
def initialize_duration_stats
  stats = Concurrent::Hash.new
  stats[:total] = Concurrent::AtomicFixnum.new(0)
  stats[:count] = Concurrent::AtomicFixnum.new(0)
  stats[:avg] = Concurrent::AtomicReference.new(0)
  # Starts at infinity so the first recorded duration becomes the minimum.
  stats[:min] = Concurrent::AtomicReference.new(Float::INFINITY)
  stats[:max] = Concurrent::AtomicReference.new(0)
  stats[:p95] = Concurrent::AtomicReference.new(0)
  stats[:values] = Concurrent::Array.new
  stats
end
|
|
323
|
+
|
|
324
|
+
# Applies one operation result to a component-level rollup.
#
# @param component_metrics [Hash] Rollup built by initialize_component_metrics
# @param status [String] 'success' or 'error'; any other value only bumps the total
# @param duration_ms [Float] Operation duration in milliseconds
def update_component_metrics(component_metrics, status, duration_ms)
  component_metrics[:total_count].increment

  # Pick the status-specific counter, if the status has one.
  outcome_counter = { 'success' => :success_count, 'error' => :error_count }[status]
  component_metrics[outcome_counter].increment if outcome_counter

  update_duration_stats(component_metrics[:duration_stats], duration_ms)
end
|
|
336
|
+
|
|
337
|
+
# Folds one duration sample into a stats container.
#
# @param stats [Hash] Container built by initialize_duration_stats
# @param duration_ms [Float] Operation duration in milliseconds
def update_duration_stats(stats, duration_ms)
  # The running total is kept in hundredths of a millisecond so an integer
  # counter can preserve two decimal places.
  stats[:total].increment((duration_ms * 100).to_i)
  samples_seen = stats[:count].increment
  stats[:avg].set((stats[:total].value.to_f / samples_seen / 100).round(2))

  stats[:min].update { |lowest| [lowest, duration_ms].min }
  stats[:max].update { |highest| [highest, duration_ms].max }

  # Sliding window of raw durations, capped at 1000, for the percentile.
  window = stats[:values]
  window << duration_ms
  window.shift if window.size > 1000

  snapshot = window.to_a
  p95_value =
    if snapshot.size >= 20
      snapshot.sort[(snapshot.length * 0.95).ceil - 1]
    elsif snapshot.size.positive?
      # With fewer than 20 samples the maximum stands in for P95.
      snapshot.max
    end

  stats[:p95].set(p95_value.round(2)) if p95_value
end
|
|
363
|
+
|
|
364
|
+
# Success percentage for a metrics record, rounded to two decimals.
#
# @param metrics [Hash] Record whose :total_count and :success_count respond to #value
# @return [Numeric] 0 when nothing has been recorded, otherwise a percentage
def calculate_success_rate(metrics)
  attempts = metrics[:total_count].value

  if attempts.zero?
    0
  else
    successes = metrics[:success_count].value
    ((successes.to_f / attempts) * 100).round(2)
  end
end
|
|
370
|
+
|
|
371
|
+
# Classifies an agent by its success percentage.
#
# @param total_operations [Integer] Operations recorded for the agent
# @param success_count [Integer] Operations that succeeded
# @return [Symbol] :unknown below 10 operations, otherwise :healthy (>= 95%),
#   :degraded (>= 80%) or :unhealthy
def calculate_agent_health(total_operations, success_count)
  # Too few samples to judge.
  return :unknown if total_operations < 10

  success_rate = (success_count.to_f / total_operations * 100)
  return :healthy if success_rate >= 95
  return :degraded if success_rate >= 80

  :unhealthy
end
|
|
384
|
+
|
|
385
|
+
# Success percentage across several agent summaries, weighted by operation count.
#
# @param agents [Array<Hash>] Summaries carrying :total_operations and :total_successes
# @return [Numeric] 0 when there are no agents or no operations, otherwise a
#   percentage rounded to two decimals
def calculate_overall_success_rate(agents)
  return 0 if agents.empty?

  operations = agents.sum { |agent| agent[:total_operations] }
  return 0 if operations.zero?

  successes = agents.sum { |agent| agent[:total_successes] }
  ratio = successes.to_f / operations
  (ratio * 100).round(2)
end
|
|
394
|
+
|
|
395
|
+
# Converts an error breakdown of counter objects into plain counts.
#
# @param error_breakdown [Hash] Error type => object responding to #value
# @return [Hash] Error type => current count
def serialize_error_breakdown(error_breakdown)
  error_breakdown.transform_values { |counter| counter.value }
end
|
|
398
|
+
|
|
399
|
+
# Condenses one operation record into plain counts, success rate and durations.
#
# @param metrics [Hash] Operation record as built by initialize_operation_metrics
# @return [Hash] Snapshot with counts, success rate and avg/p95 durations
def operation_summary(metrics)
  durations = metrics[:duration_stats]
  summary = %i[total_count success_count error_count].each_with_object({}) do |counter, acc|
    acc[counter] = metrics[counter].value
  end

  summary[:success_rate] = calculate_success_rate(metrics)
  summary[:avg_duration_ms] = durations[:avg].value
  summary[:p95_duration_ms] = durations[:p95].value
  summary
end
|
|
409
|
+
|
|
410
|
+
# Placeholder result with zeroed figures and no operations.
#
# @return [Hash] Zeroed component metrics labelled 'Unknown' / 'unknown'
def default_component_metrics
  placeholder = { component: 'Unknown', agent_id: 'unknown' }
  zeroed_fields = %i[total_operations success_count error_count success_rate avg_duration_ms p95_duration_ms]
  zeroed_fields.each { |field| placeholder[field] = 0 }
  placeholder[:operations] = {}
  placeholder
end
|
|
423
|
+
end
|
|
424
|
+
end
|
|
@@ -0,0 +1,181 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'concurrent'
|
|
4
|
+
|
|
5
|
+
module ActiveMatrix
|
|
6
|
+
# Manages Matrix presence for agents
|
|
7
|
+
# Provides automatic presence updates, wake hour awareness, and graceful shutdown
|
|
8
|
+
#
|
|
9
|
+
# @example Basic usage
|
|
10
|
+
# presence = ActiveMatrix::PresenceManager.new(api: api, user_id: '@bot:example.com')
|
|
11
|
+
# presence.start
|
|
12
|
+
# presence.set_online(status_msg: 'Ready to help')
|
|
13
|
+
# # ... later
|
|
14
|
+
# presence.stop
|
|
15
|
+
#
|
|
16
|
+
# @example With wake hours
|
|
17
|
+
# presence = ActiveMatrix::PresenceManager.new(
|
|
18
|
+
# api: api,
|
|
19
|
+
# user_id: '@bot:example.com',
|
|
20
|
+
# wake_hour: 6,
|
|
21
|
+
# sleep_hour: 22
|
|
22
|
+
# )
|
|
23
|
+
# presence.start # Will auto-set unavailable outside 6:00-22:00
|
|
24
|
+
#
|
|
25
|
+
class PresenceManager
|
|
26
|
+
include Instrumentation
|
|
27
|
+
|
|
28
|
+
attr_reader :user_id, :current_status, :current_message
|
|
29
|
+
|
|
30
|
+
# @param api [ActiveMatrix::Api] Matrix API instance
|
|
31
|
+
# @param user_id [String] The user ID to manage presence for
|
|
32
|
+
# @param refresh_interval [Integer] Seconds between presence refreshes (default: 300)
|
|
33
|
+
# @param wake_hour [Integer, nil] Hour (0-23) when bot becomes available (optional)
|
|
34
|
+
# @param sleep_hour [Integer, nil] Hour (0-23) when bot becomes unavailable (optional)
|
|
35
|
+
# @param timezone [String] Timezone for wake/sleep hours (default: system timezone)
|
|
36
|
+
def initialize(api:, user_id:, refresh_interval: 300, wake_hour: nil, sleep_hour: nil, timezone: nil)
  # Runtime state: nothing has been published yet and no refresh is scheduled.
  @current_status = 'offline'
  @current_message = nil
  @running = Concurrent::AtomicBoolean.new(false)
  @task = nil
  # Held by set_presence while it publishes and records a presence change.
  @mutex = Mutex.new

  # Configuration supplied by the caller.
  @api = api
  @user_id = user_id
  @refresh_interval = refresh_interval
  @wake_hour = wake_hour
  @sleep_hour = sleep_hour
  @timezone = timezone
end
|
|
50
|
+
|
|
51
|
+
# Start the presence manager
|
|
52
|
+
# Begins periodic presence updates
|
|
53
|
+
def start
  # make_true reports whether it changed the flag, so the check and the
  # transition are one atomic step. A separate `true?` check followed by
  # `make_true` lets two concurrent callers both start a refresh loop.
  return unless @running.make_true

  schedule_refresh
  ActiveMatrix.logger.info("PresenceManager started for #{@user_id}")
end
|
|
60
|
+
|
|
61
|
+
# Stop the presence manager
|
|
62
|
+
# Sets presence to offline and stops refresh loop
|
|
63
|
+
def stop
  # make_false reports whether it changed the flag, so only the caller that
  # actually performs the running -> stopped transition runs the shutdown.
  return unless @running.make_false

  @task&.cancel
  @task = nil

  # Publish offline on shutdown.
  set_offline
  ActiveMatrix.logger.info("PresenceManager stopped for #{@user_id}")
end
|
|
74
|
+
|
|
75
|
+
# Set presence to online
|
|
76
|
+
#
|
|
77
|
+
# @param status_msg [String, nil] Optional status message
|
|
78
|
+
def set_online(status_msg: nil)
  # set_presence may downgrade this to 'unavailable' outside wake hours.
  requested = 'online'
  set_presence(requested, status_msg)
end
|
|
81
|
+
|
|
82
|
+
# Set presence to unavailable
|
|
83
|
+
#
|
|
84
|
+
# @param status_msg [String, nil] Optional status message
|
|
85
|
+
def set_unavailable(status_msg: nil)
  requested = 'unavailable'
  set_presence(requested, status_msg)
end
|
|
88
|
+
|
|
89
|
+
# Set presence to offline
|
|
90
|
+
def set_offline
  # Offline presence is always sent without a status message.
  requested = 'offline'
  set_presence(requested, nil)
end
|
|
93
|
+
|
|
94
|
+
# Check if currently within wake hours
|
|
95
|
+
#
|
|
96
|
+
# @return [Boolean] true if within wake hours or no wake hours configured
|
|
97
|
+
def within_wake_hours?
  # No restriction unless both ends of the window are configured.
  return true if @wake_hour.nil? || @sleep_hour.nil?

  hour_now = current_time.hour
  past_wake = hour_now >= @wake_hour
  before_sleep = hour_now < @sleep_hour

  if @wake_hour < @sleep_hour
    # Same-day window, e.g. wake 6 / sleep 22: active 6:00 through 21:59.
    past_wake && before_sleep
  else
    # Window wraps midnight, e.g. wake 22 / sleep 6: active 22:00 through 5:59.
    past_wake || before_sleep
  end
end
|
|
110
|
+
|
|
111
|
+
# Get current presence status from server
|
|
112
|
+
#
|
|
113
|
+
# @return [Hash] Presence status including :presence and :status_msg
|
|
114
|
+
def get_status
  instrument_operation(:get_presence, user_id: @user_id) { @api.get_presence_status(@user_id) }
rescue StandardError => e
  # On failure, log and return the last locally recorded status instead.
  ActiveMatrix.logger.warn("Failed to get presence for #{@user_id}: #{e.message}")
  { presence: @current_status, status_msg: @current_message }
end
|
|
122
|
+
|
|
123
|
+
private
|
|
124
|
+
|
|
125
|
+
# Agent identifier for this manager: the managed user's id.
# NOTE(review): presumably read by the included Instrumentation module — confirm.
def agent_id
  @user_id
end
|
|
128
|
+
|
|
129
|
+
# Publishes a presence status through the API and records it locally.
# Errors are logged rather than raised.
#
# @param status [String] Requested presence ('online', 'unavailable', 'offline')
# @param message [String, nil] Status message sent with the presence
def set_presence(status, message)
  @mutex.synchronize do
    # 'online' requested outside wake hours is published as 'unavailable'.
    effective_status = status
    effective_status = 'unavailable' if status == 'online' && !within_wake_hours?

    instrument_operation(:set_presence, user_id: @user_id, status: effective_status) do
      @api.set_presence_status(@user_id, effective_status, message: message)
    end

    # Recorded only after the API call returned without raising.
    @current_status = effective_status
    @current_message = message

    ActiveMatrix.logger.debug("Presence set to #{effective_status} for #{@user_id}")
  end
rescue StandardError => e
  ActiveMatrix.logger.error("Failed to set presence for #{@user_id}: #{e.message}")
end
|
|
150
|
+
|
|
151
|
+
# Schedules the next presence refresh @refresh_interval seconds from now.
# The scheduled task re-arms itself, so refreshes repeat while the manager runs.
def schedule_refresh
  if @running.true?
    @task = Concurrent::ScheduledTask.execute(@refresh_interval) do
      refresh_presence
      schedule_refresh
    end
  end
end
|
|
159
|
+
|
|
160
|
+
# Re-publishes presence, re-evaluating wake hours first. Errors are logged.
def refresh_presence
  return unless @running.true?

  target_status =
    if within_wake_hours?
      # Inside wake hours: 'offline' becomes 'online', anything else is kept.
      @current_status == 'offline' ? 'online' : @current_status
    else
      'unavailable'
    end

  set_presence(target_status, @current_message)
rescue StandardError => e
  ActiveMatrix.logger.error("Error refreshing presence: #{e.message}")
end
|
|
172
|
+
|
|
173
|
+
# Current time, in the configured timezone when it can be resolved.
def current_time
  # Without a timezone (or without ActiveSupport::TimeZone) use Time.current.
  return Time.current unless @timezone && defined?(ActiveSupport::TimeZone)

  # An unresolved zone name also falls back to Time.current.
  zone = ActiveSupport::TimeZone[@timezone]
  zone&.now || Time.current
end
|
|
180
|
+
end
|
|
181
|
+
end
|