activematrix 0.0.5 → 0.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +96 -28
  3. data/app/jobs/active_matrix/application_job.rb +11 -0
  4. data/app/models/active_matrix/agent/jobs/memory_reaper.rb +87 -0
  5. data/app/models/active_matrix/agent.rb +166 -0
  6. data/app/models/active_matrix/agent_store.rb +80 -0
  7. data/app/models/active_matrix/application_record.rb +15 -0
  8. data/app/models/active_matrix/chat_session.rb +105 -0
  9. data/app/models/active_matrix/knowledge_base.rb +100 -0
  10. data/exe/activematrix +7 -0
  11. data/lib/active_matrix/agent_manager.rb +160 -121
  12. data/lib/active_matrix/agent_registry.rb +25 -21
  13. data/lib/active_matrix/api.rb +8 -2
  14. data/lib/active_matrix/async_query.rb +58 -0
  15. data/lib/active_matrix/bot/base.rb +3 -3
  16. data/lib/active_matrix/bot/builtin_commands.rb +188 -0
  17. data/lib/active_matrix/bot/command_parser.rb +175 -0
  18. data/lib/active_matrix/cli.rb +273 -0
  19. data/lib/active_matrix/client.rb +21 -6
  20. data/lib/active_matrix/client_pool.rb +38 -27
  21. data/lib/active_matrix/daemon/probe_server.rb +118 -0
  22. data/lib/active_matrix/daemon/signal_handler.rb +156 -0
  23. data/lib/active_matrix/daemon/worker.rb +109 -0
  24. data/lib/active_matrix/daemon.rb +236 -0
  25. data/lib/active_matrix/engine.rb +18 -0
  26. data/lib/active_matrix/errors.rb +1 -1
  27. data/lib/active_matrix/event_router.rb +61 -49
  28. data/lib/active_matrix/events.rb +1 -0
  29. data/lib/active_matrix/instrumentation.rb +148 -0
  30. data/lib/active_matrix/memory/agent_memory.rb +7 -21
  31. data/lib/active_matrix/memory/conversation_memory.rb +4 -20
  32. data/lib/active_matrix/memory/global_memory.rb +15 -30
  33. data/lib/active_matrix/message_dispatcher.rb +197 -0
  34. data/lib/active_matrix/metrics.rb +424 -0
  35. data/lib/active_matrix/presence_manager.rb +181 -0
  36. data/lib/active_matrix/railtie.rb +8 -0
  37. data/lib/active_matrix/telemetry.rb +134 -0
  38. data/lib/active_matrix/version.rb +1 -1
  39. data/lib/active_matrix.rb +18 -11
  40. data/lib/generators/active_matrix/install/install_generator.rb +3 -22
  41. data/lib/generators/active_matrix/install/templates/README +5 -2
  42. metadata +191 -31
  43. data/lib/generators/active_matrix/install/templates/agent_memory.rb +0 -47
  44. data/lib/generators/active_matrix/install/templates/conversation_context.rb +0 -72
  45. data/lib/generators/active_matrix/install/templates/create_agent_memories.rb +0 -17
  46. data/lib/generators/active_matrix/install/templates/create_conversation_contexts.rb +0 -21
  47. data/lib/generators/active_matrix/install/templates/create_global_memories.rb +0 -20
  48. data/lib/generators/active_matrix/install/templates/create_matrix_agents.rb +0 -26
  49. data/lib/generators/active_matrix/install/templates/global_memory.rb +0 -70
  50. data/lib/generators/active_matrix/install/templates/matrix_agent.rb +0 -127
@@ -0,0 +1,197 @@
1
+ # frozen_string_literal: true
2
+
3
+ module ActiveMatrix
4
+ # Dispatches Matrix messages with retry logic and typing indicators
5
+ #
6
+ # @example Basic usage
7
+ # dispatcher = ActiveMatrix::MessageDispatcher.new(api: api, room_id: '!abc:matrix.org')
8
+ # dispatcher.send_text('Hello!')
9
+ #
10
+ # @example With typing indicator
11
+ # dispatcher.send_text('Thinking...', typing_delay: 2.0)
12
+ #
13
+ # @example Thread reply
14
+ # dispatcher.send_text('Reply', thread_id: '$event_id')
15
+ #
16
+ class MessageDispatcher
17
+ include Instrumentation
18
+
19
+ # Default configuration
20
+ DEFAULT_RETRY_COUNT = 3
21
+ DEFAULT_BASE_DELAY = 1.0
22
+ DEFAULT_TYPING_DELAY = 0.5
23
+ DEFAULT_TYPING_TIMEOUT = 30
24
+
25
+ attr_reader :api, :room_id, :user_id
26
+
27
+ # @param api [ActiveMatrix::Api] Matrix API instance
28
+ # @param room_id [String] Room ID to send messages to
29
+ # @param user_id [String] User ID for typing indicator
30
+ # @param retry_count [Integer] Number of retries on failure
31
+ # @param base_delay [Float] Base delay in seconds for exponential backoff
32
+ # @param typing_delay [Float] Default typing delay in seconds
33
def initialize(api:, room_id:, user_id:, retry_count: DEFAULT_RETRY_COUNT,
               base_delay: DEFAULT_BASE_DELAY, typing_delay: DEFAULT_TYPING_DELAY)
  # Where messages go and who is sending them.
  @api, @room_id, @user_id = api, room_id, user_id

  # Retry policy read by send_with_retry and calculate_backoff.
  @retry_count, @base_delay = retry_count, base_delay

  # Fallback used by send_with_typing when a call passes no typing_delay.
  @default_typing_delay = typing_delay
end
42
+
43
+ # Send a plain text message
44
+ #
45
+ # @param text [String] Message text
46
+ # @param msgtype [String] Message type (default: 'm.text')
47
+ # @param typing_delay [Float, nil] Seconds to show typing indicator (nil uses the dispatcher's default delay; 0 skips the indicator)
48
+ # @param thread_id [String, nil] Event ID to reply in thread
49
+ # @return [Hash] Response with :event_id
50
def send_text(text, msgtype: 'm.text', typing_delay: nil, thread_id: nil)
  # A plain-text event carries only a message type and a body.
  payload = { msgtype: msgtype, body: text }
  send_with_typing(payload, typing_delay: typing_delay, thread_id: thread_id)
end
58
+
59
+ # Send an HTML message
60
+ #
61
+ # @param html [String] HTML content
62
+ # @param body [String, nil] Plain text fallback (auto-generated if nil)
63
+ # @param msgtype [String] Message type (default: 'm.text')
64
+ # @param typing_delay [Float, nil] Seconds to show typing indicator
65
+ # @param thread_id [String, nil] Event ID to reply in thread
66
+ # @return [Hash] Response with :event_id
67
def send_html(html, body: nil, msgtype: 'm.text', typing_delay: nil, thread_id: nil)
  # `body` is the plain-text fallback; derive it from the markup when the
  # caller did not provide one.
  fallback = body
  fallback ||= strip_html(html)

  message = {
    msgtype: msgtype,
    body: fallback,
    format: 'org.matrix.custom.html',
    formatted_body: html
  }
  send_with_typing(message, typing_delay: typing_delay, thread_id: thread_id)
end
79
+
80
+ # Send a notice message (typically for bot responses)
81
+ #
82
+ # @param text [String] Notice text
83
+ # @param typing_delay [Float, nil] Seconds to show typing indicator
84
+ # @param thread_id [String, nil] Event ID to reply in thread
85
+ # @return [Hash] Response with :event_id
86
def send_notice(text, typing_delay: nil, thread_id: nil)
  # A notice is a text message sent with the 'm.notice' msgtype.
  delivery = { typing_delay: typing_delay, thread_id: thread_id }
  send_text(text, msgtype: 'm.notice', **delivery)
end
89
+
90
+ # Send an HTML notice message
91
+ #
92
+ # @param html [String] HTML content
93
+ # @param body [String, nil] Plain text fallback
94
+ # @param typing_delay [Float, nil] Seconds to show typing indicator
95
+ # @param thread_id [String, nil] Event ID to reply in thread
96
+ # @return [Hash] Response with :event_id
97
def send_html_notice(html, body: nil, typing_delay: nil, thread_id: nil)
  # Same as send_html, but with the 'm.notice' msgtype.
  delivery = { typing_delay: typing_delay, thread_id: thread_id }
  send_html(html, body: body, msgtype: 'm.notice', **delivery)
end
100
+
101
+ # Send an emote message (/me action)
102
+ #
103
+ # @param text [String] Emote text
104
+ # @param typing_delay [Float, nil] Seconds to show typing indicator
105
+ # @param thread_id [String, nil] Event ID to reply in thread
106
+ # @return [Hash] Response with :event_id
107
def send_emote(text, typing_delay: nil, thread_id: nil)
  # An emote is a text message sent with the 'm.emote' msgtype.
  delivery = { typing_delay: typing_delay, thread_id: thread_id }
  send_text(text, msgtype: 'm.emote', **delivery)
end
110
+
111
+ # Show typing indicator
112
+ #
113
+ # @param typing [Boolean] Whether to show or hide typing
114
+ # @param timeout [Integer] Timeout in seconds
115
def set_typing(typing: true, timeout: DEFAULT_TYPING_TIMEOUT)
  begin
    @api.set_typing(@room_id, @user_id, typing: typing, timeout: timeout)
  rescue StandardError => error
    # Best effort: a failed typing update is logged at debug level and
    # never propagates to the caller.
    ActiveMatrix.logger.debug("Failed to set typing indicator: #{error.message}")
  end
end
120
+
121
+ private
122
+
123
# Identifier reported for this dispatcher: the Matrix user ID it was
# constructed with.
# NOTE(review): presumably read by the Instrumentation mixin when tagging
# operations — confirm against that module.
def agent_id
  @user_id
end
126
+
127
# Optionally shows a typing indicator, attaches the thread relation and then
# hands the event content to send_with_retry.
#
# @param content [Hash] Event content; this method does not modify it
# @param typing_delay [Numeric, nil] Seconds to show the typing indicator;
#   nil falls back to the dispatcher default, zero or negative skips it
# @param thread_id [String, nil] Event ID of the thread to reply in
# @return [Object] Whatever send_with_retry returns
def send_with_typing(content, typing_delay:, thread_id:)
  effective_delay = typing_delay || @default_typing_delay

  if effective_delay.positive?
    begin
      set_typing(typing: true)
      sleep(effective_delay)
    ensure
      # Switch the indicator off even when the wait is interrupted, so it
      # is never left on by an exception.
      set_typing(typing: false)
    end
  end

  # Build a new hash for the thread relation rather than mutating the
  # caller's content.
  outgoing = content
  if thread_id
    outgoing = content.merge('m.relates_to': { rel_type: 'm.thread', event_id: thread_id })
  end

  send_with_retry(outgoing)
end
147
+
148
# Sends the event as an 'm.room.message', retrying request errors that
# retryable_error? accepts, up to @retry_count times with a backoff pause.
# Any other error, or an exhausted retry budget, re-raises the last error.
def send_with_retry(content)
  failures = 0

  # One instrumented operation spans every attempt.
  instrument_operation(:send_message, room_id: @room_id) do
    begin
      @api.send_message_event(@room_id, 'm.room.message', content)
    rescue ActiveMatrix::MatrixRequestError => error
      failures += 1
      # Give up once the budget is spent or the error is not retryable.
      raise unless failures <= @retry_count && retryable_error?(error)

      pause = calculate_backoff(failures)
      ActiveMatrix.logger.warn("Message send failed (attempt #{failures}/#{@retry_count}), retrying in #{pause}s: #{error.message}")
      sleep(pause)
      retry
    end
  end
end
166
+
167
# Decides whether a failed request may be retried: rate-limit errors always,
# other Matrix request errors only when the HTTP status is 500 or above.
# Every other kind of error is not retryable.
def retryable_error?(error)
  return true if error.is_a?(ActiveMatrix::MatrixTooManyRequestsError)
  return false unless error.is_a?(ActiveMatrix::MatrixRequestError)

  error.httpstatus.to_i >= 500
end
178
+
179
# Exponential backoff with full jitter: the ceiling doubles with each
# attempt (base, 2x base, 4x base, ...) and the returned delay is a random
# fraction of that ceiling.
def calculate_backoff(attempt)
  doublings = attempt - 1
  ceiling = @base_delay * (2**doublings)
  rand * ceiling
end
184
+
185
# Produces a plain-text fallback from an HTML fragment: <br> becomes a
# newline, all other tags are dropped, and a small set of entities is
# decoded.
#
# Entities are decoded in a single pass, so text that was itself escaped
# (e.g. "&amp;quot;") decodes once to "&quot;" instead of twice to '"'.
#
# @param html [String] HTML fragment
# @return [String] Stripped text with surrounding whitespace removed
def strip_html(html)
  entities = {
    '&nbsp;' => ' ',
    '&lt;' => '<',
    '&gt;' => '>',
    '&amp;' => '&',
    '&quot;' => '"',
    '&#39;' => "'",
    '&apos;' => "'"
  }

  text = html.gsub(%r{<br\s*/?>}i, "\n").gsub(%r{</?[^>]+>}, '')
  text.gsub(/&(?:nbsp|lt|gt|amp|quot|apos|#39);/, entities).strip
end
196
+ end
197
+ end
@@ -0,0 +1,424 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'concurrent'
4
+ require 'singleton'
5
+
6
+ module ActiveMatrix
7
+ # Metrics collection for Matrix agent operations
8
+ # Provides structured metrics that can be exported to monitoring systems
9
+ #
10
+ # @example Getting agent metrics
11
+ # metrics = ActiveMatrix::Metrics.instance.get_agent_metrics('agent_123')
12
+ # puts metrics[:overall_success_rate]
13
+ #
14
+ # @example Getting health summary
15
+ # summary = ActiveMatrix::Metrics.instance.get_health_summary
16
+ # puts "Healthy agents: #{summary[:healthy_agents]}"
17
+ #
18
+ class Metrics
19
+ include Singleton
20
+
21
def initialize
  # Component-level entries are keyed "agent_id:component"; operation-level
  # entries are keyed "agent_id:component:operation".
  @component_metrics = Concurrent::Hash.new
  @metrics = Concurrent::Hash.new

  # Subscribe last so both stores exist before any event is recorded.
  setup_notification_subscribers
end
26
+
27
+ # Record operation metrics
28
+ #
29
+ # @param operation [Symbol, String] Operation name
30
+ # @param component [String] Component name (e.g., 'MessageDispatcher')
31
+ # @param agent_id [String] Agent identifier
32
+ # @param status [String] 'success' or 'error'
33
+ # @param duration_ms [Float] Operation duration in milliseconds
34
+ # @param error_class [String, nil] Error class name if status is 'error'
35
+ # @param metadata [Hash] Additional metadata (user_id, room_id, etc.)
36
+ # rubocop:disable Metrics/ParameterLists
37
def record_operation(operation, component:, agent_id:, status:, duration_ms:, error_class: nil, **metadata)
  component_key = "#{agent_id}:#{component}"
  operation_key = "#{component_key}:#{operation}"

  # Normalize once so :success / :error are counted like the documented
  # 'success' / 'error' strings instead of only bumping the total.
  outcome = status.to_s
  # One timestamp per event keeps last_operation_at, last_error_at and the
  # recent-operations entry consistent with each other.
  recorded_at = Time.current

  # Create the component and operation entries on first use.
  @component_metrics[component_key] ||= initialize_component_metrics(component, agent_id)
  @metrics[operation_key] ||= initialize_operation_metrics(operation, component, agent_id)

  update_component_metrics(@component_metrics[component_key], outcome, duration_ms)

  metric = @metrics[operation_key]
  metric[:total_count].increment
  metric[:last_operation_at] = recorded_at

  # Durations are tracked for every outcome, not just successes.
  update_duration_stats(metric[:duration_stats], duration_ms)

  case outcome
  when 'success'
    metric[:success_count].increment
  when 'error'
    metric[:error_count].increment
    metric[:last_error_at] = recorded_at

    error_type = error_class || metadata[:error_type] || 'unknown'
    (metric[:error_breakdown][error_type] ||= Concurrent::AtomicFixnum.new(0)).increment
  end

  # Sliding window of the most recent operations; only a fixed set of
  # metadata keys is retained.
  recent = metric[:recent_operations]
  recent << {
    timestamp: recorded_at,
    status: outcome,
    duration_ms: duration_ms,
    metadata: metadata.merge(error_class: error_class).slice(:error_type, :error_class, :user_id, :room_id)
  }
  recent.shift if recent.size > 100
end
79
+ # rubocop:enable Metrics/ParameterLists
80
+
81
+ # Get metrics for a specific agent instance
82
+ #
83
+ # @param agent_id [String] Agent identifier
84
+ # @return [Hash] Agent metrics including components, success rates, and health status
85
def get_agent_metrics(agent_id)
  # Select by the agent_id stored in each entry instead of parsing the
  # composite key: agent IDs may contain ':', which made key splitting
  # report the wrong component/operation and let a prefix match pick up
  # other agents.
  wanted = agent_id.to_s
  agent_metrics = @metrics.values.select { |entry| entry[:agent_id].to_s == wanted }

  return {} if agent_metrics.empty?

  components = {}
  total_operations = 0
  total_successes = 0
  total_errors = 0

  agent_metrics.each do |entry|
    component = entry[:component].to_s
    operation = entry[:operation].to_s

    total_count = entry[:total_count].value
    success_count = entry[:success_count].value
    error_count = entry[:error_count].value

    # Roll the operation's counters up into its component ...
    bucket = (components[component] ||= { operations: {}, total_count: 0, success_count: 0, error_count: 0 })
    bucket[:total_count] += total_count
    bucket[:success_count] += success_count
    bucket[:error_count] += error_count

    # ... and into the agent-wide totals.
    total_operations += total_count
    total_successes += success_count
    total_errors += error_count

    bucket[:operations][operation] = {
      total_count: total_count,
      success_count: success_count,
      error_count: error_count,
      success_rate: calculate_success_rate(entry),
      avg_duration_ms: entry[:duration_stats][:avg].value,
      p95_duration_ms: entry[:duration_stats][:p95].value,
      last_operation_at: entry[:last_operation_at],
      last_error_at: entry[:last_error_at],
      error_breakdown: serialize_error_breakdown(entry[:error_breakdown])
    }
  end

  {
    agent_id: agent_id,
    total_operations: total_operations,
    total_successes: total_successes,
    total_errors: total_errors,
    overall_success_rate: total_operations.positive? ? (total_successes.to_f / total_operations * 100).round(2) : 0,
    components: components,
    health_status: calculate_agent_health(total_operations, total_successes)
  }
end
142
+
143
+ # Get metrics for a specific component
144
+ #
145
+ # @param agent_id [String] Agent identifier
146
+ # @param component [String] Component name
147
+ # @return [Hash] Component metrics
148
def get_component_metrics(agent_id, component)
  component_metrics = @component_metrics["#{agent_id}:#{component}"]

  return default_component_metrics if component_metrics.nil?

  # Pick operations by the agent/component stored in each entry and name
  # them by the stored operation; splitting the composite key on ':'
  # misnames operations whenever the agent ID itself contains ':'.
  wanted_agent = agent_id.to_s
  wanted_component = component.to_s
  operations = {}
  @metrics.values.each do |entry|
    next unless entry[:agent_id].to_s == wanted_agent && entry[:component].to_s == wanted_component

    operations[entry[:operation].to_s] = operation_summary(entry)
  end

  {
    component: component,
    agent_id: agent_id,
    total_operations: component_metrics[:total_count].value,
    success_count: component_metrics[:success_count].value,
    error_count: component_metrics[:error_count].value,
    success_rate: calculate_success_rate(component_metrics),
    avg_duration_ms: component_metrics[:duration_stats][:avg].value,
    p95_duration_ms: component_metrics[:duration_stats][:p95].value,
    operations: operations
  }
end
169
+
170
+ # Get top operations by volume
171
+ #
172
+ # @param agent_id [String] Agent identifier
173
+ # @param limit [Integer] Maximum number of operations to return
174
+ # @return [Array<Hash>] Top operations sorted by count
175
def top_operations_by_volume(agent_id, limit: 10)
  # Use the agent/component/operation stored in each entry; deriving them
  # by splitting the composite key on ':' is wrong for agent IDs that
  # contain ':'.
  wanted = agent_id.to_s
  agent_metrics = @metrics.values.select { |entry| entry[:agent_id].to_s == wanted }

  operations = agent_metrics.map do |entry|
    {
      component: entry[:component].to_s,
      operation: entry[:operation].to_s,
      count: entry[:total_count].value,
      success_rate: calculate_success_rate(entry),
      avg_duration_ms: entry[:duration_stats][:avg].value
    }
  end

  # Busiest operations first.
  operations.sort_by { |op| -op[:count] }.first(limit)
end
191
+
192
+ # Get recent errors
193
+ #
194
+ # @param agent_id [String] Agent identifier
195
+ # @param limit [Integer] Maximum number of errors to return
196
+ # @return [Array<Hash>] Recent errors sorted by timestamp (newest first)
197
def recent_errors(agent_id, limit: 20)
  # Use the agent/component/operation stored in each entry; deriving them
  # by splitting the composite key on ':' is wrong for agent IDs that
  # contain ':'.
  wanted = agent_id.to_s
  errors = []

  @metrics.values.each do |entry|
    next unless entry[:agent_id].to_s == wanted

    component = entry[:component].to_s
    operation = entry[:operation].to_s

    # Only failed entries from the recent-operations window are reported.
    entry[:recent_operations].to_a.each do |op|
      next unless op[:status] == 'error'

      errors << {
        timestamp: op[:timestamp],
        component: component,
        operation: operation,
        duration_ms: op[:duration_ms],
        metadata: op[:metadata]
      }
    end
  end

  # Newest first.
  errors.sort_by { |e| -e[:timestamp].to_f }.first(limit)
end
219
+
220
+ # Get health summary for all agents
221
+ #
222
+ # @return [Hash] Summary of agent health across the system
223
def get_health_summary
  # Collect agent IDs from the entries themselves; taking the key text
  # before the first ':' truncates IDs that contain ':' and merges
  # distinct agents that share that leading part.
  agent_ids = @metrics.values.map { |entry| entry[:agent_id] }.uniq

  agents = agent_ids.map { |agent_id| get_agent_metrics(agent_id) }

  {
    total_agents: agents.length,
    healthy_agents: agents.count { |a| a[:health_status] == :healthy },
    degraded_agents: agents.count { |a| a[:health_status] == :degraded },
    unhealthy_agents: agents.count { |a| a[:health_status] == :unhealthy },
    total_operations: agents.sum { |a| a[:total_operations] },
    overall_success_rate: calculate_overall_success_rate(agents),
    # Compact per-agent rows for dashboards or probes.
    agents: agents.map do |agent|
      {
        agent_id: agent[:agent_id],
        health_status: agent[:health_status],
        success_rate: agent[:overall_success_rate],
        total_operations: agent[:total_operations]
      }
    end
  }
end
245
+
246
+ # Reset all metrics (useful for testing)
247
def reset!
  # Empty both stores; the last expression is the cleared component store,
  # exactly as when the two clears are written out one after the other.
  [@metrics, @component_metrics].map(&:clear).last
end
251
+
252
+ # Reset metrics for specific agent
253
+ #
254
+ # @param agent_id [String] Agent identifier
255
def reset_agent!(agent_id)
  # Delete by the agent_id stored in each entry. A "#{agent_id}:" key-prefix
  # match would also wipe other agents whose ID merely starts with this one
  # followed by ':'.
  wanted = agent_id.to_s
  @metrics.delete_if { |_key, entry| entry[:agent_id].to_s == wanted }
  @component_metrics.delete_if { |_key, entry| entry[:agent_id].to_s == wanted }
  ActiveMatrix.logger.info("Reset metrics for Matrix agent: #{agent_id}")
end
260
+
261
+ private
262
+
263
# Subscribes to every "activematrix.*" notification and records each event
# as an operation. Component and agent fall back to placeholder values when
# the payload does not carry them.
def setup_notification_subscribers
  ActiveSupport::Notifications.subscribe(/^activematrix\./) do |event_name, started, finished, _id, payload|
    # The operation name is the event name without its namespace.
    operation_name = event_name.sub('activematrix.', '')
    elapsed_ms = ((finished - started) * 1000).round(2)
    source_component = payload[:component] || 'Unknown'
    source_agent = payload[:agent_id] || 'unknown'

    record_operation(
      operation_name,
      component: source_component,
      agent_id: source_agent,
      status: payload[:status],
      duration_ms: elapsed_ms,
      error_type: payload[:error_category],
      error_class: payload[:error_class],
      user_id: payload[:user_id],
      room_id: payload[:room_id]
    )
  end
end
282
+
283
# Builds the empty metrics entry for one component of one agent: identity,
# three zeroed counters, empty duration statistics and a creation time.
def initialize_component_metrics(component, agent_id)
  counters = %i[total_count success_count error_count].each_with_object({}) do |name, acc|
    acc[name] = Concurrent::AtomicFixnum.new(0)
  end

  { component: component, agent_id: agent_id }
    .merge(counters)
    .merge(duration_stats: initialize_duration_stats, created_at: Time.current)
end
294
+
295
# Builds the empty metrics entry for one operation: identity, three zeroed
# counters, empty duration statistics, an empty error breakdown, an empty
# recent-operations window and timestamps.
def initialize_operation_metrics(operation, component, agent_id)
  entry = { operation: operation, component: component, agent_id: agent_id }

  %i[total_count success_count error_count].each do |name|
    entry[name] = Concurrent::AtomicFixnum.new(0)
  end

  entry[:duration_stats] = initialize_duration_stats
  entry[:error_breakdown] = Concurrent::Hash.new
  entry[:recent_operations] = Concurrent::Array.new
  entry[:created_at] = Time.current
  # Filled in by record_operation once something has been recorded.
  entry[:last_operation_at] = nil
  entry[:last_error_at] = nil
  entry
end
311
+
312
# Builds an empty duration-statistics record. :total and :count are integer
# counters, :avg/:min/:max/:p95 hold the current derived values, and
# :values is the window of raw durations used for the percentile.
def initialize_duration_stats
  stats = Concurrent::Hash.new
  stats[:total] = Concurrent::AtomicFixnum.new(0)
  stats[:count] = Concurrent::AtomicFixnum.new(0)
  stats[:avg] = Concurrent::AtomicReference.new(0)
  # Start at infinity so the first recorded duration becomes the minimum.
  stats[:min] = Concurrent::AtomicReference.new(Float::INFINITY)
  stats[:max] = Concurrent::AtomicReference.new(0)
  stats[:p95] = Concurrent::AtomicReference.new(0)
  stats[:values] = Concurrent::Array.new
  stats
end
323
+
324
# Applies one operation to a component entry: bumps the total, bumps the
# success or error counter when status is 'success' or 'error', and records
# the duration. Any other status only counts towards the total.
def update_component_metrics(component_metrics, status, duration_ms)
  component_metrics[:total_count].increment

  outcome_counter = { 'success' => :success_count, 'error' => :error_count }[status]
  component_metrics[outcome_counter].increment if outcome_counter

  update_duration_stats(component_metrics[:duration_stats], duration_ms)
end
336
+
337
# Folds one duration into a statistics record: running total/count/average,
# min, max, and a 95th percentile computed over the last 1000 durations.
def update_duration_stats(stats, duration_ms)
  # The total is kept in hundredths of a millisecond so an integer counter
  # still preserves two decimals.
  stats[:total].increment((duration_ms * 100).to_i)
  samples_seen = stats[:count].increment
  running_avg = (stats[:total].value.to_f / samples_seen / 100).round(2)
  stats[:avg].set(running_avg)

  stats[:min].update { |lowest| [lowest, duration_ms].min }
  stats[:max].update { |highest| [highest, duration_ms].max }

  # Sliding window of raw durations for the percentile.
  window = stats[:values]
  window << duration_ms
  window.shift if window.size > 1000

  snapshot = window.to_a
  return if snapshot.empty?

  p95 =
    if snapshot.size >= 20
      ordered = snapshot.sort
      ordered[(ordered.length * 0.95).ceil - 1]
    else
      # With fewer than 20 samples the maximum stands in for the P95.
      snapshot.max
    end
  stats[:p95].set(p95.round(2))
end
363
+
364
# Success percentage (rounded to two decimals) for an entry exposing
# :total_count and :success_count counters; 0 when nothing was recorded.
def calculate_success_rate(metrics)
  attempts = metrics[:total_count].value
  if attempts.zero?
    0
  else
    successes = metrics[:success_count].value
    (successes.to_f / attempts * 100).round(2)
  end
end
370
+
371
# Classifies an agent by success rate: :unknown below 10 operations,
# :healthy at 95% or more, :degraded at 80% or more, otherwise :unhealthy.
def calculate_agent_health(total_operations, success_count)
  # Too few samples to judge.
  return :unknown if total_operations < 10

  rate = success_count.to_f / total_operations * 100
  return :healthy if rate >= 95

  rate >= 80 ? :degraded : :unhealthy
end
384
+
385
# Success percentage across all agents, weighted by operation count and
# rounded to two decimals; 0 when there are no agents or no operations.
def calculate_overall_success_rate(agents)
  operations = agents.sum { |agent| agent[:total_operations] }
  # An empty list sums to zero, so one guard covers both empty cases.
  return 0 if operations.zero?

  successes = agents.sum { |agent| agent[:total_successes] }
  (successes.to_f / operations * 100).round(2)
end
394
+
395
# Converts the per-error-type counters into plain numbers by reading each
# counter's current value.
def serialize_error_breakdown(error_breakdown)
  error_breakdown.transform_values { |counter| counter.value }
end
398
+
399
# Condensed view of one operation entry: raw counters, success rate and
# the average / P95 durations.
def operation_summary(metrics)
  durations = metrics[:duration_stats]
  summary = {}
  %i[total_count success_count error_count].each do |name|
    summary[name] = metrics[name].value
  end
  summary[:success_rate] = calculate_success_rate(metrics)
  summary[:avg_duration_ms] = durations[:avg].value
  summary[:p95_duration_ms] = durations[:p95].value
  summary
end
409
+
410
# Placeholder returned when a component has no recorded metrics: placeholder
# identity, every numeric field zero and no operations.
def default_component_metrics
  zeroed = %i[total_operations success_count error_count success_rate avg_duration_ms p95_duration_ms]
           .each_with_object({}) { |field, acc| acc[field] = 0 }

  { component: 'Unknown', agent_id: 'unknown' }.merge(zeroed).merge(operations: {})
end
423
+ end
424
+ end