activematrix 0.0.7 → 0.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +96 -28
- data/app/models/active_matrix/agent.rb +36 -1
- data/app/models/active_matrix/agent_store.rb +29 -0
- data/app/models/active_matrix/application_record.rb +8 -0
- data/app/models/active_matrix/chat_session.rb +29 -0
- data/app/models/active_matrix/knowledge_base.rb +26 -0
- data/exe/activematrix +7 -0
- data/lib/active_matrix/agent_manager.rb +160 -121
- data/lib/active_matrix/agent_registry.rb +25 -21
- data/lib/active_matrix/api.rb +8 -2
- data/lib/active_matrix/async_query.rb +58 -0
- data/lib/active_matrix/bot/base.rb +3 -3
- data/lib/active_matrix/bot/builtin_commands.rb +188 -0
- data/lib/active_matrix/bot/command_parser.rb +175 -0
- data/lib/active_matrix/cli.rb +273 -0
- data/lib/active_matrix/client.rb +21 -6
- data/lib/active_matrix/client_pool.rb +38 -27
- data/lib/active_matrix/daemon/probe_server.rb +118 -0
- data/lib/active_matrix/daemon/signal_handler.rb +156 -0
- data/lib/active_matrix/daemon/worker.rb +109 -0
- data/lib/active_matrix/daemon.rb +236 -0
- data/lib/active_matrix/engine.rb +5 -1
- data/lib/active_matrix/errors.rb +1 -1
- data/lib/active_matrix/event_router.rb +61 -49
- data/lib/active_matrix/events.rb +1 -0
- data/lib/active_matrix/instrumentation.rb +148 -0
- data/lib/active_matrix/memory/agent_memory.rb +7 -21
- data/lib/active_matrix/memory/conversation_memory.rb +4 -20
- data/lib/active_matrix/memory/global_memory.rb +15 -30
- data/lib/active_matrix/message_dispatcher.rb +197 -0
- data/lib/active_matrix/metrics.rb +424 -0
- data/lib/active_matrix/presence_manager.rb +181 -0
- data/lib/active_matrix/telemetry.rb +134 -0
- data/lib/active_matrix/version.rb +1 -1
- data/lib/active_matrix.rb +12 -2
- data/lib/generators/active_matrix/install/install_generator.rb +3 -15
- data/lib/generators/active_matrix/install/templates/README +5 -2
- metadata +141 -45
- data/lib/active_matrix/protocols/cs/message_relationships.rb +0 -318
- data/lib/generators/active_matrix/install/templates/create_agent_memories.rb +0 -17
- data/lib/generators/active_matrix/install/templates/create_conversation_contexts.rb +0 -21
- data/lib/generators/active_matrix/install/templates/create_global_memories.rb +0 -20
- data/lib/generators/active_matrix/install/templates/create_matrix_agents.rb +0 -26
|
@@ -0,0 +1,236 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'daemon/signal_handler'
|
|
4
|
+
require_relative 'daemon/probe_server'
|
|
5
|
+
require_relative 'daemon/worker'
|
|
6
|
+
require_relative 'telemetry'
|
|
7
|
+
|
|
8
|
+
module ActiveMatrix
  # Main daemon coordinator for managing Matrix bot agents
  #
  # Responsibilities:
  # - Fork and manage worker processes
  # - Distribute agents across workers
  # - Handle signals (TERM, INT, HUP, USR1, USR2)
  # - Run HTTP health probe server
  # - Monitor worker health and restart on crash
  #
  class Daemon
    attr_reader :workers_count, :probe_port, :probe_host, :agent_names, :start_time

    # @param workers [Integer] number of worker processes to spread agents over
    # @param probe_port [Integer] port handed to the probe server
    # @param probe_host [String] host handed to the probe server
    # @param agent_names [Array<String>, nil] when present, only agents with
    #   these names are loaded; otherwise every non-offline agent is loaded
    def initialize(workers: 1, probe_port: 3042, probe_host: '127.0.0.1', agent_names: nil)
      @workers_count = workers
      @probe_port = probe_port
      @probe_host = probe_host
      @agent_names = agent_names
      @workers = {} # { pid => { index:, agent_ids: } }
      # Created here so restart_crashed_workers never depends on
      # reap_workers having run first.
      @crashed_workers = []
      @running = false
      @start_time = nil
    end

    # @return [Array<Integer>] PIDs of the workers currently being tracked
    def worker_pids
      @workers.keys
    end

    # Start the daemon and block in the monitor loop until it stops.
    #
    # Installs signal handlers, starts the probe server and the workers, then
    # monitors them. {#shutdown} always runs on the way out, including when
    # startup raises.
    def run
      @start_time = Time.zone.now
      @running = true

      logger.info "Starting ActiveMatrix daemon (workers: #{workers_count}, probe: #{probe_host}:#{probe_port})"

      # Initialize OpenTelemetry if available
      logger.info 'OpenTelemetry tracing enabled' if Telemetry.configure!

      install_signal_handlers
      start_probe_server
      start_workers

      monitor_loop
    ensure
      shutdown
    end

    # Stop the probe server and all workers, then shut telemetry down.
    #
    # Does nothing unless the daemon is running, so calling it again (for
    # example from the +ensure+ in {#run} after an earlier call) is harmless.
    def shutdown
      return unless @running

      @running = false
      logger.info 'Shutting down ActiveMatrix daemon...'

      stop_probe_server
      stop_workers

      Telemetry.shutdown

      logger.info 'ActiveMatrix daemon stopped'
    end

    # Snapshot of the daemon state.
    #
    # @return [Hash] +:status+ ('ok' or 'stopping'), +:uptime+ in whole
    #   seconds, +:workers+ (tracked worker count) and +:agents+ (per-state
    #   agent counts)
    def status
      {
        status: @running ? 'ok' : 'stopping',
        uptime: @start_time ? (Time.zone.now - @start_time).to_i : 0,
        workers: @workers.size,
        agents: aggregate_agent_status
      }
    end

    private

    def logger
      ActiveMatrix.logger
    end

    # Monotonic timestamp in seconds. Used for deadlines because it is not
    # affected by wall-clock adjustments.
    def monotonic_now
      Process.clock_gettime(Process::CLOCK_MONOTONIC)
    end

    def install_signal_handlers
      @signal_handler = SignalHandler.new(self)
      @signal_handler.install
    end

    def start_probe_server
      @probe_server = ProbeServer.new(
        host: probe_host,
        port: probe_port,
        daemon: self
      )
      @probe_server.start
    end

    def stop_probe_server
      @probe_server&.stop
    end

    # Load the agents, split them across the configured number of workers and
    # fork one worker per non-empty group.
    def start_workers
      agents = load_agents
      agent_groups = distribute_agents(agents, workers_count)

      agent_groups.each_with_index do |agent_ids, index|
        next if agent_ids.empty?

        pid = fork_worker(index, agent_ids)
        @workers[pid] = { index: index, agent_ids: agent_ids }
        logger.info "Started worker #{index} (PID: #{pid}) with #{agent_ids.size} agents"
      end
    end

    # Fork a child process that runs a Worker for the given agents.
    #
    # @param index [Integer] worker index
    # @param agent_ids [Array] ids of the agents this worker runs
    # @return [Integer] PID of the forked child
    def fork_worker(index, agent_ids)
      fork do
        # Reset signal handlers in child
        # NOTE(review): only TERM and INT are replaced here; any other handlers
        # installed by SignalHandler presumably stay active in the child - confirm.
        Signal.trap('TERM') { exit } # rubocop:disable Rails/Exit
        Signal.trap('INT') { exit } # rubocop:disable Rails/Exit

        # Close parent's probe server socket
        @probe_server&.stop

        # Run worker
        worker = Worker.new(index: index, agent_ids: agent_ids)
        worker.run
      end
    end

    # Ask every worker to terminate, wait up to the configured shutdown
    # timeout (30 seconds when none is configured), then force-kill whatever
    # is still running.
    def stop_workers
      timeout = ActiveMatrix.config.shutdown_timeout || 30

      # Send TERM to all workers
      @workers.each_key do |pid|
        Process.kill('TERM', pid)
      rescue Errno::ESRCH
        # Already dead
      end

      # Wait for graceful shutdown. The deadline is measured on the monotonic
      # clock so a wall-clock change cannot shorten or extend the grace period.
      deadline = monotonic_now + timeout
      while @workers.any? && monotonic_now < deadline
        @workers.reject! do |pid, _|
          # waitpid with WNOHANG returns nil while the child is still running
          Process.waitpid(pid, Process::WNOHANG)
        rescue Errno::ECHILD
          true
        end
        sleep 0.5 if @workers.any?
      end

      # Force kill remaining
      @workers.each_key do |pid|
        logger.warn "Force killing worker #{pid}"
        Process.kill('KILL', pid)
        Process.waitpid(pid)
      rescue Errno::ESRCH, Errno::ECHILD
        # Already dead
      end

      @workers.clear
    end

    # Once per second: reap exited children and, while still running, restart
    # the ones that were tracked workers.
    def monitor_loop
      while @running
        # Reap dead children
        reap_workers

        # Restart crashed workers
        restart_crashed_workers if @running

        sleep 1
      end
    end

    # Collect every child that has exited without blocking. Tracked workers
    # are removed from @workers and queued in @crashed_workers.
    def reap_workers
      @crashed_workers = []

      loop do
        pid = Process.waitpid(-1, Process::WNOHANG)
        break unless pid

        if @workers.key?(pid)
          worker_info = @workers.delete(pid)
          logger.warn "Worker #{worker_info[:index]} (PID: #{pid}) exited"
          @crashed_workers << worker_info
        end
      rescue Errno::ECHILD
        # No children left to wait for
        break
      end
    end

    # Fork a replacement for every worker queued by reap_workers, keeping its
    # index and agent assignment.
    def restart_crashed_workers
      return if @crashed_workers.empty?

      @crashed_workers.each do |worker_info|
        break unless @running

        # Restart with same agent assignment
        index = worker_info[:index]
        agent_ids = worker_info[:agent_ids]

        pid = fork_worker(index, agent_ids)
        # Same { index:, agent_ids: } shape that start_workers records, so no
        # stale :pid key is carried from one restart to the next.
        @workers[pid] = { index: index, agent_ids: agent_ids }
        logger.info "Restarted worker #{index} (PID: #{pid}) with #{agent_ids.size} agents"
      end

      @crashed_workers.clear
    end

    # @return [Array] ids of all agents whose state is not offline, limited to
    #   agent_names when that filter is present
    def load_agents
      scope = ActiveMatrix::Agent.where.not(state: :offline)
      scope = scope.where(name: agent_names) if agent_names.present?
      scope.pluck(:id)
    end

    # Split agent ids into one group per worker.
    #
    # @param agent_ids [Array] ids to distribute
    # @param num_workers [Integer] number of groups wanted
    # @return [Array<Array>] a single group when num_workers <= 1, otherwise
    #   num_workers groups filled round-robin (trailing groups may be empty)
    def distribute_agents(agent_ids, num_workers)
      return [agent_ids] if num_workers <= 1

      # Round-robin distribution
      groups = Array.new(num_workers) { [] }
      agent_ids.each_with_index do |id, index|
        groups[index % num_workers] << id
      end
      groups
    end

    # @return [Hash] agent counts: total, online, connecting, error, offline
    def aggregate_agent_status
      # In multi-process mode, we'd need IPC to get real status
      # For now, query the database
      agents = ActiveMatrix::Agent.all

      {
        total: agents.count,
        online: agents.where(state: %i[online_idle online_busy]).count,
        connecting: agents.where(state: :connecting).count,
        error: agents.where(state: :error).count,
        offline: agents.where(state: :offline).count
      }
    end
  end
end
|
data/lib/active_matrix/engine.rb
CHANGED
|
@@ -7,8 +7,12 @@ module ActiveMatrix
|
|
|
7
7
|
engine_name 'activematrix'
|
|
8
8
|
|
|
9
9
|
initializer 'activematrix.configure_logger' do
|
|
10
|
-
# Configure logger
|
|
11
10
|
ActiveMatrix.logger = Rails.logger
|
|
12
11
|
end
|
|
12
|
+
|
|
13
|
+
initializer 'activematrix.initialize_metrics', after: 'activematrix.configure_logger' do
|
|
14
|
+
# Eagerly initialize Metrics singleton to subscribe to notifications
|
|
15
|
+
ActiveMatrix::Metrics.instance
|
|
16
|
+
end
|
|
13
17
|
end
|
|
14
18
|
end
|
data/lib/active_matrix/errors.rb
CHANGED
|
@@ -54,7 +54,7 @@ module ActiveMatrix
|
|
|
54
54
|
# An error raised when errors occur in the connection layer
|
|
55
55
|
class MatrixConnectionError < MatrixError
|
|
56
56
|
def self.class_by_code(code)
|
|
57
|
-
return MatrixTimeoutError if code == 504
|
|
57
|
+
return MatrixTimeoutError if code.to_i == 504
|
|
58
58
|
|
|
59
59
|
MatrixConnectionError
|
|
60
60
|
end
|
|
@@ -1,19 +1,21 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require 'singleton'
|
|
4
|
-
require '
|
|
4
|
+
require 'async'
|
|
5
|
+
require 'async/queue'
|
|
5
6
|
|
|
6
7
|
module ActiveMatrix
|
|
7
|
-
# Routes Matrix events to appropriate agents
|
|
8
|
+
# Routes Matrix events to appropriate agents using async fibers
|
|
8
9
|
class EventRouter
|
|
9
10
|
include Singleton
|
|
10
11
|
include ActiveMatrix::Logging
|
|
11
12
|
|
|
12
13
|
def initialize
|
|
13
|
-
@routes =
|
|
14
|
-
@
|
|
14
|
+
@routes = []
|
|
15
|
+
@mutex = Mutex.new
|
|
16
|
+
@event_queue = nil
|
|
15
17
|
@processing = false
|
|
16
|
-
@
|
|
18
|
+
@worker_task = nil
|
|
17
19
|
end
|
|
18
20
|
|
|
19
21
|
# Register an event route
|
|
@@ -28,8 +30,10 @@ module ActiveMatrix
|
|
|
28
30
|
handler: block
|
|
29
31
|
}
|
|
30
32
|
|
|
31
|
-
@
|
|
32
|
-
|
|
33
|
+
@mutex.synchronize do
|
|
34
|
+
@routes << route
|
|
35
|
+
@routes.sort_by! { |r| -r[:priority] } # Higher priority first
|
|
36
|
+
end
|
|
33
37
|
|
|
34
38
|
logger.debug "Registered route: #{route.except(:handler).inspect}"
|
|
35
39
|
route[:id]
|
|
@@ -37,29 +41,35 @@ module ActiveMatrix
|
|
|
37
41
|
|
|
38
42
|
# Unregister a route
|
|
39
43
|
def unregister_route(route_id)
|
|
40
|
-
@
|
|
44
|
+
@mutex.synchronize do
|
|
45
|
+
@routes.delete_if { |route| route[:id] == route_id }
|
|
46
|
+
end
|
|
41
47
|
end
|
|
42
48
|
|
|
43
49
|
# Clear all routes for an agent
|
|
44
50
|
def clear_agent_routes(agent_id)
|
|
45
|
-
@
|
|
51
|
+
@mutex.synchronize do
|
|
52
|
+
@routes.delete_if { |route| route[:agent_id] == agent_id }
|
|
53
|
+
end
|
|
46
54
|
end
|
|
47
55
|
|
|
48
56
|
# Route an event to appropriate agents
|
|
49
57
|
def route_event(event)
|
|
50
|
-
return unless @processing
|
|
58
|
+
return unless @processing && @event_queue
|
|
51
59
|
|
|
52
60
|
# Queue the event for processing
|
|
53
|
-
@event_queue
|
|
61
|
+
@event_queue.enqueue(event)
|
|
54
62
|
end
|
|
55
63
|
|
|
56
|
-
# Start the event router
|
|
64
|
+
# Start the event router (call from within async context)
|
|
57
65
|
def start
|
|
58
66
|
return if @processing
|
|
59
67
|
|
|
60
68
|
@processing = true
|
|
61
|
-
@
|
|
62
|
-
|
|
69
|
+
@event_queue = Async::Queue.new
|
|
70
|
+
|
|
71
|
+
@worker_task = Async(transient: true) do |task|
|
|
72
|
+
task.annotate 'event-router'
|
|
63
73
|
process_events
|
|
64
74
|
end
|
|
65
75
|
|
|
@@ -69,20 +79,22 @@ module ActiveMatrix
|
|
|
69
79
|
# Stop the event router
|
|
70
80
|
def stop
|
|
71
81
|
@processing = false
|
|
72
|
-
@
|
|
73
|
-
@event_queue
|
|
82
|
+
@worker_task&.stop
|
|
83
|
+
@event_queue = nil
|
|
74
84
|
|
|
75
85
|
logger.info 'Event router stopped'
|
|
76
86
|
end
|
|
77
87
|
|
|
78
88
|
# Check if router is running
|
|
79
89
|
def running?
|
|
80
|
-
@processing && @
|
|
90
|
+
@processing && @worker_task&.alive?
|
|
81
91
|
end
|
|
82
92
|
|
|
83
93
|
# Get routes for debugging
|
|
84
94
|
def routes_summary
|
|
85
|
-
@
|
|
95
|
+
@mutex.synchronize do
|
|
96
|
+
@routes.map { |r| r.except(:handler) }
|
|
97
|
+
end
|
|
86
98
|
end
|
|
87
99
|
|
|
88
100
|
# Broadcast an event to all agents
|
|
@@ -99,9 +111,7 @@ module ActiveMatrix
|
|
|
99
111
|
def process_events
|
|
100
112
|
while @processing
|
|
101
113
|
begin
|
|
102
|
-
|
|
103
|
-
event = nil
|
|
104
|
-
Timeout.timeout(1) { event = @event_queue.pop }
|
|
114
|
+
event = @event_queue.dequeue
|
|
105
115
|
|
|
106
116
|
next unless event
|
|
107
117
|
|
|
@@ -113,36 +123,40 @@ module ActiveMatrix
|
|
|
113
123
|
next
|
|
114
124
|
end
|
|
115
125
|
|
|
116
|
-
# Process routes in priority order
|
|
126
|
+
# Process routes in priority order (each in its own fiber)
|
|
117
127
|
matching_routes.each do |route|
|
|
118
|
-
|
|
128
|
+
Async do
|
|
129
|
+
process_route(route, event)
|
|
130
|
+
end
|
|
119
131
|
end
|
|
120
|
-
rescue
|
|
121
|
-
|
|
132
|
+
rescue Async::Stop
|
|
133
|
+
break
|
|
122
134
|
rescue StandardError => e
|
|
123
135
|
logger.error "Event router error: #{e.message}"
|
|
124
|
-
logger.error e.backtrace.join("\n")
|
|
136
|
+
logger.error e.backtrace.first(10).join("\n")
|
|
125
137
|
end
|
|
126
138
|
end
|
|
127
139
|
end
|
|
128
140
|
|
|
129
141
|
def find_matching_routes(event)
|
|
130
|
-
@
|
|
131
|
-
|
|
132
|
-
|
|
142
|
+
@mutex.synchronize do
|
|
143
|
+
@routes.select do |route|
|
|
144
|
+
# Check room match
|
|
145
|
+
next false if route[:room_id] && route[:room_id] != event[:room_id]
|
|
133
146
|
|
|
134
|
-
|
|
135
|
-
|
|
147
|
+
# Check event type match
|
|
148
|
+
next false if route[:event_type] && route[:event_type] != event[:type]
|
|
136
149
|
|
|
137
|
-
|
|
138
|
-
|
|
150
|
+
# Check user match
|
|
151
|
+
next false if route[:user_id] && route[:user_id] != event[:sender]
|
|
139
152
|
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
153
|
+
# Check if agent is running
|
|
154
|
+
registry = AgentRegistry.instance
|
|
155
|
+
agent_entry = registry.get(route[:agent_id])
|
|
156
|
+
next false unless agent_entry
|
|
144
157
|
|
|
145
|
-
|
|
158
|
+
true
|
|
159
|
+
end
|
|
146
160
|
end
|
|
147
161
|
end
|
|
148
162
|
|
|
@@ -154,18 +168,16 @@ module ActiveMatrix
|
|
|
154
168
|
|
|
155
169
|
bot = agent_entry[:instance]
|
|
156
170
|
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
bot._handle_event(event)
|
|
164
|
-
end
|
|
165
|
-
rescue StandardError => e
|
|
166
|
-
logger.error "Error processing route for agent #{agent_entry[:record].name}: #{e.message}"
|
|
167
|
-
logger.error e.backtrace.first(5).join("\n")
|
|
171
|
+
if route[:handler]
|
|
172
|
+
# Custom handler
|
|
173
|
+
route[:handler].call(bot, event)
|
|
174
|
+
elsif bot.respond_to?(:_handle_event)
|
|
175
|
+
# Default handling
|
|
176
|
+
bot._handle_event(event)
|
|
168
177
|
end
|
|
178
|
+
rescue StandardError => e
|
|
179
|
+
logger.error "Error processing route for agent #{agent_entry[:record].name}: #{e.message}"
|
|
180
|
+
logger.error e.backtrace.first(5).join("\n")
|
|
169
181
|
end
|
|
170
182
|
end
|
|
171
183
|
|
data/lib/active_matrix/events.rb
CHANGED
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'active_support/concern'
|
|
4
|
+
require 'active_support/notifications'
|
|
5
|
+
require 'timeout'
|
|
6
|
+
require 'socket'
|
|
7
|
+
require 'json'
|
|
8
|
+
|
|
9
|
+
module ActiveMatrix
  # Instrumentation module for Matrix bot operations
  # Provides ActiveSupport::Notifications events and structured logging
  #
  # @example Include in a class
  #   class MyService
  #     include ActiveMatrix::Instrumentation
  #
  #     def perform
  #       instrument_operation(:my_operation, room_id: '!abc:matrix.org') do
  #         # ... operation code
  #       end
  #     end
  #   end
  #
  module Instrumentation
    extend ActiveSupport::Concern

    private

    # Instrument a Matrix bot operation with timing and result tracking
    #
    # Publishes an "activematrix.<operation>" notification whose payload holds
    # the caller's metadata plus :operation, :agent_id (nil unless the includer
    # responds to agent_id) and :component. On completion the payload also gets
    # :status, :duration_ms and either :result or the :error_* fields.
    #
    # @param operation [Symbol, String] Operation name (e.g., :send_message, :sync)
    # @param metadata [Hash] Additional context to include in the event
    # @yield Block to execute and instrument
    # @return [Object] Result of the block
    # @raise [StandardError] Re-raises any exception after logging
    def instrument_operation(operation, **metadata)
      event_data = metadata.merge(
        operation: operation,
        agent_id: respond_to?(:agent_id) ? agent_id : nil,
        component: self.class.name&.demodulize || 'Unknown'
      )

      ActiveSupport::Notifications.instrument(
        "activematrix.#{operation}",
        event_data
      ) do |payload|
        # Monotonic clock: durations are unaffected by wall-clock changes
        start_time = Process.clock_gettime(Process::CLOCK_MONOTONIC)

        begin
          result = yield

          payload[:status] = 'success'
          payload[:duration_ms] = ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - start_time) * 1000).round(2)
          payload[:result] = summarize_result(result)

          log_operation_result(operation, 'SUCCESS', payload)

          result
        rescue StandardError => e
          payload[:status] = 'error'
          payload[:duration_ms] = ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - start_time) * 1000).round(2)
          payload[:error_class] = e.class.name
          payload[:error_message] = e.message
          payload[:error_category] = classify_error(e)

          log_operation_result(operation, 'ERROR', payload)

          raise
        end
      end
    end

    # Classify errors for better monitoring and alerting
    #
    # @param error [StandardError] The error to classify
    # @return [String] Error category
    def classify_error(error)
      case error
      when Timeout::Error
        'timeout'
      when SocketError, Errno::ECONNREFUSED, Errno::EHOSTUNREACH, Errno::ENETUNREACH
        'network'
      when JSON::ParserError
        'parse'
      else
        # This file does not require 'openssl'; referencing the constant while
        # it is unloaded would raise NameError here and hide the original
        # error, so only test against it when it is defined.
        return 'ssl' if defined?(OpenSSL::SSL::SSLError) && error.is_a?(OpenSSL::SSL::SSLError)

        # The "Errors::" segment is optional so both namespacings match.
        # NOTE(review): errors.rb appears to define these classes directly
        # under ActiveMatrix - confirm and simplify the patterns if so.
        case error.class.name
        when /ActiveMatrix::(?:Errors::)?MatrixConnectionError/
          'matrix_connection'
        when /ActiveMatrix::(?:Errors::)?MatrixRequestError/
          'matrix_request'
        when /ActiveMatrix::(?:Errors::)?MatrixNotAuthorizedError/
          'matrix_auth'
        when /ActiveMatrix::(?:Errors::)?MatrixForbiddenError/
          'matrix_forbidden'
        when /ActiveMatrix::(?:Errors::)?MatrixNotFoundError/
          'matrix_not_found'
        when /PG::/
          'database'
        else
          'application'
        end
      end
    end

    # Log operation result with structured data
    #
    # Errors are logged at error level with the error class and message;
    # everything else is logged at debug level.
    #
    # @param operation [Symbol, String] Operation name
    # @param status [String] 'SUCCESS' or 'ERROR'
    # @param data [Hash] Event payload data
    def log_operation_result(operation, status, data)
      component = data[:component] || 'Unknown'
      agent_id = data[:agent_id] || 'unknown'

      message = "#{operation} - #{status}"
      message += " (#{data[:duration_ms]}ms)" if data[:duration_ms]
      message = "[#{component}][agent:#{agent_id}] #{message}"

      if status == 'ERROR'
        ActiveMatrix.logger.error("#{message}: #{data[:error_class]} - #{data[:error_message]}")
      else
        ActiveMatrix.logger.debug(message)
      end
    end

    # Summarize result for logging without exposing sensitive data
    #
    # Strings longer than 100 characters are cut to 97 characters plus "...";
    # hashes and arrays are reduced to their size; other objects to their
    # class name.
    #
    # @param result [Object] The result to summarize
    # @return [String] Human-readable summary
    def summarize_result(result)
      case result
      when String
        result.length > 100 ? "#{result[0...97]}..." : result
      when Numeric, true, false
        result.to_s
      when nil
        'nil'
      when Hash
        "Hash(#{result.keys.size} keys)"
      when Array
        "Array(#{result.size} items)"
      else
        # Anonymous classes have no name; fall back to their inspect-style
        # string so a String is always returned.
        result.class.name || result.class.to_s
      end
    end
  end
end
|
|
@@ -14,19 +14,15 @@ module ActiveMatrix
|
|
|
14
14
|
# Get a value from agent memory
|
|
15
15
|
def get(key)
|
|
16
16
|
fetch_with_cache(key) do
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
memory = @agent.agent_memories.active.find_by(key: key)
|
|
17
|
+
memory = @agent.agent_stores.active.find_by(key: key)
|
|
20
18
|
memory&.value
|
|
21
19
|
end
|
|
22
20
|
end
|
|
23
21
|
|
|
24
22
|
# Set a value in agent memory
|
|
25
23
|
def set(key, value, expires_in: nil)
|
|
26
|
-
return false unless defined?(::AgentMemory)
|
|
27
|
-
|
|
28
24
|
write_through(key, value, expires_in: expires_in) do
|
|
29
|
-
memory = @agent.
|
|
25
|
+
memory = @agent.agent_stores.find_or_initialize_by(key: key)
|
|
30
26
|
memory.value = value
|
|
31
27
|
memory.expires_at = expires_in.present? ? Time.current + expires_in : nil
|
|
32
28
|
memory.save!
|
|
@@ -35,43 +31,33 @@ module ActiveMatrix
|
|
|
35
31
|
|
|
36
32
|
# Check if a key exists
|
|
37
33
|
def exists?(key)
|
|
38
|
-
return false unless defined?(::AgentMemory)
|
|
39
|
-
|
|
40
34
|
if @cache_enabled && Rails.cache.exist?(cache_key(key))
|
|
41
35
|
true
|
|
42
36
|
else
|
|
43
|
-
@agent.
|
|
37
|
+
@agent.agent_stores.active.exists?(key: key)
|
|
44
38
|
end
|
|
45
39
|
end
|
|
46
40
|
|
|
47
41
|
# Delete a key
|
|
48
42
|
def delete(key)
|
|
49
|
-
return false unless defined?(::AgentMemory)
|
|
50
|
-
|
|
51
43
|
delete_through(key) do
|
|
52
|
-
@agent.
|
|
44
|
+
@agent.agent_stores.where(key: key).destroy_all.any?
|
|
53
45
|
end
|
|
54
46
|
end
|
|
55
47
|
|
|
56
48
|
# Get all keys
|
|
57
49
|
def keys
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
@agent.agent_memories.active.pluck(:key)
|
|
50
|
+
AsyncQuery.async_pluck(@agent.agent_stores.active, :key)
|
|
61
51
|
end
|
|
62
52
|
|
|
63
53
|
# Get all memory as hash
|
|
64
54
|
def all
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
@agent.agent_memories.active.pluck(:key, :value).to_h
|
|
55
|
+
AsyncQuery.async_pluck(@agent.agent_stores.active, :key, :value).to_h
|
|
68
56
|
end
|
|
69
57
|
|
|
70
58
|
# Clear all agent memory
|
|
71
59
|
def clear!
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
@agent.agent_memories.destroy_all
|
|
60
|
+
@agent.agent_stores.destroy_all
|
|
75
61
|
|
|
76
62
|
# Clear cache entries
|
|
77
63
|
keys.each { |key| Rails.cache.delete(cache_key(key)) } if @cache_enabled
|