activematrix 0.0.5 → 0.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +96 -28
- data/app/jobs/active_matrix/application_job.rb +11 -0
- data/app/models/active_matrix/agent/jobs/memory_reaper.rb +87 -0
- data/app/models/active_matrix/agent.rb +166 -0
- data/app/models/active_matrix/agent_store.rb +80 -0
- data/app/models/active_matrix/application_record.rb +15 -0
- data/app/models/active_matrix/chat_session.rb +105 -0
- data/app/models/active_matrix/knowledge_base.rb +100 -0
- data/exe/activematrix +7 -0
- data/lib/active_matrix/agent_manager.rb +160 -121
- data/lib/active_matrix/agent_registry.rb +25 -21
- data/lib/active_matrix/api.rb +8 -2
- data/lib/active_matrix/async_query.rb +58 -0
- data/lib/active_matrix/bot/base.rb +3 -3
- data/lib/active_matrix/bot/builtin_commands.rb +188 -0
- data/lib/active_matrix/bot/command_parser.rb +175 -0
- data/lib/active_matrix/cli.rb +273 -0
- data/lib/active_matrix/client.rb +21 -6
- data/lib/active_matrix/client_pool.rb +38 -27
- data/lib/active_matrix/daemon/probe_server.rb +118 -0
- data/lib/active_matrix/daemon/signal_handler.rb +156 -0
- data/lib/active_matrix/daemon/worker.rb +109 -0
- data/lib/active_matrix/daemon.rb +236 -0
- data/lib/active_matrix/engine.rb +18 -0
- data/lib/active_matrix/errors.rb +1 -1
- data/lib/active_matrix/event_router.rb +61 -49
- data/lib/active_matrix/events.rb +1 -0
- data/lib/active_matrix/instrumentation.rb +148 -0
- data/lib/active_matrix/memory/agent_memory.rb +7 -21
- data/lib/active_matrix/memory/conversation_memory.rb +4 -20
- data/lib/active_matrix/memory/global_memory.rb +15 -30
- data/lib/active_matrix/message_dispatcher.rb +197 -0
- data/lib/active_matrix/metrics.rb +424 -0
- data/lib/active_matrix/presence_manager.rb +181 -0
- data/lib/active_matrix/railtie.rb +8 -0
- data/lib/active_matrix/telemetry.rb +134 -0
- data/lib/active_matrix/version.rb +1 -1
- data/lib/active_matrix.rb +18 -11
- data/lib/generators/active_matrix/install/install_generator.rb +3 -22
- data/lib/generators/active_matrix/install/templates/README +5 -2
- metadata +191 -31
- data/lib/generators/active_matrix/install/templates/agent_memory.rb +0 -47
- data/lib/generators/active_matrix/install/templates/conversation_context.rb +0 -72
- data/lib/generators/active_matrix/install/templates/create_agent_memories.rb +0 -17
- data/lib/generators/active_matrix/install/templates/create_conversation_contexts.rb +0 -21
- data/lib/generators/active_matrix/install/templates/create_global_memories.rb +0 -20
- data/lib/generators/active_matrix/install/templates/create_matrix_agents.rb +0 -26
- data/lib/generators/active_matrix/install/templates/global_memory.rb +0 -70
- data/lib/generators/active_matrix/install/templates/matrix_agent.rb +0 -127
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module ActiveMatrix
  class Daemon
    # Handles Unix signals for the daemon coordinator.
    #
    # Trap handlers only write the signal name to an internal pipe. A
    # background thread reads the names back from the pipe and performs the
    # actual work, so logging and process management never run inside trap
    # context.
    #
    # Signals:
    # - TERM/INT: Graceful shutdown
    # - HUP: Reload configuration and restart agents (currently only forwards HUP to workers)
    # - USR1: Log rotation (reopen log files)
    # - USR2: Dump debug information
    #
    class SignalHandler
      SIGNALS = %w[TERM INT HUP USR1 USR2].freeze

      attr_reader :daemon

      # @param daemon [Object] coordinator; this class calls #shutdown,
      #   #worker_pids and #status on it
      def initialize(daemon)
        @daemon = daemon
        @self_pipe_reader, @self_pipe_writer = IO.pipe
        @old_handlers = {}
      end

      # Traps every signal listed in SIGNALS and starts the processing thread.
      #
      # @return [Thread] the signal processing thread
      def install
        SIGNALS.each { |signal| install_handler(signal) }

        # Start signal processing thread
        start_processor
      end

      # Restores the handlers that were active before #install and closes both
      # ends of the pipe, which makes the processing thread leave its loop.
      def uninstall
        SIGNALS.each { |signal| restore_handler(signal) }

        @self_pipe_writer.close
        @self_pipe_reader.close
      end

      private

      # Traps +signal+ and remembers the previous handler so it can be restored.
      def install_handler(signal)
        @old_handlers[signal] = Signal.trap(signal) do
          # Write signal to pipe (non-blocking, safe in signal handler)
          @self_pipe_writer.write_nonblock("#{signal}\n")
        rescue Errno::EAGAIN, Errno::EWOULDBLOCK
          # Pipe full, signal will be coalesced
        end
      rescue ArgumentError
        # Signal not supported on this platform
        logger.debug "Signal #{signal} not supported"
      end

      # Puts back the handler recorded by #install_handler; signals that were
      # never trapped (unsupported on this platform) are skipped.
      def restore_handler(signal)
        return unless @old_handlers.key?(signal)

        Signal.trap(signal, @old_handlers[signal])
      end

      # Spawns the thread that drains the pipe and dispatches each signal name.
      #
      # The loop ends when the pipe is closed (IOError) or reaches EOF. Errors
      # raised by an individual handler are contained in #process_signal so a
      # single failing handler cannot stop later signals (notably TERM/INT)
      # from being processed.
      def start_processor
        Thread.new do
          Thread.current.name = 'activematrix-signal-processor'

          loop do
            # rubocop:disable Lint/IncompatibleIoSelectWithFiberScheduler
            ready = IO.select([@self_pipe_reader], nil, nil, 1)
            # rubocop:enable Lint/IncompatibleIoSelectWithFiberScheduler
            next unless ready

            line = @self_pipe_reader.gets
            # nil means EOF: the write end is closed and no further signals
            # can arrive, so stop instead of spinning on a readable pipe.
            break unless line

            process_signal(line.strip)
          rescue IOError
            break
          end
        end
      end

      # Runs the handler for +signal+, logging (instead of propagating) any
      # StandardError so the processing thread stays alive.
      def process_signal(signal)
        handle_signal(signal)
      rescue StandardError => e
        logger.error "Signal #{signal} handler failed: #{e.class}: #{e.message}"
      end

      # Maps a signal name to its handler; unknown names are only logged.
      def handle_signal(signal)
        logger.info "Received signal: #{signal}"

        case signal
        when 'TERM', 'INT'
          handle_shutdown
        when 'HUP'
          handle_reload
        when 'USR1'
          handle_log_rotation
        when 'USR2'
          handle_debug_dump
        end
      end

      def handle_shutdown
        logger.info 'Initiating graceful shutdown...'
        daemon.shutdown
      end

      def handle_reload
        logger.info 'Reloading agent configuration...'
        # TODO: Implement reload
        # 1. Query for new/removed agents
        # 2. Send HUP to workers for them to reload
        signal_workers('HUP')
      end

      def handle_log_rotation
        logger.info 'Rotating log files...'

        # Reopen stdout/stderr if they're files
        reopen_stream($stdout)
        reopen_stream($stderr)

        # Signal workers to rotate their logs too
        signal_workers('USR1')
      end

      def handle_debug_dump
        logger.info 'Dumping debug information...'

        # Dump current state
        status = daemon.status
        logger.info "Status: #{status.inspect}"

        # Dump thread backtraces
        Thread.list.each do |thread|
          logger.info "Thread: #{thread.name || thread.object_id}"
          logger.info thread.backtrace&.join("\n") || '(no backtrace)'
          logger.info '---'
        end
      end

      # Sends +signal+ to every worker PID reported by the daemon, ignoring
      # workers that have already exited.
      def signal_workers(signal)
        daemon.worker_pids.each do |pid|
          Process.kill(signal, pid)
        rescue Errno::ESRCH
          # Worker already dead
        end
      end

      # Reopens +stream+ in append mode at its own path when it has one, then
      # re-enables sync so output is not buffered.
      def reopen_stream(stream)
        return unless stream.respond_to?(:path) && stream.path

        stream.reopen(stream.path, 'a')
        stream.sync = true
      end

      def logger
        ActiveMatrix.logger
      end
    end
  end
end
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module ActiveMatrix
  class Daemon
    # Worker process that runs a subset of agents
    #
    # Each worker is a forked child process that:
    # - Initializes its own AgentManager
    # - Runs only assigned agents (by ID)
    # - Handles signals for graceful shutdown
    #
    class Worker
      attr_reader :index, :agent_ids

      # @param index [Integer] worker number, used in log lines and the process title
      # @param agent_ids [Array] IDs of the agents this worker should run
      def initialize(index:, agent_ids:)
        @index = index
        @agent_ids = agent_ids
        @running = false
      end

      # Prepares the process (title, signal traps, database connection) and
      # runs the assigned agents. Any StandardError is logged and re-raised;
      # the exit line is logged in all cases.
      def run
        @running = true

        set_process_name
        install_signal_handlers
        reconnect_database

        logger.info "Worker #{index} starting with agents: #{agent_ids.join(', ')}"

        run_agents
      rescue StandardError => e
        logger.error "Worker #{index} crashed: #{e.message}"
        logger.error e.backtrace.join("\n")
        raise
      ensure
        logger.info "Worker #{index} exiting"
      end

      private

      def set_process_name
        Process.setproctitle("activematrix[#{index}]: #{agent_ids.size} agents")
      end

      # Installs the worker's own traps for TERM, INT, HUP and USR1.
      def install_signal_handlers
        # TERM and INT share the same behaviour: flag the worker as stopping
        # and ask the agent manager to stop everything.
        # NOTE(review): stop_all still runs in trap context, as before —
        # confirm it does not take a Mutex or log.
        %w[TERM INT].each do |signal|
          Signal.trap(signal) do
            @running = false
            AgentManager.instance.stop_all
          end
        end

        Signal.trap('HUP') do
          # Reload - restart agents with new config
          # Logging is handed to a thread: Ruby refuses to lock a Mutex inside
          # a trap handler, so a mutex-backed logger would drop the line or
          # raise ThreadError if called here directly.
          Thread.new { logger.info "Worker #{index} received HUP, reloading..." }
          # For now, just log. Full reload would require:
          # 1. Stop all agents
          # 2. Re-query DB for agent list
          # 3. Start new agents
        end

        Signal.trap('USR1') do
          # Log rotation
          reopen_stream($stdout)
          reopen_stream($stderr)
        end
      end

      # Reopens +stream+ in append mode at its own path when it has one, then
      # re-enables sync.
      def reopen_stream(stream)
        return unless stream.respond_to?(:path) && stream.path

        stream.reopen(stream.path, 'a')
        stream.sync = true
      end

      def reconnect_database
        # After fork, we need fresh database connections
        ActiveRecord::Base.connection_handler.clear_active_connections!
        ActiveRecord::Base.establish_connection
      end

      # Loads the assigned agents and starts them through the AgentManager.
      # Returns early (after a warning) when none of the IDs match a record.
      def run_agents
        manager = AgentManager.instance

        # Install signal handlers for the manager
        manager.install_signal_handlers!

        # Load only our assigned agents
        agents = ActiveMatrix::Agent.where(id: agent_ids)

        if agents.empty?
          logger.warn "Worker #{index} has no agents to run"
          return
        end

        # Start the manager with only our agents
        Sync do
          manager.start_agents(agents)
        end
      end

      def logger
        ActiveMatrix.logger
      end
    end
  end
end
|
|
@@ -0,0 +1,236 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'daemon/signal_handler'
|
|
4
|
+
require_relative 'daemon/probe_server'
|
|
5
|
+
require_relative 'daemon/worker'
|
|
6
|
+
require_relative 'telemetry'
|
|
7
|
+
|
|
8
|
+
module ActiveMatrix
  # Main daemon coordinator for managing Matrix bot agents
  #
  # Responsibilities:
  # - Fork and manage worker processes
  # - Distribute agents across workers
  # - Handle signals (TERM, INT, HUP, USR1, USR2)
  # - Run HTTP health probe server
  # - Monitor worker health and restart on crash
  #
  class Daemon
    attr_reader :workers_count, :probe_port, :probe_host, :agent_names, :start_time

    # @param workers [Integer] number of worker processes to spread agents over
    # @param probe_port [Integer] port for the probe server
    # @param probe_host [String] bind address for the probe server
    # @param agent_names [Array<String>, nil] when present, only agents with these names are run
    def initialize(workers: 1, probe_port: 3042, probe_host: '127.0.0.1', agent_names: nil)
      @workers_count = workers
      @probe_port = probe_port
      @probe_host = probe_host
      @agent_names = agent_names
      @workers = {} # { pid => { index:, agent_ids: } }
      @crashed_workers = []
      @running = false
      @start_time = nil
    end

    # @return [Array<Integer>] PIDs of the workers currently being tracked
    def worker_pids
      @workers.keys
    end

    # Starts signal handling, the probe server and the workers, then blocks in
    # the monitor loop until the daemon is asked to stop. #shutdown always runs
    # on the way out.
    def run
      @start_time = Time.zone.now
      @running = true

      logger.info "Starting ActiveMatrix daemon (workers: #{workers_count}, probe: #{probe_host}:#{probe_port})"

      # Initialize OpenTelemetry if available
      logger.info 'OpenTelemetry tracing enabled' if Telemetry.configure!

      install_signal_handlers
      start_probe_server
      start_workers

      monitor_loop
    ensure
      shutdown
    end

    # Stops the probe server and all workers, then shuts telemetry down.
    # Does nothing when the daemon is not running, so repeated calls are safe.
    #
    # NOTE(review): this is reachable both from #run's ensure and from the
    # signal handler's processing thread; the @running check is not
    # synchronized between them.
    def shutdown
      return unless @running

      @running = false
      logger.info 'Shutting down ActiveMatrix daemon...'

      stop_probe_server
      stop_workers

      Telemetry.shutdown

      logger.info 'ActiveMatrix daemon stopped'
    end

    # @return [Hash] :status ('ok' or 'stopping'), :uptime in whole seconds,
    #   :workers (tracked worker count) and :agents (per-state counts)
    def status
      {
        status: @running ? 'ok' : 'stopping',
        uptime: @start_time ? (Time.zone.now - @start_time).to_i : 0,
        workers: @workers.size,
        agents: aggregate_agent_status
      }
    end

    private

    def logger
      ActiveMatrix.logger
    end

    def install_signal_handlers
      @signal_handler = SignalHandler.new(self)
      @signal_handler.install
    end

    def start_probe_server
      @probe_server = ProbeServer.new(
        host: probe_host,
        port: probe_port,
        daemon: self
      )
      @probe_server.start
    end

    def stop_probe_server
      @probe_server&.stop
    end

    # Forks one worker per non-empty agent group and records it by PID.
    def start_workers
      agents = load_agents
      agent_groups = distribute_agents(agents, workers_count)

      agent_groups.each_with_index do |agent_ids, index|
        next if agent_ids.empty?

        pid = fork_worker(index, agent_ids)
        @workers[pid] = { index: index, agent_ids: agent_ids }
        logger.info "Started worker #{index} (PID: #{pid}) with #{agent_ids.size} agents"
      end
    end

    # Forks a child that runs a Worker for the given agents.
    #
    # @return [Integer] PID of the forked child
    def fork_worker(index, agent_ids)
      fork do
        # Reset signal handlers in child
        Signal.trap('TERM') { exit } # rubocop:disable Rails/Exit
        Signal.trap('INT') { exit } # rubocop:disable Rails/Exit

        # Close parent's probe server socket
        @probe_server&.stop

        # Run worker
        worker = Worker.new(index: index, agent_ids: agent_ids)
        worker.run
      end
    end

    # Sends TERM to every worker, waits up to the configured shutdown timeout
    # (30 seconds when unset) for them to exit, then KILLs whatever is left.
    def stop_workers
      timeout = ActiveMatrix.config.shutdown_timeout || 30

      # Send TERM to all workers
      @workers.each_key do |pid|
        Process.kill('TERM', pid)
      rescue Errno::ESRCH
        # Already dead
      end

      # Wait for graceful shutdown. The deadline uses the monotonic clock so
      # it is unaffected by wall-clock adjustments and does not require a
      # configured time zone.
      deadline = monotonic_now + timeout
      while @workers.any? && monotonic_now < deadline
        @workers.reject! { |pid, _| worker_exited?(pid) }
        sleep 0.5 if @workers.any?
      end

      # Force kill remaining
      @workers.each_key do |pid|
        logger.warn "Force killing worker #{pid}"
        Process.kill('KILL', pid)
        Process.waitpid(pid)
      rescue Errno::ESRCH, Errno::ECHILD
        # Already dead
      end

      @workers.clear
    end

    # Non-blocking check: truthy once +pid+ has exited (and has been reaped)
    # or is no longer a child of this process.
    def worker_exited?(pid)
      Process.waitpid(pid, Process::WNOHANG)
    rescue Errno::ECHILD
      true
    end

    def monotonic_now
      Process.clock_gettime(Process::CLOCK_MONOTONIC)
    end

    # Once per second: reap exited workers and restart them while running.
    def monitor_loop
      while @running
        # Reap dead children
        reap_workers

        # Restart crashed workers
        restart_crashed_workers if @running

        sleep 1
      end
    end

    # Collects every exited child without blocking. Tracked workers are
    # removed from @workers and queued in @crashed_workers for restart.
    def reap_workers
      @crashed_workers = []

      loop do
        pid = Process.waitpid(-1, Process::WNOHANG)
        break unless pid

        if @workers.key?(pid)
          worker_info = @workers.delete(pid)
          logger.warn "Worker #{worker_info[:index]} (PID: #{pid}) exited"
          @crashed_workers << worker_info
        end
      rescue Errno::ECHILD
        break
      end
    end

    # Re-forks each queued worker with its previous index and agent IDs.
    # Entries are stored in the same { index:, agent_ids: } shape that
    # #start_workers uses.
    def restart_crashed_workers
      return if @crashed_workers.nil? || @crashed_workers.empty?

      @crashed_workers.each do |worker_info|
        break unless @running

        # Restart with same agent assignment
        index = worker_info[:index]
        agent_ids = worker_info[:agent_ids]
        pid = fork_worker(index, agent_ids)
        @workers[pid] = { index: index, agent_ids: agent_ids }
        logger.info "Restarted worker #{index} (PID: #{pid}) with #{agent_ids.size} agents"
      end

      @crashed_workers.clear
    end

    # @return [Array] IDs of agents whose state is not offline, limited to
    #   agent_names when that filter is present
    def load_agents
      scope = ActiveMatrix::Agent.where.not(state: :offline)
      scope = scope.where(name: agent_names) if agent_names.present?
      scope.pluck(:id)
    end

    # Splits +agent_ids+ into +num_workers+ groups, round-robin. With one
    # worker (or fewer) everything goes into a single group. Groups may be
    # empty when there are fewer agents than workers.
    #
    # @return [Array<Array>] one array of agent IDs per worker slot
    def distribute_agents(agent_ids, num_workers)
      return [agent_ids] if num_workers <= 1

      # Round-robin distribution
      groups = Array.new(num_workers) { [] }
      agent_ids.each_with_index do |id, index|
        groups[index % num_workers] << id
      end
      groups
    end

    def aggregate_agent_status
      # In multi-process mode, we'd need IPC to get real status
      # For now, query the database
      agents = ActiveMatrix::Agent.all

      {
        total: agents.count,
        online: agents.where(state: %i[online_idle online_busy]).count,
        connecting: agents.where(state: :connecting).count,
        error: agents.where(state: :error).count,
        offline: agents.where(state: :offline).count
      }
    end
  end
end
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'rails/engine'
|
|
4
|
+
|
|
5
|
+
module ActiveMatrix
  # Rails engine for ActiveMatrix; registers the boot-time initializers below.
  class Engine < Rails::Engine
    engine_name('activematrix')

    # Point ActiveMatrix's logger at the Rails logger.
    initializer('activematrix.configure_logger') { ActiveMatrix.logger = Rails.logger }

    # Ordered after the logger initializer above.
    initializer('activematrix.initialize_metrics', after: 'activematrix.configure_logger') do
      # Eagerly initialize Metrics singleton to subscribe to notifications
      ActiveMatrix::Metrics.instance
    end
  end
end
|
data/lib/active_matrix/errors.rb
CHANGED
|
@@ -54,7 +54,7 @@ module ActiveMatrix
|
|
|
54
54
|
# An error raised when errors occur in the connection layer
|
|
55
55
|
class MatrixConnectionError < MatrixError
|
|
56
56
|
# Chooses the error class for a connection-layer failure: a code whose
# integer value (via #to_i) is 504 maps to MatrixTimeoutError, anything
# else to MatrixConnectionError.
def self.class_by_code(code)
  code.to_i == 504 ? MatrixTimeoutError : MatrixConnectionError
end
|