natswork-server 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +0 -0
- data/LICENSE +21 -0
- data/README.md +286 -0
- data/lib/natswork/cli.rb +420 -0
- data/lib/natswork/error_tracker.rb +338 -0
- data/lib/natswork/health_check.rb +252 -0
- data/lib/natswork/instrumentation.rb +141 -0
- data/lib/natswork/job_executor.rb +271 -0
- data/lib/natswork/job_hooks.rb +63 -0
- data/lib/natswork/logger.rb +183 -0
- data/lib/natswork/metrics.rb +241 -0
- data/lib/natswork/middleware.rb +142 -0
- data/lib/natswork/middleware_chain.rb +40 -0
- data/lib/natswork/monitoring.rb +397 -0
- data/lib/natswork/protocol.rb +454 -0
- data/lib/natswork/queue_manager.rb +164 -0
- data/lib/natswork/retry_handler.rb +125 -0
- data/lib/natswork/server/version.rb +7 -0
- data/lib/natswork/server.rb +47 -0
- data/lib/natswork/simple_worker.rb +101 -0
- data/lib/natswork/thread_pool.rb +192 -0
- data/lib/natswork/worker.rb +217 -0
- data/lib/natswork/worker_manager.rb +62 -0
- data/lib/natswork-server.rb +5 -0
- metadata +151 -0

data/lib/natswork/retry_handler.rb
@@ -0,0 +1,125 @@
+# frozen_string_literal: true
+
+require 'json'
+
+module NatsWork
+  class RetryHandler
+    DEFAULT_MAX_RETRIES = 3
+    DEFAULT_BASE_DELAY = 1
+    DEFAULT_MAX_DELAY = 300 # 5 minutes
+
+    attr_reader :base_delay, :max_delay, :jitter, :strategy
+
+    def initialize(options = {})
+      @base_delay = options[:base_delay] || DEFAULT_BASE_DELAY
+      @max_delay = options[:max_delay] || DEFAULT_MAX_DELAY
+      @jitter = options[:jitter] || false
+      @strategy = options[:strategy] || :exponential
+
+      @retry_callbacks = []
+      @failure_callbacks = []
+    end
+
+    def should_retry?(job_message)
+      retry_count = job_message['retry_count'] || 0
+      max_retries = job_message['max_retries'] || DEFAULT_MAX_RETRIES
+
+      retry_count < max_retries
+    end
+
+    def calculate_delay(attempt)
+      delay = case @strategy
+              when :exponential
+                @base_delay * (2**attempt)
+              when :linear
+                @base_delay * (attempt + 1)
+              when :constant
+                @base_delay
+              when Proc
+                @strategy.call(attempt)
+              else
+                @base_delay * (2**attempt)
+              end
+
+      # Apply max delay cap
+      delay = [delay, @max_delay].min
+
+      # Apply jitter if enabled (±10% randomness)
+      if @jitter && delay.positive?
+        jitter_amount = delay * 0.1
+        delay += (rand * 2 - 1) * jitter_amount
+      end
+
+      delay
+    end
+
+    def schedule_retry(connection, job_message, error)
+      retry_count = (job_message['retry_count'] || 0) + 1
+      delay = calculate_delay(retry_count - 1)
+
+      retry_message = job_message.merge(
+        'retry_count' => retry_count,
+        'retry_at' => (Time.now + delay).iso8601,
+        'last_error' => {
+          'type' => error.class.name,
+          'message' => error.message,
+          'backtrace' => error.backtrace&.first(10) || []
+        }
+      )
+
+      # Track retry history
+      retry_history = retry_message['retry_history'] || []
+      retry_history << {
+        'attempt' => retry_count,
+        'error' => error.message,
+        'retried_at' => Time.now.iso8601
+      }
+      retry_message['retry_history'] = retry_history
+
+      # Publish to retry queue
+      retry_queue = "natswork.queue.retry.#{job_message['queue'] || 'default'}"
+      connection.publish(retry_queue, retry_message)
+
+      # Call retry callbacks
+      @retry_callbacks.each do |callback|
+        callback.call(job_message, error)
+      end
+    end
+
+    def send_to_dead_letter(connection, job_message, error)
+      dead_letter_message = job_message.merge(
+        'final_error' => {
+          'type' => error.class.name,
+          'message' => error.message,
+          'backtrace' => error.backtrace || []
+        },
+        'failed_at' => Time.now.iso8601,
+        'exhausted_retries' => true,
+        'total_attempts' => (job_message['retry_count'] || 0) + 1
+      )
+
+      connection.publish('natswork.queue.dead_letter', dead_letter_message)
+
+      # Call failure callbacks
+      @failure_callbacks.each do |callback|
+        callback.call(job_message, error)
+      end
+    end
+
+    def on_retry(&block)
+      @retry_callbacks << block
+    end
+
+    def on_failure(&block)
+      @failure_callbacks << block
+    end
+
+    def handle_failure(connection, job_message, error)
+      if should_retry?(job_message)
+        schedule_retry(connection, job_message, error)
+      else
+        send_to_dead_letter(connection, job_message, error)
+      end
+    end
+  end
+end
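
The RetryHandler above only needs an object that responds to #publish, so it can be exercised without a NATS server. The following is a minimal, hypothetical usage sketch (StubConnection and HardJob are made-up names, and it assumes the gem is on the load path); it is not part of the package.

require 'time'                    # Time#iso8601, used by the handler
require 'natswork/retry_handler'

# Hypothetical stand-in for a NATS connection: records #publish calls.
StubConnection = Struct.new(:published) do
  def publish(subject, message)
    (self.published ||= []) << [subject, message]
  end
end

handler = NatsWork::RetryHandler.new(strategy: :exponential, base_delay: 2, jitter: true)
handler.on_retry   { |job, err| puts "retrying #{job['job_class']}: #{err.message}" }
handler.on_failure { |job, err| puts "dead-lettering #{job['job_class']}" }

conn = StubConnection.new
job  = { 'job_class' => 'HardJob', 'queue' => 'default', 'retry_count' => 1 }

# 1 < DEFAULT_MAX_RETRIES, so this publishes to "natswork.queue.retry.default"
# with retry_count bumped to 2 and a populated retry_history.
handler.handle_failure(conn, job, RuntimeError.new('boom'))

# A job that has exhausted its retries goes to the dead-letter queue instead.
handler.handle_failure(conn, job.merge('retry_count' => 3), RuntimeError.new('boom'))
p conn.published.map(&:first)
# => ["natswork.queue.retry.default", "natswork.queue.dead_letter"]
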

data/lib/natswork/server.rb
@@ -0,0 +1,47 @@
+# frozen_string_literal: true
+
+require 'natswork/server/version'
+require 'concurrent'
+
+module NatsWork
+  module Server
+    class Error < StandardError; end
+
+    class << self
+      attr_accessor :worker_manager
+
+      def start
+        require 'natswork'
+        NatsWork.logger.info 'Starting NatsWork Server...'
+
+        # Initialize connection
+        NatsWork::Client.start
+
+        # Start worker manager with pool size as concurrency
+        self.worker_manager = WorkerManager.new(
+          concurrency: NatsWork.config.pool_size || 5
+        )
+        worker_manager.start
+
+        NatsWork.logger.info "NatsWork Server started with #{NatsWork.config.pool_size || 5} workers"
+      end
+
+      def stop
+        NatsWork.logger.info 'Stopping NatsWork Server...'
+        worker_manager&.stop
+        NatsWork::Client.stop if defined?(NatsWork::Client)
+        NatsWork.logger.info 'NatsWork Server stopped'
+      end
+
+      def running?
+        worker_manager&.running? || false
+      end
+    end
+
+    # Auto-load server components
+    autoload :Worker, 'natswork/worker'
+    autoload :WorkerManager, 'natswork/worker_manager'
+    autoload :JobExecutor, 'natswork/job_executor'
+    autoload :MiddlewareChain, 'natswork/middleware_chain'
+  end
+end
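
For orientation, a rough lifecycle sketch for NatsWork::Server. Assumptions: the gem is installed under its published name, and the companion natswork client gem supplies NatsWork::Client, NatsWork.config and NatsWork.logger with the NATS connection configured elsewhere. None of this boot script ships with the package.

require 'natswork-server'   # the gem's entry point (data/lib/natswork-server.rb)

# Connects via NatsWork::Client and starts a WorkerManager sized by
# NatsWork.config.pool_size (default 5).
NatsWork::Server.start

# Stop cleanly on INT/TERM; Server.stop is called off the trap context.
%w[INT TERM].each do |sig|
  Signal.trap(sig) { Thread.new { NatsWork::Server.stop } }
end

sleep 1 while NatsWork::Server.running?
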

data/lib/natswork/simple_worker.rb
@@ -0,0 +1,101 @@
+# frozen_string_literal: true
+
+require 'json'
+
+module NatsWork
+  module Server
+    class SimpleWorker
+      attr_reader :id, :pool, :queues
+
+      def initialize(id, queues = nil)
+        @id = id
+        @queues = queues || NatsWork.config.worker_queues || ['default']
+        @pool = Concurrent::FixedThreadPool.new(5)
+        @running = false
+        @subscriptions = []
+      end
+
+      def start
+        @running = true
+        NatsWork.logger.info "Worker #{@id} starting..."
+
+        # Subscribe to job queues
+        subscribe_to_queues
+      end
+
+      def stop
+        @running = false
+
+        # Unsubscribe from all queues
+        @subscriptions.each do |sid|
+          NatsWork::Client.instance.connection_pool.with_connection do |conn|
+            conn.unsubscribe(sid)
+          end
+        rescue StandardError => e
+          NatsWork.logger.error "Error unsubscribing: #{e.message}"
+        end
+
+        @subscriptions.clear
+        NatsWork.logger.info "Worker #{@id} stopped"
+      end
+
+      private
+
+      def subscribe_to_queues
+        # Subscribe to configured queues
+        @queues.each do |queue|
+          subject = "natswork.queue.#{queue}"
+
+          NatsWork::Client.instance.connection_pool.with_connection do |conn|
+            sid = conn.subscribe(subject, queue: "workers.#{queue}") do |msg|
+              # Process job in thread pool
+              @pool.post do
+                process_job(msg, queue)
+              end
+            end
+
+            @subscriptions << sid
+            NatsWork.logger.info "Worker #{@id} subscribed to #{subject}"
+          end
+        end
+      rescue StandardError => e
+        NatsWork.logger.error "Worker #{@id} subscription error: #{e.message}"
+      end
+
+      def process_job(msg, queue)
+        return unless @running
+
+        begin
+          # Parse the message - it comes as a hash from NATS subscription
+          job_data = if msg.is_a?(Hash)
+                       # Message comes pre-parsed from NATS subscription
+                       msg.transform_keys(&:to_sym)
+                     elsif msg.is_a?(String)
+                       JSON.parse(msg, symbolize_names: true)
+                     else
+                       msg
+                     end
+
+          NatsWork.logger.info "Worker #{@id} processing job from #{queue}: #{job_data[:job_class]}"
+
+          # Execute the job directly
+          job_class_name = job_data[:job_class] || job_data['job_class']
+          arguments = job_data[:arguments] || job_data['arguments'] || {}
+
+          # Get the job class
+          job_class = Object.const_get(job_class_name)
+
+          # Create job instance and execute
+          raise "Unknown job class: #{job_class_name}" unless job_class.respond_to?(:new)
+
+          job_instance = job_class.new
+          job_instance.perform(arguments)
+          NatsWork.logger.info "Worker #{@id} completed job: #{job_data[:job_id]}"
+        rescue StandardError => e
+          NatsWork.logger.error "Worker #{@id} job error: #{e.message}"
+          NatsWork.logger.error e.backtrace[0..5].join("\n") if e.backtrace
+        end
+      end
+    end
+  end
+end
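
What SimpleWorker#process_job expects from a job is simply a class, named by the job_class field, whose instances respond to perform(arguments). Below is a hypothetical job and matching message shape; EmailJob and all field values are illustrative only, and the subscription itself needs the companion client gem plus a running NATS server, so it is shown commented out.

# Invoked by process_job as Object.const_get('EmailJob').new.perform(arguments)
class EmailJob
  def perform(arguments)
    # Depending on how the message was parsed, nested keys may be strings or symbols.
    puts "sending email with #{arguments.inspect}"
  end
end

# A message published to "natswork.queue.default" could look like:
#   { "job_class" => "EmailJob",
#     "job_id"    => "abc123",
#     "arguments" => { "to" => "user@example.com", "subject" => "welcome" } }
#
# worker = NatsWork::Server::SimpleWorker.new(1, ['default'])
# worker.start   # subscribes in the "workers.default" queue group
# worker.stop
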

data/lib/natswork/thread_pool.rb
@@ -0,0 +1,192 @@
+# frozen_string_literal: true
+
+require 'timeout'
+require 'concurrent'
+
+module NatsWork
+  class ThreadPoolError < StandardError; end
+
+  class ThreadPool
+    attr_reader :size, :max_queue_size
+
+    def initialize(size: 10, max_queue: nil)
+      @size = size
+      @max_queue_size = max_queue
+
+      # Use SizedQueue for limited queue, regular Queue otherwise
+      if max_queue&.positive?
+        @queue = SizedQueue.new(max_queue)
+        @limited = true
+      else
+        @queue = Queue.new
+        @limited = false
+      end
+
+      @workers = []
+      @shutdown = false
+      @mutex = Mutex.new
+
+      @active_count = Concurrent::AtomicFixnum.new(0)
+      @completed_count = Concurrent::AtomicFixnum.new(0)
+      @rejected_count = Concurrent::AtomicFixnum.new(0)
+
+      start_workers
+    end
+
+    def submit(&block)
+      raise ThreadPoolError, 'Pool is shutting down' if @shutdown
+
+      if @limited
+        # For SizedQueue, use non-blocking push
+
+        # Try non-blocking push
+        success = false
+        begin
+          @queue.push(block, true)
+          success = true
+        rescue ThreadError
+          # Queue is full
+          success = false
+        end
+
+        unless success
+          @rejected_count.increment
+          raise ThreadPoolError, "Queue is full (size: #{@max_queue_size})"
+        end
+
+      else
+        # Regular queue, just add
+        @queue.push(block)
+      end
+    end
+
+    def active_count
+      @active_count.value
+    end
+
+    def queue_size
+      @queue.size
+    end
+
+    def shutdown
+      @mutex.synchronize do
+        return if @shutdown
+
+        @shutdown = true
+      end
+
+      # Don't push terminate signals yet - let existing tasks complete
+      # The workers will check @shutdown flag
+    end
+
+    def shutdown!
+      @mutex.synchronize do
+        @shutdown = true
+      end
+
+      # Clear the queue and terminate immediately
+      @queue.clear
+      @workers.each(&:kill)
+    end
+
+    def wait_for_termination(timeout: nil)
+      if timeout
+        deadline = Time.now + timeout
+
+        # Wait for queue to empty and active tasks to complete
+        sleep 0.01 while (@queue.size.positive? || @active_count.value.positive?) && Time.now < deadline
+
+        # Then wait for workers to finish
+        @workers.each do |worker|
+          remaining = deadline - Time.now
+          return false if remaining <= 0
+
+          joined = worker.join(remaining)
+          return false unless joined
+        end
+
+        # Check if all tasks completed
+        @queue.empty? && @active_count.value.zero?
+      else
+        # Wait indefinitely for queue to empty and active tasks to complete
+        sleep 0.01 while @queue.size.positive? || @active_count.value.positive?
+
+        # Then join all workers
+        @workers.each(&:join)
+        true
+      end
+    end
+
+    def stats
+      {
+        size: @size,
+        active: active_count,
+        queued: queue_size,
+        max_queue: @max_queue_size,
+        completed: @completed_count.value,
+        rejected: @rejected_count.value,
+        shutdown: @shutdown,
+        memory_usage: memory_usage
+      }
+    end
+
+    def memory_usage
+      # Get process memory usage
+      if defined?(GetProcessMem)
+        GetProcessMem.new.bytes
+      else
+        # Fallback to RSS from /proc (Linux)
+        begin
+          File.read("/proc/#{Process.pid}/status").match(/VmRSS:\s+(\d+)/)[1].to_i * 1024
+        rescue StandardError
+          # Fallback for non-Linux or if reading fails
+          0
+        end
+      end
+    end
+
+    private
+
+    def start_workers
+      @size.times do
+        worker = Thread.new do
+          worker_loop
+        end
+        @workers << worker
+      end
+    end
+
+    def worker_loop
+      loop do
+        task = nil
+
+        # Non-blocking check for tasks
+        begin
+          task = @queue.pop(true) # Non-blocking pop
+        rescue ThreadError
+          # Queue is empty
+          break if @shutdown
+
+          sleep 0.01
+          next
+        end
+
+        # Process the task
+        next unless task.respond_to?(:call)
+
+        begin
+          @active_count.increment
+          task.call
+          @completed_count.increment
+        rescue StandardError
+          # Log error but don't crash the worker
+          # In production, would use proper logging
+        ensure
+          @active_count.decrement
+        end
+      end
+    rescue StandardError
+      # Worker crashed, log in production
+    end
+  end
+end
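
ThreadPool has no NATS dependencies beyond the concurrent-ruby gem, so it can be tried directly. A minimal sketch (assuming the gem is on the load path) showing the bounded-queue rejection path and a graceful drain:

require 'natswork/thread_pool'   # requires 'concurrent' itself

pool = NatsWork::ThreadPool.new(size: 2, max_queue: 4)

8.times do |i|
  pool.submit { sleep 0.05 }             # simulate a short job
rescue NatsWork::ThreadPoolError => e
  # Once the SizedQueue is full, submit raises instead of blocking and the
  # rejection is counted in stats[:rejected].
  warn "job #{i} rejected: #{e.message}"
end

pool.shutdown                            # stop accepting work, drain the queue
pool.wait_for_termination(timeout: 5)    # false if the deadline is hit
p pool.stats.slice(:completed, :rejected, :queued)
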

data/lib/natswork/worker.rb
@@ -0,0 +1,217 @@
+# frozen_string_literal: true
+
+require 'socket'
+require 'securerandom'
+require 'concurrent'
+
+module NatsWork
+  class Worker
+    attr_reader :name, :queues, :concurrency, :connection
+
+    def initialize(connection, options = {})
+      @connection = connection
+      @name = options[:name] || generate_worker_name
+      @queues = Array(options[:queues] || 'default')
+      @concurrency = options[:concurrency] || 10
+
+      @running = false
+      @paused = false
+      @stopping = false
+      @mutex = Mutex.new
+
+      @jobs_processed = Concurrent::AtomicFixnum.new(0)
+      @jobs_failed = Concurrent::AtomicFixnum.new(0)
+      @active_jobs = Concurrent::AtomicFixnum.new(0)
+
+      @heartbeat_thread = nil
+      @polling_threads = []
+      @started_at = nil
+    end
+
+    def start
+      @mutex.synchronize do
+        return if @running
+
+        @running = true
+        @stopping = false
+        @started_at = Time.now
+
+        start_heartbeat
+        start_polling
+      end
+    end
+
+    def stop
+      @mutex.synchronize do
+        return unless @running
+
+        @stopping = true
+        wait_for_jobs
+
+        stop_polling
+        stop_heartbeat
+
+        @running = false
+        @stopping = false
+      end
+    end
+
+    def pause
+      @mutex.synchronize do
+        @paused = true
+      end
+    end
+
+    def resume
+      @mutex.synchronize do
+        @paused = false
+      end
+    end
+
+    def running?
+      @running
+    end
+
+    def paused?
+      @paused
+    end
+
+    def stopping?
+      @stopping
+    end
+
+    def accepting_jobs?
+      @running && !@paused && !@stopping
+    end
+
+    def stats
+      {
+        name: @name,
+        status: current_status,
+        queues: @queues,
+        concurrency: @concurrency,
+        jobs_processed: @jobs_processed.value,
+        jobs_failed: @jobs_failed.value,
+        active_jobs: @active_jobs.value,
+        started_at: @started_at,
+        uptime: @started_at ? Time.now - @started_at : 0
+      }
+    end
+
+    def heartbeat
+      @connection.publish('natswork.workers.heartbeat', {
+        worker_id: @name,
+        status: current_status,
+        queues: @queues,
+        concurrency: @concurrency,
+        jobs_processed: @jobs_processed.value,
+        jobs_failed: @jobs_failed.value,
+        active_jobs: @active_jobs.value,
+        timestamp: Time.now.to_f
+      })
+    end
+
+    def graceful_shutdown(timeout: 30)
+      stop_thread = Thread.new { stop }
+      stop_thread.join(timeout)
+
+      return unless stop_thread.alive?
+
+      stop_thread.kill
+      force_shutdown
+    end
+
+    private
+
+    def generate_worker_name
+      "worker-#{Socket.gethostname}-#{Process.pid}-#{SecureRandom.hex(4)}"
+    end
+
+    def current_status
+      return :stopped unless @running
+      return :paused if @paused
+      return :stopping if @stopping
+
+      :running
+    end
+
+    def start_heartbeat
+      @heartbeat_thread = Thread.new do
+        loop do
+          break unless @running
+
+          begin
+            heartbeat
+          rescue StandardError
+            # Log error but don't crash heartbeat thread
+          end
+
+          sleep 5
+        end
+      end
+    end
+
+    def stop_heartbeat
+      return unless @heartbeat_thread
+
+      @heartbeat_thread.kill if @heartbeat_thread.alive?
+      @heartbeat_thread = nil
+    end
+
+    def start_polling
+      @queues.each do |queue|
+        thread = Thread.new do
+          poll_queue(queue)
+        end
+        @polling_threads << thread
+      end
+    end
+
+    def stop_polling
+      @polling_threads.each do |thread|
+        thread.kill if thread&.alive?
+      end
+      @polling_threads.clear
+    end
+
+    def poll_queue(_queue)
+      loop do
+        break unless @running
+        next if @paused
+
+        # Polling logic will be implemented with queue subscription
+        sleep 0.1
+      end
+    end
+
+    def wait_for_jobs
+      timeout = 30
+      deadline = Time.now + timeout
+
+      sleep 0.1 while @active_jobs.value.positive? && Time.now < deadline
+    end
+
+    def force_shutdown
+      @mutex.synchronize do
+        stop_polling
+        stop_heartbeat
+        @running = false
+        @stopping = false
+      end
+    end
+
+    def heartbeat_loop
+      loop do
+        break unless @running
+
+        begin
+          heartbeat
+        rescue StandardError
+          # Log error
+        end
+
+        sleep 5
+      end
+    end
+  end
+end
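
Finally, a lifecycle sketch for the Worker class. As in the RetryHandler example, the connection is a hypothetical stub that just records #publish calls, which is enough to observe the heartbeat payload sent to natswork.workers.heartbeat; in a real deployment the connection comes from the client side and job polling is wired up by the rest of the gem.

require 'natswork/worker'   # assumes the gem and its concurrent-ruby dependency are installed

# Hypothetical stub: Worker only ever calls connection.publish(subject, payload).
StubConnection = Struct.new(:messages) do
  def publish(subject, payload)
    (self.messages ||= []) << [subject, payload]
  end
end

conn   = StubConnection.new
worker = NatsWork::Worker.new(conn, name: 'worker-demo', queues: %w[default mailers], concurrency: 5)

worker.start
sleep 0.2                        # give the heartbeat thread time to publish once
worker.pause
p worker.accepting_jobs?         # => false
p worker.stats[:status]          # => :paused
worker.resume

subject, payload = conn.messages.last
p subject                        # => "natswork.workers.heartbeat"
p payload[:worker_id]            # => "worker-demo"

worker.graceful_shutdown(timeout: 5)
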