natswork-server 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,125 @@
+ # frozen_string_literal: true
+
+ require 'json'
+ require 'time' # Time#iso8601 (used below) comes from the stdlib 'time' extension
+
+ module NatsWork
+   class RetryHandler
+     DEFAULT_MAX_RETRIES = 3
+     DEFAULT_BASE_DELAY = 1
+     DEFAULT_MAX_DELAY = 300 # 5 minutes
+
+     attr_reader :base_delay, :max_delay, :jitter, :strategy
+
+     def initialize(options = {})
+       @base_delay = options[:base_delay] || DEFAULT_BASE_DELAY
+       @max_delay = options[:max_delay] || DEFAULT_MAX_DELAY
+       @jitter = options[:jitter] || false
+       @strategy = options[:strategy] || :exponential
+
+       @retry_callbacks = []
+       @failure_callbacks = []
+     end
+
+     def should_retry?(job_message)
+       retry_count = job_message['retry_count'] || 0
+       max_retries = job_message['max_retries'] || DEFAULT_MAX_RETRIES
+
+       retry_count < max_retries
+     end
+
+     def calculate_delay(attempt)
+       delay = case @strategy
+               when :exponential
+                 @base_delay * (2**attempt)
+               when :linear
+                 @base_delay * (attempt + 1)
+               when :constant
+                 @base_delay
+               when Proc
+                 @strategy.call(attempt)
+               else
+                 @base_delay * (2**attempt)
+               end
+
+       # Apply max delay cap
+       delay = [delay, @max_delay].min
+
+       # Apply jitter if enabled (±10% randomness)
+       if @jitter && delay.positive?
+         jitter_amount = delay * 0.1
+         delay += (rand * 2 - 1) * jitter_amount
+       end
+
+       delay
+     end
+
+     def schedule_retry(connection, job_message, error)
+       retry_count = (job_message['retry_count'] || 0) + 1
+       delay = calculate_delay(retry_count - 1)
+
+       retry_message = job_message.merge(
+         'retry_count' => retry_count,
+         'retry_at' => (Time.now + delay).iso8601,
+         'last_error' => {
+           'type' => error.class.name,
+           'message' => error.message,
+           'backtrace' => error.backtrace&.first(10) || []
+         }
+       )
+
+       # Track retry history
+       retry_history = retry_message['retry_history'] || []
+       retry_history << {
+         'attempt' => retry_count,
+         'error' => error.message,
+         'retried_at' => Time.now.iso8601
+       }
+       retry_message['retry_history'] = retry_history
+
+       # Publish to retry queue
+       retry_queue = "natswork.queue.retry.#{job_message['queue'] || 'default'}"
+       connection.publish(retry_queue, retry_message)
+
+       # Call retry callbacks
+       @retry_callbacks.each do |callback|
+         callback.call(job_message, error)
+       end
+     end
+
+     def send_to_dead_letter(connection, job_message, error)
+       dead_letter_message = job_message.merge(
+         'final_error' => {
+           'type' => error.class.name,
+           'message' => error.message,
+           'backtrace' => error.backtrace || []
+         },
+         'failed_at' => Time.now.iso8601,
+         'exhausted_retries' => true,
+         'total_attempts' => (job_message['retry_count'] || 0) + 1
+       )
+
+       connection.publish('natswork.queue.dead_letter', dead_letter_message)
+
+       # Call failure callbacks
+       @failure_callbacks.each do |callback|
+         callback.call(job_message, error)
+       end
+     end
+
+     def on_retry(&block)
+       @retry_callbacks << block
+     end
+
+     def on_failure(&block)
+       @failure_callbacks << block
+     end
+
+     def handle_failure(connection, job_message, error)
+       if should_retry?(job_message)
+         schedule_retry(connection, job_message, error)
+       else
+         send_to_dead_letter(connection, job_message, error)
+       end
+     end
+   end
+ end
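
With the defaults above, an exponential strategy with base_delay 2 yields delays of 2, 4 and 8 seconds for attempts 0-2, capped at max_delay and nudged by ±10% when jitter is enabled. A minimal usage sketch, assuming `connection` is any object that responds to publish(subject, message) as the handler expects, and that the job payload is a plain string-keyed hash (the field names simply mirror the keys the handler reads):

    handler = NatsWork::RetryHandler.new(strategy: :exponential, base_delay: 2, jitter: true)

    handler.on_retry   { |job, err| puts "retrying #{job['job_class']}: #{err.message}" }
    handler.on_failure { |job, err| puts "dead-lettering #{job['job_class']}: #{err.message}" }

    # Hypothetical job payload for illustration only.
    job = { 'job_class' => 'HardJob', 'queue' => 'default', 'retry_count' => 1, 'max_retries' => 3 }

    # Republishes to "natswork.queue.retry.default" with retry_count 2, a retry_at
    # timestamp, and the error appended to retry_history.
    handler.handle_failure(connection, job, RuntimeError.new('boom'))
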
@@ -0,0 +1,7 @@
+ # frozen_string_literal: true
+
+ module NatsWork
+   module Server
+     VERSION = '0.0.1'
+   end
+ end
@@ -0,0 +1,47 @@
+ # frozen_string_literal: true
+
+ require 'natswork/server/version'
+ require 'concurrent'
+
+ module NatsWork
+   module Server
+     class Error < StandardError; end
+
+     class << self
+       attr_accessor :worker_manager
+
+       def start
+         require 'natswork'
+         NatsWork.logger.info 'Starting NatsWork Server...'
+
+         # Initialize connection
+         NatsWork::Client.start
+
+         # Start worker manager with pool size as concurrency
+         self.worker_manager = WorkerManager.new(
+           concurrency: NatsWork.config.pool_size || 5
+         )
+         worker_manager.start
+
+         NatsWork.logger.info "NatsWork Server started with #{NatsWork.config.pool_size || 5} workers"
+       end
+
+       def stop
+         NatsWork.logger.info 'Stopping NatsWork Server...'
+         worker_manager&.stop
+         NatsWork::Client.stop if defined?(NatsWork::Client)
+         NatsWork.logger.info 'NatsWork Server stopped'
+       end
+
+       def running?
+         worker_manager&.running? || false
+       end
+     end
+
+     # Auto-load server components
+     autoload :Worker, 'natswork/worker'
+     autoload :WorkerManager, 'natswork/worker_manager'
+     autoload :JobExecutor, 'natswork/job_executor'
+     autoload :MiddlewareChain, 'natswork/middleware_chain'
+   end
+ end
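
This module is the whole lifecycle API: Server.start boots the client connection plus a WorkerManager, and Server.stop tears both down. A sketch of a boot script driving it; it assumes the base natswork gem is installed and configured (NatsWork.config, NatsWork.logger, and NatsWork::Client come from there), and the signal handling shown is illustrative rather than part of this gem:

    require 'natswork/server'

    # Trap handlers run in a restricted context, so hand the shutdown off to a thread.
    %w[INT TERM].each do |sig|
      Signal.trap(sig) { Thread.new { NatsWork::Server.stop } }
    end

    NatsWork::Server.start
    sleep 1 while NatsWork::Server.running?
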
@@ -0,0 +1,101 @@
+ # frozen_string_literal: true
+
+ require 'json'
+ require 'concurrent' # Concurrent::FixedThreadPool below comes from the concurrent-ruby gem
+
+ module NatsWork
+   module Server
+     class SimpleWorker
+       attr_reader :id, :pool, :queues
+
+       def initialize(id, queues = nil)
+         @id = id
+         @queues = queues || NatsWork.config.worker_queues || ['default']
+         @pool = Concurrent::FixedThreadPool.new(5)
+         @running = false
+         @subscriptions = []
+       end
+
+       def start
+         @running = true
+         NatsWork.logger.info "Worker #{@id} starting..."
+
+         # Subscribe to job queues
+         subscribe_to_queues
+       end
+
+       def stop
+         @running = false
+
+         # Unsubscribe from all queues
+         @subscriptions.each do |sid|
+           NatsWork::Client.instance.connection_pool.with_connection do |conn|
+             conn.unsubscribe(sid)
+           end
+         rescue StandardError => e
+           NatsWork.logger.error "Error unsubscribing: #{e.message}"
+         end
+
+         @subscriptions.clear
+         NatsWork.logger.info "Worker #{@id} stopped"
+       end
+
+       private
+
+       def subscribe_to_queues
+         # Subscribe to configured queues
+         @queues.each do |queue|
+           subject = "natswork.queue.#{queue}"
+
+           NatsWork::Client.instance.connection_pool.with_connection do |conn|
+             sid = conn.subscribe(subject, queue: "workers.#{queue}") do |msg|
+               # Process job in thread pool
+               @pool.post do
+                 process_job(msg, queue)
+               end
+             end
+
+             @subscriptions << sid
+             NatsWork.logger.info "Worker #{@id} subscribed to #{subject}"
+           end
+         end
+       rescue StandardError => e
+         NatsWork.logger.error "Worker #{@id} subscription error: #{e.message}"
+       end
+
+       def process_job(msg, queue)
+         return unless @running
+
+         begin
+           # Parse the message - it comes as a hash from NATS subscription
+           job_data = if msg.is_a?(Hash)
+                        # Message comes pre-parsed from NATS subscription
+                        msg.transform_keys(&:to_sym)
+                      elsif msg.is_a?(String)
+                        JSON.parse(msg, symbolize_names: true)
+                      else
+                        msg
+                      end
+
+           NatsWork.logger.info "Worker #{@id} processing job from #{queue}: #{job_data[:job_class]}"
+
+           # Execute the job directly
+           job_class_name = job_data[:job_class] || job_data['job_class']
+           arguments = job_data[:arguments] || job_data['arguments'] || {}
+
+           # Get the job class
+           job_class = Object.const_get(job_class_name)
+
+           # Create job instance and execute
+           raise "Unknown job class: #{job_class_name}" unless job_class.respond_to?(:new)
+
+           job_instance = job_class.new
+           job_instance.perform(arguments)
+           NatsWork.logger.info "Worker #{@id} completed job: #{job_data[:job_id]}"
+         rescue StandardError => e
+           NatsWork.logger.error "Worker #{@id} job error: #{e.message}"
+           NatsWork.logger.error e.backtrace[0..5].join("\n") if e.backtrace
+         end
+       end
+     end
+   end
+ end
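
SimpleWorker resolves job_class with Object.const_get and then calls new followed by perform(arguments), so a job only has to satisfy that contract. A hypothetical job class and the message shape process_job expects:

    # Hypothetical job; the worker only requires .new and #perform(arguments).
    class WelcomeEmailJob
      def perform(arguments)
        # Keys may arrive as strings or symbols depending on how the NATS layer
        # parsed the payload (transform_keys only symbolizes the top level).
        user_id = arguments[:user_id] || arguments['user_id']
        puts "emailing user #{user_id}"
      end
    end

    # Matching payload published to "natswork.queue.default":
    # { "job_id" => "abc123", "job_class" => "WelcomeEmailJob", "arguments" => { "user_id" => 42 } }
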
@@ -0,0 +1,192 @@
+ # frozen_string_literal: true
+
+ require 'timeout'
+ require 'concurrent'
+
+ module NatsWork
+   class ThreadPoolError < StandardError; end
+
+   class ThreadPool
+     attr_reader :size, :max_queue_size
+
+     def initialize(size: 10, max_queue: nil)
+       @size = size
+       @max_queue_size = max_queue
+
+       # Use SizedQueue for limited queue, regular Queue otherwise
+       if max_queue&.positive?
+         @queue = SizedQueue.new(max_queue)
+         @limited = true
+       else
+         @queue = Queue.new
+         @limited = false
+       end
+
+       @workers = []
+       @shutdown = false
+       @mutex = Mutex.new
+
+       @active_count = Concurrent::AtomicFixnum.new(0)
+       @completed_count = Concurrent::AtomicFixnum.new(0)
+       @rejected_count = Concurrent::AtomicFixnum.new(0)
+
+       start_workers
+     end
+
+     def submit(&block)
+       raise ThreadPoolError, 'Pool is shutting down' if @shutdown
+
+       if @limited
+         # For SizedQueue, try a non-blocking push
+         success = false
+         begin
+           @queue.push(block, true)
+           success = true
+         rescue ThreadError
+           # Queue is full
+           success = false
+         end
+
+         unless success
+           @rejected_count.increment
+           raise ThreadPoolError, "Queue is full (size: #{@max_queue_size})"
+         end
+       else
+         # Regular queue, just add
+         @queue.push(block)
+       end
+     end
+
+     def active_count
+       @active_count.value
+     end
+
+     def queue_size
+       @queue.size
+     end
+
+     def shutdown
+       @mutex.synchronize do
+         return if @shutdown
+
+         @shutdown = true
+       end
+
+       # Don't push terminate signals yet - let existing tasks complete
+       # The workers will check @shutdown flag
+     end
+
+     def shutdown!
+       @mutex.synchronize do
+         @shutdown = true
+       end
+
+       # Clear the queue and terminate immediately
+       @queue.clear
+       @workers.each(&:kill)
+     end
+
+     def wait_for_termination(timeout: nil)
+       if timeout
+         deadline = Time.now + timeout
+
+         # Wait for queue to empty and active tasks to complete
+         sleep 0.01 while (@queue.size.positive? || @active_count.value.positive?) && Time.now < deadline
+
+         # Then wait for workers to finish
+         @workers.each do |worker|
+           remaining = deadline - Time.now
+           return false if remaining <= 0
+
+           joined = worker.join(remaining)
+           return false unless joined
+         end
+
+         # Check if all tasks completed
+         @queue.empty? && @active_count.value.zero?
+       else
+         # Wait indefinitely for queue to empty and active tasks to complete
+         sleep 0.01 while @queue.size.positive? || @active_count.value.positive?
+
+         # Then join all workers
+         @workers.each(&:join)
+         true
+       end
+     end
+
+     def stats
+       {
+         size: @size,
+         active: active_count,
+         queued: queue_size,
+         max_queue: @max_queue_size,
+         completed: @completed_count.value,
+         rejected: @rejected_count.value,
+         shutdown: @shutdown,
+         memory_usage: memory_usage
+       }
+     end
+
+     def memory_usage
+       # Get process memory usage
+       if defined?(GetProcessMem)
+         GetProcessMem.new.bytes
+       else
+         # Fallback to RSS from /proc (Linux)
+         begin
+           File.read("/proc/#{Process.pid}/status").match(/VmRSS:\s+(\d+)/)[1].to_i * 1024
+         rescue StandardError
+           # Fallback for non-Linux or if reading fails
+           0
+         end
+       end
+     end
+
+     private
+
+     def start_workers
+       @size.times do
+         worker = Thread.new do
+           worker_loop
+         end
+         @workers << worker
+       end
+     end
+
+     def worker_loop
+       loop do
+         task = nil
+
+         # Non-blocking check for tasks
+         begin
+           task = @queue.pop(true) # Non-blocking pop
+         rescue ThreadError
+           # Queue is empty
+           break if @shutdown
+
+           sleep 0.01
+           next
+         end
+
+         # Process the task
+         next unless task.respond_to?(:call)
+
+         begin
+           @active_count.increment
+           task.call
+           @completed_count.increment
+         rescue StandardError
+           # Log error but don't crash the worker
+           # In production, would use proper logging
+         ensure
+           @active_count.decrement
+         end
+       end
+     rescue StandardError
+       # Worker crashed, log in production
+     end
+   end
+ end
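
A short sketch of driving the pool directly; with max_queue set, submit raises ThreadPoolError once the SizedQueue is full rather than blocking (do_work is a placeholder, not part of the gem):

    pool = NatsWork::ThreadPool.new(size: 4, max_queue: 100)

    20.times do |i|
      begin
        pool.submit { do_work(i) }
      rescue NatsWork::ThreadPoolError => e
        warn "rejected: #{e.message}"
      end
    end

    pool.shutdown                          # stop accepting work, let queued tasks drain
    pool.wait_for_termination(timeout: 10) # returns false if tasks are still outstanding
    puts pool.stats[:completed]
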
@@ -0,0 +1,217 @@
+ # frozen_string_literal: true
+
+ require 'socket'
+ require 'securerandom'
+ require 'concurrent'
+
+ module NatsWork
+   class Worker
+     attr_reader :name, :queues, :concurrency, :connection
+
+     def initialize(connection, options = {})
+       @connection = connection
+       @name = options[:name] || generate_worker_name
+       @queues = Array(options[:queues] || 'default')
+       @concurrency = options[:concurrency] || 10
+
+       @running = false
+       @paused = false
+       @stopping = false
+       @mutex = Mutex.new
+
+       @jobs_processed = Concurrent::AtomicFixnum.new(0)
+       @jobs_failed = Concurrent::AtomicFixnum.new(0)
+       @active_jobs = Concurrent::AtomicFixnum.new(0)
+
+       @heartbeat_thread = nil
+       @polling_threads = []
+       @started_at = nil
+     end
+
+     def start
+       @mutex.synchronize do
+         return if @running
+
+         @running = true
+         @stopping = false
+         @started_at = Time.now
+
+         start_heartbeat
+         start_polling
+       end
+     end
+
+     def stop
+       @mutex.synchronize do
+         return unless @running
+
+         @stopping = true
+         wait_for_jobs
+
+         stop_polling
+         stop_heartbeat
+
+         @running = false
+         @stopping = false
+       end
+     end
+
+     def pause
+       @mutex.synchronize do
+         @paused = true
+       end
+     end
+
+     def resume
+       @mutex.synchronize do
+         @paused = false
+       end
+     end
+
+     def running?
+       @running
+     end
+
+     def paused?
+       @paused
+     end
+
+     def stopping?
+       @stopping
+     end
+
+     def accepting_jobs?
+       @running && !@paused && !@stopping
+     end
+
+     def stats
+       {
+         name: @name,
+         status: current_status,
+         queues: @queues,
+         concurrency: @concurrency,
+         jobs_processed: @jobs_processed.value,
+         jobs_failed: @jobs_failed.value,
+         active_jobs: @active_jobs.value,
+         started_at: @started_at,
+         uptime: @started_at ? Time.now - @started_at : 0
+       }
+     end
+
+     def heartbeat
+       @connection.publish('natswork.workers.heartbeat', {
+         worker_id: @name,
+         status: current_status,
+         queues: @queues,
+         concurrency: @concurrency,
+         jobs_processed: @jobs_processed.value,
+         jobs_failed: @jobs_failed.value,
+         active_jobs: @active_jobs.value,
+         timestamp: Time.now.to_f
+       })
+     end
+
+     def graceful_shutdown(timeout: 30)
+       stop_thread = Thread.new { stop }
+       stop_thread.join(timeout)
+
+       return unless stop_thread.alive?
+
+       stop_thread.kill
+       force_shutdown
+     end
+
+     private
+
+     def generate_worker_name
+       "worker-#{Socket.gethostname}-#{Process.pid}-#{SecureRandom.hex(4)}"
+     end
+
+     def current_status
+       return :stopped unless @running
+       return :paused if @paused
+       return :stopping if @stopping
+
+       :running
+     end
+
+     def start_heartbeat
+       @heartbeat_thread = Thread.new do
+         loop do
+           break unless @running
+
+           begin
+             heartbeat
+           rescue StandardError
+             # Log error but don't crash heartbeat thread
+           end
+
+           sleep 5
+         end
+       end
+     end
+
+     def stop_heartbeat
+       return unless @heartbeat_thread
+
+       @heartbeat_thread.kill if @heartbeat_thread.alive?
+       @heartbeat_thread = nil
+     end
+
+     def start_polling
+       @queues.each do |queue|
+         thread = Thread.new do
+           poll_queue(queue)
+         end
+         @polling_threads << thread
+       end
+     end
+
+     def stop_polling
+       @polling_threads.each do |thread|
+         thread.kill if thread&.alive?
+       end
+       @polling_threads.clear
+     end
+
+     def poll_queue(_queue)
+       loop do
+         break unless @running
+         next if @paused
+
+         # Polling logic will be implemented with queue subscription
+         sleep 0.1
+       end
+     end
+
+     def wait_for_jobs
+       timeout = 30
+       deadline = Time.now + timeout
+
+       sleep 0.1 while @active_jobs.value.positive? && Time.now < deadline
+     end
+
+     def force_shutdown
+       @mutex.synchronize do
+         stop_polling
+         stop_heartbeat
+         @running = false
+         @stopping = false
+       end
+     end
+
+     def heartbeat_loop
+       loop do
+         break unless @running
+
+         begin
+           heartbeat
+         rescue StandardError
+           # Log error
+         end
+
+         sleep 5
+       end
+     end
+   end
+ end
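
A sketch of running this low-level Worker on its own; `connection` stands in for a NATS connection object, and the only method the class calls on it is publish(subject, payload), for the heartbeat:

    worker = NatsWork::Worker.new(connection,
                                  name: 'worker-1',
                                  queues: %w[default mailers],
                                  concurrency: 5)

    worker.start
    worker.stats                # => { name: "worker-1", status: :running, ... }

    worker.pause                # keep heartbeating but stop accepting jobs
    worker.resume

    worker.graceful_shutdown(timeout: 30)  # falls back to force_shutdown on timeout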