natswork-server 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +0 -0
- data/LICENSE +21 -0
- data/README.md +286 -0
- data/lib/natswork/cli.rb +420 -0
- data/lib/natswork/error_tracker.rb +338 -0
- data/lib/natswork/health_check.rb +252 -0
- data/lib/natswork/instrumentation.rb +141 -0
- data/lib/natswork/job_executor.rb +271 -0
- data/lib/natswork/job_hooks.rb +63 -0
- data/lib/natswork/logger.rb +183 -0
- data/lib/natswork/metrics.rb +241 -0
- data/lib/natswork/middleware.rb +142 -0
- data/lib/natswork/middleware_chain.rb +40 -0
- data/lib/natswork/monitoring.rb +397 -0
- data/lib/natswork/protocol.rb +454 -0
- data/lib/natswork/queue_manager.rb +164 -0
- data/lib/natswork/retry_handler.rb +125 -0
- data/lib/natswork/server/version.rb +7 -0
- data/lib/natswork/server.rb +47 -0
- data/lib/natswork/simple_worker.rb +101 -0
- data/lib/natswork/thread_pool.rb +192 -0
- data/lib/natswork/worker.rb +217 -0
- data/lib/natswork/worker_manager.rb +62 -0
- data/lib/natswork-server.rb +5 -0
- metadata +151 -0
data/lib/natswork/cli.rb
ADDED
@@ -0,0 +1,420 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'json'
require 'optparse'
require 'time'
|
5
|
+
|
6
|
+
module NatsWork
|
7
|
+
class CLI
|
8
|
+
# Builds a CLI instance around a private copy of the argument vector.
#
# @param argv [Array<String>] raw command-line arguments (ARGV by default);
#   duplicated so that destructive option parsing never mutates the caller's array
def initialize(argv = ARGV)
  @options = {}
  @argv = argv.dup
end
|
12
|
+
|
13
|
+
# Parses the command line and dispatches to the requested sub-command.
#
# Options are consumed from the argument list first; the first remaining
# argument is taken as the command ('help' when none is left). An unknown
# command or an unparseable option prints the usage text and exits with
# status 1.
#
# @return [void]
def run
  parser = create_parser

  begin
    parser.parse!(@argv)
  rescue OptionParser::ParseError => e
    # Bad or incomplete flags are a usage error, not a crash: report the
    # problem and the usage text instead of an uncaught-exception backtrace.
    warn e.message
    puts parser.help
    exit 1
  end

  command = @argv.shift || 'help'

  case command
  when 'status'  then show_status
  when 'stats'   then show_stats
  when 'health'  then show_health
  when 'errors'  then show_errors
  when 'workers' then show_workers
  when 'monitor' then start_monitor
  when 'help', '--help', '-h' then puts parser.help
  else
    puts "Unknown command: #{command}"
    puts parser.help
    exit 1
  end
end
|
40
|
+
|
41
|
+
private
|
42
|
+
|
43
|
+
# Builds the OptionParser for the CLI.
#
# Each recognised flag stores its value in @options under :format, :limit,
# :watch, :interval, :nats_url or :verbose. -h/--help prints the usage text
# and exits immediately.
#
# @return [OptionParser]
def create_parser
  usage_lines = [
    '',
    'Commands:',
    ' status Show overall system status',
    ' stats Show performance statistics',
    ' health Show health check results',
    ' errors Show recent errors',
    ' workers Show worker information',
    ' monitor Start real-time monitoring',
    '',
    'Options:'
  ]

  OptionParser.new do |parser|
    parser.banner = 'Usage: natswork [OPTIONS] COMMAND'
    usage_lines.each { |line| parser.separator(line) }

    parser.on('-f', '--format FORMAT', 'Output format (text, json)') do |value|
      @options[:format] = value.to_sym
    end

    parser.on('-l', '--limit N', Integer, 'Limit number of results') do |count|
      @options[:limit] = count
    end

    parser.on('-w', '--watch', 'Watch mode (refresh automatically)') do
      @options[:watch] = true
    end

    parser.on('-i', '--interval N', Float, 'Watch interval in seconds') do |secs|
      @options[:interval] = secs
    end

    parser.on('--nats-url URL', 'NATS server URL') do |address|
      @options[:nats_url] = address
    end

    parser.on('--verbose', 'Verbose output') do
      @options[:verbose] = true
    end

    parser.on('-h', '--help', 'Show this help') do
      puts parser
      exit
    end
  end
end
|
87
|
+
|
88
|
+
# Prints the overall system status: pretty-printed JSON when the selected
# format is :json, the text table for any other (or no) format.
def show_status
  status_data = collect_status_data

  if @options[:format] == :json
    puts JSON.pretty_generate(status_data)
  else
    display_status_table(status_data)
  end
end
|
98
|
+
|
99
|
+
# Prints performance statistics: pretty-printed JSON when the selected
# format is :json, the text table for any other (or no) format.
def show_stats
  stats_data = collect_stats_data

  if @options[:format] == :json
    puts JSON.pretty_generate(stats_data)
  else
    display_stats_table(stats_data)
  end
end
|
109
|
+
|
110
|
+
# Prints health-check results: pretty-printed JSON when the selected format
# is :json, the text table for any other (or no) format.
def show_health
  health_data = collect_health_data

  if @options[:format] == :json
    puts JSON.pretty_generate(health_data)
  else
    display_health_table(health_data)
  end
end
|
120
|
+
|
121
|
+
# Prints recent errors: pretty-printed JSON when the selected format is
# :json, the text table for any other (or no) format.
def show_errors
  error_data = collect_error_data

  if @options[:format] == :json
    puts JSON.pretty_generate(error_data)
  else
    display_errors_table(error_data)
  end
end
|
131
|
+
|
132
|
+
# Prints worker information: pretty-printed JSON when the selected format is
# :json, the text table for any other (or no) format.
def show_workers
  worker_data = collect_worker_data

  if @options[:format] == :json
    puts JSON.pretty_generate(worker_data)
  else
    display_workers_table(worker_data)
  end
end
|
142
|
+
|
143
|
+
# Repeatedly prints a timestamped header followed by the status view until
# the user interrupts with Ctrl+C.
#
# The screen is cleared before each refresh unless the output format is
# :json. The refresh period comes from the --interval option and defaults to
# 2 seconds.
#
# @return [void]
def start_monitor
  interval = @options[:interval]
  # A zero interval would spin without pausing and a negative one makes
  # Kernel#sleep raise ArgumentError, so anything non-positive (or unset)
  # falls back to the 2-second default.
  interval = 2.0 unless interval&.positive?

  loop do
    clear_screen unless @options[:format] == :json

    puts "NatsWork Monitor (#{Time.now.strftime('%Y-%m-%d %H:%M:%S')})"
    puts 'Press Ctrl+C to exit'
    puts '=' * 60

    show_status
    puts "\n"

    sleep interval
  end
rescue Interrupt
  # Ctrl+C is the documented way out of the loop; exit quietly.
  puts "\nMonitoring stopped."
end
|
163
|
+
|
164
|
+
# Returns the data behind the `status` command.
#
# This would connect to actual NatsWork instances; for now every value
# except the timestamp and uptime is hard-coded mock data.
#
# @return [Hash] :timestamp (ISO 8601 string), :system, :workers, :jobs and
#   :queues (queue name => { depth:, workers: })
def collect_status_data
  now = Time.now
  # $PROGRAM_START_TIME is not assigned anywhere in this file; when it is
  # unset, `Time.now - nil` would raise TypeError, so fall back to the time of
  # the first status collection by this CLI instance.
  started_at = $PROGRAM_START_TIME || (@started_at ||= now)

  {
    timestamp: now.iso8601,
    system: {
      status: :healthy,
      uptime: now - started_at,
      # NOTE(review): hard-coded placeholder, not read from the gem's version constant.
      version: '1.0.0'
    },
    workers: {
      total: 3,
      running: 2,
      paused: 0,
      stopped: 1
    },
    jobs: {
      processed_total: 1247,
      failed_total: 23,
      active: 5,
      queued: 12
    },
    queues: {
      'default' => { depth: 8, workers: 2 },
      'critical' => { depth: 2, workers: 1 },
      'low' => { depth: 2, workers: 0 }
    }
  }
end
|
193
|
+
|
194
|
+
# Returns the data behind the `stats` command.
#
# Intended to be collected from Metrics.global or similar; every value apart
# from the timestamp is currently a hard-coded placeholder.
#
# @return [Hash] :timestamp (ISO 8601 string), :performance,
#   :top_job_classes (Array of { name:, count:, avg_duration: }) and :error_rates
def collect_stats_data
  performance = {
    jobs_per_second: 15.3,
    avg_job_duration: 234.5,
    memory_usage: 89.4,
    cpu_usage: 12.1
  }

  job_rows = [
    ['EmailJob', 456, 189.3],
    ['DataProcessJob', 234, 456.7],
    ['NotificationJob', 123, 45.2]
  ]
  top_job_classes = job_rows.map do |job_name, total, mean_ms|
    { name: job_name, count: total, avg_duration: mean_ms }
  end

  error_rates = { last_hour: 0.03, last_day: 0.018, last_week: 0.021 }

  {
    timestamp: Time.now.iso8601,
    performance: performance,
    top_job_classes: top_job_classes,
    error_rates: error_rates
  }
end
|
216
|
+
|
217
|
+
# Returns the data behind the `health` command.
#
# Intended to come from HealthChecker.global; statuses and messages are
# currently hard-coded placeholders, each stamped with the current time.
#
# @return [Hash] :timestamp (ISO 8601 string), :overall_status and :checks
#   (check name => { status:, message:, last_checked: })
def collect_health_data
  check_messages = [
    ['nats_connection', 'Connected to NATS server'],
    ['memory_usage', 'Memory usage within limits (89.4MB)'],
    ['worker_status', '2/3 workers running']
  ]

  {
    timestamp: Time.now.iso8601,
    overall_status: :healthy,
    checks: check_messages.each_with_object({}) do |(check_name, text), checks|
      checks[check_name] = { status: :healthy, message: text, last_checked: Time.now.iso8601 }
    end
  }
end
|
241
|
+
|
242
|
+
# Returns the data behind the `errors` command.
#
# Intended to come from ErrorTracker.global; the error entries are currently
# hard-coded placeholders. At most --limit entries (default 20) are returned.
#
# @return [Hash] :timestamp (ISO 8601 string), :total_errors and
#   :recent_errors (Array of { type:, message:, count:, last_seen:, fingerprint: })
def collect_error_data
  # Array#[](0, n) returns nil for a negative n, which would make the text
  # renderer call #each on nil; treat a negative limit as "no entries".
  limit = [@options[:limit] || 20, 0].max

  recent_errors = [
    {
      type: 'ArgumentError',
      message: 'Invalid job arguments',
      count: 5,
      last_seen: '2023-12-01T10:30:00Z',
      fingerprint: 'abc123def456'
    },
    {
      type: 'TimeoutError',
      message: 'Job execution timeout',
      count: 3,
      last_seen: '2023-12-01T10:25:00Z',
      fingerprint: 'def456ghi789'
    }
  ]

  {
    timestamp: Time.now.iso8601,
    total_errors: 15,
    recent_errors: recent_errors.first(limit)
  }
end
|
267
|
+
|
268
|
+
# Returns the data behind the `workers` command.
#
# Intended to come from a worker registry or connection; the worker entries
# are currently hard-coded placeholders.
#
# @return [Hash] :timestamp (ISO 8601 string) and :workers (Array of
#   { name:, status:, queues:, jobs_processed:, jobs_failed:, active_jobs:, started_at: })
def collect_worker_data
  rows = [
    ['worker-host1-12345-abc1', %w[default critical], 456, 12, 2],
    ['worker-host1-12346-def2', ['default'], 234, 5, 1]
  ]

  worker_list = rows.map do |worker_name, queue_names, processed, failed, active|
    {
      name: worker_name,
      status: :running,
      queues: queue_names,
      jobs_processed: processed,
      jobs_failed: failed,
      active_jobs: active,
      started_at: '2023-12-01T08:00:00Z'
    }
  end

  { timestamp: Time.now.iso8601, workers: worker_list }
end
|
294
|
+
|
295
|
+
# Renders the status hash as plain text: system status and uptime, worker
# and job counters, then one line per queue.
#
# @param status [Hash] structure with :system, :workers, :jobs and :queues
#   entries, as produced by collect_status_data
def display_status_table(status)
  system_info = status[:system]
  puts "System Status: #{colorize_status(system_info[:status])}"
  puts "Uptime: #{format_duration(system_info[:uptime])}"
  puts

  worker_counts = status[:workers]
  puts "Workers: #{worker_counts[:running]}/#{worker_counts[:total]} running"

  job_counts = status[:jobs]
  puts "Jobs: #{job_counts[:active]} active, #{job_counts[:queued]} queued"
  puts "Total Processed: #{job_counts[:processed_total]} (#{job_counts[:failed_total]} failed)"
  puts

  puts 'Queues:'
  status[:queues].each do |queue_name, details|
    depth_column = details[:depth].to_s.rjust(6)
    puts " #{queue_name.ljust(12)} #{depth_column} jobs, #{details[:workers]} workers"
  end
end
|
310
|
+
|
311
|
+
# Renders the stats hash as three plain-text sections: performance metrics,
# top job classes, and error rates (fractions shown as percentages rounded
# to two decimals).
#
# @param stats [Hash] structure with :performance, :top_job_classes and
#   :error_rates entries, as produced by collect_stats_data
def display_stats_table(stats)
  rule = '=' * 30

  perf = stats[:performance]
  puts 'Performance Metrics'
  puts rule
  puts "Jobs/sec: #{perf[:jobs_per_second]}"
  puts "Avg Duration: #{perf[:avg_job_duration]}ms"
  puts "Memory: #{perf[:memory_usage]}MB"
  puts "CPU: #{perf[:cpu_usage]}%"
  puts

  puts 'Top Job Classes'
  puts rule
  stats[:top_job_classes].each do |entry|
    name_column = entry[:name].ljust(20)
    count_column = entry[:count].to_s.rjust(6)
    puts "#{name_column} #{count_column} (#{entry[:avg_duration]}ms avg)"
  end
  puts

  puts 'Error Rates'
  puts rule
  [['Last Hour', :last_hour], ['Last Day', :last_day], ['Last Week', :last_week]].each do |label, key|
    puts "#{label}: #{(stats[:error_rates][key] * 100).round(2)}%"
  end
end
|
333
|
+
|
334
|
+
# Renders the health hash as plain text: the overall status followed by one
# line per check with an icon, the check name, its status and its message.
#
# @param health [Hash] structure with :overall_status and :checks entries,
#   as produced by collect_health_data
def display_health_table(health)
  icons = { healthy: '✓', degraded: '⚠', unhealthy: '✗' }

  puts "Overall Health: #{colorize_status(health[:overall_status])}"
  puts
  puts 'Health Checks:'
  puts '=' * 50

  health[:checks].each do |check_name, result|
    # Any status outside the three known ones is shown with a question mark.
    icon = icons.fetch(result[:status], '?')

    puts "#{icon} #{check_name.ljust(20)} #{colorize_status(result[:status])} - #{result[:message]}"
  end
end
|
351
|
+
|
352
|
+
# Renders the error hash as plain text: a header with the total count, then
# a three-line entry (type and message, count and last-seen time,
# fingerprint) plus a blank line for every recent error.
#
# @param errors [Hash] structure with :total_errors and :recent_errors
#   entries, as produced by collect_error_data
def display_errors_table(errors)
  puts "Recent Errors (#{errors[:total_errors]} total)"
  puts '=' * 60

  errors[:recent_errors].each do |entry|
    headline = "#{entry[:type]}: #{entry[:message]}"
    puts headline
    puts " Count: #{entry[:count]}, Last seen: #{entry[:last_seen]}"
    puts " Fingerprint: #{entry[:fingerprint]}"
    puts
  end
end
|
363
|
+
|
364
|
+
# Renders the worker hash as plain text: a header with the worker count,
# then for each worker an icon and name line followed by its status, queues,
# job counters and start time, and a blank line.
#
# @param workers [Hash] structure with a :workers array, as produced by
#   collect_worker_data
def display_workers_table(workers)
  icons = { running: '●', paused: '⏸', stopped: '○' }
  entries = workers[:workers]

  puts "Workers (#{entries.size} total)"
  puts '=' * 80

  entries.each do |entry|
    # Any status outside the three known ones is shown with a question mark.
    icon = icons.fetch(entry[:status], '?')

    puts "#{icon} #{entry[:name]}"
    puts " Status: #{colorize_status(entry[:status])}"
    puts " Queues: #{entry[:queues].join(', ')}"
    puts " Jobs: #{entry[:jobs_processed]} processed, #{entry[:jobs_failed]} failed, #{entry[:active_jobs]} active"
    puts " Started: #{entry[:started_at]}"
    puts
  end
end
|
384
|
+
|
385
|
+
# Wraps a status in an ANSI colour escape when standard output is a
# terminal: green for healthy/running, yellow for degraded/paused, red for
# unhealthy/stopped. Any other status, and all output to a non-terminal, is
# returned as plain text.
#
# @param status [Symbol, String] status name
# @return [String]
def colorize_status(status)
  plain = status.to_s
  return plain unless $stdout.tty?

  ansi_code = {
    healthy: 32, running: 32,   # Green
    degraded: 33, paused: 33,   # Yellow
    unhealthy: 31, stopped: 31  # Red
  }[status.to_sym]

  ansi_code ? "\e[#{ansi_code}m#{plain}\e[0m" : plain
end
|
399
|
+
|
400
|
+
# Formats a duration as a short human-readable string, showing the two or
# three most significant units: "1d 1h 1m", "1h 2m", "1m 30s", or seconds
# rounded to one decimal ("10.0s") for anything under a minute.
#
# @param seconds [Numeric] duration in seconds (Integer or Float)
# @return [String]
def format_duration(seconds)
  # Sub-minute (and negative) durations keep their fractional precision.
  return "#{seconds.round(1)}s" if seconds < 60

  # Split on whole seconds. Dividing a Float directly leaves a small positive
  # fraction in every unit, so `days.positive?` was true for any Float input
  # and e.g. 10.0 seconds rendered as "0d 0h 0m".
  total_minutes, secs = seconds.to_i.divmod(60)
  total_hours, minutes = total_minutes.divmod(60)
  days, hours = total_hours.divmod(24)

  if days.positive?
    "#{days}d #{hours}h #{minutes}m"
  elsif hours.positive?
    "#{hours}h #{minutes}m"
  else
    "#{minutes}m #{secs}s"
  end
end
|
415
|
+
|
416
|
+
# Writes the ANSI sequences "erase display" (ESC[2J) and "cursor home"
# (ESC[H) to standard output.
def clear_screen
  $stdout.print("\e[2J\e[H")
end
|
419
|
+
end
|
420
|
+
end
|