natswork-server 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +0 -0
- data/LICENSE +21 -0
- data/README.md +286 -0
- data/lib/natswork/cli.rb +420 -0
- data/lib/natswork/error_tracker.rb +338 -0
- data/lib/natswork/health_check.rb +252 -0
- data/lib/natswork/instrumentation.rb +141 -0
- data/lib/natswork/job_executor.rb +271 -0
- data/lib/natswork/job_hooks.rb +63 -0
- data/lib/natswork/logger.rb +183 -0
- data/lib/natswork/metrics.rb +241 -0
- data/lib/natswork/middleware.rb +142 -0
- data/lib/natswork/middleware_chain.rb +40 -0
- data/lib/natswork/monitoring.rb +397 -0
- data/lib/natswork/protocol.rb +454 -0
- data/lib/natswork/queue_manager.rb +164 -0
- data/lib/natswork/retry_handler.rb +125 -0
- data/lib/natswork/server/version.rb +7 -0
- data/lib/natswork/server.rb +47 -0
- data/lib/natswork/simple_worker.rb +101 -0
- data/lib/natswork/thread_pool.rb +192 -0
- data/lib/natswork/worker.rb +217 -0
- data/lib/natswork/worker_manager.rb +62 -0
- data/lib/natswork-server.rb +5 -0
- metadata +151 -0
@@ -0,0 +1,397 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'json'
|
4
|
+
require 'socket'
|
5
|
+
|
6
|
+
module NatsWork
|
7
|
+
module Monitoring
|
8
|
+
# Prometheus metrics exporter
|
9
|
+
# Renders a metrics snapshot in the Prometheus text exposition format and can
# serve it over HTTP through WEBrick.
class PrometheusExporter
  # Characters that must be backslash-escaped inside a quoted label value.
  LABEL_VALUE_ESCAPES = { '\\' => '\\\\', '"' => '\\"', "\n" => '\\n' }.freeze

  # @param options [Hash]
  # @option options [#snapshot] :metrics registry to export (falls back to Metrics.global)
  # @option options [Integer] :port TCP port for the HTTP endpoint (default 9090)
  # @option options [String] :path URL path the metrics are mounted on (default '/metrics')
  def initialize(options = {})
    @metrics = options[:metrics] || Metrics.global
    @port = options[:port] || 9090
    @path = options[:path] || '/metrics'
    @server = nil
    @server_thread = nil
  end

  # Starts the WEBrick endpoint on a background thread.
  # Calling it again while a server exists returns the existing thread instead
  # of trying to bind the same port a second time.
  #
  # @return [Thread] the thread running the HTTP server
  def start_server
    return @server_thread if @server

    # Loaded lazily so webrick is only needed when the endpoint is used.
    require 'webrick'

    @server = WEBrick::HTTPServer.new(
      Port: @port,
      Logger: WEBrick::Log.new(File::NULL), # portable replacement for '/dev/null'
      AccessLog: []
    )

    @server.mount_proc(@path) do |_req, res|
      res['Content-Type'] = 'text/plain'
      res.body = export_metrics
    end

    # Capture the server in a local so the thread never sees @server reset to nil.
    server = @server
    @server_thread = Thread.new { server.start }
  end

  # Shuts the HTTP server down, if one was started. Safe to call repeatedly.
  def stop_server
    server = @server
    @server = nil
    @server_thread = nil
    server&.shutdown
  end

  # Lets Coordinator#shutdown (which calls #close on reporters that respond
  # to it) stop the HTTP server as well.
  alias close stop_server

  # Builds the exposition text from the current snapshot.
  #
  # The snapshot is read through the :counters, :gauges and :histograms keys.
  # Series sharing a metric name are grouped under a single "# TYPE" line.
  #
  # @return [String] newline-terminated exposition text
  def export_metrics
    snapshot = @metrics.snapshot
    lines = []
    typed = {}

    append_simple_series(lines, typed, snapshot[:counters], 'counter')
    append_simple_series(lines, typed, snapshot[:gauges], 'gauge')
    append_histograms(lines, typed, snapshot[:histograms])

    "#{lines.join("\n")}\n"
  end

  private

  # Groups "name{labels}" => value entries by sanitized metric name, keeping
  # first-seen order, so every family is emitted as one contiguous group.
  def group_series(series)
    (series || {}).each_with_object({}) do |(metric, value), groups|
      name, labels = parse_metric_name(metric)
      (groups[name] ||= []) << [labels, value]
    end
  end

  # Emits the "# TYPE" header for a metric family at most once per export.
  def declare_type(lines, typed, name, type)
    return if typed[name]

    typed[name] = true
    lines << "# TYPE natswork_#{name} #{type}"
  end

  # Appends counter or gauge samples (one line per label set).
  def append_simple_series(lines, typed, series, type)
    group_series(series).each do |name, samples|
      declare_type(lines, typed, name, type)
      samples.each do |labels, value|
        lines << "natswork_#{name}#{format_labels(labels)} #{value}"
      end
    end
  end

  # Appends _count, _sum and the available p50/p95/p99 values for each
  # histogram; entries whose stats are nil are skipped.
  def append_histograms(lines, typed, series)
    group_series(series).each do |name, samples|
      samples.each do |labels, stats|
        next unless stats

        declare_type(lines, typed, name, 'histogram')
        count = stats[:count] || 0
        mean = stats[:mean] || 0
        lines << "natswork_#{name}_count#{format_labels(labels)} #{count}"
        # The snapshot exposes a mean rather than a sum, so the sum is derived.
        lines << "natswork_#{name}_sum#{format_labels(labels)} #{count * mean}"

        %w[50 95 99].each do |percentile|
          value = stats["p#{percentile}".to_sym]
          next unless value

          pct_labels = format_labels(labels.merge(percentile: percentile))
          lines << "natswork_#{name}_percentile#{pct_labels} #{value}"
        end
      end
    end
  end

  # Splits "some.metric{key:value,other:value}" into a sanitized name and a
  # label hash. Label fragments without a ':' are ignored instead of raising.
  #
  # @return [Array(String, Hash)]
  def parse_metric_name(metric)
    raw_name, labels_str = metric.to_s.split('{', 2)
    name = raw_name.to_s.gsub(/[^a-zA-Z0-9_:]/, '_')
    return [name, {}] unless labels_str

    labels = labels_str.chomp('}').split(',').each_with_object({}) do |pair, acc|
      key, value = pair.split(':', 2)
      next if value.nil? || key.strip.empty?

      acc[key.strip] = value
    end
    [name, labels]
  end

  # Formats a label hash as {key="value",...}; returns '' when there are none.
  def format_labels(labels)
    return '' if labels.empty?

    label_pairs = labels.map do |key, value|
      label_name = key.to_s.gsub(/[^a-zA-Z0-9_]/, '_')
      label_value = value.to_s.gsub(/[\\"\n]/, LABEL_VALUE_ESCAPES)
      "#{label_name}=\"#{label_value}\""
    end
    "{#{label_pairs.join(',')}}"
  end
end
|
97
|
+
|
98
|
+
# StatsD integration
|
99
|
+
# Sends metrics to a StatsD daemon over UDP on a best-effort basis: delivery
# problems are reported with `warn` and never raised to the caller.
class StatsDReporter
  # Characters that would terminate the metric name or split the datagram.
  RESERVED_NAME_CHARS = /[:|@\s]/.freeze
  # Characters that would terminate a tag or split the datagram.
  RESERVED_TAG_CHARS = /[,|\s]/.freeze

  # @param host [String] StatsD host
  # @param port [Integer] StatsD UDP port
  # @param prefix [String] prepended to every metric name, followed by '.'
  def initialize(host = 'localhost', port = 8125, prefix = 'natswork')
    @host = host
    @port = port
    @prefix = prefix
    @socket = UDPSocket.new
  end

  # Reports a counter ("c") value.
  def report_counter(metric, value, tags = {})
    send_metric("#{@prefix}.#{metric}", value, 'c', tags)
  end

  # Reports a gauge ("g") value.
  def report_gauge(metric, value, tags = {})
    send_metric("#{@prefix}.#{metric}", value, 'g', tags)
  end

  # Reports a timer ("ms") value.
  def report_timer(metric, value, tags = {})
    send_metric("#{@prefix}.#{metric}", value, 'ms', tags)
  end

  # Reports a histogram ("h") value.
  def report_histogram(metric, value, tags = {})
    send_metric("#{@prefix}.#{metric}", value, 'h', tags)
  end

  # Closes the UDP socket. Safe to call more than once.
  def close
    @socket.close unless @socket.closed?
  end

  private

  # Builds "<name>:<value>|<type>[|#k:v,...]" and sends it as one datagram.
  # Reserved characters in the name and tags are replaced with '_' so a stray
  # ':' , '|' , ',' or newline cannot corrupt or split the packet.
  def send_metric(metric, value, type, tags = {})
    name = metric.to_s.gsub(RESERVED_NAME_CHARS, '_')
    message = "#{name}:#{value}|#{type}#{format_tags(tags)}"

    @socket.send(message, 0, @host, @port)
  rescue StandardError => e
    # Don't let StatsD errors break the application
    warn "StatsD error: #{e.message}"
  end

  # Renders the "|#key:value,..." suffix; nil or empty tags yield ''.
  def format_tags(tags)
    return '' if tags.nil? || tags.empty?

    rendered = tags.map do |key, value|
      "#{key.to_s.gsub(RESERVED_TAG_CHARS, '_')}:#{value.to_s.gsub(RESERVED_TAG_CHARS, '_')}"
    end
    "|##{rendered.join(',')}"
  end
end
|
139
|
+
|
140
|
+
# New Relic custom metrics (simplified)
|
141
|
+
# Forwards metrics to New Relic as custom metrics under "Custom/NatsWork/".
# Every method is a no-op while NewRelic::Agent is not loaded. Tags are
# accepted for interface compatibility with the other reporters but ignored.
class NewRelicReporter
  def initialize
    @enabled = agent_available?
  end

  # Records a counter value as-is.
  def report_counter(metric, value, _tags = {})
    record(metric) { value }
  end

  # Records a gauge value as-is.
  def report_gauge(metric, value, _tags = {})
    record(metric) { value }
  end

  # Records a timer; `value` is divided by 1000.0 (milliseconds to seconds).
  def report_timer(metric, value, _tags = {})
    # Convert to seconds for New Relic
    record(metric) { value / 1000.0 }
  end

  private

  # True once the agent constant is visible. A positive result is memoized;
  # a negative one is re-checked so that a reporter created before the agent
  # was required still starts reporting afterwards.
  def enabled?
    @enabled ||= agent_available?
  end

  def agent_available?
    defined?(NewRelic::Agent) ? true : false
  end

  # Sends one metric. The value is supplied by a block so nothing is computed
  # while the agent is unavailable. Failures are reported with `warn` rather
  # than raised, so a monitoring problem cannot break the caller.
  def record(metric)
    return unless enabled?

    NewRelic::Agent.record_metric("Custom/NatsWork/#{metric}", yield)
  rescue StandardError => e
    warn "New Relic error: #{e.message}"
  end
end
|
165
|
+
|
166
|
+
# Generic webhook reporter
|
167
|
+
# Buffers metrics and hands them off in batches, either when the buffer
# reaches :batch_size or every :flush_interval seconds.
#
# NOTE: actual HTTP delivery is not implemented; each batch is only logged
# through NatsWork::Logger.debug.
class WebhookReporter
  # @param webhook_url [String] destination URL (currently only logged)
  # @param options [Hash]
  # @option options [Integer] :batch_size buffered entries that trigger a flush (default 100)
  # @option options [Numeric] :flush_interval seconds between timed flushes (default 60)
  def initialize(webhook_url, options = {})
    @webhook_url = webhook_url
    @batch_size = options.fetch(:batch_size, 100)
    @flush_interval = options.fetch(:flush_interval, 60)
    @metrics_buffer = []
    @mutex = Mutex.new
    @stop_signal = ConditionVariable.new
    @closed = false

    start_flush_timer
  end

  # Buffers one metric and flushes when the batch size is reached.
  def report_metric(type, metric, value, tags = {})
    batch = @mutex.synchronize do
      @metrics_buffer << {
        type: type,
        metric: metric,
        value: value,
        tags: tags,
        timestamp: Time.now.to_f
      }

      drain_buffer if @metrics_buffer.size >= @batch_size
    end

    # Delivery is started outside the lock so reporters are never blocked by it.
    dispatch(batch)
  end

  def report_counter(metric, value, tags = {})
    report_metric('counter', metric, value, tags)
  end

  def report_gauge(metric, value, tags = {})
    report_metric('gauge', metric, value, tags)
  end

  def report_timer(metric, value, tags = {})
    report_metric('timer', metric, value, tags)
  end

  # Hands everything currently buffered to a background delivery thread.
  # Takes the buffer lock itself, so it is safe to call from any thread.
  #
  # @return [Thread, nil] the delivery thread, or nil when nothing was buffered
  def flush_metrics
    # Mutex is not re-entrant; tolerate a caller that already holds it.
    batch = @mutex.owned? ? drain_buffer : @mutex.synchronize { drain_buffer }
    dispatch(batch)
  end

  # Stops the flush timer, delivers whatever is still buffered and waits for
  # that delivery to finish. Safe to call more than once.
  def close
    already_closed = @mutex.synchronize do
      previous = @closed
      @closed = true
      @stop_signal.broadcast # wake the timer thread so it exits immediately
      previous
    end
    return if already_closed

    @flush_thread&.join
    flush_metrics&.join
    nil
  end

  private

  # Removes and returns the buffered entries, or nil when the buffer is empty.
  # Must be called with @mutex held.
  def drain_buffer
    return if @metrics_buffer.empty?

    batch = @metrics_buffer.dup
    @metrics_buffer.clear
    batch
  end

  # Delivers a batch on its own thread; errors are reported with `warn`
  # instead of killing the thread silently.
  def dispatch(batch)
    return if batch.nil? || batch.empty?

    # Send webhook asynchronously
    Thread.new do
      begin
        send_webhook(batch)
      rescue StandardError => e
        warn "Webhook error: #{e.message}"
      end
    end
  end

  # Runs the periodic flush until #close is called.
  def start_flush_timer
    @flush_thread = Thread.new do
      flush_metrics until wait_for_next_flush
    end
  end

  # Waits up to @flush_interval seconds, or until #close signals.
  #
  # @return [Boolean] true when the reporter has been closed
  def wait_for_next_flush
    @mutex.synchronize do
      @stop_signal.wait(@mutex, @flush_interval) unless @closed
      @closed
    end
  end

  # Builds the envelope for one batch: timestamp, hostname and the metrics.
  def build_payload(metrics)
    {
      # Same layout as Time#iso8601, without needing `require 'time'`.
      timestamp: Time.now.strftime('%Y-%m-%dT%H:%M:%S%:z'),
      hostname: Socket.gethostname,
      metrics: metrics
    }
  end

  def send_webhook(metrics)
    payload = build_payload(metrics)

    # This would use an HTTP client in a real implementation
    NatsWork::Logger.debug('Webhook metrics batch',
                           url: @webhook_url,
                           count: payload[:metrics].size)
  end
end
|
240
|
+
|
241
|
+
# Monitoring coordinator that manages all reporters
|
242
|
+
# Fans metric reports out to every registered reporter.
#
# Reporters are duck-typed: a reporter only receives the report_* calls it
# responds to. A reporter that raises is reported with `warn` and skipped,
# so one failing backend cannot starve the others or break the caller.
class Coordinator
  def initialize
    @reporters = []
    @enabled = true
  end

  # Registers a reporter.
  def add_reporter(reporter)
    @reporters << reporter
  end

  # Unregisters a reporter.
  #
  # @return [Object, nil] the removed reporter, or nil if it was not registered
  def remove_reporter(reporter)
    @reporters.delete(reporter)
  end

  def report_counter(metric, value, tags = {})
    broadcast(:report_counter, metric, value, tags)
  end

  def report_gauge(metric, value, tags = {})
    broadcast(:report_gauge, metric, value, tags)
  end

  def report_timer(metric, value, tags = {})
    broadcast(:report_timer, metric, value, tags)
  end

  def report_histogram(metric, value, tags = {})
    broadcast(:report_histogram, metric, value, tags)
  end

  # Resumes forwarding of reports.
  def enable!
    @enabled = true
  end

  # Makes every report_* call a no-op until #enable! is called.
  def disable!
    @enabled = false
  end

  # Calls #close on every reporter that supports it. A failing reporter does
  # not prevent the remaining ones from being closed.
  def shutdown
    @reporters.each do |reporter|
      next unless reporter.respond_to?(:close)

      begin
        reporter.close
      rescue StandardError => e
        warn "Monitoring reporter #{reporter.class} failed to close: #{e.message}"
      end
    end
  end

  class << self
    # Process-wide coordinator, created on first use.
    def global
      @global ||= new
    end
  end

  private

  # Sends `message` with the metric arguments to each reporter that responds
  # to it; does nothing while the coordinator is disabled.
  def broadcast(message, metric, value, tags)
    return unless @enabled

    @reporters.each do |reporter|
      next unless reporter.respond_to?(message)

      begin
        reporter.public_send(message, metric, value, tags)
      rescue StandardError => e
        warn "Monitoring reporter #{reporter.class} failed in #{message}: #{e.message}"
      end
    end
  end
end
|
308
|
+
|
309
|
+
# Monitoring setup helpers
|
310
|
+
# One-call factories that build a monitoring backend and register it.
module Setup
  class << self
    # Builds a Prometheus exporter, starts its HTTP server and registers it
    # with the global coordinator.
    #
    # @return [PrometheusExporter]
    def prometheus(port: 9090, path: '/metrics')
      PrometheusExporter.new(port: port, path: path).tap do |prometheus_exporter|
        prometheus_exporter.start_server
        Coordinator.global.add_reporter(prometheus_exporter)
      end
    end

    # Builds a StatsD reporter and attaches it to Metrics.global through an
    # anonymous MetricsCollector subclass whose #collect forwards each
    # measurement to the reporter method matching its type.
    #
    # @return [StatsDReporter]
    def statsd(host: 'localhost', port: 8125, prefix: 'natswork')
      statsd_reporter = StatsDReporter.new(host, port, prefix)

      # Measurement type => reporter method; other types are ignored.
      forwarders = {
        counter: :report_counter,
        gauge: :report_gauge,
        timer: :report_timer,
        histogram: :report_histogram
      }

      # Wire up metrics to StatsD
      collector_class = Class.new(MetricsCollector) do
        # define_method keeps the closure over statsd_reporter / forwarders.
        define_method(:initialize) { @reporter = statsd_reporter }

        define_method(:collect) do |type, metric, value, tags|
          forwarder = forwarders[type]
          forwarder ? @reporter.public_send(forwarder, metric, value, tags) : nil
        end
      end

      Metrics.global.add_collector(collector_class.new)
      statsd_reporter
    end

    # Builds a New Relic reporter and registers it with the global coordinator.
    #
    # @return [NewRelicReporter]
    def new_relic
      NewRelicReporter.new.tap do |new_relic_reporter|
        Coordinator.global.add_reporter(new_relic_reporter)
      end
    end

    # Builds a webhook reporter and registers it with the global coordinator.
    #
    # @param url [String] webhook URL passed to WebhookReporter
    # @param options [Hash] passed through to WebhookReporter
    # @return [WebhookReporter]
    def webhook(url, options = {})
      WebhookReporter.new(url, options).tap do |webhook_reporter|
        Coordinator.global.add_reporter(webhook_reporter)
      end
    end
  end
end
|
351
|
+
|
352
|
+
# Built-in system metrics collector
|
353
|
+
# Periodically publishes process-level gauges (GC/heap figures, pid, uptime
# and thread count) to a metrics registry from a background thread.
class SystemMetrics
  # @param metrics [#gauge, nil] registry to publish to (falls back to Metrics.global)
  def initialize(metrics = nil)
    @metrics = metrics || Metrics.global
    @enabled = false
    @collection_thread = nil
    # Uptime baseline used when $PROGRAM_START_TIME has not been set.
    @started_at = Time.now
    @lock = Mutex.new
    @wakeup = ConditionVariable.new
  end

  # Starts the background collection loop.
  #
  # @param interval [Numeric] seconds between collections
  # @return [Thread, nil] the collection thread, or nil if already running
  def start_collection(interval = 30)
    already_running = @lock.synchronize do
      previous = @enabled
      @enabled = true
      previous
    end
    return if already_running

    @collection_thread = Thread.new { collection_loop(interval) }
  end

  # Stops the loop and waits for the thread to finish. The thread is woken
  # up, so this returns promptly instead of waiting out the current interval.
  def stop_collection
    @lock.synchronize do
      @enabled = false
      @wakeup.broadcast
    end

    thread = @collection_thread
    @collection_thread = nil
    thread.join if thread && !thread.equal?(Thread.current)
  end

  private

  # Collects, then waits up to `interval` seconds (or until stopped), repeatedly.
  # A failing collection is reported with `warn` and retried on the next tick
  # rather than killing the thread.
  def collection_loop(interval)
    while @enabled
      begin
        collect_system_metrics
      rescue StandardError => e
        warn "System metrics error: #{e.message}"
      end

      @lock.synchronize { @wakeup.wait(@lock, interval) if @enabled }
    end
  end

  def collect_system_metrics
    # Memory usage
    collect_memory_metrics

    # Process info
    @metrics.gauge('system.process.pid', Process.pid)
    @metrics.gauge('system.process.uptime', Time.now - process_start_time)

    # Thread count
    @metrics.gauge('system.threads.count', Thread.list.size)
  end

  # Publishes GC figures. The byte estimate is heap pages times the VM's heap
  # page size and is skipped when the runtime does not expose that constant.
  def collect_memory_metrics
    return unless defined?(GC.stat)

    heap_pages = GC.stat(:heap_allocated_pages)
    page_size = defined?(GC::INTERNAL_CONSTANTS) ? GC::INTERNAL_CONSTANTS[:HEAP_PAGE_SIZE] : nil
    @metrics.gauge('system.memory.used_bytes', heap_pages * page_size) if heap_pages && page_size

    @metrics.gauge('system.memory.gc.count', GC.count)
    @metrics.gauge('system.memory.gc.heap_pages', heap_pages) if heap_pages
  end

  # $PROGRAM_START_TIME is honoured when something has set it; otherwise the
  # time this collector was created is used so uptime never raises on nil.
  def process_start_time
    $PROGRAM_START_TIME || @started_at
  end
end
|
396
|
+
end
|
397
|
+
end
|