journaled 6.2.2 → 6.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: ed85996fe76daec652ed49ec5c128e27f906c9ce91193de1b394843e11ed3971
|
|
4
|
+
data.tar.gz: '09335faed12c2732e425535b849f36d7973a2a614ffd52bd1a77ac0e5b251ba5'
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: e0f867425b1f9a033644b007b97f144665d1bf5c30f498621ad2a805d3d607158a219fe46e3ef189389a89b77c038289dc45943e659519c9932b1bcc5832b1fa
|
|
7
|
+
data.tar.gz: 1788bf801f0c3a3d2dafb5108532e350dce725319312367351fd0b9505cc554ce7a03d85c15b36a2c2cddbef313818d3770beeee9aaf1c8550e812925830fdc9
|
|
@@ -68,7 +68,12 @@ module Journaled
|
|
|
68
68
|
event = stream_events[index]
|
|
69
69
|
|
|
70
70
|
if record_result.error_code
|
|
71
|
-
failed << create_failed_event(
|
|
71
|
+
failed << create_failed_event(
|
|
72
|
+
event,
|
|
73
|
+
error_code: record_result.error_code,
|
|
74
|
+
error_message: record_result.error_message,
|
|
75
|
+
transient: PERMANENT_ERROR_CODES.exclude?(record_result.error_code),
|
|
76
|
+
)
|
|
72
77
|
else
|
|
73
78
|
succeeded << event
|
|
74
79
|
end
|
|
@@ -77,12 +82,14 @@ module Journaled
|
|
|
77
82
|
{ succeeded:, failed: }
|
|
78
83
|
end
|
|
79
84
|
|
|
80
|
-
def create_failed_event(event,
|
|
85
|
+
def create_failed_event(event, error_code:, error_message:, transient:)
|
|
86
|
+
Outbox::MetricEmitter.emit_kinesis_failure(event:, error_code:)
|
|
87
|
+
|
|
81
88
|
Journaled::KinesisFailedEvent.new(
|
|
82
89
|
event:,
|
|
83
|
-
error_code
|
|
84
|
-
error_message
|
|
85
|
-
transient
|
|
90
|
+
error_code:,
|
|
91
|
+
error_message:,
|
|
92
|
+
transient:,
|
|
86
93
|
)
|
|
87
94
|
end
|
|
88
95
|
|
|
@@ -90,8 +97,8 @@ module Journaled
|
|
|
90
97
|
Rails.logger.error("Kinesis batch send failed (transient): #{error.class} - #{error.message}")
|
|
91
98
|
|
|
92
99
|
failed = stream_events.map do |event|
|
|
93
|
-
|
|
94
|
-
event
|
|
100
|
+
create_failed_event(
|
|
101
|
+
event,
|
|
95
102
|
error_code: error.class.to_s,
|
|
96
103
|
error_message: error.message,
|
|
97
104
|
transient: true,
|
|
@@ -58,17 +58,23 @@ module Journaled
|
|
|
58
58
|
event
|
|
59
59
|
rescue *PERMANENT_ERROR_CLASSES => e
|
|
60
60
|
Rails.logger.error("[Journaled] Kinesis event send failed (permanent): #{e.class} - #{e.message}")
|
|
61
|
+
error_code = e.class.to_s
|
|
62
|
+
Outbox::MetricEmitter.emit_kinesis_failure(event:, error_code:)
|
|
63
|
+
|
|
61
64
|
Journaled::KinesisFailedEvent.new(
|
|
62
65
|
event:,
|
|
63
|
-
error_code
|
|
66
|
+
error_code:,
|
|
64
67
|
error_message: e.message,
|
|
65
68
|
transient: false,
|
|
66
69
|
)
|
|
67
70
|
rescue StandardError => e
|
|
68
71
|
Rails.logger.error("[Journaled] Kinesis event send failed (transient): #{e.class} - #{e.message}")
|
|
72
|
+
error_code = e.class.to_s
|
|
73
|
+
Outbox::MetricEmitter.emit_kinesis_failure(event:, error_code:)
|
|
74
|
+
|
|
69
75
|
Journaled::KinesisFailedEvent.new(
|
|
70
76
|
event:,
|
|
71
|
-
error_code
|
|
77
|
+
error_code:,
|
|
72
78
|
error_message: e.message,
|
|
73
79
|
transient: true,
|
|
74
80
|
)
|
|
@@ -2,83 +2,92 @@
|
|
|
2
2
|
|
|
3
3
|
module Journaled
|
|
4
4
|
module Outbox
|
|
5
|
-
# Handles metric emission for the Worker
|
|
5
|
+
# Handles metric emission for the Worker and Kinesis senders
|
|
6
6
|
#
|
|
7
|
-
# This class
|
|
7
|
+
# This class provides utility methods for collecting and emitting metrics.
|
|
8
8
|
class MetricEmitter
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
9
|
+
class << self
|
|
10
|
+
# Emit batch processing metrics
|
|
11
|
+
#
|
|
12
|
+
# @param stats [Hash] Processing statistics with :succeeded, :failed_permanently, :failed_transiently
|
|
13
|
+
# @param worker_id [String] ID of the worker processing the batch
|
|
14
|
+
def emit_batch_metrics(stats, worker_id:)
|
|
15
|
+
total_events = stats[:succeeded] + stats[:failed_permanently] + stats[:failed_transiently]
|
|
12
16
|
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
17
|
+
emit_metric('journaled.outbox_event.processed', value: total_events, worker_id:)
|
|
18
|
+
emit_metric('journaled.outbox_event.sent', value: stats[:succeeded], worker_id:)
|
|
19
|
+
emit_metric('journaled.outbox_event.failed', value: stats[:failed_permanently], worker_id:)
|
|
20
|
+
emit_metric('journaled.outbox_event.errored', value: stats[:failed_transiently], worker_id:)
|
|
21
|
+
end
|
|
18
22
|
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
23
|
+
# Collect and emit queue metrics
|
|
24
|
+
#
|
|
25
|
+
# This calculates various queue statistics and emits individual metrics for each.
|
|
26
|
+
# @param worker_id [String] ID of the worker collecting metrics
|
|
27
|
+
def emit_queue_metrics(worker_id:)
|
|
28
|
+
metrics = calculate_queue_metrics
|
|
24
29
|
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
metrics = calculate_queue_metrics
|
|
30
|
+
emit_metric('journaled.worker.queue_total_count', value: metrics[:total_count], worker_id:)
|
|
31
|
+
emit_metric('journaled.worker.queue_workable_count', value: metrics[:workable_count], worker_id:)
|
|
32
|
+
emit_metric('journaled.worker.queue_failed_count', value: metrics[:failed_count], worker_id:)
|
|
33
|
+
emit_metric('journaled.worker.queue_oldest_age_seconds', value: metrics[:oldest_age_seconds], worker_id:)
|
|
30
34
|
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
+
Rails.logger.info(
|
|
36
|
+
"Queue metrics: total=#{metrics[:total_count]}, " \
|
|
37
|
+
"workable=#{metrics[:workable_count]}, " \
|
|
38
|
+
"failed=#{metrics[:failed_count]}, " \
|
|
39
|
+
"oldest_age=#{metrics[:oldest_age_seconds].round(2)}s",
|
|
40
|
+
)
|
|
41
|
+
end
|
|
35
42
|
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
+
# Emit a metric notification for a Kinesis send failure
|
|
44
|
+
#
|
|
45
|
+
# @param event [Journaled::Outbox::Event] The failed event
|
|
46
|
+
# @param error_code [String] The error code (e.g., 'ProvisionedThroughputExceededException')
|
|
47
|
+
def emit_kinesis_failure(event:, error_code:)
|
|
48
|
+
emit_metric(
|
|
49
|
+
'journaled.kinesis.send_failure',
|
|
50
|
+
partition_key: event.partition_key,
|
|
51
|
+
error_code:,
|
|
52
|
+
stream_name: event.stream_name,
|
|
53
|
+
event_type: event.event_type,
|
|
54
|
+
)
|
|
55
|
+
end
|
|
43
56
|
|
|
44
|
-
|
|
57
|
+
private
|
|
45
58
|
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
ActiveSupport::Notifications.instrument(
|
|
54
|
-
event_name,
|
|
55
|
-
payload.merge(worker_id:),
|
|
56
|
-
)
|
|
57
|
-
end
|
|
59
|
+
# Emit a single metric notification
|
|
60
|
+
#
|
|
61
|
+
# @param event_name [String] The name of the metric event
|
|
62
|
+
# @param payload [Hash] Additional payload data (event_count, value, etc.)
|
|
63
|
+
def emit_metric(event_name, payload)
|
|
64
|
+
ActiveSupport::Notifications.instrument(event_name, payload)
|
|
65
|
+
end
|
|
58
66
|
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
67
|
+
# Calculate queue metrics
|
|
68
|
+
#
|
|
69
|
+
# @return [Hash] Metrics including counts and oldest event timestamp
|
|
70
|
+
def calculate_queue_metrics
|
|
71
|
+
# Use a single query with COUNT(*) FILTER to calculate all counts in one table scan
|
|
72
|
+
result = Event.connection.select_one(
|
|
73
|
+
Event.select(
|
|
74
|
+
'COUNT(*) AS total_count',
|
|
75
|
+
'COUNT(*) FILTER (WHERE failed_at IS NULL) AS workable_count',
|
|
76
|
+
'COUNT(*) FILTER (WHERE failure_reason IS NOT NULL AND failed_at IS NULL) AS failed_count',
|
|
77
|
+
'MIN(created_at) FILTER (WHERE failed_at IS NULL) AS oldest_non_failed_timestamp',
|
|
78
|
+
).to_sql,
|
|
79
|
+
)
|
|
72
80
|
|
|
73
|
-
|
|
74
|
-
|
|
81
|
+
oldest_timestamp = result['oldest_non_failed_timestamp']
|
|
82
|
+
oldest_age_seconds = oldest_timestamp ? Time.current - oldest_timestamp : 0
|
|
75
83
|
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
84
|
+
{
|
|
85
|
+
total_count: result['total_count'],
|
|
86
|
+
workable_count: result['workable_count'],
|
|
87
|
+
failed_count: result['failed_count'],
|
|
88
|
+
oldest_age_seconds:,
|
|
89
|
+
}
|
|
90
|
+
end
|
|
82
91
|
end
|
|
83
92
|
end
|
|
84
93
|
end
|
|
@@ -18,7 +18,6 @@ module Journaled
|
|
|
18
18
|
@worker_id = "#{Socket.gethostname}-#{Process.pid}"
|
|
19
19
|
self.running = false
|
|
20
20
|
@processor = BatchProcessor.new
|
|
21
|
-
@metric_emitter = MetricEmitter.new(worker_id: @worker_id)
|
|
22
21
|
self.shutdown_requested = false
|
|
23
22
|
@last_metrics_emission = Time.current
|
|
24
23
|
end
|
|
@@ -50,7 +49,7 @@ module Journaled
|
|
|
50
49
|
|
|
51
50
|
private
|
|
52
51
|
|
|
53
|
-
attr_reader :worker_id, :processor
|
|
52
|
+
attr_reader :worker_id, :processor
|
|
54
53
|
attr_accessor :shutdown_requested, :running, :last_metrics_emission
|
|
55
54
|
|
|
56
55
|
def run_loop
|
|
@@ -77,7 +76,7 @@ module Journaled
|
|
|
77
76
|
def process_batch
|
|
78
77
|
stats = processor.process_batch
|
|
79
78
|
|
|
80
|
-
|
|
79
|
+
MetricEmitter.emit_batch_metrics(stats, worker_id:)
|
|
81
80
|
end
|
|
82
81
|
|
|
83
82
|
def check_prerequisites!
|
|
@@ -120,7 +119,7 @@ module Journaled
|
|
|
120
119
|
|
|
121
120
|
# Collect and emit queue metrics
|
|
122
121
|
def collect_and_emit_metrics
|
|
123
|
-
|
|
122
|
+
MetricEmitter.emit_queue_metrics(worker_id:)
|
|
124
123
|
end
|
|
125
124
|
end
|
|
126
125
|
end
|
data/lib/journaled/version.rb
CHANGED