cosmonats 0.2.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +300 -187
- data/lib/cosmo/active_job/adapter.rb +46 -0
- data/lib/cosmo/active_job/executor.rb +16 -0
- data/lib/cosmo/active_job/options.rb +50 -0
- data/lib/cosmo/active_job.rb +29 -0
- data/lib/cosmo/api/busy.rb +2 -2
- data/lib/cosmo/api/counter.rb +2 -2
- data/lib/cosmo/api/cron/entry.rb +99 -0
- data/lib/cosmo/api/cron.rb +118 -0
- data/lib/cosmo/api/kv.rb +36 -14
- data/lib/cosmo/api/stream.rb +27 -9
- data/lib/cosmo/api.rb +1 -0
- data/lib/cosmo/cli.rb +27 -9
- data/lib/cosmo/client.rb +75 -5
- data/lib/cosmo/config.rb +14 -32
- data/lib/cosmo/engine.rb +1 -1
- data/lib/cosmo/job/data.rb +1 -1
- data/lib/cosmo/job/limit.rb +51 -0
- data/lib/cosmo/job/processor.rb +82 -63
- data/lib/cosmo/job.rb +51 -2
- data/lib/cosmo/logger.rb +4 -1
- data/lib/cosmo/processor.rb +108 -0
- data/lib/cosmo/railtie.rb +21 -0
- data/lib/cosmo/stream/processor.rb +24 -60
- data/lib/cosmo/stream.rb +4 -3
- data/lib/cosmo/utils/hash.rb +13 -24
- data/lib/cosmo/utils/overrides.rb +1 -1
- data/lib/cosmo/utils/ttl_cache.rb +44 -0
- data/lib/cosmo/utils.rb +1 -0
- data/lib/cosmo/version.rb +1 -1
- data/lib/cosmo/web/assets/app.css +88 -0
- data/lib/cosmo/web/controllers/crons.rb +41 -0
- data/lib/cosmo/web/controllers/jobs.rb +7 -3
- data/lib/cosmo/web/controllers/streams.rb +36 -10
- data/lib/cosmo/web/helpers/application.rb +17 -2
- data/lib/cosmo/web/views/actions/index.erb +1 -1
- data/lib/cosmo/web/views/crons/_table.erb +58 -0
- data/lib/cosmo/web/views/crons/index.erb +10 -0
- data/lib/cosmo/web/views/jobs/_busy.erb +54 -49
- data/lib/cosmo/web/views/jobs/_dead.erb +70 -65
- data/lib/cosmo/web/views/jobs/_enqueued.erb +82 -56
- data/lib/cosmo/web/views/jobs/_scheduled.erb +53 -48
- data/lib/cosmo/web/views/jobs/_tabs.erb +6 -0
- data/lib/cosmo/web/views/jobs/busy.erb +8 -6
- data/lib/cosmo/web/views/jobs/dead.erb +6 -5
- data/lib/cosmo/web/views/jobs/enqueued.erb +8 -6
- data/lib/cosmo/web/views/jobs/index.erb +1 -1
- data/lib/cosmo/web/views/jobs/scheduled.erb +6 -5
- data/lib/cosmo/web/views/layout.erb +1 -1
- data/lib/cosmo/web/views/streams/_info.erb +3 -0
- data/lib/cosmo/web/views/streams/_pause_banner.erb +17 -0
- data/lib/cosmo/web/views/streams/_stream_row.erb +42 -0
- data/lib/cosmo/web/views/streams/_table.erb +4 -21
- data/lib/cosmo/web.rb +7 -0
- data/lib/cosmo.rb +1 -0
- data/sig/cosmo/active_job/adapter.rbs +13 -0
- data/sig/cosmo/active_job/executor.rbs +9 -0
- data/sig/cosmo/active_job/options.rbs +14 -0
- data/sig/cosmo/api/cron/entry.rbs +30 -0
- data/sig/cosmo/api/cron.rbs +25 -0
- data/sig/cosmo/api/kv.rbs +4 -6
- data/sig/cosmo/api/stream.rbs +7 -1
- data/sig/cosmo/client.rbs +20 -4
- data/sig/cosmo/config.rbs +3 -15
- data/sig/cosmo/job/data.rbs +1 -1
- data/sig/cosmo/job/limit.rbs +18 -0
- data/sig/cosmo/job/processor.rbs +19 -9
- data/sig/cosmo/job.rbs +9 -4
- data/sig/cosmo/processor.rbs +26 -0
- data/sig/cosmo/railtie.rbs +4 -0
- data/sig/cosmo/stream/processor.rbs +4 -10
- data/sig/cosmo/utils/hash.rbs +4 -8
- data/sig/cosmo/utils/ttl_cache.rbs +20 -0
- metadata +25 -3
- data/lib/cosmo/defaults.yml +0 -70
data/lib/cosmo/client.rb
CHANGED
|
@@ -38,18 +38,63 @@ module Cosmo
|
|
|
38
38
|
js.delete_stream(name, params)
|
|
39
39
|
end
|
|
40
40
|
|
|
41
|
+
def update_stream(name, config)
|
|
42
|
+
js.update_stream(name: name, **config)
|
|
43
|
+
end
|
|
44
|
+
|
|
45
|
+
# Create/update a stream, falling back to create when there's no stream.
|
|
46
|
+
# @param name [String] Stream name
|
|
47
|
+
# @param config [Hash] Full desired stream configuration
|
|
48
|
+
def setup_stream(name, config)
|
|
49
|
+
update_stream(name, config)
|
|
50
|
+
rescue NATS::JetStream::Error::StreamNotFound
|
|
51
|
+
create_stream(name, config)
|
|
52
|
+
end
|
|
53
|
+
|
|
54
|
+
# Return all subjects in +stream_name+ that match +filter+ using NATS's
|
|
55
|
+
# subjects_filter on STREAM.INFO (requires NATS ≥ 2.9).
|
|
56
|
+
# @return [Array<String>]
|
|
57
|
+
def cron_subjects_in_stream(stream_name, filter)
|
|
58
|
+
payload = Utils::Json.dump({ subjects_filter: filter })
|
|
59
|
+
resp = nc.request("$JS.API.STREAM.INFO.#{stream_name}", payload)
|
|
60
|
+
data = Utils::Json.parse(resp.data, symbolize_names: false)
|
|
61
|
+
(data&.dig("state", "subjects") || {}).keys
|
|
62
|
+
rescue StandardError
|
|
63
|
+
[]
|
|
64
|
+
end
|
|
65
|
+
|
|
41
66
|
def list_streams
|
|
42
67
|
response = nc.request("$JS.API.STREAM.LIST", "")
|
|
43
68
|
data = Utils::Json.parse(response.data, symbolize_names: false)
|
|
44
69
|
return [] if data.nil? || data["streams"].nil?
|
|
45
70
|
|
|
46
|
-
data["streams"]
|
|
71
|
+
data["streams"]
|
|
72
|
+
end
|
|
73
|
+
|
|
74
|
+
def pause_stream(name)
|
|
75
|
+
config = stream_info(name).config.to_h
|
|
76
|
+
config[:metadata] ||= {}
|
|
77
|
+
config[:metadata][:"_cosmo.paused"] = "true"
|
|
78
|
+
update_stream(name, config)
|
|
79
|
+
end
|
|
80
|
+
|
|
81
|
+
def unpause_stream(name)
|
|
82
|
+
config = stream_info(name).config.to_h
|
|
83
|
+
config[:metadata] ||= {}
|
|
84
|
+
config[:metadata].delete(:"_cosmo.paused")
|
|
85
|
+
update_stream(name, config)
|
|
86
|
+
end
|
|
87
|
+
|
|
88
|
+
def stream_paused?(name)
|
|
89
|
+
stream_info(name).config.metadata&.[](:"_cosmo.paused") == "true"
|
|
90
|
+
rescue NATS::IO::Timeout
|
|
91
|
+
false
|
|
47
92
|
end
|
|
48
93
|
|
|
49
94
|
def list_consumers(stream_name)
|
|
50
95
|
response = nc.request("$JS.API.CONSUMER.LIST.#{stream_name}", "")
|
|
51
|
-
data = Utils::Json.parse(response.data, symbolize_names: false)
|
|
52
|
-
data["consumers"]
|
|
96
|
+
data = Utils::Json.parse(response.data, default: {}, symbolize_names: false)
|
|
97
|
+
Array(data["consumers"])
|
|
53
98
|
end
|
|
54
99
|
|
|
55
100
|
def consumer_info(stream_name, consumer_name)
|
|
@@ -74,14 +119,39 @@ module Cosmo
|
|
|
74
119
|
result["purged"] # number of messages purged
|
|
75
120
|
end
|
|
76
121
|
|
|
77
|
-
def kv(name, **options)
|
|
122
|
+
def kv(name, allow_msg_ttl: false, **options)
|
|
78
123
|
js.key_value(name)
|
|
79
124
|
rescue NATS::KeyValue::BucketNotFoundError
|
|
80
|
-
js.create_key_value({ bucket: name }.merge(options))
|
|
125
|
+
allow_msg_ttl ? create_kv_with_msg_ttl(name, **options) : js.create_key_value({ bucket: name }.merge(options))
|
|
81
126
|
end
|
|
82
127
|
|
|
83
128
|
def close
|
|
84
129
|
nc.close
|
|
85
130
|
end
|
|
131
|
+
|
|
132
|
+
private
|
|
133
|
+
|
|
134
|
+
# NOTE: KV manager in nats-pure hardcodes the fields it copies into StreamConfig,
|
|
135
|
+
# so `allow_msg_ttl` is never forwarded via create_key_value. Send the raw stream-create API request instead.
|
|
136
|
+
def create_kv_with_msg_ttl(name, **options)
|
|
137
|
+
payload = Utils::Json.dump({
|
|
138
|
+
name: "KV_#{name}",
|
|
139
|
+
subjects: ["$KV.#{name}.>"],
|
|
140
|
+
storage: "file",
|
|
141
|
+
allow_direct: true,
|
|
142
|
+
allow_msg_ttl: true,
|
|
143
|
+
allow_rollup_hdrs: true,
|
|
144
|
+
max_msgs_per_subject: 1
|
|
145
|
+
}.merge(options))
|
|
146
|
+
resp = nc.request("$JS.API.STREAM.CREATE.KV_#{name}", payload)
|
|
147
|
+
result = Utils::Json.parse(resp.data, symbolize_names: false)
|
|
148
|
+
if result&.dig("error")
|
|
149
|
+
msg = result.dig("error", "description").to_s
|
|
150
|
+
# Two worker processes starting simultaneously can both attempt creation.
|
|
151
|
+
# If another process won the race, fall back to looking up the existing bucket.
|
|
152
|
+
raise NATS::JetStream::Error, msg unless msg.match?(/already in use|already exists/i)
|
|
153
|
+
end
|
|
154
|
+
js.key_value(name)
|
|
155
|
+
end
|
|
86
156
|
end
|
|
87
157
|
end
|
data/lib/cosmo/config.rb
CHANGED
|
@@ -4,7 +4,7 @@ require "yaml"
|
|
|
4
4
|
require "forwardable"
|
|
5
5
|
|
|
6
6
|
module Cosmo
|
|
7
|
-
class Config
|
|
7
|
+
class Config < ::Hash
|
|
8
8
|
NANO = 1_000_000_000
|
|
9
9
|
DEFAULT_PATH = "config/cosmo.yml"
|
|
10
10
|
|
|
@@ -31,11 +31,20 @@ module Cosmo
|
|
|
31
31
|
end
|
|
32
32
|
|
|
33
33
|
config[:setup]&.each_key do |type|
|
|
34
|
+
next if type == :cron
|
|
35
|
+
|
|
34
36
|
config[:setup][type]&.each_key do |name|
|
|
35
37
|
c = config[:setup][type][name]
|
|
36
38
|
c[:max_age] = c[:max_age].to_i * NANO if c[:max_age]
|
|
37
39
|
c[:duplicate_window] = c[:duplicate_window].to_i * NANO if c[:duplicate_window]
|
|
38
40
|
c[:subjects] = c[:subjects].map { |s| format(s, name: name) } if c[:subjects]
|
|
41
|
+
|
|
42
|
+
next unless type == :jobs # Every jobs stream supports NATS 2.14 message scheduling.
|
|
43
|
+
|
|
44
|
+
c[:allow_msg_schedules] = true
|
|
45
|
+
cron_subject = "#{API::Cron::Entry::SUBJECT_PREFIX}.#{name}.>"
|
|
46
|
+
c[:subjects] = Array(c[:subjects])
|
|
47
|
+
c[:subjects] << cron_subject unless c[:subjects].include?(cron_subject)
|
|
39
48
|
end
|
|
40
49
|
end
|
|
41
50
|
end
|
|
@@ -59,45 +68,18 @@ module Cosmo
|
|
|
59
68
|
@instance ||= new
|
|
60
69
|
end
|
|
61
70
|
|
|
62
|
-
def self.
|
|
63
|
-
@
|
|
64
|
-
end
|
|
65
|
-
|
|
66
|
-
def initialize
|
|
67
|
-
@config = nil
|
|
68
|
-
@system = {}
|
|
69
|
-
@defaults = self.class.parse_file(File.expand_path("defaults.yml", __dir__))
|
|
70
|
-
end
|
|
71
|
-
|
|
72
|
-
def [](key)
|
|
73
|
-
dig(key)
|
|
74
|
-
end
|
|
75
|
-
|
|
76
|
-
def fetch(key, default = nil)
|
|
77
|
-
return @config.fetch(key, default) if @config && Utils::Hash.keys?(@config, key)
|
|
78
|
-
|
|
79
|
-
@defaults.fetch(key, default)
|
|
80
|
-
end
|
|
81
|
-
|
|
82
|
-
def dig(*keys)
|
|
83
|
-
return @config&.dig(*keys) if @config && Utils::Hash.keys?(@config, *keys)
|
|
84
|
-
|
|
85
|
-
@defaults.dig(*keys)
|
|
86
|
-
end
|
|
87
|
-
|
|
88
|
-
def to_h
|
|
89
|
-
Utils::Hash.merge(@defaults, @config)
|
|
71
|
+
def self.internal
|
|
72
|
+
@internal ||= {}
|
|
90
73
|
end
|
|
91
74
|
|
|
92
75
|
def set(...)
|
|
93
|
-
|
|
94
|
-
Utils::Hash.set(@config, ...)
|
|
76
|
+
Utils::Hash.set(self, ...)
|
|
95
77
|
end
|
|
96
78
|
|
|
97
79
|
def load(path = nil)
|
|
98
80
|
return unless path
|
|
99
81
|
|
|
100
|
-
|
|
82
|
+
replace(self.class.parse_file(path))
|
|
101
83
|
end
|
|
102
84
|
end
|
|
103
85
|
end
|
data/lib/cosmo/engine.rb
CHANGED
|
@@ -25,7 +25,7 @@ module Cosmo
|
|
|
25
25
|
|
|
26
26
|
def run(type, options)
|
|
27
27
|
handler = Utils::Signal.trap(:INT, :TERM)
|
|
28
|
-
Logger.info "Starting processing, hit Ctrl-C to stop"
|
|
28
|
+
Logger.info "Starting processing, hit Ctrl-C to stop [concurrency=#{@concurrency}]"
|
|
29
29
|
|
|
30
30
|
processor_classes = type && PROCESSORS.key?(type.to_sym) ? [PROCESSORS[type.to_sym]] : PROCESSORS.values
|
|
31
31
|
@processors = processor_classes.map { _1.run(@pool, @running, options) }
|
data/lib/cosmo/job/data.rb
CHANGED
[the +1 -1 hunk for this file is missing from this extract]
data/lib/cosmo/job/limit.rb
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Cosmo
|
|
4
|
+
module Job
|
|
5
|
+
# Distributed concurrency limiter backed by NATS Key-Value with per-message TTL.
|
|
6
|
+
#
|
|
7
|
+
# Each unit of concurrency is a numbered KV slot:
|
|
8
|
+
# "{concurrency_key}/0", "{concurrency_key}/1", ..., "{concurrency_key}/{limit-1}"
|
|
9
|
+
#
|
|
10
|
+
# Acquiring a slot is a single atomic `set` (CAS with last-revision=0).
|
|
11
|
+
# Only one worker can win a given slot; losers try the next number.
|
|
12
|
+
# When a job finishes the slot is deleted; if the worker crashes NATS
|
|
13
|
+
# expires it automatically via the per-message Nats-TTL header.
|
|
14
|
+
class Limit
|
|
15
|
+
BUCKET = "cosmo_jobs_limits"
|
|
16
|
+
|
|
17
|
+
def self.instance
|
|
18
|
+
@instance ||= new
|
|
19
|
+
end
|
|
20
|
+
|
|
21
|
+
def initialize
|
|
22
|
+
@kv = API::KV.new(BUCKET, allow_msg_ttl: true)
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
# Try to acquire one of the numbered slots for +key+.
|
|
26
|
+
#
|
|
27
|
+
# @param key [String] concurrency key
|
|
28
|
+
# @param jid [String] stored as the slot value for observability
|
|
29
|
+
# @param limit [Integer] number of slots (0 … limit-1)
|
|
30
|
+
# @param duration [Integer] seconds before the slot is auto-expired by NATS
|
|
31
|
+
# @return [String, nil] the acquired slot key, or nil when all slots are taken
|
|
32
|
+
def acquire(key, jid:, limit:, duration:)
|
|
33
|
+
0.upto(limit - 1) do |i|
|
|
34
|
+
slot = "#{key}/#{i}"
|
|
35
|
+
@kv.set(slot, jid, ttl: duration)
|
|
36
|
+
return slot
|
|
37
|
+
rescue NATS::KeyValue::KeyWrongLastSequenceError
|
|
38
|
+
next # slot is live, try the next one
|
|
39
|
+
end
|
|
40
|
+
nil # all slots occupied
|
|
41
|
+
end
|
|
42
|
+
|
|
43
|
+
# Release a previously acquired slot.
|
|
44
|
+
def release(slot)
|
|
45
|
+
@kv.delete(slot)
|
|
46
|
+
rescue NATS::Error
|
|
47
|
+
# best effort — slot TTL will reclaim it if delete fails
|
|
48
|
+
end
|
|
49
|
+
end
|
|
50
|
+
end
|
|
51
|
+
end
|
data/lib/cosmo/job/processor.rb
CHANGED
|
@@ -1,75 +1,36 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require "timeout"
|
|
4
|
+
|
|
3
5
|
module Cosmo
|
|
4
6
|
module Job
|
|
5
|
-
class Processor < ::Cosmo::Processor
|
|
6
|
-
def initialize(pool, running, options)
|
|
7
|
-
super
|
|
8
|
-
@weights = []
|
|
9
|
-
end
|
|
10
|
-
|
|
11
|
-
def stop(timeout = Config[:timeout])
|
|
12
|
-
@running.make_false
|
|
13
|
-
@pool.shutdown
|
|
14
|
-
@consumers.each { |(s, _)| s.unsubscribe rescue nil }
|
|
15
|
-
@pool.wait_for_termination(timeout)
|
|
16
|
-
[@work_thread, @schedule_thread].compact.each { _1.join(timeout) || _1.kill }
|
|
17
|
-
@consumers.clear
|
|
18
|
-
end
|
|
19
|
-
|
|
7
|
+
class Processor < ::Cosmo::Processor
|
|
20
8
|
private
|
|
21
9
|
|
|
22
|
-
def run_loop
|
|
23
|
-
@work_thread = Thread.new { work_loop }
|
|
24
|
-
@schedule_thread = Thread.new { schedule_loop }
|
|
25
|
-
end
|
|
26
|
-
|
|
27
10
|
def setup
|
|
11
|
+
# Initialize singletons before starting to process messages
|
|
12
|
+
API::Busy.instance
|
|
13
|
+
API::Counter.instance
|
|
14
|
+
Limit.instance
|
|
15
|
+
|
|
28
16
|
jobs_config = Config.dig(:consumers, :jobs)
|
|
29
17
|
jobs_config&.each do |stream_name, config|
|
|
30
|
-
|
|
31
|
-
consumer_name = "consumer-#{stream_name}"
|
|
32
|
-
subject = config.delete(:subject)
|
|
33
|
-
priority = config.delete(:priority)
|
|
34
|
-
@weights += ([stream_name] * priority.to_i) if priority
|
|
35
|
-
subscription = client.subscribe(subject, consumer_name, config)
|
|
36
|
-
@consumers << [subscription, stream_name]
|
|
37
|
-
end
|
|
38
|
-
end
|
|
18
|
+
next if stream_name == :scheduled # scheduled jobs are handled in schedule_loop
|
|
39
19
|
|
|
40
|
-
|
|
41
|
-
shutdown = false
|
|
42
|
-
|
|
43
|
-
while running?
|
|
44
|
-
break if shutdown
|
|
45
|
-
|
|
46
|
-
@weights.shuffle.each do |stream_name|
|
|
47
|
-
break unless running?
|
|
48
|
-
|
|
49
|
-
begin
|
|
50
|
-
timeout = ENV.fetch("COSMO_JOBS_FETCH_TIMEOUT", 0.1).to_f
|
|
51
|
-
@pool.post do
|
|
52
|
-
subscription = @consumers.find { |(_, sn)| sn == stream_name }&.first
|
|
53
|
-
messages = lock(stream_name) { fetch(subscription, batch_size: 1, timeout:) }
|
|
54
|
-
process(messages) if messages&.any?
|
|
55
|
-
end
|
|
56
|
-
rescue Concurrent::RejectedExecutionError
|
|
57
|
-
shutdown = true
|
|
58
|
-
break # pool doesn't accept new jobs, we are shutting down
|
|
59
|
-
end
|
|
60
|
-
|
|
61
|
-
break unless running?
|
|
62
|
-
end
|
|
20
|
+
@consumers << subscribe(stream_name, config)
|
|
63
21
|
end
|
|
64
22
|
end
|
|
65
23
|
|
|
66
24
|
def schedule_loop # rubocop:disable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity, Metrics/MethodLength, Metrics/AbcSize
|
|
25
|
+
config = Config.dig(:consumers, :jobs, :scheduled)
|
|
26
|
+
return unless config
|
|
27
|
+
|
|
28
|
+
subscription, = subscribe(:scheduled, config)
|
|
67
29
|
while running?
|
|
68
30
|
break unless running?
|
|
69
31
|
|
|
70
32
|
now = Time.now.to_i
|
|
71
33
|
timeout = ENV.fetch("COSMO_JOBS_SCHEDULER_FETCH_TIMEOUT", 5).to_f
|
|
72
|
-
subscription = @consumers.find { |(_, sn)| sn == :scheduled }&.first
|
|
73
34
|
messages = fetch(subscription, batch_size: 100, timeout:)
|
|
74
35
|
messages&.each do |message|
|
|
75
36
|
headers = message.header.except("X-Stream", "X-Subject", "X-Execute-At", "Nats-Expected-Stream")
|
|
@@ -90,7 +51,7 @@ module Cosmo
|
|
|
90
51
|
end
|
|
91
52
|
end
|
|
92
53
|
|
|
93
|
-
def process(messages) # rubocop:disable Metrics/MethodLength, Metrics/
|
|
54
|
+
def process(messages, _) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
|
94
55
|
message = messages.first
|
|
95
56
|
Logger.debug "received messages #{messages.inspect}"
|
|
96
57
|
data = Utils::Json.parse(message.data)
|
|
@@ -107,30 +68,67 @@ module Cosmo
|
|
|
107
68
|
return
|
|
108
69
|
end
|
|
109
70
|
|
|
71
|
+
if worker_class.limits_concurrency?
|
|
72
|
+
slot = acquire_concurrency_slot(worker_class, message, data)
|
|
73
|
+
return if slot == false
|
|
74
|
+
end
|
|
75
|
+
|
|
76
|
+
duration = worker_class.default_options[:limit]&.dig(:duration)&.to_i
|
|
77
|
+
|
|
110
78
|
with_stats(message) do
|
|
111
79
|
sw = stopwatch
|
|
112
80
|
Logger.with(jid: data[:jid])
|
|
113
81
|
Logger.info "start"
|
|
114
82
|
instance = worker_class.new
|
|
115
83
|
instance.jid = data[:jid]
|
|
116
|
-
|
|
84
|
+
if duration
|
|
85
|
+
Timeout.timeout(duration) { instance.perform(*data[:args]) }
|
|
86
|
+
else
|
|
87
|
+
instance.perform(*data[:args])
|
|
88
|
+
end
|
|
117
89
|
message.ack
|
|
118
90
|
Logger.with(elapsed: sw.elapsed_seconds) { Logger.info "done" }
|
|
119
91
|
true
|
|
92
|
+
rescue Timeout::Error
|
|
93
|
+
Logger.with(elapsed: sw.elapsed_seconds) { Logger.info "fail[timeout]" }
|
|
94
|
+
dropped = handle_failure(message, data)
|
|
95
|
+
false if dropped
|
|
120
96
|
rescue StandardError => e
|
|
121
97
|
Logger.debug e
|
|
122
|
-
Logger.with(elapsed: sw.elapsed_seconds) { Logger.info "fail" }
|
|
98
|
+
Logger.with(elapsed: sw.elapsed_seconds) { Logger.info "fail[error]" }
|
|
123
99
|
dropped = handle_failure(message, data)
|
|
124
100
|
false if dropped
|
|
125
101
|
rescue Exception # rubocop:disable Lint/RescueException
|
|
126
|
-
Logger.with(elapsed: sw.elapsed_seconds) { Logger.info "fail" }
|
|
102
|
+
Logger.with(elapsed: sw.elapsed_seconds) { Logger.info "fail[exception]" }
|
|
127
103
|
raise
|
|
128
104
|
end
|
|
129
105
|
ensure
|
|
106
|
+
Limit.instance.release(slot) if slot
|
|
130
107
|
Logger.without(:jid)
|
|
131
108
|
Logger.debug "processed message #{message.inspect}"
|
|
132
109
|
end
|
|
133
110
|
|
|
111
|
+
# Tries to acquire a concurrency slot for the job.
|
|
112
|
+
# Returns the slot key (String) on success, or false if all slots are
|
|
113
|
+
# taken (message is NAK'd with a delay equal to +duration+ before returning).
|
|
114
|
+
def acquire_concurrency_slot(worker_class, message, data)
|
|
115
|
+
options = worker_class.concurrency_options
|
|
116
|
+
key = worker_class.concurrency_key(data[:args])
|
|
117
|
+
|
|
118
|
+
slot = Limit.instance.acquire(key, jid: data[:jid], limit: options[:limit], duration: options[:duration])
|
|
119
|
+
return slot if slot
|
|
120
|
+
|
|
121
|
+
message.nak(delay: options[:duration] * Config::NANO)
|
|
122
|
+
Logger.debug "concurrency limit reached for #{data[:class]}, re-queueing back #{data[:jid]}"
|
|
123
|
+
false
|
|
124
|
+
rescue NATS::Error => e
|
|
125
|
+
# Unexpected KV failure (e.g. transient NATS error). NAK immediately so
|
|
126
|
+
# the message is retried rather than stuck in-flight until ack_wait expires.
|
|
127
|
+
Logger.error e
|
|
128
|
+
message.nak
|
|
129
|
+
false
|
|
130
|
+
end
|
|
131
|
+
|
|
134
132
|
def handle_failure(message, data) # rubocop:disable Naming/PredicateMethod
|
|
135
133
|
current_attempt = message.metadata.num_delivered
|
|
136
134
|
max_retries = data[:retry].to_i + 1
|
|
@@ -139,7 +137,7 @@ module Cosmo
|
|
|
139
137
|
# NATS will auto-retry with delay (exponential backoff based on current attempt).
|
|
140
138
|
# When max_deliver is reached, NATS stops redelivering the message and marks it as "max deliveries exceeded".
|
|
141
139
|
# The message is effectively abandoned by NATS — it stays in the stream (consuming a slot) but will never be delivered again to that consumer.
|
|
142
|
-
delay_ns = ((current_attempt**4) + 15) *
|
|
140
|
+
delay_ns = ((current_attempt**4) + 15) * Config::NANO
|
|
143
141
|
message.nak(delay: delay_ns)
|
|
144
142
|
return false
|
|
145
143
|
end
|
|
@@ -148,6 +146,15 @@ module Cosmo
|
|
|
148
146
|
true
|
|
149
147
|
end
|
|
150
148
|
|
|
149
|
+
def subscribe(stream_name, config)
|
|
150
|
+
config = config.dup
|
|
151
|
+
config[:batch_size] = 1
|
|
152
|
+
config[:stream] = stream_name
|
|
153
|
+
consumer_name = "consumer-#{stream_name}"
|
|
154
|
+
subscription = client.subscribe(config[:subject], consumer_name, config.except(:subject, :priority, :stream, :batch_size))
|
|
155
|
+
[subscription, config, nil]
|
|
156
|
+
end
|
|
157
|
+
|
|
151
158
|
def drop_message(message, data)
|
|
152
159
|
message.term
|
|
153
160
|
Logger.debug "job dropped #{data[:jid]}"
|
|
@@ -161,16 +168,28 @@ module Cosmo
|
|
|
161
168
|
Logger.debug "job moved #{data&.dig(:jid)} to DLQ"
|
|
162
169
|
end
|
|
163
170
|
|
|
171
|
+
def scheduler?
|
|
172
|
+
true
|
|
173
|
+
end
|
|
174
|
+
|
|
175
|
+
def consumers
|
|
176
|
+
@weights ||= @consumers.filter_map { |(_, c, _)| [c[:stream]] * [c[:priority].to_i, 1].max }.flatten
|
|
177
|
+
@weights.shuffle.map { |s| @consumers.find { |(_, c, _)| c[:stream] == s } }
|
|
178
|
+
end
|
|
179
|
+
|
|
180
|
+
def fetch_subjects(config)
|
|
181
|
+
config[:subject]
|
|
182
|
+
end
|
|
183
|
+
|
|
184
|
+
def fetch_timeout(_config)
|
|
185
|
+
ENV.fetch("COSMO_JOBS_FETCH_TIMEOUT", 0.1).to_f
|
|
186
|
+
end
|
|
187
|
+
|
|
164
188
|
def with_stats(message, &block)
|
|
165
189
|
API::Busy.instance.with(message) do
|
|
166
190
|
API::Counter.instance.with(&block)
|
|
167
191
|
end
|
|
168
192
|
end
|
|
169
|
-
|
|
170
|
-
def lock(stream_name, &)
|
|
171
|
-
@mutexes ||= Hash.new { |h, k| h[k] = Mutex.new }
|
|
172
|
-
@mutexes[stream_name].synchronize(&)
|
|
173
|
-
end
|
|
174
193
|
end
|
|
175
194
|
end
|
|
176
195
|
end
|
data/lib/cosmo/job.rb
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require "cosmo/job/data"
|
|
4
|
+
require "cosmo/job/limit"
|
|
4
5
|
require "cosmo/job/processor"
|
|
5
6
|
|
|
6
7
|
module Cosmo
|
|
@@ -10,8 +11,56 @@ module Cosmo
|
|
|
10
11
|
end
|
|
11
12
|
|
|
12
13
|
module ClassMethods
|
|
13
|
-
|
|
14
|
-
|
|
14
|
+
# @option config [Symbol] :stream NATS stream to publish to (default: :default)
|
|
15
|
+
# @option config [Integer] :retry max delivery attempts before giving up (default: 3)
|
|
16
|
+
# @option config [Boolean] :dead move to dead-letter stream after retries exhausted (default: true)
|
|
17
|
+
# @option config [Hash] :limit execution limits:
|
|
18
|
+
#
|
|
19
|
+
# limit: { duration: 30 }
|
|
20
|
+
# limit: { duration: 30, concurrency: 3 }
|
|
21
|
+
# limit: { duration: 30, concurrency: { to: 3, key: ->(id) { id } } }
|
|
22
|
+
#
|
|
23
|
+
# @option config [Integer] :"limit[:duration]" hard execution timeout in seconds. The job thread is
|
|
24
|
+
# killed after this many seconds and counts as a failed attempt (retried with exponential backoff,
|
|
25
|
+
# moved to DLQ after retries exhausted).
|
|
26
|
+
# @option config [Integer, Hash] :"limit[:concurrency]" caps how many instances run at once across all
|
|
27
|
+
# workers. Jobs that cannot acquire a slot are NAK'd with a delay equal to +duration+ so they are not
|
|
28
|
+
# re-delivered until the slot is guaranteed free. Requires +duration+.
|
|
29
|
+
# Pass an Integer for a class-wide cap, or <tt>{ to: N, key: ->(args) {} }</tt> to scope per key.
|
|
30
|
+
def options(**config)
|
|
31
|
+
if config[:limit] && config.dig(:limit, :concurrency) && !config.dig(:limit, :duration).to_i.positive?
|
|
32
|
+
raise ArgumentError, "limit: duration is required when concurrency is set"
|
|
33
|
+
end
|
|
34
|
+
|
|
35
|
+
default_options.merge!(config)
|
|
36
|
+
end
|
|
37
|
+
alias cosmo_options options
|
|
38
|
+
|
|
39
|
+
def limits_concurrency?
|
|
40
|
+
!!concurrency_options
|
|
41
|
+
end
|
|
42
|
+
|
|
43
|
+
# Returns a normalized concurrency config hash, or +nil+ when not configured.
|
|
44
|
+
# Always contains +:limit+, +:key+, and +:duration+.
|
|
45
|
+
def concurrency_options
|
|
46
|
+
value = default_options.dig(:limit, :concurrency)
|
|
47
|
+
duration = default_options.dig(:limit, :duration).to_i
|
|
48
|
+
return unless value
|
|
49
|
+
|
|
50
|
+
case value
|
|
51
|
+
when Integer then { limit: value, key: nil, duration: duration }
|
|
52
|
+
when Hash then { limit: value.fetch(:to), key: value[:key], duration: duration }
|
|
53
|
+
end
|
|
54
|
+
end
|
|
55
|
+
|
|
56
|
+
# Derive the fully-scoped concurrency key for a given args array.
|
|
57
|
+
def concurrency_key(args)
|
|
58
|
+
config = concurrency_options
|
|
59
|
+
return unless config
|
|
60
|
+
|
|
61
|
+
base = Utils::String.underscore(name)
|
|
62
|
+
suffix = config[:key]&.call(*args)
|
|
63
|
+
suffix ? "#{base}/#{suffix}" : base
|
|
15
64
|
end
|
|
16
65
|
|
|
17
66
|
def perform(*args, async: true, **options)
|
data/lib/cosmo/logger.rb
CHANGED
|
@@ -60,7 +60,10 @@ module Cosmo
|
|
|
60
60
|
end
|
|
61
61
|
|
|
62
62
|
def self.instance
|
|
63
|
-
@instance ||= ::Logger.new($stdout).tap
|
|
63
|
+
@instance ||= ::Logger.new($stdout).tap do |logger|
|
|
64
|
+
logger.formatter = SimpleFormatter.new
|
|
65
|
+
logger.level = ::Logger::Severity.coerce(ENV.fetch("COSMO_LOG_LEVEL", "info"))
|
|
66
|
+
end
|
|
64
67
|
end
|
|
65
68
|
|
|
66
69
|
def self.instance=(logger)
|