cosmonats 0.2.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76)
  1. checksums.yaml +4 -4
  2. data/README.md +300 -187
  3. data/lib/cosmo/active_job/adapter.rb +46 -0
  4. data/lib/cosmo/active_job/executor.rb +16 -0
  5. data/lib/cosmo/active_job/options.rb +50 -0
  6. data/lib/cosmo/active_job.rb +29 -0
  7. data/lib/cosmo/api/busy.rb +2 -2
  8. data/lib/cosmo/api/counter.rb +2 -2
  9. data/lib/cosmo/api/cron/entry.rb +99 -0
  10. data/lib/cosmo/api/cron.rb +118 -0
  11. data/lib/cosmo/api/kv.rb +36 -14
  12. data/lib/cosmo/api/stream.rb +27 -9
  13. data/lib/cosmo/api.rb +1 -0
  14. data/lib/cosmo/cli.rb +27 -9
  15. data/lib/cosmo/client.rb +75 -5
  16. data/lib/cosmo/config.rb +14 -32
  17. data/lib/cosmo/engine.rb +1 -1
  18. data/lib/cosmo/job/data.rb +1 -1
  19. data/lib/cosmo/job/limit.rb +51 -0
  20. data/lib/cosmo/job/processor.rb +82 -63
  21. data/lib/cosmo/job.rb +51 -2
  22. data/lib/cosmo/logger.rb +4 -1
  23. data/lib/cosmo/processor.rb +108 -0
  24. data/lib/cosmo/railtie.rb +21 -0
  25. data/lib/cosmo/stream/processor.rb +24 -60
  26. data/lib/cosmo/stream.rb +4 -3
  27. data/lib/cosmo/utils/hash.rb +13 -24
  28. data/lib/cosmo/utils/overrides.rb +1 -1
  29. data/lib/cosmo/utils/ttl_cache.rb +44 -0
  30. data/lib/cosmo/utils.rb +1 -0
  31. data/lib/cosmo/version.rb +1 -1
  32. data/lib/cosmo/web/assets/app.css +88 -0
  33. data/lib/cosmo/web/controllers/crons.rb +41 -0
  34. data/lib/cosmo/web/controllers/jobs.rb +7 -3
  35. data/lib/cosmo/web/controllers/streams.rb +36 -10
  36. data/lib/cosmo/web/helpers/application.rb +17 -2
  37. data/lib/cosmo/web/views/actions/index.erb +1 -1
  38. data/lib/cosmo/web/views/crons/_table.erb +58 -0
  39. data/lib/cosmo/web/views/crons/index.erb +10 -0
  40. data/lib/cosmo/web/views/jobs/_busy.erb +54 -49
  41. data/lib/cosmo/web/views/jobs/_dead.erb +70 -65
  42. data/lib/cosmo/web/views/jobs/_enqueued.erb +82 -56
  43. data/lib/cosmo/web/views/jobs/_scheduled.erb +53 -48
  44. data/lib/cosmo/web/views/jobs/_tabs.erb +6 -0
  45. data/lib/cosmo/web/views/jobs/busy.erb +8 -6
  46. data/lib/cosmo/web/views/jobs/dead.erb +6 -5
  47. data/lib/cosmo/web/views/jobs/enqueued.erb +8 -6
  48. data/lib/cosmo/web/views/jobs/index.erb +1 -1
  49. data/lib/cosmo/web/views/jobs/scheduled.erb +6 -5
  50. data/lib/cosmo/web/views/layout.erb +1 -1
  51. data/lib/cosmo/web/views/streams/_info.erb +3 -0
  52. data/lib/cosmo/web/views/streams/_pause_banner.erb +17 -0
  53. data/lib/cosmo/web/views/streams/_stream_row.erb +42 -0
  54. data/lib/cosmo/web/views/streams/_table.erb +4 -21
  55. data/lib/cosmo/web.rb +7 -0
  56. data/lib/cosmo.rb +1 -0
  57. data/sig/cosmo/active_job/adapter.rbs +13 -0
  58. data/sig/cosmo/active_job/executor.rbs +9 -0
  59. data/sig/cosmo/active_job/options.rbs +14 -0
  60. data/sig/cosmo/api/cron/entry.rbs +30 -0
  61. data/sig/cosmo/api/cron.rbs +25 -0
  62. data/sig/cosmo/api/kv.rbs +4 -6
  63. data/sig/cosmo/api/stream.rbs +7 -1
  64. data/sig/cosmo/client.rbs +20 -4
  65. data/sig/cosmo/config.rbs +3 -15
  66. data/sig/cosmo/job/data.rbs +1 -1
  67. data/sig/cosmo/job/limit.rbs +18 -0
  68. data/sig/cosmo/job/processor.rbs +19 -9
  69. data/sig/cosmo/job.rbs +9 -4
  70. data/sig/cosmo/processor.rbs +26 -0
  71. data/sig/cosmo/railtie.rbs +4 -0
  72. data/sig/cosmo/stream/processor.rbs +4 -10
  73. data/sig/cosmo/utils/hash.rbs +4 -8
  74. data/sig/cosmo/utils/ttl_cache.rbs +20 -0
  75. metadata +25 -3
  76. data/lib/cosmo/defaults.yml +0 -70
data/lib/cosmo/client.rb CHANGED
@@ -38,18 +38,63 @@ module Cosmo
38
38
  js.delete_stream(name, params)
39
39
  end
40
40
 
41
+ def update_stream(name, config)
42
+ js.update_stream(name: name, **config)
43
+ end
44
+
45
+ # Create/update a stream, falling back to create when there's no stream.
46
+ # @param name [String] Stream name
47
+ # @param config [Hash] Full desired stream configuration
48
+ def setup_stream(name, config)
49
+ update_stream(name, config)
50
+ rescue NATS::JetStream::Error::StreamNotFound
51
+ create_stream(name, config)
52
+ end
53
+
54
+ # Return all subjects in +stream_name+ that match +filter+ using NATS's
55
+ # subjects_filter on STREAM.INFO (requires NATS ≥ 2.9).
56
+ # @return [Array<String>]
57
+ def cron_subjects_in_stream(stream_name, filter)
58
+ payload = Utils::Json.dump({ subjects_filter: filter })
59
+ resp = nc.request("$JS.API.STREAM.INFO.#{stream_name}", payload)
60
+ data = Utils::Json.parse(resp.data, symbolize_names: false)
61
+ (data&.dig("state", "subjects") || {}).keys
62
+ rescue StandardError
63
+ []
64
+ end
65
+
41
66
  def list_streams
42
67
  response = nc.request("$JS.API.STREAM.LIST", "")
43
68
  data = Utils::Json.parse(response.data, symbolize_names: false)
44
69
  return [] if data.nil? || data["streams"].nil?
45
70
 
46
- data["streams"].filter_map { _1.dig("config", "name") }
71
+ data["streams"]
72
+ end
73
+
74
+ def pause_stream(name)
75
+ config = stream_info(name).config.to_h
76
+ config[:metadata] ||= {}
77
+ config[:metadata][:"_cosmo.paused"] = "true"
78
+ update_stream(name, config)
79
+ end
80
+
81
+ def unpause_stream(name)
82
+ config = stream_info(name).config.to_h
83
+ config[:metadata] ||= {}
84
+ config[:metadata].delete(:"_cosmo.paused")
85
+ update_stream(name, config)
86
+ end
87
+
88
+ def stream_paused?(name)
89
+ stream_info(name).config.metadata&.[](:"_cosmo.paused") == "true"
90
+ rescue NATS::IO::Timeout
91
+ false
47
92
  end
48
93
 
49
94
  def list_consumers(stream_name)
50
95
  response = nc.request("$JS.API.CONSUMER.LIST.#{stream_name}", "")
51
- data = Utils::Json.parse(response.data, symbolize_names: false)
52
- data["consumers"]
96
+ data = Utils::Json.parse(response.data, default: {}, symbolize_names: false)
97
+ Array(data["consumers"])
53
98
  end
54
99
 
55
100
  def consumer_info(stream_name, consumer_name)
@@ -74,14 +119,39 @@ module Cosmo
74
119
  result["purged"] # number of messages purged
75
120
  end
76
121
 
77
- def kv(name, **options)
122
+ def kv(name, allow_msg_ttl: false, **options)
78
123
  js.key_value(name)
79
124
  rescue NATS::KeyValue::BucketNotFoundError
80
- js.create_key_value({ bucket: name }.merge(options))
125
+ allow_msg_ttl ? create_kv_with_msg_ttl(name, **options) : js.create_key_value({ bucket: name }.merge(options))
81
126
  end
82
127
 
83
128
  def close
84
129
  nc.close
85
130
  end
131
+
132
+ private
133
+
134
+ # NOTE: KV manager in nats-pure hardcodes the fields it copies into StreamConfig,
135
+ # so `allow_msg_ttl` is never forwarded via create_key_value. Send the raw stream-create API request instead.
136
+ def create_kv_with_msg_ttl(name, **options)
137
+ payload = Utils::Json.dump({
138
+ name: "KV_#{name}",
139
+ subjects: ["$KV.#{name}.>"],
140
+ storage: "file",
141
+ allow_direct: true,
142
+ allow_msg_ttl: true,
143
+ allow_rollup_hdrs: true,
144
+ max_msgs_per_subject: 1
145
+ }.merge(options))
146
+ resp = nc.request("$JS.API.STREAM.CREATE.KV_#{name}", payload)
147
+ result = Utils::Json.parse(resp.data, symbolize_names: false)
148
+ if result&.dig("error")
149
+ msg = result.dig("error", "description").to_s
150
+ # Two worker processes starting simultaneously can both attempt creation.
151
+ # If another process won the race, fall back to looking up the existing bucket.
152
+ raise NATS::JetStream::Error, msg unless msg.match?(/already in use|already exists/i)
153
+ end
154
+ js.key_value(name)
155
+ end
86
156
  end
87
157
  end
data/lib/cosmo/config.rb CHANGED
@@ -4,7 +4,7 @@ require "yaml"
4
4
  require "forwardable"
5
5
 
6
6
  module Cosmo
7
- class Config
7
+ class Config < ::Hash
8
8
  NANO = 1_000_000_000
9
9
  DEFAULT_PATH = "config/cosmo.yml"
10
10
 
@@ -31,11 +31,20 @@ module Cosmo
31
31
  end
32
32
 
33
33
  config[:setup]&.each_key do |type|
34
+ next if type == :cron
35
+
34
36
  config[:setup][type]&.each_key do |name|
35
37
  c = config[:setup][type][name]
36
38
  c[:max_age] = c[:max_age].to_i * NANO if c[:max_age]
37
39
  c[:duplicate_window] = c[:duplicate_window].to_i * NANO if c[:duplicate_window]
38
40
  c[:subjects] = c[:subjects].map { |s| format(s, name: name) } if c[:subjects]
41
+
42
+ next unless type == :jobs # Every jobs stream supports NATS 2.14 message scheduling.
43
+
44
+ c[:allow_msg_schedules] = true
45
+ cron_subject = "#{API::Cron::Entry::SUBJECT_PREFIX}.#{name}.>"
46
+ c[:subjects] = Array(c[:subjects])
47
+ c[:subjects] << cron_subject unless c[:subjects].include?(cron_subject)
39
48
  end
40
49
  end
41
50
  end
@@ -59,45 +68,18 @@ module Cosmo
59
68
  @instance ||= new
60
69
  end
61
70
 
62
- def self.system
63
- @system ||= {}
64
- end
65
-
66
- def initialize
67
- @config = nil
68
- @system = {}
69
- @defaults = self.class.parse_file(File.expand_path("defaults.yml", __dir__))
70
- end
71
-
72
- def [](key)
73
- dig(key)
74
- end
75
-
76
- def fetch(key, default = nil)
77
- return @config.fetch(key, default) if @config && Utils::Hash.keys?(@config, key)
78
-
79
- @defaults.fetch(key, default)
80
- end
81
-
82
- def dig(*keys)
83
- return @config&.dig(*keys) if @config && Utils::Hash.keys?(@config, *keys)
84
-
85
- @defaults.dig(*keys)
86
- end
87
-
88
- def to_h
89
- Utils::Hash.merge(@defaults, @config)
71
+ def self.internal
72
+ @internal ||= {}
90
73
  end
91
74
 
92
75
  def set(...)
93
- @config ||= {}
94
- Utils::Hash.set(@config, ...)
76
+ Utils::Hash.set(self, ...)
95
77
  end
96
78
 
97
79
  def load(path = nil)
98
80
  return unless path
99
81
 
100
- @config = self.class.parse_file(path)
82
+ replace(self.class.parse_file(path))
101
83
  end
102
84
  end
103
85
  end
data/lib/cosmo/engine.rb CHANGED
@@ -25,7 +25,7 @@ module Cosmo
25
25
 
26
26
  def run(type, options)
27
27
  handler = Utils::Signal.trap(:INT, :TERM)
28
- Logger.info "Starting processing, hit Ctrl-C to stop"
28
+ Logger.info "Starting processing, hit Ctrl-C to stop [concurrency=#{@concurrency}]"
29
29
 
30
30
  processor_classes = type && PROCESSORS.key?(type.to_sym) ? [PROCESSORS[type.to_sym]] : PROCESSORS.values
31
31
  @processors = processor_classes.map { _1.run(@pool, @running, options) }
data/lib/cosmo/job/data.rb CHANGED
@@ -5,7 +5,7 @@ require "json"
5
5
  module Cosmo
6
6
  module Job
7
7
  class Data
8
- DEFAULTS = { stream: :default, retry: 3, dead: true }.freeze
8
+ DEFAULTS = { stream: :default, retry: 3, dead: true, limit: nil }.freeze
9
9
 
10
10
  attr_reader :jid
11
11
 
data/lib/cosmo/job/limit.rb ADDED
@@ -0,0 +1,51 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Cosmo
4
+ module Job
5
+ # Distributed concurrency limiter backed by NATS Key-Value with per-message TTL.
6
+ #
7
+ # Each unit of concurrency is a numbered KV slot:
8
+ # "{concurrency_key}/0", "{concurrency_key}/1", ..., "{concurrency_key}/{limit-1}"
9
+ #
10
+ # Acquiring a slot is a single atomic `set` (CAS with last-revision=0).
11
+ # Only one worker can win a given slot; losers try the next number.
12
+ # When a job finishes the slot is deleted; if the worker crashes NATS
13
+ # expires it automatically via the per-message Nats-TTL header.
14
+ class Limit
15
+ BUCKET = "cosmo_jobs_limits"
16
+
17
+ def self.instance
18
+ @instance ||= new
19
+ end
20
+
21
+ def initialize
22
+ @kv = API::KV.new(BUCKET, allow_msg_ttl: true)
23
+ end
24
+
25
+ # Try to acquire one of the numbered slots for +key+.
26
+ #
27
+ # @param key [String] concurrency key
28
+ # @param jid [String] stored as the slot value for observability
29
+ # @param limit [Integer] number of slots (0 … limit-1)
30
+ # @param duration [Integer] seconds before the slot is auto-expired by NATS
31
+ # @return [String, nil] the acquired slot key, or nil when all slots are taken
32
+ def acquire(key, jid:, limit:, duration:)
33
+ 0.upto(limit - 1) do |i|
34
+ slot = "#{key}/#{i}"
35
+ @kv.set(slot, jid, ttl: duration)
36
+ return slot
37
+ rescue NATS::KeyValue::KeyWrongLastSequenceError
38
+ next # slot is live, try the next one
39
+ end
40
+ nil # all slots occupied
41
+ end
42
+
43
+ # Release a previously acquired slot.
44
+ def release(slot)
45
+ @kv.delete(slot)
46
+ rescue NATS::Error
47
+ # best effort — slot TTL will reclaim it if delete fails
48
+ end
49
+ end
50
+ end
51
+ end
data/lib/cosmo/job/processor.rb CHANGED
@@ -1,75 +1,36 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require "timeout"
4
+
3
5
  module Cosmo
4
6
  module Job
5
- class Processor < ::Cosmo::Processor # rubocop:disable Metrics/ClassLength
6
- def initialize(pool, running, options)
7
- super
8
- @weights = []
9
- end
10
-
11
- def stop(timeout = Config[:timeout])
12
- @running.make_false
13
- @pool.shutdown
14
- @consumers.each { |(s, _)| s.unsubscribe rescue nil }
15
- @pool.wait_for_termination(timeout)
16
- [@work_thread, @schedule_thread].compact.each { _1.join(timeout) || _1.kill }
17
- @consumers.clear
18
- end
19
-
7
+ class Processor < ::Cosmo::Processor
20
8
  private
21
9
 
22
- def run_loop
23
- @work_thread = Thread.new { work_loop }
24
- @schedule_thread = Thread.new { schedule_loop }
25
- end
26
-
27
10
  def setup
11
+ # Initialize singletons before starting to process messages
12
+ API::Busy.instance
13
+ API::Counter.instance
14
+ Limit.instance
15
+
28
16
  jobs_config = Config.dig(:consumers, :jobs)
29
17
  jobs_config&.each do |stream_name, config|
30
- config = config.dup
31
- consumer_name = "consumer-#{stream_name}"
32
- subject = config.delete(:subject)
33
- priority = config.delete(:priority)
34
- @weights += ([stream_name] * priority.to_i) if priority
35
- subscription = client.subscribe(subject, consumer_name, config)
36
- @consumers << [subscription, stream_name]
37
- end
38
- end
18
+ next if stream_name == :scheduled # scheduled jobs are handled in schedule_loop
39
19
 
40
- def work_loop # rubocop:disable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity, Metrics/MethodLength, Metrics/AbcSize
41
- shutdown = false
42
-
43
- while running?
44
- break if shutdown
45
-
46
- @weights.shuffle.each do |stream_name|
47
- break unless running?
48
-
49
- begin
50
- timeout = ENV.fetch("COSMO_JOBS_FETCH_TIMEOUT", 0.1).to_f
51
- @pool.post do
52
- subscription = @consumers.find { |(_, sn)| sn == stream_name }&.first
53
- messages = lock(stream_name) { fetch(subscription, batch_size: 1, timeout:) }
54
- process(messages) if messages&.any?
55
- end
56
- rescue Concurrent::RejectedExecutionError
57
- shutdown = true
58
- break # pool doesn't accept new jobs, we are shutting down
59
- end
60
-
61
- break unless running?
62
- end
20
+ @consumers << subscribe(stream_name, config)
63
21
  end
64
22
  end
65
23
 
66
24
  def schedule_loop # rubocop:disable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity, Metrics/MethodLength, Metrics/AbcSize
25
+ config = Config.dig(:consumers, :jobs, :scheduled)
26
+ return unless config
27
+
28
+ subscription, = subscribe(:scheduled, config)
67
29
  while running?
68
30
  break unless running?
69
31
 
70
32
  now = Time.now.to_i
71
33
  timeout = ENV.fetch("COSMO_JOBS_SCHEDULER_FETCH_TIMEOUT", 5).to_f
72
- subscription = @consumers.find { |(_, sn)| sn == :scheduled }&.first
73
34
  messages = fetch(subscription, batch_size: 100, timeout:)
74
35
  messages&.each do |message|
75
36
  headers = message.header.except("X-Stream", "X-Subject", "X-Execute-At", "Nats-Expected-Stream")
@@ -90,7 +51,7 @@ module Cosmo
90
51
  end
91
52
  end
92
53
 
93
- def process(messages) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
54
+ def process(messages, _) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
94
55
  message = messages.first
95
56
  Logger.debug "received messages #{messages.inspect}"
96
57
  data = Utils::Json.parse(message.data)
@@ -107,30 +68,67 @@ module Cosmo
107
68
  return
108
69
  end
109
70
 
71
+ if worker_class.limits_concurrency?
72
+ slot = acquire_concurrency_slot(worker_class, message, data)
73
+ return if slot == false
74
+ end
75
+
76
+ duration = worker_class.default_options[:limit]&.dig(:duration)&.to_i
77
+
110
78
  with_stats(message) do
111
79
  sw = stopwatch
112
80
  Logger.with(jid: data[:jid])
113
81
  Logger.info "start"
114
82
  instance = worker_class.new
115
83
  instance.jid = data[:jid]
116
- instance.perform(*data[:args])
84
+ if duration
85
+ Timeout.timeout(duration) { instance.perform(*data[:args]) }
86
+ else
87
+ instance.perform(*data[:args])
88
+ end
117
89
  message.ack
118
90
  Logger.with(elapsed: sw.elapsed_seconds) { Logger.info "done" }
119
91
  true
92
+ rescue Timeout::Error
93
+ Logger.with(elapsed: sw.elapsed_seconds) { Logger.info "fail[timeout]" }
94
+ dropped = handle_failure(message, data)
95
+ false if dropped
120
96
  rescue StandardError => e
121
97
  Logger.debug e
122
- Logger.with(elapsed: sw.elapsed_seconds) { Logger.info "fail" }
98
+ Logger.with(elapsed: sw.elapsed_seconds) { Logger.info "fail[error]" }
123
99
  dropped = handle_failure(message, data)
124
100
  false if dropped
125
101
  rescue Exception # rubocop:disable Lint/RescueException
126
- Logger.with(elapsed: sw.elapsed_seconds) { Logger.info "fail" }
102
+ Logger.with(elapsed: sw.elapsed_seconds) { Logger.info "fail[exception]" }
127
103
  raise
128
104
  end
129
105
  ensure
106
+ Limit.instance.release(slot) if slot
130
107
  Logger.without(:jid)
131
108
  Logger.debug "processed message #{message.inspect}"
132
109
  end
133
110
 
111
+ # Tries to acquire a concurrency slot for the job.
112
+ # Returns the slot key (String) on success, or false if all slots are
113
+ # taken (message is NAK'd with a delay equal to +duration+ before returning) or a NATS error occurred (message is NAK'd immediately).
114
+ def acquire_concurrency_slot(worker_class, message, data)
115
+ options = worker_class.concurrency_options
116
+ key = worker_class.concurrency_key(data[:args])
117
+
118
+ slot = Limit.instance.acquire(key, jid: data[:jid], limit: options[:limit], duration: options[:duration])
119
+ return slot if slot
120
+
121
+ message.nak(delay: options[:duration] * Config::NANO)
122
+ Logger.debug "concurrency limit reached for #{data[:class]}, re-queueing back #{data[:jid]}"
123
+ false
124
+ rescue NATS::Error => e
125
+ # Unexpected KV failure (e.g. transient NATS error). NAK immediately so
126
+ # the message is retried rather than stuck in-flight until ack_wait expires.
127
+ Logger.error e
128
+ message.nak
129
+ false
130
+ end
131
+
134
132
  def handle_failure(message, data) # rubocop:disable Naming/PredicateMethod
135
133
  current_attempt = message.metadata.num_delivered
136
134
  max_retries = data[:retry].to_i + 1
@@ -139,7 +137,7 @@ module Cosmo
139
137
  # NATS will auto-retry with delay (exponential backoff based on current attempt).
140
138
  # When max_deliver is reached, NATS stops redelivering the message and marks it as "max deliveries exceeded".
141
139
  # The message is effectively abandoned by NATS — it stays in the stream (consuming a slot) but will never be delivered again to that consumer.
142
- delay_ns = ((current_attempt**4) + 15) * 1_000_000_000
140
+ delay_ns = ((current_attempt**4) + 15) * Config::NANO
143
141
  message.nak(delay: delay_ns)
144
142
  return false
145
143
  end
@@ -148,6 +146,15 @@ module Cosmo
148
146
  true
149
147
  end
150
148
 
149
+ def subscribe(stream_name, config)
150
+ config = config.dup
151
+ config[:batch_size] = 1
152
+ config[:stream] = stream_name
153
+ consumer_name = "consumer-#{stream_name}"
154
+ subscription = client.subscribe(config[:subject], consumer_name, config.except(:subject, :priority, :stream, :batch_size))
155
+ [subscription, config, nil]
156
+ end
157
+
151
158
  def drop_message(message, data)
152
159
  message.term
153
160
  Logger.debug "job dropped #{data[:jid]}"
@@ -161,16 +168,28 @@ module Cosmo
161
168
  Logger.debug "job moved #{data&.dig(:jid)} to DLQ"
162
169
  end
163
170
 
171
+ def scheduler?
172
+ true
173
+ end
174
+
175
+ def consumers
176
+ @weights ||= @consumers.filter_map { |(_, c, _)| [c[:stream]] * [c[:priority].to_i, 1].max }.flatten
177
+ @weights.shuffle.map { |s| @consumers.find { |(_, c, _)| c[:stream] == s } }
178
+ end
179
+
180
+ def fetch_subjects(config)
181
+ config[:subject]
182
+ end
183
+
184
+ def fetch_timeout(_config)
185
+ ENV.fetch("COSMO_JOBS_FETCH_TIMEOUT", 0.1).to_f
186
+ end
187
+
164
188
  def with_stats(message, &block)
165
189
  API::Busy.instance.with(message) do
166
190
  API::Counter.instance.with(&block)
167
191
  end
168
192
  end
169
-
170
- def lock(stream_name, &)
171
- @mutexes ||= Hash.new { |h, k| h[k] = Mutex.new }
172
- @mutexes[stream_name].synchronize(&)
173
- end
174
193
  end
175
194
  end
176
195
  end
data/lib/cosmo/job.rb CHANGED
@@ -1,6 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require "cosmo/job/data"
4
+ require "cosmo/job/limit"
4
5
  require "cosmo/job/processor"
5
6
 
6
7
  module Cosmo
@@ -10,8 +11,56 @@ module Cosmo
10
11
  end
11
12
 
12
13
  module ClassMethods
13
- def options(stream: nil, retry: nil, dead: nil)
14
- default_options.merge!({ stream:, retry:, dead: }.compact)
14
+ # @option config [Symbol] :stream NATS stream to publish to (default: :default)
15
+ # @option config [Integer] :retry number of retries after the first delivery attempt (default: 3)
16
+ # @option config [Boolean] :dead move to dead-letter stream after retries exhausted (default: true)
17
+ # @option config [Hash] :limit execution limits:
18
+ #
19
+ # limit: { duration: 30 }
20
+ # limit: { duration: 30, concurrency: 3 }
21
+ # limit: { duration: 30, concurrency: { to: 3, key: ->(id) { id } } }
22
+ #
23
+ # @option config [Integer] :"limit[:duration]" hard execution timeout in seconds. The job is interrupted
24
+ # (via Timeout::Error) after this many seconds and counts as a failed attempt (retried with exponential backoff,
25
+ # moved to DLQ after retries exhausted).
26
+ # @option config [Integer, Hash] :"limit[:concurrency]" caps how many instances run at once across all
27
+ # workers. Jobs that cannot acquire a slot are NAK'd with a delay equal to +duration+ so they are not
28
+ # re-delivered until the slot is guaranteed free. Requires +duration+.
29
+ # Pass an Integer for a class-wide cap, or <tt>{ to: N, key: ->(*args) {} }</tt> to scope per key (the lambda receives the job's arguments splatted).
30
+ def options(**config)
31
+ if config[:limit] && config.dig(:limit, :concurrency) && !config.dig(:limit, :duration).to_i.positive?
32
+ raise ArgumentError, "limit: duration is required when concurrency is set"
33
+ end
34
+
35
+ default_options.merge!(config)
36
+ end
37
+ alias cosmo_options options
38
+
39
+ def limits_concurrency?
40
+ !!concurrency_options
41
+ end
42
+
43
+ # Returns a normalized concurrency config hash, or +nil+ when not configured.
44
+ # Always contains +:limit+, +:key+, and +:duration+.
45
+ def concurrency_options
46
+ value = default_options.dig(:limit, :concurrency)
47
+ duration = default_options.dig(:limit, :duration).to_i
48
+ return unless value
49
+
50
+ case value
51
+ when Integer then { limit: value, key: nil, duration: duration }
52
+ when Hash then { limit: value.fetch(:to), key: value[:key], duration: duration }
53
+ end
54
+ end
55
+
56
+ # Derive the fully-scoped concurrency key for a given args array.
57
+ def concurrency_key(args)
58
+ config = concurrency_options
59
+ return unless config
60
+
61
+ base = Utils::String.underscore(name)
62
+ suffix = config[:key]&.call(*args)
63
+ suffix ? "#{base}/#{suffix}" : base
15
64
  end
16
65
 
17
66
  def perform(*args, async: true, **options)
data/lib/cosmo/logger.rb CHANGED
@@ -60,7 +60,10 @@ module Cosmo
60
60
  end
61
61
 
62
62
  def self.instance
63
- @instance ||= ::Logger.new($stdout).tap { _1.formatter = SimpleFormatter.new }
63
+ @instance ||= ::Logger.new($stdout).tap do |logger|
64
+ logger.formatter = SimpleFormatter.new
65
+ logger.level = ::Logger::Severity.coerce(ENV.fetch("COSMO_LOG_LEVEL", "info"))
66
+ end
64
67
  end
65
68
 
66
69
  def self.instance=(logger)