cosmonats 0.1.3 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. checksums.yaml +4 -4
  2. data/README.md +8 -7
  3. data/lib/cosmo/api/busy.rb +66 -0
  4. data/lib/cosmo/api/counter.rb +70 -0
  5. data/lib/cosmo/api/job.rb +46 -0
  6. data/lib/cosmo/api/kv.rb +63 -0
  7. data/lib/cosmo/api/stats.rb +44 -0
  8. data/lib/cosmo/api/stream.rb +110 -0
  9. data/lib/cosmo/api.rb +11 -0
  10. data/lib/cosmo/cli.rb +6 -4
  11. data/lib/cosmo/client.rb +35 -2
  12. data/lib/cosmo/config.rb +8 -6
  13. data/lib/cosmo/defaults.yml +31 -30
  14. data/lib/cosmo/job/processor.rb +58 -19
  15. data/lib/cosmo/job.rb +1 -1
  16. data/lib/cosmo/logger.rb +4 -0
  17. data/lib/cosmo/processor.rb +7 -1
  18. data/lib/cosmo/stream/data.rb +1 -0
  19. data/lib/cosmo/stream/processor.rb +18 -3
  20. data/lib/cosmo/stream.rb +2 -2
  21. data/lib/cosmo/utils/overrides.rb +15 -0
  22. data/lib/cosmo/utils/warnings.rb +17 -0
  23. data/lib/cosmo/utils.rb +14 -0
  24. data/lib/cosmo/version.rb +1 -1
  25. data/lib/cosmo/web/assets/app.css +431 -0
  26. data/lib/cosmo/web/assets/htmx.2.0.8.min.js.gz +0 -0
  27. data/lib/cosmo/web/context.rb +28 -0
  28. data/lib/cosmo/web/controllers/actions.rb +16 -0
  29. data/lib/cosmo/web/controllers/application.rb +43 -0
  30. data/lib/cosmo/web/controllers/jobs.rb +97 -0
  31. data/lib/cosmo/web/controllers/streams.rb +44 -0
  32. data/lib/cosmo/web/helpers/application.rb +76 -0
  33. data/lib/cosmo/web/renderer.rb +58 -0
  34. data/lib/cosmo/web/views/actions/index.erb +7 -0
  35. data/lib/cosmo/web/views/jobs/_busy.erb +50 -0
  36. data/lib/cosmo/web/views/jobs/_dead.erb +65 -0
  37. data/lib/cosmo/web/views/jobs/_enqueued.erb +60 -0
  38. data/lib/cosmo/web/views/jobs/_scheduled.erb +49 -0
  39. data/lib/cosmo/web/views/jobs/_stats.erb +69 -0
  40. data/lib/cosmo/web/views/jobs/busy.erb +16 -0
  41. data/lib/cosmo/web/views/jobs/dead.erb +17 -0
  42. data/lib/cosmo/web/views/jobs/enqueued.erb +16 -0
  43. data/lib/cosmo/web/views/jobs/index.erb +12 -0
  44. data/lib/cosmo/web/views/jobs/scheduled.erb +17 -0
  45. data/lib/cosmo/web/views/layout.erb +33 -0
  46. data/lib/cosmo/web/views/streams/_info.erb +89 -0
  47. data/lib/cosmo/web/views/streams/_table.erb +42 -0
  48. data/lib/cosmo/web/views/streams/index.erb +11 -0
  49. data/lib/cosmo/web/views/streams/info.erb +11 -0
  50. data/lib/cosmo/web.rb +66 -0
  51. data/lib/cosmo.rb +2 -7
  52. data/sig/cosmo/api/busy.rbs +35 -0
  53. data/sig/cosmo/api/counter.rbs +34 -0
  54. data/sig/cosmo/api/job.rbs +31 -0
  55. data/sig/cosmo/api/kv.rbs +30 -0
  56. data/sig/cosmo/api/stats.rbs +21 -0
  57. data/sig/cosmo/api/stream.rbs +44 -0
  58. data/sig/cosmo/client.rbs +13 -3
  59. data/sig/cosmo/processor.rbs +1 -1
  60. data/sig/cosmo/stream/data.rbs +1 -1
  61. data/sig/cosmo/stream/processor.rbs +2 -0
  62. data/sig/cosmo/stream.rbs +1 -0
  63. metadata +59 -3
  64. /data/sig/cosmo/{message.rbs → stream/message.rbs} +0 -0
@@ -2,22 +2,32 @@
2
2
 
3
3
  module Cosmo
4
4
  module Job
5
- class Processor < ::Cosmo::Processor
5
+ class Processor < ::Cosmo::Processor # rubocop:disable Metrics/ClassLength
6
6
  def initialize(pool, running, options)
7
7
  super
8
8
  @weights = []
9
9
  end
10
10
 
11
+ def stop(timeout = Config[:timeout])
12
+ @running.make_false
13
+ @pool.shutdown
14
+ @consumers.each { |(s, _)| s.unsubscribe rescue nil }
15
+ @pool.wait_for_termination(timeout)
16
+ [@work_thread, @schedule_thread].compact.each { _1.join(timeout) || _1.kill }
17
+ @consumers.clear
18
+ end
19
+
11
20
  private
12
21
 
13
22
  def run_loop
14
- Thread.new { work_loop }
15
- Thread.new { schedule_loop }
23
+ @work_thread = Thread.new { work_loop }
24
+ @schedule_thread = Thread.new { schedule_loop }
16
25
  end
17
26
 
18
27
  def setup
19
28
  jobs_config = Config.dig(:consumers, :jobs)
20
29
  jobs_config&.each do |stream_name, config|
30
+ config = config.dup
21
31
  consumer_name = "consumer-#{stream_name}"
22
32
  subject = config.delete(:subject)
23
33
  priority = config.delete(:priority)
@@ -28,7 +38,11 @@ module Cosmo
28
38
  end
29
39
 
30
40
  def work_loop # rubocop:disable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity, Metrics/MethodLength, Metrics/AbcSize
41
+ shutdown = false
42
+
31
43
  while running?
44
+ break if shutdown
45
+
32
46
  @weights.shuffle.each do |stream_name|
33
47
  break unless running?
34
48
 
@@ -36,10 +50,11 @@ module Cosmo
36
50
  timeout = ENV.fetch("COSMO_JOBS_FETCH_TIMEOUT", 0.1).to_f
37
51
  @pool.post do
38
52
  subscription = @consumers.find { |(_, sn)| sn == stream_name }&.first
39
- messages = fetch_messages(subscription, batch_size: 1, timeout:)
53
+ messages = lock(stream_name) { fetch(subscription, batch_size: 1, timeout:) }
40
54
  process(messages) if messages&.any?
41
55
  end
42
56
  rescue Concurrent::RejectedExecutionError
57
+ shutdown = true
43
58
  break # pool doesn't accept new jobs, we are shutting down
44
59
  end
45
60
 
@@ -55,7 +70,7 @@ module Cosmo
55
70
  now = Time.now.to_i
56
71
  timeout = ENV.fetch("COSMO_JOBS_SCHEDULER_FETCH_TIMEOUT", 5).to_f
57
72
  subscription = @consumers.find { |(_, sn)| sn == :scheduled }&.first
58
- messages = fetch_messages(subscription, batch_size: 100, timeout:)
73
+ messages = fetch(subscription, batch_size: 100, timeout:)
59
74
  messages&.each do |message|
60
75
  headers = message.header.except("X-Stream", "X-Subject", "X-Execute-At", "Nats-Expected-Stream")
61
76
  stream, subject, execute_at = message.header.values_at("X-Stream", "X-Subject", "X-Execute-At")
@@ -80,17 +95,19 @@ module Cosmo
80
95
  Logger.debug "received messages #{messages.inspect}"
81
96
  data = Utils::Json.parse(message.data)
82
97
  unless data
83
- Logger.debug ArgumentError.new("malformed payload")
98
+ Logger.error ArgumentError.new("malformed payload")
99
+ move_message(message)
84
100
  return
85
101
  end
86
102
 
87
103
  worker_class = Utils::String.safe_constantize(data[:class])
88
104
  unless worker_class
89
- Logger.debug ArgumentError.new("#{data[:class]} class not found")
105
+ Logger.error ArgumentError.new("#{data[:class]} class not found")
106
+ move_message(message, data)
90
107
  return
91
108
  end
92
109
 
93
- begin
110
+ with_stats(message) do
94
111
  sw = stopwatch
95
112
  Logger.with(jid: data[:jid])
96
113
  Logger.info "start"
@@ -99,10 +116,12 @@ module Cosmo
99
116
  instance.perform(*data[:args])
100
117
  message.ack
101
118
  Logger.with(elapsed: sw.elapsed_seconds) { Logger.info "done" }
119
+ true
102
120
  rescue StandardError => e
103
121
  Logger.debug e
104
122
  Logger.with(elapsed: sw.elapsed_seconds) { Logger.info "fail" }
105
- handle_failure(message, data)
123
+ dropped = handle_failure(message, data)
124
+ false if dropped
106
125
  rescue Exception # rubocop:disable Lint/RescueException
107
126
  Logger.with(elapsed: sw.elapsed_seconds) { Logger.info "fail" }
108
127
  raise
@@ -112,26 +131,46 @@ module Cosmo
112
131
  Logger.debug "processed message #{message.inspect}"
113
132
  end
114
133
 
115
- def handle_failure(message, data) # rubocop:disable Metrics/AbcSize
134
+ def handle_failure(message, data) # rubocop:disable Naming/PredicateMethod
116
135
  current_attempt = message.metadata.num_delivered
117
136
  max_retries = data[:retry].to_i + 1
118
137
 
119
138
  if current_attempt < max_retries
120
- # NATS will auto-retry based on max_deliver with exponential backoff
139
+ # NATS will auto-retry with delay (exponential backoff based on current attempt).
140
+ # When max_deliver is reached, NATS stops redelivering the message and marks it as "max deliveries exceeded".
141
+ # The message is effectively abandoned by NATS — it stays in the stream (consuming a slot) but will never be delivered again to that consumer.
121
142
  delay_ns = ((current_attempt**4) + 15) * 1_000_000_000
122
143
  message.nak(delay: delay_ns)
123
- return
144
+ return false
124
145
  end
125
146
 
126
- if data[:dead]
127
- Client.instance.publish("jobs.dead.#{Utils::String.underscore(data[:class])}", message.data)
128
- message.ack
129
- Logger.debug "job moved #{data[:jid]} to DLQ"
130
- else
131
- message.term
132
- Logger.debug "job dropped #{data[:jid]}"
147
+ data[:dead] ? move_message(message, data) : drop_message(message, data)
148
+ true
149
+ end
150
+
151
+ def drop_message(message, data)
152
+ message.term
153
+ Logger.debug "job dropped #{data[:jid]}"
154
+ end
155
+
156
+ def move_message(message, data = nil)
157
+ klass = data ? Utils::String.underscore(data[:class]) : "default"
158
+ headers = { "X-Stream" => message.metadata.stream, "X-Subject" => message.subject }
159
+ Client.instance.publish("jobs.dead.#{klass}", message.data, header: headers)
160
+ message.ack
161
+ Logger.debug "job moved #{data&.dig(:jid)} to DLQ"
162
+ end
163
+
164
+ def with_stats(message, &block)
165
+ API::Busy.instance.with(message) do
166
+ API::Counter.instance.with(&block)
133
167
  end
134
168
  end
169
+
170
+ def lock(stream_name, &)
171
+ @mutexes ||= Hash.new { |h, k| h[k] = Mutex.new }
172
+ @mutexes[stream_name].synchronize(&)
173
+ end
135
174
  end
136
175
  end
137
176
  end
data/lib/cosmo/job.rb CHANGED
@@ -54,7 +54,7 @@ module Cosmo
54
54
  end
55
55
  end
56
56
 
57
- attr_reader :jid
57
+ attr_accessor :jid
58
58
 
59
59
  def perform(...)
60
60
  raise NotImplementedError, "#{self.class}#perform must be implemented"
data/lib/cosmo/logger.rb CHANGED
@@ -62,5 +62,9 @@ module Cosmo
62
62
  def self.instance
63
63
  @instance ||= ::Logger.new($stdout).tap { _1.formatter = SimpleFormatter.new }
64
64
  end
65
+
66
+ def self.instance=(logger)
67
+ @instance = logger
68
+ end
65
69
  end
66
70
  end
@@ -39,10 +39,16 @@ module Cosmo
39
39
  @running.true?
40
40
  end
41
41
 
42
- def fetch_messages(subscription, batch_size:, timeout:)
42
+ def fetch(subscription, batch_size:, timeout:)
43
43
  subscription.fetch(batch_size, timeout:)
44
44
  rescue NATS::Timeout
45
45
  # No messages, continue
46
+ rescue StandardError => e
47
+ Logger.error "Snap! Error just happened"
48
+ Logger.error "#{e.class}: #{e.message}\n#{e.backtrace.join("\n")}"
49
+
50
+ backoff = ENV.fetch("COSMO_STREAMS_FETCH_BACKOFF", 5).to_f
51
+ sleep([timeout, backoff].max) # backoff before retry
46
52
  end
47
53
 
48
54
  def client
@@ -7,6 +7,7 @@ module Cosmo
7
7
  class Data
8
8
  DEFAULTS = {
9
9
  batch_size: 100,
10
+ fetch_timeout: 10.0,
10
11
  consumer: {
11
12
  ack_policy: "explicit",
12
13
  max_deliver: 1,
@@ -19,21 +19,26 @@ module Cosmo
19
19
  setup_consumers
20
20
  end
21
21
 
22
- def work_loop # rubocop:disable Metrics/AbcSize, Metrics/MethodLength, Metrics/CyclomaticComplexity
22
+ def work_loop # rubocop:disable Metrics/AbcSize, Metrics/MethodLength, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
23
+ shutdown = false
24
+
23
25
  while running?
26
+ break if shutdown
27
+
24
28
  @consumers.each do |(subscription, config, processor)|
25
29
  break unless running?
26
30
 
27
31
  begin
28
- timeout = ENV.fetch("COSMO_STREAMS_FETCH_TIMEOUT", 0.1).to_f
29
32
  @pool.post do
33
+ timeout = convert_timeout(config[:fetch_timeout])
30
34
  Logger.debug "fetching #{config.dig(:consumer, :subjects).inspect}, timeout=#{timeout}"
31
- messages = fetch_messages(subscription, batch_size: config[:batch_size], timeout:)
35
+ messages = fetch(subscription, batch_size: config[:batch_size], timeout:)
32
36
  Logger.debug "fetched (#{messages&.size.to_i}) messages"
33
37
  process(messages, processor) if messages&.any?
34
38
  Logger.debug "processed (#{messages&.size.to_i}) messages"
35
39
  end
36
40
  rescue Concurrent::RejectedExecutionError
41
+ shutdown = true
37
42
  break # pool doesn't accept new jobs, we are shutting down
38
43
  end
39
44
 
@@ -95,6 +100,16 @@ module Cosmo
95
100
  def dynamic_config
96
101
  Config.system[:streams].map { _1.default_options.merge(class: _1) }
97
102
  end
103
+
104
+ def convert_timeout(value)
105
+ timeout = value.to_f
106
+ if timeout <= 0
107
+ Logger.warn "Ignoring `fetch_timeout: #{timeout}` (causes high CPU usage) with #{Data::DEFAULTS[:fetch_timeout]}s instead"
108
+ timeout = Data::DEFAULTS[:fetch_timeout].to_f
109
+ end
110
+
111
+ timeout
112
+ end
98
113
  end
99
114
  end
100
115
  end
data/lib/cosmo/stream.rb CHANGED
@@ -12,9 +12,9 @@ module Cosmo
12
12
  end
13
13
 
14
14
  module ClassMethods
15
- def options(stream: nil, consumer_name: nil, batch_size: nil, start_position: nil, consumer: nil, publisher: nil) # rubocop:disable Metrics/ParameterLists
15
+ def options(stream: nil, consumer_name: nil, batch_size: nil, fetch_timeout: nil, start_position: nil, consumer: nil, publisher: nil) # rubocop:disable Metrics/ParameterLists
16
16
  register
17
- default_options.merge!({ stream:, consumer_name:, batch_size:, start_position:, consumer:, publisher: }.compact)
17
+ default_options.merge!({ stream:, consumer_name:, batch_size:, fetch_timeout:, start_position:, consumer:, publisher: }.compact)
18
18
  end
19
19
 
20
20
  def publish(data, subject: nil, **options)
@@ -0,0 +1,15 @@
1
+ # frozen_string_literal: true
2
+
3
+ Cosmo::Utils::Warnings.silence do
4
+ members = NATS::JetStream::API::StreamConfig.members + [:allow_msg_counter]
5
+ NATS::JetStream::API::StreamConfig = Struct.new(*members, keyword_init: true) do
6
+ def initialize(opts = {})
7
+ rem = opts.keys - members
8
+ opts.delete_if { |k| rem.include?(k) }
9
+ super
10
+ end
11
+ end
12
+
13
+ members = NATS::JetStream::PubAck.members + [:val]
14
+ NATS::JetStream::PubAck = Struct.new(*members, keyword_init: true)
15
+ end
@@ -0,0 +1,17 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Cosmo
4
+ module Utils
5
+ module Warnings
6
+ module_function
7
+
8
+ def silence
9
+ verbose = $VERBOSE
10
+ $VERBOSE = nil
11
+ yield
12
+ ensure
13
+ $VERBOSE = verbose
14
+ end
15
+ end
16
+ end
17
+ end
@@ -0,0 +1,14 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "cosmo/utils/hash"
4
+ require "cosmo/utils/json"
5
+ require "cosmo/utils/string"
6
+ require "cosmo/utils/signal"
7
+ require "cosmo/utils/warnings"
8
+ require "cosmo/utils/stopwatch"
9
+ require "cosmo/utils/thread_pool"
10
+
11
+ module Cosmo
12
+ module Utils
13
+ end
14
+ end
data/lib/cosmo/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Cosmo
4
- VERSION = "0.1.3"
4
+ VERSION = "0.2.0"
5
5
  end