cosmonats 0.2.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +300 -187
- data/lib/cosmo/active_job/adapter.rb +46 -0
- data/lib/cosmo/active_job/executor.rb +16 -0
- data/lib/cosmo/active_job/options.rb +50 -0
- data/lib/cosmo/active_job.rb +29 -0
- data/lib/cosmo/api/busy.rb +2 -2
- data/lib/cosmo/api/counter.rb +2 -2
- data/lib/cosmo/api/cron/entry.rb +99 -0
- data/lib/cosmo/api/cron.rb +118 -0
- data/lib/cosmo/api/kv.rb +36 -14
- data/lib/cosmo/api/stream.rb +27 -9
- data/lib/cosmo/api.rb +1 -0
- data/lib/cosmo/cli.rb +27 -9
- data/lib/cosmo/client.rb +75 -5
- data/lib/cosmo/config.rb +14 -32
- data/lib/cosmo/engine.rb +1 -1
- data/lib/cosmo/job/data.rb +1 -1
- data/lib/cosmo/job/limit.rb +51 -0
- data/lib/cosmo/job/processor.rb +82 -63
- data/lib/cosmo/job.rb +51 -2
- data/lib/cosmo/logger.rb +4 -1
- data/lib/cosmo/processor.rb +108 -0
- data/lib/cosmo/railtie.rb +21 -0
- data/lib/cosmo/stream/processor.rb +24 -60
- data/lib/cosmo/stream.rb +4 -3
- data/lib/cosmo/utils/hash.rb +13 -24
- data/lib/cosmo/utils/overrides.rb +1 -1
- data/lib/cosmo/utils/ttl_cache.rb +44 -0
- data/lib/cosmo/utils.rb +1 -0
- data/lib/cosmo/version.rb +1 -1
- data/lib/cosmo/web/assets/app.css +88 -0
- data/lib/cosmo/web/controllers/crons.rb +41 -0
- data/lib/cosmo/web/controllers/jobs.rb +7 -3
- data/lib/cosmo/web/controllers/streams.rb +36 -10
- data/lib/cosmo/web/helpers/application.rb +17 -2
- data/lib/cosmo/web/views/actions/index.erb +1 -1
- data/lib/cosmo/web/views/crons/_table.erb +58 -0
- data/lib/cosmo/web/views/crons/index.erb +10 -0
- data/lib/cosmo/web/views/jobs/_busy.erb +54 -49
- data/lib/cosmo/web/views/jobs/_dead.erb +70 -65
- data/lib/cosmo/web/views/jobs/_enqueued.erb +82 -56
- data/lib/cosmo/web/views/jobs/_scheduled.erb +53 -48
- data/lib/cosmo/web/views/jobs/_tabs.erb +6 -0
- data/lib/cosmo/web/views/jobs/busy.erb +8 -6
- data/lib/cosmo/web/views/jobs/dead.erb +6 -5
- data/lib/cosmo/web/views/jobs/enqueued.erb +8 -6
- data/lib/cosmo/web/views/jobs/index.erb +1 -1
- data/lib/cosmo/web/views/jobs/scheduled.erb +6 -5
- data/lib/cosmo/web/views/layout.erb +1 -1
- data/lib/cosmo/web/views/streams/_info.erb +3 -0
- data/lib/cosmo/web/views/streams/_pause_banner.erb +17 -0
- data/lib/cosmo/web/views/streams/_stream_row.erb +42 -0
- data/lib/cosmo/web/views/streams/_table.erb +4 -21
- data/lib/cosmo/web.rb +7 -0
- data/lib/cosmo.rb +1 -0
- data/sig/cosmo/active_job/adapter.rbs +13 -0
- data/sig/cosmo/active_job/executor.rbs +9 -0
- data/sig/cosmo/active_job/options.rbs +14 -0
- data/sig/cosmo/api/cron/entry.rbs +30 -0
- data/sig/cosmo/api/cron.rbs +25 -0
- data/sig/cosmo/api/kv.rbs +4 -6
- data/sig/cosmo/api/stream.rbs +7 -1
- data/sig/cosmo/client.rbs +20 -4
- data/sig/cosmo/config.rbs +3 -15
- data/sig/cosmo/job/data.rbs +1 -1
- data/sig/cosmo/job/limit.rbs +18 -0
- data/sig/cosmo/job/processor.rbs +19 -9
- data/sig/cosmo/job.rbs +9 -4
- data/sig/cosmo/processor.rbs +26 -0
- data/sig/cosmo/railtie.rbs +4 -0
- data/sig/cosmo/stream/processor.rbs +4 -10
- data/sig/cosmo/utils/hash.rbs +4 -8
- data/sig/cosmo/utils/ttl_cache.rbs +20 -0
- metadata +25 -3
- data/lib/cosmo/defaults.yml +0 -70
data/README.md
CHANGED
|
@@ -1,12 +1,65 @@
|
|
|
1
|
-
# 🚀 Cosmonats
|
|
1
|
+
# 🚀 Cosmonats
|
|
2
2
|
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
3
|
+
Background jobs + real-time event streaming for Ruby — unified, in one gem, backed by NATS.
|
|
4
|
+
**No Redis. No DB polling. Disk-backed, horizontally scalable — no message is ever silently dropped.**
|
|
5
|
+
|
|
6
|
+
<div align="center">
|
|
7
7
|
|
|
8
8
|

|
|
9
9
|
|
|
10
|
+
[](https://rubygems.org/gems/cosmonats)
|
|
11
|
+
[](https://rubygems.org/gems/cosmonats)
|
|
12
|
+
[](https://www.ruby-lang.org)
|
|
13
|
+
[](LICENSE.txt)
|
|
14
|
+
[](https://github.com/bitsbeam/cosmonats/actions)
|
|
15
|
+
|
|
16
|
+
*Battle-tested in production. Tens of millions of jobs processed and counting.*
|
|
17
|
+
|
|
18
|
+
</div>
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
## ⚡ Taste it
|
|
22
|
+
|
|
23
|
+
```ruby
|
|
24
|
+
# Define a job with a familiar look
|
|
25
|
+
class SendEmailJob
|
|
26
|
+
include Cosmo::Job
|
|
27
|
+
options stream: :default, retry: 3, dead: true
|
|
28
|
+
|
|
29
|
+
def perform(user_id, template)
|
|
30
|
+
EmailService.send(user_id, template)
|
|
31
|
+
end
|
|
32
|
+
end
|
|
33
|
+
|
|
34
|
+
# Enqueue it
|
|
35
|
+
SendEmailJob.perform_async(123, "welcome")
|
|
36
|
+
SendEmailJob.perform_in(1.day, 123, "followup")
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
```ruby
|
|
40
|
+
# Process a continuous real-time event stream
|
|
41
|
+
class ClicksProcessor
|
|
42
|
+
include Cosmo::Stream
|
|
43
|
+
options stream: :clickstream, batch_size: 100,
|
|
44
|
+
consumer: { subjects: ["events.clicks.>"] }
|
|
45
|
+
|
|
46
|
+
def process_one
|
|
47
|
+
Analytics.track(message.data)
|
|
48
|
+
message.ack
|
|
49
|
+
end
|
|
50
|
+
end
|
|
51
|
+
|
|
52
|
+
ClicksProcessor.publish({ user_id: 123, page: "/home" }, subject: "events.clicks.homepage")
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
```bash
|
|
56
|
+
bundle exec cosmo -C config/cosmo.yml -c 20 # Run jobs + streams with 20 threads
|
|
57
|
+
bundle exec cosmo -C config/cosmo.yml -c 20 jobs # Jobs only
|
|
58
|
+
bundle exec cosmo -C config/cosmo.yml -c 20 streams # Streams only
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+

|
|
62
|
+
|
|
10
63
|
## 📖 Index
|
|
11
64
|
|
|
12
65
|
- [Why?](#-why)
|
|
@@ -25,65 +78,73 @@ disk-backed persistence.
|
|
|
25
78
|
|
|
26
79
|
|
|
27
80
|
## 🎯 Why?
|
|
28
|
-
Among many others, why creating another? Cosmonats is a background processing framework for Ruby, powered by **[NATS](https://nats.io/)**.
|
|
29
|
-
It's designed to solve the fundamental scaling problems that plague Redis/DB-based job queues and at the same time to provide both job and stream
|
|
30
|
-
processing capabilities.
|
|
31
81
|
|
|
32
|
-
|
|
82
|
+
Most background job libraries use Redis or Postgres — tools that were never designed for this. Think of NATS as Redis — but Redis is KV first then messaging;
|
|
83
|
+
NATS is messaging first, then KV. What NATS offers:
|
|
84
|
+
|
|
85
|
+
- **~20 MB binary, ~10 MB RAM at idle** Trivial to run anywhere.
|
|
86
|
+
- **Disk-backed persistent streams** Messages survive restarts and don't need to fit in RAM.
|
|
87
|
+
- **True horizontal clustering** Lose a node — other nodes take over, zero message loss.
|
|
88
|
+
- **Multilingual** Official clients for Ruby, Go, Python, Rust, Java, .NET, and more. Any service can publish or consume.
|
|
89
|
+
|
|
90
|
+
One NATS server replaces your message broker, job queue, and KV store — with lower operational overhead.
|
|
91
|
+
|
|
92
|
+
| | Redis/DB-backed | NATS/Cosmonats |
|
|
93
|
+
|-------------------|-------------------------------|----------------------------|
|
|
94
|
+
| Persistence | In-memory / DB bloat | Disk-backed, TB-scale |
|
|
95
|
+
| Scaling | Sentinel only / Vertical only | True horizontal clustering |
|
|
96
|
+
| Background jobs | Yes | Yes |
|
|
97
|
+
| Real-time stream | No | Yes |
|
|
98
|
+
| Zero message loss | No | Yes |
|
|
99
|
+
| Message replay | No | Yes |
|
|
100
|
+
| Backpressure | No, grow unbounded | Yes |
|
|
101
|
+
| Multi-DC | Complex setup | Native geo-distribution |
|
|
33
102
|
|
|
34
|
-
- **Single-threaded command processing** - All operations serialized, creating contention with many workers
|
|
35
|
-
- **Memory-only persistence** - Everything must fit in RAM, expensive to scale
|
|
36
|
-
- **Vertical scaling only** - Can't truly distribute a single queue across nodes
|
|
37
|
-
- **Polling overhead** - Thousands of blocked connections
|
|
38
|
-
- **No native backpressure** - Queues can grow unbounded
|
|
39
|
-
- **Weak durability** - Async replication can lose jobs during failures
|
|
40
103
|
|
|
41
|
-
|
|
104
|
+
### Killer Features:
|
|
42
105
|
|
|
43
|
-
|
|
106
|
+
#### — Jobs + Streams, unified in one gem.
|
|
44
107
|
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
- **Vacuum/autovacuum impact** - High-churn job tables degrade database performance
|
|
49
|
-
- **Vertical scaling only** - Limited by single database instance capabilities
|
|
50
|
-
- **Index bloat** - High UPDATE/DELETE volume causes index degradation over time
|
|
51
|
-
- **Table bloat** - Constant row updates fragment tables, requiring maintenance
|
|
52
|
-
- **`LISTEN/NOTIFY` limitations** - 8KB payload limit, no persistence, breaks down at high volumes (10K+ notifications/sec)
|
|
53
|
-
- **No native horizontal scaling** - Cannot distribute a single job queue across multiple database nodes
|
|
108
|
+
Most Ruby job gems handle exactly one thing — background jobs. If you also need to consume a continuous event feed, that's a second system, second config, second set of
|
|
109
|
+
worker processes, second Dockerfile entry. Cosmonats is the only Ruby gem with a first-class `Job` primitive *and* a first-class `Stream` primitive, sharing
|
|
110
|
+
one server, one config, one CLI, one monitoring endpoint.
|
|
54
111
|
|
|
55
|
-
|
|
112
|
+
#### — Message replay and time-travel debugging.
|
|
56
113
|
|
|
57
|
-
|
|
114
|
+
NATS persists messages to disk and lets any consumer rewind to any point — beginning of time, a specific timestamp, or only new messages.
|
|
115
|
+
- **Incident recovery** — your pipeline crashed for 3 hours. Replay from the crash timestamp.
|
|
116
|
+
- **New consumer bootstrap** — a new service needs historical events. Start it from the beginning.
|
|
117
|
+
- **Bug reproduction** — replay the exact sequence of messages that caused a production issue.
|
|
58
118
|
|
|
59
|
-
|
|
119
|
+
#### — Multi-datacenter queues, natively.
|
|
60
120
|
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
121
|
+
NATS has a first-class cluster + leaf-node architecture for geo-distribution. Spanning multiple regions or datacenters is a config block — not a separate
|
|
122
|
+
product or a third-party replication tool. NATS was built for edge computing, IoT, and satellite communication — multi-DC is a first-class concern, not an
|
|
123
|
+
afterthought.
|
|
124
|
+
|
|
125
|
+
#### — Transport-level deduplication + built-in KV. No extra infrastructure.
|
|
126
|
+
|
|
127
|
+
NATS deduplicates messages at the **broker** — same-ID messages within the configured window are dropped before they ever reach a worker. No uniqueness gems,
|
|
128
|
+
no advisory locks, no extra round-trips. It also ships a built-in Key/Value store usable for distributed locks and rate limiting — no Redis, no Memcached,
|
|
129
|
+
nothing else to run.
|
|
68
130
|
|
|
69
131
|
|
|
70
132
|
## ✨ Features
|
|
71
133
|
|
|
72
134
|
### 🎪 Job Processing
|
|
73
|
-
- **Familiar
|
|
74
|
-
- **Priority queues**
|
|
75
|
-
- **Scheduled jobs**
|
|
76
|
-
- **Automatic retries**
|
|
77
|
-
- **Dead letter queue**
|
|
78
|
-
- **Job uniqueness**
|
|
135
|
+
- **Familiar API** — `perform_async`, `perform_in`, `perform_at`
|
|
136
|
+
- **Priority queues** — critical, high, default, low with weighted round-robin
|
|
137
|
+
- **Scheduled jobs** — execute at a specific time or after a delay
|
|
138
|
+
- **Automatic retries** — exponential backoff, configurable attempts
|
|
139
|
+
- **Dead letter queue** — capture permanently failed jobs
|
|
140
|
+
- **Job uniqueness** — prevent duplicate execution
|
|
79
141
|
|
|
80
142
|
### 🌊 Stream Processing
|
|
81
|
-
- **Real-time
|
|
82
|
-
- **Batch processing**
|
|
83
|
-
- **Message replay**
|
|
84
|
-
- **Consumer groups** -
|
|
85
|
-
- **
|
|
86
|
-
- **Custom serialization** - JSON, MessagePack, Protobuf support
|
|
143
|
+
- **Real-time event streams** — process continuous data feeds
|
|
144
|
+
- **Batch processing** — handle multiple messages in one go
|
|
145
|
+
- **Message replay** — reprocess from any point in time
|
|
146
|
+
- **Consumer groups** — load-balanced across workers
|
|
147
|
+
- **Custom serialization** — JSON, MessagePack, Protobuf
|
|
87
148
|
|
|
88
149
|
|
|
89
150
|
## 📦 Installation
|
|
@@ -93,64 +154,88 @@ Built on **NATS**, `cosmonats` provides:
|
|
|
93
154
|
gem "cosmonats"
|
|
94
155
|
```
|
|
95
156
|
|
|
96
|
-
**Requirements:** Ruby 3.1
|
|
157
|
+
**Requirements:** Ruby ≥ 3.1, NATS Server ([install guide](https://docs.nats.io/running-a-nats-service/introduction/installation))
|
|
158
|
+
|
|
159
|
+
Spin up NATS instantly with Docker — one command, that's it:
|
|
160
|
+
```bash
|
|
161
|
+
docker run -p 4222:4222 -p 8222:8222 nats:alpine -js
|
|
162
|
+
```
|
|
163
|
+
|
|
164
|
+
Or add it to your existing `docker-compose.yml`:
|
|
165
|
+
```yaml
|
|
166
|
+
services:
|
|
167
|
+
nats:
|
|
168
|
+
image: nats:alpine
|
|
169
|
+
command: -js
|
|
170
|
+
ports:
|
|
171
|
+
- "4222:4222"
|
|
172
|
+
- "8222:8222"
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
Mount the monitoring UI in your Rack app:
|
|
176
|
+
```ruby
|
|
177
|
+
require "cosmo/web"
|
|
178
|
+
|
|
179
|
+
# Rails
|
|
180
|
+
mount Cosmo::Web => "/cosmo"
|
|
181
|
+
|
|
182
|
+
# Any Rack app (config.ru)
|
|
183
|
+
map "/cosmo" { run Cosmo::Web }
|
|
184
|
+
```
|
|
97
185
|
|
|
98
186
|
|
|
99
187
|
## 🚀 Quick Start
|
|
100
188
|
|
|
101
|
-
### 1. Create
|
|
189
|
+
### 1. Create `config/cosmo.yml`
|
|
190
|
+
|
|
191
|
+
```yaml
|
|
192
|
+
concurrency: 5 # Number of worker threads
|
|
193
|
+
|
|
194
|
+
consumers: # Declare consumer groups for streams, things that pull messages and process them
|
|
195
|
+
jobs: # Consumer configs for jobs (or streams)
|
|
196
|
+
default: # Stream name
|
|
197
|
+
ack_policy: explicit # Acknowledgment required for each message, can be explicit, none, or all
|
|
198
|
+
max_deliver: 10 # Max retry attempts before sending to a dead stream
|
|
199
|
+
max_ack_pending: 10 # Max messages waiting for ack, if exceeded, the server will stop delivering new messages until some are acked
|
|
200
|
+
ack_wait: 15 # Seconds to wait for ack before redelivering
|
|
201
|
+
subject: jobs.%{name}.> # Subject pattern for this consumer, %{name} replaced with stream name, becomes `jobs.default.>`
|
|
202
|
+
|
|
203
|
+
setup: # Used only for initial stream creation (`cosmo -S`)
|
|
204
|
+
jobs: # Stream configs for jobs (or streams)
|
|
205
|
+
default: # Stream name
|
|
206
|
+
storage: file # Storage type (file or memory)
|
|
207
|
+
retention: workqueue # Retention policy (limits, interest, workqueue). workqueue - deletes acked/nacked, limits - append only
|
|
208
|
+
subjects: ["jobs.%{name}.>"] # Subject pattern for this stream, %{name} replaced with stream name
|
|
209
|
+
allow_direct: true # Allow direct messages to stream (required for web UI)
|
|
210
|
+
```
|
|
211
|
+
|
|
212
|
+
### 2. Create streams in NATS (one-time; reads the `setup` section of `config/cosmo.yml`)
|
|
213
|
+
|
|
214
|
+
```bash
|
|
215
|
+
bundle exec cosmo -S
|
|
216
|
+
```
|
|
217
|
+
|
|
218
|
+
### 3. Define a job in `app/jobs/`
|
|
102
219
|
|
|
103
220
|
```ruby
|
|
104
221
|
class SendEmailJob
|
|
105
222
|
include Cosmo::Job
|
|
106
|
-
|
|
107
|
-
# configure job options (optional)
|
|
108
223
|
options stream: :default, retry: 3, dead: true
|
|
109
224
|
|
|
110
225
|
def perform(user_id, email_type)
|
|
111
|
-
|
|
112
|
-
UserMailer.send(email_type, user).deliver_now
|
|
226
|
+
UserMailer.send(email_type, user_id).deliver_now
|
|
113
227
|
end
|
|
114
228
|
end
|
|
115
229
|
```
|
|
116
230
|
|
|
117
|
-
###
|
|
231
|
+
### 4. Enqueue & run
|
|
118
232
|
|
|
119
233
|
```ruby
|
|
120
|
-
SendEmailJob.perform_async(
|
|
121
|
-
SendEmailJob.perform_in(1.hour, 123, 'reminder') # Delayed
|
|
122
|
-
SendEmailJob.perform_at(1.day.from_now, 123, 'test') # Scheduled
|
|
123
|
-
```
|
|
124
|
-
|
|
125
|
-
### 3. Configure (config/cosmo.yml)
|
|
126
|
-
|
|
127
|
-
```yaml
|
|
128
|
-
concurrency: 10
|
|
129
|
-
max_retries: 3
|
|
130
|
-
|
|
131
|
-
consumers:
|
|
132
|
-
jobs:
|
|
133
|
-
default:
|
|
134
|
-
ack_policy: explicit
|
|
135
|
-
max_deliver: 3
|
|
136
|
-
max_ack_pending: 3
|
|
137
|
-
ack_wait: 60
|
|
138
|
-
|
|
139
|
-
streams:
|
|
140
|
-
default:
|
|
141
|
-
storage: file
|
|
142
|
-
retention: workqueue
|
|
143
|
-
subjects: ["jobs.default.>"]
|
|
234
|
+
SendEmailJob.perform_async(42, "welcome")
|
|
144
235
|
```
|
|
145
236
|
|
|
146
|
-
### 4. Setup & Run
|
|
147
|
-
|
|
148
237
|
```bash
|
|
149
|
-
|
|
150
|
-
cosmo -C config/cosmo.yml --setup
|
|
151
|
-
|
|
152
|
-
# Start processing
|
|
153
|
-
cosmo -C config/cosmo.yml -c 10 -r ./app/jobs jobs
|
|
238
|
+
bundle exec cosmo -C config/cosmo.yml -c 10 -r ./app/jobs jobs
|
|
154
239
|
```
|
|
155
240
|
|
|
156
241
|
|
|
@@ -158,16 +243,14 @@ cosmo -C config/cosmo.yml -c 10 -r ./app/jobs jobs
|
|
|
158
243
|
|
|
159
244
|
### Jobs
|
|
160
245
|
|
|
161
|
-
Simple background tasks with a familiar API:
|
|
162
|
-
|
|
163
246
|
```ruby
|
|
164
247
|
class ReportJob
|
|
165
248
|
include Cosmo::Job
|
|
166
|
-
|
|
249
|
+
|
|
167
250
|
options(
|
|
168
251
|
stream: :critical, # Stream name
|
|
169
252
|
retry: 5, # Retry attempts
|
|
170
|
-
dead: true #
|
|
253
|
+
dead: true # Send to dead letter queue on final failure
|
|
171
254
|
)
|
|
172
255
|
|
|
173
256
|
def perform(report_id)
|
|
@@ -175,20 +258,18 @@ class ReportJob
|
|
|
175
258
|
Report.find(report_id).generate!
|
|
176
259
|
rescue StandardError => e
|
|
177
260
|
logger.error "Failed: #{e.message}"
|
|
178
|
-
raise # Triggers retry
|
|
261
|
+
raise # Triggers retry with exponential backoff
|
|
179
262
|
end
|
|
180
263
|
end
|
|
181
264
|
|
|
182
|
-
# Usage
|
|
183
265
|
ReportJob.perform_async(42) # Enqueue now
|
|
184
266
|
ReportJob.perform_in(30.minutes, 42) # Delayed
|
|
185
|
-
ReportJob.perform_at(Time.parse(
|
|
267
|
+
ReportJob.perform_at(Time.parse("2026-01-25 10:00"), 42) # Scheduled
|
|
268
|
+
ReportJob.perform_sync(42) # Inline, no NATS (great for tests)
|
|
186
269
|
```
|
|
187
270
|
|
|
188
271
|
### Streams
|
|
189
272
|
|
|
190
|
-
Real-time event processing with powerful features:
|
|
191
|
-
|
|
192
273
|
```ruby
|
|
193
274
|
class ClicksProcessor
|
|
194
275
|
include Cosmo::Stream
|
|
@@ -205,62 +286,115 @@ class ClicksProcessor
|
|
|
205
286
|
}
|
|
206
287
|
)
|
|
207
288
|
|
|
208
|
-
# Process one message
|
|
289
|
+
# Process one message at a time
|
|
209
290
|
def process_one
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
message.ack # Success
|
|
291
|
+
Analytics.track_click(message.data)
|
|
292
|
+
message.ack
|
|
213
293
|
end
|
|
214
|
-
|
|
215
|
-
# OR process batch
|
|
294
|
+
|
|
295
|
+
# OR process a batch
|
|
216
296
|
def process(messages)
|
|
217
|
-
Analytics.
|
|
297
|
+
Analytics.bulk_track(messages.map(&:data))
|
|
218
298
|
messages.each(&:ack)
|
|
219
299
|
end
|
|
220
300
|
end
|
|
221
301
|
|
|
222
302
|
# Publishing
|
|
223
|
-
ClicksProcessor.publish(
|
|
224
|
-
{ user_id: 123, page: '/home' },
|
|
225
|
-
subject: 'events.clicks.homepage'
|
|
226
|
-
)
|
|
303
|
+
ClicksProcessor.publish({ user_id: 123, page: "/home" }, subject: "events.clicks.homepage")
|
|
227
304
|
|
|
228
|
-
#
|
|
305
|
+
# Acknowledgment strategies
|
|
229
306
|
message.ack # Success
|
|
230
|
-
message.nack(delay: 5_000_000_000) # Retry
|
|
307
|
+
message.nack(delay: 5_000_000_000) # Retry in 5 seconds (nanoseconds)
|
|
231
308
|
message.term # Permanent failure, no retry
|
|
232
309
|
```
|
|
233
310
|
|
|
234
311
|
### Configuration
|
|
235
312
|
|
|
236
|
-
**
|
|
313
|
+
**NATS subjects** follow a dot-separated hierarchy (`events.clicks.homepage`).
|
|
314
|
+
The `>` wildcard matches everything after that prefix. Think of subjects as topic names — flexible routing with no extra configuration.
|
|
315
|
+
|
|
316
|
+
**Full `config/cosmo.yml` example:**
|
|
237
317
|
```yaml
|
|
238
|
-
timeout: 25
|
|
239
|
-
concurrency:
|
|
240
|
-
max_retries: 3
|
|
318
|
+
timeout: 25 # Shutdown timeout in seconds
|
|
319
|
+
concurrency: &concurrency 1 # Number of worker threads
|
|
320
|
+
max_retries: &max_retries 3 # Default max retries
|
|
321
|
+
|
|
322
|
+
stream_config: &stream_config
|
|
323
|
+
storage: file # storage type (file or memory)
|
|
324
|
+
retention: workqueue # retention policy (limits, interest, workqueue)
|
|
325
|
+
duplicate_window: 120 # time window for duplicate message detection in seconds
|
|
326
|
+
discard: old # discard old messages when stream is full (old or new)
|
|
327
|
+
allow_direct: true # allow direct messages to stream, required for web UI
|
|
328
|
+
subjects:
|
|
329
|
+
- jobs.%{name}.> # subject pattern for stream, %{name} will be replaced with stream name
|
|
330
|
+
|
|
331
|
+
consumer_config: &consumer_config
|
|
332
|
+
ack_policy: explicit # ack policy (explicit, none, all), each individual message must be acknowledged
|
|
333
|
+
max_deliver: 10 # maximum number of times a message will be delivered before it's considered failed
|
|
334
|
+
max_ack_pending: 20 # maximum number of messages with pending ack for this consumer
|
|
335
|
+
ack_wait: 60 # time in seconds to wait for an ack before redelivering the message
|
|
336
|
+
subject: jobs.%{name}.> # subject pattern for consumer, %{name} will be replaced with stream name
|
|
241
337
|
|
|
242
338
|
consumers:
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
339
|
+
jobs:
|
|
340
|
+
critical:
|
|
341
|
+
<<: *consumer_config
|
|
342
|
+
priority: 50
|
|
343
|
+
high:
|
|
344
|
+
<<: *consumer_config
|
|
345
|
+
priority: 30
|
|
346
|
+
default:
|
|
347
|
+
<<: *consumer_config
|
|
348
|
+
priority: 15
|
|
349
|
+
low:
|
|
350
|
+
<<: *consumer_config
|
|
351
|
+
priority: 5
|
|
352
|
+
scheduled:
|
|
353
|
+
<<: *consumer_config
|
|
354
|
+
max_deliver: 1
|
|
355
|
+
max_ack_pending: 100
|
|
356
|
+
ack_wait: 10
|
|
250
357
|
|
|
251
358
|
setup:
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
359
|
+
jobs:
|
|
360
|
+
critical:
|
|
361
|
+
<<: *stream_config
|
|
362
|
+
description: Very critical priority jobs
|
|
363
|
+
high:
|
|
364
|
+
<<: *stream_config
|
|
365
|
+
description: Higher priority jobs
|
|
366
|
+
default:
|
|
367
|
+
<<: *stream_config
|
|
368
|
+
description: Default priority jobs
|
|
369
|
+
low:
|
|
370
|
+
<<: *stream_config
|
|
371
|
+
description: Lower priority jobs
|
|
372
|
+
scheduled:
|
|
373
|
+
<<: *stream_config
|
|
374
|
+
description: Scheduled jobs
|
|
375
|
+
dead:
|
|
376
|
+
<<: *stream_config
|
|
377
|
+
retention: limits
|
|
378
|
+
max_msgs: 10000
|
|
379
|
+
max_age: 604800 # 7d
|
|
380
|
+
description: Broken jobs (DLQ)
|
|
381
|
+
|
|
382
|
+
development:
|
|
383
|
+
verbose: false
|
|
384
|
+
concurrency: *concurrency
|
|
385
|
+
|
|
386
|
+
staging:
|
|
387
|
+
verbose: true
|
|
388
|
+
concurrency: 3
|
|
389
|
+
|
|
390
|
+
production:
|
|
391
|
+
concurrency: 3
|
|
258
392
|
```
|
|
259
393
|
|
|
260
394
|
**Programmatic:**
|
|
261
395
|
```ruby
|
|
262
396
|
Cosmo::Config.set(:concurrency, 20)
|
|
263
|
-
Cosmo::Config.set(:setup, :streams, :custom, { storage:
|
|
397
|
+
Cosmo::Config.set(:setup, :streams, :custom, { storage: "file", subjects: ["custom.>"] })
|
|
264
398
|
```
|
|
265
399
|
|
|
266
400
|
**Environment variables:**
|
|
@@ -277,28 +411,15 @@ export COSMO_STREAMS_FETCH_TIMEOUT=0.1
|
|
|
277
411
|
```ruby
|
|
278
412
|
class UrgentJob
|
|
279
413
|
include Cosmo::Job
|
|
280
|
-
options stream: :critical # priority: 50 in config
|
|
414
|
+
options stream: :critical # priority: 50 in config — polled most frequently
|
|
281
415
|
end
|
|
282
|
-
|
|
283
|
-
# config/cosmo.yml
|
|
284
|
-
consumers:
|
|
285
|
-
jobs:
|
|
286
|
-
critical:
|
|
287
|
-
priority: 50 # Polled more frequently
|
|
288
|
-
default:
|
|
289
|
-
priority: 15
|
|
290
416
|
```
|
|
291
417
|
|
|
292
418
|
**Custom Serializers:**
|
|
293
419
|
```ruby
|
|
294
420
|
module MessagePackSerializer
|
|
295
|
-
def self.serialize(data)
|
|
296
|
-
|
|
297
|
-
end
|
|
298
|
-
|
|
299
|
-
def self.deserialize(payload)
|
|
300
|
-
MessagePack.unpack(payload)
|
|
301
|
-
end
|
|
421
|
+
def self.serialize(data) = MessagePack.pack(data)
|
|
422
|
+
def self.deserialize(payload) = MessagePack.unpack(payload)
|
|
302
423
|
end
|
|
303
424
|
|
|
304
425
|
class FastStream
|
|
@@ -317,21 +438,21 @@ class ResilientJob
|
|
|
317
438
|
process_data(data)
|
|
318
439
|
rescue RetryableError => e
|
|
319
440
|
logger.warn "Retryable: #{e.message}"
|
|
320
|
-
raise # Will retry
|
|
441
|
+
raise # Will retry with exponential backoff
|
|
321
442
|
rescue FatalError => e
|
|
322
443
|
logger.error "Fatal: #{e.message}"
|
|
323
|
-
# Don't raise
|
|
444
|
+
# Don't raise — won't retry, won't go to DLQ
|
|
324
445
|
end
|
|
325
446
|
end
|
|
326
447
|
```
|
|
327
448
|
|
|
328
449
|
**Testing:**
|
|
329
450
|
```ruby
|
|
330
|
-
# Synchronous
|
|
331
|
-
SendEmailJob.perform_sync(123,
|
|
451
|
+
# Synchronous — no NATS needed
|
|
452
|
+
SendEmailJob.perform_sync(123, "test")
|
|
332
453
|
|
|
333
|
-
#
|
|
334
|
-
jid = SendEmailJob.perform_async(123,
|
|
454
|
+
# Async — returns a job ID
|
|
455
|
+
jid = SendEmailJob.perform_async(123, "welcome")
|
|
335
456
|
assert_kind_of String, jid
|
|
336
457
|
```
|
|
337
458
|
|
|
@@ -339,29 +460,24 @@ assert_kind_of String, jid
|
|
|
339
460
|
## 🖥️ CLI Reference
|
|
340
461
|
|
|
341
462
|
```bash
|
|
342
|
-
#
|
|
343
|
-
cosmo -C config/cosmo.yml
|
|
344
|
-
|
|
345
|
-
#
|
|
346
|
-
cosmo -C config/cosmo.yml -c 20 -r ./app/jobs jobs # Jobs only
|
|
347
|
-
cosmo -C config/cosmo.yml -c 20 streams # Streams only
|
|
348
|
-
cosmo -C config/cosmo.yml -c 20 # Both
|
|
463
|
+
cosmo -C config/cosmo.yml --setup # Create streams in NATS (idempotent)
|
|
464
|
+
cosmo -C config/cosmo.yml -c 20 -r ./app/jobs jobs # Jobs only
|
|
465
|
+
cosmo -C config/cosmo.yml -c 20 streams # Streams only
|
|
466
|
+
cosmo -C config/cosmo.yml -c 20 # Both
|
|
349
467
|
```
|
|
350
468
|
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
|
354
|
-
|
|
355
|
-
| `-
|
|
356
|
-
| `-
|
|
357
|
-
| `-
|
|
358
|
-
| `-t, --timeout NUM` | Shutdown timeout (sec) | `-t 60` |
|
|
359
|
-
| `-S, --setup` | Setup streams & exit | `--setup` |
|
|
469
|
+
| Flag | Description | Example |
|
|
470
|
+
|-------------------------|------------------------|-----------------------|
|
|
471
|
+
| `-C, --config PATH` | Config file path | `-C config/cosmo.yml` |
|
|
472
|
+
| `-c, --concurrency INT` | Worker threads | `-c 20` |
|
|
473
|
+
| `-r, --require PATH` | Auto-require directory | `-r ./app/jobs` |
|
|
474
|
+
| `-t, --timeout NUM` | Shutdown timeout (sec) | `-t 60` |
|
|
475
|
+
| `-S, --setup` | Setup streams & exit | `--setup` |
|
|
360
476
|
|
|
361
477
|
|
|
362
478
|
## 🚢 Deployment
|
|
363
479
|
|
|
364
|
-
**NATS Cluster:**
|
|
480
|
+
**NATS Cluster config:**
|
|
365
481
|
```bash
|
|
366
482
|
# nats-server.conf
|
|
367
483
|
port: 4222
|
|
@@ -385,7 +501,7 @@ services:
|
|
|
385
501
|
volumes:
|
|
386
502
|
- ./nats.conf:/etc/nats/nats-server.conf
|
|
387
503
|
- nats-data:/var/lib/nats
|
|
388
|
-
|
|
504
|
+
|
|
389
505
|
worker:
|
|
390
506
|
build: .
|
|
391
507
|
environment:
|
|
@@ -419,17 +535,14 @@ SyslogIdentifier=cosmo
|
|
|
419
535
|
WantedBy=multi-user.target
|
|
420
536
|
```
|
|
421
537
|
|
|
422
|
-
Enable and start:
|
|
423
538
|
```bash
|
|
424
|
-
sudo systemctl enable cosmo
|
|
425
|
-
sudo systemctl start cosmo
|
|
426
|
-
sudo systemctl status cosmo
|
|
539
|
+
sudo systemctl enable cosmo && sudo systemctl start cosmo
|
|
427
540
|
```
|
|
428
541
|
|
|
429
542
|
|
|
430
543
|
## 📊 Monitoring
|
|
431
544
|
|
|
432
|
-
**Structured
|
|
545
|
+
**Structured logs:**
|
|
433
546
|
```
|
|
434
547
|
2026-01-23T10:15:30.123Z INFO pid=12345 tid=abc jid=def: start
|
|
435
548
|
2026-01-23T10:15:32.456Z INFO pid=12345 tid=abc jid=def elapsed=2.333: done
|
|
@@ -438,22 +551,22 @@ sudo systemctl status cosmo
|
|
|
438
551
|
**Stream Metrics:**
|
|
439
552
|
```ruby
|
|
440
553
|
client = Cosmo::Client.instance
|
|
441
|
-
info = client.stream_info(
|
|
554
|
+
info = client.stream_info("default")
|
|
442
555
|
|
|
443
556
|
info.state.messages # Total messages
|
|
444
557
|
info.state.bytes # Total bytes
|
|
445
558
|
info.state.consumer_count # Number of consumers
|
|
446
559
|
```
|
|
447
560
|
|
|
448
|
-
**Prometheus
|
|
449
|
-
- `jetstream_server_store_msgs`
|
|
450
|
-
- `jetstream_consumer_delivered_msgs`
|
|
451
|
-
- `jetstream_consumer_ack_pending`
|
|
561
|
+
**Prometheus** — NATS exposes metrics at `:8222/metrics`:
|
|
562
|
+
- `jetstream_server_store_msgs` — Messages in stream
|
|
563
|
+
- `jetstream_consumer_delivered_msgs` — Delivered messages
|
|
564
|
+
- `jetstream_consumer_ack_pending` — Pending acknowledgments
|
|
452
565
|
|
|
453
566
|
|
|
454
567
|
## 💼 Examples
|
|
455
568
|
|
|
456
|
-
**Email
|
|
569
|
+
**Email queue with scheduling:**
|
|
457
570
|
```ruby
|
|
458
571
|
class EmailJob
|
|
459
572
|
include Cosmo::Job
|
|
@@ -465,8 +578,8 @@ class EmailJob
|
|
|
465
578
|
end
|
|
466
579
|
end
|
|
467
580
|
|
|
468
|
-
EmailJob.perform_async(123,
|
|
469
|
-
EmailJob.perform_in(1.day, 123,
|
|
581
|
+
EmailJob.perform_async(123, "welcome")
|
|
582
|
+
EmailJob.perform_in(1.day, 123, "followup")
|
|
470
583
|
```
|
|
471
584
|
|
|
472
585
|
**Image Processing Pipeline:**
|
|
@@ -475,37 +588,37 @@ class ImageProcessor
|
|
|
475
588
|
include Cosmo::Stream
|
|
476
589
|
options(
|
|
477
590
|
stream: :images,
|
|
478
|
-
consumer: { subjects: [
|
|
591
|
+
consumer: { subjects: ["images.uploaded.>"] }
|
|
479
592
|
)
|
|
480
593
|
|
|
481
594
|
def process_one
|
|
482
|
-
processed = ImageService.process(message.data[
|
|
483
|
-
publish(processed, subject:
|
|
595
|
+
processed = ImageService.process(message.data["url"])
|
|
596
|
+
publish(processed, subject: "images.processed.optimized")
|
|
484
597
|
message.ack
|
|
485
598
|
rescue => e
|
|
486
599
|
logger.error "Processing failed: #{e.message}"
|
|
487
|
-
message.nack(delay: 30_000_000_000)
|
|
600
|
+
message.nack(delay: 30_000_000_000) # retry in 30s
|
|
488
601
|
end
|
|
489
602
|
end
|
|
490
603
|
|
|
491
|
-
ImageProcessor.publish({ url:
|
|
604
|
+
ImageProcessor.publish({ url: "https://example.com/image.jpg" }, subject: "images.uploaded.user")
|
|
492
605
|
```
|
|
493
606
|
|
|
494
607
|
**Real-Time Analytics:**
|
|
495
608
|
```ruby
|
|
496
609
|
class AnalyticsAggregator
|
|
497
610
|
include Cosmo::Stream
|
|
498
|
-
options batch_size: 1000, consumer: { subjects: [
|
|
611
|
+
options batch_size: 1000, consumer: { subjects: ["events.*.>"] }
|
|
499
612
|
|
|
500
613
|
def process(messages)
|
|
501
|
-
|
|
502
|
-
aggregates = events.group_by { |e| e['type'] }.transform_values(&:count)
|
|
614
|
+
aggregates = messages.map(&:data).group_by { |e| e["type"] }.transform_values(&:count)
|
|
503
615
|
Analytics.bulk_insert(aggregates)
|
|
504
616
|
messages.each(&:ack)
|
|
505
617
|
end
|
|
506
618
|
end
|
|
507
619
|
```
|
|
508
620
|
|
|
621
|
+
---
|
|
509
622
|
|
|
510
623
|
<div align="center">
|
|
511
624
|
|