minigun 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
+ ---
+ SHA256:
+   metadata.gz: 91222c29ebae83a2a2a640752d125275fdf8638c30fdfd7f4d0f3fea3f22e7fd
+   data.tar.gz: fbdd980afbb33a00e415a6cfd750f765eb8b1705f00e99b67e6c97a417c7906b
+ SHA512:
+   metadata.gz: 783aeeaa1dd262976e475c845b9ec28b8cd7a4a22bed03f98819090961b685f8fb1ae692d7e535ac1f4e0fad3c6d949b08bcf61db902916767b92e86d00d1a16
+   data.tar.gz: 96f88b41a520314f90e196e3a488ffff9f74f58ce685d4a21aba46d9b393355ea389ab4b283bad18382539f52efabe84f42d304ebfda644c17f7edaf6eebde1b
data/LICENSE ADDED
@@ -0,0 +1,20 @@
+ Copyright (c) TableCheck Inc.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,58 @@
+ # Minigun go BRRR
+
+ Minigun is a lightweight framework for rapid-fire batch job processing
+ using a Producer-Accumulator-Consumer pattern. Minigun uses forking and threads
+ to maximize system resource utilization.
+
+ In many use cases, Minigun can replace queue systems like Resque, Solid Queue, or Sidekiq.
+ Minigun itself runs entirely in Ruby's memory, and is database and application agnostic.
+
+ ## Enough talk, show me the code!
+
+ Here is a trivial proof of concept; Minigun can do a lot more than this!
+
+ ```ruby
+ require 'minigun'
+
+ class NewsletterSender
+   include Minigun::Runner
+
+   max_threads 10
+   max_consumer_forks 5
+
+   def initialize(start_time: Time.now - 1.day)
+     @start_time = start_time
+   end
+
+   producer do
+     User.where("created_at >= ?", @start_time)
+         .where(newsletter_sent_at: nil)
+         .each do |user|
+       produce(user)
+     end
+   end
+
+   consumer do |user|
+     NewsletterMailer.my_newsletter(user).deliver_now
+     user.update(newsletter_sent_at: Time.now)
+   end
+ end
+
+ # Run the Minigun job
+ NewsletterSender.new.go_brrr!
+ ```
+
+ Use cases for Minigun include:
+ - Send a large number of emails to users in a target audience.
+
+ ## Installation
+
+ ```bash
+ gem install minigun
+ ```
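+
+ Or, if you manage dependencies with Bundler, you can add the gem to your Gemfile, for example:
+
+ ```ruby
+ gem 'minigun'
+ ```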
+
+ ### Special Thanks
+
+ Thanks to Alex Nicholson for the original idea for Minigun.
data/lib/minigun/runner.rb ADDED
@@ -0,0 +1,413 @@
+ # frozen_string_literal: true
+
+ module Minigun
+   class Runner
+     ACCUMULATOR_MAX_SINGLE_QUEUE = 2000 # 10_000
+     ACCUMULATOR_MAX_ALL_QUEUES = ACCUMULATOR_MAX_SINGLE_QUEUE * 2 # 3
+     ACCUMULATOR_CHECK_INTERVAL = 100
+     CONSUMER_THREAD_BATCH_SIZE = 200 # 1000
+     CONSUMER_QUERY_BATCH_SIZE = 200
+     DEFAULT_MAX_RETRIES = 10
+     TIME_ZONE = 'Asia/Tokyo'
+     LOCALE = :en
+     MODEL_INCLUDES = {}.freeze
+     MODELS_TRANSACTIONAL = %w[ Foo
+                                Bar ].freeze
+
+     def initialize(models: nil,
+                    start_time: nil,
+                    end_time: nil,
+                    max_processes: nil,
+                    max_threads: nil,
+                    max_retries: nil)
+       @raw_models = Array(models) if models
+       @start_time = start_time
+       @end_time = end_time
+       time_range
+       @max_processes = max_processes
+       @max_threads = max_threads
+       @max_retries = max_retries || DEFAULT_MAX_RETRIES
+       @produced_count = 0
+       @accumulated_count = 0
+     end
+
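+     # Entry point: starts the producer and accumulator threads, then waits for
+     # all forked consumer processes to finish before reporting results.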
+     def perform
+       in_time_zone_and_locale do
+         bootstrap!
+         job_start_at
+         report_job_started
+         before_job_start!
+         producer_thread = start_producer_thread
+         accumulator_thread = start_accumulator_thread
+         producer_thread.join
+         accumulator_thread.join
+         wait_all_consumer_processes
+         report_job_finished
+         after_job_finished!
+       end
+     end
+     alias_method :go_brrr, :perform
+     alias_method :go_brrr!, :perform
+
+     private
+
+     attr_reader :max_retries
+
+     def models
+       @models ||= (@raw_models || default_models).map {|model| load_model(model) }
+     end
+
+     def load_model(model)
+       return model if model.is_a?(Module)
+       model = "::#{model}" unless model.include?(':')
+       Object.const_get(model)
+     end
+
+     def start_producer_thread
+       @object_id_queue = SizedQueue.new(max_processes * max_threads * 2)
+       Thread.new do
+         Rails.logger.info { "[Producer] Started master producer thread." }
+         @producer_model_threads = []
+         @producer_semaphore = Concurrent::Semaphore.new(max_threads)
+         @producer_mutex = Mutex.new
+         models_queue = models.dup
+         while (model = models_queue.pop)
+           @producer_model_threads << start_producer_model_thread(model)
+         end
+         @producer_model_threads.each(&:join)
+         @object_id_queue << end_object
+         Rails.logger.info { "[Producer] Done. #{@produced_count} object IDs produced." }
+       end
+     end
+
+     def start_producer_model_thread(model)
+       @producer_semaphore.acquire
+       Thread.new do
+         with_mongo_secondary(model) do
+           model_name = model.to_s.demodulize
+           Rails.logger.info { "[Producer] #{model_name}: Started model thread." }
+           time_range_in_batches(model).each do |range|
+             on_retry = ->(e, attempts) { Rails.logger.warn { "[Producer] #{model_name}: Error fetching IDs in #{format_time_range(range)}, attempt #{attempts} of #{max_retries}: #{e.class}: #{e.message}. Retrying..." } }
+             on_failure = ->(e, _attempts) { Rails.logger.error { "[Producer] #{model_name}: Failed fetching IDs in #{format_time_range(range)} after #{max_retries} attempts: #{e.class}: #{e.message}. Skipping." } }
+             with_retry(on_retry: on_retry, on_failure: on_failure) do
+               Rails.logger.info { "[Producer] #{model_name}: Producing time range #{format_time_range(range)}..." }
+               count = produce_model(model, range)
+               Rails.logger.info { "[Producer] #{model_name}: Produced #{count} IDs in time range #{format_time_range(range)}." }
+             end
+           end
+         end
+         GC.start
+         @producer_semaphore.release
+       end
+     end
+
+     def produce_model(model, range)
+       count = 0
+       model.unscoped.where(updated_at: range).pluck_each(:_id) do |id|
+         @object_id_queue << [model, id.to_s.freeze].freeze
+         @producer_mutex.synchronize { @produced_count += 1 }
+         count += 1
+       end
+       count
+     end
+
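+     # The accumulator drains the producer queue into per-model ID sets and
+     # periodically checks whether a batch is large enough to fork a consumer.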
+     def start_accumulator_thread
+       @consumer_pids = []
+       Thread.new do
+         Rails.logger.info { "[Accumulator] Started accumulator thread." }
+         accumulator_map = Hash.new {|h, k| h[k] = Set.new }
+
+         i = 0
+         until (model, id = @object_id_queue.pop) == end_object
+           accumulator_map[model] << id
+           i += 1
+           check_accumulator(accumulator_map) if i >= ACCUMULATOR_MAX_SINGLE_QUEUE && i % ACCUMULATOR_CHECK_INTERVAL == 0
+         end
+
+         # Handle any remaining IDs. Since the producer thread will have finished
+         # by this point, there is no need to fork a child consumer.
+         consume_object_ids(accumulator_map)
+         @accumulated_count += accumulator_map.values.sum(&:size)
+       end
+     end
+
+     def check_accumulator(accumulator_map)
+       # Fork if any queue contains more than N IDs
+       accumulator_map.each do |model, ids|
+         next unless (count = ids.size) >= ACCUMULATOR_MAX_SINGLE_QUEUE
+         fork_consumer({ model => accumulator_map.delete(model) })
+         @accumulated_count += count
+         GC.start
+       end
+
+       # Fork if all queues together contain more than M IDs
+       if (count = accumulator_map.values.sum(&:size)) > ACCUMULATOR_MAX_ALL_QUEUES # rubocop:disable Style/GuardClause
+         fork_consumer(accumulator_map)
+         accumulator_map.clear
+         @accumulated_count += count
+         GC.start
+       end
+     end
+
+     def fork_consumer(object_map)
+       wait_max_consumer_processes
+       before_consumer_fork!
+       Rails.logger.info { "[Consumer] Forking..." }
+       @consumer_pids << fork do
+         after_consumer_fork!
+         GC.start
+         @pid = Process.pid
+         Rails.logger.info { "[Consumer]#{format_pid} started." }
+         consume_object_ids(object_map)
+       end
+     end
+
+     def consume_object_ids(object_map)
+       @consumer_thread_index = 0
+       @consumed_count = 0
+       @consumer_threads = []
+       @consumer_mutex = Mutex.new
+       @consumer_semaphore = Concurrent::Semaphore.new(max_threads)
+       object_map.each do |model, object_ids|
+         object_ids.uniq.in_groups_of(CONSUMER_THREAD_BATCH_SIZE, false).each do |object_ids_batch|
+           @consumer_threads << start_consumer_thread(model, object_ids_batch)
+         end
+       end
+       @consumer_threads.each(&:join)
+       after_consumer_finished!
+       Rails.logger.info { "[Consumer]#{format_pid}: Done. #{@consumed_count} objects consumed." }
+     end
+
+     def start_consumer_thread(model, object_ids)
+       @consumer_semaphore.acquire
+       thread_index = @consumer_mutex.synchronize { @consumer_thread_index += 1 }
+       Thread.new do
+         with_mongo_secondary(model) do
+           model_name = model.to_s.demodulize
+           Rails.logger.info { "[Consumer]#{format_pid}: Started thread #{thread_index}." }
+           object_ids.in_groups_of(CONSUMER_QUERY_BATCH_SIZE, false).each do |object_ids_batch|
+             on_retry = ->(e, attempts) { Rails.logger.warn { "[Consumer]#{format_pid}, Thread #{thread_index}: Error consuming #{model_name}, attempt #{attempts} of #{max_retries}: #{e.class}: #{e.message}. Retrying..." } }
+             on_failure = ->(e, _attempts) { Rails.logger.error { "[Consumer]#{format_pid}, Thread #{thread_index}: Failed consuming #{model_name} after #{max_retries} attempts: #{e.class}: #{e.message}. Skipping." } }
+             with_retry(on_retry: on_retry, on_failure: on_failure) do
+               count = consume_batch(model, object_ids_batch)
+               @consumer_mutex.synchronize { @consumed_count += count }
+               Rails.logger.info { "[Consumer]#{format_pid}, Thread #{thread_index}: Consumed #{count} #{model_name} objects." }
+             end
+           end
+         end
+         @consumer_semaphore.release
+       end
+     end
+
+     def consume_batch(model, object_ids)
+       count = 0
+       consumer_scope(model, object_ids).each do |object|
+         consume_object(object)
+         count += 1
+       rescue StandardError => e
+         Bugsnag.notify(e) {|r| r.add_metadata('publisher', model: model.to_s, object_id: object&._id) }
+       end
+       count
+     end
+
+     def consumer_scope(model, object_ids)
+       includes = MODEL_INCLUDES[model.to_s].presence
+       scope = model.unscoped.any_in(_id: object_ids)
+       scope = scope.includes(includes) if includes
+       scope
+     end
+
+     def wait_max_consumer_processes
+       return if @consumer_pids.size < max_processes
+       begin
+         pid = Process.wait
+         @consumer_pids.delete(pid)
+       rescue Errno::ECHILD # rubocop:disable Lint/SuppressedException
+       end
+     end
+
+     def wait_all_consumer_processes
+       @consumer_pids.each do |pid|
+         Process.wait(pid)
+         @consumer_pids.delete(pid)
+       rescue Errno::ECHILD
+         @consumer_pids.delete(pid)
+       end
+     end
+
+     def time_range
+       @time_range ||= in_time_zone_and_locale do
+         start_time = @start_time&.beginning_of_day if @start_time.is_a?(Date)
+         start_time ||= @start_time&.in_time_zone
+         raise ArgumentError.new('Must specify :start_time') unless start_time
+
+         end_time = @end_time&.end_of_day if @end_time.is_a?(Date)
+         end_time ||= @end_time&.in_time_zone
+
+         start_time..end_time
+       end
+     end
+
+     def time_range_in_batches(model)
+       time_ranges = []
+       t = time_range.first
+       t_end = time_range.end
+       now = Time.current
+       batch_size = time_range_batch_size(model)
+       while t < (t_end || now)
+         t_next = t + batch_size
+         t_batch_end = if t_end&.<=(t_next)
+                         t_end
+                       elsif now <= t_next
+                         nil
+                       else
+                         t_next
+                       end
+         time_ranges << (t..t_batch_end)
+         t = t_next
+       end
+       time_ranges
+     end
+
+     def time_range_batch_size(model)
+       @time_range_batch_size ||= model.to_s.in?(MODELS_TRANSACTIONAL) ? 1.hour : 1.day
+     end
+
+     def in_time_zone_and_locale(&block)
+       Time.use_zone(TIME_ZONE) do
+         I18n.with_locale(LOCALE, &block)
+       end
+     end
+
+     def max_processes
+       @max_processes ||= ENV['WEB_CONCURRENCY']&.to_i ||
+                          ENV['MAX_PROCESSES']&.to_i ||
+                          (Rails.env.in?(%w[production staging]) ? `nproc`.to_i : 1)
+     end
+
+     def max_threads
+       @max_threads ||= ENV['RAILS_MAX_THREADS']&.to_i ||
+                        Mongoid.default_client.options['max_pool_size']
+     end
+
+     def job_start_at
+       @job_start_at ||= Time.current
+     end
+
+     def job_consumer_start_at
+       @job_consumer_start_at ||= Time.current
+     end
+
+     def job_end_at
+       @job_end_at ||= Time.current
+     end
+
+     def bootstrap!
+       max_processes
+       max_threads
+       Rails.application.eager_load!
+     end
+
+     def before_job_start!
+       # Can be overridden in subclass
+     end
+
+     def before_consumer_fork!
+       ::Mongoid.disconnect_clients
+       # Can be overridden in subclass
+     end
+
+     def after_consumer_fork!
+       ::Mongoid.reconnect_clients
+       # Can be overridden in subclass
+     end
+
+     def after_consumer_finished!
+       # Can be overridden in subclass
+     end
+
+     def after_job_finished!
+       # Can be overridden in subclass
+     end
+
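+     # Sentinel value pushed onto the queue by the producer to signal that
+     # production is complete.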
+     def end_object
+       :eoq
+     end
+
+     def with_retry(on_retry: nil, on_failure: nil)
+       attempts = 0
+       begin
+         yield
+       rescue StandardError => e
+         attempts += 1
+         if attempts <= max_retries
+           on_retry&.call(e, attempts)
+           sleep_with_backoff(attempts)
+           retry
+         else
+           on_failure&.call(e, attempts)
+         end
+       end
+     end
+
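+     # Exponential backoff with jitter: 5**attempts / 100 seconds
+     # (0.05s, 0.25s, 1.25s, ...) plus a random 0.05-1 second.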
+     def sleep_with_backoff(attempts)
+       sleep((5**attempts) / 100.0 + rand(0.05..1))
+     end
+
+     def with_mongo_secondary(model, &)
+       read_opts = { mode: :secondary_preferred }
+       model.with(read: read_opts, &)
+     end
+
+     def report_job_started
+       Rails.logger.info { "#{job_name} started.\n#{job_info_message}" }
+     end
+
+     def report_job_finished
+       Rails.logger.info { "#{job_name} finished.\n#{job_info_message(finished: true)}" }
+     end
+
+     def job_name
+       self.class.name.demodulize
+     end
+
+     def job_info_message(finished: false)
+       data = job_info_data(finished: finished)
+       just = data.keys.map(&:size).max
+       data.map do |k, v|
+         " #{k.to_s.ljust(just)} #{v}"
+       end.join("\n")
+     end
+
+     def job_info_data(finished: false)
+       data = { job_start_at: format_time(job_start_at) }
+       if finished
+         count = @accumulated_count
+         runtime = job_end_at - job_start_at
+         rate = count / (runtime / 60.0)
+         data[:job_end_at] = format_time(job_end_at)
+         data[:object_count] = "#{count} objects published"
+         data[:job_runtime] = "#{runtime.round} seconds"
+         data[:job_rate] = "#{rate.round} objects / minute"
+       end
+       data[:query_start_at] = format_time(time_range.begin) || 'none'
+       data[:query_end_at] = format_time(time_range.end) || 'none'
+       data[:max_processes] = max_processes
+       data[:max_threads] = max_threads
+       data[:max_retries] = max_retries
+       data.compact_blank!
+     end
+
+     def format_time_range(range)
+       "#{format_time(range.begin)}..#{format_time(range.end)}"
+     end
+
+     def format_time(time)
+       time&.strftime('%Y-%m-%d %H:%M:%S %z')
+     end
+
+     def format_pid
+       " PID #{@pid}" if @pid
+     end
+   end
+ end
data/lib/minigun/version.rb ADDED
@@ -0,0 +1,5 @@
+ # frozen_string_literal: true
+
+ module Minigun
+   VERSION = '0.0.1'
+ end
metadata ADDED
@@ -0,0 +1,47 @@
+ --- !ruby/object:Gem::Specification
+ name: minigun
+ version: !ruby/object:Gem::Version
+   version: 0.0.1
+ platform: ruby
+ authors:
+ - Johnny Shields
+ autorequire:
+ bindir: bin
+ cert_chain: []
+ date: 2024-09-27 00:00:00.000000000 Z
+ dependencies: []
+ description: A lightweight framework for rapid-fire batch job processing.
+ email: johnny.shields@gmail.com
+ executables: []
+ extensions: []
+ extra_rdoc_files: []
+ files:
+ - LICENSE
+ - README.md
+ - lib/minigun/runner.rb
+ - lib/minigun/version.rb
+ homepage: https://github.com/tablecheck/minigun
+ licenses:
+ - MIT
+ metadata:
+   rubygems_mfa_required: 'true'
+ post_install_message:
+ rdoc_options: []
+ require_paths:
+ - lib
+ required_ruby_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - ">="
+     - !ruby/object:Gem::Version
+       version: '2.7'
+ required_rubygems_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - ">="
+     - !ruby/object:Gem::Version
+       version: 1.3.6
+ requirements: []
+ rubygems_version: 3.5.10
+ signing_key:
+ specification_version: 4
+ summary: A lightweight framework for rapid-fire batch job processing.
+ test_files: []