minigun 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 91222c29ebae83a2a2a640752d125275fdf8638c30fdfd7f4d0f3fea3f22e7fd
4
+ data.tar.gz: fbdd980afbb33a00e415a6cfd750f765eb8b1705f00e99b67e6c97a417c7906b
5
+ SHA512:
6
+ metadata.gz: 783aeeaa1dd262976e475c845b9ec28b8cd7a4a22bed03f98819090961b685f8fb1ae692d7e535ac1f4e0fad3c6d949b08bcf61db902916767b92e86d00d1a16
7
+ data.tar.gz: 96f88b41a520314f90e196e3a488ffff9f74f58ce685d4a21aba46d9b393355ea389ab4b283bad18382539f52efabe84f42d304ebfda644c17f7edaf6eebde1b
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) TableCheck Inc.
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,58 @@
1
+ # Minigun go BRRR
2
+
3
+ Minigun is a lightweight framework for rapid-fire batch job processing
4
+ using a Producer-Accumulator-Consumer pattern. Minigun uses forking and threads
5
+ to maximize system resource utilization.
6
+
7
+ In many use cases, Minigun can replace queue systems like Resque, Solid Queue, or Sidekiq.
8
+ Minigun itself runs entirely in Ruby's memory, and is database and application agnostic.
9
+
10
+ ## Enough talk, show me the code!
11
+
12
+ Here is a trivial proof-of-concept--Minigun can do a lot more than this!
13
+
14
+ ```ruby
15
+ require 'minigun'
16
+
17
+ class NewsletterSender
18
+ include Minigun::Runner
19
+
20
+ max_threads 10
21
+ max_consumer_forks 5
22
+
23
+ def initialize(start_time: Time.now - 1.day)
24
+ @start_time = start_time
25
+ end
26
+
27
+ producer do
28
+ User.where("created_at >= ?", @start_time)
+ .where(newsletter_sent_at: nil)
31
+ .each do |user|
32
+ produce(user)
33
+ end
34
+ end
35
+
36
+ consumer do |user|
37
+ NewsletterMailer.my_newsletter(user).deliver_now
38
+ user.update(newsletter_sent_at: Time.now)
39
+ end
40
+ end
41
+
42
+ # Run the Minigun job
43
+ NewsletterSender.new.go_brrr!
+ ```
45
+
46
+ Use cases for Minigun include:
47
+ - Send a large number of emails to users in a target audience.
48
+
49
+
50
+ ## Installation
51
+
52
+ ```cmd
53
+ gem install minigun
54
+ ```
55
+
56
+ ### Special Thanks
57
+
58
+ Alex Nicholson for the original idea for Minigun.
data/lib/minigun/runner.rb ADDED
@@ -0,0 +1,413 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Minigun
4
+ class Runner
5
+ ACCUMULATOR_MAX_SINGLE_QUEUE = 2000 # 10_000
6
+ ACCUMULATOR_MAX_ALL_QUEUES = ACCUMULATOR_MAX_SINGLE_QUEUE * 2 # 3
7
+ ACCUMULATOR_CHECK_INTERVAL = 100
8
+ CONSUMER_THREAD_BATCH_SIZE = 200 # 1000
9
+ CONSUMER_QUERY_BATCH_SIZE = 200
10
+ DEFAULT_MAX_RETRIES = 10
11
+ TIME_ZONE = 'Asia/Tokyo'
12
+ LOCALE = :en
13
+ MODEL_INCLUDES = {}.freeze
14
+ MODELS_TRANSACTIONAL = %w[ Foo
15
+ Bar ].freeze
16
+
17
+ def initialize(models: nil,
18
+ start_time: nil,
19
+ end_time: nil,
20
+ max_processes: nil,
21
+ max_threads: nil,
22
+ max_retries: nil)
23
+ @raw_models = Array(models) if models
24
+ @start_time = start_time
25
+ @end_time = end_time
26
+ time_range
27
+ @max_processes = max_processes
28
+ @max_threads = max_threads
29
+ @max_retries = max_retries || DEFAULT_MAX_RETRIES
30
+ @produced_count = 0
31
+ @accumulated_count = 0
32
+ end
33
+
34
+ def perform
35
+ in_time_zone_and_locale do
36
+ bootstrap!
37
+ job_start_at
38
+ report_job_started
39
+ before_job_start!
40
+ producer_thread = start_producer_thread
41
+ accumulator_thread = start_accumulator_thread
42
+ producer_thread.join
43
+ accumulator_thread.join
44
+ wait_all_consumer_processes
45
+ report_job_finished
46
+ after_job_finished!
47
+ end
48
+ end
49
+ alias_method :perform, :go_brrr
50
+ alias_method :perform, :go_brrr!
51
+
52
+ private
53
+
54
+ attr_reader :max_retries
55
+
56
+ def models
57
+ @models ||= (@raw_models || default_models).map {|model| load_model(model) }
58
+ end
59
+
60
+ def load_model(model)
61
+ return model if model.is_a?(Module)
62
+ model = "::#{model}" unless model.include?(':')
63
+ Object.const_get(model)
64
+ end
65
+
66
+ def start_producer_thread
67
+ @object_id_queue = SizedQueue.new(max_processes * max_threads * 2)
68
+ Thread.new do
69
+ Rails.logger.info { "[Producer] Started master producer thread." }
70
+ @producer_model_threads = []
71
+ @producer_semaphore = Concurrent::Semaphore.new(max_threads)
72
+ @producer_mutex = Mutex.new
73
+ models_queue = models.dup
74
+ while (model = models_queue.pop)
75
+ @producer_model_threads << start_producer_model_thread(model)
76
+ end
77
+ @producer_model_threads.each(&:join)
78
+ @object_id_queue << end_object
79
+ Rails.logger.info { "[Producer] Done. #{@produced_count} object IDs produced." }
80
+ end
81
+ end
82
+
83
+ def start_producer_model_thread(model)
84
+ @producer_semaphore.acquire
85
+ Thread.new do
86
+ with_mongo_secondary(model) do
87
+ model_name = model.to_s.demodulize
88
+ Rails.logger.info { "[Producer] #{model_name}: Started model thread." }
89
+ time_range_in_batches(model).each do |range|
90
+ on_retry = ->(e, attempts) { Rails.logger.warn { "[Producer] #{model_name}: Error fetching IDs in #{format_time_range(range)}, attempt #{attempts} of #{max_retries}: #{e.class}: #{e.message}. Retrying..." } }
91
+ on_failure = ->(e, _attempts) { Rails.logger.error { "[Producer] #{model_name}: Failed fetching IDs in #{format_time_range(range)} after #{max_retries} attempts: #{e.class}: #{e.message}. Skipping." } }
92
+ with_retry(on_retry: on_retry, on_failure: on_failure) do
93
+ Rails.logger.info { "[Producer] #{model_name}: Producing time range #{format_time_range(range)}..." }
94
+ count = produce_model(model, range)
95
+ Rails.logger.info { "[Producer] #{model_name}: Produced #{count} IDs in time range #{format_time_range(range)}." }
96
+ end
97
+ end
98
+ end
99
+ GC.start
100
+ @producer_semaphore.release
101
+ end
102
+ end
103
+
104
+ def produce_model(model, range)
105
+ count = 0
106
+ model.unscoped.where(updated_at: range).pluck_each(:_id) do |id|
107
+ @object_id_queue << [model, id.to_s.freeze].freeze
108
+ @producer_mutex.synchronize { @produced_count += 1 }
109
+ count += 1
110
+ end
111
+ count
112
+ end
113
+
114
+ def start_accumulator_thread
115
+ @consumer_pids = []
116
+ Thread.new do
117
+ Rails.logger.info { "[Accumulator] Started accumulator thread." }
118
+ accumulator_map = Hash.new {|h, k| h[k] = Set.new }
119
+
120
+ i = 0
121
+ until (model, id = @object_id_queue.pop) == end_object
122
+ accumulator_map[model] << id
123
+ i += 1
124
+ check_accumulator(accumulator_map) if i >= ACCUMULATOR_MAX_SINGLE_QUEUE && i % ACCUMULATOR_CHECK_INTERVAL == 0
125
+ end
126
+
127
+ # Handle any remaining IDs. Since the producer thread will have finished
128
+ # by this point, there no need to fork a child consumer.
129
+ consume_object_ids(accumulator_map)
130
+ @accumulated_count += accumulator_map.values.sum(&:size)
131
+ end
132
+ end
133
+
134
+ def check_accumulator(accumulator_map)
135
+ # Fork if any queue contains more than N IDs
136
+ accumulator_map.each do |model, ids|
137
+ next unless (count = ids.size) >= ACCUMULATOR_MAX_SINGLE_QUEUE
138
+ fork_consumer({ model => accumulator_map.delete(model) })
139
+ @accumulated_count += count
140
+ GC.start
141
+ end
142
+
143
+ # Fork if all queues together contain more than M IDs
144
+ if (count = accumulator_map.values.sum(&:size)) > ACCUMULATOR_MAX_ALL_QUEUES # rubocop:disable Style/GuardClause
145
+ fork_consumer(accumulator_map)
146
+ accumulator_map.clear
147
+ @accumulated_count += count
148
+ GC.start
149
+ end
150
+ end
151
+
152
+ def fork_consumer(object_map)
153
+ wait_max_consumer_processes
154
+ before_consumer_fork!
155
+ Rails.logger.info { "[Consumer] Forking..." }
156
+ @consumer_pids << fork do
157
+ after_consumer_fork!
158
+ GC.start
159
+ @pid = Process.pid
160
+ Rails.logger.info { "[Consumer]#{format_pid} started." }
161
+ consume_object_ids(object_map)
162
+ end
163
+ end
164
+
165
+ def consume_object_ids(object_map)
166
+ @consumer_thread_index = 0
167
+ @consumed_count = 0
168
+ @consumer_threads = []
169
+ @consumer_mutex = Mutex.new
170
+ @consumer_semaphore = Concurrent::Semaphore.new(max_threads)
171
+ object_map.each do |model, object_ids|
172
+ object_ids.uniq.in_groups_of(CONSUMER_THREAD_BATCH_SIZE, false).each do |object_ids_batch|
173
+ @consumer_threads << start_consumer_thread(model, object_ids_batch)
174
+ end
175
+ end
176
+ @consumer_threads.each(&:join)
177
+ after_consumer_finished!
178
+ Rails.logger.info { "[Consumer]#{format_pid}: Done. #{@consumed_count} objects consumed." }
179
+ end
180
+
181
+ def start_consumer_thread(model, object_ids)
182
+ @consumer_semaphore.acquire
183
+ thread_index = @consumer_mutex.synchronize { @consumer_thread_index += 1 }
184
+ Thread.new do
185
+ with_mongo_secondary(model) do
186
+ model_name = model.to_s.demodulize
187
+ Rails.logger.info { "[Consumer]#{format_pid}: Started thread #{thread_index}." }
188
+ object_ids.in_groups_of(CONSUMER_QUERY_BATCH_SIZE, false).each do |object_ids_batch|
189
+ on_retry = ->(e, attempts) { Rails.logger.warn { "[Consumer]#{format_pid}, Thread #{thread_index}: Error consuming #{model_name}, attempt #{attempts} of #{max_retries}: #{e.class}: #{e.message}. Retrying..." } }
190
+ on_failure = ->(e, _attempts) { Rails.logger.error { "[Consumer]#{format_pid}, Thread #{thread_index}: Failed consuming #{model_name} after #{max_retries} attempts: #{e.class}: #{e.message}. Skipping." } }
191
+ with_retry(on_retry: on_retry, on_failure: on_failure) do
192
+ count = consume_batch(model, object_ids_batch)
193
+ @consumer_mutex.synchronize { @consumed_count += count }
194
+ Rails.logger.info { "[Consumer]#{format_pid}, Thread #{thread_index}: Consumed #{count} #{model_name} objects." }
195
+ end
196
+ end
197
+ end
198
+ @consumer_semaphore.release
199
+ end
200
+ end
201
+
202
+ def consume_batch(model, object_ids)
203
+ count = 0
204
+ consumer_scope(model, object_ids).each do |object|
205
+ consume_object(object)
206
+ count += 1
207
+ rescue StandardError => e
208
+ Bugsnag.notify(e) {|r| r.add_metadata('publisher', model: model.to_s, object_id: object&._id) }
209
+ end
210
+ count
211
+ end
212
+
213
+ def consumer_scope(model, object_ids)
214
+ includes = MODEL_INCLUDES[model.to_s].presence
215
+ scope = model.unscoped.any_in(_id: object_ids)
216
+ scope = scope.includes(includes) if includes
217
+ scope
218
+ end
219
+
220
+ def wait_max_consumer_processes
221
+ return if @consumer_pids.size < max_processes
222
+ begin
223
+ pid = Process.wait
224
+ @consumer_pids.delete(pid)
225
+ rescue Errno::ECHILD # rubocop:disable Lint/SuppressedException
226
+ end
227
+ end
228
+
229
+ def wait_all_consumer_processes
230
+ @consumer_pids.each do |pid|
231
+ Process.wait(pid)
232
+ @consumer_pids.delete(pid)
233
+ rescue Errno::ECHILD
234
+ @consumer_pids.delete(pid)
235
+ end
236
+ end
237
+
238
+ def time_range
239
+ @time_range ||= in_time_zone_and_locale do
240
+ start_time = @start_time&.beginning_of_day if @start_time.is_a?(Date)
241
+ start_time ||= @start_time&.in_time_zone
242
+ raise ArgumentError.new('Must specify :start_time') unless start_time
243
+
244
+ end_time = @end_time&.end_of_day if @end_time.is_a?(Date)
245
+ end_time ||= @end_time&.in_time_zone
246
+
247
+ start_time..end_time
248
+ end
249
+ end
250
+
251
+ def time_range_in_batches(model)
252
+ time_ranges = []
253
+ t = time_range.first
254
+ t_end = time_range.end
255
+ now = Time.current
256
+ batch_size = time_range_batch_size(model)
257
+ while t < (t_end || now)
258
+ t_next = t + batch_size
259
+ t_batch_end = if t_end&.<=(t_next)
260
+ t_end
261
+ elsif now <= t_next
262
+ nil
263
+ else
264
+ t_next
265
+ end
266
+ time_ranges << (t..t_batch_end)
267
+ t = t_next
268
+ end
269
+ time_ranges
270
+ end
271
+
272
+ def time_range_batch_size(model)
273
+ @time_range_batch_size ||= model.to_s.in?(MODELS_TRANSACTIONAL) ? 1.hour : 1.day
274
+ end
275
+
276
+ def in_time_zone_and_locale(&block)
277
+ Time.use_zone(TIME_ZONE) do
278
+ I18n.with_locale(LOCALE, &block)
279
+ end
280
+ end
281
+
282
+ def max_processes
283
+ @max_processes ||= ENV['WEB_CONCURRENCY']&.to_i ||
284
+ ENV['MAX_PROCESSES']&.to_i ||
285
+ (Rails.env.in?(%w[production staging]) ? `nproc`.to_i : 1)
286
+ end
287
+
288
+ def max_threads
289
+ @max_threads ||= ENV['RAILS_MAX_THREADS']&.to_i ||
290
+ Mongoid.default_client.options['max_pool_size']
291
+ end
292
+
293
+ def job_start_at
294
+ @job_start_at ||= Time.current
295
+ end
296
+
297
+ def job_consumer_start_at
298
+ @job_consumer_start_at ||= Time.current
299
+ end
300
+
301
+ def job_end_at
302
+ @job_end_at ||= Time.current
303
+ end
304
+
305
+ def bootstrap!
306
+ max_processes
307
+ max_threads
308
+ Rails.application.eager_load!
309
+ end
310
+
311
+ def before_job_start!
312
+ # Can be overridden in subclass
313
+ end
314
+
315
+ def before_consumer_fork!
316
+ ::Mongoid.disconnect_clients
317
+ # Can be overridden in subclass
318
+ end
319
+
320
+ def after_consumer_fork!
321
+ ::Mongoid.reconnect_clients
322
+ # Can be overridden in subclass
323
+ end
324
+
325
+ def after_consumer_finished!
326
+ # Can be overridden in subclass
327
+ end
328
+
329
+ def after_job_finished!
330
+ # Can be overridden in subclass
331
+ end
332
+
333
+ def end_object
334
+ :eoq
335
+ end
336
+
337
+ def with_retry(on_retry: nil, on_failure: nil)
338
+ attempts = 0
339
+ begin
340
+ yield
341
+ rescue StandardError => e
342
+ attempts += 1
343
+ if attempts <= max_retries
344
+ on_retry&.call(e, attempts)
345
+ sleep_with_backoff(attempts)
346
+ retry
347
+ else
348
+ on_failure&.call(e, attempts)
349
+ end
350
+ end
351
+ end
352
+
353
+ def sleep_with_backoff(attempts)
354
+ sleep((5**attempts) / 100.0 + rand(0.05..1))
355
+ end
356
+
357
+ def with_mongo_secondary(model, &)
358
+ read_opts = { mode: :secondary_preferred }
359
+ model.with(read: read_opts, &)
360
+ end
361
+
362
+ def report_job_started
363
+ Rails.logger.info { "#{job_name} started.\n#{job_info_message}" }
364
+ end
365
+
366
+ def report_job_finished
367
+ Rails.logger.info { "#{job_name} finished.\n#{job_info_message(finished: true)}" }
368
+ end
369
+
370
+ def job_name
371
+ self.class.name.demodulize
372
+ end
373
+
374
+ def job_info_message(finished: false)
375
+ data = job_info_data(finished: finished)
376
+ just = data.keys.map(&:size).max
377
+ data.map do |k, v|
378
+ " #{k.to_s.ljust(just)} #{v}"
379
+ end.join("\n")
380
+ end
381
+
382
+ def job_info_data(finished: false)
383
+ data = { job_start_at: format_time(job_start_at) }
384
+ if finished
385
+ count = @accumulated_count
386
+ runtime = job_end_at - job_start_at
387
+ rate = count / (runtime / 60.0)
388
+ data[:job_end_at] = format_time(job_end_at)
389
+ data[:object_count] = "#{count} objects published"
390
+ data[:job_runtime] = "#{runtime.round} seconds"
391
+ data[:job_rate] = "#{rate.round} objects / minute"
392
+ end
393
+ data[:query_start_at] = format_time(time_range.begin) || 'none'
394
+ data[:query_end_at] = format_time(time_range.end) || 'none'
395
+ data[:max_processes] = max_processes
396
+ data[:max_threads] = max_threads
397
+ data[:max_retries] = max_retries
398
+ data.compact_blank!
399
+ end
400
+
401
+ def format_time_range(range)
402
+ "#{format_time(range.begin)}..#{format_time(range.end)}"
403
+ end
404
+
405
+ def format_time(time)
406
+ time&.strftime('%Y-%m-%d %H:%M:%S %z')
407
+ end
408
+
409
+ def format_pid
410
+ " PID #{@pid}" if @pid
411
+ end
412
+ end
413
+ end
data/lib/minigun/version.rb ADDED
@@ -0,0 +1,5 @@
1
# frozen_string_literal: true

module Minigun
  # Current gem release.
  VERSION = '0.0.1'
end
metadata ADDED
@@ -0,0 +1,47 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: minigun
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Johnny Shields
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2024-09-27 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: A lightweight framework for rapid-fire batch job processing.
14
+ email: johnny.shields@gmail.com
15
+ executables: []
16
+ extensions: []
17
+ extra_rdoc_files: []
18
+ files:
19
+ - LICENSE
20
+ - README.md
21
+ - lib/minigun/runner.rb
22
+ - lib/minigun/version.rb
23
+ homepage: https://github.com/tablecheck/minigun
24
+ licenses:
25
+ - MIT
26
+ metadata:
27
+ rubygems_mfa_required: 'true'
28
+ post_install_message:
29
+ rdoc_options: []
30
+ require_paths:
31
+ - lib
32
+ required_ruby_version: !ruby/object:Gem::Requirement
33
+ requirements:
34
+ - - ">="
35
+ - !ruby/object:Gem::Version
36
+ version: '2.7'
37
+ required_rubygems_version: !ruby/object:Gem::Requirement
38
+ requirements:
39
+ - - ">="
40
+ - !ruby/object:Gem::Version
41
+ version: 1.3.6
42
+ requirements: []
43
+ rubygems_version: 3.5.10
44
+ signing_key:
45
+ specification_version: 4
46
+ summary: A lightweight framework for rapid-fire batch job processing.
47
+ test_files: []