dispatch_policy 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. checksums.yaml +4 -4
  2. data/MIT-LICENSE +16 -17
  3. data/README.md +433 -388
  4. data/app/assets/stylesheets/dispatch_policy/application.css +157 -0
  5. data/app/controllers/dispatch_policy/application_controller.rb +45 -1
  6. data/app/controllers/dispatch_policy/dashboard_controller.rb +91 -0
  7. data/app/controllers/dispatch_policy/partitions_controller.rb +122 -0
  8. data/app/controllers/dispatch_policy/policies_controller.rb +94 -267
  9. data/app/controllers/dispatch_policy/staged_jobs_controller.rb +9 -0
  10. data/app/models/dispatch_policy/adaptive_concurrency_stats.rb +11 -81
  11. data/app/models/dispatch_policy/inflight_job.rb +12 -0
  12. data/app/models/dispatch_policy/partition.rb +21 -0
  13. data/app/models/dispatch_policy/staged_job.rb +4 -97
  14. data/app/models/dispatch_policy/tick_sample.rb +11 -0
  15. data/app/views/dispatch_policy/dashboard/index.html.erb +109 -0
  16. data/app/views/dispatch_policy/partitions/index.html.erb +63 -0
  17. data/app/views/dispatch_policy/partitions/show.html.erb +106 -0
  18. data/app/views/dispatch_policy/policies/index.html.erb +15 -37
  19. data/app/views/dispatch_policy/policies/show.html.erb +139 -223
  20. data/app/views/dispatch_policy/shared/_capacity.html.erb +67 -0
  21. data/app/views/dispatch_policy/shared/_hints.html.erb +13 -0
  22. data/app/views/dispatch_policy/shared/_partition_row.html.erb +12 -0
  23. data/app/views/dispatch_policy/staged_jobs/show.html.erb +31 -0
  24. data/app/views/layouts/dispatch_policy/application.html.erb +95 -238
  25. data/config/routes.rb +18 -2
  26. data/db/migrate/20260501000001_create_dispatch_policy_tables.rb +103 -0
  27. data/lib/dispatch_policy/bypass.rb +23 -0
  28. data/lib/dispatch_policy/config.rb +85 -0
  29. data/lib/dispatch_policy/context.rb +50 -0
  30. data/lib/dispatch_policy/cursor_pagination.rb +121 -0
  31. data/lib/dispatch_policy/decision.rb +22 -0
  32. data/lib/dispatch_policy/engine.rb +4 -27
  33. data/lib/dispatch_policy/forwarder.rb +63 -0
  34. data/lib/dispatch_policy/gate.rb +10 -38
  35. data/lib/dispatch_policy/gates/adaptive_concurrency.rb +99 -97
  36. data/lib/dispatch_policy/gates/concurrency.rb +45 -26
  37. data/lib/dispatch_policy/gates/throttle.rb +65 -41
  38. data/lib/dispatch_policy/inflight_tracker.rb +174 -0
  39. data/lib/dispatch_policy/job_extension.rb +155 -0
  40. data/lib/dispatch_policy/operator_hints.rb +126 -0
  41. data/lib/dispatch_policy/pipeline.rb +48 -0
  42. data/lib/dispatch_policy/policy.rb +61 -59
  43. data/lib/dispatch_policy/policy_dsl.rb +120 -0
  44. data/lib/dispatch_policy/railtie.rb +35 -0
  45. data/lib/dispatch_policy/registry.rb +46 -0
  46. data/lib/dispatch_policy/repository.rb +723 -0
  47. data/lib/dispatch_policy/serializer.rb +36 -0
  48. data/lib/dispatch_policy/tick.rb +260 -256
  49. data/lib/dispatch_policy/tick_loop.rb +59 -26
  50. data/lib/dispatch_policy/version.rb +1 -1
  51. data/lib/dispatch_policy.rb +71 -52
  52. data/lib/generators/dispatch_policy/install/install_generator.rb +70 -0
  53. data/lib/generators/dispatch_policy/install/templates/create_dispatch_policy_tables.rb.tt +95 -0
  54. data/lib/generators/dispatch_policy/install/templates/dispatch_tick_loop_job.rb.tt +53 -0
  55. data/lib/generators/dispatch_policy/install/templates/initializer.rb.tt +11 -0
  56. metadata +101 -43
  57. data/CHANGELOG.md +0 -43
  58. data/app/models/dispatch_policy/partition_inflight_count.rb +0 -42
  59. data/app/models/dispatch_policy/partition_observation.rb +0 -76
  60. data/app/models/dispatch_policy/throttle_bucket.rb +0 -41
  61. data/db/migrate/20260424000001_create_dispatch_policy_tables.rb +0 -80
  62. data/db/migrate/20260424000002_create_adaptive_concurrency_stats.rb +0 -22
  63. data/db/migrate/20260424000003_create_adaptive_concurrency_samples.rb +0 -25
  64. data/db/migrate/20260424000004_rename_samples_to_partition_observations.rb +0 -32
  65. data/db/migrate/20260425000001_add_duration_to_partition_observations.rb +0 -8
  66. data/lib/dispatch_policy/active_job_perform_all_later_patch.rb +0 -32
  67. data/lib/dispatch_policy/dispatch_context.rb +0 -53
  68. data/lib/dispatch_policy/dispatchable.rb +0 -123
  69. data/lib/dispatch_policy/gates/fair_interleave.rb +0 -32
  70. data/lib/dispatch_policy/gates/global_cap.rb +0 -26
@@ -0,0 +1,121 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "base64"
4
+ require "json"
5
+
6
+ module DispatchPolicy
7
+ # Tiny keyset-pagination helper for the engine UI. Each sort mode declares
8
+ # a single sortable column plus the row id as a deterministic tiebreaker
9
+ # so two rows can never share the same cursor. NULLable columns are
10
+ # coalesced to a sentinel ('1970-01-01' for timestamps) so the cursor
11
+ # clause stays a simple tuple comparison.
12
+ module CursorPagination
13
+ SENTINEL_TS = "1970-01-01 00:00:00".freeze
14
+
15
+ # name => { sql_order:, cursor_sql:, direction:, label: }
16
+ # cursor_sql is the expression to extract the sort key for a row
17
+ # (used both in ORDER BY and to build the cursor tuple).
18
+ SORTS = {
19
+ "pending" => {
20
+ sql_order: "pending_count DESC, id ASC",
21
+ cursor_sql: "pending_count",
22
+ direction: :desc,
23
+ label: "pending desc"
24
+ },
25
+ "admitted" => {
26
+ sql_order: "total_admitted DESC, id ASC",
27
+ cursor_sql: "total_admitted",
28
+ direction: :desc,
29
+ label: "lifetime admitted"
30
+ },
31
+ "stale" => {
32
+ sql_order: "COALESCE(last_checked_at, TIMESTAMP '#{SENTINEL_TS}') ASC, id ASC",
33
+ cursor_sql: "COALESCE(last_checked_at, TIMESTAMP '#{SENTINEL_TS}')",
34
+ direction: :asc,
35
+ label: "stalest (round-trip)"
36
+ },
37
+ "recent" => {
38
+ sql_order: "COALESCE(last_admit_at, TIMESTAMP '#{SENTINEL_TS}') DESC, id ASC",
39
+ cursor_sql: "COALESCE(last_admit_at, TIMESTAMP '#{SENTINEL_TS}')",
40
+ direction: :desc,
41
+ label: "recent admit"
42
+ },
43
+ "key" => {
44
+ sql_order: "partition_key ASC, id ASC",
45
+ cursor_sql: "partition_key",
46
+ direction: :asc,
47
+ label: "partition key"
48
+ }
49
+ }.freeze
50
+
51
+ DEFAULT_SORT = "pending"
52
+
53
+ module_function
54
+
55
+ def sort_for(name)
56
+ SORTS[name] || SORTS.fetch(DEFAULT_SORT)
57
+ end
58
+
59
+ def encode(value, id)
60
+ Base64.urlsafe_encode64(JSON.dump([value, id]), padding: false)
61
+ end
62
+
63
+ def decode(cursor)
64
+ return nil if cursor.nil? || cursor.empty?
65
+
66
+ decoded = JSON.parse(Base64.urlsafe_decode64(cursor))
67
+ return nil unless decoded.is_a?(Array) && decoded.size == 2
68
+
69
+ decoded
70
+ rescue StandardError
71
+ nil
72
+ end
73
+
74
+ # Apply a cursor tuple (value, id) to an AR scope under the given sort.
75
+ # The tiebreaker on id is always ASC so id strictly advances forward.
76
+ def apply(scope, sort_name, cursor)
77
+ sort = sort_for(sort_name)
78
+ return scope if cursor.nil?
79
+
80
+ value, last_id = cursor
81
+ case sort[:direction]
82
+ when :desc
83
+ scope.where(
84
+ "(#{sort[:cursor_sql]} < ?) OR (#{sort[:cursor_sql]} = ? AND id > ?)",
85
+ value, value, last_id
86
+ )
87
+ when :asc
88
+ scope.where(
89
+ "(#{sort[:cursor_sql]} > ?) OR (#{sort[:cursor_sql]} = ? AND id > ?)",
90
+ value, value, last_id
91
+ )
92
+ end
93
+ end
94
+
95
+ # Read the cursor key from a row using the given sort. Returns the
96
+ # [serialized_value, row_id] pair the next-page cursor link is built from.
97
+ def extract(row, sort_name)
98
+ sort = sort_for(sort_name)
99
+ column = sort[:cursor_sql]
100
+ # cursor_sql may include a COALESCE(...). For row-side extraction we
101
+ # mirror that in Ruby. The coalesced columns are timestamps, so we
102
+ # use Time.at(0) (the epoch) as the equivalent sentinel.
103
+ raw = case column
104
+ when "pending_count", "total_admitted", "partition_key"
105
+ row.send(column)
106
+ when /COALESCE\(last_checked_at,/
107
+ row.last_checked_at || Time.at(0)
108
+ when /COALESCE\(last_admit_at,/
109
+ row.last_admit_at || Time.at(0)
110
+ end
111
+ [serialize_value(raw), row.id]
112
+ end
113
+
114
+ def serialize_value(v)
115
+ case v
116
+ when Time, ActiveSupport::TimeWithZone then v.utc.iso8601(6)
117
+ else v
118
+ end
119
+ end
120
+ end
121
+ end
@@ -0,0 +1,22 @@
1
+ # frozen_string_literal: true
2
+
3
+ module DispatchPolicy
4
+ class Decision
5
+ attr_reader :allowed, :retry_after, :gate_state_patch, :reason
6
+
7
+ def initialize(allowed:, retry_after: nil, gate_state_patch: nil, reason: nil)
8
+ @allowed = allowed
9
+ @retry_after = retry_after
10
+ @gate_state_patch = gate_state_patch
11
+ @reason = reason
12
+ end
13
+
14
+ def self.unlimited
15
+ new(allowed: Float::INFINITY)
16
+ end
17
+
18
+ def self.deny(retry_after: nil, reason: nil)
19
+ new(allowed: 0, retry_after: retry_after, reason: reason)
20
+ end
21
+ end
22
+ end
@@ -3,34 +3,11 @@
3
3
  require "rails/engine"
4
4
 
5
5
  module DispatchPolicy
6
+ # Mounted by the host app. Views, controllers, and AR models live under
7
+ # `app/`; the layout inlines the engine CSS by reading
8
+ # `app/assets/stylesheets/dispatch_policy/application.css` at render time,
9
+ # so no asset pipeline integration is required.
6
10
  class Engine < ::Rails::Engine
7
11
  isolate_namespace DispatchPolicy
8
-
9
- initializer "dispatch_policy.reference_gates" do
10
- config.to_prepare do
11
- # Reference the built-in gates so they register in Gate.registry.
12
- DispatchPolicy::Gates::Concurrency
13
- DispatchPolicy::Gates::Throttle
14
- DispatchPolicy::Gates::GlobalCap
15
- DispatchPolicy::Gates::FairInterleave
16
- DispatchPolicy::Gates::AdaptiveConcurrency
17
-
18
- DispatchPolicy::ActiveJobPerformAllLaterPatch
19
- end
20
- end
21
-
22
- initializer "dispatch_policy.boot_prune", after: :load_config_initializers do
23
- config.to_prepare do
24
- begin
25
- DispatchPolicy::Tick.prune_orphan_gate_rows
26
- DispatchPolicy::Tick.prune_idle_partitions
27
- DispatchPolicy::PartitionObservation.prune!
28
- rescue ActiveRecord::NoDatabaseError,
29
- ActiveRecord::StatementInvalid,
30
- ActiveRecord::ConnectionNotEstablished
31
- # DB not ready — skip silently.
32
- end
33
- end
34
- end
35
12
  end
36
13
  end
@@ -0,0 +1,63 @@
1
+ # frozen_string_literal: true
2
+
3
+ module DispatchPolicy
4
+ # Re-enqueues admitted jobs onto the real ActiveJob adapter under a
5
+ # `Bypass.with` block, so the around_enqueue callback that staged them
6
+ # in the first place lets the call through.
7
+ #
8
+ # Called from inside Tick's admission transaction. With a PG-backed
9
+ # adapter (good_job / solid_queue) the adapter's INSERT shares the
10
+ # transaction, so any exception here aborts the whole admission
11
+ # atomically (staged_jobs return, inflight rows disappear, partition
12
+ # counters revert, adapter rows revert). There is intentionally no
13
+ # rescue here: failures must propagate to roll back the surrounding TX.
14
+ #
15
+ # Bulk path: rows without scheduled_at go through ActiveJob.perform_all_later,
16
+ # which collapses to a single multi-row INSERT on adapters that implement
17
+ # enqueue_all natively (good_job, solid_queue). Rows with scheduled_at
18
+ # keep the per-row path because perform_all_later doesn't accept a
19
+ # wait_until per job.
20
+ module Forwarder
21
+ module_function
22
+
23
+ # @param rows [Array<Hash>] admitted staged_job rows (already deleted from staging)
24
+ # @raise StandardError propagates any error from deserialize / adapter enqueue
25
+ # @raise EnqueueFailed if the adapter's enqueue_all returned without
26
+ # raising but flagged any job as not-successfully-enqueued (the
27
+ # atomic contract requires caller-visible failure so the surrounding
28
+ # TX rolls back).
29
+ def dispatch(rows)
30
+ return if rows.empty?
31
+
32
+ scheduled, immediate = rows.partition { |row| row["scheduled_at"] }
33
+
34
+ if immediate.any?
35
+ jobs = immediate.map { |row| Serializer.deserialize(row["job_data"]) }
36
+ Bypass.with { ::ActiveJob.perform_all_later(jobs) }
37
+ not_enqueued = jobs.reject { |j| j.respond_to?(:successfully_enqueued?) ? j.successfully_enqueued? : true }
38
+ if not_enqueued.any?
39
+ ids = not_enqueued.map(&:job_id).join(", ")
40
+ raise EnqueueFailed,
41
+ "perform_all_later soft-failed #{not_enqueued.size}/#{jobs.size} jobs (#{ids})"
42
+ end
43
+ end
44
+
45
+ scheduled.each do |row|
46
+ job = Serializer.deserialize(row["job_data"])
47
+ wait_until = enqueue_wait_until(row)
48
+ Bypass.with { job.set(wait_until: wait_until).enqueue }
49
+ if job.respond_to?(:successfully_enqueued?) && !job.successfully_enqueued?
50
+ raise EnqueueFailed, "scheduled enqueue soft-failed for #{job.job_id}"
51
+ end
52
+ end
53
+ end
54
+
55
+ def enqueue_wait_until(row)
56
+ ts = row["scheduled_at"]
57
+ return nil unless ts
58
+ ts.is_a?(Time) ? ts : Time.parse(ts.to_s)
59
+ rescue ArgumentError
60
+ nil
61
+ end
62
+ end
63
+ end
@@ -2,48 +2,20 @@
2
2
 
3
3
  module DispatchPolicy
4
4
  class Gate
5
- class << self
6
- def registry
7
- @registry ||= {}
8
- end
9
-
10
- def register(name, klass)
11
- registry[name.to_sym] = klass
12
- end
13
- end
14
-
15
- attr_reader :policy, :partition_by, :name
16
-
17
- def initialize(policy:, name:, partition_by: nil, **opts)
18
- @policy = policy
19
- @name = name
20
- @partition_by = partition_by
21
- configure(**opts)
22
- end
23
-
24
- def configure(**_opts); end
25
-
26
- # Resolve a partition key for a given context.
27
- def partition_key_for(ctx)
28
- return "default" if @partition_by.nil?
29
- @partition_by.call(ctx).to_s
30
- end
31
-
32
- # Subclasses must implement.
33
- def filter(_batch, _context)
5
+ def name
34
6
  raise NotImplementedError
35
7
  end
36
8
 
37
- # Whether this gate keeps an in-flight count that must be released
38
- # when the job finishes.
39
- def tracks_inflight?
40
- false
9
+ # @param ctx [DispatchPolicy::Context]
10
+ # @param partition [Hash] the partitions row (string keys)
11
+ # @param admit_budget [Integer] the budget remaining from earlier gates
12
+ # @return [DispatchPolicy::Decision]
13
+ def evaluate(_ctx, _partition, _admit_budget)
14
+ raise NotImplementedError
41
15
  end
42
16
 
43
- protected
44
-
45
- def resolve(value, ctx)
46
- value.respond_to?(:call) ? value.call(ctx) : value
47
- end
17
+ # Called after a successful admit to update gate-local state.
18
+ # Returns a hash patch to merge into partition.gate_state, or nil.
19
+ def consume(_decision, _admitted_count); nil; end
48
20
  end
49
21
  end
@@ -2,122 +2,124 @@
2
2
 
3
3
  module DispatchPolicy
4
4
  module Gates
5
- # Adaptive variant of :concurrency. The cap per partition (current_max)
6
- # shrinks when the adapter queue backs up (recent queue_lag > target) or
7
- # when performs fail; grows back when workers drain admissions quickly
8
- # (queue_lag near zero). The signal is pure queue wait — admitted_at →
9
- # perform_start — so it reflects "are we admitting too fast?" without
10
- # getting polluted by how long the external work takes.
5
+ # Self-tuning concurrency gate. Like :concurrency but with a
6
+ # per-partition cap (`current_max`) that grows when the adapter
7
+ # queue is empty and shrinks when it builds up. AIMD loop persisted
8
+ # in `dispatch_policy_adaptive_concurrency_stats`.
11
9
  #
12
- # AIMD loop on a per-partition stats row; the underlying in-flight
13
- # counter is the same PartitionInflightCount used by :concurrency.
10
+ # Feedback signal is `queue_lag_ms = perform_start - admitted_at`
11
+ # (time the job spent waiting in the adapter after admission).
12
+ # Pure saturation signal — slow performs in the downstream service
13
+ # don't punish admissions if workers still drain the queue quickly.
14
+ #
15
+ # Update rule applied after each perform (in InflightTracker.track):
16
+ #
17
+ # succeeded? & ewma_lag <= target_lag_ms → current_max += 1
18
+ # succeeded? & ewma_lag > target_lag_ms → current_max *= slow_factor
19
+ # failed? → current_max *= fail_factor
20
+ #
21
+ # Always clamped to >= min. Never grows without bound — the
22
+ # algorithm self-limits via target_lag_ms.
14
23
  class AdaptiveConcurrency < Gate
15
- # alpha is fast enough that a single spike is forgotten in ~3
16
- # observations instead of ~15. slow_factor 0.95 halves the per-
17
- # observation shrink magnitude so the cap no longer overshoots
18
- # after a burst drains the adapter queue.
19
- DEFAULT_EWMA_ALPHA = 0.5
20
- DEFAULT_FAIL_FACTOR = 0.5
21
- DEFAULT_SLOW_FACTOR = 0.95
24
+ DEFAULT_FULL_BACKOFF = 1.0 # seconds
25
+ DEFAULT_EWMA_ALPHA = 0.5 # weight of the new sample in the EWMA
26
+ DEFAULT_FAIL_FACTOR = 0.5 # halve on perform raise
27
+ DEFAULT_SLOW_FACTOR = 0.95 # gentle shrink on overload
22
28
 
23
- # target_lag_ms accepts the legacy alias `target_latency` for
24
- # backwards compatibility.
25
- def configure(initial_max:,
26
- target_lag_ms: nil,
27
- target_latency: nil,
28
- min: 1,
29
- ewma_alpha: DEFAULT_EWMA_ALPHA,
30
- failure_decrease_factor: DEFAULT_FAIL_FACTOR,
31
- overload_decrease_factor: DEFAULT_SLOW_FACTOR)
32
- @initial_max = initial_max
33
- @min = min
34
- @target_lag_ms = target_lag_ms || target_latency
35
- @ewma_alpha = ewma_alpha
36
- @fail_factor = failure_decrease_factor
37
- @slow_factor = overload_decrease_factor
38
- raise ArgumentError, "adaptive_concurrency requires target_lag_ms" if @target_lag_ms.nil?
39
- end
29
+ attr_reader :initial_max, :target_lag_ms, :min,
30
+ :ewma_alpha, :fail_factor, :slow_factor, :full_backoff
40
31
 
41
- def tracks_inflight?
42
- true
32
+ def initialize(initial_max:, target_lag_ms:, min: 1,
33
+ ewma_alpha: DEFAULT_EWMA_ALPHA,
34
+ failure_decrease_factor: DEFAULT_FAIL_FACTOR,
35
+ overload_decrease_factor: DEFAULT_SLOW_FACTOR,
36
+ full_backoff: DEFAULT_FULL_BACKOFF)
37
+ super()
38
+ @initial_max = Integer(initial_max)
39
+ @target_lag_ms = Float(target_lag_ms)
40
+ @min = Integer(min)
41
+ @ewma_alpha = Float(ewma_alpha)
42
+ @fail_factor = Float(failure_decrease_factor)
43
+ @slow_factor = Float(overload_decrease_factor)
44
+ @full_backoff = Float(full_backoff)
45
+ raise ArgumentError, "target_lag_ms must be > 0" unless @target_lag_ms.positive?
46
+ raise ArgumentError, "min must be >= 1" unless @min >= 1
47
+ raise ArgumentError, "initial_max must be >= min" unless @initial_max >= @min
43
48
  end
44
49
 
45
- attr_reader :initial_max, :min, :target_lag_ms,
46
- :ewma_alpha, :fail_factor, :slow_factor
47
-
48
- def filter(batch, context)
49
- by_partition = batch.group_by { |staged| partition_key_for(context.for(staged)) }
50
-
51
- # Seed any missing stats rows so the first admission has something
52
- # to read. Cheap: one INSERT ... ON CONFLICT DO NOTHING per key.
53
- by_partition.each_key do |key|
54
- AdaptiveConcurrencyStats.seed!(
55
- policy_name: policy.name,
56
- gate_name: name,
57
- partition_key: key,
58
- initial_max: resolve(@initial_max, nil).to_i
59
- )
60
- end
50
+ def name
51
+ :adaptive_concurrency
52
+ end
61
53
 
62
- stats = AdaptiveConcurrencyStats.fetch_many(
63
- policy_name: policy.name,
64
- gate_name: name,
65
- partition_keys: by_partition.keys
66
- )
54
+ def evaluate(ctx, partition, admit_budget)
55
+ policy_name = partition["policy_name"]
56
+ key = inflight_partition_key(policy_name, ctx)
67
57
 
68
- in_flight = PartitionInflightCount.fetch_many(
69
- policy_name: policy.name,
70
- gate_name: name,
71
- partition_keys: by_partition.keys
58
+ # Seed lazily so the very first admission has a row to read
59
+ # (and so record_observation can UPDATE without a check).
60
+ Repository.adaptive_seed!(
61
+ policy_name: policy_name,
62
+ partition_key: key,
63
+ initial_max: @initial_max
72
64
  )
73
65
 
74
- min_v = resolve(@min, nil).to_i
66
+ cap = Repository.adaptive_current_max(
67
+ policy_name: policy_name,
68
+ partition_key: key
69
+ ) || @initial_max
70
+ cap = [cap, @min].max
75
71
 
76
- admitted = []
77
- by_partition.each do |partition_key, jobs|
78
- effective_max = stats.dig(partition_key, :current_max) || resolve(@initial_max, nil).to_i
79
- effective_max = [ effective_max, min_v ].max
80
- used = in_flight.fetch(partition_key, 0)
72
+ in_flight = Repository.count_inflight(
73
+ policy_name: policy_name,
74
+ partition_key: key
75
+ )
76
+ remaining = cap - in_flight
81
77
 
82
- # Safety valve: if nothing is in-flight for this partition and
83
- # there's pending, the adapter queue is (or is about to be)
84
- # empty and workers will idle. Ensure we hand over at least
85
- # initial_max so the stream never dries up on its own.
86
- if used.zero? && jobs.any?
87
- effective_max = [ effective_max, resolve(@initial_max, nil).to_i ].max
88
- end
78
+ # Safety valve. AIMD can shrink current_max during a slow burst;
79
+ # if the partition then idles, no observations come in to grow
80
+ # the cap back. When in_flight == 0 we ensure at least
81
+ # initial_max so the partition never fossilizes at min.
82
+ remaining = [remaining, @initial_max].max if in_flight.zero?
89
83
 
90
- jobs.each do |staged|
91
- break unless used < effective_max
92
- admitted << [ staged, partition_key ]
93
- used += 1
94
- end
84
+ if remaining <= 0
85
+ return Decision.new(allowed: 0,
86
+ retry_after: @full_backoff,
87
+ reason: "adaptive_concurrency_full")
95
88
  end
96
89
 
97
- context.record_partitions(admitted, gate: name)
98
- admitted.map(&:first)
90
+ Decision.new(allowed: [remaining, admit_budget].min)
99
91
  end
100
92
 
101
- # Called by Dispatchable#around_perform for each adaptive gate that
102
- # touched this job. Lives on the gate instance because configuration
103
- # (alpha, target_latency, etc.) is per gate.
104
- def record_observation(partition_key:, queue_lag_ms:, succeeded:)
105
- AdaptiveConcurrencyStats.record_observation!(
106
- policy_name: policy.name,
107
- gate_name: name,
108
- partition_key: partition_key.to_s,
109
- queue_lag_ms: queue_lag_ms,
110
- succeeded: succeeded,
111
- alpha: @ewma_alpha,
112
- min: resolve(@min, nil).to_i,
113
- target_lag_ms: resolve(@target_lag_ms, nil).to_f,
114
- fail_factor: @fail_factor,
115
- slow_factor: @slow_factor,
116
- initial_max: resolve(@initial_max, nil).to_i
93
+ # Same canonical scope as the staged_jobs partition_key — every
94
+ # gate in a policy uses `policy.partition_for(ctx)` so the
95
+ # inflight count and the adaptive stats line up exactly.
96
+ def inflight_partition_key(policy_name, ctx)
97
+ policy = DispatchPolicy.registry.fetch(policy_name)
98
+ raise InvalidPolicy, "unknown policy #{policy_name.inspect}" unless policy
99
+ policy.partition_for(ctx)
100
+ end
101
+
102
+ # Called from InflightTracker.track after each perform completes
103
+ # (success or failure). Updates the AIMD state atomically in one
104
+ # SQL statement.
105
+ def record_observation(policy_name:, partition_key:, queue_lag_ms:, succeeded:)
106
+ Repository.adaptive_seed!(
107
+ policy_name: policy_name,
108
+ partition_key: partition_key.to_s,
109
+ initial_max: @initial_max
110
+ )
111
+ Repository.adaptive_record!(
112
+ policy_name: policy_name,
113
+ partition_key: partition_key.to_s,
114
+ queue_lag_ms: queue_lag_ms,
115
+ succeeded: succeeded,
116
+ alpha: @ewma_alpha,
117
+ target_lag_ms: @target_lag_ms,
118
+ fail_factor: @fail_factor,
119
+ slow_factor: @slow_factor,
120
+ min: @min
117
121
  )
118
122
  end
119
123
  end
120
-
121
- Gate.register(:adaptive_concurrency, AdaptiveConcurrency)
122
124
  end
123
125
  end
@@ -2,42 +2,61 @@
2
2
 
3
3
  module DispatchPolicy
4
4
  module Gates
5
+ # Concurrency gate: caps in-flight jobs per partition.
6
+ #
7
+ # The partition scope is the policy's `partition_by`. Inflight rows
8
+ # are written by InflightTracker around_perform with the same key,
9
+ # so this gate's COUNT(*) aggregates the same canonical scope as
10
+ # the staged_jobs row.
5
11
  class Concurrency < Gate
6
- def configure(max:)
7
- @max = max
12
+ DEFAULT_FULL_BACKOFF = 1.0 # seconds
13
+
14
+ attr_reader :max_proc, :full_backoff
15
+
16
+ def initialize(max:, full_backoff: DEFAULT_FULL_BACKOFF)
17
+ super()
18
+ @max_proc = max.respond_to?(:call) ? max : ->(_ctx) { max }
19
+ @full_backoff = full_backoff.to_f
8
20
  end
9
21
 
10
- def tracks_inflight?
11
- true
22
+ def name
23
+ :concurrency
12
24
  end
13
25
 
14
- def filter(batch, context)
15
- by_partition = batch.group_by { |staged| partition_key_for(context.for(staged)) }
26
+ def evaluate(ctx, partition, admit_budget)
27
+ cap = capacity_for(ctx)
28
+ return Decision.deny(retry_after: @full_backoff, reason: "max=0") if cap <= 0
16
29
 
17
- in_flight = PartitionInflightCount.fetch_many(
18
- policy_name: policy.name,
19
- gate_name: name,
20
- partition_keys: by_partition.keys
30
+ in_flight = Repository.count_inflight(
31
+ policy_name: partition["policy_name"],
32
+ partition_key: inflight_partition_key(partition["policy_name"], ctx)
21
33
  )
22
-
23
- admitted = []
24
- by_partition.each do |partition_key, jobs|
25
- jobs.each do |staged|
26
- ctx = context.for(staged)
27
- limit = resolve(@max, ctx).to_i
28
- used = in_flight.fetch(partition_key, 0)
29
- if used < limit
30
- admitted << [ staged, partition_key ]
31
- in_flight[partition_key] = used + 1
32
- end
33
- end
34
+ remaining = cap - in_flight
35
+ if remaining <= 0
36
+ # Stop hammering this partition with COUNT(*) every tick — back off
37
+ # until enough jobs are likely to have finished.
38
+ return Decision.new(allowed: 0, retry_after: @full_backoff, reason: "concurrency_full")
34
39
  end
35
40
 
36
- context.record_partitions(admitted, gate: name)
37
- admitted.map(&:first)
41
+ Decision.new(allowed: [remaining, admit_budget].min)
38
42
  end
39
- end
40
43
 
41
- Gate.register(:concurrency, Concurrency)
44
+ # The inflight key is always the policy's canonical partition
45
+ # value — same as what's stored in staged_jobs.partition_key.
46
+ # This is what makes throttle + concurrency in the same policy
47
+ # enforce their state at exactly one consistent scope.
48
+ def inflight_partition_key(policy_name, ctx)
49
+ policy = DispatchPolicy.registry.fetch(policy_name)
50
+ raise InvalidPolicy, "unknown policy #{policy_name.inspect}" unless policy
51
+ policy.partition_for(ctx)
52
+ end
53
+
54
+ private
55
+
56
+ def capacity_for(ctx)
57
+ value = @max_proc.call(ctx)
58
+ value.nil? ? 0 : Integer(value)
59
+ end
60
+ end
42
61
  end
43
62
  end